class ChromaEmbedding(requester.ProviderAPIRequester):
    """Chroma built-in embedding requester.

    Uses chromadb's DefaultEmbeddingFunction (all-MiniLM-L6-v2).
    The embedding function runs locally using ONNX Runtime.

    The embedding function is created lazily (either by an explicit
    ``initialize()`` call or on the first ``invoke_embedding()``), so
    ``chromadb`` is only imported when this requester is actually used.
    """

    default_config: dict[str, typing.Any] = {
        'base_url': '',
    }

    # Callable produced by initialize(); None until initialization has run.
    _embedding_function = None

    # asyncio.Lock created on first use. It keeps model invocations strictly
    # one-at-a-time (as they were when the call ran inline on the event loop)
    # now that the call is executed in a worker thread.
    _embedding_lock = None

    async def initialize(self):
        """Create the Chroma default embedding function.

        Raises:
            ImportError: if the ``chromadb`` package is not installed.
        """
        try:
            from chromadb.utils import embedding_functions
        except ImportError as e:
            # Chain the original error so the failing import stays visible.
            raise ImportError('chromadb is not installed. Install it with: pip install chromadb') from e

        self._embedding_function = embedding_functions.DefaultEmbeddingFunction()

    async def invoke_llm(
        self,
        query,
        model: requester.RuntimeLLMModel,
        messages: typing.List,
        funcs: typing.Optional[typing.List] = None,
        extra_args: dict[str, typing.Any] = {},
        remove_think: bool = False,
    ):
        """Always raise: this requester only provides embeddings.

        Raises:
            NotImplementedError: unconditionally.
        """
        raise NotImplementedError('Chroma embedding does not support LLM inference')

    @staticmethod
    def _to_json_compatible(result):
        """Convert ndarray-like values (anything exposing ``tolist``) to plain lists.

        A list is converted element by element; any other object is converted
        as a whole. Values without ``tolist`` are returned unchanged.
        """
        if isinstance(result, list):
            return [item.tolist() if hasattr(item, 'tolist') else item for item in result]
        return result.tolist() if hasattr(result, 'tolist') else result

    async def invoke_embedding(
        self,
        model: requester.RuntimeEmbeddingModel,
        input_text: typing.List[str],
        extra_args: dict[str, typing.Any] = {},
    ) -> typing.List[typing.List[float]]:
        """Generate embeddings using Chroma's DefaultEmbeddingFunction.

        Args:
            model: Runtime embedding model descriptor (not read here).
            input_text: Texts to embed.
            extra_args: Extra request arguments (not read and never mutated here).

        Returns:
            One embedding per input text, as plain Python lists so the result
            is JSON serializable. An empty input yields an empty list.

        Raises:
            ImportError: if lazy initialization fails because ``chromadb`` is missing.
            errors.RequesterError: if the embedding function itself fails.
        """
        # Nothing to embed: skip loading or invoking the model entirely.
        if not input_text:
            return []

        if self._embedding_function is None:
            await self.initialize()

        import asyncio

        # No await between the check and the assignment, so this lazy creation
        # cannot race with another coroutine on the same event loop.
        if self._embedding_lock is None:
            self._embedding_lock = asyncio.Lock()

        try:
            async with self._embedding_lock:
                # The embedding function is synchronous and CPU-bound (and may
                # download model files on first use); run it in a worker thread
                # so the event loop keeps serving other tasks meanwhile.
                result = await asyncio.get_running_loop().run_in_executor(
                    None, self._embedding_function, input_text
                )
            # DefaultEmbeddingFunction returns list of ndarray, convert for JSON
            return self._to_json_compatible(result)
        except Exception as e:
            from .. import errors

            raise errors.RequesterError(f'Chroma embedding failed: {str(e)}') from e
First-time use will download model files automatically. + zh_Hans: 使用 Chroma 内置嵌入模型 (all-MiniLM-L6-v2),基于 ONNX Runtime 本地运行。首次使用时将自动下载模型文件。 + ja_JP: Chroma 組み込み埋め込みモデル (all-MiniLM-L6-v2) を使用します。ONNX Runtime でローカル実行。初回使用時にモデルファイルが自動ダウンロードされます。 + icon: chroma.svg +spec: + config: [] + support_type: + - text-embedding + provider_category: builtin +execution: + python: + path: ./chromaembed.py + attr: ChromaEmbedding \ No newline at end of file diff --git a/src/langbot/pkg/provider/modelmgr/requesters/seekdbembed.py b/src/langbot/pkg/provider/modelmgr/requesters/seekdbembed.py index 7fd98d69..4b881dbf 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/seekdbembed.py +++ b/src/langbot/pkg/provider/modelmgr/requesters/seekdbembed.py @@ -46,14 +46,15 @@ class SeekDBEmbedding(requester.ProviderAPIRequester): extra_args: dict[str, typing.Any] = {}, ) -> typing.List[typing.List[float]]: """Generate embeddings using SeekDB's built-in embedding function.""" + if self._embedding_function is None: + await self.initialize() + try: - if self._embedding_function is None: - await self.initialize() - - if self._embedding_function is None: - raise RuntimeError('SeekDB embedding function initialization failed') - - return self._embedding_function(input_text) + result = self._embedding_function(input_text) + # Ensure JSON serialization compatibility + if isinstance(result, list): + return [item.tolist() if hasattr(item, 'tolist') else item for item in result] + return result.tolist() if hasattr(result, 'tolist') else result except Exception as e: from .. import errors