feat(provider): add Chroma built-in embedding requester

Add chromaembed.py using Chroma's DefaultEmbeddingFunction (all-MiniLM-L6-v2)
for local embedding generation via ONNX Runtime. Also simplify seekdbembed.py
and add ndarray-to-list conversion for JSON serialization compatibility.
This commit is contained in:
youhuanghe
2026-04-18 11:30:11 +00:00
parent 2838020580
commit 05c684d757
4 changed files with 97 additions and 7 deletions

View File

@@ -0,0 +1,7 @@
<svg width="24" height="24" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg">
<rect width="24" height="24" rx="5" fill="#7B68EE"/>
<circle cx="12" cy="12" r="6" fill="#FF6B35"/>
<circle cx="12" cy="12" r="3" fill="#7B68EE"/>
<path d="M12 6V18" stroke="#FFF" stroke-width="1.5" stroke-linecap="round"/>
<path d="M6 12H18" stroke="#FFF" stroke-width="1.5" stroke-linecap="round"/>
</svg>

After

Width:  |  Height:  |  Size: 413 B

View File

@@ -0,0 +1,61 @@
from __future__ import annotations
import typing
from .. import requester
REQUESTER_NAME: str = 'chroma-embedding'
class ChromaEmbedding(requester.ProviderAPIRequester):
"""Chroma built-in embedding requester.
Uses chromadb's DefaultEmbeddingFunction (all-MiniLM-L6-v2).
The embedding function runs locally using ONNX Runtime.
"""
default_config: dict[str, typing.Any] = {
'base_url': '',
}
_embedding_function = None
async def initialize(self):
try:
from chromadb.utils import embedding_functions
except ImportError:
raise ImportError('chromadb is not installed. Install it with: pip install chromadb')
self._embedding_function = embedding_functions.DefaultEmbeddingFunction()
async def invoke_llm(
self,
query,
model: requester.RuntimeLLMModel,
messages: typing.List,
funcs: typing.List = None,
extra_args: dict[str, typing.Any] = {},
remove_think: bool = False,
):
raise NotImplementedError('Chroma embedding does not support LLM inference')
async def invoke_embedding(
self,
model: requester.RuntimeEmbeddingModel,
input_text: typing.List[str],
extra_args: dict[str, typing.Any] = {},
) -> typing.List[typing.List[float]]:
"""Generate embeddings using Chroma's DefaultEmbeddingFunction."""
if self._embedding_function is None:
await self.initialize()
try:
result = self._embedding_function(input_text)
# DefaultEmbeddingFunction returns list of ndarray, convert for JSON
if isinstance(result, list):
return [item.tolist() if hasattr(item, 'tolist') else item for item in result]
return result.tolist() if hasattr(result, 'tolist') else result
except Exception as e:
from .. import errors
raise errors.RequesterError(f'Chroma embedding failed: {str(e)}')

View File

@@ -0,0 +1,21 @@
apiVersion: v1
kind: LLMAPIRequester
metadata:
name: chroma-embedding
label:
en_US: Chroma Embedding
zh_Hans: Chroma 嵌入
description:
en_US: Chroma built-in embedding model (all-MiniLM-L6-v2), runs locally using ONNX Runtime. First-time use will download model files automatically.
zh_Hans: 使用 Chroma 内置嵌入模型 (all-MiniLM-L6-v2),基于 ONNX Runtime 本地运行。首次使用时将自动下载模型文件。
ja_JP: Chroma 組み込み埋め込みモデル (all-MiniLM-L6-v2) を使用します。ONNX Runtime でローカル実行。初回使用時にモデルファイルが自動ダウンロードされます。
icon: chroma.svg
spec:
config: []
support_type:
- text-embedding
provider_category: builtin
execution:
python:
path: ./chromaembed.py
attr: ChromaEmbedding

View File

@@ -46,14 +46,15 @@ class SeekDBEmbedding(requester.ProviderAPIRequester):
extra_args: dict[str, typing.Any] = {},
) -> typing.List[typing.List[float]]:
"""Generate embeddings using SeekDB's built-in embedding function."""
if self._embedding_function is None:
await self.initialize()
try:
if self._embedding_function is None:
await self.initialize()
if self._embedding_function is None:
raise RuntimeError('SeekDB embedding function initialization failed')
return self._embedding_function(input_text)
result = self._embedding_function(input_text)
# Ensure JSON serialization compatibility
if isinstance(result, list):
return [item.tolist() if hasattr(item, 'tolist') else item for item in result]
return result.tolist() if hasattr(result, 'tolist') else result
except Exception as e:
from .. import errors