mirror of
https://github.com/langbot-app/LangBot.git
synced 2026-06-08 06:46:02 +00:00
Add chromaembed.py using Chroma's DefaultEmbeddingFunction (all-MiniLM-L6-v2) for local embedding generation via ONNX Runtime. Also simplify seekdbembed.py and add ndarray-to-list conversion for JSON serialization compatibility.
62 lines
1.9 KiB
Python
62 lines
1.9 KiB
Python
from __future__ import annotations
|
|
|
|
import typing
|
|
|
|
from .. import requester
|
|
|
|
# Name string for this requester module. It is not referenced anywhere else in
# this file -- presumably read by the requester discovery/registry code (TODO confirm).
REQUESTER_NAME: str = 'seekdb-embedding'
|
|
|
|
|
|
class SeekDBEmbedding(requester.ProviderAPIRequester):
    """SeekDB built-in embedding requester.

    Uses pyseekdb's local embedding function (all-MiniLM-L6-v2).
    The base_url config is reserved for future remote embedding support.
    """

    default_config: dict[str, typing.Any] = {
        'base_url': '',
    }

    # Embedding callable obtained from pyseekdb. Stays None until initialize()
    # has run; invoke_embedding() triggers that lazily on first use.
    _embedding_function = None

    async def initialize(self):
        """Load pyseekdb's default embedding function and cache it on the instance.

        Raises:
            ImportError: If the ``pyseekdb`` package cannot be imported.
        """
        try:
            import pyseekdb
        except ImportError as e:
            # Chain the original error so the underlying import failure stays
            # visible in the traceback instead of being replaced by the hint.
            raise ImportError('pyseekdb is not installed. Install it with: pip install pyseekdb') from e

        self._embedding_function = pyseekdb.get_default_embedding_function()

    async def invoke_llm(
        self,
        query,
        model: requester.RuntimeLLMModel,
        messages: typing.List,
        funcs: typing.Optional[typing.List] = None,
        # Default left unchanged to keep the signature stable; it is never read or mutated here.
        extra_args: dict[str, typing.Any] = {},
        remove_think: bool = False,
    ):
        """Reject LLM inference; this requester only produces embeddings.

        All arguments are ignored.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError('SeekDB embedding does not support LLM inference')

    async def invoke_embedding(
        self,
        model: requester.RuntimeEmbeddingModel,
        input_text: typing.List[str],
        # Default left unchanged to keep the signature stable; it is never read or mutated here.
        extra_args: dict[str, typing.Any] = {},
    ) -> typing.List[typing.List[float]]:
        """Generate embeddings using SeekDB's built-in embedding function.

        Args:
            model: Not used by this implementation.
            input_text: Texts to embed.
            extra_args: Not used by this implementation.

        Returns:
            The embedding function's output, with anything exposing ``tolist()``
            (e.g. numpy arrays) converted to plain Python lists so the result
            can be JSON-serialized. An empty list when ``input_text`` is empty.

        Raises:
            ImportError: If lazy initialization cannot import ``pyseekdb``.
            RequesterError: If the embedding call or the result conversion fails.
        """
        # Nothing to embed: answer directly instead of loading the model and
        # handing the embedding function an empty batch.
        if not input_text:
            return []

        # Lazy initialization for callers that never awaited initialize().
        if self._embedding_function is None:
            await self.initialize()

        try:
            # NOTE(review): this is a synchronous call inside a coroutine; if it
            # performs heavy local inference it will block the event loop -- confirm
            # whether it should be moved to a worker thread.
            result = self._embedding_function(input_text)
            # Ensure JSON serialization compatibility
            if isinstance(result, list):
                return [item.tolist() if hasattr(item, 'tolist') else item for item in result]
            return result.tolist() if hasattr(result, 'tolist') else result
        except Exception as e:
            from .. import errors

            # Explicitly chain the cause so the original traceback is preserved.
            raise errors.RequesterError(f'SeekDB embedding failed: {str(e)}') from e
|