chore: stash

This commit is contained in:
Junyan Qin
2025-07-16 11:31:55 +08:00
parent 67bc065ccd
commit f731115805
10 changed files with 123 additions and 103 deletions

View File

@@ -1,13 +1,18 @@
from __future__ import annotations
from ..core import app
from .vdb import VectorDatabase
from .vdbs.chroma import ChromaVectorDatabase
class VectorDBManager:
    """Hold the application's vector database backend.

    The backend is not created in the constructor; ``vector_db`` stays
    ``None`` until :meth:`initialize` has been awaited.
    """

    # Application object; passed to the backend when it is constructed.
    ap: app.Application

    # Active backend instance, or None before initialize() has run.
    vector_db: VectorDatabase | None = None

    def __init__(self, ap: app.Application):
        """Store the application reference.

        Args:
            ap: The application object this manager belongs to.
        """
        self.ap = ap

    async def initialize(self) -> None:
        """Create the vector database backend if none is set yet.

        A backend that was already assigned to ``vector_db`` is kept
        untouched, so awaiting this method more than once is harmless.
        """
        # Chroma is the only backend constructed here; other
        # VectorDatabase implementations could be selected at this point.
        if self.vector_db is None:
            self.vector_db = ChromaVectorDatabase(self.ap)

View File

@@ -1,7 +1,33 @@
from __future__ import annotations
import abc
from typing import Any, List, Dict
import numpy as np
class VectorDatabase(abc.ABC):
    """Abstract interface that every vector database backend implements.

    Data is addressed by collection name. Concrete subclasses must
    provide all four methods below before they can be instantiated.
    """

    @abc.abstractmethod
    def add_embeddings(
        self,
        collection: str,
        ids: List[str],
        embeddings: np.ndarray,
        metadatas: List[Dict[str, Any]],
        documents: List[str],
    ) -> None:
        """Add vector data to the given collection.

        Args:
            collection: Name of the target collection.
            ids: Identifiers of the entries being added.
            embeddings: Embedding vectors to store.
            metadatas: Metadata dictionaries for the entries.
            documents: Document texts for the entries.

        NOTE(review): ``ids``, ``embeddings``, ``metadatas`` and
        ``documents`` are presumably parallel (one item per entry);
        confirm against the concrete backend.
        """

    @abc.abstractmethod
    def search(self, collection: str, query_embedding: np.ndarray, k: int = 5) -> Dict[str, Any]:
        """Retrieve the most similar vectors in the given collection.

        Args:
            collection: Name of the collection to search.
            query_embedding: Embedding to compare against.
            k: Maximum number of results requested.

        Returns:
            A backend-defined result dictionary.
        """

    @abc.abstractmethod
    def delete_by_metadata(self, collection: str, where: Dict[str, Any]) -> None:
        """Delete vectors from the given collection by metadata filter.

        Args:
            collection: Name of the collection to delete from.
            where: Metadata filter selecting the entries to remove.
        """

    @abc.abstractmethod
    def get_or_create_collection(self, collection: str) -> Any:
        """Get the named collection, creating it if necessary.

        Args:
            collection: Name of the collection.

        Returns:
            The backend's handle for the collection.
        """

View File

46
pkg/vector/vdbs/chroma.py Normal file
View File

@@ -0,0 +1,46 @@
from __future__ import annotations
import numpy as np
from typing import Any, List, Dict
from chromadb import PersistentClient
from pkg.vector.vdb import VectorDatabase
from pkg.core import app
class ChromaVectorDatabase(VectorDatabase):
    """VectorDatabase implementation backed by a Chroma ``PersistentClient``."""

    def __init__(self, ap: app.Application, base_path: str = './data/chroma'):
        """Open the Chroma client.

        Args:
            ap: Application object; its ``logger`` is used for all log output.
            base_path: Path handed to ``PersistentClient``.
        """
        self.ap = ap
        self.client = PersistentClient(path=base_path)
        # Collection handles already fetched from the client, keyed by name,
        # so each collection is requested from Chroma only once.
        self._collections: Dict[str, Any] = {}

    def get_or_create_collection(self, collection: str):
        """Return the handle for ``collection``, fetching/creating it on first use.

        Args:
            collection: Name of the Chroma collection.

        Returns:
            The collection object returned by the Chroma client.
        """
        if collection not in self._collections:
            self._collections[collection] = self.client.get_or_create_collection(name=collection)
            self.ap.logger.info(f"Chroma collection '{collection}' accessed/created.")
        return self._collections[collection]

    def add_embeddings(
        self,
        collection: str,
        ids: List[str],
        embeddings: np.ndarray,
        metadatas: List[Dict[str, Any]],
        documents: List[str],
    ) -> None:
        """Add embeddings with their ids, metadata and documents to a collection.

        An empty ``ids`` list is treated as "nothing to do" and returns
        without contacting Chroma.

        Args:
            collection: Name of the target collection.
            ids: Identifiers of the entries.
            embeddings: Embedding vectors; any array-like accepted by
                ``numpy.asarray`` works.
            metadatas: Metadata dictionaries for the entries.
            documents: Document texts for the entries.
        """
        if not ids:
            # Nothing to insert. This also avoids handing Chroma an empty
            # batch, which its input validation is expected to reject.
            self.ap.logger.debug(f"No embeddings to add to Chroma collection '{collection}'; skipped.")
            return

        col = self.get_or_create_collection(collection)
        # Chroma receives plain Python lists; asarray lets callers pass
        # either an ndarray or nested lists.
        vectors = np.asarray(embeddings).tolist()
        col.add(embeddings=vectors, ids=ids, metadatas=metadatas, documents=documents)
        self.ap.logger.info(f"Added {len(ids)} embeddings to Chroma collection '{collection}'.")

    def search(self, collection: str, query_embedding: np.ndarray, k: int = 5) -> Dict[str, Any]:
        """Query a collection for the entries nearest to ``query_embedding``.

        Args:
            collection: Name of the collection to search.
            query_embedding: Query vector(s); any array-like accepted by
                ``numpy.asarray`` works.
            k: Number of results requested from Chroma.

        Returns:
            The result dictionary produced by Chroma's ``query`` call,
            including metadatas, distances and documents.
        """
        col = self.get_or_create_collection(collection)
        results = col.query(
            query_embeddings=np.asarray(query_embedding).tolist(),
            n_results=k,
            include=['metadatas', 'distances', 'documents'],
        )
        # Count hits defensively: a missing, None or empty 'ids' entry must
        # not make the log line raise and mask an otherwise valid result.
        id_batches = results.get('ids') or [[]]
        hit_count = len(id_batches[0])
        self.ap.logger.debug(f"Chroma search in '{collection}' returned {hit_count} results.")
        return results

    def delete_by_metadata(self, collection: str, where: Dict[str, Any]) -> None:
        """Delete entries from a collection that match a metadata filter.

        Args:
            collection: Name of the collection to delete from.
            where: Filter passed through unchanged as Chroma's ``where`` argument.
        """
        col = self.get_or_create_collection(collection)
        col.delete(where=where)
        self.ap.logger.info(f"Deleted embeddings from Chroma collection '{collection}' with filter: {where}")