mirror of
https://github.com/langbot-app/LangBot.git
synced 2026-06-04 21:06:03 +00:00
Feat/qdrant vdb (#1649)
* feat: Qdrant vector search support Signed-off-by: Anush008 <anushshetty90@gmail.com> * fix: modify env * fix: fix the old version problem * fix: For older versions * perf: minor perf --------- Signed-off-by: Anush008 <anushshetty90@gmail.com> Co-authored-by: Anush008 <anushshetty90@gmail.com> Co-authored-by: Junyan Qin <rockchinq@gmail.com>
This commit is contained in:
@@ -24,23 +24,23 @@ class Retriever(base_service.BaseService):
|
||||
extra_args={}, # TODO: add extra args
|
||||
)
|
||||
|
||||
chroma_results = await self.ap.vector_db_mgr.vector_db.search(kb_id, query_embedding[0], k)
|
||||
vector_results = await self.ap.vector_db_mgr.vector_db.search(kb_id, query_embedding[0], k)
|
||||
|
||||
# 'ids' is always returned by ChromaDB, even if not explicitly in 'include'
|
||||
matched_chroma_ids = chroma_results.get('ids', [[]])[0]
|
||||
distances = chroma_results.get('distances', [[]])[0]
|
||||
chroma_metadatas = chroma_results.get('metadatas', [[]])[0]
|
||||
# 'ids' shape mirrors the Chroma-style response contract for compatibility
|
||||
matched_vector_ids = vector_results.get('ids', [[]])[0]
|
||||
distances = vector_results.get('distances', [[]])[0]
|
||||
vector_metadatas = vector_results.get('metadatas', [[]])[0]
|
||||
|
||||
if not matched_chroma_ids:
|
||||
self.ap.logger.info('No relevant chunks found in Chroma.')
|
||||
if not matched_vector_ids:
|
||||
self.ap.logger.info('No relevant chunks found in vector database.')
|
||||
return []
|
||||
|
||||
result: list[retriever_entities.RetrieveResultEntry] = []
|
||||
|
||||
for i, id in enumerate(matched_chroma_ids):
|
||||
for i, id in enumerate(matched_vector_ids):
|
||||
entry = retriever_entities.RetrieveResultEntry(
|
||||
id=id,
|
||||
metadata=chroma_metadatas[i],
|
||||
metadata=vector_metadatas[i],
|
||||
distance=distances[i],
|
||||
)
|
||||
result.append(entry)
|
||||
|
||||
@@ -3,6 +3,7 @@ from __future__ import annotations
|
||||
from ..core import app
|
||||
from .vdb import VectorDatabase
|
||||
from .vdbs.chroma import ChromaVectorDatabase
|
||||
from .vdbs.qdrant import QdrantVectorDatabase
|
||||
|
||||
|
||||
class VectorDBManager:
|
||||
@@ -13,6 +14,17 @@ class VectorDBManager:
|
||||
self.ap = ap
|
||||
|
||||
async def initialize(self):
    """Pick and instantiate the vector database backend.

    The backend is chosen from the ``use`` key of the ``vdb`` section of the
    instance config: ``'chroma'`` or ``'qdrant'``. A missing ``vdb`` section
    or an unrecognised ``use`` value falls back to Chroma with a warning.
    Nothing happens when ``self.vector_db`` is already set.
    """
    # A backend is already in place; leave it untouched.
    if self.vector_db is not None:
        return

    vdb_settings = self.ap.instance_config.data.get('vdb')
    if not vdb_settings:
        self.vector_db = ChromaVectorDatabase(self.ap)
        self.ap.logger.warning('No vector database backend configured, defaulting to Chroma.')
        return

    backend = vdb_settings.get('use')
    if backend == 'chroma':
        self.vector_db = ChromaVectorDatabase(self.ap)
        self.ap.logger.info('Initialized Chroma vector database backend.')
    elif backend == 'qdrant':
        self.vector_db = QdrantVectorDatabase(self.ap)
        self.ap.logger.info('Initialized Qdrant vector database backend.')
    else:
        self.vector_db = ChromaVectorDatabase(self.ap)
        self.ap.logger.warning('No valid vector database backend configured, defaulting to Chroma.')
|
||||
|
||||
@@ -14,24 +14,25 @@ class VectorDatabase(abc.ABC):
|
||||
metadatas: list[dict[str, Any]],
|
||||
documents: list[str],
|
||||
) -> None:
|
||||
"""向指定 collection 添加向量数据。"""
|
||||
"""Add vector data to the specified collection."""
|
||||
pass
|
||||
|
||||
@abc.abstractmethod
async def search(self, collection: str, query_embedding: np.ndarray, k: int = 5) -> Dict[str, Any]:
    """Search for the most similar vectors in the specified collection.

    Args:
        collection: Name of the collection to query.
        query_embedding: Embedding vector of the query.
        k: Maximum number of matches to return.

    Returns:
        A dict that callers read through the ``'ids'``, ``'distances'`` and
        ``'metadatas'`` keys; each value is a list whose first element holds
        the per-match entries for this query.
    """
|
||||
|
||||
@abc.abstractmethod
async def delete_by_file_id(self, collection: str, file_id: str) -> None:
    """Delete vectors from the specified collection by file_id.

    Args:
        collection: Name of the collection to delete from.
        file_id: Identifier of the file whose vectors should be removed.
    """
|
||||
|
||||
@abc.abstractmethod
async def get_or_create_collection(self, collection: str):
    """Get or create collection.

    Args:
        collection: Name of the collection to fetch, creating it if needed.
    """
|
||||
|
||||
@abc.abstractmethod
async def delete_collection(self, collection: str):
    """Delete collection.

    Args:
        collection: Name of the collection to remove.
    """
|
||||
|
||||
104
pkg/vector/vdbs/qdrant.py
Normal file
104
pkg/vector/vdbs/qdrant.py
Normal file
@@ -0,0 +1,104 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from qdrant_client import AsyncQdrantClient, models
|
||||
from pkg.core import app
|
||||
from pkg.vector.vdb import VectorDatabase
|
||||
|
||||
|
||||
class QdrantVectorDatabase(VectorDatabase):
    """Qdrant-backed implementation of the ``VectorDatabase`` interface.

    Connection settings come from the ``vdb.qdrant`` section of the instance
    config (``url``, ``host``, ``port``, ``api_key``). A non-empty ``url``
    takes precedence over ``host``/``port``.
    """

    def __init__(self, ap: app.Application):
        """Create the async Qdrant client from the instance config.

        Args:
            ap: The running application; provides config and logger.
        """
        self.ap = ap

        # Config files written before the Qdrant backend existed may lack the
        # ``vdb.qdrant`` section (or single keys in it). Fall back to the
        # defaults of the shipped config template instead of raising KeyError.
        vdb_config = self.ap.instance_config.data.get('vdb') or {}
        qdrant_config = vdb_config.get('qdrant') or {}
        url = qdrant_config.get('url')
        host = qdrant_config.get('host') or 'localhost'
        port = qdrant_config.get('port') or 6333
        # The template ships ``api_key: ''``; treat empty as "no key" so an
        # empty credential is never handed to the client.
        api_key = qdrant_config.get('api_key') or None

        if url:
            self.client = AsyncQdrantClient(url=url, api_key=api_key)
        else:
            self.client = AsyncQdrantClient(host=host, port=int(port), api_key=api_key)

        # Names of collections already confirmed to exist, so repeated inserts
        # skip the existence round-trip.
        self._collections: set[str] = set()

    async def _ensure_collection(self, collection: str, vector_size: int) -> None:
        """Create ``collection`` with cosine distance unless it already exists.

        Args:
            collection: Collection name.
            vector_size: Dimensionality used when the collection is created.
        """
        if collection in self._collections:
            return

        if not await self.client.collection_exists(collection):
            await self.client.create_collection(
                collection_name=collection,
                vectors_config=models.VectorParams(size=vector_size, distance=models.Distance.COSINE),
            )
            self.ap.logger.info(f"Qdrant collection '{collection}' created with dim={vector_size}.")

        self._collections.add(collection)

    async def get_or_create_collection(self, collection: str):
        """No-op: Qdrant needs the vector size to create a collection.

        Creation is deferred to the first ``add_embeddings`` call, where the
        embedding dimensionality is known.
        """
        pass

    async def add_embeddings(
        self,
        collection: str,
        ids: List[str],
        embeddings_list: List[List[float]],
        metadatas: List[Dict[str, Any]],
        documents: List[str] | None = None,
    ) -> None:
        """Upsert embeddings and their metadata into ``collection``.

        The collection is created on demand, sized from the first embedding.

        Args:
            collection: Target collection name.
            ids: Point ids, one per embedding.
                NOTE(review): Qdrant restricts accepted point-id formats;
                confirm callers pass ids it accepts.
            embeddings_list: Embedding vectors, parallel to ``ids``.
            metadatas: Payload dicts, parallel to ``ids``.
            documents: Accepted only so calls made through the abstract
                ``add_embeddings`` signature (which declares ``documents``)
                do not fail; the value is not stored.
                NOTE(review): confirm the chunk text is carried in
                ``metadatas`` if it is needed at retrieval time.

        Raises:
            ValueError: If ``ids``, ``embeddings_list`` and ``metadatas``
                differ in length.
        """
        if not embeddings_list:
            return

        # Fail loudly on misaligned inputs rather than hitting an IndexError
        # halfway through or silently dropping trailing items.
        if not (len(ids) == len(embeddings_list) == len(metadatas)):
            raise ValueError(
                'ids, embeddings_list and metadatas must have the same length '
                f'(got {len(ids)}, {len(embeddings_list)}, {len(metadatas)})'
            )

        await self._ensure_collection(collection, len(embeddings_list[0]))

        points = [
            models.PointStruct(id=point_id, vector=vector, payload=payload)
            for point_id, vector, payload in zip(ids, embeddings_list, metadatas)
        ]
        await self.client.upsert(collection_name=collection, points=points)
        self.ap.logger.info(f"Added {len(ids)} embeddings to Qdrant collection '{collection}'.")

    async def search(self, collection: str, query_embedding: list[float], k: int = 5) -> dict[str, Any]:
        """Return the ``k`` nearest points to ``query_embedding``.

        Args:
            collection: Collection to query.
            query_embedding: Query vector.
            k: Maximum number of hits.

        Returns:
            ``{'ids': [[...]], 'metadatas': [[...]], 'distances': [[...]]}``,
            one inner list per key. All inner lists are empty when the
            collection does not exist.
        """
        if not await self.client.collection_exists(collection):
            return {'ids': [[]], 'metadatas': [[]], 'distances': [[]]}

        response = await self.client.query_points(
            collection_name=collection,
            query=query_embedding,
            limit=k,
            with_payload=True,
        )
        hits = response.points

        ids = [str(hit.id) for hit in hits]
        metadatas = [hit.payload or {} for hit in hits]
        # Qdrant's score is similarity; convert to a pseudo-distance for consistency
        distances = [1 - float(hit.score) if hit.score is not None else 1.0 for hit in hits]

        self.ap.logger.info(f"Qdrant search in '{collection}' returned {len(ids)} results.")
        return {'ids': [ids], 'metadatas': [metadatas], 'distances': [distances]}

    async def delete_by_file_id(self, collection: str, file_id: str) -> None:
        """Delete every point whose payload ``file_id`` equals ``file_id``.

        Does nothing when the collection does not exist.
        """
        if not await self.client.collection_exists(collection):
            return

        await self.client.delete(
            collection_name=collection,
            points_selector=models.Filter(
                must=[models.FieldCondition(key='file_id', match=models.MatchValue(value=file_id))]
            ),
        )
        self.ap.logger.info(f"Deleted embeddings from Qdrant collection '{collection}' with file_id: {file_id}")

    async def delete_collection(self, collection: str):
        """Delete ``collection``; failures are logged, never raised.

        The cached "known to exist" entry is dropped up front so a failed or
        partial deletion can never leave a stale cache entry behind.
        """
        self._collections.discard(collection)
        try:
            await self.client.delete_collection(collection)
        except Exception as exc:
            # Best-effort by design, but report the real cause: connection or
            # auth failures must not be mislabelled as "not found".
            self.ap.logger.warning(f"Failed to delete Qdrant collection '{collection}': {exc}")
        else:
            self.ap.logger.info(f"Qdrant collection '{collection}' deleted.")
|
||||
@@ -1,9 +1,9 @@
|
||||
[project]
|
||||
name = "langbot"
|
||||
version = "4.2.2"
|
||||
description = "高稳定、支持扩展、多模态 - 大模型原生即时通信机器人平台"
|
||||
description = "Easy-to-use global IM bot platform designed for LLM era"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.10.1"
|
||||
requires-python = ">=3.10.1,<4.0"
|
||||
dependencies = [
|
||||
"aiocqhttp>=1.4.4",
|
||||
"aiofiles>=24.1.0",
|
||||
@@ -60,6 +60,7 @@ dependencies = [
|
||||
"html2text>=2024.2.26",
|
||||
"langchain>=0.2.0",
|
||||
"chromadb>=0.4.24",
|
||||
"qdrant-client (>=1.15.1,<2.0.0)",
|
||||
]
|
||||
keywords = [
|
||||
"bot",
|
||||
|
||||
@@ -20,3 +20,10 @@ system:
|
||||
jwt:
|
||||
expire: 604800
|
||||
secret: ''
|
||||
vdb:
|
||||
use: chroma
|
||||
qdrant:
|
||||
url: ''
|
||||
host: localhost
|
||||
port: 6333
|
||||
api_key: ''
|
||||
|
||||
Reference in New Issue
Block a user