mirror of
https://github.com/langbot-app/LangBot.git
synced 2026-06-05 05:16:03 +00:00
* Initial plan * Add package structure and resource path utilities - Created langbot/ package with __init__.py and __main__.py entry point - Added paths utility to find frontend and resource files from package installation - Updated config loading to use resource paths - Updated frontend serving to use resource paths - Added MANIFEST.in for package data inclusion - Updated pyproject.toml with build system and entry points Co-authored-by: RockChinQ <45992437+RockChinQ@users.noreply.github.com> * Add PyPI publishing workflow and update license - Created GitHub Actions workflow to build frontend and publish to PyPI - Added license field to pyproject.toml to fix deprecation warning - Updated .gitignore to exclude build artifacts - Tested package building successfully Co-authored-by: RockChinQ <45992437+RockChinQ@users.noreply.github.com> * Add PyPI installation documentation - Created PYPI_INSTALLATION.md with detailed installation and usage instructions - Updated README.md to feature uvx/pip installation as recommended method - Updated README_EN.md with same changes for English documentation Co-authored-by: RockChinQ <45992437+RockChinQ@users.noreply.github.com> * Address code review feedback - Made package-data configuration more specific to langbot package only - Improved path detection with caching to avoid repeated file I/O - Removed sys.path searching which was incorrect for package data - Removed interactive input() call for non-interactive environment compatibility - Simplified error messages for version check Co-authored-by: RockChinQ <45992437+RockChinQ@users.noreply.github.com> * Fix code review issues - Use specific exception types instead of bare except - Fix misleading comments about directory levels - Remove redundant existence check before makedirs with exist_ok=True - Use context manager for file opening to ensure proper cleanup Co-authored-by: RockChinQ <45992437+RockChinQ@users.noreply.github.com> * Simplify package configuration and document 
behavioral differences - Removed redundant package-data configuration, relying on MANIFEST.in - Added documentation about behavioral differences between package and source installation - Clarified that include-package-data=true uses MANIFEST.in for data files Co-authored-by: RockChinQ <45992437+RockChinQ@users.noreply.github.com> * chore: update pyproject.toml * chore: try pack templates in langbot/ * chore: update * chore: update * chore: update * chore: update * chore: update * chore: adjust dir structure * chore: fix imports * fix: read default-pipeline-config.json * fix: read default-pipeline-config.json * fix: tests * ci: publish pypi * chore: bump version 4.6.0-beta.1 for testing * chore: add templates/** * fix: send adapters and requesters icons * chore: bump version 4.6.0b2 for testing * chore: add platform field for docker-compose.yaml --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: RockChinQ <45992437+RockChinQ@users.noreply.github.com> Co-authored-by: Junyan Qin <rockchinq@gmail.com>
105 lines
4.1 KiB
Python
105 lines
4.1 KiB
Python
from __future__ import annotations
|
|
|
|
from typing import Any, Dict, List
|
|
|
|
from qdrant_client import AsyncQdrantClient, models
|
|
from langbot.pkg.core import app
|
|
from langbot.pkg.vector.vdb import VectorDatabase
|
|
|
|
|
|
class QdrantVectorDatabase(VectorDatabase):
    """Vector database backend that stores embeddings in Qdrant.

    Connection settings are read from ``instance_config.data['vdb']['qdrant']``.
    Collections are created lazily on the first ``add_embeddings`` call,
    because Qdrant needs the vector dimension at creation time.
    """

    def __init__(self, ap: app.Application):
        """Build the async Qdrant client from the application's configuration.

        Args:
            ap: The running application; provides the config and the logger.
        """
        self.ap = ap

        # NOTE(review): assumes the qdrant config section is a plain dict.
        qdrant_cfg = self.ap.instance_config.data['vdb']['qdrant']
        url = qdrant_cfg.get('url')
        # An empty string must not be forwarded as a key: the client treats any
        # non-None value as "API key configured".
        api_key = qdrant_cfg.get('api_key') or None

        if url:
            self.client = AsyncQdrantClient(url=url, api_key=api_key)
        else:
            # host/port are only needed (and only parsed) when no URL is given.
            host = qdrant_cfg.get('host') or 'localhost'
            port = int(qdrant_cfg.get('port') or 6333)
            self.client = AsyncQdrantClient(host=host, port=port, api_key=api_key)

        # Names of collections already known to exist, to skip repeated
        # existence checks against the server.
        self._collections: set[str] = set()

    async def _ensure_collection(self, collection: str, vector_size: int) -> None:
        """Create ``collection`` with cosine distance if it does not exist yet.

        Args:
            collection: Name of the Qdrant collection.
            vector_size: Dimension used when the collection has to be created.
        """
        if collection in self._collections:
            return

        if not await self.client.collection_exists(collection):
            await self.client.create_collection(
                collection_name=collection,
                vectors_config=models.VectorParams(size=vector_size, distance=models.Distance.COSINE),
            )
            self.ap.logger.info(f"Qdrant collection '{collection}' created with dim={vector_size}.")

        self._collections.add(collection)

    async def get_or_create_collection(self, collection: str):
        """Do nothing; collections are created in ``add_embeddings``.

        Qdrant requires the vector size to create a collection, and that is
        only known once the first embedding arrives.
        """
        # Qdrant requires vector size to create a collection; no-op here.
        pass

    async def add_embeddings(
        self,
        collection: str,
        ids: List[str],
        embeddings_list: List[List[float]],
        metadatas: List[Dict[str, Any]],
    ) -> None:
        """Upsert embeddings and their metadata payloads into ``collection``.

        Args:
            collection: Target collection; created on demand.
            ids: Point ids, one per embedding.
            embeddings_list: Vectors; the first one determines the dimension
                used if the collection has to be created.
            metadatas: Payload dicts, one per embedding.

        Raises:
            ValueError: If the three lists do not have the same length.
        """
        if not embeddings_list:
            return

        # Fail loudly instead of silently dropping vectors or raising a bare
        # IndexError part-way through building the points.
        if not (len(ids) == len(embeddings_list) == len(metadatas)):
            raise ValueError(
                'ids, embeddings_list and metadatas must have the same length, '
                f'got {len(ids)}, {len(embeddings_list)} and {len(metadatas)}'
            )

        await self._ensure_collection(collection, len(embeddings_list[0]))

        points = [
            models.PointStruct(id=point_id, vector=vector, payload=payload)
            for point_id, vector, payload in zip(ids, embeddings_list, metadatas)
        ]
        await self.client.upsert(collection_name=collection, points=points)
        self.ap.logger.info(f"Added {len(ids)} embeddings to Qdrant collection '{collection}'.")

    async def search(self, collection: str, query_embedding: list[float], k: int = 5) -> dict[str, Any]:
        """Return the ``k`` nearest points to ``query_embedding``.

        Args:
            collection: Collection to query.
            query_embedding: Query vector.
            k: Maximum number of hits to return.

        Returns:
            A dict with keys ``'ids'``, ``'metadatas'`` and ``'distances'``,
            each holding a single-element list that wraps the per-hit values.
            All three inner lists are empty when the collection does not exist.
        """
        if not await self.client.collection_exists(collection):
            return {'ids': [[]], 'metadatas': [[]], 'distances': [[]]}

        response = await self.client.query_points(
            collection_name=collection,
            query=query_embedding,
            limit=k,
            with_payload=True,
        )
        hits = response.points

        ids = [str(hit.id) for hit in hits]
        metadatas = [hit.payload or {} for hit in hits]
        # Qdrant's score is similarity; convert to a pseudo-distance for consistency.
        # A missing score maps to 1.0.
        distances = [1 - float(hit.score) if hit.score is not None else 1.0 for hit in hits]
        results = {'ids': [ids], 'metadatas': [metadatas], 'distances': [distances]}

        self.ap.logger.info(f"Qdrant search in '{collection}' returned {len(ids)} results.")
        return results

    async def delete_by_file_id(self, collection: str, file_id: str) -> None:
        """Delete every point whose payload field ``file_id`` equals ``file_id``.

        Does nothing when the collection does not exist.
        """
        if not await self.client.collection_exists(collection):
            return

        file_filter = models.Filter(
            must=[models.FieldCondition(key='file_id', match=models.MatchValue(value=file_id))]
        )
        await self.client.delete(collection_name=collection, points_selector=file_filter)
        self.ap.logger.info(f"Deleted embeddings from Qdrant collection '{collection}' with file_id: {file_id}")

    async def delete_collection(self, collection: str):
        """Delete ``collection`` on a best-effort basis.

        Failures are logged as warnings rather than raised, so callers are not
        interrupted. The local existence cache entry is dropped either way, so
        the next write re-checks the server.
        """
        try:
            await self.client.delete_collection(collection)
        except Exception as exc:
            # Report the real cause; not every failure means "not found".
            self.ap.logger.warning(f"Failed to delete Qdrant collection '{collection}': {exc}")
        else:
            self.ap.logger.info(f"Qdrant collection '{collection}' deleted.")
        finally:
            self._collections.discard(collection)
|