chore: Add PyPI package support for uvx/pip installation (#1764)

* Initial plan

* Add package structure and resource path utilities

- Created langbot/ package with __init__.py and __main__.py entry point
- Added paths utility to find frontend and resource files from package installation
- Updated config loading to use resource paths
- Updated frontend serving to use resource paths
- Added MANIFEST.in for package data inclusion
- Updated pyproject.toml with build system and entry points

Co-authored-by: RockChinQ <45992437+RockChinQ@users.noreply.github.com>

* Add PyPI publishing workflow and update license

- Created GitHub Actions workflow to build frontend and publish to PyPI
- Added license field to pyproject.toml to fix deprecation warning
- Updated .gitignore to exclude build artifacts
- Tested package building successfully

Co-authored-by: RockChinQ <45992437+RockChinQ@users.noreply.github.com>

* Add PyPI installation documentation

- Created PYPI_INSTALLATION.md with detailed installation and usage instructions
- Updated README.md to feature uvx/pip installation as recommended method
- Updated README_EN.md with same changes for English documentation

Co-authored-by: RockChinQ <45992437+RockChinQ@users.noreply.github.com>

* Address code review feedback

- Made package-data configuration more specific to langbot package only
- Improved path detection with caching to avoid repeated file I/O
- Removed sys.path searching which was incorrect for package data
- Removed interactive input() call for non-interactive environment compatibility
- Simplified error messages for version check

Co-authored-by: RockChinQ <45992437+RockChinQ@users.noreply.github.com>

* Fix code review issues

- Use specific exception types instead of bare except
- Fix misleading comments about directory levels
- Remove redundant existence check before makedirs with exist_ok=True
- Use context manager for file opening to ensure proper cleanup

Co-authored-by: RockChinQ <45992437+RockChinQ@users.noreply.github.com>

* Simplify package configuration and document behavioral differences

- Removed redundant package-data configuration, relying on MANIFEST.in
- Added documentation about behavioral differences between package and source installation
- Clarified that include-package-data=true uses MANIFEST.in for data files

Co-authored-by: RockChinQ <45992437+RockChinQ@users.noreply.github.com>

* chore: update pyproject.toml

* chore: try pack templates in langbot/

* chore: update

* chore: update

* chore: update

* chore: update

* chore: update

* chore: adjust dir structure

* chore: fix imports

* fix: read default-pipeline-config.json

* fix: read default-pipeline-config.json

* fix: tests

* ci: publish pypi

* chore: bump version 4.6.0-beta.1 for testing

* chore: add templates/**

* fix: send adapters and requesters icons

* chore: bump version 4.6.0b2 for testing

* chore: add platform field for docker-compose.yaml

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: RockChinQ <45992437+RockChinQ@users.noreply.github.com>
Co-authored-by: Junyan Qin <rockchinq@gmail.com>
This commit is contained in:
Copilot
2025-11-16 19:53:01 +08:00
committed by GitHub
parent 6a24c951e0
commit e642ffa5b3
477 changed files with 1001 additions and 1002 deletions

View File

View File

@@ -0,0 +1,30 @@
from __future__ import annotations
from ..core import app
from .vdb import VectorDatabase
from .vdbs.chroma import ChromaVectorDatabase
from .vdbs.qdrant import QdrantVectorDatabase
class VectorDBManager:
    """Pick and hold the vector database backend used by the application.

    The backend is selected from the ``vdb`` section of the instance
    configuration. Chroma is used whenever that section is missing/empty or
    names a backend other than ``chroma`` or ``qdrant``.
    """

    # Application object; provides ``instance_config`` and ``logger``.
    ap: app.Application
    # Active backend; remains ``None`` until ``initialize`` has run.
    vector_db: VectorDatabase = None

    def __init__(self, ap: app.Application):
        self.ap = ap

    async def initialize(self):
        """Instantiate the backend named by ``vdb.use`` and store it in ``vector_db``."""
        vdb_settings = self.ap.instance_config.data.get('vdb')

        # No (or empty) ``vdb`` section at all: fall back to Chroma.
        if not vdb_settings:
            self.vector_db = ChromaVectorDatabase(self.ap)
            self.ap.logger.warning('No vector database backend configured, defaulting to Chroma.')
            return

        backend = vdb_settings.get('use')

        if backend == 'qdrant':
            self.vector_db = QdrantVectorDatabase(self.ap)
            self.ap.logger.info('Initialized Qdrant vector database backend.')
            return

        # Every remaining case ends up on Chroma; only the log line differs
        # between an explicit choice and an unrecognised value.
        self.vector_db = ChromaVectorDatabase(self.ap)
        if backend == 'chroma':
            self.ap.logger.info('Initialized Chroma vector database backend.')
        else:
            self.ap.logger.warning('No valid vector database backend configured, defaulting to Chroma.')

View File

@@ -0,0 +1,38 @@
from __future__ import annotations
import abc
from typing import Any, Dict
import numpy as np
class VectorDatabase(abc.ABC):
    """Abstract interface that every vector database backend must implement."""

    @abc.abstractmethod
    async def add_embeddings(
        self,
        collection: str,
        ids: list[str],
        embeddings_list: list[list[float]],
        metadatas: list[dict[str, Any]],
        documents: list[str] | None = None,
    ) -> None:
        """Add vector data to the specified collection.

        Args:
            collection: Name of the target collection.
            ids: One identifier per embedding.
            embeddings_list: The embedding vectors, parallel to ``ids``.
            metadatas: One metadata mapping per embedding, parallel to ``ids``.
            documents: Optional source texts, parallel to ``ids``. Optional so
                that callers passing only the first four arguments remain
                valid.
        """

    @abc.abstractmethod
    async def search(
        self,
        collection: str,
        query_embedding: np.ndarray | list[float],
        k: int = 5,
    ) -> Dict[str, Any]:
        """Search for the most similar vectors in the specified collection.

        Args:
            collection: Name of the collection to query.
            query_embedding: The query vector.
            k: Maximum number of results to return.

        Returns:
            A mapping describing the matches; its exact keys are defined by
            the implementing backend.
        """

    @abc.abstractmethod
    async def delete_by_file_id(self, collection: str, file_id: str) -> None:
        """Delete vectors from the specified collection by ``file_id``."""

    @abc.abstractmethod
    async def get_or_create_collection(self, collection: str):
        """Get or create the named collection.

        The return value is backend-specific.
        """

    @abc.abstractmethod
    async def delete_collection(self, collection: str):
        """Delete the named collection."""

View File

View File

@@ -0,0 +1,61 @@
from __future__ import annotations
import asyncio
from typing import Any
from chromadb import PersistentClient
from langbot.pkg.vector.vdb import VectorDatabase
from langbot.pkg.core import app
import chromadb
import chromadb.errors
class ChromaVectorDatabase(VectorDatabase):
    """Vector database backend backed by a Chroma ``PersistentClient``.

    The Chroma client calls used here are synchronous, so each one is
    dispatched through ``asyncio.to_thread`` instead of being called directly
    from a coroutine.
    """

    def __init__(self, ap: app.Application, base_path: str = './data/chroma'):
        """Open (or create) the persistent Chroma store at ``base_path``."""
        self.ap = ap
        self.client = PersistentClient(path=base_path)
        # Collection handles already fetched, keyed by collection name.
        self._collections: dict[str, chromadb.Collection] = {}

    async def get_or_create_collection(self, collection: str) -> chromadb.Collection:
        """Return the handle for ``collection``, creating the collection on first use."""
        if collection not in self._collections:
            self._collections[collection] = await asyncio.to_thread(
                self.client.get_or_create_collection, name=collection
            )
            self.ap.logger.info(f"Chroma collection '{collection}' accessed/created.")
        return self._collections[collection]

    async def add_embeddings(
        self,
        collection: str,
        ids: list[str],
        embeddings_list: list[list[float]],
        metadatas: list[dict[str, Any]],
        documents: list[str] | None = None,
    ) -> None:
        """Add embeddings (and optionally their source texts) to ``collection``.

        Args:
            collection: Name of the target collection.
            ids: One identifier per embedding.
            embeddings_list: The embedding vectors, parallel to ``ids``.
            metadatas: One metadata mapping per embedding, parallel to ``ids``.
            documents: Optional source texts, parallel to ``ids``. Accepted so
                that calls following the ``VectorDatabase.add_embeddings``
                signature do not fail with ``TypeError``.
        """
        # Nothing to store; mirrors the Qdrant backend's handling of an empty batch.
        if not embeddings_list:
            return

        col = await self.get_or_create_collection(collection)
        add_kwargs: dict[str, Any] = {
            'embeddings': embeddings_list,
            'ids': ids,
            'metadatas': metadatas,
        }
        # Only forward ``documents`` when supplied so the four-argument call
        # reaches Chroma exactly as before.
        if documents is not None:
            add_kwargs['documents'] = documents
        await asyncio.to_thread(col.add, **add_kwargs)
        self.ap.logger.info(f"Added {len(ids)} embeddings to Chroma collection '{collection}'.")

    async def search(self, collection: str, query_embedding: list[float], k: int = 5) -> dict[str, Any]:
        """Query ``collection`` for the ``k`` nearest entries to ``query_embedding``.

        Returns:
            The raw Chroma query result, requested with ``metadatas``,
            ``distances`` and ``documents`` included.
        """
        col = await self.get_or_create_collection(collection)
        results = await asyncio.to_thread(
            col.query,
            query_embeddings=query_embedding,
            n_results=k,
            include=['metadatas', 'distances', 'documents'],
        )
        self.ap.logger.info(f"Chroma search in '{collection}' returned {len(results.get('ids', [[]])[0])} results.")
        return results

    async def delete_by_file_id(self, collection: str, file_id: str) -> None:
        """Delete every entry in ``collection`` whose metadata ``file_id`` equals ``file_id``."""
        col = await self.get_or_create_collection(collection)
        await asyncio.to_thread(col.delete, where={'file_id': file_id})
        self.ap.logger.info(f"Deleted embeddings from Chroma collection '{collection}' with file_id: {file_id}")

    async def delete_collection(self, collection: str):
        """Delete ``collection``; a missing collection is logged and otherwise ignored."""
        # Drop the cached handle first so a stale handle is never reused,
        # whether or not the deletion below succeeds.
        self._collections.pop(collection, None)
        try:
            await asyncio.to_thread(self.client.delete_collection, name=collection)
        except chromadb.errors.NotFoundError:
            self.ap.logger.warning(f"Chroma collection '{collection}' not found.")
            return
        self.ap.logger.info(f"Chroma collection '{collection}' deleted.")

View File

@@ -0,0 +1,104 @@
from __future__ import annotations
from typing import Any, Dict, List
from qdrant_client import AsyncQdrantClient, models
from langbot.pkg.core import app
from langbot.pkg.vector.vdb import VectorDatabase
class QdrantVectorDatabase(VectorDatabase):
    """Vector database backend that talks to Qdrant through ``AsyncQdrantClient``."""

    def __init__(self, ap: app.Application):
        """Create the client from the ``vdb.qdrant`` section of the instance config.

        A non-empty ``url`` takes precedence; otherwise ``host`` and ``port``
        are used.
        """
        self.ap = ap
        qdrant_config = self.ap.instance_config.data['vdb']['qdrant']
        url = qdrant_config['url']
        host = qdrant_config['host']
        port = qdrant_config['port']
        # Treat an empty/blank-valued key as "no key configured" rather than
        # handing an empty credential to the client.
        api_key = qdrant_config['api_key'] or None

        if url:
            self.client = AsyncQdrantClient(url=url, api_key=api_key)
        else:
            self.client = AsyncQdrantClient(host=host, port=int(port), api_key=api_key)

        # Names of collections already known to exist on the server.
        self._collections: set[str] = set()

    async def _ensure_collection(self, collection: str, vector_size: int) -> None:
        """Create ``collection`` with cosine distance and ``vector_size`` dims if it is missing."""
        if collection in self._collections:
            return

        exists = await self.client.collection_exists(collection)
        if exists:
            self._collections.add(collection)
            return

        await self.client.create_collection(
            collection_name=collection,
            vectors_config=models.VectorParams(size=vector_size, distance=models.Distance.COSINE),
        )
        self._collections.add(collection)
        self.ap.logger.info(f"Qdrant collection '{collection}' created with dim={vector_size}.")

    async def get_or_create_collection(self, collection: str):
        """No-op: the collection is created lazily by ``add_embeddings``."""
        # Qdrant requires vector size to create a collection; no-op here.
        pass

    async def add_embeddings(
        self,
        collection: str,
        ids: List[str],
        embeddings_list: List[List[float]],
        metadatas: List[Dict[str, Any]],
        documents: List[str] | None = None,
    ) -> None:
        """Upsert embeddings with their metadata payloads into ``collection``.

        Args:
            collection: Name of the target collection; created on first use
                with the dimension of the first embedding.
            ids: One point id per embedding.
            embeddings_list: The embedding vectors, parallel to ``ids``.
            metadatas: One payload mapping per embedding, parallel to ``ids``.
            documents: Accepted so that calls following the
                ``VectorDatabase.add_embeddings`` signature do not fail with
                ``TypeError``. It is currently not persisted by this backend;
                only ``metadatas`` is stored as the point payload.

        Raises:
            ValueError: If ``ids``, ``embeddings_list`` and ``metadatas`` do
                not all have the same length.
        """
        if not embeddings_list:
            return

        # Fail loudly on misaligned inputs instead of raising an IndexError
        # midway or silently dropping trailing items.
        if not (len(ids) == len(embeddings_list) == len(metadatas)):
            raise ValueError(
                'ids, embeddings_list and metadatas must have the same length '
                f'(got {len(ids)}, {len(embeddings_list)}, {len(metadatas)})'
            )

        await self._ensure_collection(collection, len(embeddings_list[0]))
        points = [
            models.PointStruct(id=point_id, vector=vector, payload=payload)
            for point_id, vector, payload in zip(ids, embeddings_list, metadatas)
        ]
        await self.client.upsert(collection_name=collection, points=points)
        self.ap.logger.info(f"Added {len(ids)} embeddings to Qdrant collection '{collection}'.")

    async def search(self, collection: str, query_embedding: list[float], k: int = 5) -> dict[str, Any]:
        """Return up to ``k`` nearest points to ``query_embedding`` in ``collection``.

        Returns:
            A mapping with ``ids``, ``metadatas`` and ``distances``, each a
            single-element list wrapping the per-hit values. All three are
            empty when the collection does not exist.
        """
        exists = await self.client.collection_exists(collection)
        if not exists:
            return {'ids': [[]], 'metadatas': [[]], 'distances': [[]]}

        hits = (
            await self.client.query_points(
                collection_name=collection,
                query=query_embedding,
                limit=k,
                with_payload=True,
            )
        ).points

        ids = [str(hit.id) for hit in hits]
        metadatas = [hit.payload or {} for hit in hits]
        # Qdrant's score is similarity; convert to a pseudo-distance for consistency
        distances = [1 - float(hit.score) if hit.score is not None else 1.0 for hit in hits]
        results = {'ids': [ids], 'metadatas': [metadatas], 'distances': [distances]}
        self.ap.logger.info(f"Qdrant search in '{collection}' returned {len(results.get('ids', [[]])[0])} results.")
        return results

    async def delete_by_file_id(self, collection: str, file_id: str) -> None:
        """Delete every point in ``collection`` whose payload ``file_id`` equals ``file_id``.

        Does nothing when the collection does not exist.
        """
        exists = await self.client.collection_exists(collection)
        if not exists:
            return

        await self.client.delete(
            collection_name=collection,
            points_selector=models.Filter(
                must=[models.FieldCondition(key='file_id', match=models.MatchValue(value=file_id))]
            ),
        )
        self.ap.logger.info(f"Deleted embeddings from Qdrant collection '{collection}' with file_id: {file_id}")

    async def delete_collection(self, collection: str):
        """Delete ``collection`` on a best-effort basis.

        Failures are logged and swallowed. A missing collection is reported
        as "not found"; any other error is logged with its actual cause
        rather than being mislabelled as a missing collection.
        """
        try:
            exists = await self.client.collection_exists(collection)
            if exists:
                await self.client.delete_collection(collection)
        except Exception as exc:
            # Best-effort: keep the original non-raising contract but report
            # what actually went wrong.
            self.ap.logger.warning(f"Failed to delete Qdrant collection '{collection}': {exc}")
            return

        # The collection is gone either way, so the cached name is stale.
        self._collections.discard(collection)
        if exists:
            self.ap.logger.info(f"Qdrant collection '{collection}' deleted.")
        else:
            self.ap.logger.warning(f"Qdrant collection '{collection}' not found.")