mirror of
https://github.com/langbot-app/LangBot.git
synced 2026-06-08 06:46:02 +00:00
feat: add GitHub Actions workflow for linting with Ruff (#1929)
* feat: add GitHub Actions workflow for linting with Ruff * refactor: rename lint job and add formatting step to Ruff workflow * chore: run ruff format * chore: rename Ruff lint job to 'Lint' and add frontend linting workflow
This commit is contained in:
committed by
GitHub
parent
e60cb6ad0e
commit
fc6e414be4
@@ -55,12 +55,7 @@ class VectorDBManager:
|
||||
user = pgvector_config.get('user', 'postgres')
|
||||
password = pgvector_config.get('password', 'postgres')
|
||||
self.vector_db = PgVectorDatabase(
|
||||
self.ap,
|
||||
host=host,
|
||||
port=port,
|
||||
database=database,
|
||||
user=user,
|
||||
password=password
|
||||
self.ap, host=host, port=port, database=database, user=user, password=password
|
||||
)
|
||||
self.ap.logger.info('Initialized pgvector database backend.')
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@ from langbot.pkg.core import app
|
||||
class MilvusVectorDatabase(VectorDatabase):
|
||||
"""Milvus vector database implementation"""
|
||||
|
||||
def __init__(self, ap: app.Application, uri: str = "milvus.db", token: str = None, db_name: str = None):
|
||||
def __init__(self, ap: app.Application, uri: str = 'milvus.db', token: str = None, db_name: str = None):
|
||||
"""Initialize Milvus vector database
|
||||
|
||||
Args:
|
||||
@@ -34,32 +34,32 @@ class MilvusVectorDatabase(VectorDatabase):
|
||||
self.client = MilvusClient(uri=self.uri, token=self.token, db_name=self.db_name)
|
||||
else:
|
||||
self.client = MilvusClient(uri=self.uri, db_name=self.db_name)
|
||||
self.ap.logger.info(f"Connected to Milvus at {self.uri}")
|
||||
self.ap.logger.info(f'Connected to Milvus at {self.uri}')
|
||||
except Exception as e:
|
||||
self.ap.logger.error(f"Failed to connect to Milvus: {e}")
|
||||
self.ap.logger.error(f'Failed to connect to Milvus: {e}')
|
||||
raise
|
||||
|
||||
@staticmethod
|
||||
def _normalize_collection_name(collection: str) -> str:
|
||||
"""Normalize collection name to comply with Milvus naming requirements.
|
||||
|
||||
|
||||
Milvus requirements:
|
||||
- First character must be an underscore or letter
|
||||
- Can only contain numbers, letters and underscores
|
||||
|
||||
|
||||
Args:
|
||||
collection: Original collection name (e.g., UUID with hyphens)
|
||||
|
||||
|
||||
Returns:
|
||||
Normalized collection name that complies with Milvus requirements
|
||||
"""
|
||||
# Replace hyphens with underscores
|
||||
normalized = collection.replace('-', '_')
|
||||
|
||||
|
||||
# If first character is not a letter or underscore, prepend 'kb_'
|
||||
if normalized and not (normalized[0].isalpha() or normalized[0] == '_'):
|
||||
normalized = 'kb_' + normalized
|
||||
|
||||
|
||||
return normalized
|
||||
|
||||
async def _ensure_vector_index(self, collection: str) -> None:
|
||||
@@ -70,15 +70,11 @@ class MilvusVectorDatabase(VectorDatabase):
|
||||
"""
|
||||
index_params = IndexParams()
|
||||
index_params.add_index(
|
||||
field_name="vector",
|
||||
index_type="AUTOINDEX",
|
||||
metric_type="COSINE",
|
||||
)
|
||||
await asyncio.to_thread(
|
||||
self.client.create_index,
|
||||
collection_name=collection,
|
||||
index_params=index_params
|
||||
field_name='vector',
|
||||
index_type='AUTOINDEX',
|
||||
metric_type='COSINE',
|
||||
)
|
||||
await asyncio.to_thread(self.client.create_index, collection_name=collection, index_params=index_params)
|
||||
|
||||
async def _get_or_create_collection_internal(self, collection: str, vector_size: int = None):
|
||||
"""Internal method to get or create a Milvus collection with proper configuration.
|
||||
@@ -89,14 +85,12 @@ class MilvusVectorDatabase(VectorDatabase):
|
||||
"""
|
||||
# Normalize collection name for Milvus compatibility
|
||||
collection = self._normalize_collection_name(collection)
|
||||
|
||||
|
||||
if collection in self._collections:
|
||||
return collection
|
||||
|
||||
# Check if collection exists
|
||||
has_collection = await asyncio.to_thread(
|
||||
self.client.has_collection, collection_name=collection
|
||||
)
|
||||
has_collection = await asyncio.to_thread(self.client.has_collection, collection_name=collection)
|
||||
|
||||
if not has_collection:
|
||||
# Default dimension if not specified (for backward compatibility)
|
||||
@@ -104,24 +98,26 @@ class MilvusVectorDatabase(VectorDatabase):
|
||||
vector_size = 1536
|
||||
|
||||
fields = [
|
||||
FieldSchema(name="id", dtype=DataType.VARCHAR, is_primary=True, max_length=255),
|
||||
FieldSchema(name="vector", dtype=DataType.FLOAT_VECTOR, dim=vector_size),
|
||||
FieldSchema(name="text", dtype=DataType.VARCHAR, max_length=65535),
|
||||
FieldSchema(name="file_id", dtype=DataType.VARCHAR, max_length=255),
|
||||
FieldSchema(name="chunk_uuid", dtype=DataType.VARCHAR, max_length=255),
|
||||
FieldSchema(name='id', dtype=DataType.VARCHAR, is_primary=True, max_length=255),
|
||||
FieldSchema(name='vector', dtype=DataType.FLOAT_VECTOR, dim=vector_size),
|
||||
FieldSchema(name='text', dtype=DataType.VARCHAR, max_length=65535),
|
||||
FieldSchema(name='file_id', dtype=DataType.VARCHAR, max_length=255),
|
||||
FieldSchema(name='chunk_uuid', dtype=DataType.VARCHAR, max_length=255),
|
||||
]
|
||||
|
||||
schema = CollectionSchema(fields=fields, description="LangBot knowledge base vectors")
|
||||
schema = CollectionSchema(fields=fields, description='LangBot knowledge base vectors')
|
||||
|
||||
await asyncio.to_thread(
|
||||
self.client.create_collection,
|
||||
collection_name=collection,
|
||||
schema=schema,
|
||||
metric_type="COSINE",
|
||||
metric_type='COSINE',
|
||||
)
|
||||
|
||||
await self._ensure_vector_index(collection)
|
||||
self.ap.logger.info(f"Created Milvus collection '{collection}' with dimension={vector_size}, index=AUTOINDEX")
|
||||
self.ap.logger.info(
|
||||
f"Created Milvus collection '{collection}' with dimension={vector_size}, index=AUTOINDEX"
|
||||
)
|
||||
else:
|
||||
# Ensure index exists for existing collection
|
||||
await self._ensure_index_if_missing(collection)
|
||||
@@ -137,11 +133,8 @@ class MilvusVectorDatabase(VectorDatabase):
|
||||
collection: Normalized collection name
|
||||
"""
|
||||
try:
|
||||
indexes = await asyncio.to_thread(
|
||||
self.client.list_indexes,
|
||||
collection_name=collection
|
||||
)
|
||||
if "vector" not in indexes:
|
||||
indexes = await asyncio.to_thread(self.client.list_indexes, collection_name=collection)
|
||||
if 'vector' not in indexes:
|
||||
await self._ensure_vector_index(collection)
|
||||
self.ap.logger.info(f"Created index for existing Milvus collection '{collection}'")
|
||||
except Exception as e:
|
||||
@@ -172,7 +165,7 @@ class MilvusVectorDatabase(VectorDatabase):
|
||||
metadatas: List of metadata dictionaries for each vector
|
||||
"""
|
||||
collection = self._normalize_collection_name(collection)
|
||||
|
||||
|
||||
if not embeddings_list:
|
||||
return
|
||||
|
||||
@@ -184,39 +177,30 @@ class MilvusVectorDatabase(VectorDatabase):
|
||||
data = []
|
||||
for i, vector_id in enumerate(ids):
|
||||
entry = {
|
||||
"id": vector_id,
|
||||
"vector": embeddings_list[i],
|
||||
'id': vector_id,
|
||||
'vector': embeddings_list[i],
|
||||
}
|
||||
# Add metadata fields
|
||||
if metadatas and i < len(metadatas):
|
||||
metadata = metadatas[i]
|
||||
# Add common metadata fields
|
||||
if "text" in metadata:
|
||||
entry["text"] = metadata["text"]
|
||||
if "file_id" in metadata:
|
||||
entry["file_id"] = metadata["file_id"]
|
||||
if "uuid" in metadata:
|
||||
entry["chunk_uuid"] = metadata["uuid"]
|
||||
if 'text' in metadata:
|
||||
entry['text'] = metadata['text']
|
||||
if 'file_id' in metadata:
|
||||
entry['file_id'] = metadata['file_id']
|
||||
if 'uuid' in metadata:
|
||||
entry['chunk_uuid'] = metadata['uuid']
|
||||
data.append(entry)
|
||||
|
||||
# Insert data into Milvus
|
||||
await asyncio.to_thread(
|
||||
self.client.insert,
|
||||
collection_name=collection,
|
||||
data=data
|
||||
)
|
||||
await asyncio.to_thread(self.client.insert, collection_name=collection, data=data)
|
||||
|
||||
# Load collection for searching (Milvus requires this)
|
||||
await asyncio.to_thread(
|
||||
self.client.load_collection,
|
||||
collection_name=collection
|
||||
)
|
||||
await asyncio.to_thread(self.client.load_collection, collection_name=collection)
|
||||
|
||||
self.ap.logger.info(f"Added {len(ids)} embeddings to Milvus collection '{collection}'")
|
||||
|
||||
async def search(
|
||||
self, collection: str, query_embedding: list[float], k: int = 5
|
||||
) -> Dict[str, Any]:
|
||||
async def search(self, collection: str, query_embedding: list[float], k: int = 5) -> Dict[str, Any]:
|
||||
"""Search for similar vectors in Milvus collection
|
||||
|
||||
Args:
|
||||
@@ -231,10 +215,7 @@ class MilvusVectorDatabase(VectorDatabase):
|
||||
await self.get_or_create_collection(collection)
|
||||
|
||||
# Perform search
|
||||
search_params = {
|
||||
"metric_type": "COSINE",
|
||||
"params": {}
|
||||
}
|
||||
search_params = {'metric_type': 'COSINE', 'params': {}}
|
||||
|
||||
results = await asyncio.to_thread(
|
||||
self.client.search,
|
||||
@@ -242,7 +223,7 @@ class MilvusVectorDatabase(VectorDatabase):
|
||||
data=[query_embedding],
|
||||
limit=k,
|
||||
search_params=search_params,
|
||||
output_fields=["text", "file_id", "chunk_uuid"]
|
||||
output_fields=['text', 'file_id', 'chunk_uuid'],
|
||||
)
|
||||
|
||||
# Convert results to Chroma-compatible format
|
||||
@@ -253,30 +234,24 @@ class MilvusVectorDatabase(VectorDatabase):
|
||||
|
||||
if results and len(results) > 0:
|
||||
for hit in results[0]:
|
||||
ids.append(hit.get("id", ""))
|
||||
distances.append(hit.get("distance", 0.0))
|
||||
ids.append(hit.get('id', ''))
|
||||
distances.append(hit.get('distance', 0.0))
|
||||
|
||||
# Build metadata from entity fields
|
||||
entity = hit.get("entity", {})
|
||||
entity = hit.get('entity', {})
|
||||
metadata = {}
|
||||
if "text" in entity:
|
||||
metadata["text"] = entity["text"]
|
||||
if "file_id" in entity:
|
||||
metadata["file_id"] = entity["file_id"]
|
||||
if "chunk_uuid" in entity:
|
||||
metadata["uuid"] = entity["chunk_uuid"]
|
||||
if 'text' in entity:
|
||||
metadata['text'] = entity['text']
|
||||
if 'file_id' in entity:
|
||||
metadata['file_id'] = entity['file_id']
|
||||
if 'chunk_uuid' in entity:
|
||||
metadata['uuid'] = entity['chunk_uuid']
|
||||
metadatas.append(metadata)
|
||||
|
||||
# Return in Chroma-compatible format (nested lists)
|
||||
result = {
|
||||
"ids": [ids],
|
||||
"distances": [distances],
|
||||
"metadatas": [metadatas]
|
||||
}
|
||||
result = {'ids': [ids], 'distances': [distances], 'metadatas': [metadatas]}
|
||||
|
||||
self.ap.logger.info(
|
||||
f"Milvus search in '{collection}' returned {len(ids)} results"
|
||||
)
|
||||
self.ap.logger.info(f"Milvus search in '{collection}' returned {len(ids)} results")
|
||||
return result
|
||||
|
||||
async def delete_by_file_id(self, collection: str, file_id: str) -> None:
|
||||
@@ -290,14 +265,8 @@ class MilvusVectorDatabase(VectorDatabase):
|
||||
await self.get_or_create_collection(collection)
|
||||
|
||||
# Delete entities matching the file_id
|
||||
await asyncio.to_thread(
|
||||
self.client.delete,
|
||||
collection_name=collection,
|
||||
filter=f'file_id == "{file_id}"'
|
||||
)
|
||||
self.ap.logger.info(
|
||||
f"Deleted embeddings from Milvus collection '{collection}' with file_id: {file_id}"
|
||||
)
|
||||
await asyncio.to_thread(self.client.delete, collection_name=collection, filter=f'file_id == "{file_id}"')
|
||||
self.ap.logger.info(f"Deleted embeddings from Milvus collection '{collection}' with file_id: {file_id}")
|
||||
|
||||
async def delete_collection(self, collection: str):
|
||||
"""Delete a Milvus collection
|
||||
@@ -306,18 +275,14 @@ class MilvusVectorDatabase(VectorDatabase):
|
||||
collection: Collection name to delete
|
||||
"""
|
||||
collection = self._normalize_collection_name(collection)
|
||||
|
||||
|
||||
self._collections.discard(collection)
|
||||
|
||||
# Check if collection exists before attempting deletion
|
||||
has_collection = await asyncio.to_thread(
|
||||
self.client.has_collection, collection_name=collection
|
||||
)
|
||||
has_collection = await asyncio.to_thread(self.client.has_collection, collection_name=collection)
|
||||
|
||||
if has_collection:
|
||||
await asyncio.to_thread(
|
||||
self.client.drop_collection, collection_name=collection
|
||||
)
|
||||
await asyncio.to_thread(self.client.drop_collection, collection_name=collection)
|
||||
self.ap.logger.info(f"Deleted Milvus collection '{collection}'")
|
||||
else:
|
||||
self.ap.logger.warning(f"Milvus collection '{collection}' not found")
|
||||
|
||||
Reference in New Issue
Block a user