mirror of
https://github.com/langbot-app/LangBot.git
synced 2026-06-12 16:56:02 +00:00
feat: external knowledge bases (#1783)
* Initial plan * Add backend support for external knowledge bases Co-authored-by: RockChinQ <45992437+RockChinQ@users.noreply.github.com> * Add frontend support for external knowledge bases with tabs UI Co-authored-by: RockChinQ <45992437+RockChinQ@users.noreply.github.com> * Add i18n translations for all languages (Traditional Chinese and Japanese) Co-authored-by: RockChinQ <45992437+RockChinQ@users.noreply.github.com> * Update knowledge base tab list styling to match plugins page Co-authored-by: RockChinQ <45992437+RockChinQ@users.noreply.github.com> * perf: margin-top for kb page * refactor: switch RetrievalResultEntry to langbot_plugin pkg ones * feat: knowledge retriever listing and creating * stash * refactor: unify sync mechanism for polymorphic components * feat: use unified retireval result struct in retrieval test page * chore: remove unused methods * feat: retriever icon displaying * feat: localagent retrieval with external kbs * chore: bump version of langbot-plugin to 0.2.0b1 * fix: i18n --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: RockChinQ <45992437+RockChinQ@users.noreply.github.com> Co-authored-by: Junyan Qin <rockchinq@gmail.com>
This commit is contained in:
@@ -10,10 +10,12 @@ from langbot.pkg.rag.knowledge.services.retriever import Retriever
|
||||
import sqlalchemy
|
||||
from langbot.pkg.entity.persistence import rag as persistence_rag
|
||||
from langbot.pkg.core import taskmgr
|
||||
from langbot.pkg.entity.rag import retriever as retriever_entities
|
||||
from langbot_plugin.api.entities.builtin.rag import context as rag_context
|
||||
from .base import KnowledgeBaseInterface
|
||||
from .external import ExternalKnowledgeBase
|
||||
|
||||
|
||||
class RuntimeKnowledgeBase:
|
||||
class RuntimeKnowledgeBase(KnowledgeBaseInterface):
|
||||
ap: app.Application
|
||||
|
||||
knowledge_base_entity: persistence_rag.KnowledgeBase
|
||||
@@ -27,7 +29,7 @@ class RuntimeKnowledgeBase:
|
||||
retriever: Retriever
|
||||
|
||||
def __init__(self, ap: app.Application, knowledge_base_entity: persistence_rag.KnowledgeBase):
|
||||
self.ap = ap
|
||||
super().__init__(ap)
|
||||
self.knowledge_base_entity = knowledge_base_entity
|
||||
self.parser = parser.FileParser(ap=self.ap)
|
||||
self.chunker = chunker.Chunker(ap=self.ap)
|
||||
@@ -187,7 +189,7 @@ class RuntimeKnowledgeBase:
|
||||
|
||||
return stored_file_tasks[0] if stored_file_tasks else ''
|
||||
|
||||
async def retrieve(self, query: str, top_k: int) -> list[retriever_entities.RetrieveResultEntry]:
|
||||
async def retrieve(self, query: str, top_k: int) -> list[rag_context.RetrievalResultEntry]:
|
||||
embedding_model = await self.ap.model_mgr.get_embedding_model_by_uuid(
|
||||
self.knowledge_base_entity.embedding_model_uuid
|
||||
)
|
||||
@@ -206,6 +208,18 @@ class RuntimeKnowledgeBase:
|
||||
sqlalchemy.delete(persistence_rag.File).where(persistence_rag.File.uuid == file_id)
|
||||
)
|
||||
|
||||
def get_uuid(self) -> str:
|
||||
"""Get the UUID of the knowledge base"""
|
||||
return self.knowledge_base_entity.uuid
|
||||
|
||||
def get_name(self) -> str:
|
||||
"""Get the name of the knowledge base"""
|
||||
return self.knowledge_base_entity.name
|
||||
|
||||
def get_type(self) -> str:
|
||||
"""Get the type of knowledge base"""
|
||||
return 'internal'
|
||||
|
||||
async def dispose(self):
|
||||
await self.ap.vector_db_mgr.vector_db.delete_collection(self.knowledge_base_entity.uuid)
|
||||
|
||||
@@ -213,7 +227,7 @@ class RuntimeKnowledgeBase:
|
||||
class RAGManager:
|
||||
ap: app.Application
|
||||
|
||||
knowledge_bases: list[RuntimeKnowledgeBase]
|
||||
knowledge_bases: list[KnowledgeBaseInterface]
|
||||
|
||||
def __init__(self, ap: app.Application):
|
||||
self.ap = ap
|
||||
@@ -227,8 +241,8 @@ class RAGManager:
|
||||
|
||||
self.knowledge_bases = []
|
||||
|
||||
# Load internal knowledge bases
|
||||
result = await self.ap.persistence_mgr.execute_async(sqlalchemy.select(persistence_rag.KnowledgeBase))
|
||||
|
||||
knowledge_bases = result.all()
|
||||
|
||||
for knowledge_base in knowledge_bases:
|
||||
@@ -239,6 +253,21 @@ class RAGManager:
|
||||
f'Error loading knowledge base {knowledge_base.uuid}: {e}\n{traceback.format_exc()}'
|
||||
)
|
||||
|
||||
# Load external knowledge bases
|
||||
external_result = await self.ap.persistence_mgr.execute_async(
|
||||
sqlalchemy.select(persistence_rag.ExternalKnowledgeBase)
|
||||
)
|
||||
external_kbs = external_result.all()
|
||||
|
||||
for external_kb in external_kbs:
|
||||
try:
|
||||
# Don't trigger sync during batch loading - will sync once after LangBot connects to runtime
|
||||
await self.load_external_knowledge_base(external_kb, trigger_sync=False)
|
||||
except Exception as e:
|
||||
self.ap.logger.error(
|
||||
f'Error loading external knowledge base {external_kb.uuid}: {e}\n{traceback.format_exc()}'
|
||||
)
|
||||
|
||||
async def load_knowledge_base(
|
||||
self,
|
||||
knowledge_base_entity: persistence_rag.KnowledgeBase | sqlalchemy.Row | dict,
|
||||
@@ -256,21 +285,54 @@ class RAGManager:
|
||||
|
||||
return runtime_knowledge_base
|
||||
|
||||
async def get_knowledge_base_by_uuid(self, kb_uuid: str) -> RuntimeKnowledgeBase | None:
|
||||
async def load_external_knowledge_base(
|
||||
self,
|
||||
external_kb_entity: persistence_rag.ExternalKnowledgeBase | sqlalchemy.Row | dict,
|
||||
trigger_sync: bool = True,
|
||||
) -> ExternalKnowledgeBase:
|
||||
"""Load external knowledge base into runtime
|
||||
|
||||
Args:
|
||||
external_kb_entity: External KB entity to load
|
||||
trigger_sync: Whether to trigger sync after loading (default True for manual creation, False for batch loading)
|
||||
"""
|
||||
if isinstance(external_kb_entity, sqlalchemy.Row):
|
||||
external_kb_entity = persistence_rag.ExternalKnowledgeBase(**external_kb_entity._mapping)
|
||||
elif isinstance(external_kb_entity, dict):
|
||||
external_kb_entity = persistence_rag.ExternalKnowledgeBase(**external_kb_entity)
|
||||
|
||||
external_kb = ExternalKnowledgeBase(ap=self.ap, external_kb_entity=external_kb_entity)
|
||||
|
||||
await external_kb.initialize()
|
||||
|
||||
self.knowledge_bases.append(external_kb)
|
||||
|
||||
# Trigger sync to create the instance immediately (for manual creation)
|
||||
# Skip sync during batch loading from DB to avoid multiple sync calls
|
||||
if trigger_sync:
|
||||
try:
|
||||
await self.ap.plugin_connector.sync_polymorphic_component_instances()
|
||||
self.ap.logger.info(f'Triggered sync after loading external KB {external_kb_entity.uuid}')
|
||||
except Exception as e:
|
||||
self.ap.logger.error(f'Failed to sync after loading external KB: {e}')
|
||||
|
||||
return external_kb
|
||||
|
||||
async def get_knowledge_base_by_uuid(self, kb_uuid: str) -> KnowledgeBaseInterface | None:
|
||||
for kb in self.knowledge_bases:
|
||||
if kb.knowledge_base_entity.uuid == kb_uuid:
|
||||
if kb.get_uuid() == kb_uuid:
|
||||
return kb
|
||||
return None
|
||||
|
||||
async def remove_knowledge_base_from_runtime(self, kb_uuid: str):
|
||||
for kb in self.knowledge_bases:
|
||||
if kb.knowledge_base_entity.uuid == kb_uuid:
|
||||
if kb.get_uuid() == kb_uuid:
|
||||
self.knowledge_bases.remove(kb)
|
||||
return
|
||||
|
||||
async def delete_knowledge_base(self, kb_uuid: str):
|
||||
for kb in self.knowledge_bases:
|
||||
if kb.knowledge_base_entity.uuid == kb_uuid:
|
||||
if kb.get_uuid() == kb_uuid:
|
||||
await kb.dispose()
|
||||
self.knowledge_bases.remove(kb)
|
||||
return
|
||||
|
||||
Reference in New Issue
Block a user