feat: Support multiple knowledge base binding in pipelines (#1766)

* Initial plan

* Add multi-knowledge base support to pipelines

- Created database migration dbm010 to convert knowledge-base field from string to array
- Updated default pipeline config to use knowledge-bases array
- Updated pipeline metadata to use knowledge-base-multi-selector type
- Modified localagent.py to retrieve from multiple knowledge bases and concatenate results
- Added KNOWLEDGE_BASE_MULTI_SELECTOR type to frontend form entities
- Implemented multi-selector UI component with dialog for selecting multiple knowledge bases

Co-authored-by: RockChinQ <45992437+RockChinQ@users.noreply.github.com>

* Add i18n translations for multi-knowledge base selector

Co-authored-by: RockChinQ <45992437+RockChinQ@users.noreply.github.com>

* Fix prettier formatting errors in DynamicFormItemComponent

Co-authored-by: RockChinQ <45992437+RockChinQ@users.noreply.github.com>

* Add accessibility attributes to knowledge base selector checkbox

Co-authored-by: RockChinQ <45992437+RockChinQ@users.noreply.github.com>

* fix: minor fix

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: RockChinQ <45992437+RockChinQ@users.noreply.github.com>
Co-authored-by: Junyan Qin <rockchinq@gmail.com>
This commit is contained in:
Copilot
2025-11-08 13:45:09 +08:00
committed by GitHub
parent dd2254203c
commit 3edae3e678
12 changed files with 271 additions and 20 deletions

View File

@@ -0,0 +1,88 @@
from .. import migration
import sqlalchemy
from ...entity.persistence import pipeline as persistence_pipeline
@migration.migration_class(10)
class DBMigratePipelineMultiKnowledgeBase(migration.DBMigration):
"""Pipeline support multiple knowledge base binding"""
async def upgrade(self):
"""Upgrade"""
# read all pipelines
pipelines = await self.ap.persistence_mgr.execute_async(sqlalchemy.select(persistence_pipeline.LegacyPipeline))
for pipeline in pipelines:
serialized_pipeline = self.ap.persistence_mgr.serialize_model(persistence_pipeline.LegacyPipeline, pipeline)
config = serialized_pipeline['config']
# Convert knowledge-base from string to array
if 'local-agent' in config['ai']:
current_kb = config['ai']['local-agent'].get('knowledge-base', '')
# If it's already a list, skip
if isinstance(current_kb, list):
continue
# Convert string to list
if current_kb and current_kb != '__none__':
config['ai']['local-agent']['knowledge-bases'] = [current_kb]
else:
config['ai']['local-agent']['knowledge-bases'] = []
# Remove old field
if 'knowledge-base' in config['ai']['local-agent']:
del config['ai']['local-agent']['knowledge-base']
await self.ap.persistence_mgr.execute_async(
sqlalchemy.update(persistence_pipeline.LegacyPipeline)
.where(persistence_pipeline.LegacyPipeline.uuid == serialized_pipeline['uuid'])
.values(
{
'config': config,
'for_version': self.ap.ver_mgr.get_current_version(),
}
)
)
async def downgrade(self):
"""Downgrade"""
# read all pipelines
pipelines = await self.ap.persistence_mgr.execute_async(sqlalchemy.select(persistence_pipeline.LegacyPipeline))
for pipeline in pipelines:
serialized_pipeline = self.ap.persistence_mgr.serialize_model(persistence_pipeline.LegacyPipeline, pipeline)
config = serialized_pipeline['config']
# Convert knowledge-bases from array back to string
if 'local-agent' in config['ai']:
current_kbs = config['ai']['local-agent'].get('knowledge-bases', [])
# If it's already a string, skip
if isinstance(current_kbs, str):
continue
# Convert list to string (take first one or empty)
if current_kbs and len(current_kbs) > 0:
config['ai']['local-agent']['knowledge-base'] = current_kbs[0]
else:
config['ai']['local-agent']['knowledge-base'] = ''
# Remove new field
if 'knowledge-bases' in config['ai']['local-agent']:
del config['ai']['local-agent']['knowledge-bases']
await self.ap.persistence_mgr.execute_async(
sqlalchemy.update(persistence_pipeline.LegacyPipeline)
.where(persistence_pipeline.LegacyPipeline.uuid == serialized_pipeline['uuid'])
.values(
{
'config': config,
'for_version': self.ap.ver_mgr.get_current_version(),
}
)
)

View File

@@ -40,10 +40,14 @@ class LocalAgentRunner(runner.RequestRunner):
"""运行请求"""
pending_tool_calls = []
kb_uuid = query.pipeline_config['ai']['local-agent']['knowledge-base']
if kb_uuid == '__none__':
kb_uuid = None
# Get knowledge bases list (new field)
kb_uuids = query.pipeline_config['ai']['local-agent'].get('knowledge-bases', [])
# Fallback to old field for backward compatibility
if not kb_uuids:
old_kb_uuid = query.pipeline_config['ai']['local-agent'].get('knowledge-base', '')
if old_kb_uuid and old_kb_uuid != '__none__':
kb_uuids = [old_kb_uuid]
user_message = copy.deepcopy(query.user_message)
@@ -57,21 +61,28 @@ class LocalAgentRunner(runner.RequestRunner):
user_message_text += ce.text
break
if kb_uuid and user_message_text:
if kb_uuids and user_message_text:
# only support text for now
kb = await self.ap.rag_mgr.get_knowledge_base_by_uuid(kb_uuid)
all_results = []
# Retrieve from each knowledge base
for kb_uuid in kb_uuids:
kb = await self.ap.rag_mgr.get_knowledge_base_by_uuid(kb_uuid)
if not kb:
self.ap.logger.warning(f'Knowledge base {kb_uuid} not found')
raise ValueError(f'Knowledge base {kb_uuid} not found')
if not kb:
self.ap.logger.warning(f'Knowledge base {kb_uuid} not found, skipping')
continue
result = await kb.retrieve(user_message_text, kb.knowledge_base_entity.top_k)
result = await kb.retrieve(user_message_text, kb.knowledge_base_entity.top_k)
if result:
all_results.extend(result)
final_user_message_text = ''
if result:
if all_results:
rag_context = '\n\n'.join(
f'[{i + 1}] {entry.metadata.get("text", "")}' for i, entry in enumerate(result)
f'[{i + 1}] {entry.metadata.get("text", "")}' for i, entry in enumerate(all_results)
)
final_user_message_text = rag_combined_prompt_template.format(
rag_context=rag_context, user_message=user_message_text

View File

@@ -1,6 +1,6 @@
semantic_version = 'v4.4.1'
required_database_version = 9
required_database_version = 10
"""Tag the version of the database schema, used to check if the database needs to be migrated"""
debug_mode = False