From 2a74a8d6ae52202cc5a5e4795bc1619cd1bbede7 Mon Sep 17 00:00:00 2001 From: huanghuoguoguo <1051233107@qq.com> Date: Mon, 9 Mar 2026 20:05:38 +0800 Subject: [PATCH] Feat/dbm20 rag (#2037) * feat(rag): add knowledge base migration from v4.9.0 to plugin architecture Rewrite dbm020 to backup old knowledge_bases data and preserve external_knowledge_bases table. Add migration API endpoints and frontend dialog so users can opt-in to auto-install LangRAG plugin and restore their knowledge bases with original UUIDs preserved. Co-Authored-By: Claude Opus 4.6 * fix(rag): query marketplace for actual plugin version instead of 'latest' The marketplace API does not support 'latest' as a version string. Fetch the plugin info first to get latest_version, then use that concrete version for installation. Co-Authored-By: Claude Opus 4.6 * feat(rag): add data-only migration option and fix dialog width Add option to migrate knowledge base data without auto-installing the LangRAG plugin (for offline/intranet environments). Also narrow the migration dialog to match other confirmation dialogs. Co-Authored-By: Claude Opus 4.6 * refactor: to red and no more * fix lint * fix ruff lint * feat: add external migration * fix: show * feat: add external plugin auto download * feat: update migration messages for knowledge base in multiple languages --------- Co-authored-by: Claude Opus 4.6 Co-authored-by: Junyan Qin --- .../controller/groups/knowledge/migration.py | 372 ++++++++++++++++++ ...20_knowledge_engine_plugin_architecture.py | 161 ++++---- src/langbot/pkg/utils/constants.py | 2 +- .../kb-migration-dialog/KBMigrationDialog.tsx | 157 ++++++++ web/src/app/home/knowledge/page.tsx | 32 ++ web/src/app/infra/entities/api/index.ts | 6 + web/src/app/infra/http/BackendClient.ts | 18 + web/src/i18n/locales/en-US.ts | 17 + web/src/i18n/locales/ja-JP.ts | 17 + web/src/i18n/locales/zh-Hans.ts | 17 + web/src/i18n/locales/zh-Hant.ts | 17 + 11 files changed, 723 insertions(+), 93 deletions(-) create mode 100644 src/langbot/pkg/api/http/controller/groups/knowledge/migration.py create mode 100644 web/src/app/home/knowledge/components/kb-migration-dialog/KBMigrationDialog.tsx diff --git a/src/langbot/pkg/api/http/controller/groups/knowledge/migration.py b/src/langbot/pkg/api/http/controller/groups/knowledge/migration.py new file mode 100644 index 00000000..2db835d8 --- /dev/null +++ b/src/langbot/pkg/api/http/controller/groups/knowledge/migration.py @@ -0,0 +1,372 @@ +import asyncio +import json + +import httpx +import quart +import sqlalchemy + +from ... import group +from ......core import taskmgr +from ......entity.persistence import metadata as persistence_metadata +from langbot_plugin.runtime.plugin.mgr import PluginInstallSource + +LANGRAG_PLUGIN_AUTHOR = 'langbot-team' +LANGRAG_PLUGIN_NAME = 'LangRAG' +LANGRAG_PLUGIN_ID = f'{LANGRAG_PLUGIN_AUTHOR}/{LANGRAG_PLUGIN_NAME}' +DEFAULT_SPACE_URL = 'https://space.langbot.app' + +# Old Retriever plugin_name -> New Connector plugin_name +EXTERNAL_PLUGIN_NAME_MAPPING = { + 'DifyDatasetsRetriever': 'DifyDatasetsConnector', + 'RAGFlowRetriever': 'RAGFlowConnector', + 'FastGPTRetriever': 'FastGPTConnector', +} + +# Per-plugin: which old retriever_config fields belong to creation_settings. +# Remaining fields go to retrieval_settings. +# None means ALL fields go to creation_settings (no retrieval_schema). +EXTERNAL_PLUGIN_CREATION_FIELDS: dict[str, set[str] | None] = { + 'langbot-team/DifyDatasetsConnector': {'api_base_url', 'dify_apikey', 'dataset_id'}, + 'langbot-team/RAGFlowConnector': {'api_base_url', 'api_key', 'dataset_ids'}, + 'langbot-team/FastGPTConnector': None, # all fields -> creation_settings +} + + +@group.group_class('knowledge/migration', '/api/v1/knowledge/migration') +class KnowledgeMigrationRouterGroup(group.RouterGroup): + async def _get_migration_flag(self) -> bool: + """Check if rag_plugin_migration_needed flag is set.""" + result = await self.ap.persistence_mgr.execute_async( + sqlalchemy.select(persistence_metadata.Metadata).where( + persistence_metadata.Metadata.key == 'rag_plugin_migration_needed' + ) + ) + row = result.first() + return row is not None and row.value == 'true' + + async def _set_migration_flag(self, value: str): + """Set rag_plugin_migration_needed flag.""" + await self.ap.persistence_mgr.execute_async( + sqlalchemy.update(persistence_metadata.Metadata) + .where(persistence_metadata.Metadata.key == 'rag_plugin_migration_needed') + .values(value=value) + ) + + async def _table_exists(self, table_name: str) -> bool: + """Check if a table exists.""" + if self.ap.persistence_mgr.db.name == 'postgresql': + result = await self.ap.persistence_mgr.execute_async( + sqlalchemy.text( + 'SELECT EXISTS (SELECT FROM information_schema.tables WHERE table_name = :table_name);' + ).bindparams(table_name=table_name) + ) + return result.scalar() + else: + result = await self.ap.persistence_mgr.execute_async( + sqlalchemy.text("SELECT name FROM sqlite_master WHERE type='table' AND name=:table_name;").bindparams( + table_name=table_name + ) + ) + return result.first() is not None + + async def _install_plugin_from_marketplace( + self, plugin_id: str, task_context: taskmgr.TaskContext, space_url: str + ) -> None: + """Install a single plugin from the marketplace.""" + p_author, p_name = plugin_id.split('/', 1) + self.ap.logger.info(f'RAG migration: installing plugin {plugin_id} from marketplace...') + task_context.trace(f'Installing plugin {plugin_id} from marketplace...') + + async with httpx.AsyncClient(trust_env=True, timeout=15) as client: + resp = await client.get(f'{space_url}/api/v1/marketplace/plugins/{p_author}/{p_name}') + resp.raise_for_status() + p_data = resp.json().get('data', {}).get('plugin', {}) + p_version = p_data.get('latest_version') + if not p_version: + raise Exception(f'Could not determine latest version for {plugin_id}') + + await self.ap.plugin_connector.install_plugin( + PluginInstallSource.MARKETPLACE, + { + 'plugin_author': p_author, + 'plugin_name': p_name, + 'plugin_version': p_version, + }, + task_context=task_context, + ) + self.ap.logger.info(f'RAG migration: plugin {plugin_id} install request sent.') + + async def _execute_rag_migration(self, task_context: taskmgr.TaskContext, install_plugin: bool = True): + """Execute RAG migration: install required plugins and restore backup data.""" + warnings = [] + + # Collect all plugins we need: LangRAG (always) + connector plugins (from external KBs) + needed_plugins: dict[str, str] = { + LANGRAG_PLUGIN_ID: LANGRAG_PLUGIN_NAME, + } + + has_external = await self._table_exists('external_knowledge_bases') + if has_external: + result = await self.ap.persistence_mgr.execute_async( + sqlalchemy.text('SELECT DISTINCT plugin_author, plugin_name FROM external_knowledge_bases;') + ) + for row in result.fetchall(): + plugin_author = row[0] or '' + plugin_name = row[1] or '' + mapped_name = EXTERNAL_PLUGIN_NAME_MAPPING.get(plugin_name, plugin_name) + plugin_id = f'{plugin_author}/{mapped_name}' + if plugin_id not in needed_plugins: + needed_plugins[plugin_id] = mapped_name + + self.ap.logger.info(f'RAG migration: plugins needed: {list(needed_plugins.keys())}') + + if install_plugin: + # Step 1: Install all required plugins from marketplace + task_context.trace('Installing required plugins...', action='install-plugin') + space_url = self.ap.instance_config.data.get('space', {}).get('url', DEFAULT_SPACE_URL).rstrip('/') + + for plugin_id in needed_plugins: + try: + await self._install_plugin_from_marketplace(plugin_id, task_context, space_url) + except Exception as e: + self.ap.logger.warning(f'RAG migration: plugin {plugin_id} install returned: {e}') + task_context.trace(f'Plugin install note ({plugin_id}): {e}') + + # Step 2: Wait for all plugins to become available as knowledge engines + task_context.trace( + f'Waiting for plugins to become available: {list(needed_plugins.keys())}...', + action='wait-plugin', + ) + max_retries = 30 + engine_id_set: set[str] = set() + for i in range(max_retries): + try: + engines = await self.ap.plugin_connector.list_knowledge_engines() + engine_id_set = {e.get('plugin_id') for e in engines} + except Exception: + pass + if all(pid in engine_id_set for pid in needed_plugins): + self.ap.logger.info(f'RAG migration: all plugins ready: {engine_id_set}') + task_context.trace('All required plugins are ready.') + break + if i == max_retries - 1: + still_missing = [pid for pid in needed_plugins if pid not in engine_id_set] + warning = f'Plugin(s) {still_missing} did not become available after {max_retries} retries' + self.ap.logger.warning(f'RAG migration: {warning}') + warnings.append(warning) + task_context.trace(warning) + await asyncio.sleep(2) + else: + try: + engines = await self.ap.plugin_connector.list_knowledge_engines() + engine_id_set = {e.get('plugin_id') for e in engines} + except Exception: + engine_id_set = set() + + # Step 3: Restore internal knowledge bases from backup + task_context.trace('Restoring internal knowledge bases...', action='restore-internal') + if await self._table_exists('knowledge_bases_backup'): + result = await self.ap.persistence_mgr.execute_async( + sqlalchemy.text('SELECT * FROM knowledge_bases_backup;') + ) + rows = result.fetchall() + columns = result.keys() + + for row in rows: + row_dict = dict(zip(columns, row)) + kb_uuid = row_dict.get('uuid') + name = row_dict.get('name', 'Untitled') + description = row_dict.get('description', '') + emoji = row_dict.get('emoji', '\U0001f4da') + embedding_model_uuid = row_dict.get('embedding_model_uuid', '') + top_k = row_dict.get('top_k', 5) + created_at = row_dict.get('created_at') + updated_at = row_dict.get('updated_at') + + creation_settings = json.dumps({'embedding_model_uuid': embedding_model_uuid}) + retrieval_settings = json.dumps({'top_k': top_k}) + + await self.ap.persistence_mgr.execute_async( + sqlalchemy.text( + 'INSERT INTO knowledge_bases ' + '(uuid, name, description, emoji, created_at, updated_at, ' + 'knowledge_engine_plugin_id, collection_id, creation_settings, retrieval_settings) ' + 'VALUES (:uuid, :name, :description, :emoji, :created_at, :updated_at, ' + ':plugin_id, :collection_id, :creation_settings, :retrieval_settings);' + ).bindparams( + uuid=kb_uuid, + name=name, + description=description, + emoji=emoji, + created_at=created_at, + updated_at=updated_at, + plugin_id=LANGRAG_PLUGIN_ID, + collection_id=kb_uuid, + creation_settings=creation_settings, + retrieval_settings=retrieval_settings, + ) + ) + + try: + config = {'embedding_model_uuid': embedding_model_uuid} + await self.ap.plugin_connector.rag_on_kb_create(LANGRAG_PLUGIN_ID, kb_uuid, config) + task_context.trace(f'Restored internal KB: {name} ({kb_uuid})') + except Exception as e: + warning = f'Failed to notify plugin for KB {name} ({kb_uuid}): {e}' + warnings.append(warning) + task_context.trace(warning) + + await self.ap.rag_mgr.load_knowledge_bases_from_db() + + # Step 4: Restore external knowledge bases + task_context.trace('Restoring external knowledge bases...', action='restore-external') + if has_external: + result = await self.ap.persistence_mgr.execute_async( + sqlalchemy.text('SELECT * FROM external_knowledge_bases;') + ) + rows = result.fetchall() + columns = result.keys() + + self.ap.logger.info( + f'RAG migration: {len(rows)} external KB(s) to restore. Available engines: {engine_id_set}' + ) + task_context.trace(f'Found {len(rows)} external KB(s). Available engines: {engine_id_set}') + + for row in rows: + row_dict = dict(zip(columns, row)) + kb_uuid = row_dict.get('uuid') + name = row_dict.get('name', 'Untitled') + description = row_dict.get('description', '') + emoji = row_dict.get('emoji', '\U0001f517') + plugin_author = row_dict.get('plugin_author', '') + plugin_name = row_dict.get('plugin_name', '') + retriever_config = row_dict.get('retriever_config', {}) + created_at = row_dict.get('created_at') + + mapped_plugin_name = EXTERNAL_PLUGIN_NAME_MAPPING.get(plugin_name, plugin_name) + external_plugin_id = f'{plugin_author}/{mapped_plugin_name}' + + self.ap.logger.info( + f'RAG migration: processing external KB "{name}" ({kb_uuid}), ' + f'plugin: {plugin_author}/{plugin_name} -> {external_plugin_id}' + ) + + if isinstance(retriever_config, str): + try: + retriever_config = json.loads(retriever_config) + except (json.JSONDecodeError, TypeError): + retriever_config = {} + + creation_fields = EXTERNAL_PLUGIN_CREATION_FIELDS.get(external_plugin_id) + if creation_fields is None: + creation_settings_dict = retriever_config + retrieval_settings_dict = {} + else: + creation_settings_dict = {k: v for k, v in retriever_config.items() if k in creation_fields} + retrieval_settings_dict = {k: v for k, v in retriever_config.items() if k not in creation_fields} + + await self.ap.persistence_mgr.execute_async( + sqlalchemy.text( + 'INSERT INTO knowledge_bases ' + '(uuid, name, description, emoji, created_at, updated_at, ' + 'knowledge_engine_plugin_id, collection_id, creation_settings, retrieval_settings) ' + 'VALUES (:uuid, :name, :description, :emoji, :created_at, :updated_at, ' + ':plugin_id, :collection_id, :creation_settings, :retrieval_settings);' + ).bindparams( + uuid=kb_uuid, + name=name, + description=description, + emoji=emoji, + created_at=created_at, + updated_at=created_at, + plugin_id=external_plugin_id, + collection_id=kb_uuid, + creation_settings=json.dumps(creation_settings_dict), + retrieval_settings=json.dumps(retrieval_settings_dict), + ) + ) + + if external_plugin_id not in engine_id_set: + warning = ( + f'External KB "{name}" ({kb_uuid}) record saved, but plugin {external_plugin_id} ' + f'is not installed yet. Install the connector plugin to use it.' + ) + warnings.append(warning) + task_context.trace(warning) + else: + try: + await self.ap.plugin_connector.rag_on_kb_create( + external_plugin_id, kb_uuid, creation_settings_dict + ) + task_context.trace(f'Restored external KB: {name} ({kb_uuid})') + except Exception as e: + warning = f'Failed to notify plugin for external KB {name} ({kb_uuid}): {e}' + warnings.append(warning) + task_context.trace(warning) + + await self.ap.rag_mgr.load_knowledge_bases_from_db() + + # Step 5: Clear migration flag + await self._set_migration_flag('false') + task_context.trace('RAG migration completed.', action='done') + + if warnings: + task_context.trace(f'Completed with {len(warnings)} warning(s).') + + async def initialize(self) -> None: + @self.route('/status', methods=['GET'], auth_type=group.AuthType.USER_TOKEN) + async def _() -> str: + needed = await self._get_migration_flag() + + internal_kb_count = 0 + external_kb_count = 0 + + if needed: + if await self._table_exists('knowledge_bases_backup'): + result = await self.ap.persistence_mgr.execute_async( + sqlalchemy.text('SELECT COUNT(*) FROM knowledge_bases_backup;') + ) + internal_kb_count = result.scalar() or 0 + + if await self._table_exists('external_knowledge_bases'): + result = await self.ap.persistence_mgr.execute_async( + sqlalchemy.text('SELECT COUNT(*) FROM external_knowledge_bases;') + ) + external_kb_count = result.scalar() or 0 + + return self.success( + data={ + 'needed': needed, + 'internal_kb_count': internal_kb_count, + 'external_kb_count': external_kb_count, + } + ) + + @self.route('/execute', methods=['POST'], auth_type=group.AuthType.USER_TOKEN) + async def _() -> str: + needed = await self._get_migration_flag() + if not needed: + return self.http_status(400, -1, 'RAG migration is not needed') + + data = await quart.request.get_json(silent=True) or {} + install_plugin = data.get('install_plugin', True) + + ctx = taskmgr.TaskContext.new() + wrapper = self.ap.task_mgr.create_user_task( + self._execute_rag_migration(task_context=ctx, install_plugin=install_plugin), + kind='rag-migration', + name='rag-migration-execute', + label='Migrating knowledge bases to plugin architecture', + context=ctx, + ) + + return self.success(data={'task_id': wrapper.id}) + + @self.route('/dismiss', methods=['POST'], auth_type=group.AuthType.USER_TOKEN) + async def _() -> str: + needed = await self._get_migration_flag() + if not needed: + return self.http_status(400, -1, 'RAG migration is not needed') + + await self._set_migration_flag('false') + return self.success() diff --git a/src/langbot/pkg/persistence/migrations/dbm020_knowledge_engine_plugin_architecture.py b/src/langbot/pkg/persistence/migrations/dbm020_knowledge_engine_plugin_architecture.py index 7bca300c..616cb91a 100644 --- a/src/langbot/pkg/persistence/migrations/dbm020_knowledge_engine_plugin_architecture.py +++ b/src/langbot/pkg/persistence/migrations/dbm020_knowledge_engine_plugin_architecture.py @@ -1,5 +1,3 @@ -import json - import sqlalchemy from .. import migration @@ -9,20 +7,22 @@ class DBMigrateKnowledgeEnginePluginArchitecture(migration.DBMigration): """Migrate to unified Knowledge Engine plugin architecture. Changes: - - Add knowledge_engine_plugin_id, collection_id, creation_settings, retrieval_settings columns to knowledge_bases - - Migrate existing top_k values into retrieval_settings JSON - - Migrate existing embedding_model_uuid into creation_settings JSON - - Drop embedding_model_uuid and top_k columns (PostgreSQL only; SQLite leaves them unmapped) - - Drop external_knowledge_bases table (no longer needed; external KB data is not migrated) + - Backup existing knowledge_bases data to knowledge_bases_backup + - Clear knowledge_bases table and add new plugin architecture columns + - Drop old columns (PostgreSQL only; SQLite leaves them unmapped) + - Preserve external_knowledge_bases table as-is for future migration + - Set rag_plugin_migration_needed flag in metadata if old data exists """ async def upgrade(self): """Upgrade""" + has_internal_data = await self._backup_knowledge_bases() + has_external_data = await self._check_external_knowledge_bases() + await self._clear_knowledge_bases() await self._add_columns_to_knowledge_bases() - await self._migrate_top_k_to_retrieval_settings() - await self._migrate_embedding_model_uuid_to_creation_settings() await self._drop_old_columns() - await self._drop_external_knowledge_bases_table() + if has_internal_data or has_external_data: + await self._set_migration_flag() async def _get_table_columns(self, table_name: str) -> list[str]: """Get column names from a table (works for both SQLite and PostgreSQL).""" @@ -57,6 +57,50 @@ class DBMigrateKnowledgeEnginePluginArchitecture(migration.DBMigration): ) return result.first() is not None + async def _backup_knowledge_bases(self) -> bool: + """Backup knowledge_bases data. Returns True if data was backed up.""" + result = await self.ap.persistence_mgr.execute_async(sqlalchemy.text('SELECT COUNT(*) FROM knowledge_bases;')) + count = result.scalar() + if count == 0: + return False + + # Drop backup table if it already exists (from a previous failed migration) + if await self._table_exists('knowledge_bases_backup'): + await self.ap.persistence_mgr.execute_async(sqlalchemy.text('DROP TABLE knowledge_bases_backup;')) + + await self.ap.persistence_mgr.execute_async( + sqlalchemy.text('CREATE TABLE knowledge_bases_backup AS SELECT * FROM knowledge_bases;') + ) + self.ap.logger.info( + 'Backed up %d knowledge base(s) to knowledge_bases_backup table.', + count, + ) + return True + + async def _check_external_knowledge_bases(self) -> bool: + """Check if external_knowledge_bases table exists and has data. + + The table is preserved as-is (not dropped) for future migration. + """ + if not await self._table_exists('external_knowledge_bases'): + return False + + result = await self.ap.persistence_mgr.execute_async( + sqlalchemy.text('SELECT COUNT(*) FROM external_knowledge_bases;') + ) + count = result.scalar() + if count > 0: + self.ap.logger.info( + 'Found %d external knowledge base(s) in external_knowledge_bases table. ' + 'Table preserved for future migration.', + count, + ) + return count > 0 + + async def _clear_knowledge_bases(self): + """Clear all rows from knowledge_bases table (preserve table structure).""" + await self.ap.persistence_mgr.execute_async(sqlalchemy.text('DELETE FROM knowledge_bases;')) + async def _add_columns_to_knowledge_bases(self): """Add new RAG plugin architecture columns to knowledge_bases table.""" columns = await self._get_table_columns('knowledge_bases') @@ -74,73 +118,6 @@ class DBMigrateKnowledgeEnginePluginArchitecture(migration.DBMigration): sqlalchemy.text(f'ALTER TABLE knowledge_bases ADD COLUMN {col_name} {col_type};') ) - # For existing knowledge bases without knowledge_engine_plugin_id, - # set collection_id = uuid (same default as new KBs) - await self.ap.persistence_mgr.execute_async( - sqlalchemy.text('UPDATE knowledge_bases SET collection_id = uuid WHERE collection_id IS NULL;') - ) - - async def _migrate_top_k_to_retrieval_settings(self): - """Migrate existing top_k values into retrieval_settings JSON.""" - columns = await self._get_table_columns('knowledge_bases') - if 'top_k' not in columns: - return - - result = await self.ap.persistence_mgr.execute_async( - sqlalchemy.text( - 'SELECT uuid, top_k FROM knowledge_bases WHERE top_k IS NOT NULL AND retrieval_settings IS NULL;' - ) - ) - rows = result.fetchall() - - for row in rows: - kb_uuid = row[0] - top_k = row[1] - retrieval_settings = json.dumps({'top_k': top_k}) - await self.ap.persistence_mgr.execute_async( - sqlalchemy.text('UPDATE knowledge_bases SET retrieval_settings = :rs WHERE uuid = :uuid;').bindparams( - rs=retrieval_settings, uuid=kb_uuid - ) - ) - - async def _migrate_embedding_model_uuid_to_creation_settings(self): - """Migrate existing embedding_model_uuid into creation_settings JSON.""" - columns = await self._get_table_columns('knowledge_bases') - if 'embedding_model_uuid' not in columns: - return - - result = await self.ap.persistence_mgr.execute_async( - sqlalchemy.text( - 'SELECT uuid, embedding_model_uuid, creation_settings FROM knowledge_bases ' - "WHERE embedding_model_uuid IS NOT NULL AND embedding_model_uuid != '';" - ) - ) - rows = result.fetchall() - - for row in rows: - kb_uuid = row[0] - emb_uuid = row[1] - existing_settings = row[2] - - if existing_settings and isinstance(existing_settings, str): - try: - settings = json.loads(existing_settings) - except (json.JSONDecodeError, TypeError): - settings = {} - elif isinstance(existing_settings, dict): - settings = existing_settings - else: - settings = {} - - if 'embedding_model_uuid' not in settings: - settings['embedding_model_uuid'] = emb_uuid - new_settings = json.dumps(settings) - await self.ap.persistence_mgr.execute_async( - sqlalchemy.text( - 'UPDATE knowledge_bases SET creation_settings = :cs WHERE uuid = :uuid;' - ).bindparams(cs=new_settings, uuid=kb_uuid) - ) - async def _drop_old_columns(self): """Drop embedding_model_uuid and top_k columns (PostgreSQL only). @@ -162,22 +139,22 @@ class DBMigrateKnowledgeEnginePluginArchitecture(migration.DBMigration): sqlalchemy.text('ALTER TABLE knowledge_bases DROP COLUMN top_k;') ) - async def _drop_external_knowledge_bases_table(self): - """Drop the external_knowledge_bases table if it exists.""" - if await self._table_exists('external_knowledge_bases'): - # Log existing external KBs before dropping, so users are aware of data loss - rows = await self.ap.persistence_mgr.execute_async( - sqlalchemy.text('SELECT * FROM external_knowledge_bases;') + async def _set_migration_flag(self): + """Set rag_plugin_migration_needed flag in metadata table.""" + # Check if the key already exists + result = await self.ap.persistence_mgr.execute_async( + sqlalchemy.text("SELECT value FROM metadata WHERE key = 'rag_plugin_migration_needed';") + ) + row = result.first() + if row is not None: + await self.ap.persistence_mgr.execute_async( + sqlalchemy.text("UPDATE metadata SET value = 'true' WHERE key = 'rag_plugin_migration_needed';") ) - existing = rows.fetchall() - if existing: - self.ap.logger.warning( - 'Dropping external_knowledge_bases table with %d existing record(s). ' - 'These external KB configurations will be removed: %s', - len(existing), - [dict(row._mapping) for row in existing], - ) - await self.ap.persistence_mgr.execute_async(sqlalchemy.text('DROP TABLE external_knowledge_bases;')) + else: + await self.ap.persistence_mgr.execute_async( + sqlalchemy.text("INSERT INTO metadata (key, value) VALUES ('rag_plugin_migration_needed', 'true');") + ) + self.ap.logger.info('Set rag_plugin_migration_needed=true in metadata.') async def downgrade(self): """Downgrade""" diff --git a/src/langbot/pkg/utils/constants.py b/src/langbot/pkg/utils/constants.py index 393012c1..cd058a88 100644 --- a/src/langbot/pkg/utils/constants.py +++ b/src/langbot/pkg/utils/constants.py @@ -2,7 +2,7 @@ import langbot semantic_version = f'v{langbot.__version__}' -required_database_version = 19 +required_database_version = 20 """Tag the version of the database schema, used to check if the database needs to be migrated""" debug_mode = False diff --git a/web/src/app/home/knowledge/components/kb-migration-dialog/KBMigrationDialog.tsx b/web/src/app/home/knowledge/components/kb-migration-dialog/KBMigrationDialog.tsx new file mode 100644 index 00000000..8e04513b --- /dev/null +++ b/web/src/app/home/knowledge/components/kb-migration-dialog/KBMigrationDialog.tsx @@ -0,0 +1,157 @@ +'use client'; + +import { useState } from 'react'; +import { + Dialog, + DialogContent, + DialogHeader, + DialogTitle, + DialogDescription, + DialogFooter, +} from '@/components/ui/dialog'; +import { Button } from '@/components/ui/button'; +import { useTranslation } from 'react-i18next'; +import { httpClient } from '@/app/infra/http/HttpClient'; +import { useAsyncTask, AsyncTaskStatus } from '@/hooks/useAsyncTask'; +import { toast } from 'sonner'; +import { Loader2 } from 'lucide-react'; + +interface KBMigrationDialogProps { + open: boolean; + onOpenChange: (open: boolean) => void; + internalKbCount: number; + externalKbCount: number; + onMigrationComplete: () => void; +} + +export default function KBMigrationDialog({ + open, + onOpenChange, + internalKbCount, + externalKbCount, + onMigrationComplete, +}: KBMigrationDialogProps) { + const { t } = useTranslation(); + const [dismissing, setDismissing] = useState(false); + + const asyncTask = useAsyncTask({ + onSuccess: () => { + toast.success(t('knowledge.migration.success')); + onOpenChange(false); + onMigrationComplete(); + }, + onError: (error) => { + toast.error(`${t('knowledge.migration.error')}${error}`); + }, + }); + + const handleMigration = async (installPlugin: boolean) => { + try { + const resp = await httpClient.executeRagMigration(installPlugin); + asyncTask.startTask(resp.task_id); + } catch { + toast.error(t('knowledge.migration.error')); + } + }; + + const handleDismiss = async () => { + setDismissing(true); + try { + await httpClient.dismissRagMigration(); + onOpenChange(false); + } catch { + toast.error(t('knowledge.migration.dismissError')); + } finally { + setDismissing(false); + } + }; + + const isRunning = asyncTask.status === AsyncTaskStatus.RUNNING; + const isError = asyncTask.status === AsyncTaskStatus.ERROR; + const totalCount = internalKbCount + externalKbCount; + + return ( + { + if (!isRunning) onOpenChange(v); + }} + > + + + {t('knowledge.migration.title')} + + {t('knowledge.migration.description')} + + + +
+ {!isRunning && !isError && ( +

+ {t('knowledge.migration.detected', { + total: totalCount, + internal: internalKbCount, + external: externalKbCount, + })} +

+ )} + + {isRunning && ( +
+ +

{t('knowledge.migration.running')}

+
+ )} + + {isError && ( +
+

+ {t('knowledge.migration.error')} +

+ {asyncTask.error && ( +

+ {asyncTask.error} +

+ )} +
+ )} +
+ + + {!isRunning && !isError && ( + <> + + +

+ {t('knowledge.migration.dataOnlyHint')} +

+ + )} + {isError && ( + + )} + {!isRunning && ( + + )} +
+
+
+ ); +} diff --git a/web/src/app/home/knowledge/page.tsx b/web/src/app/home/knowledge/page.tsx index de214f0d..9fd64d5b 100644 --- a/web/src/app/home/knowledge/page.tsx +++ b/web/src/app/home/knowledge/page.tsx @@ -7,6 +7,7 @@ import { useEffect, useState } from 'react'; import { KnowledgeBaseVO } from '@/app/home/knowledge/components/kb-card/KBCardVO'; import KBCard from '@/app/home/knowledge/components/kb-card/KBCard'; import KBDetailDialog from '@/app/home/knowledge/KBDetailDialog'; +import KBMigrationDialog from '@/app/home/knowledge/components/kb-migration-dialog/KBMigrationDialog'; import { httpClient } from '@/app/infra/http/HttpClient'; import { KnowledgeBase } from '@/app/infra/entities/api'; @@ -18,10 +19,29 @@ export default function KnowledgePage() { const [selectedKbId, setSelectedKbId] = useState(''); const [detailDialogOpen, setDetailDialogOpen] = useState(false); + // Migration dialog state + const [migrationDialogOpen, setMigrationDialogOpen] = useState(false); + const [migrationInternalCount, setMigrationInternalCount] = useState(0); + const [migrationExternalCount, setMigrationExternalCount] = useState(0); + useEffect(() => { getKnowledgeBaseList(); + checkMigrationStatus(); }, []); + async function checkMigrationStatus() { + try { + const resp = await httpClient.getRagMigrationStatus(); + if (resp.needed) { + setMigrationInternalCount(resp.internal_kb_count); + setMigrationExternalCount(resp.external_kb_count); + setMigrationDialogOpen(true); + } + } catch { + // Silently ignore - migration check is non-critical + } + } + async function getKnowledgeBaseList() { const resp = await httpClient.getKnowledgeBases(); @@ -85,8 +105,20 @@ export default function KnowledgePage() { getKnowledgeBaseList(); }; + const handleMigrationComplete = () => { + getKnowledgeBaseList(); + }; + return (
+ + { + return this.get('/api/v1/knowledge/migration/status'); + } + + public executeRagMigration( + installPlugin: boolean = true, + ): Promise { + return this.post('/api/v1/knowledge/migration/execute', { + install_plugin: installPlugin, + }); + } + + public dismissRagMigration(): Promise { + return this.post('/api/v1/knowledge/migration/dismiss'); + } + public getPluginDebugInfo(): Promise<{ debug_url: string; plugin_debug_key: string; diff --git a/web/src/i18n/locales/en-US.ts b/web/src/i18n/locales/en-US.ts index 77453182..0e06bba9 100644 --- a/web/src/i18n/locales/en-US.ts +++ b/web/src/i18n/locales/en-US.ts @@ -773,6 +773,23 @@ const enUS = { retrieverConfiguration: 'Retriever Configuration', retrieverInstallInfo: 'You can install Knowledge Retriever plugins from', retrieverMarketLink: 'here', + migration: { + title: 'Knowledge Base Migration', + description: + 'The new version has refactored the knowledge base into a plugin-based architecture, unifying built-in and external knowledge bases as "Knowledge Engine" plugins. Migration of legacy knowledge base data is required. Your old data has been automatically backed up in the database.', + detected: + 'Found {{total}} knowledge base(s) to migrate ({{internal}} internal, {{external}} external).', + startWithInstall: 'Auto-install Plugin & Migrate', + startDataOnly: 'Migrate Data Only', + dataOnlyHint: + '"Migrate Data Only" is for offline/intranet environments. Please install the corresponding plugin manually after migration.', + dismiss: 'Discard Original Data', + running: 'Migrating knowledge bases, please wait...', + success: 'Knowledge base migration completed', + error: 'Knowledge base migration failed: ', + dismissError: 'Operation failed', + retry: 'Retry', + }, }, register: { title: 'Initialize LangBot 👋', diff --git a/web/src/i18n/locales/ja-JP.ts b/web/src/i18n/locales/ja-JP.ts index 57928774..4b529b35 100644 --- a/web/src/i18n/locales/ja-JP.ts +++ b/web/src/i18n/locales/ja-JP.ts @@ -762,6 +762,23 @@ const jaJP = { retrieverConfiguration: '検索器設定', retrieverInstallInfo: 'ナレッジ検索器プラグインは', retrieverMarketLink: 'こちらからインストールできます', + migration: { + title: 'ナレッジベースの移行', + description: + '新バージョンではナレッジベースをプラグインベースのアーキテクチャに再構築し、内蔵ナレッジベースと外部ナレッジベースを「ナレッジエンジン」プラグインとして統合しました。旧ナレッジベースデータの移行が必要です。旧データはデータベースに自動的にバックアップされています。', + detected: + '移行が必要なナレッジベースが{{total}}件見つかりました(内部{{internal}}件、外部{{external}}件)。', + startWithInstall: 'プラグインを自動インストールして移行', + startDataOnly: 'データのみ移行', + dataOnlyHint: + '「データのみ移行」はオフライン環境向けです。移行完了後に対応するプラグインを手動でインストールしてください。', + dismiss: '元データを破棄', + running: 'ナレッジベースを移行中です。しばらくお待ちください...', + success: 'ナレッジベースの移行が完了しました', + error: 'ナレッジベースの移行に失敗しました:', + dismissError: '操作に失敗しました', + retry: 'リトライ', + }, }, register: { title: 'LangBot を初期化 👋', diff --git a/web/src/i18n/locales/zh-Hans.ts b/web/src/i18n/locales/zh-Hans.ts index 5b0d3609..7e4661d9 100644 --- a/web/src/i18n/locales/zh-Hans.ts +++ b/web/src/i18n/locales/zh-Hans.ts @@ -742,6 +742,23 @@ const zhHans = { retrieverConfiguration: '检索器配置', retrieverInstallInfo: '您可以从', retrieverMarketLink: '此处安装知识检索器插件', + migration: { + title: '知识库迁移', + description: + '新版本已将知识库重构为插件化架构,并统一内置知识库和外部知识库为「知识引擎」插件,需要对旧知识库数据进行迁移。您的旧数据已自动备份在数据库中。', + detected: + '共检测到 {{total}} 个知识库需要迁移({{internal}} 个内置知识库,{{external}} 个外部知识库)。', + startWithInstall: '自动安装插件并迁移', + startDataOnly: '仅迁移数据', + dataOnlyHint: + '「仅迁移数据」适合内网环境使用,请在迁移完成后自行安装对应插件', + dismiss: '丢弃原数据', + running: '正在迁移知识库,请稍候...', + success: '知识库迁移完成', + error: '知识库迁移失败:', + dismissError: '操作失败', + retry: '重试', + }, }, register: { title: '初始化 LangBot 👋', diff --git a/web/src/i18n/locales/zh-Hant.ts b/web/src/i18n/locales/zh-Hant.ts index 683bc5f5..87e691f9 100644 --- a/web/src/i18n/locales/zh-Hant.ts +++ b/web/src/i18n/locales/zh-Hant.ts @@ -722,6 +722,23 @@ const zhHant = { retrieverConfiguration: '檢索器配置', retrieverInstallInfo: '您可以從', retrieverMarketLink: '此處安裝知識檢索器插件', + migration: { + title: '知識庫遷移', + description: + '新版本已將知識庫重構為插件化架構,並統一內建知識庫和外部知識庫為「知識引擎」插件,需要對舊知識庫資料進行遷移。您的舊資料已自動備份在資料庫中。', + detected: + '共檢測到 {{total}} 個知識庫需要遷移({{internal}} 個內建知識庫,{{external}} 個外部知識庫)。', + startWithInstall: '自動安裝插件並遷移', + startDataOnly: '僅遷移資料', + dataOnlyHint: + '「僅遷移資料」適合內網環境使用,請在遷移完成後自行安裝對應插件', + dismiss: '丟棄原數據', + running: '正在遷移知識庫,請稍候...', + success: '知識庫遷移完成', + error: '知識庫遷移失敗:', + dismissError: '操作失敗', + retry: '重試', + }, }, register: { title: '初始化 LangBot 👋',