Add plugin rerank invocation action

chore(deps): bump langbot-plugin to 0.4.5 (#2266)
Bumps the pinned langbot-plugin SDK from 0.4.4 to 0.4.5, which adds `provider_specific_fields` to the Message/ToolCall entities. This is the SDK dependency required by the Gemini thought_signature fix (#1899, #2265). The lock update is scoped to langbot-plugin only. pylibseekdb is deliberately held at 1.1.0: a free re-resolve drifts it to 1.3.0 (pyseekdb==1.1.0.post3 has no upper bound on it), which is out of scope here and should be handled in a separate dependency-upgrade PR.
2026-08-03 10:06:06 +00:00 · 2026-06-19 23:16:33 +08:00 · 2026-06-19 23:13:56 +08:00 · 2026-06-19 18:39:58 +08:00 · 2026-06-19 06:20:17 -04:00 · 2026-06-19 06:15:31 -04:00
336 changed files with 13055 additions and 9008 deletions
@@ -0,0 +1,46 @@
+name: Frontend Tests
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened, ready_for_review]
+    paths:
+      - 'web/**'
+      - '.github/workflows/frontend-tests.yml'
+  push:
+    branches:
+      - master
+      - develop
+    paths:
+      - 'web/**'
+      - '.github/workflows/frontend-tests.yml'
+
+jobs:
+  playwright-smoke:
+    name: Playwright Smoke
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: '25'
+
+      - name: Install pnpm
+        uses: pnpm/action-setup@v4
+        with:
+          version: 8.9.2
+
+      - name: Install dependencies
+        working-directory: web
+        run: pnpm install --frozen-lockfile
+
+      - name: Install Playwright browsers
+        working-directory: web
+        run: pnpm exec playwright install --with-deps chromium
+
+      - name: Run Playwright smoke tests
+        working-directory: web
+        run: pnpm test:e2e
@@ -29,7 +29,7 @@ jobs:
        run: uv sync --dev

      - name: Run ruff check
-        run: uv run ruff check src
+        run: uv run ruff check src/langbot/ tests/ --output-format=concise

      - name: Run ruff format
        run: uv run ruff format src --check
@@ -84,6 +84,67 @@ jobs:
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "Test Status: ${{ job.status }}" >> $GITHUB_STEP_SUMMARY

+  e2e:
+    name: E2E Startup Tests
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+
+      - name: Install dependencies
+        run: uv sync --dev
+
+      - name: Run E2E startup tests
+        run: uv run pytest tests/e2e -q --tb=short
+
+      - name: E2E Test Summary
+        if: always()
+        run: |
+          echo "## E2E Startup Test Results" >> $GITHUB_STEP_SUMMARY
+          echo "" >> $GITHUB_STEP_SUMMARY
+          echo "Test Status: ${{ job.status }}" >> $GITHUB_STEP_SUMMARY
+
+  box-integration:
+    name: Box Integration Tests
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+
+      - name: Install dependencies
+        run: uv sync --dev
+
+      - name: Check Docker runtime
+        run: docker info
+
+      - name: Run Box integration tests
+        run: uv run pytest tests/integration_tests -q --tb=short
+
+      - name: Box Integration Test Summary
+        if: always()
+        run: |
+          echo "## Box Integration Test Results" >> $GITHUB_STEP_SUMMARY
+          echo "" >> $GITHUB_STEP_SUMMARY
+          echo "Test Status: ${{ job.status }}" >> $GITHUB_STEP_SUMMARY
+
  coverage:
    name: Coverage Gate
    runs-on: ubuntu-latest
@@ -129,4 +190,4 @@ jobs:
          echo "## Coverage Results" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "Threshold: 18%" >> $GITHUB_STEP_SUMMARY
-          echo "Status: ${{ job.status }}" >> $GITHUB_STEP_SUMMARY
+          echo "Status: ${{ job.status }}" >> $GITHUB_STEP_SUMMARY
@@ -125,6 +125,14 @@ uv run python -m langbot.pkg.persistence.alembic_runner autogenerate "descriptio

 Review and edit the generated script before committing. Migrations execute automatically on startup. `autogenerate` detects schema changes (add/drop columns, tables, type changes) but **data migrations** (e.g. mutating JSON field contents) must be hand-written into the generated script. `env.py` sets `render_as_batch=True`, so SQLite's ALTER TABLE limits are handled automatically — no need to branch per database. More in the wiki ["开发配置"](https://docs.langbot.app/zh/develop/dev-config#数据库迁移).

+When writing a migration, follow these rules:
+
+- **Revision id ≤ 32 characters.** PostgreSQL stores `alembic_version.version_num` as `varchar(32)`; a longer id raises `StringDataRightTruncationError` at runtime. Prefer short, descriptive ids like `0005_add_llm_context_length`.
+- **Guard every operation against missing tables/columns.** Fresh installs build the schema via `create_all()` and then stamp the Alembic baseline, so a migration may run against a table that already has the change — or, in tests, against an empty database. Check `inspector.get_table_names()` / `inspector.get_columns(...)` before `add_column` / `drop_column`, mirroring the existing migrations.
+- **Keep a single linear head.** Chain `down_revision` to the current head; do not create branches. Run the migration tests after adding one: `uv run pytest tests/integration/persistence/ -q` (the PostgreSQL test needs a running PG via `TEST_POSTGRES_URL`).
+
+> **Legacy migration system (deprecated — do not extend).** The old 3.x migration system under `src/langbot/pkg/persistence/migrations/` (`DBMigration` subclasses in `dbmXXX_*.py`, run from `pkg/persistence/mgr.py`) is **frozen**. Do **not** add new `dbmXXX_*.py` files. The chain is capped at `required_database_version = 25` (`pkg/utils/constants.py`); those files only exist to upgrade pre-existing 3.x databases up to the Alembic baseline and are kept read-only. All new schema changes go through Alembic.
+
 ## Some Principles

 - Keep it simple, stupid.
@@ -1,6 +1,6 @@
 [project]
 name = "langbot"
-version = "4.10.1"
+version = "4.10.2"
 description = "Production-grade platform for building agentic IM bots"
 readme = "README.md"
 license-files = ["LICENSE"]
@@ -70,7 +70,7 @@ dependencies = [
    "chromadb>=1.0.0,<2.0.0",
    "qdrant-client (>=1.15.1,<2.0.0)",
    "pyseekdb==1.1.0.post3",
-    "langbot-plugin==0.4.2",
+    "langbot-plugin==0.4.5",
    "asyncpg>=0.30.0",
    "line-bot-sdk>=3.19.0",
    "matrix-nio>=0.25.2",
@@ -79,6 +79,7 @@ dependencies = [
    "pymilvus>=2.6.4",
    "pgvector>=0.4.1",
    "botocore>=1.42.39",
+    "litellm>=1.0.0",
 ]
 keywords = [
    "bot",
@@ -1,3 +1,3 @@
 """LangBot - Production-grade platform for building agentic IM bots"""

-__version__ = '4.10.1'
+__version__ = '4.10.2'
@@ -46,6 +46,30 @@ class MonitoringRouterGroup(group.RouterGroup):

            return self.success(data=metrics)

+        @self.route('/token-statistics', methods=['GET'], auth_type=group.AuthType.USER_TOKEN)
+        async def get_token_statistics() -> str:
+            """Get detailed token usage statistics (summary, per-model, timeseries)."""
+            bot_ids = quart.request.args.getlist('botId')
+            pipeline_ids = quart.request.args.getlist('pipelineId')
+            start_time_str = quart.request.args.get('startTime')
+            end_time_str = quart.request.args.get('endTime')
+            bucket = quart.request.args.get('bucket', 'hour')
+            if bucket not in ('hour', 'day'):
+                bucket = 'hour'
+
+            start_time = parse_iso_datetime(start_time_str)
+            end_time = parse_iso_datetime(end_time_str)
+
+            stats = await self.ap.monitoring_service.get_token_statistics(
+                bot_ids=bot_ids if bot_ids else None,
+                pipeline_ids=pipeline_ids if pipeline_ids else None,
+                start_time=start_time,
+                end_time=end_time,
+                bucket=bucket,
+            )
+
+            return self.success(data=stats)
+
        @self.route('/messages', methods=['GET'], auth_type=group.AuthType.USER_TOKEN)
        async def get_messages() -> str:
            """Get message logs"""
@@ -271,6 +271,20 @@ class PluginsRouterGroup(group.RouterGroup):
            readme = await self.ap.plugin_connector.get_plugin_readme(author, plugin_name, language=language)
            return self.success(data={'readme': readme})

+        @self.route(
+            '/<author>/<plugin_name>/logs',
+            methods=['GET'],
+            auth_type=group.AuthType.USER_TOKEN_OR_API_KEY,
+        )
+        async def _(author: str, plugin_name: str) -> quart.Response:
+            try:
+                limit = int(quart.request.args.get('limit', 200))
+            except (TypeError, ValueError):
+                limit = 200
+            level = quart.request.args.get('level') or None
+            logs = await self.ap.plugin_connector.get_plugin_logs(author, plugin_name, limit=limit, level=level)
+            return self.success(data={'logs': logs})
+
        @self.route(
            '/<author>/<plugin_name>/icon',
            methods=['GET'],
@@ -34,6 +34,46 @@ def _runtime_model_data(model_uuid: str, model_data: dict) -> dict:
    return {**model_data, 'uuid': model_uuid}


+async def _validate_provider_supports(ap: app.Application, provider_uuid: str, model_type: str) -> None:
+    """Validate that the provider's requester declares support for ``model_type``.
+
+    ``model_type`` is one of the manifest ``support_type`` values:
+    'llm', 'text-embedding', 'rerank'. Raises ValueError when the requester
+    manifest does not list the requested type. This is a server-side guard so
+    a model cannot be attached to a provider that does not support it, even if
+    the frontend tab restriction is bypassed.
+    """
+    model_mgr = getattr(ap, 'model_mgr', None)
+    if model_mgr is None:
+        return
+
+    provider_dict = getattr(model_mgr, 'provider_dict', None)
+    if not provider_dict:
+        return
+    runtime_provider = provider_dict.get(provider_uuid)
+    if runtime_provider is None:
+        return
+
+    requester_name = getattr(getattr(runtime_provider, 'provider_entity', None), 'requester', None)
+    if not requester_name:
+        return
+
+    get_manifest = getattr(model_mgr, 'get_available_requester_manifest_by_name', None)
+    if not callable(get_manifest):
+        return
+    manifest = get_manifest(requester_name)
+    if manifest is None:
+        return
+
+    spec = getattr(manifest, 'spec', None) or {}
+    support_type = spec.get('support_type') if isinstance(spec, dict) else None
+    # When a manifest omits support_type, do not block (backward compatible).
+    if not support_type:
+        return
+    if model_type not in support_type:
+        raise ValueError(f'Provider requester "{requester_name}" does not support {model_type} models')
+
+
 class LLMModelsService:
    ap: app.Application

@@ -96,6 +136,8 @@ class LLMModelsService:
                )
                model_data['provider_uuid'] = provider_uuid

+        await _validate_provider_supports(self.ap, model_data['provider_uuid'], 'llm')
+
        await self.ap.persistence_mgr.execute_async(sqlalchemy.insert(persistence_model.LLMModel).values(**model_data))

        runtime_provider = self.ap.model_mgr.provider_dict.get(model_data['provider_uuid'])
@@ -274,6 +316,8 @@ class EmbeddingModelsService:
                )
                model_data['provider_uuid'] = provider_uuid

+        await _validate_provider_supports(self.ap, model_data['provider_uuid'], 'text-embedding')
+
        await self.ap.persistence_mgr.execute_async(
            sqlalchemy.insert(persistence_model.EmbeddingModel).values(**model_data)
        )
@@ -434,6 +478,8 @@ class RerankModelsService:
                )
                model_data['provider_uuid'] = provider_uuid

+        await _validate_provider_supports(self.ap, model_data['provider_uuid'], 'rerank')
+
        await self.ap.persistence_mgr.execute_async(
            sqlalchemy.insert(persistence_model.RerankModel).values(**model_data)
        )
@@ -472,6 +472,179 @@ class MonitoringService:
            'active_sessions': active_sessions,
        }

+    async def get_token_statistics(
+        self,
+        bot_ids: list[str] | None = None,
+        pipeline_ids: list[str] | None = None,
+        start_time: datetime.datetime | None = None,
+        end_time: datetime.datetime | None = None,
+        bucket: str = 'hour',
+    ) -> dict:
+        """Get detailed token usage statistics for production observability.
+
+        Returns:
+        - summary: aggregate token counters and call/latency stats over the window
+        - by_model: per-model token + call breakdown (sorted by total tokens desc)
+        - timeseries: token usage bucketed by `bucket` ('hour' or 'day')
+
+        Token totals are summed over every matching call regardless of status;
+        success and error call counts are reported alongside them so a spike in
+        failures is visible next to the token accounting.
+        """
+        LLMCall = persistence_monitoring.MonitoringLLMCall
+
+        conditions = []
+        if bot_ids:
+            conditions.append(LLMCall.bot_id.in_(bot_ids))
+        if pipeline_ids:
+            conditions.append(LLMCall.pipeline_id.in_(pipeline_ids))
+        if start_time:
+            conditions.append(LLMCall.timestamp >= start_time)
+        if end_time:
+            conditions.append(LLMCall.timestamp <= end_time)
+
+        def _apply(query):
+            if conditions:
+                query = query.where(sqlalchemy.and_(*conditions))
+            return query
+
+        # ---- Summary aggregates ----
+        summary_query = _apply(
+            sqlalchemy.select(
+                sqlalchemy.func.count(LLMCall.id),
+                sqlalchemy.func.coalesce(sqlalchemy.func.sum(LLMCall.input_tokens), 0),
+                sqlalchemy.func.coalesce(sqlalchemy.func.sum(LLMCall.output_tokens), 0),
+                sqlalchemy.func.coalesce(sqlalchemy.func.sum(LLMCall.total_tokens), 0),
+                sqlalchemy.func.coalesce(sqlalchemy.func.sum(LLMCall.duration), 0),
+                sqlalchemy.func.coalesce(sqlalchemy.func.sum(LLMCall.cost), 0.0),
+                sqlalchemy.func.sum(sqlalchemy.case((LLMCall.status == 'success', 1), else_=0)),
+                sqlalchemy.func.sum(sqlalchemy.case((LLMCall.status == 'error', 1), else_=0)),
+                # Count of successful calls that nonetheless recorded zero tokens —
+                # a data-quality signal that usage reporting may be broken upstream.
+                sqlalchemy.func.sum(
+                    sqlalchemy.case(
+                        (sqlalchemy.and_(LLMCall.status == 'success', LLMCall.total_tokens == 0), 1),
+                        else_=0,
+                    )
+                ),
+            )
+        )
+        summary_result = await self.ap.persistence_mgr.execute_async(summary_query)
+        row = summary_result.first()
+        (
+            total_calls,
+            total_input_tokens,
+            total_output_tokens,
+            total_tokens,
+            total_duration,
+            total_cost,
+            success_calls,
+            error_calls,
+            zero_token_success_calls,
+        ) = row if row else (0, 0, 0, 0, 0, 0.0, 0, 0, 0)
+
+        total_calls = total_calls or 0
+        success_calls = success_calls or 0
+        error_calls = error_calls or 0
+        zero_token_success_calls = zero_token_success_calls or 0
+
+        summary = {
+            'total_calls': total_calls,
+            'success_calls': success_calls,
+            'error_calls': error_calls,
+            'total_input_tokens': int(total_input_tokens or 0),
+            'total_output_tokens': int(total_output_tokens or 0),
+            'total_tokens': int(total_tokens or 0),
+            'total_cost': round(float(total_cost or 0.0), 6),
+            'avg_tokens_per_call': int((total_tokens or 0) / total_calls) if total_calls > 0 else 0,
+            'avg_duration_ms': int((total_duration or 0) / total_calls) if total_calls > 0 else 0,
+            'avg_tokens_per_second': round((total_output_tokens or 0) / (total_duration / 1000), 2)
+            if total_duration and total_duration > 0
+            else 0,
+            'zero_token_success_calls': zero_token_success_calls,
+        }
+
+        # ---- Per-model breakdown ----
+        by_model_query = _apply(
+            sqlalchemy.select(
+                LLMCall.model_name,
+                sqlalchemy.func.count(LLMCall.id),
+                sqlalchemy.func.coalesce(sqlalchemy.func.sum(LLMCall.input_tokens), 0),
+                sqlalchemy.func.coalesce(sqlalchemy.func.sum(LLMCall.output_tokens), 0),
+                sqlalchemy.func.coalesce(sqlalchemy.func.sum(LLMCall.total_tokens), 0),
+                sqlalchemy.func.coalesce(sqlalchemy.func.sum(LLMCall.duration), 0),
+                sqlalchemy.func.coalesce(sqlalchemy.func.sum(LLMCall.cost), 0.0),
+                sqlalchemy.func.sum(sqlalchemy.case((LLMCall.status == 'error', 1), else_=0)),
+            ).group_by(LLMCall.model_name)
+        )
+        by_model_result = await self.ap.persistence_mgr.execute_async(by_model_query)
+        by_model = []
+        for mrow in by_model_result.all():
+            (
+                model_name,
+                m_calls,
+                m_in,
+                m_out,
+                m_total,
+                m_duration,
+                m_cost,
+                m_errors,
+            ) = mrow
+            m_calls = m_calls or 0
+            by_model.append(
+                {
+                    'model_name': model_name,
+                    'calls': m_calls,
+                    'error_calls': m_errors or 0,
+                    'input_tokens': int(m_in or 0),
+                    'output_tokens': int(m_out or 0),
+                    'total_tokens': int(m_total or 0),
+                    'cost': round(float(m_cost or 0.0), 6),
+                    'avg_tokens_per_call': int((m_total or 0) / m_calls) if m_calls > 0 else 0,
+                    'avg_duration_ms': int((m_duration or 0) / m_calls) if m_calls > 0 else 0,
+                }
+            )
+        by_model.sort(key=lambda x: x['total_tokens'], reverse=True)
+
+        # ---- Time-bucketed series ----
+        # Use a DB-agnostic bucketing approach: fetch (timestamp, tokens) rows and
+        # aggregate in Python. This is cheap for typical dashboard ranges (hours/days)
+        # but scans every matching row when start_time/end_time are omitted.
+        series_query = _apply(
+            sqlalchemy.select(
+                LLMCall.timestamp,
+                LLMCall.input_tokens,
+                LLMCall.output_tokens,
+                LLMCall.total_tokens,
+            ).order_by(LLMCall.timestamp.asc())
+        )
+        series_result = await self.ap.persistence_mgr.execute_async(series_query)
+
+        bucket_fmt = '%Y-%m-%d %H:00' if bucket == 'hour' else '%Y-%m-%d'
+        buckets: dict[str, dict] = {}
+        for srow in series_result.all():
+            ts, s_in, s_out, s_total = srow
+            if ts is None:
+                continue
+            key = ts.strftime(bucket_fmt)
+            b = buckets.setdefault(
+                key,
+                {'bucket': key, 'input_tokens': 0, 'output_tokens': 0, 'total_tokens': 0, 'calls': 0},
+            )
+            b['input_tokens'] += int(s_in or 0)
+            b['output_tokens'] += int(s_out or 0)
+            b['total_tokens'] += int(s_total or 0)
+            b['calls'] += 1
+
+        timeseries = [buckets[k] for k in sorted(buckets.keys())]
+
+        return {
+            'summary': summary,
+            'by_model': by_model,
+            'timeseries': timeseries,
+            'bucket': bucket,
+        }
+
    async def get_messages(
        self,
        bot_ids: list[str] | None = None,
@@ -82,7 +82,7 @@ class UserService:
        payload = {
            'user': user_email,
            'iss': 'LangBot-' + constants.edition,
-            'exp': datetime.datetime.now() + datetime.timedelta(seconds=jwt_expire),
+            'exp': datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(seconds=jwt_expire),
        }

        return jwt.encode(payload, jwt_secret, algorithm='HS256')
@@ -12,6 +12,7 @@ import pydantic

 from langbot_plugin.box.client import BoxRuntimeClient
 from .connector import BoxRuntimeConnector, _get_box_config
+from ..telemetry import features as telemetry_features
 from langbot_plugin.box.errors import BoxError, BoxValidationError
 from langbot_plugin.box.models import (
    BUILTIN_PROFILES,
@@ -104,6 +105,7 @@ class BoxService:
                f'LangBot Box runtime initialized: profile={self.profile.name} '
                f'default_workspace={self.default_workspace or "(none)"}'
            )
+            await self._purge_attachment_dirs()
        except Exception as exc:
            self.ap.logger.warning(f'LangBot Box runtime unavailable, sandbox features disabled: {exc}')
            self._available = False
@@ -218,6 +220,7 @@ class BoxService:
            f'query_id={query.query_id} '
            f'summary={json.dumps(self._summarize_result(result), ensure_ascii=False)}'
        )
+        telemetry_features.increment(query, 'sandbox', 'execs')
        return self._serialize_result(result)

    def resolve_box_session_id(self, query: pipeline_query.Query) -> str:
@@ -333,6 +336,507 @@ class BoxService:

        return await self.execute_spec_payload(spec_payload, query)

+    # ── Attachment passthrough (inbound / outbound) ──────────────────
+    #
+    # IM/webchat attachments (images, voices, files) reach the LLM as
+    # multimodal content, but historically never landed on the sandbox
+    # filesystem, so the agent's exec/read/write tools could not operate on
+    # them. Conversely, files the agent produced inside the sandbox were
+    # never surfaced back to the user. These two helpers close both gaps:
+    #
+    #   inbound  : message_chain attachments -> /workspace/inbox/<query_id>/
+    #   outbound : /workspace/outbox/<query_id>/ -> reply MessageChain
+    #
+    # Transfer prefers DIRECT HOST FILESYSTEM access to the bind-mounted
+    # workspace (default_workspace on the host maps to /workspace inside the
+    # container), which has no size limit. This covers the local docker /
+    # nsjail / stdio backends. For backends where the workspace is NOT visible
+    # on the LangBot host (E2B, an external remote runtime.endpoint), it falls
+    # back to a base64-through-exec round-trip. The exec channel can only move
+    # small files reliably — the docker backend passes the command as a single
+    # argv (ARG_MAX) and exec stdout is truncated by output_limit_chars — so
+    # the host path is strongly preferred and used whenever available.
+
+    INBOX_MOUNT_DIR = '/workspace/inbox'
+    OUTBOX_MOUNT_DIR = '/workspace/outbox'
+    INBOX_SUBDIR = 'inbox'
+    OUTBOX_SUBDIR = 'outbox'
+    # Hard cap on a single attachment. The HTTP upload endpoints already cap
+    # uploads at 10MiB; keep parity.
+    _ATTACHMENT_MAX_BYTES = 10 * _MIB
+    # Conservative cap for the exec FALLBACK path only (ARG_MAX / stdout
+    # truncation). The host-filesystem path has no such limit.
+    _EXEC_FALLBACK_MAX_BYTES = 256 * 1024
+
+    def _host_query_dir(self, subdir: str, query_id) -> str | None:
+        """Host path for ``/workspace/<subdir>/<query_id>`` when LangBot can
+        access the bind-mounted workspace directly, else ``None``.
+
+        ``default_workspace`` is the host directory bind-mounted to
+        ``/workspace`` for the local docker/nsjail backends and shared
+        outright in stdio mode, so a file written there by LangBot is visible
+        to the sandbox (and vice-versa). It is ``None`` / not a local dir for
+        E2B and remote runtimes, where we must fall back to the exec channel.
+        """
+        root = self.default_workspace
+        if not root or not os.path.isdir(root):
+            return None
+        return os.path.join(root, subdir, str(query_id))
+
+    async def _purge_attachment_dirs(self) -> None:
+        """Remove leftover inbox/outbox directories on startup.
+
+        ``query_id`` is a process-local counter (see pipeline query pool) that
+        resets to 0 on every restart, so per-query attachment directories from
+        a previous process would otherwise be silently reused — leaking a prior
+        run's inbound files and re-sending stale outbound files.
+
+        Outbox files are written by the sandbox **container**, which runs as
+        root over the bind-mount, so the LangBot host process (a non-root user)
+        cannot ``rmtree`` them. We therefore try a host-side delete first (fast,
+        works for host-owned inbox files) and, for anything that survives,
+        delete from *inside* the sandbox via exec where the container's root can
+        remove its own files. Best-effort: never block startup.
+        """
+        root = self.default_workspace
+        if not root or not os.path.isdir(root):
+            return
+
+        import shutil
+
+        host_survivors: list[str] = []
+
+        def _host_purge() -> list[str]:
+            survivors: list[str] = []
+            for subdir in (self.INBOX_SUBDIR, self.OUTBOX_SUBDIR):
+                path = os.path.join(root, subdir)
+                if not os.path.isdir(path):
+                    continue
+                shutil.rmtree(path, ignore_errors=True)
+                if os.path.exists(path):
+                    survivors.append(subdir)
+            return survivors
+
+        try:
+            host_survivors = await asyncio.to_thread(_host_purge)
+        except Exception as exc:  # pragma: no cover - defensive
+            self.ap.logger.warning(f'Host-side purge of sandbox attachment dirs failed: {exc}')
+            host_survivors = [self.INBOX_SUBDIR, self.OUTBOX_SUBDIR]
+
+        if not host_survivors:
+            self.ap.logger.info('Purged leftover sandbox attachment dirs from a previous process.')
+            return
+
+        # Root-owned leftovers (container output): delete from inside the box.
+        targets = ' '.join(f'/workspace/{sub}' for sub in host_survivors)
+        try:
+            spec = self.build_spec({'cmd': f'rm -rf {targets}', 'session_id': '__startup_purge__', 'timeout_sec': 30})
+            await self.client.execute(spec)
+            self.ap.logger.info(
+                f'Purged root-owned leftover sandbox attachment dirs via sandbox exec: {host_survivors}'
+            )
+        except Exception as exc:
+            self.ap.logger.warning(
+                f'Failed to purge root-owned sandbox attachment dirs {host_survivors} via exec: {exc}'
+            )
+
+    @staticmethod
+    def _sanitize_attachment_name(name: str, fallback: str) -> str:
+        """Reduce an arbitrary attachment name to a safe basename.
+
+        Strips directory separators and parent refs so a crafted file name
+        can never escape the inbox/outbox directory.
+        """
+        base = os.path.basename(str(name or '').replace('\\', '/').strip())
+        base = base.lstrip('.') or ''
+        # Drop anything that is not a conservative filename charset.
+        cleaned = ''.join(c for c in base if c.isalnum() or c in ('.', '_', '-', ' ')).strip()
+        cleaned = cleaned.replace(' ', '_')
+        return cleaned or fallback
+
+    @staticmethod
+    async def _component_to_bytes(component) -> tuple[bytes, str] | None:
+        """Best-effort extraction of (bytes, mime) from a platform component.
+
+        Handles base64, http(s) url and local path sources. Returns None when
+        no payload can be resolved.
+        """
+        import base64 as _b64
+
+        b64 = getattr(component, 'base64', None)
+        if b64:
+            data = b64
+            mime = 'application/octet-stream'
+            if isinstance(data, str) and data.startswith('data:'):
+                split_index = data.find(';base64,')
+                if split_index != -1:
+                    mime = data[5:split_index]
+                    data = data[split_index + 8 :]
+            try:
+                return _b64.b64decode(data), mime
+            except Exception:
+                return None
+
+        url = getattr(component, 'url', None)
+        if url:
+            try:
+                import httpx
+
+                async with httpx.AsyncClient(timeout=30) as client:
+                    resp = await client.get(url)
+                    resp.raise_for_status()
+                    return resp.content, resp.headers.get('Content-Type', 'application/octet-stream')
+            except Exception:
+                return None
+
+        path = getattr(component, 'path', None)
+        if path:
+            try:
+                import aiofiles
+
+                async with aiofiles.open(path, 'rb') as f:
+                    return await f.read(), 'application/octet-stream'
+            except Exception:
+                return None
+
+        return None
+
+    async def _write_files_into_sandbox(
+        self,
+        query: pipeline_query.Query,
+        subdir: str,
+        target_mount_dir: str,
+        files: list[tuple[str, bytes]],
+    ) -> list[str]:
+        """Write *files* (name, bytes) into the per-query directory.
+
+        Prefers a direct host-filesystem write to the bind-mounted workspace
+        (no size limit). Falls back to a base64-through-exec round-trip only
+        when the workspace is not visible on the LangBot host (E2B / remote).
+        Returns the list of in-sandbox paths actually written.
+        """
+        if not files:
+            return []
+
+        host_dir = self._host_query_dir(subdir, query.query_id)
+        if host_dir is not None:
+            return await asyncio.to_thread(self._write_files_host, host_dir, target_mount_dir, files)
+
+        return await self._write_files_via_exec(query, target_mount_dir, files)
+
+    def _write_files_host(
+        self,
+        host_dir: str,
+        target_mount_dir: str,
+        files: list[tuple[str, bytes]],
+    ) -> list[str]:
+        """Write attachments straight onto the bind-mounted host directory.
+
+        Recreates the per-query directory from scratch so a reused query_id
+        (the webchat session uses small sequential ids) never inherits stale
+        files from an earlier turn.
+        """
+        import shutil
+
+        shutil.rmtree(host_dir, ignore_errors=True)
+        os.makedirs(host_dir, exist_ok=True)
+        written: list[str] = []
+        for name, data in files:
+            with open(os.path.join(host_dir, name), 'wb') as fh:
+                fh.write(data)
+            written.append(f'{target_mount_dir}/{name}')
+        return written
+
+    async def _write_files_via_exec(
+        self,
+        query: pipeline_query.Query,
+        target_dir: str,
+        files: list[tuple[str, bytes]],
+    ) -> list[str]:
+        """Fallback: ship files into the sandbox over the exec channel.
+
+        Only used for backends without host-filesystem access (E2B / remote).
+        Each file is base64-decoded inside the sandbox. Files larger than the
+        conservative exec cap are skipped (ARG_MAX / stdout limits).
+
+        Args:
+            query: The pipeline query the exec call is issued for.
+            target_dir: In-sandbox directory to recreate and fill.
+            files: ``(name, data)`` pairs to transfer.
+
+        Returns:
+            The in-sandbox paths reported by the writer script, or ``[]`` when
+            nothing was eligible, the exec call failed, or its output could not
+            be parsed.
+        """
+        import base64 as _b64
+        import json as _json
+
+        manifest = []
+        for name, data in files:
+            if len(data) > self._EXEC_FALLBACK_MAX_BYTES:
+                self.ap.logger.warning(
+                    f'Attachment "{name}" ({len(data)} bytes) exceeds the exec-channel '
+                    f'fallback limit ({self._EXEC_FALLBACK_MAX_BYTES} bytes); skipping. '
+                    f'Configure a host-shared workspace to transfer large files.'
+                )
+                continue
+            manifest.append({'name': name, 'b64': _b64.b64encode(data).decode('ascii')})
+        if not manifest:
+            return []
+
+        # The manifest is base64-wrapped as a whole so arbitrary file names
+        # cannot break out of the Python literal embedded in the script.
+        manifest_b64 = _b64.b64encode(_json.dumps(manifest).encode('utf-8')).decode('ascii')
+        script = (
+            'import base64, json, os, shutil\n'
+            f'target = {target_dir!r}\n'
+            'shutil.rmtree(target, ignore_errors=True)\n'
+            'os.makedirs(target, exist_ok=True)\n'
+            f'manifest = json.loads(base64.b64decode({manifest_b64!r}))\n'
+            'written = []\n'
+            'for item in manifest:\n'
+            "    p = os.path.join(target, item['name'])\n"
+            "    with open(p, 'wb') as f:\n"
+            "        f.write(base64.b64decode(item['b64']))\n"
+            '    written.append(p)\n'
+            'print(json.dumps(written))\n'
+        )
+        result = await self.execute_tool(
+            {'command': f"python3 - <<'LBPY'\n{script}\nLBPY", 'timeout_sec': 120},
+            query,
+        )
+        if not result.get('ok'):
+            # stderr may be present but None; coerce before slicing, the same
+            # way stdout is handled below.
+            stderr_head = str(result.get('stderr') or '')[:200]
+            self.ap.logger.warning(
+                f'Failed to write inbound attachments into sandbox via exec: '
+                f'query_id={query.query_id} stderr={stderr_head}'
+            )
+            return []
+        try:
+            # The script prints the JSON list last; anything before it is noise.
+            parsed = _json.loads(str(result.get('stdout') or '').strip().splitlines()[-1])
+        except (ValueError, IndexError):
+            self.ap.logger.warning(
+                f'Could not parse sandbox attachment-writer output: query_id={query.query_id}'
+            )
+            return []
+        if not isinstance(parsed, list):
+            return []
+        # Callers take os.path.basename() of every entry, so keep strings only.
+        return [p for p in parsed if isinstance(p, str)]
+
+    async def materialize_inbound_attachments(self, query: pipeline_query.Query) -> list[dict]:
+        """Persist message-chain attachments into the sandbox inbox.
+
+        Returns a list of ``{path, name, type, size}`` describing what was
+        written, so the runner can tell the LLM the exact in-sandbox paths.
+        Returns ``[]`` when sandbox is unavailable or there are no attachments.
+
+        Attachments that resolve to the same sanitized file name are given a
+        numeric suffix (``report.pdf``, ``report_2.pdf``, ...) so that one does
+        not overwrite another and every descriptor points at its own file.
+        """
+        if not self._available:
+            return []
+
+        import langbot_plugin.api.entities.builtin.platform.message as platform_message
+
+        message_chain = getattr(query, 'message_chain', None)
+        if not message_chain:
+            return []
+
+        # (component class, descriptor type, fallback-name prefix, fallback extension)
+        type_map = [
+            (platform_message.Image, 'Image', 'image', 'png'),
+            (platform_message.Voice, 'Voice', 'voice', 'wav'),
+            (platform_message.File, 'File', 'file', 'bin'),
+        ]
+
+        pending: list[tuple[str, bytes]] = []
+        descriptors: list[dict] = []
+        used_names: set[str] = set()
+        index = 0
+        for component in message_chain:
+            matched = None
+            for cls, kind, prefix, default_ext in type_map:
+                if isinstance(component, cls):
+                    matched = (kind, prefix, default_ext)
+                    break
+            if matched is None:
+                continue
+            kind, prefix, default_ext = matched
+
+            payload = await self._component_to_bytes(component)
+            if payload is None:
+                continue
+            data, _mime = payload
+            # Empty and oversized payloads are dropped without a descriptor.
+            if not data or len(data) > self._ATTACHMENT_MAX_BYTES:
+                continue
+
+            index += 1
+            raw_name = getattr(component, 'name', None) or f'{prefix}_{index}.{default_ext}'
+            safe_name = self._sanitize_attachment_name(raw_name, f'{prefix}_{index}.{default_ext}')
+
+            # Two attachments can share a name (e.g. two "image.png" uploads).
+            # All files go into one flat directory, so disambiguate here.
+            unique_name = safe_name
+            if unique_name in used_names:
+                stem, ext = os.path.splitext(safe_name)
+                suffix = 2
+                while unique_name in used_names:
+                    unique_name = f'{stem}_{suffix}{ext}'
+                    suffix += 1
+            used_names.add(unique_name)
+
+            pending.append((unique_name, data))
+            descriptors.append(
+                {
+                    'name': unique_name,
+                    'type': kind,
+                    'size': len(data),
+                }
+            )
+
+        if not pending:
+            return []
+
+        target_dir = f'{self.INBOX_MOUNT_DIR}/{query.query_id}'
+        written = await self._write_files_into_sandbox(query, self.INBOX_SUBDIR, target_dir, pending)
+        written_basenames = {os.path.basename(p) for p in written}
+
+        # Only report attachments the writer confirmed; the exec fallback may
+        # skip files that exceed its size cap.
+        result: list[dict] = []
+        for desc in descriptors:
+            if desc['name'] in written_basenames:
+                desc['path'] = f'{target_dir}/{desc["name"]}'
+                result.append(desc)
+        if result:
+            self.ap.logger.info(
+                f'Materialized {len(result)} inbound attachment(s) into sandbox: '
+                f'query_id={query.query_id} dir={target_dir}'
+            )
+        return result
+
+    async def collect_outbound_attachments(self, query: pipeline_query.Query) -> list[dict]:
+        """Gather the files the agent left in this query's sandbox outbox.
+
+        The outbox ``/workspace/outbox/<query_id>/`` is read recursively. When
+        a bind-mounted host directory exists it is read directly (no size
+        limit); otherwise the exec channel is used. The entries become
+        ``{type, name, base64}`` dicts ready to be turned into platform message
+        components. The outbox is emptied afterwards so a later turn in the
+        same session does not re-send stale files.
+
+        Returns ``[]`` when the sandbox is unavailable or nothing was produced.
+        """
+        if not self._available:
+            return []
+
+        outbox_host_dir = self._host_query_dir(self.OUTBOX_SUBDIR, query.query_id)
+        if outbox_host_dir is None:
+            raw_entries = await self._read_outbox_via_exec(query)
+        else:
+            raw_entries = await asyncio.to_thread(self._read_outbox_host, outbox_host_dir)
+
+        collected = self._classify_outbound_entries(raw_entries)
+
+        # The outbox is wiped unconditionally, even if it turned out to be
+        # empty: query ids restart after a process restart, so a future turn
+        # with the same id must not pick up leftovers.
+        await self._clear_outbox(query, outbox_host_dir)
+
+        if not collected:
+            return collected
+        self.ap.logger.info(
+            f'Collected {len(collected)} outbound attachment(s) from sandbox: query_id={query.query_id}'
+        )
+        return collected
+
+    def _read_outbox_host(self, host_dir: str) -> list[dict]:
+        """Read outbox files straight off the bind-mounted host directory.
+
+        The outbox is populated by code running inside the sandbox, so its
+        contents are untrusted. Only regular files are read: a symbolic link
+        would be resolved on the host and could expose host files, and opening
+        a FIFO would block this thread.
+
+        Args:
+            host_dir: Host-side path of the per-query outbox directory.
+
+        Returns:
+            One ``{'name': <path relative to host_dir>, 'b64': <base64 data>}``
+            dict per readable regular file no larger than
+            ``_ATTACHMENT_MAX_BYTES``. Empty when the directory is missing or
+            is itself a symbolic link.
+        """
+        import base64 as _b64
+        import stat as _stat
+
+        entries: list[dict] = []
+        # os.walk() would happily descend into a symlinked top directory.
+        if os.path.islink(host_dir) or not os.path.isdir(host_dir):
+            return entries
+        for root, _dirs, names in os.walk(host_dir):
+            for name in sorted(names):
+                path = os.path.join(root, name)
+                try:
+                    # lstat() does not follow links, so a symlink shows up as a
+                    # link rather than as its target.
+                    info = os.lstat(path)
+                    if not _stat.S_ISREG(info.st_mode):
+                        continue
+                    if info.st_size > self._ATTACHMENT_MAX_BYTES:
+                        continue
+                    with open(path, 'rb') as fh:
+                        data = fh.read()
+                except OSError:
+                    # Unreadable or vanished files are skipped, not fatal.
+                    continue
+                rel = os.path.relpath(path, host_dir)
+                entries.append({'name': rel, 'b64': _b64.b64encode(data).decode('ascii')})
+        return entries
+
+    async def _read_outbox_via_exec(self, query: pipeline_query.Query) -> list[dict]:
+        """Exec-channel fallback for reading the per-query outbox (E2B / remote).
+
+        A small Python script is run inside the sandbox; it walks the outbox,
+        skips files above ``_EXEC_FALLBACK_MAX_BYTES`` and prints a JSON list
+        of ``{'name', 'b64'}`` items.
+
+        Note: exec stdout is truncated by ``output_limit_chars``, so this path
+        only reliably transfers small files. The host path is preferred.
+
+        Returns whatever JSON value the last stdout line decodes to, or ``[]``
+        when the exec call is not ok or that line cannot be decoded.
+        """
+        import json as _json
+
+        outbox_path = f'{self.OUTBOX_MOUNT_DIR}/{query.query_id}'
+        size_cap = self._EXEC_FALLBACK_MAX_BYTES
+        reader_src = (
+            'import base64, json, os\n'
+            f'target = {outbox_path!r}\n'
+            f'max_bytes = {size_cap}\n'
+            'out = []\n'
+            'if os.path.isdir(target):\n'
+            '    for root, _dirs, names in os.walk(target):\n'
+            '        for n in sorted(names):\n'
+            '            p = os.path.join(root, n)\n'
+            '            try:\n'
+            '                if os.path.getsize(p) > max_bytes:\n'
+            '                    continue\n'
+            "                with open(p, 'rb') as f:\n"
+            '                    data = f.read()\n'
+            '            except OSError:\n'
+            '                continue\n'
+            '            rel = os.path.relpath(p, target)\n'
+            "            out.append({'name': rel, 'b64': base64.b64encode(data).decode('ascii')})\n"
+            'print(json.dumps(out))\n'
+        )
+        # The script is fed to the interpreter on stdin through a quoted heredoc.
+        command = f"python3 - <<'LBPY'\n{reader_src}\nLBPY"
+        outcome = await self.execute_tool({'command': command, 'timeout_sec': 120}, query)
+        if not outcome.get('ok'):
+            return []
+        try:
+            stdout_text = str(outcome.get('stdout') or '')
+            # Only the final line carries the JSON payload.
+            last_line = stdout_text.strip().splitlines()[-1]
+            return _json.loads(last_line)
+        except Exception:
+            return []
+
+    async def _clear_outbox(self, query: pipeline_query.Query, host_dir: str | None) -> None:
+        """Empty the per-query outbox after collection.
+
+        Tries a host-side ``rmtree`` first (fast, no container round-trip).
+        Outbox files are created by the sandbox container as root over the
+        bind-mount, so when LangBot runs as a non-root user the host delete
+        fails silently and the files survive — they would then be re-collected
+        on the next turn that reuses the same query_id. So if anything survives
+        the host delete, clear it from *inside* the sandbox via exec, where the
+        container's root can remove its own files. Best-effort: never raise
+        into the pipeline.
+
+        Args:
+            query: The pipeline query whose outbox is cleared.
+            host_dir: Host-side outbox path, or ``None`` when the workspace is
+                not visible on the host (exec is then the only option).
+        """
+        import shlex
+
+        target_dir = f'{self.OUTBOX_MOUNT_DIR}/{query.query_id}'
+
+        if host_dir is not None:
+            import shutil
+
+            def _clear() -> bool:
+                """Delete and recreate host_dir; return True if content survived."""
+                shutil.rmtree(host_dir, ignore_errors=True)
+                try:
+                    survived = os.path.exists(host_dir) and bool(os.listdir(host_dir))
+                    os.makedirs(host_dir, exist_ok=True)
+                except OSError:
+                    # Cannot inspect or recreate the directory from the host
+                    # (permissions, not a directory, ...): let the sandbox try.
+                    return True
+                return survived
+
+            survived = await asyncio.to_thread(_clear)
+            if not survived:
+                return
+            # Root-owned container files survived the host delete — fall through.
+
+        # Quote the path so an unexpected query_id cannot alter the command;
+        # ordinary ids are left unchanged by shlex.quote().
+        quoted_dir = shlex.quote(target_dir)
+        try:
+            await self.execute_tool(
+                {'command': f'rm -rf {quoted_dir} && mkdir -p {quoted_dir}', 'timeout_sec': 30},
+                query,
+            )
+        except Exception as exc:
+            self.ap.logger.warning(f'Failed to clear sandbox outbox {target_dir}: {exc}')
+
+    @staticmethod
+    def _classify_outbound_entries(entries: list[dict]) -> list[dict]:
+        """Classify outbox files into Image/Voice/File component descriptors.
+
+        Args:
+            entries: ``{'name': <relative path>, 'b64': <base64 data>}`` dicts.
+                The exec fallback parses these from sandbox output, so anything
+                that is not a list of such dicts is tolerated and ignored
+                rather than raised on.
+
+        Returns:
+            One ``{'type', 'name', 'base64'}`` dict per usable entry, in input
+            order. ``name`` is reduced to its final path component. Images and
+            voice clips carry a ``data:`` URI; other files carry the bare
+            base64 payload.
+        """
+        image_exts = {'png', 'jpg', 'jpeg', 'gif', 'webp', 'bmp'}
+        voice_exts = {'wav', 'mp3', 'silk', 'amr', 'ogg', 'm4a', 'aac'}
+        mime_by_ext = {
+            'png': 'image/png',
+            'jpg': 'image/jpeg',
+            'jpeg': 'image/jpeg',
+            'gif': 'image/gif',
+            'webp': 'image/webp',
+            'bmp': 'image/bmp',
+        }
+        attachments: list[dict] = []
+        if not isinstance(entries, (list, tuple)):
+            return attachments
+        for entry in entries:
+            # Malformed items are skipped instead of aborting the whole turn.
+            if not isinstance(entry, dict):
+                continue
+            name = str(entry.get('name', '') or '')
+            b64 = entry.get('b64')
+            if not name or not b64 or not isinstance(b64, str):
+                continue
+            ext = name.rsplit('.', 1)[-1].lower() if '.' in name else ''
+            base_name = os.path.basename(name)
+            if ext in image_exts:
+                mime = mime_by_ext.get(ext, 'image/png')
+                attachments.append({'type': 'Image', 'name': base_name, 'base64': f'data:{mime};base64,{b64}'})
+            elif ext in voice_exts:
+                attachments.append({'type': 'Voice', 'name': base_name, 'base64': f'data:audio/{ext};base64,{b64}'})
+            else:
+                attachments.append({'type': 'File', 'name': base_name, 'base64': b64})
+        return attachments
+
    async def shutdown(self):
        await self.client.shutdown()

@@ -785,6 +1289,7 @@ class BoxService:
    # ── Observability ─────────────────────────────────────────────────

    def _record_error(self, exc: Exception, query: pipeline_query.Query):
+        telemetry_features.increment(query, 'sandbox', 'errors')
        self._recent_errors.append(
            {
                'timestamp': _dt.datetime.now(_UTC).isoformat(),
@@ -797,11 +1302,19 @@ class BoxService:
    def get_recent_errors(self) -> list[dict]:
        return list(self._recent_errors)

-    def get_system_guidance(self) -> str:
+    def get_system_guidance(self, query_id=None) -> str:
        """Return LLM system-prompt guidance for the exec tool.

        All execution-specific prompt text is kept here so that callers
        (e.g. LocalAgentRunner) stay free of box domain knowledge.
+
+        ``query_id`` is the current turn's pipeline query id. When provided,
+        the guidance ALWAYS advertises the per-query outbox path so the agent
+        knows how to deliver generated files back to the user — even on turns
+        where the user sent no inbound attachment (e.g. "generate a QR code"),
+        which is exactly when the inbound-attachment note never fires. Outbound
+        collection in the wrapper runs on every turn regardless of inbound
+        files, so without this the file would be produced and silently dropped.
        """
        guidance = (
            'When the exec tool is available, use it for exact calculations, statistics, structured data parsing, '
@@ -816,6 +1329,13 @@ class BoxService:
                'modify local files in the working directory, use exec with /workspace paths directly; do not ask the '
                'user for directory parameters unless they explicitly need a different directory.'
            )
+        if query_id is not None:
+            outbox_dir = f'{self.OUTBOX_MOUNT_DIR}/{query_id}'
+            guidance += (
+                f' If you produce any file (image, audio, document, etc.) that should be sent back to the user, '
+                f'write it into {outbox_dir}/ (create the directory if needed). Every file placed there will be '
+                'delivered to the user automatically; do not paste file contents or base64 into your reply.'
+            )
        return guidance

    async def get_status(self) -> dict:
@@ -146,13 +146,19 @@ def wrap_python_command_with_env(command: str, *, mount_path: str = '/workspace'
        _LB_PIP_CACHE_DIR="{mount_path}/.cache/pip"

        mkdir -p "$_LB_META_DIR" "$_LB_TMP_DIR" "$_LB_PIP_CACHE_DIR"
+        _LB_SYSTEM_PYTHON="$(command -v python3 || command -v python || true)"
+        if [ -z "$_LB_SYSTEM_PYTHON" ]; then
+          echo "python3 or python is required to prepare the workspace Python environment" >&2
+          exit 127
+        fi
+
        export TMPDIR="$_LB_TMP_DIR"
        export TEMP="$_LB_TMP_DIR"
        export TMP="$_LB_TMP_DIR"
        export PIP_CACHE_DIR="$_LB_PIP_CACHE_DIR"

        _lb_python_meta() {{
-          python - <<'PY'
+          "$_LB_SYSTEM_PYTHON" - <<'PY'
        import hashlib
        import json
        import os
@@ -201,15 +207,26 @@ def wrap_python_command_with_env(command: str, *, mount_path: str = '/workspace'
          _LB_LOCK_WAIT=0
          while ! mkdir "$_LB_LOCK_DIR" 2>/dev/null; do
            if [ "$_LB_LOCK_WAIT" -ge 120 ]; then
+              _LB_LOCK_OWNER="$(cat "$_LB_LOCK_DIR/pid" 2>/dev/null || true)"
+              if [ -n "$_LB_LOCK_OWNER" ] && kill -0 "$_LB_LOCK_OWNER" 2>/dev/null; then
+                echo "Timed out waiting for active Python environment lock: $_LB_LOCK_DIR" >&2
+                exit 1
+              fi
+              echo "Timed out waiting for Python environment lock, clearing stale lock: $_LB_LOCK_DIR" >&2
+              rm -rf "$_LB_LOCK_DIR" 2>/dev/null || true
+              if mkdir "$_LB_LOCK_DIR" 2>/dev/null; then
+                break
+              fi
              echo "Timed out waiting for Python environment lock: $_LB_LOCK_DIR" >&2
              exit 1
            fi
            sleep 1
            _LB_LOCK_WAIT=$((_LB_LOCK_WAIT + 1))
          done
+          printf '%s\\n' "$$" > "$_LB_LOCK_DIR/pid" 2>/dev/null || true

          _lb_cleanup_lock() {{
-            rmdir "$_LB_LOCK_DIR" >/dev/null 2>&1 || true
+            rm -rf "$_LB_LOCK_DIR" >/dev/null 2>&1 || true
          }}
          trap _lb_cleanup_lock EXIT INT TERM

@@ -225,7 +242,7 @@ def wrap_python_command_with_env(command: str, *, mount_path: str = '/workspace'

          if [ "$_LB_NEEDS_BOOTSTRAP" -eq 1 ]; then
            rm -rf "$_LB_VENV_DIR"
-            python -m venv "$_LB_VENV_DIR"
+            "$_LB_SYSTEM_PYTHON" -m venv "$_LB_VENV_DIR"
            . "$_LB_VENV_DIR/bin/activate"
            python -m pip install --upgrade pip setuptools wheel
            if [ -f "{mount_path}/requirements.txt" ]; then
@@ -200,6 +200,17 @@ class Application:
                scopes=[core_entities.LifecycleControlScope.APPLICATION],
            )

+            # Telemetry instance heartbeat (startup + daily); respects
+            # space.disable_telemetry via TelemetryManager.send().
+            if self.telemetry is not None:
+                from ..telemetry import heartbeat as telemetry_heartbeat
+
+                self.task_mgr.create_task(
+                    telemetry_heartbeat.heartbeat_loop(self),
+                    name='telemetry-heartbeat',
+                    scopes=[core_entities.LifecycleControlScope.APPLICATION],
+                )
+
            # Start monitoring data cleanup task if enabled
            monitoring_cfg = self.instance_config.data.get('monitoring', {})
            auto_cleanup_cfg = monitoring_cfg.get('auto_cleanup', {})
@@ -16,7 +16,6 @@ importutil.import_modules_in_pkg(stages)

 stage_order = [
    'LoadConfigStage',
-    'MigrationStage',
    'GenKeysStage',
    'SetupLoggerStage',
    'BuildAppStage',
@@ -42,6 +42,7 @@ required_deps = {
    'telegramify_markdown': 'telegramify-markdown',
    'slack_sdk': 'slack_sdk',
    'asyncpg': 'asyncpg',
+    'litellm': 'litellm',
 }


@@ -1,45 +0,0 @@
-from __future__ import annotations
-
-import abc
-import typing
-
-from . import app
-
-
-preregistered_migrations: list[typing.Type[Migration]] = []
-"""Currently not supported for extension"""
-
-
-def migration_class(name: str, number: int):
-    """Register a migration"""
-
-    def decorator(cls: typing.Type[Migration]) -> typing.Type[Migration]:
-        cls.name = name
-        cls.number = number
-        preregistered_migrations.append(cls)
-        return cls
-
-    return decorator
-
-
-class Migration(abc.ABC):
-    """A version migration"""
-
-    name: str
-
-    number: int
-
-    ap: app.Application
-
-    def __init__(self, ap: app.Application):
-        self.ap = ap
-
-    @abc.abstractmethod
-    async def need_migrate(self) -> bool:
-        """Determine if the current environment needs to run this migration"""
-        pass
-
-    @abc.abstractmethod
-    async def run(self):
-        """Run migration"""
-        pass
@@ -1,24 +0,0 @@
-from __future__ import annotations
-
-import os
-
-from .. import migration
-
-
-@migration.migration_class('sensitive-word-migration', 1)
-class SensitiveWordMigration(migration.Migration):
-    """敏感词迁移"""
-
-    async def need_migrate(self) -> bool:
-        """判断当前环境是否需要运行此迁移"""
-        return os.path.exists('data/config/sensitive-words.json') and not os.path.exists(
-            'data/metadata/sensitive-words.json'
-        )
-
-    async def run(self):
-        """执行迁移"""
-        # 移动文件
-        os.rename('data/config/sensitive-words.json', 'data/metadata/sensitive-words.json')
-
-        # 重新加载配置
-        await self.ap.sensitive_meta.load_config()
@@ -1,44 +0,0 @@
-from __future__ import annotations
-
-from .. import migration
-
-
-@migration.migration_class('openai-config-migration', 2)
-class OpenAIConfigMigration(migration.Migration):
-    """OpenAI配置迁移"""
-
-    async def need_migrate(self) -> bool:
-        """判断当前环境是否需要运行此迁移"""
-        return 'openai-config' in self.ap.provider_cfg.data
-
-    async def run(self):
-        """执行迁移"""
-        old_openai_config = self.ap.provider_cfg.data['openai-config'].copy()
-
-        if 'keys' not in self.ap.provider_cfg.data:
-            self.ap.provider_cfg.data['keys'] = {}
-
-        if 'openai' not in self.ap.provider_cfg.data['keys']:
-            self.ap.provider_cfg.data['keys']['openai'] = []
-
-        self.ap.provider_cfg.data['keys']['openai'] = old_openai_config['api-keys']
-
-        self.ap.provider_cfg.data['model'] = old_openai_config['chat-completions-params']['model']
-
-        del old_openai_config['chat-completions-params']['model']
-
-        if 'requester' not in self.ap.provider_cfg.data:
-            self.ap.provider_cfg.data['requester'] = {}
-
-        if 'openai-chat-completions' not in self.ap.provider_cfg.data['requester']:
-            self.ap.provider_cfg.data['requester']['openai-chat-completions'] = {}
-
-        self.ap.provider_cfg.data['requester']['openai-chat-completions'] = {
-            'base-url': old_openai_config['base_url'],
-            'args': old_openai_config['chat-completions-params'],
-            'timeout': old_openai_config['request-timeout'],
-        }
-
-        del self.ap.provider_cfg.data['openai-config']
-
-        await self.ap.provider_cfg.dump_config()
@@ -1,29 +0,0 @@
-from __future__ import annotations
-
-from .. import migration
-
-
-@migration.migration_class('anthropic-requester-config-completion', 3)
-class AnthropicRequesterConfigCompletionMigration(migration.Migration):
-    """OpenAI配置迁移"""
-
-    async def need_migrate(self) -> bool:
-        """判断当前环境是否需要运行此迁移"""
-        return (
-            'anthropic-messages' not in self.ap.provider_cfg.data['requester']
-            or 'anthropic' not in self.ap.provider_cfg.data['keys']
-        )
-
-    async def run(self):
-        """执行迁移"""
-        if 'anthropic-messages' not in self.ap.provider_cfg.data['requester']:
-            self.ap.provider_cfg.data['requester']['anthropic-messages'] = {
-                'base-url': 'https://api.anthropic.com',
-                'args': {'max_tokens': 1024},
-                'timeout': 120,
-            }
-
-        if 'anthropic' not in self.ap.provider_cfg.data['keys']:
-            self.ap.provider_cfg.data['keys']['anthropic'] = []
-
-        await self.ap.provider_cfg.dump_config()
@@ -1,29 +0,0 @@
-from __future__ import annotations
-
-from .. import migration
-
-
-@migration.migration_class('moonshot-config-completion', 4)
-class MoonshotConfigCompletionMigration(migration.Migration):
-    """OpenAI配置迁移"""
-
-    async def need_migrate(self) -> bool:
-        """判断当前环境是否需要运行此迁移"""
-        return (
-            'moonshot-chat-completions' not in self.ap.provider_cfg.data['requester']
-            or 'moonshot' not in self.ap.provider_cfg.data['keys']
-        )
-
-    async def run(self):
-        """执行迁移"""
-        if 'moonshot-chat-completions' not in self.ap.provider_cfg.data['requester']:
-            self.ap.provider_cfg.data['requester']['moonshot-chat-completions'] = {
-                'base-url': 'https://api.moonshot.cn/v1',
-                'args': {},
-                'timeout': 120,
-            }
-
-        if 'moonshot' not in self.ap.provider_cfg.data['keys']:
-            self.ap.provider_cfg.data['keys']['moonshot'] = []
-
-        await self.ap.provider_cfg.dump_config()
@@ -1,29 +0,0 @@
-from __future__ import annotations
-
-from .. import migration
-
-
-@migration.migration_class('deepseek-config-completion', 5)
-class DeepseekConfigCompletionMigration(migration.Migration):
-    """OpenAI配置迁移"""
-
-    async def need_migrate(self) -> bool:
-        """判断当前环境是否需要运行此迁移"""
-        return (
-            'deepseek-chat-completions' not in self.ap.provider_cfg.data['requester']
-            or 'deepseek' not in self.ap.provider_cfg.data['keys']
-        )
-
-    async def run(self):
-        """执行迁移"""
-        if 'deepseek-chat-completions' not in self.ap.provider_cfg.data['requester']:
-            self.ap.provider_cfg.data['requester']['deepseek-chat-completions'] = {
-                'base-url': 'https://api.deepseek.com',
-                'args': {},
-                'timeout': 120,
-            }
-
-        if 'deepseek' not in self.ap.provider_cfg.data['keys']:
-            self.ap.provider_cfg.data['keys']['deepseek'] = []
-
-        await self.ap.provider_cfg.dump_config()
@@ -1,19 +0,0 @@
-from __future__ import annotations
-
-from .. import migration
-
-
-@migration.migration_class('vision-config', 6)
-class VisionConfigMigration(migration.Migration):
-    """迁移"""
-
-    async def need_migrate(self) -> bool:
-        """判断当前环境是否需要运行此迁移"""
-        return 'enable-vision' not in self.ap.provider_cfg.data
-
-    async def run(self):
-        """执行迁移"""
-        if 'enable-vision' not in self.ap.provider_cfg.data:
-            self.ap.provider_cfg.data['enable-vision'] = False
-
-        await self.ap.provider_cfg.dump_config()
@@ -1,20 +0,0 @@
-from __future__ import annotations
-
-from .. import migration
-
-
-@migration.migration_class('qcg-center-url-config', 7)
-class QCGCenterURLConfigMigration(migration.Migration):
-    """迁移"""
-
-    async def need_migrate(self) -> bool:
-        """判断当前环境是否需要运行此迁移"""
-        return 'qcg-center-url' not in self.ap.system_cfg.data
-
-    async def run(self):
-        """执行迁移"""
-
-        if 'qcg-center-url' not in self.ap.system_cfg.data:
-            self.ap.system_cfg.data['qcg-center-url'] = 'https://api.qchatgpt.rockchin.top/api/v2'
-
-        await self.ap.system_cfg.dump_config()
@@ -1,25 +0,0 @@
-from __future__ import annotations
-
-from .. import migration
-
-
-@migration.migration_class('ad-fixwin-cfg-migration', 8)
-class AdFixwinConfigMigration(migration.Migration):
-    """迁移"""
-
-    async def need_migrate(self) -> bool:
-        """判断当前环境是否需要运行此迁移"""
-        return isinstance(self.ap.pipeline_cfg.data['rate-limit']['fixwin']['default'], int)
-
-    async def run(self):
-        """执行迁移"""
-
-        for session_name in self.ap.pipeline_cfg.data['rate-limit']['fixwin']:
-            temp_dict = {
-                'window-size': 60,
-                'limit': self.ap.pipeline_cfg.data['rate-limit']['fixwin'][session_name],
-            }
-
-            self.ap.pipeline_cfg.data['rate-limit']['fixwin'][session_name] = temp_dict
-
-        await self.ap.pipeline_cfg.dump_config()
@@ -1,22 +0,0 @@
-from __future__ import annotations
-
-from .. import migration
-
-
-@migration.migration_class('msg-truncator-cfg-migration', 9)
-class MsgTruncatorConfigMigration(migration.Migration):
-    """迁移"""
-
-    async def need_migrate(self) -> bool:
-        """判断当前环境是否需要运行此迁移"""
-        return 'msg-truncate' not in self.ap.pipeline_cfg.data
-
-    async def run(self):
-        """执行迁移"""
-
-        self.ap.pipeline_cfg.data['msg-truncate'] = {
-            'method': 'round',
-            'round': {'max-round': 10},
-        }
-
-        await self.ap.pipeline_cfg.dump_config()
@@ -1,23 +0,0 @@
-from __future__ import annotations
-
-from .. import migration
-
-
-@migration.migration_class('ollama-requester-config', 10)
-class MsgTruncatorConfigMigration(migration.Migration):
-    """迁移"""
-
-    async def need_migrate(self) -> bool:
-        """判断当前环境是否需要运行此迁移"""
-        return 'ollama-chat' not in self.ap.provider_cfg.data['requester']
-
-    async def run(self):
-        """执行迁移"""
-
-        self.ap.provider_cfg.data['requester']['ollama-chat'] = {
-            'base-url': 'http://127.0.0.1:11434',
-            'args': {},
-            'timeout': 600,
-        }
-
-        await self.ap.provider_cfg.dump_config()
@@ -1,19 +0,0 @@
-from __future__ import annotations
-
-from .. import migration
-
-
-@migration.migration_class('command-prefix-config', 11)
-class CommandPrefixConfigMigration(migration.Migration):
-    """迁移"""
-
-    async def need_migrate(self) -> bool:
-        """判断当前环境是否需要运行此迁移"""
-        return 'command-prefix' not in self.ap.command_cfg.data
-
-    async def run(self):
-        """执行迁移"""
-
-        self.ap.command_cfg.data['command-prefix'] = ['!', '！']
-
-        await self.ap.command_cfg.dump_config()
@@ -1,19 +0,0 @@
-from __future__ import annotations
-
-from .. import migration
-
-
-@migration.migration_class('runner-config', 12)
-class RunnerConfigMigration(migration.Migration):
-    """迁移"""
-
-    async def need_migrate(self) -> bool:
-        """判断当前环境是否需要运行此迁移"""
-        return 'runner' not in self.ap.provider_cfg.data
-
-    async def run(self):
-        """执行迁移"""
-
-        self.ap.provider_cfg.data['runner'] = 'local-agent'
-
-        await self.ap.provider_cfg.dump_config()
@@ -1,29 +0,0 @@
-from __future__ import annotations
-
-from .. import migration
-
-
-@migration.migration_class('http-api-config', 13)
-class HttpApiConfigMigration(migration.Migration):
-    """迁移"""
-
-    async def need_migrate(self) -> bool:
-        """判断当前环境是否需要运行此迁移"""
-        return 'http-api' not in self.ap.system_cfg.data or 'persistence' not in self.ap.system_cfg.data
-
-    async def run(self):
-        """执行迁移"""
-
-        self.ap.system_cfg.data['http-api'] = {
-            'enable': True,
-            'host': '0.0.0.0',
-            'port': 5300,
-            'jwt-expire': 604800,
-        }
-
-        self.ap.system_cfg.data['persistence'] = {
-            'sqlite': {'path': 'data/persistence.db'},
-            'use': 'sqlite',
-        }
-
-        await self.ap.system_cfg.dump_config()
@@ -1,22 +0,0 @@
-from __future__ import annotations
-
-from .. import migration
-
-
-@migration.migration_class('force-delay-config', 14)
-class ForceDelayConfigMigration(migration.Migration):
-    """迁移"""
-
-    async def need_migrate(self) -> bool:
-        """判断当前环境是否需要运行此迁移"""
-        return isinstance(self.ap.platform_cfg.data['force-delay'], list)
-
-    async def run(self):
-        """执行迁移"""
-
-        self.ap.platform_cfg.data['force-delay'] = {
-            'min': self.ap.platform_cfg.data['force-delay'][0],
-            'max': self.ap.platform_cfg.data['force-delay'][1],
-        }
-
-        await self.ap.platform_cfg.dump_config()
@@ -1,27 +0,0 @@
-from __future__ import annotations
-
-from .. import migration
-
-
-@migration.migration_class('gitee-ai-config', 15)
-class GiteeAIConfigMigration(migration.Migration):
-    """迁移"""
-
-    async def need_migrate(self) -> bool:
-        """判断当前环境是否需要运行此迁移"""
-        return (
-            'gitee-ai-chat-completions' not in self.ap.provider_cfg.data['requester']
-            or 'gitee-ai' not in self.ap.provider_cfg.data['keys']
-        )
-
-    async def run(self):
-        """执行迁移"""
-        self.ap.provider_cfg.data['requester']['gitee-ai-chat-completions'] = {
-            'base-url': 'https://ai.gitee.com/v1',
-            'args': {},
-            'timeout': 120,
-        }
-
-        self.ap.provider_cfg.data['keys']['gitee-ai'] = ['XXXXX']
-
-        await self.ap.provider_cfg.dump_config()
@@ -1,23 +0,0 @@
-from __future__ import annotations
-
-from .. import migration
-
-
-@migration.migration_class('dify-service-api-config', 16)
-class DifyServiceAPICfgMigration(migration.Migration):
-    """迁移"""
-
-    async def need_migrate(self) -> bool:
-        """判断当前环境是否需要运行此迁移"""
-        return 'dify-service-api' not in self.ap.provider_cfg.data
-
-    async def run(self):
-        """执行迁移"""
-        self.ap.provider_cfg.data['dify-service-api'] = {
-            'base-url': 'https://api.dify.ai/v1',
-            'app-type': 'chat',
-            'chat': {'api-key': 'app-1234567890'},
-            'workflow': {'api-key': 'app-1234567890', 'output-key': 'summary'},
-        }
-
-        await self.ap.provider_cfg.dump_config()
@@ -1,27 +0,0 @@
-from __future__ import annotations
-
-from .. import migration
-
-
-@migration.migration_class('dify-api-timeout-params', 17)
-class DifyAPITimeoutParamsMigration(migration.Migration):
-    """迁移"""
-
-    async def need_migrate(self) -> bool:
-        """判断当前环境是否需要运行此迁移"""
-        return (
-            'timeout' not in self.ap.provider_cfg.data['dify-service-api']['chat']
-            or 'timeout' not in self.ap.provider_cfg.data['dify-service-api']['workflow']
-            or 'agent' not in self.ap.provider_cfg.data['dify-service-api']
-        )
-
-    async def run(self):
-        """执行迁移"""
-        self.ap.provider_cfg.data['dify-service-api']['chat']['timeout'] = 120
-        self.ap.provider_cfg.data['dify-service-api']['workflow']['timeout'] = 120
-        self.ap.provider_cfg.data['dify-service-api']['agent'] = {
-            'api-key': 'app-1234567890',
-            'timeout': 120,
-        }
-
-        await self.ap.provider_cfg.dump_config()
@@ -1,23 +0,0 @@
-from __future__ import annotations
-
-from .. import migration
-
-
-@migration.migration_class('xai-config', 18)
-class XaiConfigMigration(migration.Migration):
-    """迁移"""
-
-    async def need_migrate(self) -> bool:
-        """判断当前环境是否需要运行此迁移"""
-        return 'xai-chat-completions' not in self.ap.provider_cfg.data['requester']
-
-    async def run(self):
-        """执行迁移"""
-        self.ap.provider_cfg.data['requester']['xai-chat-completions'] = {
-            'base-url': 'https://api.x.ai/v1',
-            'args': {},
-            'timeout': 120,
-        }
-        self.ap.provider_cfg.data['keys']['xai'] = ['xai-1234567890']
-
-        await self.ap.provider_cfg.dump_config()
@@ -1,23 +0,0 @@
-from __future__ import annotations
-
-from .. import migration
-
-
-@migration.migration_class('zhipuai-config', 19)
-class ZhipuaiConfigMigration(migration.Migration):
-    """迁移"""
-
-    async def need_migrate(self) -> bool:
-        """判断当前环境是否需要运行此迁移"""
-        return 'zhipuai-chat-completions' not in self.ap.provider_cfg.data['requester']
-
-    async def run(self):
-        """执行迁移"""
-        self.ap.provider_cfg.data['requester']['zhipuai-chat-completions'] = {
-            'base-url': 'https://open.bigmodel.cn/api/paas/v4',
-            'args': {},
-            'timeout': 120,
-        }
-        self.ap.provider_cfg.data['keys']['zhipuai'] = ['xxxxxxx']
-
-        await self.ap.provider_cfg.dump_config()
@@ -1,36 +0,0 @@
-from __future__ import annotations
-
-from .. import migration
-
-
-@migration.migration_class('wecom-config', 20)
-class WecomConfigMigration(migration.Migration):
-    """迁移"""
-
-    async def need_migrate(self) -> bool:
-        """判断当前环境是否需要运行此迁移"""
-
-        # for adapter in self.ap.platform_cfg.data['platform-adapters']:
-        #     if adapter['adapter'] == 'wecom':
-        #         return False
-
-        # return True
-        return False
-
-    async def run(self):
-        """执行迁移"""
-        self.ap.platform_cfg.data['platform-adapters'].append(
-            {
-                'adapter': 'wecom',
-                'enable': False,
-                'host': '0.0.0.0',
-                'port': 2290,
-                'corpid': '',
-                'secret': '',
-                'token': '',
-                'EncodingAESKey': '',
-                'contacts_secret': '',
-            }
-        )
-
-        await self.ap.platform_cfg.dump_config()
@@ -1,35 +0,0 @@
-from __future__ import annotations
-
-from .. import migration
-
-
-@migration.migration_class('lark-config', 21)
-class LarkConfigMigration(migration.Migration):
-    """迁移"""
-
-    async def need_migrate(self) -> bool:
-        """判断当前环境是否需要运行此迁移"""
-
-        # for adapter in self.ap.platform_cfg.data['platform-adapters']:
-        #     if adapter['adapter'] == 'lark':
-        #         return False
-
-        # return True
-        return False
-
-    async def run(self):
-        """执行迁移"""
-        self.ap.platform_cfg.data['platform-adapters'].append(
-            {
-                'adapter': 'lark',
-                'enable': False,
-                'app_id': 'cli_abcdefgh',
-                'app_secret': 'XXXXXXXXXX',
-                'bot_name': 'LangBot',
-                'enable-webhook': False,
-                'port': 2285,
-                'encrypt-key': 'xxxxxxxxx',
-            }
-        )
-
-        await self.ap.platform_cfg.dump_config()
@@ -1,23 +0,0 @@
-from __future__ import annotations
-
-from .. import migration
-
-
-@migration.migration_class('lmstudio-config', 22)
-class LmStudioConfigMigration(migration.Migration):
-    """迁移"""
-
-    async def need_migrate(self) -> bool:
-        """判断当前环境是否需要运行此迁移"""
-
-        return 'lmstudio-chat-completions' not in self.ap.provider_cfg.data['requester']
-
-    async def run(self):
-        """执行迁移"""
-        self.ap.provider_cfg.data['requester']['lmstudio-chat-completions'] = {
-            'base-url': 'http://127.0.0.1:1234/v1',
-            'args': {},
-            'timeout': 120,
-        }
-
-        await self.ap.provider_cfg.dump_config()
@@ -1,25 +0,0 @@
-from __future__ import annotations
-
-from .. import migration
-
-
-@migration.migration_class('siliconflow-config', 23)
-class SiliconFlowConfigMigration(migration.Migration):
-    """迁移"""
-
-    async def need_migrate(self) -> bool:
-        """判断当前环境是否需要运行此迁移"""
-
-        return 'siliconflow-chat-completions' not in self.ap.provider_cfg.data['requester']
-
-    async def run(self):
-        """执行迁移"""
-        self.ap.provider_cfg.data['keys']['siliconflow'] = ['xxxxxxx']
-
-        self.ap.provider_cfg.data['requester']['siliconflow-chat-completions'] = {
-            'base-url': 'https://api.siliconflow.cn/v1',
-            'args': {},
-            'timeout': 120,
-        }
-
-        await self.ap.provider_cfg.dump_config()
@@ -1,31 +0,0 @@
-from __future__ import annotations
-
-from .. import migration
-
-
-@migration.migration_class('discord-config', 24)
-class DiscordConfigMigration(migration.Migration):
-    """迁移"""
-
-    async def need_migrate(self) -> bool:
-        """判断当前环境是否需要运行此迁移"""
-
-        # for adapter in self.ap.platform_cfg.data['platform-adapters']:
-        #     if adapter['adapter'] == 'discord':
-        #         return False
-
-        # return True
-        return False
-
-    async def run(self):
-        """执行迁移"""
-        self.ap.platform_cfg.data['platform-adapters'].append(
-            {
-                'adapter': 'discord',
-                'enable': False,
-                'client_id': '1234567890',
-                'token': 'XXXXXXXXXX',
-            }
-        )
-
-        await self.ap.platform_cfg.dump_config()
@@ -1,35 +0,0 @@
-from __future__ import annotations
-
-from .. import migration
-
-
-@migration.migration_class('gewechat-config', 25)
-class GewechatConfigMigration(migration.Migration):
-    """迁移"""
-
-    async def need_migrate(self) -> bool:
-        """判断当前环境是否需要运行此迁移"""
-
-        # for adapter in self.ap.platform_cfg.data['platform-adapters']:
-        #     if adapter['adapter'] == 'gewechat':
-        #         return False
-
-        # return True
-        return False
-
-    async def run(self):
-        """执行迁移"""
-        self.ap.platform_cfg.data['platform-adapters'].append(
-            {
-                'adapter': 'gewechat',
-                'enable': False,
-                'gewechat_url': 'http://your-gewechat-server:2531',
-                'gewechat_file_url': 'http://your-gewechat-server:2532',
-                'port': 2286,
-                'callback_url': 'http://your-callback-url:2286/gewechat/callback',
-                'app_id': '',
-                'token': '',
-            }
-        )
-
-        await self.ap.platform_cfg.dump_config()
@@ -1,33 +0,0 @@
-from __future__ import annotations
-
-from .. import migration
-
-
-@migration.migration_class('qqofficial-config', 26)
-class QQOfficialConfigMigration(migration.Migration):
-    """迁移"""
-
-    async def need_migrate(self) -> bool:
-        """判断当前环境是否需要运行此迁移"""
-
-        # for adapter in self.ap.platform_cfg.data['platform-adapters']:
-        #     if adapter['adapter'] == 'qqofficial':
-        #         return False
-
-        # return True
-        return False
-
-    async def run(self):
-        """执行迁移"""
-        self.ap.platform_cfg.data['platform-adapters'].append(
-            {
-                'adapter': 'qqofficial',
-                'enable': False,
-                'appid': '',
-                'secret': '',
-                'port': 2284,
-                'token': '',
-            }
-        )
-
-        await self.ap.platform_cfg.dump_config()
@@ -1,35 +0,0 @@
-from __future__ import annotations
-
-from .. import migration
-
-
-@migration.migration_class('wx-official-account-config', 27)
-class WXOfficialAccountConfigMigration(migration.Migration):
-    """迁移"""
-
-    async def need_migrate(self) -> bool:
-        """判断当前环境是否需要运行此迁移"""
-
-        # for adapter in self.ap.platform_cfg.data['platform-adapters']:
-        #     if adapter['adapter'] == 'officialaccount':
-        #         return False
-
-        # return True
-        return False
-
-    async def run(self):
-        """执行迁移"""
-        self.ap.platform_cfg.data['platform-adapters'].append(
-            {
-                'adapter': 'officialaccount',
-                'enable': False,
-                'token': '',
-                'EncodingAESKey': '',
-                'AppID': '',
-                'AppSecret': '',
-                'host': '0.0.0.0',
-                'port': 2287,
-            }
-        )
-
-        await self.ap.platform_cfg.dump_config()
@@ -1,25 +0,0 @@
-from __future__ import annotations
-
-from .. import migration
-
-
-@migration.migration_class('bailian-requester-config', 28)
-class BailianRequesterConfigMigration(migration.Migration):
-    """迁移"""
-
-    async def need_migrate(self) -> bool:
-        """判断当前环境是否需要运行此迁移"""
-
-        return 'bailian-chat-completions' not in self.ap.provider_cfg.data['requester']
-
-    async def run(self):
-        """执行迁移"""
-        self.ap.provider_cfg.data['keys']['bailian'] = ['sk-xxxxxxx']
-
-        self.ap.provider_cfg.data['requester']['bailian-chat-completions'] = {
-            'base-url': 'https://dashscope.aliyuncs.com/compatible-mode/v1',
-            'args': {},
-            'timeout': 120,
-        }
-
-        await self.ap.provider_cfg.dump_config()
@@ -1,27 +0,0 @@
-from __future__ import annotations
-
-from .. import migration
-
-
-@migration.migration_class('dashscope-app-api-config', 29)
-class DashscopeAppAPICfgMigration(migration.Migration):
-    """迁移"""
-
-    async def need_migrate(self) -> bool:
-        """判断当前环境是否需要运行此迁移"""
-        return 'dashscope-app-api' not in self.ap.provider_cfg.data
-
-    async def run(self):
-        """执行迁移"""
-        self.ap.provider_cfg.data['dashscope-app-api'] = {
-            'app-type': 'agent',
-            'api-key': 'sk-1234567890',
-            'agent': {'app-id': 'Your_app_id', 'references_quote': '参考资料来自:'},
-            'workflow': {
-                'app-id': 'Your_app_id',
-                'references_quote': '参考资料来自:',
-                'biz_params': {'city': '北京', 'date': '2023-08-10'},
-            },
-        }
-
-        await self.ap.provider_cfg.dump_config()
@@ -1,31 +0,0 @@
-from __future__ import annotations
-
-from .. import migration
-
-
-@migration.migration_class('lark-config-cmpl', 30)
-class LarkConfigCmplMigration(migration.Migration):
-    """迁移"""
-
-    async def need_migrate(self) -> bool:
-        """判断当前环境是否需要运行此迁移"""
-
-        for adapter in self.ap.platform_cfg.data['platform-adapters']:
-            if adapter['adapter'] == 'lark':
-                if 'enable-webhook' not in adapter:
-                    return True
-
-        return False
-
-    async def run(self):
-        """执行迁移"""
-        for adapter in self.ap.platform_cfg.data['platform-adapters']:
-            if adapter['adapter'] == 'lark':
-                if 'enable-webhook' not in adapter:
-                    adapter['enable-webhook'] = False
-                if 'port' not in adapter:
-                    adapter['port'] = 2285
-                if 'encrypt-key' not in adapter:
-                    adapter['encrypt-key'] = 'xxxxxxxxx'
-
-        await self.ap.platform_cfg.dump_config()
@@ -1,33 +0,0 @@
-from __future__ import annotations
-
-from .. import migration
-
-
-@migration.migration_class('dingtalk-config', 31)
-class DingTalkConfigMigration(migration.Migration):
-    """迁移"""
-
-    async def need_migrate(self) -> bool:
-        """判断当前环境是否需要运行此迁移"""
-
-        # for adapter in self.ap.platform_cfg.data['platform-adapters']:
-        #     if adapter['adapter'] == 'dingtalk':
-        #         return False
-
-        # return True
-        return False
-
-    async def run(self):
-        """执行迁移"""
-        self.ap.platform_cfg.data['platform-adapters'].append(
-            {
-                'adapter': 'dingtalk',
-                'enable': False,
-                'client_id': '',
-                'client_secret': '',
-                'robot_code': '',
-                'robot_name': '',
-            }
-        )
-
-        await self.ap.platform_cfg.dump_config()
@@ -1,25 +0,0 @@
-from __future__ import annotations
-
-from .. import migration
-
-
-@migration.migration_class('volcark-requester-config', 32)
-class VolcArkRequesterConfigMigration(migration.Migration):
-    """迁移"""
-
-    async def need_migrate(self) -> bool:
-        """判断当前环境是否需要运行此迁移"""
-
-        return 'volcark-chat-completions' not in self.ap.provider_cfg.data['requester']
-
-    async def run(self):
-        """执行迁移"""
-        self.ap.provider_cfg.data['keys']['volcark'] = ['xxxxxxxx']
-
-        self.ap.provider_cfg.data['requester']['volcark-chat-completions'] = {
-            'base-url': 'https://ark.cn-beijing.volces.com/api/v3',
-            'args': {},
-            'timeout': 120,
-        }
-
-        await self.ap.provider_cfg.dump_config()
@@ -1,24 +0,0 @@
-from __future__ import annotations
-
-from .. import migration
-
-
-@migration.migration_class('dify-thinking-config', 33)
-class DifyThinkingConfigMigration(migration.Migration):
-    """迁移"""
-
-    async def need_migrate(self) -> bool:
-        """判断当前环境是否需要运行此迁移"""
-
-        if 'options' not in self.ap.provider_cfg.data['dify-service-api']:
-            return True
-
-        if 'convert-thinking-tips' not in self.ap.provider_cfg.data['dify-service-api']['options']:
-            return True
-
-        return False
-
-    async def run(self):
-        """执行迁移"""
-        self.ap.provider_cfg.data['dify-service-api']['options'] = {'convert-thinking-tips': 'plain'}
-        await self.ap.provider_cfg.dump_config()
@@ -1,29 +0,0 @@
-from __future__ import annotations
-
-from urllib.parse import urlparse
-
-from .. import migration
-
-
-@migration.migration_class('gewechat-file-url-config', 34)
-class GewechatFileUrlConfigMigration(migration.Migration):
-    """迁移"""
-
-    async def need_migrate(self) -> bool:
-        """判断当前环境是否需要运行此迁移"""
-
-        for adapter in self.ap.platform_cfg.data['platform-adapters']:
-            if adapter['adapter'] == 'gewechat':
-                if 'gewechat_file_url' not in adapter:
-                    return True
-        return False
-
-    async def run(self):
-        """执行迁移"""
-        for adapter in self.ap.platform_cfg.data['platform-adapters']:
-            if adapter['adapter'] == 'gewechat':
-                if 'gewechat_file_url' not in adapter:
-                    parsed_url = urlparse(adapter['gewechat_url'])
-                    adapter['gewechat_file_url'] = f'{parsed_url.scheme}://{parsed_url.hostname}:2532'
-
-        await self.ap.platform_cfg.dump_config()
@@ -1,26 +0,0 @@
-from __future__ import annotations
-
-from .. import migration
-
-
-@migration.migration_class('wxoa-mode', 35)
-class WxoaModeMigration(migration.Migration):
-    """迁移"""
-
-    async def need_migrate(self) -> bool:
-        """判断当前环境是否需要运行此迁移"""
-
-        for adapter in self.ap.platform_cfg.data['platform-adapters']:
-            if adapter['adapter'] == 'officialaccount':
-                if 'Mode' not in adapter:
-                    return True
-        return False
-
-    async def run(self):
-        """执行迁移"""
-        for adapter in self.ap.platform_cfg.data['platform-adapters']:
-            if adapter['adapter'] == 'officialaccount':
-                if 'Mode' not in adapter:
-                    adapter['Mode'] = 'drop'
-
-        await self.ap.platform_cfg.dump_config()
@@ -1,26 +0,0 @@
-from __future__ import annotations
-
-from .. import migration
-
-
-@migration.migration_class('wxoa-loading-message', 36)
-class WxoaLoadingMessageMigration(migration.Migration):
-    """迁移"""
-
-    async def need_migrate(self) -> bool:
-        """判断当前环境是否需要运行此迁移"""
-
-        for adapter in self.ap.platform_cfg.data['platform-adapters']:
-            if adapter['adapter'] == 'officialaccount':
-                if 'LoadingMessage' not in adapter:
-                    return True
-        return False
-
-    async def run(self):
-        """执行迁移"""
-        for adapter in self.ap.platform_cfg.data['platform-adapters']:
-            if adapter['adapter'] == 'officialaccount':
-                if 'LoadingMessage' not in adapter:
-                    adapter['LoadingMessage'] = 'AI正在思考中，请发送任意内容获取回复。'
-
-        await self.ap.platform_cfg.dump_config()
@@ -1,18 +0,0 @@
-from __future__ import annotations
-
-from .. import migration
-
-
-@migration.migration_class('mcp-config', 37)
-class MCPConfigMigration(migration.Migration):
-    """迁移"""
-
-    async def need_migrate(self) -> bool:
-        """判断当前环境是否需要运行此迁移"""
-        return 'mcp' not in self.ap.provider_cfg.data
-
-    async def run(self):
-        """执行迁移"""
-        self.ap.provider_cfg.data['mcp'] = {'servers': []}
-
-        await self.ap.provider_cfg.dump_config()
@@ -1,25 +0,0 @@
-from __future__ import annotations
-
-from .. import migration
-
-
-@migration.migration_class('tg-dingtalk-markdown', 38)
-class TgDingtalkMarkdownMigration(migration.Migration):
-    """迁移"""
-
-    async def need_migrate(self) -> bool:
-        """判断当前环境是否需要运行此迁移"""
-
-        for adapter in self.ap.platform_cfg.data['platform-adapters']:
-            if adapter['adapter'] in ['dingtalk', 'telegram']:
-                if 'markdown_card' not in adapter:
-                    return True
-        return False
-
-    async def run(self):
-        """执行迁移"""
-        for adapter in self.ap.platform_cfg.data['platform-adapters']:
-            if adapter['adapter'] in ['dingtalk', 'telegram']:
-                if 'markdown_card' not in adapter:
-                    adapter['markdown_card'] = False
-        await self.ap.platform_cfg.dump_config()
@@ -1,29 +0,0 @@
-from __future__ import annotations
-
-from .. import migration
-
-
-@migration.migration_class('modelscope-config-completion', 39)
-class ModelScopeConfigCompletionMigration(migration.Migration):
-    """ModelScope配置迁移"""
-
-    async def need_migrate(self) -> bool:
-        """判断当前环境是否需要运行此迁移"""
-        return (
-            'modelscope-chat-completions' not in self.ap.provider_cfg.data['requester']
-            or 'modelscope' not in self.ap.provider_cfg.data['keys']
-        )
-
-    async def run(self):
-        """执行迁移"""
-        if 'modelscope-chat-completions' not in self.ap.provider_cfg.data['requester']:
-            self.ap.provider_cfg.data['requester']['modelscope-chat-completions'] = {
-                'base-url': 'https://api-inference.modelscope.cn/v1',
-                'args': {},
-                'timeout': 120,
-            }
-
-        if 'modelscope' not in self.ap.provider_cfg.data['keys']:
-            self.ap.provider_cfg.data['keys']['modelscope'] = []
-
-        await self.ap.provider_cfg.dump_config()
@@ -1,29 +0,0 @@
-from __future__ import annotations
-
-from .. import migration
-
-
-@migration.migration_class('ppio-config', 40)
-class PPIOConfigMigration(migration.Migration):
-    """PPIO配置迁移"""
-
-    async def need_migrate(self) -> bool:
-        """判断当前环境是否需要运行此迁移"""
-        return (
-            'ppio-chat-completions' not in self.ap.provider_cfg.data['requester']
-            or 'ppio' not in self.ap.provider_cfg.data['keys']
-        )
-
-    async def run(self):
-        """执行迁移"""
-        if 'ppio-chat-completions' not in self.ap.provider_cfg.data['requester']:
-            self.ap.provider_cfg.data['requester']['ppio-chat-completions'] = {
-                'base-url': 'https://api.ppinfra.com/v3/openai',
-                'args': {},
-                'timeout': 120,
-            }
-
-        if 'ppio' not in self.ap.provider_cfg.data['keys']:
-            self.ap.provider_cfg.data['keys']['ppio'] = []
-
-        await self.ap.provider_cfg.dump_config()
@@ -1,17 +0,0 @@
-from __future__ import annotations
-
-from .. import migration
-
-
-@migration.migration_class('dingtalk_card_auto_layout', 41)
-class DingTalkCardAutoLayoutMigration(migration.Migration):
-    """迁移"""
-
-    async def need_migrate(self) -> bool:
-        """判断当前环境是否需要运行此迁移"""
-        return True
-
-    async def run(self):
-        """执行迁移"""
-        self.ap.platform_cfg.data['platform-adapters']['app']['dingtalk']['card_auto_layout'] = False
-        await self.ap.platform_cfg.dump_config()
@@ -1,27 +0,0 @@
-from __future__ import annotations
-
-from .. import migration
-
-
-@migration.migration_class('weknora-api-config', 42)
-class WeKnoraAPICfgMigration(migration.Migration):
-    """WeKnora API 配置迁移"""
-
-    async def need_migrate(self) -> bool:
-        """判断当前环境是否需要运行此迁移"""
-        return 'weknora-api' not in self.ap.provider_cfg.data
-
-    async def run(self):
-        """执行迁移"""
-        self.ap.provider_cfg.data['weknora-api'] = {
-            'base-url': 'http://localhost:8080/api/v1',
-            'app-type': 'agent',
-            'api-key': '',
-            'agent-id': 'builtin-smart-reasoning',
-            'knowledge-base-ids': [],
-            'web-search-enabled': False,
-            'timeout': 120,
-            'base-prompt': '请回答用户的问题。',
-        }
-
-        await self.ap.provider_cfg.dump_config()
@@ -1,30 +0,0 @@
-from __future__ import annotations
-
-from .. import migration
-
-
-@migration.migration_class('deerflow-api-config', 43)
-class DeerFlowAPICfgMigration(migration.Migration):
-    """DeerFlow API 配置迁移"""
-
-    async def need_migrate(self) -> bool:
-        """判断当前环境是否需要运行此迁移"""
-        return 'deerflow-api' not in self.ap.provider_cfg.data
-
-    async def run(self):
-        """执行迁移"""
-        self.ap.provider_cfg.data['deerflow-api'] = {
-            'api-base': 'http://127.0.0.1:2026',
-            'api-key': '',
-            'auth-header': '',
-            'assistant-id': 'lead_agent',
-            'model-name': '',
-            'thinking-enabled': False,
-            'plan-mode': False,
-            'subagent-enabled': False,
-            'max-concurrent-subagents': 3,
-            'timeout': 300,
-            'recursion-limit': 1000,
-        }
-
-        await self.ap.provider_cfg.dump_config()
@@ -202,6 +202,16 @@ class LoadConfigStage(stage.BootingStage):
                constants.instance_id = new_id
        constants.edition = ap.instance_config.data.get('system', {}).get('edition', 'community')

+        # Instance creation timestamp: sourced from data/labels/instance_id.json.
+        # Instances created before this field existed (or supplied via
+        # system.instance_id) won't have it, so backfill with the current time
+        # and persist it via the dump below — from then on it stays stable.
+        instance_create_ts = ap.instance_id.data.get('instance_create_ts', 0)
+        if not isinstance(instance_create_ts, int) or instance_create_ts <= 0:
+            instance_create_ts = int(time.time())
+            ap.instance_id.data['instance_create_ts'] = instance_create_ts
+        constants.instance_create_ts = instance_create_ts
+
        print(f'LangBot instance id: {constants.instance_id}')
        print(f'LangBot edition: {constants.edition}')

@@ -1,43 +0,0 @@
-from __future__ import annotations
-
-
-from .. import stage, app
-from .. import migration
-from ...utils import importutil
-from .. import migrations
-
-importutil.import_modules_in_pkg(migrations)
-
-
-@stage.stage_class('MigrationStage')
-class MigrationStage(stage.BootingStage):
-    """Migration stage
-
-    These migrations are legacy, only performed in version 3.x
-    """
-
-    async def run(self, ap: app.Application):
-        """Run migration"""
-
-        if any(
-            [
-                ap.command_cfg is None,
-                ap.pipeline_cfg is None,
-                ap.platform_cfg is None,
-                ap.provider_cfg is None,
-                ap.system_cfg is None,
-            ]
-        ):  # only run migration when version is 3.x
-            return
-
-        migrations = migration.preregistered_migrations
-
-        # Sort by migration number
-        migrations.sort(key=lambda x: x.number)
-
-        for migration_cls in migrations:
-            migration_instance = migration_cls(ap)
-
-            if await migration_instance.need_migrate():
-                await migration_instance.run()
-                print(f'Migration {migration_instance.name} executed')
@@ -31,6 +31,7 @@ class LLMModel(Base):
    name = sqlalchemy.Column(sqlalchemy.String(255), nullable=False)
    provider_uuid = sqlalchemy.Column(sqlalchemy.String(255), nullable=False)
    abilities = sqlalchemy.Column(sqlalchemy.JSON, nullable=False, default=[])
+    context_length = sqlalchemy.Column(sqlalchemy.Integer, nullable=True)
    extra_args = sqlalchemy.Column(sqlalchemy.JSON, nullable=False, default={})
    prefered_ranking = sqlalchemy.Column(sqlalchemy.Integer, nullable=False, default=0)
    created_at = sqlalchemy.Column(sqlalchemy.DateTime, nullable=False, server_default=sqlalchemy.func.now())
@@ -0,0 +1,39 @@
+"""add llm model context length
+
+Revision ID: 0005_add_llm_context_length
+Revises: 0004_add_mcp_readme
+Create Date: 2026-06-07
+"""
+
+import sqlalchemy as sa
+from alembic import op
+
+revision = '0005_add_llm_context_length'
+down_revision = '0004_add_mcp_readme'
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    # Add ``context_length`` to llm_models if the table exists and the column is
+    # missing. The table may have been created by create_all() with the column
+    # already present on fresh installs, so guard against duplicate-add; it may
+    # also be absent entirely (e.g. migrating a truly empty DB), so guard against
+    # a missing table too.
+    conn = op.get_bind()
+    inspector = sa.inspect(conn)
+    if 'llm_models' not in inspector.get_table_names():
+        return
+    columns = {column['name'] for column in inspector.get_columns('llm_models')}
+    if 'context_length' not in columns:
+        op.add_column('llm_models', sa.Column('context_length', sa.Integer(), nullable=True))
+
+
+def downgrade() -> None:
+    conn = op.get_bind()
+    inspector = sa.inspect(conn)
+    if 'llm_models' not in inspector.get_table_names():
+        return
+    columns = {column['name'] for column in inspector.get_columns('llm_models')}
+    if 'context_length' in columns:
+        op.drop_column('llm_models', 'context_length')
@@ -0,0 +1,36 @@
+# Legacy migrations (DEPRECATED — do not add new files here)
+
+This directory holds the **legacy 3.x database migration system**
+(`DBMigration` subclasses in `dbmXXX_*.py`, registered via
+`@migration.migration_class(N)` and run from `pkg/persistence/mgr.py`).
+
+**This system is frozen. Do not add new `dbmXXX_*.py` migrations.**
+
+The chain is capped at version 25 (`required_database_version = 25` in
+`pkg/utils/constants.py`). These files exist only to upgrade pre-existing
+3.x databases up to the Alembic baseline (`0001_baseline`). Removing them
+would break in-place upgrades from old installations, so they are kept
+read-only.
+
+## All new schema changes use Alembic
+
+Migrations now live in `pkg/persistence/alembic/versions/`. To create one:
+
+```bash
+uv run python -m langbot.pkg.persistence.alembic_runner autogenerate "description of your change"
+```
+
+This command requires `data/config.yaml` to exist. Review and edit the generated
+script before committing — Alembic migrations run automatically on startup
+and must be idempotent and guard against missing tables (the test suite
+runs them against empty databases).
+
+### Rules for Alembic revisions
+
+- Keep the revision id **≤ 32 characters** — PostgreSQL stores
+  `alembic_version.version_num` as `varchar(32)` and will raise
+  `StringDataRightTruncationError` on overflow.
+- Guard every `op` call against a missing table / missing column
+  (`inspector.get_table_names()` / `inspector.get_columns()`); fresh
+  installs create the schema via `create_all()` and stamp the baseline,
+  so migrations may run against tables that already match or do not exist.
@@ -109,7 +109,7 @@ class PreProcessor(stage.PipelineStage):
            if llm_model:
                query.use_llm_model_uuid = llm_model.model_entity.uuid

-                if llm_model.model_entity.abilities.__contains__('func_call'):
+                if 'func_call' in (llm_model.model_entity.abilities or []):
                    # Get bound plugins and MCP servers for filtering tools
                    bound_plugins = query.variables.get('_pipeline_bound_plugins', None)
                    bound_mcp_servers = query.variables.get('_pipeline_bound_mcp_servers', None)
@@ -159,11 +159,7 @@ class PreProcessor(stage.PipelineStage):

        # Check if this model supports vision, if not, remove all images
        # TODO this checking should be performed in runner, and in this stage, the image should be reserved
-        if (
-            selected_runner == 'local-agent'
-            and llm_model
-            and not llm_model.model_entity.abilities.__contains__('vision')
-        ):
+        if selected_runner == 'local-agent' and llm_model and 'vision' not in (llm_model.model_entity.abilities or []):
            for msg in query.messages:
                if isinstance(msg.content, list):
                    for me in msg.content:
@@ -181,7 +177,7 @@ class PreProcessor(stage.PipelineStage):
                plain_text += me.text
            elif isinstance(me, platform_message.Image):
                if selected_runner != 'local-agent' or (
-                    llm_model and llm_model.model_entity.abilities.__contains__('vision')
+                    llm_model and 'vision' in (llm_model.model_entity.abilities or [])
                ):
                    if me.base64 is not None:
                        content_list.append(provider_message.ContentElement.from_image_base64(me.base64))
@@ -202,7 +198,7 @@ class PreProcessor(stage.PipelineStage):
                        content_list.append(provider_message.ContentElement.from_text(msg.text))
                    elif isinstance(msg, platform_message.Image):
                        if selected_runner != 'local-agent' or (
-                            llm_model and llm_model.model_entity.abilities.__contains__('vision')
+                            llm_model and 'vision' in (llm_model.model_entity.abilities or [])
                        ):
                            if msg.base64 is not None:
                                content_list.append(provider_message.ContentElement.from_image_base64(msg.base64))
@@ -13,6 +13,7 @@ from ....provider import runner as runner_module

 import langbot_plugin.api.entities.events as events
 from ....utils import importutil, constants, runner as runner_utils
+from ....telemetry import features as telemetry_features
 from ....provider import runners
 import langbot_plugin.api.entities.builtin.provider.session as provider_session
 import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
@@ -201,7 +202,12 @@ class ChatMessageHandler(handler.MessageHandler):
                        runner_name, runner, query.pipeline_config
                    )

+                    # Feature usage collected during query processing (tool calls,
+                    # knowledge base usage, sandbox executions, activated skills, ...)
+                    features = telemetry_features.collect_features(query)
+
                    payload = {
+                        'event_type': 'query',
                        'query_id': query.query_id,
                        'adapter': adapter_name,
                        'runner': runner_name,
@@ -212,6 +218,7 @@ class ChatMessageHandler(handler.MessageHandler):
                        'instance_id': constants.instance_id,
                        'edition': constants.edition,
                        'pipeline_plugins': pipeline_plugins,
+                        'features': features,
                        'error': locals().get('error_info', None),
                        'timestamp': datetime.utcnow().isoformat(),
                    }
@@ -219,10 +226,12 @@ class ChatMessageHandler(handler.MessageHandler):
                    # Send telemetry asynchronously and do not block pipeline via app's telemetry manager
                    await self.ap.telemetry.start_send_task(payload)

-                    # Trigger survey event on first successful non-WebSocket response
+                    # Trigger survey events on successful non-WebSocket responses
                    if not locals().get('error_info') and adapter_name and 'WebSocket' not in adapter_name:
                        if self.ap.survey:
                            await self.ap.survey.trigger_event('first_bot_response_success')
+                            # Counts toward the bot_response_success_100 milestone event
+                            await self.ap.survey.record_bot_response_success()
                except Exception as ex:
                    # Ensure telemetry issues do not affect normal flow
                    self.ap.logger.warning(f'Failed to send telemetry: {ex}')
@@ -7,6 +7,7 @@ from .. import stage

 import langbot_plugin.api.entities.builtin.platform.message as platform_message
 import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
+import langbot_plugin.api.entities.builtin.provider.message as provider_message
 import langbot_plugin.api.entities.events as events


@@ -23,6 +24,50 @@ class ResponseWrapper(stage.PipelineStage):
    async def initialize(self, pipeline_config: dict):
        pass

+    def _is_final_assistant_message(self, result) -> bool:
+        """Tell whether *result* is the agent's terminal answer without tool calls.
+
+        Outbound attachment collection must fire only for that terminal
+        assistant message, never for intermediate streaming chunks or for
+        rounds that still request tool calls.
+        """
+        is_assistant = getattr(result, 'role', None) == 'assistant'
+        if not is_assistant or result.tool_calls:
+            return False
+        # A non-chunk assistant message is always terminal; a chunk is
+        # terminal only when it is flagged as the final one.
+        if not isinstance(result, provider_message.MessageChunk):
+            return True
+        return bool(result.is_final)
+
+    async def _append_outbound_attachments(
+        self,
+        query: pipeline_query.Query,
+        message_chain: platform_message.MessageChain,
+    ) -> None:
+        """Collect sandbox outbox files and append them to *message_chain*.
+
+        Runs at most once per query: the ``_sandbox_outbound_collected`` query
+        variable is set before collection starts, so a failed attempt is not
+        retried. Never raises into the pipeline — attachment delivery is
+        best-effort, and a malformed attachment is skipped with a warning
+        instead of aborting the response.
+
+        Args:
+            query: Query whose ``variables`` dict carries the once-per-query
+                guard; it is also handed to the box service for collection.
+            message_chain: Chain that receives one Image / Voice / File
+                component per collected attachment (mutated in place).
+        """
+        # Defensive: treat an unset variables dict as empty so the guard
+        # below can be read and written.
+        if query.variables is None:
+            query.variables = {}
+        if query.variables.get('_sandbox_outbound_collected'):
+            return
+        box_service = getattr(self.ap, 'box_service', None)
+        if box_service is None or not getattr(box_service, 'available', False):
+            return
+        query.variables['_sandbox_outbound_collected'] = True
+        try:
+            attachments = await box_service.collect_outbound_attachments(query)
+        except Exception as e:
+            self.ap.logger.warning(f'Outbound attachment collection failed: {e}')
+            return
+        for att in attachments or []:
+            # Build each component in isolation: one malformed entry (e.g. a
+            # missing 'base64' key) must not drop the remaining attachments
+            # or break the reply itself.
+            try:
+                att_type = att.get('type')
+                if att_type == 'Image':
+                    component = platform_message.Image(base64=att['base64'])
+                elif att_type == 'Voice':
+                    component = platform_message.Voice(base64=att['base64'])
+                else:
+                    component = platform_message.File(name=att.get('name', 'file'), base64=att['base64'])
+            except Exception as e:
+                self.ap.logger.warning(f'Skipping malformed outbound attachment: {e}')
+                continue
+            message_chain.append(component)
+
    async def process(
        self,
        query: pipeline_query.Query,
@@ -83,10 +128,16 @@ class ResponseWrapper(stage.PipelineStage):
                            )
                        else:
                            if event_ctx.event.reply_message_chain is not None:
-                                query.resp_message_chain.append(event_ctx.event.reply_message_chain)
-
+                                reply_chain = event_ctx.event.reply_message_chain
                            else:
-                                query.resp_message_chain.append(result.get_content_platform_message_chain())
+                                reply_chain = result.get_content_platform_message_chain()
+
+                            # Attach files the agent produced in the sandbox
+                            # outbox, but only on the terminal assistant message.
+                            if self._is_final_assistant_message(result):
+                                await self._append_outbound_attachments(query, reply_chain)
+
+                            query.resp_message_chain.append(reply_chain)

                            yield entities.StageProcessResult(
                                result_type=entities.ResultType.CONTINUE,
@@ -84,6 +84,18 @@ class WebPageBotAdapter(abstract_platform_adapter.AbstractMessagePlatformAdapter
    ):
        self.listeners.pop(event_type, None)

+    async def is_stream_output_supported(self) -> bool:
+        """Report stream-output support by asking the attached ws_adapter.
+
+        Returns False when no ws_adapter is attached.
+        """
+        ws_adapter = self._ws_adapter
+        if ws_adapter is None:
+            return False
+        return await ws_adapter.is_stream_output_supported()
+
+    async def create_message_card(self, message_id: str | int, event: platform_events.MessageEvent) -> bool:
+        """Forward message-card creation to the attached ws_adapter.
+
+        Returns the ws_adapter's result, or False when no ws_adapter is attached.
+        """
+        ws_adapter = self._ws_adapter
+        if ws_adapter is None:
+            return False
+        return await ws_adapter.create_message_card(message_id, event)
+
    async def is_muted(self, group_id: int) -> bool:
        return False

@@ -312,12 +312,18 @@ class WebSocketAdapter(abstract_platform_adapter.AbstractMessagePlatformAdapter)

    async def _process_image_components(self, message_chain_obj: list):
        """
-        处理消息链中的图片和文件组件，将path转换为base64
+        处理消息链中的图片、语音和文件组件，将 path 转换为 base64
+
+        Image / Voice / File components uploaded from the web client carry a
+        storage key in ``path``. Resolve it to a base64 data URI so downstream
+        stages (multimodal LLM input and the Box sandbox inbox) have a usable
+        payload, then drop the now-consumed storage object.

        Args:
            message_chain_obj: 消息链对象列表
        """
        import base64
+        import mimetypes

        storage_mgr = self.ap.storage_mgr

@@ -325,31 +331,33 @@ class WebSocketAdapter(abstract_platform_adapter.AbstractMessagePlatformAdapter)
            comp_type = component.get('type', '')
            comp_path = component.get('path', '')

-            if not comp_path:
+            if not comp_path or comp_type not in ('Image', 'Voice', 'File'):
                continue

-            if comp_type == 'Image':
-                try:
-                    file_content = await storage_mgr.storage_provider.load(comp_path)
-                    base64_str = base64.b64encode(file_content).decode('utf-8')
+            try:
+                file_content = await storage_mgr.storage_provider.load(comp_path)
+                base64_str = base64.b64encode(file_content).decode('utf-8')

-                    file_key = comp_path
-                    if file_key.lower().endswith(('.jpg', '.jpeg')):
+                lowered = comp_path.lower()
+                if comp_type == 'Image':
+                    if lowered.endswith(('.jpg', '.jpeg')):
                        mime_type = 'image/jpeg'
-                    elif file_key.lower().endswith('.png'):
-                        mime_type = 'image/png'
-                    elif file_key.lower().endswith('.gif'):
+                    elif lowered.endswith('.gif'):
                        mime_type = 'image/gif'
-                    elif file_key.lower().endswith('.webp'):
+                    elif lowered.endswith('.webp'):
                        mime_type = 'image/webp'
                    else:
                        mime_type = 'image/png'
+                elif comp_type == 'Voice':
+                    mime_type = mimetypes.guess_type(comp_path)[0] or 'audio/wav'
+                else:  # File
+                    mime_type = mimetypes.guess_type(comp_path)[0] or 'application/octet-stream'

-                    component['base64'] = f'data:{mime_type};base64,{base64_str}'
-                    await storage_mgr.storage_provider.delete(comp_path)
-                    component['path'] = ''
-                except Exception as e:
-                    await self.logger.error(f'Failed to load image file {comp_path}: {e}')
+                component['base64'] = f'data:{mime_type};base64,{base64_str}'
+                await storage_mgr.storage_provider.delete(comp_path)
+                component['path'] = ''
+            except Exception as e:
+                await self.logger.error(f'Failed to load {comp_type} file {comp_path}: {e}')

    async def handle_websocket_message(
        self,
@@ -689,6 +689,16 @@ class PluginRuntimeConnector(ManagedRuntimeConnector):
    async def get_plugin_readme(self, plugin_author: str, plugin_name: str, language: str = 'en') -> str:
        return await self.handler.get_plugin_readme(plugin_author, plugin_name, language)

+    async def get_plugin_logs(
+        self,
+        plugin_author: str,
+        plugin_name: str,
+        limit: int = 200,
+        level: str | None = None,
+    ) -> list[dict[str, Any]]:
+        """Fetch a plugin's log entries by delegating to the runtime handler.
+
+        Not cached: logs are live and change constantly.
+        """
+        logs = await self.handler.get_plugin_logs(plugin_author, plugin_name, limit, level)
+        return logs
+
    @alru_cache(ttl=5 * 60)
    async def get_plugin_assets(self, plugin_author: str, plugin_name: str, filepath: str) -> dict[str, Any]:
        return await self.handler.get_plugin_assets(plugin_author, plugin_name, filepath)
@@ -514,6 +514,35 @@ class RuntimeConnectionHandler(handler.Handler):
            except Exception as e:
                return _make_rag_error_response(e, 'EmbeddingError', embedding_model_uuid=embedding_model_uuid)

+        @self.action(PluginToRuntimeAction.INVOKE_RERANK)
+        async def invoke_rerank(data: dict[str, Any]) -> handler.ActionResponse:
+            """Rerank documents against a query with a configured rerank model.
+
+            Reads ``rerank_model_uuid``, ``query`` and ``documents`` (required)
+            plus optional ``top_k`` and ``extra_args`` from *data*. Only the
+            first 64 documents are sent to the provider. On success the
+            response data is ``{'results': [...]}``, sorted by
+            ``relevance_score`` in descending order and limited to ``top_k``
+            entries when it is given. An unknown model UUID yields an error
+            response; any failure during reranking is reported through
+            ``_make_rag_error_response`` as a ``RerankError``.
+            """
+            rerank_model_uuid = data['rerank_model_uuid']
+            query = data['query']
+            documents = data['documents']
+            top_k = data.get('top_k')
+            # An explicit ``extra_args: null`` in the payload falls back to an
+            # empty dict instead of being forwarded to the provider as None.
+            extra_args = data.get('extra_args') or {}
+
+            try:
+                rerank_model = await self.ap.model_mgr.get_rerank_model_by_uuid(rerank_model_uuid)
+            except ValueError:
+                return handler.ActionResponse.error(
+                    message=f'Rerank model with rerank_model_uuid {rerank_model_uuid} not found',
+                )
+
+            try:
+                scores = await rerank_model.provider.invoke_rerank(
+                    model=rerank_model,
+                    query=query,
+                    documents=documents[:64],
+                    extra_args=extra_args,
+                )
+                # A missing or None score sorts as 0 rather than making the
+                # comparison raise TypeError.
+                scored = sorted(scores, key=lambda x: x.get('relevance_score') or 0, reverse=True)
+                if top_k is not None:
+                    # Clamp at 0: a negative slice bound would drop items from
+                    # the tail instead of limiting the result count.
+                    scored = scored[: max(int(top_k), 0)]
+                return handler.ActionResponse.success(data={'results': scored})
+            except Exception as e:
+                return _make_rag_error_response(e, 'RerankError', rerank_model_uuid=rerank_model_uuid)
+
        @self.action(PluginToRuntimeAction.VECTOR_UPSERT)
        async def vector_upsert(data: dict[str, Any]) -> handler.ActionResponse:
            collection_id = data['collection_id']
@@ -953,6 +982,31 @@ class RuntimeConnectionHandler(handler.Handler):

        return readme_bytes.decode('utf-8')

+    async def get_plugin_logs(
+        self,
+        plugin_author: str,
+        plugin_name: str,
+        limit: int = 200,
+        level: str | None = None,
+    ) -> list[dict[str, Any]]:
+        """Get recent log lines captured from the plugin's stderr.
+
+        Sends a GET_PLUGIN_LOGS action with a 20 second timeout. If the call
+        fails, the traceback is printed and an empty list is returned;
+        otherwise the ``logs`` entry of the result (default: empty list).
+        """
+        request = {
+            'plugin_author': plugin_author,
+            'plugin_name': plugin_name,
+            'limit': limit,
+            'level': level,
+        }
+        try:
+            result = await self.call_action(
+                LangBotToRuntimeAction.GET_PLUGIN_LOGS,
+                request,
+                timeout=20,
+            )
+        except Exception:
+            traceback.print_exc()
+            return []
+        else:
+            return result.get('logs', [])
+
    async def get_plugin_assets(self, plugin_author: str, plugin_name: str, filepath: str) -> dict[str, Any]:
        """Get plugin assets"""
        result = await self.call_action(
@@ -1,5 +1,6 @@
 from __future__ import annotations

+import asyncio
 import sqlalchemy
 import traceback

@@ -37,11 +38,41 @@ class ModelManager:
        self.requester_components = []
        self.requester_dict = {}

+    @staticmethod
+    def _get_litellm_provider_from_manifest(component: engine.Component | None) -> str | None:
+        """Read ``litellm_provider`` from a component's ``spec``, if present.
+
+        Returns the value only when it is a non-empty string; a missing
+        component, a missing or empty spec, a non-string value, or a failing
+        lookup all yield None.
+        """
+        if component is None:
+            return None
+
+        spec = getattr(component, 'spec', None) or {}
+
+        if isinstance(spec, dict):
+            value = spec.get('litellm_provider')
+        else:
+            # Non-dict spec: fall back to a ``get`` method when the object
+            # offers one, treating any lookup error as "not set".
+            value = None
+            lookup = getattr(spec, 'get', None)
+            if callable(lookup):
+                try:
+                    value = lookup('litellm_provider')
+                except Exception:
+                    value = None
+
+        return value if isinstance(value, str) and value else None
+
    async def initialize(self):
        self.requester_components = self.ap.discover.get_components_by_kind('LLMAPIRequester')

        requester_dict: dict[str, type[requester.ProviderAPIRequester]] = {}
        for component in self.requester_components:
+            # Skip components that use litellm_provider (they will use litellmchat.py instead)
+            litellm_provider = self._get_litellm_provider_from_manifest(component)
+            if litellm_provider:
+                self.ap.logger.debug(
+                    f'Skipping Python class loading for {component.metadata.name} '
+                    f'(uses litellm_provider={litellm_provider})'
+                )
+                continue
            requester_dict[component.metadata.name] = component.get_python_component_class()

        self.requester_dict = requester_dict
@@ -54,8 +85,17 @@ class ModelManager:
            self.ap.logger.info('LangBot Space Models service is disabled, skipping sync.')
            return

+        sync_timeout = space_config.get('models_sync_timeout')
        try:
-            await self.sync_new_models_from_space()
+            if sync_timeout:
+                await asyncio.wait_for(
+                    self.sync_new_models_from_space(),
+                    timeout=float(sync_timeout),
+                )
+            else:
+                await self.sync_new_models_from_space()
+        except asyncio.TimeoutError:
+            self.ap.logger.warning(f'LangBot Space model sync timed out after {sync_timeout}s, skipping startup sync.')
        except Exception as e:
            self.ap.logger.warning('Failed to sync new models from LangBot Space, model list may not be updated.')
            self.ap.logger.warning(f'  - Error: {e}')
@@ -236,6 +276,7 @@ class ModelManager:
                name=model_info.get('name', ''),
                provider_uuid='',
                abilities=model_info.get('abilities', []),
+                context_length=model_info.get('context_length'),
                extra_args=model_info.get('extra_args', {}),
            ),
            provider=runtime_provider,
@@ -294,13 +335,37 @@ class ModelManager:
        else:
            provider_entity = provider_info

-        if provider_entity.requester not in self.requester_dict:
-            raise provider_errors.RequesterNotFoundError(provider_entity.requester)
+        # Get requester manifest to check for litellm_provider
+        requester_manifest = self.get_available_requester_manifest_by_name(provider_entity.requester)
+        litellm_provider = self._get_litellm_provider_from_manifest(requester_manifest)
+
+        # Build config from base_url
+        config = {'base_url': provider_entity.base_url}
+
+        # Check if requester manifest specifies litellm_provider
+        if litellm_provider:
+            from .requesters import litellmchat
+
+            # Use unified LiteLLMRequester with provider prefix
+            # Map litellm_provider (YAML spec) to custom_llm_provider (config)
+            config['custom_llm_provider'] = litellm_provider
+            requester_inst = litellmchat.LiteLLMRequester(
+                ap=self.ap,
+                config=config,
+            )
+            self.ap.logger.debug(
+                f'Using LiteLLMRequester for {provider_entity.requester} '
+                f'with custom_llm_provider={config["custom_llm_provider"]}'
+            )
+        else:
+            # Use original requester class (for backward compatibility)
+            if provider_entity.requester not in self.requester_dict:
+                raise provider_errors.RequesterNotFoundError(provider_entity.requester)
+            requester_inst = self.requester_dict[provider_entity.requester](
+                ap=self.ap,
+                config=config,
+            )

-        requester_inst = self.requester_dict[provider_entity.requester](
-            ap=self.ap,
-            config={'base_url': provider_entity.base_url},
-        )
        await requester_inst.initialize()

        token_mgr = token.TokenManager(name=provider_entity.uuid, tokens=provider_entity.api_keys or [])
@@ -406,6 +471,7 @@ class ModelManager:
            name=model_info.get('name', ''),
            provider_uuid=model_info.get('provider_uuid', ''),
            abilities=model_info.get('abilities', []),
+            context_length=model_info.get('context_length'),
            extra_args=model_info.get('extra_args', {}),
        )

@@ -12,6 +12,19 @@ import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
 import langbot_plugin.api.entities.builtin.provider.message as provider_message


+LLM_USAGE_QUERY_VARIABLE = '_llm_usage'
+STREAM_USAGE_QUERY_VARIABLE = '_stream_usage'
+
+
+def _store_llm_usage(query: pipeline_query.Query | None, usage_info: dict | None) -> None:
+    """Record the latest provider usage on the query for upstream action handlers.
+
+    Does nothing when there is no query or the usage info is empty. The
+    usage dict is copied before being stored under LLM_USAGE_QUERY_VARIABLE.
+    """
+    if query is not None and usage_info:
+        # Create the variables dict on demand when the query has none yet.
+        if query.variables is None:
+            query.variables = {}
+        query.variables[LLM_USAGE_QUERY_VARIABLE] = dict(usage_info)
+
+
 class RuntimeProvider:
    """运行时模型提供商"""

@@ -67,8 +80,9 @@ class RuntimeProvider:
            if isinstance(result, tuple):
                msg, usage_info = result
                if usage_info:
-                    input_tokens = usage_info.get('input_tokens', 0)
-                    output_tokens = usage_info.get('output_tokens', 0)
+                    _store_llm_usage(query, usage_info)
+                    input_tokens = usage_info.get('prompt_tokens', 0)
+                    output_tokens = usage_info.get('completion_tokens', 0)
                return msg
            else:
                return result
@@ -128,7 +142,6 @@ class RuntimeProvider:
        start_time = time.time()
        status = 'success'
        error_message = None
-        # Note: Stream doesn't easily provide token counts, set to 0
        input_tokens = 0
        output_tokens = 0

@@ -143,6 +156,16 @@ class RuntimeProvider:
                remove_think=remove_think,
            ):
                yield chunk
+            # Extract usage from stream if available (stored by LiteLLM requester)
+            if query:
+                if query.variables is None:
+                    query.variables = {}
+                if STREAM_USAGE_QUERY_VARIABLE in query.variables:
+                    usage_info = query.variables[STREAM_USAGE_QUERY_VARIABLE]
+                    _store_llm_usage(query, usage_info)
+                    input_tokens = usage_info.get('prompt_tokens', 0)
+                    output_tokens = usage_info.get('completion_tokens', 0)
+                    del query.variables[STREAM_USAGE_QUERY_VARIABLE]
        except Exception as e:
            status = 'error'
            error_message = str(e)
@@ -1,17 +0,0 @@
-from __future__ import annotations
-
-import typing
-import openai
-
-from . import chatcmpl
-
-
-class AI302ChatCompletions(chatcmpl.OpenAIChatCompletions):
-    """302.AI ChatCompletion API 请求器"""
-
-    client: openai.AsyncClient
-
-    default_config: dict[str, typing.Any] = {
-        'base_url': 'https://api.302.ai/v1',
-        'timeout': 120,
-    }
@@ -7,6 +7,7 @@ metadata:
    zh_Hans: 302.AI
  icon: 302ai.png
 spec:
+  litellm_provider: openai
  config:
  - name: base_url
    label:
@@ -22,6 +23,7 @@ spec:
    type: integer
    required: true
    default: 120
+  alias: "302ai 302.AI 302 ai 中转 中转站 aggregator gpt claude gemini"
  support_type:
  - llm
  - text-embedding
@@ -1,370 +0,0 @@
-from __future__ import annotations
-
-import typing
-import json
-import platform
-import socket
-import anthropic
-import httpx
-
-from .. import errors, requester
-
-from ....utils import image
-import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
-import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
-import langbot_plugin.api.entities.builtin.provider.message as provider_message
-
-
-class AnthropicMessages(requester.ProviderAPIRequester):
-    """Anthropic Messages API 请求器"""
-
-    client: anthropic.AsyncAnthropic
-
-    default_config: dict[str, typing.Any] = {
-        'base_url': 'https://api.anthropic.com',
-        'timeout': 120,
-    }
-
-    async def initialize(self):
-        # 兼容 Windows 缺失 TCP_KEEPINTVL 和 TCP_KEEPCNT 的问题
-        if platform.system() == 'Windows':
-            if not hasattr(socket, 'TCP_KEEPINTVL'):
-                socket.TCP_KEEPINTVL = 0
-            if not hasattr(socket, 'TCP_KEEPCNT'):
-                socket.TCP_KEEPCNT = 0
-        httpx_client = anthropic._base_client.AsyncHttpxClientWrapper(
-            base_url=self.requester_cfg['base_url'],
-            # cast to a valid type because mypy doesn't understand our type narrowing
-            timeout=typing.cast(httpx.Timeout, self.requester_cfg['timeout']),
-            limits=anthropic._constants.DEFAULT_CONNECTION_LIMITS,
-            follow_redirects=True,
-            trust_env=True,
-        )
-
-        self.client = anthropic.AsyncAnthropic(
-            api_key='',
-            http_client=httpx_client,
-            base_url=self.requester_cfg['base_url'],
-        )
-
-    async def invoke_llm(
-        self,
-        query: pipeline_query.Query,
-        model: requester.RuntimeLLMModel,
-        messages: typing.List[provider_message.Message],
-        funcs: typing.List[resource_tool.LLMTool] = None,
-        extra_args: dict[str, typing.Any] = {},
-        remove_think: bool = False,
-    ) -> provider_message.Message:
-        self.client.api_key = model.provider.token_mgr.get_token()
-
-        args = extra_args.copy()
-        args['model'] = model.model_entity.name
-
-        # 处理消息
-
-        # system
-        system_role_message = None
-
-        for i, m in enumerate(messages):
-            if m.role == 'system':
-                system_role_message = m
-
-                break
-
-        if system_role_message:
-            messages.pop(i)
-
-        if isinstance(system_role_message, provider_message.Message) and isinstance(system_role_message.content, str):
-            args['system'] = system_role_message.content
-
-        req_messages = []
-
-        for m in messages:
-            if m.role == 'tool':
-                tool_call_id = m.tool_call_id
-
-                req_messages.append(
-                    {
-                        'role': 'user',
-                        'content': [
-                            {
-                                'type': 'tool_result',
-                                'tool_use_id': tool_call_id,
-                                'is_error': False,
-                                'content': [{'type': 'text', 'text': m.content}],
-                            }
-                        ],
-                    }
-                )
-
-                continue
-
-            msg_dict = m.dict(exclude_none=True)
-
-            if isinstance(m.content, str) and m.content.strip() != '':
-                msg_dict['content'] = [{'type': 'text', 'text': m.content}]
-            elif isinstance(m.content, list):
-                for i, ce in enumerate(m.content):
-                    if ce.type == 'image_base64':
-                        image_b64, image_format = await image.extract_b64_and_format(ce.image_base64)
-
-                        alter_image_ele = {
-                            'type': 'image',
-                            'source': {
-                                'type': 'base64',
-                                'media_type': f'image/{image_format}',
-                                'data': image_b64,
-                            },
-                        }
-                        msg_dict['content'][i] = alter_image_ele
-
-            if m.tool_calls:
-                for tool_call in m.tool_calls:
-                    msg_dict['content'].append(
-                        {
-                            'type': 'tool_use',
-                            'id': tool_call.id,
-                            'name': tool_call.function.name,
-                            'input': json.loads(tool_call.function.arguments),
-                        }
-                    )
-
-                del msg_dict['tool_calls']
-
-            req_messages.append(msg_dict)
-
-        args['messages'] = req_messages
-
-        if 'thinking' in args:
-            args['thinking'] = {'type': 'enabled', 'budget_tokens': 10000}
-
-        if funcs:
-            tools = await self.ap.tool_mgr.generate_tools_for_anthropic(funcs)
-
-            if tools:
-                args['tools'] = tools
-
-        try:
-            resp = await self.client.messages.create(**args)
-
-            args = {
-                'content': '',
-                'role': resp.role,
-            }
-            assert type(resp) is anthropic.types.message.Message
-
-            for block in resp.content:
-                if not remove_think and block.type == 'thinking':
-                    args['content'] = '<think>\n' + block.thinking + '\n</think>\n' + args['content']
-                elif block.type == 'text':
-                    args['content'] += block.text
-                elif block.type == 'tool_use':
-                    assert type(block) is anthropic.types.tool_use_block.ToolUseBlock
-                    tool_call = provider_message.ToolCall(
-                        id=block.id,
-                        type='function',
-                        function=provider_message.FunctionCall(name=block.name, arguments=json.dumps(block.input)),
-                    )
-                    if 'tool_calls' not in args:
-                        args['tool_calls'] = []
-                    args['tool_calls'].append(tool_call)
-
-            return provider_message.Message(**args)
-        except anthropic.AuthenticationError as e:
-            raise errors.RequesterError(f'api-key 无效: {e.message}')
-        except anthropic.BadRequestError as e:
-            raise errors.RequesterError(str(e.message))
-        except anthropic.NotFoundError as e:
-            if 'model: ' in str(e):
-                raise errors.RequesterError(f'模型无效: {e.message}')
-            else:
-                raise errors.RequesterError(f'请求地址无效: {e.message}')
-
-    async def invoke_llm_stream(
-        self,
-        query: pipeline_query.Query,
-        model: requester.RuntimeLLMModel,
-        messages: typing.List[provider_message.Message],
-        funcs: typing.List[resource_tool.LLMTool] = None,
-        extra_args: dict[str, typing.Any] = {},
-        remove_think: bool = False,
-    ) -> provider_message.Message:
-        self.client.api_key = model.provider.token_mgr.get_token()
-
-        args = extra_args.copy()
-        args['model'] = model.model_entity.name
-        args['stream'] = True
-
-        # 处理消息
-
-        # system
-        system_role_message = None
-
-        for i, m in enumerate(messages):
-            if m.role == 'system':
-                system_role_message = m
-
-                break
-
-        if system_role_message:
-            messages.pop(i)
-
-        if isinstance(system_role_message, provider_message.Message) and isinstance(system_role_message.content, str):
-            args['system'] = system_role_message.content
-
-        req_messages = []
-
-        for m in messages:
-            if m.role == 'tool':
-                tool_call_id = m.tool_call_id
-
-                req_messages.append(
-                    {
-                        'role': 'user',
-                        'content': [
-                            {
-                                'type': 'tool_result',
-                                'tool_use_id': tool_call_id,
-                                'is_error': False,  # 暂时直接写false
-                                'content': [
-                                    {'type': 'text', 'text': m.content}
-                                ],  # 这里要是list包裹，应该是多个返回的情况？type类型好像也可以填其他的，暂时只写text
-                            }
-                        ],
-                    }
-                )
-
-                continue
-
-            msg_dict = m.dict(exclude_none=True)
-
-            if isinstance(m.content, str) and m.content.strip() != '':
-                msg_dict['content'] = [{'type': 'text', 'text': m.content}]
-            elif isinstance(m.content, list):
-                for i, ce in enumerate(m.content):
-                    if ce.type == 'image_base64':
-                        image_b64, image_format = await image.extract_b64_and_format(ce.image_base64)
-
-                        alter_image_ele = {
-                            'type': 'image',
-                            'source': {
-                                'type': 'base64',
-                                'media_type': f'image/{image_format}',
-                                'data': image_b64,
-                            },
-                        }
-                        msg_dict['content'][i] = alter_image_ele
-            if isinstance(msg_dict['content'], str) and msg_dict['content'] == '':
-                msg_dict['content'] = []  # 这里不知道为什么会莫名有个空导致content为字符
-            if m.tool_calls:
-                for tool_call in m.tool_calls:
-                    msg_dict['content'].append(
-                        {
-                            'type': 'tool_use',
-                            'id': tool_call.id,
-                            'name': tool_call.function.name,
-                            'input': json.loads(tool_call.function.arguments),
-                        }
-                    )
-
-                del msg_dict['tool_calls']
-
-            req_messages.append(msg_dict)
-        if 'thinking' in args:
-            args['thinking'] = {'type': 'enabled', 'budget_tokens': 10000}
-
-        args['messages'] = req_messages
-
-        if funcs:
-            tools = await self.ap.tool_mgr.generate_tools_for_anthropic(funcs)
-
-            if tools:
-                args['tools'] = tools
-
-        try:
-            role = 'assistant'  # 默认角色
-            # chunk_idx = 0
-            think_started = False
-            think_ended = False
-            finish_reason = False
-            tool_name = ''
-            tool_id = ''
-            async for chunk in await self.client.messages.create(**args):
-                content = ''
-                tool_call = {'id': None, 'function': {'name': None, 'arguments': None}, 'type': 'function'}
-                if isinstance(
-                    chunk, anthropic.types.raw_content_block_start_event.RawContentBlockStartEvent
-                ):  # 记录开始
-                    if chunk.content_block.type == 'tool_use':
-                        if chunk.content_block.name is not None:
-                            tool_name = chunk.content_block.name
-                        if chunk.content_block.id is not None:
-                            tool_id = chunk.content_block.id
-
-                        tool_call['function']['name'] = tool_name
-                        tool_call['function']['arguments'] = ''
-                        tool_call['id'] = tool_id
-
-                    if not remove_think:
-                        if chunk.content_block.type == 'thinking' and not remove_think:
-                            think_started = True
-                        elif chunk.content_block.type == 'text' and chunk.index != 0 and not remove_think:
-                            think_ended = True
-                        continue
-                elif isinstance(chunk, anthropic.types.raw_content_block_delta_event.RawContentBlockDeltaEvent):
-                    if chunk.delta.type == 'thinking_delta':
-                        if think_started:
-                            think_started = False
-                            content = '<think>\n' + chunk.delta.thinking
-                        elif remove_think:
-                            continue
-                        else:
-                            content = chunk.delta.thinking
-                    elif chunk.delta.type == 'text_delta':
-                        if think_ended:
-                            think_ended = False
-                            content = '\n</think>\n' + chunk.delta.text
-                        else:
-                            content = chunk.delta.text
-                    elif chunk.delta.type == 'input_json_delta':
-                        tool_call['function']['arguments'] = chunk.delta.partial_json
-                        tool_call['function']['name'] = tool_name
-                        tool_call['id'] = tool_id
-                elif isinstance(chunk, anthropic.types.raw_content_block_stop_event.RawContentBlockStopEvent):
-                    continue  # 记录raw_content_block结束的
-
-                elif isinstance(chunk, anthropic.types.raw_message_delta_event.RawMessageDeltaEvent):
-                    if chunk.delta.stop_reason == 'end_turn':
-                        finish_reason = True
-                elif isinstance(chunk, anthropic.types.raw_message_stop_event.RawMessageStopEvent):
-                    continue  # 这个好像是完全结束
-                else:
-                    # print(chunk)
-                    self.ap.logger.debug(f'anthropic chunk: {chunk}')
-                    continue
-
-                args = {
-                    'content': content,
-                    'role': role,
-                    'is_final': finish_reason,
-                    'tool_calls': None if tool_call['id'] is None else [tool_call],
-                }
-                # if chunk_idx == 0:
-                #     chunk_idx += 1
-                #     continue
-
-                # assert type(chunk) is anthropic.types.message.Chunk
-
-                yield provider_message.MessageChunk(**args)
-
-            # return llm_entities.Message(**args)
-        except anthropic.AuthenticationError as e:
-            raise errors.RequesterError(f'api-key 无效: {e.message}')
-        except anthropic.BadRequestError as e:
-            raise errors.RequesterError(str(e.message))
-        except anthropic.NotFoundError as e:
-            if 'model: ' in str(e):
-                raise errors.RequesterError(f'模型无效: {e.message}')
-            else:
-                raise errors.RequesterError(f'请求地址无效: {e.message}')
@@ -7,6 +7,7 @@ metadata:
    zh_Hans: Anthropic
  icon: anthropic.svg
 spec:
+  litellm_provider: anthropic
  config:
  - name: base_url
    label:
@@ -22,6 +23,7 @@ spec:
    type: integer
    required: true
    default: 120
+  alias: "anthropic Anthropic 克劳德 claude Claude Opus Sonnet Haiku 安thropic"
  support_type:
  - llm
  provider_category: manufacturer
@@ -0,0 +1,5 @@
+<svg width="60" height="50" viewBox="0 0 60 50" xmlns="http://www.w3.org/2000/svg">
+  <rect width="60" height="50" rx="8" fill="#2932E1"/>
+  <text x="30" y="28" font-family="Arial, sans-serif" font-size="10" font-weight="bold" fill="white" text-anchor="middle">Baidu</text>
+  <text x="30" y="40" font-family="Arial, sans-serif" font-size="8" fill="white" text-anchor="middle">ERNIE</text>
+</svg>
@@ -0,0 +1,31 @@
+# Requester manifest for Baidu ERNIE; `litellm_provider: openai` selects the
+# OpenAI-style provider, so the default base URL must be OpenAI-compatible.
+apiVersion: v1
+kind: LLMAPIRequester
+metadata:
+  name: baidu-chat-completions
+  label:
+    en_US: Baidu ERNIE
+    zh_Hans: 百度文心一言
+  icon: baidu.svg
+spec:
+  litellm_provider: openai
+  config:
+  - name: base_url
+    label:
+      en_US: Base URL
+      zh_Hans: 基础 URL
+    type: string
+    required: true
+    # Qianfan V2 is the OpenAI-compatible endpoint. The legacy
+    # rpc/2.0/ai_custom/v1/wenxinworkshop API uses a different request schema
+    # and cannot be driven by an OpenAI-style client.
+    default: https://qianfan.baidubce.com/v2
+  - name: timeout
+    label:
+      en_US: Timeout
+      zh_Hans: 超时时间
+    type: integer
+    required: true
+    default: 120
+  # Space-separated search keywords for this provider.
+  alias: "baidu Baidu 百度 千帆 qianfan wenxin 文心 文心一言 ernie ERNIE bce embedding bce-reranker"
+  support_type:
+  - llm
+  - text-embedding
+  - rerank
+  provider_category: manufacturer
@@ -1,242 +0,0 @@
-from __future__ import annotations
-
-import typing
-import dashscope
-import openai
-
-from . import modelscopechatcmpl
-from .. import requester
-import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
-import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
-import langbot_plugin.api.entities.builtin.provider.message as provider_message
-
-
-class BailianChatCompletions(modelscopechatcmpl.ModelScopeChatCompletions):
-    """Alibaba Cloud Bailian (DashScope) ChatCompletion API requester.
-
-    Streams through the OpenAI-compatible client by default and switches to the
-    native ``dashscope`` SDK when a request carries an audio attachment.
-    """
-
-    client: openai.AsyncClient
-
-    default_config: dict[str, typing.Any] = {
-        'base_url': 'https://dashscope.aliyuncs.com/compatible-mode/v1',
-        'timeout': 120,
-    }
-
-    async def _closure_stream(
-        self,
-        query: pipeline_query.Query,
-        req_messages: list[dict],
-        use_model: requester.RuntimeLLMModel,
-        use_funcs: list[resource_tool.LLMTool] = None,
-        extra_args: dict[str, typing.Any] = {},
-        remove_think: bool = False,
-    ) -> provider_message.Message | typing.AsyncGenerator[provider_message.MessageChunk, None]:
-        """Stream one chat completion as incremental ``MessageChunk`` objects.
-
-        ``image_base64`` parts are rewritten to ``image_url`` parts, and
-        ``file_url`` parts whose file name has a video or audio extension are
-        rewritten to ``video_url`` / ``audio`` parts. Once such a media part is
-        seen, only the last media-carrying message is kept and messages that
-        contain a ``resp_message_id`` key are dropped. Audio requests go through
-        ``dashscope.MultiModalConversation.call``; everything else goes through
-        ``self._req_stream``.
-
-        Args:
-            query: Pipeline query for this request (not read by this method).
-            req_messages: Request messages as dicts. The list is shallow-copied,
-                so rewrites of content parts are visible to the caller.
-            use_model: Runtime model supplying the model name and the API token.
-            use_funcs: Optional tools to expose to the model.
-            extra_args: Passed as ``extra_body`` on the OpenAI-compatible path.
-            remove_think: When true, chunks carrying ``reasoning_content`` are
-                skipped on the OpenAI-compatible path.
-
-        Yields:
-            ``MessageChunk`` objects that hold only the newly received delta.
-        """
-        self.client.api_key = use_model.provider.token_mgr.get_token()
-
-        args = {}
-        args['model'] = use_model.model_entity.name
-
-        if use_funcs:
-            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
-
-            if tools:
-                args['tools'] = tools
-
-        # Messages for this request
-        messages = req_messages.copy()
-
-        is_use_dashscope_call = False  # whether to call through the native dashscope SDK
-        is_enable_multi_model = True  # whether the multi-turn history is kept
-        use_time_num = 0  # number of media attachments seen, to avoid sending several files at once
-        use_time_ids = []  # indexes of the messages that carry media
-        message_id = 0  # running message index
-
-        for msg in messages:
-            if 'content' in msg and isinstance(msg['content'], list):
-                for me in msg['content']:
-                    if me['type'] == 'image_base64':
-                        me['image_url'] = {'url': me['image_base64']}
-                        me['type'] = 'image_url'
-                        del me['image_base64']
-                    elif me['type'] == 'file_url' and '.' in me.get('file_name', ''):
-                        # 1. Video understanding
-                        # https://bailian.console.aliyun.com/?tab=doc#/doc/?type=model&url=2845871
-                        file_type = me.get('file_name').lower().split('.')[-1]
-                        if file_type in ['mp4', 'avi', 'mkv', 'mov', 'flv', 'wmv']:
-                            me['type'] = 'video_url'
-                            me['video_url'] = {'url': me['file_url']}
-                            del me['file_url']
-                            del me['file_name']
-                            use_time_num += 1
-                            use_time_ids.append(message_id)
-                            is_enable_multi_model = False
-                        # 2. Speech recognition; it cannot be passed through OpenAI's audio
-                        #    field, so it is routed to the native dashscope call instead
-                        # https://bailian.console.aliyun.com/?tab=doc#/doc/?type=model&url=2979031
-                        elif file_type in [
-                            'aac',
-                            'amr',
-                            'aiff',
-                            'flac',
-                            'm4a',
-                            'mp3',
-                            'mpeg',
-                            'ogg',
-                            'opus',
-                            'wav',
-                            'webm',
-                            'wma',
-                        ]:
-                            # The part ends up as just {'audio': <url>}: no 'type' key is kept.
-                            me['audio'] = me['file_url']
-                            del me['file_url']
-                            del me['type']
-                            del me['file_name']
-                            is_use_dashscope_call = True
-                            use_time_num += 1
-                            use_time_ids.append(message_id)
-                            is_enable_multi_model = False
-            message_id += 1
-
-        # Drop every media-carrying message except the last one
-        if not is_enable_multi_model and use_time_num > 1:
-            messages = [msg for idx, msg in enumerate(messages) if idx not in use_time_ids[:-1]]
-
-        if not is_enable_multi_model:
-            messages = [msg for msg in messages if 'resp_message_id' not in msg]
-
-        args['messages'] = messages
-        args['stream'] = True
-
-        # Streaming state
-        chunk_idx = 0
-        thinking_started = False
-        thinking_ended = False
-        role = 'assistant'  # default role
-        # Remembered across tool-call fragments; initialised so a first fragment
-        # without an id/name cannot hit an unbound local.
-        tool_id = ''
-        tool_name = ''
-
-        if is_use_dashscope_call:
-            response = dashscope.MultiModalConversation.call(
-                api_key=use_model.provider.token_mgr.get_token(),
-                model=use_model.model_entity.name,
-                messages=messages,
-                result_format='message',
-                asr_options={
-                    # "language": "zh",  # optional: set when the audio language is known
-                    'enable_lid': True,
-                    'enable_itn': False,
-                },
-                stream=True,
-            )
-            content_length_list = []
-            previous_length = 0  # length of the text already emitted
-            for res in response:
-                chunk = res['output']
-                # Parse the chunk
-                if hasattr(chunk, 'choices') and chunk.choices:
-                    choice = chunk.choices[0]
-                    delta_content = choice['message'].content[0]['text']
-                    finish_reason = choice['finish_reason']
-                    content_length_list.append(len(delta_content))
-                else:
-                    delta_content = ''
-                    finish_reason = None
-
-                # Skip an empty first chunk (role only, no content)
-                if chunk_idx == 0 and not delta_content:
-                    chunk_idx += 1
-                    continue
-
-                # With at least two recorded lengths, emit only the slice after the previous length
-                if len(content_length_list) >= 2:
-                    now_content = delta_content[previous_length : content_length_list[-1]]
-                    previous_length = content_length_list[-1]
-                else:
-                    # First chunk with text: emit it as-is
-                    now_content = delta_content
-                    previous_length = len(delta_content)
-
-                # Build the MessageChunk with the incremental content only
-                chunk_data = {
-                    'role': role,
-                    'content': now_content if now_content else None,
-                    'is_final': bool(finish_reason) and finish_reason != 'null',
-                }
-
-                # Drop None values
-                chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
-                yield provider_message.MessageChunk(**chunk_data)
-                chunk_idx += 1
-        else:
-            async for chunk in self._req_stream(args, extra_body=extra_args):
-                # Parse the chunk
-                if hasattr(chunk, 'choices') and chunk.choices:
-                    choice = chunk.choices[0]
-                    delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
-                    finish_reason = getattr(choice, 'finish_reason', None)
-                else:
-                    delta = {}
-                    finish_reason = None
-
-                # Take the role from whichever chunk provides one and reuse it afterwards
-                if 'role' in delta and delta['role']:
-                    role = delta['role']
-
-                # Incremental content
-                delta_content = delta.get('content', '')
-                reasoning_content = delta.get('reasoning_content', '')
-
-                # Handle reasoning_content
-                if reasoning_content:
-                    # With remove_think set, reasoning chunks are skipped entirely
-                    if remove_think:
-                        chunk_idx += 1
-                        continue
-
-                    # First reasoning chunk: open the <think> tag
-                    if not thinking_started:
-                        thinking_started = True
-                        delta_content = '<think>\n' + reasoning_content
-                    else:
-                        # Keep emitting reasoning_content
-                        delta_content = reasoning_content
-                elif thinking_started and not thinking_ended and delta_content:
-                    # Reasoning ended and normal content begins: close the <think> tag
-                    thinking_ended = True
-                    delta_content = '\n</think>\n' + delta_content
-
-                # Handle tool-call deltas
-                if delta.get('tool_calls'):
-                    for tool_call in delta['tool_calls']:
-                        # A fragment without an id/name (None or '') keeps the values
-                        # remembered from an earlier fragment.
-                        if tool_call['id']:
-                            tool_id = tool_call['id']
-                        if tool_call['function']['name']:
-                            tool_name = tool_call['function']['name']
-
-                        if tool_call['type'] is None:
-                            tool_call['type'] = 'function'
-                        tool_call['id'] = tool_id
-                        tool_call['function']['name'] = tool_name
-                        tool_call['function']['arguments'] = (
-                            '' if tool_call['function']['arguments'] is None else tool_call['function']['arguments']
-                        )
-
-                # Skip an empty first chunk (role only, no content)
-                if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'):
-                    chunk_idx += 1
-                    continue
-
-                # Build the MessageChunk with the incremental content only
-                chunk_data = {
-                    'role': role,
-                    'content': delta_content if delta_content else None,
-                    'tool_calls': delta.get('tool_calls'),
-                    'is_final': bool(finish_reason),
-                }
-
-                # Drop None values
-                chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
-
-                yield provider_message.MessageChunk(**chunk_data)
-                chunk_idx += 1
@@ -7,6 +7,7 @@ metadata:
    zh_Hans: 阿里云百炼
  icon: bailian.png
 spec:
+  litellm_provider: openai
  config:
  - name: base_url
    label:
@@ -22,8 +23,10 @@ spec:
    type: integer
    required: true
    default: 120
+  alias: "bailian 百炼 阿里 阿里云 aliyun alibaba dashscope 通义 通义千问 qwen Qwen tongyi gte-rerank text-embedding-v"
  support_type:
  - llm
+  - text-embedding
  - rerank
  provider_category: maas
 execution:
@@ -1,702 +0,0 @@
-from __future__ import annotations
-
-import asyncio
-import typing
-
-import openai
-import openai.types.chat.chat_completion as chat_completion_module
-import httpx
-
-from .. import errors, requester
-import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
-import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
-import langbot_plugin.api.entities.builtin.provider.message as provider_message
-
-
-class OpenAIChatCompletions(requester.ProviderAPIRequester):
-    """OpenAI ChatCompletion API 请求器"""
-
-    client: openai.AsyncClient
-
-    default_config: dict[str, typing.Any] = {
-        'base_url': 'https://api.openai.com/v1',
-        'timeout': 120,
-    }
-
-    async def initialize(self):
-        """Create ``self.client`` from ``init_api_key`` and ``requester_cfg``.
-
-        Spaces are removed from the configured base URL, and the configured
-        timeout is applied both to the OpenAI client and to its httpx transport.
-        """
-        settings = self.requester_cfg
-        api_key = self.init_api_key
-        cleaned_base_url = settings['base_url'].replace(' ', '')
-        request_timeout = settings['timeout']
-        transport = httpx.AsyncClient(trust_env=True, timeout=settings['timeout'])
-
-        self.client = openai.AsyncClient(
-            api_key=api_key,
-            base_url=cleaned_base_url,
-            timeout=request_timeout,
-            http_client=transport,
-        )
-
-    def _mask_api_key(self, api_key: str | None) -> str:
-        """Return a masked rendering of ``api_key``.
-
-        A missing or empty key gives ``''``, a key of up to eight characters
-        gives ``'****'``, and a longer key keeps only its first and last four
-        characters around ``'...'``.
-        """
-        key_length = len(api_key) if api_key else 0
-        if key_length == 0:
-            return ''
-        if key_length > 8:
-            head, tail = api_key[:4], api_key[-4:]
-            return f'{head}...{tail}'
-        return '****'
-
-    def _infer_model_type(self, model_id: str) -> str:
-        """Classify a model id as ``'embedding'`` or ``'llm'``.
-
-        The id is lowercased (a falsy id counts as ``''``) and reported as
-        ``'embedding'`` when it contains any of the markers below.
-        """
-        lowered_id = (model_id or '').lower()
-        for marker in (
-            'embedding',
-            'embed',
-            'bge-',
-            'e5-',
-            'm3e',
-            'gte-',
-            'multilingual-e5',
-            'text-embedding',
-        ):
-            if marker in lowered_id:
-                return 'embedding'
-        return 'llm'
-
-    def _infer_model_abilities(self, item: dict[str, typing.Any], model_id: str) -> list[str]:
-        """Guess ``'vision'`` / ``'func_call'`` abilities by keyword matching.
-
-        Every value found under a fixed set of metadata fields of ``item`` is
-        flattened to lowercase strings; the lowercased ``model_id`` is added to
-        that pool. An ability is reported when any of its keywords is a
-        substring of any pooled string. The result is in alphabetical order.
-        """
-
-        def _lowered_leaves(node: typing.Any) -> list[str]:
-            # Depth-first walk: strings and other scalars become one lowercase
-            # string, dict values and list/tuple/set members are recursed into.
-            if node is None:
-                return []
-            if isinstance(node, str):
-                return [node.lower()]
-            if isinstance(node, dict):
-                children = node.values()
-            elif isinstance(node, (list, tuple, set)):
-                children = node
-            else:
-                return [str(node).lower()]
-            return [leaf for child in children for leaf in _lowered_leaves(child)]
-
-        haystack: list[str] = []
-        for field in (
-            'capabilities',
-            'modalities',
-            'input_modalities',
-            'output_modalities',
-            'supported_generation_methods',
-            'supported_parameters',
-            'architecture',
-        ):
-            haystack.extend(_lowered_leaves(item.get(field)))
-        haystack.append((model_id or '').lower())
-
-        def _mentions(keywords: tuple[str, ...]) -> bool:
-            return any(keyword in token for token in haystack for keyword in keywords)
-
-        # Listed in alphabetical order of the ability name.
-        keyword_table = (
-            (
-                'func_call',
-                ('function', 'tool', 'tools', 'tool_choice', 'tool_call', 'tool-use', 'tool_use'),
-            ),
-            (
-                'vision',
-                ('vision', 'image', 'file', 'video', 'multimodal', 'vl', 'ocr', 'omni'),
-            ),
-        )
-        return [ability for ability, keywords in keyword_table if _mentions(keywords)]
-
-    def _normalize_modalities(self, value: typing.Any) -> list[str]:
-        """Flatten ``value`` into a de-duplicated list of lowercase modality names.
-
-        Strings are split on ``'->'``, ``'+'`` and ``','``; dict values and
-        list/tuple/set members are walked recursively; any other value
-        (including ``None``) contributes nothing. First-seen order is kept.
-        """
-        ordered: list[str] = []
-
-        def _walk(node: typing.Any):
-            if isinstance(node, str):
-                for piece in node.replace('->', ',').replace('+', ',').split(','):
-                    name = piece.strip().lower()
-                    if name and name not in ordered:
-                        ordered.append(name)
-            elif isinstance(node, dict):
-                for child in node.values():
-                    _walk(child)
-            elif isinstance(node, (list, tuple, set)):
-                for child in node:
-                    _walk(child)
-
-        _walk(value)
-        return ordered
-
-    def _extract_scan_metadata(self, item: dict[str, typing.Any], model_id: str) -> dict[str, typing.Any]:
-        """Pick the descriptive fields of one scanned model entry.
-
-        Returns a dict with ``display_name``, ``description``, ``context_length``,
-        ``owned_by``, ``input_modalities`` and ``output_modalities``. Text fields
-        that are missing, not strings, or blank become ``None``; a ``name`` equal
-        to ``model_id`` is also dropped. ``context_length`` falls back to
-        ``top_provider.context_length`` and is coerced to ``int`` when possible.
-        Modalities fall back to the ``architecture`` dict when the top-level
-        lists are empty.
-        """
-
-        def _text_or_none(raw: typing.Any) -> typing.Any:
-            # Only non-blank strings survive; the original (unstripped) text is kept.
-            if isinstance(raw, str) and raw.strip():
-                return raw
-            return None
-
-        display_name = _text_or_none(item.get('name'))
-        if display_name == model_id:
-            display_name = None
-
-        context_length = item.get('context_length')
-        top_provider = item.get('top_provider')
-        if context_length is None and isinstance(top_provider, dict):
-            context_length = top_provider.get('context_length')
-        if context_length is not None and not isinstance(context_length, int):
-            try:
-                context_length = int(context_length)
-            except (TypeError, ValueError):
-                context_length = None
-
-        architecture = item.get('architecture')
-        modalities: dict[str, list[str]] = {}
-        for field in ('input_modalities', 'output_modalities'):
-            found = self._normalize_modalities(item.get(field))
-            if not found and isinstance(architecture, dict):
-                found = self._normalize_modalities(architecture.get(field))
-            modalities[field] = found
-
-        return {
-            'display_name': display_name,
-            'description': _text_or_none(item.get('description')),
-            'context_length': context_length,
-            'owned_by': _text_or_none(item.get('owned_by')),
-            'input_modalities': modalities['input_modalities'],
-            'output_modalities': modalities['output_modalities'],
-        }
-
-    async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]:
-        """Fetch ``<base_url>/models`` and describe every listed model.
-
-        Args:
-            api_key: Optional key sent as a ``Bearer`` Authorization header.
-
-        Returns:
-            A dict with ``models`` (entries sorted with ``'llm'`` types first,
-            then by lowercase name) and ``debug`` (the request that was made,
-            with the key masked, plus the raw response payload).
-
-        Raises:
-            Whatever ``response.raise_for_status()`` raises for an error status.
-        """
-        request_headers = {'Authorization': f'Bearer {api_key}'} if api_key else {}
-        endpoint = f'{self.requester_cfg["base_url"].rstrip("/")}/models'
-
-        async with httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']) as http:
-            reply = await http.get(endpoint, headers=request_headers)
-            reply.raise_for_status()
-            payload = reply.json()
-
-        discovered = []
-        for entry in payload.get('data', []):
-            entry_id = entry.get('id')
-            if entry_id:
-                discovered.append(
-                    {
-                        'id': entry_id,
-                        'name': entry_id,
-                        'type': self._infer_model_type(entry_id),
-                        'abilities': self._infer_model_abilities(entry, entry_id),
-                        **self._extract_scan_metadata(entry, entry_id),
-                    }
-                )
-
-        def _llm_first_then_name(model: dict[str, typing.Any]):
-            return (model['type'] != 'llm', model['name'].lower())
-
-        # Only the masked form of the key is echoed back in the debug section.
-        masked_authorization = f'Bearer {self._mask_api_key(api_key)}' if api_key else ''
-
-        return {
-            'models': sorted(discovered, key=_llm_first_then_name),
-            'debug': {
-                'request': {
-                    'method': 'GET',
-                    'url': endpoint,
-                    'headers': {
-                        'Authorization': masked_authorization,
-                    },
-                },
-                'response': payload,
-            },
-        }
-
-    async def _req(
-        self,
-        args: dict,
-        extra_body: dict = {},
-    ) -> chat_completion_module.ChatCompletion:
-        """Send one chat-completions request and return the client's response.
-
-        Args:
-            args: Keyword arguments forwarded verbatim to
-                ``self.client.chat.completions.create``.
-            extra_body: Forwarded as the ``extra_body`` keyword. The shared
-                ``{}`` default is not modified by this method.
-        """
-        return await self.client.chat.completions.create(**args, extra_body=extra_body)
-
-    async def _req_stream(
-        self,
-        args: dict,
-        extra_body: dict = {},
-    ):
-        """Async generator that re-yields every chunk of one chat-completions call.
-
-        Args:
-            args: Keyword arguments forwarded verbatim to
-                ``self.client.chat.completions.create``; the awaited result is
-                iterated with ``async for``.
-            extra_body: Forwarded as the ``extra_body`` keyword. The shared
-                ``{}`` default is not modified by this method.
-        """
-        async for chunk in await self.client.chat.completions.create(**args, extra_body=extra_body):
-            yield chunk
-
-    async def _make_msg(
-        self,
-        chat_completion: chat_completion_module.ChatCompletion,
-        remove_think: bool = False,
-    ) -> provider_message.Message:
-        """Convert a ``ChatCompletion`` into a provider ``Message``.
-
-        The first choice's message is dumped to a dict, its role defaults to
-        ``'assistant'`` when missing or ``None``, and its content is passed
-        through ``_process_thinking_content`` together with any
-        ``reasoning_content`` field, which is then removed.
-
-        Args:
-            chat_completion: Response object returned by the client.
-            remove_think: Forwarded to ``_process_thinking_content``.
-
-        Raises:
-            TypeError: If ``chat_completion`` is not a ``ChatCompletion``. The
-                message shows the type name and the first 16 characters of
-                ``str(chat_completion)``.
-        """
-        if not isinstance(chat_completion, chat_completion_module.ChatCompletion):
-            # Render through str() before slicing: the rejected value may be
-            # None, a dict, or another non-sliceable object, and slicing it
-            # directly would replace this message with an unrelated error.
-            preview = str(chat_completion)[:16]
-            raise TypeError(f'Expected ChatCompletion, got {type(chat_completion).__name__}: {preview}')
-
-        chatcmpl_message = chat_completion.choices[0].message.model_dump()
-
-        # Make sure the role field exists and is not None
-        if 'role' not in chatcmpl_message or chatcmpl_message['role'] is None:
-            chatcmpl_message['role'] = 'assistant'
-
-        # Handle the chain of thought
-        content = chatcmpl_message.get('content', '')
-        reasoning_content = chatcmpl_message.get('reasoning_content', None)
-
-        processed_content, _ = await self._process_thinking_content(
-            content=content, reasoning_content=reasoning_content, remove_think=remove_think
-        )
-
-        chatcmpl_message['content'] = processed_content
-
-        # Remove reasoning_content so it is not passed on to Message
-        chatcmpl_message.pop('reasoning_content', None)
-
-        return provider_message.Message(**chatcmpl_message)
-
-    async def _process_thinking_content(
-        self,
-        content: str,
-        reasoning_content: str = None,
-        remove_think: bool = False,
-    ) -> tuple[str, str]:
-        """Merge or strip chain-of-thought text.
-
-        Thinking text is collected from ``reasoning_content`` and from any
-        ``<think>...</think>`` spans inside ``content`` (those spans are removed
-        from the content and the remainder is stripped).
-
-        Args:
-            content: Original message content; may be ``None``.
-            reasoning_content: Value of the ``reasoning_content`` field, if any.
-            remove_think: Whether to drop the thinking text from the result.
-
-        Returns:
-            ``(processed content, extracted thinking text)``. With
-            ``remove_think`` the thinking text is returned as ``''``. Otherwise,
-            when thinking text exists, it is prepended to the content inside a
-            ``<think>`` block.
-        """
-        thinking_content = ''
-
-        # 1. Take the chain of thought from reasoning_content
-        if reasoning_content:
-            thinking_content = reasoning_content
-
-        # 2. Extract <think> spans embedded in content
-        if content and '<think>' in content and '</think>' in content:
-            import re
-
-            think_pattern = r'<think>(.*?)</think>'
-            think_matches = re.findall(think_pattern, content, re.DOTALL)
-            if think_matches:
-                # Append to reasoning_content when that is already present
-                if thinking_content:
-                    thinking_content += '\n' + '\n'.join(think_matches)
-                else:
-                    thinking_content = '\n'.join(think_matches)
-                # Remove the <think> spans from content
-                content = re.sub(think_pattern, '', content, flags=re.DOTALL).strip()
-
-        # 3. Keep or drop the chain of thought depending on remove_think
-        if remove_think:
-            return content, ''
-
-        if thinking_content:
-            # content can be None (e.g. a reply that only carries reasoning);
-            # treat it as empty so the literal text "None" is never emitted.
-            visible_content = content or ''
-            content = f'<think>\n{thinking_content}\n</think>\n{visible_content}'.strip()
-        return content, thinking_content
-
-    async def _closure_stream(
-        self,
-        query: pipeline_query.Query,
-        req_messages: list[dict],
-        use_model: requester.RuntimeLLMModel,
-        use_funcs: list[resource_tool.LLMTool] = None,
-        extra_args: dict[str, typing.Any] = {},
-        remove_think: bool = False,
-    ) -> provider_message.MessageChunk:
-        """Stream one chat completion as incremental ``MessageChunk`` objects.
-
-        This is an async generator: it yields one ``MessageChunk`` per received
-        delta rather than returning a single chunk.
-
-        Args:
-            query: Pipeline query for this request (not read by this method).
-            req_messages: Request messages as dicts. The list is shallow-copied,
-                so the ``image_base64`` -> ``image_url`` rewrite of content parts
-                is visible to the caller.
-            use_model: Runtime model supplying the model name and the API token.
-            use_funcs: Optional tools to expose to the model.
-            extra_args: Passed as ``extra_body`` to ``_req_stream``.
-            remove_think: When true, chunks carrying ``reasoning_content`` are
-                skipped.
-
-        Yields:
-            ``MessageChunk`` objects with ``role``, ``is_final`` and, when
-            present, ``content`` and ``tool_calls``.
-        """
-        self.client.api_key = use_model.provider.token_mgr.get_token()
-
-        args = {}
-        args['model'] = use_model.model_entity.name
-
-        if use_funcs:
-            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
-            if tools:
-                args['tools'] = tools
-
-        # Messages for this request
-        messages = req_messages.copy()
-
-        # Vision check: rewrite image_base64 parts into image_url parts
-        for msg in messages:
-            if 'content' in msg and isinstance(msg['content'], list):
-                for me in msg['content']:
-                    if me['type'] == 'image_base64':
-                        me['image_url'] = {'url': me['image_base64']}
-                        me['type'] = 'image_url'
-                        del me['image_base64']
-
-        args['messages'] = messages
-        args['stream'] = True
-
-        # Streaming state
-        # tool_calls_map: dict[str, provider_message.ToolCall] = {}
-        chunk_idx = 0
-        thinking_started = False
-        thinking_ended = False
-        role = 'assistant'  # default role
-        tool_id = ''
-        tool_name = ''
-        # accumulated_reasoning = ''  # only used to tell when the chain of thought ends
-
-        async for chunk in self._req_stream(args, extra_body=extra_args):
-            # Parse the chunk
-
-            if hasattr(chunk, 'choices') and chunk.choices:
-                choice = chunk.choices[0]
-                delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
-
-                finish_reason = getattr(choice, 'finish_reason', None)
-            else:
-                delta = {}
-                finish_reason = None
-            # Take the role from whichever chunk provides one and reuse it afterwards
-            if 'role' in delta and delta['role']:
-                role = delta['role']
-
-            # Incremental content
-            delta_content = delta.get('content', '')
-            reasoning_content = delta.get('reasoning_content', '')
-
-            # Handle reasoning_content
-            if reasoning_content:
-                # accumulated_reasoning += reasoning_content
-                # With remove_think set, skip reasoning_content
-                if remove_think:
-                    chunk_idx += 1
-                    continue
-
-                # First reasoning_content chunk: add the opening <think> tag
-                if not thinking_started:
-                    thinking_started = True
-                    delta_content = '<think>\n' + reasoning_content
-                else:
-                    # Keep emitting reasoning_content
-                    delta_content = reasoning_content
-            elif thinking_started and not thinking_ended and delta_content:
-                # reasoning_content ended and normal content begins: add the closing </think> tag
-                thinking_ended = True
-                delta_content = '\n</think>\n' + delta_content
-
-            # Handle <think> tags already present in content (if they need removing)
-            # if delta_content and remove_think and '<think>' in delta_content:
-            #     import re
-            #
-            #     # Remove the <think> tag and its content
-            #     delta_content = re.sub(r'<think>.*?</think>', '', delta_content, flags=re.DOTALL)
-
-            # Handle tool-call deltas
-            # delta_tool_calls = None
-            if delta.get('tool_calls'):
-                for tool_call in delta['tool_calls']:
-                    # A fragment carrying both id and name updates the remembered
-                    # pair; any other fragment is filled in from it.
-                    if tool_call['id'] and tool_call['function']['name']:
-                        tool_id = tool_call['id']
-                        tool_name = tool_call['function']['name']
-                    else:
-                        tool_call['id'] = tool_id
-                        tool_call['function']['name'] = tool_name
-                    if tool_call['type'] is None:
-                        tool_call['type'] = 'function'
-
-            # Skip an empty first chunk (role only, no content)
-            if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'):
-                chunk_idx += 1
-                continue
-            # Build the MessageChunk with the incremental content only
-            chunk_data = {
-                'role': role,
-                'content': delta_content if delta_content else None,
-                'tool_calls': delta.get('tool_calls'),
-                'is_final': bool(finish_reason),
-            }
-
-            # Drop None values
-            chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
-
-            yield provider_message.MessageChunk(**chunk_data)
-            chunk_idx += 1
-
-    async def _closure(
-        self,
-        query: pipeline_query.Query,
-        req_messages: list[dict],
-        use_model: requester.RuntimeLLMModel,
-        use_funcs: list[resource_tool.LLMTool] = None,
-        extra_args: dict[str, typing.Any] = {},
-        remove_think: bool = False,
-    ) -> tuple[provider_message.Message, dict]:
-        self.client.api_key = use_model.provider.token_mgr.get_token()
-
-        args = {}
-        args['model'] = use_model.model_entity.name
-
-        if use_funcs:
-            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
-
-            if tools:
-                args['tools'] = tools
-
-        # 设置此次请求中的messages
-        messages = req_messages.copy()
-
-        # 检查vision
-        for msg in messages:
-            if 'content' in msg and isinstance(msg['content'], list):
-                for me in msg['content']:
-                    if me['type'] == 'image_base64':
-                        me['image_url'] = {'url': me['image_base64']}
-                        me['type'] = 'image_url'
-                        del me['image_base64']
-
-        args['messages'] = messages
-
-        # 发送请求
-
-        resp = await self._req(args, extra_body=extra_args)
-        # 处理请求结果
-        message = await self._make_msg(resp, remove_think)
-
-        # Extract token usage from response
-        usage_info = {}
-        if hasattr(resp, 'usage') and resp.usage:
-            usage_info['input_tokens'] = resp.usage.prompt_tokens or 0
-            usage_info['output_tokens'] = resp.usage.completion_tokens or 0
-            usage_info['total_tokens'] = resp.usage.total_tokens or 0
-
-        return message, usage_info
-
-    async def invoke_llm(
-        self,
-        query: pipeline_query.Query,
-        model: requester.RuntimeLLMModel,
-        messages: typing.List[provider_message.Message],
-        funcs: typing.List[resource_tool.LLMTool] = None,
-        extra_args: dict[str, typing.Any] = {},
-        remove_think: bool = False,
-    ) -> tuple[provider_message.Message, dict]:
-        """Invoke LLM and return message with usage info"""
-        req_messages = []  # req_messages 仅用于类内，外部同步由 query.messages 进行
-        for m in messages:
-            msg_dict = m.dict(exclude_none=True)
-            content = msg_dict.get('content')
-            if isinstance(content, list):
-                # 检查 content 列表中是否每个部分都是文本
-                if all(isinstance(part, dict) and part.get('type') == 'text' for part in content):
-                    # 将所有文本部分合并为一个字符串
-                    msg_dict['content'] = '\n'.join(part['text'] for part in content)
-            req_messages.append(msg_dict)
-
-        try:
-            msg, usage_info = await self._closure(
-                query=query,
-                req_messages=req_messages,
-                use_model=model,
-                use_funcs=funcs,
-                extra_args=extra_args,
-                remove_think=remove_think,
-            )
-            return msg, usage_info
-        except asyncio.TimeoutError:
-            raise errors.RequesterError('请求超时')
-        except openai.BadRequestError as e:
-            error_message = str(e.message) if hasattr(e, 'message') else str(e)
-            if 'context_length_exceeded' in str(e):
-                raise errors.RequesterError(f'上文过长，请重置会话: {error_message}')
-            else:
-                raise errors.RequesterError(f'请求参数错误: {error_message}')
-        except openai.AuthenticationError as e:
-            error_message = str(e.message) if hasattr(e, 'message') else str(e)
-            raise errors.RequesterError(f'无效的 api-key: {error_message}')
-        except openai.NotFoundError as e:
-            error_message = str(e.message) if hasattr(e, 'message') else str(e)
-            raise errors.RequesterError(f'请求路径错误: {error_message}')
-        except openai.RateLimitError as e:
-            error_message = str(e.message) if hasattr(e, 'message') else str(e)
-            raise errors.RequesterError(f'请求过于频繁或余额不足: {error_message}')
-        except openai.APIConnectionError as e:
-            error_message = f'连接错误: {str(e)}'
-            raise errors.RequesterError(error_message)
-        except openai.APIError as e:
-            error_message = str(e.message) if hasattr(e, 'message') else str(e)
-            raise errors.RequesterError(f'请求错误: {error_message}')
-
-    async def invoke_embedding(
-        self,
-        model: requester.RuntimeEmbeddingModel,
-        input_text: list[str],
-        extra_args: dict[str, typing.Any] = {},
-    ) -> tuple[list[list[float]], dict]:
-        """调用 Embedding API, returns (embeddings, usage_info)"""
-        self.client.api_key = model.provider.token_mgr.get_token()
-
-        args = {
-            'model': model.model_entity.name,
-            'input': input_text,
-        }
-
-        if model.model_entity.extra_args:
-            args.update(model.model_entity.extra_args)
-
-        args.update(extra_args)
-
-        try:
-            resp = await self.client.embeddings.create(**args)
-
-            # Extract usage info
-            usage_info = {}
-            if hasattr(resp, 'usage') and resp.usage:
-                usage_info['prompt_tokens'] = resp.usage.prompt_tokens or 0
-                usage_info['total_tokens'] = resp.usage.total_tokens or 0
-
-            return [d.embedding for d in resp.data], usage_info
-        except asyncio.TimeoutError:
-            raise errors.RequesterError('请求超时')
-        except openai.BadRequestError as e:
-            raise errors.RequesterError(f'请求参数错误: {e.message}')
-
-    async def invoke_llm_stream(
-        self,
-        query: pipeline_query.Query,
-        model: requester.RuntimeLLMModel,
-        messages: typing.List[provider_message.Message],
-        funcs: typing.List[resource_tool.LLMTool] = None,
-        extra_args: dict[str, typing.Any] = {},
-        remove_think: bool = False,
-    ) -> provider_message.MessageChunk:
-        req_messages = []  # req_messages 仅用于类内，外部同步由 query.messages 进行
-        for m in messages:
-            msg_dict = m.dict(exclude_none=True)
-            content = msg_dict.get('content')
-            if isinstance(content, list):
-                # 检查 content 列表中是否每个部分都是文本
-                if all(isinstance(part, dict) and part.get('type') == 'text' for part in content):
-                    # 将所有文本部分合并为一个字符串
-                    msg_dict['content'] = '\n'.join(part['text'] for part in content)
-            req_messages.append(msg_dict)
-
-        try:
-            async for item in self._closure_stream(
-                query=query,
-                req_messages=req_messages,
-                use_model=model,
-                use_funcs=funcs,
-                extra_args=extra_args,
-                remove_think=remove_think,
-            ):
-                yield item
-
-        except asyncio.TimeoutError:
-            raise errors.RequesterError('请求超时')
-        except openai.BadRequestError as e:
-            if 'context_length_exceeded' in e.message:
-                raise errors.RequesterError(f'上文过长，请重置会话: {e.message}')
-            else:
-                raise errors.RequesterError(f'请求参数错误: {e.message}')
-        except openai.AuthenticationError as e:
-            raise errors.RequesterError(f'无效的 api-key: {e.message}')
-        except openai.NotFoundError as e:
-            raise errors.RequesterError(f'请求路径错误: {e.message}')
-        except openai.RateLimitError as e:
-            raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}')
-        except openai.APIError as e:
-            raise errors.RequesterError(f'请求错误: {e.message}')
-
-    async def invoke_rerank(
-        self,
-        model: requester.RuntimeRerankModel,
-        query: str,
-        documents: typing.List[str],
-        extra_args: dict[str, typing.Any] = {},
-    ) -> typing.List[dict]:
-        """Standard /rerank endpoint (Jina/Cohere/SiliconFlow/Voyage/DashScope compatible)
-
-        Supports extra_args from model.extra_args:
-        - rerank_url: full URL override (e.g. "https://dashscope.aliyuncs.com/compatible-api/v1/reranks")
-        - rerank_path: path override appended to base_url (e.g. "reranks" instead of default "rerank")
-        - Any other fields are merged into the request payload.
-        """
-        api_key = model.provider.token_mgr.get_token()
-        base_url = self.requester_cfg.get('base_url', '').rstrip('/')
-        timeout = self.requester_cfg.get('timeout', 120)
-
-        merged_args = {}
-        if model.model_entity.extra_args:
-            merged_args.update(model.model_entity.extra_args)
-        if extra_args:
-            merged_args.update(extra_args)
-
-        rerank_url = merged_args.pop('rerank_url', None)
-        rerank_path = merged_args.pop('rerank_path', 'rerank')
-        if not rerank_url:
-            rerank_url = f'{base_url}/{rerank_path}'
-
-        headers = {
-            'Content-Type': 'application/json',
-            'Authorization': f'Bearer {api_key}',
-        }
-
-        payload = {
-            'model': model.model_entity.name,
-            'query': query,
-            'documents': documents[:64],
-            'top_n': min(len(documents), 64),
-        }
-
-        if merged_args:
-            payload.update(merged_args)
-
-        try:
-            async with httpx.AsyncClient(trust_env=True, timeout=timeout) as client:
-                resp = await client.post(rerank_url, headers=headers, json=payload)
-                resp.raise_for_status()
-                data = resp.json()
-
-            results = self._parse_rerank_response(data)
-
-            if results:
-                scores = [r.get('relevance_score', 0.0) for r in results]
-                min_score = min(scores)
-                max_score = max(scores)
-                if max_score - min_score > 1e-6:
-                    for r in results:
-                        r['relevance_score'] = (r['relevance_score'] - min_score) / (max_score - min_score)
-
-            return results
-        except httpx.HTTPStatusError as e:
-            raise errors.RequesterError(f'Rerank request failed: {e.response.status_code} - {e.response.text}')
-        except httpx.TimeoutException:
-            raise errors.RequesterError('Rerank request timed out')
-        except Exception as e:
-            raise errors.RequesterError(f'Rerank request error: {str(e)}')
-
-    @staticmethod
-    def _parse_rerank_response(data: dict) -> typing.List[dict]:
-        """Parse rerank response from various providers.
-
-        Handles:
-        - Jina/Cohere/SiliconFlow: {"results": [{"index", "relevance_score"}]}
-        - Voyage AI: {"data": [{"index", "relevance_score"}]}
-        - DashScope: {"output": {"results": [{"index", "relevance_score"}]}}
-        """
-        if 'results' in data:
-            return data['results']
-        if 'data' in data:
-            return data['data']
-        if 'output' in data and isinstance(data['output'], dict):
-            return data['output'].get('results', [])
-        return []
@@ -7,6 +7,7 @@ metadata:
    zh_Hans: OpenAI
  icon: openai.svg
 spec:
+  litellm_provider: openai
  config:
  - name: base_url
    label:
@@ -22,10 +23,10 @@ spec:
    type: integer
    required: true
    default: 120
+  alias: "openai OpenAI 欧派 gpt GPT ChatGPT chatgpt o1 o3 o4 text-embedding 通用 openai兼容 compatible"
  support_type:
  - llm
  - text-embedding
-  - rerank
  provider_category: manufacturer
 execution:
  python:
@@ -12,6 +12,7 @@ metadata:
  icon: chroma.svg
 spec:
  config: []
+  alias: "chroma Chroma 向量 vector embedding 嵌入 chromadb"
  support_type:
  - text-embedding
  provider_category: builtin
@@ -7,6 +7,7 @@ metadata:
    zh_Hans: Cohere
  icon: cohere.svg
 spec:
+  litellm_provider: cohere
  config:
  - name: base_url
    label:
@@ -22,6 +23,7 @@ spec:
    type: integer
    required: true
    default: 120
+  alias: "cohere Cohere rerank 重排 reranker rerank-english rerank-multilingual command"
  support_type:
  - rerank
  provider_category: manufacturer
@@ -1,17 +0,0 @@
-from __future__ import annotations
-
-import typing
-import openai
-
-from . import chatcmpl
-
-
-class CompShareChatCompletions(chatcmpl.OpenAIChatCompletions):
-    """CompShare ChatCompletion API 请求器"""
-
-    client: openai.AsyncClient
-
-    default_config: dict[str, typing.Any] = {
-        'base_url': 'https://api.modelverse.cn/v1',
-        'timeout': 120,
-    }
@@ -7,6 +7,7 @@ metadata:
    zh_Hans: 优云智算
  icon: compshare.png
 spec:
+  litellm_provider: openai
  config:
  - name: base_url
    label:
@@ -22,8 +23,11 @@ spec:
    type: integer
    required: true
    default: 120
+  alias: "compshare 优刻得 ucloud UCloud 算力 共享算力 GPU"
  support_type:
  - llm
+  - text-embedding
+  - rerank
  provider_category: maas
 execution:
  python:
@@ -1,67 +0,0 @@
-from __future__ import annotations
-
-import typing
-
-from . import chatcmpl
-from .. import errors, requester
-import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
-import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
-import langbot_plugin.api.entities.builtin.provider.message as provider_message
-
-
-class DeepseekChatCompletions(chatcmpl.OpenAIChatCompletions):
-    """Deepseek ChatCompletion API 请求器"""
-
-    default_config: dict[str, typing.Any] = {
-        'base_url': 'https://api.deepseek.com',
-        'timeout': 120,
-    }
-
-    async def _closure(
-        self,
-        query: pipeline_query.Query,
-        req_messages: list[dict],
-        use_model: requester.RuntimeLLMModel,
-        use_funcs: list[resource_tool.LLMTool] = None,
-        extra_args: dict[str, typing.Any] = {},
-        remove_think: bool = False,
-    ) -> tuple[provider_message.Message, dict]:
-        self.client.api_key = use_model.provider.token_mgr.get_token()
-
-        args = {}
-        args['model'] = use_model.model_entity.name
-
-        if use_funcs:
-            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
-
-            if tools:
-                args['tools'] = tools
-
-        # 设置此次请求中的messages
-        messages = req_messages
-
-        # deepseek 不支持多模态，把content都转换成纯文字
-        for m in messages:
-            if 'content' in m and isinstance(m['content'], list):
-                m['content'] = ' '.join([c['text'] for c in m['content'] if 'text' in c])
-
-        args['messages'] = messages
-
-        # 发送请求
-        resp = await self._req(args, extra_body=extra_args)
-
-        # print(resp)
-
-        if resp is None:
-            raise errors.RequesterError('接口返回为空，请确定模型提供商服务是否正常')
-        # 处理请求结果
-        message = await self._make_msg(resp, remove_think)
-
-        # Extract token usage from response
-        usage_info = {}
-        if hasattr(resp, 'usage') and resp.usage:
-            usage_info['input_tokens'] = resp.usage.prompt_tokens or 0
-            usage_info['output_tokens'] = resp.usage.completion_tokens or 0
-            usage_info['total_tokens'] = resp.usage.total_tokens or 0
-
-        return message, usage_info
@@ -7,6 +7,7 @@ metadata:
    zh_Hans: DeepSeek
  icon: deepseek.svg
 spec:
+  litellm_provider: deepseek
  config:
  - name: base_url
    label:
@@ -22,6 +23,7 @@ spec:
    type: integer
    required: true
    default: 120
+  alias: "deepseek DeepSeek 深度求索 深度 求索 dpsk v3 r1 deepseek-chat deepseek-reasoner"
  support_type:
  - llm
  provider_category: manufacturer
@@ -0,0 +1,4 @@
+<svg width="60" height="50" viewBox="0 0 60 50" xmlns="http://www.w3.org/2000/svg">
+  <rect width="60" height="50" rx="8" fill="#3B82F6"/>
+  <text x="30" y="32" font-family="Arial, sans-serif" font-size="12" font-weight="bold" fill="white" text-anchor="middle">豆包</text>
+</svg>
@@ -0,0 +1,31 @@
+apiVersion: v1
+kind: LLMAPIRequester
+metadata:
+  name: doubao-chat-completions
+  label:
+    en_US: ByteDance Doubao
+    zh_Hans: 字节豆包
+  icon: doubao.svg
+spec:
+  litellm_provider: openai
+  config:
+  - name: base_url
+    label:
+      en_US: Base URL
+      zh_Hans: 基础 URL
+    type: string
+    required: true
+    default: https://ark.cn-beijing.volces.com/api/v3
+  - name: timeout
+    label:
+      en_US: Timeout
+      zh_Hans: 超时时间
+    type: integer
+    required: true
+    default: 120
+  alias: "doubao 豆包 字节 字节跳动 bytedance volcengine 火山 火山引擎 ark 方舟 seed"
+  support_type:
+  - llm
+  - text-embedding
+  - rerank
+  provider_category: manufacturer
@@ -1,205 +0,0 @@
-from __future__ import annotations
-
-import typing
-import httpx
-
-from . import chatcmpl
-
-import uuid
-
-from .. import requester
-import langbot_plugin.api.entities.builtin.provider.message as provider_message
-import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
-import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
-
-
-class GeminiChatCompletions(chatcmpl.OpenAIChatCompletions):
-    """Google Gemini API 请求器"""
-
-    default_config: dict[str, typing.Any] = {
-        'base_url': 'https://generativelanguage.googleapis.com/v1beta/openai',
-        'timeout': 120,
-    }
-
-    async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]:
-        models_url = 'https://generativelanguage.googleapis.com/v1beta/models'
-        params = {'key': api_key} if api_key else {}
-
-        all_models: list[dict[str, typing.Any]] = []
-        next_page_token = ''
-        last_payload: dict[str, typing.Any] = {}
-
-        async with httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']) as client:
-            while True:
-                request_params = dict(params)
-                if next_page_token:
-                    request_params['pageToken'] = next_page_token
-
-                response = await client.get(models_url, params=request_params)
-                response.raise_for_status()
-                payload = response.json()
-                last_payload = payload
-
-                for item in payload.get('models', []):
-                    model_name = item.get('name', '')
-                    model_id = model_name.replace('models/', '', 1)
-                    if not model_id:
-                        continue
-
-                    supported_methods = item.get('supportedGenerationMethods', []) or []
-                    if 'embedContent' in supported_methods and 'generateContent' not in supported_methods:
-                        model_type = 'embedding'
-                    else:
-                        model_type = 'llm'
-
-                    all_models.append(
-                        {
-                            'id': model_id,
-                            'name': model_id,
-                            'type': model_type,
-                            'abilities': self._infer_model_abilities(item, model_id),
-                            'display_name': item.get('displayName') or None,
-                            'description': item.get('description') or None,
-                            'context_length': item.get('inputTokenLimit'),
-                            'input_modalities': self._normalize_modalities(item.get('inputModalities')),
-                            'output_modalities': self._normalize_modalities(item.get('outputModalities')),
-                        }
-                    )
-
-                next_page_token = payload.get('nextPageToken', '')
-                if not next_page_token:
-                    break
-
-        all_models.sort(key=lambda item: (item['type'] != 'llm', item['name'].lower()))
-        return {
-            'models': all_models,
-            'debug': {
-                'request': {
-                    'method': 'GET',
-                    'url': models_url,
-                    'query': {'key': self._mask_api_key(api_key)} if api_key else {},
-                },
-                'response': last_payload,
-            },
-        }
-
-    async def _closure_stream(
-        self,
-        query: pipeline_query.Query,
-        req_messages: list[dict],
-        use_model: requester.RuntimeLLMModel,
-        use_funcs: list[resource_tool.LLMTool] = None,
-        extra_args: dict[str, typing.Any] = {},
-        remove_think: bool = False,
-    ) -> provider_message.MessageChunk:
-        self.client.api_key = use_model.provider.token_mgr.get_token()
-
-        args = {}
-        args['model'] = use_model.model_entity.name
-
-        if use_funcs:
-            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
-            if tools:
-                args['tools'] = tools
-
-        # 设置此次请求中的messages
-        messages = req_messages.copy()
-
-        # 检查vision
-        for msg in messages:
-            if 'content' in msg and isinstance(msg['content'], list):
-                for me in msg['content']:
-                    if me['type'] == 'image_base64':
-                        me['image_url'] = {'url': me['image_base64']}
-                        me['type'] = 'image_url'
-                        del me['image_base64']
-
-        args['messages'] = messages
-        args['stream'] = True
-
-        # 流式处理状态
-        # tool_calls_map: dict[str, provider_message.ToolCall] = {}
-        chunk_idx = 0
-        thinking_started = False
-        thinking_ended = False
-        role = 'assistant'  # 默认角色
-        tool_id = ''
-        tool_name = ''
-        # accumulated_reasoning = ''  # 仅用于判断何时结束思维链
-
-        async for chunk in self._req_stream(args, extra_body=extra_args):
-            # 解析 chunk 数据
-
-            if hasattr(chunk, 'choices') and chunk.choices:
-                choice = chunk.choices[0]
-                delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
-
-                finish_reason = getattr(choice, 'finish_reason', None)
-            else:
-                delta = {}
-                finish_reason = None
-            # 从第一个 chunk 获取 role，后续使用这个 role
-            if 'role' in delta and delta['role']:
-                role = delta['role']
-
-            # 获取增量内容
-            delta_content = delta.get('content', '')
-            reasoning_content = delta.get('reasoning_content', '')
-
-            # 处理 reasoning_content
-            if reasoning_content:
-                # accumulated_reasoning += reasoning_content
-                # 如果设置了 remove_think，跳过 reasoning_content
-                if remove_think:
-                    chunk_idx += 1
-                    continue
-
-                # 第一次出现 reasoning_content，添加 <think> 开始标签
-                if not thinking_started:
-                    thinking_started = True
-                    delta_content = '<think>\n' + reasoning_content
-                else:
-                    # 继续输出 reasoning_content
-                    delta_content = reasoning_content
-            elif thinking_started and not thinking_ended and delta_content:
-                # reasoning_content 结束，normal content 开始，添加 </think> 结束标签
-                thinking_ended = True
-                delta_content = '\n</think>\n' + delta_content
-
-            # 处理 content 中已有的 <think> 标签（如果需要移除）
-            # if delta_content and remove_think and '<think>' in delta_content:
-            #     import re
-            #
-            #     # 移除 <think> 标签及其内容
-            #     delta_content = re.sub(r'<think>.*?</think>', '', delta_content, flags=re.DOTALL)
-
-            # 处理工具调用增量
-            # delta_tool_calls = None
-            if delta.get('tool_calls'):
-                for tool_call in delta['tool_calls']:
-                    if tool_call['id'] == '' and tool_id == '':
-                        tool_id = str(uuid.uuid4())
-                    if tool_call['function']['name']:
-                        tool_name = tool_call['function']['name']
-                    tool_call['id'] = tool_id
-                    tool_call['function']['name'] = tool_name
-                    if tool_call['type'] is None:
-                        tool_call['type'] = 'function'
-
-            # 跳过空的第一个 chunk（只有 role 没有内容）
-            if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'):
-                chunk_idx += 1
-                continue
-            # 构建 MessageChunk - 只包含增量内容
-            chunk_data = {
-                'role': role,
-                'content': delta_content if delta_content else None,
-                'tool_calls': delta.get('tool_calls'),
-                'is_final': bool(finish_reason),
-            }
-
-            # 移除 None 值
-            chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
-
-            yield provider_message.MessageChunk(**chunk_data)
-            chunk_idx += 1
@@ -7,6 +7,7 @@ metadata:
    zh_Hans: Google Gemini
  icon: gemini.svg
 spec:
+  litellm_provider: gemini
  config:
  - name: base_url
    label:
@@ -22,8 +23,10 @@ spec:
    type: integer
    required: true
    default: 120
+  alias: "gemini Gemini 谷歌 google Google 双子座 bard flash pro text-embedding-004"
  support_type:
  - llm
+  - text-embedding
  provider_category: manufacturer
 execution:
  python:
@@ -1,15 +0,0 @@
-from __future__ import annotations
-
-
-import typing
-
-from . import ppiochatcmpl
-
-
-class GiteeAIChatCompletions(ppiochatcmpl.PPIOChatCompletions):
-    """Gitee AI ChatCompletions API 请求器"""
-
-    default_config: dict[str, typing.Any] = {
-        'base_url': 'https://ai.gitee.com/v1',
-        'timeout': 120,
-    }
@@ -7,6 +7,7 @@ metadata:
    zh_Hans: Gitee AI
  icon: giteeai.svg
 spec:
+  litellm_provider: openai
  config:
  - name: base_url
    label:
@@ -22,6 +23,7 @@ spec:
    type: integer
    required: true
    default: 120
+  alias: "gitee Gitee 码云 gitee-ai gitee ai serverless bge embedding rerank"
  support_type:
  - llm
  - text-embedding
@@ -0,0 +1,4 @@
+<svg width="60" height="50" viewBox="0 0 60 50" xmlns="http://www.w3.org/2000/svg">
+  <rect width="60" height="50" rx="8" fill="#F97316"/>
+  <text x="30" y="32" font-family="Arial, sans-serif" font-size="14" font-weight="bold" fill="white" text-anchor="middle">Groq</text>
+</svg>
@@ -0,0 +1,29 @@
+apiVersion: v1
+kind: LLMAPIRequester
+metadata:
+  name: groq-chat-completions
+  label:
+    en_US: Groq
+    zh_Hans: Groq
+  icon: groq.svg
+spec:
+  litellm_provider: groq
+  config:
+  - name: base_url
+    label:
+      en_US: Base URL
+      zh_Hans: 基础 URL
+    type: string
+    required: true
+    default: https://api.groq.com/openai/v1
+  - name: timeout
+    label:
+      en_US: Timeout
+      zh_Hans: 超时时间
+    type: integer
+    required: true
+    default: 120
+  alias: "groq Groq 高速 llama mixtral 推理加速 lpu"
+  support_type:
+  - llm
+  provider_category: manufacturer
@@ -0,0 +1,5 @@
+<svg width="60" height="50" viewBox="0 0 60 50" xmlns="http://www.w3.org/2000/svg">
+  <rect width="60" height="50" rx="8" fill="#0066FF"/>
+  <text x="30" y="28" font-family="Arial, sans-serif" font-size="10" font-weight="bold" fill="white" text-anchor="middle">iFlytek</text>
+  <text x="30" y="40" font-family="Arial, sans-serif" font-size="8" fill="white" text-anchor="middle">Spark</text>
+</svg>
--- a/Show More
+++ b/Show More