feat(models): persist context metadata

style: simplify wrapped expressions
refactor(provider): simplify litellm capabilities
2026-06-07 22:36:02 +00:00 · 2026-06-08 00:39:30 +08:00 · 2026-06-07 22:05:46 +08:00 · 2026-06-06 00:21:19 +08:00 · 2026-06-05 09:13:57 -04:00 · 2026-06-05 09:52:13 +08:00
127 changed files with 3886 additions and 4591 deletions
--- a/16
+++ b/16
@@ -14,10 +14,22 @@ COPY . .
 COPY --from=node /app/web/dist ./web/dist
-RUN apt update \
+RUN apt-get update \
-    && apt install gcc -y \
+    && apt-get install -y --no-install-recommends gcc ca-certificates curl gnupg \
    # Install the Docker CLI (client only) so the optional langbot_box
    # service can drive the mounted host Docker socket and create sandbox
    # containers. The same image powers langbot / plugin_runtime / box; only
    # box uses the client. Arch-aware via dpkg so multi-arch builds work.
    && install -m 0755 -d /etc/apt/keyrings \
    && curl -fsSL https://download.docker.com/linux/debian/gpg -o /etc/apt/keyrings/docker.asc \
    && chmod a+r /etc/apt/keyrings/docker.asc \
    && echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/debian $(. /etc/os-release && echo \"$VERSION_CODENAME\") stable" > /etc/apt/sources.list.d/docker.list \
    && apt-get update \
    && apt-get install -y --no-install-recommends docker-ce-cli \
    && python -m pip install --no-cache-dir uv \
    && uv sync \
    && apt-get purge -y --auto-remove curl gnupg \
    && rm -rf /var/lib/apt/lists/* \
    && touch /.dockerenv
 CMD [ "uv", "run", "--no-sync", "main.py" ]
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "langbot"
-version = "4.10.0-beta.2"
+version = "4.10.0"
 description = "Production-grade platform for building agentic IM bots"
 readme = "README.md"
 license-files = ["LICENSE"]
@@ -70,7 +70,7 @@ dependencies = [
    "chromadb>=1.0.0,<2.0.0",
    "qdrant-client (>=1.15.1,<2.0.0)",
    "pyseekdb==1.1.0.post3",
-    "langbot-plugin==0.4.0",
+    "langbot-plugin==0.4.1",
    "asyncpg>=0.30.0",
    "line-bot-sdk>=3.19.0",
    "matrix-nio>=0.25.2",
@@ -79,6 +79,7 @@ dependencies = [
    "pymilvus>=2.6.4",
    "pgvector>=0.4.1",
    "botocore>=1.42.39",
    "litellm>=1.0.0",
 ]
 keywords = [
    "bot",
--- a/src/langbot/init.py
+++ b/src/langbot/init.py
@@ -1,3 +1,3 @@
 """LangBot - Production-grade platform for building agentic IM bots"""
-__version__ = '4.10.0-beta.2'
+__version__ = '4.10.0'
--- a/src/langbot/pkg/api/http/controller/groups/monitoring.py
+++ b/src/langbot/pkg/api/http/controller/groups/monitoring.py
@@ -46,6 +46,30 @@ class MonitoringRouterGroup(group.RouterGroup):
            return self.success(data=metrics)
        @self.route('/token-statistics', methods=['GET'], auth_type=group.AuthType.USER_TOKEN)
        async def get_token_statistics() -> str:
            """Return token usage statistics: summary, per-model breakdown and time series.

            Reads the repeated ``botId`` / ``pipelineId`` query parameters, the
            ``startTime`` / ``endTime`` parameters (passed through
            ``parse_iso_datetime``) and ``bucket``; any ``bucket`` value other
            than ``'hour'`` or ``'day'`` falls back to ``'hour'``.
            """
            args = quart.request.args

            granularity = args.get('bucket', 'hour')
            if granularity not in ('hour', 'day'):
                granularity = 'hour'

            window_start = parse_iso_datetime(args.get('startTime'))
            window_end = parse_iso_datetime(args.get('endTime'))

            stats = await self.ap.monitoring_service.get_token_statistics(
                # An empty filter list means "no filter", signalled to the service as None.
                bot_ids=args.getlist('botId') or None,
                pipeline_ids=args.getlist('pipelineId') or None,
                start_time=window_start,
                end_time=window_end,
                bucket=granularity,
            )
            return self.success(data=stats)
        @self.route('/messages', methods=['GET'], auth_type=group.AuthType.USER_TOKEN)
        async def get_messages() -> str:
            """Get message logs"""
--- a/src/langbot/pkg/api/http/service/monitoring.py
+++ b/src/langbot/pkg/api/http/service/monitoring.py
@@ -472,6 +472,179 @@ class MonitoringService:
            'active_sessions': active_sessions,
        }
    async def get_token_statistics(
        self,
        bot_ids: list[str] | None = None,
        pipeline_ids: list[str] | None = None,
        start_time: datetime.datetime | None = None,
        end_time: datetime.datetime | None = None,
        bucket: str = 'hour',
    ) -> dict:
        """Get detailed token usage statistics for production observability.

        Args:
            bot_ids: When non-empty, only calls whose ``bot_id`` is in this list are included.
            pipeline_ids: When non-empty, only calls whose ``pipeline_id`` is in this list are included.
            start_time: When given, only calls with ``timestamp >= start_time`` are included.
            end_time: When given, only calls with ``timestamp <= end_time`` are included.
            bucket: ``'hour'`` groups the time series per hour; any other value groups per day.

        Returns:
            A dict with four keys:
            - summary: aggregate token counters and call/latency stats over the window
            - by_model: per-model token + call breakdown (sorted by total tokens desc)
            - timeseries: token usage per time bucket, ordered by bucket key ascending
            - bucket: the ``bucket`` argument, echoed back

        Note:
            Token, duration and cost sums are taken over every row matching the
            filters, whatever its ``status``; no query here filters on status.
            Success and error calls are only *counted* separately
            (``success_calls`` / ``error_calls``).
            NOTE(review): if token totals are meant to exclude failed calls, a
            status filter has to be added to the sums — confirm the intent.
        """
        LLMCall = persistence_monitoring.MonitoringLLMCall
        # Filters shared by all three queries below; each one is optional.
        conditions = []
        if bot_ids:
            conditions.append(LLMCall.bot_id.in_(bot_ids))
        if pipeline_ids:
            conditions.append(LLMCall.pipeline_id.in_(pipeline_ids))
        if start_time:
            conditions.append(LLMCall.timestamp >= start_time)
        if end_time:
            conditions.append(LLMCall.timestamp <= end_time)
        def _apply(query):
            # Attach the shared WHERE clause; the query is returned untouched when no filter is set.
            if conditions:
                query = query.where(sqlalchemy.and_(*conditions))
            return query
        # ---- Summary aggregates ----
        summary_query = _apply(
            sqlalchemy.select(
                sqlalchemy.func.count(LLMCall.id),
                sqlalchemy.func.coalesce(sqlalchemy.func.sum(LLMCall.input_tokens), 0),
                sqlalchemy.func.coalesce(sqlalchemy.func.sum(LLMCall.output_tokens), 0),
                sqlalchemy.func.coalesce(sqlalchemy.func.sum(LLMCall.total_tokens), 0),
                sqlalchemy.func.coalesce(sqlalchemy.func.sum(LLMCall.duration), 0),
                sqlalchemy.func.coalesce(sqlalchemy.func.sum(LLMCall.cost), 0.0),
                sqlalchemy.func.sum(sqlalchemy.case((LLMCall.status == 'success', 1), else_=0)),
                sqlalchemy.func.sum(sqlalchemy.case((LLMCall.status == 'error', 1), else_=0)),
                # Count of successful calls that nonetheless recorded zero tokens —
                # a data-quality signal that usage reporting may be broken upstream.
                sqlalchemy.func.sum(
                    sqlalchemy.case(
                        (sqlalchemy.and_(LLMCall.status == 'success', LLMCall.total_tokens == 0), 1),
                        else_=0,
                    )
                ),
            )
        )
        summary_result = await self.ap.persistence_mgr.execute_async(summary_query)
        row = summary_result.first()
        # The unpacking order must match the SELECT column order above.
        (
            total_calls,
            total_input_tokens,
            total_output_tokens,
            total_tokens,
            total_duration,
            total_cost,
            success_calls,
            error_calls,
            zero_token_success_calls,
        ) = row if row else (0, 0, 0, 0, 0, 0.0, 0, 0, 0)
        # The un-coalesced SUM(CASE ...) columns may come back as None; normalise to 0.
        total_calls = total_calls or 0
        success_calls = success_calls or 0
        error_calls = error_calls or 0
        zero_token_success_calls = zero_token_success_calls or 0
        summary = {
            'total_calls': total_calls,
            'success_calls': success_calls,
            'error_calls': error_calls,
            'total_input_tokens': int(total_input_tokens or 0),
            'total_output_tokens': int(total_output_tokens or 0),
            'total_tokens': int(total_tokens or 0),
            'total_cost': round(float(total_cost or 0.0), 6),
            # Averages divide by all calls (success and error alike).
            'avg_tokens_per_call': int((total_tokens or 0) / total_calls) if total_calls > 0 else 0,
            'avg_duration_ms': int((total_duration or 0) / total_calls) if total_calls > 0 else 0,
            # Output tokens per second; duration is divided by 1000, so it is
            # presumably stored in milliseconds — TODO confirm against the model.
            'avg_tokens_per_second': round((total_output_tokens or 0) / (total_duration / 1000), 2)
            if total_duration and total_duration > 0
            else 0,
            'zero_token_success_calls': zero_token_success_calls,
        }
        # ---- Per-model breakdown ----
        by_model_query = _apply(
            sqlalchemy.select(
                LLMCall.model_name,
                sqlalchemy.func.count(LLMCall.id),
                sqlalchemy.func.coalesce(sqlalchemy.func.sum(LLMCall.input_tokens), 0),
                sqlalchemy.func.coalesce(sqlalchemy.func.sum(LLMCall.output_tokens), 0),
                sqlalchemy.func.coalesce(sqlalchemy.func.sum(LLMCall.total_tokens), 0),
                sqlalchemy.func.coalesce(sqlalchemy.func.sum(LLMCall.duration), 0),
                sqlalchemy.func.coalesce(sqlalchemy.func.sum(LLMCall.cost), 0.0),
                sqlalchemy.func.sum(sqlalchemy.case((LLMCall.status == 'error', 1), else_=0)),
            ).group_by(LLMCall.model_name)
        )
        by_model_result = await self.ap.persistence_mgr.execute_async(by_model_query)
        by_model = []
        for mrow in by_model_result.all():
            # Same positional contract as above: order follows the SELECT list.
            (
                model_name,
                m_calls,
                m_in,
                m_out,
                m_total,
                m_duration,
                m_cost,
                m_errors,
            ) = mrow
            m_calls = m_calls or 0
            by_model.append(
                {
                    'model_name': model_name,
                    'calls': m_calls,
                    'error_calls': m_errors or 0,
                    'input_tokens': int(m_in or 0),
                    'output_tokens': int(m_out or 0),
                    'total_tokens': int(m_total or 0),
                    'cost': round(float(m_cost or 0.0), 6),
                    'avg_tokens_per_call': int((m_total or 0) / m_calls) if m_calls > 0 else 0,
                    'avg_duration_ms': int((m_duration or 0) / m_calls) if m_calls > 0 else 0,
                }
            )
        # Heaviest token consumers first.
        by_model.sort(key=lambda x: x['total_tokens'], reverse=True)
        # ---- Time-bucketed series ----
        # Use a DB-agnostic bucketing approach: fetch (timestamp, tokens) rows and
        # aggregate in Python. With a time filter the window is bounded, so this is
        # cheap for typical dashboard ranges (hours/days).
        # NOTE(review): start_time and end_time are both optional; when neither is
        # given, this query loads every matching row into memory.
        series_query = _apply(
            sqlalchemy.select(
                LLMCall.timestamp,
                LLMCall.input_tokens,
                LLMCall.output_tokens,
                LLMCall.total_tokens,
            ).order_by(LLMCall.timestamp.asc())
        )
        series_result = await self.ap.persistence_mgr.execute_async(series_query)
        # The formatted timestamp doubles as the bucket key; both formats sort chronologically as strings.
        bucket_fmt = '%Y-%m-%d %H:00' if bucket == 'hour' else '%Y-%m-%d'
        buckets: dict[str, dict] = {}
        for srow in series_result.all():
            ts, s_in, s_out, s_total = srow
            if ts is None:
                # Rows without a timestamp cannot be placed in a bucket.
                continue
            key = ts.strftime(bucket_fmt)
            b = buckets.setdefault(
                key,
                {'bucket': key, 'input_tokens': 0, 'output_tokens': 0, 'total_tokens': 0, 'calls': 0},
            )
            b['input_tokens'] += int(s_in or 0)
            b['output_tokens'] += int(s_out or 0)
            b['total_tokens'] += int(s_total or 0)
            b['calls'] += 1
        timeseries = [buckets[k] for k in sorted(buckets.keys())]
        return {
            'summary': summary,
            'by_model': by_model,
            'timeseries': timeseries,
            'bucket': bucket,
        }
    async def get_messages(
        self,
        bot_ids: list[str] | None = None,
--- a/src/langbot/pkg/core/bootutils/deps.py
+++ b/src/langbot/pkg/core/bootutils/deps.py
@@ -42,6 +42,7 @@ required_deps = {
    'telegramify_markdown': 'telegramify-markdown',
    'slack_sdk': 'slack_sdk',
    'asyncpg': 'asyncpg',
    'litellm': 'litellm',
 }
--- a/src/langbot/pkg/entity/persistence/model.py
+++ b/src/langbot/pkg/entity/persistence/model.py
@@ -31,6 +31,7 @@ class LLMModel(Base):
    name = sqlalchemy.Column(sqlalchemy.String(255), nullable=False)
    provider_uuid = sqlalchemy.Column(sqlalchemy.String(255), nullable=False)
    abilities = sqlalchemy.Column(sqlalchemy.JSON, nullable=False, default=[])
    context_length = sqlalchemy.Column(sqlalchemy.Integer, nullable=True)
    extra_args = sqlalchemy.Column(sqlalchemy.JSON, nullable=False, default={})
    prefered_ranking = sqlalchemy.Column(sqlalchemy.Integer, nullable=False, default=0)
    created_at = sqlalchemy.Column(sqlalchemy.DateTime, nullable=False, server_default=sqlalchemy.func.now())
--- a/src/langbot/pkg/persistence/alembic/versions/0004_add_llm_model_context_length.py
+++ b/src/langbot/pkg/persistence/alembic/versions/0004_add_llm_model_context_length.py
@@ -0,0 +1,30 @@
 """add llm model context length
 Revision ID: 0004_add_llm_model_context_length
 Revises: 0003_add_rerank_models
 Create Date: 2026-06-07
 """
 import sqlalchemy as sa
 from alembic import op
 revision = '0004_add_llm_model_context_length'
 down_revision = '0003_add_rerank_models'
 branch_labels = None
 depends_on = None
 def upgrade() -> None:
    """Add the nullable ``context_length`` column to ``llm_models`` if it is missing."""
    bind = op.get_bind()
    present = [col['name'] for col in sa.inspect(bind).get_columns('llm_models')]
    if 'context_length' in present:
        # Column is already there; nothing to do.
        return
    op.add_column('llm_models', sa.Column('context_length', sa.Integer(), nullable=True))
 def downgrade() -> None:
    """Drop ``context_length`` from ``llm_models`` when the column is present."""
    bind = op.get_bind()
    present = [col['name'] for col in sa.inspect(bind).get_columns('llm_models')]
    if 'context_length' not in present:
        # Nothing to remove.
        return
    op.drop_column('llm_models', 'context_length')
--- a/src/langbot/pkg/persistence/migrations/dbm026_llm_model_context_length.py
+++ b/src/langbot/pkg/persistence/migrations/dbm026_llm_model_context_length.py
@@ -0,0 +1,42 @@
 import sqlalchemy
 from .. import migration
@migration.migration_class(26)
class DBMigrateLLMModelContextLength(migration.DBMigration):
    """Migration 26: add the ``context_length`` column to the ``llm_models`` table."""

    async def upgrade(self):
        """Add ``context_length`` (INTEGER) unless the table already has it."""
        if 'context_length' in await self._get_columns('llm_models'):
            return
        await self.ap.persistence_mgr.execute_async(
            sqlalchemy.text('ALTER TABLE llm_models ADD COLUMN context_length INTEGER')
        )

    async def downgrade(self):
        """Drop ``context_length`` if the table has it; the PostgreSQL branch uses ``IF EXISTS``."""
        if 'context_length' not in await self._get_columns('llm_models'):
            return
        if self.ap.persistence_mgr.db.name == 'postgresql':
            statement = 'ALTER TABLE llm_models DROP COLUMN IF EXISTS context_length'
        else:
            statement = 'ALTER TABLE llm_models DROP COLUMN context_length'
        await self.ap.persistence_mgr.execute_async(sqlalchemy.text(statement))

    async def _get_columns(self, table_name: str) -> set[str]:
        """Return the set of column names of ``table_name``.

        PostgreSQL is queried through ``information_schema.columns``; every
        other database name goes through ``PRAGMA table_info``.
        """
        executor = self.ap.persistence_mgr
        if executor.db.name == 'postgresql':
            pg_rows = await executor.execute_async(
                sqlalchemy.text("""
                    SELECT column_name FROM information_schema.columns
                    WHERE table_name = :table_name
                """),
                {'table_name': table_name},
            )
            return {record[0] for record in pg_rows.fetchall()}
        # The column name is read from index 1 of each PRAGMA table_info row.
        pragma_rows = await executor.execute_async(sqlalchemy.text(f'PRAGMA table_info({table_name})'))
        return {record[1] for record in pragma_rows.fetchall()}
--- a/src/langbot/pkg/pipeline/preproc/preproc.py
+++ b/src/langbot/pkg/pipeline/preproc/preproc.py
@@ -109,7 +109,7 @@ class PreProcessor(stage.PipelineStage):
            if llm_model:
                query.use_llm_model_uuid = llm_model.model_entity.uuid
-                if llm_model.model_entity.abilities.__contains__('func_call'):
+                if 'func_call' in (llm_model.model_entity.abilities or []):
                    # Get bound plugins and MCP servers for filtering tools
                    bound_plugins = query.variables.get('_pipeline_bound_plugins', None)
                    bound_mcp_servers = query.variables.get('_pipeline_bound_mcp_servers', None)
@@ -159,11 +159,7 @@ class PreProcessor(stage.PipelineStage):
        # Check if this model supports vision, if not, remove all images
        # TODO this checking should be performed in runner, and in this stage, the image should be reserved
-        if (
+        if selected_runner == 'local-agent' and llm_model and 'vision' not in (llm_model.model_entity.abilities or []):
            selected_runner == 'local-agent'
            and llm_model
            and not llm_model.model_entity.abilities.__contains__('vision')
        ):
            for msg in query.messages:
                if isinstance(msg.content, list):
                    for me in msg.content:
@@ -181,7 +177,7 @@ class PreProcessor(stage.PipelineStage):
                plain_text += me.text
            elif isinstance(me, platform_message.Image):
                if selected_runner != 'local-agent' or (
-                    llm_model and llm_model.model_entity.abilities.__contains__('vision')
+                    llm_model and 'vision' in (llm_model.model_entity.abilities or [])
                ):
                    if me.base64 is not None:
                        content_list.append(provider_message.ContentElement.from_image_base64(me.base64))
@@ -202,7 +198,7 @@ class PreProcessor(stage.PipelineStage):
                        content_list.append(provider_message.ContentElement.from_text(msg.text))
                    elif isinstance(msg, platform_message.Image):
                        if selected_runner != 'local-agent' or (
-                            llm_model and llm_model.model_entity.abilities.__contains__('vision')
+                            llm_model and 'vision' in (llm_model.model_entity.abilities or [])
                        ):
                            if msg.base64 is not None:
                                content_list.append(provider_message.ContentElement.from_image_base64(msg.base64))
--- a/src/langbot/pkg/plugin/connector.py
+++ b/src/langbot/pkg/plugin/connector.py
@@ -459,7 +459,7 @@ class PluginRuntimeConnector(ManagedRuntimeConnector):
                                        )
                                    file_bytes = download_resp.content
-                                    self._extract_deps_metadata(file_bytes, task_context)
+                                    self._inspect_plugin_package(file_bytes, task_context)
                                    file_key = await self.handler.send_file(file_bytes, 'lbpkg')
                                    install_info['plugin_file_key'] = file_key
                                    self.ap.logger.info(f'Transfered file {file_key} to plugin runtime')
--- a/src/langbot/pkg/provider/modelmgr/modelmgr.py
+++ b/src/langbot/pkg/provider/modelmgr/modelmgr.py
@@ -37,11 +37,41 @@ class ModelManager:
        self.requester_components = []
        self.requester_dict = {}
    @staticmethod
    def _get_litellm_provider_from_manifest(component: engine.Component | None) -> str | None:
        if component is None:
            return None
        spec = getattr(component, 'spec', None) or {}
        litellm_provider = None
        if isinstance(spec, dict):
            litellm_provider = spec.get('litellm_provider')
        else:
            getter = getattr(spec, 'get', None)
            if callable(getter):
                try:
                    litellm_provider = getter('litellm_provider')
                except Exception:
                    litellm_provider = None
        if isinstance(litellm_provider, str) and litellm_provider:
            return litellm_provider
        return None
    async def initialize(self):
        self.requester_components = self.ap.discover.get_components_by_kind('LLMAPIRequester')
        requester_dict: dict[str, type[requester.ProviderAPIRequester]] = {}
        for component in self.requester_components:
            # Skip components that use litellm_provider (they will use litellmchat.py instead)
            litellm_provider = self._get_litellm_provider_from_manifest(component)
            if litellm_provider:
                self.ap.logger.debug(
                    f'Skipping Python class loading for {component.metadata.name} '
                    f'(uses litellm_provider={litellm_provider})'
                )
                continue
            requester_dict[component.metadata.name] = component.get_python_component_class()
        self.requester_dict = requester_dict
@@ -143,18 +173,24 @@ class ModelManager:
        # get the latest models from space
        space_models = await self.ap.space_service.get_models()
-        exists_llm_models_uuids = [m['uuid'] for m in await self.ap.llm_model_service.get_llm_models()]
+        # Index existing models by uuid. Space reuses a model's uuid across
-        exists_embedding_models_uuids = [
+        # renames / re-specs (e.g. the uuid that used to be ``claude-opus-4-6``
-            m['uuid'] for m in await self.ap.embedding_models_service.get_embedding_models()
+        # may later become ``claude-opus-4-7``). So for Space-managed models we
-        ]
+        # upsert: create when the uuid is new, otherwise update name/abilities/
        # ranking to track Space. Models owned by other providers are never
        # touched, even on an (unexpected) uuid collision.
        existing_llm_models = {m['uuid']: m for m in await self.ap.llm_model_service.get_llm_models()}
        existing_embedding_models = {
            m['uuid']: m for m in await self.ap.embedding_models_service.get_embedding_models()
        }
        created = 0
        updated = 0
        for space_model in space_models:
            if space_model.category == 'chat':
-                uuid = space_model.uuid
+                existing = existing_llm_models.get(space_model.uuid)
-
+                if existing is None:
                if uuid in exists_llm_models_uuids:
                    continue
                    # model will be automatically loaded
                    await self.ap.llm_model_service.create_llm_model(
                        {
@@ -168,13 +204,25 @@ class ModelManager:
                        preserve_uuid=True,
                        auto_set_to_default_pipeline=False,
                    )
                    created += 1
                elif existing.get('provider_uuid') == space_model_provider.uuid:
                    desired = {
                        'name': space_model.model_id,
                        'provider_uuid': space_model_provider.uuid,
                        'abilities': space_model.llm_abilities or [],
                        'prefered_ranking': space_model.featured_order,
                    }
                    if (
                        existing.get('name') != desired['name']
                        or list(existing.get('abilities') or []) != list(desired['abilities'])
                        or existing.get('prefered_ranking') != desired['prefered_ranking']
                    ):
                        await self.ap.llm_model_service.update_llm_model(space_model.uuid, dict(desired))
                        updated += 1
            elif space_model.category == 'embedding':
-                uuid = space_model.uuid
+                existing = existing_embedding_models.get(space_model.uuid)
-
+                if existing is None:
                if uuid in exists_embedding_models_uuids:
                    continue
                    # model will be automatically loaded
                    await self.ap.embedding_models_service.create_embedding_model(
                        {
@@ -186,6 +234,22 @@ class ModelManager:
                        },
                        preserve_uuid=True,
                    )
                    created += 1
                elif existing.get('provider_uuid') == space_model_provider.uuid:
                    desired = {
                        'name': space_model.model_id,
                        'provider_uuid': space_model_provider.uuid,
                        'prefered_ranking': space_model.featured_order,
                    }
                    if (
                        existing.get('name') != desired['name']
                        or existing.get('prefered_ranking') != desired['prefered_ranking']
                    ):
                        await self.ap.embedding_models_service.update_embedding_model(space_model.uuid, dict(desired))
                        updated += 1
        if created or updated:
            self.ap.logger.info(f'Synced models from LangBot Space: {created} added, {updated} updated.')
    async def init_temporary_runtime_llm_model(
        self,
@@ -202,6 +266,7 @@ class ModelManager:
                name=model_info.get('name', ''),
                provider_uuid='',
                abilities=model_info.get('abilities', []),
                context_length=model_info.get('context_length'),
                extra_args=model_info.get('extra_args', {}),
            ),
            provider=runtime_provider,
@@ -260,13 +325,37 @@ class ModelManager:
        else:
            provider_entity = provider_info
        # Get requester manifest to check for litellm_provider
        requester_manifest = self.get_available_requester_manifest_by_name(provider_entity.requester)
        litellm_provider = self._get_litellm_provider_from_manifest(requester_manifest)
        # Build config from base_url
        config = {'base_url': provider_entity.base_url}
        # Check if requester manifest specifies litellm_provider
        if litellm_provider:
            from .requesters import litellmchat
            # Use unified LiteLLMRequester with provider prefix
            # Map litellm_provider (YAML spec) to custom_llm_provider (config)
            config['custom_llm_provider'] = litellm_provider
            requester_inst = litellmchat.LiteLLMRequester(
                ap=self.ap,
                config=config,
            )
            self.ap.logger.debug(
                f'Using LiteLLMRequester for {provider_entity.requester} '
                f'with custom_llm_provider={config["custom_llm_provider"]}'
            )
        else:
            # Use original requester class (for backward compatibility)
            if provider_entity.requester not in self.requester_dict:
                raise provider_errors.RequesterNotFoundError(provider_entity.requester)
            requester_inst = self.requester_dict[provider_entity.requester](
                ap=self.ap,
-            config={'base_url': provider_entity.base_url},
+                config=config,
            )
        await requester_inst.initialize()
        token_mgr = token.TokenManager(name=provider_entity.uuid, tokens=provider_entity.api_keys or [])
@@ -372,6 +461,7 @@ class ModelManager:
            name=model_info.get('name', ''),
            provider_uuid=model_info.get('provider_uuid', ''),
            abilities=model_info.get('abilities', []),
            context_length=model_info.get('context_length'),
            extra_args=model_info.get('extra_args', {}),
        )
--- a/src/langbot/pkg/provider/modelmgr/requester.py
+++ b/src/langbot/pkg/provider/modelmgr/requester.py
@@ -67,8 +67,8 @@ class RuntimeProvider:
            if isinstance(result, tuple):
                msg, usage_info = result
                if usage_info:
-                    input_tokens = usage_info.get('input_tokens', 0)
+                    input_tokens = usage_info.get('prompt_tokens', 0)
-                    output_tokens = usage_info.get('output_tokens', 0)
+                    output_tokens = usage_info.get('completion_tokens', 0)
                return msg
            else:
                return result
@@ -128,7 +128,6 @@ class RuntimeProvider:
        start_time = time.time()
        status = 'success'
        error_message = None
        # Note: Stream doesn't easily provide token counts, set to 0
        input_tokens = 0
        output_tokens = 0
@@ -143,6 +142,15 @@ class RuntimeProvider:
                remove_think=remove_think,
            ):
                yield chunk
            # Extract usage from stream if available (stored by LiteLLM requester)
            if query:
                if query.variables is None:
                    query.variables = {}
                if '_stream_usage' in query.variables:
                    usage_info = query.variables['_stream_usage']
                    input_tokens = usage_info.get('prompt_tokens', 0)
                    output_tokens = usage_info.get('completion_tokens', 0)
                    del query.variables['_stream_usage']
        except Exception as e:
            status = 'error'
            error_message = str(e)
--- a/src/langbot/pkg/provider/modelmgr/requesters/302aichatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/302aichatcmpl.py
@@ -1,17 +0,0 @@
 from __future__ import annotations
 import typing
 import openai
 from . import chatcmpl
 class AI302ChatCompletions(chatcmpl.OpenAIChatCompletions):
    """302.AI ChatCompletion API requester"""
    # Async OpenAI client; no behaviour beyond the parent class is defined here.
    client: openai.AsyncClient
    # Defaults for this requester: the 302.AI endpoint and a timeout of 120
    # (presumably seconds — confirm against the parent class).
    default_config: dict[str, typing.Any] = {
        'base_url': 'https://api.302.ai/v1',
        'timeout': 120,
    }
--- a/src/langbot/pkg/provider/modelmgr/requesters/302aichatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/302aichatcmpl.yaml
@@ -7,6 +7,7 @@ metadata:
    zh_Hans: 302.AI
  icon: 302ai.png
 spec:
  litellm_provider: openai
  config:
  - name: base_url
    label:
--- a/src/langbot/pkg/provider/modelmgr/requesters/anthropicmsgs.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/anthropicmsgs.py
@@ -1,370 +0,0 @@
 from __future__ import annotations
 import typing
 import json
 import platform
 import socket
 import anthropic
 import httpx
 from .. import errors, requester
 from ....utils import image
 import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
 import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
 import langbot_plugin.api.entities.builtin.provider.message as provider_message
 class AnthropicMessages(requester.ProviderAPIRequester):
    """Requester for the Anthropic Messages API."""
    # Client handle; only annotated here, the instance is created in ``initialize``.
    client: anthropic.AsyncAnthropic
    # Default requester configuration: API root and a timeout of 120.
    default_config: dict[str, typing.Any] = {
        'base_url': 'https://api.anthropic.com',
        'timeout': 120,
    }
    async def initialize(self):
        """Create the Anthropic async client from ``self.requester_cfg``.

        The client is built with an empty API key and stored on
        ``self.client``.
        """
        # Windows builds of ``socket`` may lack these keep-alive constants;
        # define the missing ones as 0 so later attribute lookups succeed.
        if platform.system() == 'Windows':
            for option_name in ('TCP_KEEPINTVL', 'TCP_KEEPCNT'):
                if not hasattr(socket, option_name):
                    setattr(socket, option_name, 0)
        api_root = self.requester_cfg['base_url']
        # cast to a valid type because mypy doesn't understand our type narrowing
        request_timeout = typing.cast(httpx.Timeout, self.requester_cfg['timeout'])
        transport = anthropic._base_client.AsyncHttpxClientWrapper(
            base_url=api_root,
            timeout=request_timeout,
            limits=anthropic._constants.DEFAULT_CONNECTION_LIMITS,
            follow_redirects=True,
            trust_env=True,
        )
        self.client = anthropic.AsyncAnthropic(api_key='', http_client=transport, base_url=api_root)
    async def invoke_llm(
        self,
        query: pipeline_query.Query,
        model: requester.RuntimeLLMModel,
        messages: typing.List[provider_message.Message],
        funcs: typing.List[resource_tool.LLMTool] = None,
        extra_args: dict[str, typing.Any] = {},
        remove_think: bool = False,
    ) -> provider_message.Message:
        """Send one non-streaming request to the Anthropic Messages API.

        Args:
            query: The pipeline query this call belongs to (not read here).
            model: Runtime model; supplies the API token and the model name.
            messages: Conversation history. The first ``system`` message is
                left out of the request message list and, when its content is
                a string, sent as the top-level ``system`` parameter. The
                caller's list is not modified.
            funcs: Optional tools offered to the model.
            extra_args: Extra keyword arguments forwarded to
                ``messages.create``. The dict is copied, never mutated.
            remove_think: When true, ``thinking`` blocks are left out of the
                returned content.

        Returns:
            The reply message: text blocks concatenated (thinking blocks
            wrapped in ``<think>`` tags in front) and every ``tool_use`` block
            converted to a tool call.

        Raises:
            errors.RequesterError: On authentication, bad-request or
                not-found errors raised by the Anthropic SDK.
        """
        self.client.api_key = model.provider.token_mgr.get_token()
        args = extra_args.copy()
        args['model'] = model.model_entity.name
        # The system prompt travels as a separate request parameter. Build a
        # filtered copy of the history instead of popping from the caller's
        # list, so a history that is reused for a later call keeps its prompt.
        system_index = next((idx for idx, m in enumerate(messages) if m.role == 'system'), None)
        if system_index is None:
            conversation = list(messages)
        else:
            system_msg = messages[system_index]
            conversation = [m for idx, m in enumerate(messages) if idx != system_index]
            if isinstance(system_msg, provider_message.Message) and isinstance(system_msg.content, str):
                args['system'] = system_msg.content
        req_messages = []
        for m in conversation:
            if m.role == 'tool':
                # Tool results are sent back as a user message holding a tool_result block.
                req_messages.append(
                    {
                        'role': 'user',
                        'content': [
                            {
                                'type': 'tool_result',
                                'tool_use_id': m.tool_call_id,
                                'is_error': False,
                                'content': [{'type': 'text', 'text': m.content}],
                            }
                        ],
                    }
                )
                continue
            msg_dict = m.dict(exclude_none=True)
            if isinstance(m.content, str) and m.content.strip() != '':
                msg_dict['content'] = [{'type': 'text', 'text': m.content}]
            elif isinstance(m.content, list):
                for idx, ce in enumerate(m.content):
                    if ce.type == 'image_base64':
                        image_b64, image_format = await image.extract_b64_and_format(ce.image_base64)
                        msg_dict['content'][idx] = {
                            'type': 'image',
                            'source': {
                                'type': 'base64',
                                'media_type': f'image/{image_format}',
                                'data': image_b64,
                            },
                        }
            if m.tool_calls:
                # At this point the content is either a list or absent/blank
                # (None is dropped by exclude_none, blank strings are not
                # converted above). Start from an empty block list in the
                # latter case so the tool_use blocks can be appended.
                if not isinstance(msg_dict.get('content'), list):
                    msg_dict['content'] = []
                for tool_call in m.tool_calls:
                    msg_dict['content'].append(
                        {
                            'type': 'tool_use',
                            'id': tool_call.id,
                            'name': tool_call.function.name,
                            'input': json.loads(tool_call.function.arguments),
                        }
                    )
                del msg_dict['tool_calls']
            req_messages.append(msg_dict)
        args['messages'] = req_messages
        # Whatever value 'thinking' carries in extra_args, it is replaced by
        # this fixed configuration.
        if 'thinking' in args:
            args['thinking'] = {'type': 'enabled', 'budget_tokens': 10000}
        if funcs:
            tools = await self.ap.tool_mgr.generate_tools_for_anthropic(funcs)
            if tools:
                args['tools'] = tools
        try:
            resp = await self.client.messages.create(**args)
            reply = {
                'content': '',
                'role': resp.role,
            }
            assert type(resp) is anthropic.types.message.Message
            for block in resp.content:
                if not remove_think and block.type == 'thinking':
                    reply['content'] = '<think>\n' + block.thinking + '\n</think>\n' + reply['content']
                elif block.type == 'text':
                    reply['content'] += block.text
                elif block.type == 'tool_use':
                    assert type(block) is anthropic.types.tool_use_block.ToolUseBlock
                    reply.setdefault('tool_calls', []).append(
                        provider_message.ToolCall(
                            id=block.id,
                            type='function',
                            function=provider_message.FunctionCall(name=block.name, arguments=json.dumps(block.input)),
                        )
                    )
            return provider_message.Message(**reply)
        except anthropic.AuthenticationError as e:
            raise errors.RequesterError(f'api-key 无效: {e.message}') from e
        except anthropic.BadRequestError as e:
            raise errors.RequesterError(str(e.message)) from e
        except anthropic.NotFoundError as e:
            if 'model: ' in str(e):
                raise errors.RequesterError(f'模型无效: {e.message}') from e
            else:
                raise errors.RequesterError(f'请求地址无效: {e.message}') from e
    async def invoke_llm_stream(
        self,
        query: pipeline_query.Query,
        model: requester.RuntimeLLMModel,
        messages: typing.List[provider_message.Message],
        funcs: typing.List[resource_tool.LLMTool] = None,
        extra_args: dict[str, typing.Any] = {},
        remove_think: bool = False,
    ) -> typing.AsyncGenerator[provider_message.MessageChunk, None]:
        """Stream one request to the Anthropic Messages API as message chunks.

        Args:
            query: The pipeline query this call belongs to (not read here).
            model: Runtime model; supplies the API token and the model name.
            messages: Conversation history. The first ``system`` message is
                left out of the request message list and, when its content is
                a string, sent as the top-level ``system`` parameter. The
                caller's list is not modified.
            funcs: Optional tools offered to the model.
            extra_args: Extra keyword arguments forwarded to
                ``messages.create``. The dict is copied, never mutated.
            remove_think: When true, thinking deltas are not emitted.

        Yields:
            ``MessageChunk`` objects carrying incremental text (thinking text
            wrapped in ``<think>`` tags), incremental tool-call arguments, and
            ``is_final`` once the API reported a terminal stop reason.

        Raises:
            errors.RequesterError: On authentication, bad-request or
                not-found errors raised by the Anthropic SDK.
        """
        self.client.api_key = model.provider.token_mgr.get_token()
        args = extra_args.copy()
        args['model'] = model.model_entity.name
        args['stream'] = True
        # The system prompt travels as a separate request parameter. Build a
        # filtered copy of the history instead of popping from the caller's
        # list, so a history that is reused for a later call keeps its prompt.
        system_index = next((idx for idx, m in enumerate(messages) if m.role == 'system'), None)
        if system_index is None:
            conversation = list(messages)
        else:
            system_msg = messages[system_index]
            conversation = [m for idx, m in enumerate(messages) if idx != system_index]
            if isinstance(system_msg, provider_message.Message) and isinstance(system_msg.content, str):
                args['system'] = system_msg.content
        req_messages = []
        for m in conversation:
            if m.role == 'tool':
                req_messages.append(
                    {
                        'role': 'user',
                        'content': [
                            {
                                'type': 'tool_result',
                                'tool_use_id': m.tool_call_id,
                                'is_error': False,  # always reported as success for now
                                # Wrapped in a list; only a single text block is sent for now.
                                'content': [{'type': 'text', 'text': m.content}],
                            }
                        ],
                    }
                )
                continue
            msg_dict = m.dict(exclude_none=True)
            if isinstance(m.content, str) and m.content.strip() != '':
                msg_dict['content'] = [{'type': 'text', 'text': m.content}]
            elif isinstance(m.content, list):
                for idx, ce in enumerate(m.content):
                    if ce.type == 'image_base64':
                        image_b64, image_format = await image.extract_b64_and_format(ce.image_base64)
                        msg_dict['content'][idx] = {
                            'type': 'image',
                            'source': {
                                'type': 'base64',
                                'media_type': f'image/{image_format}',
                                'data': image_b64,
                            },
                        }
            # An empty-string content becomes an empty block list. ``get`` is
            # used because a None content is dropped by exclude_none above.
            if msg_dict.get('content') == '':
                msg_dict['content'] = []
            if m.tool_calls:
                # Content may still be absent or a blank string here; tool_use
                # blocks need a list to be appended to.
                if not isinstance(msg_dict.get('content'), list):
                    msg_dict['content'] = []
                for tool_call in m.tool_calls:
                    msg_dict['content'].append(
                        {
                            'type': 'tool_use',
                            'id': tool_call.id,
                            'name': tool_call.function.name,
                            'input': json.loads(tool_call.function.arguments),
                        }
                    )
                del msg_dict['tool_calls']
            req_messages.append(msg_dict)
        # Whatever value 'thinking' carries in extra_args, it is replaced by
        # this fixed configuration.
        if 'thinking' in args:
            args['thinking'] = {'type': 'enabled', 'budget_tokens': 10000}
        args['messages'] = req_messages
        if funcs:
            tools = await self.ap.tool_mgr.generate_tools_for_anthropic(funcs)
            if tools:
                args['tools'] = tools
        try:
            role = 'assistant'  # every chunk is emitted with this role
            think_started = False  # a thinking block began; the next thinking delta gets '<think>'
            think_opened = False  # '<think>' was emitted and has not been closed yet
            think_ended = False  # the next text delta must first emit '</think>'
            finish_reason = False
            # Name/id of the tool_use block currently being streamed; the
            # argument deltas that follow do not repeat them.
            tool_name = ''
            tool_id = ''
            async for chunk in await self.client.messages.create(**args):
                content = ''
                tool_call = {'id': None, 'function': {'name': None, 'arguments': None}, 'type': 'function'}
                if isinstance(chunk, anthropic.types.raw_content_block_start_event.RawContentBlockStartEvent):
                    if chunk.content_block.type == 'tool_use':
                        if chunk.content_block.name is not None:
                            tool_name = chunk.content_block.name
                        if chunk.content_block.id is not None:
                            tool_id = chunk.content_block.id
                        tool_call['function']['name'] = tool_name
                        tool_call['function']['arguments'] = ''
                        tool_call['id'] = tool_id
                    # NOTE(review): with remove_think=False every block-start
                    # event is skipped here, while with remove_think=True it
                    # falls through and is yielded (a tool_use start then
                    # yields a tool call with empty arguments). Kept as is;
                    # confirm whether the asymmetry is intended.
                    if not remove_think:
                        if chunk.content_block.type == 'thinking':
                            think_started = True
                        elif chunk.content_block.type == 'text' and think_opened:
                            # Only close a think section that was actually opened.
                            think_ended = True
                        continue
                elif isinstance(chunk, anthropic.types.raw_content_block_delta_event.RawContentBlockDeltaEvent):
                    if chunk.delta.type == 'thinking_delta':
                        if think_started:
                            think_started = False
                            think_opened = True
                            content = '<think>\n' + chunk.delta.thinking
                        elif remove_think:
                            continue
                        else:
                            content = chunk.delta.thinking
                    elif chunk.delta.type == 'text_delta':
                        if think_ended:
                            think_ended = False
                            think_opened = False
                            content = '\n</think>\n' + chunk.delta.text
                        else:
                            content = chunk.delta.text
                    elif chunk.delta.type == 'input_json_delta':
                        tool_call['function']['arguments'] = chunk.delta.partial_json
                        tool_call['function']['name'] = tool_name
                        tool_call['id'] = tool_id
                elif isinstance(chunk, anthropic.types.raw_content_block_stop_event.RawContentBlockStopEvent):
                    continue  # end of one content block; nothing to emit
                elif isinstance(chunk, anthropic.types.raw_message_delta_event.RawMessageDeltaEvent):
                    # Any terminal stop reason ends the reply, not only
                    # 'end_turn'. 'tool_use' is still not flagged as final,
                    # as before.
                    stop_reason = chunk.delta.stop_reason
                    if stop_reason is not None and stop_reason != 'tool_use':
                        finish_reason = True
                elif isinstance(chunk, anthropic.types.raw_message_stop_event.RawMessageStopEvent):
                    continue  # end of the whole message; nothing to emit
                else:
                    self.ap.logger.debug(f'anthropic chunk: {chunk}')
                    continue
                chunk_fields = {
                    'content': content,
                    'role': role,
                    'is_final': finish_reason,
                    'tool_calls': None if tool_call['id'] is None else [tool_call],
                }
                yield provider_message.MessageChunk(**chunk_fields)
        except anthropic.AuthenticationError as e:
            raise errors.RequesterError(f'api-key 无效: {e.message}') from e
        except anthropic.BadRequestError as e:
            raise errors.RequesterError(str(e.message)) from e
        except anthropic.NotFoundError as e:
            if 'model: ' in str(e):
                raise errors.RequesterError(f'模型无效: {e.message}') from e
            else:
                raise errors.RequesterError(f'请求地址无效: {e.message}') from e
--- a/src/langbot/pkg/provider/modelmgr/requesters/anthropicmsgs.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/anthropicmsgs.yaml
@@ -7,6 +7,7 @@ metadata:
    zh_Hans: Anthropic
  icon: anthropic.svg
 spec:
  litellm_provider: anthropic
  config:
  - name: base_url
    label:
@@ -24,6 +25,8 @@ spec:
    default: 120
  support_type:
  - llm
  - text-embedding
  - rerank
  provider_category: manufacturer
 execution:
  python:
--- a/src/langbot/pkg/provider/modelmgr/requesters/baidu.svg
+++ b/src/langbot/pkg/provider/modelmgr/requesters/baidu.svg
@@ -0,0 +1,5 @@
 <svg width="60" height="50" viewBox="0 0 60 50" xmlns="http://www.w3.org/2000/svg">
  <rect width="60" height="50" rx="8" fill="#2932E1"/>
  <text x="30" y="28" font-family="Arial, sans-serif" font-size="10" font-weight="bold" fill="white" text-anchor="middle">Baidu</text>
  <text x="30" y="40" font-family="Arial, sans-serif" font-size="8" fill="white" text-anchor="middle">ERNIE</text>
 </svg>
--- a/src/langbot/pkg/provider/modelmgr/requesters/baiduchatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/baiduchatcmpl.yaml
@@ -0,0 +1,30 @@
 # Requester manifest for the "Baidu ERNIE" entry; it declares `litellm_provider: openai`.
 apiVersion: v1
 kind: LLMAPIRequester
 metadata:
  name: baidu-chat-completions
  label:
    en_US: Baidu ERNIE
    zh_Hans: 百度文心一言
  icon: baidu.svg
 spec:
  litellm_provider: openai
  config:
  - name: base_url
    label:
      en_US: Base URL
      zh_Hans: 基础 URL
    type: string
    required: true
    # NOTE(review): this default looks like Baidu's native wenxinworkshop RPC
    # path, while litellm_provider above is `openai` - confirm that the
    # endpoint accepts OpenAI-style requests.
    default: https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop
  - name: timeout
    label:
      en_US: Timeout
      zh_Hans: 超时时间
    type: integer
    required: true
    default: 120
  # Model kinds this requester is offered for.
  support_type:
  - llm
  - text-embedding
  - rerank
  provider_category: manufacturer
--- a/src/langbot/pkg/provider/modelmgr/requesters/bailianchatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/bailianchatcmpl.py
@@ -1,242 +0,0 @@
 from __future__ import annotations
 import typing
 import dashscope
 import openai
 from . import modelscopechatcmpl
 from .. import requester
 import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
 import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
 import langbot_plugin.api.entities.builtin.provider.message as provider_message
 class BailianChatCompletions(modelscopechatcmpl.ModelScopeChatCompletions):
    """Requester for the ChatCompletion API of the Alibaba Cloud Bailian model platform."""
    # Client handle; only annotated here, no value is assigned at class level.
    client: openai.AsyncClient
    # Default requester configuration: API root and a timeout of 120.
    default_config: dict[str, typing.Any] = {
        'base_url': 'https://dashscope.aliyuncs.com/compatible-mode/v1',
        'timeout': 120,
    }
    async def _closure_stream(
        self,
        query: pipeline_query.Query,
        req_messages: list[dict],
        use_model: requester.RuntimeLLMModel,
        use_funcs: list[resource_tool.LLMTool] = None,
        extra_args: dict[str, typing.Any] = {},
        remove_think: bool = False,
    ) -> provider_message.Message | typing.AsyncGenerator[provider_message.MessageChunk, None]:
        """Stream a chat completion as incremental message chunks.

        Content parts are rewritten before sending: ``image_base64`` parts
        become ``image_url`` parts, video files become ``video_url`` parts, and
        audio files switch the whole request to
        ``dashscope.MultiModalConversation``. When any video or audio part is
        present, only the last media-bearing message is kept and messages
        that carry a ``resp_message_id`` key are dropped.

        Args:
            query: The pipeline query this call belongs to (not read here).
            req_messages: Request messages as dicts. The list is copied
                shallowly, so the content-part dicts inside it are rewritten
                in place.
            use_model: Runtime model; supplies the API token and model name.
            use_funcs: Optional tools offered to the model.
            extra_args: Passed as ``extra_body`` to ``self._req_stream``.
            remove_think: When true, chunks that carry ``reasoning_content``
                are skipped.

        Yields:
            ``MessageChunk`` objects with incremental content and tool calls.
        """
        self.client.api_key = use_model.provider.token_mgr.get_token()
        args = {}
        args['model'] = use_model.model_entity.name
        if use_funcs:
            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
            if tools:
                args['tools'] = tools
        # Messages for this request.
        # NOTE(review): shallow copy - the part dicts mutated below are shared
        # with the caller's req_messages; confirm callers do not reuse them.
        messages = req_messages.copy()
        video_extensions = ('mp4', 'avi', 'mkv', 'mov', 'flv', 'wmv')
        audio_extensions = (
            'aac',
            'amr',
            'aiff',
            'flac',
            'm4a',
            'mp3',
            'mpeg',
            'ogg',
            'opus',
            'wav',
            'webm',
            'wma',
        )
        is_use_dashscope_call = False  # send through the native dashscope library
        is_enable_multi_model = True  # cleared as soon as a video/audio part is found
        use_time_num = 0  # number of video/audio parts found
        use_time_ids = []  # indexes of the messages holding those parts
        message_id = 0  # index of the message being inspected
        for msg in messages:
            if 'content' in msg and isinstance(msg['content'], list):
                for me in msg['content']:
                    if me['type'] == 'image_base64':
                        me['image_url'] = {'url': me['image_base64']}
                        me['type'] = 'image_url'
                        del me['image_base64']
                    elif me['type'] == 'file_url' and '.' in me.get('file_name', ''):
                        file_type = me.get('file_name').lower().split('.')[-1]
                        # 1. Video understanding
                        # https://bailian.console.aliyun.com/?tab=doc#/doc/?type=model&url=2845871
                        if file_type in video_extensions:
                            me['type'] = 'video_url'
                            me['video_url'] = {'url': me['file_url']}
                            del me['file_url']
                            del me['file_name']
                            use_time_num += 1
                            use_time_ids.append(message_id)
                            is_enable_multi_model = False
                        # 2. Audio recognition; cannot be passed through the
                        # OpenAI audio field, so the native library is used.
                        # https://bailian.console.aliyun.com/?tab=doc#/doc/?type=model&url=2979031
                        elif file_type in audio_extensions:
                            # The part ends up with the 'audio' key only.
                            me['audio'] = me['file_url']
                            del me['file_url']
                            del me['type']
                            del me['file_name']
                            is_use_dashscope_call = True
                            use_time_num += 1
                            use_time_ids.append(message_id)
                            is_enable_multi_model = False
            message_id += 1
        # Drop every media-bearing message except the last one.
        if not is_enable_multi_model and use_time_num > 1:
            messages = [msg for idx, msg in enumerate(messages) if idx not in use_time_ids[:-1]]
        if not is_enable_multi_model:
            messages = [msg for msg in messages if 'resp_message_id' not in msg]
        args['messages'] = messages
        args['stream'] = True
        # Streaming state.
        chunk_idx = 0
        thinking_started = False
        thinking_ended = False
        role = 'assistant'  # default role until a chunk supplies one
        # Last tool-call id/name seen; later argument deltas may arrive
        # without them, so they are carried forward.
        tool_id = ''
        tool_name = ''
        if is_use_dashscope_call:
            response = dashscope.MultiModalConversation.call(
                api_key=use_model.provider.token_mgr.get_token(),
                model=use_model.model_entity.name,
                messages=messages,
                result_format='message',
                asr_options={
                    # 'language' is left unset; language identification is enabled instead.
                    'enable_lid': True,
                    'enable_itn': False,
                },
                stream=True,
            )
            content_length_list = []
            previous_length = 0  # length of the text emitted so far
            for res in response:
                chunk = res['output']
                # Parse the chunk.
                if hasattr(chunk, 'choices') and chunk.choices:
                    choice = chunk.choices[0]
                    delta_content = choice['message'].content[0]['text']
                    finish_reason = choice['finish_reason']
                    content_length_list.append(len(delta_content))
                else:
                    delta_content = ''
                    finish_reason = None
                # Skip an empty first chunk.
                if chunk_idx == 0 and not delta_content:
                    chunk_idx += 1
                    continue
                # From the second text-bearing chunk on, emit only the slice
                # after what was already emitted; this presumes each chunk
                # carries the full text so far - TODO confirm.
                if len(content_length_list) >= 2:
                    now_content = delta_content[previous_length : content_length_list[-1]]
                    previous_length = content_length_list[-1]
                else:
                    now_content = delta_content
                    previous_length = len(delta_content)
                # Build the MessageChunk with the incremental content only.
                chunk_data = {
                    'role': role,
                    'content': now_content if now_content else None,
                    'is_final': bool(finish_reason) and finish_reason != 'null',
                }
                # Drop None values.
                chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
                yield provider_message.MessageChunk(**chunk_data)
                chunk_idx += 1
        else:
            async for chunk in self._req_stream(args, extra_body=extra_args):
                # Parse the chunk.
                if hasattr(chunk, 'choices') and chunk.choices:
                    choice = chunk.choices[0]
                    delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
                    finish_reason = getattr(choice, 'finish_reason', None)
                else:
                    delta = {}
                    finish_reason = None
                # Remember the role once a chunk supplies it.
                if 'role' in delta and delta['role']:
                    role = delta['role']
                # Incremental content.
                delta_content = delta.get('content', '')
                reasoning_content = delta.get('reasoning_content', '')
                if reasoning_content:
                    # With remove_think the whole reasoning chunk is skipped.
                    if remove_think:
                        chunk_idx += 1
                        continue
                    # First reasoning chunk: open the <think> section.
                    if not thinking_started:
                        thinking_started = True
                        delta_content = '<think>\n' + reasoning_content
                    else:
                        delta_content = reasoning_content
                elif thinking_started and not thinking_ended and delta_content:
                    # Reasoning is over and normal content begins: close the section.
                    thinking_ended = True
                    delta_content = '\n</think>\n' + delta_content
                # Tool-call deltas: fill in the remembered id/name.
                if delta.get('tool_calls'):
                    for tool_call in delta['tool_calls']:
                        # Truthiness check: neither '' nor None may overwrite
                        # the id/name remembered from an earlier delta.
                        if tool_call['id']:
                            tool_id = tool_call['id']
                        if tool_call['function']['name']:
                            tool_name = tool_call['function']['name']
                        if tool_call['type'] is None:
                            tool_call['type'] = 'function'
                        tool_call['id'] = tool_id
                        tool_call['function']['name'] = tool_name
                        tool_call['function']['arguments'] = (
                            '' if tool_call['function']['arguments'] is None else tool_call['function']['arguments']
                        )
                # Skip an empty first chunk (role only, no payload).
                if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'):
                    chunk_idx += 1
                    continue
                # Build the MessageChunk with the incremental content only.
                chunk_data = {
                    'role': role,
                    'content': delta_content if delta_content else None,
                    'tool_calls': delta.get('tool_calls'),
                    'is_final': bool(finish_reason),
                }
                # Drop None values.
                chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
                yield provider_message.MessageChunk(**chunk_data)
                chunk_idx += 1
--- a/src/langbot/pkg/provider/modelmgr/requesters/bailianchatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/bailianchatcmpl.yaml
@@ -7,6 +7,7 @@ metadata:
    zh_Hans: 阿里云百炼
  icon: bailian.png
 spec:
  litellm_provider: openai
  config:
  - name: base_url
    label:
@@ -24,6 +25,7 @@ spec:
    default: 120
  support_type:
  - llm
  - text-embedding
  - rerank
  provider_category: maas
 execution:
--- a/src/langbot/pkg/provider/modelmgr/requesters/chatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/chatcmpl.py
@@ -1,702 +0,0 @@
 from __future__ import annotations
 import asyncio
 import typing
 import openai
 import openai.types.chat.chat_completion as chat_completion_module
 import httpx
 from .. import errors, requester
 import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
 import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
 import langbot_plugin.api.entities.builtin.provider.message as provider_message
 class OpenAIChatCompletions(requester.ProviderAPIRequester):
    """OpenAI ChatCompletion API 请求器"""
    client: openai.AsyncClient
    default_config: dict[str, typing.Any] = {
        'base_url': 'https://api.openai.com/v1',
        'timeout': 120,
    }
    async def initialize(self):
        """Build the OpenAI async client from the requester configuration.

        Spaces are removed from the configured base URL, and the configured
        timeout is applied to both the OpenAI client and its httpx transport.
        """
        request_timeout = self.requester_cfg['timeout']
        endpoint = self.requester_cfg['base_url'].replace(' ', '')
        transport = httpx.AsyncClient(trust_env=True, timeout=request_timeout)
        self.client = openai.AsyncClient(
            api_key=self.init_api_key,
            base_url=endpoint,
            timeout=request_timeout,
            http_client=transport,
        )
    def _mask_api_key(self, api_key: str | None) -> str:
        if not api_key:
            return ''
        if len(api_key) <= 8:
            return '****'
        return f'{api_key[:4]}...{api_key[-4:]}'
    def _infer_model_type(self, model_id: str) -> str:
        normalized_model_id = (model_id or '').lower()
        embedding_keywords = (
            'embedding',
            'embed',
            'bge-',
            'e5-',
            'm3e',
            'gte-',
            'multilingual-e5',
            'text-embedding',
        )
        return 'embedding' if any(keyword in normalized_model_id for keyword in embedding_keywords) else 'llm'
    def _infer_model_abilities(self, item: dict[str, typing.Any], model_id: str) -> list[str]:
        normalized_model_id = (model_id or '').lower()
        abilities: set[str] = set()
        def _flatten(value: typing.Any) -> list[str]:
            if value is None:
                return []
            if isinstance(value, str):
                return [value.lower()]
            if isinstance(value, dict):
                flattened: list[str] = []
                for nested_value in value.values():
                    flattened.extend(_flatten(nested_value))
                return flattened
            if isinstance(value, (list, tuple, set)):
                flattened: list[str] = []
                for nested_value in value:
                    flattened.extend(_flatten(nested_value))
                return flattened
            return [str(value).lower()]
        capability_tokens = _flatten(item.get('capabilities'))
        capability_tokens.extend(_flatten(item.get('modalities')))
        capability_tokens.extend(_flatten(item.get('input_modalities')))
        capability_tokens.extend(_flatten(item.get('output_modalities')))
        capability_tokens.extend(_flatten(item.get('supported_generation_methods')))
        capability_tokens.extend(_flatten(item.get('supported_parameters')))
        capability_tokens.extend(_flatten(item.get('architecture')))
        combined_tokens = capability_tokens + [normalized_model_id]
        vision_keywords = (
            'vision',
            'image',
            'file',
            'video',
            'multimodal',
            'vl',
            'ocr',
            'omni',
        )
        function_call_keywords = (
            'function',
            'tool',
            'tools',
            'tool_choice',
            'tool_call',
            'tool-use',
            'tool_use',
        )
        if any(any(keyword in token for keyword in vision_keywords) for token in combined_tokens):
            abilities.add('vision')
        if any(any(keyword in token for keyword in function_call_keywords) for token in combined_tokens):
            abilities.add('func_call')
        return sorted(abilities)
    def _normalize_modalities(self, value: typing.Any) -> list[str]:
        normalized: list[str] = []
        def _collect(item: typing.Any):
            if item is None:
                return
            if isinstance(item, str):
                for part in item.replace('->', ',').replace('+', ',').split(','):
                    token = part.strip().lower()
                    if token and token not in normalized:
                        normalized.append(token)
                return
            if isinstance(item, dict):
                for nested in item.values():
                    _collect(nested)
                return
            if isinstance(item, (list, tuple, set)):
                for nested in item:
                    _collect(nested)
                return
        _collect(value)
        return normalized
    def _extract_scan_metadata(self, item: dict[str, typing.Any], model_id: str) -> dict[str, typing.Any]:
        display_name = item.get('name')
        if not isinstance(display_name, str) or not display_name.strip() or display_name == model_id:
            display_name = ''
        description = item.get('description')
        if not isinstance(description, str) or not description.strip():
            description = ''
        context_length = item.get('context_length')
        if context_length is None and isinstance(item.get('top_provider'), dict):
            context_length = item['top_provider'].get('context_length')
        if not isinstance(context_length, int):
            try:
                context_length = int(context_length) if context_length is not None else None
            except (TypeError, ValueError):
                context_length = None
        input_modalities = self._normalize_modalities(item.get('input_modalities'))
        output_modalities = self._normalize_modalities(item.get('output_modalities'))
        if isinstance(item.get('architecture'), dict):
            if not input_modalities:
                input_modalities = self._normalize_modalities(item['architecture'].get('input_modalities'))
            if not output_modalities:
                output_modalities = self._normalize_modalities(item['architecture'].get('output_modalities'))
        owned_by = item.get('owned_by')
        if not isinstance(owned_by, str) or not owned_by.strip():
            owned_by = ''
        return {
            'display_name': display_name or None,
            'description': description or None,
            'context_length': context_length,
            'owned_by': owned_by or None,
            'input_modalities': input_modalities,
            'output_modalities': output_modalities,
        }
    async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]:
        """List the models exposed by ``<base_url>/models``.

        Sends a GET request (with a Bearer token when ``api_key`` is given),
        raises on a non-success HTTP status, and returns a dict with:

        - ``models``: one entry per item carrying a truthy ``id``, enriched with
          the inferred type, abilities and scan metadata; LLMs are sorted before
          other types, then by lower-cased name.
        - ``debug``: the request that was made (API key masked) and the raw
          response payload.
        """
        base = self.requester_cfg['base_url'].rstrip('/')
        models_url = f'{base}/models'
        headers = {'Authorization': f'Bearer {api_key}'} if api_key else {}
        async with httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']) as client:
            response = await client.get(models_url, headers=headers)
            response.raise_for_status()
            payload = response.json()
        discovered = []
        for entry in payload.get('data', []):
            model_id = entry.get('id')
            if model_id:
                discovered.append(
                    {
                        'id': model_id,
                        'name': model_id,
                        'type': self._infer_model_type(model_id),
                        'abilities': self._infer_model_abilities(entry, model_id),
                        **self._extract_scan_metadata(entry, model_id),
                    }
                )
        models = sorted(discovered, key=lambda model: (model['type'] != 'llm', model['name'].lower()))
        masked_auth = f'Bearer {self._mask_api_key(api_key)}' if api_key else ''
        request_debug = {
            'method': 'GET',
            'url': models_url,
            'headers': {
                'Authorization': masked_auth,
            },
        }
        return {
            'models': models,
            'debug': {
                'request': request_debug,
                'response': payload,
            },
        }
    async def _req(
        self,
        args: dict,
        extra_body: dict = {},
    ) -> chat_completion_module.ChatCompletion:
        """Send one chat-completions request and return the client's response.

        ``args`` is expanded into keyword arguments of
        ``client.chat.completions.create``; ``extra_body`` is passed through
        unchanged.
        """
        completion = await self.client.chat.completions.create(**args, extra_body=extra_body)
        return completion
    async def _req_stream(
        self,
        args: dict,
        extra_body: dict = {},
    ):
        """Send a chat-completions request and re-yield each streamed chunk.

        The awaited result of ``client.chat.completions.create`` is iterated
        asynchronously; callers are expected to set ``args['stream']``.
        """
        stream = await self.client.chat.completions.create(**args, extra_body=extra_body)
        async for piece in stream:
            yield piece
    async def _make_msg(
        self,
        chat_completion: chat_completion_module.ChatCompletion,
        remove_think: bool = False,
    ) -> provider_message.Message:
        """Convert a ChatCompletion response into a provider Message.

        Args:
            chat_completion: Response object returned by the chat-completions call.
            remove_think: When True, chain-of-thought text is dropped from the
                content instead of being wrapped in ``<think>`` tags.

        Returns:
            A ``provider_message.Message`` built from the first choice, with the
            role defaulting to 'assistant' and ``reasoning_content`` removed.

        Raises:
            TypeError: If ``chat_completion`` is not a ChatCompletion instance.
        """
        if not isinstance(chat_completion, chat_completion_module.ChatCompletion):
            # Build the preview from str() so that non-sliceable values (None, dict, ...)
            # still produce this diagnostic instead of failing inside the f-string.
            preview = str(chat_completion)[:16]
            raise TypeError(f'Expected ChatCompletion, got {type(chat_completion).__name__}: {preview}')
        chatcmpl_message = chat_completion.choices[0].message.model_dump()
        # Make sure the role field exists and is not None.
        if 'role' not in chatcmpl_message or chatcmpl_message['role'] is None:
            chatcmpl_message['role'] = 'assistant'
        # Fold the chain-of-thought into (or strip it from) the content.
        content = chatcmpl_message.get('content', '')
        reasoning_content = chatcmpl_message.get('reasoning_content', None)
        processed_content, _ = await self._process_thinking_content(
            content=content, reasoning_content=reasoning_content, remove_think=remove_think
        )
        chatcmpl_message['content'] = processed_content
        # reasoning_content must not be forwarded to the Message constructor.
        chatcmpl_message.pop('reasoning_content', None)
        return provider_message.Message(**chatcmpl_message)
    async def _process_thinking_content(
        self,
        content: str,
        reasoning_content: str = None,
        remove_think: bool = False,
    ) -> tuple[str, str]:
        """处理思维链内容
        Args:
            content: 原始内容
            reasoning_content: reasoning_content 字段内容
            remove_think: 是否移除思维链
        Returns:
            (处理后的内容, 提取的思维链内容)
        """
        thinking_content = ''
        # 1. 从 reasoning_content 提取思维链
        if reasoning_content:
            thinking_content = reasoning_content
        # 2. 从 content 中提取 <think> 标签内容
        if content and '<think>' in content and '</think>' in content:
            import re
            think_pattern = r'<think>(.*?)</think>'
            think_matches = re.findall(think_pattern, content, re.DOTALL)
            if think_matches:
                # 如果已有 reasoning_content，则追加
                if thinking_content:
                    thinking_content += '\n' + '\n'.join(think_matches)
                else:
                    thinking_content = '\n'.join(think_matches)
                # 移除 content 中的 <think> 标签
                content = re.sub(think_pattern, '', content, flags=re.DOTALL).strip()
        # 3. 根据 remove_think 参数决定是否保留思维链
        if remove_think:
            return content, ''
        else:
            # 如果有思维链内容，将其以 <think> 格式添加到 content 开头
            if thinking_content:
                content = f'<think>\n{thinking_content}\n</think>\n{content}'.strip()
            return content, thinking_content
    async def _closure_stream(
        self,
        query: pipeline_query.Query,
        req_messages: list[dict],
        use_model: requester.RuntimeLLMModel,
        use_funcs: list[resource_tool.LLMTool] = None,
        extra_args: dict[str, typing.Any] = {},
        remove_think: bool = False,
    ) -> provider_message.MessageChunk:
        """Run one streaming chat-completions request and yield incremental MessageChunks.

        This is an async generator: each yielded chunk carries only the delta of
        that stream event (role, optional content, optional tool_calls, is_final).

        Args:
            query: Pipeline query; not read inside this method.
            req_messages: Request messages as plain dicts. The list is shallow-copied,
                so ``image_base64`` parts are rewritten in place on the caller's dicts.
            use_model: Runtime model supplying the API token and the model name.
            use_funcs: Optional tools to expose to the model.
            extra_args: Passed to the request as ``extra_body``.
            remove_think: When True, chunks carrying ``reasoning_content`` are skipped.
        """
        self.client.api_key = use_model.provider.token_mgr.get_token()
        args = {}
        args['model'] = use_model.model_entity.name
        if use_funcs:
            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
            if tools:
                args['tools'] = tools
        # Messages for this request (shallow copy of the list only).
        messages = req_messages.copy()
        # Vision: convert image_base64 parts into OpenAI-style image_url parts.
        for msg in messages:
            if 'content' in msg and isinstance(msg['content'], list):
                for me in msg['content']:
                    if me['type'] == 'image_base64':
                        me['image_url'] = {'url': me['image_base64']}
                        me['type'] = 'image_url'
                        del me['image_base64']
        args['messages'] = messages
        args['stream'] = True
        # Streaming state
        # tool_calls_map: dict[str, provider_message.ToolCall] = {}
        chunk_idx = 0
        thinking_started = False
        thinking_ended = False
        role = 'assistant'  # default role
        # Last seen tool-call id / name, reused for deltas that omit them.
        tool_id = ''
        tool_name = ''
        # accumulated_reasoning = ''  # only used to decide when the chain-of-thought ends
        async for chunk in self._req_stream(args, extra_body=extra_args):
            # Parse the chunk data
            if hasattr(chunk, 'choices') and chunk.choices:
                choice = chunk.choices[0]
                delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
                finish_reason = getattr(choice, 'finish_reason', None)
            else:
                delta = {}
                finish_reason = None
            # Take the role from the first chunk that provides one and keep using it.
            if 'role' in delta and delta['role']:
                role = delta['role']
            # Incremental content
            delta_content = delta.get('content', '')
            reasoning_content = delta.get('reasoning_content', '')
            # Handle reasoning_content
            if reasoning_content:
                # accumulated_reasoning += reasoning_content
                # With remove_think set, the whole chunk is skipped (nothing is yielded for it).
                if remove_think:
                    chunk_idx += 1
                    continue
                # First reasoning_content seen: prepend the opening <think> tag.
                if not thinking_started:
                    thinking_started = True
                    delta_content = '<think>\n' + reasoning_content
                else:
                    # Keep emitting reasoning_content as content.
                    delta_content = reasoning_content
            elif thinking_started and not thinking_ended and delta_content:
                # Reasoning is over and normal content begins: emit the closing </think> tag.
                # NOTE(review): if no normal content follows the reasoning (e.g. only tool
                # calls), this branch never runs and the tag stays open - confirm intended.
                thinking_ended = True
                delta_content = '\n</think>\n' + delta_content
            # Handle <think> tags already present in content (if they need removing)
            # if delta_content and remove_think and '<think>' in delta_content:
            #     import re
            #
            #     # Remove the <think> tags together with their content
            #     delta_content = re.sub(r'<think>.*?</think>', '', delta_content, flags=re.DOTALL)
            # Handle tool-call deltas
            # delta_tool_calls = None
            if delta.get('tool_calls'):
                for tool_call in delta['tool_calls']:
                    # A delta with both id and name starts a new call; otherwise the
                    # remembered id / name are written back onto the delta.
                    if tool_call['id'] and tool_call['function']['name']:
                        tool_id = tool_call['id']
                        tool_name = tool_call['function']['name']
                    else:
                        tool_call['id'] = tool_id
                        tool_call['function']['name'] = tool_name
                    if tool_call['type'] is None:
                        tool_call['type'] = 'function'
            # Skip an empty first chunk (role only, no content).
            if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'):
                chunk_idx += 1
                continue
            # Build the MessageChunk - incremental content only.
            chunk_data = {
                'role': role,
                'content': delta_content if delta_content else None,
                'tool_calls': delta.get('tool_calls'),
                'is_final': bool(finish_reason),
            }
            # Drop None values
            chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
            yield provider_message.MessageChunk(**chunk_data)
            chunk_idx += 1
    async def _closure(
        self,
        query: pipeline_query.Query,
        req_messages: list[dict],
        use_model: requester.RuntimeLLMModel,
        use_funcs: list[resource_tool.LLMTool] = None,
        extra_args: dict[str, typing.Any] = {},
        remove_think: bool = False,
    ) -> tuple[provider_message.Message, dict]:
        """Run one non-streaming chat-completions request.

        Sets the client's API key from the model's token manager, attaches tools
        when provided, rewrites ``image_base64`` content parts into ``image_url``
        parts, sends the request with ``extra_args`` as the extra body, and
        returns ``(message, usage_info)``. ``usage_info`` stays empty when the
        response carries no usage data.
        """
        self.client.api_key = use_model.provider.token_mgr.get_token()
        args = {'model': use_model.model_entity.name}
        if use_funcs:
            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
            if tools:
                args['tools'] = tools
        # Messages for this request (the list is copied, its dicts are shared).
        messages = req_messages.copy()
        # Vision: turn image_base64 parts into OpenAI-style image_url parts.
        for msg in messages:
            parts = msg.get('content')
            if not isinstance(parts, list):
                continue
            for part in parts:
                if part['type'] == 'image_base64':
                    part['image_url'] = {'url': part.pop('image_base64')}
                    part['type'] = 'image_url'
        args['messages'] = messages
        # Send the request and convert the result.
        resp = await self._req(args, extra_body=extra_args)
        message = await self._make_msg(resp, remove_think)
        # Extract token usage from response
        usage = getattr(resp, 'usage', None)
        usage_info = {}
        if usage:
            usage_info = {
                'input_tokens': usage.prompt_tokens or 0,
                'output_tokens': usage.completion_tokens or 0,
                'total_tokens': usage.total_tokens or 0,
            }
        return message, usage_info
    async def invoke_llm(
        self,
        query: pipeline_query.Query,
        model: requester.RuntimeLLMModel,
        messages: typing.List[provider_message.Message],
        funcs: typing.List[resource_tool.LLMTool] = None,
        extra_args: dict[str, typing.Any] = {},
        remove_think: bool = False,
    ) -> tuple[provider_message.Message, dict]:
        """Invoke LLM and return message with usage info.

        Messages are dumped to dicts (None fields excluded); a content list made
        only of text parts is joined into a single newline-separated string.
        Timeouts and OpenAI client errors are re-raised as ``errors.RequesterError``.
        """
        def _error_text(exc: Exception) -> str:
            return str(exc.message) if hasattr(exc, 'message') else str(exc)
        # req_messages is internal to this class; external state is synchronised via query.messages.
        req_messages = []
        for m in messages:
            msg_dict = m.dict(exclude_none=True)
            parts = msg_dict.get('content')
            text_only = isinstance(parts, list) and all(
                isinstance(part, dict) and part.get('type') == 'text' for part in parts
            )
            if text_only:
                # Merge all text parts into one string.
                msg_dict['content'] = '\n'.join(part['text'] for part in parts)
            req_messages.append(msg_dict)
        try:
            reply, usage = await self._closure(
                query=query,
                req_messages=req_messages,
                use_model=model,
                use_funcs=funcs,
                extra_args=extra_args,
                remove_think=remove_think,
            )
            return reply, usage
        except asyncio.TimeoutError:
            raise errors.RequesterError('请求超时')
        except openai.BadRequestError as e:
            error_message = _error_text(e)
            if 'context_length_exceeded' in str(e):
                raise errors.RequesterError(f'上文过长，请重置会话: {error_message}')
            raise errors.RequesterError(f'请求参数错误: {error_message}')
        except openai.AuthenticationError as e:
            raise errors.RequesterError(f'无效的 api-key: {_error_text(e)}')
        except openai.NotFoundError as e:
            raise errors.RequesterError(f'请求路径错误: {_error_text(e)}')
        except openai.RateLimitError as e:
            raise errors.RequesterError(f'请求过于频繁或余额不足: {_error_text(e)}')
        except openai.APIConnectionError as e:
            raise errors.RequesterError(f'连接错误: {str(e)}')
        except openai.APIError as e:
            raise errors.RequesterError(f'请求错误: {_error_text(e)}')
    async def invoke_embedding(
        self,
        model: requester.RuntimeEmbeddingModel,
        input_text: list[str],
        extra_args: dict[str, typing.Any] = {},
    ) -> tuple[list[list[float]], dict]:
        """Call the Embedding API and return ``(embeddings, usage_info)``.

        Request arguments are the model name and input, overlaid first with the
        model entity's ``extra_args`` and then with the call's ``extra_args``.
        Timeouts and bad-request errors are re-raised as ``errors.RequesterError``.
        """
        self.client.api_key = model.provider.token_mgr.get_token()
        entity = model.model_entity
        args = {'model': entity.name, 'input': input_text}
        if entity.extra_args:
            args.update(entity.extra_args)
        args.update(extra_args)
        try:
            resp = await self.client.embeddings.create(**args)
            # Extract usage info
            usage = getattr(resp, 'usage', None)
            usage_info = {}
            if usage:
                usage_info = {
                    'prompt_tokens': usage.prompt_tokens or 0,
                    'total_tokens': usage.total_tokens or 0,
                }
            vectors = [record.embedding for record in resp.data]
            return vectors, usage_info
        except asyncio.TimeoutError:
            raise errors.RequesterError('请求超时')
        except openai.BadRequestError as e:
            raise errors.RequesterError(f'请求参数错误: {e.message}')
    async def invoke_llm_stream(
        self,
        query: pipeline_query.Query,
        model: requester.RuntimeLLMModel,
        messages: typing.List[provider_message.Message],
        funcs: typing.List[resource_tool.LLMTool] = None,
        extra_args: dict[str, typing.Any] = {},
        remove_think: bool = False,
    ) -> provider_message.MessageChunk:
        """Invoke the LLM in streaming mode and yield each MessageChunk.

        Messages are dumped to dicts (None fields excluded); a content list made
        only of text parts is joined into a single newline-separated string.
        Timeouts and OpenAI client errors are re-raised as ``errors.RequesterError``.
        """
        def _as_request_dict(message: provider_message.Message) -> dict:
            payload = message.dict(exclude_none=True)
            parts = payload.get('content')
            if isinstance(parts, list) and all(
                isinstance(part, dict) and part.get('type') == 'text' for part in parts
            ):
                # Merge all text parts into one string.
                payload['content'] = '\n'.join(part['text'] for part in parts)
            return payload
        # req_messages is internal to this class; external state is synchronised via query.messages.
        req_messages = [_as_request_dict(m) for m in messages]
        try:
            chunk_stream = self._closure_stream(
                query=query,
                req_messages=req_messages,
                use_model=model,
                use_funcs=funcs,
                extra_args=extra_args,
                remove_think=remove_think,
            )
            async for item in chunk_stream:
                yield item
        except asyncio.TimeoutError:
            raise errors.RequesterError('请求超时')
        except openai.BadRequestError as e:
            if 'context_length_exceeded' in e.message:
                raise errors.RequesterError(f'上文过长，请重置会话: {e.message}')
            raise errors.RequesterError(f'请求参数错误: {e.message}')
        except openai.AuthenticationError as e:
            raise errors.RequesterError(f'无效的 api-key: {e.message}')
        except openai.NotFoundError as e:
            raise errors.RequesterError(f'请求路径错误: {e.message}')
        except openai.RateLimitError as e:
            raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}')
        except openai.APIError as e:
            raise errors.RequesterError(f'请求错误: {e.message}')
    async def invoke_rerank(
        self,
        model: requester.RuntimeRerankModel,
        query: str,
        documents: typing.List[str],
        extra_args: dict[str, typing.Any] = {},
    ) -> typing.List[dict]:
        """Standard /rerank endpoint (Jina/Cohere/SiliconFlow/Voyage/DashScope compatible)

        Supports extra_args from model.extra_args:
        - rerank_url: full URL override (e.g. "https://dashscope.aliyuncs.com/compatible-api/v1/reranks")
        - rerank_path: path override appended to base_url (e.g. "reranks" instead of default "rerank")
        - Any other fields are merged into the request payload.

        At most the first 64 documents are sent. When the returned scores differ
        by more than 1e-6 they are min-max normalised into [0, 1]; an entry
        without ``relevance_score`` is treated as scoring 0.0.

        Returns:
            The parsed result entries (see ``_parse_rerank_response``).

        Raises:
            errors.RequesterError: On an HTTP error status, a timeout, or any
                other failure while requesting or processing the response.
        """
        api_key = model.provider.token_mgr.get_token()
        base_url = self.requester_cfg.get('base_url', '').rstrip('/')
        timeout = self.requester_cfg.get('timeout', 120)
        # Call-level extra_args override the model entity's extra_args.
        merged_args = {}
        if model.model_entity.extra_args:
            merged_args.update(model.model_entity.extra_args)
        if extra_args:
            merged_args.update(extra_args)
        # Routing options are consumed here and never sent in the payload.
        rerank_url = merged_args.pop('rerank_url', None)
        rerank_path = merged_args.pop('rerank_path', 'rerank')
        if not rerank_url:
            rerank_url = f'{base_url}/{rerank_path}'
        headers = {
            'Content-Type': 'application/json',
            'Authorization': f'Bearer {api_key}',
        }
        payload = {
            'model': model.model_entity.name,
            'query': query,
            'documents': documents[:64],
            'top_n': min(len(documents), 64),
        }
        if merged_args:
            payload.update(merged_args)
        try:
            async with httpx.AsyncClient(trust_env=True, timeout=timeout) as client:
                resp = await client.post(rerank_url, headers=headers, json=payload)
                resp.raise_for_status()
                data = resp.json()
            results = self._parse_rerank_response(data)
            if results:
                scores = [r.get('relevance_score', 0.0) for r in results]
                min_score = min(scores)
                score_range = max(scores) - min_score
                if score_range > 1e-6:
                    # Normalise from the collected scores so that an entry lacking
                    # 'relevance_score' uses the same 0.0 default instead of raising KeyError.
                    for r, score in zip(results, scores):
                        r['relevance_score'] = (score - min_score) / score_range
            return results
        except httpx.HTTPStatusError as e:
            raise errors.RequesterError(f'Rerank request failed: {e.response.status_code} - {e.response.text}')
        except httpx.TimeoutException:
            raise errors.RequesterError('Rerank request timed out')
        except Exception as e:
            raise errors.RequesterError(f'Rerank request error: {str(e)}')
    @staticmethod
    def _parse_rerank_response(data: dict) -> typing.List[dict]:
        """Parse rerank response from various providers.
        Handles:
        - Jina/Cohere/SiliconFlow: {"results": [{"index", "relevance_score"}]}
        - Voyage AI: {"data": [{"index", "relevance_score"}]}
        - DashScope: {"output": {"results": [{"index", "relevance_score"}]}}
        """
        if 'results' in data:
            return data['results']
        if 'data' in data:
            return data['data']
        if 'output' in data and isinstance(data['output'], dict):
            return data['output'].get('results', [])
        return []
--- a/src/langbot/pkg/provider/modelmgr/requesters/chatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/chatcmpl.yaml
@@ -7,6 +7,7 @@ metadata:
    zh_Hans: OpenAI
  icon: openai.svg
 spec:
  litellm_provider: openai
  config:
  - name: base_url
    label:
--- a/src/langbot/pkg/provider/modelmgr/requesters/coherererank.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/coherererank.yaml
@@ -7,6 +7,7 @@ metadata:
    zh_Hans: Cohere
  icon: cohere.svg
 spec:
  litellm_provider: cohere
  config:
  - name: base_url
    label:
--- a/src/langbot/pkg/provider/modelmgr/requesters/compsharechatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/compsharechatcmpl.py
@@ -1,17 +0,0 @@
 from __future__ import annotations
 import typing
 import openai
 from . import chatcmpl
 class CompShareChatCompletions(chatcmpl.OpenAIChatCompletions):
    """Requester for the CompShare ChatCompletion API.

    Behaves like the OpenAI requester; only the default configuration differs.
    """
    client: openai.AsyncClient
    # Defaults used when the requester configuration does not override them.
    default_config: dict[str, typing.Any] = dict(
        base_url='https://api.modelverse.cn/v1',
        timeout=120,
    )
--- a/src/langbot/pkg/provider/modelmgr/requesters/compsharechatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/compsharechatcmpl.yaml
@@ -7,6 +7,7 @@ metadata:
    zh_Hans: 优云智算
  icon: compshare.png
 spec:
  litellm_provider: openai
  config:
  - name: base_url
    label:
@@ -24,6 +25,8 @@ spec:
    default: 120
  support_type:
  - llm
  - text-embedding
  - rerank
  provider_category: maas
 execution:
  python:
--- a/src/langbot/pkg/provider/modelmgr/requesters/deepseekchatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/deepseekchatcmpl.py
@@ -1,67 +0,0 @@
 from __future__ import annotations
 import typing
 from . import chatcmpl
 from .. import errors, requester
 import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
 import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
 import langbot_plugin.api.entities.builtin.provider.message as provider_message
 class DeepseekChatCompletions(chatcmpl.OpenAIChatCompletions):
    """Requester for the DeepSeek ChatCompletion API.

    Differs from the OpenAI requester in its default base URL and in flattening
    list-style (multimodal) message content to plain text before sending.
    """
    default_config: dict[str, typing.Any] = {
        'base_url': 'https://api.deepseek.com',
        'timeout': 120,
    }
    async def _closure(
        self,
        query: pipeline_query.Query,
        req_messages: list[dict],
        use_model: requester.RuntimeLLMModel,
        use_funcs: list[resource_tool.LLMTool] = None,
        extra_args: dict[str, typing.Any] = {},
        remove_think: bool = False,
    ) -> tuple[provider_message.Message, dict]:
        """Run one non-streaming request with text-only message content.

        Args:
            query: Pipeline query; not read inside this method.
            req_messages: Request messages as plain dicts. They are not modified;
                messages with list content are replaced by flattened copies.
            use_model: Runtime model supplying the API token and the model name.
            use_funcs: Optional tools to expose to the model.
            extra_args: Passed to the request as ``extra_body``.
            remove_think: Forwarded to ``_make_msg``.

        Returns:
            ``(message, usage_info)``; ``usage_info`` is empty when the response
            carries no usage data.

        Raises:
            errors.RequesterError: If the request returns None.
        """
        self.client.api_key = use_model.provider.token_mgr.get_token()
        args = {}
        args['model'] = use_model.model_entity.name
        if use_funcs:
            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
            if tools:
                args['tools'] = tools
        # DeepSeek does not support multimodal input: join the text parts of list
        # content into one string. Flattened messages are built as copies so the
        # caller's dicts are never overwritten.
        messages = []
        for m in req_messages:
            if isinstance(m.get('content'), list):
                m = {**m, 'content': ' '.join([c['text'] for c in m['content'] if 'text' in c])}
            messages.append(m)
        args['messages'] = messages
        # Send the request
        resp = await self._req(args, extra_body=extra_args)
        if resp is None:
            raise errors.RequesterError('接口返回为空，请确定模型提供商服务是否正常')
        # Convert the response
        message = await self._make_msg(resp, remove_think)
        # Extract token usage from response
        usage_info = {}
        if hasattr(resp, 'usage') and resp.usage:
            usage_info['input_tokens'] = resp.usage.prompt_tokens or 0
            usage_info['output_tokens'] = resp.usage.completion_tokens or 0
            usage_info['total_tokens'] = resp.usage.total_tokens or 0
        return message, usage_info
--- a/src/langbot/pkg/provider/modelmgr/requesters/deepseekchatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/deepseekchatcmpl.yaml
@@ -7,6 +7,7 @@ metadata:
    zh_Hans: DeepSeek
  icon: deepseek.svg
 spec:
  litellm_provider: deepseek
  config:
  - name: base_url
    label:
@@ -24,6 +25,8 @@ spec:
    default: 120
  support_type:
  - llm
  - text-embedding
  - rerank
  provider_category: manufacturer
 execution:
  python:
--- a/src/langbot/pkg/provider/modelmgr/requesters/doubao.svg
+++ b/src/langbot/pkg/provider/modelmgr/requesters/doubao.svg
@@ -0,0 +1,4 @@
 <svg width="60" height="50" viewBox="0 0 60 50" xmlns="http://www.w3.org/2000/svg">
  <rect width="60" height="50" rx="8" fill="#3B82F6"/>
  <text x="30" y="32" font-family="Arial, sans-serif" font-size="12" font-weight="bold" fill="white" text-anchor="middle">豆包</text>
 </svg>
--- a/src/langbot/pkg/provider/modelmgr/requesters/doubaochatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/doubaochatcmpl.yaml
@@ -0,0 +1,30 @@
 apiVersion: v1
 kind: LLMAPIRequester
 metadata:
  name: doubao-chat-completions
  label:
    en_US: ByteDance Doubao
    zh_Hans: 字节豆包
  icon: doubao.svg
 spec:
  litellm_provider: openai
  config:
  - name: base_url
    label:
      en_US: Base URL
      zh_Hans: 基础 URL
    type: string
    required: true
    default: https://ark.cn-beijing.volces.com/api/v3
  - name: timeout
    label:
      en_US: Timeout
      zh_Hans: 超时时间
    type: integer
    required: true
    default: 120
  support_type:
  - llm
  - text-embedding
  - rerank
  provider_category: manufacturer
--- a/src/langbot/pkg/provider/modelmgr/requesters/geminichatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/geminichatcmpl.py
@@ -1,205 +0,0 @@
 from __future__ import annotations
 import typing
 import httpx
 from . import chatcmpl
 import uuid
 from .. import requester
 import langbot_plugin.api.entities.builtin.provider.message as provider_message
 import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
 import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
 class GeminiChatCompletions(chatcmpl.OpenAIChatCompletions):
    """Requester for Google Gemini through its OpenAI-compatible endpoint."""

    default_config: dict[str, typing.Any] = {
        'base_url': 'https://generativelanguage.googleapis.com/v1beta/openai',
        'timeout': 120,
    }

    async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]:
        """List the models returned by the Gemini ``/v1beta/models`` endpoint.

        Pages are followed through ``nextPageToken`` until the token is empty.

        Args:
            api_key: Sent as the ``key`` query parameter when provided.

        Returns:
            A dict with ``models`` (LLMs first, then sorted by lower-cased
            name) and ``debug`` (the request description plus the payload of
            the *last* page only).

        Raises:
            httpx.HTTPStatusError: when any page responds with an error status.
        """
        models_url = 'https://generativelanguage.googleapis.com/v1beta/models'
        params = {'key': api_key} if api_key else {}
        all_models: list[dict[str, typing.Any]] = []
        next_page_token = ''
        last_payload: dict[str, typing.Any] = {}
        async with httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']) as client:
            while True:
                request_params = dict(params)
                if next_page_token:
                    request_params['pageToken'] = next_page_token
                response = await client.get(models_url, params=request_params)
                response.raise_for_status()
                payload = response.json()
                last_payload = payload
                for item in payload.get('models', []):
                    model_name = item.get('name', '')
                    model_id = model_name.replace('models/', '', 1)
                    if not model_id:
                        continue
                    supported_methods = item.get('supportedGenerationMethods', []) or []
                    # A model that can embed but cannot generate is an embedding model.
                    if 'embedContent' in supported_methods and 'generateContent' not in supported_methods:
                        model_type = 'embedding'
                    else:
                        model_type = 'llm'
                    all_models.append(
                        {
                            'id': model_id,
                            'name': model_id,
                            'type': model_type,
                            'abilities': self._infer_model_abilities(item, model_id),
                            'display_name': item.get('displayName') or None,
                            'description': item.get('description') or None,
                            'context_length': item.get('inputTokenLimit'),
                            'input_modalities': self._normalize_modalities(item.get('inputModalities')),
                            'output_modalities': self._normalize_modalities(item.get('outputModalities')),
                        }
                    )
                next_page_token = payload.get('nextPageToken', '')
                if not next_page_token:
                    break
        all_models.sort(key=lambda item: (item['type'] != 'llm', item['name'].lower()))
        return {
            'models': all_models,
            'debug': {
                'request': {
                    'method': 'GET',
                    'url': models_url,
                    # The key is passed through _mask_api_key before being echoed back.
                    'query': {'key': self._mask_api_key(api_key)} if api_key else {},
                },
                'response': last_payload,
            },
        }

    @staticmethod
    def _with_image_url_parts(req_messages: list[dict]) -> list[dict]:
        """Return request messages with ``image_base64`` parts rewritten as ``image_url``.

        Only messages that actually contain a list ``content`` are rebuilt, and
        they are rebuilt as new dicts so the caller's messages are not mutated.
        """
        messages: list[dict] = []
        for msg in req_messages:
            content = msg.get('content')
            if isinstance(content, list):
                parts = []
                for part in content:
                    if isinstance(part, dict) and part.get('type') == 'image_base64':
                        converted = {k: v for k, v in part.items() if k != 'image_base64'}
                        converted['type'] = 'image_url'
                        converted['image_url'] = {'url': part['image_base64']}
                        part = converted
                    parts.append(part)
                msg = {**msg, 'content': parts}
            messages.append(msg)
        return messages

    async def _closure_stream(
        self,
        query: pipeline_query.Query,
        req_messages: list[dict],
        use_model: requester.RuntimeLLMModel,
        use_funcs: list[resource_tool.LLMTool] | None = None,
        extra_args: dict[str, typing.Any] | None = None,
        remove_think: bool = False,
    ) -> typing.AsyncGenerator[provider_message.MessageChunk, None]:
        """Stream a chat completion and yield incremental ``MessageChunk`` objects.

        Args:
            query: The pipeline query (not read by this method).
            req_messages: OpenAI-style message dicts; left unmodified.
            use_model: Runtime model supplying the API token and model name.
            use_funcs: Optional tools to expose to the model.
            extra_args: Extra request body fields; ``None`` means no extras.
            remove_think: When true, chunks carrying ``reasoning_content`` are dropped.

        Yields:
            One chunk per non-empty delta. Reasoning text is wrapped in
            ``<think>`` / ``</think>`` markers when it is kept.
        """
        self.client.api_key = use_model.provider.token_mgr.get_token()
        args: dict[str, typing.Any] = {'model': use_model.model_entity.name}
        if use_funcs:
            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
            if tools:
                args['tools'] = tools
        args['messages'] = self._with_image_url_parts(req_messages)
        args['stream'] = True

        # Streaming state carried across chunks.
        chunk_idx = 0
        thinking_started = False
        thinking_ended = False
        role = 'assistant'  # default until the stream reports one
        tool_id = ''
        tool_name = ''
        async for chunk in self._req_stream(args, extra_body=extra_args or {}):
            if hasattr(chunk, 'choices') and chunk.choices:
                choice = chunk.choices[0]
                delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
                finish_reason = getattr(choice, 'finish_reason', None)
            else:
                delta = {}
                finish_reason = None
            # The role normally arrives on the first chunk only; remember it.
            if delta.get('role'):
                role = delta['role']
            delta_content = delta.get('content', '')
            reasoning_content = delta.get('reasoning_content', '')
            if reasoning_content:
                if remove_think:
                    chunk_idx += 1
                    continue
                if not thinking_started:
                    # First reasoning fragment: open the think block.
                    thinking_started = True
                    delta_content = '<think>\n' + reasoning_content
                else:
                    delta_content = reasoning_content
            elif thinking_started and not thinking_ended and delta_content:
                # Reasoning is over and normal content begins: close the think block.
                thinking_ended = True
                delta_content = '\n</think>\n' + delta_content
            tool_calls = delta.get('tool_calls')
            if tool_calls:
                for tool_call in tool_calls:
                    # Keep an id supplied by the provider; only synthesise one
                    # when the stream never supplied any (empty string or None).
                    if tool_call.get('id'):
                        tool_id = tool_call['id']
                    elif not tool_id:
                        tool_id = str(uuid.uuid4())
                    if tool_call['function']['name']:
                        tool_name = tool_call['function']['name']
                    tool_call['id'] = tool_id
                    tool_call['function']['name'] = tool_name
                    if tool_call.get('type') is None:
                        tool_call['type'] = 'function'
            # Skip an empty first chunk (role only, no payload).
            if chunk_idx == 0 and not delta_content and not reasoning_content and not tool_calls:
                chunk_idx += 1
                continue
            chunk_data = {
                'role': role,
                'content': delta_content if delta_content else None,
                'tool_calls': tool_calls,
                'is_final': bool(finish_reason),
            }
            # Drop unset fields so MessageChunk applies its own defaults.
            chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
            yield provider_message.MessageChunk(**chunk_data)
            chunk_idx += 1
--- a/src/langbot/pkg/provider/modelmgr/requesters/geminichatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/geminichatcmpl.yaml
@@ -7,6 +7,7 @@ metadata:
    zh_Hans: Google Gemini
  icon: gemini.svg
 spec:
  litellm_provider: gemini
  config:
  - name: base_url
    label:
@@ -24,6 +25,8 @@ spec:
    default: 120
  support_type:
  - llm
  - text-embedding
  - rerank
  provider_category: manufacturer
 execution:
  python:
--- a/src/langbot/pkg/provider/modelmgr/requesters/giteeaichatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/giteeaichatcmpl.py
@@ -1,15 +0,0 @@
 from __future__ import annotations
 import typing
 from . import ppiochatcmpl
 class GiteeAIChatCompletions(ppiochatcmpl.PPIOChatCompletions):
    """Requester for the Gitee AI ChatCompletions API."""

    # Only the endpoint defaults differ from the PPIO requester this class extends.
    default_config: dict[str, typing.Any] = dict(
        base_url='https://ai.gitee.com/v1',
        timeout=120,
    )
--- a/src/langbot/pkg/provider/modelmgr/requesters/giteeaichatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/giteeaichatcmpl.yaml
@@ -7,6 +7,7 @@ metadata:
    zh_Hans: Gitee AI
  icon: giteeai.svg
 spec:
  litellm_provider: openai
  config:
  - name: base_url
    label:
--- a/src/langbot/pkg/provider/modelmgr/requesters/groq.svg
+++ b/src/langbot/pkg/provider/modelmgr/requesters/groq.svg
@@ -0,0 +1,4 @@
 <svg width="60" height="50" viewBox="0 0 60 50" xmlns="http://www.w3.org/2000/svg">
  <rect width="60" height="50" rx="8" fill="#F97316"/>
  <text x="30" y="32" font-family="Arial, sans-serif" font-size="14" font-weight="bold" fill="white" text-anchor="middle">Groq</text>
 </svg>
--- a/src/langbot/pkg/provider/modelmgr/requesters/groqchatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/groqchatcmpl.yaml
@@ -0,0 +1,30 @@
 apiVersion: v1
 kind: LLMAPIRequester
 metadata:
  name: groq-chat-completions
  label:
    en_US: Groq
    zh_Hans: Groq
  icon: groq.svg
 spec:
  litellm_provider: groq
  config:
  - name: base_url
    label:
      en_US: Base URL
      zh_Hans: 基础 URL
    type: string
    required: true
    default: https://api.groq.com/openai/v1
  - name: timeout
    label:
      en_US: Timeout
      zh_Hans: 超时时间
    type: integer
    required: true
    default: 120
  support_type:
  - llm
  - text-embedding
  - rerank
  provider_category: manufacturer
--- a/src/langbot/pkg/provider/modelmgr/requesters/iflytek.svg
+++ b/src/langbot/pkg/provider/modelmgr/requesters/iflytek.svg
@@ -0,0 +1,5 @@
 <svg width="60" height="50" viewBox="0 0 60 50" xmlns="http://www.w3.org/2000/svg">
  <rect width="60" height="50" rx="8" fill="#0066FF"/>
  <text x="30" y="28" font-family="Arial, sans-serif" font-size="10" font-weight="bold" fill="white" text-anchor="middle">iFlytek</text>
  <text x="30" y="40" font-family="Arial, sans-serif" font-size="8" fill="white" text-anchor="middle">Spark</text>
 </svg>
--- a/src/langbot/pkg/provider/modelmgr/requesters/iflytekchatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/iflytekchatcmpl.yaml
@@ -0,0 +1,30 @@
 apiVersion: v1
 kind: LLMAPIRequester
 metadata:
  name: iflytek-chat-completions
  label:
    en_US: iFlytek Spark
    zh_Hans: 讯飞星火
  icon: iflytek.svg
 spec:
  litellm_provider: openai
  config:
  - name: base_url
    label:
      en_US: Base URL
      zh_Hans: 基础 URL
    type: string
    required: true
    default: https://spark-api-open.xf-yun.com/v1
  - name: timeout
    label:
      en_US: Timeout
      zh_Hans: 超时时间
    type: integer
    required: true
    default: 120
  support_type:
  - llm
  - text-embedding
  - rerank
  provider_category: manufacturer
--- a/src/langbot/pkg/provider/modelmgr/requesters/jiekouaichatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/jiekouaichatcmpl.py
@@ -1,208 +0,0 @@
 from __future__ import annotations
 import openai
 import typing
 from . import chatcmpl
 from .. import requester
 import openai.types.chat.chat_completion as chat_completion
 import re
 import langbot_plugin.api.entities.builtin.provider.message as provider_message
 import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
 import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
 class JieKouAIChatCompletions(chatcmpl.OpenAIChatCompletions):
    """Requester for the JieKou AI ChatCompletion API."""

    client: openai.AsyncClient
    default_config: dict[str, typing.Any] = {
        'base_url': 'https://api.jiekou.ai/openai',
        'timeout': 120,
    }
    is_think: bool = False

    async def _make_msg(
        self,
        chat_completion: chat_completion.ChatCompletion,
        remove_think: bool,
    ) -> provider_message.Message:
        """Build a ``Message`` from the first choice of a non-streaming completion.

        Args:
            chat_completion: The completion returned by the API.
            remove_think: Forwarded to ``_process_thinking_content``.

        Returns:
            The message with ``reasoning_content`` folded into (or stripped
            from) ``content`` and removed as a separate field.
        """
        chatcmpl_message = chat_completion.choices[0].message.model_dump()
        # Make sure a role is always present.
        if chatcmpl_message.get('role') is None:
            chatcmpl_message['role'] = 'assistant'
        # reasoning_content must not be forwarded to Message, so pop it here.
        reasoning_content = chatcmpl_message.pop('reasoning_content', None)
        chatcmpl_message['content'] = await self._process_thinking_content(
            chatcmpl_message.get('content'), reasoning_content, remove_think
        )
        return provider_message.Message(**chatcmpl_message)

    async def _process_thinking_content(
        self,
        content: str | None,
        reasoning_content: str | None = None,
        remove_think: bool = False,
    ) -> str | None:
        """Apply the chain-of-thought policy to a response body.

        Args:
            content: The original content; may be ``None`` (for example a
                response that only carries tool calls).
            reasoning_content: Value of the separate ``reasoning_content`` field.
            remove_think: When true, strip ``<think>...</think>`` blocks from
                ``content``; otherwise prepend ``reasoning_content`` wrapped in
                think markers.

        Returns:
            The processed content. ``None`` is passed through when there is
            nothing to strip or prepend.
        """
        if remove_think:
            if content is not None:
                content = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL)
        elif reasoning_content is not None:
            content = '<think>\n' + reasoning_content + '\n</think>\n' + (content or '')
        return content

    async def _make_msg_chunk(
        self,
        delta: dict[str, typing.Any],
        idx: int,
    ) -> provider_message.MessageChunk:
        """Build a ``MessageChunk`` from a streaming delta.

        ``delta`` is updated in place: a missing role becomes ``'assistant'``,
        a missing/``None`` content becomes ``''`` and any ``reasoning_content``
        is appended to the content. ``idx`` is accepted but not used.
        """
        if delta.get('role') is None:
            delta['role'] = 'assistant'
        reasoning_content = delta.get('reasoning_content')
        content = delta.get('content')
        delta['content'] = '' if content is None else content
        if reasoning_content is not None:
            delta['content'] += reasoning_content
        return provider_message.MessageChunk(**delta)

    @staticmethod
    def _with_image_url_parts(req_messages: list[dict]) -> list[dict]:
        """Return request messages with ``image_base64`` parts rewritten as ``image_url``.

        Affected messages are rebuilt as new dicts so the caller's messages
        are not mutated.
        """
        messages: list[dict] = []
        for msg in req_messages:
            content = msg.get('content')
            if isinstance(content, list):
                parts = []
                for part in content:
                    if isinstance(part, dict) and part.get('type') == 'image_base64':
                        converted = {k: v for k, v in part.items() if k != 'image_base64'}
                        converted['type'] = 'image_url'
                        converted['image_url'] = {'url': part['image_base64']}
                        part = converted
                    parts.append(part)
                msg = {**msg, 'content': parts}
            messages.append(msg)
        return messages

    async def _closure_stream(
        self,
        query: pipeline_query.Query,
        req_messages: list[dict],
        use_model: requester.RuntimeLLMModel,
        use_funcs: list[resource_tool.LLMTool] | None = None,
        extra_args: dict[str, typing.Any] | None = None,
        remove_think: bool = False,
    ) -> typing.AsyncGenerator[provider_message.MessageChunk, None]:
        """Stream a chat completion and yield incremental ``MessageChunk`` objects.

        Args:
            query: The pipeline query (not read by this method).
            req_messages: OpenAI-style message dicts; left unmodified.
            use_model: Runtime model supplying the API token and model name.
            use_funcs: Optional tools to expose to the model.
            extra_args: Extra request body fields; ``None`` means no extras.
            remove_think: When true, content between an inline ``<think>``
                chunk and the ``</think>`` chunk (and the ``'\\n\\n'`` chunk
                right after it) is not yielded.

        Yields:
            One chunk per delta that survives the filtering above.
        """
        self.client.api_key = use_model.provider.token_mgr.get_token()
        args: dict[str, typing.Any] = {'model': use_model.model_entity.name}
        if use_funcs:
            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
            if tools:
                args['tools'] = tools
        args['messages'] = self._with_image_url_parts(req_messages)
        args['stream'] = True

        chunk_idx = 0
        thinking_started = False
        thinking_ended = False
        role = 'assistant'  # default until the stream reports one
        # Initialised up front so a continuation delta that arrives before any
        # id/name was seen cannot hit an unbound local.
        tool_id = ''
        tool_name = ''
        async for chunk in self._req_stream(args, extra_body=extra_args or {}):
            if hasattr(chunk, 'choices') and chunk.choices:
                choice = chunk.choices[0]
                delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
                finish_reason = getattr(choice, 'finish_reason', None)
            else:
                delta = {}
                finish_reason = None
            # The role normally arrives on the first chunk only; remember it.
            if delta.get('role'):
                role = delta['role']
            delta_content = delta.get('content', '')
            # .get() (not indexing) so an empty delta is tolerated.
            raw_content = delta.get('content')
            if remove_think and raw_content is not None:
                if '<think>' in raw_content and not thinking_started and not thinking_ended:
                    thinking_started = True
                    continue
                elif raw_content == r'</think>' and not thinking_ended:
                    thinking_ended = True
                    continue
                elif thinking_ended and raw_content == '\n\n' and thinking_started:
                    # Swallow the blank separator that follows the closing tag.
                    thinking_started = False
                    continue
                elif thinking_started and not thinking_ended:
                    continue
            tool_calls = delta.get('tool_calls')
            if tool_calls:
                for tool_call in tool_calls:
                    # Remember id/name from the opening delta and reuse them for
                    # continuation deltas that omit them.
                    if tool_call['id'] and tool_call['function']['name']:
                        tool_id = tool_call['id']
                        tool_name = tool_call['function']['name']
                    if tool_call['id'] is None:
                        tool_call['id'] = tool_id
                    if tool_call['function']['name'] is None:
                        tool_call['function']['name'] = tool_name
                    if tool_call['function']['arguments'] is None:
                        tool_call['function']['arguments'] = ''
                    if tool_call['type'] is None:
                        tool_call['type'] = 'function'
            # Skip an empty first chunk (role only, no payload).
            if chunk_idx == 0 and not delta_content and not tool_calls:
                chunk_idx += 1
                continue
            chunk_data = {
                'role': role,
                'content': delta_content if delta_content else None,
                'tool_calls': tool_calls,
                'is_final': bool(finish_reason),
            }
            # Drop unset fields so MessageChunk applies its own defaults.
            chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
            yield provider_message.MessageChunk(**chunk_data)
            chunk_idx += 1
--- a/src/langbot/pkg/provider/modelmgr/requesters/jiekouaichatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/jiekouaichatcmpl.yaml
@@ -7,6 +7,7 @@ metadata:
    zh_Hans: 接口 AI
  icon: jiekouai.png
 spec:
  litellm_provider: openai
  config:
  - name: base_url
    label:
--- a/src/langbot/pkg/provider/modelmgr/requesters/jinarerank.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/jinarerank.yaml
@@ -7,6 +7,7 @@ metadata:
    zh_Hans: Jina
  icon: jina.svg
 spec:
  litellm_provider: openai
  config:
  - name: base_url
    label:
--- a/src/langbot/pkg/provider/modelmgr/requesters/litellmchat.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/litellmchat.py
@@ -0,0 +1,644 @@
 """LiteLLM unified requester for chat, embedding, and rerank."""
 from __future__ import annotations
 import typing
 import litellm
 from litellm import acompletion, aembedding, arerank
 from .. import errors, requester
 import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
 import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
 import langbot_plugin.api.entities.builtin.provider.message as provider_message
 class LiteLLMRequester(requester.ProviderAPIRequester):
    """LiteLLM unified API requester supporting chat, embedding, and rerank."""
    _EMBEDDING_MODEL_HINTS = ('embedding', 'embed', 'bge-', 'e5-', 'm3e', 'gte-', 'text-embedding')
    _RERANK_MODEL_HINTS = ('rerank', 're-rank', 're_rank')
    default_config: dict[str, typing.Any] = {
        'base_url': '',
        'timeout': 120,
        'custom_llm_provider': '',
        'drop_params': False,
        'num_retries': 0,
        'api_version': '',
    }
    async def initialize(self):
        """Initialize LiteLLM client settings."""
        # LiteLLM doesn't require explicit client initialization
        # Configuration is passed per-request via litellm params
        pass
    def _build_litellm_model_name(self, model_name: str, custom_llm_provider: str | None = None) -> str:
        """Build LiteLLM model name with provider prefix if needed."""
        provider = custom_llm_provider or self.requester_cfg.get('custom_llm_provider', '')
        if provider:
            # LiteLLM format: provider/model_name
            if model_name.startswith(f'{provider}/'):
                return model_name
            return f'{provider}/{model_name}'
        # If no custom provider, assume model_name already includes prefix or is OpenAI-compatible
        return model_name
    def _get_custom_llm_provider(self) -> str | None:
        return self.requester_cfg.get('custom_llm_provider') or None
    def _safe_litellm_bool_helper(self, helper_name: str, model_name: str) -> bool:
        """Ask a LiteLLM capability helper about a model, never raising.

        The helper named ``helper_name`` is looked up on the ``litellm`` module
        and called with several (model, provider) spellings in turn: the bare
        name with the configured provider, the provider-prefixed name, then
        names prefixed with each guessed metadata provider.

        Returns:
            ``True`` as soon as one spelling reports support; ``False`` when
            the helper is missing, every spelling says no, or every call raises.
        """
        capability_check = getattr(litellm, helper_name, None)
        if not callable(capability_check):
            return False

        lookups: list[tuple[str, str | None]] = [(model_name, self._get_custom_llm_provider())]
        prefixed_name = self._build_litellm_model_name(model_name)
        if prefixed_name != model_name:
            lookups.append((prefixed_name, None))
        lookups.extend((f'{guess}/{model_name}', None) for guess in self._metadata_provider_candidates(model_name))

        # dict.fromkeys drops repeated spellings while keeping first-seen order.
        for lookup_model, lookup_provider in dict.fromkeys(lookups):
            try:
                supported = bool(capability_check(model=lookup_model, custom_llm_provider=lookup_provider))
            except Exception:
                # Metadata gaps must not fail the request; try the next spelling.
                continue
            if supported:
                return True
        return False
    def _context_length_from_scan_payload(self, model_payload: dict[str, typing.Any] | None) -> int | None:
        if not model_payload:
            return None
        for field_name in ('context_length', 'context_window', 'max_context_length'):
            value = model_payload.get(field_name)
            if isinstance(value, bool):
                continue
            if isinstance(value, int) and value > 0:
                return value
            if isinstance(value, str) and value.isdigit():
                parsed_value = int(value)
                if parsed_value > 0:
                    return parsed_value
        return None
    def _metadata_provider_candidates(self, model_name: str) -> list[str]:
        normalized_model_name = (model_name or '').lower()
        candidates = []
        if normalized_model_name.startswith(('moonshot-', 'kimi-')):
            candidates.append('moonshot')
        if normalized_model_name.startswith('deepseek-'):
            candidates.append('deepseek')
        base_url = self.requester_cfg.get('base_url', '').lower()
        if 'moonshot' in base_url:
            candidates.append('moonshot')
        if 'deepseek' in base_url:
            candidates.append('deepseek')
        deduped_candidates = []
        for candidate in candidates:
            if candidate not in deduped_candidates:
                deduped_candidates.append(candidate)
        return deduped_candidates
    def _known_context_length_fallback(self, model_name: str) -> int | None:
        normalized_model_name = (model_name or '').lower()
        if normalized_model_name.startswith('deepseek-v4-'):
            return 1_000_000
        if normalized_model_name.startswith(('kimi-k2.5', 'kimi-k2.6')):
            return 256 * 1024
        if normalized_model_name.startswith('moonshot-v1-8k'):
            return 8 * 1024
        if normalized_model_name.startswith('moonshot-v1-32k'):
            return 32 * 1024
        if normalized_model_name.startswith('moonshot-v1-128k') or normalized_model_name == 'moonshot-v1-auto':
            return 128 * 1024
        return None
    def _safe_context_length(self, model_name: str) -> int | None:
        """Look up a context length via LiteLLM, falling back to the built-in table.

        ``litellm.get_max_tokens`` is tried with the bare name, the
        provider-prefixed name and names prefixed with each guessed metadata
        provider; lookups that raise are skipped. The first positive integer
        wins. When the helper is unavailable or nothing usable comes back, the
        result of ``_known_context_length_fallback`` is returned.

        NOTE(review): for some models LiteLLM's ``get_max_tokens`` may report
        the output-token limit rather than the input context window — confirm
        against the LiteLLM documentation before relying on this value.
        """
        max_tokens_lookup = getattr(litellm, 'get_max_tokens', None)
        if callable(max_tokens_lookup):
            names = [model_name]
            prefixed_name = self._build_litellm_model_name(model_name)
            if prefixed_name != model_name:
                names.append(prefixed_name)
            names.extend(f'{guess}/{model_name}' for guess in self._metadata_provider_candidates(model_name))

            # dict.fromkeys skips repeated spellings while keeping their order.
            for name in dict.fromkeys(names):
                try:
                    reported = max_tokens_lookup(name)
                except Exception:
                    continue
                if isinstance(reported, int) and reported > 0:
                    return reported
        return self._known_context_length_fallback(model_name)
    def _supports_function_calling(self, model_name: str) -> bool:
        """Report whether LiteLLM's ``supports_function_calling`` helper accepts the model."""
        return self._safe_litellm_bool_helper(
            helper_name='supports_function_calling',
            model_name=model_name,
        )
    def _supports_vision(self, model_name: str) -> bool:
        """Report whether LiteLLM's ``supports_vision`` helper accepts the model."""
        return self._safe_litellm_bool_helper(
            helper_name='supports_vision',
            model_name=model_name,
        )
    def _infer_model_type(self, model_id: str) -> str:
        normalized_id = (model_id or '').lower()
        if any(kw in normalized_id for kw in self._RERANK_MODEL_HINTS):
            return 'rerank'
        if any(kw in normalized_id for kw in self._EMBEDDING_MODEL_HINTS):
            return 'embedding'
        return 'llm'
    def _enrich_scanned_model(
        self,
        model_id: str,
        model_payload: dict[str, typing.Any] | None = None,
    ) -> dict[str, typing.Any]:
        model_type = self._infer_model_type(model_id)
        scanned_model: dict[str, typing.Any] = {
            'id': model_id,
            'name': model_id,
            'type': model_type,
        }
        if model_type == 'llm':
            abilities = []
            if self._supports_function_calling(model_id):
                abilities.append('func_call')
            supports_provider_reported_vision = bool(
                model_payload
                and (model_payload.get('supports_image_in') is True or model_payload.get('supports_vision') is True)
            )
            if supports_provider_reported_vision or self._supports_vision(model_id):
                abilities.append('vision')
            scanned_model['abilities'] = abilities
            context_length = self._context_length_from_scan_payload(model_payload)
            if context_length is None:
                context_length = self._safe_context_length(model_id)
            if context_length is not None:
                scanned_model['context_length'] = context_length
        return scanned_model
    def _convert_messages(self, messages: typing.List[provider_message.Message]) -> list[dict]:
        """Convert LangBot messages to LiteLLM/OpenAI format."""
        req_messages = []
        for m in messages:
            msg_dict = m.dict(exclude_none=True)
            content = msg_dict.get('content')
            if isinstance(content, list):
                for part in content:
                    if isinstance(part, dict) and part.get('type') == 'image_base64':
                        part['image_url'] = {'url': part['image_base64']}
                        part['type'] = 'image_url'
                        del part['image_base64']
            req_messages.append(msg_dict)
        return req_messages
    def _process_thinking_content(self, content: str, reasoning_content: str | None, remove_think: bool) -> str:
        """Process thinking/reasoning content.
        Args:
            content: The main content from response
            reasoning_content: Separate reasoning content from model
            remove_think: If True, remove thinking markers; if False, preserve them
        Returns:
            Processed content string
        """
        # Extract and handle thinking tags
        if content and 'CRETIRE_REASONING_BEGINk' in content and 'CRETIRE_REASONING_ENDk' in content:
            import re
            think_pattern = r'CRETIRE_REASONING_BEGINk(.*?)CRETIRE_REASONING_ENDk'
            if remove_think:
                # Remove thinking tags and their content from output
                content = re.sub(think_pattern, '', content, flags=re.DOTALL).strip()
            # else: preserve thinking content as-is
        # Handle separate reasoning_content field
        # Currently we don't include reasoning_content in user-facing output regardless of remove_think
        # because it's typically internal model reasoning, not user-visible thinking
        return content or ''
    @staticmethod
    def _normalize_usage(usage: typing.Any) -> dict:
        """Normalize a LiteLLM/OpenAI usage object into a plain token dict.
        Handles several real-world shapes returned by different upstreams:
        - object with ``prompt_tokens`` / ``completion_tokens`` / ``total_tokens`` attrs
        - dict with the same keys
        - missing ``total_tokens`` (derived from prompt + completion)
        - ``None`` / partially-populated usage (defaults to 0)
        """
        if usage is None:
            return {'prompt_tokens': 0, 'completion_tokens': 0, 'total_tokens': 0}
        def _get(key: str) -> typing.Any:
            if isinstance(usage, dict):
                return usage.get(key)
            return getattr(usage, key, None)
        prompt_tokens = _get('prompt_tokens') or 0
        completion_tokens = _get('completion_tokens') or 0
        total_tokens = _get('total_tokens') or 0
        # Some providers omit total_tokens in streaming usage; derive it.
        if not total_tokens:
            total_tokens = prompt_tokens + completion_tokens
        return {
            'prompt_tokens': int(prompt_tokens),
            'completion_tokens': int(completion_tokens),
            'total_tokens': int(total_tokens),
        }
    def _extract_usage(self, response) -> dict:
        """Extract usage info from a non-streaming LiteLLM response."""
        return self._normalize_usage(getattr(response, 'usage', None))
    @staticmethod
    def _as_dict(value: typing.Any) -> dict:
        if value is None:
            return {}
        if isinstance(value, dict):
            return value
        if hasattr(value, 'model_dump'):
            return value.model_dump()
        return {}
    def _normalize_stream_tool_calls(
        self,
        raw_tool_calls: typing.Any,
        tool_call_state: dict[int, dict[str, str]],
    ) -> list[dict] | None:
        """Fill OpenAI-style streaming tool-call deltas so MessageChunk can validate them."""
        if not raw_tool_calls:
            return None
        normalized = []
        for fallback_index, raw_tool_call in enumerate(raw_tool_calls):
            tool_call = self._as_dict(raw_tool_call)
            index = tool_call.get('index')
            if not isinstance(index, int):
                index = fallback_index
            state = tool_call_state.setdefault(index, {'id': '', 'type': 'function', 'name': ''})
            if tool_call.get('id'):
                state['id'] = tool_call['id']
            if tool_call.get('type'):
                state['type'] = tool_call['type']
            function = self._as_dict(tool_call.get('function'))
            if function.get('name'):
                state['name'] = function['name']
            arguments = function.get('arguments')
            if arguments is None:
                arguments = ''
            elif not isinstance(arguments, str):
                arguments = str(arguments)
            if not state['id'] or not state['name']:
                continue
            normalized.append(
                {
                    'id': state['id'],
                    'type': state['type'] or 'function',
                    'function': {
                        'name': state['name'],
                        'arguments': arguments,
                    },
                }
            )
        return normalized or None
    def _build_common_args(self, args: dict, include_retry_params: bool = True) -> dict:
        """Apply common requester config to args dict."""
        if self.requester_cfg.get('base_url'):
            args['api_base'] = self.requester_cfg['base_url']
        if self.requester_cfg.get('timeout'):
            args['timeout'] = self.requester_cfg['timeout']
        if include_retry_params:
            if self.requester_cfg.get('drop_params'):
                args['drop_params'] = self.requester_cfg['drop_params']
            if self.requester_cfg.get('num_retries'):
                args['num_retries'] = self.requester_cfg['num_retries']
            if self.requester_cfg.get('api_version'):
                args['api_version'] = self.requester_cfg['api_version']
        return args
    def _handle_litellm_error(self, e: Exception) -> typing.NoReturn:
        """Translate an exception from a LiteLLM call into ``errors.RequesterError``.

        This method never returns; every path raises. The original exception is
        chained as ``__cause__`` so its type and traceback stay available to
        logs and debuggers.

        Args:
            e: The exception caught around the LiteLLM call.

        Raises:
            errors.RequesterError: Always. The message prefix is selected by the
                type of ``e``; unmatched exceptions get the generic prefix.
        """
        # Order matters: more specific exception types are tested before the
        # broader ones they inherit from, otherwise the broad branch would win.
        if isinstance(e, litellm.ContextWindowExceededError):
            raise errors.RequesterError(f'上下文长度超限: {str(e)}') from e
        if isinstance(e, litellm.BadRequestError):
            raise errors.RequesterError(f'请求参数错误: {str(e)}') from e
        if isinstance(e, litellm.AuthenticationError):
            raise errors.RequesterError(f'API key 无效: {str(e)}') from e
        if isinstance(e, litellm.NotFoundError):
            raise errors.RequesterError(f'模型或路径无效: {str(e)}') from e
        if isinstance(e, litellm.RateLimitError):
            raise errors.RequesterError(f'请求过于频繁或余额不足: {str(e)}') from e
        if isinstance(e, litellm.Timeout):
            raise errors.RequesterError(f'请求超时: {str(e)}') from e
        if isinstance(e, litellm.APIConnectionError):
            raise errors.RequesterError(f'连接错误: {str(e)}') from e
        if isinstance(e, litellm.APIError):
            raise errors.RequesterError(f'API 错误: {str(e)}') from e
        raise errors.RequesterError(f'未知错误: {str(e)}') from e
    async def _build_completion_args(
        self,
        model: requester.RuntimeLLMModel,
        messages: typing.List[provider_message.Message],
        funcs: typing.List[resource_tool.LLMTool] = None,
        extra_args: dict[str, typing.Any] = {},
        stream: bool = False,
    ) -> dict:
        """Assemble the keyword arguments shared by ``invoke_llm`` and ``invoke_llm_stream``.

        Later layers override earlier ones: base arguments, common requester
        settings, the model entity's ``extra_args``, then the call-level
        ``extra_args``. Tool definitions are attached last, and ``tool_choice``
        defaults to ``'auto'`` unless an earlier layer already set it.

        Args:
            model: Runtime model providing the model name, API token and
                model-level extra arguments.
            messages: Conversation messages to convert for the request.
            funcs: Optional tools to expose to the model.
            extra_args: Call-level overrides (read only, never mutated here).
            stream: When true, enable streaming and request usage in the stream.

        Returns:
            The argument dict for the completion call.
        """
        converted = self._convert_messages(messages)
        litellm_model = self._build_litellm_model_name(model.model_entity.name)
        token = model.provider.token_mgr.get_token()
        args = {'model': litellm_model, 'messages': converted, 'api_key': token}

        if stream:
            args.update(stream=True, stream_options={'include_usage': True})

        self._build_common_args(args)

        # Model-level overrides first, call-level overrides on top of them.
        model_overrides = model.model_entity.extra_args
        if model_overrides:
            args.update(model_overrides)
        args.update(extra_args)

        if not funcs:
            return args
        tools = await self.ap.tool_mgr.generate_tools_for_openai(funcs)
        if tools:
            args['tools'] = tools
            args.setdefault('tool_choice', 'auto')
        return args
    async def invoke_llm(
        self,
        query: pipeline_query.Query,
        model: requester.RuntimeLLMModel,
        messages: typing.List[provider_message.Message],
        funcs: typing.List[resource_tool.LLMTool] = None,
        extra_args: dict[str, typing.Any] = {},
        remove_think: bool = False,
    ) -> tuple[provider_message.Message, dict]:
        """Run a non-streaming completion and return the reply with its token usage.

        Args:
            query: Pipeline query (not read by this method).
            model: Runtime model to call.
            messages: Conversation messages.
            funcs: Optional tools to expose to the model.
            extra_args: Call-level argument overrides.
            remove_think: Forwarded to ``_process_thinking_content``.

        Returns:
            ``(message, usage)`` where ``usage`` is the normalized token dict.

        Raises:
            Whatever ``_handle_litellm_error`` raises for an exception that
            occurs during the call or while building the message.
        """
        args = await self._build_completion_args(model, messages, funcs, extra_args, stream=False)
        try:
            response = await acompletion(**args)
            fields = response.choices[0].message.model_dump()
            # A missing or null role is treated as the assistant speaking.
            if fields.get('role') is None:
                fields['role'] = 'assistant'
            fields['content'] = self._process_thinking_content(
                fields.get('content', ''),
                fields.get('reasoning_content', None),
                remove_think,
            )
            # reasoning_content has been folded into content above; drop the raw field.
            fields.pop('reasoning_content', None)
            return provider_message.Message(**fields), self._extract_usage(response)
        except Exception as e:
            self._handle_litellm_error(e)
    async def invoke_llm_stream(
        self,
        query: pipeline_query.Query,
        model: requester.RuntimeLLMModel,
        messages: typing.List[provider_message.Message],
        funcs: typing.List[resource_tool.LLMTool] = None,
        extra_args: dict[str, typing.Any] = {},
        remove_think: bool = False,
    ) -> typing.AsyncGenerator[provider_message.MessageChunk, None]:
        """Invoke the LLM in streaming mode and yield a ``MessageChunk`` per usable delta.

        Side effect: whenever a streamed chunk carries usage data, the
        normalized usage dict is stored in ``query.variables['_stream_usage']``
        (only when ``query`` is not None).

        Args:
            query: Pipeline query; used only as the place to record stream usage.
            model: Runtime model to call.
            messages: Conversation messages.
            funcs: Optional tools to expose to the model.
            extra_args: Call-level argument overrides.
            remove_think: When true, chunks carrying ``reasoning_content`` are
                skipped entirely; otherwise the reasoning text is emitted as the
                chunk content.

        Yields:
            ``MessageChunk`` objects built from ``role``, ``content``,
            ``tool_calls`` and ``is_final``; fields that are None are omitted.

        Raises:
            Whatever ``_handle_litellm_error`` raises for an exception that
            occurs inside the ``try`` block.
        """
        args = await self._build_completion_args(model, messages, funcs, extra_args, stream=True)
        # Counts chunks that reached the delta-processing stage; only its being
        # zero (i.e. "this is the first chunk") is ever tested.
        chunk_idx = 0
        # Last role seen in the stream; reused for chunks whose delta has no role.
        role = 'assistant'
        # Per-stream memory of tool-call id/type/name, keyed by tool-call index.
        tool_call_state: dict[int, dict[str, str]] = {}
        try:
            response = await acompletion(**args)
            async for chunk in response:
                # Capture usage whenever a chunk carries it.
                #
                # Important: many OpenAI-compatible gateways (e.g. new-api) and
                # providers send the final usage payload in a chunk that STILL
                # contains a (empty-delta) choice, not an empty `choices` list.
                # The previous implementation only captured usage when `choices`
                # was empty, so streamed calls always recorded 0 tokens.
                # We therefore capture usage independently of `choices`, and then
                # fall through to also process any content this chunk may carry.
                if getattr(chunk, 'usage', None):
                    usage_info = self._normalize_usage(chunk.usage)
                    if query is not None:
                        if query.variables is None:
                            query.variables = {}
                        query.variables['_stream_usage'] = usage_info
                # Chunks without choices have nothing to yield.
                if not hasattr(chunk, 'choices') or not chunk.choices:
                    continue
                # Only the first choice is consumed.
                choice = chunk.choices[0]
                delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
                finish_reason = getattr(choice, 'finish_reason', None)
                if 'role' in delta and delta['role']:
                    role = delta['role']
                delta_content = delta.get('content', '')
                reasoning_content = delta.get('reasoning_content', '')
                # Handle reasoning_content based on remove_think flag
                if reasoning_content:
                    if remove_think:
                        # Skip reasoning content when remove_think is True
                        # NOTE: the whole chunk is skipped here, including any
                        # content, tool calls or finish_reason it may carry.
                        chunk_idx += 1
                        continue
                    else:
                        # Use reasoning_content as the displayed content
                        delta_content = reasoning_content
                tool_calls = self._normalize_stream_tool_calls(delta.get('tool_calls'), tool_call_state)
                # Drop a first chunk that has neither content nor tool calls.
                if chunk_idx == 0 and not delta_content and not tool_calls:
                    chunk_idx += 1
                    continue
                chunk_data = {
                    'role': role,
                    'content': delta_content if delta_content else None,
                    'tool_calls': tool_calls,
                    # Any finish_reason marks the chunk as final.
                    'is_final': bool(finish_reason),
                }
                # Leave out None fields so MessageChunk falls back to its own defaults.
                chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
                yield provider_message.MessageChunk(**chunk_data)
                chunk_idx += 1
        except Exception as e:
            self._handle_litellm_error(e)
    async def invoke_embedding(
        self,
        model: requester.RuntimeEmbeddingModel,
        input_text: list[str],
        extra_args: dict[str, typing.Any] = {},
    ) -> tuple[list[list[float]], dict]:
        """Embed ``input_text`` and return the vectors with token usage.

        Argument precedence (later wins): base arguments, ``base_url``/``timeout``
        from the requester config, the model entity's ``extra_args``, then the
        call-level ``extra_args``.

        Args:
            model: Runtime embedding model providing name, token and extra args.
            input_text: Texts to embed.
            extra_args: Call-level argument overrides (read only).

        Returns:
            ``(embeddings, usage)``: one vector per entry of the response's
            ``data`` list, and the normalized token usage dict.

        Raises:
            Whatever ``_handle_litellm_error`` raises for an exception that
            occurs during the call or while reading the response.
        """
        model_name = self._build_litellm_model_name(model.model_entity.name)
        api_key = model.provider.token_mgr.get_token()
        args = {
            'model': model_name,
            'input': input_text,
            'api_key': api_key,
        }
        self._build_common_args(args, include_retry_params=False)
        if model.model_entity.extra_args:
            args.update(model.model_entity.extra_args)
        args.update(extra_args)
        try:
            response = await aembedding(**args)
            # Entries of ``data`` may be plain dicts or objects with an
            # ``embedding`` attribute depending on provider; accept both, since
            # attribute access on a dict raises AttributeError.
            embeddings = [
                entry['embedding'] if isinstance(entry, dict) else entry.embedding
                for entry in response.data
            ]
            usage_info = self._extract_usage(response)
            return embeddings, usage_info
        except Exception as e:
            self._handle_litellm_error(e)
    async def invoke_rerank(
        self,
        model: requester.RuntimeRerankModel,
        query: str,
        documents: typing.List[str],
        extra_args: dict[str, typing.Any] = {},
    ) -> typing.List[dict]:
        """Rerank ``documents`` against ``query`` and return min-max normalized scores.

        Args:
            model: Runtime rerank model providing name, token and extra args.
            query: The text the documents are ranked against.
            documents: Candidate documents. An empty list returns ``[]``
                without contacting the provider.
            extra_args: Call-level argument overrides (read only); may override
                the default ``top_n``.

        Returns:
            A list of ``{'index': int, 'relevance_score': float}`` dicts in the
            order returned by the provider. Scores are rescaled to ``[0, 1]``
            unless all scores are (nearly) equal, in which case they are left
            untouched.

        Raises:
            Whatever ``_handle_litellm_error`` raises for an exception that
            occurs during the call or while reading the response.
        """
        if not documents:
            # Nothing to rank; the request would otherwise be sent with top_n=0.
            return []
        model_name = self._build_litellm_model_name(model.model_entity.name)
        api_key = model.provider.token_mgr.get_token()
        args = {
            'model': model_name,
            'query': query,
            'documents': documents,
            'api_key': api_key,
            # Ask for every document, capped at 64 results.
            'top_n': min(len(documents), 64),
        }
        self._build_common_args(args, include_retry_params=False)
        if model.model_entity.extra_args:
            args.update(model.model_entity.extra_args)
        args.update(extra_args)
        try:
            response = await arerank(**args)
            results = [
                {
                    'index': r.get('index', 0),
                    'relevance_score': r.get('relevance_score', 0.0),
                }
                for r in response.results
            ]
            if results:
                scores = [r['relevance_score'] for r in results]
                min_score = min(scores)
                score_range = max(scores) - min_score
                # Skip rescaling when the spread is ~0 to avoid dividing by (almost) zero.
                if score_range > 1e-6:
                    for r in results:
                        r['relevance_score'] = (r['relevance_score'] - min_score) / score_range
            return results
        except Exception as e:
            self._handle_litellm_error(e)
    async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]:
        """List the models offered by the provider's ``/models`` endpoint.

        Args:
            api_key: Optional key sent as a ``Bearer`` authorization header.

        Returns:
            ``{'models': [...]}`` where each entry comes from
            ``_enrich_scanned_model``; entries are sorted with type ``'llm'``
            first, then by lower-cased name.

        Raises:
            errors.RequesterError: When no base URL is configured, the endpoint
                answers with an error status, the request times out, or any
                other failure occurs while scanning. The underlying exception
                is chained as the cause.
        """
        import httpx

        # ``or ''`` also covers a config where base_url is stored as None.
        base_url = (self.requester_cfg.get('base_url') or '').rstrip('/')
        timeout = self.requester_cfg.get('timeout', 120)
        if not base_url:
            raise errors.RequesterError('Base URL required for model scanning')
        headers = {}
        if api_key:
            headers['Authorization'] = f'Bearer {api_key}'
        models_url = f'{base_url}/models'
        try:
            async with httpx.AsyncClient(trust_env=True, timeout=timeout) as client:
                response = await client.get(models_url, headers=headers)
                response.raise_for_status()
                payload = response.json()
            models = []
            # ``or []`` tolerates an explicit ``"data": null`` in the payload.
            for item in payload.get('data') or []:
                # One malformed entry should not abort the whole scan.
                if not isinstance(item, dict):
                    continue
                model_id = item.get('id')
                if not model_id:
                    continue
                models.append(self._enrich_scanned_model(model_id, item))
            models.sort(key=lambda x: (x['type'] != 'llm', x['name'].lower()))
            return {'models': models}
        except httpx.HTTPStatusError as e:
            raise errors.RequesterError(f'Model scan failed: {e.response.status_code}') from e
        except httpx.TimeoutException as e:
            raise errors.RequesterError('Model scan timeout') from e
        except Exception as e:
            raise errors.RequesterError(f'Model scan error: {str(e)}') from e
--- a/src/langbot/pkg/provider/modelmgr/requesters/litellmchat.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/litellmchat.yaml
@@ -0,0 +1,64 @@
 apiVersion: v1
 kind: LLMAPIRequester
 metadata:
  name: litellm-chat
  label:
    en_US: LiteLLM (Unified)
    zh_Hans: LiteLLM (统一请求器)
  icon: litellm.svg
 spec:
  config:
  - name: base_url
    label:
      en_US: Base URL
      zh_Hans: 基础 URL
    type: string
    required: false
    default: ''
  - name: timeout
    label:
      en_US: Timeout
      zh_Hans: 超时时间
    type: integer
    required: true
    default: 120
  - name: custom_llm_provider
    label:
      en_US: Custom Provider
      zh_Hans: 自定义 Provider
    type: string
    required: false
    default: ''
    description:
      en_US: Force provider type (e.g., anthropic, openai, gemini)
      zh_Hans: 强制指定 provider 类型（如 anthropic, openai, gemini）
  - name: drop_params
    label:
      en_US: Drop Unsupported Params
      zh_Hans: 丢弃不支持参数
    type: boolean
    required: false
    default: false
  - name: num_retries
    label:
      en_US: Number of Retries
      zh_Hans: 重试次数
    type: integer
    required: false
    default: 0
  - name: api_version
    label:
      en_US: API Version
      zh_Hans: API 版本
    type: string
    required: false
    default: ''
  support_type:
  - llm
  - text-embedding
  - rerank
  provider_category: unified
 execution:
  python:
    path: ./litellmchat.py
    attr: LiteLLMRequester
--- a/src/langbot/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.py
@@ -1,17 +0,0 @@
 from __future__ import annotations
 import typing
 import openai
 from . import chatcmpl
 class LmStudioChatCompletions(chatcmpl.OpenAIChatCompletions):
    """Requester for the LM Studio ChatCompletion API.

    Defines no methods of its own; everything is inherited from
    ``chatcmpl.OpenAIChatCompletions``. Only the default configuration differs.
    """
    # Type declaration only; no client is created in this class body.
    client: openai.AsyncClient
    # Defaults: a server on the local machine (port 1234) and a timeout of 120
    # (presumably seconds; confirm against how the base class uses it).
    default_config: dict[str, typing.Any] = {
        'base_url': 'http://127.0.0.1:1234/v1',
        'timeout': 120,
    }
--- a/src/langbot/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.yaml
@@ -7,6 +7,7 @@ metadata:
    zh_Hans: LM Studio
  icon: lmstudio.webp
 spec:
  litellm_provider: openai
  config:
  - name: base_url
    label:
--- a/src/langbot/pkg/provider/modelmgr/requesters/mimo.svg
+++ b/src/langbot/pkg/provider/modelmgr/requesters/mimo.svg
@@ -0,0 +1,4 @@
 <svg width="60" height="50" viewBox="0 0 60 50" xmlns="http://www.w3.org/2000/svg">
  <rect width="60" height="50" rx="8" fill="#FF6700"/>
  <text x="30" y="32" font-family="Arial, sans-serif" font-size="18" font-weight="bold" fill="white" text-anchor="middle">MiMo</text>
 </svg>
--- a/src/langbot/pkg/provider/modelmgr/requesters/mimochatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/mimochatcmpl.yaml
@@ -0,0 +1,30 @@
 apiVersion: v1
 kind: LLMAPIRequester
 metadata:
  name: mimo-chat-completions
  label:
    en_US: Xiaomi MiMo
    zh_Hans: 小米 MiMo
  icon: mimo.svg
 spec:
  litellm_provider: openai
  config:
  - name: base_url
    label:
      en_US: Base URL
      zh_Hans: 基础 URL
    type: string
    required: true
    default: https://api.xiaomimimo.com/v1
  - name: timeout
    label:
      en_US: Timeout
      zh_Hans: 超时时间
    type: integer
    required: true
    default: 120
  support_type:
  - llm
  - text-embedding
  - rerank
  provider_category: manufacturer
--- a/src/langbot/pkg/provider/modelmgr/requesters/minimax.svg
+++ b/src/langbot/pkg/provider/modelmgr/requesters/minimax.svg
@@ -0,0 +1,4 @@
 <svg width="60" height="50" viewBox="0 0 60 50" xmlns="http://www.w3.org/2000/svg">
  <rect width="60" height="50" rx="8" fill="#4F46E5"/>
  <text x="30" y="32" font-family="Arial, sans-serif" font-size="12" font-weight="bold" fill="white" text-anchor="middle">MiniMax</text>
 </svg>
--- a/src/langbot/pkg/provider/modelmgr/requesters/minimaxchatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/minimaxchatcmpl.yaml
@@ -0,0 +1,30 @@
 apiVersion: v1
 kind: LLMAPIRequester
 metadata:
  name: minimax-chat-completions
  label:
    en_US: MiniMax
    zh_Hans: MiniMax
  icon: minimax.svg
 spec:
  litellm_provider: openai
  config:
  - name: base_url
    label:
      en_US: Base URL
      zh_Hans: 基础 URL
    type: string
    required: true
    default: https://api.minimax.chat/v1
  - name: timeout
    label:
      en_US: Timeout
      zh_Hans: 超时时间
    type: integer
    required: true
    default: 120
  support_type:
  - llm
  - text-embedding
  - rerank
  provider_category: manufacturer
--- a/src/langbot/pkg/provider/modelmgr/requesters/mistral.svg
+++ b/src/langbot/pkg/provider/modelmgr/requesters/mistral.svg
@@ -0,0 +1,5 @@
 <svg width="60" height="50" viewBox="0 0 60 50" xmlns="http://www.w3.org/2000/svg">
  <rect width="60" height="50" rx="8" fill="#FF6B35"/>
  <text x="30" y="28" font-family="Arial, sans-serif" font-size="10" font-weight="bold" fill="white" text-anchor="middle">Mistral</text>
  <text x="30" y="40" font-family="Arial, sans-serif" font-size="8" fill="white" text-anchor="middle">AI</text>
 </svg>
--- a/src/langbot/pkg/provider/modelmgr/requesters/mistralchatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/mistralchatcmpl.yaml
@@ -0,0 +1,30 @@
 apiVersion: v1
 kind: LLMAPIRequester
 metadata:
  name: mistral-chat-completions
  label:
    en_US: Mistral AI
    zh_Hans: Mistral AI
  icon: mistral.svg
 spec:
  litellm_provider: mistral
  config:
  - name: base_url
    label:
      en_US: Base URL
      zh_Hans: 基础 URL
    type: string
    required: true
    default: https://api.mistral.ai/v1
  - name: timeout
    label:
      en_US: Timeout
      zh_Hans: 超时时间
    type: integer
    required: true
    default: 120
  support_type:
  - llm
  - text-embedding
  - rerank
  provider_category: manufacturer
--- a/src/langbot/pkg/provider/modelmgr/requesters/modelscopechatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/modelscopechatcmpl.py
@@ -1,561 +0,0 @@
 from __future__ import annotations
 import asyncio
 import typing
 import openai
 import openai.types.chat.chat_completion as chat_completion
 import httpx
 from .. import entities, errors, requester
 import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
 import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
 import langbot_plugin.api.entities.builtin.provider.message as provider_message
 class ModelScopeChatCompletions(requester.ProviderAPIRequester):
    """ModelScope ChatCompletion API 请求器"""
    client: openai.AsyncClient
    default_config: dict[str, typing.Any] = {
        'base_url': 'https://api-inference.modelscope.cn/v1',
        'timeout': 120,
    }
    async def initialize(self):
        """Create ``self.client`` from the requester configuration.

        The OpenAI async client is given the initial API key, the configured
        base URL and timeout, and an ``httpx.AsyncClient`` that honors proxy
        settings from the environment (``trust_env=True``).
        """
        cfg = self.requester_cfg
        initial_key = self.init_api_key
        endpoint = cfg['base_url']
        request_timeout = cfg['timeout']
        transport = httpx.AsyncClient(trust_env=True, timeout=cfg['timeout'])
        self.client = openai.AsyncClient(
            api_key=initial_key,
            base_url=endpoint,
            timeout=request_timeout,
            http_client=transport,
        )
    def _mask_api_key(self, api_key: str | None) -> str:
        if not api_key:
            return ''
        if len(api_key) <= 8:
            return '****'
        return f'{api_key[:4]}...{api_key[-4:]}'
    def _infer_model_type(self, model_id: str) -> str:
        normalized_model_id = (model_id or '').lower()
        embedding_keywords = (
            'embedding',
            'embed',
            'bge-',
            'e5-',
            'm3e',
            'gte-',
            'multilingual-e5',
            'text-embedding',
        )
        return 'embedding' if any(keyword in normalized_model_id for keyword in embedding_keywords) else 'llm'
    def _infer_model_abilities(self, item: dict[str, typing.Any], model_id: str) -> list[str]:
        normalized_model_id = (model_id or '').lower()
        abilities: set[str] = set()
        def _flatten(value: typing.Any) -> list[str]:
            if value is None:
                return []
            if isinstance(value, str):
                return [value.lower()]
            if isinstance(value, dict):
                flattened: list[str] = []
                for nested_value in value.values():
                    flattened.extend(_flatten(nested_value))
                return flattened
            if isinstance(value, (list, tuple, set)):
                flattened: list[str] = []
                for nested_value in value:
                    flattened.extend(_flatten(nested_value))
                return flattened
            return [str(value).lower()]
        capability_tokens = _flatten(item.get('capabilities'))
        capability_tokens.extend(_flatten(item.get('modalities')))
        capability_tokens.extend(_flatten(item.get('input_modalities')))
        capability_tokens.extend(_flatten(item.get('output_modalities')))
        capability_tokens.extend(_flatten(item.get('supported_generation_methods')))
        capability_tokens.extend(_flatten(item.get('supported_parameters')))
        capability_tokens.extend(_flatten(item.get('architecture')))
        combined_tokens = capability_tokens + [normalized_model_id]
        vision_keywords = ('vision', 'image', 'file', 'video', 'multimodal', 'vl', 'ocr', 'omni')
        function_call_keywords = ('function', 'tool', 'tools', 'tool_choice', 'tool_call', 'tool-use', 'tool_use')
        if any(any(keyword in token for keyword in vision_keywords) for token in combined_tokens):
            abilities.add('vision')
        if any(any(keyword in token for keyword in function_call_keywords) for token in combined_tokens):
            abilities.add('func_call')
        return sorted(abilities)
    def _normalize_modalities(self, value: typing.Any) -> list[str]:
        normalized: list[str] = []
        def _collect(item: typing.Any):
            if item is None:
                return
            if isinstance(item, str):
                for part in item.replace('->', ',').replace('+', ',').split(','):
                    token = part.strip().lower()
                    if token and token not in normalized:
                        normalized.append(token)
                return
            if isinstance(item, dict):
                for nested in item.values():
                    _collect(nested)
                return
            if isinstance(item, (list, tuple, set)):
                for nested in item:
                    _collect(nested)
                return
        _collect(value)
        return normalized
    def _extract_scan_metadata(self, item: dict[str, typing.Any], model_id: str) -> dict[str, typing.Any]:
        display_name = item.get('name')
        if not isinstance(display_name, str) or not display_name.strip() or display_name == model_id:
            display_name = ''
        description = item.get('description')
        if not isinstance(description, str) or not description.strip():
            description = ''
        context_length = item.get('context_length')
        if context_length is None and isinstance(item.get('top_provider'), dict):
            context_length = item['top_provider'].get('context_length')
        if not isinstance(context_length, int):
            try:
                context_length = int(context_length) if context_length is not None else None
            except (TypeError, ValueError):
                context_length = None
        input_modalities = self._normalize_modalities(item.get('input_modalities'))
        output_modalities = self._normalize_modalities(item.get('output_modalities'))
        if isinstance(item.get('architecture'), dict):
            if not input_modalities:
                input_modalities = self._normalize_modalities(item['architecture'].get('input_modalities'))
            if not output_modalities:
                output_modalities = self._normalize_modalities(item['architecture'].get('output_modalities'))
        owned_by = item.get('owned_by')
        if not isinstance(owned_by, str) or not owned_by.strip():
            owned_by = ''
        return {
            'display_name': display_name or None,
            'description': description or None,
            'context_length': context_length,
            'owned_by': owned_by or None,
            'input_modalities': input_modalities,
            'output_modalities': output_modalities,
        }
    async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]:
        """Fetch ``<base_url>/models`` and describe every model that has an id.

        Returns:
            A dict with ``models`` (entries with id, name, inferred type and
            abilities plus scan metadata, sorted with ``'llm'`` first and then
            by lower-cased name) and ``debug`` (the request that was made, with
            the API key masked, and the raw response payload).
        """
        request_headers = {'Authorization': f'Bearer {api_key}'} if api_key else {}
        base = self.requester_cfg['base_url'].rstrip('/')
        models_url = f'{base}/models'

        async with httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']) as http:
            reply = await http.get(models_url, headers=request_headers)
            reply.raise_for_status()
            payload = reply.json()

        identified = ((entry, entry.get('id')) for entry in payload.get('data', []))
        described = [
            {
                'id': entry_id,
                'name': entry_id,
                'type': self._infer_model_type(entry_id),
                'abilities': self._infer_model_abilities(entry, entry_id),
                **self._extract_scan_metadata(entry, entry_id),
            }
            for entry, entry_id in identified
            if entry_id
        ]
        models = sorted(described, key=lambda m: (m['type'] != 'llm', m['name'].lower()))

        # Never echo the real key back in the debug block.
        shown_auth = f'Bearer {self._mask_api_key(api_key)}' if api_key else ''
        debug_request = {
            'method': 'GET',
            'url': models_url,
            'headers': {
                'Authorization': shown_auth,
            },
        }
        return {
            'models': models,
            'debug': {
                'request': debug_request,
                'response': payload,
            },
        }
    async def _req(
        self,
        query: pipeline_query.Query,
        args: dict,
        extra_body: dict = {},
        remove_think: bool = False,
    ) -> list[dict[str, typing.Any]]:
        args['stream'] = True
        chunk = None
        pending_content = ''
        tool_calls = []
        resp_gen: openai.AsyncStream = await self.client.chat.completions.create(**args, extra_body=extra_body)
        chunk_idx = 0
        thinking_started = False
        thinking_ended = False
        tool_id = ''
        tool_name = ''
        message_delta = {}
        async for chunk in resp_gen:
            if not chunk or not chunk.id or not chunk.choices or not chunk.choices[0] or not chunk.choices[0].delta:
                continue
            delta = chunk.choices[0].delta.model_dump() if hasattr(chunk.choices[0], 'delta') else {}
            reasoning_content = delta.get('reasoning_content')
            # 处理 reasoning_content
            if reasoning_content:
                # accumulated_reasoning += reasoning_content
                # 如果设置了 remove_think，跳过 reasoning_content
                if remove_think:
                    chunk_idx += 1
                    continue
                # 第一次出现 reasoning_content，添加 <think> 开始标签
                if not thinking_started:
                    thinking_started = True
                    pending_content += '<think>\n' + reasoning_content
                else:
                    # 继续输出 reasoning_content
                    pending_content += reasoning_content
            elif thinking_started and not thinking_ended and delta.get('content'):
                # reasoning_content 结束，normal content 开始，添加 </think> 结束标签
                thinking_ended = True
                pending_content += '\n</think>\n' + delta.get('content')
            if delta.get('content') is not None:
                pending_content += delta.get('content')
            if delta.get('tool_calls') is not None:
                for tool_call in delta.get('tool_calls'):
                    if tool_call['id'] != '':
                        tool_id = tool_call['id']
                    if tool_call['function']['name'] is not None:
                        tool_name = tool_call['function']['name']
                    if tool_call['function']['arguments'] is None:
                        continue
                    tool_call['id'] = tool_id
                    tool_call['name'] = tool_name
                    for tc in tool_calls:
                        if tc['index'] == tool_call['index']:
                            tc['function']['arguments'] += tool_call['function']['arguments']
                            break
                    else:
                        tool_calls.append(tool_call)
            if chunk.choices[0].finish_reason is not None:
                break
        message_delta['content'] = pending_content
        message_delta['role'] = 'assistant'
        message_delta['tool_calls'] = tool_calls if tool_calls else None
        return [message_delta]
    async def _make_msg(
        self,
        chat_completion: list[dict[str, typing.Any]],
    ) -> provider_message.Message:
        """Build a ``Message`` from the first entry of ``chat_completion``.

        A missing or ``None`` role is set to ``'assistant'`` on that entry
        (in place) before the message is constructed.
        """
        payload = chat_completion[0]
        # Make sure the role field exists and is not None.
        if payload.get('role') is None:
            payload['role'] = 'assistant'
        return provider_message.Message(**payload)
    async def _closure(
        self,
        query: pipeline_query.Query,
        req_messages: list[dict],
        use_model: requester.RuntimeLLMModel,
        use_funcs: list[resource_tool.LLMTool] = None,
        extra_args: dict[str, typing.Any] = {},
        remove_think: bool = False,
    ) -> tuple[provider_message.Message, dict]:
        """Send one chat request (through the streaming path) and return the reply.

        The client's API key is refreshed from the model's token manager, tools
        are attached when available, and ``image_base64`` content parts are
        rewritten to the ``image_url`` form before sending.

        Returns:
            ``(message, usage)``; ``usage`` is always an empty dict because
            this path does not collect usage information.
        """
        self.client.api_key = use_model.provider.token_mgr.get_token()

        args = {'model': use_model.model_entity.name}
        if use_funcs:
            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
            if tools:
                args['tools'] = tools

        # Messages for this request. NOTE: the copy is shallow, so the content
        # parts rewritten below are the same dict objects the caller passed in.
        messages = req_messages.copy()
        for msg in messages:
            parts = msg.get('content')
            if not isinstance(parts, list):
                continue
            # Vision input: convert image_base64 parts to image_url parts.
            for part in parts:
                if part['type'] != 'image_base64':
                    continue
                part['image_url'] = {'url': part.pop('image_base64')}
                part['type'] = 'image_url'
        args['messages'] = messages

        # Send the request and turn the folded stream into a message.
        folded = await self._req(query, args, extra_body=extra_args, remove_think=remove_think)
        reply = await self._make_msg(folded)
        # ModelScope uses streaming, usage info not available
        return reply, {}
    async def _req_stream(
        self,
        args: dict,
        extra_body: dict | None = None,
    ) -> typing.AsyncGenerator[typing.Any, None]:
        """Yield the chunks of a chat completion request.

        Args:
            args: Keyword arguments for ``client.chat.completions.create``.
            extra_body: Extra request body fields; an empty dict is sent when
                omitted.

        Yields:
            Each chunk produced by the client, unchanged.
        """
        stream = await self.client.chat.completions.create(
            **args,
            extra_body={} if extra_body is None else extra_body,
        )
        async for chunk in stream:
            yield chunk
    async def _closure_stream(
        self,
        query: pipeline_query.Query,
        req_messages: list[dict],
        use_model: requester.RuntimeLLMModel,
        use_funcs: list[resource_tool.LLMTool] | None = None,
        extra_args: dict[str, typing.Any] | None = None,
        remove_think: bool = False,
    ) -> typing.AsyncGenerator[provider_message.MessageChunk, None]:
        """Stream a chat completion as incremental ``MessageChunk`` objects.

        Args:
            query: Pipeline query (not used by this method).
            req_messages: Request messages as plain dicts. They are not
                modified; converted copies are sent instead.
            use_model: Runtime model supplying the API token and model name.
            use_funcs: Optional tools to expose to the model.
            extra_args: Extra parameters forwarded as ``extra_body``.
            remove_think: When true, chunks carrying ``reasoning_content`` are
                dropped; otherwise the reasoning text is emitted wrapped in
                ``<think>`` / ``</think>`` markers.

        Yields:
            One ``MessageChunk`` per non-empty delta, holding only the
            incremental content and tool calls of that delta.
        """
        self.client.api_key = use_model.provider.token_mgr.get_token()
        args: dict[str, typing.Any] = {'model': use_model.model_entity.name}
        if use_funcs:
            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
            if tools:
                args['tools'] = tools
        # Build the request messages. Vision parts are converted on copies so
        # the dicts handed in by the caller are never rewritten.
        messages = []
        for msg in req_messages:
            content = msg.get('content')
            if isinstance(content, list):
                converted_parts = []
                for part in content:
                    if part['type'] == 'image_base64':
                        image_data = part['image_base64']
                        part = {k: v for k, v in part.items() if k != 'image_base64'}
                        part['type'] = 'image_url'
                        part['image_url'] = {'url': image_data}
                    converted_parts.append(part)
                msg = {**msg, 'content': converted_parts}
            messages.append(msg)
        args['messages'] = messages
        args['stream'] = True
        # Streaming state.
        chunk_idx = 0
        thinking_started = False
        thinking_ended = False
        role = 'assistant'  # default role until a chunk provides one
        # Last real tool-call id / name seen. Continuation deltas that carry
        # an empty or missing id / name inherit these values.
        tool_id = None
        tool_name = None
        stream = self._req_stream(args, extra_body={} if extra_args is None else extra_args)
        async for chunk in stream:
            # Parse the chunk.
            if hasattr(chunk, 'choices') and chunk.choices:
                choice = chunk.choices[0]
                delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
                finish_reason = getattr(choice, 'finish_reason', None)
            else:
                delta = {}
                finish_reason = None
            # Take the role from whichever chunk provides it and keep it.
            if delta.get('role'):
                role = delta['role']
            # Incremental content.
            delta_content = delta.get('content', '')
            reasoning_content = delta.get('reasoning_content', '')
            if reasoning_content:
                # With remove_think set, reasoning chunks are skipped entirely.
                if remove_think:
                    chunk_idx += 1
                    continue
                if not thinking_started:
                    # First reasoning chunk: open the <think> block.
                    thinking_started = True
                    delta_content = '<think>\n' + reasoning_content
                else:
                    delta_content = reasoning_content
            elif thinking_started and not thinking_ended and delta_content:
                # Reasoning is over and normal content begins: close the block.
                thinking_ended = True
                delta_content = '\n</think>\n' + delta_content
            # Normalise incremental tool calls.
            tool_calls = delta.get('tool_calls')
            if tool_calls:
                for tool_call in tool_calls:
                    function = tool_call['function']
                    if tool_call['id']:
                        tool_id = tool_call['id']
                    elif tool_id is not None:
                        tool_call['id'] = tool_id
                    if function['name']:
                        tool_name = function['name']
                    elif tool_name is not None:
                        function['name'] = tool_name
                    if tool_call['type'] is None:
                        tool_call['type'] = 'function'
                    if function['arguments'] is None:
                        function['arguments'] = ''
            # Skip an empty first chunk (role only, no content).
            if chunk_idx == 0 and not delta_content and not reasoning_content and not tool_calls:
                chunk_idx += 1
                continue
            # Build the MessageChunk from the incremental data only.
            chunk_data = {
                'role': role,
                'content': delta_content if delta_content else None,
                'tool_calls': tool_calls,
                'is_final': bool(finish_reason),
            }
            # Drop None values.
            chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
            yield provider_message.MessageChunk(**chunk_data)
            chunk_idx += 1
    async def invoke_llm(
        self,
        query: pipeline_query.Query,
        model: entities.LLMModelInfo,
        messages: typing.List[provider_message.Message],
        funcs: typing.List[resource_tool.LLMTool] | None = None,
        extra_args: dict[str, typing.Any] | None = None,
        remove_think: bool = False,
    ) -> tuple[provider_message.Message, dict]:
        """Run a chat request and return whatever ``_closure`` returns.

        Each message is dumped to a dict; list content made up solely of text
        parts is merged into a single newline-joined string.

        Args:
            query: Pipeline query, forwarded to ``_closure``.
            model: Model to use, forwarded as ``use_model``.
            messages: Conversation messages.
            funcs: Optional tools, forwarded as ``use_funcs``.
            extra_args: Extra request parameters; an empty dict is used when
                omitted.
            remove_think: Forwarded to ``_closure``.

        Raises:
            errors.RequesterError: On timeout or any OpenAI API error; the
                original exception is attached as the cause.
        """
        # req_messages is only used inside this class; external state is
        # synchronised through query.messages.
        req_messages = []
        for m in messages:
            msg_dict = m.dict(exclude_none=True)
            content = msg_dict.get('content')
            # Merge the parts only when every one of them is a text part.
            if isinstance(content, list) and all(
                isinstance(part, dict) and part.get('type') == 'text' for part in content
            ):
                msg_dict['content'] = '\n'.join(part['text'] for part in content)
            req_messages.append(msg_dict)
        try:
            return await self._closure(
                query=query,
                req_messages=req_messages,
                use_model=model,
                use_funcs=funcs,
                extra_args={} if extra_args is None else extra_args,
                remove_think=remove_think,
            )
        except asyncio.TimeoutError as e:
            raise errors.RequesterError('请求超时') from e
        except openai.BadRequestError as e:
            if 'context_length_exceeded' in e.message:
                raise errors.RequesterError(f'上文过长，请重置会话: {e.message}') from e
            raise errors.RequesterError(f'请求参数错误: {e.message}') from e
        except openai.AuthenticationError as e:
            raise errors.RequesterError(f'无效的 api-key: {e.message}') from e
        except openai.NotFoundError as e:
            raise errors.RequesterError(f'请求路径错误: {e.message}') from e
        except openai.RateLimitError as e:
            raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}') from e
        except openai.APIError as e:
            raise errors.RequesterError(f'请求错误: {e.message}') from e
    async def invoke_llm_stream(
        self,
        query: pipeline_query.Query,
        model: requester.RuntimeLLMModel,
        messages: typing.List[provider_message.Message],
        funcs: typing.List[resource_tool.LLMTool] | None = None,
        extra_args: dict[str, typing.Any] | None = None,
        remove_think: bool = False,
    ) -> typing.AsyncGenerator[provider_message.MessageChunk, None]:
        """Stream a chat request, yielding every item from ``_closure_stream``.

        Each message is dumped to a dict; list content made up solely of text
        parts is merged into a single newline-joined string.

        Args:
            query: Pipeline query, forwarded to ``_closure_stream``.
            model: Model to use, forwarded as ``use_model``.
            messages: Conversation messages.
            funcs: Optional tools, forwarded as ``use_funcs``.
            extra_args: Extra request parameters; an empty dict is used when
                omitted.
            remove_think: Forwarded to ``_closure_stream``.

        Raises:
            errors.RequesterError: On timeout or any OpenAI API error; the
                original exception is attached as the cause.
        """
        # req_messages is only used inside this class; external state is
        # synchronised through query.messages.
        req_messages = []
        for m in messages:
            msg_dict = m.dict(exclude_none=True)
            content = msg_dict.get('content')
            # Merge the parts only when every one of them is a text part.
            if isinstance(content, list) and all(
                isinstance(part, dict) and part.get('type') == 'text' for part in content
            ):
                msg_dict['content'] = '\n'.join(part['text'] for part in content)
            req_messages.append(msg_dict)
        try:
            async for item in self._closure_stream(
                query=query,
                req_messages=req_messages,
                use_model=model,
                use_funcs=funcs,
                extra_args={} if extra_args is None else extra_args,
                remove_think=remove_think,
            ):
                yield item
        except asyncio.TimeoutError as e:
            raise errors.RequesterError('请求超时') from e
        except openai.BadRequestError as e:
            if 'context_length_exceeded' in e.message:
                raise errors.RequesterError(f'上文过长，请重置会话: {e.message}') from e
            raise errors.RequesterError(f'请求参数错误: {e.message}') from e
        except openai.AuthenticationError as e:
            raise errors.RequesterError(f'无效的 api-key: {e.message}') from e
        except openai.NotFoundError as e:
            raise errors.RequesterError(f'请求路径错误: {e.message}') from e
        except openai.RateLimitError as e:
            raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}') from e
        except openai.APIError as e:
            raise errors.RequesterError(f'请求错误: {e.message}') from e
--- a/src/langbot/pkg/provider/modelmgr/requesters/modelscopechatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/modelscopechatcmpl.yaml
@@ -7,6 +7,7 @@ metadata:
    zh_Hans: 魔搭社区
  icon: modelscope.svg
 spec:
  litellm_provider: openai
  config:
  - name: base_url
    label:
@@ -31,6 +32,8 @@ spec:
    default: 120
  support_type:
  - llm
  - text-embedding
  - rerank
  provider_category: maas
 execution:
  python:
--- a/src/langbot/pkg/provider/modelmgr/requesters/moonshotchatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/moonshotchatcmpl.py
@@ -1,67 +0,0 @@
 from __future__ import annotations
 import typing
 from . import chatcmpl
 from .. import requester
 import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
 import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
 import langbot_plugin.api.entities.builtin.provider.message as provider_message
 class MoonshotChatCompletions(chatcmpl.OpenAIChatCompletions):
    """Moonshot ChatCompletion API requester."""
    # Default endpoint and timeout (unit not shown here; presumably seconds).
    default_config: dict[str, typing.Any] = {
        'base_url': 'https://api.moonshot.cn/v1',
        'timeout': 120,
    }
    async def _closure(
        self,
        query: pipeline_query.Query,
        req_messages: list[dict],
        use_model: requester.RuntimeLLMModel,
        use_funcs: list[resource_tool.LLMTool] | None = None,
        extra_args: dict[str, typing.Any] | None = None,
        remove_think: bool = False,
    ) -> tuple[provider_message.Message, dict]:
        """Send one chat request and return the message with token usage.

        List-typed message content is flattened to plain text before sending;
        parts that carry no ``text`` field (for example images) are skipped
        instead of raising ``KeyError``. The flattening is applied to
        ``req_messages`` in place.

        Args:
            query: Pipeline query (not used by this method).
            req_messages: Request messages as plain dicts.
            use_model: Runtime model supplying the API token and model name.
            use_funcs: Optional tools to expose to the model.
            extra_args: Extra parameters forwarded as ``extra_body``.
            remove_think: Forwarded to ``_make_msg``.

        Returns:
            ``(message, usage_info)`` where ``usage_info`` holds
            ``input_tokens``, ``output_tokens`` and ``total_tokens`` when the
            response reports usage, and is empty otherwise.
        """
        self.client.api_key = use_model.provider.token_mgr.get_token()
        args: dict[str, typing.Any] = {'model': use_model.model_entity.name}
        if use_funcs:
            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
            if tools:
                args['tools'] = tools
        # Only plain text is sent: join the text parts of list content.
        messages = req_messages
        for m in messages:
            content = m.get('content')
            if isinstance(content, list):
                m['content'] = ' '.join(
                    part['text'] for part in content if isinstance(part, dict) and 'text' in part
                )
        args['messages'] = messages
        # Send the request.
        resp = await self._req(args, extra_body={} if extra_args is None else extra_args)
        # Turn the response into a provider message.
        message = await self._make_msg(resp, remove_think)
        # Extract token usage from the response.
        usage_info: dict = {}
        usage = getattr(resp, 'usage', None)
        if usage:
            usage_info['input_tokens'] = usage.prompt_tokens or 0
            usage_info['output_tokens'] = usage.completion_tokens or 0
            usage_info['total_tokens'] = usage.total_tokens or 0
        return message, usage_info
--- a/src/langbot/pkg/provider/modelmgr/requesters/moonshotchatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/moonshotchatcmpl.yaml
@@ -7,6 +7,7 @@ metadata:
    zh_Hans: 月之暗面
  icon: moonshot.png
 spec:
  litellm_provider: openai
  config:
  - name: base_url
    label:
@@ -24,6 +25,8 @@ spec:
    default: 120
  support_type:
  - llm
  - text-embedding
  - rerank
  provider_category: manufacturer
 execution:
  python:
--- a/src/langbot/pkg/provider/modelmgr/requesters/newapichatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/newapichatcmpl.py
@@ -1,17 +0,0 @@
 from __future__ import annotations
 import typing
 import openai
 from . import chatcmpl
 class NewAPIChatCompletions(chatcmpl.OpenAIChatCompletions):
    """New API ChatCompletion API requester."""
    # Annotated as an openai.AsyncClient; not assigned anywhere in this class.
    client: openai.AsyncClient
    # Default endpoint (a local New API instance) and timeout
    # (unit not shown here; presumably seconds).
    default_config: dict[str, typing.Any] = {
        'base_url': 'http://localhost:3000/v1',
        'timeout': 120,
    }
--- a/src/langbot/pkg/provider/modelmgr/requesters/newapichatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/newapichatcmpl.yaml
@@ -7,6 +7,7 @@ metadata:
    zh_Hans: New API
  icon: newapi.png
 spec:
  litellm_provider: openai
  config:
  - name: base_url
    label:
--- a/src/langbot/pkg/provider/modelmgr/requesters/ollamachat.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/ollamachat.py
@@ -1,314 +0,0 @@
 from __future__ import annotations
 import asyncio
 import os
 import typing
 from typing import Union, Mapping, Any, AsyncIterator
 import uuid
 import json
 import ollama
 import httpx
 from .. import errors, requester
 import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
 import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
 import langbot_plugin.api.entities.builtin.provider.message as provider_message
 REQUESTER_NAME: str = 'ollama-chat'
 class OllamaChatCompletions(requester.ProviderAPIRequester):
    """Ollama platform ChatCompletion API requester."""
    # Async Ollama client; created in initialize().
    client: ollama.AsyncClient
    # Default endpoint (a local Ollama server) and timeout
    # (unit not shown here; presumably seconds).
    default_config: dict[str, typing.Any] = {
        'base_url': 'http://127.0.0.1:11434',
        'timeout': 120,
    }
    async def initialize(self):
        """Export the configured base URL as ``OLLAMA_HOST`` and create the client.

        The environment variable is set process-wide before the
        ``ollama.AsyncClient`` is constructed with the configured timeout.
        """
        cfg = self.requester_cfg
        os.environ['OLLAMA_HOST'] = cfg['base_url']
        self.client = ollama.AsyncClient(timeout=cfg['timeout'])
    def _infer_model_type(self, model_id: str) -> str:
        normalized_model_id = (model_id or '').lower()
        embedding_keywords = ('embedding', 'embed', 'bge-', 'e5-', 'm3e', 'gte-', 'text-embedding')
        return 'embedding' if any(keyword in normalized_model_id for keyword in embedding_keywords) else 'llm'
    def _infer_model_abilities(self, item: dict[str, typing.Any], model_id: str) -> list[str]:
        normalized_model_id = (model_id or '').lower()
        abilities: set[str] = set()
        details = item.get('details', {}) or {}
        families = details.get('families', []) or []
        tokens = [normalized_model_id, str(details.get('family', '')).lower()]
        tokens.extend(str(family).lower() for family in families)
        if any(keyword in token for token in tokens for keyword in ('vision', 'vl', 'omni', 'llava', 'ocr')):
            abilities.add('vision')
        if any(keyword in token for token in tokens for keyword in ('tool', 'function')):
            abilities.add('func_call')
        return sorted(abilities)
    async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]:
        """List the models reported by the server's ``/api/tags`` endpoint.

        Args:
            api_key: Ignored.

        Returns:
            A dict with ``models`` (entries with ``id``, ``name``, ``type`` and
            ``abilities``, LLMs first and then by lower-cased name) and
            ``debug`` (the request method/URL and the raw response payload).

        Raises:
            httpx.HTTPStatusError: If the server answers with an error status.
        """
        del api_key
        base_url = self.requester_cfg['base_url'].rstrip('/')
        models_url = f'{base_url}/api/tags'
        async with httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']) as client:
            response = await client.get(models_url)
            response.raise_for_status()
            payload = response.json()
        # Pair every entry with its id and drop entries without one.
        identified = ((entry, entry.get('model') or entry.get('name')) for entry in payload.get('models', []))
        models: list[dict[str, typing.Any]] = [
            {
                'id': model_id,
                'name': entry.get('name', model_id),
                'type': self._infer_model_type(model_id),
                'abilities': self._infer_model_abilities(entry, model_id),
            }
            for entry, model_id in identified
            if model_id
        ]
        models.sort(key=lambda model: (model['type'] != 'llm', model['name'].lower()))
        debug_info = {
            'request': {
                'method': 'GET',
                'url': models_url,
            },
            'response': payload,
        }
        return {'models': models, 'debug': debug_info}
    async def _req(
        self,
        args: dict,
    ) -> Union[Mapping[str, Any], AsyncIterator[Mapping[str, Any]]]:
        """Call ``client.chat`` with ``args`` and return its awaited result."""
        response = await self.client.chat(**args)
        return response
    async def _closure(
        self,
        query: pipeline_query.Query,
        req_messages: list[dict],
        use_model: requester.RuntimeLLMModel,
        use_funcs: list[resource_tool.LLMTool] | None = None,
        extra_args: dict[str, typing.Any] | None = None,
        remove_think: bool = False,
    ) -> provider_message.Message:
        """Send one chat request to Ollama and return the resulting message.

        List content is split into a newline-joined text string plus an
        ``images`` list of raw base64 payloads, and tool-call arguments stored
        as JSON strings are decoded to objects. These conversions are applied
        to the dicts of ``req_messages`` in place.

        Args:
            query: Pipeline query (not used by this method).
            req_messages: Request messages as plain dicts.
            use_model: Runtime model supplying the model name.
            use_funcs: Optional tools to expose to the model.
            extra_args: Extra keyword arguments for ``client.chat``; copied
                before use.
            remove_think: Not used by this method.
        """
        args = dict(extra_args) if extra_args else {}
        args['model'] = use_model.model_entity.name
        messages: list[dict] = req_messages.copy()
        for msg in messages:
            content = msg.get('content')
            if isinstance(content, list):
                text_content: list = []
                image_urls: list = []
                for part in content:
                    if part['type'] == 'text':
                        text_content.append(part['text'])
                    elif part['type'] == 'image_base64':
                        image_urls.append(part['image_base64'])
                msg['content'] = '\n'.join(text_content)
                # Drop a "data:...;base64," prefix when present; a bare base64
                # string without a comma is passed through unchanged.
                msg['images'] = [url.split(',', 1)[-1] for url in image_urls]
            # LangBot stores tool-call arguments as str; Ollama expects a dict.
            for tool_call in msg.get('tool_calls') or []:
                arguments = tool_call['function']['arguments']
                if isinstance(arguments, str):
                    # An empty string means "no arguments", not invalid JSON.
                    tool_call['function']['arguments'] = json.loads(arguments) if arguments.strip() else {}
        args['messages'] = messages
        args['tools'] = []
        if use_funcs:
            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
            if tools:
                args['tools'] = tools
        resp = await self._req(args)
        message: provider_message.Message = await self._make_msg(resp)
        return message
    async def _make_msg(self, chat_completions: ollama.ChatResponse) -> provider_message.Message:
        """Convert an Ollama chat response into a provider message.

        A message is always returned. When the response carries tool calls
        but no content, the content falls back to an empty string so the tool
        calls can still be attached.

        Raises:
            ValueError: If the response has no ``message``.
        """
        message: ollama.Message = chat_completions.message
        if message is None:
            raise ValueError("chat_completions must contain a 'message' field")
        ret_msg = provider_message.Message(
            role='assistant',
            content=message.content if message.content is not None else '',
        )
        if message.tool_calls:
            # Each tool call gets a freshly generated id; arguments are
            # serialised back to the JSON-string form used by LangBot.
            ret_msg.tool_calls = [
                provider_message.ToolCall(
                    id=uuid.uuid4().hex,
                    type='function',
                    function=provider_message.FunctionCall(
                        name=tool_call.function.name,
                        arguments=json.dumps(tool_call.function.arguments),
                    ),
                )
                for tool_call in message.tool_calls
            ]
        return ret_msg
    async def _prepare_messages(
        self,
        messages: typing.List[provider_message.Message],
    ) -> list[dict]:
        """Prepare messages for Ollama API request."""
        req_messages: list = []
        for m in messages:
            msg_dict: dict = m.dict(exclude_none=True)
            content: Any = msg_dict.get('content')
            if isinstance(content, list):
                if all(isinstance(part, dict) and part.get('type') == 'text' for part in content):
                    msg_dict['content'] = '\n'.join(part['text'] for part in content)
            req_messages.append(msg_dict)
        return req_messages
    async def invoke_llm(
        self,
        query: pipeline_query.Query,
        model: requester.RuntimeLLMModel,
        messages: typing.List[provider_message.Message],
        funcs: typing.List[resource_tool.LLMTool] = None,
        extra_args: dict[str, typing.Any] = {},
        remove_think: bool = False,
    ) -> provider_message.Message:
        """Prepare the messages and run a chat request via ``_closure``.

        Raises:
            errors.RequesterError: If the request times out.
        """
        prepared = await self._prepare_messages(messages)
        call_kwargs = dict(
            query=query,
            req_messages=prepared,
            use_model=model,
            use_funcs=funcs,
            extra_args=extra_args,
            remove_think=remove_think,
        )
        try:
            result = await self._closure(**call_kwargs)
        except asyncio.TimeoutError:
            raise errors.RequesterError('请求超时')
        return result
    async def invoke_llm_stream(
        self,
        query: pipeline_query.Query,
        model: requester.RuntimeLLMModel,
        messages: typing.List[provider_message.Message],
        funcs: typing.List[resource_tool.LLMTool] | None = None,
        extra_args: dict[str, typing.Any] | None = None,
        remove_think: bool = False,
    ) -> typing.AsyncGenerator[provider_message.MessageChunk, None]:
        """Stream a chat request to Ollama as incremental ``MessageChunk`` objects.

        Args:
            query: Pipeline query (not used by this method).
            model: Runtime model supplying the model name.
            messages: Conversation messages.
            funcs: Optional tools to expose to the model.
            extra_args: Extra keyword arguments for ``client.chat``; copied
                before use.
            remove_think: When true, chunks carrying ``thinking`` text are
                dropped; otherwise that text is emitted wrapped in
                ``<think>`` / ``</think>`` markers.

        Raises:
            errors.RequesterError: If the request times out.
        """
        req_messages = await self._prepare_messages(messages)
        try:
            args = dict(extra_args) if extra_args else {}
            args['model'] = model.model_entity.name
            # Convert the messages to the Ollama format.
            msgs: list[dict] = req_messages.copy()
            for msg in msgs:
                content = msg.get('content')
                if isinstance(content, list):
                    text_content: list = []
                    image_urls: list = []
                    for part in content:
                        if part['type'] == 'text':
                            text_content.append(part['text'])
                        elif part['type'] == 'image_base64':
                            image_urls.append(part['image_base64'])
                    msg['content'] = '\n'.join(text_content)
                    # Drop a "data:...;base64," prefix when present; a bare
                    # base64 string without a comma is passed through.
                    msg['images'] = [url.split(',', 1)[-1] for url in image_urls]
                # Tool-call arguments are stored as str; Ollama expects a dict.
                for tool_call in msg.get('tool_calls') or []:
                    arguments = tool_call['function']['arguments']
                    if isinstance(arguments, str):
                        # An empty string means "no arguments", not invalid JSON.
                        tool_call['function']['arguments'] = json.loads(arguments) if arguments.strip() else {}
            args['messages'] = msgs
            args['tools'] = []
            if funcs:
                tools = await self.ap.tool_mgr.generate_tools_for_openai(funcs)
                if tools:
                    args['tools'] = tools
            args['stream'] = True
            chunk_idx = 0
            thinking_started = False
            thinking_ended = False
            role = 'assistant'
            async for chunk in await self.client.chat(**args):
                message: ollama.Message = chunk.message
                done = chunk.done
                delta_content = message.content or ''
                reasoning_content = getattr(message, 'thinking', '') or ''
                # Handle reasoning/thinking content.
                if reasoning_content:
                    if remove_think:
                        chunk_idx += 1
                        continue
                    if not thinking_started:
                        thinking_started = True
                        delta_content = '<think>\n' + reasoning_content
                    else:
                        delta_content = reasoning_content
                elif thinking_started and not thinking_ended and delta_content:
                    thinking_ended = True
                    delta_content = '\n</think>\n' + delta_content
                # Handle tool calls; each one gets a freshly generated id.
                tool_calls_data = None
                if message.tool_calls:
                    tool_calls_data = [
                        {
                            'id': uuid.uuid4().hex,
                            'type': 'function',
                            'function': {
                                'name': tc.function.name,
                                'arguments': json.dumps(tc.function.arguments),
                            },
                        }
                        for tc in message.tool_calls
                    ]
                # Skip an empty first chunk.
                if chunk_idx == 0 and not delta_content and not reasoning_content and not tool_calls_data:
                    chunk_idx += 1
                    continue
                chunk_data = {
                    'role': role,
                    'content': delta_content if delta_content else None,
                    'tool_calls': tool_calls_data,
                    'is_final': bool(done),
                }
                chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
                yield provider_message.MessageChunk(**chunk_data)
                chunk_idx += 1
        except asyncio.TimeoutError as e:
            raise errors.RequesterError('请求超时') from e
    async def invoke_embedding(
        self,
        model: requester.RuntimeEmbeddingModel,
        input_text: list[str],
        extra_args: dict[str, typing.Any] = {},
    ) -> list[list[float]]:
        """Embed ``input_text`` with the given model via ``client.embed``.

        ``extra_args`` is expanded into additional keyword arguments of the
        call. Returns the ``embeddings`` attribute of the response.
        """
        response = await self.client.embed(
            model=model.model_entity.name,
            input=input_text,
            **extra_args,
        )
        return response.embeddings
--- a/src/langbot/pkg/provider/modelmgr/requesters/ollamachat.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/ollamachat.yaml
@@ -7,6 +7,7 @@ metadata:
    zh_Hans: Ollama
  icon: ollama.svg
 spec:
  litellm_provider: ollama
  config:
  - name: base_url
    label:
--- a/src/langbot/pkg/provider/modelmgr/requesters/openrouterchatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/openrouterchatcmpl.py
@@ -1,25 +0,0 @@
 from __future__ import annotations
 import typing
 import openai
 from . import modelscopechatcmpl
 class OpenRouterChatCompletions(modelscopechatcmpl.ModelScopeChatCompletions):
    """OpenRouter ChatCompletion API requester."""
    # Annotated as an openai.AsyncClient; not assigned anywhere in this class.
    client: openai.AsyncClient
    # Default endpoint and timeout (unit not shown here; presumably seconds).
    default_config: dict[str, typing.Any] = dict(
        base_url='https://openrouter.ai/api/v1',
        timeout=120,
    )
    async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]:
        """Scan models against the official OpenRouter endpoint.

        The configured ``base_url`` is swapped for the OpenRouter URL while
        the parent implementation runs and is put back afterwards, whether or
        not the scan succeeds. A config without ``base_url`` ends up with an
        empty string for that key.
        """
        previous_url = self.requester_cfg.get('base_url', '')
        self.requester_cfg['base_url'] = 'https://openrouter.ai/api/v1'
        try:
            scan_result = await super().scan_models(api_key)
        finally:
            self.requester_cfg['base_url'] = previous_url
        return scan_result
--- a/src/langbot/pkg/provider/modelmgr/requesters/openrouterchatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/openrouterchatcmpl.yaml
@@ -7,6 +7,7 @@ metadata:
    zh_Hans: OpenRouter
  icon: openrouter.svg
 spec:
  litellm_provider: openai
  config:
  - name: base_url
    label:
--- a/src/langbot/pkg/provider/modelmgr/requesters/ppiochatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/ppiochatcmpl.py
@@ -1,208 +0,0 @@
 from __future__ import annotations
 import openai
 import typing
 from . import chatcmpl
 from .. import requester
 import openai.types.chat.chat_completion as chat_completion
 import re
 import langbot_plugin.api.entities.builtin.provider.message as provider_message
 import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
 import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
 class PPIOChatCompletions(chatcmpl.OpenAIChatCompletions):
    """PPIO cloud ChatCompletion API requester."""
    # Annotated as an openai.AsyncClient; not assigned anywhere in this class.
    client: openai.AsyncClient
    # Default endpoint and timeout (unit not shown here; presumably seconds).
    default_config: dict[str, typing.Any] = {
        'base_url': 'https://api.ppinfra.com/v3/openai',
        'timeout': 120,
    }
    # Not referenced by any method visible in this class.
    is_think: bool = False
    async def _make_msg(
        self,
        chat_completion: chat_completion.ChatCompletion,
        remove_think: bool,
    ) -> provider_message.Message:
        """Convert the first choice of a completion into a provider message.

        A missing or ``None`` role becomes ``'assistant'``. The content is
        passed through ``_process_thinking_content`` together with any
        ``reasoning_content``, which is then left out of the message fields.
        """
        msg_fields = chat_completion.choices[0].message.model_dump()
        # Make sure the role field exists and is not None.
        if msg_fields.get('role') is None:
            msg_fields['role'] = 'assistant'
        # Take reasoning_content out so it is not passed on to Message.
        reasoning = msg_fields.pop('reasoning_content', None)
        msg_fields['content'] = await self._process_thinking_content(
            msg_fields['content'], reasoning, remove_think
        )
        return provider_message.Message(**msg_fields)
    async def _process_thinking_content(
        self,
        content: str,
        reasoning_content: str = None,
        remove_think: bool = False,
    ) -> tuple[str, str]:
        """处理思维链内容
        Args:
            content: 原始内容
            reasoning_content: reasoning_content 字段内容
            remove_think: 是否移除思维链
        Returns:
            处理后的内容
        """
        if remove_think:
            content = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL)
        else:
            if reasoning_content is not None:
                content = '<think>\n' + reasoning_content + '\n</think>\n' + content
        return content
    async def _make_msg_chunk(
        self,
        delta: dict[str, typing.Any],
        idx: int,
    ) -> provider_message.MessageChunk:
        """Build a ``MessageChunk`` from a streaming delta dict.

        The delta is updated in place: a missing or ``None`` role becomes
        ``'assistant'``, ``None`` content becomes an empty string, and any
        ``reasoning_content`` is appended to the content. ``idx`` is not used.
        """
        # Make sure the role field exists and is not None.
        if delta.get('role') is None:
            delta['role'] = 'assistant'
        reasoning = delta.get('reasoning_content')
        text = delta['content']
        delta['content'] = '' if text is None else text
        # Reasoning text is appended to the regular content.
        if reasoning is not None:
            delta['content'] += reasoning
        return provider_message.MessageChunk(**delta)
    async def _closure_stream(
        self,
        query: pipeline_query.Query,
        req_messages: list[dict],
        use_model: requester.RuntimeLLMModel,
        use_funcs: list[resource_tool.LLMTool] | None = None,
        extra_args: dict[str, typing.Any] | None = None,
        remove_think: bool = False,
    ) -> typing.AsyncGenerator[provider_message.MessageChunk, None]:
        """Stream a chat completion as incremental ``MessageChunk`` objects.

        Args:
            query: Pipeline query (not used by this method).
            req_messages: Request messages as plain dicts; ``image_base64``
                content parts are converted to ``image_url`` parts in place.
            use_model: Runtime model supplying the API token and model name.
            use_funcs: Optional tools to expose to the model.
            extra_args: Extra parameters forwarded as ``extra_body``.
            remove_think: When true, content between ``<think>`` and
                ``</think>`` (plus the blank-line chunk that follows) is not
                emitted.

        Yields:
            One ``MessageChunk`` per emitted delta, holding only the
            incremental content and tool calls of that delta.
        """
        self.client.api_key = use_model.provider.token_mgr.get_token()
        args: dict[str, typing.Any] = {'model': use_model.model_entity.name}
        if use_funcs:
            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
            if tools:
                args['tools'] = tools
        # Build the request messages.
        messages = req_messages.copy()
        # Convert vision parts to the image_url form.
        for msg in messages:
            if 'content' in msg and isinstance(msg['content'], list):
                for me in msg['content']:
                    if me['type'] == 'image_base64':
                        me['image_url'] = {'url': me['image_base64']}
                        me['type'] = 'image_url'
                        del me['image_base64']
        args['messages'] = messages
        args['stream'] = True
        chunk_idx = 0
        thinking_started = False
        thinking_ended = False
        role = 'assistant'  # default role until a chunk provides one
        # Last real tool-call id / name seen; initialised so a continuation
        # delta arriving first cannot hit an unbound local.
        tool_id = None
        tool_name = None
        stream = self._req_stream(args, extra_body={} if extra_args is None else extra_args)
        async for chunk in stream:
            # Parse the chunk.
            if hasattr(chunk, 'choices') and chunk.choices:
                choice = chunk.choices[0]
                delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
                finish_reason = getattr(choice, 'finish_reason', None)
            else:
                delta = {}
                finish_reason = None
            # Take the role from whichever chunk provides it and keep it.
            if delta.get('role'):
                role = delta['role']
            # Incremental content; read with .get so chunks without a content
            # key (for example chunks without choices) do not raise KeyError.
            delta_content = delta.get('content', '')
            if remove_think and delta_content is not None:
                if '<think>' in delta_content and not thinking_started and not thinking_ended:
                    thinking_started = True
                    continue
                if delta_content == '</think>' and not thinking_ended:
                    thinking_ended = True
                    continue
                if thinking_ended and thinking_started and delta_content == '\n\n':
                    # Blank-line chunk right after the closing tag.
                    thinking_started = False
                    continue
                if thinking_started and not thinking_ended:
                    continue
            # Normalise incremental tool calls.
            tool_calls = delta.get('tool_calls')
            if tool_calls:
                for tool_call in tool_calls:
                    function = tool_call['function']
                    if tool_call['id'] and function['name']:
                        tool_id = tool_call['id']
                        tool_name = function['name']
                    if tool_call['id'] is None:
                        tool_call['id'] = tool_id
                    if function['name'] is None:
                        function['name'] = tool_name
                    if function['arguments'] is None:
                        function['arguments'] = ''
                    if tool_call['type'] is None:
                        tool_call['type'] = 'function'
            # Skip an empty first chunk (role only, no content).
            if chunk_idx == 0 and not delta_content and not tool_calls:
                chunk_idx += 1
                continue
            # Build the MessageChunk from the incremental data only.
            chunk_data = {
                'role': role,
                'content': delta_content if delta_content else None,
                'tool_calls': tool_calls,
                'is_final': bool(finish_reason),
            }
            # Drop None values.
            chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
            yield provider_message.MessageChunk(**chunk_data)
            chunk_idx += 1
--- a/src/langbot/pkg/provider/modelmgr/requesters/ppiochatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/ppiochatcmpl.yaml
@@ -7,6 +7,7 @@ metadata:
    zh_Hans: 派欧云
  icon: ppio.svg
 spec:
  litellm_provider: openai
  config:
  - name: base_url
    label:
--- a/src/langbot/pkg/provider/modelmgr/requesters/qhaigcchatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/qhaigcchatcmpl.py
@@ -1,17 +0,0 @@
 from __future__ import annotations
 import openai
 import typing
 from . import chatcmpl
 class QHAIGCChatCompletions(chatcmpl.OpenAIChatCompletions):
    """启航 AI ChatCompletion API 请求器"""
    client: openai.AsyncClient
    default_config: dict[str, typing.Any] = {
        'base_url': 'https://api.qhaigc.com/v1',
        'timeout': 120,
    }
--- a/src/langbot/pkg/provider/modelmgr/requesters/qhaigcchatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/qhaigcchatcmpl.yaml
@@ -7,6 +7,7 @@ metadata:
    zh_Hans: 启航 AI
  icon: qhaigc.png
 spec:
  litellm_provider: openai
  config:
  - name: base_url
    label:
--- a/src/langbot/pkg/provider/modelmgr/requesters/qiniuchatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/qiniuchatcmpl.py
@@ -2,19 +2,16 @@ from __future__ import annotations
 import typing
-import openai
+from . import litellmchat
 from . import chatcmpl
-class QiniuChatCompletions(chatcmpl.OpenAIChatCompletions):
+class QiniuChatCompletions(litellmchat.LiteLLMRequester):
    """七牛云 ChatCompletion API 请求器"""
    client: openai.AsyncClient
    default_config: dict[str, typing.Any] = {
        'base_url': 'https://api.qnaigc.com/v1',
        'timeout': 120,
        'custom_llm_provider': 'openai',
    }
    async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]:
--- a/src/langbot/pkg/provider/modelmgr/requesters/shengsuanyun.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/shengsuanyun.py
@@ -1,32 +0,0 @@
 from __future__ import annotations
 import openai
 import typing
 from . import chatcmpl
 import openai.types.chat.chat_completion as chat_completion
 class ShengSuanYunChatCompletions(chatcmpl.OpenAIChatCompletions):
    """胜算云(ModelSpot.AI) ChatCompletion API 请求器"""
    client: openai.AsyncClient
    default_config: dict[str, typing.Any] = {
        'base_url': 'https://router.shengsuanyun.com/api/v1',
        'timeout': 120,
    }
    async def _req(
        self,
        args: dict,
        extra_body: dict = {},
    ) -> chat_completion.ChatCompletion:
        return await self.client.chat.completions.create(
            **args,
            extra_body=extra_body,
            extra_headers={
                'HTTP-Referer': 'https://langbot.app',
                'X-Title': 'LangBot',
            },
        )
--- a/src/langbot/pkg/provider/modelmgr/requesters/shengsuanyun.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/shengsuanyun.yaml
@@ -7,6 +7,7 @@ metadata:
    zh_Hans: 胜算云
  icon: shengsuanyun.svg
 spec:
  litellm_provider: openai
  config:
  - name: base_url
    label:
--- a/src/langbot/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.py
@@ -1,17 +0,0 @@
 from __future__ import annotations
 import typing
 import openai
 from . import chatcmpl
 class SiliconFlowChatCompletions(chatcmpl.OpenAIChatCompletions):
    """SiliconFlow ChatCompletion API 请求器"""
    client: openai.AsyncClient
    default_config: dict[str, typing.Any] = {
        'base_url': 'https://api.siliconflow.cn/v1',
        'timeout': 120,
    }
--- a/src/langbot/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.yaml
@@ -7,6 +7,7 @@ metadata:
    zh_Hans: 硅基流动
  icon: siliconflow.svg
 spec:
  litellm_provider: openai
  config:
  - name: base_url
    label:
--- a/src/langbot/pkg/provider/modelmgr/requesters/spacechatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/spacechatcmpl.py
@@ -1,17 +0,0 @@
 from __future__ import annotations
 import typing
 import openai
 from . import chatcmpl
 class LangBotSpaceChatCompletions(chatcmpl.OpenAIChatCompletions):
    """LangBot Space ChatCompletion API 请求器"""
    client: openai.AsyncClient
    default_config: dict[str, typing.Any] = {
        'base_url': 'https://api.langbot.cloud/v1',
        'timeout': 120,
    }
--- a/src/langbot/pkg/provider/modelmgr/requesters/spacechatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/spacechatcmpl.yaml
@@ -7,6 +7,7 @@ metadata:
    zh_Hans: Space
  icon: space.webp
 spec:
  litellm_provider: openai
  config:
  - name: base_url
    label:
--- a/src/langbot/pkg/provider/modelmgr/requesters/tencent.svg
+++ b/src/langbot/pkg/provider/modelmgr/requesters/tencent.svg
@@ -0,0 +1,5 @@
 <svg width="60" height="50" viewBox="0 0 60 50" xmlns="http://www.w3.org/2000/svg">
  <rect width="60" height="50" rx="8" fill="#0052D9"/>
  <text x="30" y="28" font-family="Arial, sans-serif" font-size="10" font-weight="bold" fill="white" text-anchor="middle">Tencent</text>
  <text x="30" y="40" font-family="Arial, sans-serif" font-size="8" fill="white" text-anchor="middle">Hunyuan</text>
 </svg>
--- a/src/langbot/pkg/provider/modelmgr/requesters/tencentchatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/tencentchatcmpl.yaml
@@ -0,0 +1,30 @@
 # Requester manifest for Tencent Hunyuan, driven through the OpenAI-compatible
 # protocol (litellm_provider: openai).
 apiVersion: v1
 kind: LLMAPIRequester
 metadata:
  name: tencent-chat-completions
  label:
    en_US: Tencent Hunyuan
    zh_Hans: 腾讯混元
  icon: tencent.svg
 spec:
  litellm_provider: openai
  config:
  - name: base_url
    label:
      en_US: Base URL
      zh_Hans: 基础 URL
    type: string
    required: true
    # Must be the OpenAI-compatible Hunyuan endpoint. The
    # hunyuan.tencentcloudapi.com host is the signed Tencent Cloud API 3.0
    # endpoint and does not accept OpenAI-style bearer-token requests.
    default: https://api.hunyuan.cloud.tencent.com/v1
  - name: timeout
    label:
      en_US: Timeout
      zh_Hans: 超时时间
    type: integer
    required: true
    default: 120
  support_type:
  - llm
  - text-embedding
  - rerank
  provider_category: manufacturer
--- a/src/langbot/pkg/provider/modelmgr/requesters/together.svg
+++ b/src/langbot/pkg/provider/modelmgr/requesters/together.svg
@@ -0,0 +1,5 @@
 <svg width="60" height="50" viewBox="0 0 60 50" xmlns="http://www.w3.org/2000/svg">
  <rect width="60" height="50" rx="8" fill="#8B5CF6"/>
  <text x="30" y="28" font-family="Arial, sans-serif" font-size="10" font-weight="bold" fill="white" text-anchor="middle">Together</text>
  <text x="30" y="40" font-family="Arial, sans-serif" font-size="8" fill="white" text-anchor="middle">AI</text>
 </svg>
--- a/src/langbot/pkg/provider/modelmgr/requesters/togetherchatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/togetherchatcmpl.yaml
@@ -0,0 +1,30 @@
 apiVersion: v1
 kind: LLMAPIRequester
 metadata:
  name: together-chat-completions
  label:
    en_US: Together AI
    zh_Hans: Together AI
  icon: together.svg
 spec:
  litellm_provider: together_ai
  config:
  - name: base_url
    label:
      en_US: Base URL
      zh_Hans: 基础 URL
    type: string
    required: true
    default: https://api.together.xyz/v1
  - name: timeout
    label:
      en_US: Timeout
      zh_Hans: 超时时间
    type: integer
    required: true
    default: 120
  support_type:
  - llm
  - text-embedding
  - rerank
  provider_category: manufacturer
--- a/src/langbot/pkg/provider/modelmgr/requesters/tokenpony.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/tokenpony.yaml
@@ -7,6 +7,7 @@ metadata:
    zh_Hans: 小马算力
  icon: tokenpony.svg
 spec:
  litellm_provider: openai
  config:
  - name: base_url
    label:
--- a/src/langbot/pkg/provider/modelmgr/requesters/tokenponychatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/tokenponychatcmpl.py
@@ -1,17 +0,0 @@
 from __future__ import annotations
 import typing
 import openai
 from . import chatcmpl
 class TokenPonyChatCompletions(chatcmpl.OpenAIChatCompletions):
    """TokenPony ChatCompletion API 请求器"""
    client: openai.AsyncClient
    default_config: dict[str, typing.Any] = {
        'base_url': 'https://api.tokenpony.cn/v1',
        'timeout': 120,
    }
--- a/src/langbot/pkg/provider/modelmgr/requesters/volcarkchatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/volcarkchatcmpl.py
@@ -1,17 +0,0 @@
 from __future__ import annotations
 import typing
 import openai
 from . import chatcmpl
 class VolcArkChatCompletions(chatcmpl.OpenAIChatCompletions):
    """火山方舟大模型平台 ChatCompletion API 请求器"""
    client: openai.AsyncClient
    default_config: dict[str, typing.Any] = {
        'base_url': 'https://ark.cn-beijing.volces.com/api/v3',
        'timeout': 120,
    }
--- a/src/langbot/pkg/provider/modelmgr/requesters/volcarkchatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/volcarkchatcmpl.yaml
@@ -7,6 +7,7 @@ metadata:
    zh_Hans: 火山方舟
  icon: volcark.svg
 spec:
  litellm_provider: openai
  config:
  - name: base_url
    label:
@@ -24,6 +25,8 @@ spec:
    default: 120
  support_type:
  - llm
  - text-embedding
  - rerank
  provider_category: maas
 execution:
  python:
--- a/src/langbot/pkg/provider/modelmgr/requesters/voyageairerank.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/voyageairerank.yaml
@@ -7,6 +7,7 @@ metadata:
    zh_Hans: Voyage AI
  icon: voyageai.svg
 spec:
  litellm_provider: openai
  config:
  - name: base_url
    label:
--- a/src/langbot/pkg/provider/modelmgr/requesters/xaichatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/xaichatcmpl.py
@@ -1,17 +0,0 @@
 from __future__ import annotations
 import typing
 import openai
 from . import chatcmpl
 class XaiChatCompletions(chatcmpl.OpenAIChatCompletions):
    """xAI ChatCompletion API 请求器"""
    client: openai.AsyncClient
    default_config: dict[str, typing.Any] = {
        'base_url': 'https://api.x.ai/v1',
        'timeout': 120,
    }
--- a/src/langbot/pkg/provider/modelmgr/requesters/xaichatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/xaichatcmpl.yaml
@@ -7,6 +7,7 @@ metadata:
    zh_Hans: xAI
  icon: xai.svg
 spec:
  litellm_provider: openai
  config:
  - name: base_url
    label:
@@ -24,6 +25,8 @@ spec:
    default: 120
  support_type:
  - llm
  - text-embedding
  - rerank
  provider_category: manufacturer
 execution:
  python:
--- a/src/langbot/pkg/provider/modelmgr/requesters/yi.svg
+++ b/src/langbot/pkg/provider/modelmgr/requesters/yi.svg
@@ -0,0 +1,5 @@
 <svg width="60" height="50" viewBox="0 0 60 50" xmlns="http://www.w3.org/2000/svg">
  <rect width="60" height="50" rx="8" fill="#10B981"/>
  <text x="30" y="28" font-family="Arial, sans-serif" font-size="10" font-weight="bold" fill="white" text-anchor="middle">01.AI</text>
  <text x="30" y="40" font-family="Arial, sans-serif" font-size="8" fill="white" text-anchor="middle">Yi</text>
 </svg>
--- a/src/langbot/pkg/provider/modelmgr/requesters/yichatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/yichatcmpl.yaml
@@ -0,0 +1,30 @@
 apiVersion: v1
 kind: LLMAPIRequester
 metadata:
  name: yi-chat-completions
  label:
    en_US: 01.AI Yi
    zh_Hans: 零一万物
  icon: yi.svg
 spec:
  litellm_provider: openai
  config:
  - name: base_url
    label:
      en_US: Base URL
      zh_Hans: 基础 URL
    type: string
    required: true
    default: https://api.lingyiwanwu.com/v1
  - name: timeout
    label:
      en_US: Timeout
      zh_Hans: 超时时间
    type: integer
    required: true
    default: 120
  support_type:
  - llm
  - text-embedding
  - rerank
  provider_category: manufacturer
--- a/src/langbot/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.py
@@ -1,17 +0,0 @@
 from __future__ import annotations
 import typing
 import openai
 from . import chatcmpl
 class ZhipuAIChatCompletions(chatcmpl.OpenAIChatCompletions):
    """智谱AI ChatCompletion API 请求器"""
    client: openai.AsyncClient
    default_config: dict[str, typing.Any] = {
        'base_url': 'https://open.bigmodel.cn/api/paas/v4',
        'timeout': 120,
    }
--- a/src/langbot/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.yaml
@@ -7,6 +7,7 @@ metadata:
    zh_Hans: 智谱 AI
  icon: zhipuai.svg
 spec:
  litellm_provider: openai
  config:
  - name: base_url
    label:
@@ -24,6 +25,8 @@ spec:
    default: 120
  support_type:
  - llm
  - text-embedding
  - rerank
  provider_category: manufacturer
 execution:
  python:
--- a/src/langbot/pkg/provider/runners/localagent.py
+++ b/src/langbot/pkg/provider/runners/localagent.py
@@ -41,6 +41,64 @@ SANDBOX_EXEC_SYSTEM_GUIDANCE = (
 MAX_TOOL_CALL_ROUNDS = 128
 def _model_has_ability(model: modelmgr_requester.RuntimeLLMModel, ability: str) -> bool:
    return ability in (model.model_entity.abilities or [])
 class _StreamAccumulator:
    """Accumulate streamed content and fragmented OpenAI-style tool calls."""
    def __init__(self, msg_sequence: int = 0, initial_content: str | None = None):
        self.tool_calls_map: dict[str, provider_message.ToolCall] = {}
        self.msg_idx = 0
        self.accumulated_content = initial_content or ''
        self.last_role = 'assistant'
        self.msg_sequence = msg_sequence
    def add(self, msg: provider_message.MessageChunk) -> provider_message.MessageChunk | None:
        self.msg_idx += 1
        if msg.role:
            self.last_role = msg.role
        if msg.content:
            self.accumulated_content += msg.content
        if msg.tool_calls:
            for tool_call in msg.tool_calls:
                if tool_call.id not in self.tool_calls_map:
                    self.tool_calls_map[tool_call.id] = provider_message.ToolCall(
                        id=tool_call.id,
                        type=tool_call.type,
                        function=provider_message.FunctionCall(
                            name=tool_call.function.name if tool_call.function else '',
                            arguments='',
                        ),
                    )
                if tool_call.function and tool_call.function.arguments:
                    self.tool_calls_map[tool_call.id].function.arguments += tool_call.function.arguments
        if self.msg_idx % 8 == 0 or msg.is_final:
            self.msg_sequence += 1
            return provider_message.MessageChunk(
                role=self.last_role,
                content=self.accumulated_content,
                tool_calls=list(self.tool_calls_map.values()) if (self.tool_calls_map and msg.is_final) else None,
                is_final=msg.is_final,
                msg_sequence=self.msg_sequence,
            )
        return None
    def final_message(self) -> provider_message.MessageChunk:
        return provider_message.MessageChunk(
            role=self.last_role,
            content=self.accumulated_content,
            tool_calls=list(self.tool_calls_map.values()) if self.tool_calls_map else None,
            msg_sequence=self.msg_sequence,
        )
@runner.runner_class('local-agent')
 class LocalAgentRunner(runner.RequestRunner):
    """Local agent request runner"""
@@ -105,7 +163,7 @@ class LocalAgentRunner(runner.RequestRunner):
                    query,
                    model,
                    messages,
-                    funcs if model.model_entity.abilities.__contains__('func_call') else [],
+                    funcs if _model_has_ability(model, 'func_call') else [],
                    extra_args=model.model_entity.extra_args,
                    remove_think=remove_think,
                )
@@ -135,7 +193,7 @@ class LocalAgentRunner(runner.RequestRunner):
                    query,
                    model,
                    messages,
-                    funcs if model.model_entity.abilities.__contains__('func_call') else [],
+                    funcs if _model_has_ability(model, 'func_call') else [],
                    extra_args=model.model_entity.extra_args,
                    remove_think=remove_think,
                )
@@ -302,11 +360,7 @@ class LocalAgentRunner(runner.RequestRunner):
            final_msg = msg
        else:
            # Streaming: invoke with fallback
-            tool_calls_map: dict[str, provider_message.ToolCall] = {}
+            stream_accumulator = _StreamAccumulator(msg_sequence=1)
            msg_idx = 0
            accumulated_content = ''
            last_role = 'assistant'
            msg_sequence = 1
            stream_src, use_llm_model = await self._invoke_stream_with_fallback(
                query,
@@ -316,44 +370,12 @@ class LocalAgentRunner(runner.RequestRunner):
                remove_think,
            )
            async for msg in stream_src:
-                msg_idx = msg_idx + 1
+                chunk = stream_accumulator.add(msg)
-
+                if chunk:
-                if msg.role:
+                    yield chunk
                    last_role = msg.role
                if msg.content:
                    accumulated_content += msg.content
                if msg.tool_calls:
                    for tool_call in msg.tool_calls:
                        if tool_call.id not in tool_calls_map:
                            tool_calls_map[tool_call.id] = provider_message.ToolCall(
                                id=tool_call.id,
                                type=tool_call.type,
                                function=provider_message.FunctionCall(
                                    name=tool_call.function.name if tool_call.function else '', arguments=''
                                ),
                            )
                        if tool_call.function and tool_call.function.arguments:
                            tool_calls_map[tool_call.id].function.arguments += tool_call.function.arguments
                if msg_idx % 8 == 0 or msg.is_final:
                    msg_sequence += 1
                    yield provider_message.MessageChunk(
                        role=last_role,
                        content=accumulated_content,
                        tool_calls=list(tool_calls_map.values()) if (tool_calls_map and msg.is_final) else None,
                        is_final=msg.is_final,
                        msg_sequence=msg_sequence,
                    )
                    initial_response_emitted = True
-            final_msg = provider_message.MessageChunk(
+            final_msg = stream_accumulator.final_message()
                role=last_role,
                content=accumulated_content,
                tool_calls=list(tool_calls_map.values()) if tool_calls_map else None,
                msg_sequence=msg_sequence,
            )
        pending_tool_calls = final_msg.tool_calls
        first_content = final_msg.content
@@ -438,69 +460,32 @@ class LocalAgentRunner(runner.RequestRunner):
            )
            if is_stream:
-                tool_calls_map = {}
+                stream_accumulator = _StreamAccumulator(
-                msg_idx = 0
+                    msg_sequence=first_end_sequence,
-                accumulated_content = ''
+                    initial_content=first_content,
-                last_role = 'assistant'
+                )
                msg_sequence = first_end_sequence
                tool_stream_src = use_llm_model.provider.invoke_llm_stream(
                    query,
                    use_llm_model,
                    req_messages,
-                    query.use_funcs if use_llm_model.model_entity.abilities.__contains__('func_call') else [],
+                    query.use_funcs if _model_has_ability(use_llm_model, 'func_call') else [],
                    extra_args=use_llm_model.model_entity.extra_args,
                    remove_think=remove_think,
                )
                async for msg in tool_stream_src:
-                    msg_idx += 1
+                    chunk = stream_accumulator.add(msg)
                    if chunk:
                        yield chunk
-                    if msg.role:
+                final_msg = stream_accumulator.final_message()
                        last_role = msg.role
                    # Prepend first-round content on first chunk of tool-call round
                    if msg_idx == 1:
                        accumulated_content = first_content if first_content is not None else accumulated_content
                    if msg.content:
                        accumulated_content += msg.content
                    if msg.tool_calls:
                        for tool_call in msg.tool_calls:
                            if tool_call.id not in tool_calls_map:
                                tool_calls_map[tool_call.id] = provider_message.ToolCall(
                                    id=tool_call.id,
                                    type=tool_call.type,
                                    function=provider_message.FunctionCall(
                                        name=tool_call.function.name if tool_call.function else '', arguments=''
                                    ),
                                )
                            if tool_call.function and tool_call.function.arguments:
                                tool_calls_map[tool_call.id].function.arguments += tool_call.function.arguments
                    if msg_idx % 8 == 0 or msg.is_final:
                        msg_sequence += 1
                        yield provider_message.MessageChunk(
                            role=last_role,
                            content=accumulated_content,
                            tool_calls=list(tool_calls_map.values()) if (tool_calls_map and msg.is_final) else None,
                            is_final=msg.is_final,
                            msg_sequence=msg_sequence,
                        )
                final_msg = provider_message.MessageChunk(
                    role=last_role,
                    content=accumulated_content,
                    tool_calls=list(tool_calls_map.values()) if tool_calls_map else None,
                    msg_sequence=msg_sequence,
                )
            else:
                # Non-streaming: use committed model directly (no fallback in tool loop)
                msg = await use_llm_model.provider.invoke_llm(
                    query,
                    use_llm_model,
                    req_messages,
-                    query.use_funcs if use_llm_model.model_entity.abilities.__contains__('func_call') else [],
+                    query.use_funcs if _model_has_ability(use_llm_model, 'func_call') else [],
                    extra_args=use_llm_model.model_entity.extra_args,
                    remove_think=remove_think,
                )
--- a/src/langbot/pkg/provider/tools/loaders/mcp.py
+++ b/src/langbot/pkg/provider/tools/loaders/mcp.py
@@ -240,12 +240,13 @@ class RuntimeMCPSession:
                    return
                if attempt >= self._MAX_RETRIES:
                    self.status = MCPSessionStatus.ERROR
-                    self.error_message = f'Failed after {self._MAX_RETRIES + 1} attempts: {e}'
+                    self.error_message = f'Failed after {self._MAX_RETRIES + 1} attempts: {self._describe_exception(e)}'
                    self._ready_event.set()
                    return
                delay = self._RETRY_DELAYS[attempt]
                self.ap.logger.warning(
-                    f'MCP session {self.server_name} failed (attempt {attempt + 1}), retrying in {delay}s: {e}'
+                    f'MCP session {self.server_name} failed (attempt {attempt + 1}), '
                    f'retrying in {delay}s: {self._describe_exception(e)}'
                )
                await self._cleanup_box_stdio_session()
                # Reset status for retry
@@ -254,6 +255,30 @@ class RuntimeMCPSession:
                self.error_phase = None
                await asyncio.sleep(delay)
    @staticmethod
    def _describe_exception(exc: BaseException) -> str:
        """Flatten an exception into its underlying leaf messages.
        anyio / the MCP client wrap real failures in a TaskGroup, whose own
        message is the unhelpful "unhandled errors in a TaskGroup (N
        sub-exception)". Recurse into ExceptionGroups so the actual cause
        (e.g. ``httpx.HTTPStatusError: Client error '410 Gone'``) is surfaced.
        """
        leaves: list[str] = []
        def visit(e: BaseException) -> None:
            sub = getattr(e, 'exceptions', None)
            if sub:  # ExceptionGroup / BaseExceptionGroup
                for child in sub:
                    visit(child)
            else:
                leaves.append(f'{type(e).__name__}: {e}')
        visit(exc)
        seen: set[str] = set()
        unique = [m for m in leaves if not (m in seen or seen.add(m))]
        return '; '.join(unique) if unique else f'{type(exc).__name__}: {exc}'
    _MONITOR_POLL_INTERVAL = 5
    _MONITOR_MAX_CONSECUTIVE_ERRORS = 3
--- a/src/langbot/pkg/provider/tools/toolmgr.py
+++ b/src/langbot/pkg/provider/tools/toolmgr.py
@@ -83,19 +83,6 @@ class ToolManager:
        return tools
    async def generate_tools_for_anthropic(self, use_funcs: list[resource_tool.LLMTool]) -> list:
        tools = []
        for function in use_funcs:
            function_schema = {
                'name': function.name,
                'description': function.description,
                'input_schema': function.parameters,
            }
            tools.append(function_schema)
        return tools
    async def execute_func_call(self, name: str, parameters: dict, query: pipeline_query.Query) -> typing.Any:
        if await self.native_tool_loader.has_tool(name):
            return await self.native_tool_loader.invoke_tool(name, parameters, query)
--- a/src/langbot/pkg/utils/constants.py
+++ b/src/langbot/pkg/utils/constants.py
@@ -2,7 +2,7 @@ import langbot
 semantic_version = f'v{langbot.__version__}'
-required_database_version = 25
+required_database_version = 26
 """Tag the version of the database schema, used to check if the database needs to be migrated"""
 debug_mode = False
--- a/tests/unit_tests/api/service/test_model_service.py
+++ b/tests/unit_tests/api/service/test_model_service.py
@@ -35,6 +35,7 @@ def _create_mock_llm_model(
    name: str = 'Test LLM',
    provider_uuid: str = 'provider-uuid',
    abilities: list = None,
    context_length: int | None = None,
    extra_args: dict = None,
 ) -> Mock:
    """Helper to create mock LLMModel entity."""
@@ -43,6 +44,7 @@ def _create_mock_llm_model(
    model.name = name
    model.provider_uuid = provider_uuid
    model.abilities = abilities or []
    model.context_length = context_length
    model.extra_args = extra_args or {}
    return model
@@ -142,10 +144,12 @@ class TestRuntimeModelData:
            'name': 'Model',
            'provider_uuid': 'provider',
            'abilities': ['vision'],
            'context_length': 128000,
            'extra_args': {'temp': 0.7},
        }
        result = _runtime_model_data('uuid', update_payload)
        assert result['abilities'] == ['vision']
        assert result['context_length'] == 128000
        assert result['extra_args'] == {'temp': 0.7}
@@ -188,7 +192,7 @@ class TestLLMModelsServiceGetLLMModels:
        ap = SimpleNamespace()
        ap.persistence_mgr = SimpleNamespace()
-        model = _create_mock_llm_model()
+        model = _create_mock_llm_model(context_length=128000)
        provider = _create_mock_provider()
        mock_model_result = _create_mock_result([model])
@@ -206,6 +210,7 @@ class TestLLMModelsServiceGetLLMModels:
                'uuid': entity.uuid,
                'name': entity.name,
                'provider_uuid': entity.provider_uuid if hasattr(entity, 'provider_uuid') else None,
                'context_length': getattr(entity, 'context_length', None),
                'api_keys': entity.api_keys if hasattr(entity, 'api_keys') else None,
            }
        )
@@ -218,6 +223,7 @@ class TestLLMModelsServiceGetLLMModels:
        # Verify
        assert len(result) == 1
        assert result[0]['name'] == 'Test LLM'
        assert result[0]['context_length'] == 128000
    async def test_get_llm_models_hide_secret_keys(self):
        """Hides secret API keys when include_secret=False."""
@@ -265,7 +271,7 @@ class TestLLMModelsServiceGetLLMModel:
        ap = SimpleNamespace()
        ap.persistence_mgr = SimpleNamespace()
-        model = _create_mock_llm_model(model_uuid='found-uuid')
+        model = _create_mock_llm_model(model_uuid='found-uuid', context_length=128000)
        provider = _create_mock_provider()
        mock_model_result = _create_mock_result([], first_item=model)
@@ -279,11 +285,12 @@ class TestLLMModelsServiceGetLLMModel:
        ap.persistence_mgr.execute_async = AsyncMock(side_effect=mock_execute)
        ap.persistence_mgr.serialize_model = Mock(
-            return_value={
+            side_effect=lambda model_cls, entity: {
-                'uuid': 'found-uuid',
+                'uuid': entity.uuid,
-                'name': 'Test LLM',
+                'name': entity.name,
-                'provider_uuid': 'provider-uuid',
+                'provider_uuid': getattr(entity, 'provider_uuid', None),
-                'provider': {'uuid': 'provider-uuid', 'api_keys': ['key']},
+                'context_length': getattr(entity, 'context_length', None),
                'api_keys': getattr(entity, 'api_keys', None),
            }
        )
@@ -295,6 +302,7 @@ class TestLLMModelsServiceGetLLMModel:
        # Verify
        assert result is not None
        assert result['uuid'] == 'found-uuid'
        assert result['context_length'] == 128000
    async def test_get_llm_model_not_found(self):
        """Returns None when model not found."""
@@ -402,6 +410,39 @@ class TestLLMModelsServiceCreateLLMModel:
        # Verify
        assert model_uuid == 'preserved-uuid'
    async def test_create_llm_model_persists_context_length_as_column(self):
        """context_length reaches the runtime entity as its own field, not inside extra_args."""
        load_model = AsyncMock(return_value=Mock())
        ap = SimpleNamespace(
            persistence_mgr=SimpleNamespace(
                execute_async=AsyncMock(return_value=_create_mock_result([])),
            ),
            model_mgr=SimpleNamespace(
                provider_dict={'provider-uuid': Mock()},
                llm_models=[],
                load_llm_model_with_provider=load_model,
            ),
            pipeline_service=SimpleNamespace(update_pipeline=AsyncMock()),
        )
        payload = {
            'uuid': 'model-with-context',
            'name': 'Context Model',
            'provider_uuid': 'provider-uuid',
            'abilities': ['func_call'],
            'context_length': 128000,
            'extra_args': {'temperature': 0.2},
        }
        await LLMModelsService(ap).create_llm_model(
            payload,
            preserve_uuid=True,
            auto_set_to_default_pipeline=False,
        )
        # The entity handed to the runtime loader is the first positional argument.
        loaded_entity = load_model.await_args.args[0]
        assert loaded_entity.context_length == 128000
        assert loaded_entity.extra_args == {'temperature': 0.2}
        assert 'context_length' not in loaded_entity.extra_args
    async def test_create_llm_model_provider_not_found_raises_error(self):
        """Raises Exception when provider not found in runtime."""
        # Setup
@@ -512,6 +553,35 @@ class TestLLMModelsServiceUpdateLLMModel:
                'provider_uuid': 'nonexistent-provider',
            })
    async def test_update_llm_model_reloads_context_length_as_column(self):
        """After an update, the reloaded runtime entity keeps context_length separate from extra_args."""
        load_model = AsyncMock(return_value=Mock())
        ap = SimpleNamespace(
            persistence_mgr=SimpleNamespace(execute_async=AsyncMock()),
            model_mgr=SimpleNamespace(
                provider_dict={'provider-uuid': Mock()},
                llm_models=[],
                remove_llm_model=AsyncMock(),
                load_llm_model_with_provider=load_model,
            ),
        )
        changes = {
            'name': 'Updated Name',
            'provider_uuid': 'provider-uuid',
            'abilities': ['vision'],
            'context_length': 64000,
            'extra_args': {'temperature': 0.4},
        }
        await LLMModelsService(ap).update_llm_model('existing-uuid', changes)
        # The entity handed to the runtime loader is the first positional argument.
        loaded_entity = load_model.await_args.args[0]
        assert loaded_entity.uuid == 'existing-uuid'
        assert loaded_entity.context_length == 64000
        assert loaded_entity.extra_args == {'temperature': 0.4}
        assert 'context_length' not in loaded_entity.extra_args
 class TestLLMModelsServiceDeleteLLMModel:
    """Tests for LLMModelsService.delete_llm_model method."""
--- a/tests/unit_tests/provider/init.py
+++ b/tests/unit_tests/provider/init.py
@@ -1 +1 @@
-
+"""Provider requester tests"""
--- a/tests/unit_tests/provider/requesters/test_anthropic_requester.py
+++ b/tests/unit_tests/provider/requesters/test_anthropic_requester.py
@@ -1,32 +0,0 @@
 """Tests for AnthropicMessages requester.
 Tests config and pure utility methods.
 """
 from __future__ import annotations
 from unittest.mock import MagicMock
 class TestAnthropicMessagesConfig:
    """Configuration checks for the AnthropicMessages requester."""
    def test_default_config_values(self):
        """The class-level default_config exposes the expected base URL and timeout."""
        from langbot.pkg.provider.modelmgr.requesters.anthropicmsgs import AnthropicMessages
        defaults = AnthropicMessages.default_config
        assert defaults['base_url'] == 'https://api.anthropic.com'
        assert defaults['timeout'] == 120
    def test_config_override(self):
        """Values passed to the constructor show up in requester_cfg."""
        from langbot.pkg.provider.modelmgr.requesters.anthropicmsgs import AnthropicMessages
        overrides = {
            'base_url': 'https://custom.anthropic.com',
            'timeout': 60,
        }
        requester = AnthropicMessages(MagicMock(), overrides)
        effective = requester.requester_cfg
        assert effective['base_url'] == 'https://custom.anthropic.com'
        assert effective['timeout'] == 60
--- a/tests/unit_tests/provider/requesters/test_chatcmpl_errors_direct.py
+++ b/tests/unit_tests/provider/requesters/test_chatcmpl_errors_direct.py
@@ -1,247 +0,0 @@
 """Tests for requester error handling - direct import version.
 Tests error handling branches by importing real packages and mocking
 only the necessary dependencies.
 """
 from __future__ import annotations
 import asyncio
 from unittest.mock import AsyncMock, MagicMock
 import pytest
 import openai  # Import real openai package
 from langbot.pkg.provider.modelmgr.errors import RequesterError
 class TestInvokeLLMErrorHandling:
    """Checks that invoke_llm converts client failures into RequesterError."""
    @pytest.fixture
    def mock_app(self):
        """Application double whose tool manager returns no OpenAI tools."""
        tool_mgr = MagicMock()
        tool_mgr.generate_tools_for_openai = AsyncMock(return_value=[])
        application = MagicMock()
        application.tool_mgr = tool_mgr
        return application
    @pytest.fixture
    def mock_model(self):
        """Runtime LLM model double named 'gpt-4' with a fixed token."""
        token_mgr = MagicMock()
        token_mgr.get_token = MagicMock(return_value='test-key')
        provider = MagicMock()
        provider.token_mgr = token_mgr
        entity = MagicMock()
        entity.name = 'gpt-4'
        llm = MagicMock()
        llm.model_entity = entity
        llm.provider = provider
        return llm
    @pytest.fixture
    def mock_message(self):
        """Provider message double whose dict() yields a plain user message."""
        message = MagicMock()
        message.dict = MagicMock(return_value={'role': 'user', 'content': 'test'})
        return message
    @pytest.fixture
    def requester_with_mocked_client(self, mock_app):
        """OpenAIChatCompletions instance whose client is replaced by mocks."""
        from langbot.pkg.provider.modelmgr.requesters.chatcmpl import OpenAIChatCompletions
        settings = {
            'base_url': 'https://api.openai.com/v1',
            'timeout': 120,
        }
        requester = OpenAIChatCompletions(mock_app, settings)
        # Swap the real client for a mock chain ending in an awaitable create().
        completions = MagicMock()
        completions.create = AsyncMock()
        chat = MagicMock()
        chat.completions = completions
        client = MagicMock()
        client.chat = chat
        requester.client = client
        return requester
    @pytest.mark.asyncio
    async def test_timeout_error(self, requester_with_mocked_client, mock_model, mock_message):
        """asyncio.TimeoutError from the client surfaces as RequesterError."""
        requester = requester_with_mocked_client
        requester.client.chat.completions.create = AsyncMock(side_effect=asyncio.TimeoutError())
        with pytest.raises(RequesterError) as caught:
            await requester.invoke_llm(query=None, model=mock_model, messages=[mock_message])
        assert '超时' in str(caught.value)
    @pytest.mark.asyncio
    async def test_bad_request_context_length(self, requester_with_mocked_client, mock_model, mock_message):
        """A context_length_exceeded BadRequestError yields the dedicated message."""
        requester = requester_with_mocked_client
        failure = openai.BadRequestError(
            message='context_length_exceeded: max 4096',
            response=MagicMock(status_code=400),
            body={},
        )
        requester.client.chat.completions.create = AsyncMock(side_effect=failure)
        with pytest.raises(RequesterError) as caught:
            await requester.invoke_llm(query=None, model=mock_model, messages=[mock_message])
        assert '上文过长' in str(caught.value)
    @pytest.mark.asyncio
    async def test_authentication_error(self, requester_with_mocked_client, mock_model, mock_message):
        """AuthenticationError is reported with an invalid api-key message."""
        requester = requester_with_mocked_client
        failure = openai.AuthenticationError(
            message='Invalid API key',
            response=MagicMock(status_code=401),
            body={},
        )
        requester.client.chat.completions.create = AsyncMock(side_effect=failure)
        with pytest.raises(RequesterError) as caught:
            await requester.invoke_llm(query=None, model=mock_model, messages=[mock_message])
        text = str(caught.value)
        assert 'api-key' in text.lower() or '无效' in text
    @pytest.mark.asyncio
    async def test_rate_limit_error(self, requester_with_mocked_client, mock_model, mock_message):
        """RateLimitError is reported with a rate-limit or balance message."""
        requester = requester_with_mocked_client
        failure = openai.RateLimitError(
            message='Rate limit exceeded',
            response=MagicMock(status_code=429),
            body={},
        )
        requester.client.chat.completions.create = AsyncMock(side_effect=failure)
        with pytest.raises(RequesterError) as caught:
            await requester.invoke_llm(query=None, model=mock_model, messages=[mock_message])
        text = str(caught.value)
        assert '频繁' in text or '余额' in text
 class TestInvokeEmbeddingErrorHandling:
    """Checks that invoke_embedding converts client failures into RequesterError."""
    @pytest.fixture
    def mock_app(self):
        """Bare application double."""
        return MagicMock()
    @pytest.fixture
    def mock_embedding_model(self):
        """Embedding model double with empty extra_args and a fixed token."""
        token_mgr = MagicMock()
        token_mgr.get_token = MagicMock(return_value='test-key')
        provider = MagicMock()
        provider.token_mgr = token_mgr
        entity = MagicMock()
        entity.name = 'text-embedding-ada-002'
        entity.extra_args = {}
        embedding_model = MagicMock()
        embedding_model.model_entity = entity
        embedding_model.provider = provider
        return embedding_model
    @pytest.fixture
    def requester_with_mocked_client(self, mock_app):
        """OpenAIChatCompletions instance whose embeddings client is mocked."""
        from langbot.pkg.provider.modelmgr.requesters.chatcmpl import OpenAIChatCompletions
        requester = OpenAIChatCompletions(mock_app, {})
        embeddings = MagicMock()
        embeddings.create = AsyncMock()
        client = MagicMock()
        client.embeddings = embeddings
        requester.client = client
        return requester
    @pytest.mark.asyncio
    async def test_embedding_timeout_error(self, requester_with_mocked_client, mock_embedding_model):
        """asyncio.TimeoutError during an embedding call surfaces as RequesterError."""
        requester = requester_with_mocked_client
        requester.client.embeddings.create = AsyncMock(side_effect=asyncio.TimeoutError())
        with pytest.raises(RequesterError) as caught:
            await requester.invoke_embedding(model=mock_embedding_model, input_text=['test'])
        assert '超时' in str(caught.value)
    @pytest.mark.asyncio
    async def test_embedding_bad_request_error(self, requester_with_mocked_client, mock_embedding_model):
        """BadRequestError during an embedding call surfaces as RequesterError."""
        requester = requester_with_mocked_client
        failure = openai.BadRequestError(
            message='Invalid model',
            response=MagicMock(status_code=400),
            body={},
        )
        requester.client.embeddings.create = AsyncMock(side_effect=failure)
        with pytest.raises(RequesterError) as caught:
            await requester.invoke_embedding(model=mock_embedding_model, input_text=['test'])
        assert '参数' in str(caught.value)
 class TestRequesterErrorClass:
    """Checks on the RequesterError exception type itself."""
    def test_error_message_prefix(self):
        """The string form of RequesterError contains '模型请求失败'."""
        from langbot.pkg.provider.modelmgr.errors import RequesterError
        assert '模型请求失败' in str(RequesterError('test error'))
    def test_error_is_exception(self):
        """A RequesterError instance is an Exception."""
        from langbot.pkg.provider.modelmgr.errors import RequesterError
        assert isinstance(RequesterError('test'), Exception)
 class TestDefaultConfig:
    """Configuration checks for the OpenAIChatCompletions requester."""
    def test_default_config(self):
        """The class-level default_config exposes the expected base URL and timeout."""
        from langbot.pkg.provider.modelmgr.requesters.chatcmpl import OpenAIChatCompletions
        defaults = OpenAIChatCompletions.default_config
        assert defaults['base_url'] == 'https://api.openai.com/v1'
        assert defaults['timeout'] == 120
    def test_config_override(self):
        """Values passed to the constructor show up in requester_cfg."""
        from langbot.pkg.provider.modelmgr.requesters.chatcmpl import OpenAIChatCompletions
        overrides = {
            'base_url': 'https://custom.com/v1',
            'timeout': 60,
        }
        effective = OpenAIChatCompletions(MagicMock(), overrides).requester_cfg
        assert effective['base_url'] == 'https://custom.com/v1'
        assert effective['timeout'] == 60
--- a/tests/unit_tests/provider/requesters/test_chatcmpl_utils.py
+++ b/tests/unit_tests/provider/requesters/test_chatcmpl_utils.py
@@ -1,340 +0,0 @@
 """Tests for requester pure utility functions.
 Tests the helper methods in OpenAIChatCompletions that don't require network calls.
 """
 from __future__ import annotations
 from unittest.mock import MagicMock
 from tests.utils.import_isolation import isolated_sys_modules
 class TestMaskApiKey:
    """Checks for OpenAIChatCompletions._mask_api_key."""
    def _create_requester_with_mocks(self):
        """Build a requester while the listed modules are replaced by mocks."""
        stubbed_names = (
            'langbot.pkg.core.app',
            'langbot_plugin.api.entities.builtin.resource.tool',
            'langbot_plugin.api.entities.builtin.pipeline.query',
            'langbot_plugin.api.entities.builtin.provider.message',
            'langbot.pkg.provider.modelmgr.errors',
        )
        stubs = {module_name: MagicMock() for module_name in stubbed_names}
        with isolated_sys_modules(stubs):
            # Import and instantiate inside the isolation context.
            from langbot.pkg.provider.modelmgr.requesters.chatcmpl import OpenAIChatCompletions
            return OpenAIChatCompletions(MagicMock(), {})
    def test_mask_api_key_full(self):
        """A long key keeps its first and last four characters."""
        masked = self._create_requester_with_mocks()._mask_api_key('sk-1234567890abcdef')
        assert masked == 'sk-1...cdef'
    def test_mask_api_key_short(self):
        """A five-character key is replaced by '****'."""
        masked = self._create_requester_with_mocks()._mask_api_key('short')
        assert masked == '****'
    def test_mask_api_key_empty(self):
        """An empty key yields an empty string."""
        masked = self._create_requester_with_mocks()._mask_api_key('')
        assert masked == ''
    def test_mask_api_key_none(self):
        """A None key yields an empty string."""
        masked = self._create_requester_with_mocks()._mask_api_key(None)
        assert masked == ''
    def test_mask_api_key_exact_8_chars(self):
        """An eight-character key is still replaced by '****'."""
        masked = self._create_requester_with_mocks()._mask_api_key('12345678')
        assert masked == '****'
 class TestInferModelType:
    """Checks for OpenAIChatCompletions._infer_model_type."""
    def _create_requester_with_mocks(self):
        """Build a requester while the listed modules are replaced by mocks."""
        stubbed_names = (
            'langbot.pkg.core.app',
            'langbot_plugin.api.entities.builtin.resource.tool',
            'langbot_plugin.api.entities.builtin.pipeline.query',
            'langbot_plugin.api.entities.builtin.provider.message',
            'langbot.pkg.provider.modelmgr.errors',
        )
        stubs = {module_name: MagicMock() for module_name in stubbed_names}
        with isolated_sys_modules(stubs):
            # Import and instantiate inside the isolation context.
            from langbot.pkg.provider.modelmgr.requesters.chatcmpl import OpenAIChatCompletions
            return OpenAIChatCompletions(MagicMock(), {})
    def test_infer_embedding_from_name(self):
        """Each of these model names is classified as 'embedding'."""
        requester = self._create_requester_with_mocks()
        for model_id in ('text-embedding-ada-002', 'bge-large-en', 'e5-base', 'm3e-base'):
            assert requester._infer_model_type(model_id) == 'embedding'
    def test_infer_llm_from_name(self):
        """Each of these model names is classified as 'llm'."""
        requester = self._create_requester_with_mocks()
        for model_id in ('gpt-4', 'claude-3-opus', 'llama-2-70b'):
            assert requester._infer_model_type(model_id) == 'llm'
    def test_infer_model_type_none_id(self):
        """A None model id is classified as 'llm'."""
        inferred = self._create_requester_with_mocks()._infer_model_type(None)
        assert inferred == 'llm'
    def test_infer_model_type_empty_id(self):
        """An empty model id is classified as 'llm'."""
        inferred = self._create_requester_with_mocks()._infer_model_type('')
        assert inferred == 'llm'
 class TestNormalizeModalities:
    """Checks for OpenAIChatCompletions._normalize_modalities."""
    def _create_requester_with_mocks(self):
        """Build a requester while the listed modules are replaced by mocks."""
        stubbed_names = (
            'langbot.pkg.core.app',
            'langbot_plugin.api.entities.builtin.resource.tool',
            'langbot_plugin.api.entities.builtin.pipeline.query',
            'langbot_plugin.api.entities.builtin.provider.message',
            'langbot.pkg.provider.modelmgr.errors',
        )
        stubs = {module_name: MagicMock() for module_name in stubbed_names}
        with isolated_sys_modules(stubs):
            # Import and instantiate inside the isolation context.
            from langbot.pkg.provider.modelmgr.requesters.chatcmpl import OpenAIChatCompletions
            return OpenAIChatCompletions(MagicMock(), {})
    def test_normalize_string_modality(self):
        """A comma-separated string is split into a list of modalities."""
        normalized = self._create_requester_with_mocks()._normalize_modalities('text,image')
        assert normalized == ['text', 'image']
    def test_normalize_list_modalities(self):
        """A list of modalities comes back unchanged."""
        normalized = self._create_requester_with_mocks()._normalize_modalities(['text', 'image', 'audio'])
        assert normalized == ['text', 'image', 'audio']
    def test_normalize_dict_modalities(self):
        """A dict of input/output lists is flattened to ['text', 'image']."""
        nested = {'input': ['text'], 'output': ['text', 'image']}
        normalized = self._create_requester_with_mocks()._normalize_modalities(nested)
        assert normalized == ['text', 'image']
    def test_normalize_none(self):
        """None yields an empty list."""
        normalized = self._create_requester_with_mocks()._normalize_modalities(None)
        assert normalized == []
    def test_normalize_arrow_separator(self):
        """A 'text->image' string is split on the arrow."""
        normalized = self._create_requester_with_mocks()._normalize_modalities('text->image')
        assert normalized == ['text', 'image']
 class TestParseRerankResponse:
    """Checks for the static OpenAIChatCompletions._parse_rerank_response."""
    def test_parse_cohere_jina_format(self):
        """Entries under a top-level 'results' key are returned as index/score pairs."""
        from langbot.pkg.provider.modelmgr.requesters.chatcmpl import OpenAIChatCompletions
        payload = {
            'results': [
                {'index': 0, 'relevance_score': 0.95},
                {'index': 1, 'relevance_score': 0.80},
            ]
        }
        expected = [
            {'index': 0, 'relevance_score': 0.95},
            {'index': 1, 'relevance_score': 0.80},
        ]
        assert OpenAIChatCompletions._parse_rerank_response(payload) == expected
    def test_parse_voyage_format(self):
        """Entries under a top-level 'data' key are returned as index/score pairs."""
        from langbot.pkg.provider.modelmgr.requesters.chatcmpl import OpenAIChatCompletions
        payload = {
            'data': [
                {'index': 0, 'relevance_score': 0.90},
                {'index': 2, 'relevance_score': 0.75},
            ]
        }
        expected = [
            {'index': 0, 'relevance_score': 0.90},
            {'index': 2, 'relevance_score': 0.75},
        ]
        assert OpenAIChatCompletions._parse_rerank_response(payload) == expected
    def test_parse_dashscope_format(self):
        """Entries nested under 'output' -> 'results' are returned as index/score pairs."""
        from langbot.pkg.provider.modelmgr.requesters.chatcmpl import OpenAIChatCompletions
        payload = {
            'output': {
                'results': [
                    {'index': 0, 'relevance_score': 0.85},
                ]
            }
        }
        parsed = OpenAIChatCompletions._parse_rerank_response(payload)
        assert parsed == [{'index': 0, 'relevance_score': 0.85}]
    def test_parse_unknown_format(self):
        """A payload with none of the known keys yields an empty list."""
        from langbot.pkg.provider.modelmgr.requesters.chatcmpl import OpenAIChatCompletions
        parsed = OpenAIChatCompletions._parse_rerank_response({'unknown_key': 'value'})
        assert parsed == []
    def test_parse_empty_results(self):
        """An empty 'results' list yields an empty list."""
        from langbot.pkg.provider.modelmgr.requesters.chatcmpl import OpenAIChatCompletions
        parsed = OpenAIChatCompletions._parse_rerank_response({'results': []})
        assert parsed == []
 class TestExtractScanMetadata:
    """Checks for OpenAIChatCompletions._extract_scan_metadata."""
    def _create_requester_with_mocks(self):
        """Build a requester while the listed modules are replaced by mocks."""
        stubbed_names = (
            'langbot.pkg.core.app',
            'langbot_plugin.api.entities.builtin.resource.tool',
            'langbot_plugin.api.entities.builtin.pipeline.query',
            'langbot_plugin.api.entities.builtin.provider.message',
            'langbot.pkg.provider.modelmgr.errors',
        )
        stubs = {module_name: MagicMock() for module_name in stubbed_names}
        with isolated_sys_modules(stubs):
            # Import and instantiate inside the isolation context.
            from langbot.pkg.provider.modelmgr.requesters.chatcmpl import OpenAIChatCompletions
            return OpenAIChatCompletions(MagicMock(), {})
    def test_extract_basic_metadata(self):
        """Name, description, context length and owner are copied from the item."""
        listing = {
            'id': 'gpt-4',
            'name': 'GPT-4 Turbo',
            'description': 'Most capable GPT-4 model',
            'context_length': 128000,
            'owned_by': 'openai',
        }
        metadata = self._create_requester_with_mocks()._extract_scan_metadata(listing, 'gpt-4')
        expected_fields = {
            'display_name': 'GPT-4 Turbo',
            'description': 'Most capable GPT-4 model',
            'context_length': 128000,
            'owned_by': 'openai',
        }
        for field, value in expected_fields.items():
            assert metadata[field] == value
    def test_extract_metadata_missing_fields(self):
        """Fields absent from the item come back as None."""
        listing = {'id': 'unknown-model'}
        metadata = self._create_requester_with_mocks()._extract_scan_metadata(listing, 'unknown-model')
        for field in ('display_name', 'description', 'context_length', 'owned_by'):
            assert metadata[field] is None
    def test_extract_metadata_top_provider_context(self):
        """context_length is read from the nested 'top_provider' entry."""
        listing = {
            'id': 'model',
            'top_provider': {
                'context_length': 4096,
            },
        }
        metadata = self._create_requester_with_mocks()._extract_scan_metadata(listing, 'model')
        assert metadata['context_length'] == 4096
    def test_extract_metadata_empty_strings(self):
        """Empty or whitespace-only strings are reported as None."""
        listing = {
            'id': 'model',
            'name': '',  # empty name
            'description': '   ',  # whitespace only
            'owned_by': '',
        }
        metadata = self._create_requester_with_mocks()._extract_scan_metadata(listing, 'model')
        for field in ('display_name', 'description', 'owned_by'):
            assert metadata[field] is None
    def test_extract_metadata_name_matches_id(self):
        """A name identical to the model id gives a None display_name."""
        listing = {
            'id': 'gpt-4',
            'name': 'gpt-4',  # same as the id
        }
        metadata = self._create_requester_with_mocks()._extract_scan_metadata(listing, 'gpt-4')
        assert metadata['display_name'] is None
--- a/tests/unit_tests/provider/requesters/test_ollama_requester.py
+++ b/tests/unit_tests/provider/requesters/test_ollama_requester.py
@@ -1,264 +0,0 @@
 """Tests for OllamaChatCompletions requester.
 Tests model inference, payload construction, and error handling.
 """
 from __future__ import annotations
 import asyncio
 from unittest.mock import AsyncMock, MagicMock
 import pytest
 from langbot.pkg.provider.modelmgr.errors import RequesterError
 class TestOllamaRequesterConfig:
    """Configuration checks for the OllamaChatCompletions requester."""
    def test_default_config_values(self):
        """The class-level default_config exposes the expected base URL and timeout."""
        from langbot.pkg.provider.modelmgr.requesters.ollamachat import OllamaChatCompletions
        defaults = OllamaChatCompletions.default_config
        assert defaults['base_url'] == 'http://127.0.0.1:11434'
        assert defaults['timeout'] == 120
    def test_config_override(self):
        """Values passed to the constructor show up in requester_cfg."""
        from langbot.pkg.provider.modelmgr.requesters.ollamachat import OllamaChatCompletions
        overrides = {
            'base_url': 'http://custom.ollama:11434',
            'timeout': 300,
        }
        effective = OllamaChatCompletions(MagicMock(), overrides).requester_cfg
        assert effective['base_url'] == 'http://custom.ollama:11434'
        assert effective['timeout'] == 300
 class TestOllamaInferModelType:
    """Checks for OllamaChatCompletions._infer_model_type."""
    @pytest.fixture
    def requester(self):
        """Requester built with a mock application and empty config."""
        from langbot.pkg.provider.modelmgr.requesters.ollamachat import OllamaChatCompletions
        return OllamaChatCompletions(MagicMock(), {})
    def test_infer_embedding_from_name(self, requester):
        """Each of these model names is classified as 'embedding'."""
        for model_id in ('nomic-embed-text', 'bge-large', 'text-embedding'):
            assert requester._infer_model_type(model_id) == 'embedding'
    def test_infer_llm_from_name(self, requester):
        """Each of these model names is classified as 'llm'."""
        for model_id in ('llama2', 'mistral', 'codellama'):
            assert requester._infer_model_type(model_id) == 'llm'
    def test_infer_model_type_none(self, requester):
        """A None model id is classified as 'llm'."""
        inferred = requester._infer_model_type(None)
        assert inferred == 'llm'
    def test_infer_model_type_empty(self, requester):
        """An empty model id is classified as 'llm'."""
        inferred = requester._infer_model_type('')
        assert inferred == 'llm'
 class TestOllamaInferModelAbilities:
    """Checks for OllamaChatCompletions._infer_model_abilities."""
    @pytest.fixture
    def requester(self):
        """Requester built with a mock application and empty config."""
        from langbot.pkg.provider.modelmgr.requesters.ollamachat import OllamaChatCompletions
        return OllamaChatCompletions(MagicMock(), {})
    def test_infer_vision_ability(self, requester):
        """A 'llava' family with a llava model id reports 'vision'."""
        listing = {'details': {'family': 'llava'}}
        detected = requester._infer_model_abilities(listing, 'llava-v1.5')
        assert 'vision' in detected
    def test_infer_vision_from_model_id(self, requester):
        """An empty item with model id 'llava-7b' still reports 'vision'."""
        detected = requester._infer_model_abilities({}, 'llava-7b')
        assert 'vision' in detected
    def test_infer_func_call_ability(self, requester):
        """A 'tools' entry in families reports 'func_call'."""
        listing = {'details': {'families': ['tools']}}
        detected = requester._infer_model_abilities(listing, 'model')
        assert 'func_call' in detected
    def test_infer_no_abilities(self, requester):
        """A plain 'llama' family reports no abilities."""
        listing = {'details': {'family': 'llama'}}
        detected = requester._infer_model_abilities(listing, 'llama-2')
        assert len(detected) == 0
    def test_infer_multiple_abilities(self, requester):
        """Vision and tool hints together report both abilities."""
        listing = {
            'details': {
                'family': 'vision',
                'families': ['tools'],
            }
        }
        detected = requester._infer_model_abilities(listing, 'vision-tool-model')
        for ability in ('vision', 'func_call'):
            assert ability in detected
 class TestOllamaMakeMessage:
    """Checks for OllamaChatCompletions._make_msg."""
    @pytest.fixture
    def requester(self):
        """Requester built with a mock application and empty config."""
        from langbot.pkg.provider.modelmgr.requesters.ollamachat import OllamaChatCompletions
        return OllamaChatCompletions(MagicMock(), {})
    def _create_ollama_response(self, content, tool_calls=None):
        """Return a ChatResponse-spec mock wrapping a Message-spec mock."""
        import ollama
        response = MagicMock(spec=ollama.ChatResponse)
        message = MagicMock(spec=ollama.Message)
        message.content = content
        message.tool_calls = tool_calls
        response.message = message
        return response
    @pytest.mark.asyncio
    async def test_make_msg_text_content(self, requester):
        """Plain text content is carried over with the assistant role."""
        parsed = await requester._make_msg(self._create_ollama_response('Hello world'))
        assert parsed.content == 'Hello world'
        assert parsed.role == 'assistant'
    @pytest.mark.asyncio
    async def test_make_msg_with_tool_calls(self, requester):
        """A tool call is converted and its arguments become a string."""
        function = MagicMock()
        function.name = 'get_weather'
        function.arguments = {'location': 'Beijing'}
        tool_call = MagicMock()
        tool_call.function = function
        response = self._create_ollama_response('', tool_calls=[tool_call])
        parsed = await requester._make_msg(response)
        assert parsed.tool_calls is not None
        assert len(parsed.tool_calls) == 1
        converted = parsed.tool_calls[0].function
        assert converted.name == 'get_weather'
        # The dict arguments are expected to be serialised to a string.
        assert isinstance(converted.arguments, str)
    @pytest.mark.asyncio
    async def test_make_msg_empty_message_raises(self, requester):
        """A response whose message is None raises ValueError mentioning 'message'."""
        response = MagicMock()
        response.message = None
        with pytest.raises(ValueError, match='message'):
            await requester._make_msg(response)
 class TestOllamaErrorHandling:
    """Checks that the Ollama requester converts client failures into RequesterError."""
    @pytest.fixture
    def mock_app(self):
        """Application double whose tool manager returns no OpenAI tools."""
        tool_mgr = MagicMock()
        tool_mgr.generate_tools_for_openai = AsyncMock(return_value=[])
        application = MagicMock()
        application.tool_mgr = tool_mgr
        return application
    @pytest.fixture
    def requester_with_mocked_client(self, mock_app):
        """OllamaChatCompletions instance whose client.chat is an AsyncMock."""
        from langbot.pkg.provider.modelmgr.requesters.ollamachat import OllamaChatCompletions
        requester = OllamaChatCompletions(mock_app, {})
        client = MagicMock()
        client.chat = AsyncMock()
        requester.client = client
        return requester
    @pytest.fixture
    def mock_model(self):
        """Runtime model double named 'llama2' with an empty token."""
        token_mgr = MagicMock()
        token_mgr.get_token = MagicMock(return_value='')
        provider = MagicMock()
        provider.token_mgr = token_mgr
        entity = MagicMock()
        entity.name = 'llama2'
        llm = MagicMock()
        llm.model_entity = entity
        llm.provider = provider
        return llm
    @pytest.fixture
    def mock_message(self):
        """User message double exposing role, content and dict()."""
        message = MagicMock()
        message.role = 'user'
        message.content = 'test'
        message.dict = MagicMock(return_value={'role': 'user', 'content': 'test'})
        return message
    @pytest.mark.asyncio
    async def test_timeout_error(self, requester_with_mocked_client, mock_model, mock_message):
        """asyncio.TimeoutError from client.chat surfaces as RequesterError."""
        requester = requester_with_mocked_client
        requester.client.chat = AsyncMock(side_effect=asyncio.TimeoutError())
        with pytest.raises(RequesterError) as caught:
            await requester.invoke_llm(query=None, model=mock_model, messages=[mock_message])
        assert '超时' in str(caught.value)
 class TestOllamaScanModels:
    """Fixtures for scan_models tests; currently only the REQUESTER_NAME constant is checked."""
    @pytest.fixture
    def mock_app(self):
        """Bare application double."""
        return MagicMock()
    @pytest.fixture
    def requester(self, mock_app):
        """Requester configured with an explicit local base URL and timeout."""
        from langbot.pkg.provider.modelmgr.requesters.ollamachat import OllamaChatCompletions
        settings = {
            'base_url': 'http://127.0.0.1:11434',
            'timeout': 120,
        }
        return OllamaChatCompletions(mock_app, settings)
    def test_requester_name_constant(self):
        """The module exports REQUESTER_NAME equal to 'ollama-chat'."""
        from langbot.pkg.provider.modelmgr.requesters import ollamachat
        assert ollamachat.REQUESTER_NAME == 'ollama-chat'
--- a/tests/unit_tests/provider/test_litellmchat.py
+++ b/tests/unit_tests/provider/test_litellmchat.py
--- a/tests/unit_tests/provider/test_localagent_sandbox_exec.py
+++ b/tests/unit_tests/provider/test_localagent_sandbox_exec.py
@@ -10,7 +10,7 @@ import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
 import langbot_plugin.api.entities.builtin.provider.message as provider_message
 import langbot_plugin.api.entities.builtin.provider.session as provider_session
-from langbot.pkg.provider.runners.localagent import LocalAgentRunner
+from langbot.pkg.provider.runners.localagent import LocalAgentRunner, _StreamAccumulator
 class RecordingProvider:
@@ -124,6 +124,45 @@ def make_query() -> pipeline_query.Query:
    )
 def test_stream_accumulator_merges_fragmented_tool_call_arguments():
    """Tool-call arguments split across two chunks are joined in the final message."""
    def make_chunk(arguments_fragment, **chunk_kwargs):
        # Both chunks share the call id, type and function name; only the
        # argument text (and optional chunk flags) differ.
        return provider_message.MessageChunk(
            role='assistant',
            tool_calls=[
                provider_message.ToolCall(
                    id='call-1',
                    type='function',
                    function=provider_message.FunctionCall(name='exec', arguments=arguments_fragment),
                )
            ],
            **chunk_kwargs,
        )
    accumulator = _StreamAccumulator(msg_sequence=1)
    # The first fragment alone is expected to yield nothing from add().
    first_result = accumulator.add(make_chunk('{"command":'))
    assert first_result is None
    # The second fragment is flagged final and is expected to yield a value.
    emitted = accumulator.add(make_chunk('"pwd"}', is_final=True))
    assert emitted is not None
    merged_call = accumulator.final_message().tool_calls[0]
    assert merged_call.function.name == 'exec'
    assert merged_call.function.arguments == '{"command":"pwd"}'
@pytest.mark.asyncio
 async def test_localagent_uses_exec_for_exact_calculation():
    provider = RecordingProvider()
--- a/tests/unit_tests/provider/test_model_manager.py
+++ b/tests/unit_tests/provider/test_model_manager.py
@@ -494,6 +494,7 @@ async def test_model_manager_init_temporary_runtime_llm_model(fake_requester_reg
            'api_keys': ['temp-key'],
        },
        'abilities': ['func_call'],
        'context_length': 128000,
        'extra_args': {'temperature': 0.5},
    }
@@ -501,6 +502,9 @@ async def test_model_manager_init_temporary_runtime_llm_model(fake_requester_reg
    assert runtime_model.model_entity.uuid == 'temp-model-uuid'
    assert runtime_model.model_entity.name == 'TempModel'
    assert runtime_model.model_entity.context_length == 128000
    assert runtime_model.model_entity.extra_args == {'temperature': 0.5}
    assert 'context_length' not in runtime_model.model_entity.extra_args
    assert runtime_model.provider.provider_entity.uuid == 'temp-provider-uuid'
    assert runtime_model.provider.token_mgr.tokens == ['temp-key']
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
huanghuoguoguo	b82db2b7f8	feat(models): persist context metadata	2026-06-08 00:39:30 +08:00
huanghuoguoguo	573e1fe36e	style: simplify wrapped expressions	2026-06-07 22:05:46 +08:00
huanghuoguoguo	7fb3cfa638	refactor(provider): simplify litellm capabilities	2026-06-06 00:21:19 +08:00
RockChinQ	39673444d2	fix(provider): capture streaming token usage; add token observability The LiteLLM streaming requester only captured usage when a chunk had an empty `choices` list. Many OpenAI-compatible gateways (e.g. new-api) and providers send the final usage payload in a chunk that still carries an empty-delta choice, so streamed calls always recorded 0 tokens in the monitoring logs/dashboard (non-streaming worked). - Capture stream usage whenever a chunk carries it, regardless of choices - Add robust _normalize_usage (dict/obj shapes, derive missing total_tokens) - Register litellm in bootutils/deps.py (was in pyproject only) - Add MonitoringService.get_token_statistics + /monitoring/token-statistics endpoint: summary, per-model breakdown, token timeseries, and a zero-token-success data-quality signal - Add TokenMonitoring dashboard tab (summary tiles, stacked token chart, per-model table) + i18n (en/zh) - Regression tests for stream usage capture and usage normalization Verified end-to-end against a real OpenAI-compatible endpoint with gpt-5.5 and claude-opus-4-8: tokens now recorded non-zero for both streaming and non-streaming paths.	2026-06-05 09:13:57 -04:00
huanghuoguoguo	d450226701	fix(provider): align litellm rebase with master	2026-06-05 09:52:13 +08:00
fdc310	926e0c0854	feat: update requesters and improve provider selection UI - Added `litellm_provider` field to various requesters' YAML configurations. - Removed obsolete Python requester files for OpenRouter, PPIO, QHAIGC, ShengSuanYun, SiliconFlow, Space, TokenPony, VolcArk, and Xai. - Introduced new requesters for Tencent and Together AI with corresponding YAML configurations and SVG icons. - Enhanced the ProviderForm component to include a searchable dropdown for selecting providers, improving user experience. - Updated localization files to include search provider text for both English and Chinese.	2026-06-05 09:39:28 +08:00
huanghuoguoguo	89bcf82518	restore: restore deleted provider requester files Restore individual provider requester implementations that were removed in `de61b5d3`. These files coexist with the unified litellmchat.py backend. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>	2026-06-05 09:39:28 +08:00
huanghuoguoguo	7ea1ce2fd3	refactor(provider): simplify LiteLLM requester usage handling - Remove unused Anthropic-specific tool schema generation - Share completion argument construction between normal and streaming calls - Use LiteLLM/OpenAI native usage fields for monitoring - Collect stream token usage from LiteLLM stream_options - Update LiteLLM requester tests for unified usage fields	2026-06-05 09:39:28 +08:00
huanghuoguoguo	31ad85517b	fix: ruff format provider.py Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>	2026-06-05 09:38:16 +08:00
huanghuoguoguo	a62fce1cf7	refactor(provider): use LiteLLM as unified LLM requester backend - Replace 23+ individual requester implementations with unified litellmchat.py - Add litellm_provider field to 27 YAML manifests for provider routing - Delete redundant requester subclasses - Add unit tests for LiteLLMRequester (29 tests) - Fix num_retries parameter name (was max_retries) - Fix exception handling order for subclass exceptions LiteLLM provides unified API for 100+ providers, eliminating need for provider-specific requesters.	2026-06-05 09:38:16 +08:00
Junyan Qin	101e04db6d	feat(web): add Discord link to sidebar account menu Add a "Join our Discord" entry to the account dropdown's external-links group, opening https://discord.gg/wdNEHETs87 in a new tab. lucide-react has no Discord brand glyph, so include a small inline Discord SVG icon (brand color). Add the joinDiscord label to all 8 locales. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>	2026-06-04 22:26:55 +08:00
Junyan Qin	b79edda3a7	style(web): give extension cards a subtle border The softened shadow alone left cards with no visible edge against the page background. Add `border border-border` so each card has a clear, restrained boundary while keeping the gentle shadow. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>	2026-06-04 21:49:55 +08:00
Junyan Qin	a20d3d11e5	style(web): soften extension card shadow and hover effect Reduce the marketplace card box-shadow (4px/0.2 -> 2px/0.06) and the hover shadow (8px/0.15 -> 5px/0.08, dark proportional) for a more restrained, understated look. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>	2026-06-04 21:45:35 +08:00
Junyan Qin	3b4c455813	fix(web): distinct extension-format icons (plugin/mcp/skill) The format filter used Wrench/AudioWaveform/Book for plugin/mcp/skill, which collided with the plugin-component icons (Tool/EventListener/ KnowledgeEngine) shown right below. Switch formats to Puzzle/Server/ Sparkles — matching the canonical getTypeIcon used by the detail badges — across the market filter, installed filter, install-queue map and install-progress dialog. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>	2026-06-04 21:34:23 +08:00
Junyan Qin	c967a2aa82	i18n(market): say "extensions" not "plugins" in the marketplace count The marketplace now lists plugins, MCPs and skills, so the item count ("Total N plugins") read wrong. Update market.totalPlugins and market.searchResults to "extensions" across all 8 locales. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>	2026-06-04 21:24:10 +08:00
Junyan Qin	79cc6da96f	fix(mcp): surface real cause from TaskGroup ExceptionGroups MCP connection failures were reported as "unhandled errors in a TaskGroup (1 sub-exception)" because anyio/the MCP client wrap the real error in an ExceptionGroup and we interpolated its str() directly. Add _describe_exception() to recurse into ExceptionGroups and surface the leaf cause (e.g. "httpx.HTTPStatusError: Client error '410 Gone'") in both the retry warning and the final error_message. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>	2026-06-04 21:19:18 +08:00
Junyan Qin	fee7d48dc3	refactor(web): drop redundant Manual/Scan tabs in model add popover The model add/scan popover nested a second Manual/Scan tab row inside the Chat/Embedding/Rerank type tabs. But ProviderCard already opens the popover from two distinct entry points (Add -> manual, Scan -> scan via initialMode), so the inner tabs were redundant. Render the manual form or scan UI directly off `mode` and remove the inner Tabs/TabsList, leaving a single clean tab row. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>	2026-06-04 18:36:59 +08:00
Junyan Qin	8811fb647f	fix(plugin): call _inspect_plugin_package in marketplace install path Marketplace plugin install referenced self._extract_deps_metadata, which no longer exists (renamed to _inspect_plugin_package), raising AttributeError and failing every plugin install from Space. Use the current method name; it extracts identity + dependency metadata as the local-install path already does. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>	2026-06-04 18:17:01 +08:00
Junyan Qin	37b017459d	fix(modelmgr): upsert Space-managed models instead of insert-only sync_new_models_from_space() skipped any model whose uuid already existed. LangBot Space reuses a model's uuid across renames/re-specs (e.g. the uuid that was claude-opus-4-6 later becomes claude-opus-4-7), so renamed models never propagated locally — the stale local name was also sent to the models gateway, causing model_not_found at inference. Now upsert: create new uuids, and for existing models owned by the Space provider, update name/abilities/ranking to track Space (models from other providers are left untouched). Logs added/updated counts. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>	2026-06-04 18:11:26 +08:00
Junyan Qin	4889a3881b	chore(release): bump version to 4.10.0 Version-only bump from 4.10.0-beta.3. No release/tag/publish. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>	2026-06-04 17:26:03 +08:00
Junyan Qin	fe4f95b9a3	fix(docker): install docker CLI for box backend; bump to 4.10.0-beta.3 The langbot_box service drives sandbox containers through the docker CLI (CLISandboxBackend shells out to `docker run`/`docker exec`), but the image shipped without a docker client, so DockerBackend.is_available() was always false and the Box sandbox backend was unavailable in Docker deployments — disabling native tools, skill execution and stdio MCP. Install docker-ce-cli (client only) in the image, arch-aware so multi-arch builds work. Also bump langbot-plugin pin to 0.4.1, which disables proxy auto-detection on internal control-plane WebSocket connections (the langbot<->plugin_runtime / langbot<->box handshakes were failing on hosts that inject a proxy into containers). Bumps version to 4.10.0-beta.3.	2026-06-04 13:20:36 +08:00
`@@ -1,3 +1,3 @@`
	`"""LangBot - Production-grade platform for building agentic IM bots"""`	`"""LangBot - Production-grade platform for building agentic IM bots"""`

	`__version__ = '4.10.0-beta.2'`	`__version__ = '4.10.0'`