feat: make agent runner config schema driven

2026-07-26 14:26:06 +00:00 · 2026-05-19 12:20:28 +08:00
parent f4f91c43b5
commit be8d30894a
20 changed files with 901 additions and 236 deletions
@@ -24,7 +24,8 @@ class ConfigMigration:
    Responsibilities:
    - Resolve runner ID from new ai.runner.id or old ai.runner.runner
    - Map old built-in runner names to official plugin runner IDs
-    - Extract runner config from ai.runner_config or old ai.<runner-name>
+    - Extract runtime runner config from ai.runner_config
+    - Migrate old ai.<runner-name> blocks into ai.runner_config
    """

    @staticmethod
@@ -74,9 +75,9 @@ class ConfigMigration:
    ) -> dict[str, typing.Any]:
        """Resolve runner binding configuration from pipeline configuration.

-        Priority:
-        1. New format: ai.runner_config[runner_id]
-        2. Old format: ai.<runner-name> (mapped from runner_id if applicable)
+        Runtime code should only read the migrated format. Legacy
+        ai.<runner-name> blocks are handled by migration helpers, not by the
+        hot path.

        Args:
            pipeline_config: Pipeline configuration dict
@@ -92,7 +93,16 @@ class ConfigMigration:
        if runner_id in runner_configs:
            return runner_configs[runner_id]

-        # Check old format: ai.<old_runner_name>
+        return {}
+
+    @staticmethod
+    def resolve_legacy_runner_config(
+        pipeline_config: dict[str, typing.Any],
+        runner_id: str,
+    ) -> dict[str, typing.Any]:
+        """Resolve old ai.<runner-name> config for migration only."""
+        ai_config = pipeline_config.get('ai', {})
+
        # Try to find old runner name from runner_id
        old_runner_name = None
        for old_name, mapped_id in OLD_RUNNER_TO_PLUGIN_RUNNER_ID.items():
@@ -105,12 +115,6 @@ class ConfigMigration:
            if old_config:
                return old_config

-        # If runner_id is plugin:* format, try extracting runner_name as config key
-        if is_plugin_runner_id(runner_id):
-            # Some configs might use just the runner_name component as key
-            # But this is legacy behavior - prefer ai.runner_config[id]
-            pass
-
        return {}

    @staticmethod
@@ -181,6 +185,8 @@ class ConfigMigration:

            # Migrate runner config
            resolved_config = ConfigMigration.resolve_runner_config(pipeline_config, runner_id)
+            if not resolved_config:
+                resolved_config = ConfigMigration.resolve_legacy_runner_config(pipeline_config, runner_id)
            if resolved_config:
                runner_configs[runner_id] = resolved_config
                # Remove old runner config block
@@ -193,4 +199,4 @@ class ConfigMigration:
        ai_config['runner_config'] = runner_configs
        new_config['ai'] = ai_config

-        return new_config
+        return new_config
@@ -0,0 +1,208 @@
+"""Helpers for interpreting AgentRunner DynamicForm configuration."""
+from __future__ import annotations
+
+import typing
+
+from .descriptor import AgentRunnerDescriptor
+
+
+LLM_MODEL_SELECTOR_TYPES = {'model-fallback-selector', 'llm-model-selector'}
+KB_SELECTOR_TYPES = {'knowledge-base-multi-selector'}
+PROMPT_EDITOR_TYPES = {'prompt-editor'}
+NONE_SENTINELS = {'', '__none__', '__none'}
+
+
+def iter_schema_items(
+    descriptor: AgentRunnerDescriptor | None,
+    field_types: set[str],
+) -> typing.Iterator[dict[str, typing.Any]]:
+    """Yield descriptor config schema items whose type is in field_types.
+
+    Args:
+        descriptor: Runner descriptor to inspect; ``None`` yields nothing.
+        field_types: Accepted values of each schema item's ``type`` key.
+
+    Yields:
+        Matching schema items, in ``config_schema`` order.
+    """
+    if descriptor is None:
+        return
+    # A falsy config_schema (None or empty) is treated as "no fields".
+    for item in descriptor.config_schema or []:
+        if not isinstance(item, dict):
+            # Skip malformed entries rather than failing the whole lookup.
+            continue
+        if item.get('type') in field_types:
+            yield item
+
+
+def has_permission(
+    descriptor: AgentRunnerDescriptor | None,
+    name: str,
+    actions: set[str],
+) -> bool:
+    """Return whether a runner descriptor requests one of the given actions.
+
+    Args:
+        descriptor: Runner descriptor to inspect; ``None`` always yields False.
+        name: Key looked up in ``descriptor.permissions`` (e.g. ``'models'``).
+        actions: Candidate actions; a single match is enough.
+
+    Returns:
+        True when at least one of ``actions`` is listed under ``name``.
+    """
+    if descriptor is None:
+        return False
+    # A missing permission entry behaves like an empty action list.
+    configured_actions = descriptor.permissions.get(name, [])
+    return any(action in configured_actions for action in actions)
+
+
+def uses_host_models(descriptor: AgentRunnerDescriptor | None) -> bool:
+    """Return whether LangBot should resolve model resources for this runner.
+
+    Both conditions must hold: the descriptor requests at least one of the
+    ``invoke``/``stream``/``list`` actions on ``models``, and its config schema
+    declares at least one LLM model selector field.
+    """
+    return (
+        has_permission(descriptor, 'models', {'invoke', 'stream', 'list'})
+        # any(...) over the generator stops at the first matching schema item.
+        and any(True for _ in iter_schema_items(descriptor, LLM_MODEL_SELECTOR_TYPES))
+    )
+
+
+def uses_host_tools(descriptor: AgentRunnerDescriptor | None) -> bool:
+    """Return whether LangBot should expose tool resources to this runner.
+
+    Requires a descriptor whose ``supports_tool_calling()`` is truthy and
+    that requests at least one of ``list``/``detail``/``call`` on ``tools``.
+    """
+    return (
+        descriptor is not None
+        and descriptor.supports_tool_calling()
+        and has_permission(descriptor, 'tools', {'list', 'detail', 'call'})
+    )
+
+
+def uses_host_knowledge_bases(descriptor: AgentRunnerDescriptor | None) -> bool:
+    """Return whether LangBot should expose knowledge-base resources to this runner.
+
+    Requires a descriptor whose ``supports_knowledge_retrieval()`` is truthy
+    and that requests ``list`` or ``retrieve`` on ``knowledge_bases``.
+    """
+    return (
+        descriptor is not None
+        and descriptor.supports_knowledge_retrieval()
+        and has_permission(descriptor, 'knowledge_bases', {'list', 'retrieve'})
+    )
+
+
+def extract_prompt_config(
+    descriptor: AgentRunnerDescriptor | None,
+    runner_config: dict[str, typing.Any],
+    default_prompt: list[dict[str, typing.Any]],
+) -> list[dict[str, typing.Any]]:
+    """Extract the prompt-editor value selected by the runner schema.
+
+    Prompt-editor fields are checked in schema order. For each field the
+    configured value wins if it is a list; otherwise the field's schema
+    ``default`` is used if that is a list; otherwise the next field is tried.
+
+    Args:
+        descriptor: Runner descriptor providing the config schema.
+        runner_config: Configured values keyed by schema field name.
+        default_prompt: Returned when no prompt-editor field yields a list.
+
+    Returns:
+        The first list found, or ``default_prompt``. The list is returned
+        as-is (not copied).
+    """
+    for item in iter_schema_items(descriptor, PROMPT_EDITOR_TYPES):
+        field_name = item.get('name')
+        if field_name and field_name in runner_config:
+            configured_prompt = runner_config[field_name]
+            # A configured value that is not a list is ignored, not an error.
+            if isinstance(configured_prompt, list):
+                return configured_prompt
+        default_value = item.get('default')
+        if isinstance(default_value, list):
+            return default_value
+    return default_prompt
+
+
+def extract_model_selection(
+    descriptor: AgentRunnerDescriptor | None,
+    runner_config: dict[str, typing.Any],
+) -> tuple[str, list[str]]:
+    """Extract primary/fallback LLM selections from schema-defined fields.
+
+    Selector fields are checked in schema order; the configured value falls
+    back to the field's schema ``default`` when the key is absent.
+
+    Args:
+        descriptor: Runner descriptor providing the config schema.
+        runner_config: Configured values keyed by schema field name.
+
+    Returns:
+        ``(primary_uuid, fallback_uuids)``; ``('', [])`` when nothing is
+        selected. Values in ``NONE_SENTINELS`` are NOT filtered here, so the
+        caller has to treat them as "no model".
+    """
+    primary_uuid = ''
+    fallback_uuids: list[str] = []
+
+    for item in iter_schema_items(descriptor, LLM_MODEL_SELECTOR_TYPES):
+        field_name = item.get('name')
+        if not field_name:
+            continue
+
+        value = runner_config.get(field_name, item.get('default'))
+        if item.get('type') == 'model-fallback-selector':
+            if isinstance(value, str):
+                # Plain string form: a bare primary UUID without fallbacks.
+                primary_uuid = value
+            elif isinstance(value, dict):
+                primary_uuid = value.get('primary') or ''
+                fallbacks = value.get('fallbacks', [])
+                if isinstance(fallbacks, list):
+                    fallback_uuids = [fallback for fallback in fallbacks if isinstance(fallback, str)]
+            # The first named fallback selector ends the search even when its
+            # value had an unusable shape.
+            break
+
+        # A single-model selector only ends the search when it holds a string;
+        # otherwise later selector fields are still considered.
+        if item.get('type') == 'llm-model-selector' and isinstance(value, str):
+            primary_uuid = value
+            break
+
+    return primary_uuid, fallback_uuids
+
+
+def extract_knowledge_base_uuids(
+    descriptor: AgentRunnerDescriptor | None,
+    runner_config: dict[str, typing.Any],
+) -> list[str]:
+    """Extract configured knowledge-base UUIDs from schema-defined fields.
+
+    Args:
+        descriptor: Runner descriptor providing the config schema.
+        runner_config: Configured values keyed by schema field name.
+
+    Returns:
+        UUIDs gathered from every knowledge-base selector field, with
+        duplicates removed and first-seen order kept. Empty when
+        ``uses_host_knowledge_bases(descriptor)`` is false.
+    """
+    if not uses_host_knowledge_bases(descriptor):
+        return []
+
+    kb_uuids: list[str] = []
+    for item in iter_schema_items(descriptor, KB_SELECTOR_TYPES):
+        field_name = item.get('name')
+        if not field_name:
+            continue
+        # Fall back to the schema default when the field is not configured.
+        value = runner_config.get(field_name, item.get('default', []))
+        if isinstance(value, list):
+            # Drop non-string entries and the "nothing selected" sentinels.
+            kb_uuids.extend(
+                kb_uuid for kb_uuid in value if isinstance(kb_uuid, str) and kb_uuid not in NONE_SENTINELS
+            )
+
+    # dict.fromkeys de-duplicates while preserving insertion order.
+    return list(dict.fromkeys(kb_uuids))
+
+
+def iter_config_model_refs(
+    descriptor: AgentRunnerDescriptor,
+    runner_config: dict[str, typing.Any],
+) -> typing.Iterator[tuple[str, str]]:
+    """Yield model references declared by schema-defined model selector fields.
+
+    Only fields actually present in ``runner_config`` are considered; schema
+    defaults are not applied here. Values in ``NONE_SENTINELS`` and
+    non-string values are skipped. The same UUID may be yielded more than
+    once; de-duplication is left to the caller.
+
+    Args:
+        descriptor: Runner descriptor providing the config schema.
+        runner_config: Configured values keyed by schema field name.
+
+    Yields:
+        ``(model_type, model_uuid)`` pairs where ``model_type`` is ``'llm'``
+        or ``'rerank'``.
+    """
+    for item in descriptor.config_schema or []:
+        if not isinstance(item, dict):
+            continue
+
+        field_name = item.get('name')
+        field_type = item.get('type')
+        if not field_name or field_name not in runner_config:
+            continue
+
+        value = runner_config.get(field_name)
+        if field_type == 'model-fallback-selector':
+            # Accepts either a bare UUID string or {primary, fallbacks}.
+            if isinstance(value, str) and value not in NONE_SENTINELS:
+                yield 'llm', value
+            elif isinstance(value, dict):
+                primary = value.get('primary')
+                if isinstance(primary, str) and primary not in NONE_SENTINELS:
+                    yield 'llm', primary
+                fallbacks = value.get('fallbacks', [])
+                if isinstance(fallbacks, list):
+                    for fallback_uuid in fallbacks:
+                        if isinstance(fallback_uuid, str) and fallback_uuid not in NONE_SENTINELS:
+                            yield 'llm', fallback_uuid
+        elif field_type == 'llm-model-selector':
+            if isinstance(value, str) and value not in NONE_SENTINELS:
+                yield 'llm', value
+        elif field_type == 'rerank-model-selector':
+            if isinstance(value, str) and value not in NONE_SENTINELS:
+                yield 'rerank', value
+
+
+def set_empty_llm_model_selection(
+    descriptor: AgentRunnerDescriptor,
+    runner_config: dict[str, typing.Any],
+    model_uuid: str,
+) -> bool:
+    """Set the first named LLM selector to model_uuid if it is still empty.
+
+    Only the first named selector field in schema order is examined: every
+    branch below returns, so later selectors are never reached. If that
+    selector already holds a model, nothing is changed.
+
+    Args:
+        descriptor: Runner descriptor providing the config schema.
+        runner_config: Configured values keyed by field name; mutated in place.
+        model_uuid: Model UUID to store as the selection.
+
+    Returns:
+        True when ``runner_config`` was updated, False when the selector is
+        already set or the schema has no named LLM selector field.
+    """
+    for item in iter_schema_items(descriptor, LLM_MODEL_SELECTOR_TYPES):
+        field_name = item.get('name')
+        field_type = item.get('type')
+        if not field_name:
+            continue
+
+        # The schema default counts as the current value when unconfigured.
+        value = runner_config.get(field_name, item.get('default'))
+        if field_type == 'model-fallback-selector':
+            if isinstance(value, dict):
+                primary = value.get('primary') or ''
+                if primary not in NONE_SENTINELS:
+                    return False
+                # Keep any fallbacks already chosen; only fill in the primary.
+                fallbacks = value.get('fallbacks', [])
+                runner_config[field_name] = {
+                    'primary': model_uuid,
+                    'fallbacks': fallbacks if isinstance(fallbacks, list) else [],
+                }
+                return True
+            if isinstance(value, str) and value not in NONE_SENTINELS:
+                return False
+            # Missing, sentinel, or unexpected value: write the dict form.
+            runner_config[field_name] = {'primary': model_uuid, 'fallbacks': []}
+            return True
+
+        if field_type == 'llm-model-selector':
+            if isinstance(value, str) and value not in NONE_SENTINELS:
+                return False
+            runner_config[field_name] = model_uuid
+            return True
+
+    return False
@@ -15,6 +15,9 @@ from .state_store import get_state_store
 from . import events as runner_events


+DEFAULT_RUNNER_TIMEOUT_SECONDS = 300
+
+
 # Internal models for the agent runner context protocol.


@@ -106,7 +109,7 @@ class AgentRuntimeContext(typing.TypedDict):
    sdk_protocol_version: str
    query_id: int | None
    trace_id: str | None
-    deadline_at: int | None
+    deadline_at: float | None
    metadata: dict[str, typing.Any]


@@ -480,9 +483,13 @@ class AgentRunContextBuilder:
            },
        }

-    def _build_deadline(self, runner_config: dict[str, typing.Any]) -> int | None:
-        """Build deadline timestamp from runner timeout config if present."""
-        timeout = runner_config.get('timeout')
+    def _build_deadline(self, runner_config: dict[str, typing.Any]) -> float | None:
+        """Build deadline timestamp from runner timeout config.
+
+        A missing timeout uses the host default. Explicit null, zero, or negative
+        values disable the total run deadline for advanced deployments.
+        """
+        timeout = runner_config.get('timeout', DEFAULT_RUNNER_TIMEOUT_SECONDS)
        if timeout is None:
            return None

@@ -494,7 +501,7 @@ class AgentRunContextBuilder:
        if timeout_seconds <= 0:
            return None

-        return int(time.time() + timeout_seconds)
+        return time.time() + timeout_seconds

    async def _is_stream_output_supported(self, query: pipeline_query.Query) -> bool:
        """Check whether the current adapter can consume streaming chunks."""
@@ -3,9 +3,12 @@ from __future__ import annotations

 import typing
 import traceback
+import asyncio
+import time

 from langbot_plugin.api.entities.builtin.provider import message as provider_message
 from langbot_plugin.api.entities.builtin.pipeline import query as pipeline_query
+from langbot_plugin.entities.io.errors import ActionCallTimeoutError

 from ...core import app
 from .descriptor import AgentRunnerDescriptor
@@ -155,14 +158,32 @@ class AgentRunOrchestrator:
            )

        try:
-            async for result_dict in self.ap.plugin_connector.run_agent(
+            gen = self.ap.plugin_connector.run_agent(
                plugin_author=descriptor.plugin_author,
                plugin_name=descriptor.plugin_name,
                runner_name=descriptor.runner_name,
                context=context,
-            ):
+            )
+
+            while True:
+                try:
+                    result_dict = await self._next_with_deadline(gen, descriptor, context)
+                except StopAsyncIteration:
+                    break
                yield result_dict

+        except asyncio.TimeoutError as e:
+            raise RunnerExecutionError(
+                descriptor.id,
+                'Runner timed out (code: runner.timeout)',
+                retryable=True,
+            ) from e
+        except ActionCallTimeoutError as e:
+            raise RunnerExecutionError(
+                descriptor.id,
+                f'{e} (code: runner.timeout)',
+                retryable=True,
+            ) from e
        except RunnerExecutionError:
            raise
        except Exception as e:
@@ -176,6 +197,57 @@ class AgentRunOrchestrator:
                retryable=False,
            )

+    async def _next_with_deadline(
+        self,
+        gen: typing.AsyncGenerator[dict[str, typing.Any], None],
+        descriptor: AgentRunnerDescriptor,
+        context: AgentRunContextPayload,
+    ) -> dict[str, typing.Any]:
+        """Read the next runner result while enforcing the run deadline.
+
+        Args:
+            gen: Async generator producing runner result dicts.
+            descriptor: Runner descriptor, used only for log messages on close.
+            context: Run context; the deadline is read from
+                ``context['runtime']['deadline_at']``.
+
+        Returns:
+            The next item produced by ``gen``.
+
+        Raises:
+            asyncio.TimeoutError: The deadline had already passed, or passed
+                while waiting for the next item.
+            StopAsyncIteration: The generator finished before the deadline.
+        """
+        remaining = self._remaining_deadline_seconds(context)
+        if remaining is not None and remaining <= 0:
+            # Deadline already passed: do not start another read.
+            await self._close_generator(gen, descriptor)
+            raise asyncio.TimeoutError
+
+        try:
+            if remaining is None:
+                # No deadline configured: wait without a timeout.
+                return await anext(gen)
+            return await asyncio.wait_for(anext(gen), timeout=remaining)
+        except StopAsyncIteration:
+            # NOTE(review): a generator that ends after the deadline is
+            # reported as a timeout, even if it finished normally. Presumably
+            # this covers a connector that ends the stream silently on its own
+            # timeout; confirm.
+            if self._is_deadline_exhausted(context):
+                raise asyncio.TimeoutError
+            raise
+        except asyncio.TimeoutError:
+            await self._close_generator(gen, descriptor)
+            raise
+
+    def _remaining_deadline_seconds(
+        self,
+        context: AgentRunContextPayload,
+    ) -> float | None:
+        """Return seconds left until the run deadline, or None if there is none.
+
+        The deadline is read from ``context['runtime']['deadline_at']`` and
+        compared with ``time.time()``. The result is zero or negative once the
+        deadline has passed. A missing deadline, or one that cannot be
+        converted to float, yields None.
+        """
+        runtime = context.get('runtime') or {}
+        deadline_at = runtime.get('deadline_at')
+        if deadline_at is None:
+            return None
+        try:
+            return float(deadline_at) - time.time()
+        except (TypeError, ValueError):
+            # An unparsable deadline is treated as "no deadline".
+            return None
+
+    def _is_deadline_exhausted(self, context: AgentRunContextPayload) -> bool:
+        """Return True when a deadline is set and no time remains on it."""
+        remaining = self._remaining_deadline_seconds(context)
+        return remaining is not None and remaining <= 0
+
+    async def _close_generator(
+        self,
+        gen: typing.AsyncGenerator[dict[str, typing.Any], None],
+        descriptor: AgentRunnerDescriptor,
+    ) -> None:
+        """Close the runner generator, logging instead of raising on failure.
+
+        Best-effort cleanup: an ``Exception`` raised by ``aclose()`` is logged
+        as a warning so it does not replace the error being raised by the
+        caller.
+        """
+        try:
+            await gen.aclose()
+        except Exception as e:
+            self.ap.logger.warning(f'Failed to close timed-out runner {descriptor.id}: {e}')
+
    def resolve_runner_id_for_telemetry(self, query: pipeline_query.Query) -> str | None:
        """Resolve runner ID for telemetry/logging without full execution.

@@ -13,6 +13,7 @@ from .context_builder import (
    KnowledgeBaseResource,
    StorageResource,
 )
+from . import config_schema


 class AgentResourceBuilder:
@@ -73,7 +74,7 @@ class AgentResourceBuilder:
        models, tools, knowledge_bases = await asyncio.gather(
            self._build_models(manifest_perms, runner_config, descriptor, query),
            self._build_tools(manifest_perms, bound_plugins, bound_mcp_servers, query),
-            self._build_knowledge_bases(manifest_perms, runner_config, query),
+            self._build_knowledge_bases(manifest_perms, runner_config, descriptor, query),
        )
        storage = self._build_storage(manifest_perms)

@@ -132,34 +133,11 @@ class AgentResourceBuilder:
        runner_config: dict[str, typing.Any],
    ) -> None:
        """Authorize model-like values selected through DynamicForm fields."""
-        for item in descriptor.config_schema or []:
-            if not isinstance(item, dict):
-                continue
-
-            field_name = item.get('name')
-            field_type = item.get('type')
-            if not field_name or field_name not in runner_config:
-                continue
-
-            value = runner_config.get(field_name)
-            if field_type == 'model-fallback-selector':
-                if isinstance(value, str):
-                    await self._append_llm_model_resource(models, seen_model_ids, value)
-                elif isinstance(value, dict):
-                    primary = value.get('primary')
-                    if isinstance(primary, str):
-                        await self._append_llm_model_resource(models, seen_model_ids, primary)
-                    fallbacks = value.get('fallbacks', [])
-                    if isinstance(fallbacks, list):
-                        for fallback_uuid in fallbacks:
-                            if isinstance(fallback_uuid, str):
-                                await self._append_llm_model_resource(models, seen_model_ids, fallback_uuid)
-            elif field_type == 'llm-model-selector':
-                if isinstance(value, str):
-                    await self._append_llm_model_resource(models, seen_model_ids, value)
-            elif field_type == 'rerank-model-selector':
-                if isinstance(value, str):
-                    await self._append_rerank_model_resource(models, seen_model_ids, value)
+        for model_type, model_uuid in config_schema.iter_config_model_refs(descriptor, runner_config):
+            if model_type == 'llm':
+                await self._append_llm_model_resource(models, seen_model_ids, model_uuid)
+            elif model_type == 'rerank':
+                await self._append_rerank_model_resource(models, seen_model_ids, model_uuid)

    async def _append_llm_model_resource(
        self,
@@ -236,6 +214,7 @@ class AgentResourceBuilder:
        self,
        manifest_perms: dict[str, list[str]],
        runner_config: dict[str, typing.Any],
+        descriptor: AgentRunnerDescriptor,
        query: typing.Any,
    ) -> list[KnowledgeBaseResource]:
        """Build knowledge bases list with plugin SDK field names."""
@@ -246,13 +225,8 @@ class AgentResourceBuilder:
        if 'list' not in kb_perms and 'retrieve' not in kb_perms:
            return kb_resources

-        # Get knowledge base UUIDs from config
-        kb_uuids = runner_config.get('knowledge-bases', [])
-        if not kb_uuids:
-            # Old single KB config
-            old_kb_uuid = runner_config.get('knowledge-base', '')
-            if old_kb_uuid and old_kb_uuid != '__none__':
-                kb_uuids = [old_kb_uuid]
+        # Get knowledge base UUIDs from schema-defined config fields.
+        kb_uuids = config_schema.extract_knowledge_base_uuids(descriptor, runner_config)

        # Also check query variables (may be modified by plugin PromptPreProcessing)
        kb_uuids_from_vars = query.variables.get('_knowledge_base_uuids', [])
@@ -9,6 +9,8 @@ from ....core import app
 from ....entity.persistence import model as persistence_model
 from ....entity.persistence import pipeline as persistence_pipeline
 from ....provider.modelmgr import requester as model_requester
+from ....agent.runner.config_migration import ConfigMigration
+from ....agent.runner import config_schema


 def _parse_provider_api_keys(provider_dict: dict) -> dict:
@@ -40,6 +42,40 @@ class LLMModelsService:
    def __init__(self, ap: app.Application) -> None:
        self.ap = ap

+    async def _get_runner_descriptor(self, runner_id: str):
+        """Look up the AgentRunner descriptor for runner_id, or return None.
+
+        Returns None when the application has no ``agent_runner_registry``
+        attribute or when the registry lookup raises. A lookup failure is
+        logged as a warning if the application has a logger.
+        """
+        # getattr keeps this working when the registry is not attached to ap.
+        registry = getattr(self.ap, 'agent_runner_registry', None)
+        if registry is None:
+            return None
+        try:
+            return await registry.get(runner_id, bound_plugins=None)
+        except Exception as e:
+            logger = getattr(self.ap, 'logger', None)
+            if logger:
+                logger.warning(f'Failed to load AgentRunner descriptor while setting default model: {e}')
+            return None
+
+    async def _auto_set_default_pipeline_llm_model(self, pipeline: persistence_pipeline.LegacyPipeline, model_uuid: str):
+        """Fill the pipeline runner's empty LLM selector with model_uuid.
+
+        Does nothing when the pipeline config is not a dict, no runner ID can
+        be resolved, the runner descriptor cannot be loaded, or
+        ``set_empty_llm_model_selection`` reports no change. Otherwise the
+        updated config is saved through ``pipeline_service.update_pipeline``.
+
+        Args:
+            pipeline: Pipeline row whose ``config`` is inspected and updated.
+            model_uuid: UUID of the model to store as the selection.
+        """
+        pipeline_config = pipeline.config
+        if not isinstance(pipeline_config, dict):
+            return
+
+        runner_id = ConfigMigration.resolve_runner_id(pipeline_config)
+        if not runner_id:
+            return
+
+        descriptor = await self._get_runner_descriptor(runner_id)
+        if descriptor is None:
+            return
+
+        # setdefault creates missing levels directly inside pipeline.config,
+        # so the config dict is mutated in place even if no model is set below.
+        ai_config = pipeline_config.setdefault('ai', {})
+        runner_configs = ai_config.setdefault('runner_config', {})
+        runner_config = runner_configs.setdefault(runner_id, {})
+
+        if not config_schema.set_empty_llm_model_selection(descriptor, runner_config, model_uuid):
+            return
+
+        await self.ap.pipeline_service.update_pipeline(pipeline.uuid, {'config': pipeline_config})
+
    async def get_llm_models(self, include_secret: bool = True) -> list[dict]:
        """Get all LLM models with provider info"""
        result = await self.ap.persistence_mgr.execute_async(sqlalchemy.select(persistence_model.LLMModel))
@@ -109,7 +145,6 @@ class LLMModelsService:
        self.ap.model_mgr.llm_models.append(runtime_llm_model)

        if auto_set_to_default_pipeline:
-            # set the default pipeline model to this model
            result = await self.ap.persistence_mgr.execute_async(
                sqlalchemy.select(persistence_pipeline.LegacyPipeline).where(
                    persistence_pipeline.LegacyPipeline.is_default == True
@@ -117,15 +152,7 @@ class LLMModelsService:
            )
            pipeline = result.first()
            if pipeline is not None:
-                model_config = pipeline.config.get('ai', {}).get('local-agent', {}).get('model', {})
-                if not model_config.get('primary', ''):
-                    pipeline_config = pipeline.config
-                    pipeline_config['ai']['local-agent']['model'] = {
-                        'primary': model_data['uuid'],
-                        'fallbacks': [],
-                    }
-                    pipeline_data = {'config': pipeline_config}
-                    await self.ap.pipeline_service.update_pipeline(pipeline.uuid, pipeline_data)
+                await self._auto_set_default_pipeline_llm_model(pipeline, model_data['uuid'])

        return model_data['uuid']

@@ -11,7 +11,8 @@ class RoundTruncator(truncator.Truncator):

    async def truncate(self, query: pipeline_query.Query) -> pipeline_query.Query:
        """截断"""
-        # Get max-round from runner config (new or old format)
+        # max-round remains a pipeline-side trimming knob until token-budget
+        # based compaction replaces this stage.
        runner_id = ConfigMigration.resolve_runner_id(query.pipeline_config)
        runner_config = ConfigMigration.resolve_runner_config(query.pipeline_config, runner_id) if runner_id else {}
        max_round = runner_config.get('max-round', 10)
@@ -1,6 +1,7 @@
 from __future__ import annotations

 import datetime
+import typing

 from .. import stage, entities
 from langbot_plugin.api.entities.builtin.provider import message as provider_message
@@ -9,10 +10,15 @@ import langbot_plugin.api.entities.builtin.platform.message as platform_message
 import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
 import langbot_plugin.api.entities.builtin.platform.events as platform_events

+from ...agent.runner.descriptor import AgentRunnerDescriptor
 from ...agent.runner.config_migration import ConfigMigration
+from ...agent.runner import config_schema


-# Official local-agent runner ID
+DEFAULT_PROMPT_CONFIG = [
+    {'role': 'system', 'content': 'You are a helpful assistant.'},
+]
+
 LOCAL_AGENT_RUNNER_ID = 'plugin:langbot/local-agent/default'


@@ -31,6 +37,76 @@ class PreProcessor(stage.PipelineStage):
        - use_funcs
    """

+    async def _get_runner_descriptor(
+        self,
+        runner_id: str | None,
+        bound_plugins: list[str] | None,
+    ) -> AgentRunnerDescriptor | None:
+        """Look up the AgentRunner descriptor for runner_id, or return None.
+
+        Returns None when ``runner_id`` is empty, the application has no
+        ``agent_runner_registry`` attribute, or the registry lookup raises
+        (logged at debug level).
+        """
+        if not runner_id:
+            return None
+
+        # getattr keeps this working when the registry is not attached to ap.
+        registry = getattr(self.ap, 'agent_runner_registry', None)
+        if registry is None:
+            return None
+
+        try:
+            return await registry.get(runner_id, bound_plugins)
+        except Exception as e:
+            self.ap.logger.debug(f'Unable to load AgentRunner descriptor for {runner_id}: {e}')
+            return None
+
+    async def _resolve_llm_model(
+        self,
+        primary_uuid: str,
+    ) -> typing.Any | None:
+        """Resolve the primary model UUID to a model object, or return None.
+
+        Returns None without a lookup when ``primary_uuid`` is one of
+        ``config_schema.NONE_SENTINELS``, and None with a warning when
+        ``model_mgr.get_model_by_uuid`` raises ``ValueError``.
+        """
+        if primary_uuid in config_schema.NONE_SENTINELS:
+            return None
+        try:
+            return await self.ap.model_mgr.get_model_by_uuid(primary_uuid)
+        except ValueError:
+            self.ap.logger.warning(f'LLM model {primary_uuid} not found or not configured')
+            return None
+
+    async def _resolve_fallback_models(self, fallback_uuids: list[str]) -> list[str]:
+        """Return the fallback UUIDs that resolve to a known model.
+
+        Sentinel values are skipped silently. A UUID whose lookup raises
+        ``ValueError`` is skipped with a warning. Input order is kept.
+        """
+        valid_fallbacks = []
+        for fallback_uuid in fallback_uuids:
+            if fallback_uuid in config_schema.NONE_SENTINELS:
+                continue
+            try:
+                # The lookup is an existence check only; the UUID is kept,
+                # not the returned model object.
+                await self.ap.model_mgr.get_model_by_uuid(fallback_uuid)
+                valid_fallbacks.append(fallback_uuid)
+            except ValueError:
+                self.ap.logger.warning(f'Fallback model {fallback_uuid} not found, skipping')
+        return valid_fallbacks
+
+    def _runner_accepts_multimodal_input(self, descriptor: AgentRunnerDescriptor | None) -> bool:
+        """Return whether the runner declares the multimodal_input capability.
+
+        With no descriptor this returns True. With a descriptor, a missing
+        ``multimodal_input`` capability counts as False.
+        """
+        if descriptor is None:
+            return True
+        return descriptor.capabilities.get('multimodal_input', False)
+
+    def _model_supports_vision(self, llm_model: typing.Any | None) -> bool:
+        """Return whether llm_model lists 'vision' among its abilities.
+
+        A falsy model returns False. Abilities are read from
+        ``llm_model.model_entity.abilities``; a missing attribute at either
+        level counts as no abilities.
+        """
+        if not llm_model:
+            return False
+        abilities = getattr(getattr(llm_model, 'model_entity', None), 'abilities', [])
+        return 'vision' in abilities
+
+    def _should_keep_image_inputs(
+        self,
+        descriptor: AgentRunnerDescriptor | None,
+        uses_host_models: bool,
+        llm_model: typing.Any | None,
+    ) -> bool:
+        """Decide whether image inputs should be kept for this run.
+
+        Images are dropped when the runner does not accept multimodal input.
+        When the runner uses host models, the answer depends on whether
+        ``llm_model`` supports vision. Otherwise images are kept.
+        """
+        if not self._runner_accepts_multimodal_input(descriptor):
+            return False
+        if uses_host_models:
+            return self._model_supports_vision(llm_model)
+        # No host model involved, so there is no model ability to check.
+        return True
+
+    def _strip_images_from_history(self, query: pipeline_query.Query) -> None:
+        """Remove image_url elements from every message in query.messages.
+
+        Only messages whose ``content`` is a list are touched; each one gets a
+        new list without the elements whose ``type`` is ``'image_url'``.
+        """
+        for msg in query.messages:
+            if isinstance(msg.content, list):
+                msg.content = [elem for elem in msg.content if elem.type != 'image_url']
+
    async def process(
        self,
        query: pipeline_query.Query,
@@ -40,57 +116,28 @@ class PreProcessor(stage.PipelineStage):
        # Resolve runner ID using ConfigMigration (supports both new and old formats)
        runner_id = ConfigMigration.resolve_runner_id(query.pipeline_config)

-        # Get runner config (from new ai.runner_config or old ai.<runner-name>)
+        # Get runner config from ai.runner_config[runner_id].
        runner_config = ConfigMigration.resolve_runner_config(query.pipeline_config, runner_id) if runner_id else {}
+        query.variables = query.variables or {}
+        bound_plugins = query.variables.get('_pipeline_bound_plugins', None)
+        bound_mcp_servers = query.variables.get('_pipeline_bound_mcp_servers', None)
+        descriptor = await self._get_runner_descriptor(runner_id, bound_plugins)

        session = await self.ap.sess_mgr.get_session(query)

-        # Determine if this is a local-agent runner (built-in LLM capabilities)
-        # Check by runner_id OR by legacy runner field for backward compatibility
-        is_local_agent = runner_id == LOCAL_AGENT_RUNNER_ID or (
-            runner_id is None and
-            query.pipeline_config.get('ai', {}).get('runner', {}).get('runner') == 'local-agent'
-        )
+        uses_host_models = config_schema.uses_host_models(descriptor)
+        uses_host_tools = config_schema.uses_host_tools(descriptor)
+        is_local_agent = runner_id == LOCAL_AGENT_RUNNER_ID
        include_skill_authoring = is_local_agent and getattr(self.ap, 'skill_service', None) is not None
-
-        # When not local-agent, llm_model is None
        llm_model = None
-        if is_local_agent:
-            # Read model config — new format is { primary: str, fallbacks: [str] },
-            # but handle legacy plain string for backward compatibility
-            model_config = runner_config.get('model', {})
-            if isinstance(model_config, str):
-                # Legacy format: plain UUID string
-                primary_uuid = model_config
-                fallback_uuids = []
-            else:
-                primary_uuid = model_config.get('primary', '')
-                fallback_uuids = model_config.get('fallbacks', [])
+        if uses_host_models:
+            primary_uuid, fallback_uuids = config_schema.extract_model_selection(descriptor, runner_config)
+            llm_model = await self._resolve_llm_model(primary_uuid)
+            valid_fallbacks = await self._resolve_fallback_models(fallback_uuids)
+            if valid_fallbacks:
+                query.variables['_fallback_model_uuids'] = valid_fallbacks

-            if primary_uuid:
-                try:
-                    llm_model = await self.ap.model_mgr.get_model_by_uuid(primary_uuid)
-                except ValueError:
-                    self.ap.logger.warning(f'LLM model {primary_uuid} not found or not configured')
-
-            # Resolve fallback model UUIDs
-            if fallback_uuids:
-                valid_fallbacks = []
-                for fb_uuid in fallback_uuids:
-                    try:
-                        await self.ap.model_mgr.get_model_by_uuid(fb_uuid)
-                        valid_fallbacks.append(fb_uuid)
-                    except ValueError:
-                        self.ap.logger.warning(f'Fallback model {fb_uuid} not found, skipping')
-                if valid_fallbacks:
-                    query.variables['_fallback_model_uuids'] = valid_fallbacks
-
-        # Get prompt config - for local-agent, use runner_config; for others, use default prompt
-        prompt_config = runner_config.get('prompt', [
-            {'role': 'system', 'content': 'You are a helpful assistant.'}
-        ]) if is_local_agent else [
-            {'role': 'system', 'content': 'You are a helpful assistant.'}
-        ]
+        prompt_config = config_schema.extract_prompt_config(descriptor, runner_config, DEFAULT_PROMPT_CONFIG)

        conversation = await self.ap.sess_mgr.get_conversation(
            query,
@@ -126,15 +173,12 @@ class PreProcessor(stage.PipelineStage):
        query.prompt = conversation.prompt.copy()
        query.messages = conversation.messages.copy()

-        if is_local_agent:
+        if uses_host_models:
            query.use_funcs = []
            if llm_model:
                query.use_llm_model_uuid = llm_model.model_entity.uuid

-                if llm_model.model_entity.abilities.__contains__('func_call'):
-                    # Get bound plugins and MCP servers for filtering tools
-                    bound_plugins = query.variables.get('_pipeline_bound_plugins', None)
-                    bound_mcp_servers = query.variables.get('_pipeline_bound_mcp_servers', None)
+                if uses_host_tools and llm_model.model_entity.abilities.__contains__('func_call'):
                    query.use_funcs = await self.ap.tool_mgr.get_all_tools(
                        bound_plugins,
                        bound_mcp_servers,
@@ -147,9 +191,7 @@ class PreProcessor(stage.PipelineStage):

            # If primary model doesn't support func_call but fallback models exist,
            # load tools anyway since fallback models may support them
-            if not query.use_funcs and query.variables.get('_fallback_model_uuids'):
-                bound_plugins = query.variables.get('_pipeline_bound_plugins', None)
-                bound_mcp_servers = query.variables.get('_pipeline_bound_mcp_servers', None)
+            if uses_host_tools and not query.use_funcs and query.variables.get('_fallback_model_uuids'):
                query.use_funcs = await self.ap.tool_mgr.get_all_tools(
                    bound_plugins,
                    bound_mcp_servers,
@@ -179,18 +221,9 @@ class PreProcessor(stage.PipelineStage):
        }
        query.variables.update(variables)

-        # Check if this model supports vision, if not, remove all images
-        # TODO this checking should be performed in runner, and in this stage, the image should be reserved
-        if (
-            is_local_agent
-            and llm_model
-            and not llm_model.model_entity.abilities.__contains__('vision')
-        ):
-            for msg in query.messages:
-                if isinstance(msg.content, list):
-                    for me in msg.content:
-                        if me.type == 'image_url':
-                            msg.content.remove(me)
+        keep_image_inputs = self._should_keep_image_inputs(descriptor, uses_host_models, llm_model)
+        if not keep_image_inputs:
+            self._strip_images_from_history(query)

        content_list: list[provider_message.ContentElement] = []

@@ -202,10 +235,7 @@ class PreProcessor(stage.PipelineStage):
                content_list.append(provider_message.ContentElement.from_text(me.text))
                plain_text += me.text
            elif isinstance(me, platform_message.Image):
-                # Allow images for non-local-agent runners or if local-agent has vision
-                if not is_local_agent or (
-                    llm_model and llm_model.model_entity.abilities.__contains__('vision')
-                ):
+                if keep_image_inputs:
                    if me.base64 is not None:
                        content_list.append(provider_message.ContentElement.from_image_base64(me.base64))
            elif isinstance(me, platform_message.Voice):
@@ -224,9 +254,7 @@ class PreProcessor(stage.PipelineStage):
                    if isinstance(msg, platform_message.Plain):
                        content_list.append(provider_message.ContentElement.from_text(msg.text))
                    elif isinstance(msg, platform_message.Image):
-                        if not is_local_agent or (
-                            llm_model and llm_model.model_entity.abilities.__contains__('vision')
-                        ):
+                        if keep_image_inputs:
                            if msg.base64 is not None:
                                content_list.append(provider_message.ContentElement.from_image_base64(msg.base64))
                    elif isinstance(msg, platform_message.File):
@@ -246,15 +274,12 @@ class PreProcessor(stage.PipelineStage):

        query.user_message = provider_message.Message(role='user', content=content_list)

-        # Extract knowledge base UUIDs into query variables so plugins can modify them
-        # during PromptPreProcessing before the runner performs retrieval.
-        # Only for local-agent runner
-        kb_uuids = runner_config.get('knowledge-bases', []) if is_local_agent else []
-        if not kb_uuids:
-            old_kb_uuid = runner_config.get('knowledge-base', '') if is_local_agent else ''
-            if old_kb_uuid and old_kb_uuid != '__none__':
-                kb_uuids = [old_kb_uuid]
-        query.variables['_knowledge_base_uuids'] = list(kb_uuids)
+        # Extract configured KB UUIDs into query variables so PromptPreProcessing
+        # plugins can still adjust the authorized retrieval set before run_agent.
+        query.variables['_knowledge_base_uuids'] = config_schema.extract_knowledge_base_uuids(
+            descriptor,
+            runner_config,
+        )

        # =========== 触发事件 PromptPreProcessing

@@ -25,6 +25,8 @@ from ..entity.persistence import bstorage as persistence_bstorage
 from ..core import app
 from ..utils import constants
 from ..agent.runner.session_registry import get_session_registry
+from ..agent.runner.config_migration import ConfigMigration
+from ..agent.runner import config_schema


 def _make_rag_error_response(error: Exception, error_type: str, **extra_context) -> handler.ActionResponse:
@@ -98,6 +100,46 @@ def _build_tool_detail(tool: Any, requested_tool_name: str | None = None) -> dic
    }


+def _normalize_uuid_list(values: Any) -> list[str]:
+    """Return the string UUIDs in ``values``, de-duplicated in first-seen order.
+
+    Anything that is not a ``list`` yields an empty list. Entries that are not
+    strings, or that appear in ``config_schema.NONE_SENTINELS``, are dropped.
+    """
+    if not isinstance(values, list):
+        return []
+
+    # A dict keeps insertion order, so its keys give an ordered, unique set.
+    seen: dict[str, None] = {}
+    for candidate in values:
+        if not isinstance(candidate, str):
+            continue
+        if candidate in config_schema.NONE_SENTINELS:
+            continue
+        seen.setdefault(candidate, None)
+    return list(seen)
+
+
+async def _get_pipeline_knowledge_base_uuids(ap: app.Application, query: Any) -> list[str]:
+    """Return the knowledge-base UUIDs that apply to ``query``'s pipeline.
+
+    If the query variables already contain ``_knowledge_base_uuids``, that
+    value is normalized and returned. Otherwise the list is derived from the
+    pipeline's runner binding through the runner descriptor. A missing
+    pipeline config, runner id, registry, or descriptor yields ``[]``.
+    """
+    query_vars = getattr(query, 'variables', {}) or {}
+    if '_knowledge_base_uuids' in query_vars:
+        return _normalize_uuid_list(query_vars.get('_knowledge_base_uuids'))
+
+    pipeline_cfg = getattr(query, 'pipeline_config', None)
+    runner_id = ConfigMigration.resolve_runner_id(pipeline_cfg) if pipeline_cfg else None
+    if not runner_id:
+        return []
+
+    runner_config = ConfigMigration.resolve_runner_config(pipeline_cfg, runner_id)
+    runner_registry = getattr(ap, 'agent_runner_registry', None)
+    if runner_registry is None:
+        return []
+
+    plugin_scope = query_vars.get('_pipeline_bound_plugins')
+    try:
+        descriptor = await runner_registry.get(runner_id, plugin_scope)
+    except Exception as e:
+        # Best effort: an unavailable descriptor means no pipeline-scoped KBs.
+        ap.logger.warning(f'Failed to load AgentRunner descriptor for pipeline knowledge-base scope: {e}')
+        return []
+    else:
+        return config_schema.extract_knowledge_base_uuids(descriptor, runner_config)
+
+
 async def _validate_run_authorization(
    run_id: str,
    resource_type: str,
@@ -1155,15 +1197,7 @@ class RuntimeConnectionHandler(handler.Handler):

            query = self.ap.query_pool.cached_queries[query_id]

-            kb_uuids = []
-            if query.pipeline_config:
-                local_agent_config = query.pipeline_config.get('ai', {}).get('local-agent', {})
-                kb_uuids = local_agent_config.get('knowledge-bases', [])
-                # Backward compatibility
-                if not kb_uuids:
-                    old_kb_uuid = local_agent_config.get('knowledge-base', '')
-                    if old_kb_uuid and old_kb_uuid != '__none__':
-                        kb_uuids = [old_kb_uuid]
+            kb_uuids = await _get_pipeline_knowledge_base_uuids(self.ap, query)

            knowledge_bases = []
            for kb_uuid in kb_uuids:
@@ -1213,19 +1247,9 @@ class RuntimeConnectionHandler(handler.Handler):
                if error:
                    return error
            else:
-                # Regular plugin call: validate against pipeline's configured knowledge bases
-                # FIX: First resolve runner_id, then resolve runner_config
-                allowed_kb_uuids = []
-                if query.pipeline_config:
-                    from langbot.pkg.agent.runner.config_migration import ConfigMigration
-                    runner_id = ConfigMigration.resolve_runner_id(query.pipeline_config)
-                    if runner_id:
-                        runner_config = ConfigMigration.resolve_runner_config(query.pipeline_config, runner_id)
-                        allowed_kb_uuids = runner_config.get('knowledge-bases', [])
-                        if not allowed_kb_uuids:
-                            old_kb_uuid = runner_config.get('knowledge-base', '')
-                            if old_kb_uuid and old_kb_uuid != '__none__':
-                                allowed_kb_uuids = [old_kb_uuid]
+                # Regular plugin call: validate against the runner binding's
+                # schema-defined KB selectors or the preprocessed query scope.
+                allowed_kb_uuids = await _get_pipeline_knowledge_base_uuids(self.ap, query)

                if kb_id not in allowed_kb_uuids:
                    return handler.ActionResponse.error(
@@ -1434,6 +1458,7 @@ class RuntimeConnectionHandler(handler.Handler):

        Yields AgentRunResult dicts.
        """
+        timeout = self._get_runner_action_timeout(context)
        gen = self.call_action_generator(
            LangBotToRuntimeAction.RUN_AGENT,
            {
@@ -1442,12 +1467,27 @@ class RuntimeConnectionHandler(handler.Handler):
                'runner_name': runner_name,
                'context': context,
            },
-            timeout=300,
+            timeout=timeout,
        )

        async for ret in gen:
            yield ret

+    def _get_runner_action_timeout(self, context: dict[str, Any]) -> float:
+        """Derive the transport idle timeout for a RUN_AGENT call.
+
+        Reads ``context['runtime']['deadline_at']`` and compares it with
+        ``time.time()``.
+
+        Args:
+            context: Run context dict; only ``runtime.deadline_at`` is read.
+
+        Returns:
+            - 300 seconds when no usable deadline is present (missing,
+              malformed, non-numeric, NaN or infinite).
+            - 0.001 seconds when the deadline has already passed.
+            - Otherwise the remaining time plus one second of grace.
+        """
+        import math
+        import time
+
+        default_timeout = 300.0
+        expired_timeout = 0.001
+        grace_seconds = 1.0
+
+        # Tolerate a missing or malformed context instead of raising
+        # AttributeError out of the timeout helper.
+        runtime = context.get('runtime') if isinstance(context, dict) else None
+        deadline_at = runtime.get('deadline_at') if isinstance(runtime, dict) else None
+        if deadline_at is None:
+            return default_timeout
+
+        try:
+            remaining = float(deadline_at) - time.time()
+        except (TypeError, ValueError, OverflowError):
+            return default_timeout
+
+        # NaN compares false against everything and would otherwise be
+        # returned as the timeout; an infinite value would disable it.
+        if not math.isfinite(remaining):
+            return default_timeout
+        if remaining <= 0:
+            return expired_timeout
+        return remaining + grace_seconds
+
    async def get_plugin_icon(self, plugin_author: str, plugin_name: str) -> dict[str, Any]:
        """Get plugin icon"""
        result = await self.call_action(