perf(agent-runner): improve session registry and orchestrator efficiency

- Add pre-computed _authorized_ids (one set per resource type) at session registration for O(1) lookup
- Refactor is_resource_allowed() from linear search to set membership check
- Add thread-safe locking to get_session_registry() singleton
- Cache _session_registry and _state_store references in orchestrator __init__
- Add asyncio.gather() for parallel resource building in AgentResourceBuilder
- Create shared test fixtures in tests/unit_tests/agent/conftest.py
- Update test files to import from shared conftest.py

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-11 16:26:02 +00:00 · 2026-05-11 21:45:26 +08:00
parent d6b8f48e73
commit dc82fb584a
23 changed files with 4438 additions and 677 deletions
--- a/src/langbot/pkg/agent/runner/init.py
+++ b/src/langbot/pkg/agent/runner/init.py
@@ -16,6 +16,7 @@ from .resource_builder import AgentResourceBuilder
 from .result_normalizer import AgentResultNormalizer
 from .orchestrator import AgentRunOrchestrator
 from .config_migration import ConfigMigration
+from .session_registry import AgentRunSessionRegistry, AgentRunSession, get_session_registry

 __all__ = [
    'AgentRunnerDescriptor',
@@ -33,4 +34,7 @@ __all__ = [
    'AgentResultNormalizer',
    'AgentRunOrchestrator',
    'ConfigMigration',
+    'AgentRunSessionRegistry',
+    'AgentRunSession',
+    'get_session_registry',
 ]
--- a/src/langbot/pkg/agent/runner/config_migration.py
+++ b/src/langbot/pkg/agent/runner/config_migration.py
@@ -72,7 +72,7 @@ class ConfigMigration:
        pipeline_config: dict[str, typing.Any],
        runner_id: str,
    ) -> dict[str, typing.Any]:
-        """Resolve runner instance configuration from pipeline configuration.
+        """Resolve runner binding configuration from pipeline configuration.

        Priority:
        1. New format: ai.runner_config[runner_id]
--- a/src/langbot/pkg/agent/runner/context_builder.py
+++ b/src/langbot/pkg/agent/runner/context_builder.py
@@ -10,6 +10,7 @@ from langbot_plugin.api.entities.builtin.pipeline import query as pipeline_query
 from ...core import app
 from .descriptor import AgentRunnerDescriptor
 from .config_migration import ConfigMigration
+from .state_store import get_state_store


 # Internal models for SDK v1 context protocol matching SDK v1 resources.py
@@ -41,6 +42,14 @@ class AgentInput(typing.TypedDict):
    attachments: list[dict[str, typing.Any]]


+class AgentRunState(typing.TypedDict):
+    """Agent run state with 4 scopes."""
+    conversation: dict[str, typing.Any]
+    actor: dict[str, typing.Any]
+    subject: dict[str, typing.Any]
+    runner: dict[str, typing.Any]
+
+
 # SDK v1 Protocol resource models - matching langbot-plugin-sdk/resources.py


@@ -100,7 +109,11 @@ class AgentRuntimeContext(typing.TypedDict):


 class AgentRunContextV1(typing.TypedDict):
-    """SDK v1 AgentRunContext per PROTOCOL_V1.md."""
+    """SDK v1 AgentRunContext per PROTOCOL_V1.md.
+
+    Note: The 'config' field contains the binding config from ai.runner_config[runner_id],
+    which is Pipeline's configuration for this specific runner binding (not plugin instance config).
+    """
    run_id: str
    trigger: AgentTrigger
    conversation: ConversationContext | None
@@ -109,9 +122,11 @@ class AgentRunContextV1(typing.TypedDict):
    subject: dict[str, typing.Any] | None  # Reserved for EBA
    messages: list[dict[str, typing.Any]]
    input: AgentInput
+    params: dict[str, typing.Any]
    resources: AgentResources
+    state: AgentRunState
    runtime: AgentRuntimeContext
-    config: dict[str, typing.Any]
+    config: dict[str, typing.Any]  # Binding config from ai.runner_config[runner_id]


 class AgentRunContextBuilder:
@@ -123,13 +138,25 @@ class AgentRunContextBuilder:
    - Build conversation context from session
    - Convert messages to SDK format
    - Build input from user_message and message_chain
+    - Build params from query.variables with filtering
+    - Build state snapshot from state_store
    - Set resources from AgentResourceBuilder result
    - Build runtime context with host info, trace_id, deadline
-    - Set config from runner instance configuration
+    - Set config from runner binding configuration (ai.runner_config[runner_id])
    """

    ap: app.Application

+    # Params filtering rules
+    # Exclude variables starting with underscore (internal)
+    INTERNAL_PREFIX = '_'
+
+    # Exclude variables with sensitive naming patterns
+    SENSITIVE_PATTERNS = ('secret', 'token', 'key', 'password', 'credential', 'api_key', 'apikey')
+
+    # Exclude permission/control variables
+    PERMISSION_VARS = ('_pipeline_bound_plugins', '_authorized', '_permission')
+
    def __init__(self, ap: app.Application):
        self.ap = ap

@@ -178,7 +205,16 @@ class AgentRunContextBuilder:
        # Build messages
        messages = self._build_messages(query)

-        # Get runner config
+        # Build params from query.variables with filtering
+        params = self._build_params(query)
+
+        # Build state snapshot from state_store
+        state_store = get_state_store()
+        state: AgentRunState = state_store.build_snapshot(query, descriptor)
+
+        # Get runner binding config from ai.runner_config[runner_id]
+        # This is Pipeline's configuration for this specific runner binding,
+        # passed through AgentRunContext.config to the runner
        runner_config = ConfigMigration.resolve_runner_config(
            query.pipeline_config,
            descriptor.id,
@@ -207,7 +243,9 @@ class AgentRunContextBuilder:
            'subject': None,  # Reserved for EBA
            'messages': messages,
            'input': input,
+            'params': params,
            'resources': resources,
+            'state': state,
            'runtime': runtime,
            'config': runner_config,
        }
@@ -251,4 +289,72 @@ class AgentRunContextBuilder:
            for msg in query.messages:
                messages.append(msg.model_dump(mode='json'))

-        return messages
+        return messages
+
+    def _build_params(self, query: pipeline_query.Query) -> dict[str, typing.Any]:
+        """Build the runner-visible params dict from query.variables.
+
+        Filtering rules (applied in this order, first match drops the variable):
+        1. Name starts with INTERNAL_PREFIX (underscore, internal)
+        2. Lower-cased name contains any SENSITIVE_PATTERNS entry as a substring
+        3. Name equals or starts with a PERMISSION_VARS entry
+        4. Value is rejected by _is_json_serializable (dropped silently)
+
+        Args:
+            query: Pipeline query whose variables are filtered (not modified)
+
+        Returns:
+            New dict holding only the variables that passed every rule
+        """
+        params: dict[str, typing.Any] = {}
+
+        if not query.variables:
+            return params
+
+        for key, value in query.variables.items():
+            # Filter internal variables (starting with underscore)
+            if key.startswith(self.INTERNAL_PREFIX):
+                continue
+
+            # Substring match on the lower-cased name, so 'key' also drops e.g. 'keyboard'
+            key_lower = key.lower()
+            if any(pattern in key_lower for pattern in self.SENSITIVE_PATTERNS):
+                continue
+
+            # Defense in depth: the current PERMISSION_VARS are all '_'-prefixed, so already skipped above
+            if any(key == perm_var or key.startswith(perm_var) for perm_var in self.PERMISSION_VARS):
+                continue
+
+            # Keep only JSON-serializable values
+            if self._is_json_serializable(value):
+                params[key] = value
+
+        return params
+
+    def _is_json_serializable(self, value: typing.Any) -> bool:
+        """Report whether ``value`` can be handed to a runner as plain JSON data.
+
+        Accepted: None, str, int, float, bool, and list/tuple/dict built
+        recursively from accepted values (dict keys must be str). Everything
+        else, including set and Pydantic models, is rejected.
+
+        Args:
+            value: Candidate value, possibly nested
+
+        Returns:
+            True when every nested element is accepted, False otherwise
+        """
+        check = self._is_json_serializable
+        # Scalars need no recursion
+        if value is None or isinstance(value, (str, int, float, bool)):
+            return True
+        # Mappings: every key must be a str and every value acceptable
+        if isinstance(value, dict):
+            return all(isinstance(name, str) and check(item) for name, item in value.items())
+        # Ordered sequences only; set is deliberately excluded because
+        # json.dumps cannot encode it
+        if isinstance(value, (list, tuple)):
+            return all(check(element) for element in value)
+        # Any other type (custom objects, bytes, set, ...) is treated as
+        # not serializable for the purposes of params
+        return False
--- a/src/langbot/pkg/agent/runner/orchestrator.py
+++ b/src/langbot/pkg/agent/runner/orchestrator.py
@@ -13,6 +13,8 @@ from .registry import AgentRunnerRegistry
 from .context_builder import AgentRunContextBuilder, AgentRunContextV1
 from .resource_builder import AgentResourceBuilder
 from .result_normalizer import AgentResultNormalizer
+from .state_store import get_state_store, RunnerScopedStateStore
+from .session_registry import get_session_registry, AgentRunSessionRegistry
 from .config_migration import ConfigMigration
 from .errors import (
    RunnerNotFoundError,
@@ -46,6 +48,10 @@ class AgentRunOrchestrator:

    result_normalizer: AgentResultNormalizer

+    # Cached singleton references (set in __init__)
+    _session_registry: AgentRunSessionRegistry
+    _state_store: RunnerScopedStateStore
+
    def __init__(
        self,
        ap: app.Application,
@@ -56,6 +62,9 @@ class AgentRunOrchestrator:
        self.context_builder = AgentRunContextBuilder(ap)
        self.resource_builder = AgentResourceBuilder(ap)
        self.result_normalizer = AgentResultNormalizer(ap)
+        # Cache singleton references to avoid per-request getter calls; a later reset_state_store() is not seen by this instance
+        self._session_registry = get_session_registry()
+        self._state_store = get_state_store()

    async def run_from_query(
        self,
@@ -93,12 +102,33 @@ class AgentRunOrchestrator:
        # Build context
        context = await self.context_builder.build_context(query, descriptor, resources)

-        # Run via plugin connector
-        async for result_dict in self._invoke_runner(descriptor, context):
-            # Normalize result
-            result = await self.result_normalizer.normalize(result_dict, descriptor)
-            if result is not None:
-                yield result
+        # Register session for proxy action permission validation
+        run_id = context['run_id']
+        await self._session_registry.register(
+            run_id=run_id,
+            runner_id=descriptor.id,
+            query_id=query.query_id,
+            plugin_identity=descriptor.get_plugin_id(),
+            resources=resources,
+        )
+
+        try:
+            # Run via plugin connector
+            async for result_dict in self._invoke_runner(descriptor, context):
+                # Handle state.updated first - consume before normalizer
+                if result_dict.get('type') == 'state.updated':
+                    self._handle_state_updated(result_dict, query, descriptor)
+                    # Pass to normalizer for logging, but don't yield to pipeline
+                    await self.result_normalizer.normalize(result_dict, descriptor)
+                    continue
+
+                # Normalize result for other types
+                result = await self.result_normalizer.normalize(result_dict, descriptor)
+                if result is not None:
+                    yield result
+        finally:
+            # Unregister session after run completes (success or error)
+            await self._session_registry.unregister(run_id)

    async def _invoke_runner(
        self,
@@ -155,4 +185,48 @@ class AgentRunOrchestrator:
        Returns:
            Runner ID string, or None
        """
-        return ConfigMigration.resolve_runner_id(query.pipeline_config)
+        return ConfigMigration.resolve_runner_id(query.pipeline_config)
+
+    def _handle_state_updated(
+        self,
+        result_dict: dict[str, typing.Any],
+        query: pipeline_query.Query,
+        descriptor: AgentRunnerDescriptor,
+    ) -> None:
+        """Apply a runner-emitted state.updated result to the state store.
+
+        Args:
+            result_dict: Raw result dict with type='state.updated'; a missing, null or non-dict 'data' is treated as empty
+            query: Pipeline query
+            descriptor: Runner descriptor
+        """
+        data = result_dict.get('data')
+        if not isinstance(data, dict):
+            # Runner output is untrusted: tolerate null / non-dict payloads instead of raising mid-run
+            data = {}
+        # Scope defaults to 'conversation' for backward compatibility
+        scope = data.get('scope', 'conversation')
+        key = data.get('key')
+        value = data.get('value')
+
+        if not isinstance(key, str) or not key:
+            self.ap.logger.warning(
+                f'Runner {descriptor.id} state.updated missing key, ignoring'
+            )
+            return
+
+        # Apply update to state store
+        success = self._state_store.apply_update(
+            query=query,
+            descriptor=descriptor,
+            scope=scope,
+            key=key,
+            value=value,
+            logger=self.ap.logger,
+        )
+
+        if success:
+            self.ap.logger.debug(
+                f'Runner {descriptor.id} state.updated: scope={scope}, key={key}, value={value}'
+            )
+        # Invalid scope is already logged by state_store.apply_update
--- a/src/langbot/pkg/agent/runner/resource_builder.py
+++ b/src/langbot/pkg/agent/runner/resource_builder.py
@@ -1,6 +1,7 @@
 """Agent resource builder for constructing authorized resources."""
 from __future__ import annotations

+import asyncio
 import typing

 from ...core import app
@@ -68,10 +69,12 @@ class AgentResourceBuilder:
        from .config_migration import ConfigMigration
        runner_config = ConfigMigration.resolve_runner_config(query.pipeline_config, descriptor.id)

-        # Build each resource category
-        models = await self._build_models(manifest_perms, query)
-        tools = await self._build_tools(manifest_perms, bound_plugins, bound_mcp_servers, query)
-        knowledge_bases = await self._build_knowledge_bases(manifest_perms, runner_config, query)
+        # Build each resource category in parallel
+        models, tools, knowledge_bases = await asyncio.gather(
+            self._build_models(manifest_perms, query),
+            self._build_tools(manifest_perms, bound_plugins, bound_mcp_servers, query),
+            self._build_knowledge_bases(manifest_perms, runner_config, query),
+        )
        storage = self._build_storage(manifest_perms)

        return {
@@ -104,11 +107,10 @@ class AgentResourceBuilder:
        try:
            model = await self.ap.model_mgr.get_model_by_uuid(model_uuid)
            if model and model.model_entity:
-                # Use SDK v1 field names: model_id, model_type, provider
                models.append({
                    'model_id': model_uuid,
-                    'model_type': model.model_entity.model_type,
-                    'provider': model.provider_entity.name if hasattr(model, 'provider_entity') else None,
+                    'model_type': getattr(model.model_entity, 'model_type', None),
+                    'provider': getattr(model.provider_entity, 'name', None) if hasattr(model, 'provider_entity') else None,
                })
        except Exception:
            pass
--- a/src/langbot/pkg/agent/runner/result_normalizer.py
+++ b/src/langbot/pkg/agent/runner/result_normalizer.py
@@ -108,9 +108,13 @@ class AgentResultNormalizer:
            return None

        elif result_type == 'state.updated':
-            # Log for telemetry, don't yield
+            # Log for telemetry, don't yield to pipeline
+            # Orchestrator already handles the actual state_store.apply_update
+            scope = data.get('scope', 'conversation')  # Default for backward compat
+            key = data.get('key', 'unknown')
+            value_repr = repr(data.get('value', '...'))[:100]  # Truncate for log
            self.ap.logger.debug(
-                f'Runner {descriptor.id} state updated: {data.get("key", "unknown")}={data.get("value", "...")}'
+                f'Runner {descriptor.id} state.updated logged: scope={scope}, key={key}, value={value_repr}'
            )
            return None

--- a/src/langbot/pkg/agent/runner/session_registry.py
+++ b/src/langbot/pkg/agent/runner/session_registry.py
@@ -0,0 +1,217 @@
+"""Agent run session registry for proxy action permission validation."""
+from __future__ import annotations
+
+import asyncio
+import typing
+import time
+import threading
+
+from .context_builder import AgentResources
+
+
+class AgentRunSessionStatus(typing.TypedDict):
+    """Status tracking for agent run session."""
+    started_at: int
+    last_activity_at: int
+
+
+class AgentRunSession(typing.TypedDict):
+    """Session for an active agent runner execution.
+
+    Stored in AgentRunSessionRegistry for proxy action permission validation.
+
+    Fields:
+        run_id: Unique run identifier (UUID from AgentRunContext)
+        runner_id: Runner descriptor ID (plugin:author/name/runner)
+        query_id: Pipeline query ID
+        plugin_identity: Plugin identifier (author/name) of the runner
+        resources: Authorized resources for this run (from AgentResources)
+        status: Session status tracking
+        _authorized_ids: Pre-computed authorized resource IDs for O(1) lookup
+    """
+    run_id: str
+    runner_id: str
+    query_id: int | None
+    plugin_identity: str  # author/name
+    resources: AgentResources
+    status: AgentRunSessionStatus
+    _authorized_ids: dict[str, set[str]]  # Pre-computed sets for O(1) lookup
+
+
+class AgentRunSessionRegistry:
+    """Registry for active agent run sessions.
+
+    Host-owned registry for tracking active AgentRunner executions.
+    Used by proxy actions in handler.py to validate resource access.
+
+    Key: run_id (UUID from AgentRunContext)
+    Value: AgentRunSession with authorized resources
+
+    Coroutine-safe via asyncio.Lock (asyncio locks do not guard against access from other OS threads).
+    """
+
+    _sessions: dict[str, AgentRunSession]
+    _lock: asyncio.Lock
+
+    def __init__(self):
+        self._sessions = {}
+        self._lock = asyncio.Lock()
+
+    async def register(
+        self,
+        run_id: str,
+        runner_id: str,
+        query_id: int | None,
+        plugin_identity: str,
+        resources: AgentResources,
+    ) -> None:
+        """Register a new agent run session (replaces any existing session with the same run_id).
+
+        Args:
+            run_id: Unique run identifier
+            runner_id: Runner descriptor ID
+            query_id: Pipeline query ID
+            plugin_identity: Plugin identifier (author/name)
+            resources: Authorized resources for this run
+        """
+        now = int(time.time())
+
+        # Pre-compute authorized IDs for O(1) lookup; entries without an ID are skipped so None is never authorized
+        authorized_ids: dict[str, set[str]] = {
+            'model': {m['model_id'] for m in resources.get('models', []) if m.get('model_id')},
+            'tool': {t['tool_name'] for t in resources.get('tools', []) if t.get('tool_name')},
+            'knowledge_base': {kb['kb_id'] for kb in resources.get('knowledge_bases', []) if kb.get('kb_id')},
+        }
+
+        session: AgentRunSession = {
+            'run_id': run_id,
+            'runner_id': runner_id,
+            'query_id': query_id,
+            'plugin_identity': plugin_identity,
+            'resources': resources,
+            'status': {
+                'started_at': now,
+                'last_activity_at': now,
+            },
+            '_authorized_ids': authorized_ids,
+        }
+
+        async with self._lock:
+            self._sessions[run_id] = session
+
+    async def unregister(self, run_id: str) -> None:
+        """Unregister an agent run session.
+
+        Args:
+            run_id: Unique run identifier
+        """
+        async with self._lock:
+            if run_id in self._sessions:
+                del self._sessions[run_id]
+
+    async def get(self, run_id: str) -> AgentRunSession | None:
+        """Get session by run_id.
+
+        Args:
+            run_id: Unique run identifier
+
+        Returns:
+            AgentRunSession if found, None otherwise
+        """
+        async with self._lock:
+            return self._sessions.get(run_id)
+
+    async def update_activity(self, run_id: str) -> None:
+        """Update last activity timestamp for session.
+
+        Args:
+            run_id: Unique run identifier
+        """
+        async with self._lock:
+            if run_id in self._sessions:
+                self._sessions[run_id]['status']['last_activity_at'] = int(time.time())
+
+    def is_resource_allowed(
+        self,
+        session: AgentRunSession,
+        resource_type: str,
+        resource_id: str,
+    ) -> bool:
+        """Check if resource access is allowed for this session.
+
+        Models, tools and knowledge bases use the pre-computed '_authorized_ids' sets; storage reads the session's storage flags.
+
+        Args:
+            session: AgentRunSession to check
+            resource_type: Resource type ('model', 'tool', 'knowledge_base', 'storage')
+            resource_id: Resource identifier (model_id, tool_name, kb_id, or 'plugin'/'workspace' for storage)
+
+        Returns:
+            True if resource is authorized, False otherwise (including unknown types and empty or non-string IDs)
+        """
+        # A None/empty ID must never match, even if a malformed resource entry put None into a set
+        if not isinstance(resource_id, str) or not resource_id:
+            return False
+
+        if resource_type in ('model', 'tool', 'knowledge_base'):
+            return resource_id in session.get('_authorized_ids', {}).get(resource_type, set())
+
+        if resource_type == 'storage':
+            storage = session['resources'].get('storage', {})
+            # Map the storage ID to its flag; coerce so a strict bool is always returned
+            flag_name = {'plugin': 'plugin_storage', 'workspace': 'workspace_storage'}.get(resource_id)
+            return bool(flag_name and storage.get(flag_name, False))
+
+        return False
+
+    async def list_active_runs(self) -> list[AgentRunSession]:
+        """List all active run sessions.
+
+        Returns:
+            List of active AgentRunSession dicts
+        """
+        async with self._lock:
+            return list(self._sessions.values())
+
+    async def cleanup_stale_sessions(self, max_age_seconds: int = 3600) -> int:
+        """Drop sessions whose last activity is older than the allowed age.
+
+        Args:
+            max_age_seconds: Inactivity threshold in seconds (default 3600)
+
+        Returns:
+            How many sessions were removed
+        """
+        reference_time = int(time.time())
+
+        async with self._lock:
+            # Collect first, delete afterwards: the dict must not change
+            # size while it is being iterated
+            expired = [
+                sid
+                for sid, entry in self._sessions.items()
+                if reference_time - entry['status'].get('last_activity_at', 0) > max_age_seconds
+            ]
+            for sid in expired:
+                self._sessions.pop(sid)
+
+        # Every collected ID was present under the same lock, so each pop removed exactly one session
+        return len(expired)
+
+
+# Global registry instance (singleton)
+_global_registry: AgentRunSessionRegistry | None = None
+_global_registry_lock = threading.Lock()
+
+
+def get_session_registry() -> AgentRunSessionRegistry:
+    """Get global session registry instance (thread-safe singleton).
+
+    Returns:
+        AgentRunSessionRegistry singleton
+    """
+    global _global_registry
+    with _global_registry_lock:
+        if _global_registry is None:
+            _global_registry = AgentRunSessionRegistry()
+        return _global_registry
--- a/src/langbot/pkg/agent/runner/state_store.py
+++ b/src/langbot/pkg/agent/runner/state_store.py
@@ -0,0 +1,299 @@
+"""Runner scoped state store for managing AgentRunner state across runs."""
+from __future__ import annotations
+
+import typing
+import threading
+
+from langbot_plugin.api.entities.builtin.pipeline import query as pipeline_query
+
+from .descriptor import AgentRunnerDescriptor
+
+
+# Valid state scopes per PROTOCOL_V1.md
+VALID_STATE_SCOPES = ('conversation', 'actor', 'subject', 'runner')
+
+# Key mapping for backward compatibility
+LEGACY_KEY_MAPPING = {
+    'conversation_id': 'external.conversation_id',
+}
+
+
+class RunnerScopedStateStore:
+    """In-memory scoped state store for AgentRunner protocol state.
+
+    IMPORTANT: This is HOST-OWNED protocol state, NOT plugin instance state.
+
+    Key Design Principles:
+    1. Host-owned: State is owned and managed by LangBot host, not by the plugin.
+       The plugin can only read/write through the SDK v1 protocol state API.
+    2. Scope keys based on stable host identity: Uses host-controlled identifiers
+       (runner_id, bot_uuid, pipeline_uuid, launcher_type, launcher_id) rather
+       than external/unstable identifiers like external conversation id.
+    3. External conversation id is a VALUE: The runner can update external.conversation_id
+       in state, which syncs to conversation.uuid. The scope key remains stable,
+       preventing state loss when conversation identity changes.
+
+    State scopes:
+    - conversation: runner_id + bot_uuid + pipeline_uuid + launcher_type + launcher_id + conversation identity
+    - actor: runner_id + bot_uuid + sender_id
+    - subject: runner_id + bot_uuid + launcher_type + launcher_id
+    - runner: runner_id + pipeline_uuid
+
+    This ensures different runners don't share state and same runner
+    has appropriate isolation per scope.
+
+    Note: This is an in-memory store. State only persists within the
+    current process lifetime. For production use, a persistent storage
+    backend should be implemented.
+    """
+
+    def __init__(self):
+        # Plain dict; every read and write in this class is guarded by self._lock (threading.Lock)
+        self._store: dict[str, dict[str, typing.Any]] = {}
+        self._lock = threading.Lock()
+
+    def _make_conversation_scope_key(
+        self,
+        query: pipeline_query.Query,
+        descriptor: AgentRunnerDescriptor,
+    ) -> str:
+        """Build the storage key for the conversation scope.
+
+        Uses host-owned stable identity, NOT external conversation id.
+        External conversation id is a state VALUE, not part of state KEY.
+
+        This prevents state loss when runner updates external.conversation_id:
+        - First run: scope key uses stable identity, state saved
+        - Runner returns external.conversation_id, synced to conversation.uuid
+        - Next run: scope key still uses same stable identity, state accessible
+        """
+        parts = [
+            descriptor.id,
+            query.bot_uuid or 'unknown_bot',
+            query.pipeline_uuid or 'unknown_pipeline',
+        ]
+
+        if query.session:
+            parts.append(query.session.launcher_type.value)
+            parts.append(query.session.launcher_id)
+
+            # Conversation identity must be host-owned and stable, so the
+            # external uuid (which a runner update may rewrite) is never used.
+            # Two cases:
+            # 1. create_time is set: append it, so the key is specific to
+            #    that conversation's create_time
+            # 2. create_time is missing/falsy (or there is no conversation):
+            #    nothing is appended and the key is scoped to runner + bot +
+            #    pipeline + launcher only
+            conv_create_time = getattr(query.session.using_conversation, 'create_time', None)
+            if conv_create_time:
+                # Case 1: create_time available
+                parts.append(str(conv_create_time))
+            # else: case 2 - launcher scope identity alone is used
+
+        return f'conversation:{":".join(parts)}'
+
+    def _make_actor_scope_key(
+        self,
+        query: pipeline_query.Query,
+        descriptor: AgentRunnerDescriptor,
+    ) -> str:
+        """Build actor scope identity key."""
+        parts = [
+            descriptor.id,
+            query.bot_uuid or 'unknown_bot',
+            str(query.sender_id) if query.sender_id else 'unknown_sender',
+        ]
+
+        return f'actor:{":".join(parts)}'
+
+    def _make_subject_scope_key(
+        self,
+        query: pipeline_query.Query,
+        descriptor: AgentRunnerDescriptor,
+    ) -> str:
+        """Build subject scope identity key."""
+        parts = [
+            descriptor.id,
+            query.bot_uuid or 'unknown_bot',
+        ]
+
+        if query.session:
+            parts.append(query.session.launcher_type.value)
+            parts.append(query.session.launcher_id)
+
+        return f'subject:{":".join(parts)}'
+
+    def _make_runner_scope_key(
+        self,
+        query: pipeline_query.Query,
+        descriptor: AgentRunnerDescriptor,
+    ) -> str:
+        """Build runner scope identity key."""
+        parts = [
+            descriptor.id,
+            query.pipeline_uuid or 'unknown_pipeline',
+        ]
+
+        return f'runner:{":".join(parts)}'
+
+    def _get_scope_key(
+        self,
+        scope: str,
+        query: pipeline_query.Query,
+        descriptor: AgentRunnerDescriptor,
+    ) -> str:
+        """Get the storage key for a given scope."""
+        if scope == 'conversation':
+            return self._make_conversation_scope_key(query, descriptor)
+        elif scope == 'actor':
+            return self._make_actor_scope_key(query, descriptor)
+        elif scope == 'subject':
+            return self._make_subject_scope_key(query, descriptor)
+        elif scope == 'runner':
+            return self._make_runner_scope_key(query, descriptor)
+        else:
+            raise ValueError(f'Invalid scope: {scope}')
+
+    def build_snapshot(
+        self,
+        query: pipeline_query.Query,
+        descriptor: AgentRunnerDescriptor,
+    ) -> dict[str, dict[str, typing.Any]]:
+        """Build a per-scope copy of the stored state for this query and runner.
+
+        Args:
+            query: Pipeline query (used to derive each scope's storage key)
+            descriptor: Runner descriptor (its id is part of every scope key)
+
+        Returns:
+            Dict with the 4 scope keys; each value is a shallow copy of that scope's stored dict
+        """
+        snapshot: dict[str, dict[str, typing.Any]] = {
+            'conversation': {},
+            'actor': {},
+            'subject': {},
+            'runner': {},
+        }
+
+        with self._lock:
+            for scope in VALID_STATE_SCOPES:
+                scope_key = self._get_scope_key(scope, query, descriptor)
+                scope_state = self._store.get(scope_key, {})
+                snapshot[scope] = dict(scope_state)  # Shallow copy: nested values are still shared with the store
+
+        # Seed external.conversation_id from the conversation uuid (snapshot only; the store is not modified)
+        if query.session and query.session.using_conversation:
+            conv_uuid = getattr(query.session.using_conversation, 'uuid', None)
+            if conv_uuid and 'external.conversation_id' not in snapshot['conversation']:
+                snapshot['conversation']['external.conversation_id'] = conv_uuid
+
+        return snapshot
+
+    def apply_update(
+        self,
+        query: pipeline_query.Query,
+        descriptor: AgentRunnerDescriptor,
+        scope: str,
+        key: str,
+        value: typing.Any,
+        logger: typing.Any = None,
+    ) -> bool:
+        """Apply a state update to the store.
+
+        Args:
+            query: Pipeline query
+            descriptor: Runner descriptor
+            scope: State scope (conversation, actor, subject, runner)
+            key: State key (should use namespace prefix like external.*)
+            value: State value (must be JSON-serializable)
+            logger: Optional logger for warnings
+
+        Returns:
+            True if update applied successfully, False if invalid scope
+
+        Side effects:
+            - Updates internal store
+            - Syncs external.conversation_id to query.session.using_conversation.uuid
+        """
+        # Validate scope
+        if scope not in VALID_STATE_SCOPES:
+            if logger:
+                logger.warning(
+                    f'Runner {descriptor.id} state.updated with invalid scope: {scope}. '
+                    f'Valid scopes: {", ".join(VALID_STATE_SCOPES)}'
+                )
+            return False
+
+        # Map legacy key names
+        if key in LEGACY_KEY_MAPPING:
+            mapped_key = LEGACY_KEY_MAPPING[key]
+            if logger:
+                logger.debug(
+                    f'Runner {descriptor.id} state.updated legacy key "{key}" mapped to "{mapped_key}"'
+                )
+            key = mapped_key
+
+        # Apply update to store
+        with self._lock:
+            scope_key = self._get_scope_key(scope, query, descriptor)
+            if scope_key not in self._store:
+                self._store[scope_key] = {}
+            self._store[scope_key][key] = value
+
+        # Sync external.conversation_id to query.session.using_conversation.uuid
+        if scope == 'conversation' and key == 'external.conversation_id':
+            if query.session and query.session.using_conversation:
+                # Update conversation uuid for backward compatibility
+                # This ensures old conversation continuation behavior works
+                setattr(query.session.using_conversation, 'uuid', value)
+                if logger:
+                    logger.debug(
+                        f'Synced external.conversation_id "{value}" to conversation.uuid'
+                    )
+
+        return True
+
+    def clear_scope(
+        self,
+        scope: str,
+        query: pipeline_query.Query,
+        descriptor: AgentRunnerDescriptor,
+    ) -> None:
+        """Clear all state for a specific scope.
+
+        Args:
+            scope: State scope to clear
+            query: Pipeline query
+            descriptor: Runner descriptor
+        """
+        with self._lock:
+            scope_key = self._get_scope_key(scope, query, descriptor)
+            if scope_key in self._store:
+                del self._store[scope_key]
+
+    def clear_all(self) -> None:
+        """Clear all stored state (for testing/reset)."""
+        with self._lock:
+            self._store.clear()
+
+
+# Global singleton state store
+_state_store: RunnerScopedStateStore | None = None
+_state_store_lock = threading.Lock()
+
+
+def get_state_store() -> RunnerScopedStateStore:
+    """Get the global state store singleton."""
+    global _state_store
+    with _state_store_lock:
+        if _state_store is None:
+            _state_store = RunnerScopedStateStore()
+        return _state_store
+
+
+def reset_state_store() -> None:
+    """Drop the global state store so the next get_state_store() call builds a fresh one (for testing)."""
+    global _state_store
+    with _state_store_lock:
+        _state_store = None  # instances returned earlier by get_state_store() are not affected