From 9330a684fea637b4f8b6d36dadba33966a460c69 Mon Sep 17 00:00:00 2001 From: huanghuoguoguo <1051233107@qq.com> Date: Mon, 25 May 2026 10:34:16 +0800 Subject: [PATCH] refactor(agent-runner): tighten protocol v1 runtime boundaries --- .../HOST_SDK_INFRASTRUCTURE.md | 6 +- .../IMPLEMENTATION_PLAN.md | 92 +- .../PHASE1_QA_ACCEPTANCE_MATRIX.md | 2 +- docs/agent-runner-pluginization/README.md | 2 +- src/langbot/pkg/agent/runner/__init__.py | 2 - .../pkg/agent/runner/context_builder.py | 573 +------- .../pkg/agent/runner/context_packager.py | 74 - src/langbot/pkg/agent/runner/host_models.py | 6 + src/langbot/pkg/agent/runner/orchestrator.py | 162 +-- .../agent/runner/persistent_state_store.py | 115 +- .../pkg/agent/runner/pipeline_adapter.py | 170 ++- .../pkg/agent/runner/resource_builder.py | 164 --- .../pkg/agent/runner/result_normalizer.py | 4 +- src/langbot/pkg/agent/runner/state_scope.py | 113 ++ src/langbot/pkg/agent/runner/state_store.py | 618 --------- .../entity/persistence/agent_runner_state.py | 3 +- .../pkg/pipeline/msgtrun/round_policy.py | 34 + .../pkg/pipeline/msgtrun/truncators/round.py | 2 +- .../test_context_builder_params_state.py | 393 +----- .../agent/test_context_builder_state.py | 2 +- .../agent/test_context_validation.py | 11 +- .../agent/test_event_first_protocol.py | 30 + .../agent/test_orchestrator_integration.py | 32 +- .../unit_tests/agent/test_resource_builder.py | 17 +- .../agent/test_result_normalizer.py | 3 +- tests/unit_tests/agent/test_state_store.py | 1209 ++--------------- 26 files changed, 548 insertions(+), 3291 deletions(-) delete mode 100644 src/langbot/pkg/agent/runner/context_packager.py create mode 100644 src/langbot/pkg/agent/runner/state_scope.py delete mode 100644 src/langbot/pkg/agent/runner/state_store.py create mode 100644 src/langbot/pkg/pipeline/msgtrun/round_policy.py diff --git a/docs/agent-runner-pluginization/HOST_SDK_INFRASTRUCTURE.md b/docs/agent-runner-pluginization/HOST_SDK_INFRASTRUCTURE.md index 6d3418be..dd5dddb0 100644 --- a/docs/agent-runner-pluginization/HOST_SDK_INFRASTRUCTURE.md +++ b/docs/agent-runner-pluginization/HOST_SDK_INFRASTRUCTURE.md @@ -60,7 +60,7 @@ Delivery / Renderer / Platform API ``` **当前状态**: -- `PipelineAdapter` 作为当前 transition adapter,将 Pipeline Query 转换为 `AgentEventEnvelope` + `AgentBinding` +- `PipelineAdapter` 作为当前入口 adapter,将 Pipeline Query 转换为 `AgentEventEnvelope` + `AgentBinding` - `run_from_query()` 内部委托到 `run(event, binding)` - EventLog / Transcript / ArtifactStore / PersistentStateStore 已落地 - EventGateway 由外部 event branch 实现 @@ -99,7 +99,7 @@ class AgentEventEnvelope(BaseModel): raw_ref: RawEventRef | None ``` -**当前 transition source**:`PipelineAdapter.query_to_event(query)` 从 Pipeline Query 生成 `AgentEventEnvelope`。 +**当前 adapter source**:`PipelineAdapter.query_to_event(query)` 从 Pipeline Query 生成 `AgentEventEnvelope`。 原始平台 payload 可以存为 raw event 或 artifact ref;不要把平台私有字段直接扩散到 AgentRunner 顶层协议。 @@ -122,7 +122,7 @@ class AgentBinding(BaseModel): enabled: bool ``` -**当前 transition source**:`PipelineAdapter.pipeline_config_to_binding(query, runner_id)` 从 Pipeline config 生成临时 `AgentBinding`。 +**当前 adapter source**:`PipelineAdapter.pipeline_config_to_binding(query, runner_id)` 从 Pipeline config 生成临时 `AgentBinding`。 Pipeline 当前可以被迁移为一种 binding source: diff --git a/docs/agent-runner-pluginization/IMPLEMENTATION_PLAN.md b/docs/agent-runner-pluginization/IMPLEMENTATION_PLAN.md index 51aaa92f..34a2b569 100644 --- a/docs/agent-runner-pluginization/IMPLEMENTATION_PLAN.md +++ b/docs/agent-runner-pluginization/IMPLEMENTATION_PLAN.md @@ -150,53 +150,52 @@ class AgentRunnerDescriptor(BaseModel): - Pipeline metadata 请求时发现缓存为空 - 可选 TTL,优先保证正确性 -### 3.4 context_builder.py +### 3.4 context_builder.py / pipeline_adapter.py -把当前 Pipeline query 转换成 SDK v1 `AgentRunContext` envelope。这里做协议字段组装、Host-owned 状态快照、授权资源挂载和默认工作窗口 provisioning,不承担 Agent 的最终 prompt 组装或长期记忆/压缩策略。 +`context_builder.py` 只负责从 `AgentEventEnvelope + AgentBinding` 构造 SDK v1 `AgentRunContext`。Pipeline Query 的读取、参数过滤、prompt 提取和 `max-round` bootstrap 映射都属于 `PipelineAdapter`,不再放进 context builder。 -当前消息 Pipeline 的最小字段: +当前消息 Pipeline 进入 agent runner 的路径: + +```text +Query + -> PipelineAdapter.query_to_event(query) + -> PipelineAdapter.pipeline_config_to_binding(query, runner_id) + -> PipelineAdapter.build_adapter_context(query, binding) + -> AgentRunOrchestrator.run(event, binding, adapter_context=...) + -> AgentRunContextBuilder.build_context_from_event(...) +``` + +Protocol v1 context 的稳定字段: - `run_id`: 新 UUID,不使用 query id 作为全局 run id -- `trigger.type`: `message.received` -- `conversation`: launcher、sender、bot、pipeline、历史消息 -- `event`: message event envelope 子集,`event_type` 使用稳定协议名,平台/SDK 原始事件名放入 `event_data.source_event_type` -- `actor`: sender -- `subject`: 当前消息或 launcher -- `prompt`: 宿主已处理的有效 prompt,即 `query.prompt.messages` -- `messages`: `query.messages` 进入 AgentRunner context packaging 后的历史窗口。插件化 AgentRunner 路径不再由 Pipeline `msgtrun` 截断 -- `runtime.metadata.context_packaging`: Host 本次实际下发的历史窗口元数据,例如来源、策略、下发消息数、完整性;未来可扩展 cursor 和 host-side history API -- `input`: 从 `query.user_message` 和 `query.message_chain` 构造 -- `params`: 过滤后的公开业务变量 -- `resources`: 由 `resource_builder` 注入 -- `state`: host-managed scoped state snapshot -- `runtime`: host/version/workspace/bot/pipeline/query/trace/deadline -- `config`: 当前 Pipeline 对该 runner id 的绑定配置,即 `ai.runner_config[runner_id]` +- `trigger.type`: 事件触发类型,例如 `message.received` +- `conversation`: conversation/thread/launcher/sender/bot/pipeline 投影 +- `event`: 稳定事件上下文 +- `actor`: 触发者 +- `subject`: 当前消息、群、频道或其它事件主体 +- `input`: 当前事件输入,不是历史消息窗口 +- `delivery`: 输出 surface 和平台投递能力 +- `resources`: 由 `resource_builder` 基于 binding policy 注入 +- `state`: `PersistentStateStore` 读取的 host-managed scoped state snapshot +- `runtime`: host/version/workspace/bot/query/trace/deadline +- `config`: 当前 binding 对该 runner id 的配置,即 `runner_config` +- `bootstrap`: 可选小窗口,不是完整历史 +- `adapter`: Pipeline 或其它入口 adapter 的元数据 -保留 SDK legacy helper 是 SDK 的责任,LangBot 不再构造 PoC 的 `query_id/session/messages/user_message/extra_config` 上下文。 +Pipeline adapter 的 `prompt` 和公开业务变量不进入顶层协议字段: -`prompt` 的语义必须明确:它不是静态配置 `config["prompt"]`,而是 LangBot PreProcessor 和 `PromptPreProcessing` 插件事件之后的有效 prompt。旧内置 local-agent 请求模型时使用: +- effective prompt -> `ctx.adapter.extra["prompt"]` +- filtered params -> `ctx.adapter.extra["params"]` +- `max-round` working window -> `ctx.bootstrap.messages` +- 同一窗口也可出现在 `ctx.adapter.adapter_messages`,供 adapter 消费方读取 +- packaging 元数据 -> `ctx.runtime.metadata.context_packaging` -```python -query.prompt.messages + query.messages + [query.user_message] -``` - -插件化 runner 要保持行为一致,应消费: - -```python -ctx.prompt + ctx.messages + [current_user_message_from_ctx.input] -``` - -现阶段不要优化裁剪算法,也不要把新的压缩或 token-budget 裁剪塞回 Pipeline stage。 -插件化 AgentRunner 路径应跳过 Pipeline `msgtrun` 的破坏性截断,然后由 -`AgentContextPackager` 在 AgentRunner 边界执行同一套 max-round user-round 规则。 -当前 SDK v1 还没有顶层 context packaging 字段,LangBot 先把本次 packaging -元数据放在 `ctx.runtime.metadata.context_packaging`。这是实际下发结果说明,不是 LangBot 侧的长期策略控制面。 -后续 LiteLLM 接入后再把真实 context window、token 预算和摘要策略接到这个边界上。 +现阶段不要把新的压缩或 token-budget 裁剪塞回 Pipeline stage。Pipeline 只负责入口适配;完整历史和长期上下文由 EventLog / Transcript / pull APIs / future ContextCompressor 支撑。 ### 3.4.1 Agentic context plan -本轮只落地 `AgentContextPackager` 的 `max_round` working window,不改变 user-round 选择规则。 -下面的 `ConversationStore` / `EventLog`、`ContextCompressor` 和 host history API 仍是设计预留。 +本轮只在 `PipelineAdapter` 中保留 `max-round` working window,不改变 user-round 选择规则。 +EventLog / Transcript / Host pull APIs 已落地,`ContextCompressor` 仍是设计预留。 目标是让 Pipeline 逐步退化为入口 adapter,让 AgentRunner 层拥有上下文打包职责。 建议最终拆成四个 host-side 服务: @@ -206,7 +205,7 @@ ConversationStore / EventLog -> durable append-only raw messages, events, tool results, artifact refs ConversationProjection -> converts events into agent-readable conversation history -AgentContextPackager +PipelineAdapter bootstrap policy -> builds the bounded working context for one run ContextCompressor -> creates and updates summaries/checkpoints when thresholds are exceeded @@ -215,10 +214,10 @@ ContextCompressor 关键原则: - 完整历史属于 LangBot host,不属于插件实例。插件仍是 singleton/stateless。 -- `ctx.messages` 是 working context window,不是完整 conversation dump。 +- `ctx.bootstrap.messages` 是 optional working context window,不是完整 conversation dump。 - 每轮不能全量复制/序列化完整历史给插件 runtime;否则长会话会产生 O(n) 成本和跨进程 payload 膨胀。 -- `max-round` 的 user-round 规则可以先搬到 `AgentContextPackager`,作为 `max_round` adapter 策略。 -- LiteLLM 接入后,`AgentContextPackager` 再读取模型 context window,升级为 token budget 策略。 +- `max-round` 的 user-round 规则只属于 Pipeline adapter 的 bootstrap 策略。 +- LiteLLM 接入后,context packaging 应升级为 token budget / summary / pull API 协作策略。 - `ContextCompressor` 生成的是派生 summary/checkpoint,不能覆盖或删除 raw history。 - 重启恢复依赖持久化 store 和 summary checkpoint,不依赖 `SessionManager` 里的进程内 conversation list。 @@ -252,7 +251,7 @@ page size、总字节数、deadline 和可访问 conversation。 ### 3.4.2 Large artifacts and tool collaboration -大文件、多模态输入和工具产物不要内联进 `ctx.messages` 或 tool result。后续统一用 +大文件、多模态输入和工具产物不要内联进 bootstrap messages 或 tool result。后续统一用 artifact/resource ref 协作: - message/content 里只放小文本和必要摘要。 @@ -322,7 +321,7 @@ Platform Adapter -> ConversationProjection update message/history view when applicable -> EventRouter resolve binding -> AgentRunOrchestrator.run_from_event(event_request) - -> AgentContextPackager build working context from projection + state + artifacts + -> Context packager builds working context from projection + state + artifacts ``` 这样消息事件、工具事件、群成员事件、好友申请事件可以共用同一套 run/session/state/resource @@ -481,8 +480,9 @@ async def run_from_query(query: pipeline_query.Query) -> AsyncGenerator[Message ### Step 1:补齐宿主上下文 -- SDK `AgentRunContext` 增加 `prompt`,并保持向后兼容默认空列表。 -- LangBot context builder 写入 `ctx.prompt`、`ctx.input.contents`、`ctx.runtime.metadata.streaming_supported`、`ctx.runtime.metadata.remove_think`。 +- SDK `AgentRunContext` 保持 event-first:`event/input/delivery/resources/context/state/runtime/config/bootstrap/adapter`。 +- LangBot context builder 只从 `AgentEventEnvelope + AgentBinding` 写入稳定协议字段。 +- Pipeline adapter 把 effective prompt 写入 `ctx.adapter.extra["prompt"]`,把公开业务变量写入 `ctx.adapter.extra["params"]`。 - 保持 `ctx.config` 只表达静态绑定配置。 ### Step 2:增强宿主 AgentRun proxy action @@ -502,7 +502,7 @@ async def run_from_query(query: pipeline_query.Query) -> AsyncGenerator[Message ### Step 4:local-agent parity -- 使用 `ctx.prompt` 而不是重新读取 `ctx.config["prompt"]`。 +- 使用 `ctx.adapter.extra["prompt"]` 而不是重新读取 `ctx.config["prompt"]`。 - 当前 user message 从 `ctx.input.contents` 构造,保留多模态内容。 - RAG 只替换/插入文本部分,不丢图片/文件。 - streaming/non-streaming 默认跟随 `runtime.metadata.streaming_supported`。 diff --git a/docs/agent-runner-pluginization/PHASE1_QA_ACCEPTANCE_MATRIX.md b/docs/agent-runner-pluginization/PHASE1_QA_ACCEPTANCE_MATRIX.md index 00b72b6e..e20a9c0e 100644 --- a/docs/agent-runner-pluginization/PHASE1_QA_ACCEPTANCE_MATRIX.md +++ b/docs/agent-runner-pluginization/PHASE1_QA_ACCEPTANCE_MATRIX.md @@ -88,7 +88,7 @@ Host 侧 agent runner 单测不通过时,不应进入 UI parity QA。 | ID | 场景 | 步骤 | 通过条件 | | --- | --- | --- | --- | | P1-LA-01 | 普通文本对话 | 绑定 `plugin:langbot/local-agent/default`,发送普通文本。 | 回复正常生成;conversation history 写入用户消息和助手消息。 | -| P1-LA-02 | 有效 prompt | 配置 system prompt,并通过 PromptPreProcessing 插件或现有预处理改变 prompt。 | runner 使用 host 处理后的 `ctx.prompt`,不是只读取静态 `ctx.config.prompt`;回复体现有效 prompt。 | +| P1-LA-02 | 有效 prompt | 配置 system prompt,并通过 PromptPreProcessing 插件或现有预处理改变 prompt。 | runner 使用 host 处理后的 `ctx.adapter.extra["prompt"]`,不是只读取静态 `ctx.config.prompt`;回复体现有效 prompt。 | | P1-LA-03 | 历史消息 | 连续多轮对话,第二轮引用第一轮内容。 | 当前兼容路径下 runner 能读到 host 下发的 bootstrap/history;目标协议下应通过 history API 或插件自管上下文实现。第二轮能基于上下文回答。 | | P1-LA-04 | 流式输出 | 使用支持流式的 adapter/WebUI,开启流式模型或流式 runner。 | UI 逐步更新;后端接收 `message.delta`;最终没有重复消息或空白卡片。 | | P1-LA-05 | 非流式输出 | 使用不支持流式或关闭流式的路径。 | 只输出最终消息;不会创建异常流式卡片。 | diff --git a/docs/agent-runner-pluginization/README.md b/docs/agent-runner-pluginization/README.md index b0797fcc..9b5cf632 100644 --- a/docs/agent-runner-pluginization/README.md +++ b/docs/agent-runner-pluginization/README.md @@ -29,7 +29,7 @@ EventGateway 在本文档中描述为 **future integration point**,由外部 e ## 当前状态 -**当前 Pipeline 是 transition adapter,不再是 agent runner 设计核心。** +**当前 Pipeline 是入口 adapter,不再是 agent runner 设计核心。** 当前主入口仍可由 Pipeline 触发,但内部已转换成 event-first path: diff --git a/src/langbot/pkg/agent/runner/__init__.py b/src/langbot/pkg/agent/runner/__init__.py index 40ee1dab..986320c9 100644 --- a/src/langbot/pkg/agent/runner/__init__.py +++ b/src/langbot/pkg/agent/runner/__init__.py @@ -12,7 +12,6 @@ from .errors import ( ) from .registry import AgentRunnerRegistry from .context_builder import AgentRunContextBuilder -from .context_packager import AgentContextPackager from .resource_builder import AgentResourceBuilder from .result_normalizer import AgentResultNormalizer from .orchestrator import AgentRunOrchestrator @@ -38,7 +37,6 @@ __all__ = [ 'RunnerExecutionError', 'AgentRunnerRegistry', 'AgentRunContextBuilder', - 'AgentContextPackager', 'AgentResourceBuilder', 'AgentResultNormalizer', 'AgentRunOrchestrator', diff --git a/src/langbot/pkg/agent/runner/context_builder.py b/src/langbot/pkg/agent/runner/context_builder.py index b023ebc9..da7321e4 100644 --- a/src/langbot/pkg/agent/runner/context_builder.py +++ b/src/langbot/pkg/agent/runner/context_builder.py @@ -5,16 +5,9 @@ import uuid import time import typing -from langbot_plugin.api.entities.builtin.pipeline import query as pipeline_query -from langbot_plugin.api.entities.builtin.platform import message as platform_message - from ...core import app from .descriptor import AgentRunnerDescriptor -from .config_migration import ConfigMigration -from .context_packager import AgentContextPackager -from .state_store import get_state_store from .persistent_state_store import get_persistent_state_store -from . import events as runner_events from .host_models import AgentEventEnvelope, AgentBinding @@ -33,12 +26,14 @@ class AgentTrigger(typing.TypedDict): class ConversationContext(typing.TypedDict): """Conversation context.""" - session_id: str | None conversation_id: str | None + thread_id: str | None launcher_type: str | None launcher_id: str | None sender_id: str | None - bot_uuid: str | None + bot_id: str | None + workspace_id: str | None + session_id: str | None pipeline_uuid: str | None @@ -145,36 +140,22 @@ class AgentRunContextPayload(typing.TypedDict): class AgentRunContextBuilder: """Builder for provisioning AgentRunContext. - Two entry points: - - build_context_from_event(event, binding): Event-first Protocol v1 - - build_context(query, descriptor, resources): Pipeline adapter Query-based entry - Responsibilities: - Generate new run_id (UUID, not query id) - - Set trigger type based on source - - Build conversation context from session/event - - Build input from user_message/event - - Build params with filtering - - Build state snapshot from state_store + - Set trigger type based on event source + - Build conversation context from event + - Build input from event + - Build state snapshot from PersistentStateStore - Build runtime context with host info, trace_id, deadline - - Set config from runner binding configuration + - Set config from runner binding configuration. + + Pipeline Query adaptation belongs to PipelineAdapter, not this builder. """ ap: app.Application - # Params filtering rules - # Exclude variables starting with underscore (internal) - INTERNAL_PREFIX = '_' - - # Exclude variables with sensitive naming patterns - SENSITIVE_PATTERNS = ('secret', 'token', 'key', 'password', 'credential', 'api_key', 'apikey') - - # Exclude permission/control variables - PERMISSION_VARS = ('_pipeline_bound_plugins', '_authorized', '_permission') - def __init__(self, ap: app.Application): self.ap = ap - self.context_packager = AgentContextPackager() async def build_context_from_event( self, @@ -217,7 +198,8 @@ class AgentRunContextBuilder: 'launcher_type': None, # Will be filled from actor/subject if needed 'launcher_id': None, 'sender_id': event.actor.actor_id if event.actor else None, - 'bot_uuid': event.bot_id, + 'bot_id': event.bot_id, + 'workspace_id': event.workspace_id, 'pipeline_uuid': binding.pipeline_uuid, # Pipeline adapter field } @@ -227,8 +209,9 @@ class AgentRunContextBuilder: 'event_type': event.event_type, 'event_time': event.event_time, 'source': event.source, - 'source_event_type': None, - 'data': {}, + 'source_event_type': event.source_event_type, + 'raw_ref': event.raw_ref.model_dump(mode='json') if event.raw_ref else None, + 'data': event.data, } # Build actor context @@ -323,427 +306,6 @@ class AgentRunContextBuilder: return context - async def build_context( - self, - query: pipeline_query.Query, - descriptor: AgentRunnerDescriptor, - resources: AgentResources, - ) -> AgentRunContextPayload: - """Build AgentRunContext envelope from Query. - - This is a Pipeline adapter wrapper that converts Query to event + binding - and delegates to build_context_from_event(). - - For Protocol v1, messages are NOT inlined by default. - Pipeline max-round only affects bootstrap, NOT Protocol v1 entities. - - Args: - query: Pipeline query - descriptor: Runner descriptor - resources: Built resources from AgentResourceBuilder - - Returns: - AgentRunContext payload for the plugin runner - """ - # Resolve runner config for binding - runner_id = descriptor.id - runner_config = ConfigMigration.resolve_runner_config( - query.pipeline_config, - runner_id, - ) - - # Extract max_round for Pipeline adapter bootstrap (NOT Protocol v1) - # Note: config uses 'max-round' with hyphen, not 'max_round' - max_round = runner_config.get('max-round') - if max_round is None: - ai_config = query.pipeline_config.get('ai', {}) if query.pipeline_config else {} - max_round = ai_config.get('max-round') - - # Build trigger - trigger: AgentTrigger = { - 'type': runner_events.MESSAGE_RECEIVED, - 'source': 'pipeline', - 'timestamp': int(time.time()), - } - - # Build conversation context - conversation: ConversationContext | None = None - session = getattr(query, 'session', None) - if session: - conversation = { - 'session_id': f'{getattr(session, "launcher_type", "").value if hasattr(getattr(session, "launcher_type", ""), "value") else getattr(session, "launcher_type", "")}_{getattr(session, "launcher_id", "")}', - 'conversation_id': getattr(getattr(session, 'using_conversation', None), 'uuid', None), - 'launcher_type': getattr(session, 'launcher_type', None).value if hasattr(getattr(session, 'launcher_type', None), 'value') else getattr(session, 'launcher_type', None), - 'launcher_id': getattr(session, 'launcher_id', None), - 'sender_id': str(getattr(query, 'sender_id', '')) if getattr(query, 'sender_id', None) else None, - 'bot_uuid': getattr(query, 'bot_uuid', None), - 'pipeline_uuid': getattr(query, 'pipeline_uuid', None), - } - - # Build input - input: AgentInput = self._build_input(query) - - # Build params from query.variables with filtering - params = self._build_params(query) - - # Build state snapshot from state_store - state_store = get_state_store() - state: AgentRunState = state_store.build_snapshot(query, descriptor) - - streaming_supported = await self._is_stream_output_supported(query) - remove_think = query.pipeline_config.get('output', {}).get('misc', {}).get('remove-think', False) if query.pipeline_config else False - - # Build runtime context - run_id = str(uuid.uuid4()) - runtime: AgentRuntimeContext = { - 'langbot_version': self.ap.ver_mgr.get_current_version(), - 'sdk_protocol_version': descriptor.protocol_version, - 'query_id': query.query_id, - 'trace_id': run_id, # Use run_id as trace_id for now - 'deadline_at': self._build_deadline(runner_config), - 'metadata': { - 'bot_name': query.variables.get('_monitoring_bot_name', 'Unknown') if query.variables else 'Unknown', - 'pipeline_name': query.variables.get('_monitoring_pipeline_name', 'Unknown') if query.variables else 'Unknown', - 'streaming_supported': streaming_supported, - 'remove_think': remove_think, - }, - } - - # Build delivery context from query adapter capabilities - delivery_context = { - 'surface': 'pipeline', - 'reply_target': None, - 'supports_streaming': streaming_supported, - 'supports_edit': False, - 'supports_reaction': False, - 'max_message_size': None, - 'platform_capabilities': {}, - } - - # Build context access for the direct Query adapter helper. - # The event-first run_from_query path uses build_context_from_event(). - context_access = { - 'conversation_id': conversation.get('conversation_id') if conversation else None, - 'thread_id': None, - 'latest_cursor': None, - 'event_seq': None, - 'transcript_seq': None, - 'has_history_before': False, - 'inline_policy': { - 'mode': 'current_event', - 'delivered_count': 0, - 'source_total_count': None, - 'messages_complete': False, - 'reason': 'pipeline_adapter', - }, - 'available_apis': { - 'history_page': False, - 'history_search': False, - 'event_get': False, - 'event_page': False, - 'artifact_metadata': False, - 'artifact_read': False, - 'state': False, - 'storage': True, - }, - } - - # Build adapter context (for Pipeline adapter fields) - adapter_context = { - 'query_id': query.query_id, - 'pipeline_uuid': getattr(query, 'pipeline_uuid', None), - 'max_round': max_round, # For reference only - 'adapter_messages': [], # Will be filled if max_round is set - 'extra': { - 'params': params, # Put params in adapter.extra - 'prompt': self._build_prompt(query), # Put prompt in adapter.extra - }, - } - - # Build bootstrap context (optional, for Pipeline adapter max-round) - bootstrap_context = None - - # For Pipeline adapter: add bootstrap messages if max_round is set - # This goes into bootstrap.messages, NOT top-level messages - if max_round and max_round > 0: - packaged_context = self.context_packager.package_messages(query, runner_config) - adapter_messages = self._build_messages(packaged_context.messages) - # Put in bootstrap for Protocol v1 - bootstrap_context = { - 'messages': adapter_messages, - 'summary': None, - 'artifacts': [], - 'metadata': {}, - } - # Also update adapter for transition runners - adapter_context['adapter_messages'] = adapter_messages - # Update runtime metadata - runtime['metadata']['context_packaging'] = { - 'policy': packaged_context.policy, - 'history': packaged_context.history, - } - - # Build full context - Protocol v1 structure - context: AgentRunContextPayload = { - 'run_id': run_id, - 'trigger': trigger, - 'conversation': conversation, - 'event': self._build_event(query), # REQUIRED - 'actor': self._build_actor(query), - 'subject': self._build_subject(query), - 'input': input, - 'delivery': delivery_context, # REQUIRED - 'resources': resources, - 'context': context_access, # ContextAccess - REQUIRED - 'state': state, - 'runtime': runtime, - 'config': runner_config, - 'bootstrap': bootstrap_context, # Optional bootstrap - 'adapter': adapter_context, # Pipeline adapter context - 'metadata': {}, # Additional metadata - } - - return context - - def _build_input(self, query: pipeline_query.Query) -> AgentInput: - """Build AgentInput from query.""" - text = None - text_parts: list[str] = [] - contents: list[dict[str, typing.Any]] = [] - - if query.user_message: - # Extract text if content is single text element - if isinstance(query.user_message.content, list): - for elem in query.user_message.content: - contents.append(elem.model_dump(mode='json')) - if elem.type == 'text': - elem_text = getattr(elem, 'text', None) - if elem_text: - text_parts.append(elem_text) - else: - # Single string content - text = str(query.user_message.content) - contents.append({'type': 'text', 'text': text}) - - if text_parts: - text = ''.join(text_parts) - - # Include message_chain for platform-specific format - message_chain_dict = None - if query.message_chain: - message_chain_dict = query.message_chain.model_dump(mode='json') - - return { - 'text': text, - 'contents': contents, - 'message_chain': message_chain_dict, - 'attachments': self._build_attachments(query, contents), - } - - def _build_attachments( - self, - query: pipeline_query.Query, - contents: list[dict[str, typing.Any]], - ) -> list[dict[str, typing.Any]]: - """Extract runner-consumable attachment data from input contents.""" - attachments: list[dict[str, typing.Any]] = [] - - for elem in contents: - elem_type = elem.get('type') - if elem_type == 'image_url': - image_url = elem.get('image_url') or {} - attachments.append( - { - 'type': 'image', - 'source': 'url', - 'url': image_url.get('url') if isinstance(image_url, dict) else str(image_url), - } - ) - elif elem_type == 'image_base64': - image_base64 = elem.get('image_base64') - attachments.append( - { - 'type': 'image', - 'source': 'base64', - 'content': image_base64, - 'content_type': self._infer_base64_content_type(image_base64, 'image/jpeg'), - 'name': 'image', - 'has_content': bool(image_base64), - } - ) - elif elem_type == 'file_url': - attachments.append( - { - 'type': 'file', - 'source': 'url', - 'url': elem.get('file_url'), - 'name': elem.get('file_name'), - } - ) - elif elem_type == 'file_base64': - file_base64 = elem.get('file_base64') - attachments.append( - { - 'type': 'file', - 'source': 'base64', - 'name': elem.get('file_name'), - 'content': file_base64, - 'content_type': self._infer_base64_content_type(file_base64, 'application/octet-stream'), - 'has_content': bool(file_base64), - } - ) - - message_chain = getattr(query, 'message_chain', None) - if message_chain: - for component in message_chain: - if isinstance(component, platform_message.Image): - attachments.append( - { - 'type': 'image', - 'source': 'message_chain', - 'id': component.image_id or None, - 'url': component.url or None, - 'path': str(component.path) if component.path else None, - 'content': component.base64 or None, - 'content_type': self._infer_base64_content_type(component.base64, 'image/jpeg'), - 'name': 'image', - 'has_content': bool(component.base64), - } - ) - elif isinstance(component, platform_message.File): - attachments.append( - { - 'type': 'file', - 'source': 'message_chain', - 'id': component.id or None, - 'name': component.name or None, - 'size': component.size or 0, - 'url': component.url or None, - 'path': component.path or None, - 'content': component.base64 or None, - 'content_type': self._infer_base64_content_type(component.base64, 'application/octet-stream'), - 'has_content': bool(component.base64), - } - ) - elif isinstance(component, platform_message.Voice): - attachments.append( - { - 'type': 'voice', - 'source': 'message_chain', - 'id': component.voice_id or None, - 'url': component.url or None, - 'path': component.path or None, - 'duration': component.length or 0, - 'content': component.base64 or None, - 'content_type': self._infer_base64_content_type(component.base64, 'audio/mpeg'), - 'name': 'voice', - 'has_content': bool(component.base64), - } - ) - - return attachments - - def _infer_base64_content_type(self, value: typing.Any, default: str) -> str: - """Infer MIME type from a data URL base64 value.""" - if not isinstance(value, str): - return default - if value.startswith('data:') and ';base64,' in value: - return value[5:value.find(';base64,')] or default - return default - - def _build_event(self, query: pipeline_query.Query) -> dict[str, typing.Any]: - """Build a minimal EBA-compatible event envelope from the message query. - - The public event_type must be a stable AgentRunner protocol name. Keep - platform or SDK class names inside event_data so future EventRouter - events can share the same top-level naming contract. - """ - message_event = getattr(query, 'message_event', None) - event_data: dict[str, typing.Any] = {} - - if message_event and hasattr(message_event, 'model_dump'): - try: - event_data = message_event.model_dump(mode='json') - except TypeError: - event_data = message_event.model_dump() - except Exception: - event_data = {} - event_data.pop('source_platform_object', None) - - source_event_type = getattr(message_event, 'type', None) if message_event else None - if source_event_type: - event_data.setdefault('source_event_type', source_event_type) - - message_chain = getattr(query, 'message_chain', None) - message_id = getattr(message_chain, 'message_id', None) - if message_id == -1: - message_id = None - - event_time = getattr(message_event, 'time', None) if message_event else None - event_timestamp = int(event_time) if isinstance(event_time, (int, float)) else None - - return { - 'event_type': runner_events.MESSAGE_RECEIVED, - 'event_id': str(message_id or getattr(query, 'query_id', '')), - 'event_timestamp': event_timestamp, - 'event_data': event_data, - } - - def _build_actor(self, query: pipeline_query.Query) -> dict[str, typing.Any]: - """Build actor context for the sender that triggered the run.""" - message_event = getattr(query, 'message_event', None) - sender = getattr(message_event, 'sender', None) if message_event else None - actor_id = getattr(sender, 'id', None) or getattr(query, 'sender_id', None) - actor_name = sender.get_name() if sender and hasattr(sender, 'get_name') else None - - return { - 'actor_type': 'user', - 'actor_id': str(actor_id) if actor_id is not None else None, - 'actor_name': actor_name, - } - - def _build_subject(self, query: pipeline_query.Query) -> dict[str, typing.Any]: - """Build subject context for the current message.""" - message_chain = getattr(query, 'message_chain', None) - message_id = getattr(message_chain, 'message_id', None) - if message_id == -1: - message_id = None - - launcher_type = getattr(query, 'launcher_type', None) - launcher_type_value = getattr(launcher_type, 'value', launcher_type) - - return { - 'subject_type': 'message', - 'subject_id': str(message_id or getattr(query, 'query_id', '')), - 'subject_data': { - 'launcher_type': launcher_type_value, - 'launcher_id': getattr(query, 'launcher_id', None), - 'sender_id': str(getattr(query, 'sender_id', '')), - 'bot_uuid': getattr(query, 'bot_uuid', None), - 'pipeline_uuid': getattr(query, 'pipeline_uuid', None), - }, - } - - def _build_deadline(self, runner_config: dict[str, typing.Any]) -> float | None: - """Build deadline timestamp from runner timeout config. - - A missing timeout uses the host default. Explicit null, zero, or negative - values disable the total run deadline for advanced deployments. - """ - timeout = runner_config.get('timeout', DEFAULT_RUNNER_TIMEOUT_SECONDS) - if timeout is None: - return None - - try: - timeout_seconds = float(timeout) - except (TypeError, ValueError): - return None - - if timeout_seconds <= 0: - return None - - return time.time() + timeout_seconds - def _build_deadline_from_binding(self, binding: AgentBinding) -> float | None: """Build deadline timestamp from binding timeout config. @@ -767,106 +329,6 @@ class AgentRunContextBuilder: return time.time() + timeout_seconds - async def _is_stream_output_supported(self, query: pipeline_query.Query) -> bool: - """Check whether the current adapter can consume streaming chunks.""" - try: - return await query.adapter.is_stream_output_supported() - except AttributeError: - return False - except Exception: - return False - - def _build_prompt(self, query: pipeline_query.Query) -> list[dict[str, typing.Any]]: - """Build effective prompt messages from query.prompt after preprocessing.""" - prompt_messages: list[dict[str, typing.Any]] = [] - - prompt = getattr(query, 'prompt', None) - messages = getattr(prompt, 'messages', None) - if not messages: - return prompt_messages - - for msg in messages: - prompt_messages.append(msg.model_dump(mode='json')) - - return prompt_messages - - def _build_messages(self, source_messages: list[typing.Any]) -> list[dict[str, typing.Any]]: - """Build messages list from packaged source messages.""" - messages: list[dict[str, typing.Any]] = [] - - for msg in source_messages: - messages.append(msg.model_dump(mode='json')) - - return messages - - def _build_params(self, query: pipeline_query.Query) -> dict[str, typing.Any]: - """Build params from query.variables with filtering. - - Filtering rules: - 1. Exclude variables starting with underscore (internal) - 2. Exclude variables with sensitive naming patterns (secret, token, key, password) - 3. Exclude permission/control variables - 4. Keep only JSON-serializable values - - Args: - query: Pipeline query - - Returns: - Filtered params dict - """ - params: dict[str, typing.Any] = {} - - if not query.variables: - return params - - for key, value in query.variables.items(): - # Filter internal variables (starting with underscore) - if key.startswith(self.INTERNAL_PREFIX): - continue - - # Filter sensitive naming patterns - key_lower = key.lower() - if any(pattern in key_lower for pattern in self.SENSITIVE_PATTERNS): - continue - - # Filter permission variables - if any(key == perm_var or key.startswith(perm_var) for perm_var in self.PERMISSION_VARS): - continue - - # Keep only JSON-serializable values - if self._is_json_serializable(value): - params[key] = value - - return params - - def _is_json_serializable(self, value: typing.Any) -> bool: - """Check if value is JSON-serializable. - - Note: set is NOT JSON-serializable. json.dumps({"x": {1}}) fails. - Only list and tuple are allowed as collection types. - - Args: - value: Value to check - - Returns: - True if JSON-serializable, False otherwise - """ - if value is None: - return True - if isinstance(value, (str, int, float, bool)): - return True - # Only allow list and tuple, NOT set (set is not JSON-serializable) - if isinstance(value, (list, tuple)): - return all(self._is_json_serializable(item) for item in value) - if isinstance(value, dict): - return all( - isinstance(k, str) and self._is_json_serializable(v) - for k, v in value.items() - ) - # Pydantic models and other complex types are not directly serializable - # as params (they may have internal structure not meant for runners) - return False - async def _build_context_access( self, event: AgentEventEnvelope, @@ -899,8 +361,7 @@ class AgentRunContextBuilder: artifact_metadata_enabled = 'metadata' in artifact_permissions artifact_read_enabled = 'read' in artifact_permissions - # Determine state API availability based on binding state_policy (event-first mode) - # Direct Query context builder does not expose persistent state API. + # Determine state API availability based on binding state_policy. state_enabled = False if binding is not None: state_policy = binding.state_policy diff --git a/src/langbot/pkg/agent/runner/context_packager.py b/src/langbot/pkg/agent/runner/context_packager.py deleted file mode 100644 index 3de8a558..00000000 --- a/src/langbot/pkg/agent/runner/context_packager.py +++ /dev/null @@ -1,74 +0,0 @@ -"""Agent context packaging helpers.""" -from __future__ import annotations - -import dataclasses -import typing - -from langbot_plugin.api.entities.builtin.pipeline import query as pipeline_query - - -DEFAULT_MAX_ROUND = 10 - - -@dataclasses.dataclass(frozen=True) -class ContextPackagingResult: - """Packaged working context for one AgentRunner run.""" - - messages: list[typing.Any] - policy: dict[str, typing.Any] - history: dict[str, typing.Any] - - -def get_max_round(runner_config: dict[str, typing.Any]) -> typing.Any: - """Return the configured Pipeline adapter max-round value.""" - return runner_config.get('max-round', DEFAULT_MAX_ROUND) - - -def select_max_round_messages( - messages: list[typing.Any] | None, - max_round: typing.Any, -) -> list[typing.Any]: - """Select a bounded recent message window by user-round count.""" - if not messages: - return [] - - temp_messages: list[typing.Any] = [] - current_round = 0 - - for msg in messages[::-1]: - if current_round < max_round: - temp_messages.append(msg) - if getattr(msg, 'role', None) == 'user': - current_round += 1 - else: - break - - return temp_messages[::-1] - - -class AgentContextPackager: - """Build the bounded working context for AgentRunner execution.""" - - def package_messages( - self, - query: pipeline_query.Query, - runner_config: dict[str, typing.Any], - ) -> ContextPackagingResult: - """Package query messages using the Pipeline adapter max-round policy.""" - source_messages = query.messages or [] - max_round = get_max_round(runner_config) - packaged_messages = select_max_round_messages(source_messages, max_round) - - return ContextPackagingResult( - messages=packaged_messages, - policy={ - 'mode': 'max_round', - 'max_round': max_round, - }, - history={ - 'source': 'query.messages', - 'source_total_count': len(source_messages), - 'delivered_count': len(packaged_messages), - 'messages_complete': len(packaged_messages) == len(source_messages), - }, - ) diff --git a/src/langbot/pkg/agent/runner/host_models.py b/src/langbot/pkg/agent/runner/host_models.py index 92e8756c..a29c52d5 100644 --- a/src/langbot/pkg/agent/runner/host_models.py +++ b/src/langbot/pkg/agent/runner/host_models.py @@ -37,6 +37,9 @@ class AgentEventEnvelope(pydantic.BaseModel): source: str """Event source (platform, webui, api, scheduler, system).""" + source_event_type: str | None = None + """Original source event type, when available.""" + bot_id: str | None = None """Bot UUID handling this event.""" @@ -64,6 +67,9 @@ class AgentEventEnvelope(pydantic.BaseModel): raw_ref: RawEventRef | None = None """Reference to raw event payload.""" + data: dict[str, typing.Any] = pydantic.Field(default_factory=dict) + """Small structured event payload. Large payloads should be referenced via raw_ref/artifacts.""" + # Binding scope types class BindingScope(pydantic.BaseModel): diff --git a/src/langbot/pkg/agent/runner/orchestrator.py b/src/langbot/pkg/agent/runner/orchestrator.py index 05659a0f..f8bd3f03 100644 --- a/src/langbot/pkg/agent/runner/orchestrator.py +++ b/src/langbot/pkg/agent/runner/orchestrator.py @@ -16,12 +16,12 @@ from .registry import AgentRunnerRegistry from .context_builder import AgentRunContextBuilder, AgentRunContextPayload from .resource_builder import AgentResourceBuilder from .result_normalizer import AgentResultNormalizer -from .state_store import get_state_store, RunnerScopedStateStore from .persistent_state_store import get_persistent_state_store, PersistentStateStore from .session_registry import get_session_registry, AgentRunSessionRegistry from .config_migration import ConfigMigration from .host_models import AgentEventEnvelope, AgentBinding from .pipeline_adapter import PipelineAdapter +from .state_scope import build_state_context from .errors import ( RunnerNotFoundError, RunnerExecutionError, @@ -63,7 +63,6 @@ class AgentRunOrchestrator: # Cached singleton references (set in __init__) _session_registry: AgentRunSessionRegistry - _state_store: RunnerScopedStateStore _persistent_state_store: PersistentStateStore | None def __init__( @@ -78,7 +77,6 @@ class AgentRunOrchestrator: self.result_normalizer = AgentResultNormalizer(ap) # Cache singleton references to avoid per-request getter calls self._session_registry = get_session_registry() - self._state_store = get_state_store() self._persistent_state_store = None # Lazy init on first use async def run( @@ -132,13 +130,13 @@ class AgentRunOrchestrator: # Merge params into adapter.extra if 'params' in adapter_context: context['adapter']['extra']['params'] = adapter_context['params'] - # Merge prompt into adapter.extra (for transition runners) + # Merge prompt into adapter.extra for Pipeline adapter consumers. if 'prompt' in adapter_context: context['adapter']['extra']['prompt'] = adapter_context['prompt'] # Merge bootstrap if provided if adapter_context.get('bootstrap'): context['bootstrap'] = adapter_context['bootstrap'] - # Also set adapter_messages for transition runners + # Also expose the bootstrap window through adapter metadata. bootstrap_messages = adapter_context['bootstrap'].get('messages') if bootstrap_messages: context['adapter']['adapter_messages'] = bootstrap_messages @@ -150,7 +148,7 @@ class AgentRunOrchestrator: context['runtime']['query_id'] = adapter_context['query_id'] # Build state context for State API handlers - state_context = self._build_state_context(event, binding, descriptor) + state_context = build_state_context(event, binding, descriptor) # Register session for proxy action permission validation run_id = context['run_id'] @@ -274,7 +272,7 @@ class AgentRunOrchestrator: bound_plugins = query.variables.get('_pipeline_bound_plugins') # Build adapter context for Pipeline-specific fields - adapter_context = await self._build_adapter_context(query, binding) + adapter_context = PipelineAdapter.build_adapter_context(query, binding) # Delegate to event-first run() async for result in self.run( @@ -285,73 +283,6 @@ class AgentRunOrchestrator: ): yield result - async def _build_adapter_context( - self, - query: pipeline_query.Query, - binding: AgentBinding, - ) -> dict[str, typing.Any]: - """Build adapter context for Pipeline Query-based flow. - - This extracts adapter-specific fields from Query that aren't available in - the event-first flow: - - params (from query.variables) - - bootstrap messages (for max-round) - - query_id - - prompt messages - - Args: - query: Pipeline query - binding: Agent binding with max_round - - Returns: - Adapter context dict - """ - from .context_packager import AgentContextPackager - - # Use context_builder's _build_params for proper filtering - # (excludes internal vars, sensitive patterns, permission vars, non-JSON values) - params = self.context_builder._build_params(query) - - # Build prompt from query.prompt.messages (for transition runners) - prompt = self.context_builder._build_prompt(query) - - # Build bootstrap context for max-round - bootstrap = None - runtime_metadata = {} - max_round = binding.max_round - - if max_round and max_round > 0 and query.messages: - # Package messages using context_packager - runner_config = binding.runner_config or {} - context_packager = AgentContextPackager() - packaged_context = context_packager.package_messages(query, runner_config) - - # Build messages list - adapter_messages = [] - for msg in packaged_context.messages: - adapter_messages.append(msg.model_dump(mode='json')) - - bootstrap = { - 'messages': adapter_messages, - 'summary': None, - 'artifacts': [], - 'metadata': {}, - } - - # Build runtime metadata for context_packaging - runtime_metadata['context_packaging'] = { - 'policy': packaged_context.policy, - 'history': packaged_context.history, - } - - return { - 'params': params, - 'prompt': prompt, - 'bootstrap': bootstrap, - 'query_id': query.query_id, - 'runtime_metadata': runtime_metadata, - } - async def _invoke_runner( self, descriptor: AgentRunnerDescriptor, @@ -497,18 +428,22 @@ class AgentRunOrchestrator: """ data = result_dict.get('data', {}) - # Extract scope (default to conversation when omitted by the runner) - scope = data.get('scope', 'conversation') + scope = data.get('scope') + if not scope: + raise RunnerProtocolError( + descriptor.id, + 'state.updated missing required field: scope', + ) # Extract key and value key = data.get('key') value = data.get('value') if not key: - self.ap.logger.warning( - f'Runner {descriptor.id} state.updated missing key, ignoring' + raise RunnerProtocolError( + descriptor.id, + 'state.updated missing required field: key', ) - return # Lazy init persistent state store if self._persistent_state_store is None: @@ -536,75 +471,6 @@ class AgentRunOrchestrator: f'Runner {descriptor.id} state.updated rejected: {error}' ) - def _build_state_context( - self, - event: AgentEventEnvelope, - binding: AgentBinding, - descriptor: AgentRunnerDescriptor, - ) -> dict[str, typing.Any]: - """Build state context for State API handlers. - - Returns context with: - - scope_keys: Dict mapping scope name to scope_key - - binding_identity: Binding identity for state isolation - - Additional context fields for DB insert - """ - # Get binding identity - binding_identity = binding.binding_id - if not binding_identity: - scope = binding.scope - if scope.scope_type and scope.scope_id: - binding_identity = f"{scope.scope_type}:{scope.scope_id}" - else: - binding_identity = "unknown_binding" - - # Build scope keys for each scope - scope_keys: dict[str, str] = {} - - # Conversation scope - if event.conversation_id: - parts = [descriptor.id, binding_identity, event.conversation_id] - if event.thread_id: - parts.append(event.thread_id) - scope_keys['conversation'] = f'conversation:{":".join(parts)}' - - # Actor scope - if event.actor and event.actor.actor_id: - parts = [ - descriptor.id, - binding_identity, - event.actor.actor_type or 'user', - event.actor.actor_id, - ] - scope_keys['actor'] = f'actor:{":".join(parts)}' - - # Subject scope - if event.subject and event.subject.subject_id: - parts = [ - descriptor.id, - binding_identity, - event.subject.subject_type or 'unknown', - event.subject.subject_id, - ] - scope_keys['subject'] = f'subject:{":".join(parts)}' - - # Runner scope (always available) - parts = [descriptor.id, binding_identity] - scope_keys['runner'] = f'runner:{":".join(parts)}' - - return { - 'scope_keys': scope_keys, - 'binding_identity': binding_identity, - 'bot_id': event.bot_id, - 'workspace_id': event.workspace_id, - 'conversation_id': event.conversation_id, - 'thread_id': event.thread_id, - 'actor_type': event.actor.actor_type if event.actor else None, - 'actor_id': event.actor.actor_id if event.actor else None, - 'subject_type': event.subject.subject_type if event.subject else None, - 'subject_id': event.subject.subject_id if event.subject else None, - } - async def _write_event_log( self, event: AgentEventEnvelope, diff --git a/src/langbot/pkg/agent/runner/persistent_state_store.py b/src/langbot/pkg/agent/runner/persistent_state_store.py index 2f90d939..8208dd52 100644 --- a/src/langbot/pkg/agent/runner/persistent_state_store.py +++ b/src/langbot/pkg/agent/runner/persistent_state_store.py @@ -6,7 +6,6 @@ from __future__ import annotations import typing import json -import asyncio import threading from datetime import datetime @@ -14,21 +13,17 @@ import sqlalchemy from sqlalchemy.ext.asyncio import AsyncEngine from sqlalchemy import select, delete, update -from langbot_plugin.api.entities.builtin.pipeline import query as pipeline_query - from .descriptor import AgentRunnerDescriptor from .host_models import AgentEventEnvelope, AgentBinding +from .state_scope import ( + VALID_STATE_SCOPES, + build_state_scope_key, + get_binding_identity, + normalize_state_key, +) from ...entity.persistence.agent_runner_state import AgentRunnerState -# Valid state scopes for agent runner state updates. -VALID_STATE_SCOPES = ('conversation', 'actor', 'subject', 'runner') - -# External-facing key aliases accepted from runners. -STATE_KEY_ALIASES = { - 'conversation_id': 'external.conversation_id', -} - # Maximum value_json size (256KB) MAX_VALUE_JSON_BYTES = 256 * 1024 @@ -52,89 +47,6 @@ class PersistentStateStore: def __init__(self, db_engine: AsyncEngine): self._db_engine = db_engine - # ========== Scope Key Building (shared with in-memory store) ========== - - def _get_binding_identity(self, binding: AgentBinding) -> str: - """Get stable binding identity for scope key.""" - if binding.binding_id: - return binding.binding_id - scope = binding.scope - if scope.scope_type and scope.scope_id: - return f"{scope.scope_type}:{scope.scope_id}" - return "unknown_binding" - - def _make_conversation_scope_key( - self, - event: AgentEventEnvelope, - binding: AgentBinding, - descriptor: AgentRunnerDescriptor, - ) -> str | None: - """Build conversation scope key from event and binding.""" - if not event.conversation_id: - return None - - binding_identity = self._get_binding_identity(binding) - parts = [ - descriptor.id, - binding_identity, - event.conversation_id, - ] - if event.thread_id: - parts.append(event.thread_id) - return f'conversation:{":".join(parts)}' - - def _make_actor_scope_key( - self, - event: AgentEventEnvelope, - binding: AgentBinding, - descriptor: AgentRunnerDescriptor, - ) -> str | None: - """Build actor scope key from event and binding.""" - if not event.actor or not event.actor.actor_id: - return None - - binding_identity = self._get_binding_identity(binding) - parts = [ - descriptor.id, - binding_identity, - event.actor.actor_type or 'user', - event.actor.actor_id, - ] - return f'actor:{":".join(parts)}' - - def _make_subject_scope_key( - self, - event: AgentEventEnvelope, - binding: AgentBinding, - descriptor: AgentRunnerDescriptor, - ) -> str | None: - """Build subject scope key from event and binding.""" - if not event.subject or not event.subject.subject_id: - return None - - binding_identity = self._get_binding_identity(binding) - parts = [ - descriptor.id, - binding_identity, - event.subject.subject_type or 'unknown', - event.subject.subject_id, - ] - return f'subject:{":".join(parts)}' - - def _make_runner_scope_key( - self, - event: AgentEventEnvelope, - binding: AgentBinding, - descriptor: AgentRunnerDescriptor, - ) -> str: - """Build runner scope key from event and binding.""" - binding_identity = self._get_binding_identity(binding) - parts = [ - descriptor.id, - binding_identity, - ] - return f'runner:{":".join(parts)}' - def _get_scope_key( self, scope: str, @@ -143,15 +55,7 @@ class PersistentStateStore: descriptor: AgentRunnerDescriptor, ) -> str | None: """Get scope key for given scope.""" - if scope == 'conversation': - return self._make_conversation_scope_key(event, binding, descriptor) - elif scope == 'actor': - return self._make_actor_scope_key(event, binding, descriptor) - elif scope == 'subject': - return self._make_subject_scope_key(event, binding, descriptor) - elif scope == 'runner': - return self._make_runner_scope_key(event, binding, descriptor) - return None + return build_state_scope_key(scope, event, binding, descriptor) def _check_scope_enabled(self, scope: str, binding: AgentBinding) -> bool: """Check if scope is enabled by binding's state_policy.""" @@ -276,8 +180,7 @@ class PersistentStateStore: return False, f'Scope "{scope}" not enabled by binding policy' # Map accepted key aliases - if key in STATE_KEY_ALIASES: - key = STATE_KEY_ALIASES[key] + key = normalize_state_key(key) # Get scope key scope_key = self._get_scope_key(scope, event, binding, descriptor) @@ -290,7 +193,7 @@ class PersistentStateStore: return False, error # Build context fields - binding_identity = self._get_binding_identity(binding) + binding_identity = get_binding_identity(binding) async with self._db_engine.begin() as conn: # Check if entry exists diff --git a/src/langbot/pkg/agent/runner/pipeline_adapter.py b/src/langbot/pkg/agent/runner/pipeline_adapter.py index 8aaf3ec3..cbabebaa 100644 --- a/src/langbot/pkg/agent/runner/pipeline_adapter.py +++ b/src/langbot/pkg/agent/runner/pipeline_adapter.py @@ -30,6 +30,7 @@ from .host_models import ( DeliveryPolicy, ) from . import events as runner_events +from ...pipeline.msgtrun.round_policy import select_max_round_messages class PipelineAdapter: @@ -42,6 +43,10 @@ class PipelineAdapter: - Putting Query-only fields into adapter context """ + INTERNAL_PREFIX = '_' + SENSITIVE_PATTERNS = ('secret', 'token', 'key', 'password', 'credential', 'api_key', 'apikey') + PERMISSION_VARS = ('_pipeline_bound_plugins', '_authorized', '_permission') + @classmethod def query_to_event( cls, @@ -81,6 +86,7 @@ class PipelineAdapter: event_type=event.event_type or runner_events.MESSAGE_RECEIVED, event_time=event.event_time, source="pipeline_adapter", + source_event_type=event.source_event_type, bot_id=query.bot_uuid, workspace_id=None, # Not available in Query conversation_id=conversation.conversation_id, @@ -90,6 +96,7 @@ class PipelineAdapter: input=input, delivery=delivery, raw_ref=raw_ref, + data=event.data, ) @classmethod @@ -110,6 +117,7 @@ class PipelineAdapter: pipeline_config = query.pipeline_config or {} ai_config = pipeline_config.get('ai', {}) runner_config = ai_config.get('runner_config', {}).get(runner_id, {}) + pipeline_uuid = getattr(query, 'pipeline_uuid', None) # Extract max_round for adapter (used in bootstrap, not Protocol v1) # Note: config uses 'max-round' with hyphen, not 'max_round' with underscore @@ -118,7 +126,7 @@ class PipelineAdapter: # Build scope scope = BindingScope( scope_type="pipeline", - scope_id=query.pipeline_uuid, + scope_id=pipeline_uuid, ) # Build resource policy from pipeline config @@ -141,7 +149,7 @@ class PipelineAdapter: ) return AgentBinding( - binding_id=f"pipeline_{query.pipeline_uuid or 'default'}_{runner_id}", + binding_id=f"pipeline_{pipeline_uuid or 'default'}_{runner_id}", scope=scope, event_types=[runner_events.MESSAGE_RECEIVED], runner_id=runner_id, @@ -150,80 +158,116 @@ class PipelineAdapter: state_policy=state_policy, delivery_policy=delivery_policy, enabled=True, - pipeline_uuid=query.pipeline_uuid, + pipeline_uuid=pipeline_uuid, max_round=max_round, ) @classmethod - def build_bootstrap_from_binding( + def build_bootstrap_context( cls, query: pipeline_query.Query, binding: AgentBinding, - ) -> dict[str, typing.Any]: - """Build bootstrap context from binding for max-round. - - This method handles the max-round -> bootstrap conversion. - max-round is NOT part of Protocol v1, only used by Pipeline adapter. - - Args: - query: Pipeline query - binding: Agent binding with max_round - - Returns: - Bootstrap context data - """ + ) -> tuple[dict[str, typing.Any] | None, dict[str, typing.Any]]: + """Build bootstrap messages and runtime metadata for Pipeline max-round.""" max_round = binding.max_round + source_messages = query.messages or [] + if not max_round or max_round <= 0 or not source_messages: + return None, {} - # If no max_round or self_managed_context, return empty bootstrap - if max_round is None or max_round <= 0: - return { - "messages": [], - "summary": None, - "artifacts": [], - "metadata": { - "policy": "self_managed", - "max_round": None, - }, - } - - # max-round packaging (will be handled by context_packager) - return { - "messages": [], # Will be filled by context_packager + packaged_messages = select_max_round_messages(source_messages, max_round) + bootstrap_messages = [cls._dump_message(msg) for msg in packaged_messages] + bootstrap = { + "messages": bootstrap_messages, "summary": None, "artifacts": [], - "metadata": { - "policy": "max_round", - "max_round": max_round, + "metadata": {}, + } + runtime_metadata = { + 'context_packaging': { + 'policy': { + 'mode': 'max_round', + 'max_round': max_round, + }, + 'history': { + 'source': 'query.messages', + 'source_total_count': len(source_messages), + 'delivered_count': len(packaged_messages), + 'messages_complete': len(packaged_messages) == len(source_messages), + }, }, } + return bootstrap, runtime_metadata @classmethod def build_adapter_context( cls, query: pipeline_query.Query, + binding: AgentBinding, ) -> dict[str, typing.Any]: - """Build adapter context for Pipeline adapter fields. - - These fields are for transition purposes only. - Runners should NOT depend on them for long-term capabilities. - - Args: - query: Pipeline query - - Returns: - Adapter context data - """ + """Build Query-derived fields for the Pipeline adapter entry.""" + bootstrap, runtime_metadata = cls.build_bootstrap_context(query, binding) return { - "query_id": query.query_id, - "pipeline_uuid": query.pipeline_uuid, - "max_round": None, # Moved to binding, not here - "adapter_messages": [], # Will be filled by context_packager - "extra": { - "bot_uuid": query.bot_uuid, - "sender_id": str(query.sender_id) if query.sender_id else None, - "launcher_type": query.launcher_type.value if query.launcher_type else None, - "launcher_id": query.launcher_id, - }, + 'params': cls.build_params(query), + 'prompt': cls.build_prompt(query), + 'bootstrap': bootstrap, + 'query_id': getattr(query, 'query_id', None), + 'runtime_metadata': runtime_metadata, + } + + @classmethod + def build_params(cls, query: pipeline_query.Query) -> dict[str, typing.Any]: + """Build adapter params from Pipeline variables with host filtering.""" + params: dict[str, typing.Any] = {} + variables = getattr(query, 'variables', None) + if not variables: + return params + + for key, value in variables.items(): + if key.startswith(cls.INTERNAL_PREFIX): + continue + key_lower = key.lower() + if any(pattern in key_lower for pattern in cls.SENSITIVE_PATTERNS): + continue + if any(key == perm_var or key.startswith(perm_var) for perm_var in cls.PERMISSION_VARS): + continue + if cls.is_json_serializable(value): + params[key] = value + + return params + + @classmethod + def build_prompt(cls, query: pipeline_query.Query) -> list[dict[str, typing.Any]]: + """Build effective prompt messages from Pipeline preprocessing output.""" + prompt = getattr(query, 'prompt', None) + messages = getattr(prompt, 'messages', None) + if not messages: + return [] + return [cls._dump_message(msg) for msg in messages] + + @classmethod + def is_json_serializable(cls, value: typing.Any) -> bool: + """Return whether a value can safely cross the adapter boundary as JSON.""" + if value is None or isinstance(value, (str, int, float, bool)): + return True + if isinstance(value, (list, tuple)): + return all(cls.is_json_serializable(item) for item in value) + if isinstance(value, dict): + return all( + isinstance(k, str) and cls.is_json_serializable(v) + for k, v in value.items() + ) + return False + + @staticmethod + def _dump_message(message: typing.Any) -> dict[str, typing.Any]: + """Serialize a provider message-like object.""" + if hasattr(message, 'model_dump'): + return message.model_dump(mode='json') + if isinstance(message, dict): + return message + return { + 'role': getattr(message, 'role', None), + 'content': getattr(message, 'content', None), } # Private helper methods @@ -519,10 +563,11 @@ class PipelineAdapter: query: pipeline_query.Query, ) -> DeliveryContext: """Build DeliveryContext from Query.""" + message_chain = getattr(query, 'message_chain', None) return DeliveryContext( surface="platform", reply_target={ - "message_id": getattr(query.message_chain, 'message_id', None), + "message_id": getattr(message_chain, 'message_id', None), }, supports_streaming=True, supports_edit=False, @@ -545,10 +590,17 @@ class PipelineAdapter: query: pipeline_query.Query, ) -> list[str] | None: """Extract allowed model UUIDs from query.""" + model_uuids: list[str] = [] model_uuid = getattr(query, 'use_llm_model_uuid', None) if model_uuid: - return [model_uuid] - return None + model_uuids.append(model_uuid) + + variables = getattr(query, 'variables', None) or {} + for fallback_uuid in variables.get('_fallback_model_uuids', []) or []: + if fallback_uuid and fallback_uuid not in model_uuids: + model_uuids.append(fallback_uuid) + + return model_uuids or None @classmethod def _extract_allowed_tools( diff --git a/src/langbot/pkg/agent/runner/resource_builder.py b/src/langbot/pkg/agent/runner/resource_builder.py index 1fcde97b..00039ffc 100644 --- a/src/langbot/pkg/agent/runner/resource_builder.py +++ b/src/langbot/pkg/agent/runner/resource_builder.py @@ -1,7 +1,6 @@ """Agent resource builder for constructing authorized resources.""" from __future__ import annotations -import asyncio import typing from ...core import app @@ -30,10 +29,6 @@ class AgentResourceBuilder: - Build knowledge_bases list from config - Build storage and files permissions summary - Entry points: - - build_resources_from_binding(event, binding, descriptor): Event-first Protocol v1 - - build_resources(query, descriptor): Pipeline adapter Query-based - Note: This only builds the resource declaration. The actual proxy actions in handler.py must still validate against ctx.resources at runtime. @@ -209,89 +204,6 @@ class AgentResourceBuilder: 'workspace_storage': 'workspace' in storage_perms and resource_policy.allow_workspace_storage, } - async def build_resources( - self, - query: typing.Any, # pipeline_query.Query - descriptor: AgentRunnerDescriptor, - ) -> AgentResources: - """Build AgentResources from query and runner descriptor. - - This is a Pipeline adapter wrapper for Query-based flow. - - Args: - query: Pipeline query with pipeline_config and variables - descriptor: Runner descriptor with permissions and capabilities - - Returns: - AgentResources dict with filtered resource lists - """ - # Get bound plugins and MCP servers from query - bound_plugins = query.variables.get('_pipeline_bound_plugins') - bound_mcp_servers = query.variables.get('_pipeline_bound_mcp_servers') - - # Layer 1: Runner manifest permissions - manifest_perms = descriptor.permissions - - # Layer 2: Pipeline extensions_preference (already in bound_plugins/MCP servers) - # Layer 3: Runner instance config (from pipeline_config) - resolved via ConfigMigration - from .config_migration import ConfigMigration - runner_config = ConfigMigration.resolve_runner_config(query.pipeline_config, descriptor.id) - - # Build each resource category in parallel - models, tools, knowledge_bases = await asyncio.gather( - self._build_models(manifest_perms, runner_config, descriptor, query), - self._build_tools(manifest_perms, bound_plugins, bound_mcp_servers, query), - self._build_knowledge_bases(manifest_perms, runner_config, descriptor, query), - ) - storage = self._build_storage(manifest_perms) - - return { - 'models': models, - 'tools': tools, - 'knowledge_bases': knowledge_bases, - 'files': [], # Files are populated at runtime - 'storage': storage, - 'platform_capabilities': {}, # Reserved for EBA - } - - async def _build_models( - self, - manifest_perms: dict[str, list[str]], - runner_config: dict[str, typing.Any], - descriptor: AgentRunnerDescriptor, - query: typing.Any, - ) -> list[ModelResource]: - """Build models list with plugin SDK field names.""" - models: list[ModelResource] = [] - seen_model_ids: set[str] = set() - - # Check manifest permission - model_perms = manifest_perms.get('models', []) - if 'invoke' not in model_perms and 'stream' not in model_perms: - return models - - # Get model from query (preproc already resolved this) - model_uuid = getattr(query, 'use_llm_model_uuid', None) - if model_uuid: - await self._append_llm_model_resource(models, seen_model_ids, model_uuid) - - # Add fallback models if present - fallback_uuids = query.variables.get('_fallback_model_uuids', []) - for fb_uuid in fallback_uuids: - await self._append_llm_model_resource(models, seen_model_ids, fb_uuid) - - # Add model resources referenced by the runner binding config schema. - # This makes authorization generic for AgentRunner plugins instead of - # hard-coding only local-agent's primary/fallback model path. - await self._append_config_declared_model_resources( - models=models, - seen_model_ids=seen_model_ids, - descriptor=descriptor, - runner_config=runner_config, - ) - - return models - async def _append_config_declared_model_resources( self, models: list[ModelResource], @@ -349,79 +261,3 @@ class AgentResourceBuilder: seen_model_ids.add(model_uuid) except Exception as e: self.ap.logger.warning(f'Failed to build rerank model resource {model_uuid}: {e}') - - async def _build_tools( - self, - manifest_perms: dict[str, list[str]], - bound_plugins: list[str] | None, - bound_mcp_servers: list[str] | None, - query: typing.Any, - ) -> list[ToolResource]: - """Build tools list with plugin SDK field names.""" - tools: list[ToolResource] = [] - - # Check manifest permission - tool_perms = manifest_perms.get('tools', []) - if 'list' not in tool_perms and 'call' not in tool_perms: - return tools - - # Get tools from query (preproc already resolved this for local-agent) - use_funcs = getattr(query, 'use_funcs', []) - for tool in use_funcs: - # Use plugin SDK field names: tool_name, tool_type, description - tools.append({ - 'tool_name': tool.name, - 'tool_type': None, # Tool type not available in current LLMTool - 'description': tool.description, - }) - - return tools - - async def _build_knowledge_bases( - self, - manifest_perms: dict[str, list[str]], - runner_config: dict[str, typing.Any], - descriptor: AgentRunnerDescriptor, - query: typing.Any, - ) -> list[KnowledgeBaseResource]: - """Build knowledge bases list with plugin SDK field names.""" - kb_resources: list[KnowledgeBaseResource] = [] - - # Check manifest permission - kb_perms = manifest_perms.get('knowledge_bases', []) - if 'list' not in kb_perms and 'retrieve' not in kb_perms: - return kb_resources - - # Get knowledge base UUIDs from schema-defined config fields. - kb_uuids = config_schema.extract_knowledge_base_uuids(descriptor, runner_config) - - # Also check query variables (may be modified by plugin PromptPreProcessing) - kb_uuids_from_vars = query.variables.get('_knowledge_base_uuids', []) - if kb_uuids_from_vars: - kb_uuids = kb_uuids_from_vars - - for kb_uuid in kb_uuids: - try: - kb = await self.ap.rag_mgr.get_knowledge_base_by_uuid(kb_uuid) - if kb: - # Use plugin SDK field names: kb_id, kb_name, kb_type - kb_resources.append({ - 'kb_id': kb_uuid, - 'kb_name': kb.get_name(), - 'kb_type': kb.knowledge_base_entity.kb_type if hasattr(kb.knowledge_base_entity, 'kb_type') else None, - }) - except Exception as e: - self.ap.logger.warning(f'Failed to build knowledge base resource {kb_uuid}: {e}') - - return kb_resources - - def _build_storage( - self, - manifest_perms: dict[str, list[str]], - ) -> StorageResource: - """Build storage permissions with plugin SDK field names.""" - storage_perms = manifest_perms.get('storage', []) - return { - 'plugin_storage': 'plugin' in storage_perms, - 'workspace_storage': 'workspace' in storage_perms, - } diff --git a/src/langbot/pkg/agent/runner/result_normalizer.py b/src/langbot/pkg/agent/runner/result_normalizer.py index 993753ca..62546429 100644 --- a/src/langbot/pkg/agent/runner/result_normalizer.py +++ b/src/langbot/pkg/agent/runner/result_normalizer.py @@ -109,8 +109,8 @@ class AgentResultNormalizer: elif result_type == 'state.updated': # Log for telemetry, don't yield to pipeline - # Orchestrator already handles the actual state_store.apply_update - scope = data.get('scope', 'conversation') # Default for backward compat + # Orchestrator already handles the actual PersistentStateStore update. + scope = data.get('scope', 'unknown') key = data.get('key', 'unknown') value_repr = repr(data.get('value', '...'))[:100] # Truncate for log self.ap.logger.debug( diff --git a/src/langbot/pkg/agent/runner/state_scope.py b/src/langbot/pkg/agent/runner/state_scope.py new file mode 100644 index 00000000..0493ff9f --- /dev/null +++ b/src/langbot/pkg/agent/runner/state_scope.py @@ -0,0 +1,113 @@ +"""State scope key helpers for AgentRunner host-owned state.""" +from __future__ import annotations + +import typing + +from .descriptor import AgentRunnerDescriptor +from .host_models import AgentBinding, AgentEventEnvelope + + +VALID_STATE_SCOPES = ('conversation', 'actor', 'subject', 'runner') + +STATE_KEY_ALIASES = { + 'conversation_id': 'external.conversation_id', +} + + +def normalize_state_key(key: str) -> str: + """Map accepted public aliases to protocol state keys.""" + return STATE_KEY_ALIASES.get(key, key) + + +def get_binding_identity(binding: AgentBinding) -> str: + """Return the stable binding identity used for state isolation.""" + if binding.binding_id: + return binding.binding_id + + scope = binding.scope + if scope.scope_type and scope.scope_id: + return f'{scope.scope_type}:{scope.scope_id}' + + return 'unknown_binding' + + +def build_state_scope_key( + scope: str, + event: AgentEventEnvelope, + binding: AgentBinding, + descriptor: AgentRunnerDescriptor, +) -> str | None: + """Build the storage key for one state scope. + + Returns None when the event lacks the identity required by that scope. + """ + binding_identity = get_binding_identity(binding) + + if scope == 'conversation': + if not event.conversation_id: + return None + parts = [descriptor.id, binding_identity, event.conversation_id] + if event.thread_id: + parts.append(event.thread_id) + return f'conversation:{":".join(parts)}' + + if scope == 'actor': + if not event.actor or not event.actor.actor_id: + return None + parts = [ + descriptor.id, + binding_identity, + event.actor.actor_type or 'user', + event.actor.actor_id, + ] + return f'actor:{":".join(parts)}' + + if scope == 'subject': + if not event.subject or not event.subject.subject_id: + return None + parts = [ + descriptor.id, + binding_identity, + event.subject.subject_type or 'unknown', + event.subject.subject_id, + ] + return f'subject:{":".join(parts)}' + + if scope == 'runner': + return f'runner:{descriptor.id}:{binding_identity}' + + return None + + +def build_state_scope_keys( + event: AgentEventEnvelope, + binding: AgentBinding, + descriptor: AgentRunnerDescriptor, +) -> dict[str, str]: + """Build all available scope keys for an event/binding pair.""" + scope_keys: dict[str, str] = {} + for scope in VALID_STATE_SCOPES: + scope_key = build_state_scope_key(scope, event, binding, descriptor) + if scope_key: + scope_keys[scope] = scope_key + return scope_keys + + +def build_state_context( + event: AgentEventEnvelope, + binding: AgentBinding, + descriptor: AgentRunnerDescriptor, +) -> dict[str, typing.Any]: + """Build the State API context stored in the run session.""" + return { + 'scope_keys': build_state_scope_keys(event, binding, descriptor), + 'binding_identity': get_binding_identity(binding), + 'bot_id': event.bot_id, + 'workspace_id': event.workspace_id, + 'conversation_id': event.conversation_id, + 'thread_id': event.thread_id, + 'actor_type': event.actor.actor_type if event.actor else None, + 'actor_id': event.actor.actor_id if event.actor else None, + 'subject_type': event.subject.subject_type if event.subject else None, + 'subject_id': event.subject.subject_id if event.subject else None, + } diff --git a/src/langbot/pkg/agent/runner/state_store.py b/src/langbot/pkg/agent/runner/state_store.py deleted file mode 100644 index 53e570e2..00000000 --- a/src/langbot/pkg/agent/runner/state_store.py +++ /dev/null @@ -1,618 +0,0 @@ -"""Runner scoped state store for managing AgentRunner state across runs.""" -from __future__ import annotations - -import typing -import threading - -from langbot_plugin.api.entities.builtin.pipeline import query as pipeline_query - -from .descriptor import AgentRunnerDescriptor -from .host_models import AgentEventEnvelope - - -# Valid state scopes for agent runner state updates. -VALID_STATE_SCOPES = ('conversation', 'actor', 'subject', 'runner') - -# External-facing key aliases accepted from runners. -STATE_KEY_ALIASES = { - 'conversation_id': 'external.conversation_id', -} - - -class RunnerScopedStateStore: - """In-memory scoped state store for AgentRunner protocol state. - - IMPORTANT: This is HOST-OWNED protocol state, NOT plugin instance state. - - Key Design Principles: - 1. Host-owned: State is owned and managed by LangBot host, not by the plugin. - The plugin can only read/write through agent runner state updates. - 2. Scope keys based on stable host identity: Uses host-controlled identifiers - (runner_id, bot_uuid, pipeline_uuid, launcher_type, launcher_id) rather - than external/unstable identifiers like external conversation id. - 3. External conversation id is a VALUE: The runner can update external.conversation_id - in state, which syncs to conversation.uuid. The scope key remains stable, - preventing state loss when conversation identity changes. - - State scopes: - - conversation: runner_id + bot_uuid + pipeline_uuid + launcher_type + launcher_id + conversation identity - - actor: runner_id + bot_uuid + sender_id - - subject: runner_id + bot_uuid + launcher_type + launcher_id - - runner: runner_id + pipeline_uuid - - This ensures different runners don't share state and same runner - has appropriate isolation per scope. - - Note: This is an in-memory store. State only persists within the - current process lifetime. For production use, a persistent storage - backend should be implemented. - """ - - def __init__(self): - # Use thread-safe dict for concurrent access - self._store: dict[str, dict[str, typing.Any]] = {} - self._lock = threading.Lock() - - def _make_conversation_scope_key( - self, - query: pipeline_query.Query, - descriptor: AgentRunnerDescriptor, - ) -> str: - """Build conversation scope identity key. - - Uses host-owned stable identity, NOT external conversation id. - External conversation id is a state VALUE, not part of state KEY. - - This prevents state loss when runner updates external.conversation_id: - - First run: scope key uses stable identity, state saved - - Runner returns external.conversation_id, synced to conversation.uuid - - Next run: scope key still uses same stable identity, state accessible - """ - parts = [ - descriptor.id, - query.bot_uuid or 'unknown_bot', - query.pipeline_uuid or 'unknown_pipeline', - ] - - if query.session: - parts.append(query.session.launcher_type.value) - parts.append(query.session.launcher_id) - - # Use stable conversation identity (NOT external uuid) - # Options: - # 1. conversation.create_time if available (stable host-owned) - # 2. Use "conversation" literal as stable identity within launcher scope - # (assumes one active conversation per launcher context) - # We use option 2 for simplicity - conversation state is scoped to - # launcher (person/group) + bot + pipeline + runner - # External conversation id is just a VALUE inside this scope - conv_create_time = getattr(query.session.using_conversation, 'create_time', None) - if conv_create_time: - # Use create_time as stable identity if available - parts.append(str(conv_create_time)) - # else: no additional part - launcher scope identity is sufficient - - return f'conversation:{":".join(parts)}' - - def _make_actor_scope_key( - self, - query: pipeline_query.Query, - descriptor: AgentRunnerDescriptor, - ) -> str: - """Build actor scope identity key.""" - parts = [ - descriptor.id, - query.bot_uuid or 'unknown_bot', - str(query.sender_id) if query.sender_id else 'unknown_sender', - ] - - return f'actor:{":".join(parts)}' - - def _make_subject_scope_key( - self, - query: pipeline_query.Query, - descriptor: AgentRunnerDescriptor, - ) -> str: - """Build subject scope identity key.""" - parts = [ - descriptor.id, - query.bot_uuid or 'unknown_bot', - ] - - if query.session: - parts.append(query.session.launcher_type.value) - parts.append(query.session.launcher_id) - - return f'subject:{":".join(parts)}' - - def _make_runner_scope_key( - self, - query: pipeline_query.Query, - descriptor: AgentRunnerDescriptor, - ) -> str: - """Build runner scope identity key.""" - parts = [ - descriptor.id, - query.pipeline_uuid or 'unknown_pipeline', - ] - - return f'runner:{":".join(parts)}' - - def _get_scope_key( - self, - scope: str, - query: pipeline_query.Query, - descriptor: AgentRunnerDescriptor, - ) -> str: - """Get the storage key for a given scope.""" - if scope == 'conversation': - return self._make_conversation_scope_key(query, descriptor) - elif scope == 'actor': - return self._make_actor_scope_key(query, descriptor) - elif scope == 'subject': - return self._make_subject_scope_key(query, descriptor) - elif scope == 'runner': - return self._make_runner_scope_key(query, descriptor) - else: - raise ValueError(f'Invalid scope: {scope}') - - def build_snapshot( - self, - query: pipeline_query.Query, - descriptor: AgentRunnerDescriptor, - ) -> dict[str, dict[str, typing.Any]]: - """Build state snapshot for all scopes. - - Args: - query: Pipeline query - descriptor: Runner descriptor - - Returns: - Dict with 4 scope keys, each containing scope state dict - """ - snapshot: dict[str, dict[str, typing.Any]] = { - 'conversation': {}, - 'actor': {}, - 'subject': {}, - 'runner': {}, - } - - with self._lock: - for scope in VALID_STATE_SCOPES: - scope_key = self._get_scope_key(scope, query, descriptor) - scope_state = self._store.get(scope_key, {}) - snapshot[scope] = dict(scope_state) # Copy to avoid mutation - - # Seed external.conversation_id from existing conversation uuid - if query.session and query.session.using_conversation: - conv_uuid = getattr(query.session.using_conversation, 'uuid', None) - if conv_uuid and 'external.conversation_id' not in snapshot['conversation']: - snapshot['conversation']['external.conversation_id'] = conv_uuid - - return snapshot - - def apply_update( - self, - query: pipeline_query.Query, - descriptor: AgentRunnerDescriptor, - scope: str, - key: str, - value: typing.Any, - logger: typing.Any = None, - ) -> bool: - """Apply a state update to the store. - - Args: - query: Pipeline query - descriptor: Runner descriptor - scope: State scope (conversation, actor, subject, runner) - key: State key (should use namespace prefix like external.*) - value: State value (must be JSON-serializable) - logger: Optional logger for warnings - - Returns: - True if update applied successfully, False if invalid scope - - Side effects: - - Updates internal store - - Syncs external.conversation_id to query.session.using_conversation.uuid - """ - # Validate scope - if scope not in VALID_STATE_SCOPES: - if logger: - logger.warning( - f'Runner {descriptor.id} state.updated with invalid scope: {scope}. ' - f'Valid scopes: {", ".join(VALID_STATE_SCOPES)}' - ) - return False - - # Map accepted key aliases - if key in STATE_KEY_ALIASES: - mapped_key = STATE_KEY_ALIASES[key] - if logger: - logger.debug( - f'Runner {descriptor.id} state.updated key alias "{key}" mapped to "{mapped_key}"' - ) - key = mapped_key - - # Apply update to store - with self._lock: - scope_key = self._get_scope_key(scope, query, descriptor) - if scope_key not in self._store: - self._store[scope_key] = {} - self._store[scope_key][key] = value - - # Sync external.conversation_id to query.session.using_conversation.uuid - if scope == 'conversation' and key == 'external.conversation_id': - if query.session and query.session.using_conversation: - # Keep the active conversation UUID aligned with runner-owned state. - setattr(query.session.using_conversation, 'uuid', value) - if logger: - logger.debug( - f'Synced external.conversation_id "{value}" to conversation.uuid' - ) - - return True - - def clear_scope( - self, - scope: str, - query: pipeline_query.Query, - descriptor: AgentRunnerDescriptor, - ) -> None: - """Clear all state for a specific scope. - - Args: - scope: State scope to clear - query: Pipeline query - descriptor: Runner descriptor - """ - with self._lock: - scope_key = self._get_scope_key(scope, query, descriptor) - if scope_key in self._store: - del self._store[scope_key] - - def clear_all(self) -> None: - """Clear all stored state (for testing/reset).""" - with self._lock: - self._store.clear() - - # ========== Event-first Protocol v1 methods ========== - - def _get_binding_identity( - self, - binding: "AgentBinding", - ) -> str: - """Get stable binding identity for scope key. - - Uses binding_id if available, falls back to scope_type + scope_id. - """ - if binding.binding_id: - return binding.binding_id - - # Fallback to scope identity - scope = binding.scope - if scope.scope_type and scope.scope_id: - return f"{scope.scope_type}:{scope.scope_id}" - - # Last resort - should not happen in production - return "unknown_binding" - - def _make_conversation_scope_key_from_event( - self, - event: AgentEventEnvelope, - binding: "AgentBinding", - descriptor: AgentRunnerDescriptor, - ) -> str | None: - """Build conversation scope identity key from event and binding. - - Scope key structure: runner_id + binding_id + conversation_id - This ensures state is isolated per binding and per conversation. - - Returns None if conversation_id is missing. - """ - if not event.conversation_id: - return None - - binding_identity = self._get_binding_identity(binding) - - parts = [ - descriptor.id, - binding_identity, - event.conversation_id, - ] - - # Include thread_id if present for thread-scoped state - if event.thread_id: - parts.append(event.thread_id) - - return f'conversation:{":".join(parts)}' - - def _make_actor_scope_key_from_event( - self, - event: AgentEventEnvelope, - binding: "AgentBinding", - descriptor: AgentRunnerDescriptor, - ) -> str | None: - """Build actor scope identity key from event and binding. - - Scope key structure: runner_id + binding_id + actor_type + actor_id - This ensures state is isolated per binding and per actor. - - Returns None if actor_id is missing. - """ - if not event.actor or not event.actor.actor_id: - return None - - binding_identity = self._get_binding_identity(binding) - - parts = [ - descriptor.id, - binding_identity, - event.actor.actor_type or 'user', - event.actor.actor_id, - ] - - return f'actor:{":".join(parts)}' - - def _make_subject_scope_key_from_event( - self, - event: AgentEventEnvelope, - binding: "AgentBinding", - descriptor: AgentRunnerDescriptor, - ) -> str | None: - """Build subject scope identity key from event and binding. - - Scope key structure: runner_id + binding_id + subject_type + subject_id - This ensures state is isolated per binding and per subject. - - Returns None if subject_id is missing. - """ - if not event.subject or not event.subject.subject_id: - return None - - binding_identity = self._get_binding_identity(binding) - - parts = [ - descriptor.id, - binding_identity, - event.subject.subject_type or 'unknown', - event.subject.subject_id, - ] - - return f'subject:{":".join(parts)}' - - def _make_runner_scope_key_from_event( - self, - event: AgentEventEnvelope, - binding: "AgentBinding", - descriptor: AgentRunnerDescriptor, - ) -> str: - """Build runner scope identity key from event and binding. - - Scope key structure: runner_id + binding_id - This ensures state is isolated per binding (not shared across bindings). - """ - binding_identity = self._get_binding_identity(binding) - - parts = [ - descriptor.id, - binding_identity, - ] - - return f'runner:{":".join(parts)}' - - def _get_scope_key_from_event( - self, - scope: str, - event: AgentEventEnvelope, - binding: "AgentBinding", - descriptor: AgentRunnerDescriptor, - ) -> str | None: - """Get the storage key for a given scope from event and binding. - - Returns None if required identity is missing for the scope. - """ - if scope == 'conversation': - return self._make_conversation_scope_key_from_event(event, binding, descriptor) - elif scope == 'actor': - return self._make_actor_scope_key_from_event(event, binding, descriptor) - elif scope == 'subject': - return self._make_subject_scope_key_from_event(event, binding, descriptor) - elif scope == 'runner': - return self._make_runner_scope_key_from_event(event, binding, descriptor) - else: - return None - - def _check_scope_enabled( - self, - scope: str, - binding: "AgentBinding", - ) -> bool: - """Check if a scope is enabled by binding's state_policy. - - Args: - scope: Scope to check - binding: Agent binding with state_policy - - Returns: - True if scope is enabled, False otherwise - """ - state_policy = binding.state_policy - - # Check if state is disabled entirely - if not state_policy.enable_state: - return False - - # Check if scope is in enabled scopes - return scope in state_policy.state_scopes - - def build_snapshot_from_event( - self, - event: AgentEventEnvelope, - binding: "AgentBinding", - descriptor: AgentRunnerDescriptor, - ) -> dict[str, dict[str, typing.Any]]: - """Build state snapshot for all scopes from event and binding. - - Respects binding.state_policy: - - If enable_state is False, returns all empty scopes. - - If a scope is not in state_scopes, returns empty dict for that scope. - - Args: - event: Event envelope - binding: Agent binding configuration - descriptor: Runner descriptor - - Returns: - Dict with 4 scope keys, each containing scope state dict. - Scopes without required identity or disabled by policy will have empty dict. - """ - state_policy = binding.state_policy - - # If state is disabled, return all empty scopes - if not state_policy.enable_state: - return { - 'conversation': {}, - 'actor': {}, - 'subject': {}, - 'runner': {}, - } - - snapshot: dict[str, dict[str, typing.Any]] = { - 'conversation': {}, - 'actor': {}, - 'subject': {}, - 'runner': {}, - } - - with self._lock: - for scope in VALID_STATE_SCOPES: - # Check if scope is enabled by policy - if not self._check_scope_enabled(scope, binding): - continue - - scope_key = self._get_scope_key_from_event(scope, event, binding, descriptor) - if scope_key: - scope_state = self._store.get(scope_key, {}) - snapshot[scope] = dict(scope_state) # Copy to avoid mutation - - # Seed external.conversation_id from event.conversation_id if not already set - # Only if conversation scope is enabled - if self._check_scope_enabled('conversation', binding) and event.conversation_id: - if 'external.conversation_id' not in snapshot['conversation']: - snapshot['conversation']['external.conversation_id'] = event.conversation_id - - return snapshot - - def apply_update_from_event( - self, - event: AgentEventEnvelope, - binding: "AgentBinding", - descriptor: AgentRunnerDescriptor, - scope: str, - key: str, - value: typing.Any, - logger: typing.Any = None, - ) -> bool: - """Apply a state update to the store from event and binding context. - - Respects binding.state_policy: - - If enable_state is False, rejects the update. - - If scope is not in state_scopes, rejects the update. - - Args: - event: Event envelope - binding: Agent binding configuration - descriptor: Runner descriptor - scope: State scope (conversation, actor, subject, runner) - key: State key (should use namespace prefix like external.*) - value: State value (must be JSON-serializable) - logger: Optional logger for warnings - - Returns: - True if update applied successfully, False if invalid scope, - missing identity, or disabled by policy - """ - state_policy = binding.state_policy - - # Check if state is disabled entirely - if not state_policy.enable_state: - if logger: - logger.warning( - f'Runner {descriptor.id} state.updated rejected: ' - f'state is disabled by binding policy' - ) - return False - - # Validate scope - if scope not in VALID_STATE_SCOPES: - if logger: - logger.warning( - f'Runner {descriptor.id} state.updated with invalid scope: {scope}. ' - f'Valid scopes: {", ".join(VALID_STATE_SCOPES)}' - ) - return False - - # Check if scope is enabled by policy - if not self._check_scope_enabled(scope, binding): - if logger: - logger.warning( - f'Runner {descriptor.id} state.updated rejected for scope "{scope}": ' - f'scope not enabled by binding policy. Enabled scopes: {state_policy.state_scopes}' - ) - return False - - # Map accepted key aliases - if key in STATE_KEY_ALIASES: - mapped_key = STATE_KEY_ALIASES[key] - if logger: - logger.debug( - f'Runner {descriptor.id} state.updated key alias "{key}" mapped to "{mapped_key}"' - ) - key = mapped_key - - # Get scope key from event and binding - scope_key = self._get_scope_key_from_event(scope, event, binding, descriptor) - if scope_key is None: - if logger: - logger.warning( - f'Runner {descriptor.id} state.updated for scope "{scope}" ' - f'requires missing identity (conversation_id, actor_id, or subject_id). ' - f'Skipping update.' - ) - return False - - # Apply update to store - with self._lock: - if scope_key not in self._store: - self._store[scope_key] = {} - self._store[scope_key][key] = value - - if logger: - logger.debug( - f'Runner {descriptor.id} state.updated: scope={scope}, key={key}' - ) - - return True - - -# Global singleton state store -_state_store: RunnerScopedStateStore | None = None -_state_store_lock = threading.Lock() - - -def get_state_store() -> RunnerScopedStateStore: - """Get the global state store singleton.""" - global _state_store - with _state_store_lock: - if _state_store is None: - _state_store = RunnerScopedStateStore() - return _state_store - - -def reset_state_store() -> None: - """Reset the global state store (for testing).""" - global _state_store - with _state_store_lock: - _state_store = None diff --git a/src/langbot/pkg/entity/persistence/agent_runner_state.py b/src/langbot/pkg/entity/persistence/agent_runner_state.py index adc71ff8..9d91c1d3 100644 --- a/src/langbot/pkg/entity/persistence/agent_runner_state.py +++ b/src/langbot/pkg/entity/persistence/agent_runner_state.py @@ -21,8 +21,7 @@ class AgentRunnerState(Base): - subject: runner_id + binding_id + subject_type + subject_id - runner: runner_id + binding_id - This table persists state across runs, replacing the in-memory - RunnerScopedStateStore._store dict. + This table is the production store for AgentRunner state. """ __tablename__ = 'agent_runner_state' diff --git a/src/langbot/pkg/pipeline/msgtrun/round_policy.py b/src/langbot/pkg/pipeline/msgtrun/round_policy.py new file mode 100644 index 00000000..659ab13e --- /dev/null +++ b/src/langbot/pkg/pipeline/msgtrun/round_policy.py @@ -0,0 +1,34 @@ +"""Shared max-round message window helpers for Pipeline behavior.""" +from __future__ import annotations + +import typing + + +DEFAULT_MAX_ROUND = 10 + + +def get_max_round(config: dict[str, typing.Any]) -> typing.Any: + """Return the configured Pipeline max-round value.""" + return config.get('max-round', DEFAULT_MAX_ROUND) + + +def select_max_round_messages( + messages: list[typing.Any] | None, + max_round: typing.Any, +) -> list[typing.Any]: + """Select a bounded recent message window by user-round count.""" + if not messages: + return [] + + temp_messages: list[typing.Any] = [] + current_round = 0 + + for msg in messages[::-1]: + if current_round < max_round: + temp_messages.append(msg) + if getattr(msg, 'role', None) == 'user': + current_round += 1 + else: + break + + return temp_messages[::-1] diff --git a/src/langbot/pkg/pipeline/msgtrun/truncators/round.py b/src/langbot/pkg/pipeline/msgtrun/truncators/round.py index e44a4b29..78a55df1 100644 --- a/src/langbot/pkg/pipeline/msgtrun/truncators/round.py +++ b/src/langbot/pkg/pipeline/msgtrun/truncators/round.py @@ -3,7 +3,7 @@ from __future__ import annotations from .. import truncator import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query from ....agent.runner.config_migration import ConfigMigration -from ....agent.runner.context_packager import ( +from ..round_policy import ( get_max_round, select_max_round_messages, ) diff --git a/tests/unit_tests/agent/test_context_builder_params_state.py b/tests/unit_tests/agent/test_context_builder_params_state.py index 4f46bb3f..ae2152a7 100644 --- a/tests/unit_tests/agent/test_context_builder_params_state.py +++ b/tests/unit_tests/agent/test_context_builder_params_state.py @@ -1,67 +1,11 @@ -"""Tests for agent run context builder params and state.""" +"""Tests for Pipeline adapter params and prompt packaging.""" from __future__ import annotations -import pytest - -from langbot.pkg.agent.runner.context_builder import AgentRunContextBuilder -from langbot.pkg.agent.runner.descriptor import AgentRunnerDescriptor -from langbot.pkg.agent.runner.state_store import reset_state_store - -# Import shared test fixtures from conftest.py -from .conftest import make_resources - - -class FakeApplication: - """Fake Application for testing.""" - def __init__(self): - class FakeLogger: - def info(self, msg): - pass - def debug(self, msg): - pass - def warning(self, msg): - pass - def error(self, msg): - pass - - class FakeVersionManager: - def get_current_version(self): - return '1.0.0' - - self.logger = FakeLogger() - self.ver_mgr = FakeVersionManager() - - -def make_descriptor() -> AgentRunnerDescriptor: - """Create a test descriptor.""" - return AgentRunnerDescriptor( - id='plugin:langbot/local-agent/default', - source='plugin', - label={'en_US': 'Local Agent'}, - plugin_author='langbot', - plugin_name='local-agent', - runner_name='default', - protocol_version='1', - capabilities={'streaming': True}, - ) - - -class FakeSession: - """Fake session for testing.""" - def __init__(self): - self.launcher_type = type('LauncherType', (), {'value': 'telegram'})() - self.launcher_id = 'group_123' - self.using_conversation = None - - -class FakeConversation: - """Fake conversation for testing.""" - def __init__(self, uuid: str = 'conv_abc'): - self.uuid = uuid +from langbot.pkg.agent.runner.pipeline_adapter import PipelineAdapter class FakeMessage: - """Fake message for testing.""" + """Fake prompt/history message.""" def __init__(self, content='Hello'): self.content = content self.role = 'user' @@ -76,32 +20,14 @@ class FakePrompt: self.messages = messages or [] -class FakeAdapter: - """Fake adapter with streaming capability.""" - async def is_stream_output_supported(self): - return True - - class TestBuildParams: - """Tests for _build_params filtering.""" + """Tests for PipelineAdapter.build_params filtering.""" def test_params_empty_when_no_variables(self): - """Empty variables should produce empty params.""" - ap = FakeApplication() - builder = AgentRunContextBuilder(ap) - - query = type('Query', (), { - 'variables': None, - })() - - params = builder._build_params(query) - assert params == {} + query = type('Query', (), {'variables': None})() + assert PipelineAdapter.build_params(query) == {} def test_params_filters_underscore_prefix(self): - """Params should exclude variables starting with underscore.""" - ap = FakeApplication() - builder = AgentRunContextBuilder(ap) - query = type('Query', (), { 'variables': { '_internal_var': 'should_be_excluded', @@ -111,18 +37,13 @@ class TestBuildParams: }, })() - params = builder._build_params(query) + params = PipelineAdapter.build_params(query) assert '_internal_var' not in params assert '_pipeline_bound_plugins' not in params assert '_monitoring_bot_name' not in params - assert 'public_var' in params assert params['public_var'] == 'should_be_included' def test_params_filters_sensitive_naming(self): - """Params should exclude variables with sensitive naming patterns.""" - ap = FakeApplication() - builder = AgentRunContextBuilder(ap) - query = type('Query', (), { 'variables': { 'api_key': 'secret123', @@ -140,8 +61,7 @@ class TestBuildParams: }, })() - params = builder._build_params(query) - # All sensitive patterns should be excluded + params = PipelineAdapter.build_params(query) assert 'api_key' not in params assert 'API_KEY' not in params assert 'token' not in params @@ -152,15 +72,10 @@ class TestBuildParams: assert 'user_secret_key' not in params assert 'my_token_value' not in params assert 'user_password_hash' not in params - # Public vars should be included assert 'public_name' in params assert 'safe_value' in params def test_params_keeps_common_public_vars(self): - """Params should keep common public business vars.""" - ap = FakeApplication() - builder = AgentRunContextBuilder(ap) - query = type('Query', (), { 'variables': { 'launcher_type': 'telegram', @@ -174,8 +89,7 @@ class TestBuildParams: }, })() - params = builder._build_params(query) - # All these should be included + params = PipelineAdapter.build_params(query) assert params['launcher_type'] == 'telegram' assert params['launcher_id'] == 'group_123' assert params['sender_id'] == 'user_001' @@ -186,10 +100,6 @@ class TestBuildParams: assert params['user_message_text'] == 'Hello world' def test_params_filters_non_json_serializable(self): - """Params should keep only JSON-serializable values.""" - ap = FakeApplication() - builder = AgentRunContextBuilder(ap) - class CustomObject: pass @@ -202,11 +112,11 @@ class TestBuildParams: 'null_value': None, 'list_value': ['a', 'b', 'c'], 'dict_value': {'nested': 'value'}, - 'custom_object': CustomObject(), # Not serializable + 'custom_object': CustomObject(), }, })() - params = builder._build_params(query) + params = PipelineAdapter.build_params(query) assert 'string_value' in params assert 'int_value' in params assert 'float_value' in params @@ -217,288 +127,53 @@ class TestBuildParams: assert 'custom_object' not in params def test_params_filters_nested_non_serializable(self): - """Params should filter nested non-serializable values.""" - ap = FakeApplication() - builder = AgentRunContextBuilder(ap) - class CustomObject: pass query = type('Query', (), { 'variables': { - 'nested_list_with_bad': ['a', CustomObject(), 'c'], # List with non-serializable - 'nested_dict_with_bad': {'good': 'value', 'bad': CustomObject()}, # Dict with non-serializable + 'nested_list_with_bad': ['a', CustomObject(), 'c'], + 'nested_dict_with_bad': {'good': 'value', 'bad': CustomObject()}, 'good_nested_list': ['a', ['b', 'c']], 'good_nested_dict': {'outer': {'inner': 'value'}}, }, })() - params = builder._build_params(query) - # Nested with bad should be excluded + params = PipelineAdapter.build_params(query) assert 'nested_list_with_bad' not in params assert 'nested_dict_with_bad' not in params - # Good nested should be included assert 'good_nested_list' in params assert 'good_nested_dict' in params - def test_is_json_serializable_primitives(self): - """_is_json_serializable should return True for primitives.""" - ap = FakeApplication() - builder = AgentRunContextBuilder(ap) - - assert builder._is_json_serializable(None) is True - assert builder._is_json_serializable('string') is True - assert builder._is_json_serializable(42) is True - assert builder._is_json_serializable(3.14) is True - assert builder._is_json_serializable(True) is True - assert builder._is_json_serializable(False) is True - - def test_is_json_serializable_collections(self): - """_is_json_serializable should check nested collections.""" - ap = FakeApplication() - builder = AgentRunContextBuilder(ap) - - assert builder._is_json_serializable([]) is True - assert builder._is_json_serializable(['a', 'b']) is True - assert builder._is_json_serializable({}) is True - assert builder._is_json_serializable({'key': 'value'}) is True - assert builder._is_json_serializable([1, 2, [3, 4]]) is True - assert builder._is_json_serializable({'a': {'b': 'c'}}) is True - - def test_is_json_serializable_custom_objects(self): - """_is_json_serializable should return False for custom objects.""" - ap = FakeApplication() - builder = AgentRunContextBuilder(ap) + def test_is_json_serializable_primitives_and_collections(self): + assert PipelineAdapter.is_json_serializable(None) is True + assert PipelineAdapter.is_json_serializable('string') is True + assert PipelineAdapter.is_json_serializable(42) is True + assert PipelineAdapter.is_json_serializable(['a', 'b']) is True + assert PipelineAdapter.is_json_serializable({'key': 'value'}) is True + assert PipelineAdapter.is_json_serializable((1, 2, 3)) is True + def test_is_json_serializable_rejects_sets_and_objects(self): class CustomObject: pass - assert builder._is_json_serializable(CustomObject()) is False - assert builder._is_json_serializable([CustomObject()]) is False - assert builder._is_json_serializable({'key': CustomObject()}) is False + assert PipelineAdapter.is_json_serializable(CustomObject()) is False + assert PipelineAdapter.is_json_serializable({1, 2, 3}) is False + assert PipelineAdapter.is_json_serializable([1, {2, 3}]) is False + assert PipelineAdapter.is_json_serializable({'key': {1, 2}}) is False - def test_is_json_serializable_set_not_allowed(self): - """_is_json_serializable should return False for set (not JSON-serializable). - json.dumps({"x": {1}}) fails because set is not JSON-serializable. - Only list and tuple are allowed. - """ - ap = FakeApplication() - builder = AgentRunContextBuilder(ap) +class TestBuildPrompt: + """Tests for PipelineAdapter.build_prompt.""" - # set is NOT JSON-serializable - assert builder._is_json_serializable({1, 2, 3}) is False - assert builder._is_json_serializable({'a', 'b'}) is False - # list and tuple ARE allowed - assert builder._is_json_serializable([1, 2, 3]) is True - assert builder._is_json_serializable((1, 2, 3)) is True - # Nested set should also be rejected - assert builder._is_json_serializable([1, {2, 3}]) is False - assert builder._is_json_serializable({'key': {1, 2}}) is False - - def test_params_filters_set_values(self): - """Params should filter out variables with set values. - - set is not JSON-serializable and would cause json.dumps to fail. - """ - ap = FakeApplication() - builder = AgentRunContextBuilder(ap) + def test_prompt_empty_when_missing(self): + query = type('Query', (), {})() + assert PipelineAdapter.build_prompt(query) == [] + def test_prompt_serializes_messages(self): query = type('Query', (), { - 'variables': { - 'list_value': ['a', 'b', 'c'], - 'tuple_value': ('a', 'b', 'c'), - 'set_value': {'a', 'b', 'c'}, # Should be filtered - 'nested_with_set': ['a', {'b', 'c'}], # Should be filtered - 'dict_with_set': {'items': {1, 2}}, # Should be filtered - }, - })() - - params = builder._build_params(query) - # list and tuple should be included - assert 'list_value' in params - assert params['list_value'] == ['a', 'b', 'c'] - assert 'tuple_value' in params - # set should be filtered - assert 'set_value' not in params - assert 'nested_with_set' not in params - assert 'dict_with_set' not in params - - -class TestBuildState: - """Tests for state snapshot building.""" - - @pytest.mark.asyncio - async def test_context_has_state_field(self): - """AgentRunContext should have state field.""" - reset_state_store() - ap = FakeApplication() - builder = AgentRunContextBuilder(ap) - descriptor = make_descriptor() - resources = make_resources() - - session = FakeSession() - query = type('Query', (), { - 'query_id': 1, - 'bot_uuid': 'bot_001', - 'pipeline_uuid': 'pipeline_001', - 'sender_id': 'user_001', - 'session': session, - 'user_message': None, - 'message_chain': None, - 'messages': [], - 'pipeline_config': {}, - 'variables': {}, - })() - - context = await builder.build_context(query, descriptor, resources) - - assert 'state' in context - assert 'conversation' in context['state'] - assert 'actor' in context['state'] - assert 'subject' in context['state'] - assert 'runner' in context['state'] - - @pytest.mark.asyncio - async def test_state_seeds_conversation_id_from_existing(self): - """State should seed external.conversation_id from existing conversation uuid.""" - reset_state_store() - ap = FakeApplication() - builder = AgentRunContextBuilder(ap) - descriptor = make_descriptor() - resources = make_resources() - - conversation = FakeConversation(uuid='conv_existing') - session = FakeSession() - session.using_conversation = conversation - query = type('Query', (), { - 'query_id': 1, - 'bot_uuid': 'bot_001', - 'pipeline_uuid': 'pipeline_001', - 'sender_id': 'user_001', - 'session': session, - 'user_message': None, - 'message_chain': None, - 'messages': [], - 'pipeline_config': {}, - 'variables': {}, - })() - - context = await builder.build_context(query, descriptor, resources) - - assert context['state']['conversation']['external.conversation_id'] == 'conv_existing' - - -class TestBuildParamsInContext: - """Tests for params in full context.""" - - @pytest.mark.asyncio - async def test_context_has_params_field(self): - """AgentRunContext should have params field.""" - reset_state_store() - ap = FakeApplication() - builder = AgentRunContextBuilder(ap) - descriptor = make_descriptor() - resources = make_resources() - - session = FakeSession() - query = type('Query', (), { - 'query_id': 1, - 'bot_uuid': 'bot_001', - 'pipeline_uuid': 'pipeline_001', - 'sender_id': 'user_001', - 'session': session, - 'user_message': None, - 'message_chain': None, - 'messages': [], - 'pipeline_config': {}, - 'variables': { - 'public_param': 'value', - '_private': 'excluded', - }, - })() - - context = await builder.build_context(query, descriptor, resources) - - # Protocol v1: params is in adapter.extra - assert 'adapter' in context - assert 'extra' in context['adapter'] - assert 'params' in context['adapter']['extra'] - assert context['adapter']['extra']['params']['public_param'] == 'value' - assert '_private' not in context['adapter']['extra']['params'] - - @pytest.mark.asyncio - async def test_params_and_state_both_present(self): - """Context should have both params and state.""" - reset_state_store() - ap = FakeApplication() - builder = AgentRunContextBuilder(ap) - descriptor = make_descriptor() - resources = make_resources() - - conversation = FakeConversation(uuid='conv_abc') - session = FakeSession() - session.using_conversation = conversation - query = type('Query', (), { - 'query_id': 1, - 'bot_uuid': 'bot_001', - 'pipeline_uuid': 'pipeline_001', - 'sender_id': 'user_001', - 'session': session, - 'user_message': None, - 'message_chain': None, - 'messages': [], - 'pipeline_config': {}, - 'variables': { - 'workflow_input': 'user_question', - 'sender_name': 'John', - }, - })() - - context = await builder.build_context(query, descriptor, resources) - - # Protocol v1: params is in adapter.extra - assert 'adapter' in context - assert 'extra' in context['adapter'] - assert 'params' in context['adapter']['extra'] - assert context['adapter']['extra']['params']['workflow_input'] == 'user_question' - assert context['adapter']['extra']['params']['sender_name'] == 'John' - - # state should have seeded conversation_id - assert 'state' in context - assert context['state']['conversation']['external.conversation_id'] == 'conv_abc' - - @pytest.mark.asyncio - async def test_context_includes_effective_prompt_and_runtime_capabilities(self): - """Context should expose host-preprocessed prompt and adapter capabilities.""" - reset_state_store() - ap = FakeApplication() - builder = AgentRunContextBuilder(ap) - descriptor = make_descriptor() - resources = make_resources() - - session = FakeSession() - query = type('Query', (), { - 'query_id': 1, - 'bot_uuid': 'bot_001', - 'pipeline_uuid': 'pipeline_001', - 'sender_id': 'user_001', - 'session': session, - 'user_message': None, - 'message_chain': None, - 'messages': [], 'prompt': FakePrompt([FakeMessage('Effective prompt')]), - 'adapter': FakeAdapter(), - 'pipeline_config': {'output': {'misc': {'remove-think': True}}}, - 'variables': {}, })() - context = await builder.build_context(query, descriptor, resources) - - # Protocol v1: prompt is in adapter.extra - assert 'adapter' in context - assert 'extra' in context['adapter'] - assert 'prompt' in context['adapter']['extra'] - assert context['adapter']['extra']['prompt'][0]['content'] == 'Effective prompt' - assert context['runtime']['metadata']['streaming_supported'] is True - assert context['runtime']['metadata']['remove_think'] is True + prompt = PipelineAdapter.build_prompt(query) + assert prompt == [{'role': 'user', 'content': 'Effective prompt'}] diff --git a/tests/unit_tests/agent/test_context_builder_state.py b/tests/unit_tests/agent/test_context_builder_state.py index 0fdf2a53..b69246af 100644 --- a/tests/unit_tests/agent/test_context_builder_state.py +++ b/tests/unit_tests/agent/test_context_builder_state.py @@ -126,7 +126,7 @@ class TestContextAccessStateDetermination: @pytest.mark.asyncio async def test_no_binding_sets_state_false(self, mock_app, mock_event, mock_descriptor): - """ContextAccess.state=False when binding is None (legacy mode).""" + """ContextAccess.state=False when no binding is provided.""" builder = AgentRunContextBuilder(mock_app) # Real call without binding diff --git a/tests/unit_tests/agent/test_context_validation.py b/tests/unit_tests/agent/test_context_validation.py index fdc442cd..480b64b3 100644 --- a/tests/unit_tests/agent/test_context_validation.py +++ b/tests/unit_tests/agent/test_context_validation.py @@ -54,8 +54,9 @@ class TestContextValidation: event_type="message.received", event_time=1700000000, source="platform", + source_event_type="platform.message", bot_id="bot_1", - workspace_id=None, + workspace_id="workspace_1", conversation_id="conv_1", thread_id=None, actor=ActorContext( @@ -66,6 +67,7 @@ class TestContextValidation: subject=None, input=EventInput(text="Hello world"), delivery=DeliveryContext(surface="test"), + data={"platform_event_id": "source_evt_1"}, ) def _make_binding(self) -> AgentBinding: @@ -155,6 +157,13 @@ class TestContextValidation: assert validated.event.event_id == "evt_1" assert validated.event.event_type == "message.received" assert validated.event.source == "platform" + assert validated.event.source_event_type == "platform.message" + assert validated.event.data == {"platform_event_id": "source_evt_1"} + + # Verify conversation context uses SDK field names + assert validated.conversation is not None + assert validated.conversation.bot_id == "bot_1" + assert validated.conversation.workspace_id == "workspace_1" # Verify delivery context assert validated.delivery.surface == "test" diff --git a/tests/unit_tests/agent/test_event_first_protocol.py b/tests/unit_tests/agent/test_event_first_protocol.py index ee77007c..6dee9c30 100644 --- a/tests/unit_tests/agent/test_event_first_protocol.py +++ b/tests/unit_tests/agent/test_event_first_protocol.py @@ -88,6 +88,36 @@ class TestPipelineQueryToEventEnvelope: assert event.delivery.surface == "platform" assert isinstance(event.delivery.supports_streaming, bool) + def test_query_to_event_preserves_source_event_data(self, mock_query): + """Test source event metadata survives the adapter boundary.""" + source_event = Mock() + source_event.type = "platform.message.created" + source_event.time = 1700000000 + source_event.sender = None + source_event.model_dump = Mock(return_value={ + "type": "platform.message.created", + "message_id": "source-message-1", + "source_platform_object": {"large": "payload"}, + }) + mock_query.message_event = source_event + + event = PipelineAdapter.query_to_event(mock_query) + + assert event.source_event_type == "platform.message.created" + assert event.event_time == 1700000000 + assert event.data == { + "type": "platform.message.created", + "message_id": "source-message-1", + } + + def test_query_to_event_handles_missing_message_chain(self, mock_query): + """Test delivery context building when Query has no message_chain.""" + delattr(mock_query, "message_chain") + + event = PipelineAdapter.query_to_event(mock_query) + + assert event.delivery.reply_target == {"message_id": None} + class TestPipelineConfigToBinding: """Test Pipeline config -> AgentBinding conversion.""" diff --git a/tests/unit_tests/agent/test_orchestrator_integration.py b/tests/unit_tests/agent/test_orchestrator_integration.py index 1ed456ed..c1470280 100644 --- a/tests/unit_tests/agent/test_orchestrator_integration.py +++ b/tests/unit_tests/agent/test_orchestrator_integration.py @@ -11,10 +11,9 @@ from sqlalchemy.ext.asyncio import create_async_engine, AsyncEngine from langbot.pkg.agent.runner.descriptor import AgentRunnerDescriptor from langbot.pkg.agent.runner.errors import RunnerExecutionError -from langbot.pkg.agent.runner.context_builder import AgentRunContextBuilder from langbot.pkg.agent.runner.orchestrator import AgentRunOrchestrator +from langbot.pkg.agent.runner.pipeline_adapter import PipelineAdapter from langbot.pkg.agent.runner.session_registry import get_session_registry -from langbot.pkg.agent.runner.state_store import get_state_store, reset_state_store from langbot.pkg.agent.runner.persistent_state_store import reset_persistent_state_store from langbot_plugin.api.entities.builtin.platform import entities as platform_entities from langbot_plugin.api.entities.builtin.platform import events as platform_events @@ -227,7 +226,6 @@ def make_query(): def test_context_builder_includes_consumable_base64_attachments(): - builder = AgentRunContextBuilder(ap=types.SimpleNamespace()) query = make_query() query.user_message = provider_message.Message( role="user", @@ -241,20 +239,15 @@ def test_context_builder_includes_consumable_base64_attachments(): [platform_message.Image(base64="data:image/jpeg;base64,aGVsbG8=")] ) - input_data = builder._build_input(query) - attachments = input_data["attachments"] + input_data = PipelineAdapter._build_input(query) - image_attachment = next(item for item in attachments if item["type"] == "image" and item["source"] == "base64") - file_attachment = next(item for item in attachments if item["type"] == "file" and item["source"] == "base64") - chain_attachment = next(item for item in attachments if item["source"] == "message_chain") + assert input_data.contents[0].text == "see attached" + assert input_data.contents[1].image_base64 == "data:image/png;base64,aGVsbG8=" + assert input_data.contents[2].file_base64 == "data:text/plain;base64,aGVsbG8=" - assert image_attachment["content"] == "data:image/png;base64,aGVsbG8=" - assert image_attachment["content_type"] == "image/png" - assert file_attachment["content"] == "data:text/plain;base64,aGVsbG8=" - assert file_attachment["content_type"] == "text/plain" - assert file_attachment["name"] == "hello.txt" - assert chain_attachment["content"] == "data:image/jpeg;base64,aGVsbG8=" - assert chain_attachment["content_type"] == "image/jpeg" + artifact_types = [attachment.artifact_type for attachment in input_data.attachments] + assert artifact_types == ["image", "file", "image"] + assert input_data.attachments[1].name == "hello.txt" @pytest.fixture(autouse=True) @@ -262,7 +255,6 @@ async def clean_agent_state(): """Reset all singleton stores and create a test database engine.""" from langbot.pkg.entity.persistence.base import Base - reset_state_store() reset_persistent_state_store() registry = get_session_registry() for session in await registry.list_active_runs(): @@ -280,7 +272,6 @@ async def clean_agent_state(): # Cleanup for session in await registry.list_active_runs(): await registry.unregister(session["run_id"]) - reset_state_store() reset_persistent_state_store() await test_engine.dispose() @@ -378,7 +369,7 @@ async def test_orchestrator_packages_max_round_without_mutating_query(clean_agen "message 3", "response 3", ] - # Also in adapter.adapter_messages for transition runners + # Also exposed in adapter.adapter_messages for runners that consume adapter bootstrap. assert [message["content"] for message in context["adapter"]["adapter_messages"]] == [ "message 2", "response 2", @@ -453,10 +444,7 @@ async def test_orchestrator_applies_state_updates_and_suppresses_protocol_event( messages = [message async for message in orchestrator.run_from_query(query)] assert [message.content for message in messages] == ["state saved"] - # Note: State is now persisted via PersistentStateStore, not in-memory RunnerScopedStateStore - # The legacy behavior of updating query.session.using_conversation.uuid is no longer supported - # when using event-first path via run_from_query() -> run() - # Instead, state is persisted to the database via PersistentStateStore + # State is persisted to the database via PersistentStateStore. @pytest.mark.asyncio diff --git a/tests/unit_tests/agent/test_resource_builder.py b/tests/unit_tests/agent/test_resource_builder.py index e883965c..f7d546a0 100644 --- a/tests/unit_tests/agent/test_resource_builder.py +++ b/tests/unit_tests/agent/test_resource_builder.py @@ -7,6 +7,7 @@ from unittest.mock import AsyncMock, Mock import pytest from langbot.pkg.agent.runner.descriptor import AgentRunnerDescriptor +from langbot.pkg.agent.runner.pipeline_adapter import PipelineAdapter from langbot.pkg.agent.runner.resource_builder import AgentResourceBuilder @@ -47,6 +48,16 @@ def make_query(runner_config: dict, *, variables: dict | None = None, use_llm_mo }, variables=variables or {}, use_llm_model_uuid=use_llm_model_uuid, + pipeline_uuid='pipeline_001', + ) + + +async def build_resources(app, query, descriptor): + binding = PipelineAdapter.pipeline_config_to_binding(query, descriptor.id) + return await AgentResourceBuilder(app).build_resources_from_binding( + event=Mock(), + binding=binding, + descriptor=descriptor, ) @@ -93,7 +104,7 @@ async def test_build_models_authorizes_config_declared_llm_and_rerank_models(app 'rerank-model': 'rerank', }) - resources = await AgentResourceBuilder(app).build_resources(query, descriptor) + resources = await build_resources(app, query, descriptor) assert resources['models'] == [ {'model_id': 'primary', 'model_type': 'llm', 'provider': 'test-provider'}, @@ -120,7 +131,7 @@ async def test_build_models_still_honors_manifest_permissions(app): 'rerank-model': 'rerank', }) - resources = await AgentResourceBuilder(app).build_resources(query, descriptor) + resources = await build_resources(app, query, descriptor) assert resources['models'] == [] app.model_mgr.get_model_by_uuid.assert_not_awaited() @@ -143,6 +154,6 @@ async def test_build_models_deduplicates_query_and_config_models(app): use_llm_model_uuid='primary', ) - resources = await AgentResourceBuilder(app).build_resources(query, descriptor) + resources = await build_resources(app, query, descriptor) assert [model['model_id'] for model in resources['models']] == ['primary', 'fallback'] diff --git a/tests/unit_tests/agent/test_result_normalizer.py b/tests/unit_tests/agent/test_result_normalizer.py index 2ec86580..263b6106 100644 --- a/tests/unit_tests/agent/test_result_normalizer.py +++ b/tests/unit_tests/agent/test_result_normalizer.py @@ -242,6 +242,7 @@ class TestNormalizeNonMessageResults: result_dict = { 'type': 'state.updated', 'data': { + 'scope': 'conversation', 'key': 'external_conversation_id', 'value': 'abc123', }, @@ -340,4 +341,4 @@ class TestNormalizeInvalidResults: }, } result = await normalizer.normalize(result_dict, descriptor) - assert result is None \ No newline at end of file + assert result is None diff --git a/tests/unit_tests/agent/test_state_store.py b/tests/unit_tests/agent/test_state_store.py index 41dbd958..88e9d5fe 100644 --- a/tests/unit_tests/agent/test_state_store.py +++ b/tests/unit_tests/agent/test_state_store.py @@ -1,15 +1,23 @@ -"""Tests for runner scoped state store.""" +"""Tests for persistent AgentRunner state store.""" from __future__ import annotations -from langbot.pkg.agent.runner.state_store import ( - RunnerScopedStateStore, - get_state_store, - reset_state_store, - VALID_STATE_SCOPES, - STATE_KEY_ALIASES, -) +import os +import tempfile + +import pytest +from sqlalchemy.ext.asyncio import create_async_engine + from langbot.pkg.agent.runner.descriptor import AgentRunnerDescriptor -from langbot.pkg.agent.runner.host_models import AgentBinding, BindingScope, StatePolicy +from langbot.pkg.agent.runner.host_models import BindingScope, StatePolicy +from langbot.pkg.agent.runner.persistent_state_store import PersistentStateStore +from langbot.pkg.agent.runner.state_scope import ( + STATE_KEY_ALIASES, + VALID_STATE_SCOPES, + build_state_context, + build_state_scope_key, + get_binding_identity, + normalize_state_key, +) def make_descriptor(runner_id: str = 'plugin:test/my-runner/default') -> AgentRunnerDescriptor: @@ -26,469 +34,6 @@ def make_descriptor(runner_id: str = 'plugin:test/my-runner/default') -> AgentRu ) -class FakeSession: - """Fake session for testing.""" - def __init__(self): - self.launcher_type = type('LauncherType', (), {'value': 'telegram'})() - self.launcher_id = 'group_123' - self.using_conversation = None - - -class FakeConversation: - """Fake conversation for testing.""" - def __init__(self, uuid: str = 'conv_abc', create_time: int | None = None): - self.uuid = uuid - self.create_time = create_time - - -class FakeQuery: - """Fake query for testing.""" - def __init__( - self, - bot_uuid: str = 'bot_001', - pipeline_uuid: str = 'pipeline_002', - sender_id: str = 'user_123', - session: FakeSession | None = None, - ): - self.bot_uuid = bot_uuid - self.pipeline_uuid = pipeline_uuid - self.sender_id = sender_id - self.session = session or FakeSession() - - -class FakeLogger: - """Fake logger for testing.""" - def __init__(self): - self.debugs = [] - self.warnings = [] - - def debug(self, msg): - self.debugs.append(msg) - - def warning(self, msg): - self.warnings.append(msg) - - -class FakeBinding: - """Fake binding for testing event-first state.""" - def __init__( - self, - binding_id: str = 'binding_001', - state_policy: StatePolicy | None = None, - ): - self.binding_id = binding_id - self.scope = BindingScope(scope_type='pipeline', scope_id='pipeline_001') - self.state_policy = state_policy or StatePolicy() - - -class TestStateStoreBuildSnapshot: - """Tests for build_snapshot.""" - - def test_build_snapshot_returns_four_scopes(self): - """Snapshot should have all four scope keys.""" - store = RunnerScopedStateStore() - descriptor = make_descriptor() - query = FakeQuery() - - snapshot = store.build_snapshot(query, descriptor) - - assert 'conversation' in snapshot - assert 'actor' in snapshot - assert 'subject' in snapshot - assert 'runner' in snapshot - assert snapshot['conversation'] == {} - assert snapshot['actor'] == {} - assert snapshot['subject'] == {} - assert snapshot['runner'] == {} - - def test_build_snapshot_seeds_conversation_id(self): - """Snapshot should seed external.conversation_id from existing conversation.""" - store = RunnerScopedStateStore() - descriptor = make_descriptor() - conversation = FakeConversation(uuid='conv_existing') - session = FakeSession() - session.using_conversation = conversation - query = FakeQuery(session=session) - - snapshot = store.build_snapshot(query, descriptor) - - assert snapshot['conversation']['external.conversation_id'] == 'conv_existing' - - def test_build_snapshot_returns_stored_values(self): - """Snapshot should return previously stored values.""" - store = RunnerScopedStateStore() - descriptor = make_descriptor() - query = FakeQuery() - logger = FakeLogger() - - # Store some values - store.apply_update(query, descriptor, 'conversation', 'external.conversation_id', 'conv_001', logger) - store.apply_update(query, descriptor, 'actor', 'preferred_language', 'zh', logger) - store.apply_update(query, descriptor, 'subject', 'group_topic', 'tech', logger) - store.apply_update(query, descriptor, 'runner', 'cache_version', 'v1', logger) - - # Build snapshot - snapshot = store.build_snapshot(query, descriptor) - - assert snapshot['conversation']['external.conversation_id'] == 'conv_001' - assert snapshot['actor']['preferred_language'] == 'zh' - assert snapshot['subject']['group_topic'] == 'tech' - assert snapshot['runner']['cache_version'] == 'v1' - - def test_build_snapshot_isolation_by_runner_id(self): - """Different runner IDs should have isolated state.""" - store = RunnerScopedStateStore() - descriptor1 = make_descriptor('plugin:test/runner-a/default') - descriptor2 = make_descriptor('plugin:test/runner-b/default') - query = FakeQuery() - logger = FakeLogger() - - # Store for runner-a - store.apply_update(query, descriptor1, 'conversation', 'external.conversation_id', 'conv_a', logger) - - # Build snapshot for runner-b - snapshot_b = store.build_snapshot(query, descriptor2) - - # runner-b should not see runner-a's state - assert snapshot_b['conversation'] == {} - - -class TestStateStoreApplyUpdate: - """Tests for apply_update.""" - - def test_apply_update_conversation_scope(self): - """Apply update to conversation scope.""" - store = RunnerScopedStateStore() - descriptor = make_descriptor() - query = FakeQuery() - logger = FakeLogger() - - result = store.apply_update( - query, descriptor, 'conversation', 'external.conversation_id', 'conv_new', logger - ) - - assert result is True - assert len(logger.warnings) == 0 - - def test_apply_update_actor_scope(self): - """Apply update to actor scope.""" - store = RunnerScopedStateStore() - descriptor = make_descriptor() - query = FakeQuery() - logger = FakeLogger() - - result = store.apply_update(query, descriptor, 'actor', 'preferred_language', 'en', logger) - - assert result is True - assert len(logger.warnings) == 0 - - def test_apply_update_subject_scope(self): - """Apply update to subject scope.""" - store = RunnerScopedStateStore() - descriptor = make_descriptor() - query = FakeQuery() - logger = FakeLogger() - - result = store.apply_update(query, descriptor, 'subject', 'group_topic', 'general', logger) - - assert result is True - assert len(logger.warnings) == 0 - - def test_apply_update_runner_scope(self): - """Apply update to runner scope.""" - store = RunnerScopedStateStore() - descriptor = make_descriptor() - query = FakeQuery() - logger = FakeLogger() - - result = store.apply_update(query, descriptor, 'runner', 'cache_version', 'v2', logger) - - assert result is True - assert len(logger.warnings) == 0 - - def test_apply_update_invalid_scope(self): - """Invalid scope should return False and log warning.""" - store = RunnerScopedStateStore() - descriptor = make_descriptor() - query = FakeQuery() - logger = FakeLogger() - - result = store.apply_update(query, descriptor, 'invalid_scope', 'key', 'value', logger) - - assert result is False - assert len(logger.warnings) == 1 - assert 'invalid scope' in logger.warnings[0] - - def test_apply_update_state_key_alias(self): - """Alias key conversation_id should be mapped to external.conversation_id.""" - store = RunnerScopedStateStore() - descriptor = make_descriptor() - query = FakeQuery() - logger = FakeLogger() - - result = store.apply_update(query, descriptor, 'conversation', 'conversation_id', 'conv_old', logger) - - assert result is True - assert 'mapped to' in logger.debugs[0] - - # Check mapped key is stored - snapshot = store.build_snapshot(query, descriptor) - assert snapshot['conversation']['external.conversation_id'] == 'conv_old' - - def test_apply_update_syncs_conversation_uuid(self): - """external.conversation_id update should sync to query.session.using_conversation.uuid.""" - store = RunnerScopedStateStore() - descriptor = make_descriptor() - conversation = FakeConversation(uuid='conv_old') - session = FakeSession() - session.using_conversation = conversation - query = FakeQuery(session=session) - logger = FakeLogger() - - result = store.apply_update( - query, descriptor, 'conversation', 'external.conversation_id', 'conv_new', logger - ) - - assert result is True - assert conversation.uuid == 'conv_new' # Synced - assert 'Synced' in logger.debugs[-1] - - -class TestStateStoreScopeIdentity: - """Tests for scope identity isolation.""" - - def test_conversation_scope_includes_runner_id(self): - """Conversation scope key should include runner_id.""" - store = RunnerScopedStateStore() - descriptor_a = make_descriptor('plugin:test/runner-a/default') - descriptor_b = make_descriptor('plugin:test/runner-b/default') - query = FakeQuery() - logger = FakeLogger() - - # Store for runner-a - store.apply_update(query, descriptor_a, 'conversation', 'key', 'value_a', logger) - - # runner-b should not see runner-a's conversation state - snapshot_b = store.build_snapshot(query, descriptor_b) - assert snapshot_b['conversation'] == {} - - # runner-a should see its own state - snapshot_a = store.build_snapshot(query, descriptor_a) - assert snapshot_a['conversation']['key'] == 'value_a' - - def test_actor_scope_includes_sender_id(self): - """Actor scope should be isolated per sender_id.""" - store = RunnerScopedStateStore() - descriptor = make_descriptor() - query_user1 = FakeQuery(sender_id='user_001') - query_user2 = FakeQuery(sender_id='user_002') - logger = FakeLogger() - - # Store for user_001 - store.apply_update(query_user1, descriptor, 'actor', 'preferred_language', 'en', logger) - - # user_002 should not see user_001's actor state - snapshot_user2 = store.build_snapshot(query_user2, descriptor) - assert snapshot_user2['actor'] == {} - - # user_001 should see its own state - snapshot_user1 = store.build_snapshot(query_user1, descriptor) - assert snapshot_user1['actor']['preferred_language'] == 'en' - - def test_subject_scope_includes_launcher(self): - """Subject scope should be isolated per launcher_type + launcher_id.""" - store = RunnerScopedStateStore() - descriptor = make_descriptor() - session1 = FakeSession() - session1.launcher_type = type('LauncherType', (), {'value': 'telegram'})() - session1.launcher_id = 'group_001' - session2 = FakeSession() - session2.launcher_type = type('LauncherType', (), {'value': 'telegram'})() - session2.launcher_id = 'group_002' - query1 = FakeQuery(session=session1) - query2 = FakeQuery(session=session2) - logger = FakeLogger() - - # Store for group_001 - store.apply_update(query1, descriptor, 'subject', 'group_topic', 'tech', logger) - - # group_002 should not see group_001's subject state - snapshot2 = store.build_snapshot(query2, descriptor) - assert snapshot2['subject'] == {} - - # group_001 should see its own state - snapshot1 = store.build_snapshot(query1, descriptor) - assert snapshot1['subject']['group_topic'] == 'tech' - - def test_conversation_scope_not_dependent_on_external_uuid(self): - """Conversation scope identity should NOT use external conversation uuid. - - Using external uuid as scope key would cause state loss when - runner updates external.conversation_id: - - First run: state saved under key with old uuid - - Runner returns new external.conversation_id, synced to conversation.uuid - - Next run: scope key uses new uuid, previous state inaccessible - - This test verifies scope key stability when conversation.uuid changes. - """ - store = RunnerScopedStateStore() - descriptor = make_descriptor() - # Use stable create_time as conversation identity - conversation = FakeConversation(uuid='conv_initial', create_time=12345) - session = FakeSession() - session.using_conversation = conversation - query = FakeQuery(session=session) - logger = FakeLogger() - - # Store some conversation state (e.g., memory.summary, external.thread_id) - store.apply_update( - query, descriptor, 'conversation', 'memory.summary', 'Summary content', logger - ) - store.apply_update( - query, descriptor, 'conversation', 'external.thread_id', 'thread_abc', logger - ) - - # Simulate runner returning new external.conversation_id - store.apply_update( - query, descriptor, 'conversation', 'external.conversation_id', 'conv_new_from_runner', logger - ) - - # conversation.uuid is synced to new value - assert conversation.uuid == 'conv_new_from_runner' - - # Build new snapshot - previous state should still be accessible - # because scope key is based on stable identity (create_time), not external uuid - snapshot = store.build_snapshot(query, descriptor) - - # All previously stored state should still be present - assert snapshot['conversation']['memory.summary'] == 'Summary content' - assert snapshot['conversation']['external.thread_id'] == 'thread_abc' - assert snapshot['conversation']['external.conversation_id'] == 'conv_new_from_runner' - - def test_conversation_scope_with_create_time_stability(self): - """Conversation scope key should use create_time for stability. - - When create_time is available, it should be used as stable identity. - Different conversations with same launcher but different create_time - should have different scope keys. - """ - store = RunnerScopedStateStore() - descriptor = make_descriptor() - - # Two conversations with same launcher but different create_time - conversation1 = FakeConversation(uuid='conv_1', create_time=10000) - conversation2 = FakeConversation(uuid='conv_2', create_time=20000) - session1 = FakeSession() - session1.using_conversation = conversation1 - session2 = FakeSession() - session2.using_conversation = conversation2 - - query1 = FakeQuery(session=session1) - query2 = FakeQuery(session=session2) - logger = FakeLogger() - - # Store for conversation1 - store.apply_update(query1, descriptor, 'conversation', 'key', 'value1', logger) - - # conversation2 should not see conversation1's state (different create_time) - # Note: snapshot2 may have seeded external.conversation_id from conversation2.uuid - snapshot2 = store.build_snapshot(query2, descriptor) - assert 'key' not in snapshot2['conversation'] # No state from conversation1 - - # conversation1 should see its own state - snapshot1 = store.build_snapshot(query1, descriptor) - assert snapshot1['conversation']['key'] == 'value1' - - def test_conversation_scope_without_create_time_uses_launcher_identity(self): - """Conversation scope without create_time should use launcher identity. - - When create_time is not available, scope key should be based on - launcher (person/group) identity, assuming one active conversation - per launcher context. - """ - store = RunnerScopedStateStore() - descriptor = make_descriptor() - - # Conversation without create_time - conversation = FakeConversation(uuid='conv_1', create_time=None) - session = FakeSession() - session.using_conversation = conversation - query = FakeQuery(session=session) - logger = FakeLogger() - - # Store some state - store.apply_update(query, descriptor, 'conversation', 'key', 'value', logger) - - # State should be accessible - snapshot = store.build_snapshot(query, descriptor) - assert snapshot['conversation']['key'] == 'value' - - # Update external.conversation_id - store.apply_update( - query, descriptor, 'conversation', 'external.conversation_id', 'conv_2', logger - ) - - # State should still be accessible (scope key unchanged) - snapshot = store.build_snapshot(query, descriptor) - assert snapshot['conversation']['key'] == 'value' - assert snapshot['conversation']['external.conversation_id'] == 'conv_2' - - -class TestStateStoreGlobalSingleton: - """Tests for global singleton functions.""" - - def test_get_state_store_returns_singleton(self): - """get_state_store should return the same instance.""" - reset_state_store() - store1 = get_state_store() - store2 = get_state_store() - - assert store1 is store2 - - def test_reset_state_store_clears_singleton(self): - """reset_state_store should clear the singleton.""" - store1 = get_state_store() - reset_state_store() - store2 = get_state_store() - - assert store1 is not store2 - - def test_reset_state_store_clears_data(self): - """reset_state_store should clear stored data.""" - store = get_state_store() - descriptor = make_descriptor() - query = FakeQuery() - logger = FakeLogger() - - # Store some data - store.apply_update(query, descriptor, 'conversation', 'key', 'value', logger) - snapshot = store.build_snapshot(query, descriptor) - assert snapshot['conversation']['key'] == 'value' - - # Reset - reset_state_store() - store = get_state_store() - - # Data should be gone - snapshot = store.build_snapshot(query, descriptor) - assert snapshot['conversation'] == {} - - -class TestConstants: - """Tests for module constants.""" - - def test_valid_state_scopes(self): - """VALID_STATE_SCOPES should have four scopes.""" - assert VALID_STATE_SCOPES == ('conversation', 'actor', 'subject', 'runner') - - def test_state_key_aliases(self): - """STATE_KEY_ALIASES should map conversation_id.""" - assert STATE_KEY_ALIASES == {'conversation_id': 'external.conversation_id'} - - -# ========== Event-first Protocol v1 tests ========== - - class FakeActorContext: """Fake actor context for event testing.""" def __init__(self, actor_type: str = 'user', actor_id: str = 'user_123', actor_name: str = 'Test User'): @@ -499,44 +44,24 @@ class FakeActorContext: class FakeSubjectContext: """Fake subject context for event testing.""" - def __init__(self, subject_type: str = 'message', subject_id: str = 'msg_001', data: dict = None): + def __init__(self, subject_type: str = 'message', subject_id: str = 'msg_001', data: dict | None = None): self.subject_type = subject_type self.subject_id = subject_id self.data = data or {} -class FakeAgentInput: - """Fake agent input for event testing.""" - def __init__(self, text: str = 'Hello'): - self.text = text - self.contents = [] - self.message_chain = None - self.attachments = [] - - -class FakeDeliveryContext: - """Fake delivery context for event testing.""" - def __init__(self): - self.surface = 'platform' - self.reply_target = None - self.supports_streaming = True - self.supports_edit = False - self.supports_reaction = False - self.max_message_size = None - self.platform_capabilities = {} - - class FakeEventEnvelope: """Fake event envelope for testing event-first state.""" def __init__( self, event_id: str = 'evt_001', event_type: str = 'message.received', - conversation_id: str = 'conv_001', + conversation_id: str | None = 'conv_001', actor: FakeActorContext | None = None, subject: FakeSubjectContext | None = None, bot_id: str = 'bot_001', workspace_id: str = 'ws_001', + thread_id: str | None = None, ): self.event_id = event_id self.event_type = event_type @@ -545,609 +70,92 @@ class FakeEventEnvelope: self.bot_id = bot_id self.workspace_id = workspace_id self.conversation_id = conversation_id - self.thread_id = None + self.thread_id = thread_id self.actor = actor or FakeActorContext() self.subject = subject - self.input = FakeAgentInput() - self.delivery = FakeDeliveryContext() self.raw_ref = None -class TestStateStoreEventFirstBuildSnapshot: - """Tests for build_snapshot_from_event.""" +class FakeBinding: + """Fake binding for testing state.""" + def __init__( + self, + binding_id: str = 'binding_001', + state_policy: StatePolicy | None = None, + scope_type: str = 'pipeline', + scope_id: str = 'pipeline_001', + ): + self.binding_id = binding_id + self.scope = BindingScope(scope_type=scope_type, scope_id=scope_id) + self.state_policy = state_policy or StatePolicy() - def test_build_snapshot_returns_four_scopes(self): - """Snapshot from event should have all four scope keys.""" - store = RunnerScopedStateStore() - descriptor = make_descriptor() - event = FakeEventEnvelope() - binding = FakeBinding() - - snapshot = store.build_snapshot_from_event(event, binding, descriptor) - - assert 'conversation' in snapshot - assert 'actor' in snapshot - assert 'subject' in snapshot - assert 'runner' in snapshot - - def test_build_snapshot_seeds_conversation_id(self): - """Snapshot should seed external.conversation_id from event.conversation_id.""" - store = RunnerScopedStateStore() - descriptor = make_descriptor() - event = FakeEventEnvelope(conversation_id='conv_test') - binding = FakeBinding() - - snapshot = store.build_snapshot_from_event(event, binding, descriptor) - - assert snapshot['conversation']['external.conversation_id'] == 'conv_test' - - def test_build_snapshot_without_conversation_id(self): - """Snapshot without conversation_id should have empty conversation scope.""" - store = RunnerScopedStateStore() - descriptor = make_descriptor() - event = FakeEventEnvelope(conversation_id=None) - binding = FakeBinding() - - snapshot = store.build_snapshot_from_event(event, binding, descriptor) - - assert snapshot['conversation'] == {} - - def test_build_snapshot_without_actor(self): - """Snapshot without actor should have empty actor scope.""" - store = RunnerScopedStateStore() - descriptor = make_descriptor() - event = FakeEventEnvelope(actor=None) - binding = FakeBinding() - - snapshot = store.build_snapshot_from_event(event, binding, descriptor) - - assert snapshot['actor'] == {} - - def test_build_snapshot_without_subject(self): - """Snapshot without subject should have empty subject scope.""" - store = RunnerScopedStateStore() - descriptor = make_descriptor() - event = FakeEventEnvelope(subject=None) - binding = FakeBinding() - - snapshot = store.build_snapshot_from_event(event, binding, descriptor) - - assert snapshot['subject'] == {} - - def test_build_snapshot_returns_stored_values(self): - """Snapshot should return previously stored values via event.""" - store = RunnerScopedStateStore() - descriptor = make_descriptor() - event = FakeEventEnvelope(conversation_id='conv_001', actor=FakeActorContext(actor_id='user_001')) - # Use binding with all scopes enabled - binding = FakeBinding(state_policy=StatePolicy(state_scopes=['conversation', 'actor', 'subject', 'runner'])) - logger = FakeLogger() - - # Store values using event-first methods - store.apply_update_from_event(event, binding, descriptor, 'conversation', 'memory.summary', 'Summary', logger) - store.apply_update_from_event(event, binding, descriptor, 'actor', 'preferred_language', 'en', logger) - store.apply_update_from_event(event, binding, descriptor, 'runner', 'cache_version', 'v1', logger) - - # Build snapshot - snapshot = store.build_snapshot_from_event(event, binding, descriptor) - - assert snapshot['conversation']['memory.summary'] == 'Summary' - assert snapshot['actor']['preferred_language'] == 'en' - assert snapshot['runner']['cache_version'] == 'v1' - - def test_build_snapshot_isolation_by_runner_id(self): - """Different runner IDs should have isolated state in event-first mode.""" - store = RunnerScopedStateStore() - descriptor1 = make_descriptor('plugin:test/runner-a/default') - descriptor2 = make_descriptor('plugin:test/runner-b/default') - event = FakeEventEnvelope(conversation_id='conv_001') - binding = FakeBinding() - logger = FakeLogger() - - # Store for runner-a - store.apply_update_from_event(event, binding, descriptor1, 'conversation', 'key', 'value_a', logger) - - # Build snapshot for runner-b - snapshot_b = store.build_snapshot_from_event(event, binding, descriptor2) - - # runner-b should not see runner-a's state (only external.conversation_id seeded) - assert snapshot_b['conversation'] == {'external.conversation_id': 'conv_001'} - - -class TestStateStoreEventFirstApplyUpdate: - """Tests for apply_update_from_event.""" - - def test_apply_update_conversation_scope(self): - """Apply update to conversation scope via event.""" - store = RunnerScopedStateStore() - descriptor = make_descriptor() - event = FakeEventEnvelope(conversation_id='conv_001') - binding = FakeBinding() - logger = FakeLogger() - - result = store.apply_update_from_event( - event, binding, descriptor, 'conversation', 'memory.summary', 'Summary', logger - ) - - assert result is True - assert len(logger.warnings) == 0 - - def test_apply_update_actor_scope(self): - """Apply update to actor scope via event.""" - store = RunnerScopedStateStore() - descriptor = make_descriptor() - event = FakeEventEnvelope(actor=FakeActorContext(actor_id='user_001')) - binding = FakeBinding() - logger = FakeLogger() - - result = store.apply_update_from_event( - event, binding, descriptor, 'actor', 'preferred_language', 'en', logger - ) - - assert result is True - assert len(logger.warnings) == 0 - - def test_apply_update_subject_scope(self): - """Apply update to subject scope via event.""" - store = RunnerScopedStateStore() - descriptor = make_descriptor() - event = FakeEventEnvelope(subject=FakeSubjectContext(subject_id='msg_001')) - binding = FakeBinding(state_policy=StatePolicy(state_scopes=['conversation', 'actor', 'subject', 'runner'])) - logger = FakeLogger() - - result = store.apply_update_from_event( - event, binding, descriptor, 'subject', 'group_topic', 'general', logger - ) - - assert result is True - assert len(logger.warnings) == 0 - - def test_apply_update_runner_scope(self): - """Apply update to runner scope via event (always works).""" - store = RunnerScopedStateStore() - descriptor = make_descriptor() - event = FakeEventEnvelope() # No special identity needed - binding = FakeBinding(state_policy=StatePolicy(state_scopes=['conversation', 'actor', 'subject', 'runner'])) - logger = FakeLogger() - - result = store.apply_update_from_event( - event, binding, descriptor, 'runner', 'cache_version', 'v2', logger - ) - - assert result is True - assert len(logger.warnings) == 0 - - def test_apply_update_invalid_scope(self): - """Invalid scope should return False and log warning.""" - store = RunnerScopedStateStore() - descriptor = make_descriptor() - event = FakeEventEnvelope() - binding = FakeBinding() - logger = FakeLogger() - - result = store.apply_update_from_event( - event, binding, descriptor, 'invalid_scope', 'key', 'value', logger - ) - - assert result is False - assert len(logger.warnings) == 1 - assert 'invalid scope' in logger.warnings[0] - - def test_apply_update_conversation_missing_conversation_id(self): - """Conversation scope without conversation_id should return False.""" - store = RunnerScopedStateStore() - descriptor = make_descriptor() - event = FakeEventEnvelope(conversation_id=None) - binding = FakeBinding() - logger = FakeLogger() - - result = store.apply_update_from_event( - event, binding, descriptor, 'conversation', 'key', 'value', logger - ) - - assert result is False - assert len(logger.warnings) == 1 - assert 'missing identity' in logger.warnings[0] - - def test_apply_update_actor_missing_actor_id(self): - """Actor scope without actor_id should return False.""" - store = RunnerScopedStateStore() - descriptor = make_descriptor() - event = FakeEventEnvelope(actor=FakeActorContext(actor_id=None)) - binding = FakeBinding() - logger = FakeLogger() - - result = store.apply_update_from_event( - event, binding, descriptor, 'actor', 'key', 'value', logger - ) - - assert result is False - assert len(logger.warnings) == 1 - assert 'missing identity' in logger.warnings[0] - - def test_apply_update_subject_missing_subject_id(self): - """Subject scope without subject_id should return False.""" - store = RunnerScopedStateStore() - descriptor = make_descriptor() - event = FakeEventEnvelope(subject=FakeSubjectContext(subject_id=None)) - binding = FakeBinding(state_policy=StatePolicy(state_scopes=['conversation', 'actor', 'subject', 'runner'])) - logger = FakeLogger() - - result = store.apply_update_from_event( - event, binding, descriptor, 'subject', 'key', 'value', logger - ) - - assert result is False - assert len(logger.warnings) == 1 - assert 'missing identity' in logger.warnings[0] - - def test_apply_update_state_key_alias(self): - """Alias key conversation_id should be mapped to external.conversation_id.""" - store = RunnerScopedStateStore() - descriptor = make_descriptor() - event = FakeEventEnvelope(conversation_id='conv_001') - binding = FakeBinding() - logger = FakeLogger() - - result = store.apply_update_from_event( - event, binding, descriptor, 'conversation', 'conversation_id', 'conv_old', logger - ) - - assert result is True - assert 'mapped to' in logger.debugs[0] - - # Check mapped key is stored - snapshot = store.build_snapshot_from_event(event, binding, descriptor) - assert snapshot['conversation']['external.conversation_id'] == 'conv_old' - - -class TestStateStoreEventFirstScopeIsolation: - """Tests for scope isolation in event-first mode.""" - - def test_conversation_scope_isolated_by_conversation_id(self): - """Conversation scope should be isolated by conversation_id.""" - store = RunnerScopedStateStore() - descriptor = make_descriptor() - binding = FakeBinding() - event1 = FakeEventEnvelope(conversation_id='conv_001') - event2 = FakeEventEnvelope(conversation_id='conv_002') - logger = FakeLogger() - - # Store for conv_001 - store.apply_update_from_event(event1, binding, descriptor, 'conversation', 'key', 'value1', logger) - - # conv_002 should not see conv_001's state - snapshot2 = store.build_snapshot_from_event(event2, binding, descriptor) - assert snapshot2['conversation'] == {'external.conversation_id': 'conv_002'} - - # conv_001 should see its own state - snapshot1 = store.build_snapshot_from_event(event1, binding, descriptor) - assert snapshot1['conversation']['key'] == 'value1' - - def test_actor_scope_isolated_by_actor_id(self): - """Actor scope should be isolated by actor_type + actor_id.""" - store = RunnerScopedStateStore() - descriptor = make_descriptor() - binding = FakeBinding() - event1 = FakeEventEnvelope(actor=FakeActorContext(actor_type='user', actor_id='user_001')) - event2 = FakeEventEnvelope(actor=FakeActorContext(actor_type='user', actor_id='user_002')) - logger = FakeLogger() - - # Store for user_001 - store.apply_update_from_event(event1, binding, descriptor, 'actor', 'preferred_language', 'en', logger) - - # user_002 should not see user_001's state - snapshot2 = store.build_snapshot_from_event(event2, binding, descriptor) - assert snapshot2['actor'] == {} - - # user_001 should see its own state - snapshot1 = store.build_snapshot_from_event(event1, binding, descriptor) - assert snapshot1['actor']['preferred_language'] == 'en' - - def test_subject_scope_isolated_by_subject_id(self): - """Subject scope should be isolated by subject_type + subject_id.""" - store = RunnerScopedStateStore() - descriptor = make_descriptor() - binding = FakeBinding(state_policy=StatePolicy(state_scopes=['conversation', 'actor', 'subject', 'runner'])) - event1 = FakeEventEnvelope(subject=FakeSubjectContext(subject_type='message', subject_id='msg_001')) - event2 = FakeEventEnvelope(subject=FakeSubjectContext(subject_type='message', subject_id='msg_002')) - logger = FakeLogger() - - # Store for msg_001 - store.apply_update_from_event(event1, binding, descriptor, 'subject', 'key', 'value1', logger) - - # msg_002 should not see msg_001's state - snapshot2 = store.build_snapshot_from_event(event2, binding, descriptor) - assert snapshot2['subject'] == {} - - # msg_001 should see its own state - snapshot1 = store.build_snapshot_from_event(event1, binding, descriptor) - assert snapshot1['subject']['key'] == 'value1' - - def test_runner_scope_shared_within_runner(self): - """Runner scope should be shared within same runner across all events.""" - store = RunnerScopedStateStore() - descriptor = make_descriptor() - binding = FakeBinding(state_policy=StatePolicy(state_scopes=['conversation', 'actor', 'subject', 'runner'])) - event1 = FakeEventEnvelope(conversation_id='conv_001') - event2 = FakeEventEnvelope(conversation_id='conv_002') - logger = FakeLogger() - - # Store for event1's runner scope - store.apply_update_from_event(event1, binding, descriptor, 'runner', 'cache_version', 'v1', logger) - - # event2 should see the same runner state - snapshot2 = store.build_snapshot_from_event(event2, binding, descriptor) - assert snapshot2['runner']['cache_version'] == 'v1' - - -class TestStateStoreEventFirstRoundTrip: - """Tests for state round trip: store -> read via event-first.""" - - def test_state_round_trip_conversation(self): - """State stored via event should be readable via event.""" - store = RunnerScopedStateStore() - descriptor = make_descriptor() - event = FakeEventEnvelope(conversation_id='conv_001') - binding = FakeBinding() - logger = FakeLogger() - - # Store - store.apply_update_from_event(event, binding, descriptor, 'conversation', 'memory.summary', 'Summary', logger) - - # Read - snapshot = store.build_snapshot_from_event(event, binding, descriptor) - assert snapshot['conversation']['memory.summary'] == 'Summary' - - def test_state_round_trip_actor(self): - """Actor state stored via event should be readable via event.""" - store = RunnerScopedStateStore() - descriptor = make_descriptor() - event = FakeEventEnvelope(actor=FakeActorContext(actor_id='user_001')) - binding = FakeBinding() - logger = FakeLogger() - - # Store - store.apply_update_from_event(event, binding, descriptor, 'actor', 'preferred_language', 'zh', logger) - - # Read - snapshot = store.build_snapshot_from_event(event, binding, descriptor) - assert snapshot['actor']['preferred_language'] == 'zh' - - def test_state_round_trip_subject(self): - """Subject state stored via event should be readable via event.""" - store = RunnerScopedStateStore() - descriptor = make_descriptor() - event = FakeEventEnvelope(subject=FakeSubjectContext(subject_id='msg_001')) - binding = FakeBinding(state_policy=StatePolicy(state_scopes=['conversation', 'actor', 'subject', 'runner'])) - logger = FakeLogger() - - # Store - store.apply_update_from_event(event, binding, descriptor, 'subject', 'group_topic', 'tech', logger) - - # Read - snapshot = store.build_snapshot_from_event(event, binding, descriptor) - assert snapshot['subject']['group_topic'] == 'tech' - - def test_state_round_trip_runner(self): - """Runner state stored via event should be readable via event.""" - store = RunnerScopedStateStore() - descriptor = make_descriptor() - event = FakeEventEnvelope() - binding = FakeBinding(state_policy=StatePolicy(state_scopes=['conversation', 'actor', 'subject', 'runner'])) - logger = FakeLogger() - - # Store - store.apply_update_from_event(event, binding, descriptor, 'runner', 'cache_version', 'v2', logger) - - # Read - snapshot = store.build_snapshot_from_event(event, binding, descriptor) - assert snapshot['runner']['cache_version'] == 'v2' - - -class TestStateStoreBindingIsolation: - """Tests for binding isolation in event-first state.""" - - def test_conversation_state_isolated_by_binding_id(self): - """Same runner, same conversation_id, different binding_id: conversation state isolated.""" - store = RunnerScopedStateStore() - descriptor = make_descriptor() # Same runner - event = FakeEventEnvelope(conversation_id='conv_001') - binding_a = FakeBinding(binding_id='binding_a') - binding_b = FakeBinding(binding_id='binding_b') - logger = FakeLogger() - - # Store for binding_a - store.apply_update_from_event(event, binding_a, descriptor, 'conversation', 'key', 'value_a', logger) - - # binding_b should not see binding_a's state - snapshot_b = store.build_snapshot_from_event(event, binding_b, descriptor) - assert snapshot_b['conversation'] == {'external.conversation_id': 'conv_001'} - - # binding_a should see its own state - snapshot_a = store.build_snapshot_from_event(event, binding_a, descriptor) - assert snapshot_a['conversation']['key'] == 'value_a' - - def test_runner_state_isolated_by_binding_id(self): - """Same runner, different binding_id: runner state isolated.""" - store = RunnerScopedStateStore() - descriptor = make_descriptor() # Same runner - event = FakeEventEnvelope() - policy = StatePolicy(state_scopes=['conversation', 'actor', 'subject', 'runner']) - binding_a = FakeBinding(binding_id='binding_a', state_policy=policy) - binding_b = FakeBinding(binding_id='binding_b', state_policy=policy) - logger = FakeLogger() - - # Store for binding_a - store.apply_update_from_event(event, binding_a, descriptor, 'runner', 'cache_version', 'v1', logger) - - # binding_b should not see binding_a's runner state - snapshot_b = store.build_snapshot_from_event(event, binding_b, descriptor) - assert snapshot_b['runner'] == {} - - # binding_a should see its own state - snapshot_a = store.build_snapshot_from_event(event, binding_a, descriptor) - assert snapshot_a['runner']['cache_version'] == 'v1' - - def test_actor_state_isolated_by_binding_id(self): - """Same runner, same actor_id, different binding_id: actor state isolated.""" - store = RunnerScopedStateStore() - descriptor = make_descriptor() - event = FakeEventEnvelope(actor=FakeActorContext(actor_id='user_001')) - binding_a = FakeBinding(binding_id='binding_a') - binding_b = FakeBinding(binding_id='binding_b') - logger = FakeLogger() - - # Store for binding_a - store.apply_update_from_event(event, binding_a, descriptor, 'actor', 'preferred_language', 'en', logger) - - # binding_b should not see binding_a's state - snapshot_b = store.build_snapshot_from_event(event, binding_b, descriptor) - assert snapshot_b['actor'] == {} - - # binding_a should see its own state - snapshot_a = store.build_snapshot_from_event(event, binding_a, descriptor) - assert snapshot_a['actor']['preferred_language'] == 'en' - - -class TestStateStorePolicyEnforcement: - """Tests for state policy enforcement.""" - - def test_enable_state_false_returns_empty_snapshot(self): - """enable_state=False should return all empty scopes.""" - store = RunnerScopedStateStore() - descriptor = make_descriptor() - event = FakeEventEnvelope(conversation_id='conv_001') - policy = StatePolicy(enable_state=False) - binding = FakeBinding(state_policy=policy) - logger = FakeLogger() - - # Even if state exists, snapshot should be empty - store.apply_update_from_event( - event, FakeBinding(), descriptor, 'conversation', 'key', 'value', logger - ) - - snapshot = store.build_snapshot_from_event(event, binding, descriptor) - assert snapshot['conversation'] == {} - assert snapshot['actor'] == {} - assert snapshot['subject'] == {} - assert snapshot['runner'] == {} - - def test_enable_state_false_rejects_update(self): - """enable_state=False should reject state updates.""" - store = RunnerScopedStateStore() - descriptor = make_descriptor() - event = FakeEventEnvelope(conversation_id='conv_001') - policy = StatePolicy(enable_state=False) - binding = FakeBinding(state_policy=policy) - logger = FakeLogger() - - result = store.apply_update_from_event( - event, binding, descriptor, 'conversation', 'key', 'value', logger - ) - - assert result is False - assert len(logger.warnings) == 1 - assert 'disabled' in logger.warnings[0] - - def test_state_scopes_restricts_enabled_scopes(self): - """state_scopes should restrict which scopes are enabled.""" - store = RunnerScopedStateStore() + +class TestStateScopeHelpers: + """Tests for shared state scope helpers.""" + + def test_valid_state_scopes(self): + assert VALID_STATE_SCOPES == ('conversation', 'actor', 'subject', 'runner') + + def test_state_key_aliases(self): + assert STATE_KEY_ALIASES == {'conversation_id': 'external.conversation_id'} + assert normalize_state_key('conversation_id') == 'external.conversation_id' + assert normalize_state_key('external.session_id') == 'external.session_id' + + def test_binding_identity_uses_binding_id_first(self): + binding = FakeBinding(binding_id='binding_a') + assert get_binding_identity(binding) == 'binding_a' + + def test_binding_identity_falls_back_to_scope(self): + binding = FakeBinding(binding_id='', scope_type='workspace', scope_id='ws_001') + assert get_binding_identity(binding) == 'workspace:ws_001' + + def test_scope_key_building(self): descriptor = make_descriptor() + binding = FakeBinding(binding_id='binding_a') event = FakeEventEnvelope( conversation_id='conv_001', actor=FakeActorContext(actor_id='user_001'), + subject=FakeSubjectContext(subject_id='msg_001'), + thread_id='thread_001', ) - # Only allow conversation scope - policy = StatePolicy(state_scopes=['conversation']) - binding = FakeBinding(state_policy=policy) - logger = FakeLogger() - # Conversation update should work - result_conv = store.apply_update_from_event( - event, binding, descriptor, 'conversation', 'key', 'value_conv', logger + assert build_state_scope_key('conversation', event, binding, descriptor) == ( + 'conversation:plugin:test/my-runner/default:binding_a:conv_001:thread_001' ) - assert result_conv is True - - # Actor update should be rejected - result_actor = store.apply_update_from_event( - event, binding, descriptor, 'actor', 'key', 'value_actor', logger + assert build_state_scope_key('actor', event, binding, descriptor) == ( + 'actor:plugin:test/my-runner/default:binding_a:user:user_001' + ) + assert build_state_scope_key('subject', event, binding, descriptor) == ( + 'subject:plugin:test/my-runner/default:binding_a:message:msg_001' + ) + assert build_state_scope_key('runner', event, binding, descriptor) == ( + 'runner:plugin:test/my-runner/default:binding_a' ) - assert result_actor is False - assert any('not enabled' in w for w in logger.warnings) - def test_state_scopes_restricts_snapshot(self): - """state_scopes should restrict which scopes appear in snapshot.""" - store = RunnerScopedStateStore() + def test_scope_key_missing_identity_returns_none(self): descriptor = make_descriptor() + binding = FakeBinding() + event = FakeEventEnvelope(conversation_id=None, actor=None, subject=None) + + assert build_state_scope_key('conversation', event, binding, descriptor) is None + assert build_state_scope_key('subject', event, binding, descriptor) is None + assert build_state_scope_key('runner', event, binding, descriptor) is not None + + def test_build_state_context(self): + descriptor = make_descriptor() + binding = FakeBinding(binding_id='binding_a') event = FakeEventEnvelope( conversation_id='conv_001', actor=FakeActorContext(actor_id='user_001'), - ) - # Only allow conversation scope - policy = StatePolicy(state_scopes=['conversation']) - binding = FakeBinding(state_policy=policy) - logger = FakeLogger() - - # Store values for all scopes using a binding with all scopes enabled - full_binding = FakeBinding() - store.apply_update_from_event(event, full_binding, descriptor, 'conversation', 'conv_key', 'conv_val', logger) - store.apply_update_from_event(event, full_binding, descriptor, 'actor', 'actor_key', 'actor_val', logger) - - # Snapshot with restricted binding should only have conversation - snapshot = store.build_snapshot_from_event(event, binding, descriptor) - assert snapshot['conversation']['conv_key'] == 'conv_val' - assert snapshot['actor'] == {} # Not enabled by policy - - def test_default_state_scopes_conversation_and_actor(self): - """Default state_scopes should be conversation and actor only.""" - store = RunnerScopedStateStore() - descriptor = make_descriptor() - event = FakeEventEnvelope( - conversation_id='conv_001', subject=FakeSubjectContext(subject_id='msg_001'), ) - binding = FakeBinding() # Uses default policy - logger = FakeLogger() - # Conversation should work (in default scopes) - result_conv = store.apply_update_from_event( - event, binding, descriptor, 'conversation', 'key', 'value', logger - ) - assert result_conv is True + context = build_state_context(event, binding, descriptor) - # Subject should be rejected (not in default scopes) - result_subject = store.apply_update_from_event( - event, binding, descriptor, 'subject', 'key', 'value', logger - ) - assert result_subject is False - - def test_runner_scope_restricted_by_policy(self): - """Runner scope should be restricted by state_scopes.""" - store = RunnerScopedStateStore() - descriptor = make_descriptor() - event = FakeEventEnvelope() - # Only allow conversation scope - policy = StatePolicy(state_scopes=['conversation']) - binding = FakeBinding(state_policy=policy) - logger = FakeLogger() - - result = store.apply_update_from_event( - event, binding, descriptor, 'runner', 'key', 'value', logger - ) - - assert result is False - assert any('not enabled' in w for w in logger.warnings) - - -# ========== Persistent State Store Tests ========== - - -import pytest -import asyncio -import tempfile -import os -from sqlalchemy.ext.asyncio import create_async_engine, AsyncEngine + assert context['binding_identity'] == 'binding_a' + assert context['conversation_id'] == 'conv_001' + assert context['actor_id'] == 'user_001' + assert set(context['scope_keys']) == {'conversation', 'actor', 'subject', 'runner'} class TestPersistentStateStore: @@ -1161,28 +169,24 @@ class TestPersistentStateStore: engine = create_async_engine(f'sqlite+aiosqlite:///{db_path}', echo=False) - # Create tables from langbot.pkg.entity.persistence.base import Base async with engine.begin() as conn: await conn.run_sync(Base.metadata.create_all) yield engine - # Cleanup await engine.dispose() os.unlink(db_path) @pytest.fixture async def persistent_store(self, db_engine): """Create a persistent state store for testing.""" - from langbot.pkg.agent.runner.persistent_state_store import PersistentStateStore store = PersistentStateStore(db_engine) yield store await store.clear_all() @pytest.mark.asyncio async def test_build_snapshot_empty(self, persistent_store): - """Building snapshot from empty store returns empty scopes.""" descriptor = make_descriptor() event = FakeEventEnvelope(conversation_id='conv_001') binding = FakeBinding() @@ -1196,56 +200,45 @@ class TestPersistentStateStore: @pytest.mark.asyncio async def test_state_set_and_get(self, persistent_store): - """State set/get round trip.""" descriptor = make_descriptor() event = FakeEventEnvelope(conversation_id='conv_001') binding = FakeBinding() - # Set state success, error = await persistent_store.apply_update_from_event( event, binding, descriptor, 'conversation', 'test_key', {'nested': 'value'}, None ) assert success is True assert error is None - # Get via snapshot snapshot = await persistent_store.build_snapshot_from_event(event, binding, descriptor) assert snapshot['conversation']['test_key'] == {'nested': 'value'} @pytest.mark.asyncio async def test_binding_isolation(self, persistent_store): - """Different binding_id should have isolated state.""" descriptor = make_descriptor() event = FakeEventEnvelope(conversation_id='conv_001') binding_a = FakeBinding(binding_id='binding_a') binding_b = FakeBinding(binding_id='binding_b') - # Set for binding_a await persistent_store.apply_update_from_event( event, binding_a, descriptor, 'conversation', 'key', 'value_a', None ) - # binding_b should not see binding_a's state snapshot_b = await persistent_store.build_snapshot_from_event(event, binding_b, descriptor) assert snapshot_b['conversation'] == {'external.conversation_id': 'conv_001'} - # binding_a should see its own state snapshot_a = await persistent_store.build_snapshot_from_event(event, binding_a, descriptor) assert snapshot_a['conversation']['key'] == 'value_a' @pytest.mark.asyncio async def test_policy_disable_state(self, persistent_store): - """enable_state=False should return empty snapshot and reject updates.""" descriptor = make_descriptor() event = FakeEventEnvelope(conversation_id='conv_001') - policy = StatePolicy(enable_state=False) - binding = FakeBinding(state_policy=policy) + binding = FakeBinding(state_policy=StatePolicy(enable_state=False)) - # Snapshot should be empty snapshot = await persistent_store.build_snapshot_from_event(event, binding, descriptor) assert snapshot == {'conversation': {}, 'actor': {}, 'subject': {}, 'runner': {}} - # Update should be rejected success, error = await persistent_store.apply_update_from_event( event, binding, descriptor, 'conversation', 'key', 'value', None ) @@ -1254,22 +247,18 @@ class TestPersistentStateStore: @pytest.mark.asyncio async def test_policy_scope_restriction(self, persistent_store): - """state_scopes should restrict which scopes are accessible.""" descriptor = make_descriptor() event = FakeEventEnvelope( conversation_id='conv_001', actor=FakeActorContext(actor_id='user_001'), ) - policy = StatePolicy(state_scopes=['conversation']) # Only conversation - binding = FakeBinding(state_policy=policy) + binding = FakeBinding(state_policy=StatePolicy(state_scopes=['conversation'])) - # Conversation should work success_conv, _ = await persistent_store.apply_update_from_event( event, binding, descriptor, 'conversation', 'key', 'value_conv', None ) assert success_conv is True - # Actor should be rejected success_actor, error_actor = await persistent_store.apply_update_from_event( event, binding, descriptor, 'actor', 'key', 'value_actor', None ) @@ -1278,12 +267,10 @@ class TestPersistentStateStore: @pytest.mark.asyncio async def test_value_json_size_limit(self, persistent_store): - """Value exceeding size limit should be rejected.""" descriptor = make_descriptor() event = FakeEventEnvelope(conversation_id='conv_001') binding = FakeBinding() - # Create a large value (> 256KB) large_value = 'x' * (300 * 1024) success, error = await persistent_store.apply_update_from_event( @@ -1294,28 +281,22 @@ class TestPersistentStateStore: @pytest.mark.asyncio async def test_value_not_json_serializable(self, persistent_store): - """Non-JSON-serializable value should be rejected.""" descriptor = make_descriptor() event = FakeEventEnvelope(conversation_id='conv_001') binding = FakeBinding() - # Create a non-serializable value (set is not JSON-serializable) - non_serializable = {'key': {1, 2, 3}} - success, error = await persistent_store.apply_update_from_event( - event, binding, descriptor, 'conversation', 'key', non_serializable, None + event, binding, descriptor, 'conversation', 'key', {'key': {1, 2, 3}}, None ) assert success is False assert 'json' in error.lower() @pytest.mark.asyncio async def test_state_list(self, persistent_store): - """State list should return keys with optional prefix filter.""" descriptor = make_descriptor() event = FakeEventEnvelope(conversation_id='conv_001') binding = FakeBinding() - # Set multiple keys await persistent_store.apply_update_from_event( event, binding, descriptor, 'conversation', 'external.id', '123', None ) @@ -1326,17 +307,12 @@ class TestPersistentStateStore: event, binding, descriptor, 'conversation', 'memory.key', 'value', None ) - # Build scope key for list - from langbot.pkg.agent.runner.persistent_state_store import PersistentStateStore - temp_store = PersistentStateStore(None) - scope_key = temp_store._make_conversation_scope_key(event, binding, descriptor) + scope_key = build_state_scope_key('conversation', event, binding, descriptor) - # List all keys keys, has_more = await persistent_store.state_list(scope_key) assert len(keys) == 3 assert has_more is False - # List with prefix keys_ext, _ = await persistent_store.state_list(scope_key, prefix='external.') assert len(keys_ext) == 2 assert 'external.id' in keys_ext @@ -1344,31 +320,22 @@ class TestPersistentStateStore: @pytest.mark.asyncio async def test_state_delete(self, persistent_store): - """State delete should remove key.""" descriptor = make_descriptor() event = FakeEventEnvelope(conversation_id='conv_001') binding = FakeBinding() - # Set and verify await persistent_store.apply_update_from_event( event, binding, descriptor, 'conversation', 'key', 'value', None ) snapshot = await persistent_store.build_snapshot_from_event(event, binding, descriptor) assert snapshot['conversation']['key'] == 'value' - # Build scope key for delete - from langbot.pkg.agent.runner.persistent_state_store import PersistentStateStore - temp_store = PersistentStateStore(None) - scope_key = temp_store._make_conversation_scope_key(event, binding, descriptor) - - # Delete + scope_key = build_state_scope_key('conversation', event, binding, descriptor) deleted = await persistent_store.state_delete(scope_key, 'key') assert deleted is True - # Verify deleted snapshot = await persistent_store.build_snapshot_from_event(event, binding, descriptor) assert 'key' not in snapshot['conversation'] - # Delete non-existent should return False deleted_again = await persistent_store.state_delete(scope_key, 'key') assert deleted_again is False