refactor(agent-runner): tighten protocol v1 runtime boundaries

This commit is contained in:
huanghuoguoguo
2026-05-25 10:34:16 +08:00
parent cc911cc413
commit ab96070036
26 changed files with 548 additions and 3291 deletions

View File

@@ -5,16 +5,9 @@ import uuid
import time
import typing
from langbot_plugin.api.entities.builtin.pipeline import query as pipeline_query
from langbot_plugin.api.entities.builtin.platform import message as platform_message
from ...core import app
from .descriptor import AgentRunnerDescriptor
from .config_migration import ConfigMigration
from .context_packager import AgentContextPackager
from .state_store import get_state_store
from .persistent_state_store import get_persistent_state_store
from . import events as runner_events
from .host_models import AgentEventEnvelope, AgentBinding
@@ -33,12 +26,14 @@ class AgentTrigger(typing.TypedDict):
class ConversationContext(typing.TypedDict):
"""Conversation context."""
session_id: str | None
conversation_id: str | None
thread_id: str | None
launcher_type: str | None
launcher_id: str | None
sender_id: str | None
bot_uuid: str | None
bot_id: str | None
workspace_id: str | None
session_id: str | None
pipeline_uuid: str | None
@@ -145,36 +140,22 @@ class AgentRunContextPayload(typing.TypedDict):
class AgentRunContextBuilder:
"""Builder for provisioning AgentRunContext.
Two entry points:
- build_context_from_event(event, binding): Event-first Protocol v1
- build_context(query, descriptor, resources): Pipeline adapter Query-based entry
Responsibilities:
- Generate new run_id (UUID, not query id)
- Set trigger type based on source
- Build conversation context from session/event
- Build input from user_message/event
- Build params with filtering
- Build state snapshot from state_store
- Set trigger type based on event source
- Build conversation context from event
- Build input from event
- Build state snapshot from PersistentStateStore
- Build runtime context with host info, trace_id, deadline
- Set config from runner binding configuration
- Set config from runner binding configuration.
Pipeline Query adaptation belongs to PipelineAdapter, not this builder.
"""
ap: app.Application
# Params filtering rules
# Exclude variables starting with underscore (internal)
INTERNAL_PREFIX = '_'
# Exclude variables with sensitive naming patterns
SENSITIVE_PATTERNS = ('secret', 'token', 'key', 'password', 'credential', 'api_key', 'apikey')
# Exclude permission/control variables
PERMISSION_VARS = ('_pipeline_bound_plugins', '_authorized', '_permission')
def __init__(self, ap: app.Application):
self.ap = ap
self.context_packager = AgentContextPackager()
async def build_context_from_event(
self,
@@ -217,7 +198,8 @@ class AgentRunContextBuilder:
'launcher_type': None, # Will be filled from actor/subject if needed
'launcher_id': None,
'sender_id': event.actor.actor_id if event.actor else None,
'bot_uuid': event.bot_id,
'bot_id': event.bot_id,
'workspace_id': event.workspace_id,
'pipeline_uuid': binding.pipeline_uuid, # Pipeline adapter field
}
@@ -227,8 +209,9 @@ class AgentRunContextBuilder:
'event_type': event.event_type,
'event_time': event.event_time,
'source': event.source,
'source_event_type': None,
'data': {},
'source_event_type': event.source_event_type,
'raw_ref': event.raw_ref.model_dump(mode='json') if event.raw_ref else None,
'data': event.data,
}
# Build actor context
@@ -323,427 +306,6 @@ class AgentRunContextBuilder:
return context
async def build_context(
self,
query: pipeline_query.Query,
descriptor: AgentRunnerDescriptor,
resources: AgentResources,
) -> AgentRunContextPayload:
"""Build AgentRunContext envelope from Query.
This is a Pipeline adapter wrapper that converts Query to event + binding
and delegates to build_context_from_event().
For Protocol v1, messages are NOT inlined by default.
Pipeline max-round only affects bootstrap, NOT Protocol v1 entities.
Args:
query: Pipeline query
descriptor: Runner descriptor
resources: Built resources from AgentResourceBuilder
Returns:
AgentRunContext payload for the plugin runner
"""
# Resolve runner config for binding
runner_id = descriptor.id
runner_config = ConfigMigration.resolve_runner_config(
query.pipeline_config,
runner_id,
)
# Extract max_round for Pipeline adapter bootstrap (NOT Protocol v1)
# Note: config uses 'max-round' with hyphen, not 'max_round'
max_round = runner_config.get('max-round')
if max_round is None:
ai_config = query.pipeline_config.get('ai', {}) if query.pipeline_config else {}
max_round = ai_config.get('max-round')
# Build trigger
trigger: AgentTrigger = {
'type': runner_events.MESSAGE_RECEIVED,
'source': 'pipeline',
'timestamp': int(time.time()),
}
# Build conversation context
conversation: ConversationContext | None = None
session = getattr(query, 'session', None)
if session:
conversation = {
'session_id': f'{getattr(session, "launcher_type", "").value if hasattr(getattr(session, "launcher_type", ""), "value") else getattr(session, "launcher_type", "")}_{getattr(session, "launcher_id", "")}',
'conversation_id': getattr(getattr(session, 'using_conversation', None), 'uuid', None),
'launcher_type': getattr(session, 'launcher_type', None).value if hasattr(getattr(session, 'launcher_type', None), 'value') else getattr(session, 'launcher_type', None),
'launcher_id': getattr(session, 'launcher_id', None),
'sender_id': str(getattr(query, 'sender_id', '')) if getattr(query, 'sender_id', None) else None,
'bot_uuid': getattr(query, 'bot_uuid', None),
'pipeline_uuid': getattr(query, 'pipeline_uuid', None),
}
# Build input
input: AgentInput = self._build_input(query)
# Build params from query.variables with filtering
params = self._build_params(query)
# Build state snapshot from state_store
state_store = get_state_store()
state: AgentRunState = state_store.build_snapshot(query, descriptor)
streaming_supported = await self._is_stream_output_supported(query)
remove_think = query.pipeline_config.get('output', {}).get('misc', {}).get('remove-think', False) if query.pipeline_config else False
# Build runtime context
run_id = str(uuid.uuid4())
runtime: AgentRuntimeContext = {
'langbot_version': self.ap.ver_mgr.get_current_version(),
'sdk_protocol_version': descriptor.protocol_version,
'query_id': query.query_id,
'trace_id': run_id, # Use run_id as trace_id for now
'deadline_at': self._build_deadline(runner_config),
'metadata': {
'bot_name': query.variables.get('_monitoring_bot_name', 'Unknown') if query.variables else 'Unknown',
'pipeline_name': query.variables.get('_monitoring_pipeline_name', 'Unknown') if query.variables else 'Unknown',
'streaming_supported': streaming_supported,
'remove_think': remove_think,
},
}
# Build delivery context from query adapter capabilities
delivery_context = {
'surface': 'pipeline',
'reply_target': None,
'supports_streaming': streaming_supported,
'supports_edit': False,
'supports_reaction': False,
'max_message_size': None,
'platform_capabilities': {},
}
# Build context access for the direct Query adapter helper.
# The event-first run_from_query path uses build_context_from_event().
context_access = {
'conversation_id': conversation.get('conversation_id') if conversation else None,
'thread_id': None,
'latest_cursor': None,
'event_seq': None,
'transcript_seq': None,
'has_history_before': False,
'inline_policy': {
'mode': 'current_event',
'delivered_count': 0,
'source_total_count': None,
'messages_complete': False,
'reason': 'pipeline_adapter',
},
'available_apis': {
'history_page': False,
'history_search': False,
'event_get': False,
'event_page': False,
'artifact_metadata': False,
'artifact_read': False,
'state': False,
'storage': True,
},
}
# Build adapter context (for Pipeline adapter fields)
adapter_context = {
'query_id': query.query_id,
'pipeline_uuid': getattr(query, 'pipeline_uuid', None),
'max_round': max_round, # For reference only
'adapter_messages': [], # Will be filled if max_round is set
'extra': {
'params': params, # Put params in adapter.extra
'prompt': self._build_prompt(query), # Put prompt in adapter.extra
},
}
# Build bootstrap context (optional, for Pipeline adapter max-round)
bootstrap_context = None
# For Pipeline adapter: add bootstrap messages if max_round is set
# This goes into bootstrap.messages, NOT top-level messages
if max_round and max_round > 0:
packaged_context = self.context_packager.package_messages(query, runner_config)
adapter_messages = self._build_messages(packaged_context.messages)
# Put in bootstrap for Protocol v1
bootstrap_context = {
'messages': adapter_messages,
'summary': None,
'artifacts': [],
'metadata': {},
}
# Also update adapter for transition runners
adapter_context['adapter_messages'] = adapter_messages
# Update runtime metadata
runtime['metadata']['context_packaging'] = {
'policy': packaged_context.policy,
'history': packaged_context.history,
}
# Build full context - Protocol v1 structure
context: AgentRunContextPayload = {
'run_id': run_id,
'trigger': trigger,
'conversation': conversation,
'event': self._build_event(query), # REQUIRED
'actor': self._build_actor(query),
'subject': self._build_subject(query),
'input': input,
'delivery': delivery_context, # REQUIRED
'resources': resources,
'context': context_access, # ContextAccess - REQUIRED
'state': state,
'runtime': runtime,
'config': runner_config,
'bootstrap': bootstrap_context, # Optional bootstrap
'adapter': adapter_context, # Pipeline adapter context
'metadata': {}, # Additional metadata
}
return context
def _build_input(self, query: pipeline_query.Query) -> AgentInput:
"""Build AgentInput from query."""
text = None
text_parts: list[str] = []
contents: list[dict[str, typing.Any]] = []
if query.user_message:
# Extract text if content is single text element
if isinstance(query.user_message.content, list):
for elem in query.user_message.content:
contents.append(elem.model_dump(mode='json'))
if elem.type == 'text':
elem_text = getattr(elem, 'text', None)
if elem_text:
text_parts.append(elem_text)
else:
# Single string content
text = str(query.user_message.content)
contents.append({'type': 'text', 'text': text})
if text_parts:
text = ''.join(text_parts)
# Include message_chain for platform-specific format
message_chain_dict = None
if query.message_chain:
message_chain_dict = query.message_chain.model_dump(mode='json')
return {
'text': text,
'contents': contents,
'message_chain': message_chain_dict,
'attachments': self._build_attachments(query, contents),
}
def _build_attachments(
self,
query: pipeline_query.Query,
contents: list[dict[str, typing.Any]],
) -> list[dict[str, typing.Any]]:
"""Extract runner-consumable attachment data from input contents."""
attachments: list[dict[str, typing.Any]] = []
for elem in contents:
elem_type = elem.get('type')
if elem_type == 'image_url':
image_url = elem.get('image_url') or {}
attachments.append(
{
'type': 'image',
'source': 'url',
'url': image_url.get('url') if isinstance(image_url, dict) else str(image_url),
}
)
elif elem_type == 'image_base64':
image_base64 = elem.get('image_base64')
attachments.append(
{
'type': 'image',
'source': 'base64',
'content': image_base64,
'content_type': self._infer_base64_content_type(image_base64, 'image/jpeg'),
'name': 'image',
'has_content': bool(image_base64),
}
)
elif elem_type == 'file_url':
attachments.append(
{
'type': 'file',
'source': 'url',
'url': elem.get('file_url'),
'name': elem.get('file_name'),
}
)
elif elem_type == 'file_base64':
file_base64 = elem.get('file_base64')
attachments.append(
{
'type': 'file',
'source': 'base64',
'name': elem.get('file_name'),
'content': file_base64,
'content_type': self._infer_base64_content_type(file_base64, 'application/octet-stream'),
'has_content': bool(file_base64),
}
)
message_chain = getattr(query, 'message_chain', None)
if message_chain:
for component in message_chain:
if isinstance(component, platform_message.Image):
attachments.append(
{
'type': 'image',
'source': 'message_chain',
'id': component.image_id or None,
'url': component.url or None,
'path': str(component.path) if component.path else None,
'content': component.base64 or None,
'content_type': self._infer_base64_content_type(component.base64, 'image/jpeg'),
'name': 'image',
'has_content': bool(component.base64),
}
)
elif isinstance(component, platform_message.File):
attachments.append(
{
'type': 'file',
'source': 'message_chain',
'id': component.id or None,
'name': component.name or None,
'size': component.size or 0,
'url': component.url or None,
'path': component.path or None,
'content': component.base64 or None,
'content_type': self._infer_base64_content_type(component.base64, 'application/octet-stream'),
'has_content': bool(component.base64),
}
)
elif isinstance(component, platform_message.Voice):
attachments.append(
{
'type': 'voice',
'source': 'message_chain',
'id': component.voice_id or None,
'url': component.url or None,
'path': component.path or None,
'duration': component.length or 0,
'content': component.base64 or None,
'content_type': self._infer_base64_content_type(component.base64, 'audio/mpeg'),
'name': 'voice',
'has_content': bool(component.base64),
}
)
return attachments
def _infer_base64_content_type(self, value: typing.Any, default: str) -> str:
"""Infer MIME type from a data URL base64 value."""
if not isinstance(value, str):
return default
if value.startswith('data:') and ';base64,' in value:
return value[5:value.find(';base64,')] or default
return default
def _build_event(self, query: pipeline_query.Query) -> dict[str, typing.Any]:
"""Build a minimal EBA-compatible event envelope from the message query.
The public event_type must be a stable AgentRunner protocol name. Keep
platform or SDK class names inside event_data so future EventRouter
events can share the same top-level naming contract.
"""
message_event = getattr(query, 'message_event', None)
event_data: dict[str, typing.Any] = {}
if message_event and hasattr(message_event, 'model_dump'):
try:
event_data = message_event.model_dump(mode='json')
except TypeError:
event_data = message_event.model_dump()
except Exception:
event_data = {}
event_data.pop('source_platform_object', None)
source_event_type = getattr(message_event, 'type', None) if message_event else None
if source_event_type:
event_data.setdefault('source_event_type', source_event_type)
message_chain = getattr(query, 'message_chain', None)
message_id = getattr(message_chain, 'message_id', None)
if message_id == -1:
message_id = None
event_time = getattr(message_event, 'time', None) if message_event else None
event_timestamp = int(event_time) if isinstance(event_time, (int, float)) else None
return {
'event_type': runner_events.MESSAGE_RECEIVED,
'event_id': str(message_id or getattr(query, 'query_id', '')),
'event_timestamp': event_timestamp,
'event_data': event_data,
}
def _build_actor(self, query: pipeline_query.Query) -> dict[str, typing.Any]:
"""Build actor context for the sender that triggered the run."""
message_event = getattr(query, 'message_event', None)
sender = getattr(message_event, 'sender', None) if message_event else None
actor_id = getattr(sender, 'id', None) or getattr(query, 'sender_id', None)
actor_name = sender.get_name() if sender and hasattr(sender, 'get_name') else None
return {
'actor_type': 'user',
'actor_id': str(actor_id) if actor_id is not None else None,
'actor_name': actor_name,
}
def _build_subject(self, query: pipeline_query.Query) -> dict[str, typing.Any]:
"""Build subject context for the current message."""
message_chain = getattr(query, 'message_chain', None)
message_id = getattr(message_chain, 'message_id', None)
if message_id == -1:
message_id = None
launcher_type = getattr(query, 'launcher_type', None)
launcher_type_value = getattr(launcher_type, 'value', launcher_type)
return {
'subject_type': 'message',
'subject_id': str(message_id or getattr(query, 'query_id', '')),
'subject_data': {
'launcher_type': launcher_type_value,
'launcher_id': getattr(query, 'launcher_id', None),
'sender_id': str(getattr(query, 'sender_id', '')),
'bot_uuid': getattr(query, 'bot_uuid', None),
'pipeline_uuid': getattr(query, 'pipeline_uuid', None),
},
}
def _build_deadline(self, runner_config: dict[str, typing.Any]) -> float | None:
"""Build deadline timestamp from runner timeout config.
A missing timeout uses the host default. Explicit null, zero, or negative
values disable the total run deadline for advanced deployments.
"""
timeout = runner_config.get('timeout', DEFAULT_RUNNER_TIMEOUT_SECONDS)
if timeout is None:
return None
try:
timeout_seconds = float(timeout)
except (TypeError, ValueError):
return None
if timeout_seconds <= 0:
return None
return time.time() + timeout_seconds
def _build_deadline_from_binding(self, binding: AgentBinding) -> float | None:
"""Build deadline timestamp from binding timeout config.
@@ -767,106 +329,6 @@ class AgentRunContextBuilder:
return time.time() + timeout_seconds
async def _is_stream_output_supported(self, query: pipeline_query.Query) -> bool:
"""Check whether the current adapter can consume streaming chunks."""
try:
return await query.adapter.is_stream_output_supported()
except AttributeError:
return False
except Exception:
return False
def _build_prompt(self, query: pipeline_query.Query) -> list[dict[str, typing.Any]]:
"""Build effective prompt messages from query.prompt after preprocessing."""
prompt_messages: list[dict[str, typing.Any]] = []
prompt = getattr(query, 'prompt', None)
messages = getattr(prompt, 'messages', None)
if not messages:
return prompt_messages
for msg in messages:
prompt_messages.append(msg.model_dump(mode='json'))
return prompt_messages
def _build_messages(self, source_messages: list[typing.Any]) -> list[dict[str, typing.Any]]:
"""Build messages list from packaged source messages."""
messages: list[dict[str, typing.Any]] = []
for msg in source_messages:
messages.append(msg.model_dump(mode='json'))
return messages
def _build_params(self, query: pipeline_query.Query) -> dict[str, typing.Any]:
"""Build params from query.variables with filtering.
Filtering rules:
1. Exclude variables starting with underscore (internal)
2. Exclude variables with sensitive naming patterns (secret, token, key, password)
3. Exclude permission/control variables
4. Keep only JSON-serializable values
Args:
query: Pipeline query
Returns:
Filtered params dict
"""
params: dict[str, typing.Any] = {}
if not query.variables:
return params
for key, value in query.variables.items():
# Filter internal variables (starting with underscore)
if key.startswith(self.INTERNAL_PREFIX):
continue
# Filter sensitive naming patterns
key_lower = key.lower()
if any(pattern in key_lower for pattern in self.SENSITIVE_PATTERNS):
continue
# Filter permission variables
if any(key == perm_var or key.startswith(perm_var) for perm_var in self.PERMISSION_VARS):
continue
# Keep only JSON-serializable values
if self._is_json_serializable(value):
params[key] = value
return params
def _is_json_serializable(self, value: typing.Any) -> bool:
"""Check if value is JSON-serializable.
Note: set is NOT JSON-serializable. json.dumps({"x": {1}}) fails.
Only list and tuple are allowed as collection types.
Args:
value: Value to check
Returns:
True if JSON-serializable, False otherwise
"""
if value is None:
return True
if isinstance(value, (str, int, float, bool)):
return True
# Only allow list and tuple, NOT set (set is not JSON-serializable)
if isinstance(value, (list, tuple)):
return all(self._is_json_serializable(item) for item in value)
if isinstance(value, dict):
return all(
isinstance(k, str) and self._is_json_serializable(v)
for k, v in value.items()
)
# Pydantic models and other complex types are not directly serializable
# as params (they may have internal structure not meant for runners)
return False
async def _build_context_access(
self,
event: AgentEventEnvelope,
@@ -899,8 +361,7 @@ class AgentRunContextBuilder:
artifact_metadata_enabled = 'metadata' in artifact_permissions
artifact_read_enabled = 'read' in artifact_permissions
# Determine state API availability based on binding state_policy (event-first mode)
# Direct Query context builder does not expose persistent state API.
# Determine state API availability based on binding state_policy.
state_enabled = False
if binding is not None:
state_policy = binding.state_policy