Files
LangBot/src/langbot/pkg/agent/runner/context_builder.py
2026-06-04 11:11:39 +08:00

599 lines
22 KiB
Python

"""Agent run context builder for converting Query to AgentRunContext."""
from __future__ import annotations
import uuid
import time
import typing
from langbot_plugin.api.entities.builtin.pipeline import query as pipeline_query
from langbot_plugin.api.entities.builtin.platform import message as platform_message
from ...core import app
from .descriptor import AgentRunnerDescriptor
from .config_migration import ConfigMigration
from .state_store import get_state_store
from . import events as runner_events
# Internal models for the agent runner context protocol.
class AgentTrigger(typing.TypedDict):
"""Agent trigger information."""
type: str
source: str # 'pipeline' or 'event_router'
timestamp: int | None
class ConversationContext(typing.TypedDict):
"""Conversation context."""
session_id: str | None
conversation_id: str | None
launcher_type: str | None
launcher_id: str | None
sender_id: str | None
bot_uuid: str | None
pipeline_uuid: str | None
class AgentInput(typing.TypedDict):
"""Agent input."""
text: str | None
contents: list[dict[str, typing.Any]]
message_chain: dict[str, typing.Any] | None
attachments: list[dict[str, typing.Any]]
class AgentRunState(typing.TypedDict):
"""Agent run state with 4 scopes."""
conversation: dict[str, typing.Any]
actor: dict[str, typing.Any]
subject: dict[str, typing.Any]
runner: dict[str, typing.Any]
# Resource payload models matching langbot-plugin-sdk/resources.py.
class ModelResource(typing.TypedDict):
"""Model resource payload."""
model_id: str
model_type: str | None
provider: str | None
class ToolResource(typing.TypedDict):
"""Tool resource payload."""
tool_name: str
tool_type: str | None
description: str | None
class KnowledgeBaseResource(typing.TypedDict):
"""Knowledge base resource payload."""
kb_id: str
kb_name: str | None
kb_type: str | None
class FileResource(typing.TypedDict):
"""File resource payload."""
file_id: str
file_name: str | None
mime_type: str | None
source: str | None
class StorageResource(typing.TypedDict):
"""Storage resource payload."""
plugin_storage: bool
workspace_storage: bool
class AgentResources(typing.TypedDict):
"""Agent resources payload."""
models: list[ModelResource]
tools: list[ToolResource]
knowledge_bases: list[KnowledgeBaseResource]
files: list[FileResource]
storage: StorageResource
platform_capabilities: dict[str, typing.Any]
class AgentRuntimeContext(typing.TypedDict):
"""Agent runtime context."""
langbot_version: str | None
sdk_protocol_version: str
query_id: int | None
trace_id: str | None
deadline_at: int | None
metadata: dict[str, typing.Any]
class AgentRunContextPayload(typing.TypedDict):
"""AgentRunContext payload passed to an agent runner.
Note: The 'config' field contains the binding config from ai.runner_config[runner_id],
which is Pipeline's configuration for this specific runner binding (not plugin instance config).
"""
run_id: str
trigger: AgentTrigger
conversation: ConversationContext | None
event: dict[str, typing.Any] | None
actor: dict[str, typing.Any] | None
subject: dict[str, typing.Any] | None
messages: list[dict[str, typing.Any]]
prompt: list[dict[str, typing.Any]]
input: AgentInput
params: dict[str, typing.Any]
resources: AgentResources
state: AgentRunState
runtime: AgentRuntimeContext
config: dict[str, typing.Any] # Binding config from ai.runner_config[runner_id]
class AgentRunContextBuilder:
"""Builder for converting Query to AgentRunContext.
Responsibilities:
- Generate new run_id (UUID, not query id)
- Set trigger type to 'message.received' for pipeline
- Build conversation context from session
- Convert messages to SDK format
- Build input from user_message and message_chain
- Build params from query.variables with filtering
- Build state snapshot from state_store
- Set resources from AgentResourceBuilder result
- Build runtime context with host info, trace_id, deadline
- Set config from runner binding configuration (ai.runner_config[runner_id])
"""
ap: app.Application
# Params filtering rules
# Exclude variables starting with underscore (internal)
INTERNAL_PREFIX = '_'
# Exclude variables with sensitive naming patterns
SENSITIVE_PATTERNS = ('secret', 'token', 'key', 'password', 'credential', 'api_key', 'apikey')
# Exclude permission/control variables
PERMISSION_VARS = ('_pipeline_bound_plugins', '_authorized', '_permission')
def __init__(self, ap: app.Application):
self.ap = ap
async def build_context(
self,
query: pipeline_query.Query,
descriptor: AgentRunnerDescriptor,
resources: AgentResources,
) -> AgentRunContextPayload:
"""Build AgentRunContext from Query.
Args:
query: Pipeline query
descriptor: Runner descriptor
resources: Built resources from AgentResourceBuilder
Returns:
AgentRunContext payload for the plugin runner
"""
# Generate new run_id
run_id = str(uuid.uuid4())
# Build trigger
trigger: AgentTrigger = {
'type': runner_events.MESSAGE_RECEIVED,
'source': 'pipeline',
'timestamp': int(time.time()),
}
# Build conversation context
conversation: ConversationContext | None = None
if query.session:
conversation = {
'session_id': f'{query.session.launcher_type.value}_{query.session.launcher_id}',
'conversation_id': getattr(query.session.using_conversation, 'uuid', None),
'launcher_type': query.session.launcher_type.value,
'launcher_id': query.session.launcher_id,
'sender_id': str(query.sender_id),
'bot_uuid': query.bot_uuid,
'pipeline_uuid': query.pipeline_uuid,
}
# Build input
input: AgentInput = self._build_input(query)
# Build messages
messages = self._build_messages(query)
# Build params from query.variables with filtering
params = self._build_params(query)
# Build state snapshot from state_store
state_store = get_state_store()
state: AgentRunState = state_store.build_snapshot(query, descriptor)
# Get runner binding config from ai.runner_config[runner_id]
# This is Pipeline's configuration for this specific runner binding,
# passed through AgentRunContext.config to the runner
runner_config = ConfigMigration.resolve_runner_config(
query.pipeline_config,
descriptor.id,
)
streaming_supported = await self._is_stream_output_supported(query)
remove_think = query.pipeline_config.get('output', {}).get('misc', {}).get('remove-think', False)
# Build runtime context
runtime: AgentRuntimeContext = {
'langbot_version': self.ap.ver_mgr.get_current_version(),
'sdk_protocol_version': descriptor.protocol_version,
'query_id': query.query_id,
'trace_id': run_id, # Use run_id as trace_id for now
'deadline_at': self._build_deadline(runner_config),
'metadata': {
'bot_name': query.variables.get('_monitoring_bot_name', 'Unknown'),
'pipeline_name': query.variables.get('_monitoring_pipeline_name', 'Unknown'),
'streaming_supported': streaming_supported,
'remove_think': remove_think,
},
}
# Build full context
context: AgentRunContextPayload = {
'run_id': run_id,
'trigger': trigger,
'conversation': conversation,
'event': self._build_event(query),
'actor': self._build_actor(query),
'subject': self._build_subject(query),
'messages': messages,
'prompt': self._build_prompt(query),
'input': input,
'params': params,
'resources': resources,
'state': state,
'runtime': runtime,
'config': runner_config,
}
return context
def _build_input(self, query: pipeline_query.Query) -> AgentInput:
"""Build AgentInput from query."""
text = None
text_parts: list[str] = []
contents: list[dict[str, typing.Any]] = []
if query.user_message:
# Extract text if content is single text element
if isinstance(query.user_message.content, list):
for elem in query.user_message.content:
contents.append(elem.model_dump(mode='json'))
if elem.type == 'text':
elem_text = getattr(elem, 'text', None)
if elem_text:
text_parts.append(elem_text)
else:
# Single string content
text = str(query.user_message.content)
contents.append({'type': 'text', 'text': text})
if text_parts:
text = ''.join(text_parts)
# Include message_chain for platform-specific format
message_chain_dict = None
if query.message_chain:
message_chain_dict = query.message_chain.model_dump(mode='json')
return {
'text': text,
'contents': contents,
'message_chain': message_chain_dict,
'attachments': self._build_attachments(query, contents),
}
def _build_attachments(
self,
query: pipeline_query.Query,
contents: list[dict[str, typing.Any]],
) -> list[dict[str, typing.Any]]:
"""Extract runner-consumable attachment data from input contents."""
attachments: list[dict[str, typing.Any]] = []
for elem in contents:
elem_type = elem.get('type')
if elem_type == 'image_url':
image_url = elem.get('image_url') or {}
attachments.append(
{
'type': 'image',
'source': 'url',
'url': image_url.get('url') if isinstance(image_url, dict) else str(image_url),
}
)
elif elem_type == 'image_base64':
image_base64 = elem.get('image_base64')
attachments.append(
{
'type': 'image',
'source': 'base64',
'content': image_base64,
'content_type': self._infer_base64_content_type(image_base64, 'image/jpeg'),
'name': 'image',
'has_content': bool(image_base64),
}
)
elif elem_type == 'file_url':
attachments.append(
{
'type': 'file',
'source': 'url',
'url': elem.get('file_url'),
'name': elem.get('file_name'),
}
)
elif elem_type == 'file_base64':
file_base64 = elem.get('file_base64')
attachments.append(
{
'type': 'file',
'source': 'base64',
'name': elem.get('file_name'),
'content': file_base64,
'content_type': self._infer_base64_content_type(file_base64, 'application/octet-stream'),
'has_content': bool(file_base64),
}
)
message_chain = getattr(query, 'message_chain', None)
if message_chain:
for component in message_chain:
if isinstance(component, platform_message.Image):
attachments.append(
{
'type': 'image',
'source': 'message_chain',
'id': component.image_id or None,
'url': component.url or None,
'path': str(component.path) if component.path else None,
'content': component.base64 or None,
'content_type': self._infer_base64_content_type(component.base64, 'image/jpeg'),
'name': 'image',
'has_content': bool(component.base64),
}
)
elif isinstance(component, platform_message.File):
attachments.append(
{
'type': 'file',
'source': 'message_chain',
'id': component.id or None,
'name': component.name or None,
'size': component.size or 0,
'url': component.url or None,
'path': component.path or None,
'content': component.base64 or None,
'content_type': self._infer_base64_content_type(component.base64, 'application/octet-stream'),
'has_content': bool(component.base64),
}
)
elif isinstance(component, platform_message.Voice):
attachments.append(
{
'type': 'voice',
'source': 'message_chain',
'id': component.voice_id or None,
'url': component.url or None,
'path': component.path or None,
'duration': component.length or 0,
'content': component.base64 or None,
'content_type': self._infer_base64_content_type(component.base64, 'audio/mpeg'),
'name': 'voice',
'has_content': bool(component.base64),
}
)
return attachments
def _infer_base64_content_type(self, value: typing.Any, default: str) -> str:
"""Infer MIME type from a data URL base64 value."""
if not isinstance(value, str):
return default
if value.startswith('data:') and ';base64,' in value:
return value[5:value.find(';base64,')] or default
return default
def _build_event(self, query: pipeline_query.Query) -> dict[str, typing.Any]:
"""Build a minimal EBA-compatible event envelope from the message query.
The public event_type must be a stable AgentRunner protocol name. Keep
platform or SDK class names inside event_data so future EventRouter
events can share the same top-level naming contract.
"""
message_event = getattr(query, 'message_event', None)
event_data: dict[str, typing.Any] = {}
if message_event and hasattr(message_event, 'model_dump'):
try:
event_data = message_event.model_dump(mode='json')
except TypeError:
event_data = message_event.model_dump()
except Exception:
event_data = {}
event_data.pop('source_platform_object', None)
source_event_type = getattr(message_event, 'type', None) if message_event else None
if source_event_type:
event_data.setdefault('source_event_type', source_event_type)
message_chain = getattr(query, 'message_chain', None)
message_id = getattr(message_chain, 'message_id', None)
if message_id == -1:
message_id = None
event_time = getattr(message_event, 'time', None) if message_event else None
event_timestamp = int(event_time) if isinstance(event_time, (int, float)) else None
return {
'event_type': runner_events.MESSAGE_RECEIVED,
'event_id': str(message_id or getattr(query, 'query_id', '')),
'event_timestamp': event_timestamp,
'event_data': event_data,
}
def _build_actor(self, query: pipeline_query.Query) -> dict[str, typing.Any]:
"""Build actor context for the sender that triggered the run."""
message_event = getattr(query, 'message_event', None)
sender = getattr(message_event, 'sender', None) if message_event else None
actor_id = getattr(sender, 'id', None) or getattr(query, 'sender_id', None)
actor_name = sender.get_name() if sender and hasattr(sender, 'get_name') else None
return {
'actor_type': 'user',
'actor_id': str(actor_id) if actor_id is not None else None,
'actor_name': actor_name,
}
def _build_subject(self, query: pipeline_query.Query) -> dict[str, typing.Any]:
"""Build subject context for the current message."""
message_chain = getattr(query, 'message_chain', None)
message_id = getattr(message_chain, 'message_id', None)
if message_id == -1:
message_id = None
launcher_type = getattr(query, 'launcher_type', None)
launcher_type_value = getattr(launcher_type, 'value', launcher_type)
return {
'subject_type': 'message',
'subject_id': str(message_id or getattr(query, 'query_id', '')),
'subject_data': {
'launcher_type': launcher_type_value,
'launcher_id': getattr(query, 'launcher_id', None),
'sender_id': str(getattr(query, 'sender_id', '')),
'bot_uuid': getattr(query, 'bot_uuid', None),
'pipeline_uuid': getattr(query, 'pipeline_uuid', None),
},
}
def _build_deadline(self, runner_config: dict[str, typing.Any]) -> int | None:
"""Build deadline timestamp from runner timeout config if present."""
timeout = runner_config.get('timeout')
if timeout is None:
return None
try:
timeout_seconds = float(timeout)
except (TypeError, ValueError):
return None
if timeout_seconds <= 0:
return None
return int(time.time() + timeout_seconds)
async def _is_stream_output_supported(self, query: pipeline_query.Query) -> bool:
"""Check whether the current adapter can consume streaming chunks."""
try:
return await query.adapter.is_stream_output_supported()
except AttributeError:
return False
except Exception:
return False
def _build_prompt(self, query: pipeline_query.Query) -> list[dict[str, typing.Any]]:
"""Build effective prompt messages from query.prompt after preprocessing."""
prompt_messages: list[dict[str, typing.Any]] = []
prompt = getattr(query, 'prompt', None)
messages = getattr(prompt, 'messages', None)
if not messages:
return prompt_messages
for msg in messages:
prompt_messages.append(msg.model_dump(mode='json'))
return prompt_messages
def _build_messages(self, query: pipeline_query.Query) -> list[dict[str, typing.Any]]:
"""Build messages list from query."""
messages: list[dict[str, typing.Any]] = []
if query.messages:
for msg in query.messages:
messages.append(msg.model_dump(mode='json'))
return messages
def _build_params(self, query: pipeline_query.Query) -> dict[str, typing.Any]:
"""Build params from query.variables with filtering.
Filtering rules:
1. Exclude variables starting with underscore (internal)
2. Exclude variables with sensitive naming patterns (secret, token, key, password)
3. Exclude permission/control variables
4. Keep only JSON-serializable values
Args:
query: Pipeline query
Returns:
Filtered params dict
"""
params: dict[str, typing.Any] = {}
if not query.variables:
return params
for key, value in query.variables.items():
# Filter internal variables (starting with underscore)
if key.startswith(self.INTERNAL_PREFIX):
continue
# Filter sensitive naming patterns
key_lower = key.lower()
if any(pattern in key_lower for pattern in self.SENSITIVE_PATTERNS):
continue
# Filter permission variables
if any(key == perm_var or key.startswith(perm_var) for perm_var in self.PERMISSION_VARS):
continue
# Keep only JSON-serializable values
if self._is_json_serializable(value):
params[key] = value
return params
def _is_json_serializable(self, value: typing.Any) -> bool:
"""Check if value is JSON-serializable.
Note: set is NOT JSON-serializable. json.dumps({"x": {1}}) fails.
Only list and tuple are allowed as collection types.
Args:
value: Value to check
Returns:
True if JSON-serializable, False otherwise
"""
if value is None:
return True
if isinstance(value, (str, int, float, bool)):
return True
# Only allow list and tuple, NOT set (set is not JSON-serializable)
if isinstance(value, (list, tuple)):
return all(self._is_json_serializable(item) for item in value)
if isinstance(value, dict):
return all(
isinstance(k, str) and self._is_json_serializable(v)
for k, v in value.items()
)
# Pydantic models and other complex types are not directly serializable
# as params (they may have internal structure not meant for runners)
return False