Compare commits

..

4 Commits

Author SHA1 Message Date
huanghuoguoguo 16d47d3e61 fix(monitoring): measure host rag duration 2026-06-17 20:35:14 +08:00
huanghuoguoguo 3146c58905 fix(monitoring): mark handled pipeline errors in traces 2026-06-17 14:34:57 +08:00
huanghuoguoguo d92b664136 test(monitoring): cover trace observability 2026-06-17 10:46:41 +08:00
huanghuoguoguo 8789c42eeb feat(monitoring): add host RAG trace observability 2026-06-17 00:13:57 +08:00
54 changed files with 2150 additions and 2921 deletions
@@ -313,18 +313,30 @@ class MonitoringRouterGroup(group.RouterGroup):
offset=0,
)
# Get traces
traces, traces_total = await self.ap.monitoring_service.get_traces(
bot_ids=bot_ids if bot_ids else None,
pipeline_ids=pipeline_ids if pipeline_ids else None,
start_time=start_time,
end_time=end_time,
limit=limit,
offset=0,
)
return self.success(
data={
'overview': overview,
'messages': messages,
'llmCalls': llm_calls,
'embeddingCalls': embedding_calls,
'traces': traces,
'sessions': sessions,
'errors': errors,
'totalCount': {
'messages': messages_total,
'llmCalls': llm_calls_total,
'embeddingCalls': embedding_calls_total,
'traces': traces_total,
'sessions': sessions_total,
'errors': errors_total,
},
@@ -350,6 +362,49 @@ class MonitoringRouterGroup(group.RouterGroup):
return self.success(data=details)
@self.route('/traces', methods=['GET'], auth_type=group.AuthType.USER_TOKEN)
async def get_traces() -> str:
"""Get end-to-end trace records."""
bot_ids = quart.request.args.getlist('botId')
pipeline_ids = quart.request.args.getlist('pipelineId')
session_ids = quart.request.args.getlist('sessionId')
statuses = quart.request.args.getlist('status')
start_time_str = quart.request.args.get('startTime')
end_time_str = quart.request.args.get('endTime')
limit = int(quart.request.args.get('limit', 100))
offset = int(quart.request.args.get('offset', 0))
start_time = parse_iso_datetime(start_time_str)
end_time = parse_iso_datetime(end_time_str)
traces, total = await self.ap.monitoring_service.get_traces(
bot_ids=bot_ids if bot_ids else None,
pipeline_ids=pipeline_ids if pipeline_ids else None,
session_ids=session_ids if session_ids else None,
statuses=statuses if statuses else None,
start_time=start_time,
end_time=end_time,
limit=limit,
offset=offset,
)
return self.success(
data={
'traces': traces,
'total': total,
'limit': limit,
'offset': offset,
}
)
@self.route('/traces/<trace_id>', methods=['GET'], auth_type=group.AuthType.USER_TOKEN)
async def get_trace_details(trace_id: str) -> str:
"""Get one trace with all spans."""
details = await self.ap.monitoring_service.get_trace_details(trace_id)
if not details.get('found'):
return self.http_status(404, -1, f'Trace {trace_id} not found')
return self.success(data=details)
@self.route('/export', methods=['GET'], auth_type=group.AuthType.USER_TOKEN)
async def export_data() -> tuple[str, int]:
"""Export monitoring data as CSV"""
@@ -350,8 +350,24 @@ class PluginsRouterGroup(group.RouterGroup):
if not endpoint.startswith('/') or '..' in endpoint:
return self.http_status(400, -1, 'invalid endpoint')
caller = {
'plugin_author': author,
'plugin_name': plugin_name,
'page_id': page_id,
'origin': _get_request_origin(),
}
headers = {
key: value
for key, value in {
'user-agent': quart.request.headers.get('User-Agent'),
'x-request-id': quart.request.headers.get('X-Request-ID'),
'x-forwarded-for': quart.request.headers.get('X-Forwarded-For'),
}.items()
if value
}
result = await self.ap.plugin_connector.handle_page_api(
author, plugin_name, page_id, endpoint, method.upper(), body
author, plugin_name, page_id, endpoint, method.upper(), body, caller, headers
)
if result.get('error'):
return self.http_status(400, -1, result['error'])
@@ -3,11 +3,55 @@ from __future__ import annotations
import uuid
import datetime
import sqlalchemy
import json
from ....core import app
from ....entity.persistence import monitoring as persistence_monitoring
# TODO: Move shared trace/time helpers into a small monitoring utility module
# when trace propagation expands beyond the current query/retrieval path.
def _utc_now() -> datetime.datetime:
return datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
def _json_dumps(value: dict | list | None) -> str | None:
if value is None:
return None
try:
return json.dumps(value, ensure_ascii=False, default=str)
except Exception:
return json.dumps({'serialization_error': str(value)}, ensure_ascii=False)
def _json_loads(value: str | None) -> dict | list | None:
if not value:
return None
try:
return json.loads(value)
except Exception:
return None
def new_trace_id() -> str:
return f'trace-{uuid.uuid4().hex[:16]}'
def new_span_id() -> str:
return f'span-{uuid.uuid4().hex[:16]}'
def normalize_trace_status(status: str | None) -> str:
"""Normalize operation status to the monitoring UI vocabulary."""
if status in ('completed', 'ok'):
return 'success'
if status in ('failed', 'failure', 'exception'):
return 'error'
if status in ('running', 'success', 'error'):
return status
return 'success'
class MonitoringService:
"""Monitoring service"""
@@ -74,6 +118,18 @@ class MonitoringService:
persistence_monitoring.MonitoringFeedback.timestamp,
persistence_monitoring.MonitoringFeedback.id,
),
(
'monitoring_traces',
persistence_monitoring.MonitoringTrace,
persistence_monitoring.MonitoringTrace.started_at,
persistence_monitoring.MonitoringTrace.trace_id,
),
(
'monitoring_spans',
persistence_monitoring.MonitoringSpan,
persistence_monitoring.MonitoringSpan.started_at,
persistence_monitoring.MonitoringSpan.span_id,
),
]
deleted_counts: dict[str, int] = {}
@@ -133,6 +189,116 @@ class MonitoringService:
# ========== Recording Methods ==========
async def start_trace(
self,
trace_id: str | None = None,
name: str = 'LangBot query',
bot_id: str | None = None,
bot_name: str | None = None,
pipeline_id: str | None = None,
pipeline_name: str | None = None,
session_id: str | None = None,
message_id: str | None = None,
query_id: str | int | None = None,
attributes: dict | None = None,
) -> str:
"""Create or update a trace header row."""
trace_id = trace_id or new_trace_id()
trace_data = {
'trace_id': trace_id,
'started_at': _utc_now(),
'ended_at': None,
'duration': None,
'status': 'running',
'name': name,
'bot_id': bot_id,
'bot_name': bot_name,
'pipeline_id': pipeline_id,
'pipeline_name': pipeline_name,
'session_id': session_id,
'message_id': message_id,
'query_id': str(query_id) if query_id is not None else None,
'attributes': _json_dumps(attributes),
}
await self.ap.persistence_mgr.execute_async(
sqlalchemy.insert(persistence_monitoring.MonitoringTrace).values(trace_data)
)
return trace_id
async def finish_trace(
self,
trace_id: str,
status: str = 'success',
duration: int | None = None,
message_id: str | None = None,
attributes: dict | None = None,
) -> None:
"""Mark a trace complete."""
update_values: dict = {
'ended_at': _utc_now(),
'status': normalize_trace_status(status),
}
if duration is not None:
update_values['duration'] = duration
if message_id is not None:
update_values['message_id'] = message_id
if attributes is not None:
update_values['attributes'] = _json_dumps(attributes)
await self.ap.persistence_mgr.execute_async(
sqlalchemy.update(persistence_monitoring.MonitoringTrace)
.where(persistence_monitoring.MonitoringTrace.trace_id == trace_id)
.values(update_values)
)
async def record_span(
self,
trace_id: str,
name: str,
kind: str,
status: str = 'success',
span_id: str | None = None,
parent_span_id: str | None = None,
started_at: datetime.datetime | None = None,
ended_at: datetime.datetime | None = None,
duration: int | None = None,
message_id: str | None = None,
session_id: str | None = None,
bot_id: str | None = None,
pipeline_id: str | None = None,
attributes: dict | None = None,
error_message: str | None = None,
) -> str:
"""Record a single completed span."""
started_at = started_at or _utc_now()
if duration is None and ended_at is not None:
duration = int((ended_at - started_at).total_seconds() * 1000)
elif duration is not None:
duration = int(round(float(duration)))
span_data = {
'span_id': span_id or new_span_id(),
'trace_id': trace_id,
'parent_span_id': parent_span_id,
'name': name,
'kind': kind,
'status': normalize_trace_status(status),
'started_at': started_at,
'ended_at': ended_at or _utc_now(),
'duration': duration,
'message_id': message_id,
'session_id': session_id,
'bot_id': bot_id,
'pipeline_id': pipeline_id,
'attributes': _json_dumps(attributes),
'error_message': error_message,
}
await self.ap.persistence_mgr.execute_async(
sqlalchemy.insert(persistence_monitoring.MonitoringSpan).values(span_data)
)
return span_data['span_id']
async def record_message(
self,
bot_id: str,
@@ -1076,6 +1242,19 @@ class MonitoringService:
for row in error_rows
]
trace_query = (
sqlalchemy.select(persistence_monitoring.MonitoringTrace)
.where(persistence_monitoring.MonitoringTrace.message_id == message_id)
.order_by(persistence_monitoring.MonitoringTrace.started_at.desc())
.limit(1)
)
trace_result = await self.ap.persistence_mgr.execute_async(trace_query)
trace_row = trace_result.first()
trace = None
if trace_row:
trace_model = trace_row[0] if isinstance(trace_row, tuple) else trace_row
trace = self._serialize_trace(trace_model)
return {
'message_id': message_id,
'found': True,
@@ -1090,6 +1269,84 @@ class MonitoringService:
'average_duration_ms': int(total_duration / len(llm_rows)) if len(llm_rows) > 0 else 0,
},
'errors': errors,
'trace': trace,
}
def _serialize_trace(self, trace: persistence_monitoring.MonitoringTrace) -> dict:
data = self.ap.persistence_mgr.serialize_model(persistence_monitoring.MonitoringTrace, trace)
data['attributes'] = _json_loads(data.get('attributes')) or {}
return data
def _serialize_span(self, span: persistence_monitoring.MonitoringSpan) -> dict:
data = self.ap.persistence_mgr.serialize_model(persistence_monitoring.MonitoringSpan, span)
data['attributes'] = _json_loads(data.get('attributes')) or {}
return data
async def get_traces(
self,
bot_ids: list[str] | None = None,
pipeline_ids: list[str] | None = None,
session_ids: list[str] | None = None,
statuses: list[str] | None = None,
start_time: datetime.datetime | None = None,
end_time: datetime.datetime | None = None,
limit: int = 100,
offset: int = 0,
) -> tuple[list[dict], int]:
"""Get trace headers with filters."""
conditions = []
if bot_ids:
conditions.append(persistence_monitoring.MonitoringTrace.bot_id.in_(bot_ids))
if pipeline_ids:
conditions.append(persistence_monitoring.MonitoringTrace.pipeline_id.in_(pipeline_ids))
if session_ids:
conditions.append(persistence_monitoring.MonitoringTrace.session_id.in_(session_ids))
if statuses:
conditions.append(persistence_monitoring.MonitoringTrace.status.in_(statuses))
if start_time:
conditions.append(persistence_monitoring.MonitoringTrace.started_at >= start_time)
if end_time:
conditions.append(persistence_monitoring.MonitoringTrace.started_at <= end_time)
count_query = sqlalchemy.select(sqlalchemy.func.count(persistence_monitoring.MonitoringTrace.trace_id))
query = sqlalchemy.select(persistence_monitoring.MonitoringTrace)
if conditions:
clause = sqlalchemy.and_(*conditions)
count_query = count_query.where(clause)
query = query.where(clause)
total_result = await self.ap.persistence_mgr.execute_async(count_query)
total = total_result.scalar() or 0
query = query.order_by(persistence_monitoring.MonitoringTrace.started_at.desc()).limit(limit).offset(offset)
result = await self.ap.persistence_mgr.execute_async(query)
traces = [self._serialize_trace(row[0] if isinstance(row, tuple) else row) for row in result.all()]
return traces, total
async def get_trace_details(self, trace_id: str) -> dict:
"""Get a single trace and all spans in chronological order."""
trace_query = sqlalchemy.select(persistence_monitoring.MonitoringTrace).where(
persistence_monitoring.MonitoringTrace.trace_id == trace_id
)
trace_result = await self.ap.persistence_mgr.execute_async(trace_query)
trace_row = trace_result.first()
if not trace_row:
return {'trace_id': trace_id, 'found': False}
trace = trace_row[0] if isinstance(trace_row, tuple) else trace_row
span_query = (
sqlalchemy.select(persistence_monitoring.MonitoringSpan)
.where(persistence_monitoring.MonitoringSpan.trace_id == trace_id)
.order_by(persistence_monitoring.MonitoringSpan.started_at.asc())
)
span_result = await self.ap.persistence_mgr.execute_async(span_query)
spans = [self._serialize_span(row[0] if isinstance(row, tuple) else row) for row in span_result.all()]
return {
'trace_id': trace_id,
'found': True,
'trace': self._serialize_trace(trace),
'spans': spans,
}
# ========== Export Methods ==========
+1 -518
View File
@@ -105,7 +105,6 @@ class BoxService:
f'LangBot Box runtime initialized: profile={self.profile.name} '
f'default_workspace={self.default_workspace or "(none)"}'
)
await self._purge_attachment_dirs()
except Exception as exc:
self.ap.logger.warning(f'LangBot Box runtime unavailable, sandbox features disabled: {exc}')
self._available = False
@@ -336,507 +335,6 @@ class BoxService:
return await self.execute_spec_payload(spec_payload, query)
# ── Attachment passthrough (inbound / outbound) ──────────────────
#
# IM/webchat attachments (images, voices, files) reach the LLM as
# multimodal content, but historically never landed on the sandbox
# filesystem, so the agent's exec/read/write tools could not operate on
# them. Conversely, files the agent produced inside the sandbox were
# never surfaced back to the user. These two helpers close both gaps:
#
# inbound : message_chain attachments -> /workspace/inbox/<query_id>/
# outbound : /workspace/outbox/<query_id>/ -> reply MessageChain
#
# Transfer prefers DIRECT HOST FILESYSTEM access to the bind-mounted
# workspace (default_workspace on the host maps to /workspace inside the
# container), which has no size limit. This covers the local docker /
# nsjail / stdio backends. For backends where the workspace is NOT visible
# on the LangBot host (E2B, an external remote runtime.endpoint), it falls
# back to a base64-through-exec round-trip. The exec channel can only move
# small files reliably — the docker backend passes the command as a single
# argv (ARG_MAX) and exec stdout is truncated by output_limit_chars — so
# the host path is strongly preferred and used whenever available.
INBOX_MOUNT_DIR = '/workspace/inbox'
OUTBOX_MOUNT_DIR = '/workspace/outbox'
INBOX_SUBDIR = 'inbox'
OUTBOX_SUBDIR = 'outbox'
# Hard cap on a single attachment. The HTTP upload endpoints already cap
# uploads at 10MiB; keep parity.
_ATTACHMENT_MAX_BYTES = 10 * _MIB
# Conservative cap for the exec FALLBACK path only (ARG_MAX / stdout
# truncation). The host-filesystem path has no such limit.
_EXEC_FALLBACK_MAX_BYTES = 256 * 1024
def _host_query_dir(self, subdir: str, query_id) -> str | None:
"""Host path for ``/workspace/<subdir>/<query_id>`` when LangBot can
access the bind-mounted workspace directly, else ``None``.
``default_workspace`` is the host directory bind-mounted to
``/workspace`` for the local docker/nsjail backends and shared
outright in stdio mode, so a file written there by LangBot is visible
to the sandbox (and vice-versa). It is ``None`` / not a local dir for
E2B and remote runtimes, where we must fall back to the exec channel.
"""
root = self.default_workspace
if not root or not os.path.isdir(root):
return None
return os.path.join(root, subdir, str(query_id))
async def _purge_attachment_dirs(self) -> None:
"""Remove leftover inbox/outbox directories on startup.
``query_id`` is a process-local counter (see pipeline query pool) that
resets to 0 on every restart, so per-query attachment directories from
a previous process would otherwise be silently reused — leaking a prior
run's inbound files and re-sending stale outbound files.
Outbox files are written by the sandbox **container**, which runs as
root over the bind-mount, so the LangBot host process (a non-root user)
cannot ``rmtree`` them. We therefore try a host-side delete first (fast,
works for host-owned inbox files) and, for anything that survives,
delete from *inside* the sandbox via exec where the container's root can
remove its own files. Best-effort: never block startup.
"""
root = self.default_workspace
if not root or not os.path.isdir(root):
return
import shutil
host_survivors: list[str] = []
def _host_purge() -> list[str]:
survivors: list[str] = []
for subdir in (self.INBOX_SUBDIR, self.OUTBOX_SUBDIR):
path = os.path.join(root, subdir)
if not os.path.isdir(path):
continue
shutil.rmtree(path, ignore_errors=True)
if os.path.exists(path):
survivors.append(subdir)
return survivors
try:
host_survivors = await asyncio.to_thread(_host_purge)
except Exception as exc: # pragma: no cover - defensive
self.ap.logger.warning(f'Host-side purge of sandbox attachment dirs failed: {exc}')
host_survivors = [self.INBOX_SUBDIR, self.OUTBOX_SUBDIR]
if not host_survivors:
self.ap.logger.info('Purged leftover sandbox attachment dirs from a previous process.')
return
# Root-owned leftovers (container output): delete from inside the box.
targets = ' '.join(f'/workspace/{sub}' for sub in host_survivors)
try:
spec = self.build_spec({'cmd': f'rm -rf {targets}', 'session_id': '__startup_purge__', 'timeout_sec': 30})
await self.client.execute(spec)
self.ap.logger.info(
f'Purged root-owned leftover sandbox attachment dirs via sandbox exec: {host_survivors}'
)
except Exception as exc:
self.ap.logger.warning(
f'Failed to purge root-owned sandbox attachment dirs {host_survivors} via exec: {exc}'
)
@staticmethod
def _sanitize_attachment_name(name: str, fallback: str) -> str:
"""Reduce an arbitrary attachment name to a safe basename.
Strips directory separators and parent refs so a crafted file name
can never escape the inbox/outbox directory.
"""
base = os.path.basename(str(name or '').replace('\\', '/').strip())
base = base.lstrip('.') or ''
# Drop anything that is not a conservative filename charset.
cleaned = ''.join(c for c in base if c.isalnum() or c in ('.', '_', '-', ' ')).strip()
cleaned = cleaned.replace(' ', '_')
return cleaned or fallback
@staticmethod
async def _component_to_bytes(component) -> tuple[bytes, str] | None:
"""Best-effort extraction of (bytes, mime) from a platform component.
Handles base64, http(s) url and local path sources. Returns None when
no payload can be resolved.
"""
import base64 as _b64
b64 = getattr(component, 'base64', None)
if b64:
data = b64
mime = 'application/octet-stream'
if isinstance(data, str) and data.startswith('data:'):
split_index = data.find(';base64,')
if split_index != -1:
mime = data[5:split_index]
data = data[split_index + 8 :]
try:
return _b64.b64decode(data), mime
except Exception:
return None
url = getattr(component, 'url', None)
if url:
try:
import httpx
async with httpx.AsyncClient(timeout=30) as client:
resp = await client.get(url)
resp.raise_for_status()
return resp.content, resp.headers.get('Content-Type', 'application/octet-stream')
except Exception:
return None
path = getattr(component, 'path', None)
if path:
try:
import aiofiles
async with aiofiles.open(path, 'rb') as f:
return await f.read(), 'application/octet-stream'
except Exception:
return None
return None
async def _write_files_into_sandbox(
self,
query: pipeline_query.Query,
subdir: str,
target_mount_dir: str,
files: list[tuple[str, bytes]],
) -> list[str]:
"""Write *files* (name, bytes) into the per-query directory.
Prefers a direct host-filesystem write to the bind-mounted workspace
(no size limit). Falls back to a base64-through-exec round-trip only
when the workspace is not visible on the LangBot host (E2B / remote).
Returns the list of in-sandbox paths actually written.
"""
if not files:
return []
host_dir = self._host_query_dir(subdir, query.query_id)
if host_dir is not None:
return await asyncio.to_thread(self._write_files_host, host_dir, target_mount_dir, files)
return await self._write_files_via_exec(query, target_mount_dir, files)
def _write_files_host(
self,
host_dir: str,
target_mount_dir: str,
files: list[tuple[str, bytes]],
) -> list[str]:
"""Write attachments straight onto the bind-mounted host directory.
Recreates the per-query directory from scratch so a reused query_id
(the webchat session uses small sequential ids) never inherits stale
files from an earlier turn.
"""
import shutil
shutil.rmtree(host_dir, ignore_errors=True)
os.makedirs(host_dir, exist_ok=True)
written: list[str] = []
for name, data in files:
with open(os.path.join(host_dir, name), 'wb') as fh:
fh.write(data)
written.append(f'{target_mount_dir}/{name}')
return written
async def _write_files_via_exec(
self,
query: pipeline_query.Query,
target_dir: str,
files: list[tuple[str, bytes]],
) -> list[str]:
"""Fallback: ship files into the sandbox over the exec channel.
Only used for backends without host-filesystem access (E2B / remote).
Each file is base64-decoded inside the sandbox. Files larger than the
conservative exec cap are skipped (ARG_MAX / stdout limits).
"""
import base64 as _b64
import json as _json
manifest = []
for name, data in files:
if len(data) > self._EXEC_FALLBACK_MAX_BYTES:
self.ap.logger.warning(
f'Attachment "{name}" ({len(data)} bytes) exceeds the exec-channel '
f'fallback limit ({self._EXEC_FALLBACK_MAX_BYTES} bytes); skipping. '
f'Configure a host-shared workspace to transfer large files.'
)
continue
manifest.append({'name': name, 'b64': _b64.b64encode(data).decode('ascii')})
if not manifest:
return []
manifest_b64 = _b64.b64encode(_json.dumps(manifest).encode('utf-8')).decode('ascii')
script = (
'import base64, json, os, shutil\n'
f'target = {target_dir!r}\n'
'shutil.rmtree(target, ignore_errors=True)\n'
'os.makedirs(target, exist_ok=True)\n'
f'manifest = json.loads(base64.b64decode({manifest_b64!r}))\n'
'written = []\n'
'for item in manifest:\n'
" p = os.path.join(target, item['name'])\n"
" with open(p, 'wb') as f:\n"
" f.write(base64.b64decode(item['b64']))\n"
' written.append(p)\n'
'print(json.dumps(written))\n'
)
result = await self.execute_tool(
{'command': f"python3 - <<'LBPY'\n{script}\nLBPY", 'timeout_sec': 120},
query,
)
if not result.get('ok'):
self.ap.logger.warning(
f'Failed to write inbound attachments into sandbox via exec: '
f'query_id={query.query_id} stderr={result.get("stderr", "")[:200]}'
)
return []
try:
return _json.loads(str(result.get('stdout') or '').strip().splitlines()[-1])
except Exception:
return []
async def materialize_inbound_attachments(self, query: pipeline_query.Query) -> list[dict]:
"""Persist message-chain attachments into the sandbox inbox.
Returns a list of ``{path, name, type, size}`` describing what was
written, so the runner can tell the LLM the exact in-sandbox paths.
Returns ``[]`` when sandbox is unavailable or there are no attachments.
"""
if not self._available:
return []
import langbot_plugin.api.entities.builtin.platform.message as platform_message
message_chain = getattr(query, 'message_chain', None)
if not message_chain:
return []
type_map = [
(platform_message.Image, 'Image', 'image', 'png'),
(platform_message.Voice, 'Voice', 'voice', 'wav'),
(platform_message.File, 'File', 'file', 'bin'),
]
pending: list[tuple[str, bytes]] = []
descriptors: list[dict] = []
index = 0
for component in message_chain:
matched = None
for cls, kind, prefix, default_ext in type_map:
if isinstance(component, cls):
matched = (kind, prefix, default_ext)
break
if matched is None:
continue
kind, prefix, default_ext = matched
payload = await self._component_to_bytes(component)
if payload is None:
continue
data, _mime = payload
if not data or len(data) > self._ATTACHMENT_MAX_BYTES:
continue
index += 1
raw_name = getattr(component, 'name', None) or f'{prefix}_{index}.{default_ext}'
safe_name = self._sanitize_attachment_name(raw_name, f'{prefix}_{index}.{default_ext}')
pending.append((safe_name, data))
descriptors.append(
{
'name': safe_name,
'type': kind,
'size': len(data),
}
)
if not pending:
return []
target_dir = f'{self.INBOX_MOUNT_DIR}/{query.query_id}'
written = await self._write_files_into_sandbox(query, self.INBOX_SUBDIR, target_dir, pending)
written_basenames = {os.path.basename(p) for p in written}
result: list[dict] = []
for desc in descriptors:
if desc['name'] in written_basenames:
desc['path'] = f'{target_dir}/{desc["name"]}'
result.append(desc)
if result:
self.ap.logger.info(
f'Materialized {len(result)} inbound attachment(s) into sandbox: '
f'query_id={query.query_id} dir={target_dir}'
)
return result
async def collect_outbound_attachments(self, query: pipeline_query.Query) -> list[dict]:
"""Collect files the agent produced in the sandbox outbox.
Reads ``/workspace/outbox/<query_id>/`` (recursively) — directly from
the bind-mounted host directory when available (no size limit), else
via the exec channel — returns a list of ``{type, name, base64}``
ready to become platform message components, then clears the outbox so
a later turn in the same session does not re-send stale files. Returns
``[]`` when nothing was produced.
"""
if not self._available:
return []
host_dir = self._host_query_dir(self.OUTBOX_SUBDIR, query.query_id)
if host_dir is not None:
entries = await asyncio.to_thread(self._read_outbox_host, host_dir)
else:
entries = await self._read_outbox_via_exec(query)
attachments = self._classify_outbound_entries(entries)
# Always clear the per-query outbox after reading — even when nothing
# was collected — so a later turn that reuses the same query_id (the
# counter resets across restarts) never inherits stale files.
await self._clear_outbox(query, host_dir)
if attachments:
self.ap.logger.info(
f'Collected {len(attachments)} outbound attachment(s) from sandbox: query_id={query.query_id}'
)
return attachments
def _read_outbox_host(self, host_dir: str) -> list[dict]:
"""Read outbox files straight off the bind-mounted host directory."""
import base64 as _b64
entries: list[dict] = []
if not os.path.isdir(host_dir):
return entries
for root, _dirs, names in os.walk(host_dir):
for name in sorted(names):
path = os.path.join(root, name)
try:
if os.path.getsize(path) > self._ATTACHMENT_MAX_BYTES:
continue
with open(path, 'rb') as fh:
data = fh.read()
except OSError:
continue
rel = os.path.relpath(path, host_dir)
entries.append({'name': rel, 'b64': _b64.b64encode(data).decode('ascii')})
return entries
async def _read_outbox_via_exec(self, query: pipeline_query.Query) -> list[dict]:
"""Fallback: read the outbox over the exec channel (E2B / remote).
Note: exec stdout is truncated by ``output_limit_chars``, so this path
only reliably transfers small files. The host path is preferred.
"""
import json as _json
target_dir = f'{self.OUTBOX_MOUNT_DIR}/{query.query_id}'
max_bytes = self._EXEC_FALLBACK_MAX_BYTES
script = (
'import base64, json, os\n'
f'target = {target_dir!r}\n'
f'max_bytes = {max_bytes}\n'
'out = []\n'
'if os.path.isdir(target):\n'
' for root, _dirs, names in os.walk(target):\n'
' for n in sorted(names):\n'
' p = os.path.join(root, n)\n'
' try:\n'
' if os.path.getsize(p) > max_bytes:\n'
' continue\n'
" with open(p, 'rb') as f:\n"
' data = f.read()\n'
' except OSError:\n'
' continue\n'
' rel = os.path.relpath(p, target)\n'
" out.append({'name': rel, 'b64': base64.b64encode(data).decode('ascii')})\n"
'print(json.dumps(out))\n'
)
result = await self.execute_tool(
{'command': f"python3 - <<'LBPY'\n{script}\nLBPY", 'timeout_sec': 120},
query,
)
if not result.get('ok'):
return []
try:
return _json.loads(str(result.get('stdout') or '').strip().splitlines()[-1])
except Exception:
return []
async def _clear_outbox(self, query: pipeline_query.Query, host_dir: str | None) -> None:
"""Empty the per-query outbox after collection.
Tries a host-side ``rmtree`` first (fast, no container round-trip).
Outbox files are created by the sandbox container as root over the
bind-mount, so when LangBot runs as a non-root user the host delete
fails silently and the files survive — they would then be re-collected
on the next turn that reuses the same query_id. So if anything survives
the host delete, clear it from *inside* the sandbox via exec, where the
container's root can remove its own files. Best-effort: never raise
into the pipeline.
"""
target_dir = f'{self.OUTBOX_MOUNT_DIR}/{query.query_id}'
if host_dir is not None:
import shutil
def _clear() -> bool:
shutil.rmtree(host_dir, ignore_errors=True)
survived = os.path.exists(host_dir) and bool(os.listdir(host_dir))
os.makedirs(host_dir, exist_ok=True)
return survived
survived = await asyncio.to_thread(_clear)
if not survived:
return
# Root-owned container files survived the host delete — fall through.
try:
await self.execute_tool(
{'command': f'rm -rf {target_dir} && mkdir -p {target_dir}', 'timeout_sec': 30},
query,
)
except Exception as exc:
self.ap.logger.warning(f'Failed to clear sandbox outbox {target_dir}: {exc}')
@staticmethod
def _classify_outbound_entries(entries: list[dict]) -> list[dict]:
"""Classify outbox files into Image/Voice/File component descriptors."""
image_exts = {'png', 'jpg', 'jpeg', 'gif', 'webp', 'bmp'}
voice_exts = {'wav', 'mp3', 'silk', 'amr', 'ogg', 'm4a', 'aac'}
mime_by_ext = {
'png': 'image/png',
'jpg': 'image/jpeg',
'jpeg': 'image/jpeg',
'gif': 'image/gif',
'webp': 'image/webp',
'bmp': 'image/bmp',
}
attachments: list[dict] = []
for entry in entries or []:
name = str(entry.get('name', '') or '')
b64 = entry.get('b64')
if not name or not b64:
continue
ext = name.rsplit('.', 1)[-1].lower() if '.' in name else ''
base_name = os.path.basename(name)
if ext in image_exts:
mime = mime_by_ext.get(ext, 'image/png')
attachments.append({'type': 'Image', 'name': base_name, 'base64': f'data:{mime};base64,{b64}'})
elif ext in voice_exts:
attachments.append({'type': 'Voice', 'name': base_name, 'base64': f'data:audio/{ext};base64,{b64}'})
else:
attachments.append({'type': 'File', 'name': base_name, 'base64': b64})
return attachments
async def shutdown(self):
await self.client.shutdown()
@@ -1302,19 +800,11 @@ class BoxService:
def get_recent_errors(self) -> list[dict]:
return list(self._recent_errors)
def get_system_guidance(self, query_id=None) -> str:
def get_system_guidance(self) -> str:
"""Return LLM system-prompt guidance for the exec tool.
All execution-specific prompt text is kept here so that callers
(e.g. LocalAgentRunner) stay free of box domain knowledge.
``query_id`` is the current turn's pipeline query id. When provided,
the guidance ALWAYS advertises the per-query outbox path so the agent
knows how to deliver generated files back to the user — even on turns
where the user sent no inbound attachment (e.g. "generate a QR code"),
which is exactly when the inbound-attachment note never fires. Outbound
collection in the wrapper runs on every turn regardless of inbound
files, so without this the file would be produced and silently dropped.
"""
guidance = (
'When the exec tool is available, use it for exact calculations, statistics, structured data parsing, '
@@ -1329,13 +819,6 @@ class BoxService:
'modify local files in the working directory, use exec with /workspace paths directly; do not ask the '
'user for directory parameters unless they explicitly need a different directory.'
)
if query_id is not None:
outbox_dir = f'{self.OUTBOX_MOUNT_DIR}/{query_id}'
guidance += (
f' If you produce any file (image, audio, document, etc.) that should be sent back to the user, '
f'write it into {outbox_dir}/ (create the directory if needed). Every file placed there will be '
'delivered to the user automatically; do not paste file contents or base64 into your reply.'
)
return guidance
async def get_status(self) -> dict:
+3 -20
View File
@@ -146,19 +146,13 @@ def wrap_python_command_with_env(command: str, *, mount_path: str = '/workspace'
_LB_PIP_CACHE_DIR="{mount_path}/.cache/pip"
mkdir -p "$_LB_META_DIR" "$_LB_TMP_DIR" "$_LB_PIP_CACHE_DIR"
_LB_SYSTEM_PYTHON="$(command -v python3 || command -v python || true)"
if [ -z "$_LB_SYSTEM_PYTHON" ]; then
echo "python3 or python is required to prepare the workspace Python environment" >&2
exit 127
fi
export TMPDIR="$_LB_TMP_DIR"
export TEMP="$_LB_TMP_DIR"
export TMP="$_LB_TMP_DIR"
export PIP_CACHE_DIR="$_LB_PIP_CACHE_DIR"
_lb_python_meta() {{
"$_LB_SYSTEM_PYTHON" - <<'PY'
python - <<'PY'
import hashlib
import json
import os
@@ -207,26 +201,15 @@ def wrap_python_command_with_env(command: str, *, mount_path: str = '/workspace'
_LB_LOCK_WAIT=0
while ! mkdir "$_LB_LOCK_DIR" 2>/dev/null; do
if [ "$_LB_LOCK_WAIT" -ge 120 ]; then
_LB_LOCK_OWNER="$(cat "$_LB_LOCK_DIR/pid" 2>/dev/null || true)"
if [ -n "$_LB_LOCK_OWNER" ] && kill -0 "$_LB_LOCK_OWNER" 2>/dev/null; then
echo "Timed out waiting for active Python environment lock: $_LB_LOCK_DIR" >&2
exit 1
fi
echo "Timed out waiting for Python environment lock, clearing stale lock: $_LB_LOCK_DIR" >&2
rm -rf "$_LB_LOCK_DIR" 2>/dev/null || true
if mkdir "$_LB_LOCK_DIR" 2>/dev/null; then
break
fi
echo "Timed out waiting for Python environment lock: $_LB_LOCK_DIR" >&2
exit 1
fi
sleep 1
_LB_LOCK_WAIT=$((_LB_LOCK_WAIT + 1))
done
printf '%s\\n' "$$" > "$_LB_LOCK_DIR/pid" 2>/dev/null || true
_lb_cleanup_lock() {{
rm -rf "$_LB_LOCK_DIR" >/dev/null 2>&1 || true
rmdir "$_LB_LOCK_DIR" >/dev/null 2>&1 || true
}}
trap _lb_cleanup_lock EXIT INT TERM
@@ -242,7 +225,7 @@ def wrap_python_command_with_env(command: str, *, mount_path: str = '/workspace'
if [ "$_LB_NEEDS_BOOTSTRAP" -eq 1 ]; then
rm -rf "$_LB_VENV_DIR"
"$_LB_SYSTEM_PYTHON" -m venv "$_LB_VENV_DIR"
python -m venv "$_LB_VENV_DIR"
. "$_LB_VENV_DIR/bin/activate"
python -m pip install --upgrade pip setuptools wheel
if [ -f "{mount_path}/requirements.txt" ]; then
@@ -3,6 +3,49 @@ import sqlalchemy
from .base import Base
class MonitoringTrace(Base):
"""End-to-end monitoring trace records"""
__tablename__ = 'monitoring_traces'
trace_id = sqlalchemy.Column(sqlalchemy.String(255), primary_key=True)
started_at = sqlalchemy.Column(sqlalchemy.DateTime, nullable=False, index=True)
ended_at = sqlalchemy.Column(sqlalchemy.DateTime, nullable=True, index=True)
duration = sqlalchemy.Column(sqlalchemy.Integer, nullable=True) # milliseconds
status = sqlalchemy.Column(sqlalchemy.String(50), nullable=False, index=True) # running, success, error
name = sqlalchemy.Column(sqlalchemy.String(255), nullable=False)
bot_id = sqlalchemy.Column(sqlalchemy.String(255), nullable=True, index=True)
bot_name = sqlalchemy.Column(sqlalchemy.String(255), nullable=True)
pipeline_id = sqlalchemy.Column(sqlalchemy.String(255), nullable=True, index=True)
pipeline_name = sqlalchemy.Column(sqlalchemy.String(255), nullable=True)
session_id = sqlalchemy.Column(sqlalchemy.String(255), nullable=True, index=True)
message_id = sqlalchemy.Column(sqlalchemy.String(255), nullable=True, index=True)
query_id = sqlalchemy.Column(sqlalchemy.String(255), nullable=True, index=True)
attributes = sqlalchemy.Column(sqlalchemy.Text, nullable=True)
class MonitoringSpan(Base):
"""Trace span records for pipeline, RAG, model, plugin and tool operations"""
__tablename__ = 'monitoring_spans'
span_id = sqlalchemy.Column(sqlalchemy.String(255), primary_key=True)
trace_id = sqlalchemy.Column(sqlalchemy.String(255), nullable=False, index=True)
parent_span_id = sqlalchemy.Column(sqlalchemy.String(255), nullable=True, index=True)
name = sqlalchemy.Column(sqlalchemy.String(255), nullable=False)
kind = sqlalchemy.Column(sqlalchemy.String(80), nullable=False, index=True)
status = sqlalchemy.Column(sqlalchemy.String(50), nullable=False, index=True)
started_at = sqlalchemy.Column(sqlalchemy.DateTime, nullable=False, index=True)
ended_at = sqlalchemy.Column(sqlalchemy.DateTime, nullable=True)
duration = sqlalchemy.Column(sqlalchemy.Integer, nullable=True) # milliseconds
message_id = sqlalchemy.Column(sqlalchemy.String(255), nullable=True, index=True)
session_id = sqlalchemy.Column(sqlalchemy.String(255), nullable=True, index=True)
bot_id = sqlalchemy.Column(sqlalchemy.String(255), nullable=True, index=True)
pipeline_id = sqlalchemy.Column(sqlalchemy.String(255), nullable=True, index=True)
attributes = sqlalchemy.Column(sqlalchemy.Text, nullable=True)
error_message = sqlalchemy.Column(sqlalchemy.Text, nullable=True)
class MonitoringMessage(Base):
"""Monitoring message records"""
@@ -0,0 +1,88 @@
"""add monitoring traces and spans
Revision ID: 0006_monitoring_traces
Revises: 0005_add_llm_context_length
Create Date: 2026-06-16
"""
import sqlalchemy as sa
from alembic import op
revision = '0006_monitoring_traces'
down_revision = '0005_add_llm_context_length'
branch_labels = None
depends_on = None
def upgrade() -> None:
conn = op.get_bind()
inspector = sa.inspect(conn)
tables = set(inspector.get_table_names())
if 'monitoring_traces' not in tables:
op.create_table(
'monitoring_traces',
sa.Column('trace_id', sa.String(length=255), nullable=False),
sa.Column('started_at', sa.DateTime(), nullable=False),
sa.Column('ended_at', sa.DateTime(), nullable=True),
sa.Column('duration', sa.Integer(), nullable=True),
sa.Column('status', sa.String(length=50), nullable=False),
sa.Column('name', sa.String(length=255), nullable=False),
sa.Column('bot_id', sa.String(length=255), nullable=True),
sa.Column('bot_name', sa.String(length=255), nullable=True),
sa.Column('pipeline_id', sa.String(length=255), nullable=True),
sa.Column('pipeline_name', sa.String(length=255), nullable=True),
sa.Column('session_id', sa.String(length=255), nullable=True),
sa.Column('message_id', sa.String(length=255), nullable=True),
sa.Column('query_id', sa.String(length=255), nullable=True),
sa.Column('attributes', sa.Text(), nullable=True),
sa.PrimaryKeyConstraint('trace_id'),
)
op.create_index('ix_monitoring_traces_started_at', 'monitoring_traces', ['started_at'])
op.create_index('ix_monitoring_traces_ended_at', 'monitoring_traces', ['ended_at'])
op.create_index('ix_monitoring_traces_status', 'monitoring_traces', ['status'])
op.create_index('ix_monitoring_traces_bot_id', 'monitoring_traces', ['bot_id'])
op.create_index('ix_monitoring_traces_pipeline_id', 'monitoring_traces', ['pipeline_id'])
op.create_index('ix_monitoring_traces_session_id', 'monitoring_traces', ['session_id'])
op.create_index('ix_monitoring_traces_message_id', 'monitoring_traces', ['message_id'])
op.create_index('ix_monitoring_traces_query_id', 'monitoring_traces', ['query_id'])
if 'monitoring_spans' not in tables:
op.create_table(
'monitoring_spans',
sa.Column('span_id', sa.String(length=255), nullable=False),
sa.Column('trace_id', sa.String(length=255), nullable=False),
sa.Column('parent_span_id', sa.String(length=255), nullable=True),
sa.Column('name', sa.String(length=255), nullable=False),
sa.Column('kind', sa.String(length=80), nullable=False),
sa.Column('status', sa.String(length=50), nullable=False),
sa.Column('started_at', sa.DateTime(), nullable=False),
sa.Column('ended_at', sa.DateTime(), nullable=True),
sa.Column('duration', sa.Integer(), nullable=True),
sa.Column('message_id', sa.String(length=255), nullable=True),
sa.Column('session_id', sa.String(length=255), nullable=True),
sa.Column('bot_id', sa.String(length=255), nullable=True),
sa.Column('pipeline_id', sa.String(length=255), nullable=True),
sa.Column('attributes', sa.Text(), nullable=True),
sa.Column('error_message', sa.Text(), nullable=True),
sa.PrimaryKeyConstraint('span_id'),
)
op.create_index('ix_monitoring_spans_trace_id', 'monitoring_spans', ['trace_id'])
op.create_index('ix_monitoring_spans_parent_span_id', 'monitoring_spans', ['parent_span_id'])
op.create_index('ix_monitoring_spans_kind', 'monitoring_spans', ['kind'])
op.create_index('ix_monitoring_spans_status', 'monitoring_spans', ['status'])
op.create_index('ix_monitoring_spans_started_at', 'monitoring_spans', ['started_at'])
op.create_index('ix_monitoring_spans_message_id', 'monitoring_spans', ['message_id'])
op.create_index('ix_monitoring_spans_session_id', 'monitoring_spans', ['session_id'])
op.create_index('ix_monitoring_spans_bot_id', 'monitoring_spans', ['bot_id'])
op.create_index('ix_monitoring_spans_pipeline_id', 'monitoring_spans', ['pipeline_id'])
def downgrade() -> None:
conn = op.get_bind()
inspector = sa.inspect(conn)
tables = set(inspector.get_table_names())
if 'monitoring_spans' in tables:
op.drop_table('monitoring_spans')
if 'monitoring_traces' in tables:
op.drop_table('monitoring_traces')
+156 -27
View File
@@ -2,6 +2,9 @@ from __future__ import annotations
import typing
import traceback
import time
import uuid
import datetime
import sqlalchemy
@@ -79,6 +82,19 @@ class RuntimePipeline:
enable_all_plugins: bool
"""是否启用所有插件"""
@staticmethod
def _new_span_id() -> str:
return f'span-{uuid.uuid4().hex[:16]}'
@staticmethod
def _utc_now() -> datetime.datetime:
return datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
@staticmethod
def _query_session_id(query: pipeline_query.Query) -> str:
launcher_type = query.launcher_type.value if hasattr(query.launcher_type, 'value') else str(query.launcher_type)
return f'{launcher_type}_{query.launcher_id}'
enable_all_mcp_servers: bool
"""是否启用所有MCP服务器"""
@@ -234,44 +250,102 @@ class RuntimePipeline:
stage_container = self.stage_containers[i]
query.current_stage_name = stage_container.inst_name # 标记到 Query 对象里
span_started_at = self._utc_now()
span_started = time.perf_counter()
span_status = 'success'
span_error = None
span_result_type = None
result = stage_container.inst.process(query, stage_container.inst_name)
try:
result = stage_container.inst.process(query, stage_container.inst_name)
if isinstance(result, typing.Coroutine):
result = await result
if isinstance(result, typing.Coroutine):
result = await result
if isinstance(result, pipeline_entities.StageProcessResult): # 直接返回结果
self.ap.logger.debug(
f'Stage {stage_container.inst_name} processed query {query.query_id} res {result.result_type}'
)
await self._check_output(query, result)
if result.result_type == pipeline_entities.ResultType.INTERRUPT:
self.ap.logger.debug(f'Stage {stage_container.inst_name} interrupted query {query.query_id}')
break
elif result.result_type == pipeline_entities.ResultType.CONTINUE:
query = result.new_query
elif isinstance(result, typing.AsyncGenerator): # 生成器
self.ap.logger.debug(f'Stage {stage_container.inst_name} processed query {query.query_id} gen')
async for sub_result in result:
self.ap.logger.debug(
f'Stage {stage_container.inst_name} processed query {query.query_id} res {sub_result.result_type}'
if isinstance(result, pipeline_entities.StageProcessResult): # 直接返回结果
span_result_type = str(
result.result_type.value if hasattr(result.result_type, 'value') else result.result_type
)
await self._check_output(query, sub_result)
self.ap.logger.debug(
f'Stage {stage_container.inst_name} processed query {query.query_id} res {result.result_type}'
)
await self._check_output(query, result)
if result.error_notice:
span_status = 'error'
span_error = result.error_notice
if sub_result.result_type == pipeline_entities.ResultType.INTERRUPT:
if result.result_type == pipeline_entities.ResultType.INTERRUPT:
self.ap.logger.debug(f'Stage {stage_container.inst_name} interrupted query {query.query_id}')
break
elif sub_result.result_type == pipeline_entities.ResultType.CONTINUE:
query = sub_result.new_query
await self._execute_from_stage(i + 1, query)
break
elif result.result_type == pipeline_entities.ResultType.CONTINUE:
query = result.new_query
elif isinstance(result, typing.AsyncGenerator): # 生成器
span_result_type = 'generator'
self.ap.logger.debug(f'Stage {stage_container.inst_name} processed query {query.query_id} gen')
async for sub_result in result:
span_result_type = str(
sub_result.result_type.value
if hasattr(sub_result.result_type, 'value')
else sub_result.result_type
)
self.ap.logger.debug(
f'Stage {stage_container.inst_name} processed query {query.query_id} res {sub_result.result_type}'
)
await self._check_output(query, sub_result)
if sub_result.error_notice:
span_status = 'error'
span_error = sub_result.error_notice
if sub_result.result_type == pipeline_entities.ResultType.INTERRUPT:
self.ap.logger.debug(
f'Stage {stage_container.inst_name} interrupted query {query.query_id}'
)
break
elif sub_result.result_type == pipeline_entities.ResultType.CONTINUE:
query = sub_result.new_query
await self._execute_from_stage(i + 1, query)
break
except Exception as e:
span_status = 'error'
span_error = str(e)
raise
finally:
trace_id = (query.variables or {}).get('_monitoring_trace_id')
root_span_id = (query.variables or {}).get('_monitoring_root_span_id')
if trace_id:
try:
await self.ap.monitoring_service.record_span(
trace_id=trace_id,
parent_span_id=root_span_id,
name=stage_container.inst_name,
kind='pipeline.stage',
status=span_status,
started_at=span_started_at,
duration=int((time.perf_counter() - span_started) * 1000),
message_id=(query.variables or {}).get('_monitoring_message_id'),
session_id=self._query_session_id(query),
bot_id=query.bot_uuid,
pipeline_id=self.pipeline_entity.uuid,
attributes={
'stage_class': stage_container.inst.__class__.__name__,
'result_type': span_result_type,
'query_id': query.query_id,
},
error_message=span_error,
)
except Exception as monitor_err:
self.ap.logger.error(f'Failed to record stage span: {monitor_err}')
i += 1
async def process_query(self, query: pipeline_query.Query):
"""处理请求"""
trace_started_at = self._utc_now()
trace_started = time.perf_counter()
root_span_id = self._new_span_id()
trace_id = None
trace_status = 'success'
# Get monitoring metadata
bot_name = query.variables.get('_monitoring_bot_name', 'Unknown')
pipeline_name = query.variables.get('_monitoring_pipeline_name', 'Unknown')
@@ -303,6 +377,28 @@ class RuntimePipeline:
except Exception as e:
self.ap.logger.error(f'Failed to record query start: {e}')
try:
trace_id = await self.ap.monitoring_service.start_trace(
name='LangBot query',
bot_id=query.bot_uuid or 'unknown',
bot_name=bot_name,
pipeline_id=self.pipeline_entity.uuid,
pipeline_name=pipeline_name,
session_id=self._query_session_id(query),
message_id=message_id or None,
query_id=query.query_id,
attributes={
'launcher_type': query.launcher_type.value
if hasattr(query.launcher_type, 'value')
else str(query.launcher_type),
'runner_name': runner_name,
},
)
query.variables['_monitoring_trace_id'] = trace_id
query.variables['_monitoring_root_span_id'] = root_span_id
except Exception as e:
self.ap.logger.error(f'Failed to start query trace: {e}')
try:
# Get bound plugins for this pipeline
bound_plugins = query.variables.get('_pipeline_bound_plugins', None)
@@ -336,7 +432,10 @@ class RuntimePipeline:
await self._execute_from_stage(0, query)
# Record query success only if no error occurred during processing
if not query.variables.get('_monitoring_has_error', False):
has_monitoring_error = query.variables.get('_monitoring_has_error', False)
if has_monitoring_error:
trace_status = 'error'
else:
try:
await monitoring_helper.MonitoringHelper.record_query_success(
ap=self.ap,
@@ -361,6 +460,7 @@ class RuntimePipeline:
self.ap.logger.error(f'Failed to record query response: {e}')
except Exception as e:
trace_status = 'error'
inst_name = query.current_stage_name if query.current_stage_name else 'unknown'
self.ap.logger.error(f'Error processing query {query.query_id} stage={inst_name} : {e}')
self.ap.logger.error(f'Traceback: {traceback.format_exc()}')
@@ -383,6 +483,35 @@ class RuntimePipeline:
self.ap.logger.error(f'Failed to record query error: {me}')
finally:
if trace_id:
try:
duration_ms = int((time.perf_counter() - trace_started) * 1000)
await self.ap.monitoring_service.record_span(
trace_id=trace_id,
span_id=root_span_id,
name='LangBot query',
kind='pipeline.query',
status=trace_status,
started_at=trace_started_at,
duration=duration_ms,
message_id=message_id or None,
session_id=self._query_session_id(query),
bot_id=query.bot_uuid,
pipeline_id=self.pipeline_entity.uuid,
attributes={
'query_id': query.query_id,
'pipeline_name': pipeline_name,
'runner_name': runner_name,
},
)
await self.ap.monitoring_service.finish_trace(
trace_id=trace_id,
status=trace_status,
duration=duration_ms,
message_id=message_id or None,
)
except Exception as monitor_err:
self.ap.logger.error(f'Failed to finish query trace: {monitor_err}')
self.ap.logger.debug(f'Query {query.query_id} processed')
del self.ap.query_pool.cached_queries[query.query_id]
+3 -54
View File
@@ -7,7 +7,6 @@ from .. import stage
import langbot_plugin.api.entities.builtin.platform.message as platform_message
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
import langbot_plugin.api.entities.builtin.provider.message as provider_message
import langbot_plugin.api.entities.events as events
@@ -24,50 +23,6 @@ class ResponseWrapper(stage.PipelineStage):
async def initialize(self, pipeline_config: dict):
pass
def _is_final_assistant_message(self, result) -> bool:
"""Whether *result* is the agent's final, tool-call-free answer.
Intermediate streaming chunks and tool-call rounds must NOT trigger
outbound attachment collection only the terminal assistant message.
"""
if getattr(result, 'role', None) != 'assistant':
return False
if result.tool_calls:
return False
if isinstance(result, provider_message.MessageChunk):
return bool(result.is_final)
return True
async def _append_outbound_attachments(
self,
query: pipeline_query.Query,
message_chain: platform_message.MessageChain,
) -> None:
"""Collect sandbox outbox files and append them to *message_chain*.
Runs at most once per query (guarded by a query variable) and never
raises into the pipeline attachment delivery is best-effort.
"""
if query.variables.get('_sandbox_outbound_collected'):
return
box_service = getattr(self.ap, 'box_service', None)
if box_service is None or not getattr(box_service, 'available', False):
return
query.variables['_sandbox_outbound_collected'] = True
try:
attachments = await box_service.collect_outbound_attachments(query)
except Exception as e:
self.ap.logger.warning(f'Outbound attachment collection failed: {e}')
return
for att in attachments:
att_type = att.get('type')
if att_type == 'Image':
message_chain.append(platform_message.Image(base64=att['base64']))
elif att_type == 'Voice':
message_chain.append(platform_message.Voice(base64=att['base64']))
else:
message_chain.append(platform_message.File(name=att.get('name', 'file'), base64=att['base64']))
async def process(
self,
query: pipeline_query.Query,
@@ -128,16 +83,10 @@ class ResponseWrapper(stage.PipelineStage):
)
else:
if event_ctx.event.reply_message_chain is not None:
reply_chain = event_ctx.event.reply_message_chain
query.resp_message_chain.append(event_ctx.event.reply_message_chain)
else:
reply_chain = result.get_content_platform_message_chain()
# Attach files the agent produced in the sandbox
# outbox, but only on the terminal assistant message.
if self._is_final_assistant_message(result):
await self._append_outbound_attachments(query, reply_chain)
query.resp_message_chain.append(reply_chain)
query.resp_message_chain.append(result.get_content_platform_message_chain())
yield entities.StageProcessResult(
result_type=entities.ResultType.CONTINUE,
@@ -312,18 +312,12 @@ class WebSocketAdapter(abstract_platform_adapter.AbstractMessagePlatformAdapter)
async def _process_image_components(self, message_chain_obj: list):
"""
处理消息链中的图片语音和文件组件 path 转换为 base64
Image / Voice / File components uploaded from the web client carry a
storage key in ``path``. Resolve it to a base64 data URI so downstream
stages (multimodal LLM input and the Box sandbox inbox) have a usable
payload, then drop the now-consumed storage object.
处理消息链中的图片和文件组件path转换为base64
Args:
message_chain_obj: 消息链对象列表
"""
import base64
import mimetypes
storage_mgr = self.ap.storage_mgr
@@ -331,33 +325,31 @@ class WebSocketAdapter(abstract_platform_adapter.AbstractMessagePlatformAdapter)
comp_type = component.get('type', '')
comp_path = component.get('path', '')
if not comp_path or comp_type not in ('Image', 'Voice', 'File'):
if not comp_path:
continue
try:
file_content = await storage_mgr.storage_provider.load(comp_path)
base64_str = base64.b64encode(file_content).decode('utf-8')
if comp_type == 'Image':
try:
file_content = await storage_mgr.storage_provider.load(comp_path)
base64_str = base64.b64encode(file_content).decode('utf-8')
lowered = comp_path.lower()
if comp_type == 'Image':
if lowered.endswith(('.jpg', '.jpeg')):
file_key = comp_path
if file_key.lower().endswith(('.jpg', '.jpeg')):
mime_type = 'image/jpeg'
elif lowered.endswith('.gif'):
elif file_key.lower().endswith('.png'):
mime_type = 'image/png'
elif file_key.lower().endswith('.gif'):
mime_type = 'image/gif'
elif lowered.endswith('.webp'):
elif file_key.lower().endswith('.webp'):
mime_type = 'image/webp'
else:
mime_type = 'image/png'
elif comp_type == 'Voice':
mime_type = mimetypes.guess_type(comp_path)[0] or 'audio/wav'
else: # File
mime_type = mimetypes.guess_type(comp_path)[0] or 'application/octet-stream'
component['base64'] = f'data:{mime_type};base64,{base64_str}'
await storage_mgr.storage_provider.delete(comp_path)
component['path'] = ''
except Exception as e:
await self.logger.error(f'Failed to load {comp_type} file {comp_path}: {e}')
component['base64'] = f'data:{mime_type};base64,{base64_str}'
await storage_mgr.storage_provider.delete(comp_path)
component['path'] = ''
except Exception as e:
await self.logger.error(f'Failed to load image file {comp_path}: {e}')
async def handle_websocket_message(
self,
+12 -1
View File
@@ -711,8 +711,19 @@ class PluginRuntimeConnector(ManagedRuntimeConnector):
endpoint: str,
method: str,
body: Any = None,
caller: dict[str, Any] | None = None,
headers: dict[str, str] | None = None,
) -> dict[str, Any]:
return await self.handler.handle_page_api(plugin_author, plugin_name, page_id, endpoint, method, body)
return await self.handler.handle_page_api(
plugin_author,
plugin_name,
page_id,
endpoint,
method,
body,
caller,
headers or {},
)
async def get_debug_info(self) -> dict[str, Any]:
"""Get debug information including debug key and WS URL"""
+19
View File
@@ -755,6 +755,21 @@ class RuntimeConnectionHandler(handler.Handler):
'session_name': session_name,
'bot_uuid': query.bot_uuid or '',
'sender_id': str(query.sender_id),
'_trace_context': {
'trace_id': query.variables.get('_monitoring_trace_id') if query.variables else None,
'parent_span_id': query.variables.get('_monitoring_root_span_id')
if query.variables
else None,
'message_id': query.variables.get('_monitoring_message_id') if query.variables else None,
'query_id': query.query_id,
'session_id': session_name,
'bot_id': query.bot_uuid or '',
'pipeline_id': query.pipeline_uuid or '',
'knowledge_base_id': kb_id,
'attributes': {
'source': 'plugin-api',
},
},
},
)
results = [entry.model_dump(mode='json') for entry in entries]
@@ -1011,6 +1026,8 @@ class RuntimeConnectionHandler(handler.Handler):
endpoint: str,
method: str,
body: Any = None,
caller: dict[str, Any] | None = None,
headers: dict[str, str] | None = None,
) -> dict[str, Any]:
"""Forward a page API call to the plugin via runtime."""
result = await self.call_action(
@@ -1022,6 +1039,8 @@ class RuntimeConnectionHandler(handler.Handler):
'endpoint': endpoint,
'method': method,
'body': body,
'caller': caller,
'headers': headers or {},
},
timeout=30,
)
+1 -11
View File
@@ -1,6 +1,5 @@
from __future__ import annotations
import asyncio
import sqlalchemy
import traceback
@@ -85,17 +84,8 @@ class ModelManager:
self.ap.logger.info('LangBot Space Models service is disabled, skipping sync.')
return
sync_timeout = space_config.get('models_sync_timeout')
try:
if sync_timeout:
await asyncio.wait_for(
self.sync_new_models_from_space(),
timeout=float(sync_timeout),
)
else:
await self.sync_new_models_from_space()
except asyncio.TimeoutError:
self.ap.logger.warning(f'LangBot Space model sync timed out after {sync_timeout}s, skipping startup sync.')
await self.sync_new_models_from_space()
except Exception as e:
self.ap.logger.warning('Failed to sync new models from LangBot Space, model list may not be updated.')
self.ap.logger.warning(f' - Error: {e}')
@@ -3,6 +3,7 @@ from __future__ import annotations
import abc
import typing
import time
import datetime
from ...core import app
from ...entity.persistence import model as persistence_model
@@ -16,6 +17,15 @@ LLM_USAGE_QUERY_VARIABLE = '_llm_usage'
STREAM_USAGE_QUERY_VARIABLE = '_stream_usage'
def _utc_now() -> datetime.datetime:
return datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
def _query_session_id(query: pipeline_query.Query) -> str:
launcher_type = query.launcher_type.value if hasattr(query.launcher_type, 'value') else str(query.launcher_type)
return f'{launcher_type}_{query.launcher_id}'
def _store_llm_usage(query: pipeline_query.Query | None, usage_info: dict | None) -> None:
"""Store the latest provider usage on the query for upstream action handlers."""
if query is None or not usage_info:
@@ -59,6 +69,7 @@ class RuntimeProvider:
"""Bridge method for invoking LLM with monitoring"""
# Start timing for monitoring
start_time = time.time()
span_started_at = _utc_now()
input_tokens = 0
output_tokens = 0
status = 'success'
@@ -125,6 +136,30 @@ class RuntimeProvider:
error_message=error_message,
message_id=message_id,
)
trace_id = query.variables.get('_monitoring_trace_id') if query.variables else None
parent_span_id = query.variables.get('_monitoring_root_span_id') if query.variables else None
if trace_id:
await self.requester.ap.monitoring_service.record_span(
trace_id=trace_id,
parent_span_id=parent_span_id,
name=f'LLM {model.model_entity.name}',
kind='model.llm',
status=status,
started_at=span_started_at,
duration=duration_ms,
message_id=message_id,
session_id=_query_session_id(query),
bot_id=query.bot_uuid,
pipeline_id=query.pipeline_uuid,
attributes={
'model_name': model.model_entity.name,
'input_tokens': input_tokens,
'output_tokens': output_tokens,
'total_tokens': input_tokens + output_tokens,
'stream': False,
},
error_message=error_message,
)
except Exception as monitor_err:
self.requester.ap.logger.error(f'[Monitoring] Failed to record LLM call: {monitor_err}')
@@ -140,6 +175,7 @@ class RuntimeProvider:
"""Bridge method for invoking LLM stream with monitoring"""
# Start timing for monitoring
start_time = time.time()
span_started_at = _utc_now()
status = 'success'
error_message = None
input_tokens = 0
@@ -204,6 +240,30 @@ class RuntimeProvider:
error_message=error_message,
message_id=message_id,
)
trace_id = query.variables.get('_monitoring_trace_id') if query.variables else None
parent_span_id = query.variables.get('_monitoring_root_span_id') if query.variables else None
if trace_id:
await self.requester.ap.monitoring_service.record_span(
trace_id=trace_id,
parent_span_id=parent_span_id,
name=f'LLM stream {model.model_entity.name}',
kind='model.llm',
status=status,
started_at=span_started_at,
duration=duration_ms,
message_id=message_id,
session_id=_query_session_id(query),
bot_id=query.bot_uuid,
pipeline_id=query.pipeline_uuid,
attributes={
'model_name': model.model_entity.name,
'input_tokens': input_tokens,
'output_tokens': output_tokens,
'total_tokens': input_tokens + output_tokens,
'stream': True,
},
error_message=error_message,
)
except Exception as monitor_err:
self.requester.ap.logger.error(f'[Monitoring] Failed to record LLM stream call: {monitor_err}')
@@ -216,22 +216,11 @@ class LiteLLMRequester(requester.ProviderAPIRequester):
content = msg_dict.get('content')
if isinstance(content, list):
converted_parts = []
for part in content:
if isinstance(part, dict) and part.get('type') == 'image_base64':
part['image_url'] = {'url': part['image_base64']}
part['type'] = 'image_url'
del part['image_base64']
# OpenAI-compatible chat models reject non-image file parts
# (audio/document base64 or url). These originate from Voice /
# File attachments — including ones replayed from conversation
# history — and the agent already accesses their bytes via the
# sandbox. Drop them from the model payload to avoid
# "Invalid user message ... invalid content type=file_base64".
if isinstance(part, dict) and part.get('type') in ('file_base64', 'file_url'):
continue
converted_parts.append(part)
msg_dict['content'] = converted_parts
req_messages.append(msg_dict)
@@ -392,17 +381,6 @@ class LiteLLMRequester(requester.ProviderAPIRequester):
elif not isinstance(arguments, str):
arguments = str(arguments)
# Some OpenAI-compatible providers (notably Ollama's
# /v1/chat/completions) stream a tool-call delta with an `index` and
# a `function` payload but never emit an OpenAI-style `id`. Without
# an id the call used to be dropped here, so the whole tool call
# silently vanished: a tool-only turn then yielded no content and no
# tool call, the stream "completed" with 0 chars, and the chat
# appeared stuck. Synthesize a stable per-index id so named-but-idless
# tool calls survive. Providers that do send ids keep theirs.
if not state['id'] and state['name']:
state['id'] = f'call_{index}'
if not state['id'] or not state['name']:
continue
@@ -3,8 +3,8 @@ kind: LLMAPIRequester
metadata:
name: moonshot-chat-completions
label:
en_US: Moonshot / Kimi (Global · api.moonshot.ai)
zh_Hans: 月之暗面 / Kimi(国际站 · api.moonshot.ai
en_US: Moonshot
zh_Hans: 月之暗面
icon: moonshot.png
spec:
litellm_provider: openai
@@ -1,33 +0,0 @@
apiVersion: v1
kind: LLMAPIRequester
metadata:
name: moonshot-cn-chat-completions
label:
en_US: Moonshot / Kimi (China · api.moonshot.cn)
zh_Hans: 月之暗面 / Kimi(国内站 · api.moonshot.cn
icon: moonshot.png
spec:
litellm_provider: openai
config:
- name: base_url
label:
en_US: Base URL
zh_Hans: 基础 URL
type: string
required: true
default: https://api.moonshot.cn/v1
- name: timeout
label:
en_US: Timeout
zh_Hans: 超时时间
type: integer
required: true
default: 120
alias: "moonshot Moonshot 月之暗面 月暗 kimi Kimi 月之 暗面 moonshot-v1 k2 cn 国内 国内站"
support_type:
- llm
provider_category: manufacturer
execution:
python:
path: ./moonshotchatcmpl.py
attr: MoonshotChatCompletions
+16 -69
View File
@@ -104,68 +104,6 @@ class _StreamAccumulator:
class LocalAgentRunner(runner.RequestRunner):
"""Local agent request runner"""
async def _inject_inbound_attachments(
self,
query: pipeline_query.Query,
user_message: provider_message.Message,
) -> None:
"""Persist inbound attachments into the sandbox and tell the model.
No-op when the box service is unavailable or there are no attachments.
On success, appends an extra text ContentElement to the user message
listing the in-sandbox paths and the outbox convention, and stashes the
descriptors in ``query.variables['_sandbox_inbound_attachments']``.
"""
box_service = getattr(self.ap, 'box_service', None)
if box_service is None or not getattr(box_service, 'available', False):
return
try:
attachments = await box_service.materialize_inbound_attachments(query)
except Exception as e: # never break the chat turn over attachment IO
self.ap.logger.warning(f'Inbound attachment materialization failed: {e}')
return
if not attachments:
return
query.variables['_sandbox_inbound_attachments'] = attachments
lines = [
'The user sent attachments. They have been saved into the sandbox and are '
'available to the exec/read/write tools at these paths:'
]
for att in attachments:
lines.append(f'- {att["type"]}: {att["path"]} ({att["size"]} bytes)')
outbox_dir = f'{box_service.OUTBOX_MOUNT_DIR}/{query.query_id}'
lines.append(
'If you produce any file (image, audio, document, etc.) that should be sent '
f'back to the user, write it into {outbox_dir}/ (create the directory if '
'needed). Every file placed there will be delivered to the user automatically.'
)
note = '\n'.join(lines)
# Voice/File attachments are now available to the agent via the sandbox
# (exec/read/write tools). Their raw bytes must NOT be forwarded to the
# chat model as multimodal content: providers reject non-image file
# parts ("Invalid user message ... ensure all user messages are valid
# OpenAI chat completion messages"). Strip those content elements and
# rely on the sandbox-path note instead. Images are kept so vision
# models can still see them.
_model_unsafe_types = {'file_base64', 'file_url'}
if isinstance(user_message.content, list):
user_message.content = [
ce for ce in user_message.content if getattr(ce, 'type', None) not in _model_unsafe_types
]
if isinstance(user_message.content, str):
user_message.content = [
provider_message.ContentElement.from_text(user_message.content),
provider_message.ContentElement.from_text(note),
]
elif isinstance(user_message.content, list):
user_message.content.append(provider_message.ContentElement.from_text(note))
else:
user_message.content = [provider_message.ContentElement.from_text(note)]
def _build_request_messages(
self,
query: pipeline_query.Query,
@@ -177,7 +115,7 @@ class LocalAgentRunner(runner.RequestRunner):
req_messages.append(
provider_message.Message(
role='system',
content=self.ap.box_service.get_system_guidance(query.query_id),
content=self.ap.box_service.get_system_guidance(),
)
)
@@ -294,12 +232,6 @@ class LocalAgentRunner(runner.RequestRunner):
user_message = copy.deepcopy(query.user_message)
# Materialize inbound attachments (images / voices / files) into the
# sandbox so the agent's exec/read/write tools can operate on the real
# bytes — not just the multimodal copy the model sees. The exact
# in-sandbox paths are announced to the model as a system note.
await self._inject_inbound_attachments(query, user_message)
user_message_text = ''
if isinstance(user_message.content, str):
@@ -336,6 +268,21 @@ class LocalAgentRunner(runner.RequestRunner):
'bot_uuid': query.bot_uuid or '',
'sender_id': str(query.sender_id),
'session_name': f'{query.session.launcher_type.value}_{query.session.launcher_id}',
'_trace_context': {
'trace_id': query.variables.get('_monitoring_trace_id') if query.variables else None,
'parent_span_id': query.variables.get('_monitoring_root_span_id')
if query.variables
else None,
'message_id': query.variables.get('_monitoring_message_id') if query.variables else None,
'query_id': query.query_id,
'session_id': f'{query.launcher_type.value}_{query.launcher_id}',
'bot_id': query.bot_uuid or '',
'pipeline_id': query.pipeline_uuid or '',
'knowledge_base_id': kb_uuid,
'attributes': {
'source': 'local-agent',
},
},
},
)
@@ -1,18 +0,0 @@
from __future__ import annotations
from typing import Any
async def is_box_backend_available(ap: Any) -> bool:
"""Return whether the configured Box backend is ready for tool execution."""
box_service = getattr(ap, 'box_service', None)
if box_service is None:
return False
if not getattr(box_service, 'available', False):
return False
try:
status = await box_service.get_status()
backend_info = status.get('backend', {})
return bool(backend_info.get('available', False))
except Exception:
return False
@@ -5,8 +5,6 @@ import asyncio
import os
import shutil
import shlex
import threading
from contextlib import suppress
from typing import TYPE_CHECKING, Any
import pydantic
@@ -20,26 +18,12 @@ from ....box.workspace import (
rewrite_mounted_path,
rewrite_venv_command,
unwrap_venv_path,
wrap_python_command_with_env,
)
if TYPE_CHECKING:
from .mcp import RuntimeMCPSession
_WORKSPACE_COPY_LOCKS: dict[str, threading.Lock] = {}
_WORKSPACE_COPY_LOCKS_GUARD = threading.Lock()
def _workspace_copy_lock(path: str) -> threading.Lock:
with _WORKSPACE_COPY_LOCKS_GUARD:
lock = _WORKSPACE_COPY_LOCKS.get(path)
if lock is None:
lock = threading.Lock()
_WORKSPACE_COPY_LOCKS[path] = lock
return lock
class MCPSessionErrorPhase(enum.Enum):
"""Which phase of the MCP lifecycle failed."""
@@ -65,7 +49,7 @@ class MCPServerBoxConfig(pydantic.BaseModel):
host_path: str | None = None
host_path_mode: str = 'ro' # MCP servers default to read-write mount only when explicitly requested
env: dict[str, str] = pydantic.Field(default_factory=dict)
startup_timeout_sec: int = 300 # First Docker bootstrap may need to build a venv and install MCP deps.
startup_timeout_sec: int = 120 # Longer default to allow dependency bootstrap
cpus: float | None = None
memory_mb: int | None = None
pids_limit: int | None = None
@@ -144,7 +128,6 @@ class BoxStdioSessionRuntime:
workspace = self._build_workspace(host_path=None)
host_path = self.resolve_host_path()
process_cwd = '/workspace'
install_cmd: str | None = None
try:
await workspace.create_session()
@@ -185,8 +168,6 @@ class BoxStdioSessionRuntime:
env=self.server_config.get('env', {}),
cwd=process_cwd,
)
if install_cmd:
payload = self._wrap_process_payload_with_python_env(payload, process_cwd)
payload['process_id'] = self.process_id
await workspace.box_service.start_managed_process(workspace.session_id, payload)
except Exception:
@@ -272,42 +253,14 @@ class BoxStdioSessionRuntime:
@staticmethod
def _copy_workspace_tree(source_path: str, process_host_root: str, process_host_workspace: str) -> None:
# Docker-backed bootstrap writes root-owned runtime directories such as
# .venv/.tmp into the staged workspace. The host process may not be able
# to delete them, so refresh source files in place and preserve runtime
# directories instead of rmtree'ing the whole staging root.
with _workspace_copy_lock(process_host_root):
preserved_names = {'.venv', 'venv', 'env', '.cache', '.tmp', '.langbot'}
os.makedirs(process_host_workspace, exist_ok=True)
for name in os.listdir(process_host_workspace):
if name in preserved_names:
continue
path = os.path.join(process_host_workspace, name)
if os.path.isdir(path) and not os.path.islink(path):
shutil.rmtree(path, ignore_errors=True)
else:
# The entry may disappear between listdir and unlink if cleanup races us.
with suppress(FileNotFoundError):
os.unlink(path)
shutil.copytree(
source_path,
process_host_workspace,
symlinks=True,
dirs_exist_ok=True,
ignore=shutil.ignore_patterns(
'.git',
'__pycache__',
'.pytest_cache',
'.mypy_cache',
'.ruff_cache',
'.venv',
'venv',
'env',
'.cache',
'.tmp',
'.langbot',
),
)
shutil.rmtree(process_host_root, ignore_errors=True)
os.makedirs(process_host_root, exist_ok=True)
shutil.copytree(
source_path,
process_host_workspace,
symlinks=True,
ignore=shutil.ignore_patterns('.git', '__pycache__', '.pytest_cache', '.mypy_cache', '.ruff_cache'),
)
async def _cleanup_staged_workspace(self) -> None:
if not self.resolve_host_path():
@@ -390,25 +343,23 @@ class BoxStdioSessionRuntime:
@staticmethod
def detect_install_command(host_path: str, workspace_path: str = '/workspace') -> str | None:
workspace_kind = classify_python_workspace(host_path)
if workspace_kind in {'package', 'requirements'}:
return wrap_python_command_with_env('python -c "pass"', mount_path=workspace_path).rstrip()
quoted_workspace_path = shlex.quote(workspace_path)
if workspace_kind == 'package':
return (
'mkdir -p /opt/_lb_src'
f' && tar -C {quoted_workspace_path}'
' --exclude=.venv --exclude=.git --exclude=__pycache__'
' --exclude=node_modules --exclude=.tox --exclude=.nox'
' --exclude="*.egg-info" --exclude=.uv-cache'
' -cf - .'
' | tar -C /opt/_lb_src -xf -'
' && pip install --no-cache-dir /opt/_lb_src'
' && rm -rf /opt/_lb_src'
)
if workspace_kind == 'requirements':
return f'pip install --no-cache-dir -r {quoted_workspace_path}/requirements.txt'
return None
@staticmethod
def _wrap_process_payload_with_python_env(payload: dict[str, Any], workspace_path: str) -> dict[str, Any]:
"""Start a prepared Python workspace without writing bootstrap output to MCP stdio."""
workspace_root = workspace_path.rstrip('/') or '/workspace'
venv_dir = f'{workspace_root}/.venv'
venv_bin = f'{venv_dir}/bin'
command = ' '.join([shlex.quote(payload['command']), *[shlex.quote(arg) for arg in payload.get('args', [])]])
wrapped = dict(payload)
wrapped['command'] = 'sh'
wrapped['args'] = [
'-lc',
(f'export VIRTUAL_ENV={shlex.quote(venv_dir)}; export PATH={shlex.quote(venv_bin)}:$PATH; exec {command}'),
]
return wrapped
def build_box_session_payload(self, session_id: str, host_path: str | None = None) -> dict[str, Any]:
workspace = self._build_workspace()
workspace.session_id = session_id
+71 -532
View File
@@ -1,6 +1,5 @@
from __future__ import annotations
import base64
import json
import os
@@ -9,7 +8,6 @@ from langbot_plugin.api.entities.events import pipeline_query
from .. import loader
from ..errors import ToolNotFoundError
from .availability import is_box_backend_available
from . import skill as skill_loader
EXEC_TOOL_NAME = 'exec'
@@ -24,15 +22,6 @@ _ALL_TOOL_NAMES = {EXEC_TOOL_NAME, READ_TOOL_NAME, WRITE_TOOL_NAME, EDIT_TOOL_NA
# Skip these dirs during grep walk to avoid noise
_SKIP_DIRS = {'.git', 'node_modules', '__pycache__', '.venv', 'venv', '.tox', 'dist', 'build'}
_DEFAULT_READ_MAX_LINES = 2000
_MAX_READ_MAX_LINES = 10000
_DEFAULT_TOOL_RESULT_MAX_BYTES = 50 * 1024
_BOX_FILE_SCRIPT_MAX_BYTES = 2048
_GLOB_MAX_MATCHES = 100
_GREP_MAX_MATCHES = 200
_GREP_MAX_FILES = 5000
_GREP_MAX_LINE_CHARS = 500
class NativeToolLoader(loader.ToolLoader):
def __init__(self, ap):
@@ -54,7 +43,18 @@ class NativeToolLoader(loader.ToolLoader):
async def _check_backend_available(self) -> bool:
"""Check if the box backend is truly available (not just the runtime)."""
return await is_box_backend_available(self.ap)
box_service = getattr(self.ap, 'box_service', None)
if box_service is None:
return False
if not getattr(box_service, 'available', False):
return False
# Check if backend is truly available via get_status
try:
status = await box_service.get_status()
backend_info = status.get('backend', {})
return backend_info.get('available', False)
except Exception:
return False
async def get_tools(self, bound_plugins: list[str] | None = None) -> list[resource_tool.LLMTool]:
if not self._is_sandbox_available():
@@ -139,7 +139,6 @@ class NativeToolLoader(loader.ToolLoader):
# via execute_tool. Skills are mounted at /workspace/.skills/{name}/
# via extra_mounts built by BoxService.
result = await self.ap.box_service.execute_tool(parameters, query)
result = self._normalize_exec_result(result)
if selected_skill is not None:
self._refresh_skill_from_disk(selected_skill)
@@ -228,121 +227,34 @@ class NativeToolLoader(loader.ToolLoader):
except Exception:
return {'ok': False, 'error': stdout or 'Box file operation returned no result'}
async def _read_workspace_via_box(self, path: str, parameters: dict, query: pipeline_query.Query) -> dict:
offset = self._positive_int(parameters.get('offset'), default=1)
byte_offset = self._non_negative_int(parameters.get('byte_offset'), default=0)
max_lines = self._positive_int(
parameters.get('limit'),
default=_DEFAULT_READ_MAX_LINES,
max_value=_MAX_READ_MAX_LINES,
)
# Box file fallback returns through exec stdout, which is already capped
# by BoxService. Keep this payload small enough to remain valid JSON.
max_bytes = min(
self._positive_int(parameters.get('max_bytes'), default=_DEFAULT_TOOL_RESULT_MAX_BYTES),
_BOX_FILE_SCRIPT_MAX_BYTES,
)
encoding = self._read_encoding(parameters)
async def _read_workspace_via_box(self, path: str, query: pipeline_query.Query) -> dict:
script = f"""
import base64, json, os
import json, os
path = {json.dumps(path)}
offset = {offset}
byte_offset = {byte_offset}
max_lines = {max_lines}
max_bytes = {max_bytes}
encoding = {json.dumps(encoding)}
if not path.startswith('/workspace'):
print(json.dumps({{'ok': False, 'error': 'Path must be under /workspace.'}}))
elif not os.path.exists(path):
print(json.dumps({{'ok': False, 'error': f'File not found: {{path}}'}}))
elif os.path.isdir(path):
entries = sorted(os.listdir(path))
content = '\\n'.join(entries)
print(json.dumps({{'ok': True, 'content': content, 'is_directory': True, 'total': len(entries), 'truncated': False}}))
elif encoding == 'base64':
size_bytes = os.path.getsize(path)
with open(path, 'rb') as f:
f.seek(byte_offset)
data = f.read(max_bytes + 1)
chunk = data[:max_bytes]
has_more = len(data) > max_bytes
print(json.dumps({{
'ok': True,
'content': base64.b64encode(chunk).decode('ascii'),
'encoding': 'base64',
'byte_offset': byte_offset,
'length': len(chunk),
'size_bytes': size_bytes,
'has_more': has_more,
'next_byte_offset': byte_offset + len(chunk) if has_more else None,
'max_bytes': max_bytes,
}}))
print(json.dumps({{'ok': True, 'content': '\\n'.join(sorted(os.listdir(path))), 'is_directory': True}}))
else:
lines = []
output_bytes = 0
end_line = offset - 1
truncated = False
next_offset = None
with open(path, 'r', encoding='utf-8', errors='replace') as f:
for line_number, line in enumerate(f, 1):
if line_number < offset:
continue
if len(lines) >= max_lines:
truncated = True
next_offset = line_number
break
line_bytes = len(line.encode('utf-8'))
if output_bytes + line_bytes > max_bytes:
truncated = True
next_offset = line_number
break
lines.append(line.rstrip('\\n'))
output_bytes += line_bytes
end_line = line_number
print(json.dumps({{
'ok': True,
'content': '\\n'.join(lines),
'truncated': truncated,
'start_line': offset,
'end_line': end_line,
'next_offset': next_offset,
'max_lines': max_lines,
'max_bytes': max_bytes,
}}))
print(json.dumps({{'ok': True, 'content': f.read()}}))
""".strip()
return await self._run_workspace_file_script(script, query)
async def _write_workspace_via_box(
self,
path: str,
content: str,
parameters: dict,
query: pipeline_query.Query,
) -> dict:
encoding, mode = self._write_options(parameters)
async def _write_workspace_via_box(self, path: str, content: str, query: pipeline_query.Query) -> dict:
script = f"""
import base64, json, os
import json, os
path = {json.dumps(path)}
content = {json.dumps(content)}
encoding = {json.dumps(encoding)}
mode = {json.dumps(mode)}
if not path.startswith('/workspace'):
print(json.dumps({{'ok': False, 'error': 'Path must be under /workspace.'}}))
else:
os.makedirs(os.path.dirname(path) or '/workspace', exist_ok=True)
if encoding == 'base64':
try:
data = base64.b64decode(content, validate=True)
except Exception as exc:
print(json.dumps({{'ok': False, 'error': f'invalid base64 content: {{exc}}'}}))
else:
with open(path, 'ab' if mode == 'append' else 'wb') as f:
f.write(data)
print(json.dumps({{'ok': True, 'path': path}}))
else:
with open(path, 'a' if mode == 'append' else 'w', encoding='utf-8') as f:
f.write(content)
print(json.dumps({{'ok': True, 'path': path}}))
with open(path, 'w', encoding='utf-8') as f:
f.write(content)
print(json.dumps({{'ok': True, 'path': path}}))
""".strip()
return await self._run_workspace_file_script(script, query)
@@ -395,27 +307,12 @@ else:
if not any(part in skip_dirs for part in item.parts)
]
hits.sort(key=lambda item: item.stat().st_mtime if item.exists() else 0, reverse=True)
shown = hits[:{_GLOB_MAX_MATCHES}]
shown = hits[:100]
matches = []
output_bytes = 0
truncated_by_bytes = False
for item in shown:
rel = os.path.relpath(str(item), path)
sandbox_path = os.path.join(path, rel).replace(os.sep, '/')
entry_bytes = len(sandbox_path.encode('utf-8')) + (1 if matches else 0)
if output_bytes + entry_bytes > {_DEFAULT_TOOL_RESULT_MAX_BYTES}:
truncated_by_bytes = True
break
matches.append(sandbox_path)
output_bytes += entry_bytes
print(json.dumps({{
'ok': True,
'matches': matches,
'preview': '\\n'.join(matches),
'total': len(hits),
'truncated': len(hits) > len(matches) or truncated_by_bytes,
'truncated_by': 'bytes' if truncated_by_bytes else ('matches' if len(hits) > len(matches) else None),
}}))
matches.append(os.path.join(path, rel).replace(os.sep, '/'))
print(json.dumps({{'ok': True, 'matches': matches, 'total': len(hits), 'truncated': len(hits) > 100}}))
""".strip()
return await self._run_workspace_file_script(script, query)
@@ -453,54 +350,29 @@ else:
continue
if item.is_file():
files.append(item)
if len(files) >= {_GREP_MAX_FILES}:
if len(files) >= 5000:
break
matches = []
output_bytes = 0
truncated_by = None
for fp in files:
try:
handle = fp.open('r', encoding='utf-8', errors='ignore')
text = fp.read_text(errors='ignore')
except OSError:
continue
with handle:
for lineno, line in enumerate(handle, 1):
if regex.search(line):
if base.is_file():
file_path = path
else:
rel = os.path.relpath(str(fp), path)
file_path = os.path.join(path, rel).replace(os.sep, '/')
content = line.rstrip()
line_truncated = False
if len(content) > {_GREP_MAX_LINE_CHARS}:
content = content[:{_GREP_MAX_LINE_CHARS}] + '... [truncated]'
line_truncated = True
entry = {{'file': file_path, 'line': lineno, 'content': content}}
entry_bytes = len(json.dumps(entry, ensure_ascii=False).encode('utf-8')) + 1
if output_bytes + entry_bytes > {_DEFAULT_TOOL_RESULT_MAX_BYTES}:
truncated_by = 'bytes'
break
if line_truncated and truncated_by is None:
truncated_by = 'line'
matches.append(entry)
output_bytes += entry_bytes
if len(matches) >= {_GREP_MAX_MATCHES}:
truncated_by = truncated_by or 'matches'
break
if truncated_by == 'bytes' or len(matches) >= {_GREP_MAX_MATCHES}:
break
if truncated_by == 'bytes' or len(matches) >= {_GREP_MAX_MATCHES}:
for lineno, line in enumerate(text.splitlines(), 1):
if regex.search(line):
if base.is_file():
file_path = path
else:
rel = os.path.relpath(str(fp), path)
file_path = os.path.join(path, rel).replace(os.sep, '/')
matches.append({{'file': file_path, 'line': lineno, 'content': line.rstrip()}})
if len(matches) >= 200:
break
if len(matches) >= 200:
break
print(json.dumps({{
'ok': True,
'matches': matches,
'total': len(matches),
'truncated': truncated_by is not None,
'truncated_by': truncated_by,
}}))
print(json.dumps({{'ok': True, 'matches': matches, 'total': len(matches), 'truncated': len(matches) >= 200}}))
""".strip()
return await self._run_workspace_file_script(script, query)
@@ -515,20 +387,14 @@ else:
)
if skill_request is not None and hasattr(self.ap.box_service, 'read_skill_file'):
selected_skill, relative = skill_request
host_path = self._resolve_skill_host_path(selected_skill, relative)
if host_path and os.path.exists(host_path):
if os.path.isdir(host_path):
return self._build_directory_result(os.listdir(host_path))
return self._read_text_file_preview(host_path, parameters)
try:
result = await self.ap.box_service.read_skill_file(selected_skill['name'], relative)
return self._build_read_result_from_text(str(result.get('content', '')), parameters)
return {'ok': True, 'content': result.get('content', '')}
except Exception:
try:
result = await self.ap.box_service.list_skill_files(selected_skill['name'], relative)
entries = [entry['name'] for entry in result.get('entries', [])]
return self._build_directory_result(entries)
return {'ok': True, 'content': '\n'.join(sorted(entries)), 'is_directory': True}
except Exception as exc:
return {'ok': False, 'error': str(exc)}
@@ -539,19 +405,20 @@ else:
include_activated=True,
)
if self._should_use_box_workspace_files(selected_skill):
return await self._read_workspace_via_box(path, parameters, query)
return await self._read_workspace_via_box(path, query)
if not os.path.exists(host_path):
return {'ok': False, 'error': f'File not found: {path}'}
if os.path.isdir(host_path):
entries = os.listdir(host_path)
return self._build_directory_result(entries)
return self._read_text_file_preview(host_path, parameters)
return {'ok': True, 'content': '\n'.join(sorted(entries)), 'is_directory': True}
with open(host_path, 'r', errors='replace') as f:
content = f.read()
return {'ok': True, 'content': content}
async def _invoke_write(self, parameters: dict, query: pipeline_query.Query) -> dict:
path = parameters['path']
content = parameters['content']
self.ap.logger.info(f'write tool invoked: query_id={query.query_id} path={path} length={len(content)}')
encoding, _mode = self._write_options(parameters)
skill_request = self._resolve_skill_relative_path(
query,
path,
@@ -559,8 +426,6 @@ else:
include_activated=True,
)
if skill_request is not None and hasattr(self.ap.box_service, 'write_skill_file'):
if encoding != 'text':
return {'ok': False, 'error': 'base64 writes to skill packages are not supported.'}
selected_skill, relative = skill_request
await self.ap.box_service.write_skill_file(selected_skill['name'], relative, content)
await self.ap.skill_mgr.reload_skills()
@@ -573,12 +438,10 @@ else:
include_activated=True,
)
if self._should_use_box_workspace_files(selected_skill):
return await self._write_workspace_via_box(path, content, parameters, query)
return await self._write_workspace_via_box(path, content, query)
os.makedirs(os.path.dirname(host_path), exist_ok=True)
try:
self._write_host_file(host_path, content, parameters)
except ValueError as exc:
return {'ok': False, 'error': str(exc)}
with open(host_path, 'w', encoding='utf-8') as f:
f.write(content)
self._refresh_skill_from_disk(selected_skill)
return {'ok': True, 'path': path}
@@ -721,40 +584,6 @@ else:
'type': 'string',
'description': 'Absolute path to the file (must be under /workspace).',
},
'offset': {
'type': 'integer',
'description': '1-indexed line number to start reading from. Defaults to 1.',
'default': 1,
'minimum': 1,
},
'limit': {
'type': 'integer',
'description': f'Maximum number of lines to return. Defaults to {_DEFAULT_READ_MAX_LINES}.',
'default': _DEFAULT_READ_MAX_LINES,
'minimum': 1,
'maximum': _MAX_READ_MAX_LINES,
},
'max_bytes': {
'type': 'integer',
'description': (
f'Maximum bytes of file content to return. Defaults to {_DEFAULT_TOOL_RESULT_MAX_BYTES}.'
),
'default': _DEFAULT_TOOL_RESULT_MAX_BYTES,
'minimum': 1,
'maximum': _DEFAULT_TOOL_RESULT_MAX_BYTES,
},
'encoding': {
'type': 'string',
'description': 'Return text by default, or base64 for binary byte-range reads.',
'enum': ['text', 'base64'],
'default': 'text',
},
'byte_offset': {
'type': 'integer',
'description': '0-indexed byte offset used when encoding is base64. Defaults to 0.',
'default': 0,
'minimum': 0,
},
},
'required': ['path'],
'additionalProperties': False,
@@ -780,19 +609,7 @@ else:
},
'content': {
'type': 'string',
'description': 'Text content, or base64 content when encoding is base64.',
},
'encoding': {
'type': 'string',
'description': 'Write content as text by default, or decode it from base64 for binary files.',
'enum': ['text', 'base64'],
'default': 'text',
},
'mode': {
'type': 'string',
'description': 'Overwrite the file by default, or append to it.',
'enum': ['overwrite', 'append'],
'default': 'overwrite',
'description': 'Content to write to the file.',
},
},
'required': ['path', 'content'],
@@ -923,30 +740,22 @@ else:
hits.sort(key=lambda p: p.stat().st_mtime if p.exists() else 0, reverse=True)
total = len(hits)
shown = hits[:_GLOB_MAX_MATCHES]
shown = hits[:100]
# Convert back to sandbox paths
sandbox_paths = []
output_bytes = 0
truncated_by_bytes = False
for h in shown:
rel = os.path.relpath(str(h), host_path)
sandbox_path = os.path.join(path, rel)
entry_bytes = len(sandbox_path.encode('utf-8')) + (1 if sandbox_paths else 0)
if output_bytes + entry_bytes > _DEFAULT_TOOL_RESULT_MAX_BYTES:
truncated_by_bytes = True
break
sandbox_paths.append(sandbox_path)
output_bytes += entry_bytes
return {
'ok': True,
'matches': sandbox_paths,
'preview': '\n'.join(sandbox_paths),
'total': total,
'truncated': total > len(sandbox_paths) or truncated_by_bytes,
'truncated_by': 'bytes' if truncated_by_bytes else ('matches' if total > len(sandbox_paths) else None),
}
result_lines = sandbox_paths
result = '\n'.join(result_lines)
if total > 100:
result += f'\n... ({total} matches, showing first 100)'
return {'ok': True, 'matches': result_lines, 'total': total, 'truncated': total > 100}
async def _invoke_grep(self, parameters: dict, query: pipeline_query.Query) -> dict:
pattern = parameters['pattern']
@@ -982,46 +791,32 @@ else:
files = self._grep_walk(base, include)
matches = []
output_bytes = 0
truncated_by = None
for fp in files:
try:
handle = fp.open('r', encoding='utf-8', errors='ignore')
text = fp.read_text(errors='ignore')
except OSError:
continue
with handle:
for lineno, line in enumerate(handle, 1):
if regex.search(line):
rel = os.path.relpath(str(fp), host_path)
sandbox_path = os.path.join(path, rel)
content, line_truncated = self._truncate_grep_line(line.rstrip())
entry = {
for lineno, line in enumerate(text.splitlines(), 1):
if regex.search(line):
rel = os.path.relpath(str(fp), host_path)
sandbox_path = os.path.join(path, rel)
matches.append(
{
'file': sandbox_path,
'line': lineno,
'content': content,
'content': line.rstrip(),
}
entry_bytes = len(json.dumps(entry, ensure_ascii=False).encode('utf-8')) + 1
if output_bytes + entry_bytes > _DEFAULT_TOOL_RESULT_MAX_BYTES:
truncated_by = 'bytes'
break
if line_truncated and truncated_by is None:
truncated_by = 'line'
matches.append(entry)
output_bytes += entry_bytes
if len(matches) >= _GREP_MAX_MATCHES:
truncated_by = truncated_by or 'matches'
break
if truncated_by == 'bytes' or len(matches) >= _GREP_MAX_MATCHES:
break
if truncated_by == 'bytes' or len(matches) >= _GREP_MAX_MATCHES:
)
if len(matches) >= 200:
break
if len(matches) >= 200:
break
return {
'ok': True,
'matches': matches,
'total': len(matches),
'truncated': truncated_by is not None,
'truncated_by': truncated_by,
'truncated': len(matches) >= 200,
}
@staticmethod
@@ -1033,266 +828,10 @@ else:
continue
if item.is_file():
results.append(item)
if len(results) >= _GREP_MAX_FILES:
if len(results) >= 5000:
break
return results
@staticmethod
def _resolve_skill_host_path(selected_skill: dict, relative: str) -> str | None:
package_root = str(selected_skill.get('package_root', '') or '').strip()
if not package_root:
return None
host_root = os.path.realpath(package_root)
host_path = os.path.realpath(os.path.join(host_root, relative))
if not (host_path == host_root or host_path.startswith(host_root + os.sep)):
raise ValueError('Path escapes the skill package boundary.')
return host_path
def _normalize_exec_result(self, result: dict) -> dict:
normalized = dict(result)
stdout = str(normalized.get('stdout') or '')
stderr = str(normalized.get('stderr') or '')
stdout, stdout_capped = self._truncate_text_to_bytes_with_flag(stdout, _DEFAULT_TOOL_RESULT_MAX_BYTES)
stderr, stderr_capped = self._truncate_text_to_bytes_with_flag(stderr, _DEFAULT_TOOL_RESULT_MAX_BYTES)
normalized['stdout'] = stdout
normalized['stderr'] = stderr
normalized['stdout_truncated'] = bool(normalized.get('stdout_truncated') or stdout_capped)
normalized['stderr_truncated'] = bool(normalized.get('stderr_truncated') or stderr_capped)
if stdout and stderr:
preview_raw = f'stdout:\n{stdout}\n\nstderr:\n{stderr}'
else:
preview_raw = stdout or stderr
preview, preview_capped = self._truncate_text_to_bytes_with_flag(preview_raw, _DEFAULT_TOOL_RESULT_MAX_BYTES)
normalized['preview'] = preview
normalized['truncated'] = bool(
normalized['stdout_truncated'] or normalized['stderr_truncated'] or preview_capped
)
if preview_capped and not normalized.get('truncated_by'):
normalized['truncated_by'] = 'bytes'
return normalized
def _build_directory_result(self, entries: list[str]) -> dict:
sorted_entries = sorted(str(entry) for entry in entries)
content = '\n'.join(sorted_entries)
preview = self._truncate_text_to_bytes(content, _DEFAULT_TOOL_RESULT_MAX_BYTES)
truncated = preview != content
return {
'ok': True,
'content': preview,
'is_directory': True,
'total': len(sorted_entries),
'truncated': truncated,
'truncated_by': 'bytes' if truncated else None,
}
def _read_text_file_preview(self, host_path: str, parameters: dict) -> dict:
if self._read_encoding(parameters) == 'base64':
return self._read_binary_file_chunk(host_path, parameters)
offset = self._positive_int(parameters.get('offset'), default=1)
max_lines = self._positive_int(
parameters.get('limit'),
default=_DEFAULT_READ_MAX_LINES,
max_value=_MAX_READ_MAX_LINES,
)
max_bytes = self._positive_int(
parameters.get('max_bytes'),
default=_DEFAULT_TOOL_RESULT_MAX_BYTES,
max_value=_DEFAULT_TOOL_RESULT_MAX_BYTES,
)
lines: list[str] = []
output_bytes = 0
end_line = offset - 1
truncated = False
truncated_by: str | None = None
next_offset: int | None = None
with open(host_path, 'r', encoding='utf-8', errors='replace') as f:
for line_number, line in enumerate(f, 1):
if line_number < offset:
continue
if len(lines) >= max_lines:
truncated = True
truncated_by = 'lines'
next_offset = line_number
break
line_bytes = len(line.encode('utf-8'))
if output_bytes + line_bytes > max_bytes:
truncated = True
truncated_by = 'bytes'
next_offset = line_number
break
lines.append(line.rstrip('\n'))
output_bytes += line_bytes
end_line = line_number
if not lines and truncated_by == 'bytes':
content = (
f'[Line {next_offset or offset} exceeds the {self._format_size(max_bytes)} read limit. '
'Use exec with a byte-range command for this line, or read a different offset.]'
)
else:
content = '\n'.join(lines)
return {
'ok': True,
'content': content,
'truncated': truncated,
'truncated_by': truncated_by,
'start_line': offset,
'end_line': end_line,
'next_offset': next_offset,
'max_lines': max_lines,
'max_bytes': max_bytes,
}
def _read_binary_file_chunk(self, host_path: str, parameters: dict) -> dict:
byte_offset = self._non_negative_int(parameters.get('byte_offset'), default=0)
max_bytes = self._positive_int(
parameters.get('max_bytes'),
default=_DEFAULT_TOOL_RESULT_MAX_BYTES,
max_value=_DEFAULT_TOOL_RESULT_MAX_BYTES,
)
size_bytes = os.path.getsize(host_path)
with open(host_path, 'rb') as f:
f.seek(byte_offset)
data = f.read(max_bytes + 1)
chunk = data[:max_bytes]
has_more = len(data) > max_bytes
return {
'ok': True,
'content': base64.b64encode(chunk).decode('ascii'),
'encoding': 'base64',
'byte_offset': byte_offset,
'length': len(chunk),
'size_bytes': size_bytes,
'has_more': has_more,
'next_byte_offset': byte_offset + len(chunk) if has_more else None,
'max_bytes': max_bytes,
}
def _write_host_file(self, host_path: str, content: str, parameters: dict) -> None:
encoding, mode = self._write_options(parameters)
if encoding == 'base64':
try:
data = base64.b64decode(content, validate=True)
except Exception as exc:
raise ValueError(f'invalid base64 content: {exc}') from exc
with open(host_path, 'ab' if mode == 'append' else 'wb') as f:
f.write(data)
return
with open(host_path, 'a' if mode == 'append' else 'w', encoding='utf-8') as f:
f.write(content)
@staticmethod
def _read_encoding(parameters: dict) -> str:
return 'base64' if parameters.get('encoding') == 'base64' else 'text'
@staticmethod
def _write_options(parameters: dict) -> tuple[str, str]:
encoding = 'base64' if parameters.get('encoding') == 'base64' else 'text'
mode = 'append' if parameters.get('mode') == 'append' else 'overwrite'
return encoding, mode
def _build_read_result_from_text(self, content: str, parameters: dict) -> dict:
offset = self._positive_int(parameters.get('offset'), default=1)
max_lines = self._positive_int(
parameters.get('limit'),
default=_DEFAULT_READ_MAX_LINES,
max_value=_MAX_READ_MAX_LINES,
)
max_bytes = self._positive_int(
parameters.get('max_bytes'),
default=_DEFAULT_TOOL_RESULT_MAX_BYTES,
max_value=_DEFAULT_TOOL_RESULT_MAX_BYTES,
)
all_lines = content.splitlines()
start_index = offset - 1
if start_index >= len(all_lines) and all_lines:
return {'ok': False, 'error': f'Offset {offset} is beyond end of file ({len(all_lines)} lines total)'}
output_lines: list[str] = []
output_bytes = 0
truncated = False
truncated_by: str | None = None
next_offset: int | None = None
for index, line in enumerate(all_lines[start_index:], start_index + 1):
if len(output_lines) >= max_lines:
truncated = True
truncated_by = 'lines'
next_offset = index
break
line_bytes = len(line.encode('utf-8')) + (1 if output_lines else 0)
if output_bytes + line_bytes > max_bytes:
truncated = True
truncated_by = 'bytes'
next_offset = index
break
output_lines.append(line)
output_bytes += line_bytes
end_line = offset + len(output_lines) - 1
return {
'ok': True,
'content': '\n'.join(output_lines),
'truncated': truncated,
'truncated_by': truncated_by,
'start_line': offset,
'end_line': end_line,
'next_offset': next_offset,
'max_lines': max_lines,
'max_bytes': max_bytes,
}
@staticmethod
def _positive_int(value, *, default: int, max_value: int | None = None) -> int:
try:
parsed = int(value)
except (TypeError, ValueError):
parsed = default
if parsed <= 0:
parsed = default
if max_value is not None:
parsed = min(parsed, max_value)
return parsed
@staticmethod
def _non_negative_int(value, *, default: int) -> int:
try:
parsed = int(value)
except (TypeError, ValueError):
parsed = default
return parsed if parsed >= 0 else default
@staticmethod
def _truncate_grep_line(line: str) -> tuple[str, bool]:
if len(line) <= _GREP_MAX_LINE_CHARS:
return line, False
return f'{line[:_GREP_MAX_LINE_CHARS]}... [truncated]', True
@staticmethod
def _truncate_text_to_bytes(text: str, max_bytes: int) -> str:
return NativeToolLoader._truncate_text_to_bytes_with_flag(text, max_bytes)[0]
@staticmethod
def _truncate_text_to_bytes_with_flag(text: str, max_bytes: int) -> tuple[str, bool]:
data = text.encode('utf-8')
if len(data) <= max_bytes:
return text, False
truncated = data[:max_bytes]
while truncated and (truncated[-1] & 0xC0) == 0x80:
truncated = truncated[:-1]
return truncated.decode('utf-8', errors='ignore'), True
@staticmethod
def _format_size(bytes_count: int) -> str:
if bytes_count < 1024:
return f'{bytes_count}B'
return f'{bytes_count / 1024:.1f}KB'
def _summarize_parameters(self, parameters: dict) -> dict:
summary = dict(parameters)
cmd = str(summary.get('command', '')).strip()
@@ -72,45 +72,6 @@ def register_activated_skill(query: pipeline_query.Query, skill_data: dict) -> N
activated[skill_name] = skill_data
def normalize_skill_names(value: typing.Any) -> list[str]:
"""Return a de-duplicated list of non-empty skill names."""
if not isinstance(value, list):
return []
names: list[str] = []
for item in value:
skill_name = str(item or '').strip()
if skill_name and skill_name not in names:
names.append(skill_name)
return names
def get_activated_skill_names(query: pipeline_query.Query) -> list[str]:
"""Return activated skill names for callers that own persistence policy."""
return normalize_skill_names(list(get_activated_skills(query).keys()))
def restore_activated_skills(
ap: app.Application,
query: pipeline_query.Query,
skill_names: typing.Any,
) -> list[str]:
"""Restore caller-provided activated skill names into Query variables.
Persistence and state scope ownership belong to higher-level flows. This
helper only rebuilds current Query state from pipeline-visible skills, so
removed or unbound skills stay unavailable to native exec/write/edit.
"""
restored: list[str] = []
for skill_name in normalize_skill_names(skill_names):
skill_data = get_visible_skill(ap, query, skill_name)
if skill_data is None:
continue
register_activated_skill(query, skill_data)
restored.append(skill_name)
return restored
def parse_skill_mount_path(sandbox_path: str) -> tuple[str | None, str]:
normalized_path = str(sandbox_path or '/workspace').strip() or '/workspace'
if normalized_path == SKILL_MOUNT_PREFIX:
@@ -6,7 +6,6 @@ import typing
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
from .. import loader
from .availability import is_box_backend_available
# Align with Claude Code's Skill tool design:
# - activate: Activate a skill via Tool Call, returns SKILL.md content
@@ -46,7 +45,18 @@ class SkillToolLoader(loader.ToolLoader):
async def _check_sandbox_available(self) -> bool:
"""Check if the box backend is truly available (not just the runtime)."""
return await is_box_backend_available(self.ap)
box_service = getattr(self.ap, 'box_service', None)
if box_service is None:
return False
if not getattr(box_service, 'available', False):
return False
# Check if backend is truly available via get_status
try:
status = await box_service.get_status()
backend_info = status.get('backend', {})
return backend_info.get('available', False)
except Exception:
return False
async def get_tools(self, bound_plugins: list[str] | None = None) -> list[resource_tool.LLMTool]:
if not self._is_available():
@@ -82,15 +92,16 @@ class SkillToolLoader(loader.ToolLoader):
if not skill_name:
raise ValueError('skill_name is required')
from . import skill as skill_loader
skill_data = skill_loader.get_visible_skill(self.ap, query, skill_name)
skill_mgr = self.ap.skill_mgr
skill_data = skill_mgr.get_skill_by_name(skill_name)
if skill_data is None:
visible_skills = skill_loader.get_visible_skills(self.ap, query)
visible_skills = getattr(skill_mgr, 'skills', {})
available_names = ', '.join(sorted(visible_skills.keys())) or 'none'
raise ValueError(f'Skill "{skill_name}" not found. Available skills: {available_names}')
# Register activated skill for sandbox mount path resolution
from . import skill as skill_loader
skill_loader.register_activated_skill(query, skill_data)
# Return SKILL.md content as Tool Result (injects into context)
@@ -116,7 +127,6 @@ class SkillToolLoader(loader.ToolLoader):
'activated': True,
'skill_name': skill_name,
'mount_path': mount_path,
'activated_skill_names': skill_loader.get_activated_skill_names(query),
'content': result_content,
}
@@ -191,13 +201,13 @@ class SkillToolLoader(loader.ToolLoader):
return resource_tool.LLMTool(
name=ACTIVATE_SKILL_TOOL_NAME,
human_desc='Activate a skill',
description='Activate a pipeline-visible skill by name and return its instructions as a tool result.',
description=self._build_activate_tool_description(),
parameters={
'type': 'object',
'properties': {
'skill_name': {
'type': 'string',
'description': 'The skill name to activate.',
'description': 'The skill name to activate (no arguments). E.g., "pdf" or "data-analysis"',
},
},
'required': ['skill_name'],
@@ -245,3 +255,50 @@ class SkillToolLoader(loader.ToolLoader):
},
func=lambda parameters: parameters,
)
def _build_activate_tool_description(self) -> str:
"""Build tool description with embedded available_skills list."""
skill_mgr = getattr(self.ap, 'skill_mgr', None)
if skill_mgr is None:
return 'Activate a skill. No skills are currently available.'
skills = getattr(skill_mgr, 'skills', {})
if not skills:
return 'Activate a skill. No skills are currently available.'
# Build <available_skills> section
available_skills_lines = ['<available_skills>']
for skill_name, skill_data in sorted(skills.items()):
description = skill_data.get('description', '')
available_skills_lines.append('<skill>')
available_skills_lines.append(f'<name>{skill_name}</name>')
available_skills_lines.append(f'<description>{description}</description>')
available_skills_lines.append('</skill>')
available_skills_lines.append('</available_skills>')
available_skills_block = '\n'.join(available_skills_lines)
return f"""Activate a skill within the main conversation.
<skills_instructions>
When users ask you to perform tasks, check if any of the available skills
below can help complete the task more effectively. Skills provide specialized
capabilities and domain knowledge.
How to use skills:
- Invoke skills using this tool with the skill name only (no arguments)
- When you invoke a skill, you will see <command-message>
The skill is activated
</command-message>
- The skill's instructions will be provided in the tool result
- Examples:
- skill_name: "pdf" - invoke the pdf skill
- skill_name: "data-analysis" - invoke the data-analysis skill
Important:
- Only use skills listed in <available_skills> below
- Do not invoke a skill that is already running
- To create a new skill: prepare it in /workspace, then use register_skill tool
</skills_instructions>
{available_skills_block}"""
+123 -4
View File
@@ -1,10 +1,12 @@
from __future__ import annotations
import mimetypes
import os.path
import time
import traceback
import uuid
import zipfile
import io
import datetime
from typing import Any
from langbot.pkg.core import app
import sqlalchemy
@@ -25,6 +27,10 @@ class RuntimeKnowledgeBase(KnowledgeBaseInterface):
super().__init__(ap)
self.knowledge_base_entity = knowledge_base_entity
@staticmethod
def _utc_now() -> datetime.datetime:
return datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
async def initialize(self):
pass
@@ -334,6 +340,25 @@ class RuntimeKnowledgeBase(KnowledgeBaseInterface):
# are passed directly to vector_search by some plugins (e.g. LangRAG)
# and would cause empty results when the metadata field doesn't exist.
filters = settings.pop('filters', {})
trace_context = settings.pop('_trace_context', None)
host_span_started_at = self._utc_now()
host_span_started = time.perf_counter()
host_span_id = None
if trace_context and trace_context.get('trace_id'):
host_parent_span_id = trace_context.get('parent_span_id')
host_span_id = trace_context.get('rag_span_id') or f'span-{uuid.uuid4().hex[:16]}'
trace_context = {
'trace_id': trace_context.get('trace_id'),
'parent_span_id': host_span_id,
'host_parent_span_id': host_parent_span_id,
'message_id': trace_context.get('message_id'),
'query_id': trace_context.get('query_id'),
'session_id': trace_context.get('session_id'),
'bot_id': trace_context.get('bot_id'),
'pipeline_id': trace_context.get('pipeline_id'),
'knowledge_base_id': kb.uuid,
'attributes': trace_context.get('attributes') or {},
}
retrieval_context = {
'query': query,
@@ -343,13 +368,107 @@ class RuntimeKnowledgeBase(KnowledgeBaseInterface):
'creation_settings': kb.creation_settings or {},
'filters': filters,
}
if trace_context:
retrieval_context['trace_context'] = trace_context
result = await self.ap.plugin_connector.call_rag_retrieve(
plugin_id,
retrieval_context,
)
try:
result = await self.ap.plugin_connector.call_rag_retrieve(
plugin_id,
retrieval_context,
)
except Exception as e:
if trace_context:
await self._record_rag_trace_result(
trace_context=trace_context,
host_span_id=host_span_id,
started_at=host_span_started_at,
duration=int((time.perf_counter() - host_span_started) * 1000),
plugin_id=plugin_id,
result={
'results': [],
'metadata': {
'status': 'error',
'error_message': str(e),
},
},
)
raise
if trace_context:
await self._record_rag_trace_result(
trace_context=trace_context,
host_span_id=host_span_id,
started_at=host_span_started_at,
duration=int((time.perf_counter() - host_span_started) * 1000),
plugin_id=plugin_id,
result=result,
)
return result
async def _record_rag_trace_result(
self,
trace_context: dict[str, Any],
host_span_id: str | None,
started_at: datetime.datetime,
duration: int,
plugin_id: str,
result: dict[str, Any],
) -> None:
"""Persist host RAG span and plugin-provided child spans."""
trace_id = trace_context.get('trace_id')
if not trace_id:
return
metadata = result.get('metadata') if isinstance(result, dict) else {}
metadata = metadata if isinstance(metadata, dict) else {}
plugin_spans = metadata.get('trace_spans') if isinstance(metadata.get('trace_spans'), list) else []
parent_span_id = trace_context.get('parent_span_id')
host_parent_span_id = trace_context.get('host_parent_span_id')
try:
await self.ap.monitoring_service.record_span(
trace_id=trace_id,
span_id=host_span_id,
parent_span_id=host_parent_span_id,
name=f'Knowledge retrieval {self.knowledge_base_entity.name}',
kind='rag.retrieval',
status=metadata.get('status', 'success'),
started_at=started_at,
duration=duration,
message_id=trace_context.get('message_id'),
session_id=trace_context.get('session_id'),
bot_id=trace_context.get('bot_id'),
pipeline_id=trace_context.get('pipeline_id'),
attributes={
'knowledge_base_id': self.knowledge_base_entity.uuid,
'knowledge_base_name': self.knowledge_base_entity.name,
'plugin_id': plugin_id,
'returned_count': len(result.get('results', []) if isinstance(result, dict) else []),
'total_found': result.get('total_found') if isinstance(result, dict) else None,
},
error_message=metadata.get('error_message'),
)
for span in plugin_spans:
if not isinstance(span, dict):
continue
await self.ap.monitoring_service.record_span(
trace_id=trace_id,
span_id=span.get('span_id'),
parent_span_id=span.get('parent_span_id') or host_span_id or parent_span_id,
name=span.get('name') or 'RAG plugin stage',
kind=span.get('kind') or 'rag.stage',
status=span.get('status') or 'success',
started_at=started_at,
duration=span.get('duration_ms'),
message_id=trace_context.get('message_id'),
session_id=trace_context.get('session_id'),
bot_id=trace_context.get('bot_id'),
pipeline_id=trace_context.get('pipeline_id'),
attributes=span.get('attributes') if isinstance(span.get('attributes'), dict) else {},
error_message=span.get('error_message'),
)
except Exception as e:
self.ap.logger.error(f'Failed to record RAG trace spans: {e}')
async def _delete_document(self, document_id: str) -> bool:
"""Call plugin to delete document."""
kb = self.knowledge_base_entity
+65
View File
@@ -8,6 +8,7 @@ Run: uv run pytest tests/integration/api/test_monitoring.py -q
from __future__ import annotations
import datetime
import pytest
from unittest.mock import MagicMock, AsyncMock, Mock
@@ -82,6 +83,15 @@ def fake_monitoring_app():
app.monitoring_service.get_messages = AsyncMock(return_value=([{'id': 'msg-1', 'content': 'test'}], 100))
app.monitoring_service.get_llm_calls = AsyncMock(return_value=([{'id': 'llm-1'}], 50))
app.monitoring_service.get_embedding_calls = AsyncMock(return_value=([{'id': 'emb-1'}], 10))
app.monitoring_service.get_traces = AsyncMock(return_value=([{'trace_id': 'trace-1'}], 1))
app.monitoring_service.get_trace_details = AsyncMock(
side_effect=lambda trace_id: {
'found': trace_id == 'trace-1',
'trace_id': trace_id,
'trace': {'trace_id': trace_id} if trace_id == 'trace-1' else None,
'spans': [] if trace_id == 'trace-1' else None,
}
)
app.monitoring_service.get_sessions = AsyncMock(return_value=([{'session_id': 'sess-1'}], 20))
app.monitoring_service.get_errors = AsyncMock(return_value=([{'id': 'err-1'}], 2))
app.monitoring_service.get_session_analysis = AsyncMock(
@@ -222,6 +232,7 @@ class TestMonitoringAllDataEndpoint:
assert response.status_code == 200
data = await response.get_json()
assert 'overview' in data['data']
assert 'traces' in data['data']
@pytest.mark.usefixtures('mock_circular_import_chain')
@@ -246,6 +257,60 @@ class TestMonitoringDetailsEndpoints:
assert response.status_code == 200
@pytest.mark.asyncio
async def test_get_trace_details(self, quart_test_client):
"""GET /api/v1/monitoring/traces/{id}."""
response = await quart_test_client.get(
'/api/v1/monitoring/traces/trace-1', headers={'Authorization': 'Bearer test_token'}
)
assert response.status_code == 200
@pytest.mark.usefixtures('mock_circular_import_chain')
class TestMonitoringTraceEndpoints:
"""Tests for trace list and detail endpoints."""
@pytest.mark.asyncio
async def test_get_traces_forwards_filters(self, quart_test_client, fake_monitoring_app):
"""GET /api/v1/monitoring/traces forwards filters to service."""
response = await quart_test_client.get(
'/api/v1/monitoring/traces'
'?botId=bot-1'
'&pipelineId=pipeline-1'
'&sessionId=session-1'
'&status=success'
'&startTime=2026-01-01T00:00:00Z'
'&endTime=2026-01-02T00:00:00Z'
'&limit=25'
'&offset=5',
headers={'Authorization': 'Bearer test_token'},
)
assert response.status_code == 200
data = await response.get_json()
assert data['data']['traces'] == [{'trace_id': 'trace-1'}]
assert data['data']['total'] == 1
fake_monitoring_app.monitoring_service.get_traces.assert_awaited_with(
bot_ids=['bot-1'],
pipeline_ids=['pipeline-1'],
session_ids=['session-1'],
statuses=['success'],
start_time=datetime.datetime(2026, 1, 1, 0, 0),
end_time=datetime.datetime(2026, 1, 2, 0, 0),
limit=25,
offset=5,
)
@pytest.mark.asyncio
async def test_get_trace_details_not_found(self, quart_test_client):
"""GET /api/v1/monitoring/traces/{id} returns 404 when missing."""
response = await quart_test_client.get(
'/api/v1/monitoring/traces/trace-missing', headers={'Authorization': 'Bearer test_token'}
)
assert response.status_code == 404
@pytest.mark.usefixtures('mock_circular_import_chain')
class TestMonitoringFeedbackEndpoints:
@@ -104,7 +104,7 @@ class TestSQLiteMigrationUpgrade:
rev = await get_alembic_current(sqlite_engine)
assert rev is not None, 'Expected a revision after upgrade'
# Head should be the latest migration
assert rev.startswith('0005'), f'Expected head to be 0005_*, got {rev}'
assert rev.startswith('0006'), f'Expected head to be 0006_*, got {rev}'
@pytest.mark.asyncio
async def test_upgrade_idempotent(self, sqlite_engine):
@@ -144,8 +144,8 @@ class TestPostgreSQLMigrationUpgrade:
# Verify revision
rev = await get_alembic_current(postgres_engine)
assert rev is not None, 'Expected a revision after upgrade'
# Head should be the latest migration (0005 for current state)
assert rev.startswith('0005'), f'Expected head to be 0005_*, got {rev}'
# Head should be the latest migration.
assert rev.startswith('0006'), f'Expected head to be 0006_*, got {rev}'
@pytest.mark.asyncio
async def test_postgres_upgrade_idempotent(self, postgres_engine, clean_tables, clean_alembic_version):
@@ -0,0 +1,207 @@
"""Unit tests for MonitoringService trace observability."""
from __future__ import annotations
import datetime
from types import SimpleNamespace
from unittest.mock import AsyncMock, Mock
import pytest
import sqlalchemy
from sqlalchemy.ext.asyncio import create_async_engine
from langbot.pkg.api.http.service.monitoring import MonitoringService
from langbot.pkg.entity.persistence.base import Base
from langbot.pkg.entity.persistence import monitoring as persistence_monitoring
pytestmark = pytest.mark.asyncio
class _SQLitePersistence:
def __init__(self, engine):
self._engine = engine
def get_db_engine(self):
return self._engine
async def execute_async(self, *args, **kwargs):
async with self._engine.connect() as conn:
result = await conn.execute(*args, **kwargs)
await conn.commit()
return result
def serialize_model(self, model, data, masked_columns=None):
masked_columns = masked_columns or []
return {
column.name: getattr(data, column.name).isoformat()
if isinstance(getattr(data, column.name), datetime.datetime)
else getattr(data, column.name)
for column in model.__table__.columns
if column.name not in masked_columns
}
@pytest.fixture
async def monitoring_service(tmp_path):
engine = create_async_engine(f'sqlite+aiosqlite:///{tmp_path / "monitoring.db"}')
async with engine.begin() as conn:
await conn.run_sync(Base.metadata.create_all)
ap = SimpleNamespace(
persistence_mgr=_SQLitePersistence(engine),
instance_config=SimpleNamespace(data={'database': {'use': 'sqlite'}}),
logger=Mock(),
)
service = MonitoringService(ap)
yield service
await engine.dispose()
async def test_trace_lifecycle_records_spans_and_returns_details(monitoring_service):
started_at = datetime.datetime(2026, 1, 1, 12, 0, 0)
ended_at = started_at + datetime.timedelta(milliseconds=125)
trace_id = await monitoring_service.start_trace(
trace_id='trace-test',
name='Pipeline query',
bot_id='bot-1',
bot_name='Bot',
pipeline_id='pipeline-1',
pipeline_name='Default',
session_id='session-1',
message_id='message-1',
query_id=42,
attributes={'source': 'unit-test'},
)
assert trace_id == 'trace-test'
root_span_id = await monitoring_service.record_span(
trace_id=trace_id,
span_id='span-root',
name='Pipeline',
kind='pipeline',
status='completed',
started_at=started_at,
ended_at=ended_at,
message_id='message-1',
session_id='session-1',
bot_id='bot-1',
pipeline_id='pipeline-1',
attributes={'stage_count': 2},
)
await monitoring_service.record_span(
trace_id=trace_id,
span_id='span-rag',
parent_span_id=root_span_id,
name='RAG retrieval',
kind='rag.retrieval',
status='failed',
started_at=started_at + datetime.timedelta(seconds=1),
duration=12.7,
attributes={'top_k': 5},
error_message='vector store timeout',
)
await monitoring_service.finish_trace(
trace_id,
status='completed',
duration=250,
message_id='message-final',
attributes={'result_type': 'reply'},
)
traces, total = await monitoring_service.get_traces(
bot_ids=['bot-1'],
pipeline_ids=['pipeline-1'],
session_ids=['session-1'],
statuses=['success'],
limit=10,
offset=0,
)
assert total == 1
assert traces[0]['trace_id'] == trace_id
assert traces[0]['status'] == 'success'
assert traces[0]['message_id'] == 'message-final'
assert traces[0]['query_id'] == '42'
assert traces[0]['attributes'] == {'result_type': 'reply'}
details = await monitoring_service.get_trace_details(trace_id)
assert details['found'] is True
assert details['trace']['trace_id'] == trace_id
assert [span['span_id'] for span in details['spans']] == ['span-root', 'span-rag']
assert details['spans'][0]['status'] == 'success'
assert details['spans'][0]['duration'] == 125
assert details['spans'][0]['attributes'] == {'stage_count': 2}
assert details['spans'][1]['status'] == 'error'
assert details['spans'][1]['duration'] == 13
assert details['spans'][1]['parent_span_id'] == 'span-root'
assert details['spans'][1]['error_message'] == 'vector store timeout'
async def test_get_trace_details_returns_not_found_for_missing_trace(monitoring_service):
details = await monitoring_service.get_trace_details('trace-missing')
assert details == {'trace_id': 'trace-missing', 'found': False}
async def test_cleanup_expired_records_includes_traces_and_spans(monitoring_service):
old_time = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None) - datetime.timedelta(days=30)
recent_time = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
await monitoring_service.ap.persistence_mgr.execute_async(
sqlalchemy.insert(persistence_monitoring.MonitoringTrace),
[
{
'trace_id': 'trace-old',
'started_at': old_time,
'ended_at': old_time,
'duration': 10,
'status': 'success',
'name': 'Old trace',
},
{
'trace_id': 'trace-recent',
'started_at': recent_time,
'ended_at': recent_time,
'duration': 10,
'status': 'success',
'name': 'Recent trace',
},
],
)
await monitoring_service.ap.persistence_mgr.execute_async(
sqlalchemy.insert(persistence_monitoring.MonitoringSpan),
[
{
'span_id': 'span-old',
'trace_id': 'trace-old',
'name': 'Old span',
'kind': 'pipeline',
'status': 'success',
'started_at': old_time,
'ended_at': old_time,
},
{
'span_id': 'span-recent',
'trace_id': 'trace-recent',
'name': 'Recent span',
'kind': 'pipeline',
'status': 'success',
'started_at': recent_time,
'ended_at': recent_time,
},
],
)
monitoring_service._release_sqlite_space = AsyncMock()
deleted = await monitoring_service.cleanup_expired_records(retention_days=7, batch_size=1)
assert deleted['monitoring_traces'] == 1
assert deleted['monitoring_spans'] == 1
monitoring_service._release_sqlite_space.assert_awaited_once()
remaining = await monitoring_service.get_trace_details('trace-recent')
assert remaining['found'] is True
assert remaining['spans'][0]['span_id'] == 'span-recent'
@@ -0,0 +1,111 @@
"""Unit tests for monitoring trace HTTP routes."""
from __future__ import annotations
import datetime
from unittest.mock import AsyncMock, Mock
import pytest
import quart
from tests.factories import FakeApp
from tests.utils.import_isolation import MockLifecycleControlScope, isolated_sys_modules
pytestmark = pytest.mark.asyncio
@pytest.fixture
async def monitoring_client():
mock_app = Mock()
mock_app.Application = type('FakeMinimalApplication', (), {})
mock_entities = Mock()
mock_entities.LifecycleControlScope = MockLifecycleControlScope
clear = [
'langbot.pkg.api.http.controller.group',
'langbot.pkg.api.http.controller.groups',
'langbot.pkg.api.http.controller.groups.monitoring',
'langbot.pkg.api.http.controller.main',
]
app = FakeApp()
app.user_service = Mock()
app.user_service.verify_jwt_token = AsyncMock(return_value='test@example.com')
app.user_service.get_user_by_email = AsyncMock(return_value=Mock(email='test@example.com'))
app.monitoring_service = Mock()
app.monitoring_service.get_traces = AsyncMock(return_value=([{'trace_id': 'trace-1'}], 1))
app.monitoring_service.get_trace_details = AsyncMock(
side_effect=lambda trace_id: {
'found': trace_id == 'trace-1',
'trace_id': trace_id,
'trace': {'trace_id': trace_id} if trace_id == 'trace-1' else None,
'spans': [] if trace_id == 'trace-1' else None,
}
)
with isolated_sys_modules(
mocks={
'langbot.pkg.core.app': mock_app,
'langbot.pkg.core.entities': mock_entities,
},
clear=clear,
):
from langbot.pkg.api.http.controller.groups.monitoring import MonitoringRouterGroup
quart_app = quart.Quart(__name__)
group = MonitoringRouterGroup(app, quart_app)
await group.initialize()
yield app, quart_app.test_client()
async def test_get_traces_route_forwards_filters(monitoring_client):
app, client = monitoring_client
response = await client.get(
'/api/v1/monitoring/traces'
'?botId=bot-1'
'&pipelineId=pipeline-1'
'&sessionId=session-1'
'&status=success'
'&startTime=2026-01-01T00:00:00Z'
'&endTime=2026-01-02T00:00:00Z'
'&limit=25'
'&offset=5',
headers={'Authorization': 'Bearer test_token'},
)
assert response.status_code == 200
data = await response.get_json()
assert data['data'] == {
'traces': [{'trace_id': 'trace-1'}],
'total': 1,
'limit': 25,
'offset': 5,
}
app.monitoring_service.get_traces.assert_awaited_once_with(
bot_ids=['bot-1'],
pipeline_ids=['pipeline-1'],
session_ids=['session-1'],
statuses=['success'],
start_time=datetime.datetime(2026, 1, 1, 0, 0),
end_time=datetime.datetime(2026, 1, 2, 0, 0),
limit=25,
offset=5,
)
async def test_get_trace_details_route_returns_404_for_missing_trace(monitoring_client):
_app, client = monitoring_client
response = await client.get(
'/api/v1/monitoring/traces/trace-missing',
headers={'Authorization': 'Bearer test_token'},
)
assert response.status_code == 404
data = await response.get_json()
assert data['code'] == -1
assert data['msg'] == 'Trace trace-missing not found'
-379
View File
@@ -546,41 +546,6 @@ async def test_box_service_rejects_host_mount_outside_allowed_roots(tmp_path):
)
class TestGetSystemGuidance:
"""``get_system_guidance`` must ALWAYS advertise the per-query outbox path
when given a ``query_id`` even with no inbound attachment so files the
agent generates (QR codes, charts, rendered docs) are actually delivered.
The wrapper collects the outbox on every turn regardless of inbound files;
before this, the agent was only told the outbox path inside the
inbound-attachment note, so pure-generation turns produced files that were
silently dropped.
"""
def _service(self, logger=None):
logger = logger or Mock()
runtime = BoxRuntime(logger=logger, backends=[FakeBackend(logger)], session_ttl_sec=300)
return BoxService(make_app(logger), client=_InProcessBoxRuntimeClient(logger, runtime))
def test_guidance_includes_outbox_when_query_id_given(self):
service = self._service()
guidance = service.get_system_guidance(42)
assert f'{service.OUTBOX_MOUNT_DIR}/42' in guidance
assert 'delivered to the user automatically' in guidance
def test_guidance_omits_outbox_without_query_id(self):
service = self._service()
guidance = service.get_system_guidance()
assert service.OUTBOX_MOUNT_DIR not in guidance
# core exec guidance is still present
assert 'exec tool' in guidance
def test_guidance_outbox_independent_of_inbound_attachments(self):
# A bare query_id (the pure-generation case) still gets the outbox note.
service = self._service()
assert f'{service.OUTBOX_MOUNT_DIR}/0' in service.get_system_guidance(0)
@pytest.mark.asyncio
async def test_box_runtime_rejects_host_mount_conflict_in_same_session(tmp_path):
logger = Mock()
@@ -1591,347 +1556,3 @@ class TestBuildSkillExtraMounts:
service = BoxService(app, client=Mock(spec=BoxRuntimeClient))
assert service.build_skill_extra_mounts(make_query()) == []
# ── Attachment passthrough (inbound / outbound) ─────────────────────────────
class TestAttachmentHelpers:
def test_sanitize_attachment_name_strips_traversal(self):
assert BoxService._sanitize_attachment_name('../../etc/passwd', 'fb') == 'passwd'
assert BoxService._sanitize_attachment_name('/a/b/c.png', 'fb') == 'c.png'
assert BoxService._sanitize_attachment_name('a b c.txt', 'fb') == 'a_b_c.txt'
assert BoxService._sanitize_attachment_name('', 'fallback.bin') == 'fallback.bin'
assert BoxService._sanitize_attachment_name('...', 'fb.bin') == 'fb.bin'
# weird unicode / shell chars dropped, but keeps a usable name
out = BoxService._sanitize_attachment_name('rm -rf $(x).png', 'fb')
assert '/' not in out and '$' not in out and out.endswith('.png')
def test_classify_outbound_entries_by_extension(self):
entries = [
{'name': 'chart.png', 'b64': 'AAA'},
{'name': 'clip.mp3', 'b64': 'BBB'},
{'name': 'report.pdf', 'b64': 'CCC'},
{'name': 'sub/dir/photo.JPG', 'b64': 'DDD'},
{'name': 'noext', 'b64': 'EEE'},
{'name': 'skip', 'b64': ''}, # dropped (no payload)
]
out = BoxService._classify_outbound_entries(entries)
by_name = {a['name']: a for a in out}
assert by_name['chart.png']['type'] == 'Image'
assert by_name['chart.png']['base64'].startswith('data:image/png;base64,')
assert by_name['clip.mp3']['type'] == 'Voice'
assert by_name['clip.mp3']['base64'].startswith('data:audio/mp3;base64,')
assert by_name['report.pdf']['type'] == 'File'
assert by_name['report.pdf']['base64'] == 'CCC' # raw b64, no data: prefix
# nested path collapses to basename, case-insensitive ext
assert by_name['photo.JPG']['type'] == 'Image'
assert by_name['noext']['type'] == 'File'
assert 'skip' not in by_name
@pytest.mark.asyncio
async def test_component_to_bytes_from_data_uri(self):
import base64
raw = b'hello-bytes'
data_uri = 'data:text/plain;base64,' + base64.b64encode(raw).decode()
component = SimpleNamespace(base64=data_uri, url=None, path=None)
result = await BoxService._component_to_bytes(component)
assert result is not None
data, mime = result
assert data == raw
assert mime == 'text/plain'
@pytest.mark.asyncio
async def test_component_to_bytes_returns_none_when_empty(self):
component = SimpleNamespace(base64=None, url=None, path=None)
assert await BoxService._component_to_bytes(component) is None
class TestInboundOutboundRoundTrip:
def _service(self) -> BoxService:
service = BoxService(make_app(Mock()), client=Mock(spec=BoxRuntimeClient))
service._available = True
return service
@pytest.mark.asyncio
async def test_materialize_inbound_writes_and_describes(self):
import base64
import langbot_plugin.api.entities.builtin.platform.message as platform_message
service = self._service()
img_bytes = b'\x89PNG\r\n\x1a\n fake png'
img_b64 = 'data:image/png;base64,' + base64.b64encode(img_bytes).decode()
query = make_query()
query.message_chain = platform_message.MessageChain(
[
platform_message.Plain(text='please resize this'),
platform_message.Image(base64=img_b64),
]
)
# Mock the sandbox write path: echo back the written paths.
async def fake_execute_tool(parameters, q):
assert '/workspace/inbox/' in parameters['command']
return {
'ok': True,
'stdout': '["/workspace/inbox/42/image_1.png"]',
'stderr': '',
}
service.execute_tool = AsyncMock(side_effect=fake_execute_tool)
descriptors = await service.materialize_inbound_attachments(query)
assert len(descriptors) == 1
d = descriptors[0]
assert d['type'] == 'Image'
assert d['path'] == '/workspace/inbox/42/image_1.png'
assert d['size'] == len(img_bytes)
@pytest.mark.asyncio
async def test_materialize_inbound_noop_without_attachments(self):
import langbot_plugin.api.entities.builtin.platform.message as platform_message
service = self._service()
query = make_query()
query.message_chain = platform_message.MessageChain([platform_message.Plain(text='just text')])
service.execute_tool = AsyncMock()
assert await service.materialize_inbound_attachments(query) == []
service.execute_tool.assert_not_called()
@pytest.mark.asyncio
async def test_collect_outbound_reads_and_clears(self):
service = self._service()
query = make_query()
calls = []
async def fake_execute_tool(parameters, q):
calls.append(parameters['command'])
if 'os.walk' in parameters['command']:
return {
'ok': True,
'stdout': '[{"name": "out.png", "b64": "QUJD"}]',
'stderr': '',
}
# the rm -rf cleanup call
return {'ok': True, 'stdout': '', 'stderr': ''}
service.execute_tool = AsyncMock(side_effect=fake_execute_tool)
attachments = await service.collect_outbound_attachments(query)
assert len(attachments) == 1
assert attachments[0]['type'] == 'Image'
assert attachments[0]['name'] == 'out.png'
# cleanup (rm -rf) must have been issued after a successful collection
assert any('rm -rf' in c for c in calls)
@pytest.mark.asyncio
async def test_collect_outbound_empty_still_clears(self):
# An empty collection MUST still clear the per-query outbox, so a later
# turn reusing the same query_id (the counter resets across restarts)
# cannot inherit stale files left from a prior run.
service = self._service()
query = make_query()
calls = []
async def fake_execute_tool(parameters, q):
calls.append(parameters['command'])
if 'os.walk' in parameters['command']:
return {'ok': True, 'stdout': '[]', 'stderr': ''}
return {'ok': True, 'stdout': '', 'stderr': ''}
service.execute_tool = AsyncMock(side_effect=fake_execute_tool)
assert await service.collect_outbound_attachments(query) == []
# cleanup (rm -rf) is issued unconditionally now
assert any('rm -rf' in c for c in calls)
@pytest.mark.asyncio
async def test_passthrough_noop_when_unavailable(self):
service = BoxService(make_app(Mock()), client=Mock(spec=BoxRuntimeClient))
service._available = False
query = make_query()
assert await service.materialize_inbound_attachments(query) == []
assert await service.collect_outbound_attachments(query) == []
class TestAttachmentHostPath:
"""Direct host-filesystem transfer path (bind-mounted workspace).
When ``default_workspace`` is a real local dir, inbound/outbound bypass the
exec channel entirely (no ARG_MAX / stdout-truncation limits) and read/write
the bind-mounted host dir directly.
"""
def _service_with_workspace(self, tmp_path):
ws = str(tmp_path / 'box' / 'default')
os.makedirs(ws, exist_ok=True)
app = make_app(Mock(), allowed_mount_roots=[str(tmp_path)], host_root=str(tmp_path / 'box'))
service = BoxService(app, client=Mock(spec=BoxRuntimeClient))
service._available = True
# Force the default_workspace to our tmp dir so _host_query_dir resolves.
service.default_workspace = ws
return service, ws
@pytest.mark.asyncio
async def test_inbound_writes_to_host_no_exec(self, tmp_path):
import base64
import langbot_plugin.api.entities.builtin.platform.message as platform_message
service, ws = self._service_with_workspace(tmp_path)
# Big payload that would blow ARG_MAX on the exec path:
big = b'\x89PNG\r\n\x1a\n' + b'x' * (300 * 1024)
b64 = 'data:image/png;base64,' + base64.b64encode(big).decode()
query = make_query()
query.message_chain = platform_message.MessageChain([platform_message.Image(base64=b64)])
# execute_tool must NOT be called on the host path.
service.execute_tool = AsyncMock(side_effect=AssertionError('exec must not be used on host path'))
descriptors = await service.materialize_inbound_attachments(query)
assert len(descriptors) == 1
d = descriptors[0]
assert d['type'] == 'Image'
assert d['size'] == len(big)
# File actually landed on the host workspace.
host_file = os.path.join(ws, 'inbox', str(query.query_id), d['name'])
assert os.path.isfile(host_file)
assert open(host_file, 'rb').read() == big
@pytest.mark.asyncio
async def test_inbound_host_clears_stale_query_dir(self, tmp_path):
import base64
import langbot_plugin.api.entities.builtin.platform.message as platform_message
service, ws = self._service_with_workspace(tmp_path)
# Seed a stale file under the same query_id (simulates webchat id reuse).
stale_dir = os.path.join(ws, 'inbox', '42')
os.makedirs(stale_dir, exist_ok=True)
open(os.path.join(stale_dir, 'image_1.png'), 'wb').write(b'STALE-OLD-IMAGE')
new = b'\x89PNG\r\n\x1a\n NEW'
b64 = 'data:image/png;base64,' + base64.b64encode(new).decode()
query = make_query(query_id=42)
query.message_chain = platform_message.MessageChain([platform_message.Image(base64=b64)])
service.execute_tool = AsyncMock()
descriptors = await service.materialize_inbound_attachments(query)
# The new write recreated the dir; the stale file is gone, new bytes present.
host_file = os.path.join(stale_dir, descriptors[0]['name'])
assert open(host_file, 'rb').read() == new
# No leftover content from the stale image.
assert b'STALE-OLD-IMAGE' not in open(host_file, 'rb').read()
@pytest.mark.asyncio
async def test_outbound_reads_host_and_clears(self, tmp_path):
service, ws = self._service_with_workspace(tmp_path)
query = make_query()
outbox = os.path.join(ws, 'outbox', str(query.query_id))
os.makedirs(outbox, exist_ok=True)
# A large file that would be truncated on the exec/stdout path:
big_png = b'\x89PNG\r\n\x1a\n' + b'y' * (400 * 1024)
open(os.path.join(outbox, 'result.png'), 'wb').write(big_png)
open(os.path.join(outbox, 'notes.txt'), 'wb').write(b'hello')
service.execute_tool = AsyncMock(side_effect=AssertionError('exec must not be used on host path'))
attachments = await service.collect_outbound_attachments(query)
by_name = {a['name']: a for a in attachments}
assert by_name['result.png']['type'] == 'Image'
assert by_name['notes.txt']['type'] == 'File'
# Full image survived (no truncation).
import base64
raw = base64.b64decode(by_name['result.png']['base64'].split(',', 1)[-1])
assert raw == big_png
# Outbox cleared after collection.
assert os.listdir(outbox) == []
@pytest.mark.asyncio
async def test_outbound_empty_clears_stale_host_dir(self, tmp_path):
# Reusing a query_id (counter resets on restart) must not re-send files
# a previous run left in the outbox: an empty collection still clears it.
service, ws = self._service_with_workspace(tmp_path)
query = make_query()
outbox = os.path.join(ws, 'outbox', str(query.query_id))
os.makedirs(outbox, exist_ok=True)
# Stale file from a prior turn; the agent produced nothing this turn —
# but _read_outbox_host would still pick it up, so collection must drop
# it and then wipe the dir. Simulate "nothing produced this turn" by
# treating any present file as stale and asserting it is not re-sent
# across a second, genuinely-empty collection.
open(os.path.join(outbox, 'stale.png'), 'wb').write(b'\x89PNG\r\n\x1a\n old')
service.execute_tool = AsyncMock(side_effect=AssertionError('exec must not be used on host path'))
# First collection drains + clears the dir.
first = await service.collect_outbound_attachments(query)
assert {a['name'] for a in first} == {'stale.png'}
assert os.listdir(outbox) == []
# Second collection (no new files) returns nothing and leaves a clean dir.
second = await service.collect_outbound_attachments(query)
assert second == []
assert os.listdir(outbox) == []
@pytest.mark.asyncio
async def test_purge_attachment_dirs_wipes_host_owned_leftovers_on_init(self, tmp_path):
# Leftover inbox/outbox dirs from a previous process (same reset
# query_id counter) must be removed at startup. Host-owned files are
# cleared without any sandbox exec.
service, ws = self._service_with_workspace(tmp_path)
for sub in ('inbox', 'outbox'):
d = os.path.join(ws, sub, '0')
os.makedirs(d, exist_ok=True)
open(os.path.join(d, 'leftover.bin'), 'wb').write(b'from a previous process')
service.execute_tool = AsyncMock(side_effect=AssertionError('exec must not be used for host-owned files'))
await service._purge_attachment_dirs()
assert not os.path.exists(os.path.join(ws, 'inbox'))
assert not os.path.exists(os.path.join(ws, 'outbox'))
# The workspace root itself survives.
assert os.path.isdir(ws)
@pytest.mark.asyncio
async def test_purge_attachment_dirs_falls_back_to_exec_for_root_owned(self, tmp_path, monkeypatch):
# When the host delete cannot remove a dir (root-owned container output),
# purge must fall back to deleting from inside the sandbox via exec.
service, ws = self._service_with_workspace(tmp_path)
outbox = os.path.join(ws, 'outbox')
os.makedirs(os.path.join(outbox, '0'), exist_ok=True)
# Simulate a host delete that cannot remove the root-owned outbox.
import shutil as _shutil
real_rmtree = _shutil.rmtree
def fake_rmtree(path, *a, **k):
if os.path.abspath(path) == os.path.abspath(outbox):
return # "permission denied" — silently leaves the dir
return real_rmtree(path, *a, **k)
monkeypatch.setattr(_shutil, 'rmtree', fake_rmtree)
executed = {}
spec_obj = object()
service.build_spec = Mock(return_value=spec_obj)
service.client.execute = AsyncMock(side_effect=lambda s: executed.setdefault('spec', s))
await service._purge_attachment_dirs()
# build_spec was asked to rm the surviving outbox via exec.
cmd = service.build_spec.call_args.args[0]['cmd']
assert 'rm -rf' in cmd and '/workspace/outbox' in cmd
assert '/workspace/inbox' not in cmd # inbox was host-deletable
service.client.execute.assert_awaited_once_with(spec_obj)
@pytest.mark.asyncio
async def test_purge_attachment_dirs_noop_without_workspace(self):
# No bind-mounted workspace (E2B / remote): purge is a safe no-op.
service = BoxService(make_app(Mock()), client=Mock(spec=BoxRuntimeClient))
service.default_workspace = None
# Must not raise.
await service._purge_attachment_dirs()
+1 -3
View File
@@ -54,9 +54,7 @@ def test_classify_python_workspace_detects_package_and_requirements():
def test_wrap_python_command_with_env_contains_bootstrap_and_command():
command = wrap_python_command_with_env('python script.py')
assert '_LB_SYSTEM_PYTHON="$(command -v python3 || command -v python || true)"' in command
assert '"$_LB_SYSTEM_PYTHON" -m venv "$_LB_VENV_DIR"' in command
assert 'kill -0 "$_LB_LOCK_OWNER"' in command
assert 'python -m venv "$_LB_VENV_DIR"' in command
assert 'export VIRTUAL_ENV="$_LB_VENV_DIR"' in command
assert command.rstrip().endswith('python script.py')
@@ -0,0 +1,87 @@
"""Unit tests for the monitoring trace Alembic migration."""
from __future__ import annotations
from importlib import import_module
class _FakeInspector:
def __init__(self, tables):
self._tables = tables
def get_table_names(self):
return list(self._tables)
class _FakeOp:
def __init__(self):
self.created_tables = []
self.created_indexes = []
self.dropped_tables = []
def get_bind(self):
return object()
def create_table(self, table_name, *columns):
self.created_tables.append((table_name, columns))
def create_index(self, index_name, table_name, columns):
self.created_indexes.append((index_name, table_name, columns))
def drop_table(self, table_name):
self.dropped_tables.append(table_name)
def _migration_module():
return import_module('langbot.pkg.persistence.alembic.versions.0006_monitoring_traces')
def test_upgrade_creates_monitoring_trace_tables_and_indexes(monkeypatch):
migration = _migration_module()
fake_op = _FakeOp()
monkeypatch.setattr(migration, 'op', fake_op)
monkeypatch.setattr(migration.sa, 'inspect', lambda _conn: _FakeInspector(tables=set()))
migration.upgrade()
assert [table_name for table_name, _columns in fake_op.created_tables] == [
'monitoring_traces',
'monitoring_spans',
]
assert ('ix_monitoring_traces_started_at', 'monitoring_traces', ['started_at']) in fake_op.created_indexes
assert ('ix_monitoring_spans_trace_id', 'monitoring_spans', ['trace_id']) in fake_op.created_indexes
assert ('ix_monitoring_spans_pipeline_id', 'monitoring_spans', ['pipeline_id']) in fake_op.created_indexes
def test_upgrade_skips_existing_monitoring_trace_tables(monkeypatch):
migration = _migration_module()
fake_op = _FakeOp()
monkeypatch.setattr(migration, 'op', fake_op)
monkeypatch.setattr(
migration.sa,
'inspect',
lambda _conn: _FakeInspector(tables={'monitoring_traces', 'monitoring_spans'}),
)
migration.upgrade()
assert fake_op.created_tables == []
assert fake_op.created_indexes == []
def test_downgrade_drops_spans_before_traces(monkeypatch):
migration = _migration_module()
fake_op = _FakeOp()
monkeypatch.setattr(migration, 'op', fake_op)
monkeypatch.setattr(
migration.sa,
'inspect',
lambda _conn: _FakeInspector(tables={'monitoring_traces', 'monitoring_spans'}),
)
migration.downgrade()
assert fake_op.dropped_tables == ['monitoring_spans', 'monitoring_traces']
@@ -162,3 +162,61 @@ async def test_runtime_pipeline_execute(mock_app, sample_query):
# Verify stage was called
mock_stage.process.assert_called_once()
@pytest.mark.asyncio
async def test_runtime_pipeline_marks_trace_error_when_stage_returns_error_notice(mock_app, sample_query):
"""Trace status follows handled stage errors, not only raised exceptions."""
pipelinemgr = get_pipelinemgr_module()
stage = get_stage_module()
persistence_pipeline = get_persistence_pipeline_module()
entities = get_entities_module()
error_result = entities.StageProcessResult(
result_type=entities.ResultType.INTERRUPT,
new_query=sample_query,
user_notice='',
console_notice='',
debug_notice='traceback',
error_notice='model request failed',
)
mock_stage = Mock(spec=stage.PipelineStage)
mock_stage.process = AsyncMock(return_value=error_result)
stage_container = pipelinemgr.StageInstContainer(inst_name='FailingStage', inst=mock_stage)
pipeline_entity = Mock(spec=persistence_pipeline.LegacyPipeline)
pipeline_entity.uuid = 'test-pipeline-uuid'
pipeline_entity.name = 'Test Pipeline'
pipeline_entity.config = sample_query.pipeline_config
pipeline_entity.extensions_preferences = {'plugins': []}
mock_app.bot_service = AsyncMock()
mock_app.bot_service.get_bot = AsyncMock(return_value={'name': 'Test Bot'})
mock_app.monitoring_service = AsyncMock()
mock_app.monitoring_service.record_message = AsyncMock(return_value='message-1')
mock_app.monitoring_service.update_session_activity = AsyncMock(return_value=True)
mock_app.monitoring_service.start_trace = AsyncMock(return_value='trace-1')
mock_app.monitoring_service.record_span = AsyncMock()
mock_app.monitoring_service.finish_trace = AsyncMock()
mock_app.monitoring_service.update_message_status = AsyncMock()
mock_app.monitoring_service.record_error = AsyncMock()
event_ctx = Mock()
event_ctx.is_prevented_default = Mock(return_value=False)
mock_app.plugin_connector.emit_event = AsyncMock(return_value=event_ctx)
mock_app.query_pool.cached_queries[sample_query.query_id] = sample_query
runtime_pipeline = pipelinemgr.RuntimePipeline(mock_app, pipeline_entity, [stage_container])
await runtime_pipeline.run(sample_query)
mock_app.monitoring_service.finish_trace.assert_awaited_once()
assert mock_app.monitoring_service.finish_trace.await_args.kwargs['status'] == 'error'
span_calls = mock_app.monitoring_service.record_span.await_args_list
stage_span_call = next(call for call in span_calls if call.kwargs['name'] == 'FailingStage')
root_span_call = next(call for call in span_calls if call.kwargs['kind'] == 'pipeline.query')
assert stage_span_call.kwargs['status'] == 'error'
assert stage_span_call.kwargs['error_message'] == 'model request failed'
assert root_span_call.kwargs['status'] == 'error'
@@ -1,146 +0,0 @@
"""Unit tests for ResponseWrapper outbound-attachment helpers.
Covers the sandbox -> user attachment path added for the Box attachment
round-trip:
* ``_is_final_assistant_message`` only the terminal, tool-call-free assistant
message (or a final MessageChunk) should trigger collection.
* ``_append_outbound_attachments`` collects sandbox outbox files exactly once
per query and maps each descriptor to the right platform component, swallowing
collection errors.
"""
from __future__ import annotations
from types import SimpleNamespace
from unittest.mock import AsyncMock, Mock
import pytest
import langbot_plugin.api.entities.builtin.platform.message as platform_message
import langbot_plugin.api.entities.builtin.provider.message as provider_message
from langbot.pkg.pipeline.wrapper.wrapper import ResponseWrapper
def _make_wrapper(box_service) -> ResponseWrapper:
app = SimpleNamespace(logger=Mock())
wrapper = ResponseWrapper.__new__(ResponseWrapper)
wrapper.ap = app
return wrapper
def _make_query():
return SimpleNamespace(variables={})
def test_is_final_assistant_message_plain_assistant():
wrapper = _make_wrapper(box_service=None)
msg = provider_message.Message(role='assistant', content='done')
assert wrapper._is_final_assistant_message(msg) is True
def test_is_final_assistant_message_rejects_non_assistant():
wrapper = _make_wrapper(box_service=None)
msg = provider_message.Message(role='tool', content='{}')
assert wrapper._is_final_assistant_message(msg) is False
def test_is_final_assistant_message_rejects_tool_call_round():
wrapper = _make_wrapper(box_service=None)
msg = provider_message.Message(
role='assistant',
content='calling',
tool_calls=[
provider_message.ToolCall(
id='c1',
type='function',
function=provider_message.FunctionCall(name='exec', arguments='{}'),
)
],
)
assert wrapper._is_final_assistant_message(msg) is False
def test_is_final_assistant_message_non_final_chunk():
wrapper = _make_wrapper(box_service=None)
chunk = provider_message.MessageChunk(role='assistant', content='partial', is_final=False)
assert wrapper._is_final_assistant_message(chunk) is False
final_chunk = provider_message.MessageChunk(role='assistant', content='partial', is_final=True)
assert wrapper._is_final_assistant_message(final_chunk) is True
@pytest.mark.asyncio
async def test_append_outbound_attachments_maps_each_type():
box_service = SimpleNamespace(
available=True,
collect_outbound_attachments=AsyncMock(
return_value=[
{'type': 'Image', 'base64': 'data:image/png;base64,iVBORw0K'},
{'type': 'Voice', 'base64': 'data:audio/wav;base64,UklGRg=='},
{'type': 'File', 'name': 'report.xlsx', 'base64': 'data:app;base64,UEsDBA=='},
]
),
)
wrapper = _make_wrapper(box_service)
wrapper.ap.box_service = box_service
query = _make_query()
chain = platform_message.MessageChain([])
await wrapper._append_outbound_attachments(query, chain)
kinds = [type(c).__name__ for c in chain]
assert kinds == ['Image', 'Voice', 'File']
assert query.variables['_sandbox_outbound_collected'] is True
# File keeps its name
file_comp = chain[2]
assert getattr(file_comp, 'name', None) == 'report.xlsx'
@pytest.mark.asyncio
async def test_append_outbound_attachments_runs_once_per_query():
box_service = SimpleNamespace(
available=True,
collect_outbound_attachments=AsyncMock(return_value=[]),
)
wrapper = _make_wrapper(box_service)
wrapper.ap.box_service = box_service
query = _make_query()
query.variables['_sandbox_outbound_collected'] = True
chain = platform_message.MessageChain([])
await wrapper._append_outbound_attachments(query, chain)
box_service.collect_outbound_attachments.assert_not_awaited()
assert len(chain) == 0
@pytest.mark.asyncio
async def test_append_outbound_attachments_noop_without_box_service():
wrapper = _make_wrapper(box_service=None)
wrapper.ap.box_service = None
query = _make_query()
chain = platform_message.MessageChain([])
await wrapper._append_outbound_attachments(query, chain)
assert len(chain) == 0
# not marked collected, since service is unavailable
assert '_sandbox_outbound_collected' not in query.variables
@pytest.mark.asyncio
async def test_append_outbound_attachments_swallows_collection_error():
box_service = SimpleNamespace(
available=True,
collect_outbound_attachments=AsyncMock(side_effect=RuntimeError('boom')),
)
wrapper = _make_wrapper(box_service)
wrapper.ap.box_service = box_service
query = _make_query()
chain = platform_message.MessageChain([])
# must not raise
await wrapper._append_outbound_attachments(query, chain)
assert len(chain) == 0
wrapper.ap.logger.warning.assert_called_once()
@@ -1,92 +0,0 @@
"""Unit tests for WebSocketAdapter._process_image_components.
The web debug client uploads Image / Voice / File components carrying a storage
key in ``path``. This helper resolves each to a base64 data URI (so multimodal
LLM input and the Box sandbox inbox have usable bytes), then deletes the
consumed storage object and clears ``path``. Covers mimetype selection per
type and graceful error handling.
"""
from __future__ import annotations
import base64
from unittest.mock import AsyncMock, Mock
import pytest
from langbot.pkg.platform.sources.websocket_adapter import WebSocketAdapter
def _make_adapter(load_return=b'hello', load_side_effect=None):
provider = Mock()
provider.load = AsyncMock(return_value=load_return, side_effect=load_side_effect)
provider.delete = AsyncMock()
ap = Mock()
ap.storage_mgr.storage_provider = provider
logger = Mock()
logger.error = AsyncMock()
# WebSocketAdapter is a pydantic model; bypass full __init__/validation.
adapter = WebSocketAdapter.model_construct(ap=ap, logger=logger)
return adapter, provider
@pytest.mark.asyncio
async def test_image_jpeg_mimetype_and_cleanup():
adapter, provider = _make_adapter(load_return=b'\xff\xd8\xff')
chain = [{'type': 'Image', 'path': 'storage://abc/photo.jpg'}]
await adapter._process_image_components(chain)
expected_b64 = base64.b64encode(b'\xff\xd8\xff').decode('utf-8')
assert chain[0]['base64'] == f'data:image/jpeg;base64,{expected_b64}'
assert chain[0]['path'] == '' # consumed
provider.delete.assert_awaited_once_with('storage://abc/photo.jpg')
@pytest.mark.asyncio
async def test_image_defaults_to_png():
adapter, _ = _make_adapter()
chain = [{'type': 'Image', 'path': 'storage://abc/blob'}]
await adapter._process_image_components(chain)
assert chain[0]['base64'].startswith('data:image/png;base64,')
@pytest.mark.asyncio
async def test_voice_uses_guessed_or_wav_mimetype():
adapter, _ = _make_adapter()
chain = [{'type': 'Voice', 'path': 'storage://abc/clip.wav'}]
await adapter._process_image_components(chain)
assert chain[0]['base64'].startswith('data:audio/')
@pytest.mark.asyncio
async def test_file_uses_octet_stream_fallback():
adapter, _ = _make_adapter()
chain = [{'type': 'File', 'path': 'storage://abc/unknownblob'}]
await adapter._process_image_components(chain)
assert chain[0]['base64'].startswith('data:application/octet-stream;base64,')
@pytest.mark.asyncio
async def test_skips_components_without_path_or_unknown_type():
adapter, provider = _make_adapter()
chain = [
{'type': 'Image', 'path': ''}, # no path
{'type': 'Plain', 'path': 'storage://abc/x'}, # not a file component
{'type': 'At', 'target': '123'}, # no path key at all
]
await adapter._process_image_components(chain)
provider.load.assert_not_awaited()
assert 'base64' not in chain[0]
assert 'base64' not in chain[1]
@pytest.mark.asyncio
async def test_load_failure_is_logged_not_raised():
adapter, _ = _make_adapter(load_side_effect=RuntimeError('storage down'))
chain = [{'type': 'File', 'path': 'storage://abc/doc.pdf'}]
# must not raise
await adapter._process_image_components(chain)
assert 'base64' not in chain[0]
adapter.logger.error.assert_awaited_once()
@@ -1,93 +0,0 @@
"""Unit tests for LiteLLMRequester._convert_messages.
Focus: the content-part normalization that (a) converts image_base64 parts to
the OpenAI image_url shape and (b) drops non-image file parts (file_base64 /
file_url) which OpenAI-compatible chat models reject. The latter is essential
for Voice/File attachments including ones replayed from conversation history
since the agent consumes their bytes via the sandbox, not the model payload.
"""
import langbot_plugin.api.entities.builtin.provider.message as provider_message
from langbot.pkg.provider.modelmgr.requesters.litellmchat import LiteLLMRequester
def _make_requester() -> LiteLLMRequester:
# _convert_messages does not touch instance config, so bypass __init__.
return LiteLLMRequester.__new__(LiteLLMRequester)
def test_convert_messages_drops_file_base64_part():
req = _make_requester()
msg = provider_message.Message(
role='user',
content=[
provider_message.ContentElement.from_text('analyze this audio'),
provider_message.ContentElement.from_file_base64('data:audio/wav;base64,AAAA', 'voice.wav'),
],
)
out = req._convert_messages([msg])
parts = out[0]['content']
types = [p.get('type') for p in parts]
assert 'file_base64' not in types
assert types == ['text']
assert parts[0]['text'] == 'analyze this audio'
def test_convert_messages_drops_file_url_part():
req = _make_requester()
msg = provider_message.Message(
role='user',
content=[
provider_message.ContentElement.from_text('here is a doc'),
provider_message.ContentElement.from_file_url('http://example.com/report.xlsx', 'report.xlsx'),
],
)
out = req._convert_messages([msg])
types = [p.get('type') for p in out[0]['content']]
assert types == ['text']
def test_convert_messages_keeps_image_and_converts_to_image_url():
req = _make_requester()
msg = provider_message.Message(
role='user',
content=[
provider_message.ContentElement.from_text('look'),
provider_message.ContentElement.from_image_base64('data:image/png;base64,AAAA'),
],
)
out = req._convert_messages([msg])
parts = out[0]['content']
types = [p.get('type') for p in parts]
# image is preserved and reshaped to the OpenAI image_url form
assert types == ['text', 'image_url']
img_part = parts[1]
assert img_part['image_url'] == {'url': 'data:image/png;base64,AAAA'}
assert 'image_base64' not in img_part
def test_convert_messages_mixed_history_strips_only_files():
req = _make_requester()
# Simulate replayed history: an old voice turn + a current text turn.
history_voice = provider_message.Message(
role='user',
content=[
provider_message.ContentElement.from_text('old audio turn'),
provider_message.ContentElement.from_file_base64('data:audio/wav;base64,BBBB', 'voice.wav'),
],
)
current = provider_message.Message(
role='user',
content=[provider_message.ContentElement.from_text('now do the csv')],
)
out = req._convert_messages([history_voice, current])
assert [p.get('type') for p in out[0]['content']] == ['text']
assert [p.get('type') for p in out[1]['content']] == ['text']
def test_convert_messages_plain_string_content_untouched():
req = _make_requester()
msg = provider_message.Message(role='user', content='just text')
out = req._convert_messages([msg])
assert out[0]['content'] == 'just text'
@@ -352,117 +352,6 @@ class TestInvokeLLMStreamUsage:
assert tool_chunks[1].tool_calls[0].function.arguments == '{"text":'
assert tool_chunks[2].tool_calls[0].function.arguments == '"plugin-tool-ok"}'
@pytest.mark.asyncio
async def test_stream_tool_call_without_id_is_not_dropped(self):
"""Regression for #2261.
Ollama's OpenAI-compatible streaming endpoint emits a tool-call delta
carrying an ``index`` and a ``function`` payload but never an
OpenAI-style ``id``. The requester used to drop any id-less tool call,
so a tool-only turn yielded nothing, the stream "completed" with 0
chars, and the chat got stuck. A stable per-index id must be
synthesized so the tool call survives.
"""
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
import langbot_plugin.api.entities.builtin.provider.message as provider_message
mock_ap = Mock()
mock_ap.tool_mgr = Mock()
mock_ap.tool_mgr.generate_tools_for_openai = AsyncMock(
return_value=[{'type': 'function', 'function': {'name': 'zotero_search_items'}}]
)
requester = litellmchat.LiteLLMRequester(ap=mock_ap, config={'custom_llm_provider': 'openai'})
model = MockRuntimeModel('gpt-oss:20b', 'ollama')
# Ollama delivers the whole tool call in a single delta, with no id.
chunks = [
self._make_chunk(
tool_calls=[
{
'index': 0,
'function': {'name': 'zotero_search_items', 'arguments': '{"query":"hello"}'},
}
]
),
self._make_chunk(finish_reason='tool_calls'),
]
async def _aiter(*args, **kwargs):
for c in chunks:
yield c
query = Mock(spec=pipeline_query.Query)
query.variables = {}
messages = [provider_message.Message(role='user', content='hello?')]
funcs = [Mock()]
with patch.object(litellmchat, 'acompletion', new=AsyncMock(side_effect=lambda **kw: _aiter())):
collected = [
chunk
async for chunk in requester.invoke_llm_stream(
query=query,
model=model,
messages=messages,
funcs=funcs,
)
]
tool_chunks = [chunk for chunk in collected if chunk.tool_calls]
assert len(tool_chunks) == 1, 'id-less Ollama tool call must not be dropped'
tc = tool_chunks[0].tool_calls[0]
assert tc.id == 'call_0'
assert tc.function.name == 'zotero_search_items'
assert tc.function.arguments == '{"query":"hello"}'
@pytest.mark.asyncio
async def test_stream_multiple_tool_calls_without_id_get_distinct_ids(self):
"""Two parallel id-less tool calls must keep distinct synthesized ids."""
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
import langbot_plugin.api.entities.builtin.provider.message as provider_message
mock_ap = Mock()
mock_ap.tool_mgr = Mock()
mock_ap.tool_mgr.generate_tools_for_openai = AsyncMock(
return_value=[{'type': 'function', 'function': {'name': 'zotero_search_items'}}]
)
requester = litellmchat.LiteLLMRequester(ap=mock_ap, config={'custom_llm_provider': 'openai'})
model = MockRuntimeModel('gpt-oss:20b', 'ollama')
chunks = [
self._make_chunk(
tool_calls=[
{'index': 0, 'function': {'name': 'zotero_search_items', 'arguments': '{"q":"a"}'}},
{'index': 1, 'function': {'name': 'zotero_get_notes', 'arguments': '{"q":"b"}'}},
]
),
self._make_chunk(finish_reason='tool_calls'),
]
async def _aiter(*args, **kwargs):
for c in chunks:
yield c
query = Mock(spec=pipeline_query.Query)
query.variables = {}
messages = [provider_message.Message(role='user', content='hello?')]
funcs = [Mock()]
with patch.object(litellmchat, 'acompletion', new=AsyncMock(side_effect=lambda **kw: _aiter())):
collected = [
chunk
async for chunk in requester.invoke_llm_stream(
query=query,
model=model,
messages=messages,
funcs=funcs,
)
]
tool_chunks = [chunk for chunk in collected if chunk.tool_calls]
assert len(tool_chunks) == 1
ids = {tc.id for tc in tool_chunks[0].tool_calls}
assert ids == {'call_0', 'call_1'}
class TestProcessThinkingContent:
"""Test _process_thinking_content method"""
@@ -1,146 +0,0 @@
"""Unit tests for LocalAgentRunner._inject_inbound_attachments.
Covers the user -> sandbox attachment path added for the Box attachment
round-trip:
* materialized descriptors are stashed on the query and described to the model
via an appended text note (in-sandbox paths + outbox convention);
* non-image file parts (file_base64 / file_url) are stripped from the user
message content because OpenAI-compatible chat models reject them, while
image and text parts are kept for vision models;
* the helper is a no-op when the box service is unavailable or yields nothing,
and never raises into the chat turn on materialization failure.
"""
from __future__ import annotations
from types import SimpleNamespace
from unittest.mock import AsyncMock, Mock
import pytest
import langbot_plugin.api.entities.builtin.provider.message as provider_message
from langbot.pkg.provider.runners.localagent import LocalAgentRunner
def _make_runner(box_service) -> LocalAgentRunner:
runner = LocalAgentRunner.__new__(LocalAgentRunner)
runner.ap = SimpleNamespace(logger=Mock(), box_service=box_service)
return runner
def _make_query():
return SimpleNamespace(variables={}, query_id='q-123')
def _box_service(attachments):
svc = SimpleNamespace(
available=True,
OUTBOX_MOUNT_DIR='/outbox',
materialize_inbound_attachments=AsyncMock(return_value=attachments),
)
return svc
@pytest.mark.asyncio
async def test_inject_strips_file_parts_and_appends_note():
box = _box_service([{'type': 'Voice', 'path': '/inbox/q-123/voice.wav', 'size': 176000}])
runner = _make_runner(box)
query = _make_query()
user_message = provider_message.Message(
role='user',
content=[
provider_message.ContentElement.from_text('transcribe this'),
provider_message.ContentElement.from_file_base64('data:audio/wav;base64,AAAA', 'voice.wav'),
],
)
await runner._inject_inbound_attachments(query, user_message)
types = [getattr(ce, 'type', None) for ce in user_message.content]
# file_base64 dropped; text kept; sandbox-path note appended as text
assert 'file_base64' not in types
assert types.count('text') == 2
note = user_message.content[-1].text
assert '/inbox/q-123/voice.wav' in note
assert '/outbox/q-123' in note
# descriptors stashed for downstream stages
assert query.variables['_sandbox_inbound_attachments'] == box.materialize_inbound_attachments.return_value
@pytest.mark.asyncio
async def test_inject_keeps_image_parts():
box = _box_service([{'type': 'Image', 'path': '/inbox/q-123/pic.png', 'size': 1234}])
runner = _make_runner(box)
query = _make_query()
user_message = provider_message.Message(
role='user',
content=[
provider_message.ContentElement.from_text('what is this'),
provider_message.ContentElement.from_image_base64('data:image/png;base64,iVBORw0K'),
],
)
await runner._inject_inbound_attachments(query, user_message)
types = [getattr(ce, 'type', None) for ce in user_message.content]
assert 'image_base64' in types # vision part preserved
assert types[-1] == 'text' # note appended last
@pytest.mark.asyncio
async def test_inject_promotes_string_content_to_list_with_note():
box = _box_service([{'type': 'File', 'path': '/inbox/q-123/data.csv', 'size': 42}])
runner = _make_runner(box)
query = _make_query()
user_message = provider_message.Message(role='user', content='clean this csv')
await runner._inject_inbound_attachments(query, user_message)
assert isinstance(user_message.content, list)
assert [getattr(ce, 'type', None) for ce in user_message.content] == ['text', 'text']
assert user_message.content[0].text == 'clean this csv'
assert '/inbox/q-123/data.csv' in user_message.content[1].text
@pytest.mark.asyncio
async def test_inject_noop_without_box_service():
runner = _make_runner(box_service=None)
query = _make_query()
user_message = provider_message.Message(role='user', content='hello')
await runner._inject_inbound_attachments(query, user_message)
assert user_message.content == 'hello'
assert '_sandbox_inbound_attachments' not in query.variables
@pytest.mark.asyncio
async def test_inject_noop_when_no_attachments():
box = _box_service([])
runner = _make_runner(box)
query = _make_query()
user_message = provider_message.Message(role='user', content='hello')
await runner._inject_inbound_attachments(query, user_message)
assert user_message.content == 'hello'
assert '_sandbox_inbound_attachments' not in query.variables
@pytest.mark.asyncio
async def test_inject_swallows_materialization_error():
box = SimpleNamespace(
available=True,
OUTBOX_MOUNT_DIR='/outbox',
materialize_inbound_attachments=AsyncMock(side_effect=RuntimeError('disk full')),
)
runner = _make_runner(box)
query = _make_query()
user_message = provider_message.Message(role='user', content='hello')
# must not raise
await runner._inject_inbound_attachments(query, user_message)
assert user_message.content == 'hello'
runner.ap.logger.warning.assert_called_once()
@@ -180,7 +180,7 @@ class TestMCPServerBoxConfig:
assert cfg.host_path is None
assert cfg.host_path_mode == 'ro'
assert cfg.env == {}
assert cfg.startup_timeout_sec == 300
assert cfg.startup_timeout_sec == 120
assert cfg.cpus is None
assert cfg.memory_mb is None
assert cfg.pids_limit is None
@@ -494,84 +494,6 @@ class TestBuildBoxProcessPayload:
assert payload['args'] == ['/opt/other/server.py', '--flag']
# ── Python Workspace Preparation ────────────────────────────────────
class TestPythonWorkspacePreparation:
def test_requirements_workspace_uses_venv_bootstrap(self, mcp_module, tmp_path):
host_path = tmp_path / 'mcp-source'
host_path.mkdir()
(host_path / 'requirements.txt').write_text('mcp==1.26.0\n', encoding='utf-8')
command = mcp_module.BoxStdioSessionRuntime.detect_install_command(
str(host_path),
'/workspace/.mcp/u1/workspace',
)
assert command is not None
assert '_LB_SYSTEM_PYTHON="$(command -v python3 || command -v python || true)"' in command
assert '"$_LB_SYSTEM_PYTHON" -m venv "$_LB_VENV_DIR"' in command
assert 'python -m pip install -r "/workspace/.mcp/u1/workspace/requirements.txt"' in command
assert 'pip install --no-cache-dir -r' not in command
def test_staging_refresh_removes_stale_source_files_but_preserves_runtime_dirs(self, mcp_module, tmp_path):
source = tmp_path / 'source'
source.mkdir()
(source / 'server.py').write_text('print("new")\n', encoding='utf-8')
(source / 'requirements.txt').write_text('mcp==1.26.0\n', encoding='utf-8')
(source / '.env').write_text('TOKEN=new\n', encoding='utf-8')
process_root = tmp_path / 'shared' / '.mcp' / 'u1'
workspace = process_root / 'workspace'
(workspace / '.venv' / 'bin').mkdir(parents=True)
(workspace / '.venv' / 'bin' / 'python').write_text('', encoding='utf-8')
(workspace / '.langbot').mkdir()
(workspace / '.langbot' / 'python-env.lock').mkdir()
(workspace / '.env').write_text('TOKEN=old\n', encoding='utf-8')
(workspace / 'server.py').write_text('print("old")\n', encoding='utf-8')
(workspace / 'removed.py').write_text('stale\n', encoding='utf-8')
(workspace / 'removed_dir').mkdir()
(workspace / 'removed_dir' / 'old.txt').write_text('stale\n', encoding='utf-8')
mcp_module.BoxStdioSessionRuntime._copy_workspace_tree(str(source), str(process_root), str(workspace))
assert (workspace / 'server.py').read_text(encoding='utf-8') == 'print("new")\n'
assert (workspace / 'requirements.txt').read_text(encoding='utf-8') == 'mcp==1.26.0\n'
assert (workspace / '.env').read_text(encoding='utf-8') == 'TOKEN=new\n'
assert not (workspace / 'removed.py').exists()
assert not (workspace / 'removed_dir').exists()
assert (workspace / '.venv' / 'bin' / 'python').exists()
assert (workspace / '.langbot' / 'python-env.lock').is_dir()
def test_staging_refresh_ignores_unlink_race(self, mcp_module, tmp_path, monkeypatch):
mcp_stdio_module = sys.modules['langbot.pkg.provider.tools.loaders.mcp_stdio']
source = tmp_path / 'source'
source.mkdir()
(source / 'server.py').write_text('print("new")\n', encoding='utf-8')
process_root = tmp_path / 'shared' / '.mcp' / 'u1'
workspace = process_root / 'workspace'
workspace.mkdir(parents=True)
stale_file = workspace / 'removed.py'
stale_file.write_text('stale\n', encoding='utf-8')
real_unlink = os.unlink
def unlink_with_race(path):
if os.fspath(path) == str(stale_file):
real_unlink(path)
raise FileNotFoundError(path)
real_unlink(path)
monkeypatch.setattr(mcp_stdio_module.os, 'unlink', unlink_with_race)
mcp_module.BoxStdioSessionRuntime._copy_workspace_tree(str(source), str(process_root), str(workspace))
assert not stale_file.exists()
assert (workspace / 'server.py').read_text(encoding='utf-8') == 'print("new")\n'
# ── get_runtime_info_dict ───────────────────────────────────────────
@@ -1,6 +1,5 @@
from __future__ import annotations
import asyncio
from types import SimpleNamespace
from unittest.mock import AsyncMock, Mock
@@ -89,28 +88,6 @@ def test_token_manager_next_token_ignores_empty_token_list():
assert token_mgr.using_token_index == 0
@pytest.mark.asyncio
async def test_model_manager_initialize_skips_space_sync_after_timeout():
ap = SimpleNamespace()
ap.discover = SimpleNamespace(get_components_by_kind=Mock(return_value=[]))
ap.instance_config = SimpleNamespace(data={'space': {'models_sync_timeout': 0.01}})
ap.logger = Mock()
mgr = ModelManager(ap)
mgr.load_models_from_db = AsyncMock()
async def slow_sync():
await asyncio.sleep(1)
mgr.sync_new_models_from_space = AsyncMock(side_effect=slow_sync)
await mgr.initialize()
mgr.load_models_from_db.assert_awaited_once()
mgr.sync_new_models_from_space.assert_awaited_once()
ap.logger.warning.assert_any_call('LangBot Space model sync timed out after 0.01s, skipping startup sync.')
@pytest.mark.asyncio
async def test_updated_llm_model_is_immediately_usable_by_local_agent_pipeline():
from langbot.pkg.api.http.service.model import LLMModelsService
+3 -30
View File
@@ -193,29 +193,6 @@ class TestSkillPathHelpers:
assert list(result.keys()) == ['visible']
def test_restore_activated_skills_uses_caller_provided_names_and_visibility(self):
from langbot.pkg.provider.tools.loaders.skill import (
ACTIVATED_SKILLS_KEY,
PIPELINE_BOUND_SKILLS_KEY,
get_activated_skill_names,
restore_activated_skills,
)
ap = _make_ap()
ap.skill_mgr = SimpleNamespace(
skills={
'visible': _make_skill_data(name='visible'),
'hidden': _make_skill_data(name='hidden'),
}
)
query = SimpleNamespace(variables={PIPELINE_BOUND_SKILLS_KEY: ['visible']})
restored = restore_activated_skills(ap, query, ['visible', 'hidden', 'visible', ''])
assert restored == ['visible']
assert list(query.variables[ACTIVATED_SKILLS_KEY].keys()) == ['visible']
assert get_activated_skill_names(query) == ['visible']
def test_resolve_virtual_skill_path_allows_visible_skill_reads(self):
from langbot.pkg.provider.tools.loaders.skill import (
PIPELINE_BOUND_SKILLS_KEY,
@@ -268,8 +245,7 @@ class TestSkillPathHelpers:
command = wrap_skill_command_with_python_env('python scripts/run.py')
assert '_LB_SYSTEM_PYTHON="$(command -v python3 || command -v python || true)"' in command
assert '"$_LB_SYSTEM_PYTHON" -m venv "$_LB_VENV_DIR"' in command
assert 'python -m venv "$_LB_VENV_DIR"' in command
assert 'export VIRTUAL_ENV="$_LB_VENV_DIR"' in command
assert command.rstrip().endswith('python scripts/run.py')
@@ -305,7 +281,6 @@ class TestSkillToolLoader:
assert result['activated'] is True
assert result['skill_name'] == 'demo'
assert result['mount_path'] == '/workspace/.skills/demo'
assert result['activated_skill_names'] == ['demo']
assert 'Step 1' in result['content']
assert set(query.variables[ACTIVATED_SKILLS_KEY].keys()) == {'demo'}
@@ -481,9 +456,7 @@ class TestNativeToolLoaderSkillPaths:
SimpleNamespace(query_id='q1', variables={PIPELINE_BOUND_SKILLS_KEY: ['demo']}),
)
assert result['ok'] is True
assert result['content'] == 'demo instructions'
assert result['truncated'] is False
assert result == {'ok': True, 'content': 'demo instructions'}
@pytest.mark.asyncio
async def test_exec_in_activated_skill_mount_rewrites_command_and_refreshes(self):
@@ -512,7 +485,7 @@ class TestNativeToolLoaderSkillPaths:
query,
)
assert result['ok'] is True
assert result == {'ok': True}
tool_parameters = ap.box_service.execute_tool.await_args.args[0]
assert tool_parameters['command'] == 'python /workspace/.skills/demo/scripts/run.py'
assert tool_parameters['workdir'] == '/workspace/.skills/demo'
@@ -1,6 +1,5 @@
from __future__ import annotations
import base64
import os
import tempfile
from types import SimpleNamespace
@@ -190,78 +189,6 @@ async def test_write_creates_subdirectories():
assert f.read() == 'nested'
@pytest.mark.asyncio
async def test_read_binary_file_as_base64_chunk():
with tempfile.TemporaryDirectory() as tmpdir:
loader, _ = _make_loader_with_workspace(tmpdir)
with open(os.path.join(tmpdir, 'blob.bin'), 'wb') as f:
f.write(b'\x00\x01\x02\x03\x04')
result = await loader.invoke_tool(
'read',
{
'path': '/workspace/blob.bin',
'encoding': 'base64',
'byte_offset': 1,
'max_bytes': 2,
},
_make_query(),
)
assert result['ok'] is True
assert result['content'] == base64.b64encode(b'\x01\x02').decode('ascii')
assert result['encoding'] == 'base64'
assert result['byte_offset'] == 1
assert result['length'] == 2
assert result['size_bytes'] == 5
assert result['has_more'] is True
assert result['next_byte_offset'] == 3
@pytest.mark.asyncio
async def test_write_base64_file_append():
with tempfile.TemporaryDirectory() as tmpdir:
loader, _ = _make_loader_with_workspace(tmpdir)
first = base64.b64encode(b'\x00\x01').decode('ascii')
second = base64.b64encode(b'\x02\x03').decode('ascii')
await loader.invoke_tool(
'write',
{'path': '/workspace/blob.bin', 'content': first, 'encoding': 'base64'},
_make_query(),
)
result = await loader.invoke_tool(
'write',
{
'path': '/workspace/blob.bin',
'content': second,
'encoding': 'base64',
'mode': 'append',
},
_make_query(),
)
assert result['ok'] is True
with open(os.path.join(tmpdir, 'blob.bin'), 'rb') as f:
assert f.read() == b'\x00\x01\x02\x03'
@pytest.mark.asyncio
async def test_write_base64_rejects_invalid_content():
with tempfile.TemporaryDirectory() as tmpdir:
loader, _ = _make_loader_with_workspace(tmpdir)
result = await loader.invoke_tool(
'write',
{'path': '/workspace/blob.bin', 'content': 'not base64!', 'encoding': 'base64'},
_make_query(),
)
assert result['ok'] is False
assert 'invalid base64' in result['error']
assert not os.path.exists(os.path.join(tmpdir, 'blob.bin'))
@pytest.mark.asyncio
async def test_edit_replaces_unique_string():
with tempfile.TemporaryDirectory() as tmpdir:
@@ -321,135 +248,3 @@ async def test_path_escape_blocked():
with pytest.raises(ValueError, match='escapes'):
await loader.invoke_tool('read', {'path': '/workspace/../../etc/passwd'}, _make_query())
@pytest.mark.asyncio
async def test_box_availability_helper_handles_unavailable_and_errors():
from langbot.pkg.provider.tools.loaders.availability import is_box_backend_available
assert await is_box_backend_available(SimpleNamespace()) is False
assert await is_box_backend_available(SimpleNamespace(box_service=SimpleNamespace(available=False))) is False
unavailable_backend = SimpleNamespace(
available=True,
get_status=AsyncMock(return_value={'backend': {'available': False}}),
)
assert await is_box_backend_available(SimpleNamespace(box_service=unavailable_backend)) is False
failing_backend = SimpleNamespace(
available=True,
get_status=AsyncMock(side_effect=RuntimeError('box unavailable')),
)
assert await is_box_backend_available(SimpleNamespace(box_service=failing_backend)) is False
@pytest.mark.asyncio
async def test_read_file_supports_offset_limit_and_truncation_metadata():
with tempfile.TemporaryDirectory() as tmpdir:
loader, _ = _make_loader_with_workspace(tmpdir)
with open(os.path.join(tmpdir, 'lines.txt'), 'w', encoding='utf-8') as f:
f.write('one\ntwo\nthree\nfour\n')
result = await loader.invoke_tool(
'read',
{'path': '/workspace/lines.txt', 'offset': 2, 'limit': 2},
_make_query(),
)
assert result == {
'ok': True,
'content': 'two\nthree',
'truncated': True,
'truncated_by': 'lines',
'start_line': 2,
'end_line': 3,
'next_offset': 4,
'max_lines': 2,
'max_bytes': 50 * 1024,
}
@pytest.mark.asyncio
async def test_read_file_handles_line_larger_than_byte_limit():
with tempfile.TemporaryDirectory() as tmpdir:
loader, _ = _make_loader_with_workspace(tmpdir)
with open(os.path.join(tmpdir, 'long-line.txt'), 'w', encoding='utf-8') as f:
f.write('abcdef\n')
result = await loader.invoke_tool(
'read',
{'path': '/workspace/long-line.txt', 'max_bytes': 3},
_make_query(),
)
assert result['ok'] is True
assert result['truncated'] is True
assert result['truncated_by'] == 'bytes'
assert result['next_offset'] == 1
assert 'exceeds the 3B read limit' in result['content']
@pytest.mark.asyncio
async def test_exec_result_is_capped_and_exposes_preview_metadata():
with tempfile.TemporaryDirectory() as tmpdir:
box_service = SimpleNamespace(
available=True,
default_workspace=tmpdir,
execute_tool=AsyncMock(
return_value={
'ok': True,
'stdout': 'a' * 60000,
'stderr': 'b' * 60000,
'exit_code': 0,
}
),
)
loader = NativeToolLoader(SimpleNamespace(box_service=box_service, logger=Mock()))
result = await loader.invoke_tool('exec', {'command': 'python -V'}, _make_query())
assert result['ok'] is True
assert len(result['stdout'].encode('utf-8')) == 50 * 1024
assert len(result['stderr'].encode('utf-8')) == 50 * 1024
assert len(result['preview'].encode('utf-8')) == 50 * 1024
assert result['stdout_truncated'] is True
assert result['stderr_truncated'] is True
assert result['truncated'] is True
assert result['truncated_by'] == 'bytes'
@pytest.mark.asyncio
async def test_glob_caps_match_count_and_returns_preview():
with tempfile.TemporaryDirectory() as tmpdir:
loader, _ = _make_loader_with_workspace(tmpdir)
for index in range(105):
with open(os.path.join(tmpdir, f'file-{index:03d}.txt'), 'w', encoding='utf-8') as f:
f.write(str(index))
result = await loader.invoke_tool('glob', {'path': '/workspace', 'pattern': '*.txt'}, _make_query())
assert result['ok'] is True
assert result['total'] == 105
assert len(result['matches']) == 100
assert result['preview'] == '\n'.join(result['matches'])
assert result['truncated'] is True
assert result['truncated_by'] == 'matches'
@pytest.mark.asyncio
async def test_grep_reports_invalid_regex_and_truncates_long_matching_lines():
with tempfile.TemporaryDirectory() as tmpdir:
loader, _ = _make_loader_with_workspace(tmpdir)
with open(os.path.join(tmpdir, 'data.txt'), 'w', encoding='utf-8') as f:
f.write('needle ' + ('x' * 600) + '\n')
invalid = await loader.invoke_tool('grep', {'path': '/workspace', 'pattern': '['}, _make_query())
result = await loader.invoke_tool('grep', {'path': '/workspace', 'pattern': 'needle'}, _make_query())
assert invalid['ok'] is False
assert 'Invalid regex' in invalid['error']
assert result['ok'] is True
assert result['truncated'] is True
assert result['truncated_by'] == 'line'
assert result['matches'][0]['file'] == '/workspace/data.txt'
assert result['matches'][0]['content'].endswith('... [truncated]')
+26
View File
@@ -407,6 +407,32 @@ class TestRuntimeKnowledgeBaseRetrieve:
call_args = mock_app.plugin_connector.call_rag_retrieve.call_args
assert call_args[0][1]['retrieval_settings']['top_k'] == 5
@pytest.mark.asyncio
async def test_retrieve_records_host_rag_duration(self, monkeypatch):
"""Test host RAG span duration is measured even if plugin omits it."""
rag_module = get_rag_module()
mock_app = create_mock_app()
mock_app.monitoring_service = AsyncMock()
mock_kb = create_mock_kb_entity()
mock_app.plugin_connector.call_rag_retrieve = AsyncMock(
return_value={'results': [], 'metadata': {'status': 'success'}}
)
monkeypatch.setattr(rag_module.time, 'perf_counter', Mock(side_effect=[10.0, 10.25]))
runtime_kb = rag_module.RuntimeKnowledgeBase(mock_app, mock_kb)
await runtime_kb._retrieve(
'query text',
{
'_trace_context': {
'trace_id': 'trace-1',
'parent_span_id': 'span-root',
}
},
)
assert mock_app.monitoring_service.record_span.await_args.kwargs['duration'] == 250
@pytest.mark.asyncio
async def test_retrieve_converts_dict_to_entry(self):
"""Test that dict results are converted to RetrievalResultEntry."""
-9
View File
@@ -74,15 +74,6 @@
}
}
/* Hide scrollbar while keeping scroll behaviour (horizontal tag/filter rows). */
.scrollbar-hide {
-ms-overflow-style: none; /* IE / Edge */
scrollbar-width: none; /* Firefox */
}
.scrollbar-hide::-webkit-scrollbar {
display: none; /* Chrome / Safari / WebKit */
}
@custom-variant dark (&:is(.dark *));
@theme inline {
@@ -310,7 +310,6 @@ function SingleSelectField({
{options.map((opt) => (
<div key={opt.id}>
<button
type="button"
onClick={() => onChange(opt.id)}
className={`w-full text-left text-sm px-3 py-2 rounded-lg border transition-colors ${
value === opt.id
@@ -362,16 +361,8 @@ function MultiSelectField({
const selected = value.includes(opt.id);
return (
<div key={opt.id}>
<div
role="button"
tabIndex={0}
<button
onClick={() => toggle(opt.id)}
onKeyDown={(e) => {
if (e.key === 'Enter' || e.key === ' ') {
e.preventDefault();
toggle(opt.id);
}
}}
className={`w-full text-left text-sm px-3 py-2 rounded-lg border transition-colors flex items-center gap-2 ${
selected
? 'border-primary bg-primary/5 text-primary'
@@ -380,7 +371,7 @@ function MultiSelectField({
>
<Checkbox checked={selected} className="pointer-events-none" />
{getI18nText(opt.label)}
</div>
</button>
{opt.has_input && selected && (
<input
type="text"
@@ -5,6 +5,7 @@ import {
ModelCall,
LLMCall,
EmbeddingCall,
MonitoringTrace,
} from '../types/monitoring';
import { backendClient } from '@/app/infra/http';
import { parseUTCTimestamp } from '../utils/dateUtils';
@@ -263,12 +264,48 @@ export function useMonitoringData(filterState: FilterState) {
messageId: error.message_id,
}),
),
traces: (response.traces || []).map(
(trace: {
trace_id: string;
started_at: string;
ended_at?: string;
duration?: number;
status: string;
name: string;
bot_id?: string;
bot_name?: string;
pipeline_id?: string;
pipeline_name?: string;
session_id?: string;
message_id?: string;
query_id?: string;
attributes?: Record<string, unknown>;
}): MonitoringTrace => ({
traceId: trace.trace_id,
name: trace.name,
startedAt: parseUTCTimestamp(trace.started_at),
endedAt: trace.ended_at
? parseUTCTimestamp(trace.ended_at)
: undefined,
duration: trace.duration,
status: trace.status as 'running' | 'success' | 'error',
botId: trace.bot_id,
botName: trace.bot_name,
pipelineId: trace.pipeline_id,
pipelineName: trace.pipeline_name,
sessionId: trace.session_id,
messageId: trace.message_id,
queryId: trace.query_id,
attributes: trace.attributes || {},
}),
),
totalCount: {
messages: response.totalCount.messages,
llmCalls: response.totalCount.llmCalls,
embeddingCalls: response.totalCount.embeddingCalls || 0,
sessions: response.totalCount.sessions,
errors: response.totalCount.errors,
traces: response.totalCount.traces || 0,
},
};
+297 -1
View File
@@ -10,6 +10,7 @@ import {
MessageSquare,
Sparkles,
CheckCircle2,
GitBranch,
} from 'lucide-react';
import OverviewCards from './components/overview-cards/OverviewCards';
import MonitoringFilters from './components/filters/MonitoringFilters';
@@ -22,9 +23,15 @@ import { MessageDetailsCard } from './components/MessageDetailsCard';
import { MessageContentRenderer } from './components/MessageContentRenderer';
import { FeedbackStatsCards } from './components/FeedbackCard';
import { FeedbackList } from './components/FeedbackList';
import { MessageDetails } from './types/monitoring';
import {
MessageDetails,
TraceDetails,
MonitoringSpan,
} from './types/monitoring';
import { httpClient } from '@/app/infra/http/HttpClient';
import { backendClient } from '@/app/infra/http';
import { LoadingSpinner, LoadingPage } from '@/components/ui/loading-spinner';
import { parseUTCTimestamp } from './utils/dateUtils';
interface RawMessageData {
id: string;
@@ -72,6 +79,97 @@ interface RawErrorData {
stack_trace: string | null;
}
interface RawTraceData {
trace_id: string;
started_at: string;
ended_at?: string;
duration?: number;
status: string;
name: string;
bot_id?: string;
bot_name?: string;
pipeline_id?: string;
pipeline_name?: string;
session_id?: string;
message_id?: string;
query_id?: string;
attributes?: Record<string, unknown>;
}
interface RawSpanData {
span_id: string;
trace_id: string;
parent_span_id?: string;
name: string;
kind: string;
status: string;
started_at: string;
ended_at?: string;
duration?: number;
message_id?: string;
session_id?: string;
bot_id?: string;
pipeline_id?: string;
attributes?: Record<string, unknown>;
error_message?: string;
}
function mapTrace(raw: RawTraceData) {
return {
traceId: raw.trace_id,
name: raw.name,
startedAt: parseUTCTimestamp(raw.started_at),
endedAt: raw.ended_at ? parseUTCTimestamp(raw.ended_at) : undefined,
duration: raw.duration,
status: raw.status as 'running' | 'success' | 'error',
botId: raw.bot_id,
botName: raw.bot_name,
pipelineId: raw.pipeline_id,
pipelineName: raw.pipeline_name,
sessionId: raw.session_id,
messageId: raw.message_id,
queryId: raw.query_id,
attributes: raw.attributes || {},
};
}
function mapSpan(raw: RawSpanData): MonitoringSpan {
return {
spanId: raw.span_id,
traceId: raw.trace_id,
parentSpanId: raw.parent_span_id,
name: raw.name,
kind: raw.kind,
status: raw.status as 'running' | 'success' | 'error',
startedAt: parseUTCTimestamp(raw.started_at),
endedAt: raw.ended_at ? parseUTCTimestamp(raw.ended_at) : undefined,
duration: raw.duration,
messageId: raw.message_id,
sessionId: raw.session_id,
botId: raw.bot_id,
pipelineId: raw.pipeline_id,
attributes: raw.attributes || {},
errorMessage: raw.error_message,
};
}
function spanDepth(
span: MonitoringSpan,
spansById: Map<string, MonitoringSpan>,
) {
let depth = 0;
let current = span.parentSpanId
? spansById.get(span.parentSpanId)
: undefined;
while (current && depth < 8) {
depth += 1;
current = current.parentSpanId
? spansById.get(current.parentSpanId)
: undefined;
}
return depth;
}
function MonitoringPageContent() {
const { t } = useTranslation();
const { filterState, setSelectedBots, setSelectedPipelines, setTimeRange } =
@@ -158,6 +256,13 @@ function MonitoringPageContent() {
// State for expanded errors
const [expandedErrorId, setExpandedErrorId] = useState<string | null>(null);
const [expandedTraceId, setExpandedTraceId] = useState<string | null>(null);
const [traceDetails, setTraceDetails] = useState<
Record<string, TraceDetails>
>({});
const [loadingTraceDetails, setLoadingTraceDetails] = useState<
Record<string, boolean>
>({});
// State for controlled tabs
const [activeTab, setActiveTab] = useState<string>('messages');
@@ -265,6 +370,34 @@ function MonitoringPageContent() {
}
};
const toggleTraceExpand = async (traceId: string) => {
if (expandedTraceId === traceId) {
setExpandedTraceId(null);
return;
}
setExpandedTraceId(traceId);
if (traceDetails[traceId]) return;
setLoadingTraceDetails((prev) => ({ ...prev, [traceId]: true }));
try {
const result = await backendClient.getMonitoringTraceDetails(traceId);
setTraceDetails((prev) => ({
...prev,
[traceId]: {
traceId: result.trace_id,
found: result.found,
trace: result.trace ? mapTrace(result.trace) : undefined,
spans: (result.spans || []).map(mapSpan),
},
}));
} catch (error) {
console.error('Failed to fetch trace details:', error);
} finally {
setLoadingTraceDetails((prev) => ({ ...prev, [traceId]: false }));
}
};
return (
<div className="w-full h-full overflow-y-auto overflow-x-hidden">
{/* Filters and Refresh Button - Sticky */}
@@ -323,6 +456,9 @@ function MonitoringPageContent() {
<TabsTrigger value="tokens" className="px-6 py-2">
{t('monitoring.tabs.tokens')}
</TabsTrigger>
<TabsTrigger value="traces" className="px-6 py-2">
{t('monitoring.tabs.traces')}
</TabsTrigger>
<TabsTrigger value="feedback" className="px-6 py-2">
{t('monitoring.tabs.feedback')}
</TabsTrigger>
@@ -690,6 +826,166 @@ function MonitoringPageContent() {
/>
</TabsContent>
<TabsContent value="traces" className="p-6 m-0">
<div>
{loading && (
<div className="py-12 flex justify-center">
<LoadingSpinner text={t('common.loading')} />
</div>
)}
{!loading && data && data.traces && data.traces.length > 0 && (
<div className="space-y-4">
{data.traces.map((trace) => {
const details = traceDetails[trace.traceId];
const spans = details?.spans || [];
const spansById = new Map(
spans.map((span) => [span.spanId, span]),
);
const maxDuration = Math.max(
1,
...spans.map((span) => span.duration || 0),
);
return (
<div
key={trace.traceId}
className="border rounded-xl overflow-hidden transition-all duration-200"
>
<div
className="p-5 cursor-pointer hover:bg-accent transition-colors"
onClick={() => toggleTraceExpand(trace.traceId)}
>
<div className="flex items-start justify-between gap-4">
<div className="flex items-start flex-1 min-w-0">
<div className="mr-3 mt-0.5">
{expandedTraceId === trace.traceId ? (
<ChevronDown className="w-5 h-5 text-muted-foreground" />
) : (
<ChevronRight className="w-5 h-5 text-muted-foreground" />
)}
</div>
<div className="min-w-0 flex-1">
<div className="flex flex-wrap items-center gap-2 mb-2">
<span className="text-xs text-muted-foreground font-mono">
{trace.traceId}
</span>
<span
className={`text-xs px-2 py-1 rounded ${
trace.status === 'error'
? 'bg-red-100 text-red-800 dark:bg-red-900 dark:text-red-200'
: trace.status === 'running'
? 'bg-yellow-100 text-yellow-800 dark:bg-yellow-900 dark:text-yellow-200'
: 'bg-green-100 text-green-800 dark:bg-green-900 dark:text-green-200'
}`}
>
{trace.status}
</span>
</div>
<div className="font-medium text-sm text-foreground mb-1">
{trace.name}
</div>
<div className="text-xs text-muted-foreground truncate">
{trace.botName || '-'} {' '}
{trace.pipelineName || '-'}
{trace.sessionId
? ` · ${trace.sessionId}`
: ''}
</div>
</div>
</div>
<div className="flex flex-col items-end gap-1 text-xs text-muted-foreground whitespace-nowrap">
<span>{trace.startedAt.toLocaleString()}</span>
<span>{trace.duration ?? 0}ms</span>
</div>
</div>
</div>
{expandedTraceId === trace.traceId && (
<div className="border-t p-5 bg-muted">
{loadingTraceDetails[trace.traceId] && (
<div className="py-4 flex justify-center">
<LoadingSpinner size="sm" text="" />
</div>
)}
{!loadingTraceDetails[trace.traceId] && (
<div className="space-y-3">
{spans.length === 0 && (
<div className="text-sm text-muted-foreground">
{t('monitoring.traces.noSpans')}
</div>
)}
{spans.map((span) => {
const depth = spanDepth(span, spansById);
const width = Math.max(
6,
Math.min(
100,
((span.duration || 0) / maxDuration) *
100,
),
);
return (
<div
key={span.spanId}
className="grid grid-cols-[minmax(180px,1fr)_minmax(140px,2fr)_80px] gap-3 items-center text-xs"
>
<div
className="min-w-0"
style={{
paddingLeft: `${depth * 16}px`,
}}
>
<div className="font-medium text-foreground truncate">
{span.name}
</div>
<div className="text-muted-foreground truncate">
{span.kind}
</div>
</div>
<div className="h-7 bg-background rounded border overflow-hidden">
<div
className={`h-full ${
span.status === 'error'
? 'bg-red-500/70'
: 'bg-blue-500/70'
}`}
style={{ width: `${width}%` }}
/>
</div>
<div className="text-right text-muted-foreground">
{span.duration ?? 0}ms
</div>
{span.errorMessage && (
<div className="col-span-3 text-red-600 dark:text-red-400 bg-background rounded p-2">
{span.errorMessage}
</div>
)}
</div>
);
})}
</div>
)}
</div>
)}
</div>
);
})}
</div>
)}
{!loading &&
(!data || !data.traces || data.traces.length === 0) && (
<div className="flex flex-col items-center justify-center text-muted-foreground py-16 gap-2">
<GitBranch className="h-[3rem] w-[3rem]" />
<div className="text-sm">
{t('monitoring.traces.noTraces')}
</div>
</div>
)}
</div>
</TabsContent>
<TabsContent value="feedback" className="p-6 m-0">
<div>
{loading && (
@@ -111,6 +111,48 @@ export interface ErrorLog {
messageId?: string;
}
export interface MonitoringTrace {
traceId: string;
name: string;
startedAt: Date;
endedAt?: Date;
duration?: number;
status: 'running' | 'success' | 'error';
botId?: string;
botName?: string;
pipelineId?: string;
pipelineName?: string;
sessionId?: string;
messageId?: string;
queryId?: string;
attributes: Record<string, unknown>;
}
export interface MonitoringSpan {
spanId: string;
traceId: string;
parentSpanId?: string;
name: string;
kind: string;
status: 'success' | 'error' | 'running';
startedAt: Date;
endedAt?: Date;
duration?: number;
messageId?: string;
sessionId?: string;
botId?: string;
pipelineId?: string;
attributes: Record<string, unknown>;
errorMessage?: string;
}
export interface TraceDetails {
traceId: string;
found: boolean;
trace?: MonitoringTrace;
spans: MonitoringSpan[];
}
export interface MessageDetails {
messageId: string;
found: boolean;
@@ -125,6 +167,7 @@ export interface MessageDetails {
averageDurationMs: number;
};
errors: ErrorLog[];
trace?: MonitoringTrace;
}
export interface OverviewMetrics {
@@ -203,6 +246,7 @@ export interface MonitoringData {
modelCalls: ModelCall[];
sessions: SessionInfo[];
errors: ErrorLog[];
traces: MonitoringTrace[];
feedback?: FeedbackRecord[];
feedbackStats?: FeedbackStats;
totalCount: {
@@ -211,6 +255,7 @@ export interface MonitoringData {
embeddingCalls: number;
sessions: number;
errors: number;
traces: number;
feedback?: number;
};
}
@@ -15,7 +15,6 @@ import {
At,
Quote,
Voice,
File as FileComponent,
Source,
} from '@/app/infra/entities/message';
import { toast } from 'sonner';
@@ -65,12 +64,7 @@ export default function DebugDialog({
const [isHovering, setIsHovering] = useState(false);
const [isConnected, setIsConnected] = useState(false);
const [selectedImages, setSelectedImages] = useState<
Array<{
file: File;
preview: string;
fileKey?: string;
kind: 'image' | 'voice' | 'file';
}>
Array<{ file: File; preview: string; fileKey?: string }>
>([]);
const [isUploading, setIsUploading] = useState(false);
const [previewImageUrl, setPreviewImageUrl] = useState<string>('');
@@ -298,38 +292,23 @@ export default function DebugDialog({
const files = e.target.files;
if (!files || files.length === 0) return;
const newImages: Array<{
file: File;
preview: string;
kind: 'image' | 'voice' | 'file';
}> = [];
const newImages: Array<{ file: File; preview: string }> = [];
for (let i = 0; i < files.length; i++) {
const file = files[i];
if (file.type.startsWith('image/')) {
newImages.push({
file,
preview: URL.createObjectURL(file),
kind: 'image',
});
} else if (file.type.startsWith('audio/')) {
newImages.push({ file, preview: '', kind: 'voice' });
} else {
newImages.push({ file, preview: '', kind: 'file' });
const preview = URL.createObjectURL(file);
newImages.push({ file, preview });
}
}
setSelectedImages((prev) => [...prev, ...newImages]);
// reset the input so selecting the same file again re-triggers onChange
e.target.value = '';
};
const handleRemoveImage = (index: number) => {
setSelectedImages((prev) => {
const newImages = [...prev];
if (newImages[index].preview) {
URL.revokeObjectURL(newImages[index].preview);
}
URL.revokeObjectURL(newImages[index].preview);
newImages.splice(index, 1);
return newImages;
});
@@ -393,33 +372,19 @@ export default function DebugDialog({
});
}
// Upload attachments and add to message chain
for (const attachment of selectedImages) {
// Upload images and add to message chain
for (const image of selectedImages) {
try {
if (attachment.kind === 'image') {
const result = await httpClient.uploadWebSocketImage(
selectedPipelineId,
attachment.file,
);
messageChain.push({
type: 'Image',
path: result.file_key,
});
} else {
// Voice / File go through the generic document upload endpoint,
// which returns a storage key the backend resolves into the
// sandbox inbox just like images.
const result = await httpClient.uploadDocumentFile(attachment.file);
messageChain.push({
type: attachment.kind === 'voice' ? 'Voice' : 'File',
path: result.file_id,
...(attachment.kind === 'file'
? { name: attachment.file.name }
: {}),
});
}
const result = await httpClient.uploadWebSocketImage(
selectedPipelineId,
image.file,
);
messageChain.push({
type: 'Image',
path: result.file_key,
});
} catch (error) {
console.error('Attachment upload failed:', error);
console.error('Image upload failed:', error);
toast.error(t('pipelines.debugDialog.imageUploadFailed'));
}
}
@@ -428,9 +393,7 @@ export default function DebugDialog({
setInputValue('');
setHasAt(false);
setQuotedMessage(null);
selectedImages.forEach((img) => {
if (img.preview) URL.revokeObjectURL(img.preview);
});
selectedImages.forEach((img) => URL.revokeObjectURL(img.preview));
setSelectedImages([]);
// Send message via WebSocket
@@ -497,29 +460,13 @@ export default function DebugDialog({
}
case 'File': {
const file = component as FileComponent;
const downloadHref = file.base64
? file.base64.startsWith('data:')
? file.base64
: `data:application/octet-stream;base64,${file.base64}`
: file.url || '';
const fileName = file.name || 'Unknown';
const file = component as MessageChainComponent & { name?: string };
return (
<div key={index} className="my-2 flex items-center gap-2 text-sm">
<Paperclip className="size-4" />
{downloadHref ? (
<a
href={downloadHref}
download={fileName}
className="text-primary underline hover:opacity-80"
>
[{t('pipelines.debugDialog.file')}] {fileName}
</a>
) : (
<span>
[{t('pipelines.debugDialog.file')}] {fileName}
</span>
)}
<span>
[{t('pipelines.debugDialog.file')}] {file.name || 'Unknown'}
</span>
</div>
);
}
@@ -897,30 +844,17 @@ export default function DebugDialog({
</div>
)}
{/* Attachment preview area */}
{/* Image preview area */}
{selectedImages.length > 0 && (
<div className="px-4 pb-2">
<div className="flex gap-2 flex-wrap">
{selectedImages.map((image, index) => (
<div key={index} className="relative group">
{image.kind === 'image' ? (
<img
src={image.preview}
alt={`preview-${index}`}
className="w-20 h-20 object-cover rounded-lg border"
/>
) : (
<div className="w-36 h-20 px-2 rounded-lg border bg-muted/40 flex items-center gap-2 overflow-hidden">
{image.kind === 'voice' ? (
<Music className="size-5 shrink-0 text-muted-foreground" />
) : (
<Paperclip className="size-5 shrink-0 text-muted-foreground" />
)}
<span className="text-xs text-muted-foreground truncate">
{image.file.name}
</span>
</div>
)}
<img
src={image.preview}
alt={`preview-${index}`}
className="w-20 h-20 object-cover rounded-lg border"
/>
<button
type="button"
onClick={() => handleRemoveImage(index)}
@@ -949,7 +883,7 @@ export default function DebugDialog({
<input
ref={fileInputRef}
type="file"
accept="image/*,audio/*,*/*"
accept="image/*"
multiple
onChange={handleImageSelect}
className="hidden"
@@ -787,42 +787,38 @@ function MarketPageContent({
</div>
</div>
{/* 用真实标签做快速筛选 —— 始终单行横向滚动,避免标签变多时换行错位 */}
<div className="relative mx-auto w-full max-w-4xl">
<div className="scrollbar-hide flex items-center gap-1.5 overflow-x-auto pb-1 pr-6">
<Button
type="button"
variant={selectedTags.length === 0 ? 'secondary' : 'ghost'}
size="sm"
className="h-7 shrink-0 px-2.5 text-xs"
onClick={() => handleTagsChange([])}
>
{t('market.allExtensions')}
</Button>
{availableTags.map((tag) => {
const selected = selectedTags.includes(tag.tag);
return (
<Button
key={tag.tag}
type="button"
variant={selected ? 'secondary' : 'ghost'}
size="sm"
className="h-7 shrink-0 px-2.5 text-xs"
onClick={() => {
const newTags = selected
? selectedTags.filter((t) => t !== tag.tag)
: [...selectedTags, tag.tag];
handleTagsChange(newTags);
}}
>
{tagNames[tag.tag] || tag.tag}
{selected && <X className="h-3 w-3" />}
</Button>
);
})}
</div>
{/* 右侧渐隐,提示还有更多标签可横向滚动查看 */}
<div className="pointer-events-none absolute right-0 top-0 bottom-1 w-8 bg-gradient-to-l from-background to-transparent" />
{/* 用真实标签做快速筛选 */}
<div className="mx-auto flex w-full max-w-4xl items-center gap-2 overflow-x-auto pb-1 sm:flex-wrap sm:justify-center sm:overflow-visible">
<Button
type="button"
variant={selectedTags.length === 0 ? 'secondary' : 'ghost'}
size="sm"
className="h-8 shrink-0"
onClick={() => handleTagsChange([])}
>
{t('market.allExtensions')}
</Button>
{availableTags.map((tag) => {
const selected = selectedTags.includes(tag.tag);
return (
<Button
key={tag.tag}
type="button"
variant={selected ? 'secondary' : 'ghost'}
size="sm"
className="h-8 shrink-0"
onClick={() => {
const newTags = selected
? selectedTags.filter((t) => t !== tag.tag)
: [...selectedTags, tag.tag];
handleTagsChange(newTags);
}}
>
{tagNames[tag.tag] || tag.tag}
{selected && <X className="h-3.5 w-3.5" />}
</Button>
);
})}
</div>
</div>
@@ -64,8 +64,6 @@ export interface File extends MessageComponent {
name?: string;
size?: number;
url?: string;
path?: string;
base64?: string;
}
// Unknown component
+101
View File
@@ -1185,12 +1185,29 @@ export class BackendClient extends BaseHttpClient {
stack_trace?: string;
message_id?: string;
}>;
traces?: Array<{
trace_id: string;
started_at: string;
ended_at?: string;
duration?: number;
status: string;
name: string;
bot_id?: string;
bot_name?: string;
pipeline_id?: string;
pipeline_name?: string;
session_id?: string;
message_id?: string;
query_id?: string;
attributes?: Record<string, unknown>;
}>;
totalCount: {
messages: number;
llmCalls: number;
embeddingCalls: number;
sessions: number;
errors: number;
traces?: number;
};
}> {
const queryParams = new URLSearchParams();
@@ -1213,6 +1230,90 @@ export class BackendClient extends BaseHttpClient {
return this.get(`/api/v1/monitoring/data?${queryParams.toString()}`);
}
public getMonitoringTraces(params: {
botId?: string[];
pipelineId?: string[];
startTime?: string;
endTime?: string;
limit?: number;
}): Promise<{
traces: Array<{
trace_id: string;
started_at: string;
ended_at?: string;
duration?: number;
status: string;
name: string;
bot_id?: string;
bot_name?: string;
pipeline_id?: string;
pipeline_name?: string;
session_id?: string;
message_id?: string;
query_id?: string;
attributes?: Record<string, unknown>;
}>;
total: number;
}> {
const queryParams = new URLSearchParams();
if (params.botId) {
params.botId.forEach((id) => queryParams.append('botId', id));
}
if (params.pipelineId) {
params.pipelineId.forEach((id) => queryParams.append('pipelineId', id));
}
if (params.startTime) {
queryParams.append('startTime', params.startTime);
}
if (params.endTime) {
queryParams.append('endTime', params.endTime);
}
if (params.limit) {
queryParams.append('limit', params.limit.toString());
}
return this.get(`/api/v1/monitoring/traces?${queryParams.toString()}`);
}
public getMonitoringTraceDetails(traceId: string): Promise<{
trace_id: string;
found: boolean;
trace: {
trace_id: string;
started_at: string;
ended_at?: string;
duration?: number;
status: string;
name: string;
bot_id?: string;
bot_name?: string;
pipeline_id?: string;
pipeline_name?: string;
session_id?: string;
message_id?: string;
query_id?: string;
attributes?: Record<string, unknown>;
};
spans: Array<{
span_id: string;
trace_id: string;
parent_span_id?: string;
name: string;
kind: string;
status: string;
started_at: string;
ended_at?: string;
duration?: number;
message_id?: string;
session_id?: string;
bot_id?: string;
pipeline_id?: string;
attributes?: Record<string, unknown>;
error_message?: string;
}>;
}> {
return this.get(`/api/v1/monitoring/traces/${traceId}`);
}
public getMonitoringOverview(params: {
botId?: string[];
pipelineId?: string[];
+6
View File
@@ -1217,6 +1217,7 @@ const enUS = {
embeddingCalls: 'Embedding Calls',
modelCalls: 'Model Calls',
tokens: 'Token Monitoring',
traces: 'Traces',
feedback: 'User Feedback',
sessions: 'Session Analysis',
errors: 'Error Logs',
@@ -1321,6 +1322,11 @@ const enUS = {
noErrors: 'No errors found',
stackTrace: 'Stack Trace',
},
traces: {
title: 'Traces',
noTraces: 'No traces found',
noSpans: 'No spans recorded for this trace',
},
feedback: {
title: 'User Feedback',
totalFeedback: 'Total Feedback',
+6
View File
@@ -1158,6 +1158,7 @@ const zhHans = {
embeddingCalls: 'Embedding调用',
modelCalls: '模型调用',
tokens: 'Token 监控',
traces: '链路追踪',
feedback: '用户反馈',
sessions: '会话分析',
errors: '错误日志',
@@ -1262,6 +1263,11 @@ const zhHans = {
noErrors: '未找到错误',
stackTrace: '堆栈追踪',
},
traces: {
title: '链路追踪',
noTraces: '未找到链路记录',
noSpans: '此链路暂无 Span 记录',
},
feedback: {
title: '用户反馈',
totalFeedback: '总反馈数',