feat(box): add obs

This commit is contained in:
youhuanghe
2026-03-20 05:14:16 +00:00
committed by WangCham
parent 86b2d517f2
commit 15c03fe96b
4 changed files with 198 additions and 3 deletions

View File

@@ -0,0 +1,22 @@
from __future__ import annotations
from .. import group
@group.group_class('box', '/api/v1/box')
class BoxRouterGroup(group.RouterGroup):
async def initialize(self) -> None:
@self.route('/status', methods=['GET'], auth_type=group.AuthType.USER_TOKEN)
async def _() -> str:
status = await self.ap.box_service.get_status()
return self.success(data=status)
@self.route('/sessions', methods=['GET'], auth_type=group.AuthType.USER_TOKEN)
async def _() -> str:
sessions = self.ap.box_service.runtime.get_sessions()
return self.success(data=sessions)
@self.route('/errors', methods=['GET'], auth_type=group.AuthType.USER_TOKEN)
async def _() -> str:
errors = self.ap.box_service.get_recent_errors()
return self.success(data=errors)

View File

@@ -65,6 +65,46 @@ class BoxRuntime:
for session_id in session_ids:
await self._drop_session_locked(session_id)
# ── Observability ─────────────────────────────────────────────────
async def get_backend_info(self) -> dict:
backend = self._backend
if backend is None:
return {'name': None, 'available': False}
try:
available = await backend.is_available()
except Exception:
available = False
return {'name': backend.name, 'available': available}
def get_sessions(self) -> list[dict]:
return [
{
'session_id': s.info.session_id,
'backend_name': s.info.backend_name,
'backend_session_id': s.info.backend_session_id,
'image': s.info.image,
'network': s.info.network.value,
'host_path': s.info.host_path,
'host_path_mode': s.info.host_path_mode.value,
'cpus': s.info.cpus,
'memory_mb': s.info.memory_mb,
'pids_limit': s.info.pids_limit,
'read_only_rootfs': s.info.read_only_rootfs,
'created_at': s.info.created_at.isoformat(),
'last_used_at': s.info.last_used_at.isoformat(),
}
for s in self._sessions.values()
]
async def get_status(self) -> dict:
backend_info = await self.get_backend_info()
return {
'backend': backend_info,
'active_sessions': len(self._sessions),
'session_ttl_sec': self.session_ttl_sec,
}
async def _get_or_create_session(self, spec: BoxSpec) -> _RuntimeSession:
async with self._lock:
await self._reap_expired_sessions_locked()

View File

@@ -1,5 +1,7 @@
from __future__ import annotations
import collections
import datetime as _dt
import enum
import json
import os
@@ -7,11 +9,13 @@ from typing import TYPE_CHECKING
import pydantic
from .errors import BoxValidationError
from .errors import BoxError, BoxValidationError
from .models import BUILTIN_PROFILES, BoxExecutionResult, BoxProfile, BoxSpec
from .runtime import BoxRuntime
_INT_ADAPTER = pydantic.TypeAdapter(int)
_UTC = _dt.timezone.utc
_MAX_RECENT_ERRORS = 50
if TYPE_CHECKING:
from ..core import app as core_app
@@ -31,6 +35,7 @@ class BoxService:
self.allowed_host_mount_roots = self._load_allowed_host_mount_roots()
self.default_host_workspace = self._load_default_host_workspace()
self.profile = self._load_profile()
self._recent_errors: collections.deque[dict] = collections.deque(maxlen=_MAX_RECENT_ERRORS)
async def initialize(self):
await self.runtime.initialize()
@@ -48,7 +53,9 @@ class BoxService:
spec = BoxSpec.model_validate(spec_payload)
except pydantic.ValidationError as exc:
first_error = exc.errors()[0]
raise BoxValidationError(first_error.get('msg', 'invalid sandbox_exec arguments')) from exc
err = BoxValidationError(first_error.get('msg', 'invalid sandbox_exec arguments'))
self._record_error(err, query)
raise err from exc
self._validate_host_mount(spec)
self.ap.logger.info(
@@ -56,7 +63,11 @@ class BoxService:
f'query_id={query.query_id} '
f'spec={json.dumps(self._summarize_spec(spec), ensure_ascii=False)}'
)
result = await self.runtime.execute(spec)
try:
result = await self.runtime.execute(spec)
except BoxError as exc:
self._record_error(exc, query)
raise
self.ap.logger.info(
'LangBot Box result: '
f'query_id={query.query_id} '
@@ -229,3 +240,24 @@ class BoxService:
if normalized_timeout > profile.max_timeout_sec:
params['timeout_sec'] = profile.max_timeout_sec
# ── Observability ─────────────────────────────────────────────────
def _record_error(self, exc: Exception, query: 'pipeline_query.Query'):
self._recent_errors.append({
'timestamp': _dt.datetime.now(_UTC).isoformat(),
'type': type(exc).__name__,
'message': str(exc),
'query_id': str(query.query_id),
})
def get_recent_errors(self) -> list[dict]:
return list(self._recent_errors)
async def get_status(self) -> dict:
runtime_status = await self.runtime.get_status()
return {
**runtime_status,
'profile': self.profile.name,
'recent_error_count': len(self._recent_errors),
}