mirror of
https://github.com/langbot-app/LangBot.git
synced 2026-06-02 03:55:55 +00:00
feat(box): add obs
This commit is contained in:
22
src/langbot/pkg/api/http/controller/groups/box.py
Normal file
22
src/langbot/pkg/api/http/controller/groups/box.py
Normal file
@@ -0,0 +1,22 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from .. import group
|
||||
|
||||
|
||||
@group.group_class('box', '/api/v1/box')
|
||||
class BoxRouterGroup(group.RouterGroup):
|
||||
async def initialize(self) -> None:
|
||||
@self.route('/status', methods=['GET'], auth_type=group.AuthType.USER_TOKEN)
|
||||
async def _() -> str:
|
||||
status = await self.ap.box_service.get_status()
|
||||
return self.success(data=status)
|
||||
|
||||
@self.route('/sessions', methods=['GET'], auth_type=group.AuthType.USER_TOKEN)
|
||||
async def _() -> str:
|
||||
sessions = self.ap.box_service.runtime.get_sessions()
|
||||
return self.success(data=sessions)
|
||||
|
||||
@self.route('/errors', methods=['GET'], auth_type=group.AuthType.USER_TOKEN)
|
||||
async def _() -> str:
|
||||
errors = self.ap.box_service.get_recent_errors()
|
||||
return self.success(data=errors)
|
||||
@@ -65,6 +65,46 @@ class BoxRuntime:
|
||||
for session_id in session_ids:
|
||||
await self._drop_session_locked(session_id)
|
||||
|
||||
# ── Observability ─────────────────────────────────────────────────
|
||||
|
||||
async def get_backend_info(self) -> dict:
|
||||
backend = self._backend
|
||||
if backend is None:
|
||||
return {'name': None, 'available': False}
|
||||
try:
|
||||
available = await backend.is_available()
|
||||
except Exception:
|
||||
available = False
|
||||
return {'name': backend.name, 'available': available}
|
||||
|
||||
def get_sessions(self) -> list[dict]:
|
||||
return [
|
||||
{
|
||||
'session_id': s.info.session_id,
|
||||
'backend_name': s.info.backend_name,
|
||||
'backend_session_id': s.info.backend_session_id,
|
||||
'image': s.info.image,
|
||||
'network': s.info.network.value,
|
||||
'host_path': s.info.host_path,
|
||||
'host_path_mode': s.info.host_path_mode.value,
|
||||
'cpus': s.info.cpus,
|
||||
'memory_mb': s.info.memory_mb,
|
||||
'pids_limit': s.info.pids_limit,
|
||||
'read_only_rootfs': s.info.read_only_rootfs,
|
||||
'created_at': s.info.created_at.isoformat(),
|
||||
'last_used_at': s.info.last_used_at.isoformat(),
|
||||
}
|
||||
for s in self._sessions.values()
|
||||
]
|
||||
|
||||
async def get_status(self) -> dict:
|
||||
backend_info = await self.get_backend_info()
|
||||
return {
|
||||
'backend': backend_info,
|
||||
'active_sessions': len(self._sessions),
|
||||
'session_ttl_sec': self.session_ttl_sec,
|
||||
}
|
||||
|
||||
async def _get_or_create_session(self, spec: BoxSpec) -> _RuntimeSession:
|
||||
async with self._lock:
|
||||
await self._reap_expired_sessions_locked()
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import collections
|
||||
import datetime as _dt
|
||||
import enum
|
||||
import json
|
||||
import os
|
||||
@@ -7,11 +9,13 @@ from typing import TYPE_CHECKING
|
||||
|
||||
import pydantic
|
||||
|
||||
from .errors import BoxValidationError
|
||||
from .errors import BoxError, BoxValidationError
|
||||
from .models import BUILTIN_PROFILES, BoxExecutionResult, BoxProfile, BoxSpec
|
||||
from .runtime import BoxRuntime
|
||||
|
||||
_INT_ADAPTER = pydantic.TypeAdapter(int)
|
||||
_UTC = _dt.timezone.utc
|
||||
_MAX_RECENT_ERRORS = 50
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from ..core import app as core_app
|
||||
@@ -31,6 +35,7 @@ class BoxService:
|
||||
self.allowed_host_mount_roots = self._load_allowed_host_mount_roots()
|
||||
self.default_host_workspace = self._load_default_host_workspace()
|
||||
self.profile = self._load_profile()
|
||||
self._recent_errors: collections.deque[dict] = collections.deque(maxlen=_MAX_RECENT_ERRORS)
|
||||
|
||||
async def initialize(self):
|
||||
await self.runtime.initialize()
|
||||
@@ -48,7 +53,9 @@ class BoxService:
|
||||
spec = BoxSpec.model_validate(spec_payload)
|
||||
except pydantic.ValidationError as exc:
|
||||
first_error = exc.errors()[0]
|
||||
raise BoxValidationError(first_error.get('msg', 'invalid sandbox_exec arguments')) from exc
|
||||
err = BoxValidationError(first_error.get('msg', 'invalid sandbox_exec arguments'))
|
||||
self._record_error(err, query)
|
||||
raise err from exc
|
||||
|
||||
self._validate_host_mount(spec)
|
||||
self.ap.logger.info(
|
||||
@@ -56,7 +63,11 @@ class BoxService:
|
||||
f'query_id={query.query_id} '
|
||||
f'spec={json.dumps(self._summarize_spec(spec), ensure_ascii=False)}'
|
||||
)
|
||||
result = await self.runtime.execute(spec)
|
||||
try:
|
||||
result = await self.runtime.execute(spec)
|
||||
except BoxError as exc:
|
||||
self._record_error(exc, query)
|
||||
raise
|
||||
self.ap.logger.info(
|
||||
'LangBot Box result: '
|
||||
f'query_id={query.query_id} '
|
||||
@@ -229,3 +240,24 @@ class BoxService:
|
||||
|
||||
if normalized_timeout > profile.max_timeout_sec:
|
||||
params['timeout_sec'] = profile.max_timeout_sec
|
||||
|
||||
# ── Observability ─────────────────────────────────────────────────
|
||||
|
||||
def _record_error(self, exc: Exception, query: 'pipeline_query.Query'):
|
||||
self._recent_errors.append({
|
||||
'timestamp': _dt.datetime.now(_UTC).isoformat(),
|
||||
'type': type(exc).__name__,
|
||||
'message': str(exc),
|
||||
'query_id': str(query.query_id),
|
||||
})
|
||||
|
||||
def get_recent_errors(self) -> list[dict]:
|
||||
return list(self._recent_errors)
|
||||
|
||||
async def get_status(self) -> dict:
|
||||
runtime_status = await self.runtime.get_status()
|
||||
return {
|
||||
**runtime_status,
|
||||
'profile': self.profile.name,
|
||||
'recent_error_count': len(self._recent_errors),
|
||||
}
|
||||
|
||||
@@ -545,3 +545,104 @@ def test_box_spec_validates_resource_limits():
|
||||
BoxSpec.model_validate({'cmd': 'echo', 'session_id': 's1', 'memory_mb': 10})
|
||||
with pytest.raises(Exception):
|
||||
BoxSpec.model_validate({'cmd': 'echo', 'session_id': 's1', 'pids_limit': 0})
|
||||
|
||||
|
||||
# ── Observability tests ───────────────────────────────────────────────
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_runtime_get_status_reports_backend_and_sessions():
|
||||
logger = Mock()
|
||||
backend = FakeBackend(logger)
|
||||
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
||||
await runtime.initialize()
|
||||
|
||||
status = await runtime.get_status()
|
||||
assert status['backend']['name'] == 'fake'
|
||||
assert status['backend']['available'] is True
|
||||
assert status['active_sessions'] == 0
|
||||
|
||||
await runtime.execute(BoxSpec.model_validate({'cmd': 'echo', 'session_id': 'obs-1'}))
|
||||
status = await runtime.get_status()
|
||||
assert status['active_sessions'] == 1
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_runtime_get_sessions_returns_session_info():
|
||||
logger = Mock()
|
||||
backend = FakeBackend(logger)
|
||||
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
||||
await runtime.initialize()
|
||||
|
||||
await runtime.execute(BoxSpec.model_validate({'cmd': 'echo', 'session_id': 'obs-2'}))
|
||||
sessions = runtime.get_sessions()
|
||||
assert len(sessions) == 1
|
||||
assert sessions[0]['session_id'] == 'obs-2'
|
||||
assert sessions[0]['backend_name'] == 'fake'
|
||||
assert 'created_at' in sessions[0]
|
||||
assert 'last_used_at' in sessions[0]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_runtime_get_backend_info_when_no_backend():
|
||||
logger = Mock()
|
||||
backend = FakeBackend(logger, available=False)
|
||||
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
||||
await runtime.initialize()
|
||||
|
||||
info = await runtime.get_backend_info()
|
||||
assert info['name'] is None
|
||||
assert info['available'] is False
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_service_records_errors_on_failure():
|
||||
logger = Mock()
|
||||
backend = FakeBackend(logger, available=False)
|
||||
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
||||
service = BoxService(make_app(logger), runtime=runtime)
|
||||
await service.initialize()
|
||||
|
||||
with pytest.raises(Exception):
|
||||
await service.execute_sandbox_tool({'cmd': 'echo hello'}, make_query(50))
|
||||
|
||||
errors = service.get_recent_errors()
|
||||
assert len(errors) == 1
|
||||
assert errors[0]['type'] == 'BoxBackendUnavailableError'
|
||||
assert errors[0]['query_id'] == '50'
|
||||
assert 'timestamp' in errors[0]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_service_error_ring_buffer_capped():
|
||||
logger = Mock()
|
||||
backend = FakeBackend(logger, available=False)
|
||||
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
||||
service = BoxService(make_app(logger), runtime=runtime)
|
||||
await service.initialize()
|
||||
|
||||
for i in range(60):
|
||||
with pytest.raises(Exception):
|
||||
await service.execute_sandbox_tool({'cmd': 'fail'}, make_query(100 + i))
|
||||
|
||||
errors = service.get_recent_errors()
|
||||
assert len(errors) == 50
|
||||
# Oldest should have been evicted, newest kept
|
||||
assert errors[0]['query_id'] == '110'
|
||||
assert errors[-1]['query_id'] == '159'
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_service_get_status_aggregates_runtime_and_profile():
|
||||
logger = Mock()
|
||||
backend = FakeBackend(logger)
|
||||
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
||||
service = BoxService(make_app(logger), runtime=runtime)
|
||||
await service.initialize()
|
||||
|
||||
status = await service.get_status()
|
||||
assert status['profile'] == 'default'
|
||||
assert status['backend']['name'] == 'fake'
|
||||
assert status['backend']['available'] is True
|
||||
assert status['active_sessions'] == 0
|
||||
assert status['recent_error_count'] == 0
|
||||
|
||||
Reference in New Issue
Block a user