feat(box): add sandbox_exec tool loop for local-agent calculations

This commit is contained in:
youhuanghe
2026-03-19 12:28:10 +00:00
committed by WangCham
parent 3b3deec080
commit ba7a45713d
17 changed files with 952 additions and 10 deletions

View File

@@ -0,0 +1 @@
"""LangBot Box runtime package."""

View File

@@ -0,0 +1,207 @@
from __future__ import annotations
import abc
import asyncio
import dataclasses
import datetime as dt
import logging
import re
import shlex
import shutil
import uuid
from .errors import BoxError
from .models import BoxExecutionResult, BoxExecutionStatus, BoxSessionInfo, BoxSpec
@dataclasses.dataclass(slots=True)
class _CommandResult:
return_code: int
stdout: str
stderr: str
timed_out: bool = False
class BaseSandboxBackend(abc.ABC):
    """Abstract contract every sandbox backend has to fulfil.

    Concrete backends provide a ``name`` and implement the availability
    probe plus the session start / exec / stop operations.
    """

    name: str

    def __init__(self, logger: logging.Logger):
        self.logger = logger

    async def initialize(self):
        """Optional setup hook; the base implementation does nothing."""

    @abc.abstractmethod
    async def is_available(self) -> bool:
        """Report whether this backend can currently be used."""

    @abc.abstractmethod
    async def start_session(self, spec: BoxSpec) -> BoxSessionInfo:
        """Create a sandbox session for ``spec`` and describe it."""

    @abc.abstractmethod
    async def exec(self, session: BoxSessionInfo, spec: BoxSpec) -> BoxExecutionResult:
        """Run the command described by ``spec`` inside ``session``."""

    @abc.abstractmethod
    async def stop_session(self, session: BoxSessionInfo):
        """Tear down ``session``."""
class CLISandboxBackend(BaseSandboxBackend):
    """Sandbox backend that shells out to a container CLI.

    The CLI named by ``command`` is invoked with the ``info``, ``run``,
    ``exec`` and ``rm`` subcommands. One detached container is started per
    session and commands are executed inside it.
    """

    command: str

    def __init__(self, logger: logging.Logger, command: str, backend_name: str):
        """Store the CLI executable name and the backend's display name."""
        super().__init__(logger)
        self.command = command
        self.name = backend_name

    async def is_available(self) -> bool:
        """Return True when the CLI is on PATH and ``<command> info`` succeeds."""
        if shutil.which(self.command) is None:
            return False

        result = await self._run_command([self.command, 'info'], timeout_sec=5, check=False)
        return result.return_code == 0 and not result.timed_out

    async def start_session(self, spec: BoxSpec) -> BoxSessionInfo:
        """Start a detached, self-removing container for ``spec``.

        Raises:
            BoxError: if the CLI exits non-zero or does not finish in time.
        """
        now = dt.datetime.now(dt.UTC)
        container_name = self._build_container_name(spec.session_id)

        args = [
            self.command,
            'run',
            '-d',
            '--rm',
            '--name',
            container_name,
            '--label',
            'langbot.box=true',
            '--label',
            f'langbot.session_id={spec.session_id}',
        ]
        if spec.network.value == 'off':
            args.extend(['--network', 'none'])
        # Keep the container alive with an idle loop so later exec calls can attach.
        args.extend([spec.image, 'sh', '-lc', 'while true; do sleep 3600; done'])

        # check=True: a failed or timed-out start must not yield a session record.
        await self._run_command(args, timeout_sec=30, check=True)

        return BoxSessionInfo(
            session_id=spec.session_id,
            backend_name=self.name,
            backend_session_id=container_name,
            image=spec.image,
            network=spec.network,
            created_at=now,
            last_used_at=now,
        )

    async def exec(self, session: BoxSessionInfo, spec: BoxSpec) -> BoxExecutionResult:
        """Run ``spec.cmd`` in the session's container via ``sh -lc``.

        Returns a TIMED_OUT result (``exit_code=None``) when the CLI call
        exceeds ``spec.timeout_sec``; otherwise a COMPLETED result carrying
        the CLI's exit code and captured output.
        """
        start = dt.datetime.now(dt.UTC)

        args = [self.command, 'exec']
        for key, value in spec.env.items():
            args.extend(['-e', f'{key}={value}'])
        args.extend(
            [
                session.backend_session_id,
                'sh',
                '-lc',
                self._build_exec_command(spec.workdir, spec.cmd),
            ]
        )

        result = await self._run_command(args, timeout_sec=spec.timeout_sec, check=False)
        duration_ms = int((dt.datetime.now(dt.UTC) - start).total_seconds() * 1000)

        if result.timed_out:
            return BoxExecutionResult(
                session_id=session.session_id,
                backend_name=self.name,
                status=BoxExecutionStatus.TIMED_OUT,
                exit_code=None,
                stdout=result.stdout,
                stderr=result.stderr or f'Command timed out after {spec.timeout_sec} seconds.',
                duration_ms=duration_ms,
            )

        return BoxExecutionResult(
            session_id=session.session_id,
            backend_name=self.name,
            status=BoxExecutionStatus.COMPLETED,
            exit_code=result.return_code,
            stdout=result.stdout,
            stderr=result.stderr,
            duration_ms=duration_ms,
        )

    async def stop_session(self, session: BoxSessionInfo):
        """Force-remove the session's container; failures are ignored."""
        await self._run_command(
            [self.command, 'rm', '-f', session.backend_session_id],
            timeout_sec=20,
            check=False,
        )

    def _build_container_name(self, session_id: str) -> str:
        """Derive a container name from ``session_id``.

        Runs of characters outside ``[a-zA-Z0-9_.-]`` become ``-``; the result
        is stripped of leading/trailing ``-``, lower-cased, cut to 32
        characters, and suffixed with 8 random hex digits.
        """
        normalized = re.sub(r'[^a-zA-Z0-9_.-]+', '-', session_id).strip('-').lower() or 'session'
        suffix = uuid.uuid4().hex[:8]
        return f'langbot-box-{normalized[:32]}-{suffix}'

    def _build_exec_command(self, workdir: str, cmd: str) -> str:
        """Prefix ``cmd`` with creation of, and ``cd`` into, the quoted workdir."""
        quoted_workdir = shlex.quote(workdir)
        return f'mkdir -p {quoted_workdir} && cd {quoted_workdir} && {cmd}'

    async def _run_command(
        self,
        args: list[str],
        timeout_sec: int,
        check: bool,
    ) -> _CommandResult:
        """Run a CLI command and capture its decoded, stripped output.

        Args:
            args: Full argv, executable first.
            timeout_sec: Seconds to wait before the process is killed.
            check: When true, raise instead of returning a failed result.

        Returns:
            The captured result. On timeout (with ``check`` false) the result
            has ``return_code=-1`` and ``timed_out=True``.

        Raises:
            BoxError: if ``check`` is true and the command exits non-zero or
                times out.
        """
        process = await asyncio.create_subprocess_exec(
            *args,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )

        timed_out = False
        try:
            stdout_bytes, stderr_bytes = await asyncio.wait_for(process.communicate(), timeout=timeout_sec)
        except asyncio.TimeoutError:
            timed_out = True
            try:
                process.kill()
            except ProcessLookupError:
                # The process exited on its own right as the timeout fired.
                pass
            stdout_bytes, stderr_bytes = await process.communicate()

        stdout = stdout_bytes.decode('utf-8', errors='replace').strip()
        stderr = stderr_bytes.decode('utf-8', errors='replace').strip()

        if timed_out:
            if check:
                # Honour check=True on timeout too; otherwise callers such as
                # start_session would treat a killed command as a success.
                raise BoxError(
                    self._format_cli_error(stderr or stdout or f'command timed out after {timeout_sec} seconds')
                )
            return _CommandResult(
                return_code=-1,
                stdout=stdout,
                stderr=stderr,
                timed_out=True,
            )

        if check and process.returncode != 0:
            raise BoxError(self._format_cli_error(stderr or stdout or 'unknown backend error'))

        return _CommandResult(
            return_code=process.returncode,
            stdout=stdout,
            stderr=stderr,
            timed_out=False,
        )

    def _format_cli_error(self, message: str) -> str:
        """Collapse whitespace, cap the text at 300 characters, and prefix the backend name."""
        message = ' '.join(message.split())
        if len(message) > 300:
            message = f'{message[:297]}...'
        return f'{self.name} backend error: {message}'
class PodmanBackend(CLISandboxBackend):
    """CLI sandbox backend bound to the ``podman`` executable."""

    def __init__(self, logger: logging.Logger):
        cli_name = 'podman'
        super().__init__(logger, command=cli_name, backend_name=cli_name)
class DockerBackend(CLISandboxBackend):
    """CLI sandbox backend bound to the ``docker`` executable."""

    def __init__(self, logger: logging.Logger):
        cli_name = 'docker'
        super().__init__(logger, command=cli_name, backend_name=cli_name)

View File

@@ -0,0 +1,17 @@
from __future__ import annotations
class BoxError(RuntimeError):
    """Root of the LangBot Box exception hierarchy."""
class BoxValidationError(BoxError):
    """Signals that the arguments given to sandbox_exec failed validation."""
class BoxBackendUnavailableError(BoxError):
    """Signals that none of the supported container backends can be used."""
class BoxSessionConflictError(BoxError):
    """Signals that a request is incompatible with the session that already exists."""

View File

@@ -0,0 +1,89 @@
from __future__ import annotations
import datetime as dt
import enum
import pydantic
# Container image used when a BoxSpec does not specify one.
DEFAULT_BOX_IMAGE = 'python:3.11-slim'
class BoxNetworkMode(str, enum.Enum):
    """Network access setting requested for a sandbox session."""

    # Networking disabled.
    OFF = 'off'
    # Networking left enabled.
    ON = 'on'
class BoxExecutionStatus(str, enum.Enum):
    """Terminal state of a sandbox command execution."""

    # The command ran to completion (regardless of its exit code).
    COMPLETED = 'completed'
    # The command was stopped because it exceeded its timeout.
    TIMED_OUT = 'timed_out'
class BoxSpec(pydantic.BaseModel):
    """Validated request to run one shell command in a sandbox session."""

    # Shell command to execute; must be non-empty after stripping.
    cmd: str
    # Absolute directory inside the sandbox the command runs from.
    workdir: str = '/workspace'
    # Maximum run time in seconds; must be positive.
    timeout_sec: int = 30
    network: BoxNetworkMode = BoxNetworkMode.OFF
    # Identifier of the session the command belongs to.
    session_id: str
    # Extra environment variables for the command.
    env: dict[str, str] = pydantic.Field(default_factory=dict)
    image: str = DEFAULT_BOX_IMAGE

    @pydantic.field_validator('cmd')
    @classmethod
    def validate_cmd(cls, value: str) -> str:
        """Strip surrounding whitespace and reject an empty command."""
        value = value.strip()
        if not value:
            raise ValueError('cmd must not be empty')
        return value

    @pydantic.field_validator('workdir')
    @classmethod
    def validate_workdir(cls, value: str) -> str:
        """Strip surrounding whitespace and require an absolute path."""
        value = value.strip()
        if not value.startswith('/'):
            raise ValueError('workdir must be an absolute path inside the sandbox')
        return value

    @pydantic.field_validator('timeout_sec')
    @classmethod
    def validate_timeout_sec(cls, value: int) -> int:
        """Reject zero or negative timeouts."""
        if value <= 0:
            raise ValueError('timeout_sec must be greater than 0')
        return value

    @pydantic.field_validator('session_id')
    @classmethod
    def validate_session_id(cls, value: str) -> str:
        """Strip surrounding whitespace and reject an empty session id."""
        value = value.strip()
        if not value:
            raise ValueError('session_id must not be empty')
        return value

    # mode='before': the coercion has to run ahead of pydantic's own
    # dict[str, str] check. In the default "after" mode the value is already
    # validated, so str() is a no-op and non-string values (e.g. numbers)
    # are rejected before this validator is ever reached.
    @pydantic.field_validator('env', mode='before')
    @classmethod
    def validate_env(cls, value: object) -> object:
        """Coerce every env key and value to ``str`` before type validation."""
        if not isinstance(value, dict):
            # Leave non-mapping input untouched so pydantic reports the type error.
            return value
        return {str(k): str(v) for k, v in value.items()}
class BoxSessionInfo(pydantic.BaseModel):
    """Record describing a sandbox session held by a backend."""

    # Caller-facing session identifier.
    session_id: str
    # Name of the backend that owns the session.
    backend_name: str
    # Identifier the backend itself uses for the session.
    backend_session_id: str
    image: str
    network: BoxNetworkMode
    # Timestamps for creation and most recent use.
    created_at: dt.datetime
    last_used_at: dt.datetime
class BoxExecutionResult(pydantic.BaseModel):
    """Outcome of executing one command in a sandbox session."""

    session_id: str
    backend_name: str
    status: BoxExecutionStatus
    # Exit code of the command, or None when no exit code is available.
    exit_code: int | None
    stdout: str = ''
    stderr: str = ''
    # Measured duration of the execution in milliseconds.
    duration_ms: int

    @property
    def ok(self) -> bool:
        """True only for a completed execution whose exit code is zero."""
        if self.status != BoxExecutionStatus.COMPLETED:
            return False
        return self.exit_code == 0

View File

@@ -0,0 +1,128 @@
from __future__ import annotations
import asyncio
import dataclasses
import datetime as dt
import logging
from .backend import BaseSandboxBackend, DockerBackend, PodmanBackend
from .errors import BoxBackendUnavailableError, BoxSessionConflictError
from .models import BoxExecutionResult, BoxExecutionStatus, BoxSessionInfo, BoxSpec
@dataclasses.dataclass(slots=True)
class _RuntimeSession:
info: BoxSessionInfo
lock: asyncio.Lock
class BoxRuntime:
def __init__(
self,
logger: logging.Logger,
backends: list[BaseSandboxBackend] | None = None,
session_ttl_sec: int = 300,
):
self.logger = logger
self.backends = backends or [PodmanBackend(logger), DockerBackend(logger)]
self.session_ttl_sec = session_ttl_sec
self._backend: BaseSandboxBackend | None = None
self._sessions: dict[str, _RuntimeSession] = {}
self._lock = asyncio.Lock()
async def initialize(self):
self._backend = await self._select_backend()
async def execute(self, spec: BoxSpec) -> BoxExecutionResult:
session = await self._get_or_create_session(spec)
async with session.lock:
result = await (await self._get_backend()).exec(session.info, spec)
async with self._lock:
now = dt.datetime.now(dt.UTC)
if spec.session_id in self._sessions:
self._sessions[spec.session_id].info.last_used_at = now
if result.status == BoxExecutionStatus.TIMED_OUT:
await self._drop_session_locked(spec.session_id)
return result
async def shutdown(self):
async with self._lock:
session_ids = list(self._sessions.keys())
for session_id in session_ids:
await self._drop_session_locked(session_id)
async def _get_or_create_session(self, spec: BoxSpec) -> _RuntimeSession:
async with self._lock:
await self._reap_expired_sessions_locked()
existing = self._sessions.get(spec.session_id)
if existing is not None:
self._assert_session_compatible(existing.info, spec)
existing.info.last_used_at = dt.datetime.now(dt.UTC)
return existing
backend = await self._get_backend()
info = await backend.start_session(spec)
runtime_session = _RuntimeSession(info=info, lock=asyncio.Lock())
self._sessions[spec.session_id] = runtime_session
return runtime_session
async def _get_backend(self) -> BaseSandboxBackend:
if self._backend is None:
self._backend = await self._select_backend()
if self._backend is None:
raise BoxBackendUnavailableError(
'LangBot Box backend unavailable. Install and start Podman or Docker before using sandbox_exec.'
)
return self._backend
async def _select_backend(self) -> BaseSandboxBackend | None:
for backend in self.backends:
try:
await backend.initialize()
if await backend.is_available():
self.logger.info(f'LangBot Box using backend: {backend.name}')
return backend
except Exception as exc:
self.logger.warning(f'LangBot Box backend {backend.name} probe failed: {exc}')
self.logger.warning('LangBot Box backend unavailable: neither Podman nor Docker is ready')
return None
async def _reap_expired_sessions_locked(self):
if self.session_ttl_sec <= 0:
return
deadline = dt.datetime.now(dt.UTC) - dt.timedelta(seconds=self.session_ttl_sec)
expired_session_ids = [
session_id
for session_id, session in self._sessions.items()
if session.info.last_used_at < deadline
]
for session_id in expired_session_ids:
await self._drop_session_locked(session_id)
async def _drop_session_locked(self, session_id: str):
runtime_session = self._sessions.pop(session_id, None)
if runtime_session is None or self._backend is None:
return
try:
await self._backend.stop_session(runtime_session.info)
except Exception as exc:
self.logger.warning(f'Failed to clean up box session {session_id}: {exc}')
def _assert_session_compatible(self, session: BoxSessionInfo, spec: BoxSpec):
if session.network != spec.network:
raise BoxSessionConflictError(
f'sandbox_exec session {spec.session_id} already exists with network={session.network.value}'
)
if session.image != spec.image:
raise BoxSessionConflictError(
f'sandbox_exec session {spec.session_id} already exists with image={session.image}'
)

View File

@@ -0,0 +1,67 @@
from __future__ import annotations
from typing import TYPE_CHECKING
import pydantic
from .errors import BoxValidationError
from .models import BoxExecutionResult, BoxSpec
from .runtime import BoxRuntime
if TYPE_CHECKING:
from ..core import app as core_app
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
class BoxService:
    """Application-facing entry point for the sandbox_exec tool.

    Validates tool parameters into a ``BoxSpec``, runs it through the
    runtime, and returns a plain dict with size-limited output.
    """

    def __init__(
        self,
        ap: 'core_app.Application',
        runtime: BoxRuntime | None = None,
        output_limit_chars: int = 4000,
    ):
        self.ap = ap
        self.runtime = runtime if runtime else BoxRuntime(logger=ap.logger)
        self.output_limit_chars = output_limit_chars

    async def initialize(self):
        """Initialize the underlying runtime."""
        await self.runtime.initialize()

    async def execute_sandbox_tool(self, parameters: dict, query: 'pipeline_query.Query') -> dict:
        """Validate ``parameters``, execute them, and return the serialized result.

        Raises:
            BoxValidationError: carrying the first validation message when
                the parameters do not form a valid ``BoxSpec``.
        """
        fallback_session_id = str(query.query_id)
        payload = dict(parameters)
        # Without an explicit session id, the query id scopes the session.
        payload.setdefault('session_id', fallback_session_id)
        payload.setdefault('env', {})

        try:
            spec = BoxSpec.model_validate(payload)
        except pydantic.ValidationError as exc:
            detail = exc.errors()[0].get('msg', 'invalid sandbox_exec arguments')
            raise BoxValidationError(detail) from exc

        execution = await self.runtime.execute(spec)
        return self._serialize_result(execution)

    async def shutdown(self):
        """Shut down the underlying runtime."""
        await self.runtime.shutdown()

    def _serialize_result(self, result: BoxExecutionResult) -> dict:
        """Flatten ``result`` into a dict, truncating stdout and stderr."""
        out_text, out_cut = self._truncate(result.stdout)
        err_text, err_cut = self._truncate(result.stderr)
        serialized = {
            'session_id': result.session_id,
            'backend': result.backend_name,
            'status': result.status.value,
            'ok': result.ok,
            'exit_code': result.exit_code,
            'stdout': out_text,
            'stderr': err_text,
            'stdout_truncated': out_cut,
            'stderr_truncated': err_cut,
            'duration_ms': result.duration_ms,
        }
        return serialized

    def _truncate(self, text: str) -> tuple[str, bool]:
        """Cap ``text`` at the output limit; the flag reports whether it was cut."""
        limit = self.output_limit_chars
        if len(text) > limit:
            return f'{text[:limit]}...', True
        return text, False