mirror of
https://github.com/langbot-app/LangBot.git
synced 2026-06-02 03:55:55 +00:00
feat(box): add BoxProfile with resource limits and improved output truncation
- Implement head+tail output truncation (60/40 split) so LLM sees both
beginning and final results; add streaming byte-limited reads in backend
to prevent unbounded memory usage (_MAX_RAW_OUTPUT_BYTES = 1MB)
- Define BoxProfile model with locked fields and max_timeout_sec clamping
- Add four built-in profiles: default, offline_readonly, network_basic,
network_extended with differentiated resource and security constraints
- Add resource limit fields to BoxSpec (cpus, memory_mb, pids_limit,
read_only_rootfs) and pass corresponding container CLI flags
(--cpus, --memory, --pids-limit, --read-only, --tmpfs)
- Profile loaded from config (box.profile), applied in service layer
before BoxSpec validation; locked fields cannot be overridden by
tool-call parameters
This commit is contained in:
@@ -8,11 +8,18 @@ import logging
|
||||
import re
|
||||
import shlex
|
||||
import shutil
|
||||
import typing
|
||||
import uuid
|
||||
|
||||
from .errors import BoxError
|
||||
from .models import DEFAULT_BOX_MOUNT_PATH, BoxExecutionResult, BoxExecutionStatus, BoxSessionInfo, BoxSpec
|
||||
|
||||
# Hard cap on raw subprocess output to prevent unbounded memory usage.
|
||||
# Container timeout already bounds duration, but fast commands can still
|
||||
# produce large output within the time limit. After this many bytes the
|
||||
# remaining output is discarded before decoding.
|
||||
_MAX_RAW_OUTPUT_BYTES = 1_048_576 # 1 MB per stream
|
||||
|
||||
|
||||
@dataclasses.dataclass(slots=True)
|
||||
class _CommandResult:
|
||||
@@ -83,6 +90,15 @@ class CLISandboxBackend(BaseSandboxBackend):
|
||||
if spec.network.value == 'off':
|
||||
args.extend(['--network', 'none'])
|
||||
|
||||
# Resource limits
|
||||
args.extend(['--cpus', str(spec.cpus)])
|
||||
args.extend(['--memory', f'{spec.memory_mb}m'])
|
||||
args.extend(['--pids-limit', str(spec.pids_limit)])
|
||||
|
||||
if spec.read_only_rootfs:
|
||||
args.append('--read-only')
|
||||
args.extend(['--tmpfs', '/tmp:size=64m'])
|
||||
|
||||
if spec.host_path is not None:
|
||||
mount_spec = f'{spec.host_path}:{DEFAULT_BOX_MOUNT_PATH}:{spec.host_path_mode.value}'
|
||||
args.extend(['-v', mount_spec])
|
||||
@@ -93,7 +109,9 @@ class CLISandboxBackend(BaseSandboxBackend):
|
||||
f'LangBot Box backend start_session: backend={self.name} '
|
||||
f'session_id={spec.session_id} container_name={container_name} '
|
||||
f'image={spec.image} network={spec.network.value} '
|
||||
f'host_path={spec.host_path} host_path_mode={spec.host_path_mode.value}'
|
||||
f'host_path={spec.host_path} host_path_mode={spec.host_path_mode.value} '
|
||||
f'cpus={spec.cpus} memory_mb={spec.memory_mb} pids_limit={spec.pids_limit} '
|
||||
f'read_only_rootfs={spec.read_only_rootfs}'
|
||||
)
|
||||
|
||||
await self._run_command(args, timeout_sec=30, check=True)
|
||||
@@ -106,6 +124,10 @@ class CLISandboxBackend(BaseSandboxBackend):
|
||||
network=spec.network,
|
||||
host_path=spec.host_path,
|
||||
host_path_mode=spec.host_path_mode,
|
||||
cpus=spec.cpus,
|
||||
memory_mb=spec.memory_mb,
|
||||
pids_limit=spec.pids_limit,
|
||||
read_only_rootfs=spec.read_only_rootfs,
|
||||
created_at=now,
|
||||
last_used_at=now,
|
||||
)
|
||||
@@ -191,21 +213,30 @@ class CLISandboxBackend(BaseSandboxBackend):
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
stderr=asyncio.subprocess.PIPE,
|
||||
)
|
||||
stdout_task = asyncio.create_task(self._read_stream(process.stdout))
|
||||
stderr_task = asyncio.create_task(self._read_stream(process.stderr))
|
||||
|
||||
timed_out = False
|
||||
try:
|
||||
stdout_bytes, stderr_bytes = await asyncio.wait_for(process.communicate(), timeout=timeout_sec)
|
||||
await asyncio.wait_for(process.wait(), timeout=timeout_sec)
|
||||
except asyncio.TimeoutError:
|
||||
process.kill()
|
||||
stdout_bytes, stderr_bytes = await process.communicate()
|
||||
timed_out = True
|
||||
await process.wait()
|
||||
|
||||
stdout_bytes, stdout_total = await stdout_task
|
||||
stderr_bytes, stderr_total = await stderr_task
|
||||
|
||||
if timed_out:
|
||||
return _CommandResult(
|
||||
return_code=-1,
|
||||
stdout=stdout_bytes.decode('utf-8', errors='replace').strip(),
|
||||
stderr=stderr_bytes.decode('utf-8', errors='replace').strip(),
|
||||
stdout=self._clip_captured_bytes(stdout_bytes, stdout_total),
|
||||
stderr=self._clip_captured_bytes(stderr_bytes, stderr_total),
|
||||
timed_out=True,
|
||||
)
|
||||
|
||||
stdout = stdout_bytes.decode('utf-8', errors='replace').strip()
|
||||
stderr = stderr_bytes.decode('utf-8', errors='replace').strip()
|
||||
stdout = self._clip_captured_bytes(stdout_bytes, stdout_total)
|
||||
stderr = self._clip_captured_bytes(stderr_bytes, stderr_total)
|
||||
|
||||
if check and process.returncode != 0:
|
||||
raise BoxError(self._format_cli_error(stderr or stdout or 'unknown backend error'))
|
||||
@@ -217,6 +248,40 @@ class CLISandboxBackend(BaseSandboxBackend):
|
||||
timed_out=False,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _clip_bytes(data: bytes, limit: int = _MAX_RAW_OUTPUT_BYTES) -> str:
|
||||
"""Decode bytes to str, discarding bytes beyond *limit*."""
|
||||
clipped = data[:limit]
|
||||
return CLISandboxBackend._clip_captured_bytes(clipped, len(data), limit=limit)
|
||||
|
||||
@staticmethod
|
||||
def _clip_captured_bytes(data: bytes, total_size: int, limit: int = _MAX_RAW_OUTPUT_BYTES) -> str:
|
||||
text = data.decode('utf-8', errors='replace').strip()
|
||||
if total_size > limit:
|
||||
text += f'\n... [raw output clipped at {limit} bytes, {total_size - limit} bytes discarded]'
|
||||
return text
|
||||
|
||||
@staticmethod
|
||||
async def _read_stream(
|
||||
stream: typing.Optional[asyncio.StreamReader],
|
||||
limit: int = _MAX_RAW_OUTPUT_BYTES,
|
||||
) -> tuple[bytes, int]:
|
||||
if stream is None:
|
||||
return b'', 0
|
||||
|
||||
chunks = bytearray()
|
||||
total_size = 0
|
||||
while True:
|
||||
chunk = await stream.read(65536)
|
||||
if not chunk:
|
||||
break
|
||||
total_size += len(chunk)
|
||||
remaining = limit - len(chunks)
|
||||
if remaining > 0:
|
||||
chunks.extend(chunk[:remaining])
|
||||
|
||||
return bytes(chunks), total_size
|
||||
|
||||
def _format_cli_error(self, message: str) -> str:
|
||||
message = ' '.join(message.split())
|
||||
if len(message) > 300:
|
||||
|
||||
@@ -35,6 +35,11 @@ class BoxSpec(pydantic.BaseModel):
|
||||
image: str = DEFAULT_BOX_IMAGE
|
||||
host_path: str | None = None
|
||||
host_path_mode: BoxHostMountMode = BoxHostMountMode.READ_WRITE
|
||||
# Resource limits
|
||||
cpus: float = 1.0
|
||||
memory_mb: int = 512
|
||||
pids_limit: int = 128
|
||||
read_only_rootfs: bool = True
|
||||
|
||||
@pydantic.field_validator('cmd')
|
||||
@classmethod
|
||||
@@ -59,6 +64,27 @@ class BoxSpec(pydantic.BaseModel):
|
||||
raise ValueError('timeout_sec must be greater than 0')
|
||||
return value
|
||||
|
||||
@pydantic.field_validator('cpus')
|
||||
@classmethod
|
||||
def validate_cpus(cls, value: float) -> float:
|
||||
if value <= 0:
|
||||
raise ValueError('cpus must be greater than 0')
|
||||
return value
|
||||
|
||||
@pydantic.field_validator('memory_mb')
|
||||
@classmethod
|
||||
def validate_memory_mb(cls, value: int) -> int:
|
||||
if value < 32:
|
||||
raise ValueError('memory_mb must be at least 32')
|
||||
return value
|
||||
|
||||
@pydantic.field_validator('pids_limit')
|
||||
@classmethod
|
||||
def validate_pids_limit(cls, value: int) -> int:
|
||||
if value < 1:
|
||||
raise ValueError('pids_limit must be at least 1')
|
||||
return value
|
||||
|
||||
@pydantic.field_validator('session_id')
|
||||
@classmethod
|
||||
def validate_session_id(cls, value: str) -> str:
|
||||
@@ -91,6 +117,74 @@ class BoxSpec(pydantic.BaseModel):
|
||||
return self
|
||||
|
||||
|
||||
class BoxProfile(pydantic.BaseModel):
|
||||
"""Preset sandbox configuration.
|
||||
|
||||
Provides default values for BoxSpec fields and optionally locks fields
|
||||
so that tool-call parameters cannot override them.
|
||||
"""
|
||||
|
||||
name: str
|
||||
image: str = DEFAULT_BOX_IMAGE
|
||||
network: BoxNetworkMode = BoxNetworkMode.OFF
|
||||
timeout_sec: int = 30
|
||||
host_path_mode: BoxHostMountMode = BoxHostMountMode.READ_WRITE
|
||||
max_timeout_sec: int = 120
|
||||
# Resource limits
|
||||
cpus: float = 1.0
|
||||
memory_mb: int = 512
|
||||
pids_limit: int = 128
|
||||
read_only_rootfs: bool = True
|
||||
locked: frozenset[str] = frozenset()
|
||||
|
||||
model_config = pydantic.ConfigDict(frozen=True)
|
||||
|
||||
|
||||
BUILTIN_PROFILES: dict[str, BoxProfile] = {
|
||||
'default': BoxProfile(
|
||||
name='default',
|
||||
network=BoxNetworkMode.OFF,
|
||||
host_path_mode=BoxHostMountMode.READ_WRITE,
|
||||
cpus=1.0,
|
||||
memory_mb=512,
|
||||
pids_limit=128,
|
||||
read_only_rootfs=True,
|
||||
max_timeout_sec=120,
|
||||
),
|
||||
'offline_readonly': BoxProfile(
|
||||
name='offline_readonly',
|
||||
network=BoxNetworkMode.OFF,
|
||||
host_path_mode=BoxHostMountMode.READ_ONLY,
|
||||
cpus=0.5,
|
||||
memory_mb=256,
|
||||
pids_limit=64,
|
||||
read_only_rootfs=True,
|
||||
max_timeout_sec=60,
|
||||
locked=frozenset({'network', 'host_path_mode', 'read_only_rootfs'}),
|
||||
),
|
||||
'network_basic': BoxProfile(
|
||||
name='network_basic',
|
||||
network=BoxNetworkMode.ON,
|
||||
host_path_mode=BoxHostMountMode.READ_WRITE,
|
||||
cpus=1.0,
|
||||
memory_mb=512,
|
||||
pids_limit=128,
|
||||
read_only_rootfs=True,
|
||||
max_timeout_sec=120,
|
||||
),
|
||||
'network_extended': BoxProfile(
|
||||
name='network_extended',
|
||||
network=BoxNetworkMode.ON,
|
||||
host_path_mode=BoxHostMountMode.READ_WRITE,
|
||||
cpus=2.0,
|
||||
memory_mb=1024,
|
||||
pids_limit=256,
|
||||
read_only_rootfs=False,
|
||||
max_timeout_sec=300,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
class BoxSessionInfo(pydantic.BaseModel):
|
||||
session_id: str
|
||||
backend_name: str
|
||||
@@ -99,6 +193,10 @@ class BoxSessionInfo(pydantic.BaseModel):
|
||||
network: BoxNetworkMode
|
||||
host_path: str | None = None
|
||||
host_path_mode: BoxHostMountMode = BoxHostMountMode.READ_WRITE
|
||||
cpus: float = 1.0
|
||||
memory_mb: int = 512
|
||||
pids_limit: int = 128
|
||||
read_only_rootfs: bool = True
|
||||
created_at: dt.datetime
|
||||
last_used_at: dt.datetime
|
||||
|
||||
|
||||
@@ -9,6 +9,8 @@ from .backend import BaseSandboxBackend, DockerBackend, PodmanBackend
|
||||
from .errors import BoxBackendUnavailableError, BoxSessionConflictError
|
||||
from .models import BoxExecutionResult, BoxExecutionStatus, BoxSessionInfo, BoxSpec
|
||||
|
||||
_UTC = dt.timezone.utc
|
||||
|
||||
|
||||
@dataclasses.dataclass(slots=True)
|
||||
class _RuntimeSession:
|
||||
@@ -48,7 +50,7 @@ class BoxRuntime:
|
||||
result = await (await self._get_backend()).exec(session.info, spec)
|
||||
|
||||
async with self._lock:
|
||||
now = dt.datetime.now(dt.UTC)
|
||||
now = dt.datetime.now(_UTC)
|
||||
if spec.session_id in self._sessions:
|
||||
self._sessions[spec.session_id].info.last_used_at = now
|
||||
|
||||
@@ -70,7 +72,7 @@ class BoxRuntime:
|
||||
existing = self._sessions.get(spec.session_id)
|
||||
if existing is not None:
|
||||
self._assert_session_compatible(existing.info, spec)
|
||||
existing.info.last_used_at = dt.datetime.now(dt.UTC)
|
||||
existing.info.last_used_at = dt.datetime.now(_UTC)
|
||||
self.logger.info(
|
||||
'LangBot Box session reused: '
|
||||
f'session_id={spec.session_id} '
|
||||
@@ -121,7 +123,7 @@ class BoxRuntime:
|
||||
if self.session_ttl_sec <= 0:
|
||||
return
|
||||
|
||||
deadline = dt.datetime.now(dt.UTC) - dt.timedelta(seconds=self.session_ttl_sec)
|
||||
deadline = dt.datetime.now(_UTC) - dt.timedelta(seconds=self.session_ttl_sec)
|
||||
expired_session_ids = [
|
||||
session_id
|
||||
for session_id, session in self._sessions.items()
|
||||
@@ -164,3 +166,19 @@ class BoxRuntime:
|
||||
raise BoxSessionConflictError(
|
||||
f'sandbox_exec session {spec.session_id} already exists with host_path_mode={session.host_path_mode.value}'
|
||||
)
|
||||
if session.cpus != spec.cpus:
|
||||
raise BoxSessionConflictError(
|
||||
f'sandbox_exec session {spec.session_id} already exists with cpus={session.cpus}'
|
||||
)
|
||||
if session.memory_mb != spec.memory_mb:
|
||||
raise BoxSessionConflictError(
|
||||
f'sandbox_exec session {spec.session_id} already exists with memory_mb={session.memory_mb}'
|
||||
)
|
||||
if session.pids_limit != spec.pids_limit:
|
||||
raise BoxSessionConflictError(
|
||||
f'sandbox_exec session {spec.session_id} already exists with pids_limit={session.pids_limit}'
|
||||
)
|
||||
if session.read_only_rootfs != spec.read_only_rootfs:
|
||||
raise BoxSessionConflictError(
|
||||
f'sandbox_exec session {spec.session_id} already exists with read_only_rootfs={session.read_only_rootfs}'
|
||||
)
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import enum
|
||||
import json
|
||||
import os
|
||||
from typing import TYPE_CHECKING
|
||||
@@ -7,9 +8,11 @@ from typing import TYPE_CHECKING
|
||||
import pydantic
|
||||
|
||||
from .errors import BoxValidationError
|
||||
from .models import BoxExecutionResult, BoxSpec
|
||||
from .models import BUILTIN_PROFILES, BoxExecutionResult, BoxProfile, BoxSpec
|
||||
from .runtime import BoxRuntime
|
||||
|
||||
_INT_ADAPTER = pydantic.TypeAdapter(int)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from ..core import app as core_app
|
||||
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
|
||||
@@ -27,6 +30,7 @@ class BoxService:
|
||||
self.output_limit_chars = output_limit_chars
|
||||
self.allowed_host_mount_roots = self._load_allowed_host_mount_roots()
|
||||
self.default_host_workspace = self._load_default_host_workspace()
|
||||
self.profile = self._load_profile()
|
||||
|
||||
async def initialize(self):
|
||||
await self.runtime.initialize()
|
||||
@@ -38,6 +42,8 @@ class BoxService:
|
||||
if spec_payload.get('host_path') in (None, '') and self.default_host_workspace is not None:
|
||||
spec_payload['host_path'] = self.default_host_workspace
|
||||
|
||||
self._apply_profile(spec_payload)
|
||||
|
||||
try:
|
||||
spec = BoxSpec.model_validate(spec_payload)
|
||||
except pydantic.ValidationError as exc:
|
||||
@@ -81,7 +87,32 @@ class BoxService:
|
||||
def _truncate(self, text: str) -> tuple[str, bool]:
|
||||
if len(text) <= self.output_limit_chars:
|
||||
return text, False
|
||||
return f'{text[: self.output_limit_chars]}...', True
|
||||
if self.output_limit_chars <= 0:
|
||||
return '', True
|
||||
|
||||
head_size = 0
|
||||
tail_size = 0
|
||||
notice = ''
|
||||
# Recompute once the omitted count is known so the final payload
|
||||
# stays within output_limit_chars even after adding the notice.
|
||||
for _ in range(4):
|
||||
omitted = max(len(text) - head_size - tail_size, 0)
|
||||
notice = f'\n\n... [{omitted} characters truncated] ...\n\n'
|
||||
available = self.output_limit_chars - len(notice)
|
||||
if available <= 0:
|
||||
return notice[: self.output_limit_chars], True
|
||||
|
||||
new_head_size = int(available * 0.6)
|
||||
new_tail_size = available - new_head_size
|
||||
if new_head_size == head_size and new_tail_size == tail_size:
|
||||
break
|
||||
head_size = new_head_size
|
||||
tail_size = new_tail_size
|
||||
|
||||
head = text[:head_size]
|
||||
tail = text[-tail_size:] if tail_size else ''
|
||||
truncated = f'{head}{notice}{tail}'
|
||||
return truncated[: self.output_limit_chars], True
|
||||
|
||||
def _summarize_spec(self, spec: BoxSpec) -> dict:
|
||||
cmd = spec.cmd.strip()
|
||||
@@ -96,6 +127,10 @@ class BoxService:
|
||||
'image': spec.image,
|
||||
'host_path': spec.host_path,
|
||||
'host_path_mode': spec.host_path_mode.value,
|
||||
'cpus': spec.cpus,
|
||||
'memory_mb': spec.memory_mb,
|
||||
'pids_limit': spec.pids_limit,
|
||||
'read_only_rootfs': spec.read_only_rootfs,
|
||||
'env_keys': sorted(spec.env.keys()),
|
||||
'cmd': cmd,
|
||||
}
|
||||
@@ -157,3 +192,40 @@ class BoxService:
|
||||
|
||||
allowed_roots = ', '.join(self.allowed_host_mount_roots)
|
||||
raise BoxValidationError(f'host_path is outside allowed_host_mount_roots: {allowed_roots}')
|
||||
|
||||
def _load_profile(self) -> BoxProfile:
|
||||
box_config = getattr(self.ap, 'instance_config', None)
|
||||
box_config_data = getattr(box_config, 'data', {}) if box_config is not None else {}
|
||||
profile_name = str(box_config_data.get('box', {}).get('profile', 'default')).strip() or 'default'
|
||||
|
||||
profile = BUILTIN_PROFILES.get(profile_name)
|
||||
if profile is None:
|
||||
available = ', '.join(sorted(BUILTIN_PROFILES))
|
||||
raise BoxValidationError(f"unknown box profile '{profile_name}', available profiles: {available}")
|
||||
return profile
|
||||
|
||||
def _apply_profile(self, params: dict):
|
||||
"""Merge profile defaults into *params* in-place, enforce locked fields and clamp timeout."""
|
||||
profile = self.profile
|
||||
_PROFILE_FIELDS = (
|
||||
'image', 'network', 'timeout_sec', 'host_path_mode',
|
||||
'cpus', 'memory_mb', 'pids_limit', 'read_only_rootfs',
|
||||
)
|
||||
|
||||
for field in _PROFILE_FIELDS:
|
||||
profile_value = getattr(profile, field)
|
||||
raw_value = profile_value.value if isinstance(profile_value, enum.Enum) else profile_value
|
||||
|
||||
if field in profile.locked:
|
||||
params[field] = raw_value
|
||||
elif field not in params:
|
||||
params[field] = raw_value
|
||||
|
||||
timeout = params.get('timeout_sec')
|
||||
try:
|
||||
normalized_timeout = _INT_ADAPTER.validate_python(timeout)
|
||||
except pydantic.ValidationError:
|
||||
return
|
||||
|
||||
if normalized_timeout > profile.max_timeout_sec:
|
||||
params['timeout_sec'] = profile.max_timeout_sec
|
||||
|
||||
@@ -88,6 +88,7 @@ monitoring:
|
||||
# Cleanup check interval in hours
|
||||
check_interval_hours: 1
|
||||
box:
|
||||
profile: 'default'
|
||||
default_host_workspace: './data/box-workspaces/default'
|
||||
allowed_host_mount_roots:
|
||||
- './data/box-workspaces'
|
||||
|
||||
37
tests/unit_tests/box/test_backend_clip.py
Normal file
37
tests/unit_tests/box/test_backend_clip.py
Normal file
@@ -0,0 +1,37 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from langbot.pkg.box.backend import CLISandboxBackend, _MAX_RAW_OUTPUT_BYTES
|
||||
|
||||
|
||||
class TestClipBytes:
|
||||
def test_within_limit_unchanged(self):
|
||||
data = b'hello world'
|
||||
result = CLISandboxBackend._clip_bytes(data, limit=1024)
|
||||
assert result == 'hello world'
|
||||
|
||||
def test_exceeding_limit_clips_and_appends_notice(self):
|
||||
data = b'A' * 200
|
||||
result = CLISandboxBackend._clip_bytes(data, limit=100)
|
||||
assert result.startswith('A' * 100)
|
||||
assert 'raw output clipped at 100 bytes' in result
|
||||
assert '100 bytes discarded' in result
|
||||
|
||||
def test_exact_limit_not_clipped(self):
|
||||
data = b'B' * 100
|
||||
result = CLISandboxBackend._clip_bytes(data, limit=100)
|
||||
assert result == 'B' * 100
|
||||
assert 'clipped' not in result
|
||||
|
||||
def test_default_limit_is_module_constant(self):
|
||||
data = b'x' * 10
|
||||
result = CLISandboxBackend._clip_bytes(data)
|
||||
assert result == 'x' * 10
|
||||
assert _MAX_RAW_OUTPUT_BYTES == 1_048_576
|
||||
|
||||
def test_invalid_utf8_replaced(self):
|
||||
data = b'ok\xff\xfetail'
|
||||
result = CLISandboxBackend._clip_bytes(data, limit=1024)
|
||||
assert 'ok' in result
|
||||
assert 'tail' in result
|
||||
@@ -12,16 +12,20 @@ import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
|
||||
from langbot.pkg.box.backend import BaseSandboxBackend
|
||||
from langbot.pkg.box.errors import BoxBackendUnavailableError, BoxSessionConflictError, BoxValidationError
|
||||
from langbot.pkg.box.models import (
|
||||
BUILTIN_PROFILES,
|
||||
BoxExecutionResult,
|
||||
BoxExecutionStatus,
|
||||
BoxHostMountMode,
|
||||
BoxNetworkMode,
|
||||
BoxProfile,
|
||||
BoxSessionInfo,
|
||||
BoxSpec,
|
||||
)
|
||||
from langbot.pkg.box.runtime import BoxRuntime
|
||||
from langbot.pkg.box.service import BoxService
|
||||
|
||||
_UTC = dt.timezone.utc
|
||||
|
||||
|
||||
class FakeBackend(BaseSandboxBackend):
|
||||
def __init__(self, logger: Mock, available: bool = True):
|
||||
@@ -39,7 +43,7 @@ class FakeBackend(BaseSandboxBackend):
|
||||
async def start_session(self, spec: BoxSpec) -> BoxSessionInfo:
|
||||
self.start_calls.append(spec.session_id)
|
||||
self.start_specs.append(spec)
|
||||
now = dt.datetime.now(dt.UTC)
|
||||
now = dt.datetime.now(_UTC)
|
||||
return BoxSessionInfo(
|
||||
session_id=spec.session_id,
|
||||
backend_name=self.name,
|
||||
@@ -48,6 +52,10 @@ class FakeBackend(BaseSandboxBackend):
|
||||
network=spec.network,
|
||||
host_path=spec.host_path,
|
||||
host_path_mode=spec.host_path_mode,
|
||||
cpus=spec.cpus,
|
||||
memory_mb=spec.memory_mb,
|
||||
pids_limit=spec.pids_limit,
|
||||
read_only_rootfs=spec.read_only_rootfs,
|
||||
created_at=now,
|
||||
last_used_at=now,
|
||||
)
|
||||
@@ -72,12 +80,13 @@ def make_query(query_id: int = 42) -> pipeline_query.Query:
|
||||
return pipeline_query.Query.model_construct(query_id=query_id)
|
||||
|
||||
|
||||
def make_app(logger: Mock, allowed_host_mount_roots: list[str] | None = None):
|
||||
def make_app(logger: Mock, allowed_host_mount_roots: list[str] | None = None, profile: str = 'default'):
|
||||
return SimpleNamespace(
|
||||
logger=logger,
|
||||
instance_config=SimpleNamespace(
|
||||
data={
|
||||
'box': {
|
||||
'profile': profile,
|
||||
'allowed_host_mount_roots': allowed_host_mount_roots or [],
|
||||
'default_host_workspace': '',
|
||||
}
|
||||
@@ -226,3 +235,313 @@ async def test_box_runtime_rejects_host_mount_conflict_in_same_session(tmp_path)
|
||||
|
||||
with pytest.raises(BoxSessionConflictError):
|
||||
await runtime.execute(second)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_box_runtime_rejects_resource_limit_conflict_in_same_session():
|
||||
logger = Mock()
|
||||
backend = FakeBackend(logger)
|
||||
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
||||
await runtime.initialize()
|
||||
|
||||
first = BoxSpec.model_validate({'cmd': 'echo first', 'session_id': 'req-resource', 'cpus': 1.0})
|
||||
second = BoxSpec.model_validate({'cmd': 'echo second', 'session_id': 'req-resource', 'cpus': 2.0})
|
||||
|
||||
await runtime.execute(first)
|
||||
|
||||
with pytest.raises(BoxSessionConflictError):
|
||||
await runtime.execute(second)
|
||||
|
||||
|
||||
# ── Truncation tests ──────────────────────────────────────────────────
|
||||
|
||||
|
||||
class FakeBackendWithOutput(FakeBackend):
|
||||
"""FakeBackend that returns configurable stdout/stderr."""
|
||||
|
||||
def __init__(self, logger: Mock, stdout: str = '', stderr: str = ''):
|
||||
super().__init__(logger)
|
||||
self._stdout = stdout
|
||||
self._stderr = stderr
|
||||
|
||||
async def exec(self, session: BoxSessionInfo, spec: BoxSpec) -> BoxExecutionResult:
|
||||
self.exec_calls.append((session.session_id, spec.cmd))
|
||||
return BoxExecutionResult(
|
||||
session_id=session.session_id,
|
||||
backend_name=self.name,
|
||||
status=BoxExecutionStatus.COMPLETED,
|
||||
exit_code=0,
|
||||
stdout=self._stdout,
|
||||
stderr=self._stderr,
|
||||
duration_ms=5,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_truncate_short_output_unchanged():
|
||||
logger = Mock()
|
||||
backend = FakeBackendWithOutput(logger, stdout='hello world')
|
||||
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
||||
service = BoxService(make_app(logger), runtime=runtime, output_limit_chars=100)
|
||||
await service.initialize()
|
||||
|
||||
result = await service.execute_sandbox_tool({'cmd': 'echo hello'}, make_query(20))
|
||||
|
||||
assert result['stdout'] == 'hello world'
|
||||
assert result['stdout_truncated'] is False
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_truncate_preserves_head_and_tail():
|
||||
logger = Mock()
|
||||
# Build output: "AAAA...BBB..." where each section is identifiable
|
||||
head_marker = 'HEAD_START|'
|
||||
tail_marker = '|TAIL_END'
|
||||
filler = 'x' * 500
|
||||
big_output = f'{head_marker}{filler}{tail_marker}'
|
||||
|
||||
backend = FakeBackendWithOutput(logger, stdout=big_output)
|
||||
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
||||
limit = 100
|
||||
service = BoxService(make_app(logger), runtime=runtime, output_limit_chars=limit)
|
||||
await service.initialize()
|
||||
|
||||
result = await service.execute_sandbox_tool({'cmd': 'cat big'}, make_query(21))
|
||||
|
||||
assert result['stdout_truncated'] is True
|
||||
stdout = result['stdout']
|
||||
# Head part should contain the head marker
|
||||
assert stdout.startswith(head_marker)
|
||||
# Tail part should contain the tail marker
|
||||
assert stdout.endswith(tail_marker)
|
||||
# Should contain the truncation notice
|
||||
assert 'characters truncated' in stdout
|
||||
assert len(stdout) <= limit
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_truncate_at_exact_limit_not_truncated():
|
||||
logger = Mock()
|
||||
exact_output = 'a' * 200
|
||||
backend = FakeBackendWithOutput(logger, stdout=exact_output)
|
||||
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
||||
service = BoxService(make_app(logger), runtime=runtime, output_limit_chars=200)
|
||||
await service.initialize()
|
||||
|
||||
result = await service.execute_sandbox_tool({'cmd': 'echo a'}, make_query(22))
|
||||
|
||||
assert result['stdout'] == exact_output
|
||||
assert result['stdout_truncated'] is False
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_truncate_stderr_independently():
|
||||
logger = Mock()
|
||||
backend = FakeBackendWithOutput(logger, stdout='short', stderr='E' * 300)
|
||||
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
||||
service = BoxService(make_app(logger), runtime=runtime, output_limit_chars=100)
|
||||
await service.initialize()
|
||||
|
||||
result = await service.execute_sandbox_tool({'cmd': 'fail'}, make_query(23))
|
||||
|
||||
assert result['stdout_truncated'] is False
|
||||
assert result['stderr_truncated'] is True
|
||||
assert 'characters truncated' in result['stderr']
|
||||
assert len(result['stderr']) <= 100
|
||||
|
||||
|
||||
# ── Profile tests ─────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_profile_default_provides_defaults():
|
||||
"""When tool call omits network/image, profile defaults are used."""
|
||||
logger = Mock()
|
||||
backend = FakeBackend(logger)
|
||||
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
||||
service = BoxService(make_app(logger), runtime=runtime)
|
||||
await service.initialize()
|
||||
|
||||
result = await service.execute_sandbox_tool({'cmd': 'echo hi'}, make_query(30))
|
||||
|
||||
assert result['ok'] is True
|
||||
spec = backend.start_specs[0]
|
||||
assert spec.network == BoxNetworkMode.OFF
|
||||
assert spec.image == 'python:3.11-slim'
|
||||
assert spec.timeout_sec == 30
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_profile_unlocked_field_can_be_overridden():
|
||||
"""Tool call can override unlocked profile fields."""
|
||||
logger = Mock()
|
||||
backend = FakeBackend(logger)
|
||||
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
||||
service = BoxService(make_app(logger), runtime=runtime)
|
||||
await service.initialize()
|
||||
|
||||
result = await service.execute_sandbox_tool(
|
||||
{'cmd': 'echo hi', 'timeout_sec': 60, 'network': 'on'},
|
||||
make_query(31),
|
||||
)
|
||||
|
||||
assert result['ok'] is True
|
||||
spec = backend.start_specs[0]
|
||||
assert spec.timeout_sec == 60
|
||||
assert spec.network == BoxNetworkMode.ON
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_profile_locked_field_cannot_be_overridden():
|
||||
"""offline_readonly profile locks network and host_path_mode."""
|
||||
logger = Mock()
|
||||
backend = FakeBackend(logger)
|
||||
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
||||
service = BoxService(make_app(logger, profile='offline_readonly'), runtime=runtime)
|
||||
await service.initialize()
|
||||
|
||||
result = await service.execute_sandbox_tool(
|
||||
{'cmd': 'echo hi', 'network': 'on', 'host_path_mode': 'rw'},
|
||||
make_query(32),
|
||||
)
|
||||
|
||||
assert result['ok'] is True
|
||||
spec = backend.start_specs[0]
|
||||
assert spec.network == BoxNetworkMode.OFF
|
||||
assert spec.host_path_mode == BoxHostMountMode.READ_ONLY
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_profile_timeout_clamped_to_max():
|
||||
"""timeout_sec exceeding max_timeout_sec is clamped."""
|
||||
logger = Mock()
|
||||
backend = FakeBackend(logger)
|
||||
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
||||
service = BoxService(make_app(logger), runtime=runtime)
|
||||
await service.initialize()
|
||||
|
||||
result = await service.execute_sandbox_tool(
|
||||
{'cmd': 'echo hi', 'timeout_sec': 999},
|
||||
make_query(33),
|
||||
)
|
||||
|
||||
assert result['ok'] is True
|
||||
spec = backend.start_specs[0]
|
||||
# default profile max_timeout_sec = 120
|
||||
assert spec.timeout_sec == 120
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize('timeout_value', ['999', 999.0])
|
||||
async def test_profile_timeout_clamped_for_coercible_inputs(timeout_value):
|
||||
logger = Mock()
|
||||
backend = FakeBackend(logger)
|
||||
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
||||
service = BoxService(make_app(logger), runtime=runtime)
|
||||
await service.initialize()
|
||||
|
||||
await service.execute_sandbox_tool(
|
||||
{'cmd': 'echo hi', 'timeout_sec': timeout_value},
|
||||
make_query(34),
|
||||
)
|
||||
|
||||
spec = backend.start_specs[0]
|
||||
assert spec.timeout_sec == 120
|
||||
|
||||
|
||||
def test_unknown_profile_raises_error():
|
||||
"""Config referencing a non-existent profile name raises immediately."""
|
||||
logger = Mock()
|
||||
with pytest.raises(BoxValidationError, match='unknown box profile'):
|
||||
BoxService(make_app(logger, profile='nonexistent'))
|
||||
|
||||
|
||||
def test_builtin_profiles_are_consistent():
|
||||
"""Basic sanity check on all built-in profiles."""
|
||||
assert 'default' in BUILTIN_PROFILES
|
||||
assert 'offline_readonly' in BUILTIN_PROFILES
|
||||
assert 'network_basic' in BUILTIN_PROFILES
|
||||
assert 'network_extended' in BUILTIN_PROFILES
|
||||
|
||||
offline = BUILTIN_PROFILES['offline_readonly']
|
||||
assert offline.network == BoxNetworkMode.OFF
|
||||
assert offline.host_path_mode == BoxHostMountMode.READ_ONLY
|
||||
assert 'network' in offline.locked
|
||||
assert 'host_path_mode' in offline.locked
|
||||
assert 'read_only_rootfs' in offline.locked
|
||||
assert offline.max_timeout_sec <= BUILTIN_PROFILES['default'].max_timeout_sec
|
||||
|
||||
basic = BUILTIN_PROFILES['network_basic']
|
||||
assert basic.network == BoxNetworkMode.ON
|
||||
assert basic.read_only_rootfs is True
|
||||
|
||||
extended = BUILTIN_PROFILES['network_extended']
|
||||
assert extended.network == BoxNetworkMode.ON
|
||||
assert extended.read_only_rootfs is False
|
||||
assert extended.cpus > BUILTIN_PROFILES['default'].cpus
|
||||
assert extended.memory_mb > BUILTIN_PROFILES['default'].memory_mb
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_profile_default_applies_resource_limits():
|
||||
"""Default profile resource limits are applied to BoxSpec."""
|
||||
logger = Mock()
|
||||
backend = FakeBackend(logger)
|
||||
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
||||
service = BoxService(make_app(logger), runtime=runtime)
|
||||
await service.initialize()
|
||||
|
||||
await service.execute_sandbox_tool({'cmd': 'echo hi'}, make_query(40))
|
||||
|
||||
spec = backend.start_specs[0]
|
||||
profile = BUILTIN_PROFILES['default']
|
||||
assert spec.cpus == profile.cpus
|
||||
assert spec.memory_mb == profile.memory_mb
|
||||
assert spec.pids_limit == profile.pids_limit
|
||||
assert spec.read_only_rootfs == profile.read_only_rootfs
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_profile_offline_readonly_locks_read_only_rootfs():
|
||||
"""offline_readonly locks read_only_rootfs so it cannot be overridden."""
|
||||
logger = Mock()
|
||||
backend = FakeBackend(logger)
|
||||
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
||||
service = BoxService(make_app(logger, profile='offline_readonly'), runtime=runtime)
|
||||
await service.initialize()
|
||||
|
||||
await service.execute_sandbox_tool(
|
||||
{'cmd': 'echo hi', 'read_only_rootfs': False},
|
||||
make_query(41),
|
||||
)
|
||||
|
||||
spec = backend.start_specs[0]
|
||||
assert spec.read_only_rootfs is True
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_profile_network_extended_has_relaxed_limits():
|
||||
"""network_extended profile provides higher resource limits."""
|
||||
logger = Mock()
|
||||
backend = FakeBackend(logger)
|
||||
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
||||
service = BoxService(make_app(logger, profile='network_extended'), runtime=runtime)
|
||||
await service.initialize()
|
||||
|
||||
await service.execute_sandbox_tool({'cmd': 'echo hi'}, make_query(42))
|
||||
|
||||
spec = backend.start_specs[0]
|
||||
assert spec.network == BoxNetworkMode.ON
|
||||
assert spec.cpus == 2.0
|
||||
assert spec.memory_mb == 1024
|
||||
assert spec.read_only_rootfs is False
|
||||
|
||||
|
||||
def test_box_spec_validates_resource_limits():
|
||||
"""BoxSpec rejects invalid resource limit values."""
|
||||
with pytest.raises(Exception):
|
||||
BoxSpec.model_validate({'cmd': 'echo', 'session_id': 's1', 'cpus': 0})
|
||||
with pytest.raises(Exception):
|
||||
BoxSpec.model_validate({'cmd': 'echo', 'session_id': 's1', 'memory_mb': 10})
|
||||
with pytest.raises(Exception):
|
||||
BoxSpec.model_validate({'cmd': 'echo', 'session_id': 's1', 'pids_limit': 0})
|
||||
|
||||
Reference in New Issue
Block a user