feat(box): add BoxProfile with resource limits and improved output truncation

- Implement head+tail output truncation (60/40 split) so LLM sees both
    beginning and final results; add streaming byte-limited reads in backend
    to prevent unbounded memory usage (_MAX_RAW_OUTPUT_BYTES = 1MB)
  - Define BoxProfile model with locked fields and max_timeout_sec clamping
  - Add four built-in profiles: default, offline_readonly, network_basic,
    network_extended with differentiated resource and security constraints
  - Add resource limit fields to BoxSpec (cpus, memory_mb, pids_limit,
    read_only_rootfs) and pass corresponding container CLI flags
    (--cpus, --memory, --pids-limit, --read-only, --tmpfs)
  - Profile loaded from config (box.profile), applied in service layer
    before BoxSpec validation; locked fields cannot be overridden by
    tool-call parameters
This commit is contained in:
youhuanghe
2026-03-20 04:37:09 +00:00
committed by WangCham
parent 70c56af4ee
commit 86b2d517f2
7 changed files with 624 additions and 14 deletions

View File

@@ -8,11 +8,18 @@ import logging
import re
import shlex
import shutil
import typing
import uuid
from .errors import BoxError
from .models import DEFAULT_BOX_MOUNT_PATH, BoxExecutionResult, BoxExecutionStatus, BoxSessionInfo, BoxSpec
# Hard cap on raw subprocess output to prevent unbounded memory usage.
# Container timeout already bounds duration, but fast commands can still
# produce large output within the time limit. After this many bytes the
# remaining output is discarded before decoding.
_MAX_RAW_OUTPUT_BYTES = 1_048_576 # 1 MB per stream
@dataclasses.dataclass(slots=True)
class _CommandResult:
@@ -83,6 +90,15 @@ class CLISandboxBackend(BaseSandboxBackend):
if spec.network.value == 'off':
args.extend(['--network', 'none'])
# Resource limits
args.extend(['--cpus', str(spec.cpus)])
args.extend(['--memory', f'{spec.memory_mb}m'])
args.extend(['--pids-limit', str(spec.pids_limit)])
if spec.read_only_rootfs:
args.append('--read-only')
args.extend(['--tmpfs', '/tmp:size=64m'])
if spec.host_path is not None:
mount_spec = f'{spec.host_path}:{DEFAULT_BOX_MOUNT_PATH}:{spec.host_path_mode.value}'
args.extend(['-v', mount_spec])
@@ -93,7 +109,9 @@ class CLISandboxBackend(BaseSandboxBackend):
f'LangBot Box backend start_session: backend={self.name} '
f'session_id={spec.session_id} container_name={container_name} '
f'image={spec.image} network={spec.network.value} '
f'host_path={spec.host_path} host_path_mode={spec.host_path_mode.value}'
f'host_path={spec.host_path} host_path_mode={spec.host_path_mode.value} '
f'cpus={spec.cpus} memory_mb={spec.memory_mb} pids_limit={spec.pids_limit} '
f'read_only_rootfs={spec.read_only_rootfs}'
)
await self._run_command(args, timeout_sec=30, check=True)
@@ -106,6 +124,10 @@ class CLISandboxBackend(BaseSandboxBackend):
network=spec.network,
host_path=spec.host_path,
host_path_mode=spec.host_path_mode,
cpus=spec.cpus,
memory_mb=spec.memory_mb,
pids_limit=spec.pids_limit,
read_only_rootfs=spec.read_only_rootfs,
created_at=now,
last_used_at=now,
)
@@ -191,21 +213,30 @@ class CLISandboxBackend(BaseSandboxBackend):
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
stdout_task = asyncio.create_task(self._read_stream(process.stdout))
stderr_task = asyncio.create_task(self._read_stream(process.stderr))
timed_out = False
try:
stdout_bytes, stderr_bytes = await asyncio.wait_for(process.communicate(), timeout=timeout_sec)
await asyncio.wait_for(process.wait(), timeout=timeout_sec)
except asyncio.TimeoutError:
process.kill()
stdout_bytes, stderr_bytes = await process.communicate()
timed_out = True
await process.wait()
stdout_bytes, stdout_total = await stdout_task
stderr_bytes, stderr_total = await stderr_task
if timed_out:
return _CommandResult(
return_code=-1,
stdout=stdout_bytes.decode('utf-8', errors='replace').strip(),
stderr=stderr_bytes.decode('utf-8', errors='replace').strip(),
stdout=self._clip_captured_bytes(stdout_bytes, stdout_total),
stderr=self._clip_captured_bytes(stderr_bytes, stderr_total),
timed_out=True,
)
stdout = stdout_bytes.decode('utf-8', errors='replace').strip()
stderr = stderr_bytes.decode('utf-8', errors='replace').strip()
stdout = self._clip_captured_bytes(stdout_bytes, stdout_total)
stderr = self._clip_captured_bytes(stderr_bytes, stderr_total)
if check and process.returncode != 0:
raise BoxError(self._format_cli_error(stderr or stdout or 'unknown backend error'))
@@ -217,6 +248,40 @@ class CLISandboxBackend(BaseSandboxBackend):
timed_out=False,
)
@staticmethod
def _clip_bytes(data: bytes, limit: int = _MAX_RAW_OUTPUT_BYTES) -> str:
    """Decode at most *limit* leading bytes of *data* into text.

    The full length of *data* is forwarded alongside the clipped prefix so
    that a clipping notice is appended whenever bytes were dropped.
    """
    original_size = len(data)
    return CLISandboxBackend._clip_captured_bytes(data[:limit], original_size, limit=limit)
@staticmethod
def _clip_captured_bytes(data: bytes, total_size: int, limit: int = _MAX_RAW_OUTPUT_BYTES) -> str:
    """Decode already-captured bytes and note how much output was dropped.

    *data* is decoded as UTF-8 (undecodable bytes become replacement
    characters) and stripped of surrounding whitespace. *total_size* is the
    number of bytes the stream produced overall; when it exceeds *limit* a
    one-line notice reporting the discarded byte count is appended.
    """
    decoded = data.decode('utf-8', errors='replace').strip()
    if total_size <= limit:
        return decoded
    return decoded + f'\n... [raw output clipped at {limit} bytes, {total_size - limit} bytes discarded]'
@staticmethod
async def _read_stream(
    stream: typing.Optional[asyncio.StreamReader],
    limit: int = _MAX_RAW_OUTPUT_BYTES,
) -> tuple[bytes, int]:
    """Read *stream* to EOF while retaining at most *limit* bytes.

    Returns a ``(kept, total)`` pair: ``kept`` holds the first *limit* bytes
    that were read and ``total`` counts every byte seen, including those
    that were dropped. A missing stream yields ``(b'', 0)``.
    """
    if stream is None:
        return b'', 0

    kept = bytearray()
    seen = 0
    # Keep reading past the cap so the total byte count stays accurate.
    while block := await stream.read(65536):
        seen += len(block)
        room = limit - len(kept)
        if room > 0:
            kept.extend(block[:room])
    return bytes(kept), seen
def _format_cli_error(self, message: str) -> str:
message = ' '.join(message.split())
if len(message) > 300:

View File

@@ -35,6 +35,11 @@ class BoxSpec(pydantic.BaseModel):
image: str = DEFAULT_BOX_IMAGE
host_path: str | None = None
host_path_mode: BoxHostMountMode = BoxHostMountMode.READ_WRITE
# Resource limits
cpus: float = 1.0
memory_mb: int = 512
pids_limit: int = 128
read_only_rootfs: bool = True
@pydantic.field_validator('cmd')
@classmethod
@@ -59,6 +64,27 @@ class BoxSpec(pydantic.BaseModel):
raise ValueError('timeout_sec must be greater than 0')
return value
@pydantic.field_validator('cpus')
@classmethod
def validate_cpus(cls, value: float) -> float:
    """Ensure the CPU limit is a finite, strictly positive number.

    NaN compares false against every number, so a plain ``value <= 0`` check
    lets it through; infinity passes as well. Neither is a usable value for
    the container ``--cpus`` flag, so both are rejected here.

    Raises:
        ValueError: if *value* is NaN, infinite, zero or negative.
    """
    # Function-scope import keeps this validator self-contained.
    import math

    if not math.isfinite(value):
        raise ValueError('cpus must be a finite number')
    if value <= 0:
        raise ValueError('cpus must be greater than 0')
    return value
@pydantic.field_validator('memory_mb')
@classmethod
def validate_memory_mb(cls, value: int) -> int:
    """Accept memory limits of 32 MB or more; reject anything smaller."""
    if value >= 32:
        return value
    raise ValueError('memory_mb must be at least 32')
@pydantic.field_validator('pids_limit')
@classmethod
def validate_pids_limit(cls, value: int) -> int:
    """Accept process-count limits of 1 or more; reject anything smaller."""
    if value >= 1:
        return value
    raise ValueError('pids_limit must be at least 1')
@pydantic.field_validator('session_id')
@classmethod
def validate_session_id(cls, value: str) -> str:
@@ -91,6 +117,74 @@ class BoxSpec(pydantic.BaseModel):
return self
class BoxProfile(pydantic.BaseModel):
    """Preset sandbox configuration.

    Provides default values for BoxSpec fields and optionally locks fields
    so that tool-call parameters cannot override them.
    """

    # Profile identifier; the built-in registry keys each profile by this name.
    name: str

    # Defaults used for the matching BoxSpec fields when a tool call omits them.
    image: str = DEFAULT_BOX_IMAGE
    network: BoxNetworkMode = BoxNetworkMode.OFF
    timeout_sec: int = 30
    host_path_mode: BoxHostMountMode = BoxHostMountMode.READ_WRITE

    # Upper bound for timeout_sec; the service layer clamps larger requests to it.
    max_timeout_sec: int = 120

    # Resource limits
    cpus: float = 1.0
    memory_mb: int = 512
    pids_limit: int = 128
    read_only_rootfs: bool = True

    # Names of the fields above that are always forced to the profile's value.
    locked: frozenset[str] = frozenset()

    # frozen=True: profile instances reject attribute assignment after creation.
    model_config = pydantic.ConfigDict(frozen=True)
# Registry of the built-in profiles. Each entry is keyed by the profile's own
# ``name`` so the key and the name cannot drift apart.
BUILTIN_PROFILES: dict[str, BoxProfile] = {
    preset.name: preset
    for preset in (
        # Baseline: no network, writable mount, moderate limits.
        BoxProfile(
            name='default',
            network=BoxNetworkMode.OFF,
            host_path_mode=BoxHostMountMode.READ_WRITE,
            cpus=1.0,
            memory_mb=512,
            pids_limit=128,
            read_only_rootfs=True,
            max_timeout_sec=120,
        ),
        # Tightest preset: network, mount mode and rootfs mode are locked.
        BoxProfile(
            name='offline_readonly',
            network=BoxNetworkMode.OFF,
            host_path_mode=BoxHostMountMode.READ_ONLY,
            cpus=0.5,
            memory_mb=256,
            pids_limit=64,
            read_only_rootfs=True,
            max_timeout_sec=60,
            locked=frozenset({'network', 'host_path_mode', 'read_only_rootfs'}),
        ),
        # Same limits as the baseline, but with network access enabled.
        BoxProfile(
            name='network_basic',
            network=BoxNetworkMode.ON,
            host_path_mode=BoxHostMountMode.READ_WRITE,
            cpus=1.0,
            memory_mb=512,
            pids_limit=128,
            read_only_rootfs=True,
            max_timeout_sec=120,
        ),
        # Network enabled, larger limits, writable root filesystem.
        BoxProfile(
            name='network_extended',
            network=BoxNetworkMode.ON,
            host_path_mode=BoxHostMountMode.READ_WRITE,
            cpus=2.0,
            memory_mb=1024,
            pids_limit=256,
            read_only_rootfs=False,
            max_timeout_sec=300,
        ),
    )
}
class BoxSessionInfo(pydantic.BaseModel):
session_id: str
backend_name: str
@@ -99,6 +193,10 @@ class BoxSessionInfo(pydantic.BaseModel):
network: BoxNetworkMode
host_path: str | None = None
host_path_mode: BoxHostMountMode = BoxHostMountMode.READ_WRITE
cpus: float = 1.0
memory_mb: int = 512
pids_limit: int = 128
read_only_rootfs: bool = True
created_at: dt.datetime
last_used_at: dt.datetime

View File

@@ -9,6 +9,8 @@ from .backend import BaseSandboxBackend, DockerBackend, PodmanBackend
from .errors import BoxBackendUnavailableError, BoxSessionConflictError
from .models import BoxExecutionResult, BoxExecutionStatus, BoxSessionInfo, BoxSpec
_UTC = dt.timezone.utc
@dataclasses.dataclass(slots=True)
class _RuntimeSession:
@@ -48,7 +50,7 @@ class BoxRuntime:
result = await (await self._get_backend()).exec(session.info, spec)
async with self._lock:
now = dt.datetime.now(dt.UTC)
now = dt.datetime.now(_UTC)
if spec.session_id in self._sessions:
self._sessions[spec.session_id].info.last_used_at = now
@@ -70,7 +72,7 @@ class BoxRuntime:
existing = self._sessions.get(spec.session_id)
if existing is not None:
self._assert_session_compatible(existing.info, spec)
existing.info.last_used_at = dt.datetime.now(dt.UTC)
existing.info.last_used_at = dt.datetime.now(_UTC)
self.logger.info(
'LangBot Box session reused: '
f'session_id={spec.session_id} '
@@ -121,7 +123,7 @@ class BoxRuntime:
if self.session_ttl_sec <= 0:
return
deadline = dt.datetime.now(dt.UTC) - dt.timedelta(seconds=self.session_ttl_sec)
deadline = dt.datetime.now(_UTC) - dt.timedelta(seconds=self.session_ttl_sec)
expired_session_ids = [
session_id
for session_id, session in self._sessions.items()
@@ -164,3 +166,19 @@ class BoxRuntime:
raise BoxSessionConflictError(
f'sandbox_exec session {spec.session_id} already exists with host_path_mode={session.host_path_mode.value}'
)
if session.cpus != spec.cpus:
raise BoxSessionConflictError(
f'sandbox_exec session {spec.session_id} already exists with cpus={session.cpus}'
)
if session.memory_mb != spec.memory_mb:
raise BoxSessionConflictError(
f'sandbox_exec session {spec.session_id} already exists with memory_mb={session.memory_mb}'
)
if session.pids_limit != spec.pids_limit:
raise BoxSessionConflictError(
f'sandbox_exec session {spec.session_id} already exists with pids_limit={session.pids_limit}'
)
if session.read_only_rootfs != spec.read_only_rootfs:
raise BoxSessionConflictError(
f'sandbox_exec session {spec.session_id} already exists with read_only_rootfs={session.read_only_rootfs}'
)

View File

@@ -1,5 +1,6 @@
from __future__ import annotations
import enum
import json
import os
from typing import TYPE_CHECKING
@@ -7,9 +8,11 @@ from typing import TYPE_CHECKING
import pydantic
from .errors import BoxValidationError
from .models import BoxExecutionResult, BoxSpec
from .models import BUILTIN_PROFILES, BoxExecutionResult, BoxProfile, BoxSpec
from .runtime import BoxRuntime
_INT_ADAPTER = pydantic.TypeAdapter(int)
if TYPE_CHECKING:
from ..core import app as core_app
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
@@ -27,6 +30,7 @@ class BoxService:
self.output_limit_chars = output_limit_chars
self.allowed_host_mount_roots = self._load_allowed_host_mount_roots()
self.default_host_workspace = self._load_default_host_workspace()
self.profile = self._load_profile()
async def initialize(self):
await self.runtime.initialize()
@@ -38,6 +42,8 @@ class BoxService:
if spec_payload.get('host_path') in (None, '') and self.default_host_workspace is not None:
spec_payload['host_path'] = self.default_host_workspace
self._apply_profile(spec_payload)
try:
spec = BoxSpec.model_validate(spec_payload)
except pydantic.ValidationError as exc:
@@ -81,7 +87,32 @@ class BoxService:
def _truncate(self, text: str) -> tuple[str, bool]:
if len(text) <= self.output_limit_chars:
return text, False
return f'{text[: self.output_limit_chars]}...', True
if self.output_limit_chars <= 0:
return '', True
head_size = 0
tail_size = 0
notice = ''
# Recompute once the omitted count is known so the final payload
# stays within output_limit_chars even after adding the notice.
for _ in range(4):
omitted = max(len(text) - head_size - tail_size, 0)
notice = f'\n\n... [{omitted} characters truncated] ...\n\n'
available = self.output_limit_chars - len(notice)
if available <= 0:
return notice[: self.output_limit_chars], True
new_head_size = int(available * 0.6)
new_tail_size = available - new_head_size
if new_head_size == head_size and new_tail_size == tail_size:
break
head_size = new_head_size
tail_size = new_tail_size
head = text[:head_size]
tail = text[-tail_size:] if tail_size else ''
truncated = f'{head}{notice}{tail}'
return truncated[: self.output_limit_chars], True
def _summarize_spec(self, spec: BoxSpec) -> dict:
cmd = spec.cmd.strip()
@@ -96,6 +127,10 @@ class BoxService:
'image': spec.image,
'host_path': spec.host_path,
'host_path_mode': spec.host_path_mode.value,
'cpus': spec.cpus,
'memory_mb': spec.memory_mb,
'pids_limit': spec.pids_limit,
'read_only_rootfs': spec.read_only_rootfs,
'env_keys': sorted(spec.env.keys()),
'cmd': cmd,
}
@@ -157,3 +192,40 @@ class BoxService:
allowed_roots = ', '.join(self.allowed_host_mount_roots)
raise BoxValidationError(f'host_path is outside allowed_host_mount_roots: {allowed_roots}')
def _load_profile(self) -> BoxProfile:
    """Resolve the configured box profile from the instance configuration.

    Reads ``box.profile`` from ``self.ap.instance_config.data``. A missing,
    null or blank value (at any level) falls back to the ``'default'``
    profile.

    Raises:
        BoxValidationError: if the configured name is not a built-in profile.
    """
    instance_config = getattr(self.ap, 'instance_config', None)
    config_data = getattr(instance_config, 'data', None) or {}
    # ``box:`` with no children loads as None, so normalise it to a mapping.
    box_section = config_data.get('box') or {}
    configured = box_section.get('profile')
    # str(None) would yield the literal 'None'; treat null like an unset value.
    profile_name = ('' if configured is None else str(configured).strip()) or 'default'

    profile = BUILTIN_PROFILES.get(profile_name)
    if profile is None:
        available = ', '.join(sorted(BUILTIN_PROFILES))
        raise BoxValidationError(f"unknown box profile '{profile_name}', available profiles: {available}")
    return profile
def _apply_profile(self, params: dict):
    """Fold the active profile into *params*, mutating it in place.

    For each profile-managed field the profile value is written when the
    field is locked or absent from *params*; enum values are stored as
    their raw ``.value``. Afterwards ``timeout_sec`` is capped at the
    profile's ``max_timeout_sec``. A timeout that cannot be read as an
    integer is left untouched.
    """
    active = self.profile
    managed_fields = (
        'image', 'network', 'timeout_sec', 'host_path_mode',
        'cpus', 'memory_mb', 'pids_limit', 'read_only_rootfs',
    )

    for key in managed_fields:
        preset = getattr(active, key)
        if isinstance(preset, enum.Enum):
            preset = preset.value
        if key in active.locked or key not in params:
            params[key] = preset

    try:
        requested = _INT_ADAPTER.validate_python(params.get('timeout_sec'))
    except pydantic.ValidationError:
        return

    if requested > active.max_timeout_sec:
        params['timeout_sec'] = active.max_timeout_sec

View File

@@ -88,6 +88,7 @@ monitoring:
# Cleanup check interval in hours
check_interval_hours: 1
box:
profile: 'default'
default_host_workspace: './data/box-workspaces/default'
allowed_host_mount_roots:
- './data/box-workspaces'

View File

@@ -0,0 +1,37 @@
from __future__ import annotations
import pytest
from langbot.pkg.box.backend import CLISandboxBackend, _MAX_RAW_OUTPUT_BYTES
class TestClipBytes:
    """Behavioural checks for ``CLISandboxBackend._clip_bytes``."""

    def test_within_limit_unchanged(self):
        # A payload far below the cap must come back untouched.
        assert CLISandboxBackend._clip_bytes(b'hello world', limit=1024) == 'hello world'

    def test_exceeding_limit_clips_and_appends_notice(self):
        clipped = CLISandboxBackend._clip_bytes(b'A' * 200, limit=100)
        assert clipped.startswith('A' * 100)
        for fragment in ('raw output clipped at 100 bytes', '100 bytes discarded'):
            assert fragment in clipped

    def test_exact_limit_not_clipped(self):
        # Exactly `limit` bytes is still within bounds: no notice expected.
        text = CLISandboxBackend._clip_bytes(b'B' * 100, limit=100)
        assert text == 'B' * 100
        assert 'clipped' not in text

    def test_default_limit_is_module_constant(self):
        assert CLISandboxBackend._clip_bytes(b'x' * 10) == 'x' * 10
        assert _MAX_RAW_OUTPUT_BYTES == 1_048_576

    def test_invalid_utf8_replaced(self):
        # Undecodable bytes must not prevent the valid text from surviving.
        text = CLISandboxBackend._clip_bytes(b'ok\xff\xfetail', limit=1024)
        for fragment in ('ok', 'tail'):
            assert fragment in text

View File

@@ -12,16 +12,20 @@ import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
from langbot.pkg.box.backend import BaseSandboxBackend
from langbot.pkg.box.errors import BoxBackendUnavailableError, BoxSessionConflictError, BoxValidationError
from langbot.pkg.box.models import (
BUILTIN_PROFILES,
BoxExecutionResult,
BoxExecutionStatus,
BoxHostMountMode,
BoxNetworkMode,
BoxProfile,
BoxSessionInfo,
BoxSpec,
)
from langbot.pkg.box.runtime import BoxRuntime
from langbot.pkg.box.service import BoxService
_UTC = dt.timezone.utc
class FakeBackend(BaseSandboxBackend):
def __init__(self, logger: Mock, available: bool = True):
@@ -39,7 +43,7 @@ class FakeBackend(BaseSandboxBackend):
async def start_session(self, spec: BoxSpec) -> BoxSessionInfo:
self.start_calls.append(spec.session_id)
self.start_specs.append(spec)
now = dt.datetime.now(dt.UTC)
now = dt.datetime.now(_UTC)
return BoxSessionInfo(
session_id=spec.session_id,
backend_name=self.name,
@@ -48,6 +52,10 @@ class FakeBackend(BaseSandboxBackend):
network=spec.network,
host_path=spec.host_path,
host_path_mode=spec.host_path_mode,
cpus=spec.cpus,
memory_mb=spec.memory_mb,
pids_limit=spec.pids_limit,
read_only_rootfs=spec.read_only_rootfs,
created_at=now,
last_used_at=now,
)
@@ -72,12 +80,13 @@ def make_query(query_id: int = 42) -> pipeline_query.Query:
return pipeline_query.Query.model_construct(query_id=query_id)
def make_app(logger: Mock, allowed_host_mount_roots: list[str] | None = None):
def make_app(logger: Mock, allowed_host_mount_roots: list[str] | None = None, profile: str = 'default'):
return SimpleNamespace(
logger=logger,
instance_config=SimpleNamespace(
data={
'box': {
'profile': profile,
'allowed_host_mount_roots': allowed_host_mount_roots or [],
'default_host_workspace': '',
}
@@ -226,3 +235,313 @@ async def test_box_runtime_rejects_host_mount_conflict_in_same_session(tmp_path)
with pytest.raises(BoxSessionConflictError):
await runtime.execute(second)
@pytest.mark.asyncio
async def test_box_runtime_rejects_resource_limit_conflict_in_same_session():
    """Reusing a session id with a different ``cpus`` value raises a conflict."""
    log = Mock()
    runtime = BoxRuntime(logger=log, backends=[FakeBackend(log)], session_ttl_sec=300)
    await runtime.initialize()

    shared = {'session_id': 'req-resource'}
    opening = BoxSpec.model_validate({'cmd': 'echo first', **shared, 'cpus': 1.0})
    clashing = BoxSpec.model_validate({'cmd': 'echo second', **shared, 'cpus': 2.0})

    await runtime.execute(opening)

    with pytest.raises(BoxSessionConflictError):
        await runtime.execute(clashing)
# ── Truncation tests ──────────────────────────────────────────────────
class FakeBackendWithOutput(FakeBackend):
    """FakeBackend variant whose ``exec`` reports fixed stdout/stderr text."""

    def __init__(self, logger: Mock, stdout: str = '', stderr: str = ''):
        super().__init__(logger)
        self._stdout, self._stderr = stdout, stderr

    async def exec(self, session: BoxSessionInfo, spec: BoxSpec) -> BoxExecutionResult:
        """Record the call and return a completed result with the canned output."""
        sid = session.session_id
        self.exec_calls.append((sid, spec.cmd))
        return BoxExecutionResult(
            session_id=sid,
            backend_name=self.name,
            status=BoxExecutionStatus.COMPLETED,
            exit_code=0,
            stdout=self._stdout,
            stderr=self._stderr,
            duration_ms=5,
        )
@pytest.mark.asyncio
async def test_truncate_short_output_unchanged():
    """Output shorter than the limit is returned verbatim and not flagged."""
    log = Mock()
    runtime = BoxRuntime(
        logger=log,
        backends=[FakeBackendWithOutput(log, stdout='hello world')],
        session_ttl_sec=300,
    )
    service = BoxService(make_app(log), runtime=runtime, output_limit_chars=100)
    await service.initialize()

    outcome = await service.execute_sandbox_tool({'cmd': 'echo hello'}, make_query(20))

    assert outcome['stdout'] == 'hello world'
    assert outcome['stdout_truncated'] is False
@pytest.mark.asyncio
async def test_truncate_preserves_head_and_tail():
    """Oversized output keeps its first and last characters around a notice."""
    log = Mock()
    # 500 filler characters sandwiched between two recognisable markers.
    prefix, suffix = 'HEAD_START|', '|TAIL_END'
    payload = f'{prefix}{"x" * 500}{suffix}'
    cap = 100

    runtime = BoxRuntime(
        logger=log,
        backends=[FakeBackendWithOutput(log, stdout=payload)],
        session_ttl_sec=300,
    )
    service = BoxService(make_app(log), runtime=runtime, output_limit_chars=cap)
    await service.initialize()

    outcome = await service.execute_sandbox_tool({'cmd': 'cat big'}, make_query(21))

    assert outcome['stdout_truncated'] is True
    clipped = outcome['stdout']
    # The head marker survives at the start, the tail marker at the end.
    assert clipped.startswith(prefix)
    assert clipped.endswith(suffix)
    # The notice sits in between and the total still respects the cap.
    assert 'characters truncated' in clipped
    assert len(clipped) <= cap
@pytest.mark.asyncio
async def test_truncate_at_exact_limit_not_truncated():
    """Output whose length equals the limit is passed through untouched."""
    log = Mock()
    payload = 'a' * 200
    runtime = BoxRuntime(
        logger=log,
        backends=[FakeBackendWithOutput(log, stdout=payload)],
        session_ttl_sec=300,
    )
    service = BoxService(make_app(log), runtime=runtime, output_limit_chars=200)
    await service.initialize()

    outcome = await service.execute_sandbox_tool({'cmd': 'echo a'}, make_query(22))

    assert outcome['stdout'] == payload
    assert outcome['stdout_truncated'] is False
@pytest.mark.asyncio
async def test_truncate_stderr_independently():
    """stderr is truncated on its own; a short stdout stays unflagged."""
    log = Mock()
    runtime = BoxRuntime(
        logger=log,
        backends=[FakeBackendWithOutput(log, stdout='short', stderr='E' * 300)],
        session_ttl_sec=300,
    )
    service = BoxService(make_app(log), runtime=runtime, output_limit_chars=100)
    await service.initialize()

    outcome = await service.execute_sandbox_tool({'cmd': 'fail'}, make_query(23))

    assert outcome['stdout_truncated'] is False
    assert outcome['stderr_truncated'] is True
    clipped_err = outcome['stderr']
    assert 'characters truncated' in clipped_err
    assert len(clipped_err) <= 100
# ── Profile tests ─────────────────────────────────────────────────────
@pytest.mark.asyncio
async def test_profile_default_provides_defaults():
    """Fields omitted by the tool call are filled in from the default profile."""
    log = Mock()
    backend = FakeBackend(log)
    service = BoxService(
        make_app(log),
        runtime=BoxRuntime(logger=log, backends=[backend], session_ttl_sec=300),
    )
    await service.initialize()

    outcome = await service.execute_sandbox_tool({'cmd': 'echo hi'}, make_query(30))

    assert outcome['ok'] is True
    started = backend.start_specs[0]
    assert started.network == BoxNetworkMode.OFF
    assert started.image == 'python:3.11-slim'
    assert started.timeout_sec == 30
@pytest.mark.asyncio
async def test_profile_unlocked_field_can_be_overridden():
    """Values supplied by the tool call win for fields the profile leaves unlocked."""
    log = Mock()
    backend = FakeBackend(log)
    service = BoxService(
        make_app(log),
        runtime=BoxRuntime(logger=log, backends=[backend], session_ttl_sec=300),
    )
    await service.initialize()

    request = {'cmd': 'echo hi', 'timeout_sec': 60, 'network': 'on'}
    outcome = await service.execute_sandbox_tool(request, make_query(31))

    assert outcome['ok'] is True
    started = backend.start_specs[0]
    assert started.timeout_sec == 60
    assert started.network == BoxNetworkMode.ON
@pytest.mark.asyncio
async def test_profile_locked_field_cannot_be_overridden():
    """Under offline_readonly, requested network/host_path_mode values are ignored."""
    log = Mock()
    backend = FakeBackend(log)
    service = BoxService(
        make_app(log, profile='offline_readonly'),
        runtime=BoxRuntime(logger=log, backends=[backend], session_ttl_sec=300),
    )
    await service.initialize()

    request = {'cmd': 'echo hi', 'network': 'on', 'host_path_mode': 'rw'}
    outcome = await service.execute_sandbox_tool(request, make_query(32))

    assert outcome['ok'] is True
    started = backend.start_specs[0]
    assert started.network == BoxNetworkMode.OFF
    assert started.host_path_mode == BoxHostMountMode.READ_ONLY
@pytest.mark.asyncio
async def test_profile_timeout_clamped_to_max():
    """A requested timeout above the profile maximum is lowered to that maximum."""
    log = Mock()
    backend = FakeBackend(log)
    service = BoxService(
        make_app(log),
        runtime=BoxRuntime(logger=log, backends=[backend], session_ttl_sec=300),
    )
    await service.initialize()

    request = {'cmd': 'echo hi', 'timeout_sec': 999}
    outcome = await service.execute_sandbox_tool(request, make_query(33))

    assert outcome['ok'] is True
    started = backend.start_specs[0]
    # 120 is the default profile's max_timeout_sec.
    assert started.timeout_sec == 120
@pytest.mark.asyncio
@pytest.mark.parametrize('timeout_value', ['999', 999.0])
async def test_profile_timeout_clamped_for_coercible_inputs(timeout_value):
    """String and float timeouts that coerce to int are clamped like plain ints."""
    log = Mock()
    backend = FakeBackend(log)
    service = BoxService(
        make_app(log),
        runtime=BoxRuntime(logger=log, backends=[backend], session_ttl_sec=300),
    )
    await service.initialize()

    request = {'cmd': 'echo hi', 'timeout_sec': timeout_value}
    await service.execute_sandbox_tool(request, make_query(34))

    started = backend.start_specs[0]
    assert started.timeout_sec == 120
def test_unknown_profile_raises_error():
    """Constructing the service with an unregistered profile name fails at once."""
    app = make_app(Mock(), profile='nonexistent')

    with pytest.raises(BoxValidationError, match='unknown box profile'):
        BoxService(app)
def test_builtin_profiles_are_consistent():
    """Sanity-check the registry contents and the relations between presets."""
    for expected_name in ('default', 'offline_readonly', 'network_basic', 'network_extended'):
        assert expected_name in BUILTIN_PROFILES

    baseline = BUILTIN_PROFILES['default']

    # offline_readonly: no network, read-only mount, and those choices are locked.
    offline = BUILTIN_PROFILES['offline_readonly']
    assert offline.network == BoxNetworkMode.OFF
    assert offline.host_path_mode == BoxHostMountMode.READ_ONLY
    for locked_field in ('network', 'host_path_mode', 'read_only_rootfs'):
        assert locked_field in offline.locked
    assert offline.max_timeout_sec <= baseline.max_timeout_sec

    # network_basic: network on, root filesystem still read-only.
    basic = BUILTIN_PROFILES['network_basic']
    assert basic.network == BoxNetworkMode.ON
    assert basic.read_only_rootfs is True

    # network_extended: network on, writable rootfs, more CPU and memory.
    extended = BUILTIN_PROFILES['network_extended']
    assert extended.network == BoxNetworkMode.ON
    assert extended.read_only_rootfs is False
    assert extended.cpus > baseline.cpus
    assert extended.memory_mb > baseline.memory_mb
@pytest.mark.asyncio
async def test_profile_default_applies_resource_limits():
    """The started spec carries the default profile's resource limits."""
    log = Mock()
    backend = FakeBackend(log)
    service = BoxService(
        make_app(log),
        runtime=BoxRuntime(logger=log, backends=[backend], session_ttl_sec=300),
    )
    await service.initialize()

    await service.execute_sandbox_tool({'cmd': 'echo hi'}, make_query(40))

    started = backend.start_specs[0]
    expected = BUILTIN_PROFILES['default']
    assert started.cpus == expected.cpus
    assert started.memory_mb == expected.memory_mb
    assert started.pids_limit == expected.pids_limit
    assert started.read_only_rootfs == expected.read_only_rootfs
@pytest.mark.asyncio
async def test_profile_offline_readonly_locks_read_only_rootfs():
    """A tool call cannot switch off read_only_rootfs under offline_readonly."""
    log = Mock()
    backend = FakeBackend(log)
    service = BoxService(
        make_app(log, profile='offline_readonly'),
        runtime=BoxRuntime(logger=log, backends=[backend], session_ttl_sec=300),
    )
    await service.initialize()

    request = {'cmd': 'echo hi', 'read_only_rootfs': False}
    await service.execute_sandbox_tool(request, make_query(41))

    started = backend.start_specs[0]
    assert started.read_only_rootfs is True
@pytest.mark.asyncio
async def test_profile_network_extended_has_relaxed_limits():
    """network_extended yields network access, larger limits and a writable rootfs."""
    log = Mock()
    backend = FakeBackend(log)
    service = BoxService(
        make_app(log, profile='network_extended'),
        runtime=BoxRuntime(logger=log, backends=[backend], session_ttl_sec=300),
    )
    await service.initialize()

    await service.execute_sandbox_tool({'cmd': 'echo hi'}, make_query(42))

    started = backend.start_specs[0]
    assert started.network == BoxNetworkMode.ON
    assert started.cpus == 2.0
    assert started.memory_mb == 1024
    assert started.read_only_rootfs is False
def test_box_spec_validates_resource_limits():
    """BoxSpec rejects out-of-range resource limits with a validation error.

    ``pytest.raises(Exception)`` would also pass on unrelated failures such
    as a typo in the payload, so the expectation is narrowed to ValueError
    (pydantic's ValidationError derives from it) and to the message of the
    validator that should fire.
    """
    invalid_cases = (
        ('cpus', 0, 'cpus must be greater than 0'),
        ('memory_mb', 10, 'memory_mb must be at least 32'),
        ('pids_limit', 0, 'pids_limit must be at least 1'),
    )
    for field, bad_value, message in invalid_cases:
        with pytest.raises(ValueError, match=message):
            BoxSpec.model_validate({'cmd': 'echo', 'session_id': 's1', field: bad_value})