diff --git a/src/langbot/pkg/box/backend.py b/src/langbot/pkg/box/backend.py index 96e3432e..4db6525c 100644 --- a/src/langbot/pkg/box/backend.py +++ b/src/langbot/pkg/box/backend.py @@ -8,11 +8,18 @@ import logging import re import shlex import shutil +import typing import uuid from .errors import BoxError from .models import DEFAULT_BOX_MOUNT_PATH, BoxExecutionResult, BoxExecutionStatus, BoxSessionInfo, BoxSpec +# Hard cap on raw subprocess output to prevent unbounded memory usage. +# Container timeout already bounds duration, but fast commands can still +# produce large output within the time limit. After this many bytes the +# remaining output is discarded before decoding. +_MAX_RAW_OUTPUT_BYTES = 1_048_576 # 1 MB per stream + @dataclasses.dataclass(slots=True) class _CommandResult: @@ -83,6 +90,15 @@ class CLISandboxBackend(BaseSandboxBackend): if spec.network.value == 'off': args.extend(['--network', 'none']) + # Resource limits + args.extend(['--cpus', str(spec.cpus)]) + args.extend(['--memory', f'{spec.memory_mb}m']) + args.extend(['--pids-limit', str(spec.pids_limit)]) + + if spec.read_only_rootfs: + args.append('--read-only') + args.extend(['--tmpfs', '/tmp:size=64m']) + if spec.host_path is not None: mount_spec = f'{spec.host_path}:{DEFAULT_BOX_MOUNT_PATH}:{spec.host_path_mode.value}' args.extend(['-v', mount_spec]) @@ -93,7 +109,9 @@ class CLISandboxBackend(BaseSandboxBackend): f'LangBot Box backend start_session: backend={self.name} ' f'session_id={spec.session_id} container_name={container_name} ' f'image={spec.image} network={spec.network.value} ' - f'host_path={spec.host_path} host_path_mode={spec.host_path_mode.value}' + f'host_path={spec.host_path} host_path_mode={spec.host_path_mode.value} ' + f'cpus={spec.cpus} memory_mb={spec.memory_mb} pids_limit={spec.pids_limit} ' + f'read_only_rootfs={spec.read_only_rootfs}' ) await self._run_command(args, timeout_sec=30, check=True) @@ -106,6 +124,10 @@ class CLISandboxBackend(BaseSandboxBackend): network=spec.network, host_path=spec.host_path, host_path_mode=spec.host_path_mode, + cpus=spec.cpus, + memory_mb=spec.memory_mb, + pids_limit=spec.pids_limit, + read_only_rootfs=spec.read_only_rootfs, created_at=now, last_used_at=now, ) @@ -191,21 +213,30 @@ class CLISandboxBackend(BaseSandboxBackend): stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, ) + stdout_task = asyncio.create_task(self._read_stream(process.stdout)) + stderr_task = asyncio.create_task(self._read_stream(process.stderr)) + timed_out = False try: - stdout_bytes, stderr_bytes = await asyncio.wait_for(process.communicate(), timeout=timeout_sec) + await asyncio.wait_for(process.wait(), timeout=timeout_sec) except asyncio.TimeoutError: process.kill() - stdout_bytes, stderr_bytes = await process.communicate() + timed_out = True + await process.wait() + + stdout_bytes, stdout_total = await stdout_task + stderr_bytes, stderr_total = await stderr_task + + if timed_out: return _CommandResult( return_code=-1, - stdout=stdout_bytes.decode('utf-8', errors='replace').strip(), - stderr=stderr_bytes.decode('utf-8', errors='replace').strip(), + stdout=self._clip_captured_bytes(stdout_bytes, stdout_total), + stderr=self._clip_captured_bytes(stderr_bytes, stderr_total), timed_out=True, ) - stdout = stdout_bytes.decode('utf-8', errors='replace').strip() - stderr = stderr_bytes.decode('utf-8', errors='replace').strip() + stdout = self._clip_captured_bytes(stdout_bytes, stdout_total) + stderr = self._clip_captured_bytes(stderr_bytes, stderr_total) if check and process.returncode != 0: raise BoxError(self._format_cli_error(stderr or stdout or 'unknown backend error')) @@ -217,6 +248,40 @@ class CLISandboxBackend(BaseSandboxBackend): timed_out=False, ) + @staticmethod + def _clip_bytes(data: bytes, limit: int = _MAX_RAW_OUTPUT_BYTES) -> str: + """Decode bytes to str, discarding bytes beyond *limit*.""" + clipped = data[:limit] + return CLISandboxBackend._clip_captured_bytes(clipped, len(data), limit=limit) + + @staticmethod + def _clip_captured_bytes(data: bytes, total_size: int, limit: int = _MAX_RAW_OUTPUT_BYTES) -> str: + text = data.decode('utf-8', errors='replace').strip() + if total_size > limit: + text += f'\n... [raw output clipped at {limit} bytes, {total_size - limit} bytes discarded]' + return text + + @staticmethod + async def _read_stream( + stream: typing.Optional[asyncio.StreamReader], + limit: int = _MAX_RAW_OUTPUT_BYTES, + ) -> tuple[bytes, int]: + if stream is None: + return b'', 0 + + chunks = bytearray() + total_size = 0 + while True: + chunk = await stream.read(65536) + if not chunk: + break + total_size += len(chunk) + remaining = limit - len(chunks) + if remaining > 0: + chunks.extend(chunk[:remaining]) + + return bytes(chunks), total_size + def _format_cli_error(self, message: str) -> str: message = ' '.join(message.split()) if len(message) > 300: diff --git a/src/langbot/pkg/box/models.py b/src/langbot/pkg/box/models.py index 9c1bb2f7..e99c85b3 100644 --- a/src/langbot/pkg/box/models.py +++ b/src/langbot/pkg/box/models.py @@ -35,6 +35,11 @@ class BoxSpec(pydantic.BaseModel): image: str = DEFAULT_BOX_IMAGE host_path: str | None = None host_path_mode: BoxHostMountMode = BoxHostMountMode.READ_WRITE + # Resource limits + cpus: float = 1.0 + memory_mb: int = 512 + pids_limit: int = 128 + read_only_rootfs: bool = True @pydantic.field_validator('cmd') @classmethod @@ -59,6 +64,27 @@ class BoxSpec(pydantic.BaseModel): raise ValueError('timeout_sec must be greater than 0') return value + @pydantic.field_validator('cpus') + @classmethod + def validate_cpus(cls, value: float) -> float: + if value <= 0: + raise ValueError('cpus must be greater than 0') + return value + + @pydantic.field_validator('memory_mb') + @classmethod + def validate_memory_mb(cls, value: int) -> int: + if value < 32: + raise ValueError('memory_mb must be at least 32') + return value + + @pydantic.field_validator('pids_limit') + @classmethod + def validate_pids_limit(cls, value: int) -> int: + if value < 1: + raise ValueError('pids_limit must be at least 1') + return value + @pydantic.field_validator('session_id') @classmethod def validate_session_id(cls, value: str) -> str: @@ -91,6 +117,74 @@ class BoxSpec(pydantic.BaseModel): return self +class BoxProfile(pydantic.BaseModel): + """Preset sandbox configuration. + + Provides default values for BoxSpec fields and optionally locks fields + so that tool-call parameters cannot override them. + """ + + name: str + image: str = DEFAULT_BOX_IMAGE + network: BoxNetworkMode = BoxNetworkMode.OFF + timeout_sec: int = 30 + host_path_mode: BoxHostMountMode = BoxHostMountMode.READ_WRITE + max_timeout_sec: int = 120 + # Resource limits + cpus: float = 1.0 + memory_mb: int = 512 + pids_limit: int = 128 + read_only_rootfs: bool = True + locked: frozenset[str] = frozenset() + + model_config = pydantic.ConfigDict(frozen=True) + + +BUILTIN_PROFILES: dict[str, BoxProfile] = { + 'default': BoxProfile( + name='default', + network=BoxNetworkMode.OFF, + host_path_mode=BoxHostMountMode.READ_WRITE, + cpus=1.0, + memory_mb=512, + pids_limit=128, + read_only_rootfs=True, + max_timeout_sec=120, + ), + 'offline_readonly': BoxProfile( + name='offline_readonly', + network=BoxNetworkMode.OFF, + host_path_mode=BoxHostMountMode.READ_ONLY, + cpus=0.5, + memory_mb=256, + pids_limit=64, + read_only_rootfs=True, + max_timeout_sec=60, + locked=frozenset({'network', 'host_path_mode', 'read_only_rootfs'}), + ), + 'network_basic': BoxProfile( + name='network_basic', + network=BoxNetworkMode.ON, + host_path_mode=BoxHostMountMode.READ_WRITE, + cpus=1.0, + memory_mb=512, + pids_limit=128, + read_only_rootfs=True, + max_timeout_sec=120, + ), + 'network_extended': BoxProfile( + name='network_extended', + network=BoxNetworkMode.ON, + host_path_mode=BoxHostMountMode.READ_WRITE, + cpus=2.0, + memory_mb=1024, + pids_limit=256, + read_only_rootfs=False, + max_timeout_sec=300, + ), +} + + class BoxSessionInfo(pydantic.BaseModel): session_id: str backend_name: str @@ -99,6 +193,10 @@ class BoxSessionInfo(pydantic.BaseModel): network: BoxNetworkMode host_path: str | None = None host_path_mode: BoxHostMountMode = BoxHostMountMode.READ_WRITE + cpus: float = 1.0 + memory_mb: int = 512 + pids_limit: int = 128 + read_only_rootfs: bool = True created_at: dt.datetime last_used_at: dt.datetime diff --git a/src/langbot/pkg/box/runtime.py b/src/langbot/pkg/box/runtime.py index cfbfc40a..d4a93ed5 100644 --- a/src/langbot/pkg/box/runtime.py +++ b/src/langbot/pkg/box/runtime.py @@ -9,6 +9,8 @@ from .backend import BaseSandboxBackend, DockerBackend, PodmanBackend from .errors import BoxBackendUnavailableError, BoxSessionConflictError from .models import BoxExecutionResult, BoxExecutionStatus, BoxSessionInfo, BoxSpec +_UTC = dt.timezone.utc + @dataclasses.dataclass(slots=True) class _RuntimeSession: @@ -48,7 +50,7 @@ class BoxRuntime: result = await (await self._get_backend()).exec(session.info, spec) async with self._lock: - now = dt.datetime.now(dt.UTC) + now = dt.datetime.now(_UTC) if spec.session_id in self._sessions: self._sessions[spec.session_id].info.last_used_at = now @@ -70,7 +72,7 @@ class BoxRuntime: existing = self._sessions.get(spec.session_id) if existing is not None: self._assert_session_compatible(existing.info, spec) - existing.info.last_used_at = dt.datetime.now(dt.UTC) + existing.info.last_used_at = dt.datetime.now(_UTC) self.logger.info( 'LangBot Box session reused: ' f'session_id={spec.session_id} ' @@ -121,7 +123,7 @@ class BoxRuntime: if self.session_ttl_sec <= 0: return - deadline = dt.datetime.now(dt.UTC) - dt.timedelta(seconds=self.session_ttl_sec) + deadline = dt.datetime.now(_UTC) - dt.timedelta(seconds=self.session_ttl_sec) expired_session_ids = [ session_id for session_id, session in self._sessions.items() @@ -164,3 +166,19 @@ class BoxRuntime: raise BoxSessionConflictError( f'sandbox_exec session {spec.session_id} already exists with host_path_mode={session.host_path_mode.value}' ) + if session.cpus != spec.cpus: + raise BoxSessionConflictError( + f'sandbox_exec session {spec.session_id} already exists with cpus={session.cpus}' + ) + if session.memory_mb != spec.memory_mb: + raise BoxSessionConflictError( + f'sandbox_exec session {spec.session_id} already exists with memory_mb={session.memory_mb}' + ) + if session.pids_limit != spec.pids_limit: + raise BoxSessionConflictError( + f'sandbox_exec session {spec.session_id} already exists with pids_limit={session.pids_limit}' + ) + if session.read_only_rootfs != spec.read_only_rootfs: + raise BoxSessionConflictError( + f'sandbox_exec session {spec.session_id} already exists with read_only_rootfs={session.read_only_rootfs}' + ) diff --git a/src/langbot/pkg/box/service.py b/src/langbot/pkg/box/service.py index 650c76ff..8736706f 100644 --- a/src/langbot/pkg/box/service.py +++ b/src/langbot/pkg/box/service.py @@ -1,5 +1,6 @@ from __future__ import annotations +import enum import json import os from typing import TYPE_CHECKING @@ -7,9 +8,11 @@ from typing import TYPE_CHECKING import pydantic from .errors import BoxValidationError -from .models import BoxExecutionResult, BoxSpec +from .models import BUILTIN_PROFILES, BoxExecutionResult, BoxProfile, BoxSpec from .runtime import BoxRuntime +_INT_ADAPTER = pydantic.TypeAdapter(int) + if TYPE_CHECKING: from ..core import app as core_app import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query @@ -27,6 +30,7 @@ class BoxService: self.output_limit_chars = output_limit_chars self.allowed_host_mount_roots = self._load_allowed_host_mount_roots() self.default_host_workspace = self._load_default_host_workspace() + self.profile = self._load_profile() async def initialize(self): await self.runtime.initialize() @@ -38,6 +42,8 @@ class BoxService: if spec_payload.get('host_path') in (None, '') and self.default_host_workspace is not None: spec_payload['host_path'] = self.default_host_workspace + self._apply_profile(spec_payload) + try: spec = BoxSpec.model_validate(spec_payload) except pydantic.ValidationError as exc: @@ -81,7 +87,32 @@ class BoxService: def _truncate(self, text: str) -> tuple[str, bool]: if len(text) <= self.output_limit_chars: return text, False - return f'{text[: self.output_limit_chars]}...', True + if self.output_limit_chars <= 0: + return '', True + + head_size = 0 + tail_size = 0 + notice = '' + # Recompute once the omitted count is known so the final payload + # stays within output_limit_chars even after adding the notice. + for _ in range(4): + omitted = max(len(text) - head_size - tail_size, 0) + notice = f'\n\n... [{omitted} characters truncated] ...\n\n' + available = self.output_limit_chars - len(notice) + if available <= 0: + return notice[: self.output_limit_chars], True + + new_head_size = int(available * 0.6) + new_tail_size = available - new_head_size + if new_head_size == head_size and new_tail_size == tail_size: + break + head_size = new_head_size + tail_size = new_tail_size + + head = text[:head_size] + tail = text[-tail_size:] if tail_size else '' + truncated = f'{head}{notice}{tail}' + return truncated[: self.output_limit_chars], True def _summarize_spec(self, spec: BoxSpec) -> dict: cmd = spec.cmd.strip() @@ -96,6 +127,10 @@ class BoxService: 'image': spec.image, 'host_path': spec.host_path, 'host_path_mode': spec.host_path_mode.value, + 'cpus': spec.cpus, + 'memory_mb': spec.memory_mb, + 'pids_limit': spec.pids_limit, + 'read_only_rootfs': spec.read_only_rootfs, 'env_keys': sorted(spec.env.keys()), 'cmd': cmd, } @@ -157,3 +192,40 @@ class BoxService: allowed_roots = ', '.join(self.allowed_host_mount_roots) raise BoxValidationError(f'host_path is outside allowed_host_mount_roots: {allowed_roots}') + + def _load_profile(self) -> BoxProfile: + box_config = getattr(self.ap, 'instance_config', None) + box_config_data = getattr(box_config, 'data', {}) if box_config is not None else {} + profile_name = str(box_config_data.get('box', {}).get('profile', 'default')).strip() or 'default' + + profile = BUILTIN_PROFILES.get(profile_name) + if profile is None: + available = ', '.join(sorted(BUILTIN_PROFILES)) + raise BoxValidationError(f"unknown box profile '{profile_name}', available profiles: {available}") + return profile + + def _apply_profile(self, params: dict): + """Merge profile defaults into *params* in-place, enforce locked fields and clamp timeout.""" + profile = self.profile + _PROFILE_FIELDS = ( + 'image', 'network', 'timeout_sec', 'host_path_mode', + 'cpus', 'memory_mb', 'pids_limit', 'read_only_rootfs', + ) + + for field in _PROFILE_FIELDS: + profile_value = getattr(profile, field) + raw_value = profile_value.value if isinstance(profile_value, enum.Enum) else profile_value + + if field in profile.locked: + params[field] = raw_value + elif field not in params: + params[field] = raw_value + + timeout = params.get('timeout_sec') + try: + normalized_timeout = _INT_ADAPTER.validate_python(timeout) + except pydantic.ValidationError: + return + + if normalized_timeout > profile.max_timeout_sec: + params['timeout_sec'] = profile.max_timeout_sec diff --git a/src/langbot/templates/config.yaml b/src/langbot/templates/config.yaml index ef6d1ec9..efee6d3c 100644 --- a/src/langbot/templates/config.yaml +++ b/src/langbot/templates/config.yaml @@ -88,6 +88,7 @@ monitoring: # Cleanup check interval in hours check_interval_hours: 1 box: + profile: 'default' default_host_workspace: './data/box-workspaces/default' allowed_host_mount_roots: - './data/box-workspaces' diff --git a/tests/unit_tests/box/test_backend_clip.py b/tests/unit_tests/box/test_backend_clip.py new file mode 100644 index 00000000..af593abe --- /dev/null +++ b/tests/unit_tests/box/test_backend_clip.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +import pytest + +from langbot.pkg.box.backend import CLISandboxBackend, _MAX_RAW_OUTPUT_BYTES + + +class TestClipBytes: + def test_within_limit_unchanged(self): + data = b'hello world' + result = CLISandboxBackend._clip_bytes(data, limit=1024) + assert result == 'hello world' + + def test_exceeding_limit_clips_and_appends_notice(self): + data = b'A' * 200 + result = CLISandboxBackend._clip_bytes(data, limit=100) + assert result.startswith('A' * 100) + assert 'raw output clipped at 100 bytes' in result + assert '100 bytes discarded' in result + + def test_exact_limit_not_clipped(self): + data = b'B' * 100 + result = CLISandboxBackend._clip_bytes(data, limit=100) + assert result == 'B' * 100 + assert 'clipped' not in result + + def test_default_limit_is_module_constant(self): + data = b'x' * 10 + result = CLISandboxBackend._clip_bytes(data) + assert result == 'x' * 10 + assert _MAX_RAW_OUTPUT_BYTES == 1_048_576 + + def test_invalid_utf8_replaced(self): + data = b'ok\xff\xfetail' + result = CLISandboxBackend._clip_bytes(data, limit=1024) + assert 'ok' in result + assert 'tail' in result diff --git a/tests/unit_tests/box/test_box_service.py b/tests/unit_tests/box/test_box_service.py index ffb06b58..104f34ec 100644 --- a/tests/unit_tests/box/test_box_service.py +++ b/tests/unit_tests/box/test_box_service.py @@ -12,16 +12,20 @@ import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query from langbot.pkg.box.backend import BaseSandboxBackend from langbot.pkg.box.errors import BoxBackendUnavailableError, BoxSessionConflictError, BoxValidationError from langbot.pkg.box.models import ( + BUILTIN_PROFILES, BoxExecutionResult, BoxExecutionStatus, BoxHostMountMode, BoxNetworkMode, + BoxProfile, BoxSessionInfo, BoxSpec, ) from langbot.pkg.box.runtime import BoxRuntime from langbot.pkg.box.service import BoxService +_UTC = dt.timezone.utc + class FakeBackend(BaseSandboxBackend): def __init__(self, logger: Mock, available: bool = True): @@ -39,7 +43,7 @@ class FakeBackend(BaseSandboxBackend): async def start_session(self, spec: BoxSpec) -> BoxSessionInfo: self.start_calls.append(spec.session_id) self.start_specs.append(spec) - now = dt.datetime.now(dt.UTC) + now = dt.datetime.now(_UTC) return BoxSessionInfo( session_id=spec.session_id, backend_name=self.name, @@ -48,6 +52,10 @@ class FakeBackend(BaseSandboxBackend): network=spec.network, host_path=spec.host_path, host_path_mode=spec.host_path_mode, + cpus=spec.cpus, + memory_mb=spec.memory_mb, + pids_limit=spec.pids_limit, + read_only_rootfs=spec.read_only_rootfs, created_at=now, last_used_at=now, ) @@ -72,12 +80,13 @@ def make_query(query_id: int = 42) -> pipeline_query.Query: return pipeline_query.Query.model_construct(query_id=query_id) -def make_app(logger: Mock, allowed_host_mount_roots: list[str] | None = None): +def make_app(logger: Mock, allowed_host_mount_roots: list[str] | None = None, profile: str = 'default'): return SimpleNamespace( logger=logger, instance_config=SimpleNamespace( data={ 'box': { + 'profile': profile, 'allowed_host_mount_roots': allowed_host_mount_roots or [], 'default_host_workspace': '', } @@ -226,3 +235,313 @@ async def test_box_runtime_rejects_host_mount_conflict_in_same_session(tmp_path) with pytest.raises(BoxSessionConflictError): await runtime.execute(second) + + +@pytest.mark.asyncio +async def test_box_runtime_rejects_resource_limit_conflict_in_same_session(): + logger = Mock() + backend = FakeBackend(logger) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + await runtime.initialize() + + first = BoxSpec.model_validate({'cmd': 'echo first', 'session_id': 'req-resource', 'cpus': 1.0}) + second = BoxSpec.model_validate({'cmd': 'echo second', 'session_id': 'req-resource', 'cpus': 2.0}) + + await runtime.execute(first) + + with pytest.raises(BoxSessionConflictError): + await runtime.execute(second) + + +# ── Truncation tests ────────────────────────────────────────────────── + + +class FakeBackendWithOutput(FakeBackend): + """FakeBackend that returns configurable stdout/stderr.""" + + def __init__(self, logger: Mock, stdout: str = '', stderr: str = ''): + super().__init__(logger) + self._stdout = stdout + self._stderr = stderr + + async def exec(self, session: BoxSessionInfo, spec: BoxSpec) -> BoxExecutionResult: + self.exec_calls.append((session.session_id, spec.cmd)) + return BoxExecutionResult( + session_id=session.session_id, + backend_name=self.name, + status=BoxExecutionStatus.COMPLETED, + exit_code=0, + stdout=self._stdout, + stderr=self._stderr, + duration_ms=5, + ) + + +@pytest.mark.asyncio +async def test_truncate_short_output_unchanged(): + logger = Mock() + backend = FakeBackendWithOutput(logger, stdout='hello world') + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + service = BoxService(make_app(logger), runtime=runtime, output_limit_chars=100) + await service.initialize() + + result = await service.execute_sandbox_tool({'cmd': 'echo hello'}, make_query(20)) + + assert result['stdout'] == 'hello world' + assert result['stdout_truncated'] is False + + +@pytest.mark.asyncio +async def test_truncate_preserves_head_and_tail(): + logger = Mock() + # Build output: "AAAA...BBB..." where each section is identifiable + head_marker = 'HEAD_START|' + tail_marker = '|TAIL_END' + filler = 'x' * 500 + big_output = f'{head_marker}{filler}{tail_marker}' + + backend = FakeBackendWithOutput(logger, stdout=big_output) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + limit = 100 + service = BoxService(make_app(logger), runtime=runtime, output_limit_chars=limit) + await service.initialize() + + result = await service.execute_sandbox_tool({'cmd': 'cat big'}, make_query(21)) + + assert result['stdout_truncated'] is True + stdout = result['stdout'] + # Head part should contain the head marker + assert stdout.startswith(head_marker) + # Tail part should contain the tail marker + assert stdout.endswith(tail_marker) + # Should contain the truncation notice + assert 'characters truncated' in stdout + assert len(stdout) <= limit + + +@pytest.mark.asyncio +async def test_truncate_at_exact_limit_not_truncated(): + logger = Mock() + exact_output = 'a' * 200 + backend = FakeBackendWithOutput(logger, stdout=exact_output) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + service = BoxService(make_app(logger), runtime=runtime, output_limit_chars=200) + await service.initialize() + + result = await service.execute_sandbox_tool({'cmd': 'echo a'}, make_query(22)) + + assert result['stdout'] == exact_output + assert result['stdout_truncated'] is False + + +@pytest.mark.asyncio +async def test_truncate_stderr_independently(): + logger = Mock() + backend = FakeBackendWithOutput(logger, stdout='short', stderr='E' * 300) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + service = BoxService(make_app(logger), runtime=runtime, output_limit_chars=100) + await service.initialize() + + result = await service.execute_sandbox_tool({'cmd': 'fail'}, make_query(23)) + + assert result['stdout_truncated'] is False + assert result['stderr_truncated'] is True + assert 'characters truncated' in result['stderr'] + assert len(result['stderr']) <= 100 + + +# ── Profile tests ───────────────────────────────────────────────────── + + +@pytest.mark.asyncio +async def test_profile_default_provides_defaults(): + """When tool call omits network/image, profile defaults are used.""" + logger = Mock() + backend = FakeBackend(logger) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + service = BoxService(make_app(logger), runtime=runtime) + await service.initialize() + + result = await service.execute_sandbox_tool({'cmd': 'echo hi'}, make_query(30)) + + assert result['ok'] is True + spec = backend.start_specs[0] + assert spec.network == BoxNetworkMode.OFF + assert spec.image == 'python:3.11-slim' + assert spec.timeout_sec == 30 + + +@pytest.mark.asyncio +async def test_profile_unlocked_field_can_be_overridden(): + """Tool call can override unlocked profile fields.""" + logger = Mock() + backend = FakeBackend(logger) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + service = BoxService(make_app(logger), runtime=runtime) + await service.initialize() + + result = await service.execute_sandbox_tool( + {'cmd': 'echo hi', 'timeout_sec': 60, 'network': 'on'}, + make_query(31), + ) + + assert result['ok'] is True + spec = backend.start_specs[0] + assert spec.timeout_sec == 60 + assert spec.network == BoxNetworkMode.ON + + +@pytest.mark.asyncio +async def test_profile_locked_field_cannot_be_overridden(): + """offline_readonly profile locks network and host_path_mode.""" + logger = Mock() + backend = FakeBackend(logger) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + service = BoxService(make_app(logger, profile='offline_readonly'), runtime=runtime) + await service.initialize() + + result = await service.execute_sandbox_tool( + {'cmd': 'echo hi', 'network': 'on', 'host_path_mode': 'rw'}, + make_query(32), + ) + + assert result['ok'] is True + spec = backend.start_specs[0] + assert spec.network == BoxNetworkMode.OFF + assert spec.host_path_mode == BoxHostMountMode.READ_ONLY + + +@pytest.mark.asyncio +async def test_profile_timeout_clamped_to_max(): + """timeout_sec exceeding max_timeout_sec is clamped.""" + logger = Mock() + backend = FakeBackend(logger) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + service = BoxService(make_app(logger), runtime=runtime) + await service.initialize() + + result = await service.execute_sandbox_tool( + {'cmd': 'echo hi', 'timeout_sec': 999}, + make_query(33), + ) + + assert result['ok'] is True + spec = backend.start_specs[0] + # default profile max_timeout_sec = 120 + assert spec.timeout_sec == 120 + + +@pytest.mark.asyncio +@pytest.mark.parametrize('timeout_value', ['999', 999.0]) +async def test_profile_timeout_clamped_for_coercible_inputs(timeout_value): + logger = Mock() + backend = FakeBackend(logger) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + service = BoxService(make_app(logger), runtime=runtime) + await service.initialize() + + await service.execute_sandbox_tool( + {'cmd': 'echo hi', 'timeout_sec': timeout_value}, + make_query(34), + ) + + spec = backend.start_specs[0] + assert spec.timeout_sec == 120 + + +def test_unknown_profile_raises_error(): + """Config referencing a non-existent profile name raises immediately.""" + logger = Mock() + with pytest.raises(BoxValidationError, match='unknown box profile'): + BoxService(make_app(logger, profile='nonexistent')) + + +def test_builtin_profiles_are_consistent(): + """Basic sanity check on all built-in profiles.""" + assert 'default' in BUILTIN_PROFILES + assert 'offline_readonly' in BUILTIN_PROFILES + assert 'network_basic' in BUILTIN_PROFILES + assert 'network_extended' in BUILTIN_PROFILES + + offline = BUILTIN_PROFILES['offline_readonly'] + assert offline.network == BoxNetworkMode.OFF + assert offline.host_path_mode == BoxHostMountMode.READ_ONLY + assert 'network' in offline.locked + assert 'host_path_mode' in offline.locked + assert 'read_only_rootfs' in offline.locked + assert offline.max_timeout_sec <= BUILTIN_PROFILES['default'].max_timeout_sec + + basic = BUILTIN_PROFILES['network_basic'] + assert basic.network == BoxNetworkMode.ON + assert basic.read_only_rootfs is True + + extended = BUILTIN_PROFILES['network_extended'] + assert extended.network == BoxNetworkMode.ON + assert extended.read_only_rootfs is False + assert extended.cpus > BUILTIN_PROFILES['default'].cpus + assert extended.memory_mb > BUILTIN_PROFILES['default'].memory_mb + + +@pytest.mark.asyncio +async def test_profile_default_applies_resource_limits(): + """Default profile resource limits are applied to BoxSpec.""" + logger = Mock() + backend = FakeBackend(logger) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + service = BoxService(make_app(logger), runtime=runtime) + await service.initialize() + + await service.execute_sandbox_tool({'cmd': 'echo hi'}, make_query(40)) + + spec = backend.start_specs[0] + profile = BUILTIN_PROFILES['default'] + assert spec.cpus == profile.cpus + assert spec.memory_mb == profile.memory_mb + assert spec.pids_limit == profile.pids_limit + assert spec.read_only_rootfs == profile.read_only_rootfs + + +@pytest.mark.asyncio +async def test_profile_offline_readonly_locks_read_only_rootfs(): + """offline_readonly locks read_only_rootfs so it cannot be overridden.""" + logger = Mock() + backend = FakeBackend(logger) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + service = BoxService(make_app(logger, profile='offline_readonly'), runtime=runtime) + await service.initialize() + + await service.execute_sandbox_tool( + {'cmd': 'echo hi', 'read_only_rootfs': False}, + make_query(41), + ) + + spec = backend.start_specs[0] + assert spec.read_only_rootfs is True + + +@pytest.mark.asyncio +async def test_profile_network_extended_has_relaxed_limits(): + """network_extended profile provides higher resource limits.""" + logger = Mock() + backend = FakeBackend(logger) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + service = BoxService(make_app(logger, profile='network_extended'), runtime=runtime) + await service.initialize() + + await service.execute_sandbox_tool({'cmd': 'echo hi'}, make_query(42)) + + spec = backend.start_specs[0] + assert spec.network == BoxNetworkMode.ON + assert spec.cpus == 2.0 + assert spec.memory_mb == 1024 + assert spec.read_only_rootfs is False + + +def test_box_spec_validates_resource_limits(): + """BoxSpec rejects invalid resource limit values.""" + with pytest.raises(Exception): + BoxSpec.model_validate({'cmd': 'echo', 'session_id': 's1', 'cpus': 0}) + with pytest.raises(Exception): + BoxSpec.model_validate({'cmd': 'echo', 'session_id': 's1', 'memory_mb': 10}) + with pytest.raises(Exception): + BoxSpec.model_validate({'cmd': 'echo', 'session_id': 's1', 'pids_limit': 0})