mirror of
https://github.com/langbot-app/LangBot.git
synced 2026-06-12 16:56:02 +00:00
feat(box): add BoxProfile with resource limits and improved output truncation
- Implement head+tail output truncation (60/40 split) so LLM sees both
beginning and final results; add streaming byte-limited reads in backend
to prevent unbounded memory usage (_MAX_RAW_OUTPUT_BYTES = 1MB)
- Define BoxProfile model with locked fields and max_timeout_sec clamping
- Add four built-in profiles: default, offline_readonly, network_basic,
network_extended with differentiated resource and security constraints
- Add resource limit fields to BoxSpec (cpus, memory_mb, pids_limit,
read_only_rootfs) and pass corresponding container CLI flags
(--cpus, --memory, --pids-limit, --read-only, --tmpfs)
- Profile loaded from config (box.profile), applied in service layer
before BoxSpec validation; locked fields cannot be overridden by
tool-call parameters
This commit is contained in:
37
tests/unit_tests/box/test_backend_clip.py
Normal file
37
tests/unit_tests/box/test_backend_clip.py
Normal file
@@ -0,0 +1,37 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from langbot.pkg.box.backend import CLISandboxBackend, _MAX_RAW_OUTPUT_BYTES
|
||||
|
||||
|
||||
class TestClipBytes:
|
||||
def test_within_limit_unchanged(self):
|
||||
data = b'hello world'
|
||||
result = CLISandboxBackend._clip_bytes(data, limit=1024)
|
||||
assert result == 'hello world'
|
||||
|
||||
def test_exceeding_limit_clips_and_appends_notice(self):
|
||||
data = b'A' * 200
|
||||
result = CLISandboxBackend._clip_bytes(data, limit=100)
|
||||
assert result.startswith('A' * 100)
|
||||
assert 'raw output clipped at 100 bytes' in result
|
||||
assert '100 bytes discarded' in result
|
||||
|
||||
def test_exact_limit_not_clipped(self):
|
||||
data = b'B' * 100
|
||||
result = CLISandboxBackend._clip_bytes(data, limit=100)
|
||||
assert result == 'B' * 100
|
||||
assert 'clipped' not in result
|
||||
|
||||
def test_default_limit_is_module_constant(self):
|
||||
data = b'x' * 10
|
||||
result = CLISandboxBackend._clip_bytes(data)
|
||||
assert result == 'x' * 10
|
||||
assert _MAX_RAW_OUTPUT_BYTES == 1_048_576
|
||||
|
||||
def test_invalid_utf8_replaced(self):
|
||||
data = b'ok\xff\xfetail'
|
||||
result = CLISandboxBackend._clip_bytes(data, limit=1024)
|
||||
assert 'ok' in result
|
||||
assert 'tail' in result
|
||||
@@ -12,16 +12,20 @@ import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
|
||||
from langbot.pkg.box.backend import BaseSandboxBackend
|
||||
from langbot.pkg.box.errors import BoxBackendUnavailableError, BoxSessionConflictError, BoxValidationError
|
||||
from langbot.pkg.box.models import (
|
||||
BUILTIN_PROFILES,
|
||||
BoxExecutionResult,
|
||||
BoxExecutionStatus,
|
||||
BoxHostMountMode,
|
||||
BoxNetworkMode,
|
||||
BoxProfile,
|
||||
BoxSessionInfo,
|
||||
BoxSpec,
|
||||
)
|
||||
from langbot.pkg.box.runtime import BoxRuntime
|
||||
from langbot.pkg.box.service import BoxService
|
||||
|
||||
_UTC = dt.timezone.utc
|
||||
|
||||
|
||||
class FakeBackend(BaseSandboxBackend):
|
||||
def __init__(self, logger: Mock, available: bool = True):
|
||||
@@ -39,7 +43,7 @@ class FakeBackend(BaseSandboxBackend):
|
||||
async def start_session(self, spec: BoxSpec) -> BoxSessionInfo:
|
||||
self.start_calls.append(spec.session_id)
|
||||
self.start_specs.append(spec)
|
||||
now = dt.datetime.now(dt.UTC)
|
||||
now = dt.datetime.now(_UTC)
|
||||
return BoxSessionInfo(
|
||||
session_id=spec.session_id,
|
||||
backend_name=self.name,
|
||||
@@ -48,6 +52,10 @@ class FakeBackend(BaseSandboxBackend):
|
||||
network=spec.network,
|
||||
host_path=spec.host_path,
|
||||
host_path_mode=spec.host_path_mode,
|
||||
cpus=spec.cpus,
|
||||
memory_mb=spec.memory_mb,
|
||||
pids_limit=spec.pids_limit,
|
||||
read_only_rootfs=spec.read_only_rootfs,
|
||||
created_at=now,
|
||||
last_used_at=now,
|
||||
)
|
||||
@@ -72,12 +80,13 @@ def make_query(query_id: int = 42) -> pipeline_query.Query:
|
||||
return pipeline_query.Query.model_construct(query_id=query_id)
|
||||
|
||||
|
||||
def make_app(logger: Mock, allowed_host_mount_roots: list[str] | None = None):
|
||||
def make_app(logger: Mock, allowed_host_mount_roots: list[str] | None = None, profile: str = 'default'):
|
||||
return SimpleNamespace(
|
||||
logger=logger,
|
||||
instance_config=SimpleNamespace(
|
||||
data={
|
||||
'box': {
|
||||
'profile': profile,
|
||||
'allowed_host_mount_roots': allowed_host_mount_roots or [],
|
||||
'default_host_workspace': '',
|
||||
}
|
||||
@@ -226,3 +235,313 @@ async def test_box_runtime_rejects_host_mount_conflict_in_same_session(tmp_path)
|
||||
|
||||
with pytest.raises(BoxSessionConflictError):
|
||||
await runtime.execute(second)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_box_runtime_rejects_resource_limit_conflict_in_same_session():
|
||||
logger = Mock()
|
||||
backend = FakeBackend(logger)
|
||||
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
||||
await runtime.initialize()
|
||||
|
||||
first = BoxSpec.model_validate({'cmd': 'echo first', 'session_id': 'req-resource', 'cpus': 1.0})
|
||||
second = BoxSpec.model_validate({'cmd': 'echo second', 'session_id': 'req-resource', 'cpus': 2.0})
|
||||
|
||||
await runtime.execute(first)
|
||||
|
||||
with pytest.raises(BoxSessionConflictError):
|
||||
await runtime.execute(second)
|
||||
|
||||
|
||||
# ── Truncation tests ──────────────────────────────────────────────────
|
||||
|
||||
|
||||
class FakeBackendWithOutput(FakeBackend):
|
||||
"""FakeBackend that returns configurable stdout/stderr."""
|
||||
|
||||
def __init__(self, logger: Mock, stdout: str = '', stderr: str = ''):
|
||||
super().__init__(logger)
|
||||
self._stdout = stdout
|
||||
self._stderr = stderr
|
||||
|
||||
async def exec(self, session: BoxSessionInfo, spec: BoxSpec) -> BoxExecutionResult:
|
||||
self.exec_calls.append((session.session_id, spec.cmd))
|
||||
return BoxExecutionResult(
|
||||
session_id=session.session_id,
|
||||
backend_name=self.name,
|
||||
status=BoxExecutionStatus.COMPLETED,
|
||||
exit_code=0,
|
||||
stdout=self._stdout,
|
||||
stderr=self._stderr,
|
||||
duration_ms=5,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_truncate_short_output_unchanged():
|
||||
logger = Mock()
|
||||
backend = FakeBackendWithOutput(logger, stdout='hello world')
|
||||
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
||||
service = BoxService(make_app(logger), runtime=runtime, output_limit_chars=100)
|
||||
await service.initialize()
|
||||
|
||||
result = await service.execute_sandbox_tool({'cmd': 'echo hello'}, make_query(20))
|
||||
|
||||
assert result['stdout'] == 'hello world'
|
||||
assert result['stdout_truncated'] is False
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_truncate_preserves_head_and_tail():
|
||||
logger = Mock()
|
||||
# Build output: "AAAA...BBB..." where each section is identifiable
|
||||
head_marker = 'HEAD_START|'
|
||||
tail_marker = '|TAIL_END'
|
||||
filler = 'x' * 500
|
||||
big_output = f'{head_marker}{filler}{tail_marker}'
|
||||
|
||||
backend = FakeBackendWithOutput(logger, stdout=big_output)
|
||||
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
||||
limit = 100
|
||||
service = BoxService(make_app(logger), runtime=runtime, output_limit_chars=limit)
|
||||
await service.initialize()
|
||||
|
||||
result = await service.execute_sandbox_tool({'cmd': 'cat big'}, make_query(21))
|
||||
|
||||
assert result['stdout_truncated'] is True
|
||||
stdout = result['stdout']
|
||||
# Head part should contain the head marker
|
||||
assert stdout.startswith(head_marker)
|
||||
# Tail part should contain the tail marker
|
||||
assert stdout.endswith(tail_marker)
|
||||
# Should contain the truncation notice
|
||||
assert 'characters truncated' in stdout
|
||||
assert len(stdout) <= limit
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_truncate_at_exact_limit_not_truncated():
|
||||
logger = Mock()
|
||||
exact_output = 'a' * 200
|
||||
backend = FakeBackendWithOutput(logger, stdout=exact_output)
|
||||
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
||||
service = BoxService(make_app(logger), runtime=runtime, output_limit_chars=200)
|
||||
await service.initialize()
|
||||
|
||||
result = await service.execute_sandbox_tool({'cmd': 'echo a'}, make_query(22))
|
||||
|
||||
assert result['stdout'] == exact_output
|
||||
assert result['stdout_truncated'] is False
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_truncate_stderr_independently():
|
||||
logger = Mock()
|
||||
backend = FakeBackendWithOutput(logger, stdout='short', stderr='E' * 300)
|
||||
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
||||
service = BoxService(make_app(logger), runtime=runtime, output_limit_chars=100)
|
||||
await service.initialize()
|
||||
|
||||
result = await service.execute_sandbox_tool({'cmd': 'fail'}, make_query(23))
|
||||
|
||||
assert result['stdout_truncated'] is False
|
||||
assert result['stderr_truncated'] is True
|
||||
assert 'characters truncated' in result['stderr']
|
||||
assert len(result['stderr']) <= 100
|
||||
|
||||
|
||||
# ── Profile tests ─────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_profile_default_provides_defaults():
|
||||
"""When tool call omits network/image, profile defaults are used."""
|
||||
logger = Mock()
|
||||
backend = FakeBackend(logger)
|
||||
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
||||
service = BoxService(make_app(logger), runtime=runtime)
|
||||
await service.initialize()
|
||||
|
||||
result = await service.execute_sandbox_tool({'cmd': 'echo hi'}, make_query(30))
|
||||
|
||||
assert result['ok'] is True
|
||||
spec = backend.start_specs[0]
|
||||
assert spec.network == BoxNetworkMode.OFF
|
||||
assert spec.image == 'python:3.11-slim'
|
||||
assert spec.timeout_sec == 30
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_profile_unlocked_field_can_be_overridden():
|
||||
"""Tool call can override unlocked profile fields."""
|
||||
logger = Mock()
|
||||
backend = FakeBackend(logger)
|
||||
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
||||
service = BoxService(make_app(logger), runtime=runtime)
|
||||
await service.initialize()
|
||||
|
||||
result = await service.execute_sandbox_tool(
|
||||
{'cmd': 'echo hi', 'timeout_sec': 60, 'network': 'on'},
|
||||
make_query(31),
|
||||
)
|
||||
|
||||
assert result['ok'] is True
|
||||
spec = backend.start_specs[0]
|
||||
assert spec.timeout_sec == 60
|
||||
assert spec.network == BoxNetworkMode.ON
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_profile_locked_field_cannot_be_overridden():
|
||||
"""offline_readonly profile locks network and host_path_mode."""
|
||||
logger = Mock()
|
||||
backend = FakeBackend(logger)
|
||||
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
||||
service = BoxService(make_app(logger, profile='offline_readonly'), runtime=runtime)
|
||||
await service.initialize()
|
||||
|
||||
result = await service.execute_sandbox_tool(
|
||||
{'cmd': 'echo hi', 'network': 'on', 'host_path_mode': 'rw'},
|
||||
make_query(32),
|
||||
)
|
||||
|
||||
assert result['ok'] is True
|
||||
spec = backend.start_specs[0]
|
||||
assert spec.network == BoxNetworkMode.OFF
|
||||
assert spec.host_path_mode == BoxHostMountMode.READ_ONLY
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_profile_timeout_clamped_to_max():
|
||||
"""timeout_sec exceeding max_timeout_sec is clamped."""
|
||||
logger = Mock()
|
||||
backend = FakeBackend(logger)
|
||||
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
||||
service = BoxService(make_app(logger), runtime=runtime)
|
||||
await service.initialize()
|
||||
|
||||
result = await service.execute_sandbox_tool(
|
||||
{'cmd': 'echo hi', 'timeout_sec': 999},
|
||||
make_query(33),
|
||||
)
|
||||
|
||||
assert result['ok'] is True
|
||||
spec = backend.start_specs[0]
|
||||
# default profile max_timeout_sec = 120
|
||||
assert spec.timeout_sec == 120
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize('timeout_value', ['999', 999.0])
|
||||
async def test_profile_timeout_clamped_for_coercible_inputs(timeout_value):
|
||||
logger = Mock()
|
||||
backend = FakeBackend(logger)
|
||||
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
||||
service = BoxService(make_app(logger), runtime=runtime)
|
||||
await service.initialize()
|
||||
|
||||
await service.execute_sandbox_tool(
|
||||
{'cmd': 'echo hi', 'timeout_sec': timeout_value},
|
||||
make_query(34),
|
||||
)
|
||||
|
||||
spec = backend.start_specs[0]
|
||||
assert spec.timeout_sec == 120
|
||||
|
||||
|
||||
def test_unknown_profile_raises_error():
|
||||
"""Config referencing a non-existent profile name raises immediately."""
|
||||
logger = Mock()
|
||||
with pytest.raises(BoxValidationError, match='unknown box profile'):
|
||||
BoxService(make_app(logger, profile='nonexistent'))
|
||||
|
||||
|
||||
def test_builtin_profiles_are_consistent():
|
||||
"""Basic sanity check on all built-in profiles."""
|
||||
assert 'default' in BUILTIN_PROFILES
|
||||
assert 'offline_readonly' in BUILTIN_PROFILES
|
||||
assert 'network_basic' in BUILTIN_PROFILES
|
||||
assert 'network_extended' in BUILTIN_PROFILES
|
||||
|
||||
offline = BUILTIN_PROFILES['offline_readonly']
|
||||
assert offline.network == BoxNetworkMode.OFF
|
||||
assert offline.host_path_mode == BoxHostMountMode.READ_ONLY
|
||||
assert 'network' in offline.locked
|
||||
assert 'host_path_mode' in offline.locked
|
||||
assert 'read_only_rootfs' in offline.locked
|
||||
assert offline.max_timeout_sec <= BUILTIN_PROFILES['default'].max_timeout_sec
|
||||
|
||||
basic = BUILTIN_PROFILES['network_basic']
|
||||
assert basic.network == BoxNetworkMode.ON
|
||||
assert basic.read_only_rootfs is True
|
||||
|
||||
extended = BUILTIN_PROFILES['network_extended']
|
||||
assert extended.network == BoxNetworkMode.ON
|
||||
assert extended.read_only_rootfs is False
|
||||
assert extended.cpus > BUILTIN_PROFILES['default'].cpus
|
||||
assert extended.memory_mb > BUILTIN_PROFILES['default'].memory_mb
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_profile_default_applies_resource_limits():
|
||||
"""Default profile resource limits are applied to BoxSpec."""
|
||||
logger = Mock()
|
||||
backend = FakeBackend(logger)
|
||||
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
||||
service = BoxService(make_app(logger), runtime=runtime)
|
||||
await service.initialize()
|
||||
|
||||
await service.execute_sandbox_tool({'cmd': 'echo hi'}, make_query(40))
|
||||
|
||||
spec = backend.start_specs[0]
|
||||
profile = BUILTIN_PROFILES['default']
|
||||
assert spec.cpus == profile.cpus
|
||||
assert spec.memory_mb == profile.memory_mb
|
||||
assert spec.pids_limit == profile.pids_limit
|
||||
assert spec.read_only_rootfs == profile.read_only_rootfs
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_profile_offline_readonly_locks_read_only_rootfs():
|
||||
"""offline_readonly locks read_only_rootfs so it cannot be overridden."""
|
||||
logger = Mock()
|
||||
backend = FakeBackend(logger)
|
||||
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
||||
service = BoxService(make_app(logger, profile='offline_readonly'), runtime=runtime)
|
||||
await service.initialize()
|
||||
|
||||
await service.execute_sandbox_tool(
|
||||
{'cmd': 'echo hi', 'read_only_rootfs': False},
|
||||
make_query(41),
|
||||
)
|
||||
|
||||
spec = backend.start_specs[0]
|
||||
assert spec.read_only_rootfs is True
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_profile_network_extended_has_relaxed_limits():
|
||||
"""network_extended profile provides higher resource limits."""
|
||||
logger = Mock()
|
||||
backend = FakeBackend(logger)
|
||||
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
||||
service = BoxService(make_app(logger, profile='network_extended'), runtime=runtime)
|
||||
await service.initialize()
|
||||
|
||||
await service.execute_sandbox_tool({'cmd': 'echo hi'}, make_query(42))
|
||||
|
||||
spec = backend.start_specs[0]
|
||||
assert spec.network == BoxNetworkMode.ON
|
||||
assert spec.cpus == 2.0
|
||||
assert spec.memory_mb == 1024
|
||||
assert spec.read_only_rootfs is False
|
||||
|
||||
|
||||
def test_box_spec_validates_resource_limits():
|
||||
"""BoxSpec rejects invalid resource limit values."""
|
||||
with pytest.raises(Exception):
|
||||
BoxSpec.model_validate({'cmd': 'echo', 'session_id': 's1', 'cpus': 0})
|
||||
with pytest.raises(Exception):
|
||||
BoxSpec.model_validate({'cmd': 'echo', 'session_id': 's1', 'memory_mb': 10})
|
||||
with pytest.raises(Exception):
|
||||
BoxSpec.model_validate({'cmd': 'echo', 'session_id': 's1', 'pids_limit': 0})
|
||||
|
||||
Reference in New Issue
Block a user