refactor(box): remove legacy in-process runtime code and clean up smells

After the architecture settled on always using an independent Box Runtime
service, several pieces of compatibility code and design shortcuts were
left behind. This commit cleans them up:

- Remove `LocalBoxRuntimeClient` and `create_box_runtime_client` from
  production code (moved to test-only helper).
- Remove unused `_clip_bytes` method from backend.
- Remove `__langbot_session_placeholder__` hack by making `BoxSpec.cmd`
  default to empty and validating non-empty only in `runtime.execute()`.
- Extract a `get_box_config()` helper to eliminate config-access
  boilerplate that was duplicated in five places.
- Remove `session_id`/`host_path`/`host_path_mode` from the LLM-facing
  tool schema to enforce request-scoped session isolation.
- Fix dual shutdown path: `NativeToolLoader.shutdown()` no longer calls
  `box_service.shutdown()` (handled by `Application.dispose()`).
- Simplify `_assert_session_compatible` with a loop.
- Inline client creation in `BoxRuntimeConnector`.
- Remove redundant `BOX__RUNTIME_URL` env var from docker-compose
  (auto-detected by code).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
youhuanghe
2026-03-20 12:04:39 +00:00
committed by WangCham
parent eaae31edd0
commit 6391678fdb
11 changed files with 98 additions and 161 deletions
+8 -7
View File
@@ -5,33 +5,34 @@ import pytest
from langbot.pkg.box.backend import CLISandboxBackend, _MAX_RAW_OUTPUT_BYTES
class TestClipBytes:
class TestClipCapturedBytes:
def test_within_limit_unchanged(self):
data = b'hello world'
result = CLISandboxBackend._clip_bytes(data, limit=1024)
result = CLISandboxBackend._clip_captured_bytes(data, total_size=len(data), limit=1024)
assert result == 'hello world'
def test_exceeding_limit_clips_and_appends_notice(self):
data = b'A' * 200
result = CLISandboxBackend._clip_bytes(data, limit=100)
captured = b'A' * 100
total_size = 200
result = CLISandboxBackend._clip_captured_bytes(captured, total_size=total_size, limit=100)
assert result.startswith('A' * 100)
assert 'raw output clipped at 100 bytes' in result
assert '100 bytes discarded' in result
def test_exact_limit_not_clipped(self):
data = b'B' * 100
result = CLISandboxBackend._clip_bytes(data, limit=100)
result = CLISandboxBackend._clip_captured_bytes(data, total_size=100, limit=100)
assert result == 'B' * 100
assert 'clipped' not in result
def test_default_limit_is_module_constant(self):
data = b'x' * 10
result = CLISandboxBackend._clip_bytes(data)
result = CLISandboxBackend._clip_captured_bytes(data, total_size=10)
assert result == 'x' * 10
assert _MAX_RAW_OUTPUT_BYTES == 1_048_576
def test_invalid_utf8_replaced(self):
data = b'ok\xff\xfetail'
result = CLISandboxBackend._clip_bytes(data, limit=1024)
result = CLISandboxBackend._clip_captured_bytes(data, total_size=len(data), limit=1024)
assert 'ok' in result
assert 'tail' in result
+54 -23
View File
@@ -12,7 +12,7 @@ import pytest
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
from langbot.pkg.box.backend import BaseSandboxBackend
from langbot.pkg.box.client import LocalBoxRuntimeClient, RemoteBoxRuntimeClient
from langbot.pkg.box.client import BoxRuntimeClient, RemoteBoxRuntimeClient
from langbot.pkg.box.errors import BoxBackendUnavailableError, BoxSessionConflictError, BoxSessionNotFoundError, BoxValidationError
from langbot.pkg.box.models import (
BUILTIN_PROFILES,
@@ -30,6 +30,37 @@ from langbot.pkg.box.service import BoxService
_UTC = dt.timezone.utc
class _InProcessBoxRuntimeClient(BoxRuntimeClient):
    """Test-only client that forwards every call to an in-process BoxRuntime (no HTTP)."""

    def __init__(self, logger, runtime=None):
        """Wrap ``runtime``; build a new ``BoxRuntime(logger=logger)`` when none is supplied."""
        if not runtime:
            runtime = BoxRuntime(logger=logger)
        self._runtime = runtime

    async def initialize(self):
        """Initialize the wrapped runtime."""
        await self._runtime.initialize()

    async def execute(self, spec):
        """Run ``spec`` on the wrapped runtime and return its result."""
        outcome = await self._runtime.execute(spec)
        return outcome

    async def shutdown(self):
        """Shut down the wrapped runtime."""
        await self._runtime.shutdown()

    async def get_status(self):
        """Return the wrapped runtime's status."""
        status = await self._runtime.get_status()
        return status

    async def get_sessions(self):
        """Return the wrapped runtime's sessions."""
        # Unlike the other delegations, the runtime call is not awaited here;
        # whatever get_sessions() returns is handed back unchanged.
        sessions = self._runtime.get_sessions()
        return sessions

    async def get_backend_info(self):
        """Return the wrapped runtime's backend info."""
        info = await self._runtime.get_backend_info()
        return info

    async def delete_session(self, session_id):
        """Delete the session identified by ``session_id`` on the wrapped runtime."""
        await self._runtime.delete_session(session_id)

    async def create_session(self, spec):
        """Create a session from ``spec`` on the wrapped runtime and return the result."""
        created = await self._runtime.create_session(spec)
        return created
def _can_open_test_socket() -> bool:
try:
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
@@ -197,7 +228,7 @@ async def test_box_service_defaults_session_id_from_query():
logger = Mock()
backend = FakeBackend(logger)
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
service = BoxService(make_app(logger), client=LocalBoxRuntimeClient(logger, runtime))
service = BoxService(make_app(logger), client=_InProcessBoxRuntimeClient(logger, runtime))
await service.initialize()
result = await service.execute_sandbox_tool({'cmd': 'pwd', 'network': BoxNetworkMode.OFF.value}, make_query(7))
@@ -212,7 +243,7 @@ async def test_box_service_fails_closed_when_backend_unavailable():
logger = Mock()
backend = FakeBackend(logger, available=False)
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
service = BoxService(make_app(logger), client=LocalBoxRuntimeClient(logger, runtime))
service = BoxService(make_app(logger), client=_InProcessBoxRuntimeClient(logger, runtime))
await service.initialize()
with pytest.raises(BoxBackendUnavailableError):
@@ -226,7 +257,7 @@ async def test_box_service_allows_host_mount_under_configured_root(tmp_path):
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
host_dir = tmp_path / 'mounted-workspace'
host_dir.mkdir()
service = BoxService(make_app(logger, [str(tmp_path)]), client=LocalBoxRuntimeClient(logger, runtime))
service = BoxService(make_app(logger, [str(tmp_path)]), client=_InProcessBoxRuntimeClient(logger, runtime))
await service.initialize()
result = await service.execute_sandbox_tool(
@@ -251,7 +282,7 @@ async def test_box_service_uses_default_host_workspace_when_host_path_omitted(tm
host_dir.mkdir()
app = make_app(logger, [str(tmp_path)])
app.instance_config.data['box']['default_host_workspace'] = str(host_dir)
service = BoxService(app, client=LocalBoxRuntimeClient(logger, runtime))
service = BoxService(app, client=_InProcessBoxRuntimeClient(logger, runtime))
await service.initialize()
result = await service.execute_sandbox_tool({'cmd': 'pwd'}, make_query(15))
@@ -272,7 +303,7 @@ async def test_box_service_creates_default_host_workspace_on_initialize(tmp_path
default_host_workspace = allowed_root / 'default-workspace'
app = make_app(logger, [str(allowed_root)])
app.instance_config.data['box']['default_host_workspace'] = str(default_host_workspace)
service = BoxService(app, client=LocalBoxRuntimeClient(logger, runtime))
service = BoxService(app, client=_InProcessBoxRuntimeClient(logger, runtime))
await service.initialize()
@@ -288,7 +319,7 @@ async def test_box_service_rejects_host_mount_outside_allowed_roots(tmp_path):
disallowed_root = tmp_path / 'disallowed'
allowed_root.mkdir()
disallowed_root.mkdir()
service = BoxService(make_app(logger, [str(allowed_root)]), client=LocalBoxRuntimeClient(logger, runtime))
service = BoxService(make_app(logger, [str(allowed_root)]), client=_InProcessBoxRuntimeClient(logger, runtime))
await service.initialize()
with pytest.raises(BoxValidationError):
@@ -379,7 +410,7 @@ async def test_truncate_short_output_unchanged():
logger = Mock()
backend = FakeBackendWithOutput(logger, stdout='hello world')
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
service = BoxService(make_app(logger), client=LocalBoxRuntimeClient(logger, runtime), output_limit_chars=100)
service = BoxService(make_app(logger), client=_InProcessBoxRuntimeClient(logger, runtime), output_limit_chars=100)
await service.initialize()
result = await service.execute_sandbox_tool({'cmd': 'echo hello'}, make_query(20))
@@ -400,7 +431,7 @@ async def test_truncate_preserves_head_and_tail():
backend = FakeBackendWithOutput(logger, stdout=big_output)
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
limit = 100
service = BoxService(make_app(logger), client=LocalBoxRuntimeClient(logger, runtime), output_limit_chars=limit)
service = BoxService(make_app(logger), client=_InProcessBoxRuntimeClient(logger, runtime), output_limit_chars=limit)
await service.initialize()
result = await service.execute_sandbox_tool({'cmd': 'cat big'}, make_query(21))
@@ -422,7 +453,7 @@ async def test_truncate_at_exact_limit_not_truncated():
exact_output = 'a' * 200
backend = FakeBackendWithOutput(logger, stdout=exact_output)
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
service = BoxService(make_app(logger), client=LocalBoxRuntimeClient(logger, runtime), output_limit_chars=200)
service = BoxService(make_app(logger), client=_InProcessBoxRuntimeClient(logger, runtime), output_limit_chars=200)
await service.initialize()
result = await service.execute_sandbox_tool({'cmd': 'echo a'}, make_query(22))
@@ -436,7 +467,7 @@ async def test_truncate_stderr_independently():
logger = Mock()
backend = FakeBackendWithOutput(logger, stdout='short', stderr='E' * 300)
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
service = BoxService(make_app(logger), client=LocalBoxRuntimeClient(logger, runtime), output_limit_chars=100)
service = BoxService(make_app(logger), client=_InProcessBoxRuntimeClient(logger, runtime), output_limit_chars=100)
await service.initialize()
result = await service.execute_sandbox_tool({'cmd': 'fail'}, make_query(23))
@@ -456,7 +487,7 @@ async def test_profile_default_provides_defaults():
logger = Mock()
backend = FakeBackend(logger)
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
service = BoxService(make_app(logger), client=LocalBoxRuntimeClient(logger, runtime))
service = BoxService(make_app(logger), client=_InProcessBoxRuntimeClient(logger, runtime))
await service.initialize()
result = await service.execute_sandbox_tool({'cmd': 'echo hi'}, make_query(30))
@@ -474,7 +505,7 @@ async def test_profile_unlocked_field_can_be_overridden():
logger = Mock()
backend = FakeBackend(logger)
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
service = BoxService(make_app(logger), client=LocalBoxRuntimeClient(logger, runtime))
service = BoxService(make_app(logger), client=_InProcessBoxRuntimeClient(logger, runtime))
await service.initialize()
result = await service.execute_sandbox_tool(
@@ -494,7 +525,7 @@ async def test_profile_locked_field_cannot_be_overridden():
logger = Mock()
backend = FakeBackend(logger)
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
service = BoxService(make_app(logger, profile='offline_readonly'), client=LocalBoxRuntimeClient(logger, runtime))
service = BoxService(make_app(logger, profile='offline_readonly'), client=_InProcessBoxRuntimeClient(logger, runtime))
await service.initialize()
result = await service.execute_sandbox_tool(
@@ -514,7 +545,7 @@ async def test_profile_timeout_clamped_to_max():
logger = Mock()
backend = FakeBackend(logger)
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
service = BoxService(make_app(logger), client=LocalBoxRuntimeClient(logger, runtime))
service = BoxService(make_app(logger), client=_InProcessBoxRuntimeClient(logger, runtime))
await service.initialize()
result = await service.execute_sandbox_tool(
@@ -534,7 +565,7 @@ async def test_profile_timeout_clamped_for_coercible_inputs(timeout_value):
logger = Mock()
backend = FakeBackend(logger)
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
service = BoxService(make_app(logger), client=LocalBoxRuntimeClient(logger, runtime))
service = BoxService(make_app(logger), client=_InProcessBoxRuntimeClient(logger, runtime))
await service.initialize()
await service.execute_sandbox_tool(
@@ -551,7 +582,7 @@ def test_unknown_profile_raises_error():
logger = Mock()
runtime = BoxRuntime(logger=logger, backends=[FakeBackend(logger)], session_ttl_sec=300)
with pytest.raises(BoxValidationError, match='unknown box profile'):
BoxService(make_app(logger, profile='nonexistent'), client=LocalBoxRuntimeClient(logger, runtime))
BoxService(make_app(logger, profile='nonexistent'), client=_InProcessBoxRuntimeClient(logger, runtime))
def test_builtin_profiles_are_consistent():
@@ -586,7 +617,7 @@ async def test_profile_default_applies_resource_limits():
logger = Mock()
backend = FakeBackend(logger)
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
service = BoxService(make_app(logger), client=LocalBoxRuntimeClient(logger, runtime))
service = BoxService(make_app(logger), client=_InProcessBoxRuntimeClient(logger, runtime))
await service.initialize()
await service.execute_sandbox_tool({'cmd': 'echo hi'}, make_query(40))
@@ -605,7 +636,7 @@ async def test_profile_offline_readonly_locks_read_only_rootfs():
logger = Mock()
backend = FakeBackend(logger)
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
service = BoxService(make_app(logger, profile='offline_readonly'), client=LocalBoxRuntimeClient(logger, runtime))
service = BoxService(make_app(logger, profile='offline_readonly'), client=_InProcessBoxRuntimeClient(logger, runtime))
await service.initialize()
await service.execute_sandbox_tool(
@@ -623,7 +654,7 @@ async def test_profile_network_extended_has_relaxed_limits():
logger = Mock()
backend = FakeBackend(logger)
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
service = BoxService(make_app(logger, profile='network_extended'), client=LocalBoxRuntimeClient(logger, runtime))
service = BoxService(make_app(logger, profile='network_extended'), client=_InProcessBoxRuntimeClient(logger, runtime))
await service.initialize()
await service.execute_sandbox_tool({'cmd': 'echo hi'}, make_query(42))
@@ -698,7 +729,7 @@ async def test_service_records_errors_on_failure():
logger = Mock()
backend = FakeBackend(logger, available=False)
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
service = BoxService(make_app(logger), client=LocalBoxRuntimeClient(logger, runtime))
service = BoxService(make_app(logger), client=_InProcessBoxRuntimeClient(logger, runtime))
await service.initialize()
with pytest.raises(Exception):
@@ -716,7 +747,7 @@ async def test_service_error_ring_buffer_capped():
logger = Mock()
backend = FakeBackend(logger, available=False)
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
service = BoxService(make_app(logger), client=LocalBoxRuntimeClient(logger, runtime))
service = BoxService(make_app(logger), client=_InProcessBoxRuntimeClient(logger, runtime))
await service.initialize()
for i in range(60):
@@ -735,7 +766,7 @@ async def test_service_get_status_aggregates_runtime_and_profile():
logger = Mock()
backend = FakeBackend(logger)
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
service = BoxService(make_app(logger), client=LocalBoxRuntimeClient(logger, runtime))
service = BoxService(make_app(logger), client=_InProcessBoxRuntimeClient(logger, runtime))
await service.initialize()
status = await service.get_status()