refactor(box): unify box service lifecycle and local runtime management
This commit is contained in:
youhuanghe
2026-03-20 11:15:18 +00:00
committed by WangCham
parent 15c03fe96b
commit eaae31edd0
19 changed files with 1506 additions and 61 deletions

View File

@@ -0,0 +1,125 @@
from __future__ import annotations
from types import SimpleNamespace
from unittest.mock import AsyncMock, Mock
import pytest
from langbot.pkg.box.client import RemoteBoxRuntimeClient
from langbot.pkg.box.connector import BoxRuntimeConnector
from langbot.pkg.box.errors import BoxRuntimeUnavailableError
def make_app(logger: Mock, runtime_url: str = ''):
    """Build a minimal stand-in application object for connector tests.

    The result exposes only ``logger`` and ``instance_config.data['box']``.
    ``runtime_url`` is stored verbatim in the box section; the remaining box
    settings use fixed values.
    """
    box_settings = {
        'runtime_url': runtime_url,
        'profile': 'default',
        'allowed_host_mount_roots': [],
        'default_host_workspace': '',
    }
    instance_config = SimpleNamespace(data={'box': box_settings})
    return SimpleNamespace(logger=logger, instance_config=instance_config)
def patch_platform(monkeypatch: pytest.MonkeyPatch, value: str):
    """Make ``platform.get_platform`` return ``value`` in both box modules.

    The lookup is patched where the client module and the connector module
    each reference it, so both observe the same platform string.
    """
    patch_targets = (
        'langbot.pkg.box.client.platform.get_platform',
        'langbot.pkg.box.connector.platform.get_platform',
    )
    for target in patch_targets:
        monkeypatch.setattr(target, lambda: value)
def test_box_runtime_connector_uses_explicit_runtime_url():
    """A configured runtime URL is used verbatim and is not locally managed."""
    explicit_url = 'http://box-runtime:5410'
    app = make_app(Mock(), runtime_url=explicit_url)

    connector = BoxRuntimeConnector(app)

    assert connector.runtime_url == explicit_url
    assert connector.manages_local_runtime is False
    assert isinstance(connector.client, RemoteBoxRuntimeClient)
    assert connector.client._base_url == explicit_url
def test_box_runtime_connector_uses_local_default_runtime_url(monkeypatch: pytest.MonkeyPatch):
    """With no URL configured on 'linux', the loopback default is used and managed locally."""
    patch_platform(monkeypatch, 'linux')
    expected_url = 'http://127.0.0.1:5410'

    connector = BoxRuntimeConnector(make_app(Mock()))

    assert connector.runtime_url == expected_url
    assert connector.manages_local_runtime is True
    assert connector.client._base_url == expected_url
def test_box_runtime_connector_uses_docker_default_runtime_url(monkeypatch: pytest.MonkeyPatch):
    """With no URL configured on 'docker', the runtime container host is used and not managed locally."""
    patch_platform(monkeypatch, 'docker')
    expected_url = 'http://langbot_box_runtime:5410'

    connector = BoxRuntimeConnector(make_app(Mock()))

    assert connector.runtime_url == expected_url
    assert connector.manages_local_runtime is False
    assert connector.client._base_url == expected_url
@pytest.mark.asyncio
async def test_box_runtime_connector_initialize_delegates_to_client_when_runtime_is_healthy(
    monkeypatch: pytest.MonkeyPatch,
):
    """When the client initializes cleanly, no local runtime process is started."""
    patch_platform(monkeypatch, 'linux')
    connector = BoxRuntimeConnector(make_app(Mock()))

    # Replace the three collaborators with awaitable mocks so calls can be counted.
    client_initialize = AsyncMock()
    start_process = AsyncMock()
    wait_ready = AsyncMock()
    connector.client.initialize = client_initialize
    connector._start_local_runtime_process = start_process
    connector._wait_until_runtime_ready = wait_ready

    await connector.initialize()

    client_initialize.assert_awaited_once()
    start_process.assert_not_awaited()
    wait_ready.assert_not_awaited()
@pytest.mark.asyncio
async def test_box_runtime_connector_initialize_autostarts_local_runtime_when_unavailable(
    monkeypatch: pytest.MonkeyPatch,
):
    """When the client reports the runtime unavailable, the local runtime is started and awaited."""
    patch_platform(monkeypatch, 'linux')
    connector = BoxRuntimeConnector(make_app(Mock()))

    # The client fails on every initialize() call with the "unavailable" error.
    client_initialize = AsyncMock(side_effect=BoxRuntimeUnavailableError('down'))
    start_process = AsyncMock()
    wait_ready = AsyncMock()
    connector.client.initialize = client_initialize
    connector._start_local_runtime_process = start_process
    connector._wait_until_runtime_ready = wait_ready

    await connector.initialize()

    client_initialize.assert_awaited_once()
    start_process.assert_awaited_once()
    wait_ready.assert_awaited_once()
@pytest.mark.asyncio
async def test_box_runtime_connector_initialize_remote_runtime_does_not_autostart():
    """A connector pointed at an explicit URL only initializes its client."""
    app = make_app(Mock(), runtime_url='http://box-runtime:5410')
    connector = BoxRuntimeConnector(app)

    client_initialize = AsyncMock()
    start_process = AsyncMock()
    wait_ready = AsyncMock()
    connector.client.initialize = client_initialize
    connector._start_local_runtime_process = start_process
    connector._wait_until_runtime_ready = wait_ready

    await connector.initialize()

    client_initialize.assert_awaited_once()
    start_process.assert_not_awaited()
    wait_ready.assert_not_awaited()
def test_box_runtime_connector_dispose_terminates_local_runtime_process(monkeypatch: pytest.MonkeyPatch):
    """dispose() terminates the tracked runtime process and cancels its task."""
    # Pin the platform so the outcome does not depend on where the suite runs:
    # with no runtime URL configured, 'docker' yields an unmanaged connector,
    # while 'linux' yields a locally managed one.
    patch_platform(monkeypatch, 'linux')

    logger = Mock()
    connector = BoxRuntimeConnector(make_app(logger))

    runtime_process = Mock()
    # A None returncode mimics a process that has not exited (subprocess convention).
    runtime_process.returncode = None
    runtime_task = Mock()
    connector.runtime_subprocess = runtime_process
    connector.runtime_subprocess_task = runtime_task

    connector.dispose()

    runtime_process.terminate.assert_called_once()
    runtime_task.cancel.assert_called_once()
    assert connector.runtime_subprocess_task is None

View File

@@ -1,16 +1,19 @@
from __future__ import annotations
import asyncio
import datetime as dt
import os
import socket
from types import SimpleNamespace
from unittest.mock import Mock
from unittest.mock import AsyncMock, Mock
import pytest
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
from langbot.pkg.box.backend import BaseSandboxBackend
from langbot.pkg.box.errors import BoxBackendUnavailableError, BoxSessionConflictError, BoxValidationError
from langbot.pkg.box.client import LocalBoxRuntimeClient, RemoteBoxRuntimeClient
from langbot.pkg.box.errors import BoxBackendUnavailableError, BoxSessionConflictError, BoxSessionNotFoundError, BoxValidationError
from langbot.pkg.box.models import (
BUILTIN_PROFILES,
BoxExecutionResult,
@@ -27,6 +30,21 @@ from langbot.pkg.box.service import BoxService
_UTC = dt.timezone.utc
def _can_open_test_socket() -> bool:
try:
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
except OSError:
return False
sock.close()
return True
# Skip marker for tests that need real TCP sockets. The probe runs once, when
# this module is imported, and the decorated tests are skipped if it fails.
requires_socket = pytest.mark.skipif(
    not _can_open_test_socket(),
    reason='local test environment does not permit opening TCP sockets',
)
class FakeBackend(BaseSandboxBackend):
def __init__(self, logger: Mock, available: bool = True):
super().__init__(logger)
@@ -95,6 +113,68 @@ def make_app(logger: Mock, allowed_host_mount_roots: list[str] | None = None, pr
)
@pytest.mark.asyncio
async def test_box_service_without_explicit_client_initializes_internal_connector(monkeypatch: pytest.MonkeyPatch):
    """Without an explicit client, the service builds a connector, adopts its client and initializes it."""
    fake_connector = Mock()
    fake_connector.client = Mock()
    fake_connector.initialize = AsyncMock()
    connector_factory = Mock(return_value=fake_connector)
    monkeypatch.setattr('langbot.pkg.box.service.BoxRuntimeConnector', connector_factory)

    service = BoxService(make_app(Mock()))
    await service.initialize()

    assert service.client is fake_connector.client
    fake_connector.initialize.assert_awaited_once()
@pytest.mark.asyncio
async def test_box_service_get_sessions_delegates_to_client():
    """get_sessions() returns exactly what the injected client returns."""
    expected_sessions = [{'session_id': 'test-session'}]
    fake_client = Mock()
    fake_client.get_sessions = AsyncMock(return_value=expected_sessions)
    service = BoxService(make_app(Mock()), client=fake_client)

    returned = await service.get_sessions()

    assert returned == [{'session_id': 'test-session'}]
    fake_client.get_sessions.assert_awaited_once()
def test_box_service_dispose_delegates_to_internal_connector(monkeypatch: pytest.MonkeyPatch):
    """dispose() on a service that built its own connector disposes that connector."""
    fake_connector = Mock()
    fake_connector.client = Mock()
    connector_factory = Mock(return_value=fake_connector)
    monkeypatch.setattr('langbot.pkg.box.service.BoxRuntimeConnector', connector_factory)

    BoxService(make_app(Mock())).dispose()

    fake_connector.dispose.assert_called_once()
@pytest.mark.asyncio
async def test_box_service_dispose_schedules_shutdown_on_event_loop(monkeypatch: pytest.MonkeyPatch):
    """With an event loop on the app, dispose() disposes the connector and gets shutdown() awaited."""
    fake_connector = Mock()
    fake_connector.client = Mock()
    fake_connector.dispose = Mock()
    monkeypatch.setattr('langbot.pkg.box.service.BoxRuntimeConnector', Mock(return_value=fake_connector))

    app = make_app(Mock())
    app.event_loop = asyncio.get_running_loop()
    service = BoxService(app)
    shutdown_mock = AsyncMock()
    service.shutdown = shutdown_mock

    service.dispose()
    # Yield to the loop once so anything dispose() scheduled gets a chance to run.
    await asyncio.sleep(0)

    fake_connector.dispose.assert_called_once()
    shutdown_mock.assert_awaited_once()
@pytest.mark.asyncio
async def test_box_runtime_reuses_request_session():
logger = Mock()
@@ -117,7 +197,7 @@ async def test_box_service_defaults_session_id_from_query():
logger = Mock()
backend = FakeBackend(logger)
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
service = BoxService(make_app(logger), runtime=runtime)
service = BoxService(make_app(logger), client=LocalBoxRuntimeClient(logger, runtime))
await service.initialize()
result = await service.execute_sandbox_tool({'cmd': 'pwd', 'network': BoxNetworkMode.OFF.value}, make_query(7))
@@ -132,7 +212,7 @@ async def test_box_service_fails_closed_when_backend_unavailable():
logger = Mock()
backend = FakeBackend(logger, available=False)
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
service = BoxService(make_app(logger), runtime=runtime)
service = BoxService(make_app(logger), client=LocalBoxRuntimeClient(logger, runtime))
await service.initialize()
with pytest.raises(BoxBackendUnavailableError):
@@ -146,7 +226,7 @@ async def test_box_service_allows_host_mount_under_configured_root(tmp_path):
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
host_dir = tmp_path / 'mounted-workspace'
host_dir.mkdir()
service = BoxService(make_app(logger, [str(tmp_path)]), runtime=runtime)
service = BoxService(make_app(logger, [str(tmp_path)]), client=LocalBoxRuntimeClient(logger, runtime))
await service.initialize()
result = await service.execute_sandbox_tool(
@@ -171,7 +251,7 @@ async def test_box_service_uses_default_host_workspace_when_host_path_omitted(tm
host_dir.mkdir()
app = make_app(logger, [str(tmp_path)])
app.instance_config.data['box']['default_host_workspace'] = str(host_dir)
service = BoxService(app, runtime=runtime)
service = BoxService(app, client=LocalBoxRuntimeClient(logger, runtime))
await service.initialize()
result = await service.execute_sandbox_tool({'cmd': 'pwd'}, make_query(15))
@@ -182,6 +262,23 @@ async def test_box_service_uses_default_host_workspace_when_host_path_omitted(tm
assert backend.start_specs[0].host_path == os.path.realpath(host_dir)
@pytest.mark.asyncio
async def test_box_service_creates_default_host_workspace_on_initialize(tmp_path):
    """initialize() creates the configured default host workspace when it is missing."""
    logger = Mock()
    runtime = BoxRuntime(logger=logger, backends=[FakeBackend(logger)], session_ttl_sec=300)

    allowed_root = tmp_path / 'allowed-root'
    allowed_root.mkdir()
    # Deliberately not created here: the service is expected to create it.
    workspace_dir = allowed_root / 'default-workspace'

    app = make_app(logger, [str(allowed_root)])
    app.instance_config.data['box']['default_host_workspace'] = str(workspace_dir)
    service = BoxService(app, client=LocalBoxRuntimeClient(logger, runtime))

    await service.initialize()

    assert workspace_dir.is_dir()
@pytest.mark.asyncio
async def test_box_service_rejects_host_mount_outside_allowed_roots(tmp_path):
logger = Mock()
@@ -191,7 +288,7 @@ async def test_box_service_rejects_host_mount_outside_allowed_roots(tmp_path):
disallowed_root = tmp_path / 'disallowed'
allowed_root.mkdir()
disallowed_root.mkdir()
service = BoxService(make_app(logger, [str(allowed_root)]), runtime=runtime)
service = BoxService(make_app(logger, [str(allowed_root)]), client=LocalBoxRuntimeClient(logger, runtime))
await service.initialize()
with pytest.raises(BoxValidationError):
@@ -282,7 +379,7 @@ async def test_truncate_short_output_unchanged():
logger = Mock()
backend = FakeBackendWithOutput(logger, stdout='hello world')
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
service = BoxService(make_app(logger), runtime=runtime, output_limit_chars=100)
service = BoxService(make_app(logger), client=LocalBoxRuntimeClient(logger, runtime), output_limit_chars=100)
await service.initialize()
result = await service.execute_sandbox_tool({'cmd': 'echo hello'}, make_query(20))
@@ -303,7 +400,7 @@ async def test_truncate_preserves_head_and_tail():
backend = FakeBackendWithOutput(logger, stdout=big_output)
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
limit = 100
service = BoxService(make_app(logger), runtime=runtime, output_limit_chars=limit)
service = BoxService(make_app(logger), client=LocalBoxRuntimeClient(logger, runtime), output_limit_chars=limit)
await service.initialize()
result = await service.execute_sandbox_tool({'cmd': 'cat big'}, make_query(21))
@@ -325,7 +422,7 @@ async def test_truncate_at_exact_limit_not_truncated():
exact_output = 'a' * 200
backend = FakeBackendWithOutput(logger, stdout=exact_output)
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
service = BoxService(make_app(logger), runtime=runtime, output_limit_chars=200)
service = BoxService(make_app(logger), client=LocalBoxRuntimeClient(logger, runtime), output_limit_chars=200)
await service.initialize()
result = await service.execute_sandbox_tool({'cmd': 'echo a'}, make_query(22))
@@ -339,7 +436,7 @@ async def test_truncate_stderr_independently():
logger = Mock()
backend = FakeBackendWithOutput(logger, stdout='short', stderr='E' * 300)
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
service = BoxService(make_app(logger), runtime=runtime, output_limit_chars=100)
service = BoxService(make_app(logger), client=LocalBoxRuntimeClient(logger, runtime), output_limit_chars=100)
await service.initialize()
result = await service.execute_sandbox_tool({'cmd': 'fail'}, make_query(23))
@@ -359,7 +456,7 @@ async def test_profile_default_provides_defaults():
logger = Mock()
backend = FakeBackend(logger)
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
service = BoxService(make_app(logger), runtime=runtime)
service = BoxService(make_app(logger), client=LocalBoxRuntimeClient(logger, runtime))
await service.initialize()
result = await service.execute_sandbox_tool({'cmd': 'echo hi'}, make_query(30))
@@ -377,7 +474,7 @@ async def test_profile_unlocked_field_can_be_overridden():
logger = Mock()
backend = FakeBackend(logger)
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
service = BoxService(make_app(logger), runtime=runtime)
service = BoxService(make_app(logger), client=LocalBoxRuntimeClient(logger, runtime))
await service.initialize()
result = await service.execute_sandbox_tool(
@@ -397,7 +494,7 @@ async def test_profile_locked_field_cannot_be_overridden():
logger = Mock()
backend = FakeBackend(logger)
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
service = BoxService(make_app(logger, profile='offline_readonly'), runtime=runtime)
service = BoxService(make_app(logger, profile='offline_readonly'), client=LocalBoxRuntimeClient(logger, runtime))
await service.initialize()
result = await service.execute_sandbox_tool(
@@ -417,7 +514,7 @@ async def test_profile_timeout_clamped_to_max():
logger = Mock()
backend = FakeBackend(logger)
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
service = BoxService(make_app(logger), runtime=runtime)
service = BoxService(make_app(logger), client=LocalBoxRuntimeClient(logger, runtime))
await service.initialize()
result = await service.execute_sandbox_tool(
@@ -437,7 +534,7 @@ async def test_profile_timeout_clamped_for_coercible_inputs(timeout_value):
logger = Mock()
backend = FakeBackend(logger)
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
service = BoxService(make_app(logger), runtime=runtime)
service = BoxService(make_app(logger), client=LocalBoxRuntimeClient(logger, runtime))
await service.initialize()
await service.execute_sandbox_tool(
@@ -452,8 +549,9 @@ async def test_profile_timeout_clamped_for_coercible_inputs(timeout_value):
def test_unknown_profile_raises_error():
"""Config referencing a non-existent profile name raises immediately."""
logger = Mock()
runtime = BoxRuntime(logger=logger, backends=[FakeBackend(logger)], session_ttl_sec=300)
with pytest.raises(BoxValidationError, match='unknown box profile'):
BoxService(make_app(logger, profile='nonexistent'))
BoxService(make_app(logger, profile='nonexistent'), client=LocalBoxRuntimeClient(logger, runtime))
def test_builtin_profiles_are_consistent():
@@ -488,7 +586,7 @@ async def test_profile_default_applies_resource_limits():
logger = Mock()
backend = FakeBackend(logger)
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
service = BoxService(make_app(logger), runtime=runtime)
service = BoxService(make_app(logger), client=LocalBoxRuntimeClient(logger, runtime))
await service.initialize()
await service.execute_sandbox_tool({'cmd': 'echo hi'}, make_query(40))
@@ -507,7 +605,7 @@ async def test_profile_offline_readonly_locks_read_only_rootfs():
logger = Mock()
backend = FakeBackend(logger)
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
service = BoxService(make_app(logger, profile='offline_readonly'), runtime=runtime)
service = BoxService(make_app(logger, profile='offline_readonly'), client=LocalBoxRuntimeClient(logger, runtime))
await service.initialize()
await service.execute_sandbox_tool(
@@ -525,7 +623,7 @@ async def test_profile_network_extended_has_relaxed_limits():
logger = Mock()
backend = FakeBackend(logger)
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
service = BoxService(make_app(logger, profile='network_extended'), runtime=runtime)
service = BoxService(make_app(logger, profile='network_extended'), client=LocalBoxRuntimeClient(logger, runtime))
await service.initialize()
await service.execute_sandbox_tool({'cmd': 'echo hi'}, make_query(42))
@@ -600,7 +698,7 @@ async def test_service_records_errors_on_failure():
logger = Mock()
backend = FakeBackend(logger, available=False)
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
service = BoxService(make_app(logger), runtime=runtime)
service = BoxService(make_app(logger), client=LocalBoxRuntimeClient(logger, runtime))
await service.initialize()
with pytest.raises(Exception):
@@ -618,7 +716,7 @@ async def test_service_error_ring_buffer_capped():
logger = Mock()
backend = FakeBackend(logger, available=False)
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
service = BoxService(make_app(logger), runtime=runtime)
service = BoxService(make_app(logger), client=LocalBoxRuntimeClient(logger, runtime))
await service.initialize()
for i in range(60):
@@ -637,7 +735,7 @@ async def test_service_get_status_aggregates_runtime_and_profile():
logger = Mock()
backend = FakeBackend(logger)
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
service = BoxService(make_app(logger), runtime=runtime)
service = BoxService(make_app(logger), client=LocalBoxRuntimeClient(logger, runtime))
await service.initialize()
status = await service.get_status()
@@ -646,3 +744,419 @@ async def test_service_get_status_aggregates_runtime_and_profile():
assert status['backend']['available'] is True
assert status['active_sessions'] == 0
assert status['recent_error_count'] == 0
# ── RemoteBoxRuntimeClient tests ─────────────────────────────────────
@requires_socket
@pytest.mark.asyncio
async def test_remote_client_execute():
    """RemoteBoxRuntimeClient correctly posts to server and parses result."""
    from aiohttp.test_utils import TestServer
    from langbot.pkg.box.server import create_app as create_server_app

    logger = Mock()
    backend = FakeBackend(logger)
    runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
    server = TestServer(create_server_app(runtime))
    await server.start_server()
    try:
        client = RemoteBoxRuntimeClient(base_url=str(server.make_url('')), logger=logger)
        try:
            await client.initialize()
            spec = BoxSpec.model_validate({'cmd': 'echo remote', 'session_id': 'r-1'})
            result = await client.execute(spec)
            assert result.session_id == 'r-1'
            assert result.status == BoxExecutionStatus.COMPLETED
            assert result.exit_code == 0
            assert result.stdout == 'executed: echo remote'
        finally:
            # Shut the client down even when a call or assertion above fails.
            await client.shutdown()
    finally:
        await server.close()
@requires_socket
@pytest.mark.asyncio
async def test_remote_client_get_sessions():
    """get_sessions() lists the session created by a prior execute()."""
    from aiohttp.test_utils import TestServer
    from langbot.pkg.box.server import create_app as create_server_app

    logger = Mock()
    backend = FakeBackend(logger)
    runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
    server = TestServer(create_server_app(runtime))
    await server.start_server()
    try:
        client = RemoteBoxRuntimeClient(base_url=str(server.make_url('')), logger=logger)
        try:
            spec = BoxSpec.model_validate({'cmd': 'echo hi', 'session_id': 'r-2'})
            await client.execute(spec)
            sessions = await client.get_sessions()
            assert len(sessions) == 1
            assert sessions[0]['session_id'] == 'r-2'
        finally:
            # Shut the client down even when a call or assertion above fails.
            await client.shutdown()
    finally:
        await server.close()
@requires_socket
@pytest.mark.asyncio
async def test_remote_client_get_status():
    """get_status() returns a payload with 'backend' and 'active_sessions' keys."""
    from aiohttp.test_utils import TestServer
    from langbot.pkg.box.server import create_app as create_server_app

    logger = Mock()
    backend = FakeBackend(logger)
    runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
    server = TestServer(create_server_app(runtime))
    await server.start_server()
    try:
        client = RemoteBoxRuntimeClient(base_url=str(server.make_url('')), logger=logger)
        try:
            status = await client.get_status()
            assert 'backend' in status
            assert 'active_sessions' in status
        finally:
            # Shut the client down even when a call or assertion above fails.
            await client.shutdown()
    finally:
        await server.close()
@requires_socket
@pytest.mark.asyncio
async def test_remote_client_get_backend_info():
    """get_backend_info() reports the fake backend's name and availability."""
    from aiohttp.test_utils import TestServer
    from langbot.pkg.box.server import create_app as create_server_app

    logger = Mock()
    backend = FakeBackend(logger)
    runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
    server = TestServer(create_server_app(runtime))
    await server.start_server()
    try:
        client = RemoteBoxRuntimeClient(base_url=str(server.make_url('')), logger=logger)
        try:
            info = await client.get_backend_info()
            assert info['name'] == 'fake'
            assert info['available'] is True
        finally:
            # Shut the client down even when a call or assertion above fails.
            await client.shutdown()
    finally:
        await server.close()
# ── Server endpoint tests ────────────────────────────────────────────
@requires_socket
@pytest.mark.asyncio
async def test_server_delete_session():
    """DELETE on a session created via exec succeeds and empties the session list."""
    from aiohttp.test_utils import TestClient, TestServer
    from langbot.pkg.box.server import create_app as create_server_app

    logger = Mock()
    runtime = BoxRuntime(logger=logger, backends=[FakeBackend(logger)], session_ttl_sec=300)
    # Entering the context starts the test server; leaving it closes the client.
    async with TestClient(TestServer(create_server_app(runtime))) as http:
        # Create a session via exec
        exec_resp = await http.post('/v1/sessions/del-1/exec', json={'cmd': 'echo hi'})
        assert exec_resp.status == 200

        # Delete it
        delete_resp = await http.delete('/v1/sessions/del-1')
        assert delete_resp.status == 200
        payload = await delete_resp.json()
        assert payload['deleted'] == 'del-1'

        # Verify session is gone
        list_resp = await http.get('/v1/sessions')
        remaining = await list_resp.json()
        assert len(remaining) == 0
# ── Runtime delete_session / create_session tests ────────────────────
@pytest.mark.asyncio
async def test_runtime_delete_session():
    """delete_session() removes the session and stops it on the backend."""
    logger = Mock()
    backend = FakeBackend(logger)
    runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
    await runtime.initialize()

    spec = BoxSpec.model_validate({'cmd': 'echo', 'session_id': 'del-test'})
    await runtime.execute(spec)
    assert len(runtime.get_sessions()) == 1

    await runtime.delete_session('del-test')

    assert len(runtime.get_sessions()) == 0
    assert backend.stop_calls == ['del-test']
@pytest.mark.asyncio
async def test_runtime_delete_session_not_found():
    """Deleting an unknown session id raises BoxSessionNotFoundError."""
    logger = Mock()
    runtime = BoxRuntime(logger=logger, backends=[FakeBackend(logger)], session_ttl_sec=300)
    await runtime.initialize()

    with pytest.raises(BoxSessionNotFoundError):
        await runtime.delete_session('nonexistent')
@pytest.mark.asyncio
async def test_runtime_create_session():
    """create_session() returns session info and registers the session."""
    logger = Mock()
    runtime = BoxRuntime(logger=logger, backends=[FakeBackend(logger)], session_ttl_sec=300)
    await runtime.initialize()

    spec = BoxSpec.model_validate({'cmd': 'placeholder', 'session_id': 'create-1'})
    created = await runtime.create_session(spec)

    assert created['session_id'] == 'create-1'
    assert created['backend_name'] == 'fake'

    listed = runtime.get_sessions()
    assert len(listed) == 1
    assert listed[0]['session_id'] == 'create-1'
# ── Server structured error tests ────────────────────────────────────
@requires_socket
@pytest.mark.asyncio
async def test_server_delete_nonexistent_session():
    """DELETE on an unknown session returns 404 with a 'session_not_found' error code."""
    from aiohttp.test_utils import TestClient, TestServer
    from langbot.pkg.box.server import create_app as create_server_app

    logger = Mock()
    runtime = BoxRuntime(logger=logger, backends=[FakeBackend(logger)], session_ttl_sec=300)
    # Entering the context starts the test server; leaving it closes the client.
    async with TestClient(TestServer(create_server_app(runtime))) as http:
        delete_resp = await http.delete('/v1/sessions/nonexistent')
        assert delete_resp.status == 404
        payload = await delete_resp.json()
        assert payload['error']['code'] == 'session_not_found'
@requires_socket
@pytest.mark.asyncio
async def test_server_exec_returns_structured_error_on_conflict():
    """Re-using a session id with a different network mode yields 409 'session_conflict'."""
    from aiohttp.test_utils import TestClient, TestServer
    from langbot.pkg.box.server import create_app as create_server_app

    logger = Mock()
    runtime = BoxRuntime(logger=logger, backends=[FakeBackend(logger)], session_ttl_sec=300)
    exec_path = '/v1/sessions/conflict-1/exec'
    # Entering the context starts the test server; leaving it closes the client.
    async with TestClient(TestServer(create_server_app(runtime))) as http:
        # Create session with network=off
        first_resp = await http.post(exec_path, json={'cmd': 'echo hi', 'network': 'off'})
        assert first_resp.status == 200

        # Try to use same session with network=on -> conflict
        second_resp = await http.post(exec_path, json={'cmd': 'echo hi', 'network': 'on'})
        assert second_resp.status == 409
        payload = await second_resp.json()
        assert payload['error']['code'] == 'session_conflict'
@requires_socket
@pytest.mark.asyncio
async def test_server_create_session():
    """POST to a session URL creates it (201) and it then appears in the session list."""
    from aiohttp.test_utils import TestClient, TestServer
    from langbot.pkg.box.server import create_app as create_server_app

    logger = Mock()
    runtime = BoxRuntime(logger=logger, backends=[FakeBackend(logger)], session_ttl_sec=300)
    # Entering the context starts the test server; leaving it closes the client.
    async with TestClient(TestServer(create_server_app(runtime))) as http:
        create_resp = await http.post('/v1/sessions/new-1', json={'image': 'python:3.11-slim'})
        assert create_resp.status == 201
        created = await create_resp.json()
        assert created['session_id'] == 'new-1'
        assert created['backend_name'] == 'fake'
        assert 'created_at' in created

        # Session should appear in list
        list_resp = await http.get('/v1/sessions')
        listed = await list_resp.json()
        assert len(listed) == 1
        assert listed[0]['session_id'] == 'new-1'
@requires_socket
@pytest.mark.asyncio
async def test_server_create_session_conflict():
    """Creating an existing session with a different network mode yields 409 'session_conflict'."""
    from aiohttp.test_utils import TestClient, TestServer
    from langbot.pkg.box.server import create_app as create_server_app

    logger = Mock()
    runtime = BoxRuntime(logger=logger, backends=[FakeBackend(logger)], session_ttl_sec=300)
    session_path = '/v1/sessions/dup-1'
    # Entering the context starts the test server; leaving it closes the client.
    async with TestClient(TestServer(create_server_app(runtime))) as http:
        first_resp = await http.post(session_path, json={'network': 'off'})
        assert first_resp.status == 201

        # Conflicting create with different network
        second_resp = await http.post(session_path, json={'network': 'on'})
        assert second_resp.status == 409
        payload = await second_resp.json()
        assert payload['error']['code'] == 'session_conflict'
# ── Remote client error translation tests ─────────────────────────────
@requires_socket
@pytest.mark.asyncio
async def test_remote_client_delete_session():
    """delete_session() removes a session previously created through execute()."""
    from aiohttp.test_utils import TestServer
    from langbot.pkg.box.server import create_app as create_server_app

    logger = Mock()
    backend = FakeBackend(logger)
    runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
    server = TestServer(create_server_app(runtime))
    await server.start_server()
    try:
        client = RemoteBoxRuntimeClient(base_url=str(server.make_url('')), logger=logger)
        try:
            # Create session via exec
            spec = BoxSpec.model_validate({'cmd': 'echo hi', 'session_id': 'r-del-1'})
            await client.execute(spec)
            # Delete it
            await client.delete_session('r-del-1')
            # Verify empty
            sessions = await client.get_sessions()
            assert len(sessions) == 0
        finally:
            # Shut the client down even when a call or assertion above fails.
            await client.shutdown()
    finally:
        await server.close()
@requires_socket
@pytest.mark.asyncio
async def test_remote_client_delete_session_raises_not_found():
    """Deleting an unknown session through the remote client raises BoxSessionNotFoundError."""
    from aiohttp.test_utils import TestServer
    from langbot.pkg.box.server import create_app as create_server_app

    logger = Mock()
    backend = FakeBackend(logger)
    runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
    server = TestServer(create_server_app(runtime))
    await server.start_server()
    try:
        client = RemoteBoxRuntimeClient(base_url=str(server.make_url('')), logger=logger)
        try:
            with pytest.raises(BoxSessionNotFoundError):
                await client.delete_session('nonexistent')
        finally:
            # Shut the client down even when the expectation above fails.
            await client.shutdown()
    finally:
        await server.close()
@requires_socket
@pytest.mark.asyncio
async def test_remote_client_create_session():
    """create_session() returns session info and the session becomes listable."""
    from aiohttp.test_utils import TestServer
    from langbot.pkg.box.server import create_app as create_server_app

    logger = Mock()
    backend = FakeBackend(logger)
    runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
    server = TestServer(create_server_app(runtime))
    await server.start_server()
    try:
        client = RemoteBoxRuntimeClient(base_url=str(server.make_url('')), logger=logger)
        try:
            spec = BoxSpec.model_validate({'cmd': 'placeholder', 'session_id': 'r-create-1'})
            info = await client.create_session(spec)
            assert info['session_id'] == 'r-create-1'
            assert info['backend_name'] == 'fake'
            sessions = await client.get_sessions()
            assert len(sessions) == 1
        finally:
            # Shut the client down even when a call or assertion above fails.
            await client.shutdown()
    finally:
        await server.close()
@requires_socket
@pytest.mark.asyncio
async def test_remote_client_exec_raises_conflict_error():
    """Re-using a session id with a different network mode raises BoxSessionConflictError."""
    from aiohttp.test_utils import TestServer
    from langbot.pkg.box.server import create_app as create_server_app

    logger = Mock()
    backend = FakeBackend(logger)
    runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
    server = TestServer(create_server_app(runtime))
    await server.start_server()
    try:
        client = RemoteBoxRuntimeClient(base_url=str(server.make_url('')), logger=logger)
        try:
            # Create session with network=off
            spec1 = BoxSpec.model_validate({'cmd': 'echo first', 'session_id': 'r-conflict-1', 'network': 'off'})
            await client.execute(spec1)
            # Conflicting exec with network=on
            spec2 = BoxSpec.model_validate({'cmd': 'echo second', 'session_id': 'r-conflict-1', 'network': 'on'})
            with pytest.raises(BoxSessionConflictError):
                await client.execute(spec2)
        finally:
            # Shut the client down even when a call or expectation above fails.
            await client.shutdown()
    finally:
        await server.close()

View File

@@ -0,0 +1,65 @@
from __future__ import annotations
import langbot_plugin.api.entities.builtin.provider.message as provider_message
from langbot.pkg.pipeline.process.logging_utils import format_result_log
def cut_str(s: str) -> str:
    """Return a one-line preview of ``s``.

    Only the text before the first newline is kept. If that first line is
    longer than 20 characters, or ``s`` contains a newline at all, the
    preview is cut to 20 characters and suffixed with ``'...'``.
    """
    first_line, newline, _rest = s.partition('\n')
    # ``newline`` is non-empty exactly when ``s`` contained a line break.
    if newline or len(first_line) > 20:
        return first_line[:20] + '...'
    return first_line
def test_chat_handler_formats_tool_call_request_log():
    """An assistant message carrying tool calls is summarized by the requested tool names."""
    sandbox_call = provider_message.ToolCall(
        id='call-1',
        type='function',
        function=provider_message.FunctionCall(name='sandbox_exec', arguments='{}'),
    )
    message = provider_message.Message(role='assistant', content='', tool_calls=[sandbox_call])

    summary = format_result_log(message, cut_str)

    assert summary == 'assistant: requested tools: sandbox_exec'
def test_chat_handler_formats_tool_result_log():
    """A JSON tool result is summarized as status, exit code, backend and stdout."""
    tool_payload = '{"status":"completed","exit_code":0,"backend":"podman","stdout":"42\\n"}'
    message = provider_message.Message(role='tool', content=tool_payload, tool_call_id='call-1')

    summary = format_result_log(message, cut_str)

    assert summary == 'tool result: status=completed exit_code=0 backend=podman stdout=42'
def test_chat_handler_formats_tool_error_log():
    """A tool chunk whose content starts with 'err:' is logged as a truncated tool error."""
    error_text = 'err: host_path must point to an existing directory on the host'
    chunk = provider_message.MessageChunk(
        role='tool',
        content=error_text,
        tool_call_id='call-1',
        is_final=True,
    )

    summary = format_result_log(chunk, cut_str)

    assert summary is not None
    assert summary.startswith('tool error: err: host_path must')
    assert summary.endswith('...')
def test_chat_handler_skips_empty_assistant_log():
    """An assistant message with empty content and no tool calls produces no log summary."""
    empty_message = provider_message.Message(role='assistant', content='')

    assert format_result_log(empty_message, cut_str) is None

View File

@@ -58,6 +58,46 @@ class RecordingProvider:
)
class RecordingStreamProvider:
    """Fake streaming LLM provider that records each stream request.

    While exactly one request has been recorded, the returned stream yields
    a single final chunk requesting the ``sandbox_exec`` tool; otherwise it
    yields a single final plain-text chunk.
    """

    def __init__(self):
        self.stream_requests: list[dict] = []

    def invoke_llm_stream(self, query, model, messages, funcs, extra_args=None, remove_think=None):
        """Record the request and return an async generator of canned chunks."""
        recorded = {
            'messages': list(messages),
            'funcs': list(funcs),
            'remove_think': remove_think,
        }
        self.stream_requests.append(recorded)

        async def _canned_chunks():
            # The request count is read lazily, when the stream is first iterated.
            if len(self.stream_requests) != 1:
                yield provider_message.MessageChunk(
                    role='assistant',
                    content='Tool execution failed.',
                    is_final=True,
                )
                return

            sandbox_call = provider_message.ToolCall(
                id='call-1',
                type='function',
                function=provider_message.FunctionCall(
                    name='sandbox_exec',
                    arguments=json.dumps({'cmd': "python -c 'print(1)'"}),
                ),
            )
            yield provider_message.MessageChunk(
                role='assistant',
                tool_calls=[sandbox_call],
                is_final=True,
            )

        return _canned_chunks()
def make_query() -> pipeline_query.Query:
adapter = AsyncMock()
adapter.is_stream_output_supported = AsyncMock(return_value=False)
@@ -156,3 +196,38 @@ async def test_localagent_uses_sandbox_exec_for_exact_calculation():
for message in first_request['messages']
)
assert [tool.name for tool in first_request['funcs']] == ['sandbox_exec']
@pytest.mark.asyncio
async def test_localagent_streaming_tool_error_yields_message_chunks(tmp_path):
    """In streaming mode a failing tool call is surfaced as an 'err: ...' tool chunk."""
    provider = RecordingStreamProvider()
    model = SimpleNamespace(
        provider=provider,
        model_entity=SimpleNamespace(
            uuid='test-model-uuid',
            name='test-model',
            abilities=['func_call'],
            extra_args={},
        ),
    )

    # Advertise streaming support so the runner takes its streaming path.
    adapter = AsyncMock()
    adapter.is_stream_output_supported = AsyncMock(return_value=True)
    query = make_query()
    query.adapter = adapter

    app = SimpleNamespace(
        logger=Mock(),
        model_mgr=SimpleNamespace(get_model_by_uuid=AsyncMock(return_value=model)),
        # Every tool invocation fails, which is the condition under test.
        tool_mgr=SimpleNamespace(execute_func_call=AsyncMock(side_effect=RuntimeError('boom'))),
        rag_mgr=SimpleNamespace(),
        # Use pytest's per-test temporary directory rather than a
        # machine-specific absolute path, so the test is portable.
        instance_config=SimpleNamespace(data={'box': {'default_host_workspace': str(tmp_path)}}),
    )
    runner = LocalAgentRunner(app, pipeline_config={})

    results = [message async for message in runner.run(query)]

    assert all(isinstance(message, provider_message.MessageChunk) for message in results)
    assert any(message.role == 'tool' and message.content == 'err: boom' for message in results)