mirror of
https://github.com/langbot-app/LangBot.git
synced 2026-06-09 15:26:03 +00:00
feat(box): add host workspace mounting and sandbox_exec guidance
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as dt
|
||||
import os
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import Mock
|
||||
|
||||
@@ -9,8 +10,15 @@ import pytest
|
||||
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
|
||||
|
||||
from langbot.pkg.box.backend import BaseSandboxBackend
|
||||
from langbot.pkg.box.errors import BoxBackendUnavailableError
|
||||
from langbot.pkg.box.models import BoxExecutionResult, BoxExecutionStatus, BoxNetworkMode, BoxSessionInfo, BoxSpec
|
||||
from langbot.pkg.box.errors import BoxBackendUnavailableError, BoxSessionConflictError, BoxValidationError
|
||||
from langbot.pkg.box.models import (
|
||||
BoxExecutionResult,
|
||||
BoxExecutionStatus,
|
||||
BoxHostMountMode,
|
||||
BoxNetworkMode,
|
||||
BoxSessionInfo,
|
||||
BoxSpec,
|
||||
)
|
||||
from langbot.pkg.box.runtime import BoxRuntime
|
||||
from langbot.pkg.box.service import BoxService
|
||||
|
||||
@@ -21,6 +29,7 @@ class FakeBackend(BaseSandboxBackend):
|
||||
self.name = 'fake'
|
||||
self.available = available
|
||||
self.start_calls: list[str] = []
|
||||
self.start_specs: list[BoxSpec] = []
|
||||
self.exec_calls: list[tuple[str, str]] = []
|
||||
self.stop_calls: list[str] = []
|
||||
|
||||
@@ -29,6 +38,7 @@ class FakeBackend(BaseSandboxBackend):
|
||||
|
||||
async def start_session(self, spec: BoxSpec) -> BoxSessionInfo:
|
||||
self.start_calls.append(spec.session_id)
|
||||
self.start_specs.append(spec)
|
||||
now = dt.datetime.now(dt.UTC)
|
||||
return BoxSessionInfo(
|
||||
session_id=spec.session_id,
|
||||
@@ -36,6 +46,8 @@ class FakeBackend(BaseSandboxBackend):
|
||||
backend_session_id=f'backend-{spec.session_id}',
|
||||
image=spec.image,
|
||||
network=spec.network,
|
||||
host_path=spec.host_path,
|
||||
host_path_mode=spec.host_path_mode,
|
||||
created_at=now,
|
||||
last_used_at=now,
|
||||
)
|
||||
@@ -60,6 +72,20 @@ def make_query(query_id: int = 42) -> pipeline_query.Query:
|
||||
return pipeline_query.Query.model_construct(query_id=query_id)
|
||||
|
||||
|
||||
def make_app(logger: Mock, allowed_host_mount_roots: list[str] | None = None):
|
||||
return SimpleNamespace(
|
||||
logger=logger,
|
||||
instance_config=SimpleNamespace(
|
||||
data={
|
||||
'box': {
|
||||
'allowed_host_mount_roots': allowed_host_mount_roots or [],
|
||||
'default_host_workspace': '',
|
||||
}
|
||||
}
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_box_runtime_reuses_request_session():
|
||||
logger = Mock()
|
||||
@@ -82,7 +108,7 @@ async def test_box_service_defaults_session_id_from_query():
|
||||
logger = Mock()
|
||||
backend = FakeBackend(logger)
|
||||
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
||||
service = BoxService(SimpleNamespace(logger=logger), runtime=runtime)
|
||||
service = BoxService(make_app(logger), runtime=runtime)
|
||||
await service.initialize()
|
||||
|
||||
result = await service.execute_sandbox_tool({'cmd': 'pwd', 'network': BoxNetworkMode.OFF.value}, make_query(7))
|
||||
@@ -97,8 +123,106 @@ async def test_box_service_fails_closed_when_backend_unavailable():
|
||||
logger = Mock()
|
||||
backend = FakeBackend(logger, available=False)
|
||||
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
||||
service = BoxService(SimpleNamespace(logger=logger), runtime=runtime)
|
||||
service = BoxService(make_app(logger), runtime=runtime)
|
||||
await service.initialize()
|
||||
|
||||
with pytest.raises(BoxBackendUnavailableError):
|
||||
await service.execute_sandbox_tool({'cmd': 'echo hello'}, make_query(9))
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_box_service_allows_host_mount_under_configured_root(tmp_path):
|
||||
logger = Mock()
|
||||
backend = FakeBackend(logger)
|
||||
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
||||
host_dir = tmp_path / 'mounted-workspace'
|
||||
host_dir.mkdir()
|
||||
service = BoxService(make_app(logger, [str(tmp_path)]), runtime=runtime)
|
||||
await service.initialize()
|
||||
|
||||
result = await service.execute_sandbox_tool(
|
||||
{
|
||||
'cmd': 'pwd',
|
||||
'host_path': str(host_dir),
|
||||
'host_path_mode': BoxHostMountMode.READ_WRITE.value,
|
||||
},
|
||||
make_query(11),
|
||||
)
|
||||
|
||||
assert result['ok'] is True
|
||||
assert backend.start_calls == ['11']
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_box_service_uses_default_host_workspace_when_host_path_omitted(tmp_path):
|
||||
logger = Mock()
|
||||
backend = FakeBackend(logger)
|
||||
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
||||
host_dir = tmp_path / 'default-workspace'
|
||||
host_dir.mkdir()
|
||||
app = make_app(logger, [str(tmp_path)])
|
||||
app.instance_config.data['box']['default_host_workspace'] = str(host_dir)
|
||||
service = BoxService(app, runtime=runtime)
|
||||
await service.initialize()
|
||||
|
||||
result = await service.execute_sandbox_tool({'cmd': 'pwd'}, make_query(15))
|
||||
|
||||
assert result['ok'] is True
|
||||
assert backend.start_calls == ['15']
|
||||
assert backend.exec_calls == [('15', 'pwd')]
|
||||
assert backend.start_specs[0].host_path == os.path.realpath(host_dir)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_box_service_rejects_host_mount_outside_allowed_roots(tmp_path):
|
||||
logger = Mock()
|
||||
backend = FakeBackend(logger)
|
||||
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
||||
allowed_root = tmp_path / 'allowed'
|
||||
disallowed_root = tmp_path / 'disallowed'
|
||||
allowed_root.mkdir()
|
||||
disallowed_root.mkdir()
|
||||
service = BoxService(make_app(logger, [str(allowed_root)]), runtime=runtime)
|
||||
await service.initialize()
|
||||
|
||||
with pytest.raises(BoxValidationError):
|
||||
await service.execute_sandbox_tool(
|
||||
{
|
||||
'cmd': 'pwd',
|
||||
'host_path': str(disallowed_root),
|
||||
},
|
||||
make_query(12),
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_box_runtime_rejects_host_mount_conflict_in_same_session(tmp_path):
|
||||
logger = Mock()
|
||||
backend = FakeBackend(logger)
|
||||
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
||||
await runtime.initialize()
|
||||
|
||||
first_host_dir = tmp_path / 'first'
|
||||
second_host_dir = tmp_path / 'second'
|
||||
first_host_dir.mkdir()
|
||||
second_host_dir.mkdir()
|
||||
|
||||
first = BoxSpec.model_validate(
|
||||
{
|
||||
'cmd': 'echo first',
|
||||
'session_id': 'req-mount',
|
||||
'host_path': os.path.realpath(first_host_dir),
|
||||
}
|
||||
)
|
||||
second = BoxSpec.model_validate(
|
||||
{
|
||||
'cmd': 'echo second',
|
||||
'session_id': 'req-mount',
|
||||
'host_path': os.path.realpath(second_host_dir),
|
||||
}
|
||||
)
|
||||
|
||||
await runtime.execute(first)
|
||||
|
||||
with pytest.raises(BoxSessionConflictError):
|
||||
await runtime.execute(second)
|
||||
|
||||
@@ -124,6 +124,13 @@ async def test_localagent_uses_sandbox_exec_for_exact_calculation():
|
||||
model_mgr=SimpleNamespace(get_model_by_uuid=AsyncMock(return_value=model)),
|
||||
tool_mgr=tool_manager,
|
||||
rag_mgr=SimpleNamespace(),
|
||||
instance_config=SimpleNamespace(
|
||||
data={
|
||||
'box': {
|
||||
'default_host_workspace': '/home/yhh/workspace/box-demo',
|
||||
}
|
||||
}
|
||||
),
|
||||
)
|
||||
|
||||
runner = LocalAgentRunner(app, pipeline_config={})
|
||||
@@ -144,6 +151,8 @@ async def test_localagent_uses_sandbox_exec_for_exact_calculation():
|
||||
message.role == 'system'
|
||||
and 'sandbox_exec' in str(message.content)
|
||||
and 'exact calculations' in str(message.content)
|
||||
and 'Unless the user explicitly asks for the script' in str(message.content)
|
||||
and '/workspace' in str(message.content)
|
||||
for message in first_request['messages']
|
||||
)
|
||||
assert [tool.name for tool in first_request['funcs']] == ['sandbox_exec']
|
||||
|
||||
Reference in New Issue
Block a user