fix(box): tighten sandbox exposure and restore box integration coverage

This commit is contained in:
youhuanghe
2026-03-24 04:01:16 +00:00
committed by WangCham
parent 63d22b1f8e
commit 2911220054
11 changed files with 127 additions and 48 deletions
@@ -21,13 +21,13 @@ from types import SimpleNamespace
import pytest
from langbot.pkg.box.backend import BaseSandboxBackend
from langbot.pkg.box.client import ActionRPCBoxClient
from langbot.pkg.box.errors import BoxBackendUnavailableError
from langbot.pkg.box.models import BoxExecutionStatus, BoxNetworkMode, BoxSpec
from langbot.pkg.box.runtime import BoxRuntime
from langbot.pkg.box.server import BoxServerHandler
from langbot.pkg.box.service import BoxService
from langbot_plugin.box.backend import BaseSandboxBackend
from langbot_plugin.box.client import ActionRPCBoxClient
from langbot_plugin.box.errors import BoxBackendUnavailableError
from langbot_plugin.box.models import BoxExecutionStatus, BoxNetworkMode, BoxSpec
from langbot_plugin.box.runtime import BoxRuntime
from langbot_plugin.box.server import BoxServerHandler
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
@@ -22,11 +22,11 @@ import aiohttp
import pytest
from aiohttp.test_utils import TestServer
from langbot.pkg.box.client import ActionRPCBoxClient
from langbot.pkg.box.errors import BoxSessionNotFoundError
from langbot.pkg.box.models import BoxManagedProcessSpec, BoxManagedProcessStatus, BoxSpec
from langbot.pkg.box.runtime import BoxRuntime
from langbot.pkg.box.server import BoxServerHandler, create_ws_relay_app
from langbot_plugin.box.client import ActionRPCBoxClient
from langbot_plugin.box.errors import BoxSessionNotFoundError
from langbot_plugin.box.models import BoxManagedProcessSpec, BoxManagedProcessStatus, BoxSpec
from langbot_plugin.box.runtime import BoxRuntime
from langbot_plugin.box.server import BoxServerHandler, create_ws_relay_app
_logger = logging.getLogger('test.box.mcp_integration')
@@ -1,22 +1,16 @@
from __future__ import annotations
import json
from types import SimpleNamespace
from unittest.mock import AsyncMock, Mock
import pytest
# TODO: unskip once runner.py adopts TYPE_CHECKING guard to break the circular import
pytest.skip(
'circular import between runner ↔ app; will be unblocked once resolved',
allow_module_level=True,
)
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
import langbot_plugin.api.entities.builtin.provider.message as provider_message
import langbot_plugin.api.entities.builtin.provider.session as provider_session
import json # noqa: E402
from types import SimpleNamespace # noqa: E402
from unittest.mock import AsyncMock, Mock # noqa: E402
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query # noqa: E402
import langbot_plugin.api.entities.builtin.provider.message as provider_message # noqa: E402
import langbot_plugin.api.entities.builtin.provider.session as provider_session # noqa: E402
from langbot.pkg.provider.runners.localagent import LocalAgentRunner # noqa: E402
from langbot.pkg.provider.runners.localagent import LocalAgentRunner
class RecordingProvider:
@@ -168,6 +162,8 @@ async def test_localagent_uses_sandbox_exec_for_exact_calculation():
return_value=(
'When sandbox_exec is available, use it for exact calculations, statistics, '
'structured data parsing, and code execution instead of estimating mentally. '
'Unless the user explicitly asks for the script, code, or implementation details, '
'do not include the generated script in the final answer. '
'A default host workspace is mounted at /workspace for file tasks.'
)
),
@@ -12,7 +12,9 @@ import os
import sys
import tempfile
import types
from unittest.mock import Mock
from contextlib import asynccontextmanager
from types import SimpleNamespace
from unittest.mock import AsyncMock, Mock
import pytest
@@ -576,3 +578,58 @@ class TestBoxConfigParsing:
assert isinstance(s.box_config, mcp_module.MCPServerBoxConfig)
assert s.box_config.image is None
assert s.box_config.host_path_mode == 'ro'
@pytest.mark.asyncio
async def test_init_box_stdio_server_keeps_host_mount_validation_enabled(mcp_module):
    """Check that box-backed stdio startup does not disable host mount validation.

    The MCP client session and websocket transport on ``mcp_module`` are
    replaced with in-memory stand-ins, and every box service entry point the
    startup path touches is mocked. After ``_init_box_stdio_server`` runs, the
    test asserts that ``create_session`` was awaited once and ``build_spec``
    was called once, and that neither received a ``skip_host_mount_validation``
    keyword other than ``False``.
    """

    class _StubClientSession:
        """Async-context stand-in that accepts any positional arguments."""

        def __init__(self, *_args):
            pass

        async def __aenter__(self):
            return self

        async def __aexit__(self, exc_type, exc, tb):
            # Returning False means exceptions are never suppressed.
            return False

        async def initialize(self):
            return None

    @asynccontextmanager
    async def _stub_websocket_client(_url: str):
        # Yields a placeholder (read, write) stream pair.
        yield ('read-stream', 'write-stream')

    mcp_module.ClientSession = _StubClientSession
    mcp_module.websocket_client = _stub_websocket_client

    # Build the application stub and wire up the mocked box service.
    ap = _make_ap()
    ap.box_service.available = True
    ap.box_service.create_session = AsyncMock(return_value={})
    ap.box_service.build_spec = Mock(return_value='validated-spec')
    successful_exec = SimpleNamespace(ok=True, stderr='', exit_code=0)
    ap.box_service.client = SimpleNamespace(execute=AsyncMock(return_value=successful_exec))
    ap.box_service.start_managed_process = AsyncMock(return_value={})
    ap.box_service.get_managed_process_websocket_url = Mock(return_value='ws://box.example/process')

    server_config = {
        'name': 'test',
        'uuid': 'u1',
        'mode': 'stdio',
        'command': '/home/user/mcp/.venv/bin/python',
        'args': ['/home/user/mcp/server.py'],
        'box': {'host_path': '/home/user/mcp'},
    }
    session = _make_session(mcp_module, server_config, ap=ap)
    session._detect_install_command = Mock(
        return_value='pip install --no-cache-dir -r /workspace/requirements.txt'
    )

    await session._init_box_stdio_server()
    await session.exit_stack.aclose()

    # Session creation: awaited once, validation bypass flag absent or False.
    create_session_mock = ap.box_service.create_session
    assert create_session_mock.await_count == 1
    assert create_session_mock.await_args.kwargs.get('skip_host_mount_validation', False) is False

    # Spec building: called once, validation bypass flag absent or False.
    build_spec_mock = ap.box_service.build_spec
    assert build_spec_mock.call_count == 1
    assert build_spec_mock.call_args.kwargs.get('skip_host_mount_validation', False) is False
@@ -7,6 +7,7 @@ import pytest
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
from langbot.pkg.provider.tools.loaders.native import NativeToolLoader
from langbot.pkg.provider.tools.toolmgr import ToolManager
@@ -61,3 +62,21 @@ async def test_tool_manager_routes_native_tool_calls():
result = await manager.execute_func_call('sandbox_exec', {'cmd': 'pwd'}, query=Mock())
assert result == {'backend': 'fake'}
@pytest.mark.asyncio
async def test_native_tool_loader_hides_sandbox_exec_when_box_unavailable():
    """With ``box_service.available`` False, the loader lists no tools and denies ``sandbox_exec``."""
    unavailable_box = SimpleNamespace(available=False)
    app_stub = SimpleNamespace(box_service=unavailable_box)
    loader = NativeToolLoader(app_stub)

    listed_tools = await loader.get_tools()
    assert listed_tools == []

    sandbox_exec_known = await loader.has_tool('sandbox_exec')
    assert sandbox_exec_known is False
@pytest.mark.asyncio
async def test_native_tool_loader_exposes_sandbox_exec_when_box_available():
    """With ``box_service.available`` True, the loader lists exactly ``sandbox_exec`` and reports it present."""
    available_box = SimpleNamespace(available=True)
    app_stub = SimpleNamespace(box_service=available_box)
    loader = NativeToolLoader(app_stub)

    tool_names = []
    for tool in await loader.get_tools():
        tool_names.append(tool.name)
    assert tool_names == ['sandbox_exec']

    sandbox_exec_known = await loader.has_tool('sandbox_exec')
    assert sandbox_exec_known is True