fix(box): tighten sandbox exposure and restore box integration coverage

This commit is contained in:
youhuanghe
2026-03-24 04:01:16 +00:00
committed by WangCham
parent 63d22b1f8e
commit 2911220054
11 changed files with 127 additions and 48 deletions

View File

@@ -279,11 +279,6 @@ class BoxService:
default_host_workspace = os.path.join(self.shared_host_root, 'default')
return os.path.realpath(os.path.abspath(default_host_workspace))
def get_managed_skills_root(self) -> str | None:
    """Return the managed ``skills`` directory under the shared host root.

    Returns:
        ``os.path.join(self.shared_host_root, 'skills')`` when
        ``shared_host_root`` is set, otherwise ``None``.
    """
    root = self.shared_host_root
    return None if root is None else os.path.join(root, 'skills')
def _ensure_default_host_workspace(self):
if self.default_host_workspace is None:
return

View File

@@ -2,8 +2,12 @@ from __future__ import annotations
import abc
import typing
from typing import TYPE_CHECKING
from ..core import app
if TYPE_CHECKING:
from ..core import app
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
import langbot_plugin.api.entities.builtin.provider.message as provider_message
preregistered_runners: list[typing.Type[RequestRunner]] = []
@@ -25,17 +29,17 @@ class RequestRunner(abc.ABC):
name: str = None
ap: app.Application
ap: 'app.Application'
pipeline_config: dict
def __init__(self, ap: app.Application, pipeline_config: dict):
def __init__(self, ap: 'app.Application', pipeline_config: dict):
self.ap = ap
self.pipeline_config = pipeline_config
@abc.abstractmethod
async def run(
self, query: core_entities.Query
) -> typing.AsyncGenerator[llm_entities.Message | llm_entities.MessageChunk, None]:
self, query: 'pipeline_query.Query'
) -> typing.AsyncGenerator['provider_message.Message | provider_message.MessageChunk', None]:
"""运行请求"""
pass

View File

@@ -2,12 +2,14 @@ from __future__ import annotations
import abc
import typing
from typing import TYPE_CHECKING
from langbot_plugin.api.entities.events import pipeline_query
from ...core import app
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
if TYPE_CHECKING:
from ...core import app
preregistered_loaders: list[typing.Type[ToolLoader]] = []
@@ -28,9 +30,9 @@ class ToolLoader(abc.ABC):
name: str = None
ap: app.Application
ap: 'app.Application'
def __init__(self, ap: app.Application):
def __init__(self, ap: 'app.Application'):
self.ap = ap
async def initialize(self):

View File

@@ -147,7 +147,6 @@ class RuntimeMCPSession:
try:
await box_service.create_session(
session_payload,
skip_host_mount_validation=True,
)
except Exception:
self.error_phase = MCPSessionErrorPhase.SESSION_CREATE
@@ -164,9 +163,7 @@ class RuntimeMCPSession:
exec_payload['cmd'] = install_cmd
exec_payload['timeout_sec'] = self.box_config.startup_timeout_sec or 120
try:
result = await box_service.client.execute(
box_service.build_spec(exec_payload, skip_host_mount_validation=True)
)
result = await box_service.client.execute(box_service.build_spec(exec_payload))
except Exception:
self.error_phase = MCPSessionErrorPhase.DEP_INSTALL
raise

View File

@@ -17,12 +17,14 @@ class NativeToolLoader(loader.ToolLoader):
self._sandbox_exec_tool: resource_tool.LLMTool | None = None
async def get_tools(self, bound_plugins: list[str] | None = None) -> list[resource_tool.LLMTool]:
    """Return the native tools this loader currently exposes.

    Args:
        bound_plugins: Accepted for interface compatibility; not consulted
            by this implementation.

    Returns:
        An empty list when ``_is_sandbox_available()`` is falsy; otherwise a
        one-element list holding the sandbox exec tool, which is built on
        first use and cached on ``self._sandbox_exec_tool``.
    """
    if not self._is_sandbox_available():
        return []

    cached_tool = self._sandbox_exec_tool
    if cached_tool is None:
        # Build once and keep the instance so later calls reuse it.
        cached_tool = self._build_sandbox_exec_tool()
        self._sandbox_exec_tool = cached_tool
    return [cached_tool]
async def has_tool(self, name: str) -> bool:
return name == SANDBOX_EXEC_TOOL_NAME
return name == SANDBOX_EXEC_TOOL_NAME and self._is_sandbox_available()
async def invoke_tool(self, name: str, parameters: dict, query: pipeline_query.Query):
if name != SANDBOX_EXEC_TOOL_NAME:
@@ -37,6 +39,10 @@ class NativeToolLoader(loader.ToolLoader):
async def shutdown(self):
    """Shut the loader down; the visible implementation performs no work."""
    return None
def _is_sandbox_available(self) -> bool:
box_service = getattr(self.ap, 'box_service', None)
return bool(getattr(box_service, 'available', False))
def _build_sandbox_exec_tool(self) -> resource_tool.LLMTool:
return resource_tool.LLMTool(
name=SANDBOX_EXEC_TOOL_NAME,

View File

@@ -1,27 +1,30 @@
from __future__ import annotations
import typing
from typing import TYPE_CHECKING
from ...core import app
from langbot.pkg.utils import importutil
from langbot.pkg.provider.tools import loaders
from langbot.pkg.provider.tools.loaders import mcp as mcp_loader, native as native_loader, plugin as plugin_loader
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
from langbot_plugin.api.entities.events import pipeline_query
if TYPE_CHECKING:
from ...core import app
importutil.import_modules_in_pkg(loaders)
class ToolManager:
"""LLM工具管理器"""
ap: app.Application
ap: 'app.Application'
native_tool_loader: native_loader.NativeToolLoader
plugin_tool_loader: plugin_loader.PluginToolLoader
mcp_tool_loader: mcp_loader.MCPLoader
def __init__(self, ap: app.Application):
def __init__(self, ap: 'app.Application'):
self.ap = ap
async def initialize(self):

View File

@@ -21,13 +21,13 @@ from types import SimpleNamespace
import pytest
from langbot.pkg.box.backend import BaseSandboxBackend
from langbot.pkg.box.client import ActionRPCBoxClient
from langbot.pkg.box.errors import BoxBackendUnavailableError
from langbot.pkg.box.models import BoxExecutionStatus, BoxNetworkMode, BoxSpec
from langbot.pkg.box.runtime import BoxRuntime
from langbot.pkg.box.server import BoxServerHandler
from langbot.pkg.box.service import BoxService
from langbot_plugin.box.backend import BaseSandboxBackend
from langbot_plugin.box.client import ActionRPCBoxClient
from langbot_plugin.box.errors import BoxBackendUnavailableError
from langbot_plugin.box.models import BoxExecutionStatus, BoxNetworkMode, BoxSpec
from langbot_plugin.box.runtime import BoxRuntime
from langbot_plugin.box.server import BoxServerHandler
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query

View File

@@ -22,11 +22,11 @@ import aiohttp
import pytest
from aiohttp.test_utils import TestServer
from langbot.pkg.box.client import ActionRPCBoxClient
from langbot.pkg.box.errors import BoxSessionNotFoundError
from langbot.pkg.box.models import BoxManagedProcessSpec, BoxManagedProcessStatus, BoxSpec
from langbot.pkg.box.runtime import BoxRuntime
from langbot.pkg.box.server import BoxServerHandler, create_ws_relay_app
from langbot_plugin.box.client import ActionRPCBoxClient
from langbot_plugin.box.errors import BoxSessionNotFoundError
from langbot_plugin.box.models import BoxManagedProcessSpec, BoxManagedProcessStatus, BoxSpec
from langbot_plugin.box.runtime import BoxRuntime
from langbot_plugin.box.server import BoxServerHandler, create_ws_relay_app
_logger = logging.getLogger('test.box.mcp_integration')

View File

@@ -1,22 +1,16 @@
from __future__ import annotations
import json
from types import SimpleNamespace
from unittest.mock import AsyncMock, Mock
import pytest
# TODO: unskip once runner.py adopts TYPE_CHECKING guard to break the circular import
pytest.skip(
'circular import between runner ↔ app; will be unblocked once resolved',
allow_module_level=True,
)
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
import langbot_plugin.api.entities.builtin.provider.message as provider_message
import langbot_plugin.api.entities.builtin.provider.session as provider_session
import json # noqa: E402
from types import SimpleNamespace # noqa: E402
from unittest.mock import AsyncMock, Mock # noqa: E402
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query # noqa: E402
import langbot_plugin.api.entities.builtin.provider.message as provider_message # noqa: E402
import langbot_plugin.api.entities.builtin.provider.session as provider_session # noqa: E402
from langbot.pkg.provider.runners.localagent import LocalAgentRunner # noqa: E402
from langbot.pkg.provider.runners.localagent import LocalAgentRunner
class RecordingProvider:
@@ -168,6 +162,8 @@ async def test_localagent_uses_sandbox_exec_for_exact_calculation():
return_value=(
'When sandbox_exec is available, use it for exact calculations, statistics, '
'structured data parsing, and code execution instead of estimating mentally. '
'Unless the user explicitly asks for the script, code, or implementation details, '
'do not include the generated script in the final answer. '
'A default host workspace is mounted at /workspace for file tasks.'
)
),

View File

@@ -12,7 +12,9 @@ import os
import sys
import tempfile
import types
from unittest.mock import Mock
from contextlib import asynccontextmanager
from types import SimpleNamespace
from unittest.mock import AsyncMock, Mock
import pytest
@@ -576,3 +578,58 @@ class TestBoxConfigParsing:
assert isinstance(s.box_config, mcp_module.MCPServerBoxConfig)
assert s.box_config.image is None
assert s.box_config.host_path_mode == 'ro'
@pytest.mark.asyncio
async def test_init_box_stdio_server_keeps_host_mount_validation_enabled(mcp_module):
    """Box-backed stdio startup must not opt out of host-mount validation.

    Runs ``_init_box_stdio_server`` against stubbed box-service calls and
    checks that neither ``create_session`` nor ``build_spec`` is handed a
    truthy ``skip_host_mount_validation`` keyword.
    """

    class FakeClientSession:
        """Async-context stand-in for the client session used by the module."""

        def __init__(self, *_args):
            pass

        async def __aenter__(self):
            return self

        async def __aexit__(self, exc_type, exc, tb):
            return False

        async def initialize(self):
            return None

    @asynccontextmanager
    async def fake_websocket_client(_url: str):
        # Yield placeholder stream objects; the fake session ignores them.
        yield ('read-stream', 'write-stream')

    # Replace the session class and transport on the module under test.
    mcp_module.ClientSession = FakeClientSession
    mcp_module.websocket_client = fake_websocket_client

    ap = _make_ap()
    box = ap.box_service
    box.available = True
    box.create_session = AsyncMock(return_value={})
    box.build_spec = Mock(return_value='validated-spec')
    install_result = SimpleNamespace(ok=True, stderr='', exit_code=0)
    box.client = SimpleNamespace(execute=AsyncMock(return_value=install_result))
    box.start_managed_process = AsyncMock(return_value={})
    box.get_managed_process_websocket_url = Mock(return_value='ws://box.example/process')

    server_config = {
        'name': 'test',
        'uuid': 'u1',
        'mode': 'stdio',
        'command': '/home/user/mcp/.venv/bin/python',
        'args': ['/home/user/mcp/server.py'],
        'box': {'host_path': '/home/user/mcp'},
    }
    session = _make_session(mcp_module, server_config, ap=ap)
    # Force the dependency-install branch so build_spec is exercised.
    session._detect_install_command = Mock(
        return_value='pip install --no-cache-dir -r /workspace/requirements.txt'
    )

    await session._init_box_stdio_server()
    await session.exit_stack.aclose()

    create_session = ap.box_service.create_session
    assert create_session.await_count == 1
    assert create_session.await_args.kwargs.get('skip_host_mount_validation', False) is False

    build_spec = ap.box_service.build_spec
    assert build_spec.call_count == 1
    assert build_spec.call_args.kwargs.get('skip_host_mount_validation', False) is False

View File

@@ -7,6 +7,7 @@ import pytest
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
from langbot.pkg.provider.tools.loaders.native import NativeToolLoader
from langbot.pkg.provider.tools.toolmgr import ToolManager
@@ -61,3 +62,21 @@ async def test_tool_manager_routes_native_tool_calls():
result = await manager.execute_func_call('sandbox_exec', {'cmd': 'pwd'}, query=Mock())
assert result == {'backend': 'fake'}
@pytest.mark.asyncio
async def test_native_tool_loader_hides_sandbox_exec_when_box_unavailable():
    """With ``box_service.available`` false, the loader lists and claims no tools."""
    fake_app = SimpleNamespace(box_service=SimpleNamespace(available=False))
    native_loader = NativeToolLoader(fake_app)

    listed_tools = await native_loader.get_tools()
    assert listed_tools == []

    knows_sandbox_exec = await native_loader.has_tool('sandbox_exec')
    assert knows_sandbox_exec is False
@pytest.mark.asyncio
async def test_native_tool_loader_exposes_sandbox_exec_when_box_available():
    """With ``box_service.available`` true, the loader lists and claims ``sandbox_exec``."""
    fake_app = SimpleNamespace(box_service=SimpleNamespace(available=True))
    native_loader = NativeToolLoader(fake_app)

    listed_tools = await native_loader.get_tools()
    tool_names = [tool.name for tool in listed_tools]
    assert tool_names == ['sandbox_exec']

    knows_sandbox_exec = await native_loader.has_tool('sandbox_exec')
    assert knows_sandbox_exec is True