feat(box/mcp): integrate MCP stdio with Box sandbox — auto-isolation, dep install, security

## Summary

  When Podman/Docker is available, all stdio-mode MCP servers now automatically
  run inside Box containers with dependency installation, path rewriting, and
  lifecycle management. When no container runtime exists, LangBot starts normally
  and stdio MCP falls back to host-direct execution.

  ## What changed

  ### MCP stdio → Box integration (mcp.py)
  - Add `MCPServerBoxConfig` pydantic model for structured box configuration
    with validation and defaults (network, host_path_mode, timeouts, resources)
  - Auto-infer `host_path` from command/args with venv detection: recognizes
    `.venv/bin/python` patterns and walks up to the project root
  - Rewrite host paths to container `/workspace` paths transparently
  - Replace venv python commands with container-native `python`
  - Auto-detect `pyproject.toml`/`setup.py`/`requirements.txt` and run
    `pip install` inside the container before starting the MCP server
  - Copy project to `/tmp` before install to handle read-only mounts
  - Add retry with exponential backoff (3 retries, 2s/4s/8s delays)
  - Add Box managed process health monitoring (poll every 5s)
  - Fix session leak: `_cleanup_box_stdio_session()` now runs in `finally`
    block of `_lifecycle_loop`, covering all exit paths
  - Fix retry logic: `_ready_event` is now set only on success or after all
    retries are exhausted, not on the first failure
  - Enhance `get_runtime_info_dict()` with `box_session_id` and `box_enabled`

  ### Box security (security.py — new)
  - `validate_sandbox_security()` blocks dangerous host paths:
    `/etc`, `/proc`, `/sys`, `/dev`, `/root`, `/boot`, `/run`,
    docker.sock, podman socket
  - Called at the start of `CLISandboxBackend.start_session()`

  ### Box models (models.py)
  - Add `BoxHostMountMode.NONE` — skips volume mount entirely
  - Adjust `validate_host_mount_consistency` to allow arbitrary workdir
    when `host_path_mode=NONE`

  ### Box backend (backend.py)
  - Add `validate_sandbox_security()` call in `start_session()`
  - Add `langbot.box.config_hash` label on containers for drift detection
  - Handle `BoxHostMountMode.NONE` — skip `-v` mount arg
  - Add `cleanup_orphaned_containers()` to base class (no-op default) and
    CLI implementation (single batched `rm -f` command)

  ### Box runtime (runtime.py)
  - Call `cleanup_orphaned_containers()` during `initialize()` to remove
    lingering containers from previous runs

  ### Box service (service.py)
  - Graceful degradation: `initialize()` catches runtime errors and sets
    `available=False` instead of crashing LangBot startup
  - Add `available` property and guard on `execute_sandbox_tool()`
  - Add `skip_host_mount_validation` parameter to `build_spec()` and
    `create_session()` — MCP paths are admin-configured and trusted,
    bypassing `allowed_host_mount_roots` restrictions meant for
    LLM-generated sandbox_exec commands

  ### Default behavior
  - stdio MCP servers automatically use Box when `box_service.available`
    is True (Podman/Docker detected); no explicit `box` config needed
  - When no container runtime exists, falls back to host-direct stdio
  - MCP Box defaults: `network=on` (for pip install), `read_only_rootfs=false`
    (for site-packages), `host_path_mode=ro`, `startup_timeout=120s`

  ### Tests
  - `test_box_security.py`: blocked paths, safe paths, subpath rejection
  - `test_mcp_box_integration.py`: config model, path rewriting, venv
    unwrap, host_path inference, payload building, runtime info, box
    availability check
  - `test_box_service.py`: `BoxHostMountMode.NONE` validation tests
This commit is contained in:
youhuanghe
2026-03-21 04:34:08 +00:00
committed by WangCham
parent c802dc8029
commit e8aa7b2e6d
13 changed files with 1543 additions and 41 deletions

View File

@@ -0,0 +1,103 @@
from __future__ import annotations
import asyncio
import datetime as dt
from unittest.mock import Mock
import pytest
from langbot.pkg.box.backend import BaseSandboxBackend
from langbot.pkg.box.models import BoxManagedProcessSpec, BoxManagedProcessStatus, BoxSessionInfo, BoxSpec
from langbot.pkg.box.runtime import BoxRuntime
_UTC = dt.timezone.utc
class FakeManagedProcessBackend(BaseSandboxBackend):
    """In-memory sandbox backend used to exercise managed-process tracking.

    Sessions are fabricated from the incoming spec without touching any
    container runtime; the only real resource it creates is the ``sh -lc cat``
    child process returned by :meth:`start_managed_process`.
    """

    name = 'fake-managed'

    # Spec attributes that are copied verbatim onto the fabricated session.
    _MIRRORED_FIELDS = (
        'image',
        'network',
        'host_path',
        'host_path_mode',
        'cpus',
        'memory_mb',
        'pids_limit',
        'read_only_rootfs',
    )

    def __init__(self, logger: Mock):
        super().__init__(logger)

    async def is_available(self) -> bool:
        """Report the backend as always usable."""
        return True

    async def start_session(self, spec: BoxSpec) -> BoxSessionInfo:
        """Build a ``BoxSessionInfo`` that echoes the fields of *spec*.

        ``created_at`` and ``last_used_at`` share one UTC timestamp taken at
        call time.
        """
        timestamp = dt.datetime.now(_UTC)
        mirrored = {field: getattr(spec, field) for field in self._MIRRORED_FIELDS}
        return BoxSessionInfo(
            session_id=spec.session_id,
            backend_name=self.name,
            backend_session_id=f'backend-{spec.session_id}',
            created_at=timestamp,
            last_used_at=timestamp,
            **mirrored,
        )

    async def exec(self, session: BoxSessionInfo, spec: BoxSpec):
        """One-shot execution is not supported by this fake."""
        raise NotImplementedError

    async def stop_session(self, session: BoxSessionInfo):
        """Nothing to tear down; always returns ``None``."""
        return None

    async def start_managed_process(
        self, session: BoxSessionInfo, spec: BoxManagedProcessSpec
    ) -> asyncio.subprocess.Process:
        """Spawn ``sh -lc cat`` with stdin, stdout and stderr all piped.

        *session* and *spec* are ignored; ``cat`` reads from its piped stdin,
        giving the tests a live child process to track.
        """
        pipe = asyncio.subprocess.PIPE
        process = await asyncio.create_subprocess_exec(
            'sh',
            '-lc',
            'cat',
            stdin=pipe,
            stdout=pipe,
            stderr=pipe,
        )
        return process
@pytest.mark.asyncio
async def test_runtime_start_managed_process_tracks_status():
    """Starting a managed process reports RUNNING and echoes command/args.

    The status is checked twice: on the dict returned by
    ``start_managed_process`` and on a later ``get_managed_process`` lookup.
    """
    logger = Mock()
    runtime = BoxRuntime(logger=logger, backends=[FakeManagedProcessBackend(logger)], session_ttl_sec=300)
    await runtime.initialize()

    # Shut the runtime down even when an assertion fails, so the child process
    # spawned by the fake backend is not left behind by a failing test run.
    try:
        session_spec = BoxSpec.model_validate({'cmd': 'echo bootstrap', 'session_id': 'mcp-session'})
        await runtime.create_session(session_spec)

        process_info = await runtime.start_managed_process(
            'mcp-session',
            BoxManagedProcessSpec(command='python', args=['-m', 'demo'], cwd='/workspace'),
        )

        assert process_info['session_id'] == 'mcp-session'
        assert process_info['status'] == BoxManagedProcessStatus.RUNNING.value
        assert process_info['command'] == 'python'
        assert process_info['args'] == ['-m', 'demo']

        queried = runtime.get_managed_process('mcp-session')
        assert queried['status'] == BoxManagedProcessStatus.RUNNING.value
    finally:
        await runtime.shutdown()
@pytest.mark.asyncio
async def test_runtime_does_not_reap_session_with_running_managed_process():
    """A session past its TTL survives reaping while its managed process runs.

    The runtime is created with a 1-second TTL and the session's
    ``last_used_at`` is pushed 120 seconds into the past, so only the running
    managed process can explain the session still being present afterwards.
    """
    logger = Mock()
    runtime = BoxRuntime(logger=logger, backends=[FakeManagedProcessBackend(logger)], session_ttl_sec=1)
    await runtime.initialize()

    # Shut the runtime down even when an assertion fails, so the child process
    # spawned by the fake backend is not left behind by a failing test run.
    try:
        session_spec = BoxSpec.model_validate({'cmd': 'echo bootstrap', 'session_id': 'mcp-session'})
        await runtime.create_session(session_spec)
        await runtime.start_managed_process(
            'mcp-session',
            BoxManagedProcessSpec(command='python', args=['-m', 'demo'], cwd='/workspace'),
        )

        # Age the session well beyond the TTL before triggering the reaper.
        runtime._sessions['mcp-session'].info.last_used_at = dt.datetime.now(_UTC) - dt.timedelta(seconds=120)
        await runtime._reap_expired_sessions_locked()

        assert 'mcp-session' in runtime._sessions
    finally:
        await runtime.shutdown()

View File

@@ -0,0 +1,59 @@
from __future__ import annotations
import pytest
from langbot.pkg.box.errors import BoxValidationError
from langbot.pkg.box.models import BoxHostMountMode, BoxNetworkMode, BoxSpec
from langbot.pkg.box.security import BLOCKED_HOST_PATHS, validate_sandbox_security
def _make_spec(**overrides) -> BoxSpec:
    """Return a ``BoxSpec`` built from baseline test fields plus *overrides*.

    Any keyword passed by the caller replaces the baseline value of the same
    name; unknown keywords are forwarded to ``BoxSpec`` unchanged.
    """
    fields = {
        'session_id': 'test-session',
        'cmd': 'echo hi',
        'image': 'python:3.11-slim',
        **overrides,
    }
    return BoxSpec(**fields)
class TestValidateSandboxSecurity:
    """Checks for ``validate_sandbox_security`` host-path screening."""

    # Host paths that the validator is expected to reject outright.
    _REJECTED_PATHS = (
        '/etc',
        '/proc',
        '/sys',
        '/dev',
        '/root',
        '/boot',
        '/run',
        '/var/run',
        '/run/docker.sock',
        '/var/run/docker.sock',
        '/run/podman',
        '/var/run/podman',
    )

    def test_no_host_path_passes(self):
        """A spec without any host path must be accepted."""
        unmounted = _make_spec(host_path=None)
        validate_sandbox_security(unmounted)  # should not raise

    def test_safe_host_path_passes(self):
        """An ordinary project directory must be accepted."""
        harmless = _make_spec(host_path='/home/user/my-project')
        validate_sandbox_security(harmless)  # should not raise

    @pytest.mark.parametrize('blocked', _REJECTED_PATHS)
    def test_blocked_paths_rejected(self, blocked):
        """Each listed path is refused with a 'blocked for security' error."""
        unsafe = _make_spec(host_path=blocked)
        with pytest.raises(BoxValidationError, match='blocked for security'):
            validate_sandbox_security(unsafe)

    def test_blocked_subpath_rejected(self):
        """A directory nested under a blocked path is refused as well."""
        nested = _make_spec(host_path='/etc/nginx')
        with pytest.raises(BoxValidationError, match='blocked for security'):
            validate_sandbox_security(nested)

    def test_path_starting_with_blocked_prefix_but_different_dir_passes(self):
        """Sharing a string prefix with a blocked path is not enough to block."""
        # /etcetera is NOT /etc
        lookalike = _make_spec(host_path='/etcetera/data')
        validate_sandbox_security(lookalike)  # should not raise

    def test_blocked_host_paths_is_frozenset(self):
        """The exported block list is a ``frozenset``."""
        assert isinstance(BLOCKED_HOST_PATHS, frozenset)

View File

@@ -19,6 +19,7 @@ from langbot.pkg.box.models import (
BoxExecutionResult,
BoxExecutionStatus,
BoxHostMountMode,
BoxManagedProcessSpec,
BoxNetworkMode,
BoxProfile,
BoxSessionInfo,
@@ -60,6 +61,12 @@ class _InProcessBoxRuntimeClient(BoxRuntimeClient):
async def create_session(self, spec):
    """Forward session creation to the wrapped in-process runtime."""
    runtime = self._runtime
    created = await runtime.create_session(spec)
    return created
async def start_managed_process(self, session_id: str, spec: BoxManagedProcessSpec):
    """Forward a managed-process start request to the wrapped runtime."""
    runtime = self._runtime
    started = await runtime.start_managed_process(session_id, spec)
    return started
async def get_managed_process(self, session_id: str):
    """Return the wrapped runtime's managed-process record for *session_id*.

    The runtime lookup itself is a plain (non-awaited) call; this coroutine
    only wraps it.
    """
    runtime = self._runtime
    return runtime.get_managed_process(session_id)
def _can_open_test_socket() -> bool:
try:
@@ -1191,3 +1198,46 @@ async def test_remote_client_exec_raises_conflict_error():
await client.shutdown()
finally:
await server.close()
# ── BoxHostMountMode.NONE tests ─────────────────────────────────────
class TestBoxHostMountModeNone:
    """Validation behaviour of ``BoxSpec`` around ``BoxHostMountMode.NONE``."""

    def test_none_mode_is_valid_enum(self):
        """The NONE member exists and serialises to ``'none'``."""
        assert BoxHostMountMode.NONE.value == 'none'

    def test_spec_with_none_mode_skips_workdir_check(self):
        """When host_path_mode is NONE, workdir validation is skipped."""
        fields = {
            'session_id': 'test',
            'cmd': 'echo hi',
            'host_path': '/home/user/data',
            'host_path_mode': BoxHostMountMode.NONE,
            'workdir': '/opt/custom',  # Not under /workspace, should be allowed
        }
        built = BoxSpec(**fields)
        assert built.host_path_mode == BoxHostMountMode.NONE
        assert built.workdir == '/opt/custom'

    def test_spec_with_rw_mode_requires_workspace_workdir(self):
        """When host_path_mode is RW, workdir must be under /workspace."""
        fields = {
            'session_id': 'test',
            'cmd': 'echo hi',
            'host_path': '/home/user/data',
            'host_path_mode': BoxHostMountMode.READ_WRITE,
            'workdir': '/opt/custom',
        }
        # NOTE(review): Exception is very broad; narrow it once the concrete
        # error type raised by BoxSpec validation is confirmed.
        with pytest.raises(Exception):
            BoxSpec(**fields)

    def test_spec_with_ro_mode_requires_workspace_workdir(self):
        """When host_path_mode is RO, workdir must be under /workspace."""
        fields = {
            'session_id': 'test',
            'cmd': 'echo hi',
            'host_path': '/home/user/data',
            'host_path_mode': BoxHostMountMode.READ_ONLY,
            'workdir': '/opt/custom',
        }
        # NOTE(review): Exception is very broad; narrow it once the concrete
        # error type raised by BoxSpec validation is confirmed.
        with pytest.raises(Exception):
            BoxSpec(**fields)