From ec2d21fe635b2cadd5a8e9529504efac3a13d25f Mon Sep 17 00:00:00 2001 From: Junyan Qin Date: Wed, 20 May 2026 17:07:53 +0800 Subject: [PATCH] feat(box): add box.enabled toggle and gate consumers on availability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make the Box sandbox runtime optional. When ``box.enabled`` is false in config (or when an enabled Box fails to connect), every dependent feature degrades to the same disabled-state UX rather than crashing or silently falling back to less safe code paths. Backend: - config.yaml: new top-level ``box.enabled: true`` flag (default true) - BoxService: - Read box.enabled on construction - initialize() short-circuits when disabled — no remote WS connect, no stdio subprocess fork - _on_runtime_disconnect is a no-op when disabled (no reconnect loop on a deliberately-off service) - get_status() now exposes ``enabled`` so the frontend can tell "disabled in config" from "configured but failed" - MCP stdio loader (mcp_stdio.uses_box_stdio): requires box_service to be available, not just installed - MCP _init_stdio_python_server: when ap.box_service exists but is unavailable, refuse the stdio server with an actionable error instead of silently falling through to host-stdio (which bypasses the sandbox the operator asked for). Setups without ap.box_service installed at all keep the legacy host-stdio fallback for pre-Box dev mode - SkillService._require_box_for_write: refuses create/update/install/ write_skill_file when ap.box_service is installed but unavailable. Distinguishes disabled vs failed in the error message so the UI can surface the right hint. Legacy setups (no ap.box_service) keep the local fallback path — that distinction is what keeps the existing local-skills tests valid Tests: - Box disabled-state behavior (4 cases) - Skill write refusal in disabled & failed states (7 cases) - MCP stdio runtime info policy updated to match new refuse-when-down behavior Co-Authored-By: Claude Opus 4.7 (1M context) --- src/langbot/pkg/api/http/service/skill.py | 33 ++++++++ src/langbot/pkg/box/service.py | 40 +++++++++ src/langbot/pkg/provider/tools/loaders/mcp.py | 20 +++++ .../pkg/provider/tools/loaders/mcp_stdio.py | 9 +- src/langbot/templates/config.yaml | 7 ++ tests/unit_tests/box/test_box_service.py | 61 ++++++++++++++ .../provider/test_mcp_box_integration.py | 11 ++- tests/unit_tests/test_skill_service.py | 84 +++++++++++++++++++ 8 files changed, 261 insertions(+), 4 deletions(-) diff --git a/src/langbot/pkg/api/http/service/skill.py b/src/langbot/pkg/api/http/service/skill.py index fa6c5bdc..d02ae45c 100644 --- a/src/langbot/pkg/api/http/service/skill.py +++ b/src/langbot/pkg/api/http/service/skill.py @@ -74,6 +74,34 @@ class SkillService: return box_service return None + def _require_box_for_write(self, action: str) -> None: + """Refuse a write operation when Box is installed but unavailable. + + Distinguishes three states: + - Box available → no-op (caller proceeds via the Box delegate path). + - Box installed but disabled by config or currently failed → raise + with a clear, actionable message. The frontend translates this to + a banner / disabled affordance. + - Box not installed at all (legacy / pre-Box dev mode, no + ``ap.box_service`` attribute) → also no-op so the local-skills + fallback still works for that minimal setup. + """ + ap_box = getattr(self.ap, 'box_service', None) + if ap_box is None: + return # legacy mode, allow local fallback + if getattr(ap_box, 'available', False): + return # Box is up, delegate path will be used + if not getattr(ap_box, 'enabled', True): + reason = 'disabled in config (box.enabled = false)' + else: + connector_error = getattr(ap_box, '_connector_error', '') or 'currently unavailable' + reason = f'unavailable: {connector_error}' + raise ValueError( + f'{action} requires the Box runtime, which is {reason}. ' + f'Enable Box in config.yaml (box.enabled = true) and ensure the ' + f'runtime is reachable before retrying.' + ) + @staticmethod def _serialize_skill(skill: dict) -> dict: return {field: skill.get(field) for field in _PUBLIC_SKILL_FIELDS if field in skill} @@ -100,6 +128,7 @@ class SkillService: return await self.get_skill(name) async def create_skill(self, data: dict) -> dict: + self._require_box_for_write('Creating a skill') box_service = self._box_service() if box_service is not None: created = await box_service.create_skill(data) @@ -146,6 +175,7 @@ class SkillService: return created async def update_skill(self, skill_name: str, data: dict) -> dict: + self._require_box_for_write('Editing a skill') box_service = self._box_service() if box_service is not None: updated = await box_service.update_skill(skill_name, data) @@ -266,6 +296,7 @@ class SkillService: } async def write_skill_file(self, skill_name: str, path: str, content: str) -> dict: + self._require_box_for_write('Editing skill files') box_service = self._box_service() if box_service is not None: result = await box_service.write_skill_file(skill_name, path, content) @@ -294,6 +325,7 @@ class SkillService: } async def install_from_github(self, data: dict) -> list[dict]: + self._require_box_for_write('Installing a skill from GitHub') owner = str(data['owner']).strip() repo = str(data['repo']).strip() release_tag = str(data.get('release_tag', '')).strip() @@ -375,6 +407,7 @@ class SkillService: source_paths: list[str] | None = None, source_path: str = '', ) -> list[dict]: + self._require_box_for_write('Installing a skill from upload') box_service = self._box_service() if box_service is not None: installed = await box_service.install_skill_zip( diff --git a/src/langbot/pkg/box/service.py b/src/langbot/pkg/box/service.py index 038d8119..77b7ad92 100644 --- a/src/langbot/pkg/box/service.py +++ b/src/langbot/pkg/box/service.py @@ -46,8 +46,12 @@ class BoxService: output_limit_chars: int = 4000, ): self.ap = ap + self._enabled = self._load_enabled() self._runtime_connector: BoxRuntimeConnector | None = None if client is None: + # Always construct a connector — its __init__ is side-effect free + # (no I/O, no subprocess). When ``box.enabled = false`` we simply + # skip ``connector.initialize()`` so no connection is attempted. self._runtime_connector = BoxRuntimeConnector(ap, runtime_disconnect_callback=self._on_runtime_disconnect) client = self._runtime_connector.client self.client = client @@ -64,8 +68,27 @@ class BoxService: self._connector_error: str = '' self._reconnecting = False + @property + def enabled(self) -> bool: + """Whether Box is enabled in config. False means the operator has + deliberately turned the sandbox off via ``box.enabled = false``. + Disabled and "enabled but unavailable" are reported as the same + ``available = False`` to consumers, but distinguished in get_status.""" + return self._enabled + async def initialize(self): self._ensure_default_workspace() + if not self._enabled: + # Disabled by config: do NOT connect to a remote runtime, do NOT + # fork a stdio subprocess. Every consumer of box_service should + # gate on ``available`` and degrade gracefully. + self._available = False + self._connector_error = 'Box runtime is disabled in config (box.enabled = false)' + self.ap.logger.info( + 'Box runtime disabled by config; sandbox features (exec/read/write/edit, ' + 'skill add/edit, stdio MCP) will be unavailable.' + ) + return try: if self._runtime_connector is not None: await self._runtime_connector.initialize() @@ -86,7 +109,11 @@ class BoxService: """Called by the connector when the Box runtime connection drops. Spawns a background reconnection loop so the caller is not blocked. + Skipped entirely when Box is disabled by config — that path should + never have connected in the first place. """ + if not self._enabled: + return if self._reconnecting: return # Another reconnect loop is already running self._reconnecting = True @@ -523,6 +550,16 @@ class BoxService: skills_root = os.path.join(self.host_root, skills_root) return os.path.realpath(os.path.abspath(skills_root)) + def _load_enabled(self) -> bool: + """Read ``box.enabled`` (top-level, not ``box.local.*``). Default True + — disabling is opt-in. Accepts bool, ``'true'``/``'false'`` strings, + and the standard env-overridden truthy values that + ``LoadConfigStage._apply_env_overrides_to_config`` produces.""" + raw = _get_box_config(self.ap).get('enabled', True) + if isinstance(raw, bool): + return raw + return str(raw).strip().lower() not in ('false', '0', 'no', 'off', '') + def _load_custom_image(self) -> str | None: raw = str(self._local_config().get('image', '') or '').strip() return raw or None @@ -714,6 +751,7 @@ class BoxService: if not self._available: return { 'available': False, + 'enabled': self._enabled, 'profile': self.profile.name, 'recent_error_count': len(self._recent_errors), 'connector_error': self._connector_error, @@ -725,6 +763,7 @@ class BoxService: # heartbeat hasn't flipped _available yet. return { 'available': False, + 'enabled': self._enabled, 'profile': self.profile.name, 'recent_error_count': len(self._recent_errors), 'connector_error': str(exc), @@ -732,6 +771,7 @@ class BoxService: return { **runtime_status, 'available': True, + 'enabled': self._enabled, 'profile': self.profile.name, 'recent_error_count': len(self._recent_errors), } diff --git a/src/langbot/pkg/provider/tools/loaders/mcp.py b/src/langbot/pkg/provider/tools/loaders/mcp.py index 0e5c07c2..0ecc3241 100644 --- a/src/langbot/pkg/provider/tools/loaders/mcp.py +++ b/src/langbot/pkg/provider/tools/loaders/mcp.py @@ -90,6 +90,26 @@ class RuntimeMCPSession: await self._box_stdio_runtime.initialize() return + # Box is configured (ap.box_service exists) but currently unavailable + # (disabled by config or connection failed). Refuse stdio MCP rather + # than silently falling through to host-stdio — the operator asked + # for the sandbox and the failure mode should be visible. + box_service = getattr(self.ap, 'box_service', None) + if box_service is not None and not getattr(box_service, 'available', False): + connector_error = getattr(box_service, '_connector_error', '') or 'currently unavailable' + if not getattr(box_service, 'enabled', True): + reason = 'disabled in config (box.enabled = false)' + else: + reason = f'unavailable: {connector_error}' + raise RuntimeError( + f'Stdio MCP server "{self.server_name}" requires the Box runtime, ' + f'which is {reason}. Either enable Box in config.yaml ' + f'(box.enabled = true) and ensure the runtime is healthy, ' + f'or switch this MCP server to http/sse transport.' + ) + + # Legacy: no box_service installed at all (pre-Box dev mode). Fall + # through to host-stdio for backward compatibility. server_params = StdioServerParameters( command=self.server_config['command'], args=self.server_config['args'], diff --git a/src/langbot/pkg/provider/tools/loaders/mcp_stdio.py b/src/langbot/pkg/provider/tools/loaders/mcp_stdio.py index 95082e15..2af7b938 100644 --- a/src/langbot/pkg/provider/tools/loaders/mcp_stdio.py +++ b/src/langbot/pkg/provider/tools/loaders/mcp_stdio.py @@ -105,7 +105,14 @@ class BoxStdioSessionRuntime: def uses_box_stdio(self) -> bool: if self.server_config.get('mode') != 'stdio': return False - return getattr(self.ap, 'box_service', None) is not None + box_service = getattr(self.ap, 'box_service', None) + if box_service is None: + return False + # When Box is configured but currently unavailable (disabled or + # connection failed), do NOT silently fall through to host-stdio — + # that would bypass the sandbox the operator asked for. The caller + # is expected to refuse the stdio MCP server with a clear error. + return bool(getattr(box_service, 'available', False)) async def initialize(self) -> None: await self._wait_for_box_runtime() diff --git a/src/langbot/templates/config.yaml b/src/langbot/templates/config.yaml index ec70cd7f..ca63b43e 100644 --- a/src/langbot/templates/config.yaml +++ b/src/langbot/templates/config.yaml @@ -105,6 +105,13 @@ monitoring: # Number of expired rows to delete per table batch delete_batch_size: 1000 box: + # Master switch for the Box sandbox runtime. When false, LangBot does NOT + # attempt to connect to a remote Box runtime nor start a local stdio Box + # subprocess. Disabling Box also disables every feature that depends on it: + # the native sandbox tools (exec/read/write/edit/glob/grep), the activate + # skill tool, skill add/edit, and stdio-mode MCP servers. Skills can still + # be listed read-only and http/sse MCP servers continue to work. + enabled: true backend: 'local' # 'local' (Docker/nsjail), 'docker', 'nsjail', or 'e2b'. BOX_BACKEND env var takes precedence. runtime: endpoint: '' # External Box Runtime base URL, e.g. 'ws://127.0.0.1:5410'. Leave empty for local auto-managed runtime. diff --git a/tests/unit_tests/box/test_box_service.py b/tests/unit_tests/box/test_box_service.py index 1f87b073..e154fe33 100644 --- a/tests/unit_tests/box/test_box_service.py +++ b/tests/unit_tests/box/test_box_service.py @@ -152,8 +152,10 @@ def make_app( profile: str = 'default', host_root: str = '', workspace_quota_mb: int | None = None, + enabled: bool = True, ): box_config = { + 'enabled': enabled, 'backend': 'local', 'runtime': {'endpoint': ''}, 'local': { @@ -1225,6 +1227,65 @@ class TestBoxHostMountModeNone: ) +class TestBoxDisabledByConfig: + """``box.enabled = false`` must keep the BoxService usable as a status + surface but skip every connection attempt and report unavailable.""" + + @pytest.mark.asyncio + async def test_initialize_skips_connector_when_disabled(self): + logger = Mock() + app = make_app(logger, enabled=False) + client = Mock(spec=BoxRuntimeClient) + client.initialize = AsyncMock() + service = BoxService(app, client=client) + + await service.initialize() + + # The client must not be touched; we did not even open a connection. + client.initialize.assert_not_awaited() + assert service.enabled is False + assert service.available is False + # The reason is captured so the dashboard / UI can show it. + assert 'disabled' in service._connector_error.lower() + + @pytest.mark.asyncio + async def test_get_status_reports_disabled(self): + logger = Mock() + service = BoxService(make_app(logger, enabled=False), client=Mock(spec=BoxRuntimeClient)) + await service.initialize() + + status = await service.get_status() + + assert status['available'] is False + assert status['enabled'] is False + assert 'disabled' in status['connector_error'].lower() + + @pytest.mark.asyncio + async def test_get_status_distinguishes_enabled_but_unavailable(self): + logger = Mock() + client = Mock(spec=BoxRuntimeClient) + client.initialize = AsyncMock(side_effect=RuntimeError('docker daemon not running')) + service = BoxService(make_app(logger, enabled=True), client=client) + + await service.initialize() + + status = await service.get_status() + assert status['available'] is False + assert status['enabled'] is True + assert 'docker daemon' in status['connector_error'] + + @pytest.mark.asyncio + async def test_disconnect_callback_is_no_op_when_disabled(self): + logger = Mock() + service = BoxService(make_app(logger, enabled=False), client=Mock(spec=BoxRuntimeClient)) + + # Should be safe to fire; must not flip reconnect state on a disabled + # service. If it tried to schedule a reconnect, the test would hang. + await service._on_runtime_disconnect(connector=Mock()) + + assert service._reconnecting is False + + class TestBuildSkillExtraMounts: """Robustness of skill mount construction against a stale skill cache. diff --git a/tests/unit_tests/provider/test_mcp_box_integration.py b/tests/unit_tests/provider/test_mcp_box_integration.py index 31a7c15b..0123af4b 100644 --- a/tests/unit_tests/provider/test_mcp_box_integration.py +++ b/tests/unit_tests/provider/test_mcp_box_integration.py @@ -561,7 +561,12 @@ class TestGetRuntimeInfoDict: assert info['box_session_id'] == 'mcp-shared' assert info['box_enabled'] is True - def test_stdio_session_waits_for_unavailable_box_runtime(self, mcp_module): + def test_stdio_session_refuses_when_box_unavailable(self, mcp_module): + """Policy: when Box is configured but unavailable (disabled in config + OR connection failed), stdio MCP servers are NOT treated as box-stdio. + ``_init_stdio_python_server`` will raise a clear refusal at start + time; until then, the runtime info simply omits box_session_id so the + UI can render the disabled state cleanly.""" ap = _make_ap() ap.box_service.available = False s = _make_session( @@ -576,8 +581,8 @@ class TestGetRuntimeInfoDict: ap=ap, ) info = s.get_runtime_info_dict() - assert info['box_session_id'] == 'mcp-shared' - assert info['box_enabled'] is True + assert 'box_session_id' not in info + assert 'box_enabled' not in info def test_stdio_session_without_box_service_uses_local_stdio(self, mcp_module): ap = _make_ap() diff --git a/tests/unit_tests/test_skill_service.py b/tests/unit_tests/test_skill_service.py index 178563f3..fcabc10d 100644 --- a/tests/unit_tests/test_skill_service.py +++ b/tests/unit_tests/test_skill_service.py @@ -72,6 +72,90 @@ def test_scan_directory_errors_when_skill_is_deeper_than_two_levels(skill_servic skill_service.scan_directory(str(tmp_path)) +class TestRequireBoxForWrite: + """Writes must refuse when ``ap.box_service`` is installed but unavailable + (disabled in config OR connection failed). Legacy setups without + ``ap.box_service`` continue to use the local fallback.""" + + def _ap_with_disabled_box(self): + return SimpleNamespace( + skill_mgr=SimpleNamespace(reload_skills=AsyncMock()), + box_service=SimpleNamespace( + available=False, + enabled=False, + _connector_error='Box runtime is disabled in config (box.enabled = false)', + ), + ) + + def _ap_with_failed_box(self): + return SimpleNamespace( + skill_mgr=SimpleNamespace(reload_skills=AsyncMock()), + box_service=SimpleNamespace( + available=False, + enabled=True, + _connector_error='docker daemon not running', + ), + ) + + @pytest.mark.asyncio + async def test_create_skill_refused_when_box_disabled(self): + service = SkillService(self._ap_with_disabled_box()) + with pytest.raises(ValueError, match='disabled in config'): + await service.create_skill({'name': 'x'}) + + @pytest.mark.asyncio + async def test_create_skill_refused_when_box_failed(self): + service = SkillService(self._ap_with_failed_box()) + with pytest.raises(ValueError, match='docker daemon not running'): + await service.create_skill({'name': 'x'}) + + @pytest.mark.asyncio + async def test_update_skill_refused_when_box_disabled(self): + service = SkillService(self._ap_with_disabled_box()) + with pytest.raises(ValueError, match='Editing a skill requires the Box runtime'): + await service.update_skill('x', {}) + + @pytest.mark.asyncio + async def test_write_skill_file_refused_when_box_disabled(self): + service = SkillService(self._ap_with_disabled_box()) + with pytest.raises(ValueError, match='Editing skill files requires the Box runtime'): + await service.write_skill_file('x', 'a.txt', 'hi') + + @pytest.mark.asyncio + async def test_install_from_github_refused_when_box_disabled(self): + service = SkillService(self._ap_with_disabled_box()) + with pytest.raises(ValueError, match='Installing a skill from GitHub'): + await service.install_from_github({'owner': 'o', 'repo': 'r', 'asset_url': 'https://example/x.zip'}) + + @pytest.mark.asyncio + async def test_install_from_zip_upload_refused_when_box_disabled(self): + service = SkillService(self._ap_with_disabled_box()) + with pytest.raises(ValueError, match='Installing a skill from upload'): + await service.install_from_zip_upload(file_bytes=b'', filename='x.zip') + + @pytest.mark.asyncio + async def test_legacy_setup_without_box_service_still_allows_local_create(self, tmp_path, monkeypatch): + """Setups that never installed ap.box_service (pre-Box dev mode) keep + using the local-skills fallback path — that's the whole point of the + ``getattr`` distinguisher in _require_box_for_write.""" + monkeypatch.setenv('LANGBOT_DATA_ROOT', str(tmp_path / 'data')) + service = SkillService(SimpleNamespace(skill_mgr=SimpleNamespace(reload_skills=AsyncMock()))) + service.get_skill_by_name = AsyncMock(return_value=None) + service.get_skill = AsyncMock( + return_value={ + 'name': 'local-skill', + 'package_root': str(tmp_path / 'data' / 'skills' / 'local-skill'), + 'description': '', + 'instructions': '', + } + ) + + # Does not raise — gate is a no-op without ap.box_service. + await service.create_skill( + {'name': 'local-skill', 'display_name': 'Local', 'description': '', 'instructions': 'hi'} + ) + + @pytest.mark.asyncio async def test_create_skill_import_preserves_existing_skill_content_when_form_fields_blank(tmp_path, monkeypatch): source_dir = tmp_path / 'external-skills' / 'manual-skill'