fix(box): downgrade get_status.available when backend probed unavailable

Until now ``BoxService.get_status`` returned ``available: true`` whenever
the runtime connector was healthy, even if the runtime itself reported
``backend: { available: false }`` (operator selected nsjail without the
binary, Docker daemon crashed mid-session, E2B credentials wrong, ...).
The dashboard / ``useBoxStatus`` hook / skill_service gate consumed the
top-level flag and showed "connected" while every actual call to native
exec or skill management would fail.

The native-tool loader already polled ``status.backend.available``
independently and hid its tools correctly, but every other consumer
(dashboard banner, the disabled-state hint, the LLM-facing message)
disagreed with it.

Combine the two in the payload: ``available = self._available AND
status.backend.available``. When ``backend.available`` is false and the
runtime status does not already carry a ``connector_error``, we now
also surface one that names the backend ('Configured sandbox backend
"nsjail" is unavailable'), falling back to a generic "No supported
sandbox backend (Docker / nsjail / E2B) is available" message when no
backend name is reported, so the dialog shows the actionable reason
instead of an empty error pane. The detailed ``backend`` object is
preserved unchanged for the dialog.

Internal ``box_service.available`` (used by ``skill_service`` writes,
``mcp_stdio.uses_box_stdio``, the reconnect callback) is intentionally
NOT changed — it still tracks connector health only, so a backend blip
does not trigger spurious reconnect loops.

Tests:
- ``test_get_status_downgrades_available_when_backend_dead`` — exercises
  the new branch (connector OK, backend.available=false → top-level
  available=false, connector_error mentions the backend name)
- ``test_get_status_keeps_available_true_when_backend_ok`` — guards
  against regressing the happy path

Live-verified with ``box.backend: nsjail`` on macOS (no nsjail binary):
``GET /api/v1/box/status`` now returns ``available: false`` with the
named connector_error, instead of the previous misleading
``available: true``.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Junyan Qin
2026-05-20 23:38:02 +08:00
parent 68bd786f39
commit a2a9f426fa
2 changed files with 65 additions and 2 deletions

View File

@@ -768,10 +768,27 @@ class BoxService:
'recent_error_count': len(self._recent_errors),
'connector_error': str(exc),
}
return {
# Backend state can be unavailable even when the connector is healthy
# (operator selected nsjail but the binary is missing, Docker daemon
# went down after the runtime started, E2B credentials wrong, ...).
# Report the combined state in the top-level ``available`` so the
# frontend banner / ``useBoxStatus`` hook / native-tool gate all
# agree on "actually usable" rather than "connector alive". The
# detailed ``backend`` object stays in the payload so the dialog
# can still show which backend was tried.
backend_info = runtime_status.get('backend') if isinstance(runtime_status, dict) else None
backend_ok = bool(backend_info and backend_info.get('available', False))
payload = {
**runtime_status,
'available': True,
'available': backend_ok,
'enabled': self._enabled,
'profile': self.profile.name,
'recent_error_count': len(self._recent_errors),
}
if not backend_ok and 'connector_error' not in payload:
backend_name = backend_info.get('name') if backend_info else None
if backend_name:
payload['connector_error'] = f'Configured sandbox backend "{backend_name}" is unavailable'
else:
payload['connector_error'] = 'No supported sandbox backend (Docker / nsjail / E2B) is available'
return payload

View File

@@ -1274,6 +1274,52 @@ class TestBoxDisabledByConfig:
assert status['enabled'] is True
assert 'docker daemon' in status['connector_error']
@pytest.mark.asyncio
async def test_get_status_downgrades_available_when_backend_dead(self):
    """A healthy connector must not mask an unusable sandbox backend.

    The mocked runtime client answers the status call normally but reports
    ``backend.available`` as false. The service's top-level ``available``
    is expected to be false as well, ``connector_error`` is expected to
    mention the backend by name, and the ``backend`` object is expected to
    pass through untouched.
    """
    runtime_client = Mock(spec=BoxRuntimeClient)
    runtime_client.initialize = AsyncMock()
    runtime_report = {
        'backend': {'name': 'nsjail', 'available': False},
        'active_sessions': 0,
    }
    runtime_client.get_status = AsyncMock(return_value=runtime_report)

    box = BoxService(make_app(Mock(), enabled=True), client=runtime_client)
    await box.initialize()
    result = await box.get_status()

    assert result['enabled'] is True
    assert result['available'] is False
    assert 'nsjail' in result['connector_error']
    # Compare against a fresh literal so the detailed backend object is
    # checked by value, exactly as the dialog would receive it.
    assert result['backend'] == {'name': 'nsjail', 'available': False}
@pytest.mark.asyncio
async def test_get_status_keeps_available_true_when_backend_ok(self):
    """Happy path: a usable backend leaves the top-level flag true.

    The mocked runtime client reports ``backend.available`` as true, so the
    status payload is expected to stay ``available: True``, keep the
    ``backend`` object as reported, and carry no non-empty
    ``connector_error``.
    """
    runtime_client = Mock(spec=BoxRuntimeClient)
    runtime_client.initialize = AsyncMock()
    runtime_report = {
        'backend': {'name': 'docker', 'available': True},
        'active_sessions': 2,
    }
    runtime_client.get_status = AsyncMock(return_value=runtime_report)

    box = BoxService(make_app(Mock(), enabled=True), client=runtime_client)
    await box.initialize()
    result = await box.get_status()

    assert result['available'] is True
    assert result['backend'] == {'name': 'docker', 'available': True}
    # A missing key and an empty value both count as "no error overlay".
    assert not result.get('connector_error')
@pytest.mark.asyncio
async def test_disconnect_callback_is_no_op_when_disabled(self):
logger = Mock()