diff --git a/Dockerfile b/Dockerfile index 1380016d..59c9331a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,6 +6,25 @@ COPY web ./web RUN cd web && npm install && npx vite build +# Build nsjail from source so the image ships a self-contained sandbox backend +# that needs no host Docker socket. Pinned to a release tag for reproducibility. +# Multi-stage keeps the compile toolchain (bison/flex/protobuf-dev/libnl-dev) +# out of the final image; only the nsjail binary and its small runtime libs +# (libprotobuf, libnl-route-3) are carried over. +FROM python:3.12.7-slim AS nsjail-build + +ARG NSJAIL_VERSION=3.6 + +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + ca-certificates git build-essential \ + autoconf bison flex libtool pkg-config \ + protobuf-compiler libprotobuf-dev libnl-route-3-dev \ + && git clone --depth 1 --branch "${NSJAIL_VERSION}" https://github.com/google/nsjail.git /nsjail \ + && make -C /nsjail \ + && install -m 0755 /nsjail/nsjail /usr/local/bin/nsjail \ + && rm -rf /var/lib/apt/lists/* + FROM python:3.12.7-slim WORKDIR /app @@ -14,8 +33,15 @@ COPY . . COPY --from=node /app/web/dist ./web/dist +# nsjail binary built in the dedicated stage above. Self-contained sandbox +# backend; lets the Box runtime isolate code without a host Docker socket. +COPY --from=nsjail-build /usr/local/bin/nsjail /usr/local/bin/nsjail + RUN apt-get update \ && apt-get install -y --no-install-recommends gcc ca-certificates curl gnupg \ + # nsjail runtime libraries (the build toolchain stays in the nsjail-build + # stage; only these shared libs are needed to execute the binary). + && apt-get install -y --no-install-recommends libprotobuf32 libnl-route-3-200 \ # Install the Docker CLI (client only) so the optional langbot_box # service can drive the mounted host Docker socket and create sandbox # containers. 
The same image powers langbot / plugin_runtime / box; only diff --git a/pyproject.toml b/pyproject.toml index 6b847de7..977c4dbe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,7 +70,7 @@ dependencies = [ "chromadb>=1.0.0,<2.0.0", "qdrant-client (>=1.15.1,<2.0.0)", "pyseekdb==1.1.0.post3", - "langbot-plugin==0.4.1", + "langbot-plugin==0.4.2b4", "asyncpg>=0.30.0", "line-bot-sdk>=3.19.0", "matrix-nio>=0.25.2", diff --git a/src/langbot/pkg/api/http/service/mcp.py b/src/langbot/pkg/api/http/service/mcp.py index aadbcf11..e0c64b9f 100644 --- a/src/langbot/pkg/api/http/service/mcp.py +++ b/src/langbot/pkg/api/http/service/mcp.py @@ -152,7 +152,24 @@ class MCPService: coroutine = runtime_mcp_session.refresh() else: runtime_mcp_session = await self.ap.tool_mgr.mcp_tool_loader.load_mcp_server(server_config=server_data) - coroutine = runtime_mcp_session.start() + + # A transient test owns an isolated Box session. Always tear it down + # after the test completes (success or failure) so it does not leak. + test_session = runtime_mcp_session + + async def _run_and_cleanup() -> None: + try: + await test_session.start() + finally: + try: + await test_session.shutdown() + except Exception as exc: + self.ap.logger.warning( + f'Failed to tear down transient MCP test session ' + f'{test_session.server_name}: {type(exc).__name__}: {exc}' + ) + + coroutine = _run_and_cleanup() ctx = taskmgr.TaskContext.new() wrapper = self.ap.task_mgr.create_user_task( diff --git a/src/langbot/pkg/box/connector.py b/src/langbot/pkg/box/connector.py index deda0b89..2257910d 100644 --- a/src/langbot/pkg/box/connector.py +++ b/src/langbot/pkg/box/connector.py @@ -120,13 +120,19 @@ class BoxRuntimeConnector(ManagedRuntimeConnector): self._relay_port = parsed.port or _DEFAULT_PORT self._filtered_box_config = _filter_config_for_runtime(_get_box_config(ap)) - def _uses_websocket(self) -> bool: + def uses_websocket(self) -> bool: """Whether the connector should use WebSocket to reach the Box runtime. 
True when: - Running inside Docker (Box runtime is a separate container) - The ``--standalone-box`` CLI flag was passed - An explicit ``runtime.endpoint`` was configured + + When this is True the Box runtime lives in a separate process with its + own filesystem view (container, pod sidecar, or remote host), so paths + it reports (e.g. skill ``package_root``) are NOT resolvable on the + LangBot side. When False, Box runs as a stdio child process that shares + LangBot's filesystem. """ return bool( self.configured_runtime_endpoint @@ -134,6 +140,10 @@ class BoxRuntimeConnector(ManagedRuntimeConnector): or platform.use_websocket_to_connect_box_runtime() ) + # Backwards-compatible private alias. + def _uses_websocket(self) -> bool: + return self.uses_websocket() + async def initialize(self) -> None: if self._uses_websocket(): if platform.get_platform() == 'win32' and not self.configured_runtime_endpoint: diff --git a/src/langbot/pkg/box/service.py b/src/langbot/pkg/box/service.py index 6c8e8299..65844de0 100644 --- a/src/langbot/pkg/box/service.py +++ b/src/langbot/pkg/box/service.py @@ -67,6 +67,10 @@ class BoxService: self._available = False self._connector_error: str = '' self._reconnecting = False + # Optional explicit override for shares_filesystem_with_box. None means + # "derive from the connector transport". Set by tests / embedders that + # know the real LangBot<->Box filesystem topology. + self._shares_filesystem_with_box_override: bool | None = None @property def enabled(self) -> bool: @@ -148,6 +152,32 @@ class BoxService: def available(self) -> bool: return self._available + @property + def shares_filesystem_with_box(self) -> bool: + """Whether LangBot and the Box runtime share a filesystem view. + + This is True only when Box runs as a local stdio child process of + LangBot (same container/host). 
In that case paths the Box runtime + reports — notably skill ``package_root`` — resolve identically on the + LangBot side, so LangBot may validate them against its own filesystem. + + It is False for every separated deployment (Docker Compose, k8s + sidecar, ``--standalone-box``, or an explicit ``runtime.endpoint``), + where the Box runtime owns its own filesystem and LangBot must trust + the paths it reports rather than checking them locally. + + When Box is wired up with an injected client (tests, custom embeds) + there is no connector to introspect; we conservatively report False so + LangBot never wrongly drops Box-reported skills. An explicit override + can be set via ``_shares_filesystem_with_box_override`` (used by tests and any + embedder that knows the real topology). + """ + if self._shares_filesystem_with_box_override is not None: + return self._shares_filesystem_with_box_override + if self._runtime_connector is None: + return False + return not self._runtime_connector.uses_websocket() + async def execute_spec_payload( self, spec_payload: dict, @@ -191,13 +221,25 @@ class BoxService: return self._serialize_result(result) def resolve_box_session_id(self, query: pipeline_query.Query) -> str: - """Resolve the Box session_id from the pipeline's template and query variables.""" - template = ( - (query.pipeline_config or {}) - .get('ai', {}) - .get('local-agent', {}) - .get('box-session-id-template', '{launcher_type}_{launcher_id}') - ) + """Resolve the Box session_id from the pipeline's template and query variables. + + When ``system.limitation.force_box_session_id_template`` is set to a + non-empty value, that template overrides whatever the pipeline + configured. This is the authoritative SaaS guard: it runs on every + ``exec`` call, so a tenant cannot escape a single shared sandbox even + by editing the pipeline config directly through the API (which only + gates the web UI). 
+ """ + forced_template = self._forced_box_session_id_template() + if forced_template: + template = forced_template + else: + template = ( + (query.pipeline_config or {}) + .get('ai', {}) + .get('local-agent', {}) + .get('box-session-id-template', '{launcher_type}_{launcher_id}') + ) variables = dict(query.variables or {}) launcher_type = getattr(query, 'launcher_type', None) if hasattr(launcher_type, 'value'): @@ -220,14 +262,24 @@ class BoxService: all skill packages mounted, regardless of which skill is currently activated. - Skills whose ``package_root`` is missing or no longer a directory on - the LangBot-visible filesystem are skipped with a warning instead of - being passed through to the backend. Without this guard the three - backends behave inconsistently on a stale mount: nsjail refuses to - start the sandbox (failing every exec in the session), Docker - silently auto-creates a root-owned empty directory on the host, and - E2B silently skips the upload — none of which surfaces an - actionable error to the agent or operator. + Path validation is filesystem-topology dependent. When LangBot and the + Box runtime share a filesystem (local stdio mode), a skill whose + ``package_root`` is missing or no longer a directory is skipped with a + warning instead of being passed through to the backend. Without that + guard the three backends behave inconsistently on a stale mount: nsjail + refuses to start the sandbox (failing every exec in the session), + Docker silently auto-creates a root-owned empty directory on the host, + and E2B silently skips the upload — none of which surfaces an + actionable error. + + When Box runs as a separate process (Docker Compose, k8s sidecar, + ``--standalone-box``, or a remote ``runtime.endpoint``), the + ``package_root`` reported by ``list_skills`` is the Box runtime's own + filesystem path and is NOT resolvable on the LangBot side. 
Validating + it locally would wrongly drop every skill, so LangBot trusts the path + and lets the Box runtime resolve it. The Box runtime only ever reports + skills it discovered on its own filesystem, so the path is valid there + by construction. """ skill_mgr = getattr(self.ap, 'skill_mgr', None) if skill_mgr is None: @@ -235,13 +287,15 @@ class BoxService: from ..provider.tools.loaders import skill as skill_loader + validate_locally = self.shares_filesystem_with_box + visible_skills = skill_loader.get_visible_skills(self.ap, query) mounts: list[dict] = [] for skill_name, skill_data in visible_skills.items(): package_root = str(skill_data.get('package_root', '') or '').strip() if not package_root: continue - if not os.path.isdir(package_root): + if validate_locally and not os.path.isdir(package_root): self.ap.logger.warning( f'Skill "{skill_name}" package_root missing on filesystem ' f'({package_root}); skipping mount to prevent sandbox failures. ' @@ -564,6 +618,20 @@ class BoxService: raw = str(self._local_config().get('image', '') or '').strip() return raw or None + def _forced_box_session_id_template(self) -> str: + """Return the SaaS-forced sandbox-scope template, or '' when unset. + + Read from ``system.limitation.force_box_session_id_template``. A + non-empty value pins every pipeline to a single sandbox scope + (e.g. ``'{global}'``) and cannot be overridden per-pipeline. 
+ """ + limitation = ( + (self.ap.instance_config.data or {}).get('system', {}).get('limitation', {}) + if getattr(self.ap, 'instance_config', None) is not None + else {} + ) + return str(limitation.get('force_box_session_id_template', '') or '').strip() + def _load_workspace_quota_mb(self) -> int | None: raw_value = self._local_config().get('workspace_quota_mb') if raw_value in (None, ''): diff --git a/src/langbot/pkg/provider/tools/loaders/mcp.py b/src/langbot/pkg/provider/tools/loaders/mcp.py index e4370d4d..117f29cd 100644 --- a/src/langbot/pkg/provider/tools/loaders/mcp.py +++ b/src/langbot/pkg/provider/tools/loaders/mcp.py @@ -73,6 +73,13 @@ class RuntimeMCPSession: self.enable = enable self.session = None + # Transient test sessions (created from the config page "test" button, + # which carry no persisted server UUID) must NOT share the live + # "mcp-shared" Box session. Otherwise a failing test churns the shared + # session and tears down healthy, already-connected servers. Callers + # flag these via server_config['_transient'] = True. + self.is_transient = bool(server_config.get('_transient', False)) + self.exit_stack = AsyncExitStack() self.functions = [] @@ -402,6 +409,11 @@ class RuntimeMCPSession: return self._box_stdio_runtime.uses_box_stdio() def _build_box_session_id(self) -> str: + # Transient test sessions get their own isolated Box session so a + # failing/short-lived test can never disturb the shared session that + # hosts live, already-connected MCP servers. 
+ if self.is_transient: + return f'mcp-test-{self.server_uuid}' return 'mcp-shared' def _rewrite_path(self, path: str, host_path: str | None) -> str: @@ -503,10 +515,14 @@ class MCPLoader(loader.ToolLoader): - extra_args: 额外的配置参数 (可选) """ uuid_ = server_config.get('uuid') + is_transient = False if not uuid_: self.ap.logger.warning('Server UUID is None for MCP server, maybe testing in the config page.') uuid_ = str(uuid_module.uuid4()) server_config['uuid'] = uuid_ + # No persisted UUID => this is a throwaway "test" session from the + # config page. Isolate it from the shared live Box session. + is_transient = True name = server_config['name'] uuid = server_config['uuid'] @@ -519,6 +535,7 @@ class MCPLoader(loader.ToolLoader): 'uuid': uuid, 'mode': mode, 'enable': enable, + '_transient': is_transient, **extra_args, } diff --git a/src/langbot/pkg/provider/tools/loaders/mcp_stdio.py b/src/langbot/pkg/provider/tools/loaders/mcp_stdio.py index bdddcd29..ff607e66 100644 --- a/src/langbot/pkg/provider/tools/loaders/mcp_stdio.py +++ b/src/langbot/pkg/provider/tools/loaders/mcp_stdio.py @@ -293,10 +293,25 @@ class BoxStdioSessionRuntime: if not self.uses_box_stdio(): return + workspace = self._build_workspace(host_path=None) + + # Transient test sessions own their isolated Box session, so tear the + # whole session down rather than leaking it. This cannot affect live + # servers because they live in the separate shared session. + if getattr(self.owner, 'is_transient', False): + try: + await workspace.cleanup() + except Exception as exc: + self.ap.logger.warning( + f'MCP server {self.server_name}: failed to delete transient test session ' + f'{self.owner._build_box_session_id()}: {type(exc).__name__}: {exc}' + ) + await self._cleanup_staged_workspace() + return + # In the shared-session model, we do not delete the session itself. # Stop only this MCP server's managed process; deleting the session # would kill other MCP servers sharing the same container. 
- workspace = self._build_workspace(host_path=None) try: await workspace.stop_managed_process(self.process_id) except Exception as exc: diff --git a/src/langbot/pkg/skill/manager.py b/src/langbot/pkg/skill/manager.py index a053697f..ddb2125c 100644 --- a/src/langbot/pkg/skill/manager.py +++ b/src/langbot/pkg/skill/manager.py @@ -46,6 +46,13 @@ class SkillManager: self.ap.logger.info('Box runtime unavailable; skill cache is empty.') return + # LangBot may only validate Box-reported paths against its own + # filesystem when the two share one (local stdio mode). In separated + # deployments (Docker Compose, k8s sidecar, --standalone-box, remote + # endpoint) the package_root lives on the Box runtime's filesystem and + # is not resolvable here, so we trust what Box reports. + validate_locally = bool(getattr(box_service, 'shares_filesystem_with_box', False)) + try: dropped = 0 for skill_data in await box_service.list_skills(): @@ -53,7 +60,7 @@ class SkillManager: if not skill_name: continue package_root = str(skill_data.get('package_root', '') or '').strip() - if package_root and not os.path.isdir(package_root): + if validate_locally and package_root and not os.path.isdir(package_root): self.ap.logger.warning( f'Skill "{skill_name}" reported by Box runtime but ' f'package_root missing on LangBot filesystem ' diff --git a/src/langbot/templates/config.yaml b/src/langbot/templates/config.yaml index 753b59d8..fc24f921 100644 --- a/src/langbot/templates/config.yaml +++ b/src/langbot/templates/config.yaml @@ -25,6 +25,12 @@ system: max_bots: -1 max_pipelines: -1 max_extensions: -1 + # When set to a non-empty string, every pipeline is forced to use this + # Box sandbox-scope template regardless of its own configuration, and + # the per-pipeline "Sandbox Scope" selector is locked in the web UI. + # Used by SaaS deployments to confine a tenant to a single shared + # sandbox (set to '{global}'). Empty string = no restriction. 
+ force_box_session_id_template: '' task_retention: # Keep at most this many completed async task records in memory completed_limit: 200 diff --git a/src/langbot/templates/metadata/pipeline/ai.yaml b/src/langbot/templates/metadata/pipeline/ai.yaml index 16b21069..00c041c7 100644 --- a/src/langbot/templates/metadata/pipeline/ai.yaml +++ b/src/langbot/templates/metadata/pipeline/ai.yaml @@ -152,21 +152,22 @@ stages: es_ES: Determina cómo se comparten los entornos sandbox entre mensajes. ru_RU: Определяет, как песочницы используются совместно между сообщениями. disable_if: - field: __system.box_available + field: __system.box_scope_editable operator: eq value: false disabled_tooltip: en_US: >- - Box sandbox is disabled or unavailable. Enable it in config.yaml - (box.enabled = true) and ensure the runtime is reachable to change - this setting. - zh_Hans: Box 沙箱已禁用或不可用。请在配置中启用(box.enabled = true)并确认运行时连接正常,才能修改此项。 - zh_Hant: Box 沙箱已停用或無法使用。請在設定中啟用(box.enabled = true)並確認執行時連線正常,才能修改此項。 - ja_JP: Box サンドボックスが無効または利用できません。設定で有効化(box.enabled = true)し、ランタイムが接続できることを確認してから変更してください。 - vi_VN: Sandbox Box đã tắt hoặc không khả dụng. Hãy bật trong cấu hình (box.enabled = true) và đảm bảo runtime hoạt động để chỉnh sửa. - th_TH: Sandbox Box ถูกปิดใช้งานหรือไม่พร้อมใช้งาน กรุณาเปิดใช้งานในการตั้งค่า (box.enabled = true) และตรวจสอบว่ารันไทม์เชื่อมต่อปกติก่อนปรับค่า - es_ES: El sandbox de Box está desactivado o no disponible. Actívelo en la configuración (box.enabled = true) y asegúrese de que el runtime esté conectado para modificar este ajuste. - ru_RU: Песочница Box отключена или недоступна. Включите её в конфигурации (box.enabled = true) и убедитесь, что среда выполнения работает, чтобы изменить эту настройку. + Sandbox scope can't be changed: either the Box sandbox is disabled + or unavailable (enable it in config.yaml with box.enabled = true and + ensure the runtime is reachable), or this deployment pins all + pipelines to a fixed scope. 
+ zh_Hans: "无法修改沙箱作用域：Box 沙箱已禁用或不可用（请在配置中启用 box.enabled = true 并确认运行时连接正常），或本部署已将所有流水线固定为统一作用域。" + zh_Hant: "無法修改沙箱作用域：Box 沙箱已停用或無法使用（請在設定中啟用 box.enabled = true 並確認執行時連線正常），或本部署已將所有流水線固定為統一作用域。" + ja_JP: "サンドボックススコープを変更できません：Box サンドボックスが無効／利用不可（設定で box.enabled = true にしてランタイム接続を確認）、またはこのデプロイがすべてのパイプラインを固定スコープに制限しています。" + vi_VN: "Không thể thay đổi phạm vi sandbox: Box sandbox bị tắt hoặc không khả dụng (bật box.enabled = true và đảm bảo runtime hoạt động), hoặc bản triển khai này cố định mọi pipeline về một phạm vi." + th_TH: "ไม่สามารถเปลี่ยนขอบเขต Sandbox: Box sandbox ถูกปิดหรือไม่พร้อมใช้งาน (เปิด box.enabled = true และตรวจสอบรันไทม์) หรือการ deploy นี้ล็อกทุก pipeline ไว้ที่ขอบเขตเดียว" + es_ES: "No se puede cambiar el alcance del sandbox: el sandbox de Box está desactivado o no disponible (actívelo con box.enabled = true y verifique el runtime), o este despliegue fija todas las pipelines a un alcance único." + ru_RU: "Невозможно изменить область песочницы: песочница Box отключена или недоступна (включите box.enabled = true и проверьте среду выполнения), либо это развёртывание фиксирует единую область для всех конвейеров." 
type: select required: false default: "{launcher_type}_{launcher_id}" diff --git a/tests/unit_tests/box/test_box_service.py b/tests/unit_tests/box/test_box_service.py index 44f42ec1..4e947653 100644 --- a/tests/unit_tests/box/test_box_service.py +++ b/tests/unit_tests/box/test_box_service.py @@ -153,6 +153,7 @@ def make_app( host_root: str = '', workspace_quota_mb: int | None = None, enabled: bool = True, + force_box_session_id_template: str = '', ): box_config = { 'enabled': enabled, @@ -171,7 +172,12 @@ def make_app( return SimpleNamespace( logger=logger, - instance_config=SimpleNamespace(data={'box': box_config}), + instance_config=SimpleNamespace( + data={ + 'box': box_config, + 'system': {'limitation': {'force_box_session_id_template': force_box_session_id_template}}, + } + ), ) @@ -190,6 +196,66 @@ async def test_box_service_without_explicit_client_initializes_internal_connecto connector.initialize.assert_awaited_once() +class TestSharesFilesystemWithBox: + """``shares_filesystem_with_box`` must reflect the real LangBot<->Box + filesystem topology, which is derived from the connector transport: + + - stdio (local child process) → shared filesystem → True + - WebSocket (Docker / sidecar / --standalone-box / remote) → separated → False + + This drives whether LangBot validates Box-reported skill paths locally. + Getting it wrong silently drops every skill in separated deployments. + """ + + def test_true_for_stdio_connector(self, monkeypatch: pytest.MonkeyPatch): + # Non-Docker Unix, no endpoint, not standalone → stdio transport. 
+ monkeypatch.setattr('langbot.pkg.utils.platform.get_platform', lambda: 'linux') + monkeypatch.setattr('langbot.pkg.utils.platform.standalone_box', False) + + service = BoxService(make_app(Mock())) + + assert service._runtime_connector is not None + assert service._runtime_connector.uses_websocket() is False + assert service.shares_filesystem_with_box is True + + def test_false_for_websocket_connector_via_endpoint(self, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr('langbot.pkg.utils.platform.get_platform', lambda: 'linux') + monkeypatch.setattr('langbot.pkg.utils.platform.standalone_box', False) + app = make_app(Mock()) + app.instance_config.data['box']['runtime']['endpoint'] = 'ws://pod-x-box:5410' + + service = BoxService(app) + + assert service._runtime_connector is not None + assert service._runtime_connector.uses_websocket() is True + assert service.shares_filesystem_with_box is False + + def test_false_for_websocket_connector_in_docker(self, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr('langbot.pkg.utils.platform.get_platform', lambda: 'docker') + monkeypatch.setattr('langbot.pkg.utils.platform.standalone_box', False) + + service = BoxService(make_app(Mock())) + + assert service.shares_filesystem_with_box is False + + def test_false_when_client_injected_without_connector(self): + # Injected client (no connector) → unknown topology → conservative False + # so LangBot never wrongly drops Box-reported skills. 
+ service = BoxService(make_app(Mock()), client=Mock(spec=BoxRuntimeClient)) + + assert service._runtime_connector is None + assert service.shares_filesystem_with_box is False + + def test_explicit_override_wins(self): + service = BoxService(make_app(Mock()), client=Mock(spec=BoxRuntimeClient)) + + service._shares_filesystem_with_box_override = True + assert service.shares_filesystem_with_box is True + + service._shares_filesystem_with_box_override = False + assert service.shares_filesystem_with_box is False + + @pytest.mark.asyncio async def test_box_service_get_sessions_delegates_to_client(): client = Mock() @@ -302,6 +368,69 @@ async def test_box_service_session_id_falls_back_to_query_id_for_synthetic_queri assert backend.start_calls == ['query_7'] +@pytest.mark.asyncio +async def test_box_service_forced_global_scope_overrides_pipeline_template(): + """SaaS guard: a non-empty ``force_box_session_id_template`` pins every + query to one shared sandbox regardless of the pipeline's own scope.""" + logger = Mock() + backend = FakeBackend(logger) + runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300) + service = BoxService( + make_app(logger, force_box_session_id_template='{global}'), + client=_InProcessBoxRuntimeClient(logger, runtime), + ) + await service.initialize() + + # Two distinct callers that would otherwise get separate sandboxes. + q1 = pipeline_query.Query.model_construct(query_id=1, launcher_type='group', launcher_id='room-1') + q2 = pipeline_query.Query.model_construct(query_id=2, launcher_type='person', launcher_id='alice') + + r1 = await service.execute_tool({'command': 'pwd'}, q1) + r2 = await service.execute_tool({'command': 'pwd'}, q2) + + assert r1['session_id'] == 'global' + assert r2['session_id'] == 'global' + # Only one sandbox was ever started — the shared global one. 
+ assert backend.start_calls == ['global'] + + +def test_box_service_forced_template_ignores_pipeline_config(): + """The forced template wins even when the pipeline explicitly sets a + per-user scope — proving the override is not bypassable via pipeline config.""" + logger = Mock() + service = BoxService( + make_app(logger, force_box_session_id_template='{global}'), + client=Mock(spec=BoxRuntimeClient), + ) + query = pipeline_query.Query.model_construct( + query_id=7, + launcher_type='person', + launcher_id='test_user', + sender_id='test_user', + pipeline_config={'ai': {'local-agent': {'box-session-id-template': '{launcher_type}_{launcher_id}_{sender_id}'}}}, + ) + + assert service.resolve_box_session_id(query) == 'global' + + +def test_box_service_empty_forced_template_respects_pipeline_config(): + """An empty/whitespace forced template is a no-op: the pipeline's own + scope template is honoured (default non-SaaS behaviour).""" + logger = Mock() + service = BoxService( + make_app(logger, force_box_session_id_template=' '), + client=Mock(spec=BoxRuntimeClient), + ) + query = pipeline_query.Query.model_construct( + query_id=7, + launcher_type='group', + launcher_id='room-1', + pipeline_config={'ai': {'local-agent': {'box-session-id-template': '{launcher_type}_{launcher_id}'}}}, + ) + + assert service.resolve_box_session_id(query) == 'group_room-1' + + @pytest.mark.asyncio async def test_box_service_fails_closed_when_backend_unavailable(): logger = Mock() @@ -1342,11 +1471,16 @@ class TestBuildSkillExtraMounts: the backend never sees a bad mount. """ - def _make_service(self, logger, skills): + def _make_service(self, logger, skills, *, shares_filesystem=True): app = make_app(logger) app.skill_mgr = SimpleNamespace(skills=skills) client = Mock(spec=BoxRuntimeClient) - return BoxService(app, client=client) + service = BoxService(app, client=client) + # Tests construct BoxService with an injected client (no connector), so + # set the topology explicitly. 
Most cases exercise the shared-fs (local + # stdio) path where local package_root validation applies. + service._shares_filesystem_with_box_override = shares_filesystem + return service def test_skips_skill_with_missing_package_root(self): logger = Mock() @@ -1373,6 +1507,30 @@ class TestBuildSkillExtraMounts: for call in logger.warning.call_args_list ) + def test_trusts_box_paths_when_filesystem_not_shared(self): + """In separated deployments (Docker Compose, k8s sidecar, + --standalone-box, remote endpoint) the Box runtime owns its own + filesystem. package_root values it reports are NOT resolvable on the + LangBot side, so LangBot must trust them rather than dropping every + skill via a local isdir() check.""" + logger = Mock() + skills = { + 'a': {'name': 'a', 'package_root': '/box/skills/a'}, + 'b': {'name': 'b', 'package_root': '/box/skills/b'}, + } + service = self._make_service(logger, skills, shares_filesystem=False) + + mounts = service.build_skill_extra_mounts(make_query()) + + assert mounts == [ + {'host_path': '/box/skills/a', 'mount_path': '/workspace/.skills/a', 'mode': 'rw'}, + {'host_path': '/box/skills/b', 'mount_path': '/workspace/.skills/b', 'mode': 'rw'}, + ] + # No skill is dropped, so no "missing" warning should be logged. 
+ assert not any( + 'package_root missing' in str(call.args[0]) for call in logger.warning.call_args_list + ) + def test_skips_skill_with_empty_package_root(self): logger = Mock() skills = { @@ -1383,6 +1541,14 @@ class TestBuildSkillExtraMounts: assert service.build_skill_extra_mounts(make_query()) == [] + def test_empty_package_root_skipped_even_when_not_shared(self): + """An empty package_root is always invalid regardless of topology.""" + logger = Mock() + skills = {'no_root': {'name': 'no_root', 'package_root': ''}} + service = self._make_service(logger, skills, shares_filesystem=False) + + assert service.build_skill_extra_mounts(make_query()) == [] + def test_returns_empty_when_no_skill_manager(self): logger = Mock() app = make_app(logger) diff --git a/tests/unit_tests/provider/test_mcp_box_integration.py b/tests/unit_tests/provider/test_mcp_box_integration.py index 0123af4b..3e3a7a4d 100644 --- a/tests/unit_tests/provider/test_mcp_box_integration.py +++ b/tests/unit_tests/provider/test_mcp_box_integration.py @@ -561,6 +561,42 @@ class TestGetRuntimeInfoDict: assert info['box_session_id'] == 'mcp-shared' assert info['box_enabled'] is True + def test_transient_test_session_is_isolated_from_shared(self, mcp_module): + """A transient test session (config-page "test", no persisted UUID) + must NOT share the live "mcp-shared" Box session. 
Regression: a failing + test churned the shared session and tore down healthy live servers.""" + ap = _make_ap() + ap.box_service.available = True + transient = _make_session( + mcp_module, + { + 'name': 'test', + 'uuid': 'gen-uuid-123', + 'mode': 'stdio', + 'command': 'uvx', + 'args': ['mcp-server-time'], + '_transient': True, + }, + ap=ap, + ) + live = _make_session( + mcp_module, + { + 'name': 'time', + 'uuid': 'real-uuid', + 'mode': 'stdio', + 'command': 'uvx', + 'args': ['mcp-server-time'], + }, + ap=ap, + ) + assert transient.is_transient is True + assert live.is_transient is False + # Isolated session id for the test, shared for the live server. + assert transient._build_box_session_id() == 'mcp-test-gen-uuid-123' + assert live._build_box_session_id() == 'mcp-shared' + assert transient._build_box_session_id() != live._build_box_session_id() + def test_stdio_session_refuses_when_box_unavailable(self, mcp_module): """Policy: when Box is configured but unavailable (disabled in config OR connection failed), stdio MCP servers are NOT treated as box-stdio. 
diff --git a/tests/unit_tests/provider/test_skill_tools.py b/tests/unit_tests/provider/test_skill_tools.py index 00e04bfa..847480c1 100644 --- a/tests/unit_tests/provider/test_skill_tools.py +++ b/tests/unit_tests/provider/test_skill_tools.py @@ -62,15 +62,17 @@ class TestSkillManagerCache: @pytest.mark.asyncio async def test_reload_skills_drops_box_skills_with_missing_package_root(self): - """When Box reports a skill whose package_root is gone from the - LangBot-visible filesystem, the cache must drop it instead of - keeping a stale entry that would later produce a bad mount.""" + """When LangBot shares a filesystem with Box (local stdio mode) and Box + reports a skill whose package_root is gone from that shared filesystem, + the cache must drop it instead of keeping a stale entry that would later + produce a bad mount.""" from langbot.pkg.skill.manager import SkillManager with tempfile.TemporaryDirectory() as live_dir: ghost_dir = os.path.join(live_dir, '_does_not_exist') box_service = SimpleNamespace( available=True, + shares_filesystem_with_box=True, list_skills=AsyncMock( return_value=[ _make_skill_data(name='alive', package_root=live_dir), @@ -90,6 +92,37 @@ class TestSkillManagerCache: warning_messages = [str(call.args[0]) for call in ap.logger.warning.call_args_list] assert any('ghost' in msg and 'package_root missing' in msg for msg in warning_messages) + @pytest.mark.asyncio + async def test_reload_skills_trusts_box_paths_when_filesystem_not_shared(self): + """In separated deployments (Docker Compose, k8s sidecar, + --standalone-box, remote endpoint) the package_root reported by Box + lives on the Box runtime's filesystem and is not resolvable on the + LangBot side. 
The cache must keep every Box-reported skill rather than + dropping them all via a local isdir() check.""" + from langbot.pkg.skill.manager import SkillManager + + box_service = SimpleNamespace( + available=True, + shares_filesystem_with_box=False, + list_skills=AsyncMock( + return_value=[ + _make_skill_data(name='alpha', package_root='/box/skills/alpha'), + _make_skill_data(name='beta', package_root='/box/skills/beta'), + ] + ), + ) + + ap = _make_ap() + ap.box_service = box_service + mgr = SkillManager(ap) + + await mgr.reload_skills() + + assert sorted(mgr.skills) == ['alpha', 'beta'] + # No skill dropped → no "package_root missing" warning. + warning_messages = [str(call.args[0]) for call in ap.logger.warning.call_args_list] + assert not any('package_root missing' in msg for msg in warning_messages) + class TestSkillActivationHelper: """Skill activation is now Tool-Call based. diff --git a/uv.lock b/uv.lock index bfdb93db..93b1e75a 100644 --- a/uv.lock +++ b/uv.lock @@ -2029,7 +2029,7 @@ requires-dist = [ { name = "ebooklib", specifier = ">=0.18" }, { name = "gewechat-client", specifier = ">=0.1.5" }, { name = "html2text", specifier = ">=2024.2.26" }, - { name = "langbot-plugin", specifier = "==0.4.1" }, + { name = "langbot-plugin", specifier = "==0.4.2b4" }, { name = "langchain", specifier = ">=0.2.0" }, { name = "langchain-core", specifier = ">=1.3.3" }, { name = "langchain-text-splitters", specifier = ">=1.1.2" }, @@ -2092,7 +2092,7 @@ dev = [ [[package]] name = "langbot-plugin" -version = "0.4.1" +version = "0.4.2b4" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiofiles" }, @@ -2112,9 +2112,9 @@ dependencies = [ { name = "watchdog" }, { name = "websockets" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b2/c1/b11ce66fb2537b257ff387b8b5b708e616e5a072ae04440e24807eb3b1cf/langbot_plugin-0.4.1.tar.gz", hash = "sha256:57d3f8cd6b6c33316792ebfa0c907b2240834a84f2b8c8034c6be7721b425059", size = 289249, upload-time = 
"2026-06-04T05:19:08.747Z" } +sdist = { url = "https://files.pythonhosted.org/packages/59/ef/95202689f111889f4d8c4fdb112da3820a5f9b48c71176768a64a156e5c1/langbot_plugin-0.4.2b4.tar.gz", hash = "sha256:7f94e62abbdcfde7be853e746944106dd4317c7e48f3ea1a75bcc7c1670cfee8", size = 294202, upload-time = "2026-06-09T09:23:26.832Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/72/e8/335023bb5e1310621c7b7d8ae4fcac179f119709eee9a8ba65b681f66a8e/langbot_plugin-0.4.1-py3-none-any.whl", hash = "sha256:a9c319a4abb6944ae3d9a491edbeb703842a87b42b4e3b1eafba666ec2beeee7", size = 203412, upload-time = "2026-06-04T05:19:09.936Z" }, + { url = "https://files.pythonhosted.org/packages/49/a8/6e1e1330b81e5cb2e96e371db6d76b1c467fe96b2b057f46e34072d5d270/langbot_plugin-0.4.2b4-py3-none-any.whl", hash = "sha256:47b86f0c2398017013bfe06c342452a0f7f9e59c0030f2281a91de1e07deb033", size = 204915, upload-time = "2026-06-09T09:23:25.638Z" }, ] [[package]] diff --git a/web/src/app/home/components/dynamic-form/DynamicFormComponent.tsx b/web/src/app/home/components/dynamic-form/DynamicFormComponent.tsx index 078db6f7..551aa1dd 100644 --- a/web/src/app/home/components/dynamic-form/DynamicFormComponent.tsx +++ b/web/src/app/home/components/dynamic-form/DynamicFormComponent.tsx @@ -198,6 +198,35 @@ function WebhookUrlField({ ); } +// Hover-only Radix tooltips never open on touch devices (no pointer hover), +// so the ``disabled_tooltip`` explaining why a field is locked was invisible on +// mobile. This wrapper makes the info icon also toggle the tooltip on tap while +// keeping hover behavior on desktop. +function DisabledTooltipIcon({ text }: { text: string }) { + const [open, setOpen] = useState(false); + return ( + + + + + + {text} + + + ); +} + export default function DynamicFormComponent({ itemConfigList, onSubmit, @@ -551,16 +580,7 @@ export default function DynamicFormComponent({ : ''; const renderDisabledTooltipIcon = () => disabledTooltip ? 
( - - - - - - - {disabledTooltip} - - - + ) : null; // Webhook URL fields are display-only; render outside of form binding diff --git a/web/src/app/home/components/home-sidebar/HomeSidebar.tsx b/web/src/app/home/components/home-sidebar/HomeSidebar.tsx index e0cc8ab9..2eff56cf 100644 --- a/web/src/app/home/components/home-sidebar/HomeSidebar.tsx +++ b/web/src/app/home/components/home-sidebar/HomeSidebar.tsx @@ -1674,24 +1674,31 @@ export default function HomeSidebar({ .catch(() => {}); } - getCloudServiceClientSync() - .getLangBotReleases() - .then((releases) => { - if (releases && releases.length > 0) { - const latestStable = releases.find((r) => !r.prerelease && !r.draft); - const latest = latestStable || releases[0]; - setLatestRelease(latest); + // Cloud edition is updated centrally by the operator, so end users should + // not see a "new version available" prompt in the sidebar. Skip the GitHub + // release check entirely for edition=cloud. + if (systemInfo?.edition !== 'cloud') { + getCloudServiceClientSync() + .getLangBotReleases() + .then((releases) => { + if (releases && releases.length > 0) { + const latestStable = releases.find( + (r) => !r.prerelease && !r.draft, + ); + const latest = latestStable || releases[0]; + setLatestRelease(latest); - const currentVersion = systemInfo?.version; - if (currentVersion && latest.tag_name) { - const isNewer = compareVersions(latest.tag_name, currentVersion); - setHasNewVersion(isNewer); + const currentVersion = systemInfo?.version; + if (currentVersion && latest.tag_name) { + const isNewer = compareVersions(latest.tag_name, currentVersion); + setHasNewVersion(isNewer); + } } - } - }) - .catch((error) => { - console.error('Failed to fetch releases:', error); - }); + }) + .catch((error) => { + console.error('Failed to fetch releases:', error); + }); + } getCloudServiceClientSync() .getGitHubRepoInfo() diff --git a/web/src/app/home/pipelines/components/pipeline-form/PipelineFormComponent.tsx 
b/web/src/app/home/pipelines/components/pipeline-form/PipelineFormComponent.tsx index de192298..863c2202 100644 --- a/web/src/app/home/pipelines/components/pipeline-form/PipelineFormComponent.tsx +++ b/web/src/app/home/pipelines/components/pipeline-form/PipelineFormComponent.tsx @@ -8,6 +8,7 @@ import { import DynamicFormComponent from '@/app/home/components/dynamic-form/DynamicFormComponent'; import N8nAuthFormComponent from '@/app/home/components/dynamic-form/N8nAuthFormComponent'; import { useBoxStatus } from '@/app/infra/hooks/useBoxStatus'; +import { systemInfo } from '@/app/infra/http'; import { Button } from '@/components/ui/button'; import { useForm } from 'react-hook-form'; import { zodResolver } from '@hookform/resolvers/zod'; @@ -420,11 +421,41 @@ export default function PipelineFormComponent({ // opt-in via ``disable_if`` + ``disabled_tooltip`` rather than every page // hard-coding a banner. Field-level gating keeps unrelated fields // untouched. + // + // ``box_scope_editable`` folds the two reasons the Sandbox Scope selector + // can be locked into a single flag the yaml ``disable_if`` consumes: + // 1. Box sandbox is unavailable, or + // 2. the deployment pins all pipelines to a fixed scope via + // ``system.limitation.force_box_session_id_template`` (SaaS). + const forcedBoxTemplate = + systemInfo.limitation?.force_box_session_id_template || ''; + const boxScopeForced = !!forcedBoxTemplate; const stageSystemContext = stage.name === 'local-agent' - ? { box_available: boxAvailable } + ? { + box_available: boxAvailable, + box_scope_editable: boxAvailable && !boxScopeForced, + } : undefined; + // When the deployment pins every pipeline to a fixed sandbox scope (SaaS + // ``force_box_session_id_template``), the Sandbox Scope selector is locked. + // The runtime already overrides the scope on every exec, but the stored + // pipeline value can be anything (e.g. 
the per-chat default), which would + // make the locked selector display a scope that is NOT the one actually in + // effect. Coerce the displayed/saved value to the forced template so the UI + // truthfully reflects runtime behavior. + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const stageInitialValues: Record = + (form.watch(formName) as Record)?.[stage.name] || {}; + const effectiveInitialValues = + stage.name === 'local-agent' && boxScopeForced + ? { + ...stageInitialValues, + 'box-session-id-template': forcedBoxTemplate, + } + : stageInitialValues; + return ( @@ -438,10 +469,7 @@ export default function PipelineFormComponent({ )?.[stage.name] || {} - } + initialValues={effectiveInitialValues} onSubmit={(values) => { handleDynamicFormEmit(formName, stage.name, values); }} diff --git a/web/src/app/home/plugins/components/plugin-installed/ExtensionCardComponent.tsx b/web/src/app/home/plugins/components/plugin-installed/ExtensionCardComponent.tsx index d6b50940..781b92b3 100644 --- a/web/src/app/home/plugins/components/plugin-installed/ExtensionCardComponent.tsx +++ b/web/src/app/home/plugins/components/plugin-installed/ExtensionCardComponent.tsx @@ -160,16 +160,34 @@ export default function ExtensionCardComponent({ {cardVO.mode.toUpperCase()} )} - - {cardVO.enabled ? t('mcp.statusConnected') : t('mcp.statusDisabled')} - + {(() => { + // Reflect the real runtime status, not just the enabled flag. + // A server can be enabled but still CONNECTING or in ERROR — showing + // "Connected" in those cases is misleading. + const runtime = cardVO.enabled + ? (cardVO.runtimeStatus ?? 
'connecting') + : 'disabled'; + const badgeClass: Record = { + connected: 'border-green-400 text-green-600 dark:text-green-400', + connecting: 'border-amber-400 text-amber-600 dark:text-amber-400', + error: 'border-red-400 text-red-600 dark:text-red-400', + disabled: 'border-gray-400 text-gray-600 dark:text-gray-300', + }; + const badgeLabel: Record = { + connected: t('mcp.statusConnected'), + connecting: t('mcp.connecting'), + error: t('mcp.statusError'), + disabled: t('mcp.statusDisabled'), + }; + return ( + + {badgeLabel[runtime] ?? badgeLabel.disabled} + + ); + })()}
{cardVO.description || diff --git a/web/src/app/home/plugins/components/plugin-installed/PluginInstalledComponent.tsx b/web/src/app/home/plugins/components/plugin-installed/PluginInstalledComponent.tsx index e9feefbc..4429f795 100644 --- a/web/src/app/home/plugins/components/plugin-installed/PluginInstalledComponent.tsx +++ b/web/src/app/home/plugins/components/plugin-installed/PluginInstalledComponent.tsx @@ -103,8 +103,8 @@ const PluginInstalledComponent = forwardRef< getExtensionList(); } - async function getExtensionList() { - setLoading(true); + async function getExtensionList(silent = false) { + if (!silent) setLoading(true); try { const client = getCloudServiceClientSync(); @@ -200,12 +200,25 @@ const PluginInstalledComponent = forwardRef< setExtensionList(extensions); } catch (error) { console.error('Failed to fetch extension list:', error); - setExtensionList([]); + if (!silent) setExtensionList([]); } finally { - setLoading(false); + if (!silent) setLoading(false); } } + // While any MCP server is still connecting, poll quietly so the status badge + // transitions (connecting -> connected/error) without a manual refresh. + useEffect(() => { + const hasConnecting = extensionList.some( + (e) => e.type === 'mcp' && e.enabled && e.runtimeStatus === 'connecting', + ); + if (!hasConnecting) return; + const timer = setInterval(() => { + getExtensionList(true); + }, 3000); + return () => clearInterval(timer); + }, [extensionList]); + useImperativeHandle(ref, () => ({ refreshPluginList: getExtensionList, })); diff --git a/web/src/app/infra/entities/api/index.ts b/web/src/app/infra/entities/api/index.ts index 44edd872..aa1e2c8b 100644 --- a/web/src/app/infra/entities/api/index.ts +++ b/web/src/app/infra/entities/api/index.ts @@ -325,6 +325,10 @@ export interface SystemLimitation { max_bots: number; max_pipelines: number; max_extensions: number; + /** When non-empty, every pipeline is forced to this Box sandbox-scope + * template (e.g. 
``{global}``) and the per-pipeline "Sandbox Scope" + * selector is locked. Used by SaaS deployments. Empty = no restriction. */ + force_box_session_id_template?: string; } export interface WizardProgress {