diff --git a/tests/integration_tests/__init__.py b/tests/integration_tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/integration_tests/box/__init__.py b/tests/integration_tests/box/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/integration_tests/box/test_box_integration.py b/tests/integration_tests/box/test_box_integration.py new file mode 100644 index 00000000..27830744 --- /dev/null +++ b/tests/integration_tests/box/test_box_integration.py @@ -0,0 +1,304 @@ +"""Integration tests for LangBot Box. + +These tests verify the end-to-end behavior of the Box sandbox execution +system. Tests decorated with ``requires_container`` need a real container +runtime (Podman or Docker) and are skipped otherwise. + +CI only runs ``tests/unit_tests/``, so these tests never execute in the +CI pipeline. Run them locally with:: + + pytest tests/integration_tests/ -v +""" + +from __future__ import annotations + +import logging +import shutil +import socket +import subprocess +from types import SimpleNamespace + +import pytest +from aiohttp.test_utils import TestServer + +from langbot.pkg.box.backend import BaseSandboxBackend +from langbot.pkg.box.client import RemoteBoxRuntimeClient +from langbot.pkg.box.errors import BoxBackendUnavailableError, BoxRuntimeUnavailableError +from langbot.pkg.box.models import BoxExecutionStatus, BoxNetworkMode, BoxSpec +from langbot.pkg.box.runtime import BoxRuntime +from langbot.pkg.box.server import create_app as create_server_app +from langbot.pkg.box.service import BoxService + +import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query + +_logger = logging.getLogger('test.box.integration') + +# Default image for integration tests — small and fast to pull. +_TEST_IMAGE = 'alpine:latest' + + +# ── Skip helpers ────────────────────────────────────────────────────── + + +def _has_container_runtime() -> bool: + for cmd in ('podman', 'docker'): + if shutil.which(cmd) is None: + continue + try: + result = subprocess.run( + [cmd, 'info'], + capture_output=True, + timeout=10, + ) + if result.returncode == 0: + return True + except Exception: + continue + return False + + +def _can_open_test_socket() -> bool: + try: + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + except OSError: + return False + sock.close() + return True + + +requires_container = pytest.mark.skipif( + not _has_container_runtime(), + reason='no container runtime (podman/docker) available', +) + +requires_socket = pytest.mark.skipif( + not _can_open_test_socket(), + reason='local test environment does not permit opening TCP sockets', +) + + +# ── Fixtures ────────────────────────────────────────────────────────── + + +@pytest.fixture +async def box_client(): + """Yield a RemoteBoxRuntimeClient backed by a real BoxRuntime HTTP server.""" + runtime = BoxRuntime(logger=_logger) + app = create_server_app(runtime) + server = TestServer(app) + await server.start_server() + client = RemoteBoxRuntimeClient( + base_url=str(server.make_url('')), + logger=_logger, + ) + yield client + await client.shutdown() + await server.close() + + +# ── 1. Simple command execution ─────────────────────────────────────── + + +@requires_container +@requires_socket +@pytest.mark.asyncio +async def test_exec_simple_command(box_client: RemoteBoxRuntimeClient): + """Box starts a simple command and returns stdout.""" + spec = BoxSpec( + cmd='echo hello-box', + session_id='int-simple', + workdir='/tmp', + image=_TEST_IMAGE, + ) + result = await box_client.execute(spec) + + assert result.status == BoxExecutionStatus.COMPLETED + assert result.exit_code == 0 + assert 'hello-box' in result.stdout + + +# ── 2. Session file persistence ─────────────────────────────────────── + + +@requires_container +@requires_socket +@pytest.mark.asyncio +async def test_session_persists_files(box_client: RemoteBoxRuntimeClient): + """Write a file in one exec, read it back in a second exec on the same session.""" + sid = 'int-persist' + + write_result = await box_client.execute(BoxSpec( + cmd='echo "hello from file" > /tmp/testfile.txt', + session_id=sid, + workdir='/tmp', + image=_TEST_IMAGE, + )) + assert write_result.exit_code == 0 + + read_result = await box_client.execute(BoxSpec( + cmd='cat /tmp/testfile.txt', + session_id=sid, + workdir='/tmp', + image=_TEST_IMAGE, + )) + assert read_result.exit_code == 0 + assert 'hello from file' in read_result.stdout + + +# ── 3. Timeout handling ─────────────────────────────────────────────── + + +@requires_container +@requires_socket +@pytest.mark.asyncio +async def test_timeout_kills_command(box_client: RemoteBoxRuntimeClient): + """A long-running command is killed after timeout_sec.""" + spec = BoxSpec( + cmd='sleep 120', + session_id='int-timeout', + workdir='/tmp', + timeout_sec=3, + image=_TEST_IMAGE, + ) + result = await box_client.execute(spec) + + assert result.status == BoxExecutionStatus.TIMED_OUT + assert result.exit_code is None + + +# ── 4. Network isolation ───────────────────────────────────────────── + + +@requires_container +@requires_socket +@pytest.mark.asyncio +async def test_offline_cannot_reach_network(box_client: RemoteBoxRuntimeClient): + """With network=OFF the sandbox cannot reach the internet.""" + spec = BoxSpec( + cmd='wget -q -O /dev/null --timeout=3 http://1.1.1.1 2>&1; exit $?', + session_id='int-offline', + workdir='/tmp', + network=BoxNetworkMode.OFF, + image=_TEST_IMAGE, + ) + result = await box_client.execute(spec) + + assert result.exit_code != 0 + + +# ── 5. Backend unavailable ─────────────────────────────────────────── + + +class _UnavailableBackend(BaseSandboxBackend): + """A backend that always reports itself as unavailable.""" + + name = 'unavailable' + + def __init__(self): + super().__init__(logging.getLogger('test')) + + async def is_available(self) -> bool: + return False + + async def start_session(self, spec): + raise NotImplementedError + + async def exec(self, session, spec): + raise NotImplementedError + + async def stop_session(self, session): + pass + + +@requires_socket +@pytest.mark.asyncio +async def test_backend_unavailable_returns_error(): + """When no backend is available the full HTTP path returns BoxBackendUnavailableError.""" + runtime = BoxRuntime(logger=_logger, backends=[_UnavailableBackend()]) + app = create_server_app(runtime) + server = TestServer(app) + await server.start_server() + try: + client = RemoteBoxRuntimeClient( + base_url=str(server.make_url('')), + logger=_logger, + ) + spec = BoxSpec( + cmd='echo hello', + session_id='int-no-backend', + workdir='/tmp', + ) + with pytest.raises(BoxBackendUnavailableError): + await client.execute(spec) + await client.shutdown() + finally: + await server.close() + + +# ── 6. Runtime unreachable ──────────────────────────────────────────── + + +@requires_socket +@pytest.mark.asyncio +async def test_runtime_unreachable_returns_error(): + """Connecting to a non-existent runtime raises BoxRuntimeUnavailableError.""" + client = RemoteBoxRuntimeClient( + base_url='http://127.0.0.1:19999', + logger=_logger, + ) + try: + with pytest.raises(BoxRuntimeUnavailableError): + await client.initialize() + finally: + await client.shutdown() + + +# ── 7. Full service-to-runtime path ────────────────────────────────── + + +@requires_container +@requires_socket +@pytest.mark.asyncio +async def test_full_service_to_remote_runtime(tmp_path): + """BoxService -> RemoteBoxRuntimeClient -> HTTP -> BoxRuntime -> real backend.""" + runtime = BoxRuntime(logger=_logger) + app = create_server_app(runtime) + server = TestServer(app) + await server.start_server() + try: + client = RemoteBoxRuntimeClient( + base_url=str(server.make_url('')), + logger=_logger, + ) + host_dir = tmp_path / 'workspace' + host_dir.mkdir() + + mock_ap = SimpleNamespace( + logger=_logger, + instance_config=SimpleNamespace( + data={ + 'box': { + 'profile': 'default', + 'allowed_host_mount_roots': [str(tmp_path)], + 'default_host_workspace': str(host_dir), + } + } + ), + ) + + service = BoxService(mock_ap, client=client) + await service.initialize() + + query = pipeline_query.Query.model_construct(query_id=42) + result = await service.execute_sandbox_tool( + {'cmd': 'echo service-path', 'image': _TEST_IMAGE}, + query, + ) + + assert result['ok'] is True + assert result['status'] == 'completed' + assert 'service-path' in result['stdout'] + assert result['session_id'] == '42' + await client.shutdown() + finally: + await server.close()