Harden agent runner tool runtimes (#2247)

* fix(tools): harden agent runner tool runtimes
* fix(tools): bootstrap Python workspaces with available interpreter
* fix(tools): clear stale Python workspace env locks
* fix(tools): decouple runtime from agent runner
* test(tools): cover runtime hardening edge cases
* fix(tools): support binary workspace file chunks
2026-06-24 14:34:20 +00:00 · 2026-06-18 14:06:04 +00:00
parent 3a2edf9753
commit a020ca680f
10 changed files with 1008 additions and 169 deletions
@@ -54,7 +54,9 @@ def test_classify_python_workspace_detects_package_and_requirements():
 def test_wrap_python_command_with_env_contains_bootstrap_and_command():
    command = wrap_python_command_with_env('python script.py')

-    assert 'python -m venv "$_LB_VENV_DIR"' in command
+    assert '_LB_SYSTEM_PYTHON="$(command -v python3 || command -v python || true)"' in command
+    assert '"$_LB_SYSTEM_PYTHON" -m venv "$_LB_VENV_DIR"' in command
+    assert 'kill -0 "$_LB_LOCK_OWNER"' in command
    assert 'export VIRTUAL_ENV="$_LB_VENV_DIR"' in command
    assert command.rstrip().endswith('python script.py')

@@ -180,7 +180,7 @@ class TestMCPServerBoxConfig:
        assert cfg.host_path is None
        assert cfg.host_path_mode == 'ro'
        assert cfg.env == {}
-        assert cfg.startup_timeout_sec == 120
+        assert cfg.startup_timeout_sec == 300
        assert cfg.cpus is None
        assert cfg.memory_mb is None
        assert cfg.pids_limit is None
@@ -494,6 +494,84 @@ class TestBuildBoxProcessPayload:
        assert payload['args'] == ['/opt/other/server.py', '--flag']


+# ── Python Workspace Preparation ────────────────────────────────────
+
+
+class TestPythonWorkspacePreparation:
+    def test_requirements_workspace_uses_venv_bootstrap(self, mcp_module, tmp_path):
+        host_path = tmp_path / 'mcp-source'
+        host_path.mkdir()
+        (host_path / 'requirements.txt').write_text('mcp==1.26.0\n', encoding='utf-8')
+
+        command = mcp_module.BoxStdioSessionRuntime.detect_install_command(
+            str(host_path),
+            '/workspace/.mcp/u1/workspace',
+        )
+
+        assert command is not None
+        assert '_LB_SYSTEM_PYTHON="$(command -v python3 || command -v python || true)"' in command
+        assert '"$_LB_SYSTEM_PYTHON" -m venv "$_LB_VENV_DIR"' in command
+        assert 'python -m pip install -r "/workspace/.mcp/u1/workspace/requirements.txt"' in command
+        assert 'pip install --no-cache-dir -r' not in command
+
+    def test_staging_refresh_removes_stale_source_files_but_preserves_runtime_dirs(self, mcp_module, tmp_path):
+        source = tmp_path / 'source'
+        source.mkdir()
+        (source / 'server.py').write_text('print("new")\n', encoding='utf-8')
+        (source / 'requirements.txt').write_text('mcp==1.26.0\n', encoding='utf-8')
+        (source / '.env').write_text('TOKEN=new\n', encoding='utf-8')
+
+        process_root = tmp_path / 'shared' / '.mcp' / 'u1'
+        workspace = process_root / 'workspace'
+        (workspace / '.venv' / 'bin').mkdir(parents=True)
+        (workspace / '.venv' / 'bin' / 'python').write_text('', encoding='utf-8')
+        (workspace / '.langbot').mkdir()
+        (workspace / '.langbot' / 'python-env.lock').mkdir()
+        (workspace / '.env').write_text('TOKEN=old\n', encoding='utf-8')
+        (workspace / 'server.py').write_text('print("old")\n', encoding='utf-8')
+        (workspace / 'removed.py').write_text('stale\n', encoding='utf-8')
+        (workspace / 'removed_dir').mkdir()
+        (workspace / 'removed_dir' / 'old.txt').write_text('stale\n', encoding='utf-8')
+
+        mcp_module.BoxStdioSessionRuntime._copy_workspace_tree(str(source), str(process_root), str(workspace))
+
+        assert (workspace / 'server.py').read_text(encoding='utf-8') == 'print("new")\n'
+        assert (workspace / 'requirements.txt').read_text(encoding='utf-8') == 'mcp==1.26.0\n'
+        assert (workspace / '.env').read_text(encoding='utf-8') == 'TOKEN=new\n'
+        assert not (workspace / 'removed.py').exists()
+        assert not (workspace / 'removed_dir').exists()
+        assert (workspace / '.venv' / 'bin' / 'python').exists()
+        assert (workspace / '.langbot' / 'python-env.lock').is_dir()
+
+    def test_staging_refresh_ignores_unlink_race(self, mcp_module, tmp_path, monkeypatch):
+        mcp_stdio_module = sys.modules['langbot.pkg.provider.tools.loaders.mcp_stdio']
+
+        source = tmp_path / 'source'
+        source.mkdir()
+        (source / 'server.py').write_text('print("new")\n', encoding='utf-8')
+
+        process_root = tmp_path / 'shared' / '.mcp' / 'u1'
+        workspace = process_root / 'workspace'
+        workspace.mkdir(parents=True)
+        stale_file = workspace / 'removed.py'
+        stale_file.write_text('stale\n', encoding='utf-8')
+
+        real_unlink = os.unlink
+
+        def unlink_with_race(path):
+            if os.fspath(path) == str(stale_file):
+                real_unlink(path)
+                raise FileNotFoundError(path)
+            real_unlink(path)
+
+        monkeypatch.setattr(mcp_stdio_module.os, 'unlink', unlink_with_race)
+
+        mcp_module.BoxStdioSessionRuntime._copy_workspace_tree(str(source), str(process_root), str(workspace))
+
+        assert not stale_file.exists()
+        assert (workspace / 'server.py').read_text(encoding='utf-8') == 'print("new")\n'
+
+
 # ── get_runtime_info_dict ───────────────────────────────────────────


@@ -193,6 +193,29 @@ class TestSkillPathHelpers:

        assert list(result.keys()) == ['visible']

+    def test_restore_activated_skills_uses_caller_provided_names_and_visibility(self):
+        from langbot.pkg.provider.tools.loaders.skill import (
+            ACTIVATED_SKILLS_KEY,
+            PIPELINE_BOUND_SKILLS_KEY,
+            get_activated_skill_names,
+            restore_activated_skills,
+        )
+
+        ap = _make_ap()
+        ap.skill_mgr = SimpleNamespace(
+            skills={
+                'visible': _make_skill_data(name='visible'),
+                'hidden': _make_skill_data(name='hidden'),
+            }
+        )
+        query = SimpleNamespace(variables={PIPELINE_BOUND_SKILLS_KEY: ['visible']})
+
+        restored = restore_activated_skills(ap, query, ['visible', 'hidden', 'visible', ''])
+
+        assert restored == ['visible']
+        assert list(query.variables[ACTIVATED_SKILLS_KEY].keys()) == ['visible']
+        assert get_activated_skill_names(query) == ['visible']
+
    def test_resolve_virtual_skill_path_allows_visible_skill_reads(self):
        from langbot.pkg.provider.tools.loaders.skill import (
            PIPELINE_BOUND_SKILLS_KEY,
@@ -245,7 +268,8 @@ class TestSkillPathHelpers:

        command = wrap_skill_command_with_python_env('python scripts/run.py')

-        assert 'python -m venv "$_LB_VENV_DIR"' in command
+        assert '_LB_SYSTEM_PYTHON="$(command -v python3 || command -v python || true)"' in command
+        assert '"$_LB_SYSTEM_PYTHON" -m venv "$_LB_VENV_DIR"' in command
        assert 'export VIRTUAL_ENV="$_LB_VENV_DIR"' in command
        assert command.rstrip().endswith('python scripts/run.py')

@@ -281,6 +305,7 @@ class TestSkillToolLoader:
        assert result['activated'] is True
        assert result['skill_name'] == 'demo'
        assert result['mount_path'] == '/workspace/.skills/demo'
+        assert result['activated_skill_names'] == ['demo']
        assert 'Step 1' in result['content']
        assert set(query.variables[ACTIVATED_SKILLS_KEY].keys()) == {'demo'}

@@ -456,7 +481,9 @@ class TestNativeToolLoaderSkillPaths:
                SimpleNamespace(query_id='q1', variables={PIPELINE_BOUND_SKILLS_KEY: ['demo']}),
            )

-            assert result == {'ok': True, 'content': 'demo instructions'}
+            assert result['ok'] is True
+            assert result['content'] == 'demo instructions'
+            assert result['truncated'] is False

    @pytest.mark.asyncio
    async def test_exec_in_activated_skill_mount_rewrites_command_and_refreshes(self):
@@ -485,7 +512,7 @@ class TestNativeToolLoaderSkillPaths:
                query,
            )

-            assert result == {'ok': True}
+            assert result['ok'] is True
            tool_parameters = ap.box_service.execute_tool.await_args.args[0]
            assert tool_parameters['command'] == 'python /workspace/.skills/demo/scripts/run.py'
            assert tool_parameters['workdir'] == '/workspace/.skills/demo'
@@ -1,5 +1,6 @@
 from __future__ import annotations

+import base64
 import os
 import tempfile
 from types import SimpleNamespace
@@ -189,6 +190,78 @@ async def test_write_creates_subdirectories():
            assert f.read() == 'nested'


+@pytest.mark.asyncio
+async def test_read_binary_file_as_base64_chunk():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        loader, _ = _make_loader_with_workspace(tmpdir)
+        with open(os.path.join(tmpdir, 'blob.bin'), 'wb') as f:
+            f.write(b'\x00\x01\x02\x03\x04')
+
+        result = await loader.invoke_tool(
+            'read',
+            {
+                'path': '/workspace/blob.bin',
+                'encoding': 'base64',
+                'byte_offset': 1,
+                'max_bytes': 2,
+            },
+            _make_query(),
+        )
+
+        assert result['ok'] is True
+        assert result['content'] == base64.b64encode(b'\x01\x02').decode('ascii')
+        assert result['encoding'] == 'base64'
+        assert result['byte_offset'] == 1
+        assert result['length'] == 2
+        assert result['size_bytes'] == 5
+        assert result['has_more'] is True
+        assert result['next_byte_offset'] == 3
+
+
+@pytest.mark.asyncio
+async def test_write_base64_file_append():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        loader, _ = _make_loader_with_workspace(tmpdir)
+
+        first = base64.b64encode(b'\x00\x01').decode('ascii')
+        second = base64.b64encode(b'\x02\x03').decode('ascii')
+        await loader.invoke_tool(
+            'write',
+            {'path': '/workspace/blob.bin', 'content': first, 'encoding': 'base64'},
+            _make_query(),
+        )
+        result = await loader.invoke_tool(
+            'write',
+            {
+                'path': '/workspace/blob.bin',
+                'content': second,
+                'encoding': 'base64',
+                'mode': 'append',
+            },
+            _make_query(),
+        )
+
+        assert result['ok'] is True
+        with open(os.path.join(tmpdir, 'blob.bin'), 'rb') as f:
+            assert f.read() == b'\x00\x01\x02\x03'
+
+
+@pytest.mark.asyncio
+async def test_write_base64_rejects_invalid_content():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        loader, _ = _make_loader_with_workspace(tmpdir)
+
+        result = await loader.invoke_tool(
+            'write',
+            {'path': '/workspace/blob.bin', 'content': 'not base64!', 'encoding': 'base64'},
+            _make_query(),
+        )
+
+        assert result['ok'] is False
+        assert 'invalid base64' in result['error']
+        assert not os.path.exists(os.path.join(tmpdir, 'blob.bin'))
+
+
 @pytest.mark.asyncio
 async def test_edit_replaces_unique_string():
    with tempfile.TemporaryDirectory() as tmpdir:
@@ -248,3 +321,135 @@ async def test_path_escape_blocked():

        with pytest.raises(ValueError, match='escapes'):
            await loader.invoke_tool('read', {'path': '/workspace/../../etc/passwd'}, _make_query())
+
+
+@pytest.mark.asyncio
+async def test_box_availability_helper_handles_unavailable_and_errors():
+    from langbot.pkg.provider.tools.loaders.availability import is_box_backend_available
+
+    assert await is_box_backend_available(SimpleNamespace()) is False
+    assert await is_box_backend_available(SimpleNamespace(box_service=SimpleNamespace(available=False))) is False
+
+    unavailable_backend = SimpleNamespace(
+        available=True,
+        get_status=AsyncMock(return_value={'backend': {'available': False}}),
+    )
+    assert await is_box_backend_available(SimpleNamespace(box_service=unavailable_backend)) is False
+
+    failing_backend = SimpleNamespace(
+        available=True,
+        get_status=AsyncMock(side_effect=RuntimeError('box unavailable')),
+    )
+    assert await is_box_backend_available(SimpleNamespace(box_service=failing_backend)) is False
+
+
+@pytest.mark.asyncio
+async def test_read_file_supports_offset_limit_and_truncation_metadata():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        loader, _ = _make_loader_with_workspace(tmpdir)
+        with open(os.path.join(tmpdir, 'lines.txt'), 'w', encoding='utf-8') as f:
+            f.write('one\ntwo\nthree\nfour\n')
+
+        result = await loader.invoke_tool(
+            'read',
+            {'path': '/workspace/lines.txt', 'offset': 2, 'limit': 2},
+            _make_query(),
+        )
+
+        assert result == {
+            'ok': True,
+            'content': 'two\nthree',
+            'truncated': True,
+            'truncated_by': 'lines',
+            'start_line': 2,
+            'end_line': 3,
+            'next_offset': 4,
+            'max_lines': 2,
+            'max_bytes': 50 * 1024,
+        }
+
+
+@pytest.mark.asyncio
+async def test_read_file_handles_line_larger_than_byte_limit():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        loader, _ = _make_loader_with_workspace(tmpdir)
+        with open(os.path.join(tmpdir, 'long-line.txt'), 'w', encoding='utf-8') as f:
+            f.write('abcdef\n')
+
+        result = await loader.invoke_tool(
+            'read',
+            {'path': '/workspace/long-line.txt', 'max_bytes': 3},
+            _make_query(),
+        )
+
+        assert result['ok'] is True
+        assert result['truncated'] is True
+        assert result['truncated_by'] == 'bytes'
+        assert result['next_offset'] == 1
+        assert 'exceeds the 3B read limit' in result['content']
+
+
+@pytest.mark.asyncio
+async def test_exec_result_is_capped_and_exposes_preview_metadata():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        box_service = SimpleNamespace(
+            available=True,
+            default_workspace=tmpdir,
+            execute_tool=AsyncMock(
+                return_value={
+                    'ok': True,
+                    'stdout': 'a' * 60000,
+                    'stderr': 'b' * 60000,
+                    'exit_code': 0,
+                }
+            ),
+        )
+        loader = NativeToolLoader(SimpleNamespace(box_service=box_service, logger=Mock()))
+
+        result = await loader.invoke_tool('exec', {'command': 'python -V'}, _make_query())
+
+        assert result['ok'] is True
+        assert len(result['stdout'].encode('utf-8')) == 50 * 1024
+        assert len(result['stderr'].encode('utf-8')) == 50 * 1024
+        assert len(result['preview'].encode('utf-8')) == 50 * 1024
+        assert result['stdout_truncated'] is True
+        assert result['stderr_truncated'] is True
+        assert result['truncated'] is True
+        assert result['truncated_by'] == 'bytes'
+
+
+@pytest.mark.asyncio
+async def test_glob_caps_match_count_and_returns_preview():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        loader, _ = _make_loader_with_workspace(tmpdir)
+        for index in range(105):
+            with open(os.path.join(tmpdir, f'file-{index:03d}.txt'), 'w', encoding='utf-8') as f:
+                f.write(str(index))
+
+        result = await loader.invoke_tool('glob', {'path': '/workspace', 'pattern': '*.txt'}, _make_query())
+
+        assert result['ok'] is True
+        assert result['total'] == 105
+        assert len(result['matches']) == 100
+        assert result['preview'] == '\n'.join(result['matches'])
+        assert result['truncated'] is True
+        assert result['truncated_by'] == 'matches'
+
+
+@pytest.mark.asyncio
+async def test_grep_reports_invalid_regex_and_truncates_long_matching_lines():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        loader, _ = _make_loader_with_workspace(tmpdir)
+        with open(os.path.join(tmpdir, 'data.txt'), 'w', encoding='utf-8') as f:
+            f.write('needle ' + ('x' * 600) + '\n')
+
+        invalid = await loader.invoke_tool('grep', {'path': '/workspace', 'pattern': '['}, _make_query())
+        result = await loader.invoke_tool('grep', {'path': '/workspace', 'pattern': 'needle'}, _make_query())
+
+        assert invalid['ok'] is False
+        assert 'Invalid regex' in invalid['error']
+        assert result['ok'] is True
+        assert result['truncated'] is True
+        assert result['truncated_by'] == 'line'
+        assert result['matches'][0]['file'] == '/workspace/data.txt'
+        assert result['matches'][0]['content'].endswith('... [truncated]')