From a020ca680fd2054e839827c51544acc533be2195 Mon Sep 17 00:00:00 2001
From: huanghuoguoguo <1051233107@qq.com>
Date: Thu, 18 Jun 2026 14:06:04 +0000
Subject: [PATCH] Harden agent runner tool runtimes (#2247)

* fix(tools): harden agent runner tool runtimes

* fix(tools): bootstrap Python workspaces with available interpreter

* fix(tools): clear stale Python workspace env locks

* fix(tools): decouple runtime from agent runner

* test(tools): cover runtime hardening edge cases

* fix(tools): support binary workspace file chunks
---
 src/langbot/pkg/box/workspace.py              |  23 +-
 .../provider/tools/loaders/availability.py    |  18 +
 .../pkg/provider/tools/loaders/mcp_stdio.py   |  97 ++-
 .../pkg/provider/tools/loaders/native.py      | 603 +++++++++++++++---
 .../pkg/provider/tools/loaders/skill.py       |  39 ++
 .../provider/tools/loaders/skill_authoring.py |  75 +--
 tests/unit_tests/box/test_workspace.py        |   4 +-
 .../provider/test_mcp_box_integration.py      |  80 ++-
 tests/unit_tests/provider/test_skill_tools.py |  33 +-
 .../provider/test_tool_manager_native.py      | 205 ++++++
 10 files changed, 1008 insertions(+), 169 deletions(-)
 create mode 100644 src/langbot/pkg/provider/tools/loaders/availability.py

diff --git a/src/langbot/pkg/box/workspace.py b/src/langbot/pkg/box/workspace.py
index 948622ef..26d1a41e 100644
--- a/src/langbot/pkg/box/workspace.py
+++ b/src/langbot/pkg/box/workspace.py
@@ -146,13 +146,19 @@ def wrap_python_command_with_env(command: str, *, mount_path: str = '/workspace'
         _LB_PIP_CACHE_DIR="{mount_path}/.cache/pip"
 
         mkdir -p "$_LB_META_DIR" "$_LB_TMP_DIR" "$_LB_PIP_CACHE_DIR"
+        _LB_SYSTEM_PYTHON="$(command -v python3 || command -v python || true)"
+        if [ -z "$_LB_SYSTEM_PYTHON" ]; then
+          echo "python3 or python is required to prepare the workspace Python environment" >&2
+          exit 127
+        fi
+
         export TMPDIR="$_LB_TMP_DIR"
         export TEMP="$_LB_TMP_DIR"
         export TMP="$_LB_TMP_DIR"
         export PIP_CACHE_DIR="$_LB_PIP_CACHE_DIR"
 
         _lb_python_meta() {{
-          python - <<'PY'
+          "$_LB_SYSTEM_PYTHON" - <<'PY'
         import hashlib
         import json
         import os
@@ -201,15 +207,26 @@ def wrap_python_command_with_env(command: str, *, mount_path: str = '/workspace'
           _LB_LOCK_WAIT=0
           while ! mkdir "$_LB_LOCK_DIR" 2>/dev/null; do
             if [ "$_LB_LOCK_WAIT" -ge 120 ]; then
+              _LB_LOCK_OWNER="$(cat "$_LB_LOCK_DIR/pid" 2>/dev/null || true)"
+              if [ -n "$_LB_LOCK_OWNER" ] && kill -0 "$_LB_LOCK_OWNER" 2>/dev/null; then
+                echo "Timed out waiting for active Python environment lock: $_LB_LOCK_DIR" >&2
+                exit 1
+              fi
+              echo "Timed out waiting for Python environment lock, clearing stale lock: $_LB_LOCK_DIR" >&2
+              rm -rf "$_LB_LOCK_DIR" 2>/dev/null || true
+              if mkdir "$_LB_LOCK_DIR" 2>/dev/null; then
+                break
+              fi
               echo "Timed out waiting for Python environment lock: $_LB_LOCK_DIR" >&2
               exit 1
             fi
             sleep 1
             _LB_LOCK_WAIT=$((_LB_LOCK_WAIT + 1))
           done
+          printf '%s\\n' "$$" > "$_LB_LOCK_DIR/pid" 2>/dev/null || true
 
           _lb_cleanup_lock() {{
-            rmdir "$_LB_LOCK_DIR" >/dev/null 2>&1 || true
+            rm -rf "$_LB_LOCK_DIR" >/dev/null 2>&1 || true
           }}
           trap _lb_cleanup_lock EXIT INT TERM
 
@@ -225,7 +242,7 @@ def wrap_python_command_with_env(command: str, *, mount_path: str = '/workspace'
 
           if [ "$_LB_NEEDS_BOOTSTRAP" -eq 1 ]; then
             rm -rf "$_LB_VENV_DIR"
-            python -m venv "$_LB_VENV_DIR"
+            "$_LB_SYSTEM_PYTHON" -m venv "$_LB_VENV_DIR"
             . "$_LB_VENV_DIR/bin/activate"
             python -m pip install --upgrade pip setuptools wheel
             if [ -f "{mount_path}/requirements.txt" ]; then
diff --git a/src/langbot/pkg/provider/tools/loaders/availability.py b/src/langbot/pkg/provider/tools/loaders/availability.py
new file mode 100644
index 00000000..58d79586
--- /dev/null
+++ b/src/langbot/pkg/provider/tools/loaders/availability.py
@@ -0,0 +1,18 @@
+from __future__ import annotations
+
+from typing import Any
+
+
+async def is_box_backend_available(ap: Any) -> bool:
+    """Return whether the configured Box backend is ready for tool execution."""
+    box_service = getattr(ap, 'box_service', None)
+    if box_service is None:
+        return False
+    if not getattr(box_service, 'available', False):
+        return False
+    try:
+        status = await box_service.get_status()
+        backend_info = status.get('backend', {})
+        return bool(backend_info.get('available', False))
+    except Exception:
+        return False
diff --git a/src/langbot/pkg/provider/tools/loaders/mcp_stdio.py b/src/langbot/pkg/provider/tools/loaders/mcp_stdio.py
index ff607e66..dcfbb913 100644
--- a/src/langbot/pkg/provider/tools/loaders/mcp_stdio.py
+++ b/src/langbot/pkg/provider/tools/loaders/mcp_stdio.py
@@ -5,6 +5,8 @@ import asyncio
 import os
 import shutil
 import shlex
+import threading
+from contextlib import suppress
 from typing import TYPE_CHECKING, Any
 
 import pydantic
@@ -18,12 +20,26 @@ from ....box.workspace import (
     rewrite_mounted_path,
     rewrite_venv_command,
     unwrap_venv_path,
+    wrap_python_command_with_env,
 )
 
 if TYPE_CHECKING:
     from .mcp import RuntimeMCPSession
 
 
+_WORKSPACE_COPY_LOCKS: dict[str, threading.Lock] = {}
+_WORKSPACE_COPY_LOCKS_GUARD = threading.Lock()
+
+
+def _workspace_copy_lock(path: str) -> threading.Lock:
+    with _WORKSPACE_COPY_LOCKS_GUARD:
+        lock = _WORKSPACE_COPY_LOCKS.get(path)
+        if lock is None:
+            lock = threading.Lock()
+            _WORKSPACE_COPY_LOCKS[path] = lock
+        return lock
+
+
 class MCPSessionErrorPhase(enum.Enum):
     """Which phase of the MCP lifecycle failed."""
 
@@ -49,7 +65,7 @@ class MCPServerBoxConfig(pydantic.BaseModel):
     host_path: str | None = None
     host_path_mode: str = 'ro'  # MCP servers default to read-write mount only when explicitly requested
     env: dict[str, str] = pydantic.Field(default_factory=dict)
-    startup_timeout_sec: int = 120  # Longer default to allow dependency bootstrap
+    startup_timeout_sec: int = 300  # First Docker bootstrap may need to build a venv and install MCP deps.
     cpus: float | None = None
     memory_mb: int | None = None
     pids_limit: int | None = None
@@ -128,6 +144,7 @@ class BoxStdioSessionRuntime:
         workspace = self._build_workspace(host_path=None)
         host_path = self.resolve_host_path()
         process_cwd = '/workspace'
+        install_cmd: str | None = None
 
         try:
             await workspace.create_session()
@@ -168,6 +185,8 @@ class BoxStdioSessionRuntime:
                 env=self.server_config.get('env', {}),
                 cwd=process_cwd,
             )
+            if install_cmd:
+                payload = self._wrap_process_payload_with_python_env(payload, process_cwd)
             payload['process_id'] = self.process_id
             await workspace.box_service.start_managed_process(workspace.session_id, payload)
         except Exception:
@@ -253,14 +272,42 @@ class BoxStdioSessionRuntime:
 
     @staticmethod
     def _copy_workspace_tree(source_path: str, process_host_root: str, process_host_workspace: str) -> None:
-        shutil.rmtree(process_host_root, ignore_errors=True)
-        os.makedirs(process_host_root, exist_ok=True)
-        shutil.copytree(
-            source_path,
-            process_host_workspace,
-            symlinks=True,
-            ignore=shutil.ignore_patterns('.git', '__pycache__', '.pytest_cache', '.mypy_cache', '.ruff_cache'),
-        )
+        # Docker-backed bootstrap writes root-owned runtime directories such as
+        # .venv/.tmp into the staged workspace. The host process may not be able
+        # to delete them, so refresh source files in place and preserve runtime
+        # directories instead of rmtree'ing the whole staging root.
+        with _workspace_copy_lock(process_host_root):
+            preserved_names = {'.venv', 'venv', 'env', '.cache', '.tmp', '.langbot'}
+            os.makedirs(process_host_workspace, exist_ok=True)
+            for name in os.listdir(process_host_workspace):
+                if name in preserved_names:
+                    continue
+                path = os.path.join(process_host_workspace, name)
+                if os.path.isdir(path) and not os.path.islink(path):
+                    shutil.rmtree(path, ignore_errors=True)
+                else:
+                    # The entry may disappear between listdir and unlink if cleanup races us.
+                    with suppress(FileNotFoundError):
+                        os.unlink(path)
+            shutil.copytree(
+                source_path,
+                process_host_workspace,
+                symlinks=True,
+                dirs_exist_ok=True,
+                ignore=shutil.ignore_patterns(
+                    '.git',
+                    '__pycache__',
+                    '.pytest_cache',
+                    '.mypy_cache',
+                    '.ruff_cache',
+                    '.venv',
+                    'venv',
+                    'env',
+                    '.cache',
+                    '.tmp',
+                    '.langbot',
+                ),
+            )
 
     async def _cleanup_staged_workspace(self) -> None:
         if not self.resolve_host_path():
@@ -343,23 +390,25 @@ class BoxStdioSessionRuntime:
     @staticmethod
     def detect_install_command(host_path: str, workspace_path: str = '/workspace') -> str | None:
         workspace_kind = classify_python_workspace(host_path)
-        quoted_workspace_path = shlex.quote(workspace_path)
-        if workspace_kind == 'package':
-            return (
-                'mkdir -p /opt/_lb_src'
-                f' && tar -C {quoted_workspace_path}'
-                ' --exclude=.venv --exclude=.git --exclude=__pycache__'
-                ' --exclude=node_modules --exclude=.tox --exclude=.nox'
-                ' --exclude="*.egg-info" --exclude=.uv-cache'
-                ' -cf - .'
-                ' | tar -C /opt/_lb_src -xf -'
-                ' && pip install --no-cache-dir /opt/_lb_src'
-                ' && rm -rf /opt/_lb_src'
-            )
-        if workspace_kind == 'requirements':
-            return f'pip install --no-cache-dir -r {quoted_workspace_path}/requirements.txt'
+        if workspace_kind in {'package', 'requirements'}:
+            return wrap_python_command_with_env('python -c "pass"', mount_path=workspace_path).rstrip()
         return None
 
+    @staticmethod
+    def _wrap_process_payload_with_python_env(payload: dict[str, Any], workspace_path: str) -> dict[str, Any]:
+        """Start a prepared Python workspace without writing bootstrap output to MCP stdio."""
+        workspace_root = workspace_path.rstrip('/') or '/workspace'
+        venv_dir = f'{workspace_root}/.venv'
+        venv_bin = f'{venv_dir}/bin'
+        command = ' '.join([shlex.quote(payload['command']), *[shlex.quote(arg) for arg in payload.get('args', [])]])
+        wrapped = dict(payload)
+        wrapped['command'] = 'sh'
+        wrapped['args'] = [
+            '-lc',
+            (f'export VIRTUAL_ENV={shlex.quote(venv_dir)}; export PATH={shlex.quote(venv_bin)}:$PATH; exec {command}'),
+        ]
+        return wrapped
+
     def build_box_session_payload(self, session_id: str, host_path: str | None = None) -> dict[str, Any]:
         workspace = self._build_workspace()
         workspace.session_id = session_id
diff --git a/src/langbot/pkg/provider/tools/loaders/native.py b/src/langbot/pkg/provider/tools/loaders/native.py
index 83390049..7f5ee422 100644
--- a/src/langbot/pkg/provider/tools/loaders/native.py
+++ b/src/langbot/pkg/provider/tools/loaders/native.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import base64
 import json
 import os
 
@@ -8,6 +9,7 @@ from langbot_plugin.api.entities.events import pipeline_query
 
 from .. import loader
 from ..errors import ToolNotFoundError
+from .availability import is_box_backend_available
 from . import skill as skill_loader
 
 EXEC_TOOL_NAME = 'exec'
@@ -22,6 +24,15 @@ _ALL_TOOL_NAMES = {EXEC_TOOL_NAME, READ_TOOL_NAME, WRITE_TOOL_NAME, EDIT_TOOL_NA
 # Skip these dirs during grep walk to avoid noise
 _SKIP_DIRS = {'.git', 'node_modules', '__pycache__', '.venv', 'venv', '.tox', 'dist', 'build'}
 
+_DEFAULT_READ_MAX_LINES = 2000
+_MAX_READ_MAX_LINES = 10000
+_DEFAULT_TOOL_RESULT_MAX_BYTES = 50 * 1024
+_BOX_FILE_SCRIPT_MAX_BYTES = 2048
+_GLOB_MAX_MATCHES = 100
+_GREP_MAX_MATCHES = 200
+_GREP_MAX_FILES = 5000
+_GREP_MAX_LINE_CHARS = 500
+
 
 class NativeToolLoader(loader.ToolLoader):
     def __init__(self, ap):
@@ -43,18 +54,7 @@ class NativeToolLoader(loader.ToolLoader):
 
     async def _check_backend_available(self) -> bool:
         """Check if the box backend is truly available (not just the runtime)."""
-        box_service = getattr(self.ap, 'box_service', None)
-        if box_service is None:
-            return False
-        if not getattr(box_service, 'available', False):
-            return False
-        # Check if backend is truly available via get_status
-        try:
-            status = await box_service.get_status()
-            backend_info = status.get('backend', {})
-            return backend_info.get('available', False)
-        except Exception:
-            return False
+        return await is_box_backend_available(self.ap)
 
     async def get_tools(self, bound_plugins: list[str] | None = None) -> list[resource_tool.LLMTool]:
         if not self._is_sandbox_available():
@@ -139,6 +139,7 @@ class NativeToolLoader(loader.ToolLoader):
         # via execute_tool. Skills are mounted at /workspace/.skills/{name}/
         # via extra_mounts built by BoxService.
         result = await self.ap.box_service.execute_tool(parameters, query)
+        result = self._normalize_exec_result(result)
 
         if selected_skill is not None:
             self._refresh_skill_from_disk(selected_skill)
@@ -227,34 +228,121 @@ class NativeToolLoader(loader.ToolLoader):
         except Exception:
             return {'ok': False, 'error': stdout or 'Box file operation returned no result'}
 
-    async def _read_workspace_via_box(self, path: str, query: pipeline_query.Query) -> dict:
+    async def _read_workspace_via_box(self, path: str, parameters: dict, query: pipeline_query.Query) -> dict:
+        offset = self._positive_int(parameters.get('offset'), default=1)
+        byte_offset = self._non_negative_int(parameters.get('byte_offset'), default=0)
+        max_lines = self._positive_int(
+            parameters.get('limit'),
+            default=_DEFAULT_READ_MAX_LINES,
+            max_value=_MAX_READ_MAX_LINES,
+        )
+        # Box file fallback returns through exec stdout, which is already capped
+        # by BoxService. Keep this payload small enough to remain valid JSON.
+        max_bytes = min(
+            self._positive_int(parameters.get('max_bytes'), default=_DEFAULT_TOOL_RESULT_MAX_BYTES),
+            _BOX_FILE_SCRIPT_MAX_BYTES,
+        )
+        encoding = self._read_encoding(parameters)
         script = f"""
-import json, os
+import base64, json, os
 path = {json.dumps(path)}
+offset = {offset}
+byte_offset = {byte_offset}
+max_lines = {max_lines}
+max_bytes = {max_bytes}
+encoding = {json.dumps(encoding)}
 if not path.startswith('/workspace'):
     print(json.dumps({{'ok': False, 'error': 'Path must be under /workspace.'}}))
 elif not os.path.exists(path):
     print(json.dumps({{'ok': False, 'error': f'File not found: {{path}}'}}))
 elif os.path.isdir(path):
-    print(json.dumps({{'ok': True, 'content': '\\n'.join(sorted(os.listdir(path))), 'is_directory': True}}))
+    entries = sorted(os.listdir(path))
+    content = '\\n'.join(entries)
+    print(json.dumps({{'ok': True, 'content': content, 'is_directory': True, 'total': len(entries), 'truncated': False}}))
+elif encoding == 'base64':
+    size_bytes = os.path.getsize(path)
+    with open(path, 'rb') as f:
+        f.seek(byte_offset)
+        data = f.read(max_bytes + 1)
+    chunk = data[:max_bytes]
+    has_more = len(data) > max_bytes
+    print(json.dumps({{
+        'ok': True,
+        'content': base64.b64encode(chunk).decode('ascii'),
+        'encoding': 'base64',
+        'byte_offset': byte_offset,
+        'length': len(chunk),
+        'size_bytes': size_bytes,
+        'has_more': has_more,
+        'next_byte_offset': byte_offset + len(chunk) if has_more else None,
+        'max_bytes': max_bytes,
+    }}))
 else:
+    lines = []
+    output_bytes = 0
+    end_line = offset - 1
+    truncated = False
+    next_offset = None
     with open(path, 'r', encoding='utf-8', errors='replace') as f:
-        print(json.dumps({{'ok': True, 'content': f.read()}}))
+        for line_number, line in enumerate(f, 1):
+            if line_number < offset:
+                continue
+            if len(lines) >= max_lines:
+                truncated = True
+                next_offset = line_number
+                break
+            line_bytes = len(line.encode('utf-8'))
+            if output_bytes + line_bytes > max_bytes:
+                truncated = True
+                next_offset = line_number
+                break
+            lines.append(line.rstrip('\\n'))
+            output_bytes += line_bytes
+            end_line = line_number
+    print(json.dumps({{
+        'ok': True,
+        'content': '\\n'.join(lines),
+        'truncated': truncated,
+        'start_line': offset,
+        'end_line': end_line,
+        'next_offset': next_offset,
+        'max_lines': max_lines,
+        'max_bytes': max_bytes,
+    }}))
 """.strip()
         return await self._run_workspace_file_script(script, query)
 
-    async def _write_workspace_via_box(self, path: str, content: str, query: pipeline_query.Query) -> dict:
+    async def _write_workspace_via_box(
+        self,
+        path: str,
+        content: str,
+        parameters: dict,
+        query: pipeline_query.Query,
+    ) -> dict:
+        encoding, mode = self._write_options(parameters)
         script = f"""
-import json, os
+import base64, json, os
 path = {json.dumps(path)}
 content = {json.dumps(content)}
+encoding = {json.dumps(encoding)}
+mode = {json.dumps(mode)}
 if not path.startswith('/workspace'):
     print(json.dumps({{'ok': False, 'error': 'Path must be under /workspace.'}}))
 else:
     os.makedirs(os.path.dirname(path) or '/workspace', exist_ok=True)
-    with open(path, 'w', encoding='utf-8') as f:
-        f.write(content)
-    print(json.dumps({{'ok': True, 'path': path}}))
+    if encoding == 'base64':
+        try:
+            data = base64.b64decode(content, validate=True)
+        except Exception as exc:
+            print(json.dumps({{'ok': False, 'error': f'invalid base64 content: {{exc}}'}}))
+        else:
+            with open(path, 'ab' if mode == 'append' else 'wb') as f:
+                f.write(data)
+            print(json.dumps({{'ok': True, 'path': path}}))
+    else:
+        with open(path, 'a' if mode == 'append' else 'w', encoding='utf-8') as f:
+            f.write(content)
+        print(json.dumps({{'ok': True, 'path': path}}))
 """.strip()
         return await self._run_workspace_file_script(script, query)
 
@@ -307,12 +395,27 @@ else:
         if not any(part in skip_dirs for part in item.parts)
     ]
     hits.sort(key=lambda item: item.stat().st_mtime if item.exists() else 0, reverse=True)
-    shown = hits[:100]
+    shown = hits[:{_GLOB_MAX_MATCHES}]
     matches = []
+    output_bytes = 0
+    truncated_by_bytes = False
     for item in shown:
         rel = os.path.relpath(str(item), path)
-        matches.append(os.path.join(path, rel).replace(os.sep, '/'))
-    print(json.dumps({{'ok': True, 'matches': matches, 'total': len(hits), 'truncated': len(hits) > 100}}))
+        sandbox_path = os.path.join(path, rel).replace(os.sep, '/')
+        entry_bytes = len(sandbox_path.encode('utf-8')) + (1 if matches else 0)
+        if output_bytes + entry_bytes > {_DEFAULT_TOOL_RESULT_MAX_BYTES}:
+            truncated_by_bytes = True
+            break
+        matches.append(sandbox_path)
+        output_bytes += entry_bytes
+    print(json.dumps({{
+        'ok': True,
+        'matches': matches,
+        'preview': '\\n'.join(matches),
+        'total': len(hits),
+        'truncated': len(hits) > len(matches) or truncated_by_bytes,
+        'truncated_by': 'bytes' if truncated_by_bytes else ('matches' if len(hits) > len(matches) else None),
+    }}))
 """.strip()
         return await self._run_workspace_file_script(script, query)
 
@@ -350,29 +453,54 @@ else:
                     continue
                 if item.is_file():
                     files.append(item)
-                if len(files) >= 5000:
+                if len(files) >= {_GREP_MAX_FILES}:
                     break
 
         matches = []
+        output_bytes = 0
+        truncated_by = None
         for fp in files:
             try:
-                text = fp.read_text(errors='ignore')
+                handle = fp.open('r', encoding='utf-8', errors='ignore')
             except OSError:
                 continue
-            for lineno, line in enumerate(text.splitlines(), 1):
-                if regex.search(line):
-                    if base.is_file():
-                        file_path = path
-                    else:
-                        rel = os.path.relpath(str(fp), path)
-                        file_path = os.path.join(path, rel).replace(os.sep, '/')
-                    matches.append({{'file': file_path, 'line': lineno, 'content': line.rstrip()}})
-                    if len(matches) >= 200:
-                        break
-            if len(matches) >= 200:
+            with handle:
+                for lineno, line in enumerate(handle, 1):
+                    if regex.search(line):
+                        if base.is_file():
+                            file_path = path
+                        else:
+                            rel = os.path.relpath(str(fp), path)
+                            file_path = os.path.join(path, rel).replace(os.sep, '/')
+                        content = line.rstrip()
+                        line_truncated = False
+                        if len(content) > {_GREP_MAX_LINE_CHARS}:
+                            content = content[:{_GREP_MAX_LINE_CHARS}] + '... [truncated]'
+                            line_truncated = True
+                        entry = {{'file': file_path, 'line': lineno, 'content': content}}
+                        entry_bytes = len(json.dumps(entry, ensure_ascii=False).encode('utf-8')) + 1
+                        if output_bytes + entry_bytes > {_DEFAULT_TOOL_RESULT_MAX_BYTES}:
+                            truncated_by = 'bytes'
+                            break
+                        if line_truncated and truncated_by is None:
+                            truncated_by = 'line'
+                        matches.append(entry)
+                        output_bytes += entry_bytes
+                        if len(matches) >= {_GREP_MAX_MATCHES}:
+                            truncated_by = truncated_by or 'matches'
+                            break
+                if truncated_by == 'bytes' or len(matches) >= {_GREP_MAX_MATCHES}:
+                    break
+            if truncated_by == 'bytes' or len(matches) >= {_GREP_MAX_MATCHES}:
                 break
 
-        print(json.dumps({{'ok': True, 'matches': matches, 'total': len(matches), 'truncated': len(matches) >= 200}}))
+        print(json.dumps({{
+            'ok': True,
+            'matches': matches,
+            'total': len(matches),
+            'truncated': truncated_by is not None,
+            'truncated_by': truncated_by,
+        }}))
 """.strip()
         return await self._run_workspace_file_script(script, query)
 
@@ -387,14 +515,20 @@ else:
         )
         if skill_request is not None and hasattr(self.ap.box_service, 'read_skill_file'):
             selected_skill, relative = skill_request
+            host_path = self._resolve_skill_host_path(selected_skill, relative)
+            if host_path and os.path.exists(host_path):
+                if os.path.isdir(host_path):
+                    return self._build_directory_result(os.listdir(host_path))
+                return self._read_text_file_preview(host_path, parameters)
+
             try:
                 result = await self.ap.box_service.read_skill_file(selected_skill['name'], relative)
-                return {'ok': True, 'content': result.get('content', '')}
+                return self._build_read_result_from_text(str(result.get('content', '')), parameters)
             except Exception:
                 try:
                     result = await self.ap.box_service.list_skill_files(selected_skill['name'], relative)
                     entries = [entry['name'] for entry in result.get('entries', [])]
-                    return {'ok': True, 'content': '\n'.join(sorted(entries)), 'is_directory': True}
+                    return self._build_directory_result(entries)
                 except Exception as exc:
                     return {'ok': False, 'error': str(exc)}
 
@@ -405,20 +539,19 @@ else:
             include_activated=True,
         )
         if self._should_use_box_workspace_files(selected_skill):
-            return await self._read_workspace_via_box(path, query)
+            return await self._read_workspace_via_box(path, parameters, query)
         if not os.path.exists(host_path):
             return {'ok': False, 'error': f'File not found: {path}'}
         if os.path.isdir(host_path):
             entries = os.listdir(host_path)
-            return {'ok': True, 'content': '\n'.join(sorted(entries)), 'is_directory': True}
-        with open(host_path, 'r', errors='replace') as f:
-            content = f.read()
-        return {'ok': True, 'content': content}
+            return self._build_directory_result(entries)
+        return self._read_text_file_preview(host_path, parameters)
 
     async def _invoke_write(self, parameters: dict, query: pipeline_query.Query) -> dict:
         path = parameters['path']
         content = parameters['content']
         self.ap.logger.info(f'write tool invoked: query_id={query.query_id} path={path} length={len(content)}')
+        encoding, _mode = self._write_options(parameters)
         skill_request = self._resolve_skill_relative_path(
             query,
             path,
@@ -426,6 +559,8 @@ else:
             include_activated=True,
         )
         if skill_request is not None and hasattr(self.ap.box_service, 'write_skill_file'):
+            if encoding != 'text':
+                return {'ok': False, 'error': 'base64 writes to skill packages are not supported.'}
             selected_skill, relative = skill_request
             await self.ap.box_service.write_skill_file(selected_skill['name'], relative, content)
             await self.ap.skill_mgr.reload_skills()
@@ -438,10 +573,12 @@ else:
             include_activated=True,
         )
         if self._should_use_box_workspace_files(selected_skill):
-            return await self._write_workspace_via_box(path, content, query)
+            return await self._write_workspace_via_box(path, content, parameters, query)
         os.makedirs(os.path.dirname(host_path), exist_ok=True)
-        with open(host_path, 'w', encoding='utf-8') as f:
-            f.write(content)
+        try:
+            self._write_host_file(host_path, content, parameters)
+        except ValueError as exc:
+            return {'ok': False, 'error': str(exc)}
         self._refresh_skill_from_disk(selected_skill)
         return {'ok': True, 'path': path}
 
@@ -584,6 +721,40 @@ else:
                         'type': 'string',
                         'description': 'Absolute path to the file (must be under /workspace).',
                     },
+                    'offset': {
+                        'type': 'integer',
+                        'description': '1-indexed line number to start reading from. Defaults to 1.',
+                        'default': 1,
+                        'minimum': 1,
+                    },
+                    'limit': {
+                        'type': 'integer',
+                        'description': f'Maximum number of lines to return. Defaults to {_DEFAULT_READ_MAX_LINES}.',
+                        'default': _DEFAULT_READ_MAX_LINES,
+                        'minimum': 1,
+                        'maximum': _MAX_READ_MAX_LINES,
+                    },
+                    'max_bytes': {
+                        'type': 'integer',
+                        'description': (
+                            f'Maximum bytes of file content to return. Defaults to {_DEFAULT_TOOL_RESULT_MAX_BYTES}.'
+                        ),
+                        'default': _DEFAULT_TOOL_RESULT_MAX_BYTES,
+                        'minimum': 1,
+                        'maximum': _DEFAULT_TOOL_RESULT_MAX_BYTES,
+                    },
+                    'encoding': {
+                        'type': 'string',
+                        'description': 'Return text by default, or base64 for binary byte-range reads.',
+                        'enum': ['text', 'base64'],
+                        'default': 'text',
+                    },
+                    'byte_offset': {
+                        'type': 'integer',
+                        'description': '0-indexed byte offset used when encoding is base64. Defaults to 0.',
+                        'default': 0,
+                        'minimum': 0,
+                    },
                 },
                 'required': ['path'],
                 'additionalProperties': False,
@@ -609,7 +780,19 @@ else:
                     },
                     'content': {
                         'type': 'string',
-                        'description': 'Content to write to the file.',
+                        'description': 'Text content, or base64 content when encoding is base64.',
+                    },
+                    'encoding': {
+                        'type': 'string',
+                        'description': 'Write content as text by default, or decode it from base64 for binary files.',
+                        'enum': ['text', 'base64'],
+                        'default': 'text',
+                    },
+                    'mode': {
+                        'type': 'string',
+                        'description': 'Overwrite the file by default, or append to it.',
+                        'enum': ['overwrite', 'append'],
+                        'default': 'overwrite',
                     },
                 },
                 'required': ['path', 'content'],
@@ -740,22 +923,30 @@ else:
         hits.sort(key=lambda p: p.stat().st_mtime if p.exists() else 0, reverse=True)
 
         total = len(hits)
-        shown = hits[:100]
+        shown = hits[:_GLOB_MAX_MATCHES]
 
         # Convert back to sandbox paths
         sandbox_paths = []
+        output_bytes = 0
+        truncated_by_bytes = False
         for h in shown:
             rel = os.path.relpath(str(h), host_path)
             sandbox_path = os.path.join(path, rel)
+            entry_bytes = len(sandbox_path.encode('utf-8')) + (1 if sandbox_paths else 0)
+            if output_bytes + entry_bytes > _DEFAULT_TOOL_RESULT_MAX_BYTES:
+                truncated_by_bytes = True
+                break
             sandbox_paths.append(sandbox_path)
+            output_bytes += entry_bytes
 
-        result_lines = sandbox_paths
-        result = '\n'.join(result_lines)
-
-        if total > 100:
-            result += f'\n... ({total} matches, showing first 100)'
-
-        return {'ok': True, 'matches': result_lines, 'total': total, 'truncated': total > 100}
+        return {
+            'ok': True,
+            'matches': sandbox_paths,
+            'preview': '\n'.join(sandbox_paths),
+            'total': total,
+            'truncated': total > len(sandbox_paths) or truncated_by_bytes,
+            'truncated_by': 'bytes' if truncated_by_bytes else ('matches' if total > len(sandbox_paths) else None),
+        }
 
     async def _invoke_grep(self, parameters: dict, query: pipeline_query.Query) -> dict:
         pattern = parameters['pattern']
@@ -791,32 +982,46 @@ else:
             files = self._grep_walk(base, include)
 
         matches = []
+        output_bytes = 0
+        truncated_by = None
         for fp in files:
             try:
-                text = fp.read_text(errors='ignore')
+                handle = fp.open('r', encoding='utf-8', errors='ignore')
             except OSError:
                 continue
-            for lineno, line in enumerate(text.splitlines(), 1):
-                if regex.search(line):
-                    rel = os.path.relpath(str(fp), host_path)
-                    sandbox_path = os.path.join(path, rel)
-                    matches.append(
-                        {
+            with handle:
+                for lineno, line in enumerate(handle, 1):
+                    if regex.search(line):
+                        rel = os.path.relpath(str(fp), host_path)
+                        sandbox_path = os.path.join(path, rel)
+                        content, line_truncated = self._truncate_grep_line(line.rstrip())
+                        entry = {
                             'file': sandbox_path,
                             'line': lineno,
-                            'content': line.rstrip(),
+                            'content': content,
                         }
-                    )
-                    if len(matches) >= 200:
-                        break
-            if len(matches) >= 200:
+                        entry_bytes = len(json.dumps(entry, ensure_ascii=False).encode('utf-8')) + 1
+                        if output_bytes + entry_bytes > _DEFAULT_TOOL_RESULT_MAX_BYTES:
+                            truncated_by = 'bytes'
+                            break
+                        if line_truncated and truncated_by is None:
+                            truncated_by = 'line'
+                        matches.append(entry)
+                        output_bytes += entry_bytes
+                        if len(matches) >= _GREP_MAX_MATCHES:
+                            truncated_by = truncated_by or 'matches'
+                            break
+                if truncated_by == 'bytes' or len(matches) >= _GREP_MAX_MATCHES:
+                    break
+            if truncated_by == 'bytes' or len(matches) >= _GREP_MAX_MATCHES:
                 break
 
         return {
             'ok': True,
             'matches': matches,
             'total': len(matches),
-            'truncated': len(matches) >= 200,
+            'truncated': truncated_by is not None,
+            'truncated_by': truncated_by,
         }
 
     @staticmethod
@@ -828,10 +1033,266 @@ else:
                 continue
             if item.is_file():
                 results.append(item)
-            if len(results) >= 5000:
+            if len(results) >= _GREP_MAX_FILES:
                 break
         return results
 
+    @staticmethod
+    def _resolve_skill_host_path(selected_skill: dict, relative: str) -> str | None:
+        package_root = str(selected_skill.get('package_root', '') or '').strip()
+        if not package_root:
+            return None
+
+        host_root = os.path.realpath(package_root)
+        host_path = os.path.realpath(os.path.join(host_root, relative))
+        if not (host_path == host_root or host_path.startswith(host_root + os.sep)):
+            raise ValueError('Path escapes the skill package boundary.')
+        return host_path
+
+    def _normalize_exec_result(self, result: dict) -> dict:
+        normalized = dict(result)
+        stdout = str(normalized.get('stdout') or '')
+        stderr = str(normalized.get('stderr') or '')
+        stdout, stdout_capped = self._truncate_text_to_bytes_with_flag(stdout, _DEFAULT_TOOL_RESULT_MAX_BYTES)
+        stderr, stderr_capped = self._truncate_text_to_bytes_with_flag(stderr, _DEFAULT_TOOL_RESULT_MAX_BYTES)
+        normalized['stdout'] = stdout
+        normalized['stderr'] = stderr
+        normalized['stdout_truncated'] = bool(normalized.get('stdout_truncated') or stdout_capped)
+        normalized['stderr_truncated'] = bool(normalized.get('stderr_truncated') or stderr_capped)
+
+        if stdout and stderr:
+            preview_raw = f'stdout:\n{stdout}\n\nstderr:\n{stderr}'
+        else:
+            preview_raw = stdout or stderr
+        preview, preview_capped = self._truncate_text_to_bytes_with_flag(preview_raw, _DEFAULT_TOOL_RESULT_MAX_BYTES)
+        normalized['preview'] = preview
+        normalized['truncated'] = bool(
+            normalized['stdout_truncated'] or normalized['stderr_truncated'] or preview_capped
+        )
+        if preview_capped and not normalized.get('truncated_by'):
+            normalized['truncated_by'] = 'bytes'
+        return normalized
+
+    def _build_directory_result(self, entries: list[str]) -> dict:
+        sorted_entries = sorted(str(entry) for entry in entries)
+        content = '\n'.join(sorted_entries)
+        preview = self._truncate_text_to_bytes(content, _DEFAULT_TOOL_RESULT_MAX_BYTES)
+        truncated = preview != content
+        return {
+            'ok': True,
+            'content': preview,
+            'is_directory': True,
+            'total': len(sorted_entries),
+            'truncated': truncated,
+            'truncated_by': 'bytes' if truncated else None,
+        }
+
+    def _read_text_file_preview(self, host_path: str, parameters: dict) -> dict:
+        if self._read_encoding(parameters) == 'base64':
+            return self._read_binary_file_chunk(host_path, parameters)
+
+        offset = self._positive_int(parameters.get('offset'), default=1)
+        max_lines = self._positive_int(
+            parameters.get('limit'),
+            default=_DEFAULT_READ_MAX_LINES,
+            max_value=_MAX_READ_MAX_LINES,
+        )
+        max_bytes = self._positive_int(
+            parameters.get('max_bytes'),
+            default=_DEFAULT_TOOL_RESULT_MAX_BYTES,
+            max_value=_DEFAULT_TOOL_RESULT_MAX_BYTES,
+        )
+        lines: list[str] = []
+        output_bytes = 0
+        end_line = offset - 1
+        truncated = False
+        truncated_by: str | None = None
+        next_offset: int | None = None
+
+        with open(host_path, 'r', encoding='utf-8', errors='replace') as f:
+            for line_number, line in enumerate(f, 1):
+                if line_number < offset:
+                    continue
+                if len(lines) >= max_lines:
+                    truncated = True
+                    truncated_by = 'lines'
+                    next_offset = line_number
+                    break
+
+                line_bytes = len(line.encode('utf-8'))
+                if output_bytes + line_bytes > max_bytes:
+                    truncated = True
+                    truncated_by = 'bytes'
+                    next_offset = line_number
+                    break
+
+                lines.append(line.rstrip('\n'))
+                output_bytes += line_bytes
+                end_line = line_number
+
+        if not lines and truncated_by == 'bytes':
+            content = (
+                f'[Line {next_offset or offset} exceeds the {self._format_size(max_bytes)} read limit. '
+                'Use exec with a byte-range command for this line, or read a different offset.]'
+            )
+        else:
+            content = '\n'.join(lines)
+
+        return {
+            'ok': True,
+            'content': content,
+            'truncated': truncated,
+            'truncated_by': truncated_by,
+            'start_line': offset,
+            'end_line': end_line,
+            'next_offset': next_offset,
+            'max_lines': max_lines,
+            'max_bytes': max_bytes,
+        }
+
+    def _read_binary_file_chunk(self, host_path: str, parameters: dict) -> dict:
+        byte_offset = self._non_negative_int(parameters.get('byte_offset'), default=0)
+        max_bytes = self._positive_int(
+            parameters.get('max_bytes'),
+            default=_DEFAULT_TOOL_RESULT_MAX_BYTES,
+            max_value=_DEFAULT_TOOL_RESULT_MAX_BYTES,
+        )
+        size_bytes = os.path.getsize(host_path)
+        with open(host_path, 'rb') as f:
+            f.seek(byte_offset)
+            data = f.read(max_bytes + 1)
+        chunk = data[:max_bytes]
+        has_more = len(data) > max_bytes
+        return {
+            'ok': True,
+            'content': base64.b64encode(chunk).decode('ascii'),
+            'encoding': 'base64',
+            'byte_offset': byte_offset,
+            'length': len(chunk),
+            'size_bytes': size_bytes,
+            'has_more': has_more,
+            'next_byte_offset': byte_offset + len(chunk) if has_more else None,
+            'max_bytes': max_bytes,
+        }
+
+    def _write_host_file(self, host_path: str, content: str, parameters: dict) -> None:
+        encoding, mode = self._write_options(parameters)
+        if encoding == 'base64':
+            try:
+                data = base64.b64decode(content, validate=True)
+            except Exception as exc:
+                raise ValueError(f'invalid base64 content: {exc}') from exc
+            with open(host_path, 'ab' if mode == 'append' else 'wb') as f:
+                f.write(data)
+            return
+        with open(host_path, 'a' if mode == 'append' else 'w', encoding='utf-8') as f:
+            f.write(content)
+
+    @staticmethod
+    def _read_encoding(parameters: dict) -> str:
+        return 'base64' if parameters.get('encoding') == 'base64' else 'text'
+
+    @staticmethod
+    def _write_options(parameters: dict) -> tuple[str, str]:
+        encoding = 'base64' if parameters.get('encoding') == 'base64' else 'text'
+        mode = 'append' if parameters.get('mode') == 'append' else 'overwrite'
+        return encoding, mode
+
+    def _build_read_result_from_text(self, content: str, parameters: dict) -> dict:
+        offset = self._positive_int(parameters.get('offset'), default=1)
+        max_lines = self._positive_int(
+            parameters.get('limit'),
+            default=_DEFAULT_READ_MAX_LINES,
+            max_value=_MAX_READ_MAX_LINES,
+        )
+        max_bytes = self._positive_int(
+            parameters.get('max_bytes'),
+            default=_DEFAULT_TOOL_RESULT_MAX_BYTES,
+            max_value=_DEFAULT_TOOL_RESULT_MAX_BYTES,
+        )
+        all_lines = content.splitlines()
+        start_index = offset - 1
+        if start_index >= len(all_lines) and all_lines:
+            return {'ok': False, 'error': f'Offset {offset} is beyond end of file ({len(all_lines)} lines total)'}
+        output_lines: list[str] = []
+        output_bytes = 0
+        truncated = False
+        truncated_by: str | None = None
+        next_offset: int | None = None
+        for index, line in enumerate(all_lines[start_index:], start_index + 1):
+            if len(output_lines) >= max_lines:
+                truncated = True
+                truncated_by = 'lines'
+                next_offset = index
+                break
+            line_bytes = len(line.encode('utf-8')) + (1 if output_lines else 0)
+            if output_bytes + line_bytes > max_bytes:
+                truncated = True
+                truncated_by = 'bytes'
+                next_offset = index
+                break
+            output_lines.append(line)
+            output_bytes += line_bytes
+
+        end_line = offset + len(output_lines) - 1
+        return {
+            'ok': True,
+            'content': '\n'.join(output_lines),
+            'truncated': truncated,
+            'truncated_by': truncated_by,
+            'start_line': offset,
+            'end_line': end_line,
+            'next_offset': next_offset,
+            'max_lines': max_lines,
+            'max_bytes': max_bytes,
+        }
+
+    @staticmethod
+    def _positive_int(value, *, default: int, max_value: int | None = None) -> int:
+        try:
+            parsed = int(value)
+        except (TypeError, ValueError):
+            parsed = default
+        if parsed <= 0:
+            parsed = default
+        if max_value is not None:
+            parsed = min(parsed, max_value)
+        return parsed
+
+    @staticmethod
+    def _non_negative_int(value, *, default: int) -> int:
+        try:
+            parsed = int(value)
+        except (TypeError, ValueError):
+            parsed = default
+        return parsed if parsed >= 0 else default
+
+    @staticmethod
+    def _truncate_grep_line(line: str) -> tuple[str, bool]:
+        if len(line) <= _GREP_MAX_LINE_CHARS:
+            return line, False
+        return f'{line[:_GREP_MAX_LINE_CHARS]}... [truncated]', True
+
+    @staticmethod
+    def _truncate_text_to_bytes(text: str, max_bytes: int) -> str:
+        return NativeToolLoader._truncate_text_to_bytes_with_flag(text, max_bytes)[0]
+
+    @staticmethod
+    def _truncate_text_to_bytes_with_flag(text: str, max_bytes: int) -> tuple[str, bool]:
+        data = text.encode('utf-8')
+        if len(data) <= max_bytes:
+            return text, False
+        truncated = data[:max_bytes]
+        while truncated and (truncated[-1] & 0xC0) == 0x80:
+            truncated = truncated[:-1]
+        return truncated.decode('utf-8', errors='ignore'), True
+
+    @staticmethod
+    def _format_size(bytes_count: int) -> str:
+        if bytes_count < 1024:
+            return f'{bytes_count}B'
+        return f'{bytes_count / 1024:.1f}KB'
+
     def _summarize_parameters(self, parameters: dict) -> dict:
         summary = dict(parameters)
         cmd = str(summary.get('command', '')).strip()
diff --git a/src/langbot/pkg/provider/tools/loaders/skill.py b/src/langbot/pkg/provider/tools/loaders/skill.py
index 9df94fd2..b62f3e7d 100644
--- a/src/langbot/pkg/provider/tools/loaders/skill.py
+++ b/src/langbot/pkg/provider/tools/loaders/skill.py
@@ -72,6 +72,45 @@ def register_activated_skill(query: pipeline_query.Query, skill_data: dict) -> N
         activated[skill_name] = skill_data
 
 
+def normalize_skill_names(value: typing.Any) -> list[str]:
+    """Return a de-duplicated list of non-empty skill names."""
+    if not isinstance(value, list):
+        return []
+
+    names: list[str] = []
+    for item in value:
+        skill_name = str(item or '').strip()
+        if skill_name and skill_name not in names:
+            names.append(skill_name)
+    return names
+
+
+def get_activated_skill_names(query: pipeline_query.Query) -> list[str]:
+    """Return activated skill names for callers that own persistence policy."""
+    return normalize_skill_names(list(get_activated_skills(query).keys()))
+
+
+def restore_activated_skills(
+    ap: app.Application,
+    query: pipeline_query.Query,
+    skill_names: typing.Any,
+) -> list[str]:
+    """Restore caller-provided activated skill names into Query variables.
+
+    Persistence and state scope ownership belong to higher-level flows. This
+    helper only rebuilds current Query state from pipeline-visible skills, so
+    removed or unbound skills stay unavailable to native exec/write/edit.
+    """
+    restored: list[str] = []
+    for skill_name in normalize_skill_names(skill_names):
+        skill_data = get_visible_skill(ap, query, skill_name)
+        if skill_data is None:
+            continue
+        register_activated_skill(query, skill_data)
+        restored.append(skill_name)
+    return restored
+
+
 def parse_skill_mount_path(sandbox_path: str) -> tuple[str | None, str]:
     normalized_path = str(sandbox_path or '/workspace').strip() or '/workspace'
     if normalized_path == SKILL_MOUNT_PREFIX:
diff --git a/src/langbot/pkg/provider/tools/loaders/skill_authoring.py b/src/langbot/pkg/provider/tools/loaders/skill_authoring.py
index 9d0fe6e9..d5372178 100644
--- a/src/langbot/pkg/provider/tools/loaders/skill_authoring.py
+++ b/src/langbot/pkg/provider/tools/loaders/skill_authoring.py
@@ -6,6 +6,7 @@ import typing
 import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
 
 from .. import loader
+from .availability import is_box_backend_available
 
 # Align with Claude Code's Skill tool design:
 # - activate: Activate a skill via Tool Call, returns SKILL.md content
@@ -45,18 +46,7 @@ class SkillToolLoader(loader.ToolLoader):
 
     async def _check_sandbox_available(self) -> bool:
         """Check if the box backend is truly available (not just the runtime)."""
-        box_service = getattr(self.ap, 'box_service', None)
-        if box_service is None:
-            return False
-        if not getattr(box_service, 'available', False):
-            return False
-        # Check if backend is truly available via get_status
-        try:
-            status = await box_service.get_status()
-            backend_info = status.get('backend', {})
-            return backend_info.get('available', False)
-        except Exception:
-            return False
+        return await is_box_backend_available(self.ap)
 
     async def get_tools(self, bound_plugins: list[str] | None = None) -> list[resource_tool.LLMTool]:
         if not self._is_available():
@@ -92,16 +82,15 @@ class SkillToolLoader(loader.ToolLoader):
         if not skill_name:
             raise ValueError('skill_name is required')
 
-        skill_mgr = self.ap.skill_mgr
-        skill_data = skill_mgr.get_skill_by_name(skill_name)
+        from . import skill as skill_loader
+
+        skill_data = skill_loader.get_visible_skill(self.ap, query, skill_name)
         if skill_data is None:
-            visible_skills = getattr(skill_mgr, 'skills', {})
+            visible_skills = skill_loader.get_visible_skills(self.ap, query)
             available_names = ', '.join(sorted(visible_skills.keys())) or 'none'
             raise ValueError(f'Skill "{skill_name}" not found. Available skills: {available_names}')
 
         # Register activated skill for sandbox mount path resolution
-        from . import skill as skill_loader
-
         skill_loader.register_activated_skill(query, skill_data)
 
         # Return SKILL.md content as Tool Result (injects into context)
@@ -127,6 +116,7 @@ class SkillToolLoader(loader.ToolLoader):
             'activated': True,
             'skill_name': skill_name,
             'mount_path': mount_path,
+            'activated_skill_names': skill_loader.get_activated_skill_names(query),
             'content': result_content,
         }
 
@@ -201,13 +191,13 @@ class SkillToolLoader(loader.ToolLoader):
         return resource_tool.LLMTool(
             name=ACTIVATE_SKILL_TOOL_NAME,
             human_desc='Activate a skill',
-            description=self._build_activate_tool_description(),
+            description='Activate a pipeline-visible skill by name and return its instructions as a tool result.',
             parameters={
                 'type': 'object',
                 'properties': {
                     'skill_name': {
                         'type': 'string',
-                        'description': 'The skill name to activate (no arguments). E.g., "pdf" or "data-analysis"',
+                        'description': 'The skill name to activate.',
                     },
                 },
                 'required': ['skill_name'],
@@ -255,50 +245,3 @@ class SkillToolLoader(loader.ToolLoader):
             },
             func=lambda parameters: parameters,
         )
-
-    def _build_activate_tool_description(self) -> str:
-        """Build tool description with embedded available_skills list."""
-        skill_mgr = getattr(self.ap, 'skill_mgr', None)
-        if skill_mgr is None:
-            return 'Activate a skill. No skills are currently available.'
-
-        skills = getattr(skill_mgr, 'skills', {})
-        if not skills:
-            return 'Activate a skill. No skills are currently available.'
-
-        # Build <available_skills> section
-        available_skills_lines = ['<available_skills>']
-        for skill_name, skill_data in sorted(skills.items()):
-            description = skill_data.get('description', '')
-            available_skills_lines.append('<skill>')
-            available_skills_lines.append(f'<name>{skill_name}</name>')
-            available_skills_lines.append(f'<description>{description}</description>')
-            available_skills_lines.append('</skill>')
-        available_skills_lines.append('</available_skills>')
-
-        available_skills_block = '\n'.join(available_skills_lines)
-
-        return f"""Activate a skill within the main conversation.
-
-<skills_instructions>
-When users ask you to perform tasks, check if any of the available skills
-below can help complete the task more effectively. Skills provide specialized
-capabilities and domain knowledge.
-
-How to use skills:
-- Invoke skills using this tool with the skill name only (no arguments)
-- When you invoke a skill, you will see <command-message>
-The skill is activated
-</command-message>
-- The skill's instructions will be provided in the tool result
-- Examples:
-  - skill_name: "pdf" - invoke the pdf skill
-  - skill_name: "data-analysis" - invoke the data-analysis skill
-
-Important:
-- Only use skills listed in <available_skills> below
-- Do not invoke a skill that is already running
-- To create a new skill: prepare it in /workspace, then use register_skill tool
-</skills_instructions>
-
-{available_skills_block}"""
diff --git a/tests/unit_tests/box/test_workspace.py b/tests/unit_tests/box/test_workspace.py
index 809347e5..e4620ad3 100644
--- a/tests/unit_tests/box/test_workspace.py
+++ b/tests/unit_tests/box/test_workspace.py
@@ -54,7 +54,9 @@ def test_classify_python_workspace_detects_package_and_requirements():
 def test_wrap_python_command_with_env_contains_bootstrap_and_command():
     command = wrap_python_command_with_env('python script.py')
 
-    assert 'python -m venv "$_LB_VENV_DIR"' in command
+    assert '_LB_SYSTEM_PYTHON="$(command -v python3 || command -v python || true)"' in command
+    assert '"$_LB_SYSTEM_PYTHON" -m venv "$_LB_VENV_DIR"' in command
+    assert 'kill -0 "$_LB_LOCK_OWNER"' in command
     assert 'export VIRTUAL_ENV="$_LB_VENV_DIR"' in command
     assert command.rstrip().endswith('python script.py')
 
diff --git a/tests/unit_tests/provider/test_mcp_box_integration.py b/tests/unit_tests/provider/test_mcp_box_integration.py
index 3e3a7a4d..74cd2487 100644
--- a/tests/unit_tests/provider/test_mcp_box_integration.py
+++ b/tests/unit_tests/provider/test_mcp_box_integration.py
@@ -180,7 +180,7 @@ class TestMCPServerBoxConfig:
         assert cfg.host_path is None
         assert cfg.host_path_mode == 'ro'
         assert cfg.env == {}
-        assert cfg.startup_timeout_sec == 120
+        assert cfg.startup_timeout_sec == 300
         assert cfg.cpus is None
         assert cfg.memory_mb is None
         assert cfg.pids_limit is None
@@ -494,6 +494,84 @@ class TestBuildBoxProcessPayload:
         assert payload['args'] == ['/opt/other/server.py', '--flag']
 
 
+# ── Python Workspace Preparation ────────────────────────────────────
+
+
+class TestPythonWorkspacePreparation:
+    def test_requirements_workspace_uses_venv_bootstrap(self, mcp_module, tmp_path):
+        host_path = tmp_path / 'mcp-source'
+        host_path.mkdir()
+        (host_path / 'requirements.txt').write_text('mcp==1.26.0\n', encoding='utf-8')
+
+        command = mcp_module.BoxStdioSessionRuntime.detect_install_command(
+            str(host_path),
+            '/workspace/.mcp/u1/workspace',
+        )
+
+        assert command is not None
+        assert '_LB_SYSTEM_PYTHON="$(command -v python3 || command -v python || true)"' in command
+        assert '"$_LB_SYSTEM_PYTHON" -m venv "$_LB_VENV_DIR"' in command
+        assert 'python -m pip install -r "/workspace/.mcp/u1/workspace/requirements.txt"' in command
+        assert 'pip install --no-cache-dir -r' not in command
+
+    def test_staging_refresh_removes_stale_source_files_but_preserves_runtime_dirs(self, mcp_module, tmp_path):
+        source = tmp_path / 'source'
+        source.mkdir()
+        (source / 'server.py').write_text('print("new")\n', encoding='utf-8')
+        (source / 'requirements.txt').write_text('mcp==1.26.0\n', encoding='utf-8')
+        (source / '.env').write_text('TOKEN=new\n', encoding='utf-8')
+
+        process_root = tmp_path / 'shared' / '.mcp' / 'u1'
+        workspace = process_root / 'workspace'
+        (workspace / '.venv' / 'bin').mkdir(parents=True)
+        (workspace / '.venv' / 'bin' / 'python').write_text('', encoding='utf-8')
+        (workspace / '.langbot').mkdir()
+        (workspace / '.langbot' / 'python-env.lock').mkdir()
+        (workspace / '.env').write_text('TOKEN=old\n', encoding='utf-8')
+        (workspace / 'server.py').write_text('print("old")\n', encoding='utf-8')
+        (workspace / 'removed.py').write_text('stale\n', encoding='utf-8')
+        (workspace / 'removed_dir').mkdir()
+        (workspace / 'removed_dir' / 'old.txt').write_text('stale\n', encoding='utf-8')
+
+        mcp_module.BoxStdioSessionRuntime._copy_workspace_tree(str(source), str(process_root), str(workspace))
+
+        assert (workspace / 'server.py').read_text(encoding='utf-8') == 'print("new")\n'
+        assert (workspace / 'requirements.txt').read_text(encoding='utf-8') == 'mcp==1.26.0\n'
+        assert (workspace / '.env').read_text(encoding='utf-8') == 'TOKEN=new\n'
+        assert not (workspace / 'removed.py').exists()
+        assert not (workspace / 'removed_dir').exists()
+        assert (workspace / '.venv' / 'bin' / 'python').exists()
+        assert (workspace / '.langbot' / 'python-env.lock').is_dir()
+
+    def test_staging_refresh_ignores_unlink_race(self, mcp_module, tmp_path, monkeypatch):
+        mcp_stdio_module = sys.modules['langbot.pkg.provider.tools.loaders.mcp_stdio']
+
+        source = tmp_path / 'source'
+        source.mkdir()
+        (source / 'server.py').write_text('print("new")\n', encoding='utf-8')
+
+        process_root = tmp_path / 'shared' / '.mcp' / 'u1'
+        workspace = process_root / 'workspace'
+        workspace.mkdir(parents=True)
+        stale_file = workspace / 'removed.py'
+        stale_file.write_text('stale\n', encoding='utf-8')
+
+        real_unlink = os.unlink
+
+        def unlink_with_race(path):
+            if os.fspath(path) == str(stale_file):
+                real_unlink(path)
+                raise FileNotFoundError(path)
+            real_unlink(path)
+
+        monkeypatch.setattr(mcp_stdio_module.os, 'unlink', unlink_with_race)
+
+        mcp_module.BoxStdioSessionRuntime._copy_workspace_tree(str(source), str(process_root), str(workspace))
+
+        assert not stale_file.exists()
+        assert (workspace / 'server.py').read_text(encoding='utf-8') == 'print("new")\n'
+
+
 # ── get_runtime_info_dict ───────────────────────────────────────────
 
 
diff --git a/tests/unit_tests/provider/test_skill_tools.py b/tests/unit_tests/provider/test_skill_tools.py
index 847480c1..9db7b945 100644
--- a/tests/unit_tests/provider/test_skill_tools.py
+++ b/tests/unit_tests/provider/test_skill_tools.py
@@ -193,6 +193,29 @@ class TestSkillPathHelpers:
 
         assert list(result.keys()) == ['visible']
 
+    def test_restore_activated_skills_uses_caller_provided_names_and_visibility(self):
+        from langbot.pkg.provider.tools.loaders.skill import (
+            ACTIVATED_SKILLS_KEY,
+            PIPELINE_BOUND_SKILLS_KEY,
+            get_activated_skill_names,
+            restore_activated_skills,
+        )
+
+        ap = _make_ap()
+        ap.skill_mgr = SimpleNamespace(
+            skills={
+                'visible': _make_skill_data(name='visible'),
+                'hidden': _make_skill_data(name='hidden'),
+            }
+        )
+        query = SimpleNamespace(variables={PIPELINE_BOUND_SKILLS_KEY: ['visible']})
+
+        restored = restore_activated_skills(ap, query, ['visible', 'hidden', 'visible', ''])
+
+        assert restored == ['visible']
+        assert list(query.variables[ACTIVATED_SKILLS_KEY].keys()) == ['visible']
+        assert get_activated_skill_names(query) == ['visible']
+
     def test_resolve_virtual_skill_path_allows_visible_skill_reads(self):
         from langbot.pkg.provider.tools.loaders.skill import (
             PIPELINE_BOUND_SKILLS_KEY,
@@ -245,7 +268,8 @@ class TestSkillPathHelpers:
 
         command = wrap_skill_command_with_python_env('python scripts/run.py')
 
-        assert 'python -m venv "$_LB_VENV_DIR"' in command
+        assert '_LB_SYSTEM_PYTHON="$(command -v python3 || command -v python || true)"' in command
+        assert '"$_LB_SYSTEM_PYTHON" -m venv "$_LB_VENV_DIR"' in command
         assert 'export VIRTUAL_ENV="$_LB_VENV_DIR"' in command
         assert command.rstrip().endswith('python scripts/run.py')
 
@@ -281,6 +305,7 @@ class TestSkillToolLoader:
         assert result['activated'] is True
         assert result['skill_name'] == 'demo'
         assert result['mount_path'] == '/workspace/.skills/demo'
+        assert result['activated_skill_names'] == ['demo']
         assert 'Step 1' in result['content']
         assert set(query.variables[ACTIVATED_SKILLS_KEY].keys()) == {'demo'}
 
@@ -456,7 +481,9 @@ class TestNativeToolLoaderSkillPaths:
                 SimpleNamespace(query_id='q1', variables={PIPELINE_BOUND_SKILLS_KEY: ['demo']}),
             )
 
-            assert result == {'ok': True, 'content': 'demo instructions'}
+            assert result['ok'] is True
+            assert result['content'] == 'demo instructions'
+            assert result['truncated'] is False
 
     @pytest.mark.asyncio
     async def test_exec_in_activated_skill_mount_rewrites_command_and_refreshes(self):
@@ -485,7 +512,7 @@ class TestNativeToolLoaderSkillPaths:
                 query,
             )
 
-            assert result == {'ok': True}
+            assert result['ok'] is True
             tool_parameters = ap.box_service.execute_tool.await_args.args[0]
             assert tool_parameters['command'] == 'python /workspace/.skills/demo/scripts/run.py'
             assert tool_parameters['workdir'] == '/workspace/.skills/demo'
diff --git a/tests/unit_tests/provider/test_tool_manager_native.py b/tests/unit_tests/provider/test_tool_manager_native.py
index 117a20fd..01e044e5 100644
--- a/tests/unit_tests/provider/test_tool_manager_native.py
+++ b/tests/unit_tests/provider/test_tool_manager_native.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import base64
 import os
 import tempfile
 from types import SimpleNamespace
@@ -189,6 +190,78 @@ async def test_write_creates_subdirectories():
             assert f.read() == 'nested'
 
 
+@pytest.mark.asyncio
+async def test_read_binary_file_as_base64_chunk():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        loader, _ = _make_loader_with_workspace(tmpdir)
+        with open(os.path.join(tmpdir, 'blob.bin'), 'wb') as f:
+            f.write(b'\x00\x01\x02\x03\x04')
+
+        result = await loader.invoke_tool(
+            'read',
+            {
+                'path': '/workspace/blob.bin',
+                'encoding': 'base64',
+                'byte_offset': 1,
+                'max_bytes': 2,
+            },
+            _make_query(),
+        )
+
+        assert result['ok'] is True
+        assert result['content'] == base64.b64encode(b'\x01\x02').decode('ascii')
+        assert result['encoding'] == 'base64'
+        assert result['byte_offset'] == 1
+        assert result['length'] == 2
+        assert result['size_bytes'] == 5
+        assert result['has_more'] is True
+        assert result['next_byte_offset'] == 3
+
+
+@pytest.mark.asyncio
+async def test_write_base64_file_append():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        loader, _ = _make_loader_with_workspace(tmpdir)
+
+        first = base64.b64encode(b'\x00\x01').decode('ascii')
+        second = base64.b64encode(b'\x02\x03').decode('ascii')
+        await loader.invoke_tool(
+            'write',
+            {'path': '/workspace/blob.bin', 'content': first, 'encoding': 'base64'},
+            _make_query(),
+        )
+        result = await loader.invoke_tool(
+            'write',
+            {
+                'path': '/workspace/blob.bin',
+                'content': second,
+                'encoding': 'base64',
+                'mode': 'append',
+            },
+            _make_query(),
+        )
+
+        assert result['ok'] is True
+        with open(os.path.join(tmpdir, 'blob.bin'), 'rb') as f:
+            assert f.read() == b'\x00\x01\x02\x03'
+
+
+@pytest.mark.asyncio
+async def test_write_base64_rejects_invalid_content():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        loader, _ = _make_loader_with_workspace(tmpdir)
+
+        result = await loader.invoke_tool(
+            'write',
+            {'path': '/workspace/blob.bin', 'content': 'not base64!', 'encoding': 'base64'},
+            _make_query(),
+        )
+
+        assert result['ok'] is False
+        assert 'invalid base64' in result['error']
+        assert not os.path.exists(os.path.join(tmpdir, 'blob.bin'))
+
+
 @pytest.mark.asyncio
 async def test_edit_replaces_unique_string():
     with tempfile.TemporaryDirectory() as tmpdir:
@@ -248,3 +321,135 @@ async def test_path_escape_blocked():
 
         with pytest.raises(ValueError, match='escapes'):
             await loader.invoke_tool('read', {'path': '/workspace/../../etc/passwd'}, _make_query())
+
+
+@pytest.mark.asyncio
+async def test_box_availability_helper_handles_unavailable_and_errors():
+    from langbot.pkg.provider.tools.loaders.availability import is_box_backend_available
+
+    assert await is_box_backend_available(SimpleNamespace()) is False
+    assert await is_box_backend_available(SimpleNamespace(box_service=SimpleNamespace(available=False))) is False
+
+    unavailable_backend = SimpleNamespace(
+        available=True,
+        get_status=AsyncMock(return_value={'backend': {'available': False}}),
+    )
+    assert await is_box_backend_available(SimpleNamespace(box_service=unavailable_backend)) is False
+
+    failing_backend = SimpleNamespace(
+        available=True,
+        get_status=AsyncMock(side_effect=RuntimeError('box unavailable')),
+    )
+    assert await is_box_backend_available(SimpleNamespace(box_service=failing_backend)) is False
+
+
+@pytest.mark.asyncio
+async def test_read_file_supports_offset_limit_and_truncation_metadata():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        loader, _ = _make_loader_with_workspace(tmpdir)
+        with open(os.path.join(tmpdir, 'lines.txt'), 'w', encoding='utf-8') as f:
+            f.write('one\ntwo\nthree\nfour\n')
+
+        result = await loader.invoke_tool(
+            'read',
+            {'path': '/workspace/lines.txt', 'offset': 2, 'limit': 2},
+            _make_query(),
+        )
+
+        assert result == {
+            'ok': True,
+            'content': 'two\nthree',
+            'truncated': True,
+            'truncated_by': 'lines',
+            'start_line': 2,
+            'end_line': 3,
+            'next_offset': 4,
+            'max_lines': 2,
+            'max_bytes': 50 * 1024,
+        }
+
+
+@pytest.mark.asyncio
+async def test_read_file_handles_line_larger_than_byte_limit():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        loader, _ = _make_loader_with_workspace(tmpdir)
+        with open(os.path.join(tmpdir, 'long-line.txt'), 'w', encoding='utf-8') as f:
+            f.write('abcdef\n')
+
+        result = await loader.invoke_tool(
+            'read',
+            {'path': '/workspace/long-line.txt', 'max_bytes': 3},
+            _make_query(),
+        )
+
+        assert result['ok'] is True
+        assert result['truncated'] is True
+        assert result['truncated_by'] == 'bytes'
+        assert result['next_offset'] == 1
+        assert 'exceeds the 3B read limit' in result['content']
+
+
+@pytest.mark.asyncio
+async def test_exec_result_is_capped_and_exposes_preview_metadata():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        box_service = SimpleNamespace(
+            available=True,
+            default_workspace=tmpdir,
+            execute_tool=AsyncMock(
+                return_value={
+                    'ok': True,
+                    'stdout': 'a' * 60000,
+                    'stderr': 'b' * 60000,
+                    'exit_code': 0,
+                }
+            ),
+        )
+        loader = NativeToolLoader(SimpleNamespace(box_service=box_service, logger=Mock()))
+
+        result = await loader.invoke_tool('exec', {'command': 'python -V'}, _make_query())
+
+        assert result['ok'] is True
+        assert len(result['stdout'].encode('utf-8')) == 50 * 1024
+        assert len(result['stderr'].encode('utf-8')) == 50 * 1024
+        assert len(result['preview'].encode('utf-8')) == 50 * 1024
+        assert result['stdout_truncated'] is True
+        assert result['stderr_truncated'] is True
+        assert result['truncated'] is True
+        assert result['truncated_by'] == 'bytes'
+
+
+@pytest.mark.asyncio
+async def test_glob_caps_match_count_and_returns_preview():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        loader, _ = _make_loader_with_workspace(tmpdir)
+        for index in range(105):
+            with open(os.path.join(tmpdir, f'file-{index:03d}.txt'), 'w', encoding='utf-8') as f:
+                f.write(str(index))
+
+        result = await loader.invoke_tool('glob', {'path': '/workspace', 'pattern': '*.txt'}, _make_query())
+
+        assert result['ok'] is True
+        assert result['total'] == 105
+        assert len(result['matches']) == 100
+        assert result['preview'] == '\n'.join(result['matches'])
+        assert result['truncated'] is True
+        assert result['truncated_by'] == 'matches'
+
+
+@pytest.mark.asyncio
+async def test_grep_reports_invalid_regex_and_truncates_long_matching_lines():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        loader, _ = _make_loader_with_workspace(tmpdir)
+        with open(os.path.join(tmpdir, 'data.txt'), 'w', encoding='utf-8') as f:
+            f.write('needle ' + ('x' * 600) + '\n')
+
+        invalid = await loader.invoke_tool('grep', {'path': '/workspace', 'pattern': '['}, _make_query())
+        result = await loader.invoke_tool('grep', {'path': '/workspace', 'pattern': 'needle'}, _make_query())
+
+        assert invalid['ok'] is False
+        assert 'Invalid regex' in invalid['error']
+        assert result['ok'] is True
+        assert result['truncated'] is True
+        assert result['truncated_by'] == 'line'
+        assert result['matches'][0]['file'] == '/workspace/data.txt'
+        assert result['matches'][0]['content'].endswith('... [truncated]')