fix(tools): clear stale Python workspace env locks

fix(tools): bootstrap Python workspaces with available interpreter
fix(tools): harden agent runner tool runtimes
2026-06-14 09:46:03 +00:00 · 2026-06-14 11:32:10 +08:00 · 2026-06-14 11:32:10 +08:00 · 2026-06-14 11:32:10 +08:00 · 2026-06-14 11:29:57 +08:00 · 2026-06-14 11:12:29 +08:00
25 changed files with 967 additions and 193 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "langbot"
-version = "4.10.1"
+version = "4.10.2"
 description = "Production-grade platform for building agentic IM bots"
 readme = "README.md"
 license-files = ["LICENSE"]
--- a/src/langbot/init.py
+++ b/src/langbot/init.py
@@ -1,3 +1,3 @@
 """LangBot - Production-grade platform for building agentic IM bots"""
-__version__ = '4.10.1'
+__version__ = '4.10.2'
--- a/src/langbot/pkg/box/workspace.py
+++ b/src/langbot/pkg/box/workspace.py
@@ -146,13 +146,19 @@ def wrap_python_command_with_env(command: str, *, mount_path: str = '/workspace'
        _LB_PIP_CACHE_DIR="{mount_path}/.cache/pip"
        mkdir -p "$_LB_META_DIR" "$_LB_TMP_DIR" "$_LB_PIP_CACHE_DIR"
        _LB_SYSTEM_PYTHON="$(command -v python3 || command -v python || true)"
        if [ -z "$_LB_SYSTEM_PYTHON" ]; then
          echo "python3 or python is required to prepare the workspace Python environment" >&2
          exit 127
        fi
        export TMPDIR="$_LB_TMP_DIR"
        export TEMP="$_LB_TMP_DIR"
        export TMP="$_LB_TMP_DIR"
        export PIP_CACHE_DIR="$_LB_PIP_CACHE_DIR"
        _lb_python_meta() {{
-          python - <<'PY'
+          "$_LB_SYSTEM_PYTHON" - <<'PY'
        import hashlib
        import json
        import os
@@ -198,18 +204,29 @@ def wrap_python_command_with_env(command: str, *, mount_path: str = '/workspace'
        fi
        if [ "$_LB_NEEDS_BOOTSTRAP" -eq 1 ]; then
          if [ -d "$_LB_LOCK_DIR" ] && [ ! -f "$_LB_LOCK_DIR/pid" ]; then
            echo "Clearing stale Python environment lock without owner: $_LB_LOCK_DIR" >&2
            rm -rf "$_LB_LOCK_DIR" 2>/dev/null || true
          fi
          _LB_LOCK_WAIT=0
          while ! mkdir "$_LB_LOCK_DIR" 2>/dev/null; do
            if [ "$_LB_LOCK_WAIT" -ge 120 ]; then
              echo "Timed out waiting for Python environment lock, clearing stale lock: $_LB_LOCK_DIR" >&2
              rm -rf "$_LB_LOCK_DIR" 2>/dev/null || true
              if mkdir "$_LB_LOCK_DIR" 2>/dev/null; then
                break
              fi
              echo "Timed out waiting for Python environment lock: $_LB_LOCK_DIR" >&2
              exit 1
            fi
            sleep 1
            _LB_LOCK_WAIT=$((_LB_LOCK_WAIT + 1))
          done
          printf '%s\\n' "$$" > "$_LB_LOCK_DIR/pid" 2>/dev/null || true
          _lb_cleanup_lock() {{
-            rmdir "$_LB_LOCK_DIR" >/dev/null 2>&1 || true
+            rm -rf "$_LB_LOCK_DIR" >/dev/null 2>&1 || true
          }}
          trap _lb_cleanup_lock EXIT INT TERM
@@ -225,7 +242,7 @@ def wrap_python_command_with_env(command: str, *, mount_path: str = '/workspace'
          if [ "$_LB_NEEDS_BOOTSTRAP" -eq 1 ]; then
            rm -rf "$_LB_VENV_DIR"
-            python -m venv "$_LB_VENV_DIR"
+            "$_LB_SYSTEM_PYTHON" -m venv "$_LB_VENV_DIR"
            . "$_LB_VENV_DIR/bin/activate"
            python -m pip install --upgrade pip setuptools wheel
            if [ -f "{mount_path}/requirements.txt" ]; then
--- a/src/langbot/pkg/core/stages/load_config.py
+++ b/src/langbot/pkg/core/stages/load_config.py
@@ -202,6 +202,16 @@ class LoadConfigStage(stage.BootingStage):
                constants.instance_id = new_id
        constants.edition = ap.instance_config.data.get('system', {}).get('edition', 'community')
        # Instance creation timestamp: sourced from data/labels/instance_id.json.
        # Instances created before this field existed (or supplied via
        # system.instance_id) won't have it, so backfill with the current time
        # and persist it via the dump below — from then on it stays stable.
        instance_create_ts = ap.instance_id.data.get('instance_create_ts', 0)
        if not isinstance(instance_create_ts, int) or instance_create_ts <= 0:
            instance_create_ts = int(time.time())
            ap.instance_id.data['instance_create_ts'] = instance_create_ts
        constants.instance_create_ts = instance_create_ts
        print(f'LangBot instance id: {constants.instance_id}')
        print(f'LangBot edition: {constants.edition}')
--- a/src/langbot/pkg/platform/sources/web_page_bot_adapter.py
+++ b/src/langbot/pkg/platform/sources/web_page_bot_adapter.py
@@ -84,6 +84,18 @@ class WebPageBotAdapter(abstract_platform_adapter.AbstractMessagePlatformAdapter
    ):
        self.listeners.pop(event_type, None)
    async def is_stream_output_supported(self) -> bool:
        """Delegate stream output check to ws_adapter."""
        if self._ws_adapter is not None:
            return await self._ws_adapter.is_stream_output_supported()
        return False
    async def create_message_card(self, message_id: str | int, event: platform_events.MessageEvent) -> bool:
        """Delegate create_message_card to ws_adapter."""
        if self._ws_adapter is not None:
            return await self._ws_adapter.create_message_card(message_id, event)
        return False
    async def is_muted(self, group_id: int) -> bool:
        return False
--- a/src/langbot/pkg/provider/modelmgr/requester.py
+++ b/src/langbot/pkg/provider/modelmgr/requester.py
@@ -12,6 +12,19 @@ import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
 import langbot_plugin.api.entities.builtin.provider.message as provider_message
 LLM_USAGE_QUERY_VARIABLE = '_llm_usage'
 STREAM_USAGE_QUERY_VARIABLE = '_stream_usage'
 def _store_llm_usage(query: pipeline_query.Query | None, usage_info: dict | None) -> None:
    """Store the latest provider usage on the query for upstream action handlers."""
    if query is None or not usage_info:
        return
    if query.variables is None:
        query.variables = {}
    query.variables[LLM_USAGE_QUERY_VARIABLE] = dict(usage_info)
 class RuntimeProvider:
    """运行时模型提供商"""
@@ -67,6 +80,7 @@ class RuntimeProvider:
            if isinstance(result, tuple):
                msg, usage_info = result
                if usage_info:
                    _store_llm_usage(query, usage_info)
                    input_tokens = usage_info.get('prompt_tokens', 0)
                    output_tokens = usage_info.get('completion_tokens', 0)
                return msg
@@ -146,11 +160,12 @@ class RuntimeProvider:
            if query:
                if query.variables is None:
                    query.variables = {}
-                if '_stream_usage' in query.variables:
+                if STREAM_USAGE_QUERY_VARIABLE in query.variables:
-                    usage_info = query.variables['_stream_usage']
+                    usage_info = query.variables[STREAM_USAGE_QUERY_VARIABLE]
                    _store_llm_usage(query, usage_info)
                    input_tokens = usage_info.get('prompt_tokens', 0)
                    output_tokens = usage_info.get('completion_tokens', 0)
-                    del query.variables['_stream_usage']
+                    del query.variables[STREAM_USAGE_QUERY_VARIABLE]
        except Exception as e:
            status = 'error'
            error_message = str(e)
--- a/src/langbot/pkg/provider/modelmgr/requesters/litellmchat.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/litellmchat.py
@@ -262,32 +262,82 @@ class LiteLLMRequester(requester.ProviderAPIRequester):
        - dict with the same keys
        - missing ``total_tokens`` (derived from prompt + completion)
        - ``None`` / partially-populated usage (defaults to 0)
        - provider-specific token details, including cache token counters
        """
        if usage is None:
            return {'prompt_tokens': 0, 'completion_tokens': 0, 'total_tokens': 0}
-        def _get(key: str) -> typing.Any:
+        def _plain_value(value: typing.Any) -> typing.Any:
-            if isinstance(usage, dict):
+            if value is None:
-                return usage.get(key)
+                return None
-            return getattr(usage, key, None)
+            if isinstance(value, dict):
                return {k: _plain_value(v) for k, v in value.items() if v is not None}
            if isinstance(value, (list, tuple)):
                return [_plain_value(v) for v in value]
-        prompt_tokens = _get('prompt_tokens') or 0
+            model_dump = getattr(value, 'model_dump', None)
-        completion_tokens = _get('completion_tokens') or 0
+            if callable(model_dump):
-        total_tokens = _get('total_tokens') or 0
+                try:
                    dumped = model_dump()
                    if isinstance(dumped, dict):
                        return _plain_value(dumped)
                except Exception:
                    pass
            return value
        def _usage_dict(value: typing.Any) -> dict[str, typing.Any]:
            if value is None:
                return {}
            plain = _plain_value(value)
            if isinstance(plain, dict):
                return plain
            def _is_mock_attr(attr: typing.Any) -> bool:
                return type(attr).__module__.startswith('unittest.mock')
            data: dict[str, typing.Any] = {}
            for key in (
                'prompt_tokens',
                'completion_tokens',
                'total_tokens',
                'prompt_tokens_details',
                'completion_tokens_details',
                'cache_creation_input_tokens',
                'cache_read_input_tokens',
                'input_token_details',
                'output_token_details',
            ):
                attr_value = getattr(value, key, None)
                if attr_value is not None and not _is_mock_attr(attr_value):
                    data[key] = _plain_value(attr_value)
            return data
        def _to_int(value: typing.Any) -> int:
            try:
                return int(value or 0)
            except (TypeError, ValueError):
                return 0
        normalized = _usage_dict(usage)
        prompt_tokens = _to_int(normalized.get('prompt_tokens'))
        completion_tokens = _to_int(normalized.get('completion_tokens'))
        total_tokens = _to_int(normalized.get('total_tokens'))
        # Some providers omit total_tokens in streaming usage; derive it.
        if not total_tokens:
            total_tokens = prompt_tokens + completion_tokens
-        return {
+        normalized['prompt_tokens'] = prompt_tokens
-            'prompt_tokens': int(prompt_tokens),
+        normalized['completion_tokens'] = completion_tokens
-            'completion_tokens': int(completion_tokens),
+        normalized['total_tokens'] = total_tokens
-            'total_tokens': int(total_tokens),
+        return normalized
        }
-    def _extract_usage(self, response) -> dict:
+    def _extract_usage(self, response) -> dict | None:
        """Extract usage info from a non-streaming LiteLLM response."""
-        return self._normalize_usage(getattr(response, 'usage', None))
+        usage = getattr(response, 'usage', None)
        if usage is None:
            return None
        return self._normalize_usage(usage)
    @staticmethod
    def _as_dict(value: typing.Any) -> dict:
@@ -486,7 +536,7 @@ class LiteLLMRequester(requester.ProviderAPIRequester):
                    if query is not None:
                        if query.variables is None:
                            query.variables = {}
-                        query.variables['_stream_usage'] = usage_info
+                        query.variables[requester.STREAM_USAGE_QUERY_VARIABLE] = usage_info
                if not hasattr(chunk, 'choices') or not chunk.choices:
                    continue
--- a/src/langbot/pkg/provider/tools/errors.py
+++ b/src/langbot/pkg/provider/tools/errors.py
@@ -0,0 +1,6 @@
 class ToolNotFoundError(ValueError):
    """Raised when a requested tool cannot be found in any active loader."""
    def __init__(self, name: str):
        self.name = name
        super().__init__(f'Tool not found: {name}')
--- a/src/langbot/pkg/provider/tools/loader.py
+++ b/src/langbot/pkg/provider/tools/loader.py
@@ -4,12 +4,15 @@ import abc
 import typing
 from typing import TYPE_CHECKING
 from langbot_plugin.api.definition.components.manifest import ComponentManifest
 from langbot_plugin.api.entities.events import pipeline_query
 import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
 if TYPE_CHECKING:
    from ...core import app
 ToolLookupResult = resource_tool.LLMTool | ComponentManifest
 preregistered_loaders: list[typing.Type[ToolLoader]] = []
@@ -43,6 +46,13 @@ class ToolLoader(abc.ABC):
        """获取所有工具"""
        pass
    async def get_tool(self, name: str) -> ToolLookupResult | None:
        """Get one tool by name."""
        for tool in await self.get_tools():
            if tool.name == name:
                return tool
        return None
    @abc.abstractmethod
    async def has_tool(self, name: str) -> bool:
        """检查工具是否存在"""
--- a/src/langbot/pkg/provider/tools/loaders/availability.py
+++ b/src/langbot/pkg/provider/tools/loaders/availability.py
@@ -0,0 +1,18 @@
 from __future__ import annotations
 from typing import Any
 async def is_box_backend_available(ap: Any) -> bool:
    """Return whether the configured Box backend is ready for tool execution."""
    box_service = getattr(ap, 'box_service', None)
    if box_service is None:
        return False
    if not getattr(box_service, 'available', False):
        return False
    try:
        status = await box_service.get_status()
        backend_info = status.get('backend', {})
        return bool(backend_info.get('available', False))
    except Exception:
        return False
--- a/src/langbot/pkg/provider/tools/loaders/mcp.py
+++ b/src/langbot/pkg/provider/tools/loaders/mcp.py
@@ -567,6 +567,13 @@ class MCPLoader(loader.ToolLoader):
                    return True
        return False
    async def get_tool(self, name: str) -> resource_tool.LLMTool | None:
        for session in self.sessions.values():
            for function in session.get_tools():
                if function.name == name:
                    return function
        return None
    async def invoke_tool(self, name: str, parameters: dict, query: pipeline_query.Query) -> typing.Any:
        """执行工具调用"""
        for session in self.sessions.values():
--- a/src/langbot/pkg/provider/tools/loaders/mcp_stdio.py
+++ b/src/langbot/pkg/provider/tools/loaders/mcp_stdio.py
@@ -5,6 +5,7 @@ import asyncio
 import os
 import shutil
 import shlex
 import threading
 from typing import TYPE_CHECKING, Any
 import pydantic
@@ -18,12 +19,26 @@ from ....box.workspace import (
    rewrite_mounted_path,
    rewrite_venv_command,
    unwrap_venv_path,
    wrap_python_command_with_env,
 )
 if TYPE_CHECKING:
    from .mcp import RuntimeMCPSession
 _WORKSPACE_COPY_LOCKS: dict[str, threading.Lock] = {}
 _WORKSPACE_COPY_LOCKS_GUARD = threading.Lock()
 def _workspace_copy_lock(path: str) -> threading.Lock:
    with _WORKSPACE_COPY_LOCKS_GUARD:
        lock = _WORKSPACE_COPY_LOCKS.get(path)
        if lock is None:
            lock = threading.Lock()
            _WORKSPACE_COPY_LOCKS[path] = lock
        return lock
 class MCPSessionErrorPhase(enum.Enum):
    """Which phase of the MCP lifecycle failed."""
@@ -49,7 +64,7 @@ class MCPServerBoxConfig(pydantic.BaseModel):
    host_path: str | None = None
    host_path_mode: str = 'ro'  # MCP servers default to read-write mount only when explicitly requested
    env: dict[str, str] = pydantic.Field(default_factory=dict)
-    startup_timeout_sec: int = 120  # Longer default to allow dependency bootstrap
+    startup_timeout_sec: int = 300  # First Docker bootstrap may need to build a venv and install MCP deps.
    cpus: float | None = None
    memory_mb: int | None = None
    pids_limit: int | None = None
@@ -128,6 +143,7 @@ class BoxStdioSessionRuntime:
        workspace = self._build_workspace(host_path=None)
        host_path = self.resolve_host_path()
        process_cwd = '/workspace'
        install_cmd: str | None = None
        try:
            await workspace.create_session()
@@ -168,6 +184,8 @@ class BoxStdioSessionRuntime:
                env=self.server_config.get('env', {}),
                cwd=process_cwd,
            )
            if install_cmd:
                payload = self._wrap_process_payload_with_python_env(payload, process_cwd)
            payload['process_id'] = self.process_id
            await workspace.box_service.start_managed_process(workspace.session_id, payload)
        except Exception:
@@ -253,13 +271,43 @@ class BoxStdioSessionRuntime:
    @staticmethod
    def _copy_workspace_tree(source_path: str, process_host_root: str, process_host_workspace: str) -> None:
-        shutil.rmtree(process_host_root, ignore_errors=True)
+        # Docker-backed bootstrap writes root-owned runtime directories such as
-        os.makedirs(process_host_root, exist_ok=True)
+        # .venv/.tmp into the staged workspace. The host process may not be able
        # to delete them, so refresh source files in place and preserve runtime
        # directories instead of rmtree'ing the whole staging root.
        with _workspace_copy_lock(process_host_root):
            preserved_names = {'.venv', 'venv', 'env', '.env', '.cache', '.tmp', '.langbot'}
            os.makedirs(process_host_workspace, exist_ok=True)
            for name in os.listdir(process_host_workspace):
                if name in preserved_names:
                    continue
                path = os.path.join(process_host_workspace, name)
                if os.path.isdir(path) and not os.path.islink(path):
                    shutil.rmtree(path, ignore_errors=True)
                else:
                    try:
                        os.unlink(path)
                    except FileNotFoundError:
                        pass
            shutil.copytree(
                source_path,
                process_host_workspace,
                symlinks=True,
-            ignore=shutil.ignore_patterns('.git', '__pycache__', '.pytest_cache', '.mypy_cache', '.ruff_cache'),
+                dirs_exist_ok=True,
                ignore=shutil.ignore_patterns(
                    '.git',
                    '__pycache__',
                    '.pytest_cache',
                    '.mypy_cache',
                    '.ruff_cache',
                    '.venv',
                    'venv',
                    'env',
                    '.env',
                    '.cache',
                    '.tmp',
                    '.langbot',
                ),
            )
    async def _cleanup_staged_workspace(self) -> None:
@@ -343,23 +391,31 @@ class BoxStdioSessionRuntime:
    @staticmethod
    def detect_install_command(host_path: str, workspace_path: str = '/workspace') -> str | None:
        workspace_kind = classify_python_workspace(host_path)
-        quoted_workspace_path = shlex.quote(workspace_path)
+        if workspace_kind in {'package', 'requirements'}:
-        if workspace_kind == 'package':
+            return wrap_python_command_with_env('python -c "pass"', mount_path=workspace_path).rstrip()
            return (
                'mkdir -p /opt/_lb_src'
                f' && tar -C {quoted_workspace_path}'
                ' --exclude=.venv --exclude=.git --exclude=__pycache__'
                ' --exclude=node_modules --exclude=.tox --exclude=.nox'
                ' --exclude="*.egg-info" --exclude=.uv-cache'
                ' -cf - .'
                ' | tar -C /opt/_lb_src -xf -'
                ' && pip install --no-cache-dir /opt/_lb_src'
                ' && rm -rf /opt/_lb_src'
            )
        if workspace_kind == 'requirements':
            return f'pip install --no-cache-dir -r {quoted_workspace_path}/requirements.txt'
        return None
    @staticmethod
    def _wrap_process_payload_with_python_env(payload: dict[str, Any], workspace_path: str) -> dict[str, Any]:
        """Start a prepared Python workspace without writing bootstrap output to MCP stdio."""
        workspace_root = workspace_path.rstrip('/') or '/workspace'
        venv_dir = f'{workspace_root}/.venv'
        venv_bin = f'{venv_dir}/bin'
        command = ' '.join(
            [shlex.quote(payload['command']), *[shlex.quote(arg) for arg in payload.get('args', [])]]
        )
        wrapped = dict(payload)
        wrapped['command'] = 'sh'
        wrapped['args'] = [
            '-lc',
            (
                f'export VIRTUAL_ENV={shlex.quote(venv_dir)}; '
                f'export PATH={shlex.quote(venv_bin)}:$PATH; '
                f'exec {command}'
            ),
        ]
        return wrapped
    def build_box_session_payload(self, session_id: str, host_path: str | None = None) -> dict[str, Any]:
        workspace = self._build_workspace()
        workspace.session_id = session_id
--- a/src/langbot/pkg/provider/tools/loaders/native.py
+++ b/src/langbot/pkg/provider/tools/loaders/native.py
@@ -1,12 +1,15 @@
 from __future__ import annotations
 import json
 import mimetypes
 import os
 import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
 from langbot_plugin.api.entities.events import pipeline_query
 from .. import loader
 from ..errors import ToolNotFoundError
 from .availability import is_box_backend_available
 from . import skill as skill_loader
 EXEC_TOOL_NAME = 'exec'
@@ -21,6 +24,15 @@ _ALL_TOOL_NAMES = {EXEC_TOOL_NAME, READ_TOOL_NAME, WRITE_TOOL_NAME, EDIT_TOOL_NA
 # Skip these dirs during grep walk to avoid noise
 _SKIP_DIRS = {'.git', 'node_modules', '__pycache__', '.venv', 'venv', '.tox', 'dist', 'build'}
 _DEFAULT_READ_MAX_LINES = 2000
 _MAX_READ_MAX_LINES = 10000
 _DEFAULT_TOOL_RESULT_MAX_BYTES = 50 * 1024
 _BOX_FILE_SCRIPT_MAX_BYTES = 2048
 _GLOB_MAX_MATCHES = 100
 _GREP_MAX_MATCHES = 200
 _GREP_MAX_FILES = 5000
 _GREP_MAX_LINE_CHARS = 500
 class NativeToolLoader(loader.ToolLoader):
    def __init__(self, ap):
@@ -42,18 +54,7 @@ class NativeToolLoader(loader.ToolLoader):
    async def _check_backend_available(self) -> bool:
        """Check if the box backend is truly available (not just the runtime)."""
-        box_service = getattr(self.ap, 'box_service', None)
+        return await is_box_backend_available(self.ap)
        if box_service is None:
            return False
        if not getattr(box_service, 'available', False):
            return False
        # Check if backend is truly available via get_status
        try:
            status = await box_service.get_status()
            backend_info = status.get('backend', {})
            return backend_info.get('available', False)
        except Exception:
            return False
    async def get_tools(self, bound_plugins: list[str] | None = None) -> list[resource_tool.LLMTool]:
        if not self._is_sandbox_available():
@@ -90,7 +91,7 @@ class NativeToolLoader(loader.ToolLoader):
            return await self._invoke_glob(parameters, query)
        if name == GREP_TOOL_NAME:
            return await self._invoke_grep(parameters, query)
-        raise ValueError(f'未找到工具: {name}')
+        raise ToolNotFoundError(name)
    async def shutdown(self):
        pass
@@ -138,6 +139,7 @@ class NativeToolLoader(loader.ToolLoader):
        # via execute_tool. Skills are mounted at /workspace/.skills/{name}/
        # via extra_mounts built by BoxService.
        result = await self.ap.box_service.execute_tool(parameters, query)
        result = self._normalize_exec_result(result)
        if selected_skill is not None:
            self._refresh_skill_from_disk(selected_skill)
@@ -226,19 +228,65 @@ class NativeToolLoader(loader.ToolLoader):
        except Exception:
            return {'ok': False, 'error': stdout or 'Box file operation returned no result'}
-    async def _read_workspace_via_box(self, path: str, query: pipeline_query.Query) -> dict:
+    async def _read_workspace_via_box(self, path: str, parameters: dict, query: pipeline_query.Query) -> dict:
        offset = self._positive_int(parameters.get('offset'), default=1)
        max_lines = self._positive_int(
            parameters.get('limit'),
            default=_DEFAULT_READ_MAX_LINES,
            max_value=_MAX_READ_MAX_LINES,
        )
        # Box file fallback returns through exec stdout, which is already capped
        # by BoxService. Keep this payload small enough to remain valid JSON.
        max_bytes = min(
            self._positive_int(parameters.get('max_bytes'), default=_DEFAULT_TOOL_RESULT_MAX_BYTES),
            _BOX_FILE_SCRIPT_MAX_BYTES,
        )
        script = f"""
 import json, os
 path = {json.dumps(path)}
 offset = {offset}
 max_lines = {max_lines}
 max_bytes = {max_bytes}
 if not path.startswith('/workspace'):
    print(json.dumps({{'ok': False, 'error': 'Path must be under /workspace.'}}))
 elif not os.path.exists(path):
    print(json.dumps({{'ok': False, 'error': f'File not found: {{path}}'}}))
 elif os.path.isdir(path):
-    print(json.dumps({{'ok': True, 'content': '\\n'.join(sorted(os.listdir(path))), 'is_directory': True}}))
+    entries = sorted(os.listdir(path))
    content = '\\n'.join(entries)
    print(json.dumps({{'ok': True, 'content': content, 'is_directory': True, 'total': len(entries), 'truncated': False}}))
 else:
    lines = []
    output_bytes = 0
    end_line = offset - 1
    truncated = False
    next_offset = None
    with open(path, 'r', encoding='utf-8', errors='replace') as f:
-        print(json.dumps({{'ok': True, 'content': f.read()}}))
+        for line_number, line in enumerate(f, 1):
            if line_number < offset:
                continue
            if len(lines) >= max_lines:
                truncated = True
                next_offset = line_number
                break
            line_bytes = len(line.encode('utf-8'))
            if output_bytes + line_bytes > max_bytes:
                truncated = True
                next_offset = line_number
                break
            lines.append(line.rstrip('\\n'))
            output_bytes += line_bytes
            end_line = line_number
    print(json.dumps({{
        'ok': True,
        'content': '\\n'.join(lines),
        'truncated': truncated,
        'start_line': offset,
        'end_line': end_line,
        'next_offset': next_offset,
        'max_lines': max_lines,
        'max_bytes': max_bytes,
    }}))
 """.strip()
        return await self._run_workspace_file_script(script, query)
@@ -306,12 +354,27 @@ else:
        if not any(part in skip_dirs for part in item.parts)
    ]
    hits.sort(key=lambda item: item.stat().st_mtime if item.exists() else 0, reverse=True)
-    shown = hits[:100]
+    shown = hits[:{_GLOB_MAX_MATCHES}]
    matches = []
    output_bytes = 0
    truncated_by_bytes = False
    for item in shown:
        rel = os.path.relpath(str(item), path)
-        matches.append(os.path.join(path, rel).replace(os.sep, '/'))
+        sandbox_path = os.path.join(path, rel).replace(os.sep, '/')
-    print(json.dumps({{'ok': True, 'matches': matches, 'total': len(hits), 'truncated': len(hits) > 100}}))
+        entry_bytes = len(sandbox_path.encode('utf-8')) + (1 if matches else 0)
        if output_bytes + entry_bytes > {_DEFAULT_TOOL_RESULT_MAX_BYTES}:
            truncated_by_bytes = True
            break
        matches.append(sandbox_path)
        output_bytes += entry_bytes
    print(json.dumps({{
        'ok': True,
        'matches': matches,
        'preview': '\\n'.join(matches),
        'total': len(hits),
        'truncated': len(hits) > len(matches) or truncated_by_bytes,
        'truncated_by': 'bytes' if truncated_by_bytes else ('matches' if len(hits) > len(matches) else None),
    }}))
 """.strip()
        return await self._run_workspace_file_script(script, query)
@@ -349,29 +412,54 @@ else:
                    continue
                if item.is_file():
                    files.append(item)
-                if len(files) >= 5000:
+                if len(files) >= {_GREP_MAX_FILES}:
                    break
        matches = []
        output_bytes = 0
        truncated_by = None
        for fp in files:
            try:
-                text = fp.read_text(errors='ignore')
+                handle = fp.open('r', encoding='utf-8', errors='ignore')
            except OSError:
                continue
-            for lineno, line in enumerate(text.splitlines(), 1):
+            with handle:
                for lineno, line in enumerate(handle, 1):
                    if regex.search(line):
                        if base.is_file():
                            file_path = path
                        else:
                            rel = os.path.relpath(str(fp), path)
                            file_path = os.path.join(path, rel).replace(os.sep, '/')
-                    matches.append({{'file': file_path, 'line': lineno, 'content': line.rstrip()}})
+                        content = line.rstrip()
-                    if len(matches) >= 200:
+                        line_truncated = False
                        if len(content) > {_GREP_MAX_LINE_CHARS}:
                            content = content[:{_GREP_MAX_LINE_CHARS}] + '... [truncated]'
                            line_truncated = True
                        entry = {{'file': file_path, 'line': lineno, 'content': content}}
                        entry_bytes = len(json.dumps(entry, ensure_ascii=False).encode('utf-8')) + 1
                        if output_bytes + entry_bytes > {_DEFAULT_TOOL_RESULT_MAX_BYTES}:
                            truncated_by = 'bytes'
                            break
-            if len(matches) >= 200:
+                        if line_truncated and truncated_by is None:
                            truncated_by = 'line'
                        matches.append(entry)
                        output_bytes += entry_bytes
                        if len(matches) >= {_GREP_MAX_MATCHES}:
                            truncated_by = truncated_by or 'matches'
                            break
                if truncated_by == 'bytes' or len(matches) >= {_GREP_MAX_MATCHES}:
                    break
            if truncated_by == 'bytes' or len(matches) >= {_GREP_MAX_MATCHES}:
                break
-        print(json.dumps({{'ok': True, 'matches': matches, 'total': len(matches), 'truncated': len(matches) >= 200}}))
+        print(json.dumps({{
            'ok': True,
            'matches': matches,
            'total': len(matches),
            'truncated': truncated_by is not None,
            'truncated_by': truncated_by,
        }}))
 """.strip()
        return await self._run_workspace_file_script(script, query)
@@ -386,14 +474,22 @@ else:
        )
        if skill_request is not None and hasattr(self.ap.box_service, 'read_skill_file'):
            selected_skill, relative = skill_request
            host_path = self._resolve_skill_host_path(selected_skill, relative)
            if host_path and os.path.exists(host_path):
                if os.path.isdir(host_path):
                    return self._build_directory_result(os.listdir(host_path))
                result = self._read_text_file_preview(host_path, parameters)
                host_root = str(selected_skill.get('package_root', '') or '')
                return await self._attach_file_artifact_ref(result, host_path, host_root, path, query)
            try:
                result = await self.ap.box_service.read_skill_file(selected_skill['name'], relative)
-                return {'ok': True, 'content': result.get('content', '')}
+                return self._build_read_result_from_text(str(result.get('content', '')), parameters)
            except Exception:
                try:
                    result = await self.ap.box_service.list_skill_files(selected_skill['name'], relative)
                    entries = [entry['name'] for entry in result.get('entries', [])]
-                    return {'ok': True, 'content': '\n'.join(sorted(entries)), 'is_directory': True}
+                    return self._build_directory_result(entries)
                except Exception as exc:
                    return {'ok': False, 'error': str(exc)}
@@ -404,15 +500,15 @@ else:
            include_activated=True,
        )
        if self._should_use_box_workspace_files(selected_skill):
-            return await self._read_workspace_via_box(path, query)
+            return await self._read_workspace_via_box(path, parameters, query)
        if not os.path.exists(host_path):
            return {'ok': False, 'error': f'File not found: {path}'}
        if os.path.isdir(host_path):
            entries = os.listdir(host_path)
-            return {'ok': True, 'content': '\n'.join(sorted(entries)), 'is_directory': True}
+            return self._build_directory_result(entries)
-        with open(host_path, 'r', errors='replace') as f:
+        result = self._read_text_file_preview(host_path, parameters)
-            content = f.read()
+        host_root = self._get_host_root(selected_skill)
-        return {'ok': True, 'content': content}
+        return await self._attach_file_artifact_ref(result, host_path, host_root, path, query)
    async def _invoke_write(self, parameters: dict, query: pipeline_query.Query) -> dict:
        path = parameters['path']
@@ -583,6 +679,29 @@ else:
                        'type': 'string',
                        'description': 'Absolute path to the file (must be under /workspace).',
                    },
                    'offset': {
                        'type': 'integer',
                        'description': '1-indexed line number to start reading from. Defaults to 1.',
                        'default': 1,
                        'minimum': 1,
                    },
                    'limit': {
                        'type': 'integer',
                        'description': f'Maximum number of lines to return. Defaults to {_DEFAULT_READ_MAX_LINES}.',
                        'default': _DEFAULT_READ_MAX_LINES,
                        'minimum': 1,
                        'maximum': _MAX_READ_MAX_LINES,
                    },
                    'max_bytes': {
                        'type': 'integer',
                        'description': (
                            'Maximum bytes of file content to return. '
                            f'Defaults to {_DEFAULT_TOOL_RESULT_MAX_BYTES}.'
                        ),
                        'default': _DEFAULT_TOOL_RESULT_MAX_BYTES,
                        'minimum': 1,
                        'maximum': _DEFAULT_TOOL_RESULT_MAX_BYTES,
                    },
                },
                'required': ['path'],
                'additionalProperties': False,
@@ -739,22 +858,30 @@ else:
        hits.sort(key=lambda p: p.stat().st_mtime if p.exists() else 0, reverse=True)
        total = len(hits)
-        shown = hits[:100]
+        shown = hits[:_GLOB_MAX_MATCHES]
        # Convert back to sandbox paths
        sandbox_paths = []
        output_bytes = 0
        truncated_by_bytes = False
        for h in shown:
            rel = os.path.relpath(str(h), host_path)
            sandbox_path = os.path.join(path, rel)
            entry_bytes = len(sandbox_path.encode('utf-8')) + (1 if sandbox_paths else 0)
            if output_bytes + entry_bytes > _DEFAULT_TOOL_RESULT_MAX_BYTES:
                truncated_by_bytes = True
                break
            sandbox_paths.append(sandbox_path)
            output_bytes += entry_bytes
-        result_lines = sandbox_paths
+        return {
-        result = '\n'.join(result_lines)
+            'ok': True,
-
+            'matches': sandbox_paths,
-        if total > 100:
+            'preview': '\n'.join(sandbox_paths),
-            result += f'\n... ({total} matches, showing first 100)'
+            'total': total,
-
+            'truncated': total > len(sandbox_paths) or truncated_by_bytes,
-        return {'ok': True, 'matches': result_lines, 'total': total, 'truncated': total > 100}
+            'truncated_by': 'bytes' if truncated_by_bytes else ('matches' if total > len(sandbox_paths) else None),
        }
    async def _invoke_grep(self, parameters: dict, query: pipeline_query.Query) -> dict:
        pattern = parameters['pattern']
@@ -790,32 +917,46 @@ else:
            files = self._grep_walk(base, include)
        matches = []
        output_bytes = 0
        truncated_by = None
        for fp in files:
            try:
-                text = fp.read_text(errors='ignore')
+                handle = fp.open('r', encoding='utf-8', errors='ignore')
            except OSError:
                continue
-            for lineno, line in enumerate(text.splitlines(), 1):
+            with handle:
                for lineno, line in enumerate(handle, 1):
                    if regex.search(line):
                        rel = os.path.relpath(str(fp), host_path)
                        sandbox_path = os.path.join(path, rel)
-                    matches.append(
+                        content, line_truncated = self._truncate_grep_line(line.rstrip())
-                        {
+                        entry = {
                            'file': sandbox_path,
                            'line': lineno,
-                            'content': line.rstrip(),
+                            'content': content,
                        }
-                    )
+                        entry_bytes = len(json.dumps(entry, ensure_ascii=False).encode('utf-8')) + 1
-                    if len(matches) >= 200:
+                        if output_bytes + entry_bytes > _DEFAULT_TOOL_RESULT_MAX_BYTES:
                            truncated_by = 'bytes'
                            break
-            if len(matches) >= 200:
+                        if line_truncated and truncated_by is None:
                            truncated_by = 'line'
                        matches.append(entry)
                        output_bytes += entry_bytes
                        if len(matches) >= _GREP_MAX_MATCHES:
                            truncated_by = truncated_by or 'matches'
                            break
                if truncated_by == 'bytes' or len(matches) >= _GREP_MAX_MATCHES:
                    break
            if truncated_by == 'bytes' or len(matches) >= _GREP_MAX_MATCHES:
                break
        return {
            'ok': True,
            'matches': matches,
            'total': len(matches),
-            'truncated': len(matches) >= 200,
+            'truncated': truncated_by is not None,
            'truncated_by': truncated_by,
        }
    @staticmethod
@@ -827,10 +968,285 @@ else:
                continue
            if item.is_file():
                results.append(item)
-            if len(results) >= 5000:
+            if len(results) >= _GREP_MAX_FILES:
                break
        return results
    @staticmethod
    def _resolve_skill_host_path(selected_skill: dict, relative: str) -> str | None:
        package_root = str(selected_skill.get('package_root', '') or '').strip()
        if not package_root:
            return None
        host_root = os.path.realpath(package_root)
        host_path = os.path.realpath(os.path.join(host_root, relative))
        if not (host_path == host_root or host_path.startswith(host_root + os.sep)):
            raise ValueError('Path escapes the skill package boundary.')
        return host_path
    def _get_host_root(self, selected_skill: dict | None) -> str:
        if selected_skill is not None:
            return str(selected_skill.get('package_root', '') or '')
        return str(getattr(self.ap.box_service, 'default_workspace', '') or '')
    async def _attach_file_artifact_ref(
        self,
        result: dict,
        host_path: str,
        host_root: str,
        sandbox_path: str,
        query: pipeline_query.Query,
    ) -> dict:
        if not result.get('ok') or not result.get('truncated') or result.get('artifact_refs'):
            return result
        if not host_root or not os.path.isfile(host_path):
            return result
        run_session = self._get_agent_run_session(query)
        if not run_session:
            return result
        persistence_mgr = getattr(self.ap, 'persistence_mgr', None)
        get_db_engine = getattr(persistence_mgr, 'get_db_engine', None)
        if not callable(get_db_engine):
            return result
        try:
            from langbot.pkg.agent.runner.artifact_store import ArtifactStore
            authorization = run_session.get('authorization', {}) if isinstance(run_session, dict) else {}
            mime_type = mimetypes.guess_type(host_path)[0] or 'text/plain'
            size_bytes = os.path.getsize(host_path)
            metadata = {
                'tool_name': READ_TOOL_NAME,
                'sandbox_path': sandbox_path,
                'truncated_by': result.get('truncated_by'),
                'start_line': result.get('start_line'),
                'end_line': result.get('end_line'),
                'next_offset': result.get('next_offset'),
            }
            artifact_id = await ArtifactStore(get_db_engine()).register_file_artifact(
                artifact_id=None,
                host_path=host_path,
                host_root=host_root,
                artifact_type='file',
                source='tool',
                mime_type=mime_type,
                name=os.path.basename(host_path),
                size_bytes=size_bytes,
                conversation_id=authorization.get('conversation_id'),
                run_id=run_session.get('run_id') if isinstance(run_session, dict) else None,
                runner_id=run_session.get('runner_id') if isinstance(run_session, dict) else None,
                bot_id=getattr(query, 'bot_uuid', None),
                workspace_id=authorization.get('workspace_id'),
                thread_id=authorization.get('thread_id'),
                metadata=metadata,
            )
            artifact_ref = {
                'artifact_id': artifact_id,
                'artifact_type': 'file',
                'mime_type': mime_type,
                'name': os.path.basename(host_path),
                'size_bytes': size_bytes,
            }
            enriched = dict(result)
            enriched['preview'] = str(result.get('content') or '')
            enriched['artifact_refs'] = [artifact_ref]
            return enriched
        except Exception as exc:
            self.ap.logger.warning(f'Failed to register read artifact for {sandbox_path}: {exc}')
            return result
    @staticmethod
    def _get_agent_run_session(query: pipeline_query.Query) -> dict | None:
        session = getattr(query, '_agent_run_session', None)
        return session if isinstance(session, dict) else None
    def _normalize_exec_result(self, result: dict) -> dict:
        normalized = dict(result)
        stdout = str(normalized.get('stdout') or '')
        stderr = str(normalized.get('stderr') or '')
        stdout, stdout_capped = self._truncate_text_to_bytes_with_flag(stdout, _DEFAULT_TOOL_RESULT_MAX_BYTES)
        stderr, stderr_capped = self._truncate_text_to_bytes_with_flag(stderr, _DEFAULT_TOOL_RESULT_MAX_BYTES)
        normalized['stdout'] = stdout
        normalized['stderr'] = stderr
        normalized['stdout_truncated'] = bool(normalized.get('stdout_truncated') or stdout_capped)
        normalized['stderr_truncated'] = bool(normalized.get('stderr_truncated') or stderr_capped)
        if stdout and stderr:
            preview_raw = f'stdout:\n{stdout}\n\nstderr:\n{stderr}'
        else:
            preview_raw = stdout or stderr
        preview, preview_capped = self._truncate_text_to_bytes_with_flag(preview_raw, _DEFAULT_TOOL_RESULT_MAX_BYTES)
        normalized['preview'] = preview
        normalized['truncated'] = bool(
            normalized['stdout_truncated'] or normalized['stderr_truncated'] or preview_capped
        )
        if preview_capped and not normalized.get('truncated_by'):
            normalized['truncated_by'] = 'bytes'
        return normalized
    def _build_directory_result(self, entries: list[str]) -> dict:
        sorted_entries = sorted(str(entry) for entry in entries)
        content = '\n'.join(sorted_entries)
        preview = self._truncate_text_to_bytes(content, _DEFAULT_TOOL_RESULT_MAX_BYTES)
        truncated = preview != content
        return {
            'ok': True,
            'content': preview,
            'is_directory': True,
            'total': len(sorted_entries),
            'truncated': truncated,
            'truncated_by': 'bytes' if truncated else None,
        }
    def _read_text_file_preview(self, host_path: str, parameters: dict) -> dict:
        offset = self._positive_int(parameters.get('offset'), default=1)
        max_lines = self._positive_int(
            parameters.get('limit'),
            default=_DEFAULT_READ_MAX_LINES,
            max_value=_MAX_READ_MAX_LINES,
        )
        max_bytes = self._positive_int(
            parameters.get('max_bytes'),
            default=_DEFAULT_TOOL_RESULT_MAX_BYTES,
            max_value=_DEFAULT_TOOL_RESULT_MAX_BYTES,
        )
        lines: list[str] = []
        output_bytes = 0
        end_line = offset - 1
        truncated = False
        truncated_by: str | None = None
        next_offset: int | None = None
        with open(host_path, 'r', encoding='utf-8', errors='replace') as f:
            for line_number, line in enumerate(f, 1):
                if line_number < offset:
                    continue
                if len(lines) >= max_lines:
                    truncated = True
                    truncated_by = 'lines'
                    next_offset = line_number
                    break
                line_bytes = len(line.encode('utf-8'))
                if output_bytes + line_bytes > max_bytes:
                    truncated = True
                    truncated_by = 'bytes'
                    next_offset = line_number
                    break
                lines.append(line.rstrip('\n'))
                output_bytes += line_bytes
                end_line = line_number
        if not lines and truncated_by == 'bytes':
            content = (
                f'[Line {next_offset or offset} exceeds the {self._format_size(max_bytes)} read limit. '
                'Use exec with a byte-range command for this line, or read a different offset.]'
            )
        else:
            content = '\n'.join(lines)
        return {
            'ok': True,
            'content': content,
            'truncated': truncated,
            'truncated_by': truncated_by,
            'start_line': offset,
            'end_line': end_line,
            'next_offset': next_offset,
            'max_lines': max_lines,
            'max_bytes': max_bytes,
        }
    def _build_read_result_from_text(self, content: str, parameters: dict) -> dict:
        offset = self._positive_int(parameters.get('offset'), default=1)
        max_lines = self._positive_int(
            parameters.get('limit'),
            default=_DEFAULT_READ_MAX_LINES,
            max_value=_MAX_READ_MAX_LINES,
        )
        max_bytes = self._positive_int(
            parameters.get('max_bytes'),
            default=_DEFAULT_TOOL_RESULT_MAX_BYTES,
            max_value=_DEFAULT_TOOL_RESULT_MAX_BYTES,
        )
        all_lines = content.splitlines()
        start_index = offset - 1
        if start_index >= len(all_lines) and all_lines:
            return {'ok': False, 'error': f'Offset {offset} is beyond end of file ({len(all_lines)} lines total)'}
        output_lines: list[str] = []
        output_bytes = 0
        truncated = False
        truncated_by: str | None = None
        next_offset: int | None = None
        for index, line in enumerate(all_lines[start_index:], start_index + 1):
            if len(output_lines) >= max_lines:
                truncated = True
                truncated_by = 'lines'
                next_offset = index
                break
            line_bytes = len(line.encode('utf-8')) + (1 if output_lines else 0)
            if output_bytes + line_bytes > max_bytes:
                truncated = True
                truncated_by = 'bytes'
                next_offset = index
                break
            output_lines.append(line)
            output_bytes += line_bytes
        end_line = offset + len(output_lines) - 1
        return {
            'ok': True,
            'content': '\n'.join(output_lines),
            'truncated': truncated,
            'truncated_by': truncated_by,
            'start_line': offset,
            'end_line': end_line,
            'next_offset': next_offset,
            'max_lines': max_lines,
            'max_bytes': max_bytes,
        }
    @staticmethod
    def _positive_int(value, *, default: int, max_value: int | None = None) -> int:
        try:
            parsed = int(value)
        except (TypeError, ValueError):
            parsed = default
        if parsed <= 0:
            parsed = default
        if max_value is not None:
            parsed = min(parsed, max_value)
        return parsed
    @staticmethod
    def _truncate_grep_line(line: str) -> tuple[str, bool]:
        if len(line) <= _GREP_MAX_LINE_CHARS:
            return line, False
        return f'{line[:_GREP_MAX_LINE_CHARS]}... [truncated]', True
    @staticmethod
    def _truncate_text_to_bytes(text: str, max_bytes: int) -> str:
        return NativeToolLoader._truncate_text_to_bytes_with_flag(text, max_bytes)[0]
    @staticmethod
    def _truncate_text_to_bytes_with_flag(text: str, max_bytes: int) -> tuple[str, bool]:
        data = text.encode('utf-8')
        if len(data) <= max_bytes:
            return text, False
        truncated = data[:max_bytes]
        while truncated and (truncated[-1] & 0xC0) == 0x80:
            truncated = truncated[:-1]
        return truncated.decode('utf-8', errors='ignore'), True
    @staticmethod
    def _format_size(bytes_count: int) -> str:
        if bytes_count < 1024:
            return f'{bytes_count}B'
        return f'{bytes_count / 1024:.1f}KB'
    def _summarize_parameters(self, parameters: dict) -> dict:
        summary = dict(parameters)
        cmd = str(summary.get('command', '')).strip()
--- a/src/langbot/pkg/provider/tools/loaders/plugin.py
+++ b/src/langbot/pkg/provider/tools/loaders/plugin.py
@@ -3,6 +3,7 @@ from __future__ import annotations
 import typing
 import traceback
 from langbot_plugin.api.definition.components.manifest import ComponentManifest
 from langbot_plugin.api.entities.events import pipeline_query
 from .. import loader
@@ -39,7 +40,7 @@ class PluginToolLoader(loader.ToolLoader):
                return True
        return False
-    async def _get_tool(self, name: str) -> resource_tool.LLMTool:
+    async def get_tool(self, name: str) -> ComponentManifest | None:
        for tool in await self.ap.plugin_connector.list_tools():
            if tool.metadata.name == name:
                return tool
--- a/src/langbot/pkg/provider/tools/loaders/skill.py
+++ b/src/langbot/pkg/provider/tools/loaders/skill.py
@@ -10,6 +10,7 @@ if typing.TYPE_CHECKING:
    from langbot_plugin.api.entities.events import pipeline_query
 ACTIVATED_SKILLS_KEY = '_activated_skills'
 ACTIVATED_SKILL_NAMES_STATE_KEY = 'host.activated_skills'
 PIPELINE_BOUND_SKILLS_KEY = '_pipeline_bound_skills'
 SKILL_MOUNT_PREFIX = '/workspace/.skills'
 _SKILL_MOUNT_PATTERN = re.compile(r'/workspace/\.skills/([A-Za-z0-9_-]+)')
@@ -72,6 +73,116 @@ def register_activated_skill(query: pipeline_query.Query, skill_data: dict) -> N
        activated[skill_name] = skill_data
 def _normalize_skill_names(value: typing.Any) -> list[str]:
    if not isinstance(value, list):
        return []
    names: list[str] = []
    for item in value:
        skill_name = str(item or '').strip()
        if skill_name and skill_name not in names:
            names.append(skill_name)
    return names
 def restore_activated_skills_from_state(
    ap: app.Application,
    query: pipeline_query.Query,
    state: dict[str, dict[str, typing.Any]],
 ) -> list[str]:
    """Restore persisted activated skill names into Query variables.
    The state value stores names only. Full skill metadata is rebuilt from the
    current pipeline-visible skill cache so removed or unbound skills remain
    unavailable to native exec/write/edit.
    """
    conversation_state = state.get('conversation', {}) if isinstance(state, dict) else {}
    skill_names = _normalize_skill_names(conversation_state.get(ACTIVATED_SKILL_NAMES_STATE_KEY))
    restored: list[str] = []
    for skill_name in skill_names:
        skill_data = get_visible_skill(ap, query, skill_name)
        if skill_data is None:
            continue
        register_activated_skill(query, skill_data)
        restored.append(skill_name)
    return restored
 def _get_agent_run_authorization(query: pipeline_query.Query) -> dict[str, typing.Any] | None:
    session = getattr(query, '_agent_run_session', None)
    if not isinstance(session, dict):
        return None
    authorization = session.get('authorization')
    return authorization if isinstance(authorization, dict) else None
 def _get_conversation_state_target(query: pipeline_query.Query) -> tuple[str, str, str, dict[str, typing.Any]] | None:
    session = getattr(query, '_agent_run_session', None)
    if not isinstance(session, dict):
        return None
    authorization = _get_agent_run_authorization(query)
    if authorization is None:
        return None
    state_policy = authorization.get('state_policy') or {}
    if not state_policy.get('enable_state', True):
        return None
    state_scopes = state_policy.get('state_scopes', ['conversation', 'actor'])
    if 'conversation' not in state_scopes:
        return None
    state_context = authorization.get('state_context') or {}
    scope_keys = state_context.get('scope_keys') or {}
    scope_key = scope_keys.get('conversation')
    if not scope_key:
        return None
    runner_id = str(session.get('runner_id') or 'unknown')
    binding_identity = str(state_context.get('binding_identity') or 'unknown')
    return scope_key, runner_id, binding_identity, state_context
 async def persist_activated_skill(ap: app.Application, query: pipeline_query.Query, skill_name: str) -> bool:
    """Persist activated skill names for the current AgentRunner conversation.
    Returns False when the call is outside an AgentRunner run or state policy
    does not expose a conversation scope. The in-memory Query activation still
    remains valid for the current turn.
    """
    target = _get_conversation_state_target(query)
    if target is None:
        return False
    persistence_mgr = getattr(ap, 'persistence_mgr', None)
    if persistence_mgr is None or not hasattr(persistence_mgr, 'get_db_engine'):
        return False
    from ....agent.runner.persistent_state_store import get_persistent_state_store
    scope_key, runner_id, binding_identity, state_context = target
    store = get_persistent_state_store(persistence_mgr.get_db_engine())
    existing_names = _normalize_skill_names(await store.state_get(scope_key, ACTIVATED_SKILL_NAMES_STATE_KEY))
    if skill_name not in existing_names:
        existing_names.append(skill_name)
    success, error = await store.state_set(
        scope_key=scope_key,
        state_key=ACTIVATED_SKILL_NAMES_STATE_KEY,
        value=existing_names,
        runner_id=runner_id,
        binding_identity=binding_identity,
        scope='conversation',
        context=state_context,
        logger=getattr(ap, 'logger', None),
    )
    if not success:
        logger = getattr(ap, 'logger', None)
        if logger is not None:
            logger.warning(f'Failed to persist activated skill "{skill_name}": {error}')
    return success
 def parse_skill_mount_path(sandbox_path: str) -> tuple[str | None, str]:
    normalized_path = str(sandbox_path or '/workspace').strip() or '/workspace'
    if normalized_path == SKILL_MOUNT_PREFIX:
--- a/src/langbot/pkg/provider/tools/loaders/skill_authoring.py
+++ b/src/langbot/pkg/provider/tools/loaders/skill_authoring.py
@@ -6,6 +6,7 @@ import typing
 import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
 from .. import loader
 from .availability import is_box_backend_available
 # Align with Claude Code's Skill tool design:
 # - activate: Activate a skill via Tool Call, returns SKILL.md content
@@ -45,18 +46,7 @@ class SkillToolLoader(loader.ToolLoader):
    async def _check_sandbox_available(self) -> bool:
        """Check if the box backend is truly available (not just the runtime)."""
-        box_service = getattr(self.ap, 'box_service', None)
+        return await is_box_backend_available(self.ap)
        if box_service is None:
            return False
        if not getattr(box_service, 'available', False):
            return False
        # Check if backend is truly available via get_status
        try:
            status = await box_service.get_status()
            backend_info = status.get('backend', {})
            return backend_info.get('available', False)
        except Exception:
            return False
    async def get_tools(self, bound_plugins: list[str] | None = None) -> list[resource_tool.LLMTool]:
        if not self._is_available():
@@ -92,17 +82,17 @@ class SkillToolLoader(loader.ToolLoader):
        if not skill_name:
            raise ValueError('skill_name is required')
-        skill_mgr = self.ap.skill_mgr
+        from . import skill as skill_loader
-        skill_data = skill_mgr.get_skill_by_name(skill_name)
+
        skill_data = skill_loader.get_visible_skill(self.ap, query, skill_name)
        if skill_data is None:
-            visible_skills = getattr(skill_mgr, 'skills', {})
+            visible_skills = skill_loader.get_visible_skills(self.ap, query)
            available_names = ', '.join(sorted(visible_skills.keys())) or 'none'
            raise ValueError(f'Skill "{skill_name}" not found. Available skills: {available_names}')
        # Register activated skill for sandbox mount path resolution
        from . import skill as skill_loader
        skill_loader.register_activated_skill(query, skill_data)
        await skill_loader.persist_activated_skill(self.ap, query, skill_name)
        # Return SKILL.md content as Tool Result (injects into context)
        instructions = skill_data.get('instructions', '')
@@ -201,13 +191,13 @@ class SkillToolLoader(loader.ToolLoader):
        return resource_tool.LLMTool(
            name=ACTIVATE_SKILL_TOOL_NAME,
            human_desc='Activate a skill',
-            description=self._build_activate_tool_description(),
+            description='Activate a pipeline-visible skill by name and return its instructions as a tool result.',
            parameters={
                'type': 'object',
                'properties': {
                    'skill_name': {
                        'type': 'string',
-                        'description': 'The skill name to activate (no arguments). E.g., "pdf" or "data-analysis"',
+                        'description': 'The skill name to activate.',
                    },
                },
                'required': ['skill_name'],
@@ -255,50 +245,3 @@ class SkillToolLoader(loader.ToolLoader):
            },
            func=lambda parameters: parameters,
        )
    def _build_activate_tool_description(self) -> str:
        """Build tool description with embedded available_skills list."""
        skill_mgr = getattr(self.ap, 'skill_mgr', None)
        if skill_mgr is None:
            return 'Activate a skill. No skills are currently available.'
        skills = getattr(skill_mgr, 'skills', {})
        if not skills:
            return 'Activate a skill. No skills are currently available.'
        # Build <available_skills> section
        available_skills_lines = ['<available_skills>']
        for skill_name, skill_data in sorted(skills.items()):
            description = skill_data.get('description', '')
            available_skills_lines.append('<skill>')
            available_skills_lines.append(f'<name>{skill_name}</name>')
            available_skills_lines.append(f'<description>{description}</description>')
            available_skills_lines.append('</skill>')
        available_skills_lines.append('</available_skills>')
        available_skills_block = '\n'.join(available_skills_lines)
        return f"""Activate a skill within the main conversation.
 <skills_instructions>
 When users ask you to perform tasks, check if any of the available skills
 below can help complete the task more effectively. Skills provide specialized
 capabilities and domain knowledge.
 How to use skills:
 - Invoke skills using this tool with the skill name only (no arguments)
 - When you invoke a skill, you will see <command-message>
 The skill is activated
 </command-message>
 - The skill's instructions will be provided in the tool result
 - Examples:
  - skill_name: "pdf" - invoke the pdf skill
  - skill_name: "data-analysis" - invoke the data-analysis skill
 Important:
 - Only use skills listed in <available_skills> below
 - Do not invoke a skill that is already running
 - To create a new skill: prepare it in /workspace, then use register_skill tool
 </skills_instructions>
 {available_skills_block}"""
--- a/src/langbot/pkg/provider/tools/toolmgr.py
+++ b/src/langbot/pkg/provider/tools/toolmgr.py
@@ -6,6 +6,9 @@ from typing import TYPE_CHECKING
 import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
 from langbot_plugin.api.entities.events import pipeline_query
 from . import loader as tool_loader
 from .errors import ToolNotFoundError
 if TYPE_CHECKING:
    from ...core import app
    from langbot.pkg.provider.tools.loaders import (
@@ -67,6 +70,20 @@ class ToolManager:
        return all_functions
    async def get_tool_by_name(self, name: str) -> tool_loader.ToolLookupResult | None:
        """Get tool by name from any active loader."""
        for active_loader in (
            self.native_tool_loader,
            self.plugin_tool_loader,
            self.mcp_tool_loader,
            self.skill_tool_loader,
        ):
            tool = await active_loader.get_tool(name)
            if tool:
                return tool
        return None
    async def generate_tools_for_openai(self, use_funcs: list[resource_tool.LLMTool]) -> list:
        tools = []
@@ -98,7 +115,7 @@ class ToolManager:
        if await self.skill_tool_loader.has_tool(name):
            telemetry_features.increment(query, 'tool_calls', 'skill')
            return await self.skill_tool_loader.invoke_tool(name, parameters, query)
-        raise ValueError(f'未找到工具: {name}')
+        raise ToolNotFoundError(name)
    async def shutdown(self):
        await self.native_tool_loader.shutdown()
--- a/src/langbot/pkg/telemetry/heartbeat.py
+++ b/src/langbot/pkg/telemetry/heartbeat.py
@@ -109,6 +109,7 @@ async def build_heartbeat_payload(ap: core_app.Application) -> dict:
        'query_id': '',
        'version': constants.semantic_version,
        'instance_id': constants.instance_id,
        'instance_create_ts': constants.instance_create_ts,
        'edition': constants.edition,
        'features': features,
        'timestamp': datetime.now(timezone.utc).isoformat(),
--- a/src/langbot/pkg/utils/constants.py
+++ b/src/langbot/pkg/utils/constants.py
@@ -16,3 +16,11 @@ debug_mode = False
 edition = 'community'
 instance_id = ''
 instance_create_ts = 0
 """Unix timestamp (seconds) of when this instance was first created.
 Sourced from ``data/labels/instance_id.json``. Backfilled to the current
 time for instances created before this field existed, so it is always a
 positive value once load_config has run.
 """
--- a/tests/unit_tests/provider/test_litellmchat.py
+++ b/tests/unit_tests/provider/test_litellmchat.py
@@ -115,6 +115,15 @@ class TestExtractUsage:
        assert result['prompt_tokens'] == 0
        assert result['completion_tokens'] == 0
    def test_extract_usage_without_provider_usage(self):
        """Missing provider usage is not treated as authoritative zero usage."""
        requester = litellmchat.LiteLLMRequester(ap=Mock(), config={})
        response = Mock()
        response.usage = None
        assert requester._extract_usage(response) is None
 class TestNormalizeUsage:
    """Test _normalize_usage helper covering real-world usage shapes"""
@@ -131,6 +140,22 @@ class TestNormalizeUsage:
        )
        assert result == {'prompt_tokens': 12, 'completion_tokens': 8, 'total_tokens': 20}
    def test_preserves_token_details(self):
        """Provider token details such as cache counters are preserved."""
        result = litellmchat.LiteLLMRequester._normalize_usage(
            {
                'prompt_tokens': 12,
                'completion_tokens': 8,
                'total_tokens': 20,
                'prompt_tokens_details': {'cached_tokens': 7},
                'completion_tokens_details': {'reasoning_tokens': 3},
            }
        )
        assert result['prompt_tokens'] == 12
        assert result['prompt_tokens_details'] == {'cached_tokens': 7}
        assert result['completion_tokens_details'] == {'reasoning_tokens': 3}
    def test_missing_total_is_derived(self):
        """When total_tokens is absent/zero it is derived from prompt + completion"""
        usage = Mock()
@@ -166,9 +191,7 @@ class TestInvokeLLMStreamUsage:
        if has_choice:
            choice = Mock()
            delta = Mock()
-            delta.model_dump = Mock(
+            delta.model_dump = Mock(return_value={'role': 'assistant', 'content': content, 'tool_calls': tool_calls})
                return_value={'role': 'assistant', 'content': content, 'tool_calls': tool_calls}
            )
            choice.delta = delta
            choice.finish_reason = finish_reason
            chunk.choices = [choice]
@@ -313,7 +336,8 @@ class TestInvokeLLMStreamUsage:
        with patch.object(litellmchat, 'acompletion', new=AsyncMock(side_effect=lambda **kw: _aiter())):
            collected = [
-                chunk async for chunk in requester.invoke_llm_stream(
+                chunk
                async for chunk in requester.invoke_llm_stream(
                    query=query,
                    model=model,
                    messages=messages,
@@ -788,7 +812,9 @@ class TestInvokeRerank:
        with patch('httpx.AsyncClient', return_value=mock_client):
            # arerank must NOT be called on the openai-compatible path
            with patch.object(
-                litellmchat, 'arerank', new_callable=AsyncMock,
+                litellmchat,
                'arerank',
                new_callable=AsyncMock,
                side_effect=AssertionError('arerank must not be used for openai-compatible provider'),
            ):
                results = await requester.invoke_rerank(
@@ -1068,8 +1094,7 @@ class TestScanModels:
        with patch.object(litellmchat.litellm, 'supports_function_calling') as mock_supports_function_calling:
            mock_supports_function_calling.side_effect = (
-                lambda model, custom_llm_provider=None: model == 'moonshot/kimi-k2.6'
+                lambda model, custom_llm_provider=None: model == 'moonshot/kimi-k2.6' and custom_llm_provider is None
                and custom_llm_provider is None
            )
            assert requester._supports_function_calling('kimi-k2.6') is True
--- a/tests/unit_tests/provider/test_mcp_box_integration.py
+++ b/tests/unit_tests/provider/test_mcp_box_integration.py
@@ -180,7 +180,7 @@ class TestMCPServerBoxConfig:
        assert cfg.host_path is None
        assert cfg.host_path_mode == 'ro'
        assert cfg.env == {}
-        assert cfg.startup_timeout_sec == 120
+        assert cfg.startup_timeout_sec == 300
        assert cfg.cpus is None
        assert cfg.memory_mb is None
        assert cfg.pids_limit is None
@@ -494,6 +494,53 @@ class TestBuildBoxProcessPayload:
        assert payload['args'] == ['/opt/other/server.py', '--flag']
 # ── Python Workspace Preparation ────────────────────────────────────
 class TestPythonWorkspacePreparation:
    def test_requirements_workspace_uses_venv_bootstrap(self, mcp_module, tmp_path):
        host_path = tmp_path / 'mcp-source'
        host_path.mkdir()
        (host_path / 'requirements.txt').write_text('mcp==1.26.0\n', encoding='utf-8')
        command = mcp_module.BoxStdioSessionRuntime.detect_install_command(
            str(host_path),
            '/workspace/.mcp/u1/workspace',
        )
        assert command is not None
        assert '_LB_SYSTEM_PYTHON="$(command -v python3 || command -v python || true)"' in command
        assert '"$_LB_SYSTEM_PYTHON" -m venv "$_LB_VENV_DIR"' in command
        assert 'python -m pip install -r "/workspace/.mcp/u1/workspace/requirements.txt"' in command
        assert 'pip install --no-cache-dir -r' not in command
    def test_staging_refresh_removes_stale_source_files_but_preserves_runtime_dirs(self, mcp_module, tmp_path):
        source = tmp_path / 'source'
        source.mkdir()
        (source / 'server.py').write_text('print("new")\n', encoding='utf-8')
        (source / 'requirements.txt').write_text('mcp==1.26.0\n', encoding='utf-8')
        process_root = tmp_path / 'shared' / '.mcp' / 'u1'
        workspace = process_root / 'workspace'
        (workspace / '.venv' / 'bin').mkdir(parents=True)
        (workspace / '.venv' / 'bin' / 'python').write_text('', encoding='utf-8')
        (workspace / '.langbot').mkdir()
        (workspace / '.langbot' / 'python-env.lock').mkdir()
        (workspace / 'server.py').write_text('print("old")\n', encoding='utf-8')
        (workspace / 'removed.py').write_text('stale\n', encoding='utf-8')
        (workspace / 'removed_dir').mkdir()
        (workspace / 'removed_dir' / 'old.txt').write_text('stale\n', encoding='utf-8')
        mcp_module.BoxStdioSessionRuntime._copy_workspace_tree(str(source), str(process_root), str(workspace))
        assert (workspace / 'server.py').read_text(encoding='utf-8') == 'print("new")\n'
        assert (workspace / 'requirements.txt').read_text(encoding='utf-8') == 'mcp==1.26.0\n'
        assert not (workspace / 'removed.py').exists()
        assert not (workspace / 'removed_dir').exists()
        assert (workspace / '.venv' / 'bin' / 'python').exists()
        assert (workspace / '.langbot' / 'python-env.lock').is_dir()
 # ── get_runtime_info_dict ───────────────────────────────────────────
--- a/tests/unit_tests/provider/test_skill_tools.py
+++ b/tests/unit_tests/provider/test_skill_tools.py
@@ -245,7 +245,8 @@ class TestSkillPathHelpers:
        command = wrap_skill_command_with_python_env('python scripts/run.py')
-        assert 'python -m venv "$_LB_VENV_DIR"' in command
+        assert '_LB_SYSTEM_PYTHON="$(command -v python3 || command -v python || true)"' in command
        assert '"$_LB_SYSTEM_PYTHON" -m venv "$_LB_VENV_DIR"' in command
        assert 'export VIRTUAL_ENV="$_LB_VENV_DIR"' in command
        assert command.rstrip().endswith('python scripts/run.py')
@@ -456,7 +457,9 @@ class TestNativeToolLoaderSkillPaths:
                SimpleNamespace(query_id='q1', variables={PIPELINE_BOUND_SKILLS_KEY: ['demo']}),
            )
-            assert result == {'ok': True, 'content': 'demo instructions'}
+            assert result['ok'] is True
            assert result['content'] == 'demo instructions'
            assert result['truncated'] is False
    @pytest.mark.asyncio
    async def test_exec_in_activated_skill_mount_rewrites_command_and_refreshes(self):
@@ -485,7 +488,7 @@ class TestNativeToolLoaderSkillPaths:
                query,
            )
-            assert result == {'ok': True}
+            assert result['ok'] is True
            tool_parameters = ap.box_service.execute_tool.await_args.args[0]
            assert tool_parameters['command'] == 'python /workspace/.skills/demo/scripts/run.py'
            assert tool_parameters['workdir'] == '/workspace/.skills/demo'
--- a/tests/unit_tests/provider/test_tool_manager.py
+++ b/tests/unit_tests/provider/test_tool_manager.py
@@ -226,7 +226,7 @@ class TestToolManagerExecuteFuncCall:
    @pytest.mark.asyncio
    async def test_execute_raises_when_tool_not_found(self, mock_app_with_loaders, sample_query):
-        """Test that execute_func_call raises ValueError when tool not found."""
+        """Test that execute_func_call raises ToolNotFoundError when tool not found."""
        toolmgr = get_toolmgr_module()
        mock_app, mock_plugin_loader, mock_mcp_loader = mock_app_with_loaders
@@ -236,7 +236,7 @@ class TestToolManagerExecuteFuncCall:
        manager = toolmgr.ToolManager(mock_app)
        self._wire_loaders(manager, mock_app, mock_plugin_loader, mock_mcp_loader)
-        with pytest.raises(ValueError, match='未找到工具'):
+        with pytest.raises(toolmgr.ToolNotFoundError, match='Tool not found: unknown_tool'):
            await manager.execute_func_call('unknown_tool', {}, sample_query)
    @pytest.mark.asyncio
--- a/tests/unit_tests/telemetry/test_heartbeat.py
+++ b/tests/unit_tests/telemetry/test_heartbeat.py
@@ -62,6 +62,7 @@ class TestBuildHeartbeatPayload:
        assert payload['event_type'] == 'instance_heartbeat'
        assert payload['query_id'] == ''
        assert 'instance_create_ts' in payload
        assert 'timestamp' in payload
        f = payload['features']
        assert f['database'] == 'postgresql'
--- a/uv.lock
+++ b/uv.lock
@@ -1967,7 +1967,7 @@ wheels = [
 [[package]]
 name = "langbot"
-version = "4.10.1"
+version = "4.10.2"
 source = { editable = "." }
 dependencies = [
    { name = "aiocqhttp" },
Author	SHA1	Message	Date
huanghuoguoguo	64b7e9c509	fix(tools): clear stale Python workspace env locks	2026-06-14 11:32:10 +08:00
huanghuoguoguo	7b67dcc302	fix(tools): bootstrap Python workspaces with available interpreter	2026-06-14 11:32:10 +08:00
huanghuoguoguo	a60827f221	fix(tools): harden agent runner tool runtimes	2026-06-14 11:32:10 +08:00
huanghuoguoguo	e9fe2f2d43	feat(agent-runner): support host tool lookup (#2244 )	2026-06-14 11:29:57 +08:00
huanghuoguoguo	27be09ab15	fix(provider): preserve litellm usage details (#2246 )	2026-06-14 11:12:29 +08:00
huanghuoguoguo	1ef4507d9a	[codex] Delegate web page bot stream helpers (#2245 ) * fix(platform): delegate web page bot stream helpers * style(platform): format web page bot adapter	2026-06-14 10:57:53 +08:00
RockChinQ	2e7978317c	chore(release): bump version to 4.10.2	2026-06-13 11:21:44 -04:00
RockChinQ	b7d8332cb0	feat(telemetry): include instance_create_ts in heartbeat payload Load the instance creation timestamp from data/labels/instance_id.json (backfilling+persisting it for instances created before the field existed), expose it as constants.instance_create_ts, and include it in the heartbeat payload so Space can anchor Time-To-Value / onboarding analytics on real install time rather than first-heartbeat. Verified: py_compile, ruff, pytest tests/unit_tests/telemetry/ (37 passed).	2026-06-13 11:13:18 -04:00
huanghuoguoguo	7fe3eedeea	fix(provider): use LiteLLM input window for context length (#2243 )	2026-06-13 21:27:47 +08:00
`@@ -1,3 +1,3 @@`
	`"""LangBot - Production-grade platform for building agentic IM bots"""`	`"""LangBot - Production-grade platform for building agentic IM bots"""`

	`__version__ = '4.10.1'`	`__version__ = '4.10.2'`