diff --git a/src/langbot/pkg/box/__init__.py b/src/langbot/pkg/box/__init__.py index c1ea6e13..de639417 100644 --- a/src/langbot/pkg/box/__init__.py +++ b/src/langbot/pkg/box/__init__.py @@ -1 +1,5 @@ """LangBot Box runtime package.""" + +from .workspace import BoxWorkspaceSession + +__all__ = ['BoxWorkspaceSession'] diff --git a/src/langbot/pkg/box/workspace.py b/src/langbot/pkg/box/workspace.py new file mode 100644 index 00000000..cd7feed1 --- /dev/null +++ b/src/langbot/pkg/box/workspace.py @@ -0,0 +1,404 @@ +from __future__ import annotations +"""Reusable workspace/session helpers built on top of Box. + +This module is the middle layer between the raw Box runtime primitives and +application-specific flows such as skills or MCP stdio. + +It intentionally stays generic: +- path and virtualenv rewriting are workspace concerns +- Python project detection/bootstrap are workspace concerns +- session exec / managed-process helpers are workspace concerns + +Higher layers add their own semantics on top, for example: +- skills choose a stable per-skill session id and use repeated exec +- MCP stdio chooses how to prepare dependencies and attaches to a managed process +""" + +import os +import textwrap +from typing import Any + +PYTHON_MANIFEST_FILES = ( + 'requirements.txt', + 'pyproject.toml', + 'setup.py', + 'setup.cfg', +) +_VENV_DIRS = frozenset({'.venv', 'venv', 'env', '.env'}) +_VENV_BIN_DIRS = frozenset({'bin', 'Scripts'}) + + +def normalize_host_path(path: str | None) -> str: + if path is None: + return '' + stripped = str(path).strip() + if not stripped: + return '' + return os.path.realpath(os.path.abspath(stripped)) + + +def rewrite_mounted_path(path: str, host_path: str | None, *, mount_path: str = '/workspace') -> str: + """Translate a host path into the path visible inside the sandbox mount.""" + if not host_path or not path: + return path + normalized_host = os.path.realpath(host_path) + if path.startswith(normalized_host + '/'): + return mount_path + path[len(normalized_host) :] + if path == normalized_host: + return mount_path + return path + + +def unwrap_venv_path(directory: str) -> str: + """Collapse ``.../.venv/bin`` style paths back to the project root.""" + parts = directory.replace('\\', '/').split('/') + for i in range(len(parts) - 1, 0, -1): + if parts[i] in _VENV_BIN_DIRS and i >= 1: + venv_dir = parts[i - 1] + if venv_dir in _VENV_DIRS: + project_root = '/'.join(parts[: i - 1]) + return project_root if project_root else '/' + return directory + + +def infer_workspace_host_path(command: str, args: list[str] | None = None) -> str | None: + """Infer the project/workspace root from absolute command/arg paths.""" + candidates: list[str] = [] + for part in [command, *(args or [])]: + if not os.path.isabs(part): + continue + if os.path.exists(part): + directory = os.path.dirname(part) + candidates.append(os.path.realpath(unwrap_venv_path(directory))) + if not candidates: + return None + common = os.path.commonpath(candidates) + return common if common != '/' else None + + +def rewrite_venv_command(command: str, host_path: str | None, *, mount_path: str = '/workspace') -> str: + """Rewrite host venv interpreters to plain ``python`` inside the sandbox. + + Once a project is mounted into the sandbox, host virtualenv paths are no + longer valid. For those paths we intentionally drop down to ``python`` and + let the sandbox-side environment/bootstrap decide what interpreter to use. + """ + if not host_path or not command: + return command + normalized_host = os.path.realpath(host_path) + if not command.startswith(normalized_host + '/'): + return command + rel = command[len(normalized_host) + 1 :] + parts = rel.replace('\\', '/').split('/') + if len(parts) >= 3 and parts[0] in _VENV_DIRS and parts[1] in _VENV_BIN_DIRS and parts[2].startswith('python'): + return 'python' + return rewrite_mounted_path(command, host_path, mount_path=mount_path) + + +def list_python_manifest_files(host_path: str | None) -> list[str]: + normalized_root = normalize_host_path(host_path) + if not normalized_root: + return [] + return [ + filename for filename in PYTHON_MANIFEST_FILES if os.path.isfile(os.path.join(normalized_root, filename)) + ] + + +def classify_python_workspace(host_path: str | None) -> str | None: + """Return the generic Python workspace shape, without app-specific policy.""" + manifest_files = set(list_python_manifest_files(host_path)) + if not manifest_files: + return None + if {'pyproject.toml', 'setup.py', 'setup.cfg'} & manifest_files: + return 'package' + if 'requirements.txt' in manifest_files: + return 'requirements' + return None + + +def should_prepare_python_env(host_path: str | None) -> bool: + normalized_root = normalize_host_path(host_path) + if not normalized_root: + return False + if os.path.isdir(os.path.join(normalized_root, '.venv')): + return True + return bool(list_python_manifest_files(normalized_root)) + + +def wrap_python_command_with_env(command: str, *, mount_path: str = '/workspace') -> str: + """Wrap a command with a reusable sandbox-local Python env bootstrap. + + This is the generic "workspace is a Python project" path used by mutable + workspaces such as skills. Read-only installation strategies stay in the + higher-level caller because they are application policy, not workspace + semantics. + """ + bootstrap = textwrap.dedent( + f""" + set -e + + _LB_VENV_DIR="{mount_path}/.venv" + _LB_META_DIR="{mount_path}/.langbot" + _LB_META_FILE="$_LB_META_DIR/python-env.json" + _LB_LOCK_DIR="$_LB_META_DIR/python-env.lock" + _LB_TMP_DIR="{mount_path}/.tmp" + _LB_PIP_CACHE_DIR="{mount_path}/.cache/pip" + + mkdir -p "$_LB_META_DIR" "$_LB_TMP_DIR" "$_LB_PIP_CACHE_DIR" + export TMPDIR="$_LB_TMP_DIR" + export TEMP="$_LB_TMP_DIR" + export TMP="$_LB_TMP_DIR" + export PIP_CACHE_DIR="$_LB_PIP_CACHE_DIR" + + _lb_python_meta() {{ + python - <<'PY' + import hashlib + import json + import os + import sys + + root = "{mount_path}" + digest = hashlib.sha256() + manifest_files = [] + for rel in ("requirements.txt", "pyproject.toml", "setup.py", "setup.cfg"): + path = os.path.join(root, rel) + if not os.path.isfile(path): + continue + manifest_files.append(rel) + with open(path, "rb") as handle: + digest.update(rel.encode("utf-8")) + digest.update(b"\\0") + digest.update(handle.read()) + digest.update(b"\\0") + + print( + json.dumps( + {{ + "python_executable": sys.executable, + "python_version": list(sys.version_info[:3]), + "manifest_files": manifest_files, + "manifest_sha256": digest.hexdigest(), + }}, + sort_keys=True, + ) + ) + PY + }} + + _LB_CURRENT_META="$(_lb_python_meta)" + _LB_NEEDS_BOOTSTRAP=0 + + if [ ! -x "$_LB_VENV_DIR/bin/python" ]; then + _LB_NEEDS_BOOTSTRAP=1 + elif [ ! -f "$_LB_META_FILE" ]; then + _LB_NEEDS_BOOTSTRAP=1 + elif [ "$(cat "$_LB_META_FILE")" != "$_LB_CURRENT_META" ]; then + _LB_NEEDS_BOOTSTRAP=1 + fi + + if [ "$_LB_NEEDS_BOOTSTRAP" -eq 1 ]; then + _LB_LOCK_WAIT=0 + while ! mkdir "$_LB_LOCK_DIR" 2>/dev/null; do + if [ "$_LB_LOCK_WAIT" -ge 120 ]; then + echo "Timed out waiting for Python environment lock: $_LB_LOCK_DIR" >&2 + exit 1 + fi + sleep 1 + _LB_LOCK_WAIT=$((_LB_LOCK_WAIT + 1)) + done + + _lb_cleanup_lock() {{ + rmdir "$_LB_LOCK_DIR" >/dev/null 2>&1 || true + }} + trap _lb_cleanup_lock EXIT INT TERM + + _LB_CURRENT_META="$(_lb_python_meta)" + _LB_NEEDS_BOOTSTRAP=0 + if [ ! -x "$_LB_VENV_DIR/bin/python" ]; then + _LB_NEEDS_BOOTSTRAP=1 + elif [ ! -f "$_LB_META_FILE" ]; then + _LB_NEEDS_BOOTSTRAP=1 + elif [ "$(cat "$_LB_META_FILE")" != "$_LB_CURRENT_META" ]; then + _LB_NEEDS_BOOTSTRAP=1 + fi + + if [ "$_LB_NEEDS_BOOTSTRAP" -eq 1 ]; then + rm -rf "$_LB_VENV_DIR" + python -m venv "$_LB_VENV_DIR" + . "$_LB_VENV_DIR/bin/activate" + python -m pip install --upgrade pip setuptools wheel + if [ -f "{mount_path}/requirements.txt" ]; then + python -m pip install -r "{mount_path}/requirements.txt" + elif [ -f "{mount_path}/pyproject.toml" ] || [ -f "{mount_path}/setup.py" ] || [ -f "{mount_path}/setup.cfg" ]; then + python -m pip install "{mount_path}" + fi + printf '%s' "$_LB_CURRENT_META" > "$_LB_META_FILE" + fi + fi + + export VIRTUAL_ENV="$_LB_VENV_DIR" + export PATH="$_LB_VENV_DIR/bin:$PATH" + {command} + """ + ).strip() + return bootstrap + '\n' + + +class BoxWorkspaceSession: + """High-level handle for one reusable workspace-backed Box session. + + The Box runtime already understands sessions and managed processes. This + wrapper adds LangBot's workspace-centric view on top: a mounted host path, + a stable ``session_id``, optional environment defaults, and convenience + helpers for exec or long-running processes inside that workspace. + """ + + def __init__( + self, + box_service, + session_id: str, + *, + host_path: str | None = None, + host_path_mode: str = 'rw', + workdir: str = '/workspace', + env: dict[str, str] | None = None, + mount_path: str = '/workspace', + network: str | None = None, + read_only_rootfs: bool | None = None, + image: str | None = None, + cpus: float | None = None, + memory_mb: int | None = None, + pids_limit: int | None = None, + ): + self.box_service = box_service + self.session_id = session_id + self.host_path = host_path + self.host_path_mode = host_path_mode + self.workdir = workdir + self.env = dict(env or {}) + self.mount_path = mount_path + self.network = network + self.read_only_rootfs = read_only_rootfs + self.image = image + self.cpus = cpus + self.memory_mb = memory_mb + self.pids_limit = pids_limit + + def rewrite_path(self, path: str) -> str: + return rewrite_mounted_path(path, self.host_path, mount_path=self.mount_path) + + def rewrite_venv_command(self, command: str) -> str: + return rewrite_venv_command(command, self.host_path, mount_path=self.mount_path) + + def build_session_payload(self) -> dict[str, Any]: + # Keep this payload generic so callers can reuse the same workspace + # handle for plain exec, file-producing tasks, or managed processes. + payload: dict[str, Any] = { + 'session_id': self.session_id, + 'workdir': self.workdir, + 'env': self.env, + } + if self.network is not None: + payload['network'] = self.network + if self.read_only_rootfs is not None: + payload['read_only_rootfs'] = self.read_only_rootfs + if self.host_path: + payload['host_path'] = self.host_path + payload['host_path_mode'] = self.host_path_mode + for key in ('image', 'cpus', 'memory_mb', 'pids_limit'): + value = getattr(self, key) + if value is not None: + payload[key] = value + return payload + + def build_exec_payload( + self, + cmd: str, + *, + workdir: str | None = None, + env: dict[str, str] | None = None, + timeout_sec: int | None = None, + ) -> dict[str, Any]: + # Exec payloads inherit the session-level workspace config, then layer + # per-call command/workdir/env overrides on top. + payload = self.build_session_payload() + payload['cmd'] = cmd + payload['workdir'] = workdir or self.workdir + if timeout_sec is not None: + payload['timeout_sec'] = timeout_sec + resolved_env = self.env if env is None else env + if resolved_env: + payload['env'] = resolved_env + elif 'env' in payload and not payload['env']: + payload.pop('env') + return payload + + async def execute_raw( + self, + cmd: str, + *, + workdir: str | None = None, + env: dict[str, str] | None = None, + timeout_sec: int | None = None, + ): + payload = self.build_exec_payload(cmd, workdir=workdir, env=env, timeout_sec=timeout_sec) + return await self.box_service.client.execute(self.box_service.build_spec(payload)) + + async def execute_for_query( + self, + query, + cmd: str, + *, + workdir: str | None = None, + env: dict[str, str] | None = None, + timeout_sec: int | None = None, + ) -> dict: + payload = self.build_exec_payload(cmd, workdir=workdir, env=env, timeout_sec=timeout_sec) + return await self.box_service.execute_spec_payload(payload, query) + + async def create_session(self): + return await self.box_service.create_session(self.build_session_payload()) + + def build_process_payload( + self, + command: str, + args: list[str] | None = None, + *, + env: dict[str, str] | None = None, + cwd: str = '/workspace', + ) -> dict[str, Any]: + # Managed processes run inside the same workspace model as one-shot + # execs, so path/venv rewriting is shared here. + normalized_command = command + normalized_args = list(args or []) + normalized_cwd = cwd + if self.host_path: + normalized_command = self.rewrite_venv_command(command) + normalized_args = [self.rewrite_path(arg) for arg in normalized_args] + normalized_cwd = self.rewrite_path(cwd) + return { + 'command': normalized_command, + 'args': normalized_args, + 'env': dict(env or {}), + 'cwd': normalized_cwd, + } + + async def start_managed_process( + self, + command: str, + args: list[str] | None = None, + *, + env: dict[str, str] | None = None, + cwd: str = '/workspace', + ): + payload = self.build_process_payload(command, args, env=env, cwd=cwd) + return await self.box_service.start_managed_process(self.session_id, payload) + + async def get_managed_process(self): + return await self.box_service.get_managed_process(self.session_id) + + def get_managed_process_websocket_url(self) -> str: + return self.box_service.get_managed_process_websocket_url(self.session_id) + + async def cleanup(self) -> None: + await self.box_service.client.delete_session(self.session_id) diff --git a/src/langbot/pkg/provider/tools/loaders/mcp_stdio.py b/src/langbot/pkg/provider/tools/loaders/mcp_stdio.py index 22b9e7fd..d1f6f2ab 100644 --- a/src/langbot/pkg/provider/tools/loaders/mcp_stdio.py +++ b/src/langbot/pkg/provider/tools/loaders/mcp_stdio.py @@ -1,13 +1,20 @@ from __future__ import annotations import enum -import os import asyncio from typing import TYPE_CHECKING, Any import pydantic from mcp import ClientSession from mcp.client.websocket import websocket_client +from ....box.workspace import ( + BoxWorkspaceSession, + classify_python_workspace, + infer_workspace_host_path, + rewrite_mounted_path, + rewrite_venv_command, + unwrap_venv_path, +) if TYPE_CHECKING: from .mcp import RuntimeMCPSession @@ -25,10 +32,6 @@ class MCPSessionErrorPhase(enum.Enum): TOOL_CALL = 'tool_call' -_VENV_DIRS = frozenset({'.venv', 'venv', 'env', '.env'}) -_VENV_BIN_DIRS = frozenset({'bin', 'Scripts'}) - - class MCPServerBoxConfig(pydantic.BaseModel): """Structured configuration for running an MCP server inside a Box container.""" @@ -65,6 +68,21 @@ class BoxStdioSessionRuntime: def server_config(self) -> dict: return self.owner.server_config + def _build_workspace(self) -> BoxWorkspaceSession: + return BoxWorkspaceSession( + self.ap.box_service, + self.owner._build_box_session_id(), + host_path=self.resolve_host_path(), + host_path_mode=self.config.host_path_mode, + env=self.config.env, + network=self.config.network, + read_only_rootfs=self.config.read_only_rootfs if self.config.read_only_rootfs is not None else False, + image=self.config.image, + cpus=self.config.cpus, + memory_mb=self.config.memory_mb, + pids_limit=self.config.pids_limit, + ) + def uses_box_stdio(self) -> bool: if self.server_config.get('mode') != 'stdio': return False @@ -74,13 +92,11 @@ class BoxStdioSessionRuntime: return False async def initialize(self) -> None: - box_service = self.ap.box_service - session_id = self.owner._build_box_session_id() - host_path = self.resolve_host_path() - session_payload = self.build_box_session_payload(session_id, host_path) + workspace = self._build_workspace() + host_path = workspace.host_path try: - await box_service.create_session(session_payload) + await workspace.create_session() except Exception: self.owner.error_phase = MCPSessionErrorPhase.SESSION_CREATE raise @@ -89,11 +105,11 @@ class BoxStdioSessionRuntime: install_cmd = self.owner._detect_install_command(host_path) if install_cmd: self.ap.logger.info(f'MCP server {self.server_name}: installing dependencies in Box with: {install_cmd}') - exec_payload = dict(session_payload) - exec_payload['cmd'] = install_cmd - exec_payload['timeout_sec'] = self.config.startup_timeout_sec or 120 try: - result = await box_service.client.execute(box_service.build_spec(exec_payload)) + result = await workspace.execute_raw( + install_cmd, + timeout_sec=self.config.startup_timeout_sec or 120, + ) except Exception: self.owner.error_phase = MCPSessionErrorPhase.DEP_INSTALL raise @@ -103,13 +119,17 @@ class BoxStdioSessionRuntime: raise Exception(f'Dependency install failed (exit code {result.exit_code}): {stderr_preview}') try: - await box_service.start_managed_process(session_id, self.build_box_process_payload(host_path)) + await workspace.start_managed_process( + self.server_config['command'], + self.server_config.get('args', []), + env=self.server_config.get('env', {}), + ) except Exception: self.owner.error_phase = MCPSessionErrorPhase.PROCESS_START raise try: - websocket_url = box_service.get_managed_process_websocket_url(session_id) + websocket_url = workspace.get_managed_process_websocket_url() transport = await self.owner.exit_stack.enter_async_context(websocket_client(websocket_url)) read_stream, write_stream = transport self.owner.session = await self.owner.exit_stack.enter_async_context(ClientSession(read_stream, write_stream)) @@ -126,11 +146,11 @@ class BoxStdioSessionRuntime: async def monitor_process_health(self) -> None: from langbot_plugin.box.models import BoxManagedProcessStatus - session_id = self.owner._build_box_session_id() + workspace = self._build_workspace() consecutive_errors = 0 while not self.owner._shutdown_event.is_set(): try: - info = await self.ap.box_service.get_managed_process(session_id) + info = await workspace.get_managed_process() if isinstance(info, dict): status = info.get('status', '') else: @@ -154,120 +174,58 @@ class BoxStdioSessionRuntime: return try: - await self.ap.box_service.client.delete_session(self.owner._build_box_session_id()) + await self._build_workspace().cleanup() except Exception as exc: self.ap.logger.warning(f'Failed to cleanup Box session for MCP server {self.server_name}: {exc}') def rewrite_path(self, path: str, host_path: str | None) -> str: - if not host_path or not path: - return path - normalized_host = os.path.realpath(host_path) - if path.startswith(normalized_host + '/'): - return '/workspace' + path[len(normalized_host) :] - if path == normalized_host: - return '/workspace' - return path + return rewrite_mounted_path(path, host_path) def infer_host_path(self) -> str | None: - candidates = [] - parts = [self.server_config.get('command', '')] + self.server_config.get('args', []) - for part in parts: - if not os.path.isabs(part): - continue - if os.path.exists(part): - directory = os.path.dirname(part) - directory = self.unwrap_venv_path(directory) - candidates.append(os.path.realpath(directory)) - if not candidates: - return None - common = os.path.commonpath(candidates) - return common if common != '/' else None + return infer_workspace_host_path(self.server_config.get('command', ''), self.server_config.get('args', [])) @staticmethod def unwrap_venv_path(directory: str) -> str: - parts = directory.replace('\\', '/').split('/') - for i in range(len(parts) - 1, 0, -1): - if parts[i] in _VENV_BIN_DIRS and i >= 1: - venv_dir = parts[i - 1] - if venv_dir in _VENV_DIRS: - project_root = '/'.join(parts[: i - 1]) - return project_root if project_root else '/' - return directory + return unwrap_venv_path(directory) def resolve_host_path(self) -> str | None: return self.config.host_path or self.infer_host_path() @staticmethod def detect_install_command(host_path: str) -> str | None: - copy_and_install = ( - 'mkdir -p /opt/_mcp_src' - ' && tar -C /workspace' - ' --exclude=.venv --exclude=.git --exclude=__pycache__' - ' --exclude=node_modules --exclude=.tox --exclude=.nox' - ' --exclude="*.egg-info" --exclude=.uv-cache' - ' -cf - .' - ' | tar -C /opt/_mcp_src -xf -' - ' && pip install --no-cache-dir /opt/_mcp_src' - ' && rm -rf /opt/_mcp_src' - ) - install_requirements = 'pip install --no-cache-dir -r /workspace/requirements.txt' - - if os.path.isfile(os.path.join(host_path, 'pyproject.toml')): - return copy_and_install - if os.path.isfile(os.path.join(host_path, 'setup.py')): - return copy_and_install - if os.path.isfile(os.path.join(host_path, 'requirements.txt')): - return install_requirements + workspace_kind = classify_python_workspace(host_path) + if workspace_kind == 'package': + return ( + 'mkdir -p /opt/_lb_src' + ' && tar -C /workspace' + ' --exclude=.venv --exclude=.git --exclude=__pycache__' + ' --exclude=node_modules --exclude=.tox --exclude=.nox' + ' --exclude="*.egg-info" --exclude=.uv-cache' + ' -cf - .' + ' | tar -C /opt/_lb_src -xf -' + ' && pip install --no-cache-dir /opt/_lb_src' + ' && rm -rf /opt/_lb_src' + ) + if workspace_kind == 'requirements': + return 'pip install --no-cache-dir -r /workspace/requirements.txt' return None def build_box_session_payload(self, session_id: str, host_path: str | None = None) -> dict[str, Any]: - if host_path is None: - host_path = self.resolve_host_path() - - payload: dict[str, Any] = { - 'session_id': session_id, - 'workdir': '/workspace', - 'env': self.config.env, - 'network': self.config.network, - 'read_only_rootfs': self.config.read_only_rootfs if self.config.read_only_rootfs is not None else False, - } - if host_path: - payload['host_path'] = host_path - payload['host_path_mode'] = self.config.host_path_mode - for key in ('image', 'cpus', 'memory_mb', 'pids_limit'): - value = getattr(self.config, key) - if value is not None: - payload[key] = value if not isinstance(value, enum.Enum) else value.value - return payload + workspace = self._build_workspace() + workspace.session_id = session_id + if host_path is not None: + workspace.host_path = host_path + return workspace.build_session_payload() def build_box_process_payload(self, host_path: str | None = None) -> dict[str, Any]: - if host_path is None: - host_path = self.resolve_host_path() - - command = self.server_config['command'] - args = self.server_config.get('args', []) - cwd = '/workspace' - - if host_path: - command = self.rewrite_venv_command(command, host_path) - args = [self.rewrite_path(arg, host_path) for arg in args] - cwd = self.rewrite_path(cwd, host_path) - - return { - 'command': command, - 'args': args, - 'env': self.server_config.get('env', {}), - 'cwd': cwd, - } + workspace = self._build_workspace() + if host_path is not None: + workspace.host_path = host_path + return workspace.build_process_payload( + self.server_config['command'], + self.server_config.get('args', []), + env=self.server_config.get('env', {}), + ) def rewrite_venv_command(self, command: str, host_path: str) -> str: - if not host_path or not command: - return command - normalized_host = os.path.realpath(host_path) - if not command.startswith(normalized_host + '/'): - return command - rel = command[len(normalized_host) + 1 :] - parts = rel.replace('\\', '/').split('/') - if len(parts) >= 3 and parts[0] in _VENV_DIRS and parts[1] in _VENV_BIN_DIRS and parts[2].startswith('python'): - return 'python' - return self.rewrite_path(command, host_path) + return rewrite_venv_command(command, host_path) diff --git a/src/langbot/pkg/provider/tools/loaders/native.py b/src/langbot/pkg/provider/tools/loaders/native.py index 582c2a60..badf3717 100644 --- a/src/langbot/pkg/provider/tools/loaders/native.py +++ b/src/langbot/pkg/provider/tools/loaders/native.py @@ -6,6 +6,7 @@ import os import langbot_plugin.api.entities.builtin.resource.tool as resource_tool from langbot_plugin.api.entities.events import pipeline_query +from ....box.workspace import BoxWorkspaceSession from .. import loader from . import skill as skill_loader @@ -94,18 +95,19 @@ class NativeToolLoader(loader.ToolLoader): if skill_loader.should_prepare_skill_python_env(package_root): rewritten_command = skill_loader.wrap_skill_command_with_python_env(rewritten_command) - spec_payload: dict = { - 'cmd': rewritten_command, - 'workdir': rewritten_workdir, - 'host_path': package_root, - 'host_path_mode': 'rw', - 'session_id': skill_loader.build_skill_session_id(selected_skill, query), - } - for key in ('timeout_sec', 'env'): - if key in parameters: - spec_payload[key] = parameters[key] - - result = await self.ap.box_service.execute_spec_payload(spec_payload, query) + workspace = BoxWorkspaceSession( + self.ap.box_service, + skill_loader.build_skill_session_id(selected_skill, query), + host_path=package_root, + host_path_mode='rw', + ) + result = await workspace.execute_for_query( + query, + rewritten_command, + workdir=rewritten_workdir, + timeout_sec=parameters.get('timeout_sec'), + env=parameters.get('env'), + ) self._refresh_skill_from_disk(selected_skill) return result diff --git a/src/langbot/pkg/provider/tools/loaders/skill.py b/src/langbot/pkg/provider/tools/loaders/skill.py index 48e66c4b..e3ca9dbb 100644 --- a/src/langbot/pkg/provider/tools/loaders/skill.py +++ b/src/langbot/pkg/provider/tools/loaders/skill.py @@ -2,9 +2,10 @@ from __future__ import annotations import os import re -import textwrap import typing +from ....box import workspace as box_workspace + if typing.TYPE_CHECKING: from ....core import app from langbot_plugin.api.entities.events import pipeline_query @@ -13,23 +14,6 @@ ACTIVATED_SKILLS_KEY = '_activated_skills' PIPELINE_BOUND_SKILLS_KEY = '_pipeline_bound_skills' SKILL_MOUNT_PREFIX = '/workspace/.skills' _SKILL_MOUNT_PATTERN = re.compile(r'/workspace/\.skills/([A-Za-z0-9_-]+)') -_PYTHON_SKILL_MANIFESTS = ( - 'requirements.txt', - 'pyproject.toml', - 'setup.py', - 'setup.cfg', -) - - -def _normalize_host_path(path: str | None) -> str: - if path is None: - return '' - stripped = str(path).strip() - if not stripped: - return '' - return os.path.realpath(os.path.abspath(stripped)) - - def get_virtual_skill_mount_path(skill_name: str) -> str: return f'{SKILL_MOUNT_PREFIX}/{skill_name}' @@ -165,121 +149,8 @@ def build_skill_session_id(skill_data: dict, query: pipeline_query.Query) -> str def should_prepare_skill_python_env(package_root: str | None) -> bool: - normalized_root = _normalize_host_path(package_root) - if not normalized_root: - return False - if os.path.isdir(os.path.join(normalized_root, '.venv')): - return True - return any(os.path.isfile(os.path.join(normalized_root, filename)) for filename in _PYTHON_SKILL_MANIFESTS) + return box_workspace.should_prepare_python_env(package_root) def wrap_skill_command_with_python_env(command: str) -> str: - bootstrap = textwrap.dedent( - """ - set -e - - _LB_VENV_DIR="/workspace/.venv" - _LB_META_DIR="/workspace/.langbot" - _LB_META_FILE="$_LB_META_DIR/python-env.json" - _LB_LOCK_DIR="$_LB_META_DIR/python-env.lock" - _LB_TMP_DIR="/workspace/.tmp" - _LB_PIP_CACHE_DIR="/workspace/.cache/pip" - - mkdir -p "$_LB_META_DIR" "$_LB_TMP_DIR" "$_LB_PIP_CACHE_DIR" - export TMPDIR="$_LB_TMP_DIR" - export TEMP="$_LB_TMP_DIR" - export TMP="$_LB_TMP_DIR" - export PIP_CACHE_DIR="$_LB_PIP_CACHE_DIR" - - _lb_python_meta() { - python - <<'PY' - import hashlib - import json - import os - import sys - - root = "/workspace" - digest = hashlib.sha256() - manifest_files = [] - for rel in ("requirements.txt", "pyproject.toml", "setup.py", "setup.cfg"): - path = os.path.join(root, rel) - if not os.path.isfile(path): - continue - manifest_files.append(rel) - with open(path, "rb") as handle: - digest.update(rel.encode("utf-8")) - digest.update(b"\0") - digest.update(handle.read()) - digest.update(b"\0") - - print( - json.dumps( - { - "python_executable": sys.executable, - "python_version": list(sys.version_info[:3]), - "manifest_files": manifest_files, - "manifest_sha256": digest.hexdigest(), - }, - sort_keys=True, - ) - ) - PY - } - - _LB_CURRENT_META="$(_lb_python_meta)" - _LB_NEEDS_BOOTSTRAP=0 - - if [ ! -x "$_LB_VENV_DIR/bin/python" ]; then - _LB_NEEDS_BOOTSTRAP=1 - elif [ ! -f "$_LB_META_FILE" ]; then - _LB_NEEDS_BOOTSTRAP=1 - elif [ "$(cat "$_LB_META_FILE")" != "$_LB_CURRENT_META" ]; then - _LB_NEEDS_BOOTSTRAP=1 - fi - - if [ "$_LB_NEEDS_BOOTSTRAP" -eq 1 ]; then - _LB_LOCK_WAIT=0 - while ! mkdir "$_LB_LOCK_DIR" 2>/dev/null; do - if [ "$_LB_LOCK_WAIT" -ge 120 ]; then - echo "Timed out waiting for Python environment lock: $_LB_LOCK_DIR" >&2 - exit 1 - fi - sleep 1 - _LB_LOCK_WAIT=$((_LB_LOCK_WAIT + 1)) - done - - _lb_cleanup_lock() { - rmdir "$_LB_LOCK_DIR" >/dev/null 2>&1 || true - } - trap _lb_cleanup_lock EXIT INT TERM - - _LB_CURRENT_META="$(_lb_python_meta)" - _LB_NEEDS_BOOTSTRAP=0 - if [ ! -x "$_LB_VENV_DIR/bin/python" ]; then - _LB_NEEDS_BOOTSTRAP=1 - elif [ ! -f "$_LB_META_FILE" ]; then - _LB_NEEDS_BOOTSTRAP=1 - elif [ "$(cat "$_LB_META_FILE")" != "$_LB_CURRENT_META" ]; then - _LB_NEEDS_BOOTSTRAP=1 - fi - - if [ "$_LB_NEEDS_BOOTSTRAP" -eq 1 ]; then - rm -rf "$_LB_VENV_DIR" - python -m venv "$_LB_VENV_DIR" - - if [ -f /workspace/requirements.txt ]; then - "$_LB_VENV_DIR/bin/python" -m pip install -r /workspace/requirements.txt - elif [ -f /workspace/pyproject.toml ] || [ -f /workspace/setup.py ] || [ -f /workspace/setup.cfg ]; then - "$_LB_VENV_DIR/bin/python" -m pip install -e /workspace - fi - - printf '%s' "$_LB_CURRENT_META" > "$_LB_META_FILE" - fi - fi - - export VIRTUAL_ENV="$_LB_VENV_DIR" - export PATH="$_LB_VENV_DIR/bin:$PATH" - """ - ).strip() - - return f'{bootstrap}\n\n{command}' + return box_workspace.wrap_python_command_with_env(command).rstrip() diff --git a/tests/unit_tests/box/test_workspace.py b/tests/unit_tests/box/test_workspace.py new file mode 100644 index 00000000..ea78d7fc --- /dev/null +++ b/tests/unit_tests/box/test_workspace.py @@ -0,0 +1,144 @@ +from __future__ import annotations + +import os +import tempfile +from types import SimpleNamespace +from unittest.mock import AsyncMock, Mock + +import pytest + +from langbot.pkg.box.workspace import ( + BoxWorkspaceSession, + classify_python_workspace, + infer_workspace_host_path, + rewrite_mounted_path, + wrap_python_command_with_env, +) + + +def test_rewrite_mounted_path_translates_host_prefix(): + result = rewrite_mounted_path('/tmp/demo/project/app.py', '/tmp/demo/project') + assert result == '/workspace/app.py' + + +def test_infer_workspace_host_path_unwraps_virtualenv_bin_dir(): + with tempfile.TemporaryDirectory() as tmpdir: + project_root = os.path.join(tmpdir, 'project') + os.makedirs(os.path.join(project_root, '.venv', 'bin')) + python_bin = os.path.join(project_root, '.venv', 'bin', 'python') + script = os.path.join(project_root, 'server.py') + + with open(python_bin, 'w', encoding='utf-8') as handle: + handle.write('') + with open(script, 'w', encoding='utf-8') as handle: + handle.write('print("ok")\n') + + result = infer_workspace_host_path(python_bin, [script]) + + assert result == os.path.realpath(project_root) + + +def test_classify_python_workspace_detects_package_and_requirements(): + with tempfile.TemporaryDirectory() as tmpdir: + assert classify_python_workspace(tmpdir) is None + + with open(os.path.join(tmpdir, 'requirements.txt'), 'w', encoding='utf-8') as handle: + handle.write('requests\n') + assert classify_python_workspace(tmpdir) == 'requirements' + + with open(os.path.join(tmpdir, 'pyproject.toml'), 'w', encoding='utf-8') as handle: + handle.write('[project]\nname = "demo"\n') + assert classify_python_workspace(tmpdir) == 'package' + + +def test_wrap_python_command_with_env_contains_bootstrap_and_command(): + command = wrap_python_command_with_env('python script.py') + + assert 'python -m venv "$_LB_VENV_DIR"' in command + assert 'export VIRTUAL_ENV="$_LB_VENV_DIR"' in command + assert command.rstrip().endswith('python script.py') + + +@pytest.mark.asyncio +async def test_workspace_session_execute_for_query_uses_session_payload(): + box_service = SimpleNamespace(execute_spec_payload=AsyncMock(return_value={'ok': True})) + workspace = BoxWorkspaceSession( + box_service, + 'skill-person_123-demo', + host_path='/tmp/project', + host_path_mode='rw', + env={'FOO': 'bar'}, + ) + + query = SimpleNamespace(query_id='q1') + result = await workspace.execute_for_query(query, 'python run.py', workdir='/workspace', timeout_sec=30) + + assert result == {'ok': True} + payload = box_service.execute_spec_payload.await_args.args[0] + assert payload == { + 'session_id': 'skill-person_123-demo', + 'workdir': '/workspace', + 'env': {'FOO': 'bar'}, + 'host_path': '/tmp/project', + 'host_path_mode': 'rw', + 'cmd': 'python run.py', + 'timeout_sec': 30, + } + + +@pytest.mark.asyncio +async def test_workspace_session_start_managed_process_rewrites_command_and_args(): + box_service = SimpleNamespace(start_managed_process=AsyncMock(return_value={'status': 'running'})) + workspace = BoxWorkspaceSession( + box_service, + 'mcp-u1', + host_path='/tmp/project', + host_path_mode='ro', + ) + + result = await workspace.start_managed_process( + '/tmp/project/.venv/bin/python', + ['/tmp/project/server.py', '--config', '/tmp/project/config.json'], + env={'TOKEN': '1'}, + ) + + assert result == {'status': 'running'} + session_id = box_service.start_managed_process.await_args.args[0] + payload = box_service.start_managed_process.await_args.args[1] + assert session_id == 'mcp-u1' + assert payload == { + 'command': 'python', + 'args': ['/workspace/server.py', '--config', '/workspace/config.json'], + 'env': {'TOKEN': '1'}, + 'cwd': '/workspace', + } + + +def test_workspace_session_build_session_payload_keeps_generic_workspace_shape(): + workspace = BoxWorkspaceSession( + Mock(), + 'workspace-1', + host_path='/tmp/project', + host_path_mode='rw', + env={'FOO': 'bar'}, + network='on', + read_only_rootfs=False, + image='python:3.11', + cpus=1.0, + memory_mb=512, + pids_limit=128, + ) + + assert workspace.build_session_payload() == { + 'session_id': 'workspace-1', + 'workdir': '/workspace', + 'env': {'FOO': 'bar'}, + 'network': 'on', + 'read_only_rootfs': False, + 'host_path': '/tmp/project', + 'host_path_mode': 'rw', + 'image': 'python:3.11', + 'cpus': 1.0, + 'memory_mb': 512, + 'pids_limit': 128, + } diff --git a/tests/unit_tests/provider/test_mcp_box_integration.py b/tests/unit_tests/provider/test_mcp_box_integration.py index 23d1bbc0..acb08916 100644 --- a/tests/unit_tests/provider/test_mcp_box_integration.py +++ b/tests/unit_tests/provider/test_mcp_box_integration.py @@ -125,6 +125,8 @@ def mcp_module(): 'mcp.py', ) mcp_path = os.path.normpath(mcp_path) + pkg_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(mcp_path)))) + sys.modules['langbot.pkg'].__path__ = [pkg_root] sys.modules['langbot.pkg.provider.tools.loaders'].__path__ = [os.path.dirname(mcp_path)] spec = importlib.util.spec_from_file_location(mod_fqn, mcp_path) mod = importlib.util.module_from_spec(spec) @@ -136,6 +138,7 @@ def mcp_module(): # Cleanup sys.modules.pop(mod_fqn, None) sys.modules.pop('langbot.pkg.provider.tools.loaders.mcp_stdio', None) + sys.modules.pop('langbot.pkg.box.workspace', None) for name in reversed(list(saved)): if saved[name] is None: sys.modules.pop(name, None)