mirror of
https://github.com/langbot-app/LangBot.git
synced 2026-06-04 21:06:03 +00:00
refactor(box): introduce reusable workspace session helper
This commit is contained in:
@@ -1 +1,5 @@
|
||||
"""LangBot Box runtime package."""
|
||||
|
||||
from .workspace import BoxWorkspaceSession
|
||||
|
||||
__all__ = ['BoxWorkspaceSession']
|
||||
|
||||
404
src/langbot/pkg/box/workspace.py
Normal file
404
src/langbot/pkg/box/workspace.py
Normal file
@@ -0,0 +1,404 @@
|
||||
from __future__ import annotations
|
||||
"""Reusable workspace/session helpers built on top of Box.
|
||||
|
||||
This module is the middle layer between the raw Box runtime primitives and
|
||||
application-specific flows such as skills or MCP stdio.
|
||||
|
||||
It intentionally stays generic:
|
||||
- path and virtualenv rewriting are workspace concerns
|
||||
- Python project detection/bootstrap are workspace concerns
|
||||
- session exec / managed-process helpers are workspace concerns
|
||||
|
||||
Higher layers add their own semantics on top, for example:
|
||||
- skills choose a stable per-skill session id and use repeated exec
|
||||
- MCP stdio chooses how to prepare dependencies and attaches to a managed process
|
||||
"""
|
||||
|
||||
import os
|
||||
import textwrap
|
||||
from typing import Any
|
||||
|
||||
PYTHON_MANIFEST_FILES = (
|
||||
'requirements.txt',
|
||||
'pyproject.toml',
|
||||
'setup.py',
|
||||
'setup.cfg',
|
||||
)
|
||||
_VENV_DIRS = frozenset({'.venv', 'venv', 'env', '.env'})
|
||||
_VENV_BIN_DIRS = frozenset({'bin', 'Scripts'})
|
||||
|
||||
|
||||
def normalize_host_path(path: str | None) -> str:
|
||||
if path is None:
|
||||
return ''
|
||||
stripped = str(path).strip()
|
||||
if not stripped:
|
||||
return ''
|
||||
return os.path.realpath(os.path.abspath(stripped))
|
||||
|
||||
|
||||
def rewrite_mounted_path(path: str, host_path: str | None, *, mount_path: str = '/workspace') -> str:
|
||||
"""Translate a host path into the path visible inside the sandbox mount."""
|
||||
if not host_path or not path:
|
||||
return path
|
||||
normalized_host = os.path.realpath(host_path)
|
||||
if path.startswith(normalized_host + '/'):
|
||||
return mount_path + path[len(normalized_host) :]
|
||||
if path == normalized_host:
|
||||
return mount_path
|
||||
return path
|
||||
|
||||
|
||||
def unwrap_venv_path(directory: str) -> str:
|
||||
"""Collapse ``.../.venv/bin`` style paths back to the project root."""
|
||||
parts = directory.replace('\\', '/').split('/')
|
||||
for i in range(len(parts) - 1, 0, -1):
|
||||
if parts[i] in _VENV_BIN_DIRS and i >= 1:
|
||||
venv_dir = parts[i - 1]
|
||||
if venv_dir in _VENV_DIRS:
|
||||
project_root = '/'.join(parts[: i - 1])
|
||||
return project_root if project_root else '/'
|
||||
return directory
|
||||
|
||||
|
||||
def infer_workspace_host_path(command: str, args: list[str] | None = None) -> str | None:
|
||||
"""Infer the project/workspace root from absolute command/arg paths."""
|
||||
candidates: list[str] = []
|
||||
for part in [command, *(args or [])]:
|
||||
if not os.path.isabs(part):
|
||||
continue
|
||||
if os.path.exists(part):
|
||||
directory = os.path.dirname(part)
|
||||
candidates.append(os.path.realpath(unwrap_venv_path(directory)))
|
||||
if not candidates:
|
||||
return None
|
||||
common = os.path.commonpath(candidates)
|
||||
return common if common != '/' else None
|
||||
|
||||
|
||||
def rewrite_venv_command(command: str, host_path: str | None, *, mount_path: str = '/workspace') -> str:
|
||||
"""Rewrite host venv interpreters to plain ``python`` inside the sandbox.
|
||||
|
||||
Once a project is mounted into the sandbox, host virtualenv paths are no
|
||||
longer valid. For those paths we intentionally drop down to ``python`` and
|
||||
let the sandbox-side environment/bootstrap decide what interpreter to use.
|
||||
"""
|
||||
if not host_path or not command:
|
||||
return command
|
||||
normalized_host = os.path.realpath(host_path)
|
||||
if not command.startswith(normalized_host + '/'):
|
||||
return command
|
||||
rel = command[len(normalized_host) + 1 :]
|
||||
parts = rel.replace('\\', '/').split('/')
|
||||
if len(parts) >= 3 and parts[0] in _VENV_DIRS and parts[1] in _VENV_BIN_DIRS and parts[2].startswith('python'):
|
||||
return 'python'
|
||||
return rewrite_mounted_path(command, host_path, mount_path=mount_path)
|
||||
|
||||
|
||||
def list_python_manifest_files(host_path: str | None) -> list[str]:
|
||||
normalized_root = normalize_host_path(host_path)
|
||||
if not normalized_root:
|
||||
return []
|
||||
return [
|
||||
filename for filename in PYTHON_MANIFEST_FILES if os.path.isfile(os.path.join(normalized_root, filename))
|
||||
]
|
||||
|
||||
|
||||
def classify_python_workspace(host_path: str | None) -> str | None:
|
||||
"""Return the generic Python workspace shape, without app-specific policy."""
|
||||
manifest_files = set(list_python_manifest_files(host_path))
|
||||
if not manifest_files:
|
||||
return None
|
||||
if {'pyproject.toml', 'setup.py', 'setup.cfg'} & manifest_files:
|
||||
return 'package'
|
||||
if 'requirements.txt' in manifest_files:
|
||||
return 'requirements'
|
||||
return None
|
||||
|
||||
|
||||
def should_prepare_python_env(host_path: str | None) -> bool:
|
||||
normalized_root = normalize_host_path(host_path)
|
||||
if not normalized_root:
|
||||
return False
|
||||
if os.path.isdir(os.path.join(normalized_root, '.venv')):
|
||||
return True
|
||||
return bool(list_python_manifest_files(normalized_root))
|
||||
|
||||
|
||||
def wrap_python_command_with_env(command: str, *, mount_path: str = '/workspace') -> str:
|
||||
"""Wrap a command with a reusable sandbox-local Python env bootstrap.
|
||||
|
||||
This is the generic "workspace is a Python project" path used by mutable
|
||||
workspaces such as skills. Read-only installation strategies stay in the
|
||||
higher-level caller because they are application policy, not workspace
|
||||
semantics.
|
||||
"""
|
||||
bootstrap = textwrap.dedent(
|
||||
f"""
|
||||
set -e
|
||||
|
||||
_LB_VENV_DIR="{mount_path}/.venv"
|
||||
_LB_META_DIR="{mount_path}/.langbot"
|
||||
_LB_META_FILE="$_LB_META_DIR/python-env.json"
|
||||
_LB_LOCK_DIR="$_LB_META_DIR/python-env.lock"
|
||||
_LB_TMP_DIR="{mount_path}/.tmp"
|
||||
_LB_PIP_CACHE_DIR="{mount_path}/.cache/pip"
|
||||
|
||||
mkdir -p "$_LB_META_DIR" "$_LB_TMP_DIR" "$_LB_PIP_CACHE_DIR"
|
||||
export TMPDIR="$_LB_TMP_DIR"
|
||||
export TEMP="$_LB_TMP_DIR"
|
||||
export TMP="$_LB_TMP_DIR"
|
||||
export PIP_CACHE_DIR="$_LB_PIP_CACHE_DIR"
|
||||
|
||||
_lb_python_meta() {{
|
||||
python - <<'PY'
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
root = "{mount_path}"
|
||||
digest = hashlib.sha256()
|
||||
manifest_files = []
|
||||
for rel in ("requirements.txt", "pyproject.toml", "setup.py", "setup.cfg"):
|
||||
path = os.path.join(root, rel)
|
||||
if not os.path.isfile(path):
|
||||
continue
|
||||
manifest_files.append(rel)
|
||||
with open(path, "rb") as handle:
|
||||
digest.update(rel.encode("utf-8"))
|
||||
digest.update(b"\\0")
|
||||
digest.update(handle.read())
|
||||
digest.update(b"\\0")
|
||||
|
||||
print(
|
||||
json.dumps(
|
||||
{{
|
||||
"python_executable": sys.executable,
|
||||
"python_version": list(sys.version_info[:3]),
|
||||
"manifest_files": manifest_files,
|
||||
"manifest_sha256": digest.hexdigest(),
|
||||
}},
|
||||
sort_keys=True,
|
||||
)
|
||||
)
|
||||
PY
|
||||
}}
|
||||
|
||||
_LB_CURRENT_META="$(_lb_python_meta)"
|
||||
_LB_NEEDS_BOOTSTRAP=0
|
||||
|
||||
if [ ! -x "$_LB_VENV_DIR/bin/python" ]; then
|
||||
_LB_NEEDS_BOOTSTRAP=1
|
||||
elif [ ! -f "$_LB_META_FILE" ]; then
|
||||
_LB_NEEDS_BOOTSTRAP=1
|
||||
elif [ "$(cat "$_LB_META_FILE")" != "$_LB_CURRENT_META" ]; then
|
||||
_LB_NEEDS_BOOTSTRAP=1
|
||||
fi
|
||||
|
||||
if [ "$_LB_NEEDS_BOOTSTRAP" -eq 1 ]; then
|
||||
_LB_LOCK_WAIT=0
|
||||
while ! mkdir "$_LB_LOCK_DIR" 2>/dev/null; do
|
||||
if [ "$_LB_LOCK_WAIT" -ge 120 ]; then
|
||||
echo "Timed out waiting for Python environment lock: $_LB_LOCK_DIR" >&2
|
||||
exit 1
|
||||
fi
|
||||
sleep 1
|
||||
_LB_LOCK_WAIT=$((_LB_LOCK_WAIT + 1))
|
||||
done
|
||||
|
||||
_lb_cleanup_lock() {{
|
||||
rmdir "$_LB_LOCK_DIR" >/dev/null 2>&1 || true
|
||||
}}
|
||||
trap _lb_cleanup_lock EXIT INT TERM
|
||||
|
||||
_LB_CURRENT_META="$(_lb_python_meta)"
|
||||
_LB_NEEDS_BOOTSTRAP=0
|
||||
if [ ! -x "$_LB_VENV_DIR/bin/python" ]; then
|
||||
_LB_NEEDS_BOOTSTRAP=1
|
||||
elif [ ! -f "$_LB_META_FILE" ]; then
|
||||
_LB_NEEDS_BOOTSTRAP=1
|
||||
elif [ "$(cat "$_LB_META_FILE")" != "$_LB_CURRENT_META" ]; then
|
||||
_LB_NEEDS_BOOTSTRAP=1
|
||||
fi
|
||||
|
||||
if [ "$_LB_NEEDS_BOOTSTRAP" -eq 1 ]; then
|
||||
rm -rf "$_LB_VENV_DIR"
|
||||
python -m venv "$_LB_VENV_DIR"
|
||||
. "$_LB_VENV_DIR/bin/activate"
|
||||
python -m pip install --upgrade pip setuptools wheel
|
||||
if [ -f "{mount_path}/requirements.txt" ]; then
|
||||
python -m pip install -r "{mount_path}/requirements.txt"
|
||||
elif [ -f "{mount_path}/pyproject.toml" ] || [ -f "{mount_path}/setup.py" ] || [ -f "{mount_path}/setup.cfg" ]; then
|
||||
python -m pip install "{mount_path}"
|
||||
fi
|
||||
printf '%s' "$_LB_CURRENT_META" > "$_LB_META_FILE"
|
||||
fi
|
||||
fi
|
||||
|
||||
export VIRTUAL_ENV="$_LB_VENV_DIR"
|
||||
export PATH="$_LB_VENV_DIR/bin:$PATH"
|
||||
{command}
|
||||
"""
|
||||
).strip()
|
||||
return bootstrap + '\n'
|
||||
|
||||
|
||||
class BoxWorkspaceSession:
|
||||
"""High-level handle for one reusable workspace-backed Box session.
|
||||
|
||||
The Box runtime already understands sessions and managed processes. This
|
||||
wrapper adds LangBot's workspace-centric view on top: a mounted host path,
|
||||
a stable ``session_id``, optional environment defaults, and convenience
|
||||
helpers for exec or long-running processes inside that workspace.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
box_service,
|
||||
session_id: str,
|
||||
*,
|
||||
host_path: str | None = None,
|
||||
host_path_mode: str = 'rw',
|
||||
workdir: str = '/workspace',
|
||||
env: dict[str, str] | None = None,
|
||||
mount_path: str = '/workspace',
|
||||
network: str | None = None,
|
||||
read_only_rootfs: bool | None = None,
|
||||
image: str | None = None,
|
||||
cpus: float | None = None,
|
||||
memory_mb: int | None = None,
|
||||
pids_limit: int | None = None,
|
||||
):
|
||||
self.box_service = box_service
|
||||
self.session_id = session_id
|
||||
self.host_path = host_path
|
||||
self.host_path_mode = host_path_mode
|
||||
self.workdir = workdir
|
||||
self.env = dict(env or {})
|
||||
self.mount_path = mount_path
|
||||
self.network = network
|
||||
self.read_only_rootfs = read_only_rootfs
|
||||
self.image = image
|
||||
self.cpus = cpus
|
||||
self.memory_mb = memory_mb
|
||||
self.pids_limit = pids_limit
|
||||
|
||||
def rewrite_path(self, path: str) -> str:
|
||||
return rewrite_mounted_path(path, self.host_path, mount_path=self.mount_path)
|
||||
|
||||
def rewrite_venv_command(self, command: str) -> str:
|
||||
return rewrite_venv_command(command, self.host_path, mount_path=self.mount_path)
|
||||
|
||||
def build_session_payload(self) -> dict[str, Any]:
|
||||
# Keep this payload generic so callers can reuse the same workspace
|
||||
# handle for plain exec, file-producing tasks, or managed processes.
|
||||
payload: dict[str, Any] = {
|
||||
'session_id': self.session_id,
|
||||
'workdir': self.workdir,
|
||||
'env': self.env,
|
||||
}
|
||||
if self.network is not None:
|
||||
payload['network'] = self.network
|
||||
if self.read_only_rootfs is not None:
|
||||
payload['read_only_rootfs'] = self.read_only_rootfs
|
||||
if self.host_path:
|
||||
payload['host_path'] = self.host_path
|
||||
payload['host_path_mode'] = self.host_path_mode
|
||||
for key in ('image', 'cpus', 'memory_mb', 'pids_limit'):
|
||||
value = getattr(self, key)
|
||||
if value is not None:
|
||||
payload[key] = value
|
||||
return payload
|
||||
|
||||
def build_exec_payload(
|
||||
self,
|
||||
cmd: str,
|
||||
*,
|
||||
workdir: str | None = None,
|
||||
env: dict[str, str] | None = None,
|
||||
timeout_sec: int | None = None,
|
||||
) -> dict[str, Any]:
|
||||
# Exec payloads inherit the session-level workspace config, then layer
|
||||
# per-call command/workdir/env overrides on top.
|
||||
payload = self.build_session_payload()
|
||||
payload['cmd'] = cmd
|
||||
payload['workdir'] = workdir or self.workdir
|
||||
if timeout_sec is not None:
|
||||
payload['timeout_sec'] = timeout_sec
|
||||
resolved_env = self.env if env is None else env
|
||||
if resolved_env:
|
||||
payload['env'] = resolved_env
|
||||
elif 'env' in payload and not payload['env']:
|
||||
payload.pop('env')
|
||||
return payload
|
||||
|
||||
async def execute_raw(
|
||||
self,
|
||||
cmd: str,
|
||||
*,
|
||||
workdir: str | None = None,
|
||||
env: dict[str, str] | None = None,
|
||||
timeout_sec: int | None = None,
|
||||
):
|
||||
payload = self.build_exec_payload(cmd, workdir=workdir, env=env, timeout_sec=timeout_sec)
|
||||
return await self.box_service.client.execute(self.box_service.build_spec(payload))
|
||||
|
||||
async def execute_for_query(
|
||||
self,
|
||||
query,
|
||||
cmd: str,
|
||||
*,
|
||||
workdir: str | None = None,
|
||||
env: dict[str, str] | None = None,
|
||||
timeout_sec: int | None = None,
|
||||
) -> dict:
|
||||
payload = self.build_exec_payload(cmd, workdir=workdir, env=env, timeout_sec=timeout_sec)
|
||||
return await self.box_service.execute_spec_payload(payload, query)
|
||||
|
||||
async def create_session(self):
|
||||
return await self.box_service.create_session(self.build_session_payload())
|
||||
|
||||
def build_process_payload(
|
||||
self,
|
||||
command: str,
|
||||
args: list[str] | None = None,
|
||||
*,
|
||||
env: dict[str, str] | None = None,
|
||||
cwd: str = '/workspace',
|
||||
) -> dict[str, Any]:
|
||||
# Managed processes run inside the same workspace model as one-shot
|
||||
# execs, so path/venv rewriting is shared here.
|
||||
normalized_command = command
|
||||
normalized_args = list(args or [])
|
||||
normalized_cwd = cwd
|
||||
if self.host_path:
|
||||
normalized_command = self.rewrite_venv_command(command)
|
||||
normalized_args = [self.rewrite_path(arg) for arg in normalized_args]
|
||||
normalized_cwd = self.rewrite_path(cwd)
|
||||
return {
|
||||
'command': normalized_command,
|
||||
'args': normalized_args,
|
||||
'env': dict(env or {}),
|
||||
'cwd': normalized_cwd,
|
||||
}
|
||||
|
||||
async def start_managed_process(
|
||||
self,
|
||||
command: str,
|
||||
args: list[str] | None = None,
|
||||
*,
|
||||
env: dict[str, str] | None = None,
|
||||
cwd: str = '/workspace',
|
||||
):
|
||||
payload = self.build_process_payload(command, args, env=env, cwd=cwd)
|
||||
return await self.box_service.start_managed_process(self.session_id, payload)
|
||||
|
||||
async def get_managed_process(self):
|
||||
return await self.box_service.get_managed_process(self.session_id)
|
||||
|
||||
def get_managed_process_websocket_url(self) -> str:
|
||||
return self.box_service.get_managed_process_websocket_url(self.session_id)
|
||||
|
||||
async def cleanup(self) -> None:
|
||||
await self.box_service.client.delete_session(self.session_id)
|
||||
@@ -1,13 +1,20 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import enum
|
||||
import os
|
||||
import asyncio
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
import pydantic
|
||||
from mcp import ClientSession
|
||||
from mcp.client.websocket import websocket_client
|
||||
from ....box.workspace import (
|
||||
BoxWorkspaceSession,
|
||||
classify_python_workspace,
|
||||
infer_workspace_host_path,
|
||||
rewrite_mounted_path,
|
||||
rewrite_venv_command,
|
||||
unwrap_venv_path,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .mcp import RuntimeMCPSession
|
||||
@@ -25,10 +32,6 @@ class MCPSessionErrorPhase(enum.Enum):
|
||||
TOOL_CALL = 'tool_call'
|
||||
|
||||
|
||||
_VENV_DIRS = frozenset({'.venv', 'venv', 'env', '.env'})
|
||||
_VENV_BIN_DIRS = frozenset({'bin', 'Scripts'})
|
||||
|
||||
|
||||
class MCPServerBoxConfig(pydantic.BaseModel):
|
||||
"""Structured configuration for running an MCP server inside a Box container."""
|
||||
|
||||
@@ -65,6 +68,21 @@ class BoxStdioSessionRuntime:
|
||||
def server_config(self) -> dict:
|
||||
return self.owner.server_config
|
||||
|
||||
def _build_workspace(self) -> BoxWorkspaceSession:
|
||||
return BoxWorkspaceSession(
|
||||
self.ap.box_service,
|
||||
self.owner._build_box_session_id(),
|
||||
host_path=self.resolve_host_path(),
|
||||
host_path_mode=self.config.host_path_mode,
|
||||
env=self.config.env,
|
||||
network=self.config.network,
|
||||
read_only_rootfs=self.config.read_only_rootfs if self.config.read_only_rootfs is not None else False,
|
||||
image=self.config.image,
|
||||
cpus=self.config.cpus,
|
||||
memory_mb=self.config.memory_mb,
|
||||
pids_limit=self.config.pids_limit,
|
||||
)
|
||||
|
||||
def uses_box_stdio(self) -> bool:
|
||||
if self.server_config.get('mode') != 'stdio':
|
||||
return False
|
||||
@@ -74,13 +92,11 @@ class BoxStdioSessionRuntime:
|
||||
return False
|
||||
|
||||
async def initialize(self) -> None:
|
||||
box_service = self.ap.box_service
|
||||
session_id = self.owner._build_box_session_id()
|
||||
host_path = self.resolve_host_path()
|
||||
session_payload = self.build_box_session_payload(session_id, host_path)
|
||||
workspace = self._build_workspace()
|
||||
host_path = workspace.host_path
|
||||
|
||||
try:
|
||||
await box_service.create_session(session_payload)
|
||||
await workspace.create_session()
|
||||
except Exception:
|
||||
self.owner.error_phase = MCPSessionErrorPhase.SESSION_CREATE
|
||||
raise
|
||||
@@ -89,11 +105,11 @@ class BoxStdioSessionRuntime:
|
||||
install_cmd = self.owner._detect_install_command(host_path)
|
||||
if install_cmd:
|
||||
self.ap.logger.info(f'MCP server {self.server_name}: installing dependencies in Box with: {install_cmd}')
|
||||
exec_payload = dict(session_payload)
|
||||
exec_payload['cmd'] = install_cmd
|
||||
exec_payload['timeout_sec'] = self.config.startup_timeout_sec or 120
|
||||
try:
|
||||
result = await box_service.client.execute(box_service.build_spec(exec_payload))
|
||||
result = await workspace.execute_raw(
|
||||
install_cmd,
|
||||
timeout_sec=self.config.startup_timeout_sec or 120,
|
||||
)
|
||||
except Exception:
|
||||
self.owner.error_phase = MCPSessionErrorPhase.DEP_INSTALL
|
||||
raise
|
||||
@@ -103,13 +119,17 @@ class BoxStdioSessionRuntime:
|
||||
raise Exception(f'Dependency install failed (exit code {result.exit_code}): {stderr_preview}')
|
||||
|
||||
try:
|
||||
await box_service.start_managed_process(session_id, self.build_box_process_payload(host_path))
|
||||
await workspace.start_managed_process(
|
||||
self.server_config['command'],
|
||||
self.server_config.get('args', []),
|
||||
env=self.server_config.get('env', {}),
|
||||
)
|
||||
except Exception:
|
||||
self.owner.error_phase = MCPSessionErrorPhase.PROCESS_START
|
||||
raise
|
||||
|
||||
try:
|
||||
websocket_url = box_service.get_managed_process_websocket_url(session_id)
|
||||
websocket_url = workspace.get_managed_process_websocket_url()
|
||||
transport = await self.owner.exit_stack.enter_async_context(websocket_client(websocket_url))
|
||||
read_stream, write_stream = transport
|
||||
self.owner.session = await self.owner.exit_stack.enter_async_context(ClientSession(read_stream, write_stream))
|
||||
@@ -126,11 +146,11 @@ class BoxStdioSessionRuntime:
|
||||
async def monitor_process_health(self) -> None:
|
||||
from langbot_plugin.box.models import BoxManagedProcessStatus
|
||||
|
||||
session_id = self.owner._build_box_session_id()
|
||||
workspace = self._build_workspace()
|
||||
consecutive_errors = 0
|
||||
while not self.owner._shutdown_event.is_set():
|
||||
try:
|
||||
info = await self.ap.box_service.get_managed_process(session_id)
|
||||
info = await workspace.get_managed_process()
|
||||
if isinstance(info, dict):
|
||||
status = info.get('status', '')
|
||||
else:
|
||||
@@ -154,120 +174,58 @@ class BoxStdioSessionRuntime:
|
||||
return
|
||||
|
||||
try:
|
||||
await self.ap.box_service.client.delete_session(self.owner._build_box_session_id())
|
||||
await self._build_workspace().cleanup()
|
||||
except Exception as exc:
|
||||
self.ap.logger.warning(f'Failed to cleanup Box session for MCP server {self.server_name}: {exc}')
|
||||
|
||||
def rewrite_path(self, path: str, host_path: str | None) -> str:
|
||||
if not host_path or not path:
|
||||
return path
|
||||
normalized_host = os.path.realpath(host_path)
|
||||
if path.startswith(normalized_host + '/'):
|
||||
return '/workspace' + path[len(normalized_host) :]
|
||||
if path == normalized_host:
|
||||
return '/workspace'
|
||||
return path
|
||||
return rewrite_mounted_path(path, host_path)
|
||||
|
||||
def infer_host_path(self) -> str | None:
|
||||
candidates = []
|
||||
parts = [self.server_config.get('command', '')] + self.server_config.get('args', [])
|
||||
for part in parts:
|
||||
if not os.path.isabs(part):
|
||||
continue
|
||||
if os.path.exists(part):
|
||||
directory = os.path.dirname(part)
|
||||
directory = self.unwrap_venv_path(directory)
|
||||
candidates.append(os.path.realpath(directory))
|
||||
if not candidates:
|
||||
return None
|
||||
common = os.path.commonpath(candidates)
|
||||
return common if common != '/' else None
|
||||
return infer_workspace_host_path(self.server_config.get('command', ''), self.server_config.get('args', []))
|
||||
|
||||
@staticmethod
|
||||
def unwrap_venv_path(directory: str) -> str:
|
||||
parts = directory.replace('\\', '/').split('/')
|
||||
for i in range(len(parts) - 1, 0, -1):
|
||||
if parts[i] in _VENV_BIN_DIRS and i >= 1:
|
||||
venv_dir = parts[i - 1]
|
||||
if venv_dir in _VENV_DIRS:
|
||||
project_root = '/'.join(parts[: i - 1])
|
||||
return project_root if project_root else '/'
|
||||
return directory
|
||||
return unwrap_venv_path(directory)
|
||||
|
||||
def resolve_host_path(self) -> str | None:
|
||||
return self.config.host_path or self.infer_host_path()
|
||||
|
||||
@staticmethod
|
||||
def detect_install_command(host_path: str) -> str | None:
|
||||
copy_and_install = (
|
||||
'mkdir -p /opt/_mcp_src'
|
||||
' && tar -C /workspace'
|
||||
' --exclude=.venv --exclude=.git --exclude=__pycache__'
|
||||
' --exclude=node_modules --exclude=.tox --exclude=.nox'
|
||||
' --exclude="*.egg-info" --exclude=.uv-cache'
|
||||
' -cf - .'
|
||||
' | tar -C /opt/_mcp_src -xf -'
|
||||
' && pip install --no-cache-dir /opt/_mcp_src'
|
||||
' && rm -rf /opt/_mcp_src'
|
||||
)
|
||||
install_requirements = 'pip install --no-cache-dir -r /workspace/requirements.txt'
|
||||
|
||||
if os.path.isfile(os.path.join(host_path, 'pyproject.toml')):
|
||||
return copy_and_install
|
||||
if os.path.isfile(os.path.join(host_path, 'setup.py')):
|
||||
return copy_and_install
|
||||
if os.path.isfile(os.path.join(host_path, 'requirements.txt')):
|
||||
return install_requirements
|
||||
workspace_kind = classify_python_workspace(host_path)
|
||||
if workspace_kind == 'package':
|
||||
return (
|
||||
'mkdir -p /opt/_lb_src'
|
||||
' && tar -C /workspace'
|
||||
' --exclude=.venv --exclude=.git --exclude=__pycache__'
|
||||
' --exclude=node_modules --exclude=.tox --exclude=.nox'
|
||||
' --exclude="*.egg-info" --exclude=.uv-cache'
|
||||
' -cf - .'
|
||||
' | tar -C /opt/_lb_src -xf -'
|
||||
' && pip install --no-cache-dir /opt/_lb_src'
|
||||
' && rm -rf /opt/_lb_src'
|
||||
)
|
||||
if workspace_kind == 'requirements':
|
||||
return 'pip install --no-cache-dir -r /workspace/requirements.txt'
|
||||
return None
|
||||
|
||||
def build_box_session_payload(self, session_id: str, host_path: str | None = None) -> dict[str, Any]:
|
||||
if host_path is None:
|
||||
host_path = self.resolve_host_path()
|
||||
|
||||
payload: dict[str, Any] = {
|
||||
'session_id': session_id,
|
||||
'workdir': '/workspace',
|
||||
'env': self.config.env,
|
||||
'network': self.config.network,
|
||||
'read_only_rootfs': self.config.read_only_rootfs if self.config.read_only_rootfs is not None else False,
|
||||
}
|
||||
if host_path:
|
||||
payload['host_path'] = host_path
|
||||
payload['host_path_mode'] = self.config.host_path_mode
|
||||
for key in ('image', 'cpus', 'memory_mb', 'pids_limit'):
|
||||
value = getattr(self.config, key)
|
||||
if value is not None:
|
||||
payload[key] = value if not isinstance(value, enum.Enum) else value.value
|
||||
return payload
|
||||
workspace = self._build_workspace()
|
||||
workspace.session_id = session_id
|
||||
if host_path is not None:
|
||||
workspace.host_path = host_path
|
||||
return workspace.build_session_payload()
|
||||
|
||||
def build_box_process_payload(self, host_path: str | None = None) -> dict[str, Any]:
|
||||
if host_path is None:
|
||||
host_path = self.resolve_host_path()
|
||||
|
||||
command = self.server_config['command']
|
||||
args = self.server_config.get('args', [])
|
||||
cwd = '/workspace'
|
||||
|
||||
if host_path:
|
||||
command = self.rewrite_venv_command(command, host_path)
|
||||
args = [self.rewrite_path(arg, host_path) for arg in args]
|
||||
cwd = self.rewrite_path(cwd, host_path)
|
||||
|
||||
return {
|
||||
'command': command,
|
||||
'args': args,
|
||||
'env': self.server_config.get('env', {}),
|
||||
'cwd': cwd,
|
||||
}
|
||||
workspace = self._build_workspace()
|
||||
if host_path is not None:
|
||||
workspace.host_path = host_path
|
||||
return workspace.build_process_payload(
|
||||
self.server_config['command'],
|
||||
self.server_config.get('args', []),
|
||||
env=self.server_config.get('env', {}),
|
||||
)
|
||||
|
||||
def rewrite_venv_command(self, command: str, host_path: str) -> str:
|
||||
if not host_path or not command:
|
||||
return command
|
||||
normalized_host = os.path.realpath(host_path)
|
||||
if not command.startswith(normalized_host + '/'):
|
||||
return command
|
||||
rel = command[len(normalized_host) + 1 :]
|
||||
parts = rel.replace('\\', '/').split('/')
|
||||
if len(parts) >= 3 and parts[0] in _VENV_DIRS and parts[1] in _VENV_BIN_DIRS and parts[2].startswith('python'):
|
||||
return 'python'
|
||||
return self.rewrite_path(command, host_path)
|
||||
return rewrite_venv_command(command, host_path)
|
||||
|
||||
@@ -6,6 +6,7 @@ import os
|
||||
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
|
||||
from langbot_plugin.api.entities.events import pipeline_query
|
||||
|
||||
from ....box.workspace import BoxWorkspaceSession
|
||||
from .. import loader
|
||||
from . import skill as skill_loader
|
||||
|
||||
@@ -94,18 +95,19 @@ class NativeToolLoader(loader.ToolLoader):
|
||||
if skill_loader.should_prepare_skill_python_env(package_root):
|
||||
rewritten_command = skill_loader.wrap_skill_command_with_python_env(rewritten_command)
|
||||
|
||||
spec_payload: dict = {
|
||||
'cmd': rewritten_command,
|
||||
'workdir': rewritten_workdir,
|
||||
'host_path': package_root,
|
||||
'host_path_mode': 'rw',
|
||||
'session_id': skill_loader.build_skill_session_id(selected_skill, query),
|
||||
}
|
||||
for key in ('timeout_sec', 'env'):
|
||||
if key in parameters:
|
||||
spec_payload[key] = parameters[key]
|
||||
|
||||
result = await self.ap.box_service.execute_spec_payload(spec_payload, query)
|
||||
workspace = BoxWorkspaceSession(
|
||||
self.ap.box_service,
|
||||
skill_loader.build_skill_session_id(selected_skill, query),
|
||||
host_path=package_root,
|
||||
host_path_mode='rw',
|
||||
)
|
||||
result = await workspace.execute_for_query(
|
||||
query,
|
||||
rewritten_command,
|
||||
workdir=rewritten_workdir,
|
||||
timeout_sec=parameters.get('timeout_sec'),
|
||||
env=parameters.get('env'),
|
||||
)
|
||||
self._refresh_skill_from_disk(selected_skill)
|
||||
return result
|
||||
|
||||
|
||||
@@ -2,9 +2,10 @@ from __future__ import annotations
|
||||
|
||||
import os
|
||||
import re
|
||||
import textwrap
|
||||
import typing
|
||||
|
||||
from ....box import workspace as box_workspace
|
||||
|
||||
if typing.TYPE_CHECKING:
|
||||
from ....core import app
|
||||
from langbot_plugin.api.entities.events import pipeline_query
|
||||
@@ -13,23 +14,6 @@ ACTIVATED_SKILLS_KEY = '_activated_skills'
|
||||
PIPELINE_BOUND_SKILLS_KEY = '_pipeline_bound_skills'
|
||||
SKILL_MOUNT_PREFIX = '/workspace/.skills'
|
||||
_SKILL_MOUNT_PATTERN = re.compile(r'/workspace/\.skills/([A-Za-z0-9_-]+)')
|
||||
_PYTHON_SKILL_MANIFESTS = (
|
||||
'requirements.txt',
|
||||
'pyproject.toml',
|
||||
'setup.py',
|
||||
'setup.cfg',
|
||||
)
|
||||
|
||||
|
||||
def _normalize_host_path(path: str | None) -> str:
|
||||
if path is None:
|
||||
return ''
|
||||
stripped = str(path).strip()
|
||||
if not stripped:
|
||||
return ''
|
||||
return os.path.realpath(os.path.abspath(stripped))
|
||||
|
||||
|
||||
def get_virtual_skill_mount_path(skill_name: str) -> str:
|
||||
return f'{SKILL_MOUNT_PREFIX}/{skill_name}'
|
||||
|
||||
@@ -165,121 +149,8 @@ def build_skill_session_id(skill_data: dict, query: pipeline_query.Query) -> str
|
||||
|
||||
|
||||
def should_prepare_skill_python_env(package_root: str | None) -> bool:
|
||||
normalized_root = _normalize_host_path(package_root)
|
||||
if not normalized_root:
|
||||
return False
|
||||
if os.path.isdir(os.path.join(normalized_root, '.venv')):
|
||||
return True
|
||||
return any(os.path.isfile(os.path.join(normalized_root, filename)) for filename in _PYTHON_SKILL_MANIFESTS)
|
||||
return box_workspace.should_prepare_python_env(package_root)
|
||||
|
||||
|
||||
def wrap_skill_command_with_python_env(command: str) -> str:
|
||||
bootstrap = textwrap.dedent(
|
||||
"""
|
||||
set -e
|
||||
|
||||
_LB_VENV_DIR="/workspace/.venv"
|
||||
_LB_META_DIR="/workspace/.langbot"
|
||||
_LB_META_FILE="$_LB_META_DIR/python-env.json"
|
||||
_LB_LOCK_DIR="$_LB_META_DIR/python-env.lock"
|
||||
_LB_TMP_DIR="/workspace/.tmp"
|
||||
_LB_PIP_CACHE_DIR="/workspace/.cache/pip"
|
||||
|
||||
mkdir -p "$_LB_META_DIR" "$_LB_TMP_DIR" "$_LB_PIP_CACHE_DIR"
|
||||
export TMPDIR="$_LB_TMP_DIR"
|
||||
export TEMP="$_LB_TMP_DIR"
|
||||
export TMP="$_LB_TMP_DIR"
|
||||
export PIP_CACHE_DIR="$_LB_PIP_CACHE_DIR"
|
||||
|
||||
_lb_python_meta() {
|
||||
python - <<'PY'
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
root = "/workspace"
|
||||
digest = hashlib.sha256()
|
||||
manifest_files = []
|
||||
for rel in ("requirements.txt", "pyproject.toml", "setup.py", "setup.cfg"):
|
||||
path = os.path.join(root, rel)
|
||||
if not os.path.isfile(path):
|
||||
continue
|
||||
manifest_files.append(rel)
|
||||
with open(path, "rb") as handle:
|
||||
digest.update(rel.encode("utf-8"))
|
||||
digest.update(b"\0")
|
||||
digest.update(handle.read())
|
||||
digest.update(b"\0")
|
||||
|
||||
print(
|
||||
json.dumps(
|
||||
{
|
||||
"python_executable": sys.executable,
|
||||
"python_version": list(sys.version_info[:3]),
|
||||
"manifest_files": manifest_files,
|
||||
"manifest_sha256": digest.hexdigest(),
|
||||
},
|
||||
sort_keys=True,
|
||||
)
|
||||
)
|
||||
PY
|
||||
}
|
||||
|
||||
_LB_CURRENT_META="$(_lb_python_meta)"
|
||||
_LB_NEEDS_BOOTSTRAP=0
|
||||
|
||||
if [ ! -x "$_LB_VENV_DIR/bin/python" ]; then
|
||||
_LB_NEEDS_BOOTSTRAP=1
|
||||
elif [ ! -f "$_LB_META_FILE" ]; then
|
||||
_LB_NEEDS_BOOTSTRAP=1
|
||||
elif [ "$(cat "$_LB_META_FILE")" != "$_LB_CURRENT_META" ]; then
|
||||
_LB_NEEDS_BOOTSTRAP=1
|
||||
fi
|
||||
|
||||
if [ "$_LB_NEEDS_BOOTSTRAP" -eq 1 ]; then
|
||||
_LB_LOCK_WAIT=0
|
||||
while ! mkdir "$_LB_LOCK_DIR" 2>/dev/null; do
|
||||
if [ "$_LB_LOCK_WAIT" -ge 120 ]; then
|
||||
echo "Timed out waiting for Python environment lock: $_LB_LOCK_DIR" >&2
|
||||
exit 1
|
||||
fi
|
||||
sleep 1
|
||||
_LB_LOCK_WAIT=$((_LB_LOCK_WAIT + 1))
|
||||
done
|
||||
|
||||
_lb_cleanup_lock() {
|
||||
rmdir "$_LB_LOCK_DIR" >/dev/null 2>&1 || true
|
||||
}
|
||||
trap _lb_cleanup_lock EXIT INT TERM
|
||||
|
||||
_LB_CURRENT_META="$(_lb_python_meta)"
|
||||
_LB_NEEDS_BOOTSTRAP=0
|
||||
if [ ! -x "$_LB_VENV_DIR/bin/python" ]; then
|
||||
_LB_NEEDS_BOOTSTRAP=1
|
||||
elif [ ! -f "$_LB_META_FILE" ]; then
|
||||
_LB_NEEDS_BOOTSTRAP=1
|
||||
elif [ "$(cat "$_LB_META_FILE")" != "$_LB_CURRENT_META" ]; then
|
||||
_LB_NEEDS_BOOTSTRAP=1
|
||||
fi
|
||||
|
||||
if [ "$_LB_NEEDS_BOOTSTRAP" -eq 1 ]; then
|
||||
rm -rf "$_LB_VENV_DIR"
|
||||
python -m venv "$_LB_VENV_DIR"
|
||||
|
||||
if [ -f /workspace/requirements.txt ]; then
|
||||
"$_LB_VENV_DIR/bin/python" -m pip install -r /workspace/requirements.txt
|
||||
elif [ -f /workspace/pyproject.toml ] || [ -f /workspace/setup.py ] || [ -f /workspace/setup.cfg ]; then
|
||||
"$_LB_VENV_DIR/bin/python" -m pip install -e /workspace
|
||||
fi
|
||||
|
||||
printf '%s' "$_LB_CURRENT_META" > "$_LB_META_FILE"
|
||||
fi
|
||||
fi
|
||||
|
||||
export VIRTUAL_ENV="$_LB_VENV_DIR"
|
||||
export PATH="$_LB_VENV_DIR/bin:$PATH"
|
||||
"""
|
||||
).strip()
|
||||
|
||||
return f'{bootstrap}\n\n{command}'
|
||||
return box_workspace.wrap_python_command_with_env(command).rstrip()
|
||||
|
||||
144
tests/unit_tests/box/test_workspace.py
Normal file
144
tests/unit_tests/box/test_workspace.py
Normal file
@@ -0,0 +1,144 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import AsyncMock, Mock
|
||||
|
||||
import pytest
|
||||
|
||||
from langbot.pkg.box.workspace import (
|
||||
BoxWorkspaceSession,
|
||||
classify_python_workspace,
|
||||
infer_workspace_host_path,
|
||||
rewrite_mounted_path,
|
||||
wrap_python_command_with_env,
|
||||
)
|
||||
|
||||
|
||||
def test_rewrite_mounted_path_translates_host_prefix():
|
||||
result = rewrite_mounted_path('/tmp/demo/project/app.py', '/tmp/demo/project')
|
||||
assert result == '/workspace/app.py'
|
||||
|
||||
|
||||
def test_infer_workspace_host_path_unwraps_virtualenv_bin_dir():
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
project_root = os.path.join(tmpdir, 'project')
|
||||
os.makedirs(os.path.join(project_root, '.venv', 'bin'))
|
||||
python_bin = os.path.join(project_root, '.venv', 'bin', 'python')
|
||||
script = os.path.join(project_root, 'server.py')
|
||||
|
||||
with open(python_bin, 'w', encoding='utf-8') as handle:
|
||||
handle.write('')
|
||||
with open(script, 'w', encoding='utf-8') as handle:
|
||||
handle.write('print("ok")\n')
|
||||
|
||||
result = infer_workspace_host_path(python_bin, [script])
|
||||
|
||||
assert result == os.path.realpath(project_root)
|
||||
|
||||
|
||||
def test_classify_python_workspace_detects_package_and_requirements():
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
assert classify_python_workspace(tmpdir) is None
|
||||
|
||||
with open(os.path.join(tmpdir, 'requirements.txt'), 'w', encoding='utf-8') as handle:
|
||||
handle.write('requests\n')
|
||||
assert classify_python_workspace(tmpdir) == 'requirements'
|
||||
|
||||
with open(os.path.join(tmpdir, 'pyproject.toml'), 'w', encoding='utf-8') as handle:
|
||||
handle.write('[project]\nname = "demo"\n')
|
||||
assert classify_python_workspace(tmpdir) == 'package'
|
||||
|
||||
|
||||
def test_wrap_python_command_with_env_contains_bootstrap_and_command():
|
||||
command = wrap_python_command_with_env('python script.py')
|
||||
|
||||
assert 'python -m venv "$_LB_VENV_DIR"' in command
|
||||
assert 'export VIRTUAL_ENV="$_LB_VENV_DIR"' in command
|
||||
assert command.rstrip().endswith('python script.py')
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_workspace_session_execute_for_query_uses_session_payload():
|
||||
box_service = SimpleNamespace(execute_spec_payload=AsyncMock(return_value={'ok': True}))
|
||||
workspace = BoxWorkspaceSession(
|
||||
box_service,
|
||||
'skill-person_123-demo',
|
||||
host_path='/tmp/project',
|
||||
host_path_mode='rw',
|
||||
env={'FOO': 'bar'},
|
||||
)
|
||||
|
||||
query = SimpleNamespace(query_id='q1')
|
||||
result = await workspace.execute_for_query(query, 'python run.py', workdir='/workspace', timeout_sec=30)
|
||||
|
||||
assert result == {'ok': True}
|
||||
payload = box_service.execute_spec_payload.await_args.args[0]
|
||||
assert payload == {
|
||||
'session_id': 'skill-person_123-demo',
|
||||
'workdir': '/workspace',
|
||||
'env': {'FOO': 'bar'},
|
||||
'host_path': '/tmp/project',
|
||||
'host_path_mode': 'rw',
|
||||
'cmd': 'python run.py',
|
||||
'timeout_sec': 30,
|
||||
}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_workspace_session_start_managed_process_rewrites_command_and_args():
|
||||
box_service = SimpleNamespace(start_managed_process=AsyncMock(return_value={'status': 'running'}))
|
||||
workspace = BoxWorkspaceSession(
|
||||
box_service,
|
||||
'mcp-u1',
|
||||
host_path='/tmp/project',
|
||||
host_path_mode='ro',
|
||||
)
|
||||
|
||||
result = await workspace.start_managed_process(
|
||||
'/tmp/project/.venv/bin/python',
|
||||
['/tmp/project/server.py', '--config', '/tmp/project/config.json'],
|
||||
env={'TOKEN': '1'},
|
||||
)
|
||||
|
||||
assert result == {'status': 'running'}
|
||||
session_id = box_service.start_managed_process.await_args.args[0]
|
||||
payload = box_service.start_managed_process.await_args.args[1]
|
||||
assert session_id == 'mcp-u1'
|
||||
assert payload == {
|
||||
'command': 'python',
|
||||
'args': ['/workspace/server.py', '--config', '/workspace/config.json'],
|
||||
'env': {'TOKEN': '1'},
|
||||
'cwd': '/workspace',
|
||||
}
|
||||
|
||||
|
||||
def test_workspace_session_build_session_payload_keeps_generic_workspace_shape():
|
||||
workspace = BoxWorkspaceSession(
|
||||
Mock(),
|
||||
'workspace-1',
|
||||
host_path='/tmp/project',
|
||||
host_path_mode='rw',
|
||||
env={'FOO': 'bar'},
|
||||
network='on',
|
||||
read_only_rootfs=False,
|
||||
image='python:3.11',
|
||||
cpus=1.0,
|
||||
memory_mb=512,
|
||||
pids_limit=128,
|
||||
)
|
||||
|
||||
assert workspace.build_session_payload() == {
|
||||
'session_id': 'workspace-1',
|
||||
'workdir': '/workspace',
|
||||
'env': {'FOO': 'bar'},
|
||||
'network': 'on',
|
||||
'read_only_rootfs': False,
|
||||
'host_path': '/tmp/project',
|
||||
'host_path_mode': 'rw',
|
||||
'image': 'python:3.11',
|
||||
'cpus': 1.0,
|
||||
'memory_mb': 512,
|
||||
'pids_limit': 128,
|
||||
}
|
||||
@@ -125,6 +125,8 @@ def mcp_module():
|
||||
'mcp.py',
|
||||
)
|
||||
mcp_path = os.path.normpath(mcp_path)
|
||||
pkg_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(mcp_path))))
|
||||
sys.modules['langbot.pkg'].__path__ = [pkg_root]
|
||||
sys.modules['langbot.pkg.provider.tools.loaders'].__path__ = [os.path.dirname(mcp_path)]
|
||||
spec = importlib.util.spec_from_file_location(mod_fqn, mcp_path)
|
||||
mod = importlib.util.module_from_spec(spec)
|
||||
@@ -136,6 +138,7 @@ def mcp_module():
|
||||
# Cleanup
|
||||
sys.modules.pop(mod_fqn, None)
|
||||
sys.modules.pop('langbot.pkg.provider.tools.loaders.mcp_stdio', None)
|
||||
sys.modules.pop('langbot.pkg.box.workspace', None)
|
||||
for name in reversed(list(saved)):
|
||||
if saved[name] is None:
|
||||
sys.modules.pop(name, None)
|
||||
|
||||
Reference in New Issue
Block a user