mirror of
https://github.com/langbot-app/LangBot.git
synced 2026-06-07 06:16:02 +00:00
420 lines
15 KiB
Python
420 lines
15 KiB
Python
"""Reusable workspace/session helpers built on top of Box.
|
|
|
|
This module is the middle layer between the raw Box runtime primitives and
|
|
application-specific flows such as skills or MCP stdio.
|
|
|
|
It intentionally stays generic:
|
|
- path and virtualenv rewriting are workspace concerns
|
|
- Python project detection/bootstrap are workspace concerns
|
|
- session exec / managed-process helpers are workspace concerns
|
|
|
|
Higher layers add their own semantics on top, for example:
|
|
- skills choose a stable per-skill session id and use repeated exec
|
|
- MCP stdio chooses how to prepare dependencies and attaches to a managed process
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
import textwrap
|
|
from typing import Any
|
|
|
|
# Dependency manifests probed (in this order) at the root of a workspace to
# decide whether it is a Python project.
PYTHON_MANIFEST_FILES = ('requirements.txt', 'pyproject.toml', 'setup.py', 'setup.cfg')
# Directory names treated as a project-local virtualenv.
_VENV_DIRS = frozenset(('.venv', 'venv', 'env', '.env'))
# Sub-directory of a virtualenv that holds its executables
# (``bin`` on POSIX layouts, ``Scripts`` on Windows layouts).
_VENV_BIN_DIRS = frozenset(('bin', 'Scripts'))
|
|
|
|
|
|
def normalize_host_path(path: str | None) -> str:
    """Return *path* as an absolute, symlink-resolved host path.

    ``None`` and blank (empty or whitespace-only) values produce ``''`` so
    callers can use plain truthiness to test for "no host path".
    """
    candidate = '' if path is None else str(path).strip()
    if candidate == '':
        return ''
    return os.path.realpath(os.path.abspath(candidate))
|
|
|
|
|
|
def rewrite_mounted_path(path: str, host_path: str | None, *, mount_path: str = '/workspace') -> str:
    """Translate a host path into the path visible inside the sandbox mount.

    Both ``path`` and ``host_path`` are symlink-resolved before comparison.

    Args:
        path: Host-side path to translate.
        host_path: Host directory that is mounted into the sandbox.
        mount_path: Location of that mount inside the sandbox.

    Returns:
        ``mount_path`` itself when ``path`` is the mounted directory, the
        corresponding path below ``mount_path`` when ``path`` lives inside
        it, and ``path`` unchanged otherwise (including when either input is
        empty/``None``).
    """
    if not host_path or not path:
        return path

    normalized_host = os.path.realpath(host_path)
    normalized_path = os.path.realpath(path)
    if normalized_path == normalized_host:
        return mount_path

    # A resolved host path only ends with '/' when it is the filesystem root.
    # Appending another '/' there would build the prefix '//' and nothing
    # would ever match, so only add the separator when it is missing.
    host_prefix = normalized_host if normalized_host.endswith('/') else normalized_host + '/'
    if normalized_path.startswith(host_prefix):
        return mount_path + '/' + normalized_path[len(host_prefix) :]
    return path
|
|
|
|
|
|
def unwrap_venv_path(directory: str) -> str:
    """Collapse ``.../.venv/bin`` style paths back to the project root.

    Backslashes are treated as separators. The right-most ``<venv>/<bin>``
    pair (names taken from ``_VENV_DIRS`` / ``_VENV_BIN_DIRS``) is located
    and everything in front of it is returned, joined with ``/``. When there
    is no such pair, ``directory`` is returned untouched.
    """
    segments = directory.replace('\\', '/').split('/')
    # Walk from the last segment down to index 1: a bin directory needs a
    # parent segment, so index 0 can never qualify.
    for idx in reversed(range(1, len(segments))):
        if segments[idx] not in _VENV_BIN_DIRS:
            continue
        if segments[idx - 1] not in _VENV_DIRS:
            continue
        root = '/'.join(segments[: idx - 1])
        # An empty prefix means the venv sat directly under the root.
        # NOTE(review): a relative input such as '.venv/bin' also lands here
        # and yields '/'.
        return root or '/'
    return directory
|
|
|
|
|
|
def infer_workspace_host_path(command: str, args: list[str] | None = None) -> str | None:
    """Infer the project/workspace root from absolute command/arg paths.

    Every absolute, existing path among ``command`` and ``args`` contributes
    its parent directory (with any ``<venv>/<bin>`` suffix collapsed by
    ``unwrap_venv_path`` and symlinks resolved). The workspace is the deepest
    directory common to all of them.

    Args:
        command: Executable as configured on the host.
        args: Optional argument list; non-path arguments are ignored.

    Returns:
        The common directory, or ``None`` when no usable path was found, when
        the candidates share nothing but a filesystem root, or when they have
        no common path at all (e.g. different drives).
    """
    candidates = [
        os.path.realpath(unwrap_venv_path(os.path.dirname(part)))
        for part in (command, *(args or []))
        if os.path.isabs(part) and os.path.exists(part)
    ]
    if not candidates:
        return None

    try:
        common = os.path.commonpath(candidates)
    except ValueError:
        # commonpath raises when the paths cannot be compared (for example
        # they live on different drives); that simply means "no workspace".
        return None

    # A filesystem root is its own parent ('/' on POSIX, 'C:\\' on Windows)
    # and is never a meaningful workspace to mount.
    if os.path.dirname(common) == common:
        return None
    return common
|
|
|
|
|
|
def rewrite_venv_command(command: str, host_path: str | None, *, mount_path: str = '/workspace') -> str:
    """Rewrite host venv interpreters to plain ``python`` inside the sandbox.

    Once a project is mounted into the sandbox, host virtualenv paths are no
    longer valid. For those paths we intentionally drop down to ``python`` and
    let the sandbox-side environment/bootstrap decide what interpreter to use.

    Args:
        command: Executable as configured on the host.
        host_path: Host directory that is mounted into the sandbox.
        mount_path: Location of that mount inside the sandbox.

    Returns:
        ``'python'`` for ``<host_path>/<venv>/<bin>/python*``; the
        mount-relative path for any other command that resolves inside
        ``host_path``; ``command`` unchanged otherwise.
    """
    if not host_path or not command:
        return command

    host_prefix = os.path.realpath(host_path) + '/'
    resolved_command = os.path.realpath(command)
    # A venv's ``bin/python`` is normally a symlink to the system interpreter,
    # so fully resolving it jumps outside the workspace and hides the venv.
    # Resolve only the containing directory and keep the final name as
    # written; the fully resolved form is still checked for compatibility.
    parent_resolved = os.path.join(
        os.path.realpath(os.path.dirname(command)),
        os.path.basename(command),
    )

    for candidate in (parent_resolved, resolved_command):
        if not candidate.startswith(host_prefix):
            continue
        parts = candidate[len(host_prefix) :].replace('\\', '/').split('/')
        if (
            len(parts) >= 3
            and parts[0] in _VENV_DIRS
            and parts[1] in _VENV_BIN_DIRS
            and parts[2].startswith('python')
        ):
            return 'python'

    if not resolved_command.startswith(host_prefix):
        return command
    return rewrite_mounted_path(resolved_command, host_path, mount_path=mount_path)
|
|
|
|
|
|
def list_python_manifest_files(host_path: str | None) -> list[str]:
    """List the Python manifest files present directly under ``host_path``.

    The result keeps the order of ``PYTHON_MANIFEST_FILES`` and is empty when
    ``host_path`` normalizes to nothing or no manifest exists.
    """
    root = normalize_host_path(host_path)
    found: list[str] = []
    if root:
        for name in PYTHON_MANIFEST_FILES:
            if os.path.isfile(os.path.join(root, name)):
                found.append(name)
    return found
|
|
|
|
|
|
def classify_python_workspace(host_path: str | None) -> str | None:
    """Return the generic Python workspace shape, without app-specific policy.

    Returns:
        ``'package'`` when a ``pyproject.toml``, ``setup.py`` or ``setup.cfg``
        is present (this wins even if ``requirements.txt`` also exists),
        ``'requirements'`` when only ``requirements.txt`` is present, and
        ``None`` when no manifest file is found.
    """
    present = frozenset(list_python_manifest_files(host_path))
    if not present:
        return None
    if not present.isdisjoint(('pyproject.toml', 'setup.py', 'setup.cfg')):
        return 'package'
    return 'requirements' if 'requirements.txt' in present else None
|
|
|
|
|
|
def should_prepare_python_env(host_path: str | None) -> bool:
    """Tell whether the workspace at ``host_path`` looks like a Python project.

    True when the directory contains a ``.venv`` folder or at least one of
    the known manifest files; False for an empty/``None`` path.
    """
    root = normalize_host_path(host_path)
    if not root:
        return False
    has_local_venv = os.path.isdir(os.path.join(root, '.venv'))
    # ``or`` short-circuits, so manifests are only probed without a .venv.
    return has_local_venv or bool(list_python_manifest_files(root))
|
|
|
|
|
|
def wrap_python_command_with_env(command: str, *, mount_path: str = '/workspace') -> str:
    """Wrap a command with a reusable sandbox-local Python env bootstrap.

    This is the generic "workspace is a Python project" path used by mutable
    workspaces such as skills. Read-only installation strategies stay in the
    higher-level caller because they are application policy, not workspace
    semantics.

    The generated shell script:

    1. fingerprints the interpreter and the manifest files under
       ``mount_path`` (``_lb_python_meta``);
    2. (re)creates ``{mount_path}/.venv`` and installs dependencies when the
       venv is missing or the stored fingerprint differs, serialised through
       a ``mkdir``-based lock directory that is released as soon as the
       bootstrap finishes;
    3. puts the venv first on ``PATH`` and runs ``command``.

    Args:
        command: Shell text to run once the environment is ready. May span
            several lines.
        mount_path: Workspace location inside the sandbox.

    Returns:
        The complete script, terminated by a single newline.

    NOTE(review): ``command`` and ``mount_path`` are inserted verbatim (no
    shell quoting); callers are expected to pass trusted values.
    """
    # The command is spliced in only AFTER dedenting. Interpolating it first
    # would let a multi-line command (whose continuation lines start at
    # column 0) erase the common margin, leaving the heredoc terminator and
    # the embedded Python indented and therefore broken.
    command_slot = '__LB_WORKSPACE_COMMAND__'
    template = textwrap.dedent(
        f"""
        set -e

        _LB_VENV_DIR="{mount_path}/.venv"
        _LB_META_DIR="{mount_path}/.langbot"
        _LB_META_FILE="$_LB_META_DIR/python-env.json"
        _LB_LOCK_DIR="$_LB_META_DIR/python-env.lock"
        _LB_TMP_DIR="{mount_path}/.tmp"
        _LB_PIP_CACHE_DIR="{mount_path}/.cache/pip"

        mkdir -p "$_LB_META_DIR" "$_LB_TMP_DIR" "$_LB_PIP_CACHE_DIR"
        _LB_SYSTEM_PYTHON="$(command -v python3 || command -v python || true)"
        if [ -z "$_LB_SYSTEM_PYTHON" ]; then
          echo "python3 or python is required to prepare the workspace Python environment" >&2
          exit 127
        fi

        export TMPDIR="$_LB_TMP_DIR"
        export TEMP="$_LB_TMP_DIR"
        export TMP="$_LB_TMP_DIR"
        export PIP_CACHE_DIR="$_LB_PIP_CACHE_DIR"

        _lb_python_meta() {{
          "$_LB_SYSTEM_PYTHON" - <<'PY'
        import hashlib
        import json
        import os
        import sys

        root = "{mount_path}"
        digest = hashlib.sha256()
        manifest_files = []
        for rel in ("requirements.txt", "pyproject.toml", "setup.py", "setup.cfg"):
            path = os.path.join(root, rel)
            if not os.path.isfile(path):
                continue
            manifest_files.append(rel)
            with open(path, "rb") as handle:
                digest.update(rel.encode("utf-8"))
                digest.update(b"\\0")
                digest.update(handle.read())
                digest.update(b"\\0")

        print(
            json.dumps(
                {{
                    "python_executable": sys.executable,
                    "python_version": list(sys.version_info[:3]),
                    "manifest_files": manifest_files,
                    "manifest_sha256": digest.hexdigest(),
                }},
                sort_keys=True,
            )
        )
        PY
        }}

        _LB_CURRENT_META="$(_lb_python_meta)"
        _LB_NEEDS_BOOTSTRAP=0

        if [ ! -x "$_LB_VENV_DIR/bin/python" ]; then
          _LB_NEEDS_BOOTSTRAP=1
        elif [ ! -f "$_LB_META_FILE" ]; then
          _LB_NEEDS_BOOTSTRAP=1
        elif [ "$(cat "$_LB_META_FILE")" != "$_LB_CURRENT_META" ]; then
          _LB_NEEDS_BOOTSTRAP=1
        fi

        if [ "$_LB_NEEDS_BOOTSTRAP" -eq 1 ]; then
          _LB_LOCK_WAIT=0
          while ! mkdir "$_LB_LOCK_DIR" 2>/dev/null; do
            if [ "$_LB_LOCK_WAIT" -ge 120 ]; then
              echo "Timed out waiting for Python environment lock: $_LB_LOCK_DIR" >&2
              exit 1
            fi
            sleep 1
            _LB_LOCK_WAIT=$((_LB_LOCK_WAIT + 1))
          done

          _lb_cleanup_lock() {{
            rmdir "$_LB_LOCK_DIR" >/dev/null 2>&1 || true
          }}
          trap _lb_cleanup_lock EXIT INT TERM

          _LB_CURRENT_META="$(_lb_python_meta)"
          _LB_NEEDS_BOOTSTRAP=0
          if [ ! -x "$_LB_VENV_DIR/bin/python" ]; then
            _LB_NEEDS_BOOTSTRAP=1
          elif [ ! -f "$_LB_META_FILE" ]; then
            _LB_NEEDS_BOOTSTRAP=1
          elif [ "$(cat "$_LB_META_FILE")" != "$_LB_CURRENT_META" ]; then
            _LB_NEEDS_BOOTSTRAP=1
          fi

          if [ "$_LB_NEEDS_BOOTSTRAP" -eq 1 ]; then
            rm -rf "$_LB_VENV_DIR"
            "$_LB_SYSTEM_PYTHON" -m venv "$_LB_VENV_DIR"
            . "$_LB_VENV_DIR/bin/activate"
            python -m pip install --upgrade pip setuptools wheel
            if [ -f "{mount_path}/requirements.txt" ]; then
              python -m pip install -r "{mount_path}/requirements.txt"
            elif [ -f "{mount_path}/pyproject.toml" ] || [ -f "{mount_path}/setup.py" ] || [ -f "{mount_path}/setup.cfg" ]; then
              python -m pip install "{mount_path}"
            fi
            printf '%s' "$_LB_CURRENT_META" > "$_LB_META_FILE"
          fi

          _lb_cleanup_lock
          trap - EXIT INT TERM
        fi

        export VIRTUAL_ENV="$_LB_VENV_DIR"
        export PATH="$_LB_VENV_DIR/bin:$PATH"
        {command_slot}
        """
    )
    # The explicit unlock + trap reset above releases the lock before the
    # user command starts; otherwise it would be held for the command's whole
    # lifetime and leak entirely if the command replaces the shell via exec.
    return template.replace(command_slot, command).strip() + '\n'
|
|
|
|
|
|
class BoxWorkspaceSession:
    """High-level handle for one reusable workspace-backed Box session.

    The Box runtime already understands sessions and managed processes. This
    wrapper adds LangBot's workspace-centric view on top: a mounted host path,
    a stable ``session_id``, optional environment defaults, and convenience
    helpers for exec or long-running processes inside that workspace.

    ``box_service`` is used through ``client.execute``,
    ``client.delete_session``, ``build_spec``, ``execute_spec_payload``,
    ``create_session`` and the ``*_managed_process*`` methods.
    """

    def __init__(
        self,
        box_service: Any,
        session_id: str,
        *,
        host_path: str | None = None,
        host_path_mode: str = 'rw',
        workdir: str = '/workspace',
        env: dict[str, str] | None = None,
        mount_path: str = '/workspace',
        network: str | None = None,
        read_only_rootfs: bool | None = None,
        image: str | None = None,
        cpus: float | None = None,
        memory_mb: int | None = None,
        pids_limit: int | None = None,
        persistent: bool = False,
    ) -> None:
        """Store the session configuration; no Box call is made here.

        Optional settings left as ``None`` are omitted from generated
        payloads. ``env`` is copied so later changes to the caller's dict do
        not affect the session.
        """
        self.box_service = box_service
        self.session_id = session_id
        self.host_path = host_path
        self.host_path_mode = host_path_mode
        self.workdir = workdir
        self.env = dict(env or {})
        self.mount_path = mount_path
        self.network = network
        self.read_only_rootfs = read_only_rootfs
        self.image = image
        self.cpus = cpus
        self.memory_mb = memory_mb
        self.pids_limit = pids_limit
        self.persistent = persistent

    def rewrite_path(self, path: str) -> str:
        """Map a host path to its location under this session's mount."""
        return rewrite_mounted_path(path, self.host_path, mount_path=self.mount_path)

    def rewrite_venv_command(self, command: str) -> str:
        """Apply the module-level venv command rewrite for this session."""
        return rewrite_venv_command(command, self.host_path, mount_path=self.mount_path)

    def build_session_payload(self) -> dict[str, Any]:
        """Build the session-level payload dict.

        Always contains ``session_id``, ``workdir``, ``env`` and
        ``persistent``; other keys appear only when configured.
        """
        # Keep this payload generic so callers can reuse the same workspace
        # handle for plain exec, file-producing tasks, or managed processes.
        payload: dict[str, Any] = {
            'session_id': self.session_id,
            'workdir': self.workdir,
            # Hand out a copy: whoever receives the payload must not be able
            # to mutate the session's default environment through it.
            'env': dict(self.env),
            'persistent': self.persistent,
        }
        if self.network is not None:
            payload['network'] = self.network
        if self.read_only_rootfs is not None:
            payload['read_only_rootfs'] = self.read_only_rootfs
        if self.host_path:
            payload['host_path'] = self.host_path
            payload['host_path_mode'] = self.host_path_mode
        for key in ('image', 'cpus', 'memory_mb', 'pids_limit'):
            value = getattr(self, key)
            if value is not None:
                payload[key] = value
        return payload

    def build_exec_payload(
        self,
        cmd: str,
        *,
        workdir: str | None = None,
        env: dict[str, str] | None = None,
        timeout_sec: int | None = None,
    ) -> dict[str, Any]:
        """Build the payload for a one-shot command in this session.

        Args:
            cmd: Command text to execute.
            workdir: Per-call working directory; falls back to the session's.
            env: Per-call environment. A non-empty dict replaces the session
                environment; ``None`` or an empty dict keeps it.
            timeout_sec: Optional timeout, included only when given.

        Returns:
            The session payload plus ``cmd``/``workdir`` (and
            ``timeout_sec``); ``env`` is dropped when it would be empty.
        """
        # Exec payloads inherit the session-level workspace config, then layer
        # per-call command/workdir/env overrides on top.
        payload = self.build_session_payload()
        payload['cmd'] = cmd
        payload['workdir'] = workdir or self.workdir
        if timeout_sec is not None:
            payload['timeout_sec'] = timeout_sec
        resolved_env = self.env if env is None else env
        if resolved_env:
            # Copy so neither the session defaults nor the caller's dict are
            # aliased by the outgoing payload.
            payload['env'] = dict(resolved_env)
        elif 'env' in payload and not payload['env']:
            payload.pop('env')
        return payload

    async def execute_raw(
        self,
        cmd: str,
        *,
        workdir: str | None = None,
        env: dict[str, str] | None = None,
        timeout_sec: int | None = None,
    ):
        """Run ``cmd`` via ``box_service.client.execute`` and return its result."""
        payload = self.build_exec_payload(cmd, workdir=workdir, env=env, timeout_sec=timeout_sec)
        return await self.box_service.client.execute(self.box_service.build_spec(payload))

    async def execute_for_query(
        self,
        query,
        cmd: str,
        *,
        workdir: str | None = None,
        env: dict[str, str] | None = None,
        timeout_sec: int | None = None,
    ) -> dict:
        """Run ``cmd`` via ``box_service.execute_spec_payload`` for ``query``."""
        payload = self.build_exec_payload(cmd, workdir=workdir, env=env, timeout_sec=timeout_sec)
        return await self.box_service.execute_spec_payload(payload, query)

    async def create_session(self):
        """Create the Box session from the session-level payload."""
        return await self.box_service.create_session(self.build_session_payload())

    def build_process_payload(
        self,
        command: str,
        args: list[str] | None = None,
        *,
        env: dict[str, str] | None = None,
        cwd: str = '/workspace',
    ) -> dict[str, Any]:
        """Build the payload describing a managed process.

        When the session has a ``host_path``, the command goes through the
        venv rewrite and every argument as well as ``cwd`` through the
        mounted-path rewrite; otherwise they are passed through unchanged.

        Returns:
            A dict with ``command``, ``args``, ``env`` and ``cwd``.
        """
        # Managed processes run inside the same workspace model as one-shot
        # execs, so path/venv rewriting is shared here.
        normalized_command = command
        normalized_args = list(args or [])
        normalized_cwd = cwd
        if self.host_path:
            normalized_command = self.rewrite_venv_command(command)
            normalized_args = [self.rewrite_path(arg) for arg in normalized_args]
            normalized_cwd = self.rewrite_path(cwd)
        return {
            'command': normalized_command,
            'args': normalized_args,
            'env': dict(env or {}),
            'cwd': normalized_cwd,
        }

    async def start_managed_process(
        self,
        command: str,
        args: list[str] | None = None,
        *,
        process_id: str = 'default',
        env: dict[str, str] | None = None,
        cwd: str = '/workspace',
    ):
        """Start a managed process in this session under ``process_id``."""
        payload = self.build_process_payload(command, args, env=env, cwd=cwd)
        payload['process_id'] = process_id
        return await self.box_service.start_managed_process(self.session_id, payload)

    async def get_managed_process(self, process_id: str = 'default'):
        """Return whatever ``box_service`` reports for the managed process."""
        return await self.box_service.get_managed_process(self.session_id, process_id)

    async def stop_managed_process(self, process_id: str = 'default') -> None:
        """Ask ``box_service`` to stop the managed process."""
        await self.box_service.stop_managed_process(self.session_id, process_id)

    def get_managed_process_websocket_url(self, process_id: str = 'default') -> str:
        """Return the websocket URL ``box_service`` exposes for the process."""
        return self.box_service.get_managed_process_websocket_url(self.session_id, process_id)

    async def cleanup(self) -> None:
        """Delete this session through ``box_service.client.delete_session``."""
        await self.box_service.client.delete_session(self.session_id)
|