mirror of
https://github.com/langbot-app/LangBot.git
synced 2026-06-28 00:14:21 +00:00
feat(box): add host workspace mounting and sandbox_exec guidance
This commit is contained in:
@@ -11,7 +11,7 @@ import shutil
|
|||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
from .errors import BoxError
|
from .errors import BoxError
|
||||||
from .models import BoxExecutionResult, BoxExecutionStatus, BoxSessionInfo, BoxSpec
|
from .models import DEFAULT_BOX_MOUNT_PATH, BoxExecutionResult, BoxExecutionStatus, BoxSessionInfo, BoxSpec
|
||||||
|
|
||||||
|
|
||||||
@dataclasses.dataclass(slots=True)
|
@dataclasses.dataclass(slots=True)
|
||||||
@@ -83,8 +83,19 @@ class CLISandboxBackend(BaseSandboxBackend):
|
|||||||
if spec.network.value == 'off':
|
if spec.network.value == 'off':
|
||||||
args.extend(['--network', 'none'])
|
args.extend(['--network', 'none'])
|
||||||
|
|
||||||
|
if spec.host_path is not None:
|
||||||
|
mount_spec = f'{spec.host_path}:{DEFAULT_BOX_MOUNT_PATH}:{spec.host_path_mode.value}'
|
||||||
|
args.extend(['-v', mount_spec])
|
||||||
|
|
||||||
args.extend([spec.image, 'sh', '-lc', 'while true; do sleep 3600; done'])
|
args.extend([spec.image, 'sh', '-lc', 'while true; do sleep 3600; done'])
|
||||||
|
|
||||||
|
self.logger.info(
|
||||||
|
f'LangBot Box backend start_session: backend={self.name} '
|
||||||
|
f'session_id={spec.session_id} container_name={container_name} '
|
||||||
|
f'image={spec.image} network={spec.network.value} '
|
||||||
|
f'host_path={spec.host_path} host_path_mode={spec.host_path_mode.value}'
|
||||||
|
)
|
||||||
|
|
||||||
await self._run_command(args, timeout_sec=30, check=True)
|
await self._run_command(args, timeout_sec=30, check=True)
|
||||||
|
|
||||||
return BoxSessionInfo(
|
return BoxSessionInfo(
|
||||||
@@ -93,6 +104,8 @@ class CLISandboxBackend(BaseSandboxBackend):
|
|||||||
backend_session_id=container_name,
|
backend_session_id=container_name,
|
||||||
image=spec.image,
|
image=spec.image,
|
||||||
network=spec.network,
|
network=spec.network,
|
||||||
|
host_path=spec.host_path,
|
||||||
|
host_path_mode=spec.host_path_mode,
|
||||||
created_at=now,
|
created_at=now,
|
||||||
last_used_at=now,
|
last_used_at=now,
|
||||||
)
|
)
|
||||||
@@ -113,6 +126,16 @@ class CLISandboxBackend(BaseSandboxBackend):
|
|||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
cmd_preview = spec.cmd.strip()
|
||||||
|
if len(cmd_preview) > 400:
|
||||||
|
cmd_preview = f'{cmd_preview[:397]}...'
|
||||||
|
self.logger.info(
|
||||||
|
f'LangBot Box backend exec: backend={self.name} '
|
||||||
|
f'session_id={session.session_id} container_name={session.backend_session_id} '
|
||||||
|
f'workdir={spec.workdir} timeout_sec={spec.timeout_sec} '
|
||||||
|
f'env_keys={sorted(spec.env.keys())} cmd={cmd_preview}'
|
||||||
|
)
|
||||||
|
|
||||||
result = await self._run_command(args, timeout_sec=spec.timeout_sec, check=False)
|
result = await self._run_command(args, timeout_sec=spec.timeout_sec, check=False)
|
||||||
duration_ms = int((dt.datetime.now(dt.UTC) - start).total_seconds() * 1000)
|
duration_ms = int((dt.datetime.now(dt.UTC) - start).total_seconds() * 1000)
|
||||||
|
|
||||||
@@ -138,6 +161,10 @@ class CLISandboxBackend(BaseSandboxBackend):
|
|||||||
)
|
)
|
||||||
|
|
||||||
async def stop_session(self, session: BoxSessionInfo):
|
async def stop_session(self, session: BoxSessionInfo):
|
||||||
|
self.logger.info(
|
||||||
|
f'LangBot Box backend stop_session: backend={self.name} '
|
||||||
|
f'session_id={session.session_id} container_name={session.backend_session_id}'
|
||||||
|
)
|
||||||
await self._run_command(
|
await self._run_command(
|
||||||
[self.command, 'rm', '-f', session.backend_session_id],
|
[self.command, 'rm', '-f', session.backend_session_id],
|
||||||
timeout_sec=20,
|
timeout_sec=20,
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import pydantic
|
|||||||
|
|
||||||
|
|
||||||
DEFAULT_BOX_IMAGE = 'python:3.11-slim'
|
DEFAULT_BOX_IMAGE = 'python:3.11-slim'
|
||||||
|
DEFAULT_BOX_MOUNT_PATH = '/workspace'
|
||||||
|
|
||||||
|
|
||||||
class BoxNetworkMode(str, enum.Enum):
|
class BoxNetworkMode(str, enum.Enum):
|
||||||
@@ -19,6 +20,11 @@ class BoxExecutionStatus(str, enum.Enum):
|
|||||||
TIMED_OUT = 'timed_out'
|
TIMED_OUT = 'timed_out'
|
||||||
|
|
||||||
|
|
||||||
|
class BoxHostMountMode(str, enum.Enum):
    """Access mode for the host directory that is mounted into a sandbox."""

    # The values are interpolated verbatim as the trailing mode field of the
    # CLI backend's `-v host:container:mode` mount argument.
    READ_ONLY = 'ro'
    READ_WRITE = 'rw'
|
||||||
|
|
||||||
|
|
||||||
class BoxSpec(pydantic.BaseModel):
|
class BoxSpec(pydantic.BaseModel):
|
||||||
cmd: str
|
cmd: str
|
||||||
workdir: str = '/workspace'
|
workdir: str = '/workspace'
|
||||||
@@ -27,6 +33,8 @@ class BoxSpec(pydantic.BaseModel):
|
|||||||
session_id: str
|
session_id: str
|
||||||
env: dict[str, str] = pydantic.Field(default_factory=dict)
|
env: dict[str, str] = pydantic.Field(default_factory=dict)
|
||||||
image: str = DEFAULT_BOX_IMAGE
|
image: str = DEFAULT_BOX_IMAGE
|
||||||
|
host_path: str | None = None
|
||||||
|
host_path_mode: BoxHostMountMode = BoxHostMountMode.READ_WRITE
|
||||||
|
|
||||||
@pydantic.field_validator('cmd')
|
@pydantic.field_validator('cmd')
|
||||||
@classmethod
|
@classmethod
|
||||||
@@ -64,6 +72,24 @@ class BoxSpec(pydantic.BaseModel):
|
|||||||
def validate_env(cls, value: dict[str, str]) -> dict[str, str]:
|
def validate_env(cls, value: dict[str, str]) -> dict[str, str]:
|
||||||
return {str(k): str(v) for k, v in value.items()}
|
return {str(k): str(v) for k, v in value.items()}
|
||||||
|
|
||||||
|
@pydantic.field_validator('host_path')
@classmethod
def validate_host_path(cls, value: str | None) -> str | None:
    """Normalize and validate the optional host directory to mount.

    Args:
        value: Raw ``host_path`` as supplied by the caller, or ``None``.

    Returns:
        ``None`` when no mount was requested (``None`` or a blank string),
        otherwise the value with surrounding whitespace removed.

    Raises:
        ValueError: If a non-blank value does not start with ``/``.
    """
    if value is None:
        return None

    value = value.strip()
    if not value:
        # BoxService treats '' the same as a missing host_path when it picks
        # the default workspace; mirror that here so a blank optional argument
        # means "no mount" instead of failing as a non-absolute path.
        return None

    if not value.startswith('/'):
        raise ValueError('host_path must be an absolute host path')
    return value
|
||||||
|
|
||||||
|
@pydantic.model_validator(mode='after')
def validate_host_mount_consistency(self) -> 'BoxSpec':
    """Ensure ``workdir`` lives inside the mount point when a host path is mounted.

    Without a ``host_path`` the spec is returned unchanged. With one, the
    working directory must resolve (lexically) to ``DEFAULT_BOX_MOUNT_PATH``
    or a path below it.

    Returns:
        The validated spec itself.

    Raises:
        ValueError: If ``workdir`` escapes the mount point.
    """
    # The mount point is a POSIX-style path, so normalize with posixpath
    # rather than the host's os.path flavour.
    import posixpath

    if self.host_path is None:
        return self

    # A bare startswith() would accept sibling paths such as '/workspace-evil'
    # and traversal such as '/workspace/../etc'. Collapse '.' and '..' first,
    # then compare on a path-component boundary.
    normalized_workdir = posixpath.normpath(self.workdir)
    inside_mount = (
        normalized_workdir == DEFAULT_BOX_MOUNT_PATH
        or normalized_workdir.startswith(f'{DEFAULT_BOX_MOUNT_PATH}/')
    )
    if not inside_mount:
        raise ValueError('workdir must stay under /workspace when host_path is provided')
    return self
|
||||||
|
|
||||||
|
|
||||||
class BoxSessionInfo(pydantic.BaseModel):
|
class BoxSessionInfo(pydantic.BaseModel):
|
||||||
session_id: str
|
session_id: str
|
||||||
@@ -71,6 +97,8 @@ class BoxSessionInfo(pydantic.BaseModel):
|
|||||||
backend_session_id: str
|
backend_session_id: str
|
||||||
image: str
|
image: str
|
||||||
network: BoxNetworkMode
|
network: BoxNetworkMode
|
||||||
|
host_path: str | None = None
|
||||||
|
host_path_mode: BoxHostMountMode = BoxHostMountMode.READ_WRITE
|
||||||
created_at: dt.datetime
|
created_at: dt.datetime
|
||||||
last_used_at: dt.datetime
|
last_used_at: dt.datetime
|
||||||
|
|
||||||
|
|||||||
@@ -37,6 +37,14 @@ class BoxRuntime:
|
|||||||
session = await self._get_or_create_session(spec)
|
session = await self._get_or_create_session(spec)
|
||||||
|
|
||||||
async with session.lock:
|
async with session.lock:
|
||||||
|
self.logger.info(
|
||||||
|
'LangBot Box execute: '
|
||||||
|
f'session_id={spec.session_id} '
|
||||||
|
f'backend_session_id={session.info.backend_session_id} '
|
||||||
|
f'backend={session.info.backend_name} '
|
||||||
|
f'workdir={spec.workdir} '
|
||||||
|
f'timeout_sec={spec.timeout_sec}'
|
||||||
|
)
|
||||||
result = await (await self._get_backend()).exec(session.info, spec)
|
result = await (await self._get_backend()).exec(session.info, spec)
|
||||||
|
|
||||||
async with self._lock:
|
async with self._lock:
|
||||||
@@ -63,12 +71,28 @@ class BoxRuntime:
|
|||||||
if existing is not None:
|
if existing is not None:
|
||||||
self._assert_session_compatible(existing.info, spec)
|
self._assert_session_compatible(existing.info, spec)
|
||||||
existing.info.last_used_at = dt.datetime.now(dt.UTC)
|
existing.info.last_used_at = dt.datetime.now(dt.UTC)
|
||||||
|
self.logger.info(
|
||||||
|
'LangBot Box session reused: '
|
||||||
|
f'session_id={spec.session_id} '
|
||||||
|
f'backend_session_id={existing.info.backend_session_id} '
|
||||||
|
f'backend={existing.info.backend_name}'
|
||||||
|
)
|
||||||
return existing
|
return existing
|
||||||
|
|
||||||
backend = await self._get_backend()
|
backend = await self._get_backend()
|
||||||
info = await backend.start_session(spec)
|
info = await backend.start_session(spec)
|
||||||
runtime_session = _RuntimeSession(info=info, lock=asyncio.Lock())
|
runtime_session = _RuntimeSession(info=info, lock=asyncio.Lock())
|
||||||
self._sessions[spec.session_id] = runtime_session
|
self._sessions[spec.session_id] = runtime_session
|
||||||
|
self.logger.info(
|
||||||
|
'LangBot Box session created: '
|
||||||
|
f'session_id={spec.session_id} '
|
||||||
|
f'backend_session_id={info.backend_session_id} '
|
||||||
|
f'backend={info.backend_name} '
|
||||||
|
f'image={info.image} '
|
||||||
|
f'network={info.network.value} '
|
||||||
|
f'host_path={info.host_path} '
|
||||||
|
f'host_path_mode={info.host_path_mode.value}'
|
||||||
|
)
|
||||||
return runtime_session
|
return runtime_session
|
||||||
|
|
||||||
async def _get_backend(self) -> BaseSandboxBackend:
|
async def _get_backend(self) -> BaseSandboxBackend:
|
||||||
@@ -113,6 +137,12 @@ class BoxRuntime:
|
|||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
self.logger.info(
|
||||||
|
'LangBot Box session cleanup: '
|
||||||
|
f'session_id={session_id} '
|
||||||
|
f'backend_session_id={runtime_session.info.backend_session_id} '
|
||||||
|
f'backend={runtime_session.info.backend_name}'
|
||||||
|
)
|
||||||
await self._backend.stop_session(runtime_session.info)
|
await self._backend.stop_session(runtime_session.info)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
self.logger.warning(f'Failed to clean up box session {session_id}: {exc}')
|
self.logger.warning(f'Failed to clean up box session {session_id}: {exc}')
|
||||||
@@ -126,3 +156,11 @@ class BoxRuntime:
|
|||||||
raise BoxSessionConflictError(
|
raise BoxSessionConflictError(
|
||||||
f'sandbox_exec session {spec.session_id} already exists with image={session.image}'
|
f'sandbox_exec session {spec.session_id} already exists with image={session.image}'
|
||||||
)
|
)
|
||||||
|
if session.host_path != spec.host_path:
|
||||||
|
raise BoxSessionConflictError(
|
||||||
|
f'sandbox_exec session {spec.session_id} already exists with host_path={session.host_path}'
|
||||||
|
)
|
||||||
|
if session.host_path_mode != spec.host_path_mode:
|
||||||
|
raise BoxSessionConflictError(
|
||||||
|
f'sandbox_exec session {spec.session_id} already exists with host_path_mode={session.host_path_mode.value}'
|
||||||
|
)
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
import pydantic
|
import pydantic
|
||||||
@@ -23,6 +25,8 @@ class BoxService:
|
|||||||
self.ap = ap
|
self.ap = ap
|
||||||
self.runtime = runtime or BoxRuntime(logger=ap.logger)
|
self.runtime = runtime or BoxRuntime(logger=ap.logger)
|
||||||
self.output_limit_chars = output_limit_chars
|
self.output_limit_chars = output_limit_chars
|
||||||
|
self.allowed_host_mount_roots = self._load_allowed_host_mount_roots()
|
||||||
|
self.default_host_workspace = self._load_default_host_workspace()
|
||||||
|
|
||||||
async def initialize(self):
|
async def initialize(self):
|
||||||
await self.runtime.initialize()
|
await self.runtime.initialize()
|
||||||
@@ -31,6 +35,8 @@ class BoxService:
|
|||||||
spec_payload = dict(parameters)
|
spec_payload = dict(parameters)
|
||||||
spec_payload.setdefault('session_id', str(query.query_id))
|
spec_payload.setdefault('session_id', str(query.query_id))
|
||||||
spec_payload.setdefault('env', {})
|
spec_payload.setdefault('env', {})
|
||||||
|
if spec_payload.get('host_path') in (None, '') and self.default_host_workspace is not None:
|
||||||
|
spec_payload['host_path'] = self.default_host_workspace
|
||||||
|
|
||||||
try:
|
try:
|
||||||
spec = BoxSpec.model_validate(spec_payload)
|
spec = BoxSpec.model_validate(spec_payload)
|
||||||
@@ -38,7 +44,18 @@ class BoxService:
|
|||||||
first_error = exc.errors()[0]
|
first_error = exc.errors()[0]
|
||||||
raise BoxValidationError(first_error.get('msg', 'invalid sandbox_exec arguments')) from exc
|
raise BoxValidationError(first_error.get('msg', 'invalid sandbox_exec arguments')) from exc
|
||||||
|
|
||||||
|
self._validate_host_mount(spec)
|
||||||
|
self.ap.logger.info(
|
||||||
|
'LangBot Box request: '
|
||||||
|
f'query_id={query.query_id} '
|
||||||
|
f'spec={json.dumps(self._summarize_spec(spec), ensure_ascii=False)}'
|
||||||
|
)
|
||||||
result = await self.runtime.execute(spec)
|
result = await self.runtime.execute(spec)
|
||||||
|
self.ap.logger.info(
|
||||||
|
'LangBot Box result: '
|
||||||
|
f'query_id={query.query_id} '
|
||||||
|
f'summary={json.dumps(self._summarize_result(result), ensure_ascii=False)}'
|
||||||
|
)
|
||||||
return self._serialize_result(result)
|
return self._serialize_result(result)
|
||||||
|
|
||||||
async def shutdown(self):
|
async def shutdown(self):
|
||||||
@@ -65,3 +82,78 @@ class BoxService:
|
|||||||
if len(text) <= self.output_limit_chars:
|
if len(text) <= self.output_limit_chars:
|
||||||
return text, False
|
return text, False
|
||||||
return f'{text[: self.output_limit_chars]}...', True
|
return f'{text[: self.output_limit_chars]}...', True
|
||||||
|
|
||||||
|
def _summarize_spec(self, spec: BoxSpec) -> dict:
|
||||||
|
cmd = spec.cmd.strip()
|
||||||
|
if len(cmd) > 400:
|
||||||
|
cmd = f'{cmd[:397]}...'
|
||||||
|
|
||||||
|
return {
|
||||||
|
'session_id': spec.session_id,
|
||||||
|
'workdir': spec.workdir,
|
||||||
|
'timeout_sec': spec.timeout_sec,
|
||||||
|
'network': spec.network.value,
|
||||||
|
'image': spec.image,
|
||||||
|
'host_path': spec.host_path,
|
||||||
|
'host_path_mode': spec.host_path_mode.value,
|
||||||
|
'env_keys': sorted(spec.env.keys()),
|
||||||
|
'cmd': cmd,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _summarize_result(self, result: BoxExecutionResult) -> dict:
|
||||||
|
stdout_preview = result.stdout[:200]
|
||||||
|
stderr_preview = result.stderr[:200]
|
||||||
|
if len(result.stdout) > 200:
|
||||||
|
stdout_preview = f'{stdout_preview}...'
|
||||||
|
if len(result.stderr) > 200:
|
||||||
|
stderr_preview = f'{stderr_preview}...'
|
||||||
|
|
||||||
|
return {
|
||||||
|
'session_id': result.session_id,
|
||||||
|
'backend': result.backend_name,
|
||||||
|
'status': result.status.value,
|
||||||
|
'exit_code': result.exit_code,
|
||||||
|
'duration_ms': result.duration_ms,
|
||||||
|
'stdout_preview': stdout_preview,
|
||||||
|
'stderr_preview': stderr_preview,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _load_allowed_host_mount_roots(self) -> list[str]:
|
||||||
|
box_config = getattr(self.ap, 'instance_config', None)
|
||||||
|
box_config_data = getattr(box_config, 'data', {}) if box_config is not None else {}
|
||||||
|
configured_roots = box_config_data.get('box', {}).get('allowed_host_mount_roots', [])
|
||||||
|
|
||||||
|
normalized_roots: list[str] = []
|
||||||
|
for root in configured_roots:
|
||||||
|
root_value = str(root).strip()
|
||||||
|
if not root_value:
|
||||||
|
continue
|
||||||
|
normalized_roots.append(os.path.realpath(os.path.abspath(root_value)))
|
||||||
|
|
||||||
|
return normalized_roots
|
||||||
|
|
||||||
|
def _load_default_host_workspace(self) -> str | None:
|
||||||
|
box_config = getattr(self.ap, 'instance_config', None)
|
||||||
|
box_config_data = getattr(box_config, 'data', {}) if box_config is not None else {}
|
||||||
|
default_host_workspace = str(box_config_data.get('box', {}).get('default_host_workspace', '')).strip()
|
||||||
|
if not default_host_workspace:
|
||||||
|
return None
|
||||||
|
return os.path.realpath(os.path.abspath(default_host_workspace))
|
||||||
|
|
||||||
|
def _validate_host_mount(self, spec: BoxSpec):
|
||||||
|
if spec.host_path is None:
|
||||||
|
return
|
||||||
|
|
||||||
|
host_path = os.path.realpath(spec.host_path)
|
||||||
|
if not os.path.isdir(host_path):
|
||||||
|
raise BoxValidationError('host_path must point to an existing directory on the host')
|
||||||
|
|
||||||
|
if not self.allowed_host_mount_roots:
|
||||||
|
raise BoxValidationError('host_path mounting is disabled because no allowed_host_mount_roots are configured')
|
||||||
|
|
||||||
|
for allowed_root in self.allowed_host_mount_roots:
|
||||||
|
if host_path == allowed_root or host_path.startswith(f'{allowed_root}{os.sep}'):
|
||||||
|
return
|
||||||
|
|
||||||
|
allowed_roots = ', '.join(self.allowed_host_mount_roots)
|
||||||
|
raise BoxValidationError(f'host_path is outside allowed_host_mount_roots: {allowed_roots}')
|
||||||
|
|||||||
@@ -29,7 +29,13 @@ SANDBOX_EXEC_SYSTEM_GUIDANCE = (
|
|||||||
'When sandbox_exec is available, use it for exact calculations, statistics, structured data parsing, '
|
'When sandbox_exec is available, use it for exact calculations, statistics, structured data parsing, '
|
||||||
'and code execution instead of estimating mentally. If the user provides numbers, tables, CSV-like text, '
|
'and code execution instead of estimating mentally. If the user provides numbers, tables, CSV-like text, '
|
||||||
'JSON, or other data and asks for a computed answer, prefer running a short Python script in sandbox_exec '
|
'JSON, or other data and asks for a computed answer, prefer running a short Python script in sandbox_exec '
|
||||||
'and then answer from the tool result.'
|
'and then answer from the tool result. Unless the user explicitly asks for the script, code, or implementation '
|
||||||
|
'details, do not include the generated script in the final answer; return the result and a brief explanation only.'
|
||||||
|
)
|
||||||
|
SANDBOX_EXEC_WORKSPACE_GUIDANCE = (
|
||||||
|
'A default host workspace is mounted at /workspace for file tasks. When the user asks to read, create, or '
|
||||||
|
'modify local files in the working directory, use sandbox_exec with /workspace paths directly; do not ask the '
|
||||||
|
'user for sandbox parameters such as host_path unless they explicitly need a different directory.'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -37,6 +43,15 @@ SANDBOX_EXEC_SYSTEM_GUIDANCE = (
|
|||||||
class LocalAgentRunner(runner.RequestRunner):
|
class LocalAgentRunner(runner.RequestRunner):
|
||||||
"""Local agent request runner"""
|
"""Local agent request runner"""
|
||||||
|
|
||||||
|
def _build_sandbox_system_guidance(self) -> str:
    """Compose the system prompt text that explains sandbox_exec to the model.

    Returns:
        ``SANDBOX_EXEC_SYSTEM_GUIDANCE`` alone, or followed by a space and
        ``SANDBOX_EXEC_WORKSPACE_GUIDANCE`` when a non-blank
        ``box.default_host_workspace`` is present in the instance config.
    """
    instance_config = getattr(self.ap, 'instance_config', None)
    config_data = getattr(instance_config, 'data', None) or {}
    # `box:` with no value parses to None; treat it like a missing section.
    box_section = config_data.get('box') or {}

    configured = box_section.get('default_host_workspace')
    # Check for None before str(): str(None) == 'None' is truthy and would
    # advertise a mounted workspace that is not actually configured.
    has_default_workspace = configured is not None and bool(str(configured).strip())

    if has_default_workspace:
        return f'{SANDBOX_EXEC_SYSTEM_GUIDANCE} {SANDBOX_EXEC_WORKSPACE_GUIDANCE}'
    return SANDBOX_EXEC_SYSTEM_GUIDANCE
|
||||||
|
|
||||||
def _build_request_messages(
|
def _build_request_messages(
|
||||||
self,
|
self,
|
||||||
query: pipeline_query.Query,
|
query: pipeline_query.Query,
|
||||||
@@ -48,7 +63,7 @@ class LocalAgentRunner(runner.RequestRunner):
|
|||||||
req_messages.append(
|
req_messages.append(
|
||||||
provider_message.Message(
|
provider_message.Message(
|
||||||
role='system',
|
role='system',
|
||||||
content=SANDBOX_EXEC_SYSTEM_GUIDANCE,
|
content=self._build_sandbox_system_guidance(),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
|
||||||
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
|
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
|
||||||
from langbot_plugin.api.entities.events import pipeline_query
|
from langbot_plugin.api.entities.events import pipeline_query
|
||||||
|
|
||||||
@@ -18,6 +20,11 @@ class NativeToolLoader(loader.ToolLoader):
|
|||||||
async def invoke_tool(self, name: str, parameters: dict, query: pipeline_query.Query):
    """Run the native sandbox_exec tool for the given query.

    Only ``SANDBOX_EXEC_TOOL_NAME`` is served; any other name raises
    ``ValueError``. A summary of the call parameters is logged before the
    request is handed to the application's box service, whose result is
    returned unchanged.
    """
    if name != self.SANDBOX_EXEC_TOOL_NAME:
        raise ValueError(f'未找到工具: {name}')

    log_line = (
        f'sandbox_exec tool invoked: query_id={query.query_id} '
        f'parameters={json.dumps(self._summarize_parameters(parameters), ensure_ascii=False)}'
    )
    self.ap.logger.info(log_line)

    box_service = self.ap.box_service
    return await box_service.execute_sandbox_tool(parameters, query)
|
||||||
|
|
||||||
async def shutdown(self):
|
async def shutdown(self):
|
||||||
@@ -61,6 +68,19 @@ class NativeToolLoader(loader.ToolLoader):
|
|||||||
'type': 'string',
|
'type': 'string',
|
||||||
'description': 'Optional sandbox session id. Defaults to the current request id for reuse.',
|
'description': 'Optional sandbox session id. Defaults to the current request id for reuse.',
|
||||||
},
|
},
|
||||||
|
'host_path': {
|
||||||
|
'type': 'string',
|
||||||
|
'description': (
|
||||||
|
'Optional absolute host directory path to mount into the sandbox as /workspace. '
|
||||||
|
'The path must be under an allowed host mount root.'
|
||||||
|
),
|
||||||
|
},
|
||||||
|
'host_path_mode': {
|
||||||
|
'type': 'string',
|
||||||
|
'description': 'Mount mode for host_path. Use rw to create or modify host files.',
|
||||||
|
'enum': ['ro', 'rw'],
|
||||||
|
'default': 'rw',
|
||||||
|
},
|
||||||
'env': {
|
'env': {
|
||||||
'type': 'object',
|
'type': 'object',
|
||||||
'description': 'Optional environment variables to expose inside the sandbox.',
|
'description': 'Optional environment variables to expose inside the sandbox.',
|
||||||
@@ -73,3 +93,17 @@ class NativeToolLoader(loader.ToolLoader):
|
|||||||
},
|
},
|
||||||
func=lambda parameters: parameters,
|
func=lambda parameters: parameters,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _summarize_parameters(self, parameters: dict) -> dict:
|
||||||
|
summary = dict(parameters)
|
||||||
|
cmd = str(summary.get('cmd', '')).strip()
|
||||||
|
if len(cmd) > 400:
|
||||||
|
cmd = f'{cmd[:397]}...'
|
||||||
|
summary['cmd'] = cmd
|
||||||
|
|
||||||
|
env = summary.get('env')
|
||||||
|
if isinstance(env, dict):
|
||||||
|
summary['env_keys'] = sorted(str(key) for key in env.keys())
|
||||||
|
del summary['env']
|
||||||
|
|
||||||
|
return summary
|
||||||
|
|||||||
@@ -87,6 +87,11 @@ monitoring:
|
|||||||
retention_days: 30
|
retention_days: 30
|
||||||
# Cleanup check interval in hours
|
# Cleanup check interval in hours
|
||||||
check_interval_hours: 1
|
check_interval_hours: 1
|
||||||
|
box:
|
||||||
|
default_host_workspace: './data/box-workspaces/default'
|
||||||
|
allowed_host_mount_roots:
|
||||||
|
- './data/box-workspaces'
|
||||||
|
- '/tmp'
|
||||||
space:
|
space:
|
||||||
# Space service URL for OAuth and API
|
# Space service URL for OAuth and API
|
||||||
url: 'https://space.langbot.app'
|
url: 'https://space.langbot.app'
|
||||||
|
|||||||
@@ -49,7 +49,7 @@
|
|||||||
"prompt": [
|
"prompt": [
|
||||||
{
|
{
|
||||||
"role": "system",
|
"role": "system",
|
||||||
"content": "You are a helpful assistant. When tools are available, use them for exact calculations, data processing, and code execution instead of guessing."
|
"content": "You are a helpful assistant. When tools are available, use them for exact calculations, data processing, and code execution instead of guessing. Unless the user explicitly asks for code or a script, return the result directly instead of printing the generated code."
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"knowledge-bases": [],
|
"knowledge-bases": [],
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import datetime as dt
|
import datetime as dt
|
||||||
|
import os
|
||||||
from types import SimpleNamespace
|
from types import SimpleNamespace
|
||||||
from unittest.mock import Mock
|
from unittest.mock import Mock
|
||||||
|
|
||||||
@@ -9,8 +10,15 @@ import pytest
|
|||||||
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
|
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
|
||||||
|
|
||||||
from langbot.pkg.box.backend import BaseSandboxBackend
|
from langbot.pkg.box.backend import BaseSandboxBackend
|
||||||
from langbot.pkg.box.errors import BoxBackendUnavailableError
|
from langbot.pkg.box.errors import BoxBackendUnavailableError, BoxSessionConflictError, BoxValidationError
|
||||||
from langbot.pkg.box.models import BoxExecutionResult, BoxExecutionStatus, BoxNetworkMode, BoxSessionInfo, BoxSpec
|
from langbot.pkg.box.models import (
|
||||||
|
BoxExecutionResult,
|
||||||
|
BoxExecutionStatus,
|
||||||
|
BoxHostMountMode,
|
||||||
|
BoxNetworkMode,
|
||||||
|
BoxSessionInfo,
|
||||||
|
BoxSpec,
|
||||||
|
)
|
||||||
from langbot.pkg.box.runtime import BoxRuntime
|
from langbot.pkg.box.runtime import BoxRuntime
|
||||||
from langbot.pkg.box.service import BoxService
|
from langbot.pkg.box.service import BoxService
|
||||||
|
|
||||||
@@ -21,6 +29,7 @@ class FakeBackend(BaseSandboxBackend):
|
|||||||
self.name = 'fake'
|
self.name = 'fake'
|
||||||
self.available = available
|
self.available = available
|
||||||
self.start_calls: list[str] = []
|
self.start_calls: list[str] = []
|
||||||
|
self.start_specs: list[BoxSpec] = []
|
||||||
self.exec_calls: list[tuple[str, str]] = []
|
self.exec_calls: list[tuple[str, str]] = []
|
||||||
self.stop_calls: list[str] = []
|
self.stop_calls: list[str] = []
|
||||||
|
|
||||||
@@ -29,6 +38,7 @@ class FakeBackend(BaseSandboxBackend):
|
|||||||
|
|
||||||
async def start_session(self, spec: BoxSpec) -> BoxSessionInfo:
|
async def start_session(self, spec: BoxSpec) -> BoxSessionInfo:
|
||||||
self.start_calls.append(spec.session_id)
|
self.start_calls.append(spec.session_id)
|
||||||
|
self.start_specs.append(spec)
|
||||||
now = dt.datetime.now(dt.UTC)
|
now = dt.datetime.now(dt.UTC)
|
||||||
return BoxSessionInfo(
|
return BoxSessionInfo(
|
||||||
session_id=spec.session_id,
|
session_id=spec.session_id,
|
||||||
@@ -36,6 +46,8 @@ class FakeBackend(BaseSandboxBackend):
|
|||||||
backend_session_id=f'backend-{spec.session_id}',
|
backend_session_id=f'backend-{spec.session_id}',
|
||||||
image=spec.image,
|
image=spec.image,
|
||||||
network=spec.network,
|
network=spec.network,
|
||||||
|
host_path=spec.host_path,
|
||||||
|
host_path_mode=spec.host_path_mode,
|
||||||
created_at=now,
|
created_at=now,
|
||||||
last_used_at=now,
|
last_used_at=now,
|
||||||
)
|
)
|
||||||
@@ -60,6 +72,20 @@ def make_query(query_id: int = 42) -> pipeline_query.Query:
|
|||||||
return pipeline_query.Query.model_construct(query_id=query_id)
|
return pipeline_query.Query.model_construct(query_id=query_id)
|
||||||
|
|
||||||
|
|
||||||
|
def make_app(logger: Mock, allowed_host_mount_roots: list[str] | None = None):
    """Build a minimal application stand-in for ``BoxService`` tests.

    The returned namespace exposes ``logger`` and an ``instance_config``
    whose ``data['box']`` section carries the given allowed mount roots
    (an empty list when omitted) and an empty default host workspace.
    """
    box_section = {
        'allowed_host_mount_roots': allowed_host_mount_roots or [],
        'default_host_workspace': '',
    }
    instance_config = SimpleNamespace(data={'box': box_section})
    return SimpleNamespace(logger=logger, instance_config=instance_config)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_box_runtime_reuses_request_session():
|
async def test_box_runtime_reuses_request_session():
|
||||||
logger = Mock()
|
logger = Mock()
|
||||||
@@ -82,7 +108,7 @@ async def test_box_service_defaults_session_id_from_query():
|
|||||||
logger = Mock()
|
logger = Mock()
|
||||||
backend = FakeBackend(logger)
|
backend = FakeBackend(logger)
|
||||||
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
||||||
service = BoxService(SimpleNamespace(logger=logger), runtime=runtime)
|
service = BoxService(make_app(logger), runtime=runtime)
|
||||||
await service.initialize()
|
await service.initialize()
|
||||||
|
|
||||||
result = await service.execute_sandbox_tool({'cmd': 'pwd', 'network': BoxNetworkMode.OFF.value}, make_query(7))
|
result = await service.execute_sandbox_tool({'cmd': 'pwd', 'network': BoxNetworkMode.OFF.value}, make_query(7))
|
||||||
@@ -97,8 +123,106 @@ async def test_box_service_fails_closed_when_backend_unavailable():
|
|||||||
logger = Mock()
|
logger = Mock()
|
||||||
backend = FakeBackend(logger, available=False)
|
backend = FakeBackend(logger, available=False)
|
||||||
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
||||||
service = BoxService(SimpleNamespace(logger=logger), runtime=runtime)
|
service = BoxService(make_app(logger), runtime=runtime)
|
||||||
await service.initialize()
|
await service.initialize()
|
||||||
|
|
||||||
with pytest.raises(BoxBackendUnavailableError):
|
with pytest.raises(BoxBackendUnavailableError):
|
||||||
await service.execute_sandbox_tool({'cmd': 'echo hello'}, make_query(9))
|
await service.execute_sandbox_tool({'cmd': 'echo hello'}, make_query(9))
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_box_service_allows_host_mount_under_configured_root(tmp_path):
    """A host_path below an allowed root is accepted and reaches the backend."""
    logger = Mock()
    backend = FakeBackend(logger)
    runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
    host_dir = tmp_path / 'mounted-workspace'
    host_dir.mkdir()
    service = BoxService(make_app(logger, [str(tmp_path)]), runtime=runtime)
    await service.initialize()

    result = await service.execute_sandbox_tool(
        {
            'cmd': 'pwd',
            'host_path': str(host_dir),
            'host_path_mode': BoxHostMountMode.READ_WRITE.value,
        },
        make_query(11),
    )

    assert result['ok'] is True
    assert backend.start_calls == ['11']
    # Verify the mount itself was forwarded, not merely that a session started.
    assert backend.start_specs[0].host_path == str(host_dir)
    assert backend.start_specs[0].host_path_mode == BoxHostMountMode.READ_WRITE
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_box_service_uses_default_host_workspace_when_host_path_omitted(tmp_path):
    """With no host_path argument, the configured default workspace is mounted."""
    mock_logger = Mock()
    fake_backend = FakeBackend(mock_logger)
    box_runtime = BoxRuntime(logger=mock_logger, backends=[fake_backend], session_ttl_sec=300)

    workspace_dir = tmp_path / 'default-workspace'
    workspace_dir.mkdir()

    app = make_app(mock_logger, [str(tmp_path)])
    app.instance_config.data['box']['default_host_workspace'] = str(workspace_dir)
    service = BoxService(app, runtime=box_runtime)
    await service.initialize()

    outcome = await service.execute_sandbox_tool({'cmd': 'pwd'}, make_query(15))

    assert outcome['ok'] is True
    assert fake_backend.start_calls == ['15']
    assert fake_backend.exec_calls == [('15', 'pwd')]
    # The service stores the symlink-resolved form of the configured default.
    assert fake_backend.start_specs[0].host_path == os.path.realpath(workspace_dir)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_box_service_rejects_host_mount_outside_allowed_roots(tmp_path):
    """A host_path outside every allowed root fails validation before any session starts."""
    logger = Mock()
    backend = FakeBackend(logger)
    runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
    allowed_root = tmp_path / 'allowed'
    disallowed_root = tmp_path / 'disallowed'
    allowed_root.mkdir()
    disallowed_root.mkdir()
    service = BoxService(make_app(logger, [str(allowed_root)]), runtime=runtime)
    await service.initialize()

    with pytest.raises(BoxValidationError):
        await service.execute_sandbox_tool(
            {
                'cmd': 'pwd',
                'host_path': str(disallowed_root),
            },
            make_query(12),
        )

    # The rejection must happen before the backend is asked to do anything.
    assert backend.start_calls == []
    assert backend.exec_calls == []
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_box_runtime_rejects_host_mount_conflict_in_same_session(tmp_path):
    """Reusing a session id with a different host_path raises a conflict error."""
    mock_logger = Mock()
    box_runtime = BoxRuntime(logger=mock_logger, backends=[FakeBackend(mock_logger)], session_ttl_sec=300)
    await box_runtime.initialize()

    first_host_dir = tmp_path / 'first'
    second_host_dir = tmp_path / 'second'
    for directory in (first_host_dir, second_host_dir):
        directory.mkdir()

    # Both specs share one session id but point at different host directories.
    first_payload = {
        'cmd': 'echo first',
        'session_id': 'req-mount',
        'host_path': os.path.realpath(first_host_dir),
    }
    second_payload = {
        'cmd': 'echo second',
        'session_id': 'req-mount',
        'host_path': os.path.realpath(second_host_dir),
    }
    first_spec = BoxSpec.model_validate(first_payload)
    second_spec = BoxSpec.model_validate(second_payload)

    await box_runtime.execute(first_spec)

    with pytest.raises(BoxSessionConflictError):
        await box_runtime.execute(second_spec)
|
||||||
|
|||||||
@@ -124,6 +124,13 @@ async def test_localagent_uses_sandbox_exec_for_exact_calculation():
|
|||||||
model_mgr=SimpleNamespace(get_model_by_uuid=AsyncMock(return_value=model)),
|
model_mgr=SimpleNamespace(get_model_by_uuid=AsyncMock(return_value=model)),
|
||||||
tool_mgr=tool_manager,
|
tool_mgr=tool_manager,
|
||||||
rag_mgr=SimpleNamespace(),
|
rag_mgr=SimpleNamespace(),
|
||||||
|
instance_config=SimpleNamespace(
|
||||||
|
data={
|
||||||
|
'box': {
|
||||||
|
'default_host_workspace': '/home/yhh/workspace/box-demo',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
runner = LocalAgentRunner(app, pipeline_config={})
|
runner = LocalAgentRunner(app, pipeline_config={})
|
||||||
@@ -144,6 +151,8 @@ async def test_localagent_uses_sandbox_exec_for_exact_calculation():
|
|||||||
message.role == 'system'
|
message.role == 'system'
|
||||||
and 'sandbox_exec' in str(message.content)
|
and 'sandbox_exec' in str(message.content)
|
||||||
and 'exact calculations' in str(message.content)
|
and 'exact calculations' in str(message.content)
|
||||||
|
and 'Unless the user explicitly asks for the script' in str(message.content)
|
||||||
|
and '/workspace' in str(message.content)
|
||||||
for message in first_request['messages']
|
for message in first_request['messages']
|
||||||
)
|
)
|
||||||
assert [tool.name for tool in first_request['funcs']] == ['sandbox_exec']
|
assert [tool.name for tool in first_request['funcs']] == ['sandbox_exec']
|
||||||
|
|||||||
Reference in New Issue
Block a user