From 70c56af4eec715735cf722dcfc0cbd110edbac89 Mon Sep 17 00:00:00 2001
From: youhuanghe <1051233107@qq.com>
Date: Thu, 19 Mar 2026 14:04:37 +0000
Subject: [PATCH] feat(box): add host workspace mounting and sandbox_exec
 guidance

---
 src/langbot/pkg/box/backend.py                |  29 +++-
 src/langbot/pkg/box/models.py                 |  28 ++++
 src/langbot/pkg/box/runtime.py                |  38 +++++
 src/langbot/pkg/box/service.py                |  92 ++++++++++++
 .../pkg/provider/runners/localagent.py        |  19 ++-
 .../pkg/provider/tools/loaders/native.py      |  34 +++++
 src/langbot/templates/config.yaml             |   5 +
 .../templates/default-pipeline-config.json    |   2 +-
 tests/unit_tests/box/test_box_service.py      | 132 +++++++++++++++++-
 .../provider/test_localagent_sandbox_exec.py  |   9 ++
 10 files changed, 380 insertions(+), 8 deletions(-)

diff --git a/src/langbot/pkg/box/backend.py b/src/langbot/pkg/box/backend.py
index 3c6672de..96e3432e 100644
--- a/src/langbot/pkg/box/backend.py
+++ b/src/langbot/pkg/box/backend.py
@@ -11,7 +11,7 @@ import shutil
 import uuid
 
 from .errors import BoxError
-from .models import BoxExecutionResult, BoxExecutionStatus, BoxSessionInfo, BoxSpec
+from .models import DEFAULT_BOX_MOUNT_PATH, BoxExecutionResult, BoxExecutionStatus, BoxSessionInfo, BoxSpec
 
 
 @dataclasses.dataclass(slots=True)
@@ -83,8 +83,19 @@ class CLISandboxBackend(BaseSandboxBackend):
         if spec.network.value == 'off':
             args.extend(['--network', 'none'])
 
+        if spec.host_path is not None:
+            mount_spec = f'{spec.host_path}:{DEFAULT_BOX_MOUNT_PATH}:{spec.host_path_mode.value}'
+            args.extend(['-v', mount_spec])
+
         args.extend([spec.image, 'sh', '-lc', 'while true; do sleep 3600; done'])
 
+        self.logger.info(
+            f'LangBot Box backend start_session: backend={self.name} '
+            f'session_id={spec.session_id} container_name={container_name} '
+            f'image={spec.image} network={spec.network.value} '
+            f'host_path={spec.host_path} host_path_mode={spec.host_path_mode.value}'
+        )
+
         await self._run_command(args, timeout_sec=30, check=True)
 
         return BoxSessionInfo(
@@ -93,6 +104,8 @@ class CLISandboxBackend(BaseSandboxBackend):
             backend_session_id=container_name,
             image=spec.image,
             network=spec.network,
+            host_path=spec.host_path,
+            host_path_mode=spec.host_path_mode,
             created_at=now,
             last_used_at=now,
         )
@@ -113,6 +126,16 @@ class CLISandboxBackend(BaseSandboxBackend):
             ]
         )
 
+        cmd_preview = spec.cmd.strip()
+        if len(cmd_preview) > 400:
+            cmd_preview = f'{cmd_preview[:397]}...'
+        self.logger.info(
+            f'LangBot Box backend exec: backend={self.name} '
+            f'session_id={session.session_id} container_name={session.backend_session_id} '
+            f'workdir={spec.workdir} timeout_sec={spec.timeout_sec} '
+            f'env_keys={sorted(spec.env.keys())} cmd={cmd_preview}'
+        )
+
         result = await self._run_command(args, timeout_sec=spec.timeout_sec, check=False)
         duration_ms = int((dt.datetime.now(dt.UTC) - start).total_seconds() * 1000)
 
@@ -138,6 +161,10 @@ class CLISandboxBackend(BaseSandboxBackend):
         )
 
     async def stop_session(self, session: BoxSessionInfo):
+        self.logger.info(
+            f'LangBot Box backend stop_session: backend={self.name} '
+            f'session_id={session.session_id} container_name={session.backend_session_id}'
+        )
         await self._run_command(
             [self.command, 'rm', '-f', session.backend_session_id],
             timeout_sec=20,
diff --git a/src/langbot/pkg/box/models.py b/src/langbot/pkg/box/models.py
index 8c9d4a23..9c1bb2f7 100644
--- a/src/langbot/pkg/box/models.py
+++ b/src/langbot/pkg/box/models.py
@@ -7,6 +7,7 @@ import pydantic
 
 
 DEFAULT_BOX_IMAGE = 'python:3.11-slim'
+DEFAULT_BOX_MOUNT_PATH = '/workspace'
 
 
 class BoxNetworkMode(str, enum.Enum):
@@ -19,6 +20,11 @@ class BoxExecutionStatus(str, enum.Enum):
     TIMED_OUT = 'timed_out'
 
 
+class BoxHostMountMode(str, enum.Enum):  # values double as the mode suffix of the '-v' mount spec
+    READ_ONLY = 'ro'  # host directory is mounted read-only
+    READ_WRITE = 'rw'  # host directory is mounted writable
+
+
 class BoxSpec(pydantic.BaseModel):
     cmd: str
     workdir: str = '/workspace'
@@ -27,6 +33,8 @@ class BoxSpec(pydantic.BaseModel):
     session_id: str
     env: dict[str, str] = pydantic.Field(default_factory=dict)
     image: str = DEFAULT_BOX_IMAGE
+    host_path: str | None = None
+    host_path_mode: BoxHostMountMode = BoxHostMountMode.READ_WRITE
 
     @pydantic.field_validator('cmd')
     @classmethod
@@ -64,6 +72,24 @@ class BoxSpec(pydantic.BaseModel):
     def validate_env(cls, value: dict[str, str]) -> dict[str, str]:
         return {str(k): str(v) for k, v in value.items()}
 
+    @pydantic.field_validator('host_path')
+    @classmethod
+    def validate_host_path(cls, value: str | None) -> str | None:
+        """Strip host_path and require an absolute path free of ':'; None passes through unchanged."""
+        value = None if value is None else value.strip()
+        # ':' separates the fields of the '-v host:container:mode' mount spec, so it cannot appear in the path.
+        if value is not None and (not value.startswith('/') or ':' in value):
+            raise ValueError("host_path must be an absolute host path without ':' characters")
+        return value
+
+    @pydantic.model_validator(mode='after')
+    def validate_host_mount_consistency(self) -> 'BoxSpec':
+        """Require workdir to be /workspace or below it (whole path segments) when host_path is set."""
+        inside = self.workdir == DEFAULT_BOX_MOUNT_PATH or self.workdir.startswith(f'{DEFAULT_BOX_MOUNT_PATH}/')
+        if self.host_path is not None and not inside:
+            raise ValueError('workdir must stay under /workspace when host_path is provided')
+        return self
+
 
 class BoxSessionInfo(pydantic.BaseModel):
     session_id: str
@@ -71,6 +97,8 @@ class BoxSessionInfo(pydantic.BaseModel):
     backend_session_id: str
     image: str
     network: BoxNetworkMode
+    host_path: str | None = None
+    host_path_mode: BoxHostMountMode = BoxHostMountMode.READ_WRITE
     created_at: dt.datetime
     last_used_at: dt.datetime
 
diff --git a/src/langbot/pkg/box/runtime.py b/src/langbot/pkg/box/runtime.py
index 6bfdab12..cfbfc40a 100644
--- a/src/langbot/pkg/box/runtime.py
+++ b/src/langbot/pkg/box/runtime.py
@@ -37,6 +37,14 @@ class BoxRuntime:
         session = await self._get_or_create_session(spec)
 
         async with session.lock:
+            self.logger.info(
+                'LangBot Box execute: '
+                f'session_id={spec.session_id} '
+                f'backend_session_id={session.info.backend_session_id} '
+                f'backend={session.info.backend_name} '
+                f'workdir={spec.workdir} '
+                f'timeout_sec={spec.timeout_sec}'
+            )
             result = await (await self._get_backend()).exec(session.info, spec)
 
         async with self._lock:
@@ -63,12 +71,28 @@ class BoxRuntime:
             if existing is not None:
                 self._assert_session_compatible(existing.info, spec)
                 existing.info.last_used_at = dt.datetime.now(dt.UTC)
+                self.logger.info(
+                    'LangBot Box session reused: '
+                    f'session_id={spec.session_id} '
+                    f'backend_session_id={existing.info.backend_session_id} '
+                    f'backend={existing.info.backend_name}'
+                )
                 return existing
 
             backend = await self._get_backend()
             info = await backend.start_session(spec)
             runtime_session = _RuntimeSession(info=info, lock=asyncio.Lock())
             self._sessions[spec.session_id] = runtime_session
+            self.logger.info(
+                'LangBot Box session created: '
+                f'session_id={spec.session_id} '
+                f'backend_session_id={info.backend_session_id} '
+                f'backend={info.backend_name} '
+                f'image={info.image} '
+                f'network={info.network.value} '
+                f'host_path={info.host_path} '
+                f'host_path_mode={info.host_path_mode.value}'
+            )
             return runtime_session
 
     async def _get_backend(self) -> BaseSandboxBackend:
@@ -113,6 +137,12 @@ class BoxRuntime:
             return
 
         try:
+            self.logger.info(
+                'LangBot Box session cleanup: '
+                f'session_id={session_id} '
+                f'backend_session_id={runtime_session.info.backend_session_id} '
+                f'backend={runtime_session.info.backend_name}'
+            )
             await self._backend.stop_session(runtime_session.info)
         except Exception as exc:
             self.logger.warning(f'Failed to clean up box session {session_id}: {exc}')
@@ -126,3 +156,11 @@ class BoxRuntime:
             raise BoxSessionConflictError(
                 f'sandbox_exec session {spec.session_id} already exists with image={session.image}'
             )
+        if session.host_path != spec.host_path:
+            raise BoxSessionConflictError(
+                f'sandbox_exec session {spec.session_id} already exists with host_path={session.host_path}'
+            )
+        if session.host_path_mode != spec.host_path_mode:
+            raise BoxSessionConflictError(
+                f'sandbox_exec session {spec.session_id} already exists with host_path_mode={session.host_path_mode.value}'
+            )
diff --git a/src/langbot/pkg/box/service.py b/src/langbot/pkg/box/service.py
index d1114749..650c76ff 100644
--- a/src/langbot/pkg/box/service.py
+++ b/src/langbot/pkg/box/service.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
 
+import json
+import os
 from typing import TYPE_CHECKING
 
 import pydantic
@@ -23,6 +25,8 @@ class BoxService:
         self.ap = ap
         self.runtime = runtime or BoxRuntime(logger=ap.logger)
         self.output_limit_chars = output_limit_chars
+        self.allowed_host_mount_roots = self._load_allowed_host_mount_roots()
+        self.default_host_workspace = self._load_default_host_workspace()
 
     async def initialize(self):
         await self.runtime.initialize()
@@ -31,6 +35,8 @@ class BoxService:
         spec_payload = dict(parameters)
         spec_payload.setdefault('session_id', str(query.query_id))
         spec_payload.setdefault('env', {})
+        if spec_payload.get('host_path') in (None, '') and self.default_host_workspace is not None:
+            spec_payload['host_path'] = self.default_host_workspace
 
         try:
             spec = BoxSpec.model_validate(spec_payload)
@@ -38,7 +44,18 @@ class BoxService:
             first_error = exc.errors()[0]
             raise BoxValidationError(first_error.get('msg', 'invalid sandbox_exec arguments')) from exc
 
+        self._validate_host_mount(spec)
+        self.ap.logger.info(
+            'LangBot Box request: '
+            f'query_id={query.query_id} '
+            f'spec={json.dumps(self._summarize_spec(spec), ensure_ascii=False)}'
+        )
         result = await self.runtime.execute(spec)
+        self.ap.logger.info(
+            'LangBot Box result: '
+            f'query_id={query.query_id} '
+            f'summary={json.dumps(self._summarize_result(result), ensure_ascii=False)}'
+        )
         return self._serialize_result(result)
 
     async def shutdown(self):
@@ -65,3 +82,78 @@ class BoxService:
         if len(text) <= self.output_limit_chars:
             return text, False
         return f'{text[: self.output_limit_chars]}...', True
+
+    def _summarize_spec(self, spec: BoxSpec) -> dict:
+        """Build a log-friendly dict for spec: env reduced to key names, cmd capped at 400 chars."""
+        command = spec.cmd.strip()
+        command = command if len(command) <= 400 else f'{command[:397]}...'
+        summary = {
+            'session_id': spec.session_id,
+            'workdir': spec.workdir,
+            'timeout_sec': spec.timeout_sec,
+            'network': spec.network.value,
+            'image': spec.image,
+            'host_path': spec.host_path,
+            'host_path_mode': spec.host_path_mode.value,
+            'env_keys': sorted(spec.env.keys()),
+            'cmd': command,
+        }
+        return summary
+
+    def _summarize_result(self, result: BoxExecutionResult) -> dict:
+        """Build a log-friendly dict for result with stdout/stderr cut to 200-char previews."""
+        previews = []
+        for text in (result.stdout, result.stderr):
+            head = text[:200]
+            previews.append(f'{head}...' if len(text) > 200 else head)
+        stdout_preview, stderr_preview = previews
+
+        return {
+            'session_id': result.session_id,
+            'backend': result.backend_name,
+            'status': result.status.value,
+            'exit_code': result.exit_code,
+            'duration_ms': result.duration_ms,
+            'stdout_preview': stdout_preview,
+            'stderr_preview': stderr_preview,
+        }
+
+    def _load_allowed_host_mount_roots(self) -> list[str]:
+        """Return box.allowed_host_mount_roots from the instance config as resolved absolute paths.
+
+        Missing or null sections give an empty list; a single string counts as one root; blanks are skipped.
+        """
+        config_data = getattr(getattr(self.ap, 'instance_config', None), 'data', None) or {}
+        configured_roots = (config_data.get('box') or {}).get('allowed_host_mount_roots') or []
+        if isinstance(configured_roots, str):
+            # A bare string would otherwise be iterated character by character.
+            configured_roots = [configured_roots]
+
+        cleaned_roots = (str(root).strip() for root in configured_roots)
+        return [os.path.realpath(os.path.abspath(root)) for root in cleaned_roots if root]
+
+    def _load_default_host_workspace(self) -> str | None:
+        """Return box.default_host_workspace as a resolved absolute path, or None when unset, null or blank."""
+        config_data = getattr(getattr(self.ap, 'instance_config', None), 'data', None) or {}
+        box_config = config_data.get('box') or {}
+        # `or ''` keeps a null config value from becoming the literal path 'None'.
+        workspace = str(box_config.get('default_host_workspace') or '').strip()
+        return os.path.realpath(os.path.abspath(workspace)) if workspace else None
+
+    def _validate_host_mount(self, spec: BoxSpec):
+        """Reject a host_path that is not an existing directory inside an allowed mount root.
+
+        Symlinks are resolved before the comparison. Raises BoxValidationError on any violation.
+        """
+        if spec.host_path is None:
+            return
+        host_path = os.path.realpath(spec.host_path)
+        if not os.path.isdir(host_path):
+            raise BoxValidationError('host_path must point to an existing directory on the host')
+        if not self.allowed_host_mount_roots:
+            raise BoxValidationError('host_path mounting is disabled because no allowed_host_mount_roots are configured')
+        # commonpath compares whole path components and, unlike a string prefix, also works for the root '/'.
+        if any(os.path.commonpath([host_path, root]) == root for root in self.allowed_host_mount_roots):
+            return
+        allowed_roots = ', '.join(self.allowed_host_mount_roots)
+        raise BoxValidationError(f'host_path is outside allowed_host_mount_roots: {allowed_roots}')
diff --git a/src/langbot/pkg/provider/runners/localagent.py b/src/langbot/pkg/provider/runners/localagent.py
index 7b7088b0..03b28a18 100644
--- a/src/langbot/pkg/provider/runners/localagent.py
+++ b/src/langbot/pkg/provider/runners/localagent.py
@@ -29,7 +29,13 @@ SANDBOX_EXEC_SYSTEM_GUIDANCE = (
     'When sandbox_exec is available, use it for exact calculations, statistics, structured data parsing, '
     'and code execution instead of estimating mentally. If the user provides numbers, tables, CSV-like text, '
     'JSON, or other data and asks for a computed answer, prefer running a short Python script in sandbox_exec '
-    'and then answer from the tool result.'
+    'and then answer from the tool result. Unless the user explicitly asks for the script, code, or implementation '
+    'details, do not include the generated script in the final answer; return the result and a brief explanation only.'
+)
+SANDBOX_EXEC_WORKSPACE_GUIDANCE = (  # appended to the system guidance only when box.default_host_workspace is set
+    'A default host workspace is mounted at /workspace for file tasks. When the user asks to read, create, or '
+    'modify local files in the working directory, use sandbox_exec with /workspace paths directly; do not ask the '
+    'user for sandbox parameters such as host_path unless they explicitly need a different directory.'
 )
 
 
@@ -37,6 +43,15 @@ SANDBOX_EXEC_SYSTEM_GUIDANCE = (
 class LocalAgentRunner(runner.RequestRunner):
     """Local agent request runner"""
 
+    def _build_sandbox_system_guidance(self) -> str:
+        """Return the sandbox_exec guidance, adding the workspace note when a default host workspace is set."""
+        config_data = getattr(getattr(self.ap, 'instance_config', None), 'data', None) or {}
+        # `or ''` keeps a null config value from being read as the non-empty string 'None'.
+        default_host_workspace = str((config_data.get('box') or {}).get('default_host_workspace') or '').strip()
+        if not default_host_workspace:
+            return SANDBOX_EXEC_SYSTEM_GUIDANCE
+        return f'{SANDBOX_EXEC_SYSTEM_GUIDANCE} {SANDBOX_EXEC_WORKSPACE_GUIDANCE}'
+
     def _build_request_messages(
         self,
         query: pipeline_query.Query,
@@ -48,7 +63,7 @@ class LocalAgentRunner(runner.RequestRunner):
             req_messages.append(
                 provider_message.Message(
                     role='system',
-                    content=SANDBOX_EXEC_SYSTEM_GUIDANCE,
+                    content=self._build_sandbox_system_guidance(),
                 )
             )
 
diff --git a/src/langbot/pkg/provider/tools/loaders/native.py b/src/langbot/pkg/provider/tools/loaders/native.py
index 0fe787ee..6087351e 100644
--- a/src/langbot/pkg/provider/tools/loaders/native.py
+++ b/src/langbot/pkg/provider/tools/loaders/native.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
 
+import json
+
 import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
 from langbot_plugin.api.entities.events import pipeline_query
 
@@ -18,6 +20,11 @@ class NativeToolLoader(loader.ToolLoader):
     async def invoke_tool(self, name: str, parameters: dict, query: pipeline_query.Query):
         if name != self.SANDBOX_EXEC_TOOL_NAME:
             raise ValueError(f'未找到工具: {name}')
+        self.ap.logger.info(  # log a summarized view of the call (see _summarize_parameters) before delegating
+            'sandbox_exec tool invoked: '
+            f'query_id={query.query_id} '
+            f'parameters={json.dumps(self._summarize_parameters(parameters), ensure_ascii=False)}'
+        )
         return await self.ap.box_service.execute_sandbox_tool(parameters, query)
 
     async def shutdown(self):
@@ -61,6 +68,19 @@ class NativeToolLoader(loader.ToolLoader):
                         'type': 'string',
                         'description': 'Optional sandbox session id. Defaults to the current request id for reuse.',
                     },
+                    'host_path': {
+                        'type': 'string',
+                        'description': (
+                            'Optional absolute host directory path to mount into the sandbox as /workspace. '
+                            'The path must be under an allowed host mount root.'
+                        ),
+                    },
+                    'host_path_mode': {
+                        'type': 'string',
+                        'description': 'Mount mode for host_path. Use rw to create or modify host files.',
+                        'enum': ['ro', 'rw'],
+                        'default': 'rw',
+                    },
                     'env': {
                         'type': 'object',
                         'description': 'Optional environment variables to expose inside the sandbox.',
@@ -73,3 +93,17 @@ class NativeToolLoader(loader.ToolLoader):
             },
             func=lambda parameters: parameters,
         )
+
+    def _summarize_parameters(self, parameters: dict) -> dict:
+        """Return a loggable copy of parameters: cmd capped at 400 chars, env replaced by its sorted key names."""
+        summary = dict(parameters)
+        command = str(summary.get('cmd', '')).strip()
+        summary['cmd'] = command if len(command) <= 400 else f'{command[:397]}...'
+
+        env = summary.get('env')
+        if not isinstance(env, dict):
+            return summary
+        # Keep only the variable names so their values never reach the log.
+        summary['env_keys'] = sorted(str(key) for key in env.keys())
+        del summary['env']
+        return summary
diff --git a/src/langbot/templates/config.yaml b/src/langbot/templates/config.yaml
index 7b7e59fe..ef6d1ec9 100644
--- a/src/langbot/templates/config.yaml
+++ b/src/langbot/templates/config.yaml
@@ -87,6 +87,11 @@ monitoring:
         retention_days: 30
         # Cleanup check interval in hours
         check_interval_hours: 1
+box:
+    default_host_workspace: './data/box-workspaces/default'  # mounted at /workspace when a sandbox_exec call omits host_path
+    allowed_host_mount_roots:  # a requested host_path must resolve to one of these directories or below
+        - './data/box-workspaces'
+        - '/tmp'  # NOTE(review): exposes the host's shared /tmp to sandbox commands (mount mode defaults to rw) - confirm this default is intended
 space:
     # Space service URL for OAuth and API
     url: 'https://space.langbot.app'
diff --git a/src/langbot/templates/default-pipeline-config.json b/src/langbot/templates/default-pipeline-config.json
index eb89053e..d90d31ed 100644
--- a/src/langbot/templates/default-pipeline-config.json
+++ b/src/langbot/templates/default-pipeline-config.json
@@ -49,7 +49,7 @@
             "prompt": [
                 {
                     "role": "system",
-                    "content": "You are a helpful assistant. When tools are available, use them for exact calculations, data processing, and code execution instead of guessing."
+                    "content": "You are a helpful assistant. When tools are available, use them for exact calculations, data processing, and code execution instead of guessing. Unless the user explicitly asks for code or a script, return the result directly instead of printing the generated code."
                 }
             ],
             "knowledge-bases": [],
diff --git a/tests/unit_tests/box/test_box_service.py b/tests/unit_tests/box/test_box_service.py
index ab4b7c9e..ffb06b58 100644
--- a/tests/unit_tests/box/test_box_service.py
+++ b/tests/unit_tests/box/test_box_service.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import datetime as dt
+import os
 from types import SimpleNamespace
 from unittest.mock import Mock
 
@@ -9,8 +10,15 @@ import pytest
 import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
 
 from langbot.pkg.box.backend import BaseSandboxBackend
-from langbot.pkg.box.errors import BoxBackendUnavailableError
-from langbot.pkg.box.models import BoxExecutionResult, BoxExecutionStatus, BoxNetworkMode, BoxSessionInfo, BoxSpec
+from langbot.pkg.box.errors import BoxBackendUnavailableError, BoxSessionConflictError, BoxValidationError
+from langbot.pkg.box.models import (
+    BoxExecutionResult,
+    BoxExecutionStatus,
+    BoxHostMountMode,
+    BoxNetworkMode,
+    BoxSessionInfo,
+    BoxSpec,
+)
 from langbot.pkg.box.runtime import BoxRuntime
 from langbot.pkg.box.service import BoxService
 
@@ -21,6 +29,7 @@ class FakeBackend(BaseSandboxBackend):
         self.name = 'fake'
         self.available = available
         self.start_calls: list[str] = []
+        self.start_specs: list[BoxSpec] = []
         self.exec_calls: list[tuple[str, str]] = []
         self.stop_calls: list[str] = []
 
@@ -29,6 +38,7 @@ class FakeBackend(BaseSandboxBackend):
 
     async def start_session(self, spec: BoxSpec) -> BoxSessionInfo:
         self.start_calls.append(spec.session_id)
+        self.start_specs.append(spec)
         now = dt.datetime.now(dt.UTC)
         return BoxSessionInfo(
             session_id=spec.session_id,
@@ -36,6 +46,8 @@ class FakeBackend(BaseSandboxBackend):
             backend_session_id=f'backend-{spec.session_id}',
             image=spec.image,
             network=spec.network,
+            host_path=spec.host_path,
+            host_path_mode=spec.host_path_mode,
             created_at=now,
             last_used_at=now,
         )
@@ -60,6 +72,20 @@ def make_query(query_id: int = 42) -> pipeline_query.Query:
     return pipeline_query.Query.model_construct(query_id=query_id)
 
 
+def make_app(logger: Mock, allowed_host_mount_roots: list[str] | None = None):
+    """Build a minimal app stub for BoxService tests.
+
+    The stub carries the logger and an instance_config whose data['box'] section
+    holds the given allowed roots (default: none) and an empty default workspace.
+    """
+    box_section = {
+        'allowed_host_mount_roots': allowed_host_mount_roots or [],
+        'default_host_workspace': '',
+    }
+    instance_config = SimpleNamespace(data={'box': box_section})
+    return SimpleNamespace(logger=logger, instance_config=instance_config)
+
+
 @pytest.mark.asyncio
 async def test_box_runtime_reuses_request_session():
     logger = Mock()
@@ -82,7 +108,7 @@ async def test_box_service_defaults_session_id_from_query():
     logger = Mock()
     backend = FakeBackend(logger)
     runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
-    service = BoxService(SimpleNamespace(logger=logger), runtime=runtime)
+    service = BoxService(make_app(logger), runtime=runtime)
     await service.initialize()
 
     result = await service.execute_sandbox_tool({'cmd': 'pwd', 'network': BoxNetworkMode.OFF.value}, make_query(7))
@@ -97,8 +123,106 @@ async def test_box_service_fails_closed_when_backend_unavailable():
     logger = Mock()
     backend = FakeBackend(logger, available=False)
     runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
-    service = BoxService(SimpleNamespace(logger=logger), runtime=runtime)
+    service = BoxService(make_app(logger), runtime=runtime)
     await service.initialize()
 
     with pytest.raises(BoxBackendUnavailableError):
         await service.execute_sandbox_tool({'cmd': 'echo hello'}, make_query(9))
+
+
+@pytest.mark.asyncio
+async def test_box_service_allows_host_mount_under_configured_root(tmp_path):
+    logger = Mock()
+    backend = FakeBackend(logger)
+    runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
+    host_dir = tmp_path / 'mounted-workspace'
+    host_dir.mkdir()
+    service = BoxService(make_app(logger, [str(tmp_path)]), runtime=runtime)
+    await service.initialize()
+    # host_dir sits directly under tmp_path, the only allowed mount root.
+    result = await service.execute_sandbox_tool(
+        {
+            'cmd': 'pwd',
+            'host_path': str(host_dir),
+            'host_path_mode': BoxHostMountMode.READ_WRITE.value,
+        },
+        make_query(11),
+    )
+    # The call succeeds and starts one session named after the query id.
+    assert result['ok'] is True
+    assert backend.start_calls == ['11']
+
+
+@pytest.mark.asyncio
+async def test_box_service_uses_default_host_workspace_when_host_path_omitted(tmp_path):
+    logger = Mock()
+    backend = FakeBackend(logger)
+    runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
+    host_dir = tmp_path / 'default-workspace'
+    host_dir.mkdir()
+    app = make_app(logger, [str(tmp_path)])
+    app.instance_config.data['box']['default_host_workspace'] = str(host_dir)
+    service = BoxService(app, runtime=runtime)
+    await service.initialize()
+    # No host_path is passed, so the configured default workspace has to be filled in.
+    result = await service.execute_sandbox_tool({'cmd': 'pwd'}, make_query(15))
+    # The backend must see the resolved default workspace as the session's host_path.
+    assert result['ok'] is True
+    assert backend.start_calls == ['15']
+    assert backend.exec_calls == [('15', 'pwd')]
+    assert backend.start_specs[0].host_path == os.path.realpath(host_dir)
+
+
+@pytest.mark.asyncio
+async def test_box_service_rejects_host_mount_outside_allowed_roots(tmp_path):
+    logger = Mock()
+    backend = FakeBackend(logger)
+    runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
+    allowed_root = tmp_path / 'allowed'
+    disallowed_root = tmp_path / 'disallowed'
+    allowed_root.mkdir()
+    disallowed_root.mkdir()
+    service = BoxService(make_app(logger, [str(allowed_root)]), runtime=runtime)
+    await service.initialize()
+    # disallowed_root is a sibling of the only allowed root, so validation must refuse it.
+    with pytest.raises(BoxValidationError):
+        await service.execute_sandbox_tool(
+            {
+                'cmd': 'pwd',
+                'host_path': str(disallowed_root),
+            },
+            make_query(12),
+        )
+
+
+@pytest.mark.asyncio
+async def test_box_runtime_rejects_host_mount_conflict_in_same_session(tmp_path):
+    logger = Mock()
+    backend = FakeBackend(logger)
+    runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
+    await runtime.initialize()
+    # Two distinct host directories will be requested under the same session_id.
+    first_host_dir = tmp_path / 'first'
+    second_host_dir = tmp_path / 'second'
+    first_host_dir.mkdir()
+    second_host_dir.mkdir()
+
+    first = BoxSpec.model_validate(
+        {
+            'cmd': 'echo first',
+            'session_id': 'req-mount',
+            'host_path': os.path.realpath(first_host_dir),
+        }
+    )
+    second = BoxSpec.model_validate(
+        {
+            'cmd': 'echo second',
+            'session_id': 'req-mount',
+            'host_path': os.path.realpath(second_host_dir),
+        }
+    )
+    # The first spec creates the session and fixes its host_path.
+    await runtime.execute(first)
+    # Reusing that session with a different host_path must be refused.
+    with pytest.raises(BoxSessionConflictError):
+        await runtime.execute(second)
diff --git a/tests/unit_tests/provider/test_localagent_sandbox_exec.py b/tests/unit_tests/provider/test_localagent_sandbox_exec.py
index d192ac1e..eb013748 100644
--- a/tests/unit_tests/provider/test_localagent_sandbox_exec.py
+++ b/tests/unit_tests/provider/test_localagent_sandbox_exec.py
@@ -124,6 +124,13 @@ async def test_localagent_uses_sandbox_exec_for_exact_calculation():
         model_mgr=SimpleNamespace(get_model_by_uuid=AsyncMock(return_value=model)),
         tool_mgr=tool_manager,
         rag_mgr=SimpleNamespace(),
+        instance_config=SimpleNamespace(
+            data={
+                'box': {
+                    'default_host_workspace': '/home/yhh/workspace/box-demo',
+                }
+            }
+        ),
     )
 
     runner = LocalAgentRunner(app, pipeline_config={})
@@ -144,6 +151,8 @@ async def test_localagent_uses_sandbox_exec_for_exact_calculation():
         message.role == 'system'
         and 'sandbox_exec' in str(message.content)
         and 'exact calculations' in str(message.content)
+        and 'Unless the user explicitly asks for the script' in str(message.content)
+        and '/workspace' in str(message.content)
         for message in first_request['messages']
     )
     assert [tool.name for tool in first_request['funcs']] == ['sandbox_exec']