feat(box): unify native agent tools around exec/read/write/edit

2026-06-19 03:54:19 +00:00 · 2026-03-24 07:57:05 +00:00
parent 3f368c5764
commit 93104a947a
10 changed files with 519 additions and 114 deletions
@@ -0,0 +1,98 @@
+"""Three-layer security policy for LangBot Box.
+
+The design separates concerns into three independent layers, aligned with
+OpenCode / OpenClaw patterns:
+
+1. **SandboxPolicy** – *where* tools run (host vs sandbox).
+2. **ToolPolicy** – *which* tools are allowed (allow/deny lists).
+3. **ElevatedPolicy** – *whether* a single exec call may temporarily
+   escape the default sandbox boundary.
+
+These three layers are orthogonal:
+- ToolPolicy is a hard boundary; ``elevated`` cannot bypass a denied tool.
+- SandboxPolicy decides the default execution location.
+- ElevatedPolicy only affects ``exec`` and only when the framework allows it.
+"""
+
+from __future__ import annotations
+
+import enum
+from typing import Sequence
+
+
+# ── Layer 1: Sandbox Policy ──────────────────────────────────────────
+
+
+class SandboxMode(str, enum.Enum):
+    """Determines when agent execution is routed through the sandbox."""
+
+    OFF = 'off'
+    """Sandbox disabled; all exec runs on the host."""
+
+    NON_DEFAULT = 'non_default'
+    """Only non-default sessions are sandboxed (e.g. sub-agents, MCP)."""
+
+    ALL = 'all'
+    """Every agent exec call is routed through the sandbox."""
+
+
+class SandboxPolicy:
+    """Decides whether a given execution context should use the sandbox."""
+
+    def __init__(self, mode: SandboxMode = SandboxMode.ALL):
+        self.mode = mode
+
+    def should_sandbox(self, *, is_default_session: bool = True) -> bool:
+        if self.mode == SandboxMode.OFF:
+            return False
+        if self.mode == SandboxMode.ALL:
+            return True
+        # NON_DEFAULT: sandbox everything except the default session
+        return not is_default_session
+
+
+# ── Layer 2: Tool Policy ─────────────────────────────────────────────
+
+
+class ToolPolicy:
+    """Controls which tools are available to the current agent/session.
+
+    Rules:
+    - ``deny`` always takes precedence over ``allow``.
+    - An empty ``allow`` list means "all tools allowed" (no allowlist filter).
+    - ``elevated`` cannot bypass a denied tool.
+    """
+
+    def __init__(
+        self,
+        allow: Sequence[str] = (),
+        deny: Sequence[str] = (),
+    ):
+        self._allow: frozenset[str] = frozenset(allow)
+        self._deny: frozenset[str] = frozenset(deny)
+
+    def is_tool_allowed(self, tool_name: str) -> bool:
+        if tool_name in self._deny:
+            return False
+        if self._allow and tool_name not in self._allow:
+            return False
+        return True
+
+
+# ── Layer 3: Elevated Policy ─────────────────────────────────────────
+
+
+class ElevatedPolicy:
+    """Controls whether ``exec`` may request temporary privilege escalation.
+
+    ``elevated`` only applies to the ``exec`` tool.  It means "run this
+    command outside the default sandbox boundary" (e.g. with network, or
+    on the host).  The framework decides whether to honor the request.
+    """
+
+    def __init__(self, *, allow_elevated: bool = False, require_approval: bool = True):
+        self.allow_elevated = allow_elevated
+        self.require_approval = require_approval
+
+    def is_elevation_permitted(self) -> bool:
+        return self.allow_elevated
@@ -105,9 +105,22 @@ class BoxService:
        )
        return self._serialize_result(result)

-    async def execute_sandbox_tool(self, parameters: dict, query: pipeline_query.Query) -> dict:
-        spec_payload = dict(parameters)
+    async def execute_tool(self, parameters: dict, query: pipeline_query.Query) -> dict:
+        """Execute an agent-facing ``exec`` tool call.
+
+        Translates the agent-facing ``command`` field to the internal
+        ``BoxSpec.cmd`` field and injects the session id from the query.
+        """
+        spec_payload: dict = {'cmd': parameters['command']}
+
+        # Pass through allowed agent-facing fields
+        for key in ('workdir', 'timeout_sec', 'env'):
+            if key in parameters:
+                spec_payload[key] = parameters[key]
+
+        # Inject context the agent must not control
        spec_payload.setdefault('session_id', str(query.query_id))
+
        return await self.execute_spec_payload(spec_payload, query)

    async def shutdown(self):
@@ -379,23 +392,23 @@ class BoxService:
        return list(self._recent_errors)

    def get_system_guidance(self) -> str:
-        """Return LLM system-prompt guidance for sandbox_exec.
+        """Return LLM system-prompt guidance for the exec tool.

-        All sandbox-specific prompt text is kept here so that callers
+        All execution-specific prompt text is kept here so that callers
        (e.g. LocalAgentRunner) stay free of box domain knowledge.
        """
        guidance = (
-            'When sandbox_exec is available, use it for exact calculations, statistics, structured data parsing, '
+            'When the exec tool is available, use it for exact calculations, statistics, structured data parsing, '
            'and code execution instead of estimating mentally. If the user provides numbers, tables, CSV-like text, '
-            'JSON, or other data and asks for a computed answer, prefer running a short Python script in sandbox_exec '
+            'JSON, or other data and asks for a computed answer, prefer running a short Python script via exec '
            'and then answer from the tool result. Unless the user explicitly asks for the script, code, or implementation '
            'details, do not include the generated script in the final answer; return the result and a brief explanation only.'
        )
        if self.default_host_workspace:
            guidance += (
-                ' A default host workspace is mounted at /workspace for file tasks. When the user asks to read, create, or '
-                'modify local files in the working directory, use sandbox_exec with /workspace paths directly; do not ask the '
-                'user for sandbox parameters such as host_path unless they explicitly need a different directory.'
+                ' A default workspace is mounted at /workspace for file tasks. When the user asks to read, create, or '
+                'modify local files in the working directory, use exec with /workspace paths directly; do not ask the '
+                'user for directory parameters unless they explicitly need a different directory.'
            )
        return guidance