From cafef1a306266f0a0c4c9a7aafb4993496407480 Mon Sep 17 00:00:00 2001
From: Junyan Qin <rockchinq@gmail.com>
Date: Tue, 2 Jun 2026 15:07:58 +0800
Subject: [PATCH] fix(box): cap tool-call loop and run workspace-quota walk off
 the event loop

Two robustness fixes for problems that bite under normal sandbox usage (not
just under attack), hardening the self-hosted community edition before
release:

- localagent: cap the tool-call loop at MAX_TOOL_CALL_ROUNDS (128). A looping
  or adversarial model could otherwise emit tool calls indefinitely (each
  potentially a sandbox exec), producing a non-terminating request and runaway
  cost. The cap is generous enough not to interrupt legitimate multi-step
  agentic workflows.
- box.service: make _enforce_workspace_quota async and run the recursive
  workspace scan via asyncio.to_thread. The scan previously ran synchronously
  on every quota-enforced exec, so a large workspace would block the whole
  asyncio runtime (all bots/pipelines).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/langbot/pkg/box/service.py                 | 11 +++++++----
 src/langbot/pkg/provider/runners/localagent.py | 15 +++++++++++++++
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/src/langbot/pkg/box/service.py b/src/langbot/pkg/box/service.py
index 13469634..6c8e8299 100644
--- a/src/langbot/pkg/box/service.py
+++ b/src/langbot/pkg/box/service.py
@@ -168,7 +168,7 @@ class BoxService:
             f'spec={json.dumps(self._summarize_spec(spec), ensure_ascii=False)}'
         )
         try:
-            self._enforce_workspace_quota(spec, phase='before execution')
+            await self._enforce_workspace_quota(spec, phase='before execution')
         except BoxError as exc:
             self._record_error(exc, query)
             raise
@@ -178,7 +178,7 @@ class BoxService:
             self._record_error(exc, query)
             raise
         try:
-            self._enforce_workspace_quota(spec, phase='after execution')
+            await self._enforce_workspace_quota(spec, phase='after execution')
         except BoxError as exc:
             await self._cleanup_exceeded_session(spec)
             self._record_error(exc, query)
@@ -683,7 +683,7 @@ class BoxService:
         _walk(root)
         return total
 
-    def _enforce_workspace_quota(self, spec: BoxSpec, *, phase: str) -> None:
+    async def _enforce_workspace_quota(self, spec: BoxSpec, *, phase: str) -> None:
         if spec.host_path is None or spec.workspace_quota_mb <= 0:
             return
 
@@ -691,7 +691,10 @@ class BoxService:
         if not os.path.isdir(host_path):
             return
 
-        used_bytes = self._get_workspace_size_bytes(host_path)
+        # Walk the workspace off the event loop — this runs on every
+        # quota-enforced exec, and a large tree would otherwise block the whole
+        # asyncio runtime (all bots/pipelines) for the duration of the scan.
+        used_bytes = await asyncio.to_thread(self._get_workspace_size_bytes, host_path)
         limit_bytes = spec.workspace_quota_mb * _MIB
         if used_bytes <= limit_bytes:
             return
diff --git a/src/langbot/pkg/provider/runners/localagent.py b/src/langbot/pkg/provider/runners/localagent.py
index 6cac3e83..28d014d0 100644
--- a/src/langbot/pkg/provider/runners/localagent.py
+++ b/src/langbot/pkg/provider/runners/localagent.py
@@ -34,6 +34,13 @@ SANDBOX_EXEC_SYSTEM_GUIDANCE = (
 )
 
 
+# Hard cap on tool-call rounds within a single agent turn. A looping or
+# adversarial model can otherwise emit tool calls indefinitely (each potentially
+# a sandbox exec), yielding a non-terminating request and runaway cost. Set
+# generously so it never interrupts legitimate multi-step agentic workflows.
+MAX_TOOL_CALL_ROUNDS = 128
+
+
 @runner.runner_class('local-agent')
 class LocalAgentRunner(runner.RequestRunner):
     """Local agent request runner"""
@@ -363,7 +370,15 @@ class LocalAgentRunner(runner.RequestRunner):
 
         # Once a model succeeds, commit to it for the tool call loop
         # (no fallback mid-conversation — different models may interpret tool results differently)
+        tool_call_round = 0
         while pending_tool_calls:
+            tool_call_round += 1
+            if tool_call_round > MAX_TOOL_CALL_ROUNDS:
+                self.ap.logger.warning(
+                    f'Tool-call loop reached the {MAX_TOOL_CALL_ROUNDS}-round cap '
+                    f'(query_id={query.query_id}); stopping to avoid a non-terminating request.'
+                )
+                break
             for tool_call in pending_tool_calls:
                 try:
                     func = tool_call.function