feat(box): add sandbox_exec tool loop for local-agent calculations

This commit is contained in:
youhuanghe
2026-03-19 12:28:10 +00:00
committed by WangCham
parent 3b3deec080
commit ba7a45713d
17 changed files with 952 additions and 10 deletions

View File

@@ -0,0 +1,149 @@
from __future__ import annotations
import json
from types import SimpleNamespace
from unittest.mock import AsyncMock, Mock
import pytest
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
import langbot_plugin.api.entities.builtin.provider.message as provider_message
import langbot_plugin.api.entities.builtin.provider.session as provider_session
from langbot.pkg.provider.runners.localagent import LocalAgentRunner
class RecordingProvider:
    """Scripted LLM provider double that records every ``invoke_llm`` call.

    The first call answers with an assistant message carrying one
    ``sandbox_exec`` tool call. Every later call parses the content of the
    last conversation message as JSON and reports its ``stdout`` field.
    """

    def __init__(self):
        # One dict per invoke_llm call, in call order.
        self.requests: list[dict] = []

    async def invoke_llm(self, query, model, messages, funcs, extra_args=None, remove_think=None):
        """Record the request and return the next scripted assistant message.

        Args:
            query: Accepted for signature compatibility; not used here.
            model: Accepted for signature compatibility; not used here.
            messages: Conversation so far; a shallow copy is recorded.
            funcs: Tools offered to the model; ``None`` is recorded as ``[]``.
            extra_args: Accepted for signature compatibility; not used here.
            remove_think: Recorded verbatim.

        Returns:
            The scripted assistant message for this call.
        """
        self.requests.append(
            {
                'messages': list(messages),
                # A caller offering no tools may pass None; list(None) would raise.
                'funcs': [] if funcs is None else list(funcs),
                'remove_think': remove_think,
            }
        )

        if len(self.requests) == 1:
            # First turn: ask for the calculation to be run in the sandbox.
            return provider_message.Message(
                role='assistant',
                content='Let me calculate that exactly.',
                tool_calls=[
                    provider_message.ToolCall(
                        id='call-1',
                        type='function',
                        function=provider_message.FunctionCall(
                            name='sandbox_exec',
                            arguments=json.dumps(
                                {
                                    'cmd': (
                                        "python - <<'PY'\n"
                                        "nums = [1, 2, 3, 4]\n"
                                        'print(sum(nums) / len(nums))\n'
                                        'PY'
                                    )
                                }
                            ),
                        ),
                    )
                ],
            )

        # Later turns: the last message is expected to hold the tool result as JSON.
        tool_result = json.loads(messages[-1].content)
        return provider_message.Message(
            role='assistant',
            content=f"The average is {tool_result['stdout']}.",
        )
def make_query() -> pipeline_query.Query:
    """Build the person-chat query used by the local-agent sandbox test.

    The query is assembled with ``model_construct`` and carries a mocked
    adapter that reports streaming output as unsupported, a local-agent
    pipeline configuration, and a single ``sandbox_exec`` tool stub.
    """
    platform_adapter = AsyncMock()
    platform_adapter.is_stream_output_supported = AsyncMock(return_value=False)

    local_agent_settings = {
        'model': {'primary': 'test-model-uuid', 'fallbacks': []},
        'prompt': 'test-prompt',
    }
    pipeline_settings = {
        'ai': {
            'runner': {'runner': 'local-agent'},
            'local-agent': local_agent_settings,
        },
        'output': {'misc': {'remove-think': False}},
    }
    question = provider_message.Message(
        role='user',
        content='Please calculate the average of 1, 2, 3, and 4.',
    )
    sandbox_tool = SimpleNamespace(name='sandbox_exec')

    return pipeline_query.Query.model_construct(
        query_id='avg-query',
        launcher_type=provider_session.LauncherTypes.PERSON,
        launcher_id=12345,
        sender_id=12345,
        message_chain=[],
        message_event=None,
        adapter=platform_adapter,
        pipeline_uuid='pipeline-uuid',
        bot_uuid='bot-uuid',
        pipeline_config=pipeline_settings,
        prompt=SimpleNamespace(messages=[]),
        messages=[],
        user_message=question,
        use_funcs=[sandbox_tool],
        use_llm_model_uuid='test-model-uuid',
        variables={},
    )
@pytest.mark.asyncio
async def test_localagent_uses_sandbox_exec_for_exact_calculation():
    """The runner should call ``sandbox_exec`` once and relay its stdout.

    Checks the yielded message roles, the final answer, the arguments passed
    to the tool manager, and that the first LLM request carried a system
    message mentioning ``sandbox_exec`` and "exact calculations".
    """
    recording_provider = RecordingProvider()
    llm_model = SimpleNamespace(
        provider=recording_provider,
        model_entity=SimpleNamespace(
            uuid='test-model-uuid',
            name='test-model',
            abilities=['func_call'],
            extra_args={},
        ),
    )

    # Canned sandbox outcome handed back by the mocked tool manager.
    sandbox_outcome = {
        'session_id': 'avg-query',
        'backend': 'podman',
        'status': 'completed',
        'ok': True,
        'exit_code': 0,
        'stdout': '2.5',
        'stderr': '',
        'duration_ms': 18,
    }
    tool_manager = SimpleNamespace(execute_func_call=AsyncMock(return_value=sandbox_outcome))

    app = SimpleNamespace(
        logger=Mock(),
        model_mgr=SimpleNamespace(get_model_by_uuid=AsyncMock(return_value=llm_model)),
        tool_mgr=tool_manager,
        rag_mgr=SimpleNamespace(),
    )
    runner = LocalAgentRunner(app, pipeline_config={})

    replies = []
    async for reply in runner.run(make_query()):
        replies.append(reply)

    roles = [reply.role for reply in replies]
    assert roles == ['assistant', 'tool', 'assistant']
    assert replies[-1].content == 'The average is 2.5.'

    tool_manager.execute_func_call.assert_awaited_once()
    awaited_positional = tool_manager.execute_func_call.await_args.args
    called_name, called_parameters = awaited_positional[:2]
    assert called_name == 'sandbox_exec'
    assert "print(sum(nums) / len(nums))" in called_parameters['cmd']

    initial_request = recording_provider.requests[0]
    system_texts = [
        str(entry.content)
        for entry in initial_request['messages']
        if entry.role == 'system'
    ]
    assert any(
        'sandbox_exec' in text and 'exact calculations' in text
        for text in system_texts
    )

    offered_tools = [tool.name for tool in initial_request['funcs']]
    assert offered_tools == ['sandbox_exec']

View File

@@ -0,0 +1,63 @@
from __future__ import annotations

import inspect
from types import SimpleNamespace
from unittest.mock import Mock

import pytest
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool

from langbot.pkg.provider.tools.toolmgr import ToolManager
class StubLoader:
    """In-memory tool loader double with a fixed tool list and canned result.

    Args:
        tools: Tools this loader reports; ``None`` or empty means no tools.
        invoke_result: Value returned by ``invoke_tool``. When callable, it
            is called as ``invoke_result(name, parameters, query)`` and its
            outcome is returned; an awaitable outcome is awaited first.
    """

    def __init__(self, tools: list[resource_tool.LLMTool] | None = None, invoke_result=None):
        self._tools = tools or []
        self._invoke_result = invoke_result

    async def get_tools(self, *_args, **_kwargs):
        """Return the configured tools; all arguments are ignored."""
        return self._tools

    async def has_tool(self, name: str) -> bool:
        """Return whether a configured tool is named ``name``."""
        return any(tool.name == name for tool in self._tools)

    async def invoke_tool(self, name: str, parameters: dict, query):
        """Return the canned result, computing it when it is a callable."""
        if not callable(self._invoke_result):
            return self._invoke_result

        outcome = self._invoke_result(name, parameters, query)
        # Async callables (e.g. AsyncMock) hand back a coroutine; resolve it
        # so callers get the value instead of an un-awaited coroutine.
        if inspect.isawaitable(outcome):
            outcome = await outcome
        return outcome

    async def shutdown(self):
        """Do nothing; present so the stub can stand in for a loader."""
        return None
def make_tool(name: str) -> resource_tool.LLMTool:
    """Create a minimal LLM tool whose name and both descriptions are ``name``.

    The tool takes an empty object schema and its function returns the
    parameters it was given.
    """

    def _echo(parameters):
        return parameters

    empty_object_schema = {'type': 'object', 'properties': {}}
    return resource_tool.LLMTool(
        name=name,
        human_desc=name,
        description=name,
        parameters=empty_object_schema,
        func=_echo,
    )
@pytest.mark.asyncio
async def test_tool_manager_lists_native_tools_first():
    """``get_all_tools`` should list native, then plugin, then MCP tools."""
    manager = ToolManager(SimpleNamespace())

    # Install one single-tool stub per loader slot, in the original order.
    loader_slots = (
        ('native_tool_loader', 'sandbox_exec'),
        ('plugin_tool_loader', 'plugin_tool'),
        ('mcp_tool_loader', 'mcp_tool'),
    )
    for slot, tool_name in loader_slots:
        setattr(manager, slot, StubLoader([make_tool(tool_name)]))

    listed_names = [tool.name for tool in await manager.get_all_tools()]
    assert listed_names == ['sandbox_exec', 'plugin_tool', 'mcp_tool']
@pytest.mark.asyncio
async def test_tool_manager_routes_native_tool_calls():
    """A call to ``sandbox_exec`` should return the native loader's result."""
    manager = ToolManager(SimpleNamespace())

    # Only the native loader is given a result to return.
    manager.native_tool_loader = StubLoader(
        [make_tool('sandbox_exec')],
        invoke_result={'backend': 'fake'},
    )
    manager.plugin_tool_loader = StubLoader([make_tool('plugin_tool')])
    manager.mcp_tool_loader = StubLoader([make_tool('mcp_tool')])

    call_parameters = {'cmd': 'pwd'}
    outcome = await manager.execute_func_call('sandbox_exec', call_parameters, query=Mock())

    assert outcome == {'backend': 'fake'}