mirror of
https://github.com/langbot-app/LangBot.git
synced 2026-06-12 08:46:02 +00:00
feat(box): add sandbox_exec tool loop for local-agent calculations
This commit is contained in:
149
tests/unit_tests/provider/test_localagent_sandbox_exec.py
Normal file
149
tests/unit_tests/provider/test_localagent_sandbox_exec.py
Normal file
@@ -0,0 +1,149 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import AsyncMock, Mock
|
||||
|
||||
import pytest
|
||||
|
||||
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
|
||||
import langbot_plugin.api.entities.builtin.provider.message as provider_message
|
||||
import langbot_plugin.api.entities.builtin.provider.session as provider_session
|
||||
|
||||
from langbot.pkg.provider.runners.localagent import LocalAgentRunner
|
||||
|
||||
|
||||
class RecordingProvider:
|
||||
def __init__(self):
|
||||
self.requests: list[dict] = []
|
||||
|
||||
async def invoke_llm(self, query, model, messages, funcs, extra_args=None, remove_think=None):
|
||||
self.requests.append(
|
||||
{
|
||||
'messages': list(messages),
|
||||
'funcs': list(funcs),
|
||||
'remove_think': remove_think,
|
||||
}
|
||||
)
|
||||
|
||||
if len(self.requests) == 1:
|
||||
return provider_message.Message(
|
||||
role='assistant',
|
||||
content='Let me calculate that exactly.',
|
||||
tool_calls=[
|
||||
provider_message.ToolCall(
|
||||
id='call-1',
|
||||
type='function',
|
||||
function=provider_message.FunctionCall(
|
||||
name='sandbox_exec',
|
||||
arguments=json.dumps(
|
||||
{
|
||||
'cmd': (
|
||||
"python - <<'PY'\n"
|
||||
"nums = [1, 2, 3, 4]\n"
|
||||
'print(sum(nums) / len(nums))\n'
|
||||
'PY'
|
||||
)
|
||||
}
|
||||
),
|
||||
),
|
||||
)
|
||||
],
|
||||
)
|
||||
|
||||
tool_result = json.loads(messages[-1].content)
|
||||
return provider_message.Message(
|
||||
role='assistant',
|
||||
content=f"The average is {tool_result['stdout']}.",
|
||||
)
|
||||
|
||||
|
||||
def make_query() -> pipeline_query.Query:
|
||||
adapter = AsyncMock()
|
||||
adapter.is_stream_output_supported = AsyncMock(return_value=False)
|
||||
|
||||
return pipeline_query.Query.model_construct(
|
||||
query_id='avg-query',
|
||||
launcher_type=provider_session.LauncherTypes.PERSON,
|
||||
launcher_id=12345,
|
||||
sender_id=12345,
|
||||
message_chain=[],
|
||||
message_event=None,
|
||||
adapter=adapter,
|
||||
pipeline_uuid='pipeline-uuid',
|
||||
bot_uuid='bot-uuid',
|
||||
pipeline_config={
|
||||
'ai': {
|
||||
'runner': {'runner': 'local-agent'},
|
||||
'local-agent': {'model': {'primary': 'test-model-uuid', 'fallbacks': []}, 'prompt': 'test-prompt'},
|
||||
},
|
||||
'output': {'misc': {'remove-think': False}},
|
||||
},
|
||||
prompt=SimpleNamespace(messages=[]),
|
||||
messages=[],
|
||||
user_message=provider_message.Message(
|
||||
role='user',
|
||||
content='Please calculate the average of 1, 2, 3, and 4.',
|
||||
),
|
||||
use_funcs=[SimpleNamespace(name='sandbox_exec')],
|
||||
use_llm_model_uuid='test-model-uuid',
|
||||
variables={},
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_localagent_uses_sandbox_exec_for_exact_calculation():
|
||||
provider = RecordingProvider()
|
||||
model = SimpleNamespace(
|
||||
provider=provider,
|
||||
model_entity=SimpleNamespace(
|
||||
uuid='test-model-uuid',
|
||||
name='test-model',
|
||||
abilities=['func_call'],
|
||||
extra_args={},
|
||||
),
|
||||
)
|
||||
|
||||
tool_manager = SimpleNamespace(
|
||||
execute_func_call=AsyncMock(
|
||||
return_value={
|
||||
'session_id': 'avg-query',
|
||||
'backend': 'podman',
|
||||
'status': 'completed',
|
||||
'ok': True,
|
||||
'exit_code': 0,
|
||||
'stdout': '2.5',
|
||||
'stderr': '',
|
||||
'duration_ms': 18,
|
||||
}
|
||||
)
|
||||
)
|
||||
|
||||
app = SimpleNamespace(
|
||||
logger=Mock(),
|
||||
model_mgr=SimpleNamespace(get_model_by_uuid=AsyncMock(return_value=model)),
|
||||
tool_mgr=tool_manager,
|
||||
rag_mgr=SimpleNamespace(),
|
||||
)
|
||||
|
||||
runner = LocalAgentRunner(app, pipeline_config={})
|
||||
query = make_query()
|
||||
|
||||
results = [message async for message in runner.run(query)]
|
||||
|
||||
assert [message.role for message in results] == ['assistant', 'tool', 'assistant']
|
||||
assert results[-1].content == 'The average is 2.5.'
|
||||
|
||||
tool_manager.execute_func_call.assert_awaited_once()
|
||||
tool_name, tool_parameters = tool_manager.execute_func_call.await_args.args[:2]
|
||||
assert tool_name == 'sandbox_exec'
|
||||
assert "print(sum(nums) / len(nums))" in tool_parameters['cmd']
|
||||
|
||||
first_request = provider.requests[0]
|
||||
assert any(
|
||||
message.role == 'system'
|
||||
and 'sandbox_exec' in str(message.content)
|
||||
and 'exact calculations' in str(message.content)
|
||||
for message in first_request['messages']
|
||||
)
|
||||
assert [tool.name for tool in first_request['funcs']] == ['sandbox_exec']
|
||||
63
tests/unit_tests/provider/test_tool_manager_native.py
Normal file
63
tests/unit_tests/provider/test_tool_manager_native.py
Normal file
@@ -0,0 +1,63 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import Mock
|
||||
|
||||
import pytest
|
||||
|
||||
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
|
||||
|
||||
from langbot.pkg.provider.tools.toolmgr import ToolManager
|
||||
|
||||
|
||||
class StubLoader:
|
||||
def __init__(self, tools: list[resource_tool.LLMTool] | None = None, invoke_result=None):
|
||||
self._tools = tools or []
|
||||
self._invoke_result = invoke_result
|
||||
|
||||
async def get_tools(self, *_args, **_kwargs):
|
||||
return self._tools
|
||||
|
||||
async def has_tool(self, name: str) -> bool:
|
||||
return any(tool.name == name for tool in self._tools)
|
||||
|
||||
async def invoke_tool(self, name: str, parameters: dict, query):
|
||||
return self._invoke_result(name, parameters, query) if callable(self._invoke_result) else self._invoke_result
|
||||
|
||||
async def shutdown(self):
|
||||
return None
|
||||
|
||||
|
||||
def make_tool(name: str) -> resource_tool.LLMTool:
|
||||
return resource_tool.LLMTool(
|
||||
name=name,
|
||||
human_desc=name,
|
||||
description=name,
|
||||
parameters={'type': 'object', 'properties': {}},
|
||||
func=lambda parameters: parameters,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_tool_manager_lists_native_tools_first():
|
||||
manager = ToolManager(SimpleNamespace())
|
||||
manager.native_tool_loader = StubLoader([make_tool('sandbox_exec')])
|
||||
manager.plugin_tool_loader = StubLoader([make_tool('plugin_tool')])
|
||||
manager.mcp_tool_loader = StubLoader([make_tool('mcp_tool')])
|
||||
|
||||
tools = await manager.get_all_tools()
|
||||
|
||||
assert [tool.name for tool in tools] == ['sandbox_exec', 'plugin_tool', 'mcp_tool']
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_tool_manager_routes_native_tool_calls():
|
||||
app = SimpleNamespace()
|
||||
manager = ToolManager(app)
|
||||
manager.native_tool_loader = StubLoader([make_tool('sandbox_exec')], invoke_result={'backend': 'fake'})
|
||||
manager.plugin_tool_loader = StubLoader([make_tool('plugin_tool')])
|
||||
manager.mcp_tool_loader = StubLoader([make_tool('mcp_tool')])
|
||||
|
||||
result = await manager.execute_func_call('sandbox_exec', {'cmd': 'pwd'}, query=Mock())
|
||||
|
||||
assert result == {'backend': 'fake'}
|
||||
Reference in New Issue
Block a user