mirror of
https://github.com/langbot-app/LangBot.git
synced 2026-06-08 23:06:03 +00:00
feat(box): add sandbox_exec tool loop for local-agent calculations
This commit is contained in:
104
tests/unit_tests/box/test_box_service.py
Normal file
104
tests/unit_tests/box/test_box_service.py
Normal file
@@ -0,0 +1,104 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as dt
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import Mock
|
||||
|
||||
import pytest
|
||||
|
||||
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
|
||||
|
||||
from langbot.pkg.box.backend import BaseSandboxBackend
|
||||
from langbot.pkg.box.errors import BoxBackendUnavailableError
|
||||
from langbot.pkg.box.models import BoxExecutionResult, BoxExecutionStatus, BoxNetworkMode, BoxSessionInfo, BoxSpec
|
||||
from langbot.pkg.box.runtime import BoxRuntime
|
||||
from langbot.pkg.box.service import BoxService
|
||||
|
||||
|
||||
class FakeBackend(BaseSandboxBackend):
|
||||
def __init__(self, logger: Mock, available: bool = True):
|
||||
super().__init__(logger)
|
||||
self.name = 'fake'
|
||||
self.available = available
|
||||
self.start_calls: list[str] = []
|
||||
self.exec_calls: list[tuple[str, str]] = []
|
||||
self.stop_calls: list[str] = []
|
||||
|
||||
async def is_available(self) -> bool:
|
||||
return self.available
|
||||
|
||||
async def start_session(self, spec: BoxSpec) -> BoxSessionInfo:
|
||||
self.start_calls.append(spec.session_id)
|
||||
now = dt.datetime.now(dt.UTC)
|
||||
return BoxSessionInfo(
|
||||
session_id=spec.session_id,
|
||||
backend_name=self.name,
|
||||
backend_session_id=f'backend-{spec.session_id}',
|
||||
image=spec.image,
|
||||
network=spec.network,
|
||||
created_at=now,
|
||||
last_used_at=now,
|
||||
)
|
||||
|
||||
async def exec(self, session: BoxSessionInfo, spec: BoxSpec) -> BoxExecutionResult:
|
||||
self.exec_calls.append((session.session_id, spec.cmd))
|
||||
return BoxExecutionResult(
|
||||
session_id=session.session_id,
|
||||
backend_name=self.name,
|
||||
status=BoxExecutionStatus.COMPLETED,
|
||||
exit_code=0,
|
||||
stdout=f'executed: {spec.cmd}',
|
||||
stderr='',
|
||||
duration_ms=12,
|
||||
)
|
||||
|
||||
async def stop_session(self, session: BoxSessionInfo):
|
||||
self.stop_calls.append(session.session_id)
|
||||
|
||||
|
||||
def make_query(query_id: int = 42) -> pipeline_query.Query:
|
||||
return pipeline_query.Query.model_construct(query_id=query_id)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_box_runtime_reuses_request_session():
|
||||
logger = Mock()
|
||||
backend = FakeBackend(logger)
|
||||
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
||||
await runtime.initialize()
|
||||
|
||||
first = BoxSpec.model_validate({'cmd': 'echo first', 'session_id': 'req-1'})
|
||||
second = BoxSpec.model_validate({'cmd': 'echo second', 'session_id': 'req-1'})
|
||||
|
||||
await runtime.execute(first)
|
||||
await runtime.execute(second)
|
||||
|
||||
assert backend.start_calls == ['req-1']
|
||||
assert backend.exec_calls == [('req-1', 'echo first'), ('req-1', 'echo second')]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_box_service_defaults_session_id_from_query():
|
||||
logger = Mock()
|
||||
backend = FakeBackend(logger)
|
||||
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
||||
service = BoxService(SimpleNamespace(logger=logger), runtime=runtime)
|
||||
await service.initialize()
|
||||
|
||||
result = await service.execute_sandbox_tool({'cmd': 'pwd', 'network': BoxNetworkMode.OFF.value}, make_query(7))
|
||||
|
||||
assert result['session_id'] == '7'
|
||||
assert result['ok'] is True
|
||||
assert backend.start_calls == ['7']
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_box_service_fails_closed_when_backend_unavailable():
|
||||
logger = Mock()
|
||||
backend = FakeBackend(logger, available=False)
|
||||
runtime = BoxRuntime(logger=logger, backends=[backend], session_ttl_sec=300)
|
||||
service = BoxService(SimpleNamespace(logger=logger), runtime=runtime)
|
||||
await service.initialize()
|
||||
|
||||
with pytest.raises(BoxBackendUnavailableError):
|
||||
await service.execute_sandbox_tool({'cmd': 'echo hello'}, make_query(9))
|
||||
149
tests/unit_tests/provider/test_localagent_sandbox_exec.py
Normal file
149
tests/unit_tests/provider/test_localagent_sandbox_exec.py
Normal file
@@ -0,0 +1,149 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import AsyncMock, Mock
|
||||
|
||||
import pytest
|
||||
|
||||
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
|
||||
import langbot_plugin.api.entities.builtin.provider.message as provider_message
|
||||
import langbot_plugin.api.entities.builtin.provider.session as provider_session
|
||||
|
||||
from langbot.pkg.provider.runners.localagent import LocalAgentRunner
|
||||
|
||||
|
||||
class RecordingProvider:
|
||||
def __init__(self):
|
||||
self.requests: list[dict] = []
|
||||
|
||||
async def invoke_llm(self, query, model, messages, funcs, extra_args=None, remove_think=None):
|
||||
self.requests.append(
|
||||
{
|
||||
'messages': list(messages),
|
||||
'funcs': list(funcs),
|
||||
'remove_think': remove_think,
|
||||
}
|
||||
)
|
||||
|
||||
if len(self.requests) == 1:
|
||||
return provider_message.Message(
|
||||
role='assistant',
|
||||
content='Let me calculate that exactly.',
|
||||
tool_calls=[
|
||||
provider_message.ToolCall(
|
||||
id='call-1',
|
||||
type='function',
|
||||
function=provider_message.FunctionCall(
|
||||
name='sandbox_exec',
|
||||
arguments=json.dumps(
|
||||
{
|
||||
'cmd': (
|
||||
"python - <<'PY'\n"
|
||||
"nums = [1, 2, 3, 4]\n"
|
||||
'print(sum(nums) / len(nums))\n'
|
||||
'PY'
|
||||
)
|
||||
}
|
||||
),
|
||||
),
|
||||
)
|
||||
],
|
||||
)
|
||||
|
||||
tool_result = json.loads(messages[-1].content)
|
||||
return provider_message.Message(
|
||||
role='assistant',
|
||||
content=f"The average is {tool_result['stdout']}.",
|
||||
)
|
||||
|
||||
|
||||
def make_query() -> pipeline_query.Query:
|
||||
adapter = AsyncMock()
|
||||
adapter.is_stream_output_supported = AsyncMock(return_value=False)
|
||||
|
||||
return pipeline_query.Query.model_construct(
|
||||
query_id='avg-query',
|
||||
launcher_type=provider_session.LauncherTypes.PERSON,
|
||||
launcher_id=12345,
|
||||
sender_id=12345,
|
||||
message_chain=[],
|
||||
message_event=None,
|
||||
adapter=adapter,
|
||||
pipeline_uuid='pipeline-uuid',
|
||||
bot_uuid='bot-uuid',
|
||||
pipeline_config={
|
||||
'ai': {
|
||||
'runner': {'runner': 'local-agent'},
|
||||
'local-agent': {'model': {'primary': 'test-model-uuid', 'fallbacks': []}, 'prompt': 'test-prompt'},
|
||||
},
|
||||
'output': {'misc': {'remove-think': False}},
|
||||
},
|
||||
prompt=SimpleNamespace(messages=[]),
|
||||
messages=[],
|
||||
user_message=provider_message.Message(
|
||||
role='user',
|
||||
content='Please calculate the average of 1, 2, 3, and 4.',
|
||||
),
|
||||
use_funcs=[SimpleNamespace(name='sandbox_exec')],
|
||||
use_llm_model_uuid='test-model-uuid',
|
||||
variables={},
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_localagent_uses_sandbox_exec_for_exact_calculation():
|
||||
provider = RecordingProvider()
|
||||
model = SimpleNamespace(
|
||||
provider=provider,
|
||||
model_entity=SimpleNamespace(
|
||||
uuid='test-model-uuid',
|
||||
name='test-model',
|
||||
abilities=['func_call'],
|
||||
extra_args={},
|
||||
),
|
||||
)
|
||||
|
||||
tool_manager = SimpleNamespace(
|
||||
execute_func_call=AsyncMock(
|
||||
return_value={
|
||||
'session_id': 'avg-query',
|
||||
'backend': 'podman',
|
||||
'status': 'completed',
|
||||
'ok': True,
|
||||
'exit_code': 0,
|
||||
'stdout': '2.5',
|
||||
'stderr': '',
|
||||
'duration_ms': 18,
|
||||
}
|
||||
)
|
||||
)
|
||||
|
||||
app = SimpleNamespace(
|
||||
logger=Mock(),
|
||||
model_mgr=SimpleNamespace(get_model_by_uuid=AsyncMock(return_value=model)),
|
||||
tool_mgr=tool_manager,
|
||||
rag_mgr=SimpleNamespace(),
|
||||
)
|
||||
|
||||
runner = LocalAgentRunner(app, pipeline_config={})
|
||||
query = make_query()
|
||||
|
||||
results = [message async for message in runner.run(query)]
|
||||
|
||||
assert [message.role for message in results] == ['assistant', 'tool', 'assistant']
|
||||
assert results[-1].content == 'The average is 2.5.'
|
||||
|
||||
tool_manager.execute_func_call.assert_awaited_once()
|
||||
tool_name, tool_parameters = tool_manager.execute_func_call.await_args.args[:2]
|
||||
assert tool_name == 'sandbox_exec'
|
||||
assert "print(sum(nums) / len(nums))" in tool_parameters['cmd']
|
||||
|
||||
first_request = provider.requests[0]
|
||||
assert any(
|
||||
message.role == 'system'
|
||||
and 'sandbox_exec' in str(message.content)
|
||||
and 'exact calculations' in str(message.content)
|
||||
for message in first_request['messages']
|
||||
)
|
||||
assert [tool.name for tool in first_request['funcs']] == ['sandbox_exec']
|
||||
63
tests/unit_tests/provider/test_tool_manager_native.py
Normal file
63
tests/unit_tests/provider/test_tool_manager_native.py
Normal file
@@ -0,0 +1,63 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import Mock
|
||||
|
||||
import pytest
|
||||
|
||||
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
|
||||
|
||||
from langbot.pkg.provider.tools.toolmgr import ToolManager
|
||||
|
||||
|
||||
class StubLoader:
|
||||
def __init__(self, tools: list[resource_tool.LLMTool] | None = None, invoke_result=None):
|
||||
self._tools = tools or []
|
||||
self._invoke_result = invoke_result
|
||||
|
||||
async def get_tools(self, *_args, **_kwargs):
|
||||
return self._tools
|
||||
|
||||
async def has_tool(self, name: str) -> bool:
|
||||
return any(tool.name == name for tool in self._tools)
|
||||
|
||||
async def invoke_tool(self, name: str, parameters: dict, query):
|
||||
return self._invoke_result(name, parameters, query) if callable(self._invoke_result) else self._invoke_result
|
||||
|
||||
async def shutdown(self):
|
||||
return None
|
||||
|
||||
|
||||
def make_tool(name: str) -> resource_tool.LLMTool:
|
||||
return resource_tool.LLMTool(
|
||||
name=name,
|
||||
human_desc=name,
|
||||
description=name,
|
||||
parameters={'type': 'object', 'properties': {}},
|
||||
func=lambda parameters: parameters,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_tool_manager_lists_native_tools_first():
|
||||
manager = ToolManager(SimpleNamespace())
|
||||
manager.native_tool_loader = StubLoader([make_tool('sandbox_exec')])
|
||||
manager.plugin_tool_loader = StubLoader([make_tool('plugin_tool')])
|
||||
manager.mcp_tool_loader = StubLoader([make_tool('mcp_tool')])
|
||||
|
||||
tools = await manager.get_all_tools()
|
||||
|
||||
assert [tool.name for tool in tools] == ['sandbox_exec', 'plugin_tool', 'mcp_tool']
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_tool_manager_routes_native_tool_calls():
|
||||
app = SimpleNamespace()
|
||||
manager = ToolManager(app)
|
||||
manager.native_tool_loader = StubLoader([make_tool('sandbox_exec')], invoke_result={'backend': 'fake'})
|
||||
manager.plugin_tool_loader = StubLoader([make_tool('plugin_tool')])
|
||||
manager.mcp_tool_loader = StubLoader([make_tool('mcp_tool')])
|
||||
|
||||
result = await manager.execute_func_call('sandbox_exec', {'cmd': 'pwd'}, query=Mock())
|
||||
|
||||
assert result == {'backend': 'fake'}
|
||||
Reference in New Issue
Block a user