feat(agent-runner): add bounded native tool artifacts

This commit is contained in:
huanghuoguoguo
2026-06-04 11:10:29 +08:00
parent 7053acfb1b
commit 36292102f9
7 changed files with 952 additions and 69 deletions
+50 -6
View File
@@ -5,11 +5,9 @@ import pytest
from unittest.mock import MagicMock, AsyncMock, patch
import base64
import datetime
import asyncio
from langbot.pkg.agent.runner.artifact_store import ArtifactStore
from langbot.pkg.agent.runner.session_registry import (
AgentRunSessionRegistry,
get_session_registry,
)
from .conftest import make_session
@@ -24,7 +22,6 @@ class TestArtifactStore:
Note: The new store uses AsyncSession, so we need to mock
the session factory behavior.
"""
from unittest.mock import MagicMock, AsyncMock, patch
from sqlalchemy.ext.asyncio import AsyncEngine
engine = MagicMock(spec=AsyncEngine)
@@ -452,10 +449,7 @@ class TestArtifactStoreRealSQLite:
async def db_engine(self):
"""Create an in-memory SQLite database for testing."""
from sqlalchemy.ext.asyncio import create_async_engine
from sqlalchemy import text
from langbot.pkg.entity.persistence.base import Base
from langbot.pkg.entity.persistence.artifact import AgentArtifact
from langbot.pkg.entity.persistence.bstorage import BinaryStorage
engine = create_async_engine("sqlite+aiosqlite:///:memory:")
@@ -580,6 +574,56 @@ class TestArtifactStoreRealSQLite:
assert result["has_more"] is True
assert result["length"] == 100
@pytest.mark.asyncio
async def test_file_artifact_range_read_and_public_metadata(self, db_engine, tmp_path):
    """File-backed artifacts read ranges without exposing host paths.

    Registers a 200-byte file located under ``tmp_path`` as an artifact, then
    verifies that the metadata returned by the store does not contain the
    host file path and that a ranged read returns exactly the requested
    slice of the file.
    """
    store = ArtifactStore(db_engine)
    content = b"0123456789" * 20
    file_path = tmp_path / "large.txt"
    file_path.write_bytes(content)

    artifact_id = await store.register_file_artifact(
        artifact_id="art_file_001",
        host_path=str(file_path),
        host_root=str(tmp_path),
        source="tool",
        mime_type="text/plain",
        name="large.txt",
        conversation_id="conv_001",
        run_id="run_001",
        metadata={"sandbox_path": "/workspace/large.txt"},
    )

    metadata = await store.get_metadata(artifact_id)
    assert metadata is not None
    assert metadata["artifact_id"] == "art_file_001"
    assert metadata["metadata"] == {"sandbox_path": "/workspace/large.txt"}
    # The host path must not appear anywhere in the public metadata.
    assert str(file_path) not in str(metadata)

    result = await store.read_artifact(artifact_id, offset=10, limit=15)
    assert result is not None
    assert result["offset"] == 10
    assert result["length"] == 15
    assert result["size_bytes"] == len(content)
    # 25 of 200 bytes consumed, so more data remains.
    assert result["has_more"] is True
    assert base64.b64decode(result["content_base64"]) == content[10:25]
@pytest.mark.asyncio
async def test_register_file_artifact_rejects_path_escape(self, db_engine, tmp_path):
    """File-backed artifacts must stay inside their declared host root.

    Creates a file that is a sibling of (not inside) the declared root and
    expects registration to raise ``ValueError`` with a message matching
    ``"escapes"``.
    """
    store = ArtifactStore(db_engine)
    root = tmp_path / "root"
    root.mkdir()
    # Lives in tmp_path directly, i.e. outside of ``root``.
    outside = tmp_path / "outside.txt"
    outside.write_text("outside")

    with pytest.raises(ValueError, match="escapes"):
        await store.register_file_artifact(
            artifact_id="art_file_escape",
            host_path=str(outside),
            host_root=str(root),
        )
@pytest.mark.asyncio
async def test_metadata_sdk_validation(self, db_engine):
"""Test that metadata can be validated by SDK ArtifactMetadata."""
@@ -589,6 +589,7 @@ class TestAgentRunProxyActions:
await registry.unregister(run_id)
assert response.code == 0
assert getattr(query, '_agent_run_session')['run_id'] == run_id
app.tool_mgr.execute_func_call.assert_awaited_once_with(
name='test/search',
parameters={'q': 'langbot'},
@@ -3,13 +3,18 @@ from __future__ import annotations
import os
import tempfile
from types import SimpleNamespace
from unittest.mock import AsyncMock, Mock
from unittest.mock import AsyncMock, Mock, patch
import pytest
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
from langbot.pkg.provider.tools.loaders.native import NativeToolLoader
from langbot.pkg.provider.tools.loaders.native import (
_DEFAULT_TOOL_RESULT_MAX_BYTES,
_GLOB_MAX_MATCHES,
_GREP_MAX_MATCHES,
NativeToolLoader,
)
from langbot.pkg.provider.tools.toolmgr import ToolManager
@@ -81,6 +86,23 @@ async def test_tool_manager_routes_native_tool_calls():
assert result == {'backend': 'fake'}
@pytest.mark.asyncio
async def test_tool_manager_get_tool_by_name_resolves_native_and_skill_tools():
    """``get_tool_by_name`` finds tools provided by the native and skill loaders.

    All four loaders are replaced with stubs exposing one tool each; only the
    native (``exec``) and skill (``activate``) lookups are asserted here.
    """
    manager = ToolManager(SimpleNamespace())
    manager.native_tool_loader = StubLoader([make_tool('exec')])
    manager.skill_tool_loader = StubLoader([make_tool('activate')])
    manager.plugin_tool_loader = StubLoader([make_tool('plugin_tool')])
    manager.mcp_tool_loader = StubLoader([make_tool('mcp_tool')])

    native_tool = await manager.get_tool_by_name('exec')
    skill_tool = await manager.get_tool_by_name('activate')

    assert native_tool is not None
    assert native_tool.name == 'exec'
    assert skill_tool is not None
    assert skill_tool.name == 'activate'
@pytest.mark.asyncio
async def test_native_tool_loader_hides_tools_when_box_unavailable():
loader = NativeToolLoader(SimpleNamespace(box_service=SimpleNamespace(available=False)))
@@ -119,6 +141,7 @@ def _make_loader_with_workspace(tmpdir: str) -> tuple[NativeToolLoader, Mock]:
def _make_query() -> Mock:
q = Mock()
q.query_id = 'test-query-1'
q.variables = {}
return q
@@ -133,6 +156,9 @@ async def test_read_file():
assert result['ok'] is True
assert result['content'] == 'hello world'
assert result['truncated'] is False
assert result['start_line'] == 1
assert result['end_line'] == 1
@pytest.mark.asyncio
@@ -159,6 +185,136 @@ async def test_read_directory():
assert result['ok'] is True
assert result['is_directory'] is True
assert 'a.txt' in result['content']
assert result['total'] == 2
assert result['truncated'] is False
@pytest.mark.asyncio
async def test_read_file_supports_line_window():
    """``read`` with ``offset``/``limit`` returns only the requested line window.

    A six-line file is read starting at line 2 with a limit of 3 lines; the
    result must contain lines 2-4, be flagged as truncated by lines, and
    point at line 5 as the next offset.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        loader, _ = _make_loader_with_workspace(tmpdir)
        content = '\n'.join(f'line-{line_no}' for line_no in range(1, 7))
        with open(os.path.join(tmpdir, 'large.txt'), 'w') as f:
            f.write(content)

        result = await loader.invoke_tool(
            'read',
            {'path': '/workspace/large.txt', 'offset': 2, 'limit': 3},
            _make_query(),
        )

        assert result['ok'] is True
        assert result['content'] == 'line-2\nline-3\nline-4'
        assert result['truncated'] is True
        assert result['truncated_by'] == 'lines'
        assert result['start_line'] == 2
        assert result['end_line'] == 4
        assert result['next_offset'] == 5
@pytest.mark.asyncio
async def test_read_file_is_bounded_by_bytes():
    """``read`` with ``max_bytes`` truncates when a single line is too wide.

    The first line of the file is 128 characters while ``max_bytes`` is 32,
    so the result is expected to be truncated by bytes, to keep
    ``next_offset`` at line 1, and to return a short ``'[Line 1 exceeds'``
    placeholder instead of the line itself.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        loader, _ = _make_loader_with_workspace(tmpdir)
        with open(os.path.join(tmpdir, 'wide.txt'), 'w') as f:
            f.write(('x' * 128) + '\nsecond line')

        result = await loader.invoke_tool(
            'read',
            {'path': '/workspace/wide.txt', 'max_bytes': 32},
            _make_query(),
        )

        assert result['ok'] is True
        assert result['truncated'] is True
        assert result['truncated_by'] == 'bytes'
        assert result['next_offset'] == 1
        assert result['content'].startswith('[Line 1 exceeds')
        # The placeholder itself must stay small.
        assert len(result['content']) < 200
@pytest.mark.asyncio
async def test_skill_read_uses_host_preview_when_package_root_available():
    """Reading a skill file with a known ``package_root`` bypasses the box service.

    The skill ``demo`` declares a ``package_root`` on the host; reading
    ``/workspace/.skills/demo/large.txt`` with ``limit=1`` must return the
    first line of the host file and never await
    ``box_service.read_skill_file``.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        skill_root = os.path.join(tmpdir, 'skill-demo')
        os.makedirs(skill_root)
        with open(os.path.join(skill_root, 'large.txt'), 'w') as f:
            f.write('first\nsecond\nthird')

        box_service = SimpleNamespace(
            available=True,
            default_workspace=tmpdir,
            # Sentinel content: if it shows up in the result, the box path was used.
            read_skill_file=AsyncMock(return_value={'content': 'should not be used'}),
        )
        skill_mgr = SimpleNamespace(skills={'demo': {'name': 'demo', 'package_root': skill_root}})
        loader = NativeToolLoader(SimpleNamespace(box_service=box_service, skill_mgr=skill_mgr, logger=Mock()))

        result = await loader.invoke_tool(
            'read',
            {'path': '/workspace/.skills/demo/large.txt', 'limit': 1},
            _make_query(),
        )

        assert result['ok'] is True
        assert result['content'] == 'first'
        assert result['truncated'] is True
        assert result['next_offset'] == 2
        box_service.read_skill_file.assert_not_awaited()
@pytest.mark.asyncio
async def test_read_truncated_file_returns_host_artifact_ref_for_agent_run():
    """A truncated ``read`` inside an agent run registers a file artifact.

    ``ArtifactStore`` is patched so no database is touched. The test checks
    the ``artifact_refs`` entry returned to the caller and the keyword
    arguments passed to ``register_file_artifact``.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        # Opaque stand-in: the test only checks it is forwarded to ArtifactStore.
        engine = object()
        logger = Mock()
        box_service = SimpleNamespace(available=True, default_workspace=tmpdir)
        persistence_mgr = SimpleNamespace(get_db_engine=Mock(return_value=engine))
        loader = NativeToolLoader(
            SimpleNamespace(box_service=box_service, persistence_mgr=persistence_mgr, logger=logger)
        )
        with open(os.path.join(tmpdir, 'large.txt'), 'w') as f:
            f.write('first\nsecond\nthird')

        query = _make_query()
        query.bot_uuid = 'bot-001'
        query._agent_run_session = {
            'run_id': 'run-001',
            'runner_id': 'plugin:test/runner/default',
            'authorization': {'conversation_id': 'conv-001'},
        }

        with patch('langbot.pkg.agent.runner.artifact_store.ArtifactStore') as store_cls:
            store = store_cls.return_value
            store.register_file_artifact = AsyncMock(return_value='artifact-file-001')

            result = await loader.invoke_tool(
                'read',
                {'path': '/workspace/large.txt', 'limit': 1},
                query,
            )

        # Assertions stay inside the temp-dir context: the expected size is
        # read from the file on disk.
        assert result['ok'] is True
        assert result['content'] == 'first'
        assert result['preview'] == 'first'
        assert result['truncated'] is True
        assert result['artifact_refs'] == [
            {
                'artifact_id': 'artifact-file-001',
                'artifact_type': 'file',
                'mime_type': 'text/plain',
                'name': 'large.txt',
                'size_bytes': os.path.getsize(os.path.join(tmpdir, 'large.txt')),
            }
        ]

        store_cls.assert_called_once_with(engine)
        store.register_file_artifact.assert_awaited_once()
        call_kwargs = store.register_file_artifact.await_args.kwargs
        assert call_kwargs['host_path'] == os.path.realpath(os.path.join(tmpdir, 'large.txt'))
        assert call_kwargs['host_root'] == tmpdir
        assert call_kwargs['conversation_id'] == 'conv-001'
        assert call_kwargs['run_id'] == 'run-001'
        assert call_kwargs['runner_id'] == 'plugin:test/runner/default'
        assert call_kwargs['metadata']['sandbox_path'] == '/workspace/large.txt'
@pytest.mark.asyncio
@@ -248,3 +404,119 @@ async def test_path_escape_blocked():
with pytest.raises(ValueError, match='escapes'):
await loader.invoke_tool('read', {'path': '/workspace/../../etc/passwd'}, _make_query())
@pytest.mark.asyncio
async def test_glob_result_is_bounded():
    """``glob`` caps returned matches at ``_GLOB_MAX_MATCHES`` and reports the total.

    Creates five more matching files than the cap and expects the result to
    list exactly the cap, report the full count in ``total``, and flag
    truncation by matches.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        loader, _ = _make_loader_with_workspace(tmpdir)
        for index in range(_GLOB_MAX_MATCHES + 5):
            with open(os.path.join(tmpdir, f'file-{index:03d}.txt'), 'w') as f:
                f.write(str(index))

        result = await loader.invoke_tool(
            'glob',
            {'path': '/workspace', 'pattern': '*.txt'},
            _make_query(),
        )

        assert result['ok'] is True
        assert len(result['matches']) == _GLOB_MAX_MATCHES
        assert result['total'] == _GLOB_MAX_MATCHES + 5
        assert result['truncated'] is True
        assert result['truncated_by'] == 'matches'
        # The preview is the newline-joined list of returned matches.
        assert result['preview'].splitlines() == result['matches']
@pytest.mark.asyncio
async def test_grep_result_is_bounded_by_match_count():
    """``grep`` stops collecting once ``_GREP_MAX_MATCHES`` hits are found.

    One file holds five more matching lines than the cap. The result is
    expected to contain exactly the cap, with ``total`` equal to the cap
    as well, and to be flagged as truncated by matches.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        loader, _ = _make_loader_with_workspace(tmpdir)
        with open(os.path.join(tmpdir, 'hits.txt'), 'w') as f:
            for index in range(_GREP_MAX_MATCHES + 5):
                f.write(f'needle {index}\n')

        result = await loader.invoke_tool(
            'grep',
            {'path': '/workspace', 'pattern': 'needle', 'include': '*.txt'},
            _make_query(),
        )

        assert result['ok'] is True
        assert len(result['matches']) == _GREP_MAX_MATCHES
        assert result['total'] == _GREP_MAX_MATCHES
        assert result['truncated'] is True
        assert result['truncated_by'] == 'matches'
@pytest.mark.asyncio
async def test_grep_truncates_long_matching_line():
    """``grep`` shortens an over-long matching line and marks the result truncated.

    A single 607-character matching line is written; the one returned match
    must end with ``'... [truncated]'`` and the result must report
    ``truncated_by == 'line'``.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        loader, _ = _make_loader_with_workspace(tmpdir)
        with open(os.path.join(tmpdir, 'wide.txt'), 'w') as f:
            f.write('needle ' + ('x' * 600))

        result = await loader.invoke_tool(
            'grep',
            {'path': '/workspace', 'pattern': 'needle', 'include': '*.txt'},
            _make_query(),
        )

        assert result['ok'] is True
        assert len(result['matches']) == 1
        assert result['matches'][0]['content'].endswith('... [truncated]')
        assert result['truncated'] is True
        assert result['truncated_by'] == 'line'
@pytest.mark.asyncio
async def test_exec_result_adds_preview_and_truncated_flag():
    """``exec`` results gain a combined ``preview`` and an overall ``truncated`` flag.

    The stubbed box service reports ``stdout_truncated=True``; the loader's
    result is expected to expose ``truncated`` as ``True`` and a preview
    that joins the stdout and stderr sections.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        box_service = SimpleNamespace(
            available=True,
            default_workspace=tmpdir,
            execute_tool=AsyncMock(
                return_value={
                    'ok': True,
                    'stdout': 'stdout text',
                    'stderr': 'stderr text',
                    'stdout_truncated': True,
                    'stderr_truncated': False,
                }
            ),
        )
        loader = NativeToolLoader(SimpleNamespace(box_service=box_service, logger=Mock()))

        result = await loader.invoke_tool('exec', {'command': 'echo ok'}, _make_query())

        assert result['ok'] is True
        assert result['truncated'] is True
        assert result['preview'] == 'stdout:\nstdout text\n\nstderr:\nstderr text'
        box_service.execute_tool.assert_awaited_once()
@pytest.mark.asyncio
async def test_exec_result_caps_untrusted_large_output():
    """``exec`` output is capped even when the backend claims nothing was truncated.

    The stubbed box service returns a stdout that is 128 characters over
    ``_DEFAULT_TOOL_RESULT_MAX_BYTES`` while reporting
    ``stdout_truncated=False``; the loader must cap stdout to the limit
    and set the truncation flags itself.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        box_service = SimpleNamespace(
            available=True,
            default_workspace=tmpdir,
            execute_tool=AsyncMock(
                return_value={
                    'ok': True,
                    'stdout': 'x' * (_DEFAULT_TOOL_RESULT_MAX_BYTES + 128),
                    'stderr': '',
                    'stdout_truncated': False,
                    'stderr_truncated': False,
                }
            ),
        )
        loader = NativeToolLoader(SimpleNamespace(box_service=box_service, logger=Mock()))

        result = await loader.invoke_tool('exec', {'command': 'echo ok'}, _make_query())

        assert result['ok'] is True
        # The limit is measured in encoded bytes, not characters.
        assert len(result['stdout'].encode('utf-8')) <= _DEFAULT_TOOL_RESULT_MAX_BYTES
        assert result['stdout_truncated'] is True
        assert result['truncated'] is True
        # With empty stderr the preview is just the (capped) stdout.
        assert result['preview'] == result['stdout']