chore(agent-runner): merge split tool runtime base

# Conflicts:
#	src/langbot/pkg/box/workspace.py
#	src/langbot/pkg/provider/tools/loaders/mcp_stdio.py
#	src/langbot/pkg/provider/tools/loaders/native.py
#	src/langbot/pkg/provider/tools/loaders/skill.py
#	tests/unit_tests/box/test_workspace.py
#	tests/unit_tests/provider/test_mcp_box_integration.py
This commit is contained in:
huanghuoguoguo
2026-06-14 21:22:05 +08:00
28 changed files with 548 additions and 53 deletions
+52 -10
View File
@@ -115,6 +115,15 @@ class TestExtractUsage:
assert result['prompt_tokens'] == 0
assert result['completion_tokens'] == 0
def test_extract_usage_without_provider_usage(self):
    """A response whose ``usage`` is None yields None, not a zero-filled usage."""
    llm_requester = litellmchat.LiteLLMRequester(ap=Mock(), config={})
    # Mock keyword arguments are set as attributes, so ``usage`` is a real None
    # rather than an auto-created child mock.
    usage_less_response = Mock(usage=None)

    extracted = llm_requester._extract_usage(usage_less_response)

    assert extracted is None
class TestNormalizeUsage:
"""Test _normalize_usage helper covering real-world usage shapes"""
@@ -131,6 +140,22 @@ class TestNormalizeUsage:
)
assert result == {'prompt_tokens': 12, 'completion_tokens': 8, 'total_tokens': 20}
def test_preserves_token_details(self):
    """Nested token-detail dicts (cache / reasoning counters) survive normalization."""
    raw_usage = {
        'prompt_tokens': 12,
        'completion_tokens': 8,
        'total_tokens': 20,
        'prompt_tokens_details': {'cached_tokens': 7},
        'completion_tokens_details': {'reasoning_tokens': 3},
    }

    normalized = litellmchat.LiteLLMRequester._normalize_usage(raw_usage)

    # The flat counter is still reported alongside the nested detail blocks.
    assert normalized['prompt_tokens'] == 12
    assert normalized['prompt_tokens_details'] == {'cached_tokens': 7}
    assert normalized['completion_tokens_details'] == {'reasoning_tokens': 3}
def test_missing_total_is_derived(self):
"""When total_tokens is absent/zero it is derived from prompt + completion"""
usage = Mock()
@@ -166,9 +191,7 @@ class TestInvokeLLMStreamUsage:
if has_choice:
choice = Mock()
delta = Mock()
delta.model_dump = Mock(
return_value={'role': 'assistant', 'content': content, 'tool_calls': tool_calls}
)
delta.model_dump = Mock(return_value={'role': 'assistant', 'content': content, 'tool_calls': tool_calls})
choice.delta = delta
choice.finish_reason = finish_reason
chunk.choices = [choice]
@@ -313,7 +336,8 @@ class TestInvokeLLMStreamUsage:
with patch.object(litellmchat, 'acompletion', new=AsyncMock(side_effect=lambda **kw: _aiter())):
collected = [
chunk async for chunk in requester.invoke_llm_stream(
chunk
async for chunk in requester.invoke_llm_stream(
query=query,
model=model,
messages=messages,
@@ -788,7 +812,9 @@ class TestInvokeRerank:
with patch('httpx.AsyncClient', return_value=mock_client):
# arerank must NOT be called on the openai-compatible path
with patch.object(
litellmchat, 'arerank', new_callable=AsyncMock,
litellmchat,
'arerank',
new_callable=AsyncMock,
side_effect=AssertionError('arerank must not be used for openai-compatible provider'),
):
results = await requester.invoke_rerank(
@@ -1034,11 +1060,28 @@ class TestScanModels:
},
)
with patch.object(litellmchat.litellm, 'get_max_tokens') as mock_get_max_tokens:
mock_get_max_tokens.side_effect = lambda model: 131072 if model == 'moonshot/moonshot-v1-128k' else None
with patch.object(litellmchat.litellm, 'get_model_info') as mock_get_model_info:
mock_get_model_info.side_effect = (
lambda model: {'max_input_tokens': 131072}
if model == 'moonshot/moonshot-v1-128k'
else {}
)
assert requester._safe_context_length('moonshot-v1-128k') == 131072
def test_safe_context_length_uses_litellm_max_input_tokens(self):
    """The context window comes from ``max_input_tokens``, not the output limits."""
    requester = litellmchat.LiteLLMRequester(ap=Mock(), config={})

    # The output-side limits deliberately differ from the input limit so the
    # assertion fails if the wrong field is picked up.
    fake_model_info = {
        'max_input_tokens': 128000,
        'max_output_tokens': 16384,
        'max_tokens': 16384,
    }

    with patch.object(litellmchat.litellm, 'get_model_info', return_value=fake_model_info):
        context_length = requester._safe_context_length('gpt-4o')
        assert context_length == 128000
def test_litellm_bool_helper_tries_moonshot_metadata_alias(self):
"""OpenAI-compatible Moonshot endpoints still use Moonshot metadata for abilities."""
requester = litellmchat.LiteLLMRequester(
@@ -1051,8 +1094,7 @@ class TestScanModels:
with patch.object(litellmchat.litellm, 'supports_function_calling') as mock_supports_function_calling:
mock_supports_function_calling.side_effect = (
lambda model, custom_llm_provider=None: model == 'moonshot/kimi-k2.6'
and custom_llm_provider is None
lambda model, custom_llm_provider=None: model == 'moonshot/kimi-k2.6' and custom_llm_provider is None
)
assert requester._supports_function_calling('kimi-k2.6') is True
@@ -1102,7 +1144,7 @@ class TestScanModels:
},
)
with patch.object(litellmchat.litellm, 'get_max_tokens', side_effect=Exception('not mapped')):
with patch.object(litellmchat.litellm, 'get_model_info', side_effect=Exception('not mapped')):
assert requester._safe_context_length('deepseek-v4-pro') == 1_000_000
assert requester._safe_context_length('deepseek-v4-flash') == 1_000_000
@@ -193,6 +193,29 @@ class TestSkillPathHelpers:
assert list(result.keys()) == ['visible']
def test_restore_activated_skills_uses_caller_provided_names_and_visibility(self):
    """Restoring caller-supplied names keeps only the pipeline-bound skill.

    The requested list contains a bound skill, an unbound one, a duplicate
    and an empty string; only the single bound name is expected back.
    """
    from langbot.pkg.provider.tools.loaders.skill import (
        ACTIVATED_SKILLS_KEY,
        PIPELINE_BOUND_SKILLS_KEY,
        get_activated_skill_names,
        restore_activated_skills,
    )

    ap = _make_ap()
    registered_skills = {
        skill_name: _make_skill_data(name=skill_name)
        for skill_name in ('visible', 'hidden')
    }
    ap.skill_mgr = SimpleNamespace(skills=registered_skills)

    # Only 'visible' is bound to the pipeline for this query.
    query = SimpleNamespace(variables={PIPELINE_BOUND_SKILLS_KEY: ['visible']})
    requested_names = ['visible', 'hidden', 'visible', '']

    restored = restore_activated_skills(ap, query, requested_names)

    assert restored == ['visible']
    activated_state = query.variables[ACTIVATED_SKILLS_KEY]
    assert list(activated_state.keys()) == ['visible']
    assert get_activated_skill_names(query) == ['visible']
def test_resolve_virtual_skill_path_allows_visible_skill_reads(self):
from langbot.pkg.provider.tools.loaders.skill import (
PIPELINE_BOUND_SKILLS_KEY,
@@ -282,6 +305,7 @@ class TestSkillToolLoader:
assert result['activated'] is True
assert result['skill_name'] == 'demo'
assert result['mount_path'] == '/workspace/.skills/demo'
assert result['activated_skill_names'] == ['demo']
assert 'Step 1' in result['content']
assert set(query.variables[ACTIVATED_SKILLS_KEY].keys()) == {'demo'}
@@ -62,6 +62,7 @@ class TestBuildHeartbeatPayload:
assert payload['event_type'] == 'instance_heartbeat'
assert payload['query_id'] == ''
assert 'instance_create_ts' in payload
assert 'timestamp' in payload
f = payload['features']
assert f['database'] == 'postgresql'