chore(agent-runner): merge split tool runtime base

# Conflicts:
#	src/langbot/pkg/box/workspace.py
#	src/langbot/pkg/provider/tools/loaders/mcp_stdio.py
#	src/langbot/pkg/provider/tools/loaders/native.py
#	src/langbot/pkg/provider/tools/loaders/skill.py
#	tests/unit_tests/box/test_workspace.py
#	tests/unit_tests/provider/test_mcp_box_integration.py
2026-06-27 07:54:19 +00:00 · 2026-06-14 21:22:05 +08:00
parent 0a71747eec 9fa3251f3d
commit 4205858957
28 changed files with 548 additions and 53 deletions
@@ -115,6 +115,15 @@ class TestExtractUsage:
        assert result['prompt_tokens'] == 0
        assert result['completion_tokens'] == 0

+    def test_extract_usage_without_provider_usage(self):
+        """Missing provider usage is not treated as authoritative zero usage."""
+        requester = litellmchat.LiteLLMRequester(ap=Mock(), config={})
+
+        response = Mock()
+        response.usage = None
+
+        assert requester._extract_usage(response) is None
+

 class TestNormalizeUsage:
    """Test _normalize_usage helper covering real-world usage shapes"""
@@ -131,6 +140,22 @@ class TestNormalizeUsage:
        )
        assert result == {'prompt_tokens': 12, 'completion_tokens': 8, 'total_tokens': 20}

+    def test_preserves_token_details(self):
+        """Provider token details such as cache counters are preserved."""
+        result = litellmchat.LiteLLMRequester._normalize_usage(
+            {
+                'prompt_tokens': 12,
+                'completion_tokens': 8,
+                'total_tokens': 20,
+                'prompt_tokens_details': {'cached_tokens': 7},
+                'completion_tokens_details': {'reasoning_tokens': 3},
+            }
+        )
+
+        assert result['prompt_tokens'] == 12
+        assert result['prompt_tokens_details'] == {'cached_tokens': 7}
+        assert result['completion_tokens_details'] == {'reasoning_tokens': 3}
+
    def test_missing_total_is_derived(self):
        """When total_tokens is absent/zero it is derived from prompt + completion"""
        usage = Mock()
@@ -166,9 +191,7 @@ class TestInvokeLLMStreamUsage:
        if has_choice:
            choice = Mock()
            delta = Mock()
-            delta.model_dump = Mock(
-                return_value={'role': 'assistant', 'content': content, 'tool_calls': tool_calls}
-            )
+            delta.model_dump = Mock(return_value={'role': 'assistant', 'content': content, 'tool_calls': tool_calls})
            choice.delta = delta
            choice.finish_reason = finish_reason
            chunk.choices = [choice]
@@ -313,7 +336,8 @@ class TestInvokeLLMStreamUsage:

        with patch.object(litellmchat, 'acompletion', new=AsyncMock(side_effect=lambda **kw: _aiter())):
            collected = [
-                chunk async for chunk in requester.invoke_llm_stream(
+                chunk
+                async for chunk in requester.invoke_llm_stream(
                    query=query,
                    model=model,
                    messages=messages,
@@ -788,7 +812,9 @@ class TestInvokeRerank:
        with patch('httpx.AsyncClient', return_value=mock_client):
            # arerank must NOT be called on the openai-compatible path
            with patch.object(
-                litellmchat, 'arerank', new_callable=AsyncMock,
+                litellmchat,
+                'arerank',
+                new_callable=AsyncMock,
                side_effect=AssertionError('arerank must not be used for openai-compatible provider'),
            ):
                results = await requester.invoke_rerank(
@@ -1034,11 +1060,28 @@ class TestScanModels:
            },
        )

-        with patch.object(litellmchat.litellm, 'get_max_tokens') as mock_get_max_tokens:
-            mock_get_max_tokens.side_effect = lambda model: 131072 if model == 'moonshot/moonshot-v1-128k' else None
+        with patch.object(litellmchat.litellm, 'get_model_info') as mock_get_model_info:
+            mock_get_model_info.side_effect = (
+                lambda model: {'max_input_tokens': 131072}
+                if model == 'moonshot/moonshot-v1-128k'
+                else {}
+            )

            assert requester._safe_context_length('moonshot-v1-128k') == 131072

+    def test_safe_context_length_uses_litellm_max_input_tokens(self):
+        """LiteLLM max_output_tokens must not be treated as the context window."""
+        requester = litellmchat.LiteLLMRequester(ap=Mock(), config={})
+
+        with patch.object(litellmchat.litellm, 'get_model_info') as mock_get_model_info:
+            mock_get_model_info.return_value = {
+                'max_input_tokens': 128000,
+                'max_output_tokens': 16384,
+                'max_tokens': 16384,
+            }
+
+            assert requester._safe_context_length('gpt-4o') == 128000
+
    def test_litellm_bool_helper_tries_moonshot_metadata_alias(self):
        """OpenAI-compatible Moonshot endpoints still use Moonshot metadata for abilities."""
        requester = litellmchat.LiteLLMRequester(
@@ -1051,8 +1094,7 @@ class TestScanModels:

        with patch.object(litellmchat.litellm, 'supports_function_calling') as mock_supports_function_calling:
            mock_supports_function_calling.side_effect = (
-                lambda model, custom_llm_provider=None: model == 'moonshot/kimi-k2.6'
-                and custom_llm_provider is None
+                lambda model, custom_llm_provider=None: model == 'moonshot/kimi-k2.6' and custom_llm_provider is None
            )

            assert requester._supports_function_calling('kimi-k2.6') is True
@@ -1102,7 +1144,7 @@ class TestScanModels:
            },
        )

-        with patch.object(litellmchat.litellm, 'get_max_tokens', side_effect=Exception('not mapped')):
+        with patch.object(litellmchat.litellm, 'get_model_info', side_effect=Exception('not mapped')):
            assert requester._safe_context_length('deepseek-v4-pro') == 1_000_000
            assert requester._safe_context_length('deepseek-v4-flash') == 1_000_000

@@ -193,6 +193,29 @@ class TestSkillPathHelpers:

        assert list(result.keys()) == ['visible']

+    def test_restore_activated_skills_uses_caller_provided_names_and_visibility(self):
+        from langbot.pkg.provider.tools.loaders.skill import (
+            ACTIVATED_SKILLS_KEY,
+            PIPELINE_BOUND_SKILLS_KEY,
+            get_activated_skill_names,
+            restore_activated_skills,
+        )
+
+        ap = _make_ap()
+        ap.skill_mgr = SimpleNamespace(
+            skills={
+                'visible': _make_skill_data(name='visible'),
+                'hidden': _make_skill_data(name='hidden'),
+            }
+        )
+        query = SimpleNamespace(variables={PIPELINE_BOUND_SKILLS_KEY: ['visible']})
+
+        restored = restore_activated_skills(ap, query, ['visible', 'hidden', 'visible', ''])
+
+        assert restored == ['visible']
+        assert list(query.variables[ACTIVATED_SKILLS_KEY].keys()) == ['visible']
+        assert get_activated_skill_names(query) == ['visible']
+
    def test_resolve_virtual_skill_path_allows_visible_skill_reads(self):
        from langbot.pkg.provider.tools.loaders.skill import (
            PIPELINE_BOUND_SKILLS_KEY,
@@ -282,6 +305,7 @@ class TestSkillToolLoader:
        assert result['activated'] is True
        assert result['skill_name'] == 'demo'
        assert result['mount_path'] == '/workspace/.skills/demo'
+        assert result['activated_skill_names'] == ['demo']
        assert 'Step 1' in result['content']
        assert set(query.variables[ACTIVATED_SKILLS_KEY].keys()) == {'demo'}

@@ -62,6 +62,7 @@ class TestBuildHeartbeatPayload:

        assert payload['event_type'] == 'instance_heartbeat'
        assert payload['query_id'] == ''
+        assert 'instance_create_ts' in payload
        assert 'timestamp' in payload
        f = payload['features']
        assert f['database'] == 'postgresql'