From 3d5b70cc5d5b64d2dcb6ea34f2fadf8b8c741a3d Mon Sep 17 00:00:00 2001 From: Junyan Chin Date: Fri, 19 Jun 2026 18:07:25 +0800 Subject: [PATCH] fix(modelmgr): keep id-less streamed tool calls (Ollama) (#2262) Ollama's OpenAI-compatible streaming endpoint emits a tool-call delta carrying an `index` and a `function` payload but never an OpenAI-style `id`. `_normalize_stream_tool_calls` dropped any tool call without an `id`, so a tool-only turn yielded neither content nor a tool call: the stream "completed" with 0 chars, the tool never ran, and the chat appeared stuck. Models on standard OpenAI APIs (e.g. SiliconFlow) were unaffected because they always send a `call_...` id. Synthesize a stable per-index id (`call_{index}`) when the provider omits one but a function name is present. Providers that do send ids keep theirs, and parallel id-less calls keep distinct ids. Adds regression tests for the single and multi id-less tool-call cases. Fixes #2261 --- .../modelmgr/requesters/litellmchat.py | 11 ++ tests/unit_tests/provider/test_litellmchat.py | 111 ++++++++++++++++++ 2 files changed, 122 insertions(+) diff --git a/src/langbot/pkg/provider/modelmgr/requesters/litellmchat.py b/src/langbot/pkg/provider/modelmgr/requesters/litellmchat.py index d58dd2c5f..a6c09b7e7 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/litellmchat.py +++ b/src/langbot/pkg/provider/modelmgr/requesters/litellmchat.py @@ -392,6 +392,17 @@ class LiteLLMRequester(requester.ProviderAPIRequester): elif not isinstance(arguments, str): arguments = str(arguments) + # Some OpenAI-compatible providers (notably Ollama's + # /v1/chat/completions) stream a tool-call delta with an `index` and + # a `function` payload but never emit an OpenAI-style `id`. Without + # an id the call used to be dropped here, so the whole tool call + # silently vanished: a tool-only turn then yielded no content and no + # tool call, the stream "completed" with 0 chars, and the chat + # appeared stuck. 
Synthesize a stable per-index id so named-but-idless + # tool calls survive. Providers that do send ids keep theirs. + if not state['id'] and state['name']: + state['id'] = f'call_{index}' + if not state['id'] or not state['name']: continue diff --git a/tests/unit_tests/provider/test_litellmchat.py b/tests/unit_tests/provider/test_litellmchat.py index 91d00b19f..f7a448ab6 100644 --- a/tests/unit_tests/provider/test_litellmchat.py +++ b/tests/unit_tests/provider/test_litellmchat.py @@ -352,6 +352,117 @@ class TestInvokeLLMStreamUsage: assert tool_chunks[1].tool_calls[0].function.arguments == '{"text":' assert tool_chunks[2].tool_calls[0].function.arguments == '"plugin-tool-ok"}' + @pytest.mark.asyncio + async def test_stream_tool_call_without_id_is_not_dropped(self): + """Regression for #2261. + + Ollama's OpenAI-compatible streaming endpoint emits a tool-call delta + carrying an ``index`` and a ``function`` payload but never an + OpenAI-style ``id``. The requester used to drop any id-less tool call, + so a tool-only turn yielded nothing, the stream "completed" with 0 + chars, and the chat got stuck. A stable per-index id must be + synthesized so the tool call survives. + """ + import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query + import langbot_plugin.api.entities.builtin.provider.message as provider_message + + mock_ap = Mock() + mock_ap.tool_mgr = Mock() + mock_ap.tool_mgr.generate_tools_for_openai = AsyncMock( + return_value=[{'type': 'function', 'function': {'name': 'zotero_search_items'}}] + ) + requester = litellmchat.LiteLLMRequester(ap=mock_ap, config={'custom_llm_provider': 'openai'}) + model = MockRuntimeModel('gpt-oss:20b', 'ollama') + + # Ollama delivers the whole tool call in a single delta, with no id. 
+ chunks = [ + self._make_chunk( + tool_calls=[ + { + 'index': 0, + 'function': {'name': 'zotero_search_items', 'arguments': '{"query":"hello"}'}, + } + ] + ), + self._make_chunk(finish_reason='tool_calls'), + ] + + async def _aiter(*args, **kwargs): + for c in chunks: + yield c + + query = Mock(spec=pipeline_query.Query) + query.variables = {} + messages = [provider_message.Message(role='user', content='hello?')] + funcs = [Mock()] + + with patch.object(litellmchat, 'acompletion', new=AsyncMock(side_effect=lambda **kw: _aiter())): + collected = [ + chunk + async for chunk in requester.invoke_llm_stream( + query=query, + model=model, + messages=messages, + funcs=funcs, + ) + ] + + tool_chunks = [chunk for chunk in collected if chunk.tool_calls] + assert len(tool_chunks) == 1, 'id-less Ollama tool call must not be dropped' + tc = tool_chunks[0].tool_calls[0] + assert tc.id == 'call_0' + assert tc.function.name == 'zotero_search_items' + assert tc.function.arguments == '{"query":"hello"}' + + @pytest.mark.asyncio + async def test_stream_multiple_tool_calls_without_id_get_distinct_ids(self): + """Two parallel id-less tool calls must keep distinct synthesized ids.""" + import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query + import langbot_plugin.api.entities.builtin.provider.message as provider_message + + mock_ap = Mock() + mock_ap.tool_mgr = Mock() + mock_ap.tool_mgr.generate_tools_for_openai = AsyncMock( + return_value=[{'type': 'function', 'function': {'name': 'zotero_search_items'}}] + ) + requester = litellmchat.LiteLLMRequester(ap=mock_ap, config={'custom_llm_provider': 'openai'}) + model = MockRuntimeModel('gpt-oss:20b', 'ollama') + + chunks = [ + self._make_chunk( + tool_calls=[ + {'index': 0, 'function': {'name': 'zotero_search_items', 'arguments': '{"q":"a"}'}}, + {'index': 1, 'function': {'name': 'zotero_get_notes', 'arguments': '{"q":"b"}'}}, + ] + ), + self._make_chunk(finish_reason='tool_calls'), + ] + + async def _aiter(*args, 
**kwargs): + for c in chunks: + yield c + + query = Mock(spec=pipeline_query.Query) + query.variables = {} + messages = [provider_message.Message(role='user', content='hello?')] + funcs = [Mock()] + + with patch.object(litellmchat, 'acompletion', new=AsyncMock(side_effect=lambda **kw: _aiter())): + collected = [ + chunk + async for chunk in requester.invoke_llm_stream( + query=query, + model=model, + messages=messages, + funcs=funcs, + ) + ] + + tool_chunks = [chunk for chunk in collected if chunk.tool_calls] + assert len(tool_chunks) == 1 + ids = {tc.id for tc in tool_chunks[0].tool_calls} + assert ids == {'call_0', 'call_1'} + class TestProcessThinkingContent: """Test _process_thinking_content method"""