From 3d5b70cc5d5b64d2dcb6ea34f2fadf8b8c741a3d Mon Sep 17 00:00:00 2001
From: Junyan Chin <rockchinq@gmail.com>
Date: Fri, 19 Jun 2026 18:07:25 +0800
Subject: [PATCH] fix(modelmgr): keep id-less streamed tool calls (Ollama)
 (#2262)

Ollama's OpenAI-compatible streaming endpoint emits a tool-call delta
carrying an `index` and a `function` payload but never an OpenAI-style
`id`. `_normalize_stream_tool_calls` dropped any tool call without an
`id`, so a tool-only turn yielded neither content nor a tool call: the
stream "completed" with 0 chars, the tool never ran, and the chat
appeared stuck. Models on standard OpenAI APIs (e.g. SiliconFlow) were
unaffected because they always send a `call_...` id.

Synthesize a stable per-index id (`call_<index>`) when the provider
omits one but a function name is present. Providers that do send ids
keep theirs, and parallel id-less calls keep distinct ids.

Add regression tests for the single and parallel id-less tool-call cases.

Fixes #2261
---
 .../modelmgr/requesters/litellmchat.py        |  11 ++
 tests/unit_tests/provider/test_litellmchat.py | 111 ++++++++++++++++++
 2 files changed, 122 insertions(+)
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/litellmchat.py b/src/langbot/pkg/provider/modelmgr/requesters/litellmchat.py
index d58dd2c5f..a6c09b7e7 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/litellmchat.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/litellmchat.py
@@ -392,6 +392,17 @@ class LiteLLMRequester(requester.ProviderAPIRequester):
             elif not isinstance(arguments, str):
                 arguments = str(arguments)
 
+            # Some OpenAI-compatible providers (notably Ollama's
+            # /v1/chat/completions) stream a tool-call delta with an `index` and
+            # a `function` payload but never emit an OpenAI-style `id`. The check
+            # below skips id-less calls, so such a call silently vanished: a
+            # tool-only turn yielded no content and no tool call, the stream
+            # "completed" with 0 chars, and the chat appeared stuck. Synthesize a
+            # per-index id (not unique across responses) so named-but-idless
+            # tool calls survive. Providers that do send ids keep theirs.
+            if not state['id'] and state['name']:
+                state['id'] = f'call_{index}'
+
             if not state['id'] or not state['name']:
                 continue
 
diff --git a/tests/unit_tests/provider/test_litellmchat.py b/tests/unit_tests/provider/test_litellmchat.py
index 91d00b19f..f7a448ab6 100644
--- a/tests/unit_tests/provider/test_litellmchat.py
+++ b/tests/unit_tests/provider/test_litellmchat.py
@@ -352,6 +352,117 @@ class TestInvokeLLMStreamUsage:
         assert tool_chunks[1].tool_calls[0].function.arguments == '{"text":'
         assert tool_chunks[2].tool_calls[0].function.arguments == '"plugin-tool-ok"}'
 
+    @pytest.mark.asyncio
+    async def test_stream_tool_call_without_id_is_not_dropped(self):
+        """Regression for #2261.
+
+        Ollama's OpenAI-compatible streaming endpoint emits a tool-call delta
+        carrying an ``index`` and a ``function`` payload but never an
+        OpenAI-style ``id``. The requester used to drop any id-less tool call,
+        so a tool-only turn yielded nothing, the stream "completed" with 0
+        chars, and the chat got stuck. A stable per-index id must be
+        synthesized so the tool call survives.
+        """
+        import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
+        import langbot_plugin.api.entities.builtin.provider.message as provider_message
+
+        mock_ap = Mock()
+        mock_ap.tool_mgr = Mock()
+        mock_ap.tool_mgr.generate_tools_for_openai = AsyncMock(
+            return_value=[{'type': 'function', 'function': {'name': 'zotero_search_items'}}]
+        )
+        requester = litellmchat.LiteLLMRequester(ap=mock_ap, config={'custom_llm_provider': 'openai'})
+        model = MockRuntimeModel('gpt-oss:20b', 'ollama')
+
+        # Ollama delivers the whole tool call in a single delta, with no id.
+        chunks = [
+            self._make_chunk(
+                tool_calls=[
+                    {
+                        'index': 0,
+                        'function': {'name': 'zotero_search_items', 'arguments': '{"query":"hello"}'},
+                    }
+                ]
+            ),
+            self._make_chunk(finish_reason='tool_calls'),
+        ]
+
+        async def _aiter(*args, **kwargs):
+            for c in chunks:
+                yield c
+
+        query = Mock(spec=pipeline_query.Query)
+        query.variables = {}
+        messages = [provider_message.Message(role='user', content='hello?')]
+        funcs = [Mock()]
+
+        with patch.object(litellmchat, 'acompletion', new=AsyncMock(side_effect=lambda **kw: _aiter())):
+            collected = [
+                chunk
+                async for chunk in requester.invoke_llm_stream(
+                    query=query,
+                    model=model,
+                    messages=messages,
+                    funcs=funcs,
+                )
+            ]
+
+        tool_chunks = [chunk for chunk in collected if chunk.tool_calls]
+        assert len(tool_chunks) == 1, 'id-less Ollama tool call must not be dropped'
+        tc = tool_chunks[0].tool_calls[0]
+        assert tc.id == 'call_0'
+        assert tc.function.name == 'zotero_search_items'
+        assert tc.function.arguments == '{"query":"hello"}'
+
+    @pytest.mark.asyncio
+    async def test_stream_multiple_tool_calls_without_id_get_distinct_ids(self):
+        """Two parallel id-less tool calls must keep distinct synthesized ids."""
+        import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
+        import langbot_plugin.api.entities.builtin.provider.message as provider_message
+
+        mock_ap = Mock()
+        mock_ap.tool_mgr = Mock()
+        mock_ap.tool_mgr.generate_tools_for_openai = AsyncMock(
+            return_value=[{'type': 'function', 'function': {'name': 'zotero_search_items'}}]
+        )
+        requester = litellmchat.LiteLLMRequester(ap=mock_ap, config={'custom_llm_provider': 'openai'})
+        model = MockRuntimeModel('gpt-oss:20b', 'ollama')
+
+        chunks = [
+            self._make_chunk(
+                tool_calls=[
+                    {'index': 0, 'function': {'name': 'zotero_search_items', 'arguments': '{"q":"a"}'}},
+                    {'index': 1, 'function': {'name': 'zotero_get_notes', 'arguments': '{"q":"b"}'}},
+                ]
+            ),
+            self._make_chunk(finish_reason='tool_calls'),
+        ]
+
+        async def _aiter(*args, **kwargs):
+            for c in chunks:
+                yield c
+
+        query = Mock(spec=pipeline_query.Query)
+        query.variables = {}
+        messages = [provider_message.Message(role='user', content='hello?')]
+        funcs = [Mock()]
+
+        with patch.object(litellmchat, 'acompletion', new=AsyncMock(side_effect=lambda **kw: _aiter())):
+            collected = [
+                chunk
+                async for chunk in requester.invoke_llm_stream(
+                    query=query,
+                    model=model,
+                    messages=messages,
+                    funcs=funcs,
+                )
+            ]
+
+        tool_chunks = [chunk for chunk in collected if chunk.tool_calls]
+        assert len(tool_chunks) == 1
+        ids = {tc.id for tc in tool_chunks[0].tool_calls}
+        assert ids == {'call_0', 'call_1'}
+
 
 class TestProcessThinkingContent:
     """Test _process_thinking_content method"""