diff --git a/pkg/provider/modelmgr/requesters/chatcmpl.py b/pkg/provider/modelmgr/requesters/chatcmpl.py
index 4fcce481..51ea864b 100644
--- a/pkg/provider/modelmgr/requesters/chatcmpl.py
+++ b/pkg/provider/modelmgr/requesters/chatcmpl.py
@@ -189,8 +189,7 @@ class OpenAIChatCompletions(requester.ProviderAPIRequester):
                 delta_message.is_final = True
                 delta_message.content = current_content
 
-            if chunk_idx % 64 == 0 or delta_message.is_final:
-                yield delta_message
+            yield delta_message
             # return
 
     async def _closure(
diff --git a/pkg/provider/modelmgr/requesters/giteeaichatcmpl.py b/pkg/provider/modelmgr/requesters/giteeaichatcmpl.py
index 1c19a534..7ac9fa1a 100644
--- a/pkg/provider/modelmgr/requesters/giteeaichatcmpl.py
+++ b/pkg/provider/modelmgr/requesters/giteeaichatcmpl.py
@@ -195,5 +195,4 @@ class GiteeAIChatCompletions(chatcmpl.OpenAIChatCompletions):
                 delta_message.is_final = True
                 delta_message.content = current_content
 
-            if chunk_idx % 64 == 0 or delta_message.is_final:
-                yield delta_message
+            yield delta_message
diff --git a/pkg/provider/modelmgr/requesters/modelscopechatcmpl.py b/pkg/provider/modelmgr/requesters/modelscopechatcmpl.py
index 97201e47..04987c19 100644
--- a/pkg/provider/modelmgr/requesters/modelscopechatcmpl.py
+++ b/pkg/provider/modelmgr/requesters/modelscopechatcmpl.py
@@ -286,8 +286,7 @@ class ModelScopeChatCompletions(requester.ProviderAPIRequester):
                 delta_message.is_final = True
                 delta_message.content = current_content
 
-            if chunk_idx % 64 == 0 or delta_message.is_final:
-                yield delta_message
+            yield delta_message
             # return
 
     async def invoke_llm(
diff --git a/pkg/provider/runners/localagent.py b/pkg/provider/runners/localagent.py
index dc8be15f..0d7bdd0a 100644
--- a/pkg/provider/runners/localagent.py
+++ b/pkg/provider/runners/localagent.py
@@ -111,15 +111,17 @@ class LocalAgentRunner(runner.RequestRunner):
         else:
             # 流式输出，需要处理工具调用
             tool_calls_map: dict[str, llm_entities.ToolCall] = {}
+            msg_idx = 0
             async for msg in query.use_llm_model.requester.invoke_llm_stream(
                 query,
                 query.use_llm_model,
                 req_messages,
                 query.use_funcs,
                 extra_args=query.use_llm_model.model_entity.extra_args,
-            ):  
-                assert isinstance(msg, llm_entities.MessageChunk)
-                yield msg
+            ):
+                msg_idx = msg_idx + 1
+                if msg_idx % 8 == 0 or msg.is_final:
+                    yield msg
                 if msg.tool_calls:
                     for tool_call in msg.tool_calls:
                         if tool_call.id not in tool_calls_map:
@@ -180,19 +182,19 @@ class LocalAgentRunner(runner.RequestRunner):
                     extra_args=query.use_llm_model.model_entity.extra_args, 
                 ):  
                     yield msg
-                if msg.tool_calls:
-                    for tool_call in msg.tool_calls:
-                        if tool_call.id not in tool_calls_map:
-                            tool_calls_map[tool_call.id] = llm_entities.ToolCall(
-                                id=tool_call.id,
-                                type=tool_call.type,
-                                function=llm_entities.FunctionCall(
-                                    name=tool_call.function.name if tool_call.function else '', arguments=''
-                                ),
-                            )
-                        if tool_call.function and tool_call.function.arguments:
-                            # 流式处理中，工具调用参数可能分多个chunk返回，需要追加而不是覆盖
-                            tool_calls_map[tool_call.id].function.arguments += tool_call.function.arguments
+                    if msg.tool_calls:
+                        for tool_call in msg.tool_calls:
+                            if tool_call.id not in tool_calls_map:
+                                tool_calls_map[tool_call.id] = llm_entities.ToolCall(
+                                    id=tool_call.id,
+                                    type=tool_call.type,
+                                    function=llm_entities.FunctionCall(
+                                        name=tool_call.function.name if tool_call.function else '', arguments=''
+                                    ),
+                                )
+                            if tool_call.function and tool_call.function.arguments:
+                                # 流式处理中，工具调用参数可能分多个chunk返回，需要追加而不是覆盖
+                                tool_calls_map[tool_call.id].function.arguments += tool_call.function.arguments
                 final_msg = llm_entities.Message(
                     role=msg.role,
                     content=msg.all_content,