Merge branch 'rc/new-plugin' into refactor/new-plugin-system

2026-06-11 00:06:04 +00:00 · 2025-08-24 21:40:02 +08:00
parent f2d5c21712 83ff64698b
commit 64764c412b
232 changed files with 11998 additions and 1440 deletions
--- a/pkg/provider/runners/dashscopeapi.py
+++ b/pkg/provider/runners/dashscopeapi.py
@@ -102,8 +102,14 @@ class DashScopeAPIRunner(runner.RequestRunner):
        plain_text = ''  # 用户输入的纯文本信息
        image_ids = []  # 用户输入的图片ID列表 （暂不支持）

-        plain_text, image_ids = await self._preprocess_user_message(query)
+        think_start = False
+        think_end = False

+        plain_text, image_ids = await self._preprocess_user_message(query)
+        has_thoughts = True  # 获取思考过程
+        remove_think = self.pipeline_config['output'].get('misc', '').get('remove-think')
+        if remove_think:
+            has_thoughts = False
        # 发送对话请求
        response = dashscope.Application.call(
            api_key=self.api_key,  # 智能体应用的API Key
@@ -112,43 +118,108 @@ class DashScopeAPIRunner(runner.RequestRunner):
            stream=True,  # 流式输出
            incremental_output=True,  # 增量输出，使用流式输出需要开启增量输出
            session_id=query.session.using_conversation.uuid,  # 会话ID用于，多轮对话
+            has_thoughts=has_thoughts,
            # rag_options={                                     # 主要用于文件交互，暂不支持
            #     "session_file_ids": ["FILE_ID1"],             # FILE_ID1 替换为实际的临时文件ID,逗号隔开多个
            # }
        )
+        idx_chunk = 0
+        try:
+            is_stream = await query.adapter.is_stream_output_supported()

-        for chunk in response:
-            if chunk.get('status_code') != 200:
-                raise DashscopeAPIError(
-                    f'Dashscope API 请求失败: status_code={chunk.get("status_code")} message={chunk.get("message")} request_id={chunk.get("request_id")} '
-                )
-            if not chunk:
-                continue
+        except AttributeError:
+            is_stream = False
+        if is_stream:
+            for chunk in response:
+                if chunk.get('status_code') != 200:
+                    raise DashscopeAPIError(
+                        f'Dashscope API 请求失败: status_code={chunk.get("status_code")} message={chunk.get("message")} request_id={chunk.get("request_id")} '
+                    )
+                if not chunk:
+                    continue
+                idx_chunk += 1
+                # 获取流式传输的output
+                stream_output = chunk.get('output', {})
+                stream_think = stream_output.get('thoughts', [])
+                if stream_think[0].get('thought'):
+                    if not think_start:
+                        think_start = True
+                        pending_content += f'<think>\n{stream_think[0].get("thought")}'
+                    else:
+                        # 继续输出 reasoning_content
+                        pending_content += stream_think[0].get('thought')
+                elif stream_think[0].get('thought') == '' and not think_end:
+                    think_end = True
+                    pending_content += '\n</think>\n'
+                if stream_output.get('text') is not None:
+                    pending_content += stream_output.get('text')
+                # 是否是流式最后一个chunk
+                is_final = False if stream_output.get('finish_reason', False) == 'null' else True

-            # 获取流式传输的output
-            stream_output = chunk.get('output', {})
-            if stream_output.get('text') is not None:
-                pending_content += stream_output.get('text')
+                # 获取模型传出的参考资料列表
+                references_dict_list = stream_output.get('doc_references', [])

-        # 保存当前会话的session_id用于下次对话的语境
-        query.session.using_conversation.uuid = stream_output.get('session_id')
+                # 从模型传出的参考资料信息中提取用于替换的字典
+                if references_dict_list is not None:
+                    for doc in references_dict_list:
+                        if doc.get('index_id') is not None:
+                            references_dict[doc.get('index_id')] = doc.get('doc_name')

-        # 获取模型传出的参考资料列表
-        references_dict_list = stream_output.get('doc_references', [])
+                    # 将参考资料替换到文本中
+                    pending_content = self._replace_references(pending_content, references_dict)

-        # 从模型传出的参考资料信息中提取用于替换的字典
-        if references_dict_list is not None:
-            for doc in references_dict_list:
-                if doc.get('index_id') is not None:
-                    references_dict[doc.get('index_id')] = doc.get('doc_name')
+                if idx_chunk % 8 == 0 or is_final:
+                    yield provider_message.MessageChunk(
+                        role='assistant',
+                        content=pending_content,
+                        is_final=is_final,
+                    )
+            # 保存当前会话的session_id用于下次对话的语境
+            query.session.using_conversation.uuid = stream_output.get('session_id')
+        else:
+            for chunk in response:
+                if chunk.get('status_code') != 200:
+                    raise DashscopeAPIError(
+                        f'Dashscope API 请求失败: status_code={chunk.get("status_code")} message={chunk.get("message")} request_id={chunk.get("request_id")} '
+                    )
+                if not chunk:
+                    continue
+                idx_chunk += 1
+                # 获取流式传输的output
+                stream_output = chunk.get('output', {})
+                stream_think = stream_output.get('thoughts', [])
+                if stream_think[0].get('thought'):
+                    if not think_start:
+                        think_start = True
+                        pending_content += f'<think>\n{stream_think[0].get("thought")}'
+                    else:
+                        # 继续输出 reasoning_content
+                        pending_content += stream_think[0].get('thought')
+                elif stream_think[0].get('thought') == '' and not think_end:
+                    think_end = True
+                    pending_content += '\n</think>\n'
+                if stream_output.get('text') is not None:
+                    pending_content += stream_output.get('text')

-            # 将参考资料替换到文本中
-            pending_content = self._replace_references(pending_content, references_dict)
+            # 保存当前会话的session_id用于下次对话的语境
+            query.session.using_conversation.uuid = stream_output.get('session_id')

-        yield provider_message.Message(
-            role='assistant',
-            content=pending_content,
-        )
+            # 获取模型传出的参考资料列表
+            references_dict_list = stream_output.get('doc_references', [])
+
+            # 从模型传出的参考资料信息中提取用于替换的字典
+            if references_dict_list is not None:
+                for doc in references_dict_list:
+                    if doc.get('index_id') is not None:
+                        references_dict[doc.get('index_id')] = doc.get('doc_name')
+
+                # 将参考资料替换到文本中
+                pending_content = self._replace_references(pending_content, references_dict)
+
+            yield provider_message.Message(
+                role='assistant',
+                content=pending_content,
+            )

    async def _workflow_messages(
        self, query: pipeline_query.Query
@@ -176,52 +247,108 @@ class DashScopeAPIRunner(runner.RequestRunner):
            incremental_output=True,  # 增量输出，使用流式输出需要开启增量输出
            session_id=query.session.using_conversation.uuid,  # 会话ID用于，多轮对话
            biz_params=biz_params,  # 工作流应用的自定义输入参数传递
+            flow_stream_mode='message_format',  # 消息模式，输出/结束节点的流式结果
            # rag_options={                                     # 主要用于文件交互，暂不支持
            #     "session_file_ids": ["FILE_ID1"],             # FILE_ID1 替换为实际的临时文件ID,逗号隔开多个
            # }
        )

        # 处理API返回的流式输出
-        for chunk in response:
-            if chunk.get('status_code') != 200:
-                raise DashscopeAPIError(
-                    f'Dashscope API 请求失败: status_code={chunk.get("status_code")} message={chunk.get("message")} request_id={chunk.get("request_id")} '
-                )
-            if not chunk:
-                continue
+        try:
+            is_stream = await query.adapter.is_stream_output_supported()

-            # 获取流式传输的output
-            stream_output = chunk.get('output', {})
-            if stream_output.get('text') is not None:
-                pending_content += stream_output.get('text')
+        except AttributeError:
+            is_stream = False
+        idx_chunk = 0
+        if is_stream:
+            for chunk in response:
+                if chunk.get('status_code') != 200:
+                    raise DashscopeAPIError(
+                        f'Dashscope API 请求失败: status_code={chunk.get("status_code")} message={chunk.get("message")} request_id={chunk.get("request_id")} '
+                    )
+                if not chunk:
+                    continue
+                idx_chunk += 1
+                # 获取流式传输的output
+                stream_output = chunk.get('output', {})
+                if stream_output.get('workflow_message') is not None:
+                    pending_content += stream_output.get('workflow_message').get('message').get('content')
+                # if stream_output.get('text') is not None:
+                #     pending_content += stream_output.get('text')

-        # 保存当前会话的session_id用于下次对话的语境
-        query.session.using_conversation.uuid = stream_output.get('session_id')
+                is_final = False if stream_output.get('finish_reason', False) == 'null' else True

-        # 获取模型传出的参考资料列表
-        references_dict_list = stream_output.get('doc_references', [])
+                # 获取模型传出的参考资料列表
+                references_dict_list = stream_output.get('doc_references', [])

-        # 从模型传出的参考资料信息中提取用于替换的字典
-        if references_dict_list is not None:
-            for doc in references_dict_list:
-                if doc.get('index_id') is not None:
-                    references_dict[doc.get('index_id')] = doc.get('doc_name')
+                # 从模型传出的参考资料信息中提取用于替换的字典
+                if references_dict_list is not None:
+                    for doc in references_dict_list:
+                        if doc.get('index_id') is not None:
+                            references_dict[doc.get('index_id')] = doc.get('doc_name')

-            # 将参考资料替换到文本中
-            pending_content = self._replace_references(pending_content, references_dict)
+                    # 将参考资料替换到文本中
+                    pending_content = self._replace_references(pending_content, references_dict)
+                if idx_chunk % 8 == 0 or is_final:
+                    yield provider_message.MessageChunk(
+                        role='assistant',
+                        content=pending_content,
+                        is_final=is_final,
+                    )

-        yield provider_message.Message(
-            role='assistant',
-            content=pending_content,
-        )
+            # 保存当前会话的session_id用于下次对话的语境
+            query.session.using_conversation.uuid = stream_output.get('session_id')
+
+        else:
+            for chunk in response:
+                if chunk.get('status_code') != 200:
+                    raise DashscopeAPIError(
+                        f'Dashscope API 请求失败: status_code={chunk.get("status_code")} message={chunk.get("message")} request_id={chunk.get("request_id")} '
+                    )
+                if not chunk:
+                    continue
+
+                # 获取流式传输的output
+                stream_output = chunk.get('output', {})
+                if stream_output.get('text') is not None:
+                    pending_content += stream_output.get('text')
+
+                is_final = False if stream_output.get('finish_reason', False) == 'null' else True
+
+            # 保存当前会话的session_id用于下次对话的语境
+            query.session.using_conversation.uuid = stream_output.get('session_id')
+
+            # 获取模型传出的参考资料列表
+            references_dict_list = stream_output.get('doc_references', [])
+
+            # 从模型传出的参考资料信息中提取用于替换的字典
+            if references_dict_list is not None:
+                for doc in references_dict_list:
+                    if doc.get('index_id') is not None:
+                        references_dict[doc.get('index_id')] = doc.get('doc_name')
+
+                # 将参考资料替换到文本中
+                pending_content = self._replace_references(pending_content, references_dict)
+
+            yield provider_message.Message(
+                role='assistant',
+                content=pending_content,
+            )

    async def run(self, query: pipeline_query.Query) -> typing.AsyncGenerator[provider_message.Message, None]:
        """运行"""
+        msg_seq = 0
        if self.app_type == 'agent':
            async for msg in self._agent_messages(query):
+                if isinstance(msg, provider_message.MessageChunk):
+                    msg_seq += 1
+                    msg.msg_sequence = msg_seq
                yield msg
        elif self.app_type == 'workflow':
            async for msg in self._workflow_messages(query):
+                if isinstance(msg, provider_message.MessageChunk):
+                    msg_seq += 1
+                    msg.msg_sequence = msg_seq
                yield msg
        else:
            raise DashscopeAPIError(f'不支持的 Dashscope 应用类型: {self.app_type}')
--- a/pkg/provider/runners/difysvapi.py
+++ b/pkg/provider/runners/difysvapi.py
@@ -3,7 +3,6 @@ from __future__ import annotations
 import typing
 import json
 import uuid
-import re
 import base64


@@ -38,29 +37,38 @@ class DifyServiceAPIRunner(runner.RequestRunner):
            base_url=self.pipeline_config['ai']['dify-service-api']['base-url'],
        )

-    def _try_convert_thinking(self, resp_text: str) -> str:
-        """尝试转换 Dify 的思考提示"""
-        if not resp_text.startswith(
-            '<details style="color:gray;background-color: #f8f8f8;padding: 8px;border-radius: 4px;" open> <summary> Thinking... </summary>'
-        ):
-            return resp_text
+    def _process_thinking_content(
+        self,
+        content: str,
+    ) -> tuple[str, str]:
+        """处理思维链内容

-        if self.pipeline_config['ai']['dify-service-api']['thinking-convert'] == 'original':
-            return resp_text
+        Args:
+            content: 原始内容
+        Returns:
+            (处理后的内容, 提取的思维链内容)
+        """
+        remove_think = self.pipeline_config['output'].get('misc', '').get('remove-think')
+        thinking_content = ''
+        # 从 content 中提取 <think> 标签内容
+        if content and '<think>' in content and '</think>' in content:
+            import re

-        if self.pipeline_config['ai']['dify-service-api']['thinking-convert'] == 'remove':
-            return re.sub(
-                r'<details style="color:gray;background-color: #f8f8f8;padding: 8px;border-radius: 4px;" open> <summary> Thinking... </summary>.*?</details>',
-                '',
-                resp_text,
-                flags=re.DOTALL,
-            )
+            think_pattern = r'<think>(.*?)</think>'
+            think_matches = re.findall(think_pattern, content, re.DOTALL)
+            if think_matches:
+                thinking_content = '\n'.join(think_matches)
+                # 移除 content 中的 <think> 标签
+                content = re.sub(think_pattern, '', content, flags=re.DOTALL).strip()

-        if self.pipeline_config['ai']['dify-service-api']['thinking-convert'] == 'plain':
-            pattern = r'<details style="color:gray;background-color: #f8f8f8;padding: 8px;border-radius: 4px;" open> <summary> Thinking... </summary>(.*?)</details>'
-            thinking_text = re.search(pattern, resp_text, flags=re.DOTALL)
-            content_text = re.sub(pattern, '', resp_text, flags=re.DOTALL)
-            return f'<think>{thinking_text.group(1)}</think>\n{content_text}'
+        # 3. 根据 remove_think 参数决定是否保留思维链
+        if remove_think:
+            return content, ''
+        else:
+            # 如果有思维链内容，将其以 <think> 格式添加到 content 开头
+            if thinking_content:
+                content = f'<think>\n{thinking_content}\n</think>\n{content}'.strip()
+            return content, thinking_content

    async def _preprocess_user_message(self, query: pipeline_query.Query) -> tuple[str, list[str]]:
        """预处理用户消息，提取纯文本，并将图片上传到 Dify 服务
@@ -134,17 +142,20 @@ class DifyServiceAPIRunner(runner.RequestRunner):
            if mode == 'workflow':
                if chunk['event'] == 'node_finished':
                    if chunk['data']['node_type'] == 'answer':
+                        content, _ = self._process_thinking_content(chunk['data']['outputs']['answer'])
+
                        yield provider_message.Message(
                            role='assistant',
-                            content=self._try_convert_thinking(chunk['data']['outputs']['answer']),
+                            content=content,
                        )
            elif mode == 'basic':
                if chunk['event'] == 'message':
                    basic_mode_pending_chunk += chunk['answer']
                elif chunk['event'] == 'message_end':
+                    content, _ = self._process_thinking_content(basic_mode_pending_chunk)
                    yield provider_message.Message(
                        role='assistant',
-                        content=self._try_convert_thinking(basic_mode_pending_chunk),
+                        content=content,
                    )
                    basic_mode_pending_chunk = ''

@@ -195,14 +206,15 @@ class DifyServiceAPIRunner(runner.RequestRunner):
            if chunk['event'] in ignored_events:
                continue

-            if chunk['event'] == 'agent_message':
+            if chunk['event'] == 'agent_message' or chunk['event'] == 'message':
                pending_agent_message += chunk['answer']
            else:
                if pending_agent_message.strip() != '':
                    pending_agent_message = pending_agent_message.replace('</details>Action:', '</details>')
+                    content, _ = self._process_thinking_content(pending_agent_message)
                    yield provider_message.Message(
                        role='assistant',
-                        content=self._try_convert_thinking(pending_agent_message),
+                        content=content,
                    )
                pending_agent_message = ''

@@ -312,26 +324,352 @@ class DifyServiceAPIRunner(runner.RequestRunner):
            elif chunk['event'] == 'workflow_finished':
                if chunk['data']['error']:
                    raise errors.DifyAPIError(chunk['data']['error'])
+                content, _ = self._process_thinking_content(chunk['data']['outputs']['summary'])

                msg = provider_message.Message(
                    role='assistant',
-                    content=chunk['data']['outputs']['summary'],
+                    content=content,
                )

                yield msg

+    async def _chat_messages_chunk(
+        self, query: pipeline_query.Query
+    ) -> typing.AsyncGenerator[provider_message.MessageChunk, None]:
+        """调用聊天助手"""
+        cov_id = query.session.using_conversation.uuid or ''
+        query.variables['conversation_id'] = cov_id
+
+        plain_text, image_ids = await self._preprocess_user_message(query)
+
+        files = [
+            {
+                'type': 'image',
+                'transfer_method': 'local_file',
+                'upload_file_id': image_id,
+            }
+            for image_id in image_ids
+        ]
+
+        basic_mode_pending_chunk = ''
+
+        inputs = {}
+
+        inputs.update(query.variables)
+        message_idx = 0
+
+        chunk = None  # 初始化chunk变量，防止在没有响应时引用错误
+
+        is_final = False
+        think_start = False
+        think_end = False
+
+        remove_think = self.pipeline_config['output'].get('misc', '').get('remove-think')
+
+        async for chunk in self.dify_client.chat_messages(
+            inputs=inputs,
+            query=plain_text,
+            user=f'{query.session.launcher_type.value}_{query.session.launcher_id}',
+            conversation_id=cov_id,
+            files=files,
+            timeout=120,
+        ):
+            self.ap.logger.debug('dify-chat-chunk: ' + str(chunk))
+
+            # if chunk['event'] == 'workflow_started':
+            #     mode = 'workflow'
+            # if mode == 'workflow':
+            # elif mode == 'basic':
+            # 因为都只是返回的 message也没有工具调用什么的，暂时不分类
+            if chunk['event'] == 'message':
+                message_idx += 1
+                if remove_think:
+                    if '<think>' in chunk['answer'] and not think_start:
+                        think_start = True
+                        continue
+                    if '</think>' in chunk['answer'] and not think_end:
+                        import re
+
+                        content = re.sub(r'^\n</think>', '', chunk['answer'])
+                        basic_mode_pending_chunk += content
+                        think_end = True
+                    elif think_end:
+                        basic_mode_pending_chunk += chunk['answer']
+                    if think_start:
+                        continue
+
+                else:
+                    basic_mode_pending_chunk += chunk['answer']
+
+            if chunk['event'] == 'message_end':
+                is_final = True
+
+            if is_final or message_idx % 8 == 0:
+                # content, _ = self._process_thinking_content(basic_mode_pending_chunk)
+                yield provider_message.MessageChunk(
+                    role='assistant',
+                    content=basic_mode_pending_chunk,
+                    is_final=is_final,
+                )
+
+        if chunk is None:
+            raise errors.DifyAPIError('Dify API 没有返回任何响应，请检查网络连接和API配置')
+
+        query.session.using_conversation.uuid = chunk['conversation_id']
+
+    async def _agent_chat_messages_chunk(
+        self, query: pipeline_query.Query
+    ) -> typing.AsyncGenerator[provider_message.MessageChunk, None]:
+        """调用聊天助手"""
+        cov_id = query.session.using_conversation.uuid or ''
+        query.variables['conversation_id'] = cov_id
+
+        plain_text, image_ids = await self._preprocess_user_message(query)
+
+        files = [
+            {
+                'type': 'image',
+                'transfer_method': 'local_file',
+                'upload_file_id': image_id,
+            }
+            for image_id in image_ids
+        ]
+
+        ignored_events = []
+
+        inputs = {}
+
+        inputs.update(query.variables)
+
+        pending_agent_message = ''
+
+        chunk = None  # 初始化chunk变量，防止在没有响应时引用错误
+        message_idx = 0
+        is_final = False
+        think_start = False
+        think_end = False
+
+        remove_think = self.pipeline_config['output'].get('misc', '').get('remove-think')
+
+        async for chunk in self.dify_client.chat_messages(
+            inputs=inputs,
+            query=plain_text,
+            user=f'{query.session.launcher_type.value}_{query.session.launcher_id}',
+            response_mode='streaming',
+            conversation_id=cov_id,
+            files=files,
+            timeout=120,
+        ):
+            self.ap.logger.debug('dify-agent-chunk: ' + str(chunk))
+
+            if chunk['event'] in ignored_events:
+                continue
+
+            if chunk['event'] == 'agent_message':
+                message_idx += 1
+                if remove_think:
+                    if '<think>' in chunk['answer'] and not think_start:
+                        think_start = True
+                        continue
+                    if '</think>' in chunk['answer'] and not think_end:
+                        import re
+
+                        content = re.sub(r'^\n</think>', '', chunk['answer'])
+                        pending_agent_message += content
+                        think_end = True
+                    elif think_end or not think_start:
+                        pending_agent_message += chunk['answer']
+                    if think_start:
+                        continue
+
+                else:
+                    pending_agent_message += chunk['answer']
+            elif chunk['event'] == 'message_end':
+                is_final = True
+            else:
+                if chunk['event'] == 'agent_thought':
+                    if chunk['tool'] != '' and chunk['observation'] != '':  # 工具调用结果，跳过
+                        continue
+                    message_idx += 1
+                    if chunk['tool']:
+                        msg = provider_message.MessageChunk(
+                            role='assistant',
+                            tool_calls=[
+                                provider_message.ToolCall(
+                                    id=chunk['id'],
+                                    type='function',
+                                    function=provider_message.FunctionCall(
+                                        name=chunk['tool'],
+                                        arguments=json.dumps({}),
+                                    ),
+                                )
+                            ],
+                        )
+                        yield msg
+                if chunk['event'] == 'message_file':
+                    message_idx += 1
+                    if chunk['type'] == 'image' and chunk['belongs_to'] == 'assistant':
+                        base_url = self.dify_client.base_url
+
+                        if base_url.endswith('/v1'):
+                            base_url = base_url[:-3]
+
+                        image_url = base_url + chunk['url']
+
+                        yield provider_message.MessageChunk(
+                            role='assistant',
+                            content=[provider_message.ContentElement.from_image_url(image_url)],
+                            is_final=is_final,
+                        )
+
+                if chunk['event'] == 'error':
+                    raise errors.DifyAPIError('dify 服务错误: ' + chunk['message'])
+            if message_idx % 8 == 0 or is_final:
+                yield provider_message.MessageChunk(
+                    role='assistant',
+                    content=pending_agent_message,
+                    is_final=is_final,
+                )
+
+        if chunk is None:
+            raise errors.DifyAPIError('Dify API 没有返回任何响应，请检查网络连接和API配置')
+
+        query.session.using_conversation.uuid = chunk['conversation_id']
+
+    async def _workflow_messages_chunk(
+        self, query: pipeline_query.Query
+    ) -> typing.AsyncGenerator[provider_message.MessageChunk, None]:
+        """调用工作流"""
+
+        if not query.session.using_conversation.uuid:
+            query.session.using_conversation.uuid = str(uuid.uuid4())
+
+        query.variables['conversation_id'] = query.session.using_conversation.uuid
+
+        plain_text, image_ids = await self._preprocess_user_message(query)
+
+        files = [
+            {
+                'type': 'image',
+                'transfer_method': 'local_file',
+                'upload_file_id': image_id,
+            }
+            for image_id in image_ids
+        ]
+
+        ignored_events = ['workflow_started']
+
+        inputs = {  # these variables are legacy variables, we need to keep them for compatibility
+            'langbot_user_message_text': plain_text,
+            'langbot_session_id': query.variables['session_id'],
+            'langbot_conversation_id': query.variables['conversation_id'],
+            'langbot_msg_create_time': query.variables['msg_create_time'],
+        }
+
+        inputs.update(query.variables)
+        messsage_idx = 0
+        is_final = False
+        think_start = False
+        think_end = False
+        workflow_contents = ''
+
+        remove_think = self.pipeline_config['output'].get('misc', '').get('remove-think')
+        async for chunk in self.dify_client.workflow_run(
+            inputs=inputs,
+            user=f'{query.session.launcher_type.value}_{query.session.launcher_id}',
+            files=files,
+            timeout=120,
+        ):
+            self.ap.logger.debug('dify-workflow-chunk: ' + str(chunk))
+            if chunk['event'] in ignored_events:
+                continue
+            if chunk['event'] == 'workflow_finished':
+                is_final = True
+                if chunk['data']['error']:
+                    raise errors.DifyAPIError(chunk['data']['error'])
+
+            if chunk['event'] == 'text_chunk':
+                messsage_idx += 1
+                if remove_think:
+                    if '<think>' in chunk['data']['text'] and not think_start:
+                        think_start = True
+                        continue
+                    if '</think>' in chunk['data']['text'] and not think_end:
+                        import re
+
+                        content = re.sub(r'^\n</think>', '', chunk['data']['text'])
+                        workflow_contents += content
+                        think_end = True
+                    elif think_end:
+                        workflow_contents += chunk['data']['text']
+                    if think_start:
+                        continue
+
+                else:
+                    workflow_contents += chunk['data']['text']
+
+            if chunk['event'] == 'node_started':
+                if chunk['data']['node_type'] == 'start' or chunk['data']['node_type'] == 'end':
+                    continue
+                messsage_idx += 1
+                msg = provider_message.MessageChunk(
+                    role='assistant',
+                    content=None,
+                    tool_calls=[
+                        provider_message.ToolCall(
+                            id=chunk['data']['node_id'],
+                            type='function',
+                            function=provider_message.FunctionCall(
+                                name=chunk['data']['title'],
+                                arguments=json.dumps({}),
+                            ),
+                        )
+                    ],
+                )
+
+                yield msg
+
+            if messsage_idx % 8 == 0 or is_final:
+                yield provider_message.MessageChunk(
+                    role='assistant',
+                    content=workflow_contents,
+                    is_final=is_final,
+                )
+
    async def run(self, query: pipeline_query.Query) -> typing.AsyncGenerator[provider_message.Message, None]:
        """运行请求"""
-        if self.pipeline_config['ai']['dify-service-api']['app-type'] == 'chat':
-            async for msg in self._chat_messages(query):
-                yield msg
-        elif self.pipeline_config['ai']['dify-service-api']['app-type'] == 'agent':
-            async for msg in self._agent_chat_messages(query):
-                yield msg
-        elif self.pipeline_config['ai']['dify-service-api']['app-type'] == 'workflow':
-            async for msg in self._workflow_messages(query):
-                yield msg
+        if await query.adapter.is_stream_output_supported():
+            msg_idx = 0
+            if self.pipeline_config['ai']['dify-service-api']['app-type'] == 'chat':
+                async for msg in self._chat_messages_chunk(query):
+                    msg_idx += 1
+                    msg.msg_sequence = msg_idx
+                    yield msg
+            elif self.pipeline_config['ai']['dify-service-api']['app-type'] == 'agent':
+                async for msg in self._agent_chat_messages_chunk(query):
+                    msg_idx += 1
+                    msg.msg_sequence = msg_idx
+                    yield msg
+            elif self.pipeline_config['ai']['dify-service-api']['app-type'] == 'workflow':
+                async for msg in self._workflow_messages_chunk(query):
+                    msg_idx += 1
+                    msg.msg_sequence = msg_idx
+                    yield msg
+            else:
+                raise errors.DifyAPIError(
+                    f'不支持的 Dify 应用类型: {self.pipeline_config["ai"]["dify-service-api"]["app-type"]}'
+                )
        else:
-            raise errors.DifyAPIError(
-                f'不支持的 Dify 应用类型: {self.pipeline_config["ai"]["dify-service-api"]["app-type"]}'
-            )
+            if self.pipeline_config['ai']['dify-service-api']['app-type'] == 'chat':
+                async for msg in self._chat_messages(query):
+                    yield msg
+            elif self.pipeline_config['ai']['dify-service-api']['app-type'] == 'agent':
+                async for msg in self._agent_chat_messages(query):
+                    yield msg
+            elif self.pipeline_config['ai']['dify-service-api']['app-type'] == 'workflow':
+                async for msg in self._workflow_messages(query):
+                    yield msg
+            else:
+                raise errors.DifyAPIError(
+                    f'不支持的 Dify 应用类型: {self.pipeline_config["ai"]["dify-service-api"]["app-type"]}'
+                )
--- a/pkg/provider/runners/localagent.py
+++ b/pkg/provider/runners/localagent.py
@@ -1,39 +1,179 @@
 from __future__ import annotations

 import json
+import copy
 import typing
-
 from .. import runner
 import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
 import langbot_plugin.api.entities.builtin.provider.message as provider_message


+rag_combined_prompt_template = """
+The following are relevant context entries retrieved from the knowledge base. 
+Please use them to answer the user's message. 
+Respond in the same language as the user's input.
+
+<context>
+{rag_context}
+</context>
+
+<user_message>
+{user_message}
+</user_message>
+"""
+
+
@runner.runner_class('local-agent')
 class LocalAgentRunner(runner.RequestRunner):
    """本地Agent请求运行器"""

-    async def run(self, query: pipeline_query.Query) -> typing.AsyncGenerator[provider_message.Message, None]:
+    class ToolCallTracker:
+        """工具调用追踪器"""
+
+        def __init__(self):
+            self.active_calls: dict[str, dict] = {}
+            self.completed_calls: list[provider_message.ToolCall] = []
+
+    async def run(
+        self, query: pipeline_query.Query
+    ) -> typing.AsyncGenerator[provider_message.Message | provider_message.MessageChunk, None]:
        """运行请求"""
        pending_tool_calls = []

-        req_messages = query.prompt.messages.copy() + query.messages.copy() + [query.user_message]
+        kb_uuid = query.pipeline_config['ai']['local-agent']['knowledge-base']

-        use_llm_model = await self.ap.model_mgr.get_model_by_uuid(query.use_llm_model_uuid)
+        if kb_uuid == '__none__':
+            kb_uuid = None

-        # 首次请求
-        msg = await use_llm_model.requester.invoke_llm(
-            query,
-            use_llm_model,
-            req_messages,
-            query.use_funcs,
-            extra_args=use_llm_model.model_entity.extra_args,
-        )
+        user_message = copy.deepcopy(query.user_message)

-        yield msg
+        user_message_text = ''

-        pending_tool_calls = msg.tool_calls
+        if isinstance(user_message.content, str):
+            user_message_text = user_message.content
+        elif isinstance(user_message.content, list):
+            for ce in user_message.content:
+                if ce.type == 'text':
+                    user_message_text += ce.text
+                    break

-        req_messages.append(msg)
+        if kb_uuid and user_message_text:
+            # only support text for now
+            kb = await self.ap.rag_mgr.get_knowledge_base_by_uuid(kb_uuid)
+
+            if not kb:
+                self.ap.logger.warning(f'Knowledge base {kb_uuid} not found')
+                raise ValueError(f'Knowledge base {kb_uuid} not found')
+
+            result = await kb.retrieve(user_message_text, kb.knowledge_base_entity.top_k)
+
+            final_user_message_text = ''
+
+            if result:
+                rag_context = '\n\n'.join(
+                    f'[{i + 1}] {entry.metadata.get("text", "")}' for i, entry in enumerate(result)
+                )
+                final_user_message_text = rag_combined_prompt_template.format(
+                    rag_context=rag_context, user_message=user_message_text
+                )
+
+            else:
+                final_user_message_text = user_message_text
+
+            self.ap.logger.debug(f'Final user message text: {final_user_message_text}')
+
+            for ce in user_message.content:
+                if ce.type == 'text':
+                    ce.text = final_user_message_text
+                    break
+
+        req_messages = query.prompt.messages.copy() + query.messages.copy() + [user_message]
+
+        try:
+            is_stream = await query.adapter.is_stream_output_supported()
+        except AttributeError:
+            is_stream = False
+
+        remove_think = self.pipeline_config['output'].get('misc', '').get('remove-think')
+
+        if not is_stream:
+            # 非流式输出，直接请求
+
+            msg = await query.use_llm_model.requester.invoke_llm(
+                query,
+                query.use_llm_model,
+                req_messages,
+                query.use_funcs,
+                extra_args=query.use_llm_model.model_entity.extra_args,
+                remove_think=remove_think,
+            )
+            yield msg
+            final_msg = msg
+        else:
+            # 流式输出，需要处理工具调用
+            tool_calls_map: dict[str, provider_message.ToolCall] = {}
+            msg_idx = 0
+            accumulated_content = ''  # 从开始累积的所有内容
+            last_role = 'assistant'
+            msg_sequence = 1
+            async for msg in query.use_llm_model.requester.invoke_llm_stream(
+                query,
+                query.use_llm_model,
+                req_messages,
+                query.use_funcs,
+                extra_args=query.use_llm_model.model_entity.extra_args,
+                remove_think=remove_think,
+            ):
+                msg_idx = msg_idx + 1
+
+                # 记录角色
+                if msg.role:
+                    last_role = msg.role
+
+                # 累积内容
+                if msg.content:
+                    accumulated_content += msg.content
+
+                # 处理工具调用
+                if msg.tool_calls:
+                    for tool_call in msg.tool_calls:
+                        if tool_call.id not in tool_calls_map:
+                            tool_calls_map[tool_call.id] = provider_message.ToolCall(
+                                id=tool_call.id,
+                                type=tool_call.type,
+                                function=provider_message.FunctionCall(
+                                    name=tool_call.function.name if tool_call.function else '', arguments=''
+                                ),
+                            )
+                        if tool_call.function and tool_call.function.arguments:
+                            # 流式处理中，工具调用参数可能分多个chunk返回，需要追加而不是覆盖
+                            tool_calls_map[tool_call.id].function.arguments += tool_call.function.arguments
+                # continue
+                # 每8个chunk或最后一个chunk时，输出所有累积的内容
+                if msg_idx % 8 == 0 or msg.is_final:
+                    msg_sequence += 1
+                    yield provider_message.MessageChunk(
+                        role=last_role,
+                        content=accumulated_content,  # 输出所有累积内容
+                        tool_calls=list(tool_calls_map.values()) if (tool_calls_map and msg.is_final) else None,
+                        is_final=msg.is_final,
+                        msg_sequence=msg_sequence,
+                    )
+
+            # 创建最终消息用于后续处理
+            final_msg = provider_message.MessageChunk(
+                role=last_role,
+                content=accumulated_content,
+                tool_calls=list(tool_calls_map.values()) if tool_calls_map else None,
+                msg_sequence=msg_sequence,
+            )
+
+        pending_tool_calls = final_msg.tool_calls
+        first_content = final_msg.content
+        if isinstance(final_msg, provider_message.MessageChunk):
+            first_end_sequence = final_msg.msg_sequence
+
+        req_messages.append(final_msg)

        # 持续请求，只要还有待处理的工具调用就继续处理调用
        while pending_tool_calls:
@@ -43,13 +183,19 @@ class LocalAgentRunner(runner.RequestRunner):

                    parameters = json.loads(func.arguments)

-                    func_ret = await self.ap.tool_mgr.execute_func_call(func.name, parameters)
-
-                    msg = provider_message.Message(
-                        role='tool',
-                        content=json.dumps(func_ret, ensure_ascii=False),
-                        tool_call_id=tool_call.id,
-                    )
+                    func_ret = await self.ap.tool_mgr.execute_func_call(query, func.name, parameters)
+                    if is_stream:
+                        msg = provider_message.MessageChunk(
+                            role='tool',
+                            content=json.dumps(func_ret, ensure_ascii=False),
+                            tool_call_id=tool_call.id,
+                        )
+                    else:
+                        msg = provider_message.Message(
+                            role='tool',
+                            content=json.dumps(func_ret, ensure_ascii=False),
+                            tool_call_id=tool_call.id,
+                        )

                    yield msg

@@ -62,17 +208,81 @@ class LocalAgentRunner(runner.RequestRunner):

                    req_messages.append(err_msg)

-            # 处理完所有调用，再次请求
-            msg = await use_llm_model.requester.invoke_llm(
-                query,
-                use_llm_model,
-                req_messages,
-                query.use_funcs,
-                extra_args=use_llm_model.model_entity.extra_args,
-            )
+            if is_stream:
+                tool_calls_map = {}
+                msg_idx = 0
+                accumulated_content = ''  # 从开始累积的所有内容
+                last_role = 'assistant'
+                msg_sequence = first_end_sequence

-            yield msg
+                async for msg in query.use_llm_model.requester.invoke_llm_stream(
+                    query,
+                    query.use_llm_model,
+                    req_messages,
+                    query.use_funcs,
+                    extra_args=query.use_llm_model.model_entity.extra_args,
+                    remove_think=remove_think,
+                ):
+                    msg_idx += 1

-            pending_tool_calls = msg.tool_calls
+                    # 记录角色
+                    if msg.role:
+                        last_role = msg.role

-            req_messages.append(msg)
+                    # 第一次请求工具调用时的内容
+                    if msg_idx == 1:
+                        accumulated_content = first_content if first_content is not None else accumulated_content
+
+                    # 累积内容
+                    if msg.content:
+                        accumulated_content += msg.content
+
+                    # 处理工具调用
+                    if msg.tool_calls:
+                        for tool_call in msg.tool_calls:
+                            if tool_call.id not in tool_calls_map:
+                                tool_calls_map[tool_call.id] = provider_message.ToolCall(
+                                    id=tool_call.id,
+                                    type=tool_call.type,
+                                    function=provider_message.FunctionCall(
+                                        name=tool_call.function.name if tool_call.function else '', arguments=''
+                                    ),
+                                )
+                            if tool_call.function and tool_call.function.arguments:
+                                # 流式处理中，工具调用参数可能分多个chunk返回，需要追加而不是覆盖
+                                tool_calls_map[tool_call.id].function.arguments += tool_call.function.arguments
+
+                    # 每8个chunk或最后一个chunk时，输出所有累积的内容
+                    if msg_idx % 8 == 0 or msg.is_final:
+                        msg_sequence += 1
+                        yield provider_message.MessageChunk(
+                            role=last_role,
+                            content=accumulated_content,  # 输出所有累积内容
+                            tool_calls=list(tool_calls_map.values()) if (tool_calls_map and msg.is_final) else None,
+                            is_final=msg.is_final,
+                            msg_sequence=msg_sequence,
+                        )
+
+                final_msg = provider_message.MessageChunk(
+                    role=last_role,
+                    content=accumulated_content,
+                    tool_calls=list(tool_calls_map.values()) if tool_calls_map else None,
+                    msg_sequence=msg_sequence,
+                )
+            else:
+                # 处理完所有调用，再次请求
+                msg = await query.use_llm_model.requester.invoke_llm(
+                    query,
+                    query.use_llm_model,
+                    req_messages,
+                    query.use_funcs,
+                    extra_args=query.use_llm_model.model_entity.extra_args,
+                    remove_think=remove_think,
+                )
+
+                yield msg
+                final_msg = msg
+
+            pending_tool_calls = final_msg.tool_calls
+
+            req_messages.append(final_msg)