feat:support dify message streaming output (#1437)

* fix:lark adapter listeners init problem

* feat:support dify streaming mode

* feat:remove some log

* fix(bot form): field desc missing

* fix: not compatible with chatflow

---------

Co-authored-by: wangzejie <wangzejie@meicai.cn>
Co-authored-by: Junyan Qin <rockchinq@gmail.com>
This commit is contained in:
zejiewang
2025-05-18 12:03:01 +08:00
committed by Junyan Qin
parent d60af2b451
commit ba4b5255a2
5 changed files with 180 additions and 5 deletions

View File

@@ -120,8 +120,10 @@ class RuntimeBot:
if isinstance(e, asyncio.CancelledError): if isinstance(e, asyncio.CancelledError):
self.task_context.set_current_action('Exited.') self.task_context.set_current_action('Exited.')
return return
traceback_str = traceback.format_exc()
self.task_context.set_current_action('Exited with error.') self.task_context.set_current_action('Exited with error.')
await self.logger.error(f'平台适配器运行出错:\n{e}\n{traceback.format_exc()}') await self.logger.error(f'平台适配器运行出错:\n{e}\n{traceback_str}')
self.task_wrapper = self.ap.task_mgr.create_task( self.task_wrapper = self.ap.task_mgr.create_task(
exception_wrapper(), exception_wrapper(),

View File

@@ -9,6 +9,7 @@ import re
import base64 import base64
import uuid import uuid
import json import json
import time
import datetime import datetime
import hashlib import hashlib
from Crypto.Cipher import AES from Crypto.Cipher import AES
@@ -320,6 +321,10 @@ class LarkEventConverter(adapter.EventConverter):
) )
CARD_ID_CACHE_SIZE = 500
CARD_ID_CACHE_MAX_LIFETIME = 20 * 60 # 20分钟
class LarkAdapter(adapter.MessagePlatformAdapter): class LarkAdapter(adapter.MessagePlatformAdapter):
bot: lark_oapi.ws.Client bot: lark_oapi.ws.Client
api_client: lark_oapi.Client api_client: lark_oapi.Client
@@ -338,6 +343,8 @@ class LarkAdapter(adapter.MessagePlatformAdapter):
config: dict config: dict
quart_app: quart.Quart quart_app: quart.Quart
ap: app.Application ap: app.Application
message_id_to_card_id: typing.Dict[str, typing.Tuple[str, int]]
def __init__(self, config: dict, ap: app.Application, logger: EventLogger): def __init__(self, config: dict, ap: app.Application, logger: EventLogger):
self.config = config self.config = config
@@ -345,6 +352,7 @@ class LarkAdapter(adapter.MessagePlatformAdapter):
self.logger = logger self.logger = logger
self.quart_app = quart.Quart(__name__) self.quart_app = quart.Quart(__name__)
self.listeners = {} self.listeners = {}
self.message_id_to_card_id = {}
@self.quart_app.route('/lark/callback', methods=['POST']) @self.quart_app.route('/lark/callback', methods=['POST'])
async def lark_callback(): async def lark_callback():
@@ -390,6 +398,19 @@ class LarkAdapter(adapter.MessagePlatformAdapter):
return {'code': 500, 'message': 'error'} return {'code': 500, 'message': 'error'}
async def on_message(event: lark_oapi.im.v1.P2ImMessageReceiveV1): async def on_message(event: lark_oapi.im.v1.P2ImMessageReceiveV1):
if self.config['enable-card-reply'] and event.event.message.message_id not in self.message_id_to_card_id:
self.ap.logger.debug('卡片回复模式开启')
# 开启卡片回复模式. 这里可以实现飞书一发消息,马上创建卡片进行回复"思考中..."
reply_message_id = await self.create_message_card(event.event.message.message_id)
self.message_id_to_card_id[event.event.message.message_id] = (reply_message_id, time.time())
if len(self.message_id_to_card_id) > CARD_ID_CACHE_SIZE:
self.message_id_to_card_id = {
k: v
for k, v in self.message_id_to_card_id.items()
if v[1] > time.time() - CARD_ID_CACHE_MAX_LIFETIME
}
lb_event = await self.event_converter.target2yiri(event, self.api_client) lb_event = await self.event_converter.target2yiri(event, self.api_client)
await self.listeners[type(lb_event)](lb_event, self) await self.listeners[type(lb_event)](lb_event, self)
@@ -409,11 +430,93 @@ class LarkAdapter(adapter.MessagePlatformAdapter):
async def send_message(self, target_type: str, target_id: str, message: platform_message.MessageChain): async def send_message(self, target_type: str, target_id: str, message: platform_message.MessageChain):
pass pass
async def create_message_card(self, message_id: str) -> str:
"""
创建卡片消息。
使用卡片消息是因为普通消息更新次数有限制,而大模型流式返回结果可能很多而超过限制,而飞书卡片没有这个限制
"""
# TODO 目前只支持卡片模板方式且卡片变量一定是content未来这块要做成可配置
# 发消息马上就会回复显示初始化的content信息即思考中
content = {
'type': 'template',
'data': {'template_id': self.config['card_template_id'], 'template_variable': {'content': 'Thinking...'}},
}
request: ReplyMessageRequest = (
ReplyMessageRequest.builder()
.message_id(message_id)
.request_body(
ReplyMessageRequestBody.builder().content(json.dumps(content)).msg_type('interactive').build()
)
.build()
)
# 发起请求
response: ReplyMessageResponse = await self.api_client.im.v1.message.areply(request)
# 处理失败返回
if not response.success():
raise Exception(
f'client.im.v1.message.reply failed, code: {response.code}, msg: {response.msg}, log_id: {response.get_log_id()}, resp: \n{json.dumps(json.loads(response.raw.content), indent=4, ensure_ascii=False)}'
)
return response.data.message_id
async def reply_message( async def reply_message(
self, self,
message_source: platform_events.MessageEvent, message_source: platform_events.MessageEvent,
message: platform_message.MessageChain, message: platform_message.MessageChain,
quote_origin: bool = False, quote_origin: bool = False,
):
if self.config['enable-card-reply']:
await self.reply_card_message(message_source, message, quote_origin)
else:
await self.reply_normal_message(message_source, message, quote_origin)
async def reply_card_message(
self,
message_source: platform_events.MessageEvent,
message: platform_message.MessageChain,
quote_origin: bool = False,
):
"""
回复消息变成更新卡片消息
"""
lark_message = await self.message_converter.yiri2target(message, self.api_client)
text_message = ''
for ele in lark_message[0]:
if ele['tag'] == 'text':
text_message += ele['text']
elif ele['tag'] == 'md':
text_message += ele['text']
content = {
'type': 'template',
'data': {'template_id': self.config['card_template_id'], 'template_variable': {'content': text_message}},
}
request: PatchMessageRequest = (
PatchMessageRequest.builder()
.message_id(self.message_id_to_card_id[message_source.message_chain.message_id][0])
.request_body(PatchMessageRequestBody.builder().content(json.dumps(content)).build())
.build()
)
# 发起请求
response: PatchMessageResponse = self.api_client.im.v1.message.patch(request)
# 处理失败返回
if not response.success():
raise Exception(
f'client.im.v1.message.patch failed, code: {response.code}, msg: {response.msg}, log_id: {response.get_log_id()}, resp: \n{json.dumps(json.loads(response.raw.content), indent=4, ensure_ascii=False)}'
)
return
async def reply_normal_message(
self,
message_source: platform_events.MessageEvent,
message: platform_message.MessageChain,
quote_origin: bool = False,
): ):
# 不再需要了因为message_id已经被包含到message_chain中 # 不再需要了因为message_id已经被包含到message_chain中
# lark_event = await self.event_converter.yiri2target(message_source) # lark_event = await self.event_converter.yiri2target(message_source)
@@ -492,4 +595,9 @@ class LarkAdapter(adapter.MessagePlatformAdapter):
) )
async def kill(self) -> bool: async def kill(self) -> bool:
# 需要断开连接,不然旧的连接会继续运行,导致飞书消息来时会随机选择一个连接
# 断开时lark.ws.Client的_receive_message_loop会打印error日志: receive message loop exit。然后进行重连
# 所以要设置_auto_reconnect=False,让其不重连。
self.bot._auto_reconnect = False
await self.bot._disconnect()
return False return False

View File

@@ -65,6 +65,23 @@ spec:
type: string type: string
required: true required: true
default: "" default: ""
- name: enable-card-reply
label:
en_US: Enable Card Reply Mode
zh_Hans: 启用飞书卡片回复模式
description:
en_US: If enabled, the bot will use the card of lark reply mode
zh_Hans: 如果启用,将使用飞书卡片方式来回复内容
type: boolean
required: true
default: false
- name: card_template_id
label:
en_US: card template id
zh_Hans: 卡片模板ID
type: string
required: true
default: "填写你的卡片template_id"
execution: execution:
python: python:
path: ./lark.py path: ./lark.py

View File

@@ -108,7 +108,13 @@ class DifyServiceAPIRunner(runner.RequestRunner):
mode = 'basic' # 标记是基础编排还是工作流编排 mode = 'basic' # 标记是基础编排还是工作流编排
basic_mode_pending_chunk = '' stream_output_pending_chunk = ''
batch_pending_max_size = self.pipeline_config['ai']['dify-service-api'].get(
'output-batch-size', 0
) # 积累一定量的消息更新消息一次
batch_pending_index = 0
inputs = {} inputs = {}
@@ -126,6 +132,13 @@ class DifyServiceAPIRunner(runner.RequestRunner):
): ):
self.ap.logger.debug('dify-chat-chunk: ' + str(chunk)) self.ap.logger.debug('dify-chat-chunk: ' + str(chunk))
# 查询异常情况
if chunk['event'] == 'error':
yield llm_entities.Message(
role='assistant',
content=f"查询异常: [{chunk['code']}]. {chunk['message']}.\n请重试,如果还报错,请用 <font color='red'>**!reset**</font> 命令重置对话再尝试。",
)
if chunk['event'] == 'workflow_started': if chunk['event'] == 'workflow_started':
mode = 'workflow' mode = 'workflow'
@@ -136,15 +149,35 @@ class DifyServiceAPIRunner(runner.RequestRunner):
role='assistant', role='assistant',
content=self._try_convert_thinking(chunk['data']['outputs']['answer']), content=self._try_convert_thinking(chunk['data']['outputs']['answer']),
) )
elif chunk['event'] == 'message':
stream_output_pending_chunk += chunk['answer']
if self.pipeline_config['ai']['dify-service-api'].get('enable-streaming', False):
# 消息数超过量就输出从而达到streaming的效果
batch_pending_index += 1
if batch_pending_index >= batch_pending_max_size:
yield llm_entities.Message(
role='assistant',
content=self._try_convert_thinking(stream_output_pending_chunk),
)
batch_pending_index = 0
elif mode == 'basic': elif mode == 'basic':
if chunk['event'] == 'message': if chunk['event'] == 'message':
basic_mode_pending_chunk += chunk['answer'] stream_output_pending_chunk += chunk['answer']
if self.pipeline_config['ai']['dify-service-api'].get('enable-streaming', False):
# 消息数超过量就输出从而达到streaming的效果
batch_pending_index += 1
if batch_pending_index >= batch_pending_max_size:
yield llm_entities.Message(
role='assistant',
content=self._try_convert_thinking(stream_output_pending_chunk),
)
batch_pending_index = 0
elif chunk['event'] == 'message_end': elif chunk['event'] == 'message_end':
yield llm_entities.Message( yield llm_entities.Message(
role='assistant', role='assistant',
content=self._try_convert_thinking(basic_mode_pending_chunk), content=self._try_convert_thinking(stream_output_pending_chunk),
) )
basic_mode_pending_chunk = '' stream_output_pending_chunk = ''
if chunk is None: if chunk is None:
raise errors.DifyAPIError('Dify API 没有返回任何响应请检查网络连接和API配置') raise errors.DifyAPIError('Dify API 没有返回任何响应请检查网络连接和API配置')

View File

@@ -138,6 +138,21 @@ stages:
label: label:
en_US: Remove en_US: Remove
zh_Hans: 移除 zh_Hans: 移除
- name: enable-streaming
label:
en_US: enable streaming mode
zh_Hans: 开启流式输出
type: boolean
required: true
default: false
- name: output-batch-size
label:
en_US: output batch size
zh_Hans: 输出批次大小(积累多少条消息后一起输出)
type: integer
required: true
default: 10
- name: dashscope-app-api - name: dashscope-app-api
label: label:
en_US: Aliyun Dashscope App API en_US: Aliyun Dashscope App API