feat: Expanded WeCom message parsing to capture msgtype, inline voice/video… (#1843)

* Expanded WeCom message parsing to capture msgtype, inline voice/video/file/link data, bounded base64 downloads, and richer mixed-message attachments (src/langbot/libs/wecom_ai_bot_api/api.py); added event accessors for new fields (src/langbot/libs/wecom_ai_bot_api/wecombotevent.py).
Converter now maps richer WeCom payloads (text, images, files, voice, video, links) into platform message chain with fallbacks when nothing parsable is present (src/langbot/pkg/platform/sources/wecombot.py).
Preprocessor now turns voice inputs into file URLs for downstream runners (src/langbot/pkg/pipeline/preproc/preproc.py).
Dify runner uploads all incoming files (images/audio/video/docs) after downloading or decoding data URLs, infers MIME types, and passes typed file descriptors into chat/workflow calls (src/langbot/pkg/provider/runners/difysvapi.py).

* Update src/langbot/pkg/platform/sources/wecombot.py

Fixed the issue of duplicate text in the comments.

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update src/langbot/libs/wecom_ai_bot_api/api.py

Modify the way you approach challenges.

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update src/langbot/pkg/platform/sources/wecombot.py

Changing the variable names makes more sense.

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* feat: use from_base64 for the voice file converting

---------

Co-authored-by: tabriswang <tabriswang@finecomn.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: Junyan Qin <rockchinq@gmail.com>
This commit is contained in:
Yaguang.Wang
2025-12-05 22:33:15 +08:00
committed by GitHub
parent 45e61befac
commit cb7c9af25c
6 changed files with 400 additions and 65 deletions

View File

@@ -63,7 +63,7 @@ dependencies = [
"langchain-text-splitters>=0.0.1",
"chromadb>=0.4.24",
"qdrant-client (>=1.15.1,<2.0.0)",
"langbot-plugin==0.2.0",
"langbot-plugin==0.2.1b1",
"asyncpg>=0.30.0",
"line-bot-sdk>=3.19.0",
"tboxsdk>=0.0.10",

View File

@@ -458,32 +458,174 @@ class WecomBotClient:
async def get_message(self, msg_json):
message_data = {}
msg_type = msg_json.get('msgtype', '')
if msg_type:
message_data['msgtype'] = msg_type
if msg_json.get('chattype', '') == 'single':
message_data['type'] = 'single'
elif msg_json.get('chattype', '') == 'group':
message_data['type'] = 'group'
if msg_json.get('msgtype') == 'text':
max_inline_file_size = 5 * 1024 * 1024 # avoid decoding very large payloads by default
async def _safe_download(url: str):
if not url:
return None
return await self.download_url_to_base64(url, self.EnCodingAESKey)
if msg_type == 'text':
message_data['content'] = msg_json.get('text', {}).get('content')
elif msg_json.get('msgtype') == 'image':
elif msg_type == 'markdown':
message_data['content'] = msg_json.get('markdown', {}).get('content') or msg_json.get('text', {}).get(
'content', ''
)
elif msg_type == 'image':
picurl = msg_json.get('image', {}).get('url', '')
base64 = await self.download_url_to_base64(picurl, self.EnCodingAESKey)
message_data['picurl'] = base64
elif msg_json.get('msgtype') == 'mixed':
base64_data = await _safe_download(picurl)
if base64_data:
message_data['picurl'] = base64_data
message_data['images'] = [base64_data]
elif msg_type == 'voice':
voice_info = msg_json.get('voice', {}) or {}
download_url = voice_info.get('url')
message_data['voice'] = {
'url': download_url,
'md5sum': voice_info.get('md5sum') or voice_info.get('md5'),
'filesize': voice_info.get('filesize') or voice_info.get('size'),
'sdkfileid': voice_info.get('sdkfileid') or voice_info.get('fileid'),
}
# 企业微信智能转写文本(如果已有)直接复用,避免重复转写
if voice_info.get('content'):
message_data['content'] = voice_info.get('content')
if (message_data['voice'].get('filesize') or 0) <= max_inline_file_size:
voice_base64 = await _safe_download(download_url)
if voice_base64:
message_data['voice']['base64'] = voice_base64
elif msg_type == 'video':
video_info = msg_json.get('video', {}) or {}
download_url = video_info.get('url')
video_data = {
'url': download_url,
'filesize': video_info.get('filesize') or video_info.get('size'),
'sdkfileid': video_info.get('sdkfileid') or video_info.get('fileid'),
'md5sum': video_info.get('md5sum') or video_info.get('md5'),
'filename': video_info.get('filename') or video_info.get('name'),
}
if (video_data.get('filesize') or 0) <= max_inline_file_size:
video_base64 = await _safe_download(download_url)
if video_base64:
video_data['base64'] = video_base64
message_data['video'] = video_data
elif msg_type == 'file':
file_info = msg_json.get('file', {}) or {}
download_url = file_info.get('url') or file_info.get('fileurl')
file_data = {
'filename': file_info.get('filename') or file_info.get('name'),
'filesize': file_info.get('filesize') or file_info.get('size'),
'md5sum': file_info.get('md5sum') or file_info.get('md5'),
'sdkfileid': file_info.get('sdkfileid') or file_info.get('fileid'),
'download_url': download_url,
'extra': file_info,
}
if (file_data.get('filesize') or 0) <= max_inline_file_size:
file_base64 = await _safe_download(download_url)
if file_base64:
file_data['base64'] = file_base64
message_data['file'] = file_data
elif msg_type == 'link':
message_data['link'] = msg_json.get('link', {})
if not message_data.get('content'):
title = message_data['link'].get('title', '')
desc = message_data['link'].get('description') or message_data['link'].get('digest', '')
message_data['content'] = '\n'.join(filter(None, [title, desc]))
elif msg_type == 'mixed':
items = msg_json.get('mixed', {}).get('msg_item', [])
texts = []
picurl = None
images = []
files = []
voices = []
videos = []
links = []
for item in items:
if item.get('msgtype') == 'text':
item_type = item.get('msgtype')
if item_type == 'text':
texts.append(item.get('text', {}).get('content', ''))
elif item.get('msgtype') == 'image' and picurl is None:
picurl = item.get('image', {}).get('url')
elif item_type == 'image':
img_url = item.get('image', {}).get('url')
base64_data = await _safe_download(img_url)
if base64_data:
images.append(base64_data)
elif item_type == 'file':
file_info = item.get('file', {}) or {}
download_url = file_info.get('url') or file_info.get('fileurl')
file_data = {
'filename': file_info.get('filename') or file_info.get('name'),
'filesize': file_info.get('filesize') or file_info.get('size'),
'md5sum': file_info.get('md5sum') or file_info.get('md5'),
'sdkfileid': file_info.get('sdkfileid') or file_info.get('fileid'),
'download_url': download_url,
'extra': file_info,
}
if (file_data.get('filesize') or 0) <= max_inline_file_size:
file_base64 = await _safe_download(download_url)
if file_base64:
file_data['base64'] = file_base64
files.append(file_data)
elif item_type == 'voice':
voice_info = item.get('voice', {}) or {}
download_url = voice_info.get('url')
voice_data = {
'url': download_url,
'md5sum': voice_info.get('md5sum') or voice_info.get('md5'),
'filesize': voice_info.get('filesize') or voice_info.get('size'),
'sdkfileid': voice_info.get('sdkfileid') or voice_info.get('fileid'),
}
if voice_info.get('content'):
texts.append(voice_info.get('content'))
if (voice_data.get('filesize') or 0) <= max_inline_file_size:
voice_base64 = await _safe_download(download_url)
if voice_base64:
voice_data['base64'] = voice_base64
voices.append(voice_data)
elif item_type == 'video':
video_info = item.get('video', {}) or {}
download_url = video_info.get('url')
video_data = {
'url': download_url,
'filesize': video_info.get('filesize') or video_info.get('size'),
'sdkfileid': video_info.get('sdkfileid') or video_info.get('fileid'),
'md5sum': video_info.get('md5sum') or video_info.get('md5'),
'filename': video_info.get('filename') or video_info.get('name'),
}
if (video_data.get('filesize') or 0) <= max_inline_file_size:
video_base64 = await _safe_download(download_url)
if video_base64:
video_data['base64'] = video_base64
videos.append(video_data)
elif item_type == 'link':
links.append(item.get('link', {}))
if texts:
message_data['content'] = ''.join(texts) # 拼接所有 text
if picurl:
base64 = await self.download_url_to_base64(picurl, self.EnCodingAESKey)
message_data['picurl'] = base64 # 只保留第一个 image
message_data['content'] = ' '.join(texts) # 拼接所有 text
if images:
message_data['images'] = images
message_data['picurl'] = images[0] # 只保留第一个 image
if files:
message_data['files'] = files
message_data['file'] = files[0]
if voices:
message_data['voices'] = voices
message_data['voice'] = voices[0]
if videos:
message_data['videos'] = videos
message_data['video'] = videos[0]
if links:
message_data['link'] = links[0]
if items:
message_data['attachments'] = items
else:
message_data['raw_msg'] = msg_json
# Extract user information
from_info = msg_json.get('from', {})

View File

@@ -17,6 +17,13 @@ class WecomBotEvent(dict):
"""
return self.get('type', '')
@property
def msgtype(self) -> str:
"""
消息 msgtype
"""
return self.get('msgtype', '')
@property
def userid(self) -> str:
"""
@@ -52,6 +59,55 @@ class WecomBotEvent(dict):
"""
return self.get('picurl', '')
@property
def images(self):
"""
图片列表(兼容 mixed
"""
return self.get('images', [])
@property
def file(self):
"""
文件信息
"""
return self.get('file', {})
@property
def voice(self):
"""
语音信息
"""
return self.get('voice', {})
@property
def video(self):
"""
视频信息
"""
return self.get('video', {})
@property
def link(self):
"""
链接消息信息
"""
return self.get('link', {})
@property
def location(self):
"""
位置信息
"""
return self.get('location', {})
@property
def attachments(self):
"""
原始 mixed 中的附件项
"""
return self.get('attachments', [])
@property
def chatid(self) -> str:
"""

View File

@@ -111,6 +111,12 @@ class PreProcessor(stage.PipelineStage):
):
if me.base64 is not None:
content_list.append(provider_message.ContentElement.from_image_base64(me.base64))
elif isinstance(me, platform_message.Voice):
# 转成文件链接,让下游 runner 上传到目标模型
if me.base64:
content_list.append(provider_message.ContentElement.from_file_base64(me.base64, 'voice.silk'))
elif me.url:
content_list.append(provider_message.ContentElement.from_file_url(me.url, 'voice'))
elif isinstance(me, platform_message.File):
# if me.url is not None:
content_list.append(provider_message.ContentElement.from_file_url(me.url, me.name))

View File

@@ -28,9 +28,102 @@ class WecomBotMessageConverter(abstract_platform_adapter.AbstractMessageConverte
if event.type == 'group':
yiri_msg_list.append(platform_message.At(target=event.ai_bot_id))
yiri_msg_list.append(platform_message.Source(id=event.message_id, time=datetime.datetime.now()))
yiri_msg_list.append(platform_message.Plain(text=event.content))
if event.picurl != '':
yiri_msg_list.append(platform_message.Image(base64=event.picurl))
if event.content:
yiri_msg_list.append(platform_message.Plain(text=event.content))
images = []
if event.images:
images.extend([img for img in event.images if img])
if not images and event.picurl:
images.append(event.picurl)
for image_base64 in images:
if image_base64:
yiri_msg_list.append(platform_message.Image(base64=image_base64))
file_info = event.file or {}
if file_info:
file_url = (
file_info.get('download_url')
or file_info.get('url')
or file_info.get('fileurl')
or file_info.get('path')
)
file_base64 = file_info.get('base64')
file_name = file_info.get('filename') or file_info.get('name')
file_size = file_info.get('filesize') or file_info.get('size')
file_data = file_url or file_base64
if file_data or file_name:
file_kwargs = {}
if file_data:
file_kwargs['url'] = file_data
if file_name:
file_kwargs['name'] = file_name
if file_size is not None:
file_kwargs['size'] = file_size
try:
yiri_msg_list.append(platform_message.File(**file_kwargs))
except Exception:
# 兜底
yiri_msg_list.append(platform_message.Unknown(text='[file message unsupported]'))
voice_info = event.voice or {}
if voice_info:
voice_payload = voice_info.get('base64') or voice_info.get('url')
if voice_payload:
if voice_info.get('base64') and not voice_payload.startswith('data:'):
voice_payload = f"data:audio/mpeg;base64,{voice_info.get('base64')}"
try:
yiri_msg_list.append(platform_message.Voice(base64=voice_payload))
except Exception:
try:
voice_kwargs = {'url': voice_payload}
voice_name = voice_info.get('filename') or voice_info.get('name')
voice_size = voice_info.get('filesize') or voice_info.get('size')
if voice_name:
voice_kwargs['name'] = voice_name
if voice_size is not None:
voice_kwargs['size'] = voice_size
yiri_msg_list.append(platform_message.File(**voice_kwargs))
except Exception:
yiri_msg_list.append(platform_message.Unknown(text='[voice message unsupported]'))
video_info = event.video or {}
if video_info:
video_payload = (
video_info.get('base64')
or video_info.get('url')
or video_info.get('download_url')
or video_info.get('fileurl')
)
if video_payload:
video_kwargs = {'url': video_payload}
video_name = video_info.get('filename') or video_info.get('name')
video_size = video_info.get('filesize') or video_info.get('size')
if video_name:
video_kwargs['name'] = video_name
if video_size is not None:
video_kwargs['size'] = video_size
try:
# 没有专门的视频类型,沿用 File 传递给上层
yiri_msg_list.append(platform_message.File(**video_kwargs))
except Exception:
yiri_msg_list.append(platform_message.Unknown(text='[video message unsupported]'))
if event.msgtype == 'link' and event.link:
link = event.link
summary = '\n'.join(
filter(None, [link.get('title', ''), link.get('description') or link.get('digest', ''), link.get('url', '')])
)
if summary:
yiri_msg_list.append(platform_message.Plain(text=summary))
has_content_element = any(
not isinstance(element, (platform_message.Source, platform_message.At)) for element in yiri_msg_list
)
if not has_content_element:
fallback_type = event.msgtype or 'unknown'
yiri_msg_list.append(platform_message.Unknown(text=f'[unsupported wecom msgtype: {fallback_type}]'))
chain = platform_message.MessageChain(yiri_msg_list)
return chain

View File

@@ -4,6 +4,7 @@ import typing
import json
import uuid
import base64
import mimetypes
from langbot.pkg.provider import runner
@@ -12,6 +13,7 @@ import langbot_plugin.api.entities.builtin.provider.message as provider_message
from langbot.pkg.utils import image
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
from langbot.libs.dify_service_api.v1 import client, errors
import httpx
@runner.runner_class('dify-service-api')
@@ -70,14 +72,43 @@ class DifyServiceAPIRunner(runner.RequestRunner):
content = f'<think>\n{thinking_content}\n</think>\n{content}'.strip()
return content, thinking_content
async def _preprocess_user_message(self, query: pipeline_query.Query) -> tuple[str, list[str]]:
"""预处理用户消息,提取纯文本,并将图片上传到 Dify 服务
async def _preprocess_user_message(self, query: pipeline_query.Query) -> tuple[str, list[dict]]:
"""预处理用户消息,提取纯文本,并将图片/文件上传到 Dify 服务
Returns:
tuple[str, list[str]]: 纯文本和图片的 Dify 服务图片 ID
tuple[str, list[dict]]: 纯文本和上传后的文件描述(包含 type 与 id
"""
plain_text = ''
file_ids = []
upload_files: list[dict] = []
user_tag = f'{query.session.launcher_type.value}_{query.session.launcher_id}'
async def upload_file_bytes(file_name: str, file_bytes: bytes, content_type: str) -> str:
file_name = file_name or 'file'
content_type = content_type or 'application/octet-stream'
file = (file_name, file_bytes, content_type)
resp = await self.dify_client.upload_file(file, user_tag)
return resp['id']
async def download_file(file_url: str) -> tuple[bytes, str]:
"""Download file from url (supports data url)."""
async with httpx.AsyncClient() as client_session:
resp = await client_session.get(file_url)
resp.raise_for_status()
content_type = (
resp.headers.get('content-type') or mimetypes.guess_type(file_url)[0] or 'application/octet-stream'
)
return resp.content, content_type
def _detect_file_type(content_type: str) -> str:
"""Map MIME to dify file type."""
if content_type and content_type.startswith('image/'):
return 'image'
if content_type and content_type.startswith('audio/'):
return 'audio'
if content_type and content_type.startswith('video/'):
return 'video'
return 'document'
if isinstance(query.user_message.content, list):
for ce in query.user_message.content:
@@ -86,30 +117,36 @@ class DifyServiceAPIRunner(runner.RequestRunner):
elif ce.type == 'image_base64':
image_b64, image_format = await image.extract_b64_and_format(ce.image_base64)
file_bytes = base64.b64decode(image_b64)
file = ('img.png', file_bytes, f'image/{image_format}')
file_upload_resp = await self.dify_client.upload_file(
file,
f'{query.session.launcher_type.value}_{query.session.launcher_id}',
)
image_id = file_upload_resp['id']
file_ids.append(image_id)
# elif ce.type == "file_url":
# file_bytes = base64.b64decode(ce.file_url)
# file_upload_resp = await self.dify_client.upload_file(
# file_bytes,
# f'{query.session.launcher_type.value}_{query.session.launcher_id}',
# )
# file_id = file_upload_resp['id']
# file_ids.append(file_id)
image_id = await upload_file_bytes(f'img.{image_format}', file_bytes, f'image/{image_format}')
upload_files.append({'type': 'image', 'id': image_id})
elif ce.type == 'file_url':
file_url = getattr(ce, 'file_url', None)
file_name = getattr(ce, 'file_name', None) or 'file'
try:
file_bytes, content_type = await download_file(file_url)
file_id = await upload_file_bytes(file_name, file_bytes, content_type)
file_type = _detect_file_type(content_type)
upload_files.append({'type': file_type, 'id': file_id})
except Exception as e:
self.ap.logger.warning(f'dify file upload failed: {e}')
elif ce.type == 'file_base64':
file_name = getattr(ce, 'file_name', None) or 'file'
header, b64_data = ce.file_base64.split(',', 1)
content_type = 'application/octet-stream'
if ';' in header:
content_type = header.split(';')[0][5:] or content_type
file_bytes = base64.b64decode(b64_data)
file_id = await upload_file_bytes(file_name, file_bytes, content_type)
file_type = _detect_file_type(content_type)
upload_files.append({'type': file_type, 'id': file_id})
elif isinstance(query.user_message.content, str):
plain_text = query.user_message.content
# plain_text = "When the file content is readable, please read the content of this file. When the file is an image, describe the content of this image." if file_ids and not plain_text else plain_text
# plain_text = "The user message type cannot be parsed." if not file_ids and not plain_text else plain_text
# plain_text = plain_text if plain_text else "When the file content is readable, please read the content of this file. When the file is an image, describe the content of this image."
# print(self.pipeline_config['ai'])
plain_text = plain_text if plain_text else self.pipeline_config['ai']['dify-service-api']['base-prompt']
return plain_text, file_ids
return plain_text, upload_files
async def _chat_messages(
self, query: pipeline_query.Query
@@ -118,14 +155,15 @@ class DifyServiceAPIRunner(runner.RequestRunner):
cov_id = query.session.using_conversation.uuid or ''
query.variables['conversation_id'] = cov_id
plain_text, image_ids = await self._preprocess_user_message(query)
plain_text, upload_files = await self._preprocess_user_message(query)
files = [
{
'type': 'image',
'upload_file_id': image_id,
'type': f['type'],
'transfer_method': 'local_file',
'upload_file_id': f['id'],
}
for image_id in image_ids
for f in upload_files
]
mode = 'basic' # 标记是基础编排还是工作流编排
@@ -183,15 +221,15 @@ class DifyServiceAPIRunner(runner.RequestRunner):
cov_id = query.session.using_conversation.uuid or ''
query.variables['conversation_id'] = cov_id
plain_text, image_ids = await self._preprocess_user_message(query)
plain_text, upload_files = await self._preprocess_user_message(query)
files = [
{
'type': 'image',
'type': f['type'],
'transfer_method': 'local_file',
'upload_file_id': image_id,
'upload_file_id': f['id'],
}
for image_id in image_ids
for f in upload_files
]
ignored_events = []
@@ -280,15 +318,15 @@ class DifyServiceAPIRunner(runner.RequestRunner):
query.variables['conversation_id'] = query.session.using_conversation.uuid
plain_text, image_ids = await self._preprocess_user_message(query)
plain_text, upload_files = await self._preprocess_user_message(query)
files = [
{
'type': 'image',
'type': f['type'],
'transfer_method': 'local_file',
'upload_file_id': image_id,
'upload_file_id': f['id'],
}
for image_id in image_ids
for f in upload_files
]
ignored_events = ['text_chunk', 'workflow_started']
@@ -352,15 +390,15 @@ class DifyServiceAPIRunner(runner.RequestRunner):
cov_id = query.session.using_conversation.uuid or ''
query.variables['conversation_id'] = cov_id
plain_text, image_ids = await self._preprocess_user_message(query)
plain_text, upload_files = await self._preprocess_user_message(query)
files = [
{
'type': 'image',
'type': f['type'],
'transfer_method': 'local_file',
'upload_file_id': image_id,
'upload_file_id': f['id'],
}
for image_id in image_ids
for f in upload_files
]
basic_mode_pending_chunk = ''
@@ -436,15 +474,15 @@ class DifyServiceAPIRunner(runner.RequestRunner):
cov_id = query.session.using_conversation.uuid or ''
query.variables['conversation_id'] = cov_id
plain_text, image_ids = await self._preprocess_user_message(query)
plain_text, upload_files = await self._preprocess_user_message(query)
files = [
{
'type': 'image',
'type': f['type'],
'transfer_method': 'local_file',
'upload_file_id': image_id,
'upload_file_id': f['id'],
}
for image_id in image_ids
for f in upload_files
]
ignored_events = []
@@ -558,15 +596,15 @@ class DifyServiceAPIRunner(runner.RequestRunner):
query.variables['conversation_id'] = query.session.using_conversation.uuid
plain_text, image_ids = await self._preprocess_user_message(query)
plain_text, upload_files = await self._preprocess_user_message(query)
files = [
{
'type': 'image',
'type': f['type'],
'transfer_method': 'local_file',
'upload_file_id': image_id,
'upload_file_id': f['id'],
}
for image_id in image_ids
for f in upload_files
]
ignored_events = ['workflow_started']