Merge pull request #2113 from langbot-app/feat/wecombot-group-msg

feat: add feat for receive files in wecombot
2026-07-18 10:26:07 +00:00 · 2026-04-07 16:54:35 +08:00
parent 83ccb33fd3 25f9330491
commit 66087f83e1
4 changed files with 231 additions and 1 deletions
@@ -595,6 +595,120 @@ async def parse_wecom_bot_message(
    if msg_json.get('aibotid'):
        message_data['aibotid'] = msg_json.get('aibotid', '')

+    # Handle quote (referenced message) - important for group chat file references
+    quote_info = msg_json.get('quote')
+    if quote_info:
+        quote_data: dict[str, Any] = {}
+        quote_type = quote_info.get('msgtype', '')
+        quote_data['msgtype'] = quote_type
+
+        if quote_type == 'text':
+            quote_data['content'] = quote_info.get('text', {}).get('content', '')
+        elif quote_type == 'image':
+            img_info = quote_info.get('image', {})
+            img_url = img_info.get('url', '')
+            img_aeskey = img_info.get('aeskey', '')
+            base64_data = await _safe_download_as_data_uri(img_url, img_aeskey)
+            if base64_data:
+                quote_data['picurl'] = base64_data
+                quote_data['images'] = [base64_data]
+        elif quote_type == 'file':
+            file_info = quote_info.get('file', {}) or {}
+            download_url = file_info.get('url') or file_info.get('fileurl')
+            item_aeskey = file_info.get('aeskey', '')
+            file_data = {
+                'filename': file_info.get('filename') or file_info.get('name'),
+                'filesize': file_info.get('filesize') or file_info.get('size'),
+                'md5sum': file_info.get('md5sum') or file_info.get('md5'),
+                'sdkfileid': file_info.get('sdkfileid') or file_info.get('fileid'),
+                'download_url': download_url,
+                'extra': file_info,
+            }
+            # Same as private chat: append aeskey to download_url for plugin processing
+            if download_url and item_aeskey:
+                file_data['download_url'] = download_url + f'?aeskey={item_aeskey}'
+            quote_data['file'] = file_data
+        elif quote_type == 'voice':
+            voice_info = quote_info.get('voice', {}) or {}
+            download_url = voice_info.get('url')
+            item_aeskey = voice_info.get('aeskey', '')
+            voice_data = {
+                'url': download_url,
+                'md5sum': voice_info.get('md5sum') or voice_info.get('md5'),
+                'filesize': voice_info.get('filesize') or voice_info.get('size'),
+                'sdkfileid': voice_info.get('sdkfileid') or voice_info.get('fileid'),
+            }
+            if voice_info.get('content'):
+                quote_data['content'] = voice_info.get('content')
+            # Same as private chat: append aeskey to url for plugin processing
+            if download_url and item_aeskey:
+                voice_data['url'] = download_url + f'?aeskey={item_aeskey}'
+            quote_data['voice'] = voice_data
+        elif quote_type == 'video':
+            video_info = quote_info.get('video', {}) or {}
+            download_url = video_info.get('url')
+            item_aeskey = video_info.get('aeskey', '')
+            video_data = {
+                'url': download_url,
+                'filesize': video_info.get('filesize') or video_info.get('size'),
+                'sdkfileid': video_info.get('sdkfileid') or video_info.get('fileid'),
+                'md5sum': video_info.get('md5sum') or video_info.get('md5'),
+                'filename': video_info.get('filename') or video_info.get('name'),
+            }
+            # Same as private chat: append aeskey to download_url for plugin processing
+            if download_url and item_aeskey:
+                video_data['download_url'] = download_url + f'?aeskey={item_aeskey}'
+            quote_data['video'] = video_data
+        elif quote_type == 'link':
+            quote_data['link'] = quote_info.get('link', {})
+            link = quote_data['link']
+            title = link.get('title', '')
+            desc = link.get('description') or link.get('digest', '')
+            quote_data['content'] = '\n'.join(filter(None, [title, desc]))
+        elif quote_type == 'mixed':
+            # Handle mixed type in quote (text + images + files etc.)
+            items = quote_info.get('mixed', {}).get('msg_item', [])
+            texts = []
+            images = []
+            files = []
+            for item in items:
+                item_type = item.get('msgtype')
+                if item_type == 'text':
+                    texts.append(item.get('text', {}).get('content', ''))
+                elif item_type == 'image':
+                    img_info = item.get('image', {})
+                    img_url = img_info.get('url')
+                    img_aeskey = img_info.get('aeskey', '')
+                    base64_data = await _safe_download_as_data_uri(img_url, img_aeskey)
+                    if base64_data:
+                        images.append(base64_data)
+                elif item_type == 'file':
+                    file_info = item.get('file', {}) or {}
+                    download_url = file_info.get('url') or file_info.get('fileurl')
+                    item_aeskey = file_info.get('aeskey', '')
+                    file_data = {
+                        'filename': file_info.get('filename') or file_info.get('name'),
+                        'filesize': file_info.get('filesize') or file_info.get('size'),
+                        'md5sum': file_info.get('md5sum') or file_info.get('md5'),
+                        'sdkfileid': file_info.get('sdkfileid') or file_info.get('fileid'),
+                        'download_url': download_url,
+                        'extra': file_info,
+                    }
+                    # Same as private chat: append aeskey to download_url for plugin processing
+                    if download_url and item_aeskey:
+                        file_data['download_url'] = download_url + f'?aeskey={item_aeskey}'
+                    files.append(file_data)
+            if texts:
+                quote_data['content'] = ' '.join(texts)
+            if images:
+                quote_data['images'] = images
+                quote_data['picurl'] = images[0]
+            if files:
+                quote_data['files'] = files
+                quote_data['file'] = files[0]
+
+        message_data['quote'] = quote_data
+
    return message_data


@@ -147,3 +147,10 @@ class WecomBotEvent(dict):
        流式消息 ID
        """
        return self.get('stream_id', '')
+
+    @property
+    def quote(self):
+        """
+        引用消息信息（群聊中用户引用其他消息时返回）
+        """
+        return self.get('quote', {})
@@ -160,7 +160,6 @@ class PreProcessor(stage.PipelineStage):
                elif me.url:
                    content_list.append(provider_message.ContentElement.from_file_url(me.url, 'voice'))
            elif isinstance(me, platform_message.File):
-                # if me.url is not None:
                content_list.append(provider_message.ContentElement.from_file_url(me.url, me.name))
            elif isinstance(me, platform_message.Quote) and quote_msg:
                for msg in me.origin:
@@ -172,6 +171,15 @@ class PreProcessor(stage.PipelineStage):
                        ):
                            if msg.base64 is not None:
                                content_list.append(provider_message.ContentElement.from_image_base64(msg.base64))
+                    elif isinstance(msg, platform_message.File):
+                        content_list.append(provider_message.ContentElement.from_file_url(msg.url, msg.name))
+                    elif isinstance(msg, platform_message.Voice):
+                        if msg.base64:
+                            content_list.append(
+                                provider_message.ContentElement.from_file_base64(msg.base64, 'voice.silk')
+                            )
+                        elif msg.url:
+                            content_list.append(provider_message.ContentElement.from_file_url(msg.url, 'voice'))

        query.variables['user_message_text'] = plain_text

@@ -126,6 +126,107 @@ class WecomBotMessageConverter(abstract_platform_adapter.AbstractMessageConverte
            if summary:
                yiri_msg_list.append(platform_message.Plain(text=summary))

+        # Handle quoted message (引用消息) - important for group chat file references
+        # Extract files/images/voice from quote and add them as top-level components
+        # so that plugins like FileReader can process them the same way as direct messages
+        quote_info = event.quote or {}
+        if quote_info:
+            # Process quote text content - add as Plain for context
+            if quote_info.get('content'):
+                yiri_msg_list.append(platform_message.Plain(text=f'[引用消息] {quote_info.get("content")}'))
+
+            # Process quote images - add as top-level Image components
+            quote_images = quote_info.get('images', [])
+            if not quote_images and quote_info.get('picurl'):
+                quote_images = [quote_info.get('picurl')]
+            for img_data in quote_images:
+                if img_data:
+                    yiri_msg_list.append(platform_message.Image(base64=img_data))
+
+            # Process quote file - add as top-level File component (same as private chat)
+            quote_file = quote_info.get('file') or {}
+            if quote_file:
+                file_url = (
+                    quote_file.get('base64')
+                    or quote_file.get('download_url')
+                    or quote_file.get('url')
+                    or quote_file.get('fileurl')
+                )
+                file_name = quote_file.get('filename') or quote_file.get('name')
+                file_size = quote_file.get('filesize') or quote_file.get('size')
+                if file_url or file_name:
+                    file_kwargs = {}
+                    if file_url:
+                        file_kwargs['url'] = file_url
+                    if file_name:
+                        file_kwargs['name'] = file_name
+                    if file_size is not None:
+                        file_kwargs['size'] = file_size
+                    try:
+                        yiri_msg_list.append(platform_message.File(**file_kwargs))
+                    except Exception:
+                        yiri_msg_list.append(platform_message.Unknown(text='[quoted file unsupported]'))
+
+            # Process quote voice - add as top-level Voice/File component
+            quote_voice = quote_info.get('voice') or {}
+            if quote_voice:
+                voice_payload = quote_voice.get('base64') or quote_voice.get('url')
+                if voice_payload:
+                    if quote_voice.get('base64') and not voice_payload.startswith('data:'):
+                        voice_payload = f'data:audio/mpeg;base64,{quote_voice.get("base64")}'
+                    try:
+                        yiri_msg_list.append(platform_message.Voice(base64=voice_payload))
+                    except Exception:
+                        try:
+                            voice_kwargs = {'url': voice_payload}
+                            voice_name = quote_voice.get('filename') or quote_voice.get('name')
+                            voice_size = quote_voice.get('filesize') or quote_voice.get('size')
+                            if voice_name:
+                                voice_kwargs['name'] = voice_name
+                            if voice_size is not None:
+                                voice_kwargs['size'] = voice_size
+                            yiri_msg_list.append(platform_message.File(**voice_kwargs))
+                        except Exception:
+                            yiri_msg_list.append(platform_message.Unknown(text='[quoted voice unsupported]'))
+
+            # Process quote video - add as top-level File component
+            quote_video = quote_info.get('video') or {}
+            if quote_video:
+                video_payload = (
+                    quote_video.get('base64')
+                    or quote_video.get('url')
+                    or quote_video.get('download_url')
+                    or quote_video.get('fileurl')
+                )
+                if video_payload:
+                    video_kwargs = {'url': video_payload}
+                    video_name = quote_video.get('filename') or quote_video.get('name')
+                    video_size = quote_video.get('filesize') or quote_video.get('size')
+                    if video_name:
+                        video_kwargs['name'] = video_name
+                    if video_size is not None:
+                        video_kwargs['size'] = video_size
+                    try:
+                        yiri_msg_list.append(platform_message.File(**video_kwargs))
+                    except Exception:
+                        yiri_msg_list.append(platform_message.Unknown(text='[quoted video unsupported]'))
+
+            # Process quote link - add as Plain text
+            quote_link = quote_info.get('link') or {}
+            if quote_link:
+                link_summary = '\n'.join(
+                    filter(
+                        None,
+                        [
+                            quote_link.get('title', ''),
+                            quote_link.get('description') or quote_link.get('digest', ''),
+                            quote_link.get('url', ''),
+                        ],
+                    )
+                )
+                if link_summary:
+                    yiri_msg_list.append(platform_message.Plain(text=f'[引用链接] {link_summary}'))
+
        has_content_element = any(
            not isinstance(element, (platform_message.Source, platform_message.At)) for element in yiri_msg_list
        )