diff --git a/src/langbot/libs/dingtalk_api/api.py b/src/langbot/libs/dingtalk_api/api.py
index 7ed502d9..49b81372 100644
--- a/src/langbot/libs/dingtalk_api/api.py
+++ b/src/langbot/libs/dingtalk_api/api.py
@@ -268,7 +268,25 @@ class DingTalkClient:
 
                 message_data['Type'] = 'image'
             elif incoming_message.message_type == 'audio':
-                message_data['Audio'] = await self.get_audio_url(incoming_message.to_dict()['content']['downloadCode'])
+                raw_content = incoming_message.to_dict().get('content', {})
+                # 兼容处理：如果 content 仍为 JSON 字符串则进行解析
+                if isinstance(raw_content, str):
+                    try:
+                        raw_content = json.loads(raw_content)
+                    except (json.JSONDecodeError, TypeError):
+                        raw_content = {}
+
+                if self.logger:
+                    await self.logger.info(f'DingTalk audio raw content: {json.dumps(raw_content, ensure_ascii=False)}')
+
+                # 提取钉钉自带的语音转写文字（Powered by Qwen）
+                recognition = raw_content.get('recognition', '')
+                if recognition:
+                    message_data['Content'] = recognition
+
+                download_code = raw_content.get('downloadCode')
+                if download_code:
+                    message_data['Audio'] = await self.get_audio_url(download_code)
 
                 message_data['Type'] = 'audio'
             elif incoming_message.message_type == 'file':
diff --git a/src/langbot/pkg/platform/sources/dingtalk.py b/src/langbot/pkg/platform/sources/dingtalk.py
index bd9d0fa3..c7a8a430 100644
--- a/src/langbot/pkg/platform/sources/dingtalk.py
+++ b/src/langbot/pkg/platform/sources/dingtalk.py
@@ -71,7 +71,8 @@ class DingTalkMessageConverter(abstract_platform_adapter.AbstractMessageConverte
                     yiri_msg_list.append(platform_message.Image(base64=element['Picture']))
         else:
             # 回退到原有简单逻辑
-            if event.content:
+            # 对于音频消息，content 来自 recognition 转写文字，在下方音频处理块中统一处理
+            if event.content and event.type != 'audio':
                 text_content = event.content.replace('@' + bot_name, '')
                 yiri_msg_list.append(platform_message.Plain(text=text_content))
             if event.picture:
@@ -81,7 +82,11 @@ class DingTalkMessageConverter(abstract_platform_adapter.AbstractMessageConverte
         if event.file:
             yiri_msg_list.append(platform_message.File(url=event.file, name=event.name))
         if event.audio:
-            yiri_msg_list.append(platform_message.Voice(base64=event.audio))
+            # 优先使用钉钉自带的语音转写文字（recognition字段）
+            if event.content and event.type == 'audio':
+                yiri_msg_list.append(platform_message.Plain(text=event.content))
+            else:
+                yiri_msg_list.append(platform_message.Voice(base64=event.audio))
 
         chain = platform_message.MessageChain(yiri_msg_list)