fix: split Wecom messages exceeding 2048-byte limit (#1901)

Co-authored-by: Oracle Public Cloud User <opc@arm1.subnet.vcn.oraclevcn.com>
2026-07-19 10:56:06 +00:00 · 2026-01-05 15:04:46 +08:00
parent 45bedbe70e
commit 1cd586634d
2 changed files with 58 additions and 4 deletions
@@ -197,7 +197,7 @@ class WecomClient:
            self.access_token = await self.get_access_token(self.secret)

        url = self.base_url + '/message/send?access_token=' + self.access_token
-        async with httpx.AsyncClient() as client:
+        async with httpx.AsyncClient(timeout=None) as client:
            params = {
                'touser': user_id,
                'msgtype': 'text',
@@ -15,6 +15,58 @@ import langbot_plugin.api.entities.builtin.platform.events as platform_events
 import langbot_plugin.api.entities.builtin.platform.entities as platform_entities


+def split_string_by_bytes(text, limit=2048, encoding='utf-8'):
+    """
+    Splits a string into a list of strings, where each part is at most 'limit' bytes.
+    
+    Args:
+        text (str): The original string to split.
+        limit (int): The maximum byte size for each split part.
+        encoding (str): The encoding to use (default is 'utf-8').
+        
+    Returns:
+        list: A list of split strings.
+    """
+    # 1. Encode the entire string into bytes
+    bytes_data = text.encode(encoding)
+    total_len = len(bytes_data)
+    
+    parts = []
+    start = 0
+    
+    while start < total_len:
+        # 2. Determine the end index for the current chunk
+        # It shouldn't exceed the total length
+        end = min(start + limit, total_len)
+        
+        # 3. Slice the byte array
+        chunk = bytes_data[start:end]
+        
+        # 4. Attempt to decode the chunk
+        # Use errors='ignore' to drop any partial bytes at the end of the chunk
+        # (e.g., if a 3-byte character was cut after the 2nd byte)
+        part_str = chunk.decode(encoding, errors='ignore')
+        
+        # 5. Calculate the actual byte length of the successfully decoded string
+        # This tells us exactly where the valid character boundary ended
+        part_bytes = part_str.encode(encoding)
+        part_len = len(part_bytes)
+        
+        # Safety check: Prevent infinite loop if limit is too small (e.g., limit=1 for a Chinese char)
+        if part_len == 0 and end < total_len:
+            # Force advance by 1 byte to consume the un-decodable byte or raise error
+            # Here we just treat it as a part to avoid stuck loops, though it might be invalid
+            start += 1 
+            continue
+
+        parts.append(part_str)
+        
+        # 6. Move the start pointer by the actual length consumed
+        start += part_len
+        
+    return parts
+
+
 class WecomMessageConverter(abstract_platform_adapter.AbstractMessageConverter):
    @staticmethod
    async def yiri2target(message_chain: platform_message.MessageChain, bot: WecomClient):
@@ -22,12 +74,14 @@ class WecomMessageConverter(abstract_platform_adapter.AbstractMessageConverter):

        for msg in message_chain:
            if type(msg) is platform_message.Plain:
-                content_list.append(
+                chunks = split_string_by_bytes(msg.text)
+                content_list.extend([
                    {
                        'type': 'text',
-                        'content': msg.text,
+                        'content': chunk,
                    }
-                )
+                    for chunk in chunks
+                ])
            elif type(msg) is platform_message.Image:
                content_list.append(
                    {