mirror of
https://github.com/langbot-app/LangBot.git
synced 2026-06-02 12:05:54 +00:00
Compare commits
12 Commits
v4.9.3
...
feature/un
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
60579f7b3a | ||
|
|
c4d4c90930 | ||
|
|
1a3ef217d9 | ||
|
|
6e9fcccd7d | ||
|
|
865f6ee81b | ||
|
|
bd5ec59b7c | ||
|
|
9c0cc1003d | ||
|
|
ea07d8ad00 | ||
|
|
3ac3fad4bc | ||
|
|
254a13bba3 | ||
|
|
4355f0fa78 | ||
|
|
031737f05d |
@@ -64,7 +64,7 @@ dependencies = [
|
||||
"chromadb>=1.0.0,<2.0.0",
|
||||
"qdrant-client (>=1.15.1,<2.0.0)",
|
||||
"pyseekdb==1.1.0.post3",
|
||||
"langbot-plugin==0.3.2",
|
||||
"langbot-plugin==0.3.3",
|
||||
"asyncpg>=0.30.0",
|
||||
"line-bot-sdk>=3.19.0",
|
||||
"tboxsdk>=0.0.10",
|
||||
|
||||
@@ -68,6 +68,7 @@ class BotService:
|
||||
'wecomcs',
|
||||
'LINE',
|
||||
'lark',
|
||||
'gewechat',
|
||||
]:
|
||||
webhook_prefix = self.ap.instance_config.data['api'].get('webhook_prefix', 'http://127.0.0.1:5300')
|
||||
extra_webhook_prefix = self.ap.instance_config.data['api'].get('extra_webhook_prefix', '')
|
||||
|
||||
@@ -272,6 +272,9 @@ class PlatformManager:
|
||||
# 如果 adapter 支持 set_bot_uuid 方法,设置 bot_uuid(用于统一 webhook)
|
||||
if hasattr(adapter_inst, 'set_bot_uuid'):
|
||||
adapter_inst.set_bot_uuid(bot_entity.uuid)
|
||||
adapter_inst.config['_webhook_prefix'] = self.ap.instance_config.data['api'].get(
|
||||
'webhook_prefix', 'http://127.0.0.1:5300'
|
||||
)
|
||||
|
||||
runtime_bot = RuntimeBot(ap=self.ap, bot_entity=bot_entity, adapter=adapter_inst, logger=logger)
|
||||
|
||||
|
||||
BIN
src/langbot/pkg/platform/sources/gewechat.png
Normal file
BIN
src/langbot/pkg/platform/sources/gewechat.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 25 KiB |
609
src/langbot/pkg/platform/sources/gewechat.py
Normal file
609
src/langbot/pkg/platform/sources/gewechat.py
Normal file
@@ -0,0 +1,609 @@
|
||||
import gewechat_client
|
||||
|
||||
import typing
|
||||
import asyncio
|
||||
import traceback
|
||||
|
||||
import re
|
||||
import copy
|
||||
import threading
|
||||
|
||||
from langbot.pkg.utils import httpclient
|
||||
|
||||
import langbot_plugin.api.definition.abstract.platform.adapter as abstract_platform_adapter
|
||||
import langbot_plugin.api.entities.builtin.platform.message as platform_message
|
||||
import langbot_plugin.api.entities.builtin.platform.events as platform_events
|
||||
import langbot_plugin.api.entities.builtin.platform.entities as platform_entities
|
||||
from ...utils import image
|
||||
import xml.etree.ElementTree as ET
|
||||
from typing import Optional, Tuple
|
||||
from functools import partial
|
||||
from ..logger import EventLogger
|
||||
|
||||
|
||||
class GewechatMessageConverter(abstract_platform_adapter.AbstractMessageConverter):
|
||||
def __init__(self, config: dict):
|
||||
self.config = config
|
||||
|
||||
@staticmethod
|
||||
async def yiri2target(message_chain: platform_message.MessageChain) -> list[dict]:
|
||||
content_list = []
|
||||
for component in message_chain:
|
||||
if isinstance(component, platform_message.At):
|
||||
content_list.append({'type': 'at', 'target': component.target})
|
||||
elif isinstance(component, platform_message.Plain):
|
||||
content_list.append({'type': 'text', 'content': component.text})
|
||||
elif isinstance(component, platform_message.Image):
|
||||
if not component.url:
|
||||
pass
|
||||
content_list.append({'type': 'image', 'image': component.url})
|
||||
elif isinstance(component, platform_message.Voice):
|
||||
content_list.append({'type': 'voice', 'url': component.url, 'length': component.length})
|
||||
elif isinstance(component, platform_message.Forward):
|
||||
for node in component.node_list:
|
||||
content_list.extend(await GewechatMessageConverter.yiri2target(node.message_chain))
|
||||
content_list.append({'type': 'image', 'image': component.url})
|
||||
elif isinstance(component, platform_message.WeChatMiniPrograms):
|
||||
content_list.append(
|
||||
{
|
||||
'type': 'WeChatMiniPrograms',
|
||||
'mini_app_id': component.mini_app_id,
|
||||
'display_name': component.display_name,
|
||||
'page_path': component.page_path,
|
||||
'cover_img_url': component.image_url,
|
||||
'title': component.title,
|
||||
'user_name': component.user_name,
|
||||
}
|
||||
)
|
||||
elif isinstance(component, platform_message.WeChatForwardMiniPrograms):
|
||||
content_list.append(
|
||||
{
|
||||
'type': 'WeChatForwardMiniPrograms',
|
||||
'xml_data': component.xml_data,
|
||||
'image_url': component.image_url,
|
||||
}
|
||||
)
|
||||
elif isinstance(component, platform_message.WeChatEmoji):
|
||||
content_list.append(
|
||||
{
|
||||
'type': 'WeChatEmoji',
|
||||
'emoji_md5': component.emoji_md5,
|
||||
'emoji_size': component.emoji_size,
|
||||
}
|
||||
)
|
||||
elif isinstance(component, platform_message.WeChatLink):
|
||||
content_list.append(
|
||||
{
|
||||
'type': 'WeChatLink',
|
||||
'link_title': component.link_title,
|
||||
'link_desc': component.link_desc,
|
||||
'link_thumb_url': component.link_thumb_url,
|
||||
'link_url': component.link_url,
|
||||
}
|
||||
)
|
||||
elif isinstance(component, platform_message.WeChatForwardLink):
|
||||
content_list.append({'type': 'WeChatForwardLink', 'xml_data': component.xml_data})
|
||||
elif isinstance(component, platform_message.WeChatForwardImage):
|
||||
content_list.append({'type': 'WeChatForwardImage', 'xml_data': component.xml_data})
|
||||
elif isinstance(component, platform_message.WeChatForwardFile):
|
||||
content_list.append({'type': 'WeChatForwardFile', 'xml_data': component.xml_data})
|
||||
elif isinstance(component, platform_message.WeChatAppMsg):
|
||||
content_list.append({'type': 'WeChatAppMsg', 'app_msg': component.app_msg})
|
||||
elif isinstance(component, platform_message.WeChatForwardQuote):
|
||||
content_list.append({'type': 'WeChatAppMsg', 'app_msg': component.app_msg})
|
||||
elif isinstance(component, platform_message.Forward):
|
||||
for node in component.node_list:
|
||||
if node.message_chain:
|
||||
content_list.extend(await GewechatMessageConverter.yiri2target(node.message_chain))
|
||||
|
||||
return content_list
|
||||
|
||||
async def target2yiri(self, message: dict, bot_account_id: str) -> platform_message.MessageChain:
|
||||
message_list = []
|
||||
ats_bot = False
|
||||
content = message['Data']['Content']['string']
|
||||
content_no_preifx = content
|
||||
is_group_message = self._is_group_message(message)
|
||||
if is_group_message:
|
||||
ats_bot = self._ats_bot(message, bot_account_id)
|
||||
if '@所有人' in content:
|
||||
message_list.append(platform_message.AtAll())
|
||||
elif ats_bot:
|
||||
message_list.append(platform_message.At(target=bot_account_id))
|
||||
content_no_preifx, _ = self._extract_content_and_sender(content)
|
||||
|
||||
msg_type = message['Data']['MsgType']
|
||||
|
||||
handler_map = {
|
||||
1: self._handler_text,
|
||||
3: self._handler_image,
|
||||
34: self._handler_voice,
|
||||
49: self._handler_compound,
|
||||
}
|
||||
|
||||
handler = handler_map.get(msg_type, self._handler_default)
|
||||
handler_result = await handler(
|
||||
message=message,
|
||||
content_no_preifx=content_no_preifx,
|
||||
)
|
||||
|
||||
if handler_result and len(handler_result) > 0:
|
||||
message_list.extend(handler_result)
|
||||
|
||||
return platform_message.MessageChain(message_list)
|
||||
|
||||
async def _handler_text(self, message: Optional[dict], content_no_preifx: str) -> platform_message.MessageChain:
|
||||
if message and self._is_group_message(message):
|
||||
pattern = r'@\S{1,20}'
|
||||
content_no_preifx = re.sub(pattern, '', content_no_preifx)
|
||||
|
||||
return platform_message.MessageChain([platform_message.Plain(content_no_preifx)])
|
||||
|
||||
async def _handler_image(self, message: Optional[dict], content_no_preifx: str) -> platform_message.MessageChain:
|
||||
try:
|
||||
image_xml = content_no_preifx
|
||||
if not image_xml:
|
||||
return platform_message.MessageChain([platform_message.Unknown('[图片内容为空]')])
|
||||
|
||||
base64_str, image_format = await image.get_gewechat_image_base64(
|
||||
gewechat_url=self.config['gewechat_url'],
|
||||
gewechat_file_url=self.config['gewechat_file_url'],
|
||||
app_id=self.config['app_id'],
|
||||
xml_content=image_xml,
|
||||
token=self.config['token'],
|
||||
image_type=2,
|
||||
)
|
||||
|
||||
elements = [
|
||||
platform_message.Image(base64=f'data:image/{image_format};base64,{base64_str}'),
|
||||
platform_message.WeChatForwardImage(xml_data=image_xml),
|
||||
]
|
||||
return platform_message.MessageChain(elements)
|
||||
except Exception as e:
|
||||
print(f'处理图片失败: {str(e)}')
|
||||
return platform_message.MessageChain([platform_message.Unknown('[图片处理失败]')])
|
||||
|
||||
async def _handler_voice(self, message: Optional[dict], content_no_preifx: str) -> platform_message.MessageChain:
|
||||
message_List = []
|
||||
try:
|
||||
audio_base64 = message['Data']['ImgBuf']['buffer']
|
||||
|
||||
if not audio_base64:
|
||||
message_List.append(platform_message.Unknown(text='[语音内容为空]'))
|
||||
return platform_message.MessageChain(message_List)
|
||||
|
||||
voice_element = platform_message.Voice(base64=f'data:audio/silk;base64,{audio_base64}')
|
||||
message_List.append(voice_element)
|
||||
|
||||
except KeyError as e:
|
||||
print(f'语音数据字段缺失: {str(e)}')
|
||||
message_List.append(platform_message.Unknown(text='[语音数据解析失败]'))
|
||||
except Exception as e:
|
||||
print(f'处理语音消息异常: {str(e)}')
|
||||
message_List.append(platform_message.Unknown(text='[语音处理失败]'))
|
||||
|
||||
return platform_message.MessageChain(message_List)
|
||||
|
||||
async def _handler_compound(self, message: Optional[dict], content_no_preifx: str) -> platform_message.MessageChain:
|
||||
try:
|
||||
xml_data = ET.fromstring(content_no_preifx)
|
||||
appmsg_data = xml_data.find('.//appmsg')
|
||||
if appmsg_data:
|
||||
data_type = appmsg_data.findtext('.//type', '')
|
||||
|
||||
sub_handler_map = {
|
||||
'57': self._handler_compound_quote,
|
||||
'5': self._handler_compound_link,
|
||||
'6': self._handler_compound_file,
|
||||
'33': self._handler_compound_mini_program,
|
||||
'36': self._handler_compound_mini_program,
|
||||
'2000': partial(self._handler_compound_unsupported, text='[转账消息]'),
|
||||
'2001': partial(self._handler_compound_unsupported, text='[红包消息]'),
|
||||
'51': partial(self._handler_compound_unsupported, text='[视频号消息]'),
|
||||
}
|
||||
|
||||
handler = sub_handler_map.get(data_type, self._handler_compound_unsupported)
|
||||
return await handler(
|
||||
message=message,
|
||||
xml_data=xml_data,
|
||||
)
|
||||
else:
|
||||
return platform_message.MessageChain([platform_message.Unknown(text=content_no_preifx)])
|
||||
except Exception as e:
|
||||
print(f'解析复合消息失败: {str(e)}')
|
||||
return platform_message.MessageChain([platform_message.Unknown(text=content_no_preifx)])
|
||||
|
||||
async def _handler_compound_quote(
|
||||
self, message: Optional[dict], xml_data: ET.Element
|
||||
) -> platform_message.MessageChain:
|
||||
message_list = []
|
||||
appmsg_data = xml_data.find('.//appmsg')
|
||||
quote_data = ''
|
||||
user_data = ''
|
||||
sender_id = xml_data.findtext('.//fromusername')
|
||||
if appmsg_data:
|
||||
user_data = appmsg_data.findtext('.//title') or ''
|
||||
quote_data = appmsg_data.find('.//refermsg').findtext('.//content')
|
||||
message_list.append(
|
||||
platform_message.WeChatForwardQuote(app_msg=ET.tostring(appmsg_data, encoding='unicode'))
|
||||
)
|
||||
if quote_data:
|
||||
quote_data_message_list = platform_message.MessageChain()
|
||||
try:
|
||||
if '<msg>' not in quote_data:
|
||||
quote_data_message_list.append(platform_message.Plain(quote_data))
|
||||
else:
|
||||
quote_data_xml = ET.fromstring(quote_data)
|
||||
if quote_data_xml.find('img'):
|
||||
quote_data_message_list.extend(await self._handler_image(None, quote_data))
|
||||
elif quote_data_xml.find('voicemsg'):
|
||||
quote_data_message_list.extend(await self._handler_voice(None, quote_data))
|
||||
elif quote_data_xml.find('videomsg'):
|
||||
quote_data_message_list.extend(await self._handler_default(None, quote_data))
|
||||
else:
|
||||
quote_data_message_list.extend(await self._handler_compound(None, quote_data))
|
||||
except Exception as e:
|
||||
print(f'处理引用消息异常 expcetion:{e}')
|
||||
quote_data_message_list.append(platform_message.Plain(quote_data))
|
||||
message_list.append(
|
||||
platform_message.Quote(
|
||||
sender_id=sender_id,
|
||||
origin=quote_data_message_list,
|
||||
)
|
||||
)
|
||||
if len(user_data) > 0:
|
||||
pattern = r'@\S{1,20}'
|
||||
user_data = re.sub(pattern, '', user_data)
|
||||
message_list.append(platform_message.Plain(user_data))
|
||||
|
||||
return platform_message.MessageChain(message_list)
|
||||
|
||||
async def _handler_compound_file(self, message: dict, xml_data: ET.Element) -> platform_message.MessageChain:
|
||||
xml_data_str = ET.tostring(xml_data, encoding='unicode')
|
||||
return platform_message.MessageChain([platform_message.WeChatForwardFile(xml_data=xml_data_str)])
|
||||
|
||||
async def _handler_compound_link(self, message: dict, xml_data: ET.Element) -> platform_message.MessageChain:
|
||||
message_list = []
|
||||
try:
|
||||
appmsg = xml_data.find('.//appmsg')
|
||||
if appmsg is None:
|
||||
return platform_message.MessageChain()
|
||||
message_list.append(
|
||||
platform_message.WeChatLink(
|
||||
link_title=appmsg.findtext('title', ''),
|
||||
link_desc=appmsg.findtext('des', ''),
|
||||
link_url=appmsg.findtext('url', ''),
|
||||
link_thumb_url=appmsg.findtext('thumburl', ''),
|
||||
)
|
||||
)
|
||||
xml_data_str = ET.tostring(xml_data, encoding='unicode')
|
||||
message_list.append(platform_message.WeChatForwardLink(xml_data=xml_data_str))
|
||||
except Exception as e:
|
||||
print(f'解析链接消息失败: {str(e)}')
|
||||
return platform_message.MessageChain(message_list)
|
||||
|
||||
async def _handler_compound_mini_program(
|
||||
self, message: dict, xml_data: ET.Element
|
||||
) -> platform_message.MessageChain:
|
||||
xml_data_str = ET.tostring(xml_data, encoding='unicode')
|
||||
return platform_message.MessageChain([platform_message.WeChatForwardMiniPrograms(xml_data=xml_data_str)])
|
||||
|
||||
async def _handler_default(self, message: Optional[dict], content_no_preifx: str) -> platform_message.MessageChain:
|
||||
if message:
|
||||
msg_type = message['Data']['MsgType']
|
||||
else:
|
||||
msg_type = ''
|
||||
return platform_message.MessageChain([platform_message.Unknown(text=f'[未知消息类型 msg_type:{msg_type}]')])
|
||||
|
||||
def _handler_compound_unsupported(
|
||||
self, message: dict, xml_data: str, text: Optional[str] = None
|
||||
) -> platform_message.MessageChain:
|
||||
if not text:
|
||||
text = f'[xml_data={xml_data}]'
|
||||
content_list = []
|
||||
content_list.append(platform_message.Unknown(text=f'[处理未支持复合消息类型[msg_type=49]|{text}'))
|
||||
|
||||
return platform_message.MessageChain(content_list)
|
||||
|
||||
def _ats_bot(self, message: dict, bot_account_id: str) -> bool:
|
||||
ats_bot = False
|
||||
try:
|
||||
to_user_name = message['Wxid']
|
||||
raw_content = message['Data']['Content']['string']
|
||||
content_no_prefix, _ = self._extract_content_and_sender(raw_content)
|
||||
push_content = message.get('Data', {}).get('PushContent', '')
|
||||
ats_bot = ats_bot or ('在群聊中@了你' in push_content)
|
||||
msg_source = message.get('Data', {}).get('MsgSource', '') or ''
|
||||
if len(msg_source) > 0:
|
||||
msg_source_data = ET.fromstring(msg_source)
|
||||
at_user_list = msg_source_data.findtext('atuserlist') or ''
|
||||
ats_bot = ats_bot or (to_user_name in at_user_list)
|
||||
if message.get('Data', {}).get('MsgType', 0) == 49:
|
||||
xml_data = ET.fromstring(content_no_prefix)
|
||||
appmsg_data = xml_data.find('.//appmsg')
|
||||
tousername = message['Wxid']
|
||||
if appmsg_data:
|
||||
quote_id = appmsg_data.find('.//refermsg').findtext('.//chatusr')
|
||||
ats_bot = ats_bot or (quote_id == tousername)
|
||||
except Exception as e:
|
||||
print(f'Error in gewechat _ats_bot: {e}')
|
||||
finally:
|
||||
return ats_bot
|
||||
|
||||
def _extract_content_and_sender(self, raw_content: str) -> Tuple[str, Optional[str]]:
|
||||
try:
|
||||
regex = re.compile(r'^[a-zA-Z0-9_\-]{5,20}:')
|
||||
line_split = raw_content.split('\n')
|
||||
if len(line_split) > 0 and regex.match(line_split[0]):
|
||||
raw_content = '\n'.join(line_split[1:])
|
||||
sender_id = line_split[0].strip(':')
|
||||
return raw_content, sender_id
|
||||
except Exception as e:
|
||||
print(f'_extract_content_and_sender got except: {e}')
|
||||
finally:
|
||||
return raw_content, None
|
||||
|
||||
def _is_group_message(self, message: dict) -> bool:
|
||||
from_user_name = message['Data']['FromUserName']['string']
|
||||
return from_user_name.endswith('@chatroom')
|
||||
|
||||
|
||||
class GewechatEventConverter(abstract_platform_adapter.AbstractEventConverter):
|
||||
def __init__(self, config: dict):
|
||||
self.config = config
|
||||
self.message_converter = GewechatMessageConverter(config)
|
||||
|
||||
@staticmethod
|
||||
async def yiri2target(event: platform_events.MessageEvent) -> dict:
|
||||
pass
|
||||
|
||||
async def target2yiri(self, event: dict, bot_account_id: str) -> platform_events.MessageEvent:
|
||||
if event['Wxid'] == event['Data']['FromUserName']['string']:
|
||||
return None
|
||||
if event['Data']['FromUserName']['string'].startswith('gh_') or event['Data']['FromUserName'][
|
||||
'string'
|
||||
].startswith('weixin'):
|
||||
return None
|
||||
message_chain = await self.message_converter.target2yiri(copy.deepcopy(event), bot_account_id)
|
||||
|
||||
if not message_chain:
|
||||
return None
|
||||
|
||||
if '@chatroom' in event['Data']['FromUserName']['string']:
|
||||
sender_wxid = event['Data']['Content']['string'].split(':')[0]
|
||||
|
||||
return platform_events.GroupMessage(
|
||||
sender=platform_entities.GroupMember(
|
||||
id=sender_wxid,
|
||||
member_name=event['Data']['FromUserName']['string'],
|
||||
permission=platform_entities.Permission.Member,
|
||||
group=platform_entities.Group(
|
||||
id=event['Data']['FromUserName']['string'],
|
||||
name=event['Data']['FromUserName']['string'],
|
||||
permission=platform_entities.Permission.Member,
|
||||
),
|
||||
special_title='',
|
||||
),
|
||||
message_chain=message_chain,
|
||||
time=event['Data']['CreateTime'],
|
||||
source_platform_object=event,
|
||||
)
|
||||
else:
|
||||
return platform_events.FriendMessage(
|
||||
sender=platform_entities.Friend(
|
||||
id=event['Data']['FromUserName']['string'],
|
||||
nickname=event['Data']['FromUserName']['string'],
|
||||
remark='',
|
||||
),
|
||||
message_chain=message_chain,
|
||||
time=event['Data']['CreateTime'],
|
||||
source_platform_object=event,
|
||||
)
|
||||
|
||||
|
||||
class GeWeChatAdapter(abstract_platform_adapter.AbstractMessagePlatformAdapter):
|
||||
bot: gewechat_client.GewechatClient = None
|
||||
bot_uuid: str = None
|
||||
message_converter: GewechatMessageConverter = None
|
||||
event_converter: GewechatEventConverter = None
|
||||
|
||||
listeners: typing.Dict[
|
||||
typing.Type[platform_events.Event],
|
||||
typing.Callable[[platform_events.Event, abstract_platform_adapter.AbstractMessagePlatformAdapter], None],
|
||||
] = {}
|
||||
|
||||
def __init__(self, config: dict, logger: EventLogger):
|
||||
super().__init__(
|
||||
config=config,
|
||||
logger=logger,
|
||||
)
|
||||
|
||||
self.message_converter = GewechatMessageConverter(config)
|
||||
self.event_converter = GewechatEventConverter(config)
|
||||
|
||||
def set_bot_uuid(self, bot_uuid: str):
|
||||
self.bot_uuid = bot_uuid
|
||||
|
||||
async def handle_unified_webhook(self, bot_uuid: str, path: str, request):
|
||||
data = await request.json
|
||||
await self.logger.debug(f'Gewechat callback event: {data}')
|
||||
|
||||
if 'data' in data:
|
||||
data['Data'] = data['data']
|
||||
if 'type_name' in data:
|
||||
data['TypeName'] = data['type_name']
|
||||
|
||||
if 'testMsg' in data:
|
||||
return 'ok'
|
||||
elif 'TypeName' in data and data['TypeName'] == 'AddMsg':
|
||||
try:
|
||||
event = await self.event_converter.target2yiri(data.copy(), self.bot_account_id)
|
||||
except Exception:
|
||||
await self.logger.error(f'Error in gewechat callback: {traceback.format_exc()}')
|
||||
return 'ok'
|
||||
|
||||
if event and event.__class__ in self.listeners:
|
||||
await self.listeners[event.__class__](event, self)
|
||||
|
||||
return 'ok'
|
||||
|
||||
return 'ok'
|
||||
|
||||
async def _handle_message(self, message: platform_message.MessageChain, target_id: str):
|
||||
content_list = await self.message_converter.yiri2target(message)
|
||||
at_targets = [item['target'] for item in content_list if item['type'] == 'at']
|
||||
|
||||
at_targets = at_targets or []
|
||||
member_info = []
|
||||
if at_targets:
|
||||
member_info = self.bot.get_chatroom_member_detail(self.config['app_id'], target_id, at_targets[::-1])[
|
||||
'data'
|
||||
]
|
||||
|
||||
for msg in content_list:
|
||||
if msg['type'] == 'text' and at_targets:
|
||||
for member in member_info:
|
||||
msg['content'] = f'@{member["nickName"]} {msg["content"]}'
|
||||
|
||||
handler_map = {
|
||||
'text': lambda msg: self.bot.post_text(
|
||||
app_id=self.config['app_id'],
|
||||
to_wxid=target_id,
|
||||
content=msg['content'],
|
||||
ats=','.join(at_targets),
|
||||
),
|
||||
'image': lambda msg: self.bot.post_image(
|
||||
app_id=self.config['app_id'],
|
||||
to_wxid=target_id,
|
||||
img_url=msg['image'],
|
||||
),
|
||||
'WeChatForwardMiniPrograms': lambda msg: self.bot.forward_mini_app(
|
||||
app_id=self.config['app_id'],
|
||||
to_wxid=target_id,
|
||||
xml=msg['xml_data'],
|
||||
cover_img_url=msg.get('image_url'),
|
||||
),
|
||||
'WeChatEmoji': lambda msg: self.bot.post_emoji(
|
||||
app_id=self.config['app_id'],
|
||||
to_wxid=target_id,
|
||||
emoji_md5=msg['emoji_md5'],
|
||||
emoji_size=msg['emoji_size'],
|
||||
),
|
||||
'WeChatLink': lambda msg: self.bot.post_link(
|
||||
app_id=self.config['app_id'],
|
||||
to_wxid=target_id,
|
||||
title=msg['link_title'],
|
||||
desc=msg['link_desc'],
|
||||
link_url=msg['link_url'],
|
||||
thumb_url=msg['link_thumb_url'],
|
||||
),
|
||||
'WeChatMiniPrograms': lambda msg: self.bot.post_mini_app(
|
||||
app_id=self.config['app_id'],
|
||||
to_wxid=target_id,
|
||||
mini_app_id=msg['mini_app_id'],
|
||||
display_name=msg['display_name'],
|
||||
page_path=msg['page_path'],
|
||||
cover_img_url=msg['cover_img_url'],
|
||||
title=msg['title'],
|
||||
user_name=msg['user_name'],
|
||||
),
|
||||
'WeChatForwardLink': lambda msg: self.bot.forward_url(
|
||||
app_id=self.config['app_id'], to_wxid=target_id, xml=msg['xml_data']
|
||||
),
|
||||
'WeChatForwardImage': lambda msg: self.bot.forward_image(
|
||||
app_id=self.config['app_id'], to_wxid=target_id, xml=msg['xml_data']
|
||||
),
|
||||
'WeChatForwardFile': lambda msg: self.bot.forward_file(
|
||||
app_id=self.config['app_id'], to_wxid=target_id, xml=msg['xml_data']
|
||||
),
|
||||
'voice': lambda msg: self.bot.post_voice(
|
||||
app_id=self.config['app_id'],
|
||||
to_wxid=target_id,
|
||||
voice_url=msg['url'],
|
||||
voice_duration=msg['length'],
|
||||
),
|
||||
'WeChatAppMsg': lambda msg: self.bot.post_app_msg(
|
||||
app_id=self.config['app_id'],
|
||||
to_wxid=target_id,
|
||||
appmsg=msg['app_msg'],
|
||||
),
|
||||
'at': lambda msg: None,
|
||||
}
|
||||
|
||||
if handler := handler_map.get(msg['type']):
|
||||
handler(msg)
|
||||
else:
|
||||
await self.logger.warning(f'未处理的消息类型: {msg["type"]}')
|
||||
continue
|
||||
|
||||
async def send_message(self, target_type: str, target_id: str, message: platform_message.MessageChain):
|
||||
return await self._handle_message(message, target_id)
|
||||
|
||||
async def reply_message(
|
||||
self,
|
||||
message_source: platform_events.MessageEvent,
|
||||
message: platform_message.MessageChain,
|
||||
quote_origin: bool = False,
|
||||
):
|
||||
if message_source.source_platform_object:
|
||||
target_id = message_source.source_platform_object['Data']['FromUserName']['string']
|
||||
return await self._handle_message(message, target_id)
|
||||
|
||||
async def is_muted(self, group_id: int) -> bool:
|
||||
pass
|
||||
|
||||
def register_listener(
|
||||
self,
|
||||
event_type: typing.Type[platform_events.Event],
|
||||
callback: typing.Callable[
|
||||
[platform_events.Event, abstract_platform_adapter.AbstractMessagePlatformAdapter], None
|
||||
],
|
||||
):
|
||||
self.listeners[event_type] = callback
|
||||
|
||||
def unregister_listener(
|
||||
self,
|
||||
event_type: typing.Type[platform_events.Event],
|
||||
callback: typing.Callable[
|
||||
[platform_events.Event, abstract_platform_adapter.AbstractMessagePlatformAdapter], None
|
||||
],
|
||||
):
|
||||
pass
|
||||
|
||||
def _build_callback_url(self) -> str:
|
||||
webhook_prefix = self.config.get('_webhook_prefix', 'http://127.0.0.1:5300').rstrip('/')
|
||||
return f'{webhook_prefix}/bots/{self.bot_uuid}'
|
||||
|
||||
async def run_async(self):
|
||||
if not self.config['token']:
|
||||
session = httpclient.get_session()
|
||||
async with session.post(
|
||||
f'{self.config["gewechat_url"]}/v2/api/tools/getTokenId',
|
||||
json={'app_id': self.config['app_id']},
|
||||
) as response:
|
||||
if response.status != 200:
|
||||
raise Exception(f'获取gewechat token失败: {await response.text()}')
|
||||
self.config['token'] = (await response.json())['data']
|
||||
|
||||
self.bot = gewechat_client.GewechatClient(f'{self.config["gewechat_url"]}/v2/api', self.config['token'])
|
||||
|
||||
def gewechat_init_process():
|
||||
profile = self.bot.get_profile(self.config['app_id'])
|
||||
self.bot_account_id = profile['data']['nickName']
|
||||
|
||||
try:
|
||||
callback_url = self._build_callback_url()
|
||||
self.bot.set_callback(self.config['token'], callback_url)
|
||||
print(f'Gewechat 回调地址已设置: {callback_url}')
|
||||
except Exception as e:
|
||||
raise Exception(f'设置 Gewechat 回调失败,token失效:{e}')
|
||||
|
||||
threading.Thread(target=gewechat_init_process).start()
|
||||
|
||||
# 统一 webhook 模式下,不启动独立的 HTTP 服务
|
||||
# 保持适配器运行
|
||||
while True:
|
||||
await asyncio.sleep(1)
|
||||
|
||||
async def kill(self) -> bool:
|
||||
return False
|
||||
51
src/langbot/pkg/platform/sources/gewechat.yaml
Normal file
51
src/langbot/pkg/platform/sources/gewechat.yaml
Normal file
@@ -0,0 +1,51 @@
|
||||
apiVersion: v1
|
||||
kind: MessagePlatformAdapter
|
||||
metadata:
|
||||
name: gewechat
|
||||
label:
|
||||
en_US: GeWeChat
|
||||
zh_Hans: GeWeChat(个人微信)
|
||||
description:
|
||||
en_US: GeWeChat Adapter (Unified Webhook)
|
||||
zh_Hans: GeWeChat 适配器(统一 Webhook),请查看文档了解使用方式
|
||||
icon: gewechat.png
|
||||
spec:
|
||||
config:
|
||||
- name: gewechat_url
|
||||
label:
|
||||
en_US: GeWeChat URL
|
||||
zh_Hans: GeWeChat URL
|
||||
description:
|
||||
en_US: GeWeChat API server address, e.g. http://127.0.0.1:2531
|
||||
zh_Hans: GeWeChat API 服务器地址,如 http://127.0.0.1:2531
|
||||
type: string
|
||||
required: true
|
||||
default: ""
|
||||
- name: gewechat_file_url
|
||||
label:
|
||||
en_US: GeWeChat file download URL
|
||||
zh_Hans: GeWeChat 文件下载URL
|
||||
description:
|
||||
en_US: GeWeChat file download service address
|
||||
zh_Hans: GeWeChat 文件下载服务地址
|
||||
type: string
|
||||
required: true
|
||||
default: ""
|
||||
- name: app_id
|
||||
label:
|
||||
en_US: App ID
|
||||
zh_Hans: 应用ID
|
||||
type: string
|
||||
required: true
|
||||
default: ""
|
||||
- name: token
|
||||
label:
|
||||
en_US: Token
|
||||
zh_Hans: 令牌
|
||||
type: string
|
||||
required: true
|
||||
default: ""
|
||||
execution:
|
||||
python:
|
||||
path: ./gewechat.py
|
||||
attr: GeWeChatAdapter
|
||||
@@ -42,6 +42,25 @@ class TelegramMessageConverter(abstract_platform_adapter.AbstractMessageConverte
|
||||
photo_bytes = f.read()
|
||||
|
||||
components.append({'type': 'photo', 'photo': photo_bytes})
|
||||
elif isinstance(component, platform_message.File):
|
||||
file_bytes = None
|
||||
|
||||
if component.base64:
|
||||
# Strip data URI prefix if present (e.g. "data:application/pdf;base64,...")
|
||||
b64_data = component.base64
|
||||
if ';base64,' in b64_data:
|
||||
b64_data = b64_data.split(';base64,', 1)[1]
|
||||
file_bytes = base64.b64decode(b64_data)
|
||||
elif component.url:
|
||||
session = httpclient.get_session()
|
||||
async with session.get(component.url) as response:
|
||||
file_bytes = await response.read()
|
||||
elif component.path:
|
||||
with open(component.path, 'rb') as f:
|
||||
file_bytes = f.read()
|
||||
|
||||
file_name = getattr(component, 'name', None) or 'file'
|
||||
components.append({'type': 'document', 'document': file_bytes, 'filename': file_name})
|
||||
elif isinstance(component, platform_message.Forward):
|
||||
for node in component.node_list:
|
||||
components.extend(await TelegramMessageConverter.yiri2target(node.message_chain, bot))
|
||||
@@ -104,6 +123,27 @@ class TelegramMessageConverter(abstract_platform_adapter.AbstractMessageConverte
|
||||
)
|
||||
)
|
||||
|
||||
if message.document:
|
||||
if message.caption:
|
||||
message_components.extend(parse_message_text(message.caption))
|
||||
|
||||
file = await message.document.get_file()
|
||||
file_name = message.document.file_name or 'document'
|
||||
file_size = message.document.file_size or 0
|
||||
file_format = message.document.mime_type or 'application/octet-stream'
|
||||
|
||||
file_bytes = None
|
||||
async with httpclient.get_session(trust_env=True).get(file.file_path) as response:
|
||||
file_bytes = await response.read()
|
||||
|
||||
message_components.append(
|
||||
platform_message.File(
|
||||
name=file_name,
|
||||
size=file_size,
|
||||
base64=f'data:{file_format};base64,{base64.b64encode(file_bytes).decode("utf-8")}',
|
||||
)
|
||||
)
|
||||
|
||||
return platform_message.MessageChain(message_components)
|
||||
|
||||
|
||||
@@ -179,7 +219,10 @@ class TelegramAdapter(abstract_platform_adapter.AbstractMessagePlatformAdapter):
|
||||
application = ApplicationBuilder().token(config['token']).build()
|
||||
bot = application.bot
|
||||
application.add_handler(
|
||||
MessageHandler(filters.TEXT | (filters.COMMAND) | filters.PHOTO | filters.VOICE, telegram_callback)
|
||||
MessageHandler(
|
||||
filters.TEXT | (filters.COMMAND) | filters.PHOTO | filters.VOICE | filters.Document.ALL,
|
||||
telegram_callback,
|
||||
)
|
||||
)
|
||||
super().__init__(
|
||||
config=config,
|
||||
@@ -218,6 +261,13 @@ class TelegramAdapter(abstract_platform_adapter.AbstractMessagePlatformAdapter):
|
||||
continue
|
||||
args['photo'] = telegram.InputFile(photo)
|
||||
await self.bot.send_photo(**args)
|
||||
elif component_type == 'document':
|
||||
doc = component.get('document')
|
||||
if doc is None:
|
||||
continue
|
||||
filename = component.get('filename', 'file')
|
||||
args['document'] = telegram.InputFile(doc, filename=filename)
|
||||
await self.bot.send_document(**args)
|
||||
|
||||
async def reply_message(
|
||||
self,
|
||||
|
||||
@@ -24,14 +24,18 @@ class WecomBotMessageConverter(abstract_platform_adapter.AbstractMessageConverte
|
||||
return content
|
||||
|
||||
@staticmethod
|
||||
async def target2yiri(event: WecomBotEvent):
|
||||
async def target2yiri(event: WecomBotEvent, bot_name: str = ''):
|
||||
yiri_msg_list = []
|
||||
if event.type == 'group':
|
||||
yiri_msg_list.append(platform_message.At(target=event.ai_bot_id))
|
||||
|
||||
yiri_msg_list.append(platform_message.Source(id=event.message_id, time=datetime.datetime.now()))
|
||||
|
||||
if event.content:
|
||||
yiri_msg_list.append(platform_message.Plain(text=event.content))
|
||||
content = event.content
|
||||
if bot_name:
|
||||
content = content.replace(f'@{bot_name}', '').strip()
|
||||
yiri_msg_list.append(platform_message.Plain(text=content))
|
||||
|
||||
images = []
|
||||
if event.images:
|
||||
@@ -134,13 +138,15 @@ class WecomBotMessageConverter(abstract_platform_adapter.AbstractMessageConverte
|
||||
|
||||
|
||||
class WecomBotEventConverter(abstract_platform_adapter.AbstractEventConverter):
|
||||
def __init__(self, bot_name: str = ''):
|
||||
self.bot_name = bot_name
|
||||
|
||||
@staticmethod
|
||||
async def yiri2target(event: platform_events.MessageEvent):
|
||||
return event.source_platform_object
|
||||
|
||||
@staticmethod
|
||||
async def target2yiri(event: WecomBotEvent):
|
||||
message_chain = await WecomBotMessageConverter.target2yiri(event)
|
||||
async def target2yiri(self, event: WecomBotEvent):
|
||||
message_chain = await WecomBotMessageConverter.target2yiri(event, bot_name=self.bot_name)
|
||||
if event.type == 'single':
|
||||
return platform_events.FriendMessage(
|
||||
sender=platform_entities.Friend(
|
||||
@@ -180,13 +186,16 @@ class WecomBotAdapter(abstract_platform_adapter.AbstractMessagePlatformAdapter):
|
||||
bot: typing.Union[WecomBotClient, WecomBotWsClient]
|
||||
bot_account_id: str
|
||||
message_converter: WecomBotMessageConverter = WecomBotMessageConverter()
|
||||
event_converter: WecomBotEventConverter = WecomBotEventConverter()
|
||||
event_converter: WecomBotEventConverter
|
||||
config: dict
|
||||
bot_uuid: str = None
|
||||
_ws_mode: bool = False
|
||||
bot_name: str = ''
|
||||
listeners: dict = {}
|
||||
|
||||
def __init__(self, config: dict, logger: EventLogger):
|
||||
enable_webhook = config.get('enable-webhook', False)
|
||||
bot_name = config.get('robot_name', '')
|
||||
|
||||
if not enable_webhook:
|
||||
bot = WecomBotWsClient(
|
||||
@@ -195,7 +204,6 @@ class WecomBotAdapter(abstract_platform_adapter.AbstractMessagePlatformAdapter):
|
||||
logger=logger,
|
||||
encoding_aes_key=config.get('EncodingAESKey', ''),
|
||||
)
|
||||
ws_mode = True
|
||||
else:
|
||||
# Webhook callback mode
|
||||
required_keys = ['Token', 'EncodingAESKey', 'Corpid']
|
||||
@@ -210,17 +218,18 @@ class WecomBotAdapter(abstract_platform_adapter.AbstractMessagePlatformAdapter):
|
||||
logger=logger,
|
||||
unified_mode=True,
|
||||
)
|
||||
ws_mode = False
|
||||
|
||||
bot_account_id = config.get('BotId', '')
|
||||
|
||||
event_converter = WecomBotEventConverter(bot_name=bot_name)
|
||||
super().__init__(
|
||||
config=config,
|
||||
logger=logger,
|
||||
bot=bot,
|
||||
bot_account_id=bot_account_id,
|
||||
bot_name=bot_name,
|
||||
event_converter=event_converter,
|
||||
)
|
||||
self._ws_mode = ws_mode
|
||||
self.listeners = {}
|
||||
|
||||
async def reply_message(
|
||||
self,
|
||||
@@ -229,7 +238,9 @@ class WecomBotAdapter(abstract_platform_adapter.AbstractMessagePlatformAdapter):
|
||||
quote_origin: bool = False,
|
||||
):
|
||||
content = await self.message_converter.yiri2target(message)
|
||||
if self._ws_mode:
|
||||
_ws_mode = not self.config.get('enable-webhook', False)
|
||||
|
||||
if _ws_mode:
|
||||
event = message_source.source_platform_object
|
||||
req_id = event.get('req_id', '')
|
||||
if req_id:
|
||||
@@ -249,8 +260,9 @@ class WecomBotAdapter(abstract_platform_adapter.AbstractMessagePlatformAdapter):
|
||||
):
|
||||
content = await self.message_converter.yiri2target(message)
|
||||
msg_id = message_source.source_platform_object.message_id
|
||||
_ws_mode = not self.config.get('enable-webhook', False)
|
||||
|
||||
if self._ws_mode:
|
||||
if _ws_mode:
|
||||
success = await self.bot.push_stream_chunk(msg_id, content, is_final=is_final)
|
||||
if not success and is_final:
|
||||
event = message_source.source_platform_object
|
||||
@@ -275,12 +287,22 @@ class WecomBotAdapter(abstract_platform_adapter.AbstractMessagePlatformAdapter):
|
||||
return True
|
||||
|
||||
async def send_message(self, target_type, target_id, message):
|
||||
if self._ws_mode:
|
||||
_ws_mode = not self.config.get('enable-webhook', False)
|
||||
if _ws_mode:
|
||||
content = await self.message_converter.yiri2target(message)
|
||||
await self.bot.send_message(target_id, content)
|
||||
else:
|
||||
pass
|
||||
|
||||
async def on_message(self, event: WecomBotEvent):
|
||||
try:
|
||||
lb_event = await self.event_converter.target2yiri(event)
|
||||
if lb_event:
|
||||
await self.listeners[type(lb_event)](lb_event, self)
|
||||
except Exception:
|
||||
await self.logger.error(f'Error in wecombot callback: {traceback.format_exc()}')
|
||||
print(traceback.format_exc())
|
||||
|
||||
def register_listener(
|
||||
self,
|
||||
event_type: typing.Type[platform_events.Event],
|
||||
@@ -288,18 +310,13 @@ class WecomBotAdapter(abstract_platform_adapter.AbstractMessagePlatformAdapter):
|
||||
[platform_events.Event, abstract_platform_adapter.AbstractMessagePlatformAdapter], None
|
||||
],
|
||||
):
|
||||
async def on_message(event: WecomBotEvent):
|
||||
try:
|
||||
return await callback(await self.event_converter.target2yiri(event), self)
|
||||
except Exception:
|
||||
await self.logger.error(f'Error in wecombot callback: {traceback.format_exc()}')
|
||||
print(traceback.format_exc())
|
||||
self.listeners[event_type] = callback
|
||||
|
||||
try:
|
||||
if event_type == platform_events.FriendMessage:
|
||||
self.bot.on_message('single')(on_message)
|
||||
self.bot.on_message('single')(self.on_message)
|
||||
elif event_type == platform_events.GroupMessage:
|
||||
self.bot.on_message('group')(on_message)
|
||||
self.bot.on_message('group')(self.on_message)
|
||||
except Exception:
|
||||
print(traceback.format_exc())
|
||||
|
||||
@@ -308,12 +325,14 @@ class WecomBotAdapter(abstract_platform_adapter.AbstractMessagePlatformAdapter):
|
||||
self.bot_uuid = bot_uuid
|
||||
|
||||
async def handle_unified_webhook(self, bot_uuid: str, path: str, request):
|
||||
if self._ws_mode:
|
||||
_ws_mode = not self.config.get('enable-webhook', False)
|
||||
if _ws_mode:
|
||||
return None
|
||||
return await self.bot.handle_unified_webhook(request)
|
||||
|
||||
async def run_async(self):
|
||||
if self._ws_mode:
|
||||
_ws_mode = not self.config.get('enable-webhook', False)
|
||||
if _ws_mode:
|
||||
await self.bot.connect()
|
||||
else:
|
||||
|
||||
@@ -324,7 +343,8 @@ class WecomBotAdapter(abstract_platform_adapter.AbstractMessagePlatformAdapter):
|
||||
await keep_alive()
|
||||
|
||||
async def kill(self) -> bool:
|
||||
if self._ws_mode:
|
||||
_ws_mode = not self.config.get('enable-webhook', False)
|
||||
if _ws_mode:
|
||||
await self.bot.disconnect()
|
||||
return True
|
||||
return False
|
||||
|
||||
@@ -18,6 +18,13 @@ spec:
|
||||
type: string
|
||||
required: true
|
||||
default: ""
|
||||
- name: robot_name
|
||||
label:
|
||||
en_US: Robot Name
|
||||
zh_Hans: 机器人名称
|
||||
type: string
|
||||
required: true
|
||||
default: ""
|
||||
- name: enable-webhook
|
||||
label:
|
||||
en_US: Enable Webhook Mode
|
||||
|
||||
@@ -555,6 +555,18 @@ class RuntimeConnectionHandler(handler.Handler):
|
||||
except Exception as e:
|
||||
return _make_rag_error_response(e, 'VectorStoreError', collection_id=collection_id)
|
||||
|
||||
@self.action(PluginToRuntimeAction.VECTOR_LIST)
|
||||
async def vector_list(data: dict[str, Any]) -> handler.ActionResponse:
|
||||
collection_id = data['collection_id']
|
||||
filters = data.get('filters')
|
||||
limit = data.get('limit', 20)
|
||||
offset = data.get('offset', 0)
|
||||
try:
|
||||
items, total = await self.ap.rag_runtime_service.vector_list(collection_id, filters, limit, offset)
|
||||
return handler.ActionResponse.success(data={'items': items, 'total': total})
|
||||
except Exception as e:
|
||||
return _make_rag_error_response(e, 'VectorStoreError', collection_id=collection_id)
|
||||
|
||||
@self.action(PluginToRuntimeAction.GET_KNOWLEDEGE_FILE_STREAM)
|
||||
async def get_knowledge_file_stream(data: dict[str, Any]) -> handler.ActionResponse:
|
||||
storage_path = data['storage_path']
|
||||
|
||||
@@ -288,10 +288,10 @@ class AnthropicMessages(requester.ProviderAPIRequester):
|
||||
think_started = False
|
||||
think_ended = False
|
||||
finish_reason = False
|
||||
content = ''
|
||||
tool_name = ''
|
||||
tool_id = ''
|
||||
async for chunk in await self.client.messages.create(**args):
|
||||
content = ''
|
||||
tool_call = {'id': None, 'function': {'name': None, 'arguments': None}, 'type': 'function'}
|
||||
if isinstance(
|
||||
chunk, anthropic.types.raw_content_block_start_event.RawContentBlockStartEvent
|
||||
|
||||
@@ -75,6 +75,31 @@ class RAGRuntimeService:
|
||||
count = await self.ap.vector_db_mgr.delete_by_filter(collection_name=collection_id, filter=filters)
|
||||
return count
|
||||
|
||||
async def vector_list(
|
||||
self,
|
||||
collection_id: str,
|
||||
filters: dict[str, Any] | None = None,
|
||||
limit: int = 20,
|
||||
offset: int = 0,
|
||||
) -> tuple[list[dict[str, Any]], int]:
|
||||
"""Handle VECTOR_LIST action.
|
||||
|
||||
Args:
|
||||
collection_id: The collection to list from.
|
||||
filters: Optional metadata filters.
|
||||
limit: Maximum number of items to return.
|
||||
offset: Number of items to skip.
|
||||
|
||||
Returns:
|
||||
Tuple of (items, total).
|
||||
"""
|
||||
return await self.ap.vector_db_mgr.list_by_filter(
|
||||
collection_name=collection_id,
|
||||
filter=filters,
|
||||
limit=limit,
|
||||
offset=offset,
|
||||
)
|
||||
|
||||
async def get_file_stream(self, storage_path: str) -> bytes:
|
||||
"""Handle GET_KNOWLEDEGE_FILE_STREAM action.
|
||||
|
||||
|
||||
@@ -49,17 +49,25 @@ def normalize_filter(
|
||||
def strip_unsupported_fields(
|
||||
triples: list[tuple[str, str, Any]],
|
||||
supported_fields: set[str],
|
||||
field_aliases: dict[str, str] | None = None,
|
||||
) -> list[tuple[str, str, Any]]:
|
||||
"""Return only triples whose field is in *supported_fields*.
|
||||
|
||||
If *field_aliases* is provided, aliased field names are mapped to the
|
||||
canonical backend name before the support check. For example,
|
||||
``{'uuid': 'chunk_uuid'}`` allows callers to use ``uuid`` which is
|
||||
transparently rewritten to ``chunk_uuid``.
|
||||
|
||||
Dropped fields are logged at WARNING level so the caller knows they were
|
||||
silently ignored (useful for Milvus / pgvector which only store a fixed
|
||||
schema).
|
||||
"""
|
||||
aliases = field_aliases or {}
|
||||
kept: list[tuple[str, str, Any]] = []
|
||||
for field, op, value in triples:
|
||||
if field in supported_fields:
|
||||
kept.append((field, op, value))
|
||||
resolved = aliases.get(field, field)
|
||||
if resolved in supported_fields:
|
||||
kept.append((resolved, op, value))
|
||||
else:
|
||||
logger.warning(
|
||||
'Filter field %r is not supported by this backend and will be ignored (supported: %s)',
|
||||
|
||||
@@ -157,3 +157,17 @@ class VectorDBManager:
|
||||
Number of deleted vectors (best-effort; some backends return 0).
|
||||
"""
|
||||
return await self.vector_db.delete_by_filter(collection_name, filter)
|
||||
|
||||
async def list_by_filter(
|
||||
self,
|
||||
collection_name: str,
|
||||
filter: dict | None = None,
|
||||
limit: int = 20,
|
||||
offset: int = 0,
|
||||
) -> tuple[list[dict], int]:
|
||||
"""Proxy: List vectors by metadata filter with pagination.
|
||||
|
||||
Returns:
|
||||
Tuple of (items, total).
|
||||
"""
|
||||
return await self.vector_db.list_by_filter(collection_name, filter, limit, offset)
|
||||
|
||||
@@ -92,6 +92,28 @@ class VectorDatabase(abc.ABC):
|
||||
"""
|
||||
pass
|
||||
|
||||
async def list_by_filter(
|
||||
self,
|
||||
collection: str,
|
||||
filter: dict[str, Any] | None = None,
|
||||
limit: int = 20,
|
||||
offset: int = 0,
|
||||
) -> tuple[list[dict[str, Any]], int]:
|
||||
"""List vectors matching the given metadata filter with pagination.
|
||||
|
||||
Args:
|
||||
collection: Collection name.
|
||||
filter: Optional metadata filter dict in canonical format.
|
||||
limit: Maximum number of items to return.
|
||||
offset: Number of items to skip.
|
||||
|
||||
Returns:
|
||||
Tuple of (items, total) where items is a list of dicts with
|
||||
keys 'id', 'document', 'metadata', and total is the best-effort
|
||||
count of all matching vectors (-1 if unknown).
|
||||
"""
|
||||
return [], -1
|
||||
|
||||
@abc.abstractmethod
|
||||
async def get_or_create_collection(self, collection: str):
|
||||
"""Get or create collection."""
|
||||
|
||||
@@ -221,6 +221,41 @@ class ChromaVectorDatabase(VectorDatabase):
|
||||
self.ap.logger.info(f"Deleted embeddings from Chroma collection '{collection}' by filter")
|
||||
return 0 # Chroma delete does not return a count
|
||||
|
||||
async def list_by_filter(
|
||||
self,
|
||||
collection: str,
|
||||
filter: dict[str, Any] | None = None,
|
||||
limit: int = 20,
|
||||
offset: int = 0,
|
||||
) -> tuple[list[dict[str, Any]], int]:
|
||||
col = await self.get_or_create_collection(collection)
|
||||
get_kwargs: dict[str, Any] = dict(
|
||||
include=['metadatas', 'documents'],
|
||||
limit=limit,
|
||||
offset=offset,
|
||||
)
|
||||
if filter:
|
||||
get_kwargs['where'] = filter
|
||||
results = await asyncio.to_thread(col.get, **get_kwargs)
|
||||
|
||||
ids = results.get('ids', [])
|
||||
metadatas = results.get('metadatas', []) or [None] * len(ids)
|
||||
documents = results.get('documents', []) or [None] * len(ids)
|
||||
|
||||
items = []
|
||||
for i, vid in enumerate(ids):
|
||||
items.append(
|
||||
{
|
||||
'id': vid,
|
||||
'document': documents[i] if i < len(documents) else None,
|
||||
'metadata': metadatas[i] if i < len(metadatas) else {},
|
||||
}
|
||||
)
|
||||
|
||||
# Chroma col.count() gives total in collection; filtered count not available
|
||||
total = await asyncio.to_thread(col.count) if not filter else -1
|
||||
return items, total
|
||||
|
||||
async def delete_collection(self, collection: str):
|
||||
if collection in self._collections:
|
||||
del self._collections[collection]
|
||||
|
||||
@@ -11,11 +11,14 @@ from langbot.pkg.core import app
|
||||
# silently dropped with a warning.
|
||||
_MILVUS_SUPPORTED_FIELDS = {'text', 'file_id', 'chunk_uuid'}
|
||||
|
||||
# Callers use canonical metadata key 'uuid' but Milvus stores it as 'chunk_uuid'.
|
||||
_MILVUS_FIELD_ALIASES = {'uuid': 'chunk_uuid'}
|
||||
|
||||
|
||||
def _build_milvus_expr(filter_dict: dict[str, Any]) -> str:
|
||||
"""Translate canonical filter dict into a Milvus boolean expression string."""
|
||||
triples = normalize_filter(filter_dict)
|
||||
triples = strip_unsupported_fields(triples, _MILVUS_SUPPORTED_FIELDS)
|
||||
triples = strip_unsupported_fields(triples, _MILVUS_SUPPORTED_FIELDS, _MILVUS_FIELD_ALIASES)
|
||||
if not triples:
|
||||
return ''
|
||||
|
||||
@@ -340,6 +343,62 @@ class MilvusVectorDatabase(VectorDatabase):
|
||||
self.ap.logger.info(f"Deleted embeddings from Milvus collection '{collection}' by filter")
|
||||
return 0 # Milvus delete does not return a count
|
||||
|
||||
async def list_by_filter(
|
||||
self,
|
||||
collection: str,
|
||||
filter: dict[str, Any] | None = None,
|
||||
limit: int = 20,
|
||||
offset: int = 0,
|
||||
) -> tuple[list[dict[str, Any]], int]:
|
||||
collection = self._normalize_collection_name(collection)
|
||||
await self.get_or_create_collection(collection)
|
||||
|
||||
query_kwargs: dict[str, Any] = dict(
|
||||
collection_name=collection,
|
||||
output_fields=['text', 'file_id', 'chunk_uuid'],
|
||||
limit=limit,
|
||||
offset=offset,
|
||||
)
|
||||
if filter:
|
||||
expr = _build_milvus_expr(filter)
|
||||
if expr:
|
||||
query_kwargs['filter'] = expr
|
||||
|
||||
results = await asyncio.to_thread(self.client.query, **query_kwargs)
|
||||
|
||||
items = []
|
||||
for row in results:
|
||||
items.append(
|
||||
{
|
||||
'id': row.get('id', ''),
|
||||
'document': row.get('text'),
|
||||
'metadata': {
|
||||
'text': row.get('text', ''),
|
||||
'file_id': row.get('file_id', ''),
|
||||
'uuid': row.get('chunk_uuid', ''),
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
# Milvus query with count(*)
|
||||
total = -1
|
||||
try:
|
||||
count_kwargs: dict[str, Any] = dict(
|
||||
collection_name=collection,
|
||||
output_fields=['count(*)'],
|
||||
)
|
||||
if filter:
|
||||
expr = _build_milvus_expr(filter)
|
||||
if expr:
|
||||
count_kwargs['filter'] = expr
|
||||
count_result = await asyncio.to_thread(self.client.query, **count_kwargs)
|
||||
if count_result:
|
||||
total = count_result[0].get('count(*)', -1)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return items, total
|
||||
|
||||
async def delete_collection(self, collection: str):
|
||||
"""Delete a Milvus collection
|
||||
|
||||
|
||||
@@ -13,6 +13,9 @@ Base = declarative_base()
|
||||
# pgvector schema only stores these metadata fields.
|
||||
_PG_SUPPORTED_FIELDS = {'text', 'file_id', 'chunk_uuid'}
|
||||
|
||||
# Callers use canonical metadata key 'uuid' but pgvector stores it as 'chunk_uuid'.
|
||||
_PG_FIELD_ALIASES = {'uuid': 'chunk_uuid'}
|
||||
|
||||
# Map schema field names to SQLAlchemy columns (resolved lazily from PgVectorEntry).
|
||||
_PG_COLUMN_MAP = {
|
||||
'text': 'text',
|
||||
@@ -37,7 +40,7 @@ class PgVectorEntry(Base):
|
||||
def _build_pg_conditions(filter_dict: dict[str, Any]) -> list:
|
||||
"""Translate canonical filter dict into a list of SQLAlchemy conditions."""
|
||||
triples = normalize_filter(filter_dict)
|
||||
triples = strip_unsupported_fields(triples, _PG_SUPPORTED_FIELDS)
|
||||
triples = strip_unsupported_fields(triples, _PG_SUPPORTED_FIELDS, _PG_FIELD_ALIASES)
|
||||
|
||||
conditions = []
|
||||
for field, op, value in triples:
|
||||
@@ -309,6 +312,65 @@ class PgVectorDatabase(VectorDatabase):
|
||||
self.ap.logger.error(f'Error deleting from pgvector by filter: {e}')
|
||||
raise
|
||||
|
||||
async def list_by_filter(
|
||||
self,
|
||||
collection: str,
|
||||
filter: dict[str, Any] | None = None,
|
||||
limit: int = 20,
|
||||
offset: int = 0,
|
||||
) -> tuple[list[dict[str, Any]], int]:
|
||||
await self.get_or_create_collection(collection)
|
||||
|
||||
async with self.AsyncSessionLocal() as session:
|
||||
try:
|
||||
from sqlalchemy import select, func
|
||||
|
||||
stmt = (
|
||||
select(
|
||||
PgVectorEntry.id,
|
||||
PgVectorEntry.text,
|
||||
PgVectorEntry.file_id,
|
||||
PgVectorEntry.chunk_uuid,
|
||||
)
|
||||
.filter(PgVectorEntry.collection == collection)
|
||||
.offset(offset)
|
||||
.limit(limit)
|
||||
)
|
||||
|
||||
count_stmt = (
|
||||
select(func.count()).select_from(PgVectorEntry).filter(PgVectorEntry.collection == collection)
|
||||
)
|
||||
|
||||
if filter:
|
||||
for cond in _build_pg_conditions(filter):
|
||||
stmt = stmt.filter(cond)
|
||||
count_stmt = count_stmt.filter(cond)
|
||||
|
||||
result = await session.execute(stmt)
|
||||
rows = result.fetchall()
|
||||
|
||||
count_result = await session.execute(count_stmt)
|
||||
total = count_result.scalar() or 0
|
||||
|
||||
items = []
|
||||
for row in rows:
|
||||
items.append(
|
||||
{
|
||||
'id': row.id,
|
||||
'document': row.text or '',
|
||||
'metadata': {
|
||||
'text': row.text or '',
|
||||
'file_id': row.file_id or '',
|
||||
'uuid': row.chunk_uuid or '',
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
return items, total
|
||||
except Exception as e:
|
||||
self.ap.logger.error(f'Error listing from pgvector: {e}')
|
||||
raise
|
||||
|
||||
async def delete_collection(self, collection: str):
|
||||
"""Delete all vectors in a collection
|
||||
|
||||
|
||||
@@ -150,6 +150,97 @@ class QdrantVectorDatabase(VectorDatabase):
|
||||
self.ap.logger.info(f"Deleted embeddings from Qdrant collection '{collection}' by filter")
|
||||
return 0 # Qdrant delete does not return a count
|
||||
|
||||
async def list_by_filter(
|
||||
self,
|
||||
collection: str,
|
||||
filter: dict[str, Any] | None = None,
|
||||
limit: int = 20,
|
||||
offset: int = 0,
|
||||
) -> tuple[list[dict[str, Any]], int]:
|
||||
exists = await self.client.collection_exists(collection)
|
||||
if not exists:
|
||||
return [], 0
|
||||
|
||||
qdrant_filter = _build_qdrant_filter(filter) if filter else None
|
||||
|
||||
# Qdrant scroll uses cursor-based pagination (offset = point ID),
|
||||
# not numeric skip. To support numeric offset we scroll through
|
||||
# `offset + limit` items and discard the first `offset`.
|
||||
remaining_to_skip = offset
|
||||
remaining_to_collect = limit
|
||||
cursor: int | str | None = None
|
||||
collected: list[dict[str, Any]] = []
|
||||
|
||||
while remaining_to_skip > 0 or remaining_to_collect > 0:
|
||||
batch_size = remaining_to_skip + remaining_to_collect if remaining_to_skip > 0 else remaining_to_collect
|
||||
scroll_kwargs: dict[str, Any] = dict(
|
||||
collection_name=collection,
|
||||
limit=min(batch_size, 256),
|
||||
with_payload=True if remaining_to_skip == 0 else False,
|
||||
with_vectors=False,
|
||||
)
|
||||
if qdrant_filter:
|
||||
scroll_kwargs['scroll_filter'] = qdrant_filter
|
||||
if cursor is not None:
|
||||
scroll_kwargs['offset'] = cursor
|
||||
|
||||
points, next_cursor = await self.client.scroll(**scroll_kwargs)
|
||||
if not points:
|
||||
break
|
||||
|
||||
for point in points:
|
||||
if remaining_to_skip > 0:
|
||||
remaining_to_skip -= 1
|
||||
continue
|
||||
if remaining_to_collect <= 0:
|
||||
break
|
||||
# Re-fetch payload if we skipped it during the skip phase
|
||||
payload = point.payload or {}
|
||||
collected.append(
|
||||
{
|
||||
'id': str(point.id),
|
||||
'document': payload.get('text') or payload.get('document'),
|
||||
'metadata': payload,
|
||||
}
|
||||
)
|
||||
remaining_to_collect -= 1
|
||||
|
||||
if next_cursor is None:
|
||||
break
|
||||
cursor = next_cursor
|
||||
|
||||
# If we skipped without payload, re-fetch the collected items' payloads
|
||||
# (only needed when offset > 0 and items were collected in a skip batch)
|
||||
if offset > 0 and collected:
|
||||
refetch_ids = [item['id'] for item in collected if not item.get('metadata')]
|
||||
if refetch_ids:
|
||||
fetched_points = await self.client.retrieve(
|
||||
collection_name=collection,
|
||||
ids=refetch_ids,
|
||||
with_payload=True,
|
||||
with_vectors=False,
|
||||
)
|
||||
payload_map = {str(p.id): p.payload or {} for p in fetched_points}
|
||||
for item in collected:
|
||||
if item['id'] in payload_map:
|
||||
payload = payload_map[item['id']]
|
||||
item['metadata'] = payload
|
||||
item['document'] = payload.get('text') or payload.get('document')
|
||||
|
||||
# Use count() for accurate total (supports filter)
|
||||
total = -1
|
||||
try:
|
||||
count_result = await self.client.count(
|
||||
collection_name=collection,
|
||||
count_filter=qdrant_filter,
|
||||
exact=True,
|
||||
)
|
||||
total = count_result.count
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return collected, total
|
||||
|
||||
async def delete_collection(self, collection: str):
|
||||
try:
|
||||
await self.client.delete_collection(collection)
|
||||
|
||||
@@ -340,6 +340,50 @@ class SeekDBVectorDatabase(VectorDatabase):
|
||||
self.ap.logger.info(f"Deleted embeddings from SeekDB collection '{collection}' by filter")
|
||||
return 0 # SeekDB delete does not return a count
|
||||
|
||||
async def list_by_filter(
|
||||
self,
|
||||
collection: str,
|
||||
filter: Dict[str, Any] | None = None,
|
||||
limit: int = 20,
|
||||
offset: int = 0,
|
||||
) -> tuple[list[Dict[str, Any]], int]:
|
||||
exists = await asyncio.to_thread(self.client.has_collection, collection)
|
||||
if not exists:
|
||||
return [], 0
|
||||
|
||||
if collection not in self._collections:
|
||||
coll = await asyncio.to_thread(self.client.get_collection, collection, embedding_function=None)
|
||||
self._collections[collection] = coll
|
||||
else:
|
||||
coll = self._collections[collection]
|
||||
|
||||
get_kwargs: Dict[str, Any] = dict(
|
||||
include=['metadatas', 'documents'],
|
||||
limit=limit,
|
||||
offset=offset,
|
||||
)
|
||||
if filter:
|
||||
get_kwargs['where'] = filter
|
||||
|
||||
results = await asyncio.to_thread(coll.get, **get_kwargs)
|
||||
|
||||
ids = results.get('ids', [])
|
||||
metadatas = results.get('metadatas', []) or [None] * len(ids)
|
||||
documents = results.get('documents', []) or [None] * len(ids)
|
||||
|
||||
items = []
|
||||
for i, vid in enumerate(ids):
|
||||
items.append(
|
||||
{
|
||||
'id': vid,
|
||||
'document': documents[i] if i < len(documents) else None,
|
||||
'metadata': metadatas[i] if i < len(metadatas) else {},
|
||||
}
|
||||
)
|
||||
|
||||
total = await asyncio.to_thread(coll.count) if not filter else -1
|
||||
return items, total
|
||||
|
||||
async def delete_collection(self, collection: str):
|
||||
"""Delete the entire collection.
|
||||
|
||||
|
||||
@@ -2,77 +2,5 @@
|
||||
"说明": "mask将替换敏感词中的每一个字,若mask_word值不为空,则将敏感词整个替换为mask_word的值",
|
||||
"mask": "*",
|
||||
"mask_word": "",
|
||||
"words": [
|
||||
"习近平",
|
||||
"胡锦涛",
|
||||
"江泽民",
|
||||
"温家宝",
|
||||
"李克强",
|
||||
"李长春",
|
||||
"毛泽东",
|
||||
"邓小平",
|
||||
"周恩来",
|
||||
"马克思",
|
||||
"社会主义",
|
||||
"共产党",
|
||||
"共产主义",
|
||||
"大陆官方",
|
||||
"北京政权",
|
||||
"中华帝国",
|
||||
"中国政府",
|
||||
"共狗",
|
||||
"六四事件",
|
||||
"天安门",
|
||||
"六四",
|
||||
"政治局常委",
|
||||
"两会",
|
||||
"共青团",
|
||||
"学潮",
|
||||
"八九",
|
||||
"二十大",
|
||||
"民进党",
|
||||
"台独",
|
||||
"台湾独立",
|
||||
"台湾国",
|
||||
"国民党",
|
||||
"台湾民国",
|
||||
"中华民国",
|
||||
"pornhub",
|
||||
"Pornhub",
|
||||
"[Yy]ou[Pp]orn",
|
||||
"porn",
|
||||
"Porn",
|
||||
"[Xx][Vv]ideos",
|
||||
"[Rr]ed[Tt]ube",
|
||||
"[Xx][Hh]amster",
|
||||
"[Ss]pank[Ww]ire",
|
||||
"[Ss]pank[Bb]ang",
|
||||
"[Tt]ube8",
|
||||
"[Yy]ou[Jj]izz",
|
||||
"[Bb]razzers",
|
||||
"[Nn]aughty[ ]?[Aa]merica",
|
||||
"作爱",
|
||||
"做爱",
|
||||
"性交",
|
||||
"性爱",
|
||||
"自慰",
|
||||
"阴茎",
|
||||
"淫妇",
|
||||
"肛交",
|
||||
"交配",
|
||||
"性关系",
|
||||
"性活动",
|
||||
"色情",
|
||||
"色图",
|
||||
"涩图",
|
||||
"裸体",
|
||||
"小穴",
|
||||
"淫荡",
|
||||
"性爱",
|
||||
"翻墙",
|
||||
"VPN",
|
||||
"科学上网",
|
||||
"挂梯子",
|
||||
"GFW"
|
||||
]
|
||||
"words": []
|
||||
}
|
||||
8
uv.lock
generated
8
uv.lock
generated
@@ -1937,7 +1937,7 @@ requires-dist = [
|
||||
{ name = "ebooklib", specifier = ">=0.18" },
|
||||
{ name = "gewechat-client", specifier = ">=0.1.5" },
|
||||
{ name = "html2text", specifier = ">=2024.2.26" },
|
||||
{ name = "langbot-plugin", specifier = "==0.3.2" },
|
||||
{ name = "langbot-plugin", specifier = "==0.3.3" },
|
||||
{ name = "langchain", specifier = ">=0.2.0" },
|
||||
{ name = "langchain-text-splitters", specifier = ">=0.0.1" },
|
||||
{ name = "lark-oapi", specifier = ">=1.4.15" },
|
||||
@@ -1993,7 +1993,7 @@ dev = [
|
||||
|
||||
[[package]]
|
||||
name = "langbot-plugin"
|
||||
version = "0.3.2"
|
||||
version = "0.3.3"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "aiofiles" },
|
||||
@@ -2011,9 +2011,9 @@ dependencies = [
|
||||
{ name = "watchdog" },
|
||||
{ name = "websockets" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/05/32/1b029961d718b5418d9d139687287193d4657914c467833176d9c060fb4c/langbot_plugin-0.3.2.tar.gz", hash = "sha256:2f7f16285600ec019a4e8cc8b40bc8f8d404e3bbc69c9d129620dc70b3bde2f8", size = 170431, upload-time = "2026-03-14T12:42:55.175Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/08/be/78a0375aec6aad34bc2f00e2b4d2511ec597e8143cf8596d0736e8767904/langbot_plugin-0.3.3.tar.gz", hash = "sha256:5b07609b9b08f8b24fefcf29b97b9882838c140143de6d4bac2f320511ef4374", size = 170709, upload-time = "2026-03-19T12:40:23.877Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/91/d7/a5dd6cff000a4f7fe88692689be276875b04485f48fc26e162f1ad2a341c/langbot_plugin-0.3.2-py3-none-any.whl", hash = "sha256:a1de527c4d651e6b6ba2458b6fac09a300e6b1ffdc5a25bbfad88d970cfd6cfd", size = 144906, upload-time = "2026-03-14T12:42:56.486Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/92/53/c708f3ef9459420974f1b131cffd43cfb2f97220350c26a6b5fbadbd6211/langbot_plugin-0.3.3-py3-none-any.whl", hash = "sha256:cca94a2ed07c1d3ec4be33f97268746908ed304b528d0eb7f23308677fe619ca", size = 145188, upload-time = "2026-03-19T12:40:25.094Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
||||
Reference in New Issue
Block a user