Compare commits

...

12 Commits

Author SHA1 Message Date
fdc310
60579f7b3a refactor(gewechat): remove redundant set_webhook_url and login logic
Remove the unnecessary set_webhook_url method and its caller in botmgr,
instead inject webhook_prefix via adapter config for self-assembly.
Remove login logic from run_async since login is handled elsewhere.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-22 06:15:16 +08:00
fdc310
c4d4c90930 fix: add set_webhook_url call in botmgr for auto callback URL
Pass webhook_full_url to adapters that implement set_webhook_url(),
built from api.webhook_prefix config + bot_uuid.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-22 05:57:32 +08:00
fdc310
1a3ef217d9 fix: resolve pydantic init error and auto-generate webhook callback URL
- Fix AttributeError by removing self.config assignment before super().__init__()
- Remove callback_base_url from adapter config (no longer needed)
- Add set_webhook_url() method, called by botmgr with auto-built URL
- Add gewechat to webhook URL display list in bot.py
- botmgr now passes webhook_prefix + bot_uuid to adapters via set_webhook_url()

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-22 05:52:50 +08:00
fdc310
6e9fcccd7d feat: migrate gewechat adapter to unified webhook architecture
Move gewechat adapter from legacy (self-hosted Quart server) to the
unified webhook entry point at /api/bots/{bot_uuid}. This removes the
need for a dedicated port and aligns with the modern adapter pattern.

Key changes:
- Add set_bot_uuid() and handle_unified_webhook() methods
- Remove self-hosted Quart server from run_async()
- Auto-generate callback URL from callback_base_url + bot_uuid
- Update constructor signature to (config, logger)
- Rename legacy gewechat.yaml to prevent name conflict

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-22 04:59:29 +08:00
RockChinQ
865f6ee81b style: format telegram.py for ruff 2026-03-21 22:10:23 +08:00
fdc310
bd5ec59b7c fix:The fix is in place — content = '' is now reset at the start of each loop iteration , which prevents stale text from being duplicated across tool call and end-turn chunks. (#2060) 2026-03-21 22:08:35 +08:00
fdc310
9c0cc1003d Fixed the issue where the at bot did not remove the at symbol, result… (#2062)
* Fixed the issue where the at bot did not remove the at symbol, resulting in some commands not being activated in group chats. Also, adjusted the logic in the on_message section.

* fix:reply_message  del bot_name
2026-03-21 22:07:31 +08:00
Bijin
ea07d8ad00 fix(telegram): add document message support (docx/pdf/etc) (#2069)
The Telegram adapter only handles TEXT, COMMAND, PHOTO, and VOICE
messages. Document files (docx, pdf, etc.) sent by users are silently
dropped because:

1. MessageHandler filters lack filters.Document.ALL
2. target2yiri() has no message.document branch
3. yiri2target() has no platform_message.File branch
4. send_message() has no 'document' component handler

Changes:
- Add filters.Document.ALL to the MessageHandler filter set
- Add message.document parsing in target2yiri() → platform_message.File
- Add platform_message.File handling in yiri2target() → document component
- Add 'document' type handling in send_message() via bot.send_document()

This allows Telegram document messages to flow through the existing
PreProcessor and Dify file upload pipeline, consistent with how other
adapters (Lark, KOOK, Discord, WeCom) already handle files.

Closes #2065
2026-03-21 22:06:54 +08:00
youhuanghe
3ac3fad4bc chore: upgrade plugin sdk to 0.3.3 2026-03-19 12:48:29 +00:00
youhuanghe
254a13bba3 fix: 4355f0fa78 ruff lint 2026-03-16 06:39:29 +00:00
youhuanghe
4355f0fa78 feat(rag): expose vector listing API with backend filter support 2026-03-16 06:26:05 +00:00
Junyan Qin
031737f05d chore: remove all preset sensitive words 2026-03-16 13:42:19 +08:00
23 changed files with 1149 additions and 108 deletions

View File

@@ -64,7 +64,7 @@ dependencies = [
"chromadb>=1.0.0,<2.0.0",
"qdrant-client (>=1.15.1,<2.0.0)",
"pyseekdb==1.1.0.post3",
"langbot-plugin==0.3.2",
"langbot-plugin==0.3.3",
"asyncpg>=0.30.0",
"line-bot-sdk>=3.19.0",
"tboxsdk>=0.0.10",

View File

@@ -68,6 +68,7 @@ class BotService:
'wecomcs',
'LINE',
'lark',
'gewechat',
]:
webhook_prefix = self.ap.instance_config.data['api'].get('webhook_prefix', 'http://127.0.0.1:5300')
extra_webhook_prefix = self.ap.instance_config.data['api'].get('extra_webhook_prefix', '')

View File

@@ -272,6 +272,9 @@ class PlatformManager:
# 如果 adapter 支持 set_bot_uuid 方法,设置 bot_uuid用于统一 webhook
if hasattr(adapter_inst, 'set_bot_uuid'):
adapter_inst.set_bot_uuid(bot_entity.uuid)
adapter_inst.config['_webhook_prefix'] = self.ap.instance_config.data['api'].get(
'webhook_prefix', 'http://127.0.0.1:5300'
)
runtime_bot = RuntimeBot(ap=self.ap, bot_entity=bot_entity, adapter=adapter_inst, logger=logger)

Binary file not shown.

After

Width:  |  Height:  |  Size: 25 KiB

View File

@@ -0,0 +1,609 @@
import gewechat_client
import typing
import asyncio
import traceback
import re
import copy
import threading
from langbot.pkg.utils import httpclient
import langbot_plugin.api.definition.abstract.platform.adapter as abstract_platform_adapter
import langbot_plugin.api.entities.builtin.platform.message as platform_message
import langbot_plugin.api.entities.builtin.platform.events as platform_events
import langbot_plugin.api.entities.builtin.platform.entities as platform_entities
from ...utils import image
import xml.etree.ElementTree as ET
from typing import Optional, Tuple
from functools import partial
from ..logger import EventLogger
class GewechatMessageConverter(abstract_platform_adapter.AbstractMessageConverter):
def __init__(self, config: dict):
self.config = config
@staticmethod
async def yiri2target(message_chain: platform_message.MessageChain) -> list[dict]:
content_list = []
for component in message_chain:
if isinstance(component, platform_message.At):
content_list.append({'type': 'at', 'target': component.target})
elif isinstance(component, platform_message.Plain):
content_list.append({'type': 'text', 'content': component.text})
elif isinstance(component, platform_message.Image):
if not component.url:
pass
content_list.append({'type': 'image', 'image': component.url})
elif isinstance(component, platform_message.Voice):
content_list.append({'type': 'voice', 'url': component.url, 'length': component.length})
elif isinstance(component, platform_message.Forward):
for node in component.node_list:
content_list.extend(await GewechatMessageConverter.yiri2target(node.message_chain))
content_list.append({'type': 'image', 'image': component.url})
elif isinstance(component, platform_message.WeChatMiniPrograms):
content_list.append(
{
'type': 'WeChatMiniPrograms',
'mini_app_id': component.mini_app_id,
'display_name': component.display_name,
'page_path': component.page_path,
'cover_img_url': component.image_url,
'title': component.title,
'user_name': component.user_name,
}
)
elif isinstance(component, platform_message.WeChatForwardMiniPrograms):
content_list.append(
{
'type': 'WeChatForwardMiniPrograms',
'xml_data': component.xml_data,
'image_url': component.image_url,
}
)
elif isinstance(component, platform_message.WeChatEmoji):
content_list.append(
{
'type': 'WeChatEmoji',
'emoji_md5': component.emoji_md5,
'emoji_size': component.emoji_size,
}
)
elif isinstance(component, platform_message.WeChatLink):
content_list.append(
{
'type': 'WeChatLink',
'link_title': component.link_title,
'link_desc': component.link_desc,
'link_thumb_url': component.link_thumb_url,
'link_url': component.link_url,
}
)
elif isinstance(component, platform_message.WeChatForwardLink):
content_list.append({'type': 'WeChatForwardLink', 'xml_data': component.xml_data})
elif isinstance(component, platform_message.WeChatForwardImage):
content_list.append({'type': 'WeChatForwardImage', 'xml_data': component.xml_data})
elif isinstance(component, platform_message.WeChatForwardFile):
content_list.append({'type': 'WeChatForwardFile', 'xml_data': component.xml_data})
elif isinstance(component, platform_message.WeChatAppMsg):
content_list.append({'type': 'WeChatAppMsg', 'app_msg': component.app_msg})
elif isinstance(component, platform_message.WeChatForwardQuote):
content_list.append({'type': 'WeChatAppMsg', 'app_msg': component.app_msg})
elif isinstance(component, platform_message.Forward):
for node in component.node_list:
if node.message_chain:
content_list.extend(await GewechatMessageConverter.yiri2target(node.message_chain))
return content_list
async def target2yiri(self, message: dict, bot_account_id: str) -> platform_message.MessageChain:
message_list = []
ats_bot = False
content = message['Data']['Content']['string']
content_no_preifx = content
is_group_message = self._is_group_message(message)
if is_group_message:
ats_bot = self._ats_bot(message, bot_account_id)
if '@所有人' in content:
message_list.append(platform_message.AtAll())
elif ats_bot:
message_list.append(platform_message.At(target=bot_account_id))
content_no_preifx, _ = self._extract_content_and_sender(content)
msg_type = message['Data']['MsgType']
handler_map = {
1: self._handler_text,
3: self._handler_image,
34: self._handler_voice,
49: self._handler_compound,
}
handler = handler_map.get(msg_type, self._handler_default)
handler_result = await handler(
message=message,
content_no_preifx=content_no_preifx,
)
if handler_result and len(handler_result) > 0:
message_list.extend(handler_result)
return platform_message.MessageChain(message_list)
async def _handler_text(self, message: Optional[dict], content_no_preifx: str) -> platform_message.MessageChain:
if message and self._is_group_message(message):
pattern = r'@\S{1,20}'
content_no_preifx = re.sub(pattern, '', content_no_preifx)
return platform_message.MessageChain([platform_message.Plain(content_no_preifx)])
async def _handler_image(self, message: Optional[dict], content_no_preifx: str) -> platform_message.MessageChain:
try:
image_xml = content_no_preifx
if not image_xml:
return platform_message.MessageChain([platform_message.Unknown('[图片内容为空]')])
base64_str, image_format = await image.get_gewechat_image_base64(
gewechat_url=self.config['gewechat_url'],
gewechat_file_url=self.config['gewechat_file_url'],
app_id=self.config['app_id'],
xml_content=image_xml,
token=self.config['token'],
image_type=2,
)
elements = [
platform_message.Image(base64=f'data:image/{image_format};base64,{base64_str}'),
platform_message.WeChatForwardImage(xml_data=image_xml),
]
return platform_message.MessageChain(elements)
except Exception as e:
print(f'处理图片失败: {str(e)}')
return platform_message.MessageChain([platform_message.Unknown('[图片处理失败]')])
async def _handler_voice(self, message: Optional[dict], content_no_preifx: str) -> platform_message.MessageChain:
message_List = []
try:
audio_base64 = message['Data']['ImgBuf']['buffer']
if not audio_base64:
message_List.append(platform_message.Unknown(text='[语音内容为空]'))
return platform_message.MessageChain(message_List)
voice_element = platform_message.Voice(base64=f'data:audio/silk;base64,{audio_base64}')
message_List.append(voice_element)
except KeyError as e:
print(f'语音数据字段缺失: {str(e)}')
message_List.append(platform_message.Unknown(text='[语音数据解析失败]'))
except Exception as e:
print(f'处理语音消息异常: {str(e)}')
message_List.append(platform_message.Unknown(text='[语音处理失败]'))
return platform_message.MessageChain(message_List)
async def _handler_compound(self, message: Optional[dict], content_no_preifx: str) -> platform_message.MessageChain:
try:
xml_data = ET.fromstring(content_no_preifx)
appmsg_data = xml_data.find('.//appmsg')
if appmsg_data:
data_type = appmsg_data.findtext('.//type', '')
sub_handler_map = {
'57': self._handler_compound_quote,
'5': self._handler_compound_link,
'6': self._handler_compound_file,
'33': self._handler_compound_mini_program,
'36': self._handler_compound_mini_program,
'2000': partial(self._handler_compound_unsupported, text='[转账消息]'),
'2001': partial(self._handler_compound_unsupported, text='[红包消息]'),
'51': partial(self._handler_compound_unsupported, text='[视频号消息]'),
}
handler = sub_handler_map.get(data_type, self._handler_compound_unsupported)
return await handler(
message=message,
xml_data=xml_data,
)
else:
return platform_message.MessageChain([platform_message.Unknown(text=content_no_preifx)])
except Exception as e:
print(f'解析复合消息失败: {str(e)}')
return platform_message.MessageChain([platform_message.Unknown(text=content_no_preifx)])
async def _handler_compound_quote(
self, message: Optional[dict], xml_data: ET.Element
) -> platform_message.MessageChain:
message_list = []
appmsg_data = xml_data.find('.//appmsg')
quote_data = ''
user_data = ''
sender_id = xml_data.findtext('.//fromusername')
if appmsg_data:
user_data = appmsg_data.findtext('.//title') or ''
quote_data = appmsg_data.find('.//refermsg').findtext('.//content')
message_list.append(
platform_message.WeChatForwardQuote(app_msg=ET.tostring(appmsg_data, encoding='unicode'))
)
if quote_data:
quote_data_message_list = platform_message.MessageChain()
try:
if '<msg>' not in quote_data:
quote_data_message_list.append(platform_message.Plain(quote_data))
else:
quote_data_xml = ET.fromstring(quote_data)
if quote_data_xml.find('img'):
quote_data_message_list.extend(await self._handler_image(None, quote_data))
elif quote_data_xml.find('voicemsg'):
quote_data_message_list.extend(await self._handler_voice(None, quote_data))
elif quote_data_xml.find('videomsg'):
quote_data_message_list.extend(await self._handler_default(None, quote_data))
else:
quote_data_message_list.extend(await self._handler_compound(None, quote_data))
except Exception as e:
print(f'处理引用消息异常 expcetion:{e}')
quote_data_message_list.append(platform_message.Plain(quote_data))
message_list.append(
platform_message.Quote(
sender_id=sender_id,
origin=quote_data_message_list,
)
)
if len(user_data) > 0:
pattern = r'@\S{1,20}'
user_data = re.sub(pattern, '', user_data)
message_list.append(platform_message.Plain(user_data))
return platform_message.MessageChain(message_list)
async def _handler_compound_file(self, message: dict, xml_data: ET.Element) -> platform_message.MessageChain:
xml_data_str = ET.tostring(xml_data, encoding='unicode')
return platform_message.MessageChain([platform_message.WeChatForwardFile(xml_data=xml_data_str)])
async def _handler_compound_link(self, message: dict, xml_data: ET.Element) -> platform_message.MessageChain:
message_list = []
try:
appmsg = xml_data.find('.//appmsg')
if appmsg is None:
return platform_message.MessageChain()
message_list.append(
platform_message.WeChatLink(
link_title=appmsg.findtext('title', ''),
link_desc=appmsg.findtext('des', ''),
link_url=appmsg.findtext('url', ''),
link_thumb_url=appmsg.findtext('thumburl', ''),
)
)
xml_data_str = ET.tostring(xml_data, encoding='unicode')
message_list.append(platform_message.WeChatForwardLink(xml_data=xml_data_str))
except Exception as e:
print(f'解析链接消息失败: {str(e)}')
return platform_message.MessageChain(message_list)
async def _handler_compound_mini_program(
self, message: dict, xml_data: ET.Element
) -> platform_message.MessageChain:
xml_data_str = ET.tostring(xml_data, encoding='unicode')
return platform_message.MessageChain([platform_message.WeChatForwardMiniPrograms(xml_data=xml_data_str)])
async def _handler_default(self, message: Optional[dict], content_no_preifx: str) -> platform_message.MessageChain:
if message:
msg_type = message['Data']['MsgType']
else:
msg_type = ''
return platform_message.MessageChain([platform_message.Unknown(text=f'[未知消息类型 msg_type:{msg_type}]')])
def _handler_compound_unsupported(
self, message: dict, xml_data: str, text: Optional[str] = None
) -> platform_message.MessageChain:
if not text:
text = f'[xml_data={xml_data}]'
content_list = []
content_list.append(platform_message.Unknown(text=f'[处理未支持复合消息类型[msg_type=49]|{text}'))
return platform_message.MessageChain(content_list)
def _ats_bot(self, message: dict, bot_account_id: str) -> bool:
ats_bot = False
try:
to_user_name = message['Wxid']
raw_content = message['Data']['Content']['string']
content_no_prefix, _ = self._extract_content_and_sender(raw_content)
push_content = message.get('Data', {}).get('PushContent', '')
ats_bot = ats_bot or ('在群聊中@了你' in push_content)
msg_source = message.get('Data', {}).get('MsgSource', '') or ''
if len(msg_source) > 0:
msg_source_data = ET.fromstring(msg_source)
at_user_list = msg_source_data.findtext('atuserlist') or ''
ats_bot = ats_bot or (to_user_name in at_user_list)
if message.get('Data', {}).get('MsgType', 0) == 49:
xml_data = ET.fromstring(content_no_prefix)
appmsg_data = xml_data.find('.//appmsg')
tousername = message['Wxid']
if appmsg_data:
quote_id = appmsg_data.find('.//refermsg').findtext('.//chatusr')
ats_bot = ats_bot or (quote_id == tousername)
except Exception as e:
print(f'Error in gewechat _ats_bot: {e}')
finally:
return ats_bot
def _extract_content_and_sender(self, raw_content: str) -> Tuple[str, Optional[str]]:
try:
regex = re.compile(r'^[a-zA-Z0-9_\-]{5,20}:')
line_split = raw_content.split('\n')
if len(line_split) > 0 and regex.match(line_split[0]):
raw_content = '\n'.join(line_split[1:])
sender_id = line_split[0].strip(':')
return raw_content, sender_id
except Exception as e:
print(f'_extract_content_and_sender got except: {e}')
finally:
return raw_content, None
def _is_group_message(self, message: dict) -> bool:
from_user_name = message['Data']['FromUserName']['string']
return from_user_name.endswith('@chatroom')
class GewechatEventConverter(abstract_platform_adapter.AbstractEventConverter):
def __init__(self, config: dict):
self.config = config
self.message_converter = GewechatMessageConverter(config)
@staticmethod
async def yiri2target(event: platform_events.MessageEvent) -> dict:
pass
async def target2yiri(self, event: dict, bot_account_id: str) -> platform_events.MessageEvent:
if event['Wxid'] == event['Data']['FromUserName']['string']:
return None
if event['Data']['FromUserName']['string'].startswith('gh_') or event['Data']['FromUserName'][
'string'
].startswith('weixin'):
return None
message_chain = await self.message_converter.target2yiri(copy.deepcopy(event), bot_account_id)
if not message_chain:
return None
if '@chatroom' in event['Data']['FromUserName']['string']:
sender_wxid = event['Data']['Content']['string'].split(':')[0]
return platform_events.GroupMessage(
sender=platform_entities.GroupMember(
id=sender_wxid,
member_name=event['Data']['FromUserName']['string'],
permission=platform_entities.Permission.Member,
group=platform_entities.Group(
id=event['Data']['FromUserName']['string'],
name=event['Data']['FromUserName']['string'],
permission=platform_entities.Permission.Member,
),
special_title='',
),
message_chain=message_chain,
time=event['Data']['CreateTime'],
source_platform_object=event,
)
else:
return platform_events.FriendMessage(
sender=platform_entities.Friend(
id=event['Data']['FromUserName']['string'],
nickname=event['Data']['FromUserName']['string'],
remark='',
),
message_chain=message_chain,
time=event['Data']['CreateTime'],
source_platform_object=event,
)
class GeWeChatAdapter(abstract_platform_adapter.AbstractMessagePlatformAdapter):
bot: gewechat_client.GewechatClient = None
bot_uuid: str = None
message_converter: GewechatMessageConverter = None
event_converter: GewechatEventConverter = None
listeners: typing.Dict[
typing.Type[platform_events.Event],
typing.Callable[[platform_events.Event, abstract_platform_adapter.AbstractMessagePlatformAdapter], None],
] = {}
def __init__(self, config: dict, logger: EventLogger):
super().__init__(
config=config,
logger=logger,
)
self.message_converter = GewechatMessageConverter(config)
self.event_converter = GewechatEventConverter(config)
def set_bot_uuid(self, bot_uuid: str):
self.bot_uuid = bot_uuid
async def handle_unified_webhook(self, bot_uuid: str, path: str, request):
data = await request.json
await self.logger.debug(f'Gewechat callback event: {data}')
if 'data' in data:
data['Data'] = data['data']
if 'type_name' in data:
data['TypeName'] = data['type_name']
if 'testMsg' in data:
return 'ok'
elif 'TypeName' in data and data['TypeName'] == 'AddMsg':
try:
event = await self.event_converter.target2yiri(data.copy(), self.bot_account_id)
except Exception:
await self.logger.error(f'Error in gewechat callback: {traceback.format_exc()}')
return 'ok'
if event and event.__class__ in self.listeners:
await self.listeners[event.__class__](event, self)
return 'ok'
return 'ok'
async def _handle_message(self, message: platform_message.MessageChain, target_id: str):
content_list = await self.message_converter.yiri2target(message)
at_targets = [item['target'] for item in content_list if item['type'] == 'at']
at_targets = at_targets or []
member_info = []
if at_targets:
member_info = self.bot.get_chatroom_member_detail(self.config['app_id'], target_id, at_targets[::-1])[
'data'
]
for msg in content_list:
if msg['type'] == 'text' and at_targets:
for member in member_info:
msg['content'] = f'@{member["nickName"]} {msg["content"]}'
handler_map = {
'text': lambda msg: self.bot.post_text(
app_id=self.config['app_id'],
to_wxid=target_id,
content=msg['content'],
ats=','.join(at_targets),
),
'image': lambda msg: self.bot.post_image(
app_id=self.config['app_id'],
to_wxid=target_id,
img_url=msg['image'],
),
'WeChatForwardMiniPrograms': lambda msg: self.bot.forward_mini_app(
app_id=self.config['app_id'],
to_wxid=target_id,
xml=msg['xml_data'],
cover_img_url=msg.get('image_url'),
),
'WeChatEmoji': lambda msg: self.bot.post_emoji(
app_id=self.config['app_id'],
to_wxid=target_id,
emoji_md5=msg['emoji_md5'],
emoji_size=msg['emoji_size'],
),
'WeChatLink': lambda msg: self.bot.post_link(
app_id=self.config['app_id'],
to_wxid=target_id,
title=msg['link_title'],
desc=msg['link_desc'],
link_url=msg['link_url'],
thumb_url=msg['link_thumb_url'],
),
'WeChatMiniPrograms': lambda msg: self.bot.post_mini_app(
app_id=self.config['app_id'],
to_wxid=target_id,
mini_app_id=msg['mini_app_id'],
display_name=msg['display_name'],
page_path=msg['page_path'],
cover_img_url=msg['cover_img_url'],
title=msg['title'],
user_name=msg['user_name'],
),
'WeChatForwardLink': lambda msg: self.bot.forward_url(
app_id=self.config['app_id'], to_wxid=target_id, xml=msg['xml_data']
),
'WeChatForwardImage': lambda msg: self.bot.forward_image(
app_id=self.config['app_id'], to_wxid=target_id, xml=msg['xml_data']
),
'WeChatForwardFile': lambda msg: self.bot.forward_file(
app_id=self.config['app_id'], to_wxid=target_id, xml=msg['xml_data']
),
'voice': lambda msg: self.bot.post_voice(
app_id=self.config['app_id'],
to_wxid=target_id,
voice_url=msg['url'],
voice_duration=msg['length'],
),
'WeChatAppMsg': lambda msg: self.bot.post_app_msg(
app_id=self.config['app_id'],
to_wxid=target_id,
appmsg=msg['app_msg'],
),
'at': lambda msg: None,
}
if handler := handler_map.get(msg['type']):
handler(msg)
else:
await self.logger.warning(f'未处理的消息类型: {msg["type"]}')
continue
async def send_message(self, target_type: str, target_id: str, message: platform_message.MessageChain):
return await self._handle_message(message, target_id)
async def reply_message(
self,
message_source: platform_events.MessageEvent,
message: platform_message.MessageChain,
quote_origin: bool = False,
):
if message_source.source_platform_object:
target_id = message_source.source_platform_object['Data']['FromUserName']['string']
return await self._handle_message(message, target_id)
async def is_muted(self, group_id: int) -> bool:
pass
def register_listener(
self,
event_type: typing.Type[platform_events.Event],
callback: typing.Callable[
[platform_events.Event, abstract_platform_adapter.AbstractMessagePlatformAdapter], None
],
):
self.listeners[event_type] = callback
def unregister_listener(
self,
event_type: typing.Type[platform_events.Event],
callback: typing.Callable[
[platform_events.Event, abstract_platform_adapter.AbstractMessagePlatformAdapter], None
],
):
pass
def _build_callback_url(self) -> str:
webhook_prefix = self.config.get('_webhook_prefix', 'http://127.0.0.1:5300').rstrip('/')
return f'{webhook_prefix}/bots/{self.bot_uuid}'
async def run_async(self):
if not self.config['token']:
session = httpclient.get_session()
async with session.post(
f'{self.config["gewechat_url"]}/v2/api/tools/getTokenId',
json={'app_id': self.config['app_id']},
) as response:
if response.status != 200:
raise Exception(f'获取gewechat token失败: {await response.text()}')
self.config['token'] = (await response.json())['data']
self.bot = gewechat_client.GewechatClient(f'{self.config["gewechat_url"]}/v2/api', self.config['token'])
def gewechat_init_process():
profile = self.bot.get_profile(self.config['app_id'])
self.bot_account_id = profile['data']['nickName']
try:
callback_url = self._build_callback_url()
self.bot.set_callback(self.config['token'], callback_url)
print(f'Gewechat 回调地址已设置: {callback_url}')
except Exception as e:
raise Exception(f'设置 Gewechat 回调失败token失效{e}')
threading.Thread(target=gewechat_init_process).start()
# 统一 webhook 模式下,不启动独立的 HTTP 服务
# 保持适配器运行
while True:
await asyncio.sleep(1)
async def kill(self) -> bool:
return False

View File

@@ -0,0 +1,51 @@
apiVersion: v1
kind: MessagePlatformAdapter
metadata:
name: gewechat
label:
en_US: GeWeChat
zh_Hans: GeWeChat个人微信
description:
en_US: GeWeChat Adapter (Unified Webhook)
zh_Hans: GeWeChat 适配器(统一 Webhook请查看文档了解使用方式
icon: gewechat.png
spec:
config:
- name: gewechat_url
label:
en_US: GeWeChat URL
zh_Hans: GeWeChat URL
description:
en_US: GeWeChat API server address, e.g. http://127.0.0.1:2531
zh_Hans: GeWeChat API 服务器地址,如 http://127.0.0.1:2531
type: string
required: true
default: ""
- name: gewechat_file_url
label:
en_US: GeWeChat file download URL
zh_Hans: GeWeChat 文件下载URL
description:
en_US: GeWeChat file download service address
zh_Hans: GeWeChat 文件下载服务地址
type: string
required: true
default: ""
- name: app_id
label:
en_US: App ID
zh_Hans: 应用ID
type: string
required: true
default: ""
- name: token
label:
en_US: Token
zh_Hans: 令牌
type: string
required: true
default: ""
execution:
python:
path: ./gewechat.py
attr: GeWeChatAdapter

View File

@@ -42,6 +42,25 @@ class TelegramMessageConverter(abstract_platform_adapter.AbstractMessageConverte
photo_bytes = f.read()
components.append({'type': 'photo', 'photo': photo_bytes})
elif isinstance(component, platform_message.File):
file_bytes = None
if component.base64:
# Strip data URI prefix if present (e.g. "data:application/pdf;base64,...")
b64_data = component.base64
if ';base64,' in b64_data:
b64_data = b64_data.split(';base64,', 1)[1]
file_bytes = base64.b64decode(b64_data)
elif component.url:
session = httpclient.get_session()
async with session.get(component.url) as response:
file_bytes = await response.read()
elif component.path:
with open(component.path, 'rb') as f:
file_bytes = f.read()
file_name = getattr(component, 'name', None) or 'file'
components.append({'type': 'document', 'document': file_bytes, 'filename': file_name})
elif isinstance(component, platform_message.Forward):
for node in component.node_list:
components.extend(await TelegramMessageConverter.yiri2target(node.message_chain, bot))
@@ -104,6 +123,27 @@ class TelegramMessageConverter(abstract_platform_adapter.AbstractMessageConverte
)
)
if message.document:
if message.caption:
message_components.extend(parse_message_text(message.caption))
file = await message.document.get_file()
file_name = message.document.file_name or 'document'
file_size = message.document.file_size or 0
file_format = message.document.mime_type or 'application/octet-stream'
file_bytes = None
async with httpclient.get_session(trust_env=True).get(file.file_path) as response:
file_bytes = await response.read()
message_components.append(
platform_message.File(
name=file_name,
size=file_size,
base64=f'data:{file_format};base64,{base64.b64encode(file_bytes).decode("utf-8")}',
)
)
return platform_message.MessageChain(message_components)
@@ -179,7 +219,10 @@ class TelegramAdapter(abstract_platform_adapter.AbstractMessagePlatformAdapter):
application = ApplicationBuilder().token(config['token']).build()
bot = application.bot
application.add_handler(
MessageHandler(filters.TEXT | (filters.COMMAND) | filters.PHOTO | filters.VOICE, telegram_callback)
MessageHandler(
filters.TEXT | (filters.COMMAND) | filters.PHOTO | filters.VOICE | filters.Document.ALL,
telegram_callback,
)
)
super().__init__(
config=config,
@@ -218,6 +261,13 @@ class TelegramAdapter(abstract_platform_adapter.AbstractMessagePlatformAdapter):
continue
args['photo'] = telegram.InputFile(photo)
await self.bot.send_photo(**args)
elif component_type == 'document':
doc = component.get('document')
if doc is None:
continue
filename = component.get('filename', 'file')
args['document'] = telegram.InputFile(doc, filename=filename)
await self.bot.send_document(**args)
async def reply_message(
self,

View File

@@ -24,14 +24,18 @@ class WecomBotMessageConverter(abstract_platform_adapter.AbstractMessageConverte
return content
@staticmethod
async def target2yiri(event: WecomBotEvent):
async def target2yiri(event: WecomBotEvent, bot_name: str = ''):
yiri_msg_list = []
if event.type == 'group':
yiri_msg_list.append(platform_message.At(target=event.ai_bot_id))
yiri_msg_list.append(platform_message.Source(id=event.message_id, time=datetime.datetime.now()))
if event.content:
yiri_msg_list.append(platform_message.Plain(text=event.content))
content = event.content
if bot_name:
content = content.replace(f'@{bot_name}', '').strip()
yiri_msg_list.append(platform_message.Plain(text=content))
images = []
if event.images:
@@ -134,13 +138,15 @@ class WecomBotMessageConverter(abstract_platform_adapter.AbstractMessageConverte
class WecomBotEventConverter(abstract_platform_adapter.AbstractEventConverter):
def __init__(self, bot_name: str = ''):
self.bot_name = bot_name
@staticmethod
async def yiri2target(event: platform_events.MessageEvent):
return event.source_platform_object
@staticmethod
async def target2yiri(event: WecomBotEvent):
message_chain = await WecomBotMessageConverter.target2yiri(event)
async def target2yiri(self, event: WecomBotEvent):
message_chain = await WecomBotMessageConverter.target2yiri(event, bot_name=self.bot_name)
if event.type == 'single':
return platform_events.FriendMessage(
sender=platform_entities.Friend(
@@ -180,13 +186,16 @@ class WecomBotAdapter(abstract_platform_adapter.AbstractMessagePlatformAdapter):
bot: typing.Union[WecomBotClient, WecomBotWsClient]
bot_account_id: str
message_converter: WecomBotMessageConverter = WecomBotMessageConverter()
event_converter: WecomBotEventConverter = WecomBotEventConverter()
event_converter: WecomBotEventConverter
config: dict
bot_uuid: str = None
_ws_mode: bool = False
bot_name: str = ''
listeners: dict = {}
def __init__(self, config: dict, logger: EventLogger):
enable_webhook = config.get('enable-webhook', False)
bot_name = config.get('robot_name', '')
if not enable_webhook:
bot = WecomBotWsClient(
@@ -195,7 +204,6 @@ class WecomBotAdapter(abstract_platform_adapter.AbstractMessagePlatformAdapter):
logger=logger,
encoding_aes_key=config.get('EncodingAESKey', ''),
)
ws_mode = True
else:
# Webhook callback mode
required_keys = ['Token', 'EncodingAESKey', 'Corpid']
@@ -210,17 +218,18 @@ class WecomBotAdapter(abstract_platform_adapter.AbstractMessagePlatformAdapter):
logger=logger,
unified_mode=True,
)
ws_mode = False
bot_account_id = config.get('BotId', '')
event_converter = WecomBotEventConverter(bot_name=bot_name)
super().__init__(
config=config,
logger=logger,
bot=bot,
bot_account_id=bot_account_id,
bot_name=bot_name,
event_converter=event_converter,
)
self._ws_mode = ws_mode
self.listeners = {}
async def reply_message(
self,
@@ -229,7 +238,9 @@ class WecomBotAdapter(abstract_platform_adapter.AbstractMessagePlatformAdapter):
quote_origin: bool = False,
):
content = await self.message_converter.yiri2target(message)
if self._ws_mode:
_ws_mode = not self.config.get('enable-webhook', False)
if _ws_mode:
event = message_source.source_platform_object
req_id = event.get('req_id', '')
if req_id:
@@ -249,8 +260,9 @@ class WecomBotAdapter(abstract_platform_adapter.AbstractMessagePlatformAdapter):
):
content = await self.message_converter.yiri2target(message)
msg_id = message_source.source_platform_object.message_id
_ws_mode = not self.config.get('enable-webhook', False)
if self._ws_mode:
if _ws_mode:
success = await self.bot.push_stream_chunk(msg_id, content, is_final=is_final)
if not success and is_final:
event = message_source.source_platform_object
@@ -275,12 +287,22 @@ class WecomBotAdapter(abstract_platform_adapter.AbstractMessagePlatformAdapter):
return True
async def send_message(self, target_type, target_id, message):
if self._ws_mode:
_ws_mode = not self.config.get('enable-webhook', False)
if _ws_mode:
content = await self.message_converter.yiri2target(message)
await self.bot.send_message(target_id, content)
else:
pass
async def on_message(self, event: WecomBotEvent):
try:
lb_event = await self.event_converter.target2yiri(event)
if lb_event:
await self.listeners[type(lb_event)](lb_event, self)
except Exception:
await self.logger.error(f'Error in wecombot callback: {traceback.format_exc()}')
print(traceback.format_exc())
def register_listener(
self,
event_type: typing.Type[platform_events.Event],
@@ -288,18 +310,13 @@ class WecomBotAdapter(abstract_platform_adapter.AbstractMessagePlatformAdapter):
[platform_events.Event, abstract_platform_adapter.AbstractMessagePlatformAdapter], None
],
):
async def on_message(event: WecomBotEvent):
try:
return await callback(await self.event_converter.target2yiri(event), self)
except Exception:
await self.logger.error(f'Error in wecombot callback: {traceback.format_exc()}')
print(traceback.format_exc())
self.listeners[event_type] = callback
try:
if event_type == platform_events.FriendMessage:
self.bot.on_message('single')(on_message)
self.bot.on_message('single')(self.on_message)
elif event_type == platform_events.GroupMessage:
self.bot.on_message('group')(on_message)
self.bot.on_message('group')(self.on_message)
except Exception:
print(traceback.format_exc())
@@ -308,12 +325,14 @@ class WecomBotAdapter(abstract_platform_adapter.AbstractMessagePlatformAdapter):
self.bot_uuid = bot_uuid
async def handle_unified_webhook(self, bot_uuid: str, path: str, request):
if self._ws_mode:
_ws_mode = not self.config.get('enable-webhook', False)
if _ws_mode:
return None
return await self.bot.handle_unified_webhook(request)
async def run_async(self):
if self._ws_mode:
_ws_mode = not self.config.get('enable-webhook', False)
if _ws_mode:
await self.bot.connect()
else:
@@ -324,7 +343,8 @@ class WecomBotAdapter(abstract_platform_adapter.AbstractMessagePlatformAdapter):
await keep_alive()
async def kill(self) -> bool:
if self._ws_mode:
_ws_mode = not self.config.get('enable-webhook', False)
if _ws_mode:
await self.bot.disconnect()
return True
return False

View File

@@ -18,6 +18,13 @@ spec:
type: string
required: true
default: ""
- name: robot_name
label:
en_US: Robot Name
zh_Hans: 机器人名称
type: string
required: true
default: ""
- name: enable-webhook
label:
en_US: Enable Webhook Mode

View File

@@ -555,6 +555,18 @@ class RuntimeConnectionHandler(handler.Handler):
except Exception as e:
return _make_rag_error_response(e, 'VectorStoreError', collection_id=collection_id)
@self.action(PluginToRuntimeAction.VECTOR_LIST)
async def vector_list(data: dict[str, Any]) -> handler.ActionResponse:
collection_id = data['collection_id']
filters = data.get('filters')
limit = data.get('limit', 20)
offset = data.get('offset', 0)
try:
items, total = await self.ap.rag_runtime_service.vector_list(collection_id, filters, limit, offset)
return handler.ActionResponse.success(data={'items': items, 'total': total})
except Exception as e:
return _make_rag_error_response(e, 'VectorStoreError', collection_id=collection_id)
@self.action(PluginToRuntimeAction.GET_KNOWLEDEGE_FILE_STREAM)
async def get_knowledge_file_stream(data: dict[str, Any]) -> handler.ActionResponse:
storage_path = data['storage_path']

View File

@@ -288,10 +288,10 @@ class AnthropicMessages(requester.ProviderAPIRequester):
think_started = False
think_ended = False
finish_reason = False
content = ''
tool_name = ''
tool_id = ''
async for chunk in await self.client.messages.create(**args):
content = ''
tool_call = {'id': None, 'function': {'name': None, 'arguments': None}, 'type': 'function'}
if isinstance(
chunk, anthropic.types.raw_content_block_start_event.RawContentBlockStartEvent

View File

@@ -75,6 +75,31 @@ class RAGRuntimeService:
count = await self.ap.vector_db_mgr.delete_by_filter(collection_name=collection_id, filter=filters)
return count
async def vector_list(
self,
collection_id: str,
filters: dict[str, Any] | None = None,
limit: int = 20,
offset: int = 0,
) -> tuple[list[dict[str, Any]], int]:
"""Handle VECTOR_LIST action.
Args:
collection_id: The collection to list from.
filters: Optional metadata filters.
limit: Maximum number of items to return.
offset: Number of items to skip.
Returns:
Tuple of (items, total).
"""
return await self.ap.vector_db_mgr.list_by_filter(
collection_name=collection_id,
filter=filters,
limit=limit,
offset=offset,
)
async def get_file_stream(self, storage_path: str) -> bytes:
"""Handle GET_KNOWLEDEGE_FILE_STREAM action.

View File

@@ -49,17 +49,25 @@ def normalize_filter(
def strip_unsupported_fields(
triples: list[tuple[str, str, Any]],
supported_fields: set[str],
field_aliases: dict[str, str] | None = None,
) -> list[tuple[str, str, Any]]:
"""Return only triples whose field is in *supported_fields*.
If *field_aliases* is provided, aliased field names are mapped to the
canonical backend name before the support check. For example,
``{'uuid': 'chunk_uuid'}`` allows callers to use ``uuid`` which is
transparently rewritten to ``chunk_uuid``.
Dropped fields are logged at WARNING level so the caller knows they were
silently ignored (useful for Milvus / pgvector which only store a fixed
schema).
"""
aliases = field_aliases or {}
kept: list[tuple[str, str, Any]] = []
for field, op, value in triples:
if field in supported_fields:
kept.append((field, op, value))
resolved = aliases.get(field, field)
if resolved in supported_fields:
kept.append((resolved, op, value))
else:
logger.warning(
'Filter field %r is not supported by this backend and will be ignored (supported: %s)',

View File

@@ -157,3 +157,17 @@ class VectorDBManager:
Number of deleted vectors (best-effort; some backends return 0).
"""
return await self.vector_db.delete_by_filter(collection_name, filter)
async def list_by_filter(
self,
collection_name: str,
filter: dict | None = None,
limit: int = 20,
offset: int = 0,
) -> tuple[list[dict], int]:
"""Proxy: List vectors by metadata filter with pagination.
Returns:
Tuple of (items, total).
"""
return await self.vector_db.list_by_filter(collection_name, filter, limit, offset)

View File

@@ -92,6 +92,28 @@ class VectorDatabase(abc.ABC):
"""
pass
async def list_by_filter(
self,
collection: str,
filter: dict[str, Any] | None = None,
limit: int = 20,
offset: int = 0,
) -> tuple[list[dict[str, Any]], int]:
"""List vectors matching the given metadata filter with pagination.
Args:
collection: Collection name.
filter: Optional metadata filter dict in canonical format.
limit: Maximum number of items to return.
offset: Number of items to skip.
Returns:
Tuple of (items, total) where items is a list of dicts with
keys 'id', 'document', 'metadata', and total is the best-effort
count of all matching vectors (-1 if unknown).
"""
return [], -1
@abc.abstractmethod
async def get_or_create_collection(self, collection: str):
"""Get or create collection."""

View File

@@ -221,6 +221,41 @@ class ChromaVectorDatabase(VectorDatabase):
self.ap.logger.info(f"Deleted embeddings from Chroma collection '{collection}' by filter")
return 0 # Chroma delete does not return a count
async def list_by_filter(
self,
collection: str,
filter: dict[str, Any] | None = None,
limit: int = 20,
offset: int = 0,
) -> tuple[list[dict[str, Any]], int]:
col = await self.get_or_create_collection(collection)
get_kwargs: dict[str, Any] = dict(
include=['metadatas', 'documents'],
limit=limit,
offset=offset,
)
if filter:
get_kwargs['where'] = filter
results = await asyncio.to_thread(col.get, **get_kwargs)
ids = results.get('ids', [])
metadatas = results.get('metadatas', []) or [None] * len(ids)
documents = results.get('documents', []) or [None] * len(ids)
items = []
for i, vid in enumerate(ids):
items.append(
{
'id': vid,
'document': documents[i] if i < len(documents) else None,
'metadata': metadatas[i] if i < len(metadatas) else {},
}
)
# Chroma col.count() gives total in collection; filtered count not available
total = await asyncio.to_thread(col.count) if not filter else -1
return items, total
async def delete_collection(self, collection: str):
if collection in self._collections:
del self._collections[collection]

View File

@@ -11,11 +11,14 @@ from langbot.pkg.core import app
# silently dropped with a warning.
_MILVUS_SUPPORTED_FIELDS = {'text', 'file_id', 'chunk_uuid'}
# Callers use canonical metadata key 'uuid' but Milvus stores it as 'chunk_uuid'.
_MILVUS_FIELD_ALIASES = {'uuid': 'chunk_uuid'}
def _build_milvus_expr(filter_dict: dict[str, Any]) -> str:
"""Translate canonical filter dict into a Milvus boolean expression string."""
triples = normalize_filter(filter_dict)
triples = strip_unsupported_fields(triples, _MILVUS_SUPPORTED_FIELDS)
triples = strip_unsupported_fields(triples, _MILVUS_SUPPORTED_FIELDS, _MILVUS_FIELD_ALIASES)
if not triples:
return ''
@@ -340,6 +343,62 @@ class MilvusVectorDatabase(VectorDatabase):
self.ap.logger.info(f"Deleted embeddings from Milvus collection '{collection}' by filter")
return 0 # Milvus delete does not return a count
async def list_by_filter(
self,
collection: str,
filter: dict[str, Any] | None = None,
limit: int = 20,
offset: int = 0,
) -> tuple[list[dict[str, Any]], int]:
collection = self._normalize_collection_name(collection)
await self.get_or_create_collection(collection)
query_kwargs: dict[str, Any] = dict(
collection_name=collection,
output_fields=['text', 'file_id', 'chunk_uuid'],
limit=limit,
offset=offset,
)
if filter:
expr = _build_milvus_expr(filter)
if expr:
query_kwargs['filter'] = expr
results = await asyncio.to_thread(self.client.query, **query_kwargs)
items = []
for row in results:
items.append(
{
'id': row.get('id', ''),
'document': row.get('text'),
'metadata': {
'text': row.get('text', ''),
'file_id': row.get('file_id', ''),
'uuid': row.get('chunk_uuid', ''),
},
}
)
# Milvus query with count(*)
total = -1
try:
count_kwargs: dict[str, Any] = dict(
collection_name=collection,
output_fields=['count(*)'],
)
if filter:
expr = _build_milvus_expr(filter)
if expr:
count_kwargs['filter'] = expr
count_result = await asyncio.to_thread(self.client.query, **count_kwargs)
if count_result:
total = count_result[0].get('count(*)', -1)
except Exception:
pass
return items, total
async def delete_collection(self, collection: str):
"""Delete a Milvus collection

View File

@@ -13,6 +13,9 @@ Base = declarative_base()
# pgvector schema only stores these metadata fields.
_PG_SUPPORTED_FIELDS = {'text', 'file_id', 'chunk_uuid'}
# Callers use canonical metadata key 'uuid' but pgvector stores it as 'chunk_uuid'.
_PG_FIELD_ALIASES = {'uuid': 'chunk_uuid'}
# Map schema field names to SQLAlchemy columns (resolved lazily from PgVectorEntry).
_PG_COLUMN_MAP = {
'text': 'text',
@@ -37,7 +40,7 @@ class PgVectorEntry(Base):
def _build_pg_conditions(filter_dict: dict[str, Any]) -> list:
"""Translate canonical filter dict into a list of SQLAlchemy conditions."""
triples = normalize_filter(filter_dict)
triples = strip_unsupported_fields(triples, _PG_SUPPORTED_FIELDS)
triples = strip_unsupported_fields(triples, _PG_SUPPORTED_FIELDS, _PG_FIELD_ALIASES)
conditions = []
for field, op, value in triples:
@@ -309,6 +312,65 @@ class PgVectorDatabase(VectorDatabase):
self.ap.logger.error(f'Error deleting from pgvector by filter: {e}')
raise
async def list_by_filter(
self,
collection: str,
filter: dict[str, Any] | None = None,
limit: int = 20,
offset: int = 0,
) -> tuple[list[dict[str, Any]], int]:
await self.get_or_create_collection(collection)
async with self.AsyncSessionLocal() as session:
try:
from sqlalchemy import select, func
stmt = (
select(
PgVectorEntry.id,
PgVectorEntry.text,
PgVectorEntry.file_id,
PgVectorEntry.chunk_uuid,
)
.filter(PgVectorEntry.collection == collection)
.offset(offset)
.limit(limit)
)
count_stmt = (
select(func.count()).select_from(PgVectorEntry).filter(PgVectorEntry.collection == collection)
)
if filter:
for cond in _build_pg_conditions(filter):
stmt = stmt.filter(cond)
count_stmt = count_stmt.filter(cond)
result = await session.execute(stmt)
rows = result.fetchall()
count_result = await session.execute(count_stmt)
total = count_result.scalar() or 0
items = []
for row in rows:
items.append(
{
'id': row.id,
'document': row.text or '',
'metadata': {
'text': row.text or '',
'file_id': row.file_id or '',
'uuid': row.chunk_uuid or '',
},
}
)
return items, total
except Exception as e:
self.ap.logger.error(f'Error listing from pgvector: {e}')
raise
async def delete_collection(self, collection: str):
"""Delete all vectors in a collection

View File

@@ -150,6 +150,97 @@ class QdrantVectorDatabase(VectorDatabase):
self.ap.logger.info(f"Deleted embeddings from Qdrant collection '{collection}' by filter")
return 0 # Qdrant delete does not return a count
async def list_by_filter(
self,
collection: str,
filter: dict[str, Any] | None = None,
limit: int = 20,
offset: int = 0,
) -> tuple[list[dict[str, Any]], int]:
exists = await self.client.collection_exists(collection)
if not exists:
return [], 0
qdrant_filter = _build_qdrant_filter(filter) if filter else None
# Qdrant scroll uses cursor-based pagination (offset = point ID),
# not numeric skip. To support numeric offset we scroll through
# `offset + limit` items and discard the first `offset`.
remaining_to_skip = offset
remaining_to_collect = limit
cursor: int | str | None = None
collected: list[dict[str, Any]] = []
while remaining_to_skip > 0 or remaining_to_collect > 0:
batch_size = remaining_to_skip + remaining_to_collect if remaining_to_skip > 0 else remaining_to_collect
scroll_kwargs: dict[str, Any] = dict(
collection_name=collection,
limit=min(batch_size, 256),
with_payload=True if remaining_to_skip == 0 else False,
with_vectors=False,
)
if qdrant_filter:
scroll_kwargs['scroll_filter'] = qdrant_filter
if cursor is not None:
scroll_kwargs['offset'] = cursor
points, next_cursor = await self.client.scroll(**scroll_kwargs)
if not points:
break
for point in points:
if remaining_to_skip > 0:
remaining_to_skip -= 1
continue
if remaining_to_collect <= 0:
break
# Re-fetch payload if we skipped it during the skip phase
payload = point.payload or {}
collected.append(
{
'id': str(point.id),
'document': payload.get('text') or payload.get('document'),
'metadata': payload,
}
)
remaining_to_collect -= 1
if next_cursor is None:
break
cursor = next_cursor
# If we skipped without payload, re-fetch the collected items' payloads
# (only needed when offset > 0 and items were collected in a skip batch)
if offset > 0 and collected:
refetch_ids = [item['id'] for item in collected if not item.get('metadata')]
if refetch_ids:
fetched_points = await self.client.retrieve(
collection_name=collection,
ids=refetch_ids,
with_payload=True,
with_vectors=False,
)
payload_map = {str(p.id): p.payload or {} for p in fetched_points}
for item in collected:
if item['id'] in payload_map:
payload = payload_map[item['id']]
item['metadata'] = payload
item['document'] = payload.get('text') or payload.get('document')
# Use count() for accurate total (supports filter)
total = -1
try:
count_result = await self.client.count(
collection_name=collection,
count_filter=qdrant_filter,
exact=True,
)
total = count_result.count
except Exception:
pass
return collected, total
async def delete_collection(self, collection: str):
try:
await self.client.delete_collection(collection)

View File

@@ -340,6 +340,50 @@ class SeekDBVectorDatabase(VectorDatabase):
self.ap.logger.info(f"Deleted embeddings from SeekDB collection '{collection}' by filter")
return 0 # SeekDB delete does not return a count
async def list_by_filter(
self,
collection: str,
filter: Dict[str, Any] | None = None,
limit: int = 20,
offset: int = 0,
) -> tuple[list[Dict[str, Any]], int]:
exists = await asyncio.to_thread(self.client.has_collection, collection)
if not exists:
return [], 0
if collection not in self._collections:
coll = await asyncio.to_thread(self.client.get_collection, collection, embedding_function=None)
self._collections[collection] = coll
else:
coll = self._collections[collection]
get_kwargs: Dict[str, Any] = dict(
include=['metadatas', 'documents'],
limit=limit,
offset=offset,
)
if filter:
get_kwargs['where'] = filter
results = await asyncio.to_thread(coll.get, **get_kwargs)
ids = results.get('ids', [])
metadatas = results.get('metadatas', []) or [None] * len(ids)
documents = results.get('documents', []) or [None] * len(ids)
items = []
for i, vid in enumerate(ids):
items.append(
{
'id': vid,
'document': documents[i] if i < len(documents) else None,
'metadata': metadatas[i] if i < len(metadatas) else {},
}
)
total = await asyncio.to_thread(coll.count) if not filter else -1
return items, total
async def delete_collection(self, collection: str):
"""Delete the entire collection.

View File

@@ -2,77 +2,5 @@
"说明": "mask将替换敏感词中的每一个字若mask_word值不为空则将敏感词整个替换为mask_word的值",
"mask": "*",
"mask_word": "",
"words": [
"习近平",
"胡锦涛",
"江泽民",
"温家宝",
"李克强",
"李长春",
"毛泽东",
"邓小平",
"周恩来",
"马克思",
"社会主义",
"共产党",
"共产主义",
"大陆官方",
"北京政权",
"中华帝国",
"中国政府",
"共狗",
"六四事件",
"天安门",
"六四",
"政治局常委",
"两会",
"共青团",
"学潮",
"八九",
"二十大",
"民进党",
"台独",
"台湾独立",
"台湾国",
"国民党",
"台湾民国",
"中华民国",
"pornhub",
"Pornhub",
"[Yy]ou[Pp]orn",
"porn",
"Porn",
"[Xx][Vv]ideos",
"[Rr]ed[Tt]ube",
"[Xx][Hh]amster",
"[Ss]pank[Ww]ire",
"[Ss]pank[Bb]ang",
"[Tt]ube8",
"[Yy]ou[Jj]izz",
"[Bb]razzers",
"[Nn]aughty[ ]?[Aa]merica",
"作爱",
"做爱",
"性交",
"性爱",
"自慰",
"阴茎",
"淫妇",
"肛交",
"交配",
"性关系",
"性活动",
"色情",
"色图",
"涩图",
"裸体",
"小穴",
"淫荡",
"性爱",
"翻墙",
"VPN",
"科学上网",
"挂梯子",
"GFW"
]
"words": []
}

8
uv.lock generated
View File

@@ -1937,7 +1937,7 @@ requires-dist = [
{ name = "ebooklib", specifier = ">=0.18" },
{ name = "gewechat-client", specifier = ">=0.1.5" },
{ name = "html2text", specifier = ">=2024.2.26" },
{ name = "langbot-plugin", specifier = "==0.3.2" },
{ name = "langbot-plugin", specifier = "==0.3.3" },
{ name = "langchain", specifier = ">=0.2.0" },
{ name = "langchain-text-splitters", specifier = ">=0.0.1" },
{ name = "lark-oapi", specifier = ">=1.4.15" },
@@ -1993,7 +1993,7 @@ dev = [
[[package]]
name = "langbot-plugin"
version = "0.3.2"
version = "0.3.3"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "aiofiles" },
@@ -2011,9 +2011,9 @@ dependencies = [
{ name = "watchdog" },
{ name = "websockets" },
]
sdist = { url = "https://files.pythonhosted.org/packages/05/32/1b029961d718b5418d9d139687287193d4657914c467833176d9c060fb4c/langbot_plugin-0.3.2.tar.gz", hash = "sha256:2f7f16285600ec019a4e8cc8b40bc8f8d404e3bbc69c9d129620dc70b3bde2f8", size = 170431, upload-time = "2026-03-14T12:42:55.175Z" }
sdist = { url = "https://files.pythonhosted.org/packages/08/be/78a0375aec6aad34bc2f00e2b4d2511ec597e8143cf8596d0736e8767904/langbot_plugin-0.3.3.tar.gz", hash = "sha256:5b07609b9b08f8b24fefcf29b97b9882838c140143de6d4bac2f320511ef4374", size = 170709, upload-time = "2026-03-19T12:40:23.877Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/91/d7/a5dd6cff000a4f7fe88692689be276875b04485f48fc26e162f1ad2a341c/langbot_plugin-0.3.2-py3-none-any.whl", hash = "sha256:a1de527c4d651e6b6ba2458b6fac09a300e6b1ffdc5a25bbfad88d970cfd6cfd", size = 144906, upload-time = "2026-03-14T12:42:56.486Z" },
{ url = "https://files.pythonhosted.org/packages/92/53/c708f3ef9459420974f1b131cffd43cfb2f97220350c26a6b5fbadbd6211/langbot_plugin-0.3.3-py3-none-any.whl", hash = "sha256:cca94a2ed07c1d3ec4be33f97268746908ed304b528d0eb7f23308677fe619ca", size = 145188, upload-time = "2026-03-19T12:40:25.094Z" },
]
[[package]]