diff --git a/src/langbot/pkg/provider/modelmgr/requesters/302aichatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/302aichatcmpl.py
new file mode 100644
index 00000000..40a41718
--- /dev/null
+++ b/src/langbot/pkg/provider/modelmgr/requesters/302aichatcmpl.py
@@ -0,0 +1,17 @@
+from __future__ import annotations
+
+import typing
+import openai
+
+from . import chatcmpl
+
+
+class AI302ChatCompletions(chatcmpl.OpenAIChatCompletions):
+ """302.AI ChatCompletion API 请求器"""
+
+ client: openai.AsyncClient
+
+ default_config: dict[str, typing.Any] = {
+ 'base_url': 'https://api.302.ai/v1',
+ 'timeout': 120,
+ }
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/302aichatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/302aichatcmpl.yaml
index 4f3acf31..e4f70cae 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/302aichatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/302aichatcmpl.yaml
@@ -22,9 +22,12 @@ spec:
type: integer
required: true
default: 120
- litellm_provider: openai
support_type:
- llm
- text-embedding
- rerank
provider_category: maas
+execution:
+ python:
+ path: ./302aichatcmpl.py
+ attr: AI302ChatCompletions
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/anthropicmsgs.py b/src/langbot/pkg/provider/modelmgr/requesters/anthropicmsgs.py
new file mode 100644
index 00000000..1428dc88
--- /dev/null
+++ b/src/langbot/pkg/provider/modelmgr/requesters/anthropicmsgs.py
@@ -0,0 +1,370 @@
+from __future__ import annotations
+
+import typing
+import json
+import platform
+import socket
+import anthropic
+import httpx
+
+from .. import errors, requester
+
+from ....utils import image
+import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
+import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
+import langbot_plugin.api.entities.builtin.provider.message as provider_message
+
+
+class AnthropicMessages(requester.ProviderAPIRequester):
+ """Anthropic Messages API 请求器"""
+
+ client: anthropic.AsyncAnthropic
+
+ default_config: dict[str, typing.Any] = {
+ 'base_url': 'https://api.anthropic.com',
+ 'timeout': 120,
+ }
+
+ async def initialize(self):
+ # 兼容 Windows 缺失 TCP_KEEPINTVL 和 TCP_KEEPCNT 的问题
+ if platform.system() == 'Windows':
+ if not hasattr(socket, 'TCP_KEEPINTVL'):
+ socket.TCP_KEEPINTVL = 0
+ if not hasattr(socket, 'TCP_KEEPCNT'):
+ socket.TCP_KEEPCNT = 0
+ httpx_client = anthropic._base_client.AsyncHttpxClientWrapper(
+ base_url=self.requester_cfg['base_url'],
+ # cast to a valid type because mypy doesn't understand our type narrowing
+ timeout=typing.cast(httpx.Timeout, self.requester_cfg['timeout']),
+ limits=anthropic._constants.DEFAULT_CONNECTION_LIMITS,
+ follow_redirects=True,
+ trust_env=True,
+ )
+
+ self.client = anthropic.AsyncAnthropic(
+ api_key='',
+ http_client=httpx_client,
+ base_url=self.requester_cfg['base_url'],
+ )
+
+ async def invoke_llm(
+ self,
+ query: pipeline_query.Query,
+ model: requester.RuntimeLLMModel,
+ messages: typing.List[provider_message.Message],
+ funcs: typing.List[resource_tool.LLMTool] = None,
+ extra_args: dict[str, typing.Any] = {},
+ remove_think: bool = False,
+ ) -> provider_message.Message:
+ self.client.api_key = model.provider.token_mgr.get_token()
+
+ args = extra_args.copy()
+ args['model'] = model.model_entity.name
+
+ # 处理消息
+
+ # system
+ system_role_message = None
+
+ for i, m in enumerate(messages):
+ if m.role == 'system':
+ system_role_message = m
+
+ break
+
+ if system_role_message:
+ messages.pop(i)
+
+ if isinstance(system_role_message, provider_message.Message) and isinstance(system_role_message.content, str):
+ args['system'] = system_role_message.content
+
+ req_messages = []
+
+ for m in messages:
+ if m.role == 'tool':
+ tool_call_id = m.tool_call_id
+
+ req_messages.append(
+ {
+ 'role': 'user',
+ 'content': [
+ {
+ 'type': 'tool_result',
+ 'tool_use_id': tool_call_id,
+ 'is_error': False,
+ 'content': [{'type': 'text', 'text': m.content}],
+ }
+ ],
+ }
+ )
+
+ continue
+
+ msg_dict = m.dict(exclude_none=True)
+
+ if isinstance(m.content, str) and m.content.strip() != '':
+ msg_dict['content'] = [{'type': 'text', 'text': m.content}]
+ elif isinstance(m.content, list):
+ for i, ce in enumerate(m.content):
+ if ce.type == 'image_base64':
+ image_b64, image_format = await image.extract_b64_and_format(ce.image_base64)
+
+ alter_image_ele = {
+ 'type': 'image',
+ 'source': {
+ 'type': 'base64',
+ 'media_type': f'image/{image_format}',
+ 'data': image_b64,
+ },
+ }
+ msg_dict['content'][i] = alter_image_ele
+
+ if m.tool_calls:
+ for tool_call in m.tool_calls:
+ msg_dict['content'].append(
+ {
+ 'type': 'tool_use',
+ 'id': tool_call.id,
+ 'name': tool_call.function.name,
+ 'input': json.loads(tool_call.function.arguments),
+ }
+ )
+
+ del msg_dict['tool_calls']
+
+ req_messages.append(msg_dict)
+
+ args['messages'] = req_messages
+
+ if 'thinking' in args:
+ args['thinking'] = {'type': 'enabled', 'budget_tokens': 10000}
+
+ if funcs:
+ tools = await self.ap.tool_mgr.generate_tools_for_anthropic(funcs)
+
+ if tools:
+ args['tools'] = tools
+
+ try:
+ resp = await self.client.messages.create(**args)
+
+ args = {
+ 'content': '',
+ 'role': resp.role,
+ }
+ assert type(resp) is anthropic.types.message.Message
+
+ for block in resp.content:
+ if not remove_think and block.type == 'thinking':
+ args['content'] = '\n' + block.thinking + '\n\n' + args['content']
+ elif block.type == 'text':
+ args['content'] += block.text
+ elif block.type == 'tool_use':
+ assert type(block) is anthropic.types.tool_use_block.ToolUseBlock
+ tool_call = provider_message.ToolCall(
+ id=block.id,
+ type='function',
+ function=provider_message.FunctionCall(name=block.name, arguments=json.dumps(block.input)),
+ )
+ if 'tool_calls' not in args:
+ args['tool_calls'] = []
+ args['tool_calls'].append(tool_call)
+
+ return provider_message.Message(**args)
+ except anthropic.AuthenticationError as e:
+ raise errors.RequesterError(f'api-key 无效: {e.message}')
+ except anthropic.BadRequestError as e:
+ raise errors.RequesterError(str(e.message))
+ except anthropic.NotFoundError as e:
+ if 'model: ' in str(e):
+ raise errors.RequesterError(f'模型无效: {e.message}')
+ else:
+ raise errors.RequesterError(f'请求地址无效: {e.message}')
+
+ async def invoke_llm_stream(
+ self,
+ query: pipeline_query.Query,
+ model: requester.RuntimeLLMModel,
+ messages: typing.List[provider_message.Message],
+ funcs: typing.List[resource_tool.LLMTool] = None,
+ extra_args: dict[str, typing.Any] = {},
+ remove_think: bool = False,
+ ) -> provider_message.Message:
+ self.client.api_key = model.provider.token_mgr.get_token()
+
+ args = extra_args.copy()
+ args['model'] = model.model_entity.name
+ args['stream'] = True
+
+ # 处理消息
+
+ # system
+ system_role_message = None
+
+ for i, m in enumerate(messages):
+ if m.role == 'system':
+ system_role_message = m
+
+ break
+
+ if system_role_message:
+ messages.pop(i)
+
+ if isinstance(system_role_message, provider_message.Message) and isinstance(system_role_message.content, str):
+ args['system'] = system_role_message.content
+
+ req_messages = []
+
+ for m in messages:
+ if m.role == 'tool':
+ tool_call_id = m.tool_call_id
+
+ req_messages.append(
+ {
+ 'role': 'user',
+ 'content': [
+ {
+ 'type': 'tool_result',
+ 'tool_use_id': tool_call_id,
+ 'is_error': False, # 暂时直接写false
+ 'content': [
+ {'type': 'text', 'text': m.content}
+ ], # 这里要是list包裹,应该是多个返回的情况?type类型好像也可以填其他的,暂时只写text
+ }
+ ],
+ }
+ )
+
+ continue
+
+ msg_dict = m.dict(exclude_none=True)
+
+ if isinstance(m.content, str) and m.content.strip() != '':
+ msg_dict['content'] = [{'type': 'text', 'text': m.content}]
+ elif isinstance(m.content, list):
+ for i, ce in enumerate(m.content):
+ if ce.type == 'image_base64':
+ image_b64, image_format = await image.extract_b64_and_format(ce.image_base64)
+
+ alter_image_ele = {
+ 'type': 'image',
+ 'source': {
+ 'type': 'base64',
+ 'media_type': f'image/{image_format}',
+ 'data': image_b64,
+ },
+ }
+ msg_dict['content'][i] = alter_image_ele
+ if isinstance(msg_dict['content'], str) and msg_dict['content'] == '':
+ msg_dict['content'] = [] # 这里不知道为什么会莫名有个空导致content为字符
+ if m.tool_calls:
+ for tool_call in m.tool_calls:
+ msg_dict['content'].append(
+ {
+ 'type': 'tool_use',
+ 'id': tool_call.id,
+ 'name': tool_call.function.name,
+ 'input': json.loads(tool_call.function.arguments),
+ }
+ )
+
+ del msg_dict['tool_calls']
+
+ req_messages.append(msg_dict)
+ if 'thinking' in args:
+ args['thinking'] = {'type': 'enabled', 'budget_tokens': 10000}
+
+ args['messages'] = req_messages
+
+ if funcs:
+ tools = await self.ap.tool_mgr.generate_tools_for_anthropic(funcs)
+
+ if tools:
+ args['tools'] = tools
+
+ try:
+ role = 'assistant' # 默认角色
+ # chunk_idx = 0
+ think_started = False
+ think_ended = False
+ finish_reason = False
+ tool_name = ''
+ tool_id = ''
+ async for chunk in await self.client.messages.create(**args):
+ content = ''
+ tool_call = {'id': None, 'function': {'name': None, 'arguments': None}, 'type': 'function'}
+ if isinstance(
+ chunk, anthropic.types.raw_content_block_start_event.RawContentBlockStartEvent
+ ): # 记录开始
+ if chunk.content_block.type == 'tool_use':
+ if chunk.content_block.name is not None:
+ tool_name = chunk.content_block.name
+ if chunk.content_block.id is not None:
+ tool_id = chunk.content_block.id
+
+ tool_call['function']['name'] = tool_name
+ tool_call['function']['arguments'] = ''
+ tool_call['id'] = tool_id
+
+ if not remove_think:
+ if chunk.content_block.type == 'thinking' and not remove_think:
+ think_started = True
+ elif chunk.content_block.type == 'text' and chunk.index != 0 and not remove_think:
+ think_ended = True
+ continue
+ elif isinstance(chunk, anthropic.types.raw_content_block_delta_event.RawContentBlockDeltaEvent):
+ if chunk.delta.type == 'thinking_delta':
+ if think_started:
+ think_started = False
+ content = '\n' + chunk.delta.thinking
+ elif remove_think:
+ continue
+ else:
+ content = chunk.delta.thinking
+ elif chunk.delta.type == 'text_delta':
+ if think_ended:
+ think_ended = False
+ content = '\n\n' + chunk.delta.text
+ else:
+ content = chunk.delta.text
+ elif chunk.delta.type == 'input_json_delta':
+ tool_call['function']['arguments'] = chunk.delta.partial_json
+ tool_call['function']['name'] = tool_name
+ tool_call['id'] = tool_id
+ elif isinstance(chunk, anthropic.types.raw_content_block_stop_event.RawContentBlockStopEvent):
+ continue # 记录raw_content_block结束的
+
+ elif isinstance(chunk, anthropic.types.raw_message_delta_event.RawMessageDeltaEvent):
+ if chunk.delta.stop_reason == 'end_turn':
+ finish_reason = True
+ elif isinstance(chunk, anthropic.types.raw_message_stop_event.RawMessageStopEvent):
+ continue # 这个好像是完全结束
+ else:
+ # print(chunk)
+ self.ap.logger.debug(f'anthropic chunk: {chunk}')
+ continue
+
+ args = {
+ 'content': content,
+ 'role': role,
+ 'is_final': finish_reason,
+ 'tool_calls': None if tool_call['id'] is None else [tool_call],
+ }
+ # if chunk_idx == 0:
+ # chunk_idx += 1
+ # continue
+
+ # assert type(chunk) is anthropic.types.message.Chunk
+
+ yield provider_message.MessageChunk(**args)
+
+ # return llm_entities.Message(**args)
+ except anthropic.AuthenticationError as e:
+ raise errors.RequesterError(f'api-key 无效: {e.message}')
+ except anthropic.BadRequestError as e:
+ raise errors.RequesterError(str(e.message))
+ except anthropic.NotFoundError as e:
+ if 'model: ' in str(e):
+ raise errors.RequesterError(f'模型无效: {e.message}')
+ else:
+ raise errors.RequesterError(f'请求地址无效: {e.message}')
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/anthropicmsgs.yaml b/src/langbot/pkg/provider/modelmgr/requesters/anthropicmsgs.yaml
index c4785fd8..0ef60d3e 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/anthropicmsgs.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/anthropicmsgs.yaml
@@ -22,7 +22,10 @@ spec:
type: integer
required: true
default: 120
- litellm_provider: anthropic
support_type:
- llm
provider_category: manufacturer
+execution:
+ python:
+ path: ./anthropicmsgs.py
+ attr: AnthropicMessages
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/bailianchatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/bailianchatcmpl.py
new file mode 100644
index 00000000..9da6e1b4
--- /dev/null
+++ b/src/langbot/pkg/provider/modelmgr/requesters/bailianchatcmpl.py
@@ -0,0 +1,242 @@
+from __future__ import annotations
+
+import typing
+import dashscope
+import openai
+
+from . import modelscopechatcmpl
+from .. import requester
+import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
+import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
+import langbot_plugin.api.entities.builtin.provider.message as provider_message
+
+
+class BailianChatCompletions(modelscopechatcmpl.ModelScopeChatCompletions):
+ """阿里云百炼大模型平台 ChatCompletion API 请求器"""
+
+ client: openai.AsyncClient
+
+ default_config: dict[str, typing.Any] = {
+ 'base_url': 'https://dashscope.aliyuncs.com/compatible-mode/v1',
+ 'timeout': 120,
+ }
+
+ async def _closure_stream(
+ self,
+ query: pipeline_query.Query,
+ req_messages: list[dict],
+ use_model: requester.RuntimeLLMModel,
+ use_funcs: list[resource_tool.LLMTool] = None,
+ extra_args: dict[str, typing.Any] = {},
+ remove_think: bool = False,
+ ) -> provider_message.Message | typing.AsyncGenerator[provider_message.MessageChunk, None]:
+ self.client.api_key = use_model.provider.token_mgr.get_token()
+
+ args = {}
+ args['model'] = use_model.model_entity.name
+
+ if use_funcs:
+ tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
+
+ if tools:
+ args['tools'] = tools
+
+ # 设置此次请求中的messages
+ messages = req_messages.copy()
+
+ is_use_dashscope_call = False # 是否使用阿里原生库调用
+ is_enable_multi_model = True # 是否支持多轮对话
+ use_time_num = 0 # 模型已调用次数,防止存在多文件时重复调用
+ use_time_ids = [] # 已调用的ID列表
+ message_id = 0 # 记录消息序号
+
+ for msg in messages:
+ # print(msg)
+ if 'content' in msg and isinstance(msg['content'], list):
+ for me in msg['content']:
+ if me['type'] == 'image_base64':
+ me['image_url'] = {'url': me['image_base64']}
+ me['type'] = 'image_url'
+ del me['image_base64']
+ elif me['type'] == 'file_url' and '.' in me.get('file_name', ''):
+ # 1. 视频文件推理
+ # https://bailian.console.aliyun.com/?tab=doc#/doc/?type=model&url=2845871
+ file_type = me.get('file_name').lower().split('.')[-1]
+ if file_type in ['mp4', 'avi', 'mkv', 'mov', 'flv', 'wmv']:
+ me['type'] = 'video_url'
+ me['video_url'] = {'url': me['file_url']}
+ del me['file_url']
+ del me['file_name']
+ use_time_num += 1
+ use_time_ids.append(message_id)
+ is_enable_multi_model = False
+ # 2. 语音文件识别, 无法通过openai的audio字段传递,暂时不支持
+ # https://bailian.console.aliyun.com/?tab=doc#/doc/?type=model&url=2979031
+ elif file_type in [
+ 'aac',
+ 'amr',
+ 'aiff',
+ 'flac',
+ 'm4a',
+ 'mp3',
+ 'mpeg',
+ 'ogg',
+ 'opus',
+ 'wav',
+ 'webm',
+ 'wma',
+ ]:
+ me['audio'] = me['file_url']
+ me['type'] = 'audio'
+ del me['file_url']
+ del me['type']
+ del me['file_name']
+ is_use_dashscope_call = True
+ use_time_num += 1
+ use_time_ids.append(message_id)
+ is_enable_multi_model = False
+ message_id += 1
+
+ # 使用列表推导式,保留不在 use_time_ids[:-1] 中的元素,仅保留最后一个多媒体消息
+ if not is_enable_multi_model and use_time_num > 1:
+ messages = [msg for idx, msg in enumerate(messages) if idx not in use_time_ids[:-1]]
+
+ if not is_enable_multi_model:
+ messages = [msg for msg in messages if 'resp_message_id' not in msg]
+
+ args['messages'] = messages
+ args['stream'] = True
+
+ # 流式处理状态
+ # tool_calls_map: dict[str, provider_message.ToolCall] = {}
+ chunk_idx = 0
+ thinking_started = False
+ thinking_ended = False
+ role = 'assistant' # 默认角色
+
+ if is_use_dashscope_call:
+ response = dashscope.MultiModalConversation.call(
+ # 若没有配置环境变量,请用百炼API Key将下行替换为:api_key = "sk-xxx"
+ api_key=use_model.provider.token_mgr.get_token(),
+ model=use_model.model_entity.name,
+ messages=messages,
+ result_format='message',
+ asr_options={
+ # "language": "zh", # 可选,若已知音频的语种,可通过该参数指定待识别语种,以提升识别准确率
+ 'enable_lid': True,
+ 'enable_itn': False,
+ },
+ stream=True,
+ )
+ content_length_list = []
+ previous_length = 0 # 记录上一次的内容长度
+ for res in response:
+ chunk = res['output']
+ # 解析 chunk 数据
+ if hasattr(chunk, 'choices') and chunk.choices:
+ choice = chunk.choices[0]
+ delta_content = choice['message'].content[0]['text']
+ finish_reason = choice['finish_reason']
+ content_length_list.append(len(delta_content))
+ else:
+ delta_content = ''
+ finish_reason = None
+
+ # 跳过空的第一个 chunk(只有 role 没有内容)
+ if chunk_idx == 0 and not delta_content:
+ chunk_idx += 1
+ continue
+
+ # 检查 content_length_list 是否有足够的数据
+ if len(content_length_list) >= 2:
+ now_content = delta_content[previous_length : content_length_list[-1]]
+ previous_length = content_length_list[-1] # 更新上一次的长度
+ else:
+ now_content = delta_content # 第一次循环时直接使用 delta_content
+ previous_length = len(delta_content) # 更新上一次的长度
+
+ # 构建 MessageChunk - 只包含增量内容
+ chunk_data = {
+ 'role': role,
+ 'content': now_content if now_content else None,
+ 'is_final': bool(finish_reason) and finish_reason != 'null',
+ }
+
+ # 移除 None 值
+ chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
+ yield provider_message.MessageChunk(**chunk_data)
+ chunk_idx += 1
+ else:
+ async for chunk in self._req_stream(args, extra_body=extra_args):
+ # 解析 chunk 数据
+ if hasattr(chunk, 'choices') and chunk.choices:
+ choice = chunk.choices[0]
+ delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
+ finish_reason = getattr(choice, 'finish_reason', None)
+ else:
+ delta = {}
+ finish_reason = None
+
+ # 从第一个 chunk 获取 role,后续使用这个 role
+ if 'role' in delta and delta['role']:
+ role = delta['role']
+
+ # 获取增量内容
+ delta_content = delta.get('content', '')
+ reasoning_content = delta.get('reasoning_content', '')
+
+ # 处理 reasoning_content
+ if reasoning_content:
+ # accumulated_reasoning += reasoning_content
+ # 如果设置了 remove_think,跳过 reasoning_content
+ if remove_think:
+ chunk_idx += 1
+ continue
+
+ # 第一次出现 reasoning_content,添加 开始标签
+ if not thinking_started:
+ thinking_started = True
+ delta_content = '\n' + reasoning_content
+ else:
+ # 继续输出 reasoning_content
+ delta_content = reasoning_content
+ elif thinking_started and not thinking_ended and delta_content:
+ # reasoning_content 结束,normal content 开始,添加 结束标签
+ thinking_ended = True
+ delta_content = '\n\n' + delta_content
+
+ # 处理工具调用增量
+ if delta.get('tool_calls'):
+ for tool_call in delta['tool_calls']:
+ if tool_call['id'] != '':
+ tool_id = tool_call['id']
+ if tool_call['function']['name'] is not None:
+ tool_name = tool_call['function']['name']
+
+ if tool_call['type'] is None:
+ tool_call['type'] = 'function'
+ tool_call['id'] = tool_id
+ tool_call['function']['name'] = tool_name
+ tool_call['function']['arguments'] = (
+ '' if tool_call['function']['arguments'] is None else tool_call['function']['arguments']
+ )
+
+ # 跳过空的第一个 chunk(只有 role 没有内容)
+ if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'):
+ chunk_idx += 1
+ continue
+
+ # 构建 MessageChunk - 只包含增量内容
+ chunk_data = {
+ 'role': role,
+ 'content': delta_content if delta_content else None,
+ 'tool_calls': delta.get('tool_calls'),
+ 'is_final': bool(finish_reason),
+ }
+
+ # 移除 None 值
+ chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
+
+ yield provider_message.MessageChunk(**chunk_data)
+ chunk_idx += 1
+ # return
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/bailianchatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/bailianchatcmpl.yaml
index ad2c28db..fc5998c4 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/bailianchatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/bailianchatcmpl.yaml
@@ -22,8 +22,11 @@ spec:
type: integer
required: true
default: 120
- litellm_provider: openai
support_type:
- llm
- rerank
provider_category: maas
+execution:
+ python:
+ path: ./bailianchatcmpl.py
+ attr: BailianChatCompletions
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/chatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/chatcmpl.py
new file mode 100644
index 00000000..da24bda0
--- /dev/null
+++ b/src/langbot/pkg/provider/modelmgr/requesters/chatcmpl.py
@@ -0,0 +1,702 @@
+from __future__ import annotations
+
+import asyncio
+import typing
+
+import openai
+import openai.types.chat.chat_completion as chat_completion_module
+import httpx
+
+from .. import errors, requester
+import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
+import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
+import langbot_plugin.api.entities.builtin.provider.message as provider_message
+
+
+class OpenAIChatCompletions(requester.ProviderAPIRequester):
+ """OpenAI ChatCompletion API 请求器"""
+
+ client: openai.AsyncClient
+
+ default_config: dict[str, typing.Any] = {
+ 'base_url': 'https://api.openai.com/v1',
+ 'timeout': 120,
+ }
+
+ async def initialize(self):
+ self.client = openai.AsyncClient(
+ api_key='',
+ base_url=self.requester_cfg['base_url'].replace(' ', ''),
+ timeout=self.requester_cfg['timeout'],
+ http_client=httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']),
+ )
+
+ def _mask_api_key(self, api_key: str | None) -> str:
+ if not api_key:
+ return ''
+ if len(api_key) <= 8:
+ return '****'
+ return f'{api_key[:4]}...{api_key[-4:]}'
+
+ def _infer_model_type(self, model_id: str) -> str:
+ normalized_model_id = (model_id or '').lower()
+ embedding_keywords = (
+ 'embedding',
+ 'embed',
+ 'bge-',
+ 'e5-',
+ 'm3e',
+ 'gte-',
+ 'multilingual-e5',
+ 'text-embedding',
+ )
+ return 'embedding' if any(keyword in normalized_model_id for keyword in embedding_keywords) else 'llm'
+
+ def _infer_model_abilities(self, item: dict[str, typing.Any], model_id: str) -> list[str]:
+ normalized_model_id = (model_id or '').lower()
+ abilities: set[str] = set()
+
+ def _flatten(value: typing.Any) -> list[str]:
+ if value is None:
+ return []
+ if isinstance(value, str):
+ return [value.lower()]
+ if isinstance(value, dict):
+ flattened: list[str] = []
+ for nested_value in value.values():
+ flattened.extend(_flatten(nested_value))
+ return flattened
+ if isinstance(value, (list, tuple, set)):
+ flattened: list[str] = []
+ for nested_value in value:
+ flattened.extend(_flatten(nested_value))
+ return flattened
+ return [str(value).lower()]
+
+ capability_tokens = _flatten(item.get('capabilities'))
+ capability_tokens.extend(_flatten(item.get('modalities')))
+ capability_tokens.extend(_flatten(item.get('input_modalities')))
+ capability_tokens.extend(_flatten(item.get('output_modalities')))
+ capability_tokens.extend(_flatten(item.get('supported_generation_methods')))
+ capability_tokens.extend(_flatten(item.get('supported_parameters')))
+ capability_tokens.extend(_flatten(item.get('architecture')))
+
+ combined_tokens = capability_tokens + [normalized_model_id]
+
+ vision_keywords = (
+ 'vision',
+ 'image',
+ 'file',
+ 'video',
+ 'multimodal',
+ 'vl',
+ 'ocr',
+ 'omni',
+ )
+ function_call_keywords = (
+ 'function',
+ 'tool',
+ 'tools',
+ 'tool_choice',
+ 'tool_call',
+ 'tool-use',
+ 'tool_use',
+ )
+
+ if any(any(keyword in token for keyword in vision_keywords) for token in combined_tokens):
+ abilities.add('vision')
+
+ if any(any(keyword in token for keyword in function_call_keywords) for token in combined_tokens):
+ abilities.add('func_call')
+
+ return sorted(abilities)
+
+ def _normalize_modalities(self, value: typing.Any) -> list[str]:
+ normalized: list[str] = []
+
+ def _collect(item: typing.Any):
+ if item is None:
+ return
+ if isinstance(item, str):
+ for part in item.replace('->', ',').replace('+', ',').split(','):
+ token = part.strip().lower()
+ if token and token not in normalized:
+ normalized.append(token)
+ return
+ if isinstance(item, dict):
+ for nested in item.values():
+ _collect(nested)
+ return
+ if isinstance(item, (list, tuple, set)):
+ for nested in item:
+ _collect(nested)
+ return
+
+ _collect(value)
+ return normalized
+
+ def _extract_scan_metadata(self, item: dict[str, typing.Any], model_id: str) -> dict[str, typing.Any]:
+ display_name = item.get('name')
+ if not isinstance(display_name, str) or not display_name.strip() or display_name == model_id:
+ display_name = ''
+
+ description = item.get('description')
+ if not isinstance(description, str) or not description.strip():
+ description = ''
+
+ context_length = item.get('context_length')
+ if context_length is None and isinstance(item.get('top_provider'), dict):
+ context_length = item['top_provider'].get('context_length')
+
+ if not isinstance(context_length, int):
+ try:
+ context_length = int(context_length) if context_length is not None else None
+ except (TypeError, ValueError):
+ context_length = None
+
+ input_modalities = self._normalize_modalities(item.get('input_modalities'))
+ output_modalities = self._normalize_modalities(item.get('output_modalities'))
+
+ if isinstance(item.get('architecture'), dict):
+ if not input_modalities:
+ input_modalities = self._normalize_modalities(item['architecture'].get('input_modalities'))
+ if not output_modalities:
+ output_modalities = self._normalize_modalities(item['architecture'].get('output_modalities'))
+
+ owned_by = item.get('owned_by')
+ if not isinstance(owned_by, str) or not owned_by.strip():
+ owned_by = ''
+
+ return {
+ 'display_name': display_name or None,
+ 'description': description or None,
+ 'context_length': context_length,
+ 'owned_by': owned_by or None,
+ 'input_modalities': input_modalities,
+ 'output_modalities': output_modalities,
+ }
+
+ async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]:
+ headers = {}
+ if api_key:
+ headers['Authorization'] = f'Bearer {api_key}'
+
+ models_url = f'{self.requester_cfg["base_url"].rstrip("/")}/models'
+ async with httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']) as client:
+ response = await client.get(models_url, headers=headers)
+ response.raise_for_status()
+ payload = response.json()
+
+ models = []
+ for item in payload.get('data', []):
+ model_id = item.get('id')
+ if not model_id:
+ continue
+ models.append(
+ {
+ 'id': model_id,
+ 'name': model_id,
+ 'type': self._infer_model_type(model_id),
+ 'abilities': self._infer_model_abilities(item, model_id),
+ **self._extract_scan_metadata(item, model_id),
+ }
+ )
+
+ models.sort(key=lambda item: (item['type'] != 'llm', item['name'].lower()))
+ return {
+ 'models': models,
+ 'debug': {
+ 'request': {
+ 'method': 'GET',
+ 'url': models_url,
+ 'headers': {
+ 'Authorization': f'Bearer {self._mask_api_key(api_key)}' if api_key else '',
+ },
+ },
+ 'response': payload,
+ },
+ }
+
+ async def _req(
+ self,
+ args: dict,
+ extra_body: dict = {},
+ ) -> chat_completion_module.ChatCompletion:
+ return await self.client.chat.completions.create(**args, extra_body=extra_body)
+
+ async def _req_stream(
+ self,
+ args: dict,
+ extra_body: dict = {},
+ ):
+ async for chunk in await self.client.chat.completions.create(**args, extra_body=extra_body):
+ yield chunk
+
+ async def _make_msg(
+ self,
+ chat_completion: chat_completion_module.ChatCompletion,
+ remove_think: bool = False,
+ ) -> provider_message.Message:
+ if not isinstance(chat_completion, chat_completion_module.ChatCompletion):
+ raise TypeError(f'Expected ChatCompletion, got {type(chat_completion).__name__}: {chat_completion[:16]}')
+
+ chatcmpl_message = chat_completion.choices[0].message.model_dump()
+
+ # 确保 role 字段存在且不为 None
+ if 'role' not in chatcmpl_message or chatcmpl_message['role'] is None:
+ chatcmpl_message['role'] = 'assistant'
+
+ # 处理思维链
+ content = chatcmpl_message.get('content', '')
+ reasoning_content = chatcmpl_message.get('reasoning_content', None)
+
+ processed_content, _ = await self._process_thinking_content(
+ content=content, reasoning_content=reasoning_content, remove_think=remove_think
+ )
+
+ chatcmpl_message['content'] = processed_content
+
+ # 移除 reasoning_content 字段,避免传递给 Message
+ if 'reasoning_content' in chatcmpl_message:
+ del chatcmpl_message['reasoning_content']
+
+ message = provider_message.Message(**chatcmpl_message)
+
+ return message
+
+ async def _process_thinking_content(
+ self,
+ content: str,
+ reasoning_content: str = None,
+ remove_think: bool = False,
+ ) -> tuple[str, str]:
+ """处理思维链内容
+
+ Args:
+ content: 原始内容
+ reasoning_content: reasoning_content 字段内容
+ remove_think: 是否移除思维链
+
+ Returns:
+ (处理后的内容, 提取的思维链内容)
+ """
+ thinking_content = ''
+
+ # 1. 从 reasoning_content 提取思维链
+ if reasoning_content:
+ thinking_content = reasoning_content
+
+ # 2. 从 content 中提取 标签内容
+ if content and '' in content and '' in content:
+ import re
+
+ think_pattern = r'(.*?)'
+ think_matches = re.findall(think_pattern, content, re.DOTALL)
+ if think_matches:
+ # 如果已有 reasoning_content,则追加
+ if thinking_content:
+ thinking_content += '\n' + '\n'.join(think_matches)
+ else:
+ thinking_content = '\n'.join(think_matches)
+ # 移除 content 中的 标签
+ content = re.sub(think_pattern, '', content, flags=re.DOTALL).strip()
+
+ # 3. 根据 remove_think 参数决定是否保留思维链
+ if remove_think:
+ return content, ''
+ else:
+ # 如果有思维链内容,将其以 格式添加到 content 开头
+ if thinking_content:
+ content = f'\n{thinking_content}\n\n{content}'.strip()
+ return content, thinking_content
+
+ async def _closure_stream(
+ self,
+ query: pipeline_query.Query,
+ req_messages: list[dict],
+ use_model: requester.RuntimeLLMModel,
+ use_funcs: list[resource_tool.LLMTool] = None,
+ extra_args: dict[str, typing.Any] = {},
+ remove_think: bool = False,
+ ) -> provider_message.MessageChunk:
+ self.client.api_key = use_model.provider.token_mgr.get_token()
+
+ args = {}
+ args['model'] = use_model.model_entity.name
+
+ if use_funcs:
+ tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
+ if tools:
+ args['tools'] = tools
+
+ # 设置此次请求中的messages
+ messages = req_messages.copy()
+
+ # 检查vision
+ for msg in messages:
+ if 'content' in msg and isinstance(msg['content'], list):
+ for me in msg['content']:
+ if me['type'] == 'image_base64':
+ me['image_url'] = {'url': me['image_base64']}
+ me['type'] = 'image_url'
+ del me['image_base64']
+
+ args['messages'] = messages
+ args['stream'] = True
+
+ # 流式处理状态
+ # tool_calls_map: dict[str, provider_message.ToolCall] = {}
+ chunk_idx = 0
+ thinking_started = False
+ thinking_ended = False
+ role = 'assistant' # 默认角色
+ tool_id = ''
+ tool_name = ''
+ # accumulated_reasoning = '' # 仅用于判断何时结束思维链
+
+ async for chunk in self._req_stream(args, extra_body=extra_args):
+ # 解析 chunk 数据
+
+ if hasattr(chunk, 'choices') and chunk.choices:
+ choice = chunk.choices[0]
+ delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
+
+ finish_reason = getattr(choice, 'finish_reason', None)
+ else:
+ delta = {}
+ finish_reason = None
+ # 从第一个 chunk 获取 role,后续使用这个 role
+ if 'role' in delta and delta['role']:
+ role = delta['role']
+
+ # 获取增量内容
+ delta_content = delta.get('content', '')
+ reasoning_content = delta.get('reasoning_content', '')
+
+ # 处理 reasoning_content
+ if reasoning_content:
+ # accumulated_reasoning += reasoning_content
+ # 如果设置了 remove_think,跳过 reasoning_content
+ if remove_think:
+ chunk_idx += 1
+ continue
+
+ # 第一次出现 reasoning_content,添加 开始标签
+ if not thinking_started:
+ thinking_started = True
+ delta_content = '\n' + reasoning_content
+ else:
+ # 继续输出 reasoning_content
+ delta_content = reasoning_content
+ elif thinking_started and not thinking_ended and delta_content:
+ # reasoning_content 结束,normal content 开始,添加 结束标签
+ thinking_ended = True
+ delta_content = '\n\n' + delta_content
+
+ # 处理 content 中已有的 标签(如果需要移除)
+ # if delta_content and remove_think and '' in delta_content:
+ # import re
+ #
+ # # 移除 标签及其内容
+ # delta_content = re.sub(r'.*?', '', delta_content, flags=re.DOTALL)
+
+ # 处理工具调用增量
+ # delta_tool_calls = None
+ if delta.get('tool_calls'):
+ for tool_call in delta['tool_calls']:
+ if tool_call['id'] and tool_call['function']['name']:
+ tool_id = tool_call['id']
+ tool_name = tool_call['function']['name']
+ else:
+ tool_call['id'] = tool_id
+ tool_call['function']['name'] = tool_name
+ if tool_call['type'] is None:
+ tool_call['type'] = 'function'
+
+ # 跳过空的第一个 chunk(只有 role 没有内容)
+ if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'):
+ chunk_idx += 1
+ continue
+ # 构建 MessageChunk - 只包含增量内容
+ chunk_data = {
+ 'role': role,
+ 'content': delta_content if delta_content else None,
+ 'tool_calls': delta.get('tool_calls'),
+ 'is_final': bool(finish_reason),
+ }
+
+ # 移除 None 值
+ chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
+
+ yield provider_message.MessageChunk(**chunk_data)
+ chunk_idx += 1
+
+ async def _closure(
+ self,
+ query: pipeline_query.Query,
+ req_messages: list[dict],
+ use_model: requester.RuntimeLLMModel,
+ use_funcs: list[resource_tool.LLMTool] = None,
+ extra_args: dict[str, typing.Any] = {},
+ remove_think: bool = False,
+ ) -> tuple[provider_message.Message, dict]:
+ self.client.api_key = use_model.provider.token_mgr.get_token()
+
+ args = {}
+ args['model'] = use_model.model_entity.name
+
+ if use_funcs:
+ tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
+
+ if tools:
+ args['tools'] = tools
+
+ # 设置此次请求中的messages
+ messages = req_messages.copy()
+
+ # 检查vision
+ for msg in messages:
+ if 'content' in msg and isinstance(msg['content'], list):
+ for me in msg['content']:
+ if me['type'] == 'image_base64':
+ me['image_url'] = {'url': me['image_base64']}
+ me['type'] = 'image_url'
+ del me['image_base64']
+
+ args['messages'] = messages
+
+ # 发送请求
+
+ resp = await self._req(args, extra_body=extra_args)
+ # 处理请求结果
+ message = await self._make_msg(resp, remove_think)
+
+ # Extract token usage from response
+ usage_info = {}
+ if hasattr(resp, 'usage') and resp.usage:
+ usage_info['input_tokens'] = resp.usage.prompt_tokens or 0
+ usage_info['output_tokens'] = resp.usage.completion_tokens or 0
+ usage_info['total_tokens'] = resp.usage.total_tokens or 0
+
+ return message, usage_info
+
+ async def invoke_llm(
+ self,
+ query: pipeline_query.Query,
+ model: requester.RuntimeLLMModel,
+ messages: typing.List[provider_message.Message],
+ funcs: typing.List[resource_tool.LLMTool] = None,
+ extra_args: dict[str, typing.Any] = {},
+ remove_think: bool = False,
+ ) -> tuple[provider_message.Message, dict]:
+ """Invoke LLM and return message with usage info"""
+ req_messages = [] # req_messages 仅用于类内,外部同步由 query.messages 进行
+ for m in messages:
+ msg_dict = m.dict(exclude_none=True)
+ content = msg_dict.get('content')
+ if isinstance(content, list):
+ # 检查 content 列表中是否每个部分都是文本
+ if all(isinstance(part, dict) and part.get('type') == 'text' for part in content):
+ # 将所有文本部分合并为一个字符串
+ msg_dict['content'] = '\n'.join(part['text'] for part in content)
+ req_messages.append(msg_dict)
+
+ try:
+ msg, usage_info = await self._closure(
+ query=query,
+ req_messages=req_messages,
+ use_model=model,
+ use_funcs=funcs,
+ extra_args=extra_args,
+ remove_think=remove_think,
+ )
+ return msg, usage_info
+ except asyncio.TimeoutError:
+ raise errors.RequesterError('请求超时')
+ except openai.BadRequestError as e:
+ error_message = str(e.message) if hasattr(e, 'message') else str(e)
+ if 'context_length_exceeded' in str(e):
+ raise errors.RequesterError(f'上文过长,请重置会话: {error_message}')
+ else:
+ raise errors.RequesterError(f'请求参数错误: {error_message}')
+ except openai.AuthenticationError as e:
+ error_message = str(e.message) if hasattr(e, 'message') else str(e)
+ raise errors.RequesterError(f'无效的 api-key: {error_message}')
+ except openai.NotFoundError as e:
+ error_message = str(e.message) if hasattr(e, 'message') else str(e)
+ raise errors.RequesterError(f'请求路径错误: {error_message}')
+ except openai.RateLimitError as e:
+ error_message = str(e.message) if hasattr(e, 'message') else str(e)
+ raise errors.RequesterError(f'请求过于频繁或余额不足: {error_message}')
+ except openai.APIConnectionError as e:
+ error_message = f'连接错误: {str(e)}'
+ raise errors.RequesterError(error_message)
+ except openai.APIError as e:
+ error_message = str(e.message) if hasattr(e, 'message') else str(e)
+ raise errors.RequesterError(f'请求错误: {error_message}')
+
+ async def invoke_embedding(
+ self,
+ model: requester.RuntimeEmbeddingModel,
+ input_text: list[str],
+ extra_args: dict[str, typing.Any] = {},
+ ) -> tuple[list[list[float]], dict]:
+ """调用 Embedding API, returns (embeddings, usage_info)"""
+ self.client.api_key = model.provider.token_mgr.get_token()
+
+ args = {
+ 'model': model.model_entity.name,
+ 'input': input_text,
+ }
+
+ if model.model_entity.extra_args:
+ args.update(model.model_entity.extra_args)
+
+ args.update(extra_args)
+
+ try:
+ resp = await self.client.embeddings.create(**args)
+
+ # Extract usage info
+ usage_info = {}
+ if hasattr(resp, 'usage') and resp.usage:
+ usage_info['prompt_tokens'] = resp.usage.prompt_tokens or 0
+ usage_info['total_tokens'] = resp.usage.total_tokens or 0
+
+ return [d.embedding for d in resp.data], usage_info
+ except asyncio.TimeoutError:
+ raise errors.RequesterError('请求超时')
+ except openai.BadRequestError as e:
+ raise errors.RequesterError(f'请求参数错误: {e.message}')
+
+ async def invoke_llm_stream(
+ self,
+ query: pipeline_query.Query,
+ model: requester.RuntimeLLMModel,
+ messages: typing.List[provider_message.Message],
+ funcs: typing.List[resource_tool.LLMTool] = None,
+ extra_args: dict[str, typing.Any] = {},
+ remove_think: bool = False,
+ ) -> provider_message.MessageChunk:
+ req_messages = [] # req_messages 仅用于类内,外部同步由 query.messages 进行
+ for m in messages:
+ msg_dict = m.dict(exclude_none=True)
+ content = msg_dict.get('content')
+ if isinstance(content, list):
+ # 检查 content 列表中是否每个部分都是文本
+ if all(isinstance(part, dict) and part.get('type') == 'text' for part in content):
+ # 将所有文本部分合并为一个字符串
+ msg_dict['content'] = '\n'.join(part['text'] for part in content)
+ req_messages.append(msg_dict)
+
+ try:
+ async for item in self._closure_stream(
+ query=query,
+ req_messages=req_messages,
+ use_model=model,
+ use_funcs=funcs,
+ extra_args=extra_args,
+ remove_think=remove_think,
+ ):
+ yield item
+
+ except asyncio.TimeoutError:
+ raise errors.RequesterError('请求超时')
+ except openai.BadRequestError as e:
+ if 'context_length_exceeded' in e.message:
+ raise errors.RequesterError(f'上文过长,请重置会话: {e.message}')
+ else:
+ raise errors.RequesterError(f'请求参数错误: {e.message}')
+ except openai.AuthenticationError as e:
+ raise errors.RequesterError(f'无效的 api-key: {e.message}')
+ except openai.NotFoundError as e:
+ raise errors.RequesterError(f'请求路径错误: {e.message}')
+ except openai.RateLimitError as e:
+ raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}')
+ except openai.APIError as e:
+ raise errors.RequesterError(f'请求错误: {e.message}')
+
+ async def invoke_rerank(
+ self,
+ model: requester.RuntimeRerankModel,
+ query: str,
+ documents: typing.List[str],
+ extra_args: dict[str, typing.Any] = {},
+ ) -> typing.List[dict]:
+ """Standard /rerank endpoint (Jina/Cohere/SiliconFlow/Voyage/DashScope compatible)
+
+ Supports extra_args from model.extra_args:
+ - rerank_url: full URL override (e.g. "https://dashscope.aliyuncs.com/compatible-api/v1/reranks")
+ - rerank_path: path override appended to base_url (e.g. "reranks" instead of default "rerank")
+ - Any other fields are merged into the request payload.
+ """
+ api_key = model.provider.token_mgr.get_token()
+ base_url = self.requester_cfg.get('base_url', '').rstrip('/')
+ timeout = self.requester_cfg.get('timeout', 120)
+
+ merged_args = {}
+ if model.model_entity.extra_args:
+ merged_args.update(model.model_entity.extra_args)
+ if extra_args:
+ merged_args.update(extra_args)
+
+ rerank_url = merged_args.pop('rerank_url', None)
+ rerank_path = merged_args.pop('rerank_path', 'rerank')
+ if not rerank_url:
+ rerank_url = f'{base_url}/{rerank_path}'
+
+ headers = {
+ 'Content-Type': 'application/json',
+ 'Authorization': f'Bearer {api_key}',
+ }
+
+ payload = {
+ 'model': model.model_entity.name,
+ 'query': query,
+ 'documents': documents[:64],
+ 'top_n': min(len(documents), 64),
+ }
+
+ if merged_args:
+ payload.update(merged_args)
+
+ try:
+ async with httpx.AsyncClient(trust_env=True, timeout=timeout) as client:
+ resp = await client.post(rerank_url, headers=headers, json=payload)
+ resp.raise_for_status()
+ data = resp.json()
+
+ results = self._parse_rerank_response(data)
+
+ if results:
+ scores = [r.get('relevance_score', 0.0) for r in results]
+ min_score = min(scores)
+ max_score = max(scores)
+ if max_score - min_score > 1e-6:
+ for r in results:
+ r['relevance_score'] = (r['relevance_score'] - min_score) / (max_score - min_score)
+
+ return results
+ except httpx.HTTPStatusError as e:
+ raise errors.RequesterError(f'Rerank request failed: {e.response.status_code} - {e.response.text}')
+ except httpx.TimeoutException:
+ raise errors.RequesterError('Rerank request timed out')
+ except Exception as e:
+ raise errors.RequesterError(f'Rerank request error: {str(e)}')
+
+ @staticmethod
+ def _parse_rerank_response(data: dict) -> typing.List[dict]:
+ """Parse rerank response from various providers.
+
+ Handles:
+ - Jina/Cohere/SiliconFlow: {"results": [{"index", "relevance_score"}]}
+ - Voyage AI: {"data": [{"index", "relevance_score"}]}
+ - DashScope: {"output": {"results": [{"index", "relevance_score"}]}}
+ """
+ if 'results' in data:
+ return data['results']
+ if 'data' in data:
+ return data['data']
+ if 'output' in data and isinstance(data['output'], dict):
+ return data['output'].get('results', [])
+ return []
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/chatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/chatcmpl.yaml
index 8004a7b2..21bd6a05 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/chatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/chatcmpl.yaml
@@ -22,10 +22,12 @@ spec:
type: integer
required: true
default: 120
- # LiteLLM provider prefix - when set, uses unified LiteLLMRequester
- litellm_provider: openai
support_type:
- llm
- text-embedding
- rerank
provider_category: manufacturer
+execution:
+ python:
+ path: ./chatcmpl.py
+ attr: OpenAIChatCompletions
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/coherererank.yaml b/src/langbot/pkg/provider/modelmgr/requesters/coherererank.yaml
index 6ac8052d..f1ca209b 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/coherererank.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/coherererank.yaml
@@ -22,7 +22,10 @@ spec:
type: integer
required: true
default: 120
- litellm_provider: cohere
support_type:
- rerank
provider_category: manufacturer
+execution:
+ python:
+ path: ./chatcmpl.py
+ attr: OpenAIChatCompletions
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/compsharechatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/compsharechatcmpl.py
new file mode 100644
index 00000000..d272e721
--- /dev/null
+++ b/src/langbot/pkg/provider/modelmgr/requesters/compsharechatcmpl.py
@@ -0,0 +1,17 @@
+from __future__ import annotations
+
+import typing
+import openai
+
+from . import chatcmpl
+
+
+class CompShareChatCompletions(chatcmpl.OpenAIChatCompletions):
+ """CompShare ChatCompletion API 请求器"""
+
+ client: openai.AsyncClient
+
+ default_config: dict[str, typing.Any] = {
+ 'base_url': 'https://api.modelverse.cn/v1',
+ 'timeout': 120,
+ }
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/compsharechatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/compsharechatcmpl.yaml
index f122dffe..92fcafdc 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/compsharechatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/compsharechatcmpl.yaml
@@ -22,7 +22,10 @@ spec:
type: integer
required: true
default: 120
- litellm_provider: openai
support_type:
- llm
provider_category: maas
+execution:
+ python:
+ path: ./compsharechatcmpl.py
+ attr: CompShareChatCompletions
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/deepseekchatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/deepseekchatcmpl.py
new file mode 100644
index 00000000..5bcbd40c
--- /dev/null
+++ b/src/langbot/pkg/provider/modelmgr/requesters/deepseekchatcmpl.py
@@ -0,0 +1,67 @@
+from __future__ import annotations
+
+import typing
+
+from . import chatcmpl
+from .. import errors, requester
+import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
+import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
+import langbot_plugin.api.entities.builtin.provider.message as provider_message
+
+
+class DeepseekChatCompletions(chatcmpl.OpenAIChatCompletions):
+ """Deepseek ChatCompletion API 请求器"""
+
+ default_config: dict[str, typing.Any] = {
+ 'base_url': 'https://api.deepseek.com',
+ 'timeout': 120,
+ }
+
+ async def _closure(
+ self,
+ query: pipeline_query.Query,
+ req_messages: list[dict],
+ use_model: requester.RuntimeLLMModel,
+ use_funcs: list[resource_tool.LLMTool] = None,
+ extra_args: dict[str, typing.Any] = {},
+ remove_think: bool = False,
+ ) -> tuple[provider_message.Message, dict]:
+ self.client.api_key = use_model.provider.token_mgr.get_token()
+
+ args = {}
+ args['model'] = use_model.model_entity.name
+
+ if use_funcs:
+ tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
+
+ if tools:
+ args['tools'] = tools
+
+ # 设置此次请求中的messages
+ messages = req_messages
+
+ # deepseek 不支持多模态,把content都转换成纯文字
+ for m in messages:
+ if 'content' in m and isinstance(m['content'], list):
+ m['content'] = ' '.join([c['text'] for c in m['content'] if 'text' in c])
+
+ args['messages'] = messages
+
+ # 发送请求
+ resp = await self._req(args, extra_body=extra_args)
+
+ # print(resp)
+
+ if resp is None:
+ raise errors.RequesterError('接口返回为空,请确定模型提供商服务是否正常')
+ # 处理请求结果
+ message = await self._make_msg(resp, remove_think)
+
+ # Extract token usage from response
+ usage_info = {}
+ if hasattr(resp, 'usage') and resp.usage:
+ usage_info['input_tokens'] = resp.usage.prompt_tokens or 0
+ usage_info['output_tokens'] = resp.usage.completion_tokens or 0
+ usage_info['total_tokens'] = resp.usage.total_tokens or 0
+
+ return message, usage_info
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/deepseekchatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/deepseekchatcmpl.yaml
index 8a9dbca7..8ef1fcf9 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/deepseekchatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/deepseekchatcmpl.yaml
@@ -22,7 +22,10 @@ spec:
type: integer
required: true
default: 120
- litellm_provider: deepseek
support_type:
- llm
provider_category: manufacturer
+execution:
+ python:
+ path: ./deepseekchatcmpl.py
+ attr: DeepseekChatCompletions
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/geminichatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/geminichatcmpl.py
new file mode 100644
index 00000000..956b49f6
--- /dev/null
+++ b/src/langbot/pkg/provider/modelmgr/requesters/geminichatcmpl.py
@@ -0,0 +1,205 @@
+from __future__ import annotations
+
+import typing
+import httpx
+
+from . import chatcmpl
+
+import uuid
+
+from .. import requester
+import langbot_plugin.api.entities.builtin.provider.message as provider_message
+import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
+import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
+
+
+class GeminiChatCompletions(chatcmpl.OpenAIChatCompletions):
+ """Google Gemini API 请求器"""
+
+ default_config: dict[str, typing.Any] = {
+ 'base_url': 'https://generativelanguage.googleapis.com/v1beta/openai',
+ 'timeout': 120,
+ }
+
+ async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]:
+ models_url = 'https://generativelanguage.googleapis.com/v1beta/models'
+ params = {'key': api_key} if api_key else {}
+
+ all_models: list[dict[str, typing.Any]] = []
+ next_page_token = ''
+ last_payload: dict[str, typing.Any] = {}
+
+ async with httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']) as client:
+ while True:
+ request_params = dict(params)
+ if next_page_token:
+ request_params['pageToken'] = next_page_token
+
+ response = await client.get(models_url, params=request_params)
+ response.raise_for_status()
+ payload = response.json()
+ last_payload = payload
+
+ for item in payload.get('models', []):
+ model_name = item.get('name', '')
+ model_id = model_name.replace('models/', '', 1)
+ if not model_id:
+ continue
+
+ supported_methods = item.get('supportedGenerationMethods', []) or []
+ if 'embedContent' in supported_methods and 'generateContent' not in supported_methods:
+ model_type = 'embedding'
+ else:
+ model_type = 'llm'
+
+ all_models.append(
+ {
+ 'id': model_id,
+ 'name': model_id,
+ 'type': model_type,
+ 'abilities': self._infer_model_abilities(item, model_id),
+ 'display_name': item.get('displayName') or None,
+ 'description': item.get('description') or None,
+ 'context_length': item.get('inputTokenLimit'),
+ 'input_modalities': self._normalize_modalities(item.get('inputModalities')),
+ 'output_modalities': self._normalize_modalities(item.get('outputModalities')),
+ }
+ )
+
+ next_page_token = payload.get('nextPageToken', '')
+ if not next_page_token:
+ break
+
+ all_models.sort(key=lambda item: (item['type'] != 'llm', item['name'].lower()))
+ return {
+ 'models': all_models,
+ 'debug': {
+ 'request': {
+ 'method': 'GET',
+ 'url': models_url,
+ 'query': {'key': self._mask_api_key(api_key)} if api_key else {},
+ },
+ 'response': last_payload,
+ },
+ }
+
+ async def _closure_stream(
+ self,
+ query: pipeline_query.Query,
+ req_messages: list[dict],
+ use_model: requester.RuntimeLLMModel,
+ use_funcs: list[resource_tool.LLMTool] = None,
+ extra_args: dict[str, typing.Any] = {},
+ remove_think: bool = False,
+ ) -> provider_message.MessageChunk:
+ self.client.api_key = use_model.provider.token_mgr.get_token()
+
+ args = {}
+ args['model'] = use_model.model_entity.name
+
+ if use_funcs:
+ tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
+ if tools:
+ args['tools'] = tools
+
+ # 设置此次请求中的messages
+ messages = req_messages.copy()
+
+ # 检查vision
+ for msg in messages:
+ if 'content' in msg and isinstance(msg['content'], list):
+ for me in msg['content']:
+ if me['type'] == 'image_base64':
+ me['image_url'] = {'url': me['image_base64']}
+ me['type'] = 'image_url'
+ del me['image_base64']
+
+ args['messages'] = messages
+ args['stream'] = True
+
+ # 流式处理状态
+ # tool_calls_map: dict[str, provider_message.ToolCall] = {}
+ chunk_idx = 0
+ thinking_started = False
+ thinking_ended = False
+ role = 'assistant' # 默认角色
+ tool_id = ''
+ tool_name = ''
+ # accumulated_reasoning = '' # 仅用于判断何时结束思维链
+
+ async for chunk in self._req_stream(args, extra_body=extra_args):
+ # 解析 chunk 数据
+
+ if hasattr(chunk, 'choices') and chunk.choices:
+ choice = chunk.choices[0]
+ delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
+
+ finish_reason = getattr(choice, 'finish_reason', None)
+ else:
+ delta = {}
+ finish_reason = None
+ # 从第一个 chunk 获取 role,后续使用这个 role
+ if 'role' in delta and delta['role']:
+ role = delta['role']
+
+ # 获取增量内容
+ delta_content = delta.get('content', '')
+ reasoning_content = delta.get('reasoning_content', '')
+
+ # 处理 reasoning_content
+ if reasoning_content:
+ # accumulated_reasoning += reasoning_content
+ # 如果设置了 remove_think,跳过 reasoning_content
+ if remove_think:
+ chunk_idx += 1
+ continue
+
+ # 第一次出现 reasoning_content,添加 开始标签
+ if not thinking_started:
+ thinking_started = True
+ delta_content = '\n' + reasoning_content
+ else:
+ # 继续输出 reasoning_content
+ delta_content = reasoning_content
+ elif thinking_started and not thinking_ended and delta_content:
+ # reasoning_content 结束,normal content 开始,添加 结束标签
+ thinking_ended = True
+ delta_content = '\n\n' + delta_content
+
+ # 处理 content 中已有的 标签(如果需要移除)
+ # if delta_content and remove_think and '' in delta_content:
+ # import re
+ #
+ # # 移除 标签及其内容
+ # delta_content = re.sub(r'.*?', '', delta_content, flags=re.DOTALL)
+
+ # 处理工具调用增量
+ # delta_tool_calls = None
+ if delta.get('tool_calls'):
+ for tool_call in delta['tool_calls']:
+ if tool_call['id'] == '' and tool_id == '':
+ tool_id = str(uuid.uuid4())
+ if tool_call['function']['name']:
+ tool_name = tool_call['function']['name']
+ tool_call['id'] = tool_id
+ tool_call['function']['name'] = tool_name
+ if tool_call['type'] is None:
+ tool_call['type'] = 'function'
+
+ # 跳过空的第一个 chunk(只有 role 没有内容)
+ if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'):
+ chunk_idx += 1
+ continue
+ # 构建 MessageChunk - 只包含增量内容
+ chunk_data = {
+ 'role': role,
+ 'content': delta_content if delta_content else None,
+ 'tool_calls': delta.get('tool_calls'),
+ 'is_final': bool(finish_reason),
+ }
+
+ # 移除 None 值
+ chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
+
+ yield provider_message.MessageChunk(**chunk_data)
+ chunk_idx += 1
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/geminichatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/geminichatcmpl.yaml
index 53d71dc9..fdebe9b9 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/geminichatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/geminichatcmpl.yaml
@@ -22,7 +22,10 @@ spec:
type: integer
required: true
default: 120
- litellm_provider: gemini
support_type:
- llm
provider_category: manufacturer
+execution:
+ python:
+ path: ./geminichatcmpl.py
+ attr: GeminiChatCompletions
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/giteeaichatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/giteeaichatcmpl.py
new file mode 100644
index 00000000..4e295e9f
--- /dev/null
+++ b/src/langbot/pkg/provider/modelmgr/requesters/giteeaichatcmpl.py
@@ -0,0 +1,15 @@
+from __future__ import annotations
+
+
+import typing
+
+from . import ppiochatcmpl
+
+
+class GiteeAIChatCompletions(ppiochatcmpl.PPIOChatCompletions):
+ """Gitee AI ChatCompletions API 请求器"""
+
+ default_config: dict[str, typing.Any] = {
+ 'base_url': 'https://ai.gitee.com/v1',
+ 'timeout': 120,
+ }
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/giteeaichatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/giteeaichatcmpl.yaml
index 1f208534..b7b158a7 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/giteeaichatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/giteeaichatcmpl.yaml
@@ -22,9 +22,12 @@ spec:
type: integer
required: true
default: 120
- litellm_provider: openai
support_type:
- llm
- text-embedding
- rerank
provider_category: maas
+execution:
+ python:
+ path: ./giteeaichatcmpl.py
+ attr: GiteeAIChatCompletions
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/jiekouaichatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/jiekouaichatcmpl.py
new file mode 100644
index 00000000..305ae21f
--- /dev/null
+++ b/src/langbot/pkg/provider/modelmgr/requesters/jiekouaichatcmpl.py
@@ -0,0 +1,208 @@
+from __future__ import annotations
+
+import openai
+import typing
+
+from . import chatcmpl
+from .. import requester
+import openai.types.chat.chat_completion as chat_completion
+import re
+import langbot_plugin.api.entities.builtin.provider.message as provider_message
+import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
+import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
+
+
+class JieKouAIChatCompletions(chatcmpl.OpenAIChatCompletions):
+ """接口 AI ChatCompletion API 请求器"""
+
+ client: openai.AsyncClient
+
+ default_config: dict[str, typing.Any] = {
+ 'base_url': 'https://api.jiekou.ai/openai',
+ 'timeout': 120,
+ }
+
+ is_think: bool = False
+
+ async def _make_msg(
+ self,
+ chat_completion: chat_completion.ChatCompletion,
+ remove_think: bool,
+ ) -> provider_message.Message:
+ chatcmpl_message = chat_completion.choices[0].message.model_dump()
+ # print(chatcmpl_message.keys(), chatcmpl_message.values())
+
+ # 确保 role 字段存在且不为 None
+ if 'role' not in chatcmpl_message or chatcmpl_message['role'] is None:
+ chatcmpl_message['role'] = 'assistant'
+
+ reasoning_content = chatcmpl_message['reasoning_content'] if 'reasoning_content' in chatcmpl_message else None
+
+ # deepseek的reasoner模型
+ chatcmpl_message['content'] = await self._process_thinking_content(
+ chatcmpl_message['content'], reasoning_content, remove_think
+ )
+
+ # 移除 reasoning_content 字段,避免传递给 Message
+ if 'reasoning_content' in chatcmpl_message:
+ del chatcmpl_message['reasoning_content']
+
+ message = provider_message.Message(**chatcmpl_message)
+
+ return message
+
+ async def _process_thinking_content(
+ self,
+ content: str,
+ reasoning_content: str = None,
+ remove_think: bool = False,
+ ) -> tuple[str, str]:
+ """处理思维链内容
+
+ Args:
+ content: 原始内容
+ reasoning_content: reasoning_content 字段内容
+ remove_think: 是否移除思维链
+
+ Returns:
+ 处理后的内容
+ """
+ if remove_think:
+ content = re.sub(r'.*?', '', content, flags=re.DOTALL)
+ else:
+ if reasoning_content is not None:
+ content = '\n' + reasoning_content + '\n\n' + content
+ return content
+
+ async def _make_msg_chunk(
+ self,
+ delta: dict[str, typing.Any],
+ idx: int,
+ ) -> provider_message.MessageChunk:
+ # 处理流式chunk和完整响应的差异
+ # print(chat_completion.choices[0])
+
+ # 确保 role 字段存在且不为 None
+ if 'role' not in delta or delta['role'] is None:
+ delta['role'] = 'assistant'
+
+ reasoning_content = delta['reasoning_content'] if 'reasoning_content' in delta else None
+
+ delta['content'] = '' if delta['content'] is None else delta['content']
+ # print(reasoning_content)
+
+ # deepseek的reasoner模型
+
+ if reasoning_content is not None:
+ delta['content'] += reasoning_content
+
+ message = provider_message.MessageChunk(**delta)
+
+ return message
+
+ async def _closure_stream(
+ self,
+ query: pipeline_query.Query,
+ req_messages: list[dict],
+ use_model: requester.RuntimeLLMModel,
+ use_funcs: list[resource_tool.LLMTool] = None,
+ extra_args: dict[str, typing.Any] = {},
+ remove_think: bool = False,
+ ) -> provider_message.Message | typing.AsyncGenerator[provider_message.MessageChunk, None]:
+ self.client.api_key = use_model.provider.token_mgr.get_token()
+
+ args = {}
+ args['model'] = use_model.model_entity.name
+
+ if use_funcs:
+ tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
+
+ if tools:
+ args['tools'] = tools
+
+ # 设置此次请求中的messages
+ messages = req_messages.copy()
+
+ # 检查vision
+ for msg in messages:
+ if 'content' in msg and isinstance(msg['content'], list):
+ for me in msg['content']:
+ if me['type'] == 'image_base64':
+ me['image_url'] = {'url': me['image_base64']}
+ me['type'] = 'image_url'
+ del me['image_base64']
+
+ args['messages'] = messages
+ args['stream'] = True
+
+ # tool_calls_map: dict[str, provider_message.ToolCall] = {}
+ chunk_idx = 0
+ thinking_started = False
+ thinking_ended = False
+ role = 'assistant' # 默认角色
+ async for chunk in self._req_stream(args, extra_body=extra_args):
+ # 解析 chunk 数据
+ if hasattr(chunk, 'choices') and chunk.choices:
+ choice = chunk.choices[0]
+ delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
+ finish_reason = getattr(choice, 'finish_reason', None)
+ else:
+ delta = {}
+ finish_reason = None
+
+ # 从第一个 chunk 获取 role,后续使用这个 role
+ if 'role' in delta and delta['role']:
+ role = delta['role']
+
+ # 获取增量内容
+ delta_content = delta.get('content', '')
+ # reasoning_content = delta.get('reasoning_content', '')
+
+ if remove_think:
+ if delta['content'] is not None:
+ if '' in delta['content'] and not thinking_started and not thinking_ended:
+ thinking_started = True
+ continue
+ elif delta['content'] == r'' and not thinking_ended:
+ thinking_ended = True
+ continue
+ elif thinking_ended and delta['content'] == '\n\n' and thinking_started:
+ thinking_started = False
+ continue
+ elif thinking_started and not thinking_ended:
+ continue
+
+ # delta_tool_calls = None
+ if delta.get('tool_calls'):
+ for tool_call in delta['tool_calls']:
+ if tool_call['id'] and tool_call['function']['name']:
+ tool_id = tool_call['id']
+ tool_name = tool_call['function']['name']
+
+ if tool_call['id'] is None:
+ tool_call['id'] = tool_id
+ if tool_call['function']['name'] is None:
+ tool_call['function']['name'] = tool_name
+ if tool_call['function']['arguments'] is None:
+ tool_call['function']['arguments'] = ''
+ if tool_call['type'] is None:
+ tool_call['type'] = 'function'
+
+ # 跳过空的第一个 chunk(只有 role 没有内容)
+ if chunk_idx == 0 and not delta_content and not delta.get('tool_calls'):
+ chunk_idx += 1
+ continue
+
+ # 构建 MessageChunk - 只包含增量内容
+ chunk_data = {
+ 'role': role,
+ 'content': delta_content if delta_content else None,
+ 'tool_calls': delta.get('tool_calls'),
+ 'is_final': bool(finish_reason),
+ }
+
+ # 移除 None 值
+ chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
+
+ yield provider_message.MessageChunk(**chunk_data)
+ chunk_idx += 1
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/jiekouaichatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/jiekouaichatcmpl.yaml
index 121fbae1..3c791d73 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/jiekouaichatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/jiekouaichatcmpl.yaml
@@ -29,8 +29,11 @@ spec:
type: int
required: true
default: 120
- litellm_provider: openai
support_type:
- llm
- text-embedding
provider_category: maas
+execution:
+ python:
+ path: ./jiekouaichatcmpl.py
+ attr: JieKouAIChatCompletions
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/jinarerank.yaml b/src/langbot/pkg/provider/modelmgr/requesters/jinarerank.yaml
index 2329655d..3b448e38 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/jinarerank.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/jinarerank.yaml
@@ -22,7 +22,10 @@ spec:
type: integer
required: true
default: 120
- litellm_provider: jina
support_type:
- rerank
provider_category: manufacturer
+execution:
+ python:
+ path: ./chatcmpl.py
+ attr: OpenAIChatCompletions
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.py
new file mode 100644
index 00000000..c9060c1b
--- /dev/null
+++ b/src/langbot/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.py
@@ -0,0 +1,17 @@
+from __future__ import annotations
+
+import typing
+import openai
+
+from . import chatcmpl
+
+
+class LmStudioChatCompletions(chatcmpl.OpenAIChatCompletions):
+ """LMStudio ChatCompletion API 请求器"""
+
+ client: openai.AsyncClient
+
+ default_config: dict[str, typing.Any] = {
+ 'base_url': 'http://127.0.0.1:1234/v1',
+ 'timeout': 120,
+ }
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.yaml
index b1b5bfac..81dc82cf 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.yaml
@@ -22,8 +22,11 @@ spec:
type: integer
required: true
default: 120
- litellm_provider: openai
support_type:
- llm
- text-embedding
provider_category: self-hosted
+execution:
+ python:
+ path: ./lmstudiochatcmpl.py
+ attr: LmStudioChatCompletions
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/modelscopechatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/modelscopechatcmpl.py
new file mode 100644
index 00000000..ed5d8795
--- /dev/null
+++ b/src/langbot/pkg/provider/modelmgr/requesters/modelscopechatcmpl.py
@@ -0,0 +1,561 @@
+from __future__ import annotations
+
+import asyncio
+import typing
+
+import openai
+import openai.types.chat.chat_completion as chat_completion
+import httpx
+
+from .. import entities, errors, requester
+import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
+import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
+import langbot_plugin.api.entities.builtin.provider.message as provider_message
+
+
+class ModelScopeChatCompletions(requester.ProviderAPIRequester):
+ """ModelScope ChatCompletion API 请求器"""
+
+ client: openai.AsyncClient
+
+ default_config: dict[str, typing.Any] = {
+ 'base_url': 'https://api-inference.modelscope.cn/v1',
+ 'timeout': 120,
+ }
+
+ async def initialize(self):
+ self.client = openai.AsyncClient(
+ api_key='',
+ base_url=self.requester_cfg['base_url'],
+ timeout=self.requester_cfg['timeout'],
+ http_client=httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']),
+ )
+
+ def _mask_api_key(self, api_key: str | None) -> str:
+ if not api_key:
+ return ''
+ if len(api_key) <= 8:
+ return '****'
+ return f'{api_key[:4]}...{api_key[-4:]}'
+
+ def _infer_model_type(self, model_id: str) -> str:
+ normalized_model_id = (model_id or '').lower()
+ embedding_keywords = (
+ 'embedding',
+ 'embed',
+ 'bge-',
+ 'e5-',
+ 'm3e',
+ 'gte-',
+ 'multilingual-e5',
+ 'text-embedding',
+ )
+ return 'embedding' if any(keyword in normalized_model_id for keyword in embedding_keywords) else 'llm'
+
+ def _infer_model_abilities(self, item: dict[str, typing.Any], model_id: str) -> list[str]:
+ normalized_model_id = (model_id or '').lower()
+ abilities: set[str] = set()
+
+ def _flatten(value: typing.Any) -> list[str]:
+ if value is None:
+ return []
+ if isinstance(value, str):
+ return [value.lower()]
+ if isinstance(value, dict):
+ flattened: list[str] = []
+ for nested_value in value.values():
+ flattened.extend(_flatten(nested_value))
+ return flattened
+ if isinstance(value, (list, tuple, set)):
+ flattened: list[str] = []
+ for nested_value in value:
+ flattened.extend(_flatten(nested_value))
+ return flattened
+ return [str(value).lower()]
+
+ capability_tokens = _flatten(item.get('capabilities'))
+ capability_tokens.extend(_flatten(item.get('modalities')))
+ capability_tokens.extend(_flatten(item.get('input_modalities')))
+ capability_tokens.extend(_flatten(item.get('output_modalities')))
+ capability_tokens.extend(_flatten(item.get('supported_generation_methods')))
+ capability_tokens.extend(_flatten(item.get('supported_parameters')))
+ capability_tokens.extend(_flatten(item.get('architecture')))
+
+ combined_tokens = capability_tokens + [normalized_model_id]
+
+ vision_keywords = ('vision', 'image', 'file', 'video', 'multimodal', 'vl', 'ocr', 'omni')
+ function_call_keywords = ('function', 'tool', 'tools', 'tool_choice', 'tool_call', 'tool-use', 'tool_use')
+
+ if any(any(keyword in token for keyword in vision_keywords) for token in combined_tokens):
+ abilities.add('vision')
+
+ if any(any(keyword in token for keyword in function_call_keywords) for token in combined_tokens):
+ abilities.add('func_call')
+
+ return sorted(abilities)
+
+ def _normalize_modalities(self, value: typing.Any) -> list[str]:
+ normalized: list[str] = []
+
+ def _collect(item: typing.Any):
+ if item is None:
+ return
+ if isinstance(item, str):
+ for part in item.replace('->', ',').replace('+', ',').split(','):
+ token = part.strip().lower()
+ if token and token not in normalized:
+ normalized.append(token)
+ return
+ if isinstance(item, dict):
+ for nested in item.values():
+ _collect(nested)
+ return
+ if isinstance(item, (list, tuple, set)):
+ for nested in item:
+ _collect(nested)
+ return
+
+ _collect(value)
+ return normalized
+
+ def _extract_scan_metadata(self, item: dict[str, typing.Any], model_id: str) -> dict[str, typing.Any]:
+ display_name = item.get('name')
+ if not isinstance(display_name, str) or not display_name.strip() or display_name == model_id:
+ display_name = ''
+
+ description = item.get('description')
+ if not isinstance(description, str) or not description.strip():
+ description = ''
+
+ context_length = item.get('context_length')
+ if context_length is None and isinstance(item.get('top_provider'), dict):
+ context_length = item['top_provider'].get('context_length')
+
+ if not isinstance(context_length, int):
+ try:
+ context_length = int(context_length) if context_length is not None else None
+ except (TypeError, ValueError):
+ context_length = None
+
+ input_modalities = self._normalize_modalities(item.get('input_modalities'))
+ output_modalities = self._normalize_modalities(item.get('output_modalities'))
+
+ if isinstance(item.get('architecture'), dict):
+ if not input_modalities:
+ input_modalities = self._normalize_modalities(item['architecture'].get('input_modalities'))
+ if not output_modalities:
+ output_modalities = self._normalize_modalities(item['architecture'].get('output_modalities'))
+
+ owned_by = item.get('owned_by')
+ if not isinstance(owned_by, str) or not owned_by.strip():
+ owned_by = ''
+
+ return {
+ 'display_name': display_name or None,
+ 'description': description or None,
+ 'context_length': context_length,
+ 'owned_by': owned_by or None,
+ 'input_modalities': input_modalities,
+ 'output_modalities': output_modalities,
+ }
+
+ async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]:
+ headers = {}
+ if api_key:
+ headers['Authorization'] = f'Bearer {api_key}'
+
+ models_url = f'{self.requester_cfg["base_url"].rstrip("/")}/models'
+ async with httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']) as client:
+ response = await client.get(models_url, headers=headers)
+ response.raise_for_status()
+ payload = response.json()
+
+ models = []
+ for item in payload.get('data', []):
+ model_id = item.get('id')
+ if not model_id:
+ continue
+ models.append(
+ {
+ 'id': model_id,
+ 'name': model_id,
+ 'type': self._infer_model_type(model_id),
+ 'abilities': self._infer_model_abilities(item, model_id),
+ **self._extract_scan_metadata(item, model_id),
+ }
+ )
+
+ models.sort(key=lambda item: (item['type'] != 'llm', item['name'].lower()))
+ return {
+ 'models': models,
+ 'debug': {
+ 'request': {
+ 'method': 'GET',
+ 'url': models_url,
+ 'headers': {
+ 'Authorization': f'Bearer {self._mask_api_key(api_key)}' if api_key else '',
+ },
+ },
+ 'response': payload,
+ },
+ }
+
+ async def _req(
+ self,
+ query: pipeline_query.Query,
+ args: dict,
+ extra_body: dict = {},
+ remove_think: bool = False,
+ ) -> list[dict[str, typing.Any]]:
+ args['stream'] = True
+
+ chunk = None
+
+ pending_content = ''
+
+ tool_calls = []
+
+ resp_gen: openai.AsyncStream = await self.client.chat.completions.create(**args, extra_body=extra_body)
+
+ chunk_idx = 0
+ thinking_started = False
+ thinking_ended = False
+ tool_id = ''
+ tool_name = ''
+ message_delta = {}
+ async for chunk in resp_gen:
+ if not chunk or not chunk.id or not chunk.choices or not chunk.choices[0] or not chunk.choices[0].delta:
+ continue
+
+ delta = chunk.choices[0].delta.model_dump() if hasattr(chunk.choices[0], 'delta') else {}
+ reasoning_content = delta.get('reasoning_content')
+ # 处理 reasoning_content
+ if reasoning_content:
+ # accumulated_reasoning += reasoning_content
+ # 如果设置了 remove_think,跳过 reasoning_content
+ if remove_think:
+ chunk_idx += 1
+ continue
+
+ # 第一次出现 reasoning_content,添加 开始标签
+ if not thinking_started:
+ thinking_started = True
+ pending_content += '\n' + reasoning_content
+ else:
+ # 继续输出 reasoning_content
+ pending_content += reasoning_content
+ elif thinking_started and not thinking_ended and delta.get('content'):
+ # reasoning_content 结束,normal content 开始,添加 结束标签
+ thinking_ended = True
+ pending_content += '\n\n' + delta.get('content')
+
+ if delta.get('content') is not None:
+ pending_content += delta.get('content')
+
+ if delta.get('tool_calls') is not None:
+ for tool_call in delta.get('tool_calls'):
+ if tool_call['id'] != '':
+ tool_id = tool_call['id']
+ if tool_call['function']['name'] is not None:
+ tool_name = tool_call['function']['name']
+ if tool_call['function']['arguments'] is None:
+ continue
+ tool_call['id'] = tool_id
+ tool_call['name'] = tool_name
+ for tc in tool_calls:
+ if tc['index'] == tool_call['index']:
+ tc['function']['arguments'] += tool_call['function']['arguments']
+ break
+ else:
+ tool_calls.append(tool_call)
+
+ if chunk.choices[0].finish_reason is not None:
+ break
+ message_delta['content'] = pending_content
+ message_delta['role'] = 'assistant'
+
+ message_delta['tool_calls'] = tool_calls if tool_calls else None
+ return [message_delta]
+
+ async def _make_msg(
+ self,
+ chat_completion: list[dict[str, typing.Any]],
+ ) -> provider_message.Message:
+ chatcmpl_message = chat_completion[0]
+
+ # 确保 role 字段存在且不为 None
+ if 'role' not in chatcmpl_message or chatcmpl_message['role'] is None:
+ chatcmpl_message['role'] = 'assistant'
+
+ message = provider_message.Message(**chatcmpl_message)
+
+ return message
+
+ async def _closure(
+ self,
+ query: pipeline_query.Query,
+ req_messages: list[dict],
+ use_model: requester.RuntimeLLMModel,
+ use_funcs: list[resource_tool.LLMTool] = None,
+ extra_args: dict[str, typing.Any] = {},
+ remove_think: bool = False,
+ ) -> tuple[provider_message.Message, dict]:
+ self.client.api_key = use_model.provider.token_mgr.get_token()
+
+ args = {}
+ args['model'] = use_model.model_entity.name
+
+ if use_funcs:
+ tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
+
+ if tools:
+ args['tools'] = tools
+
+ # 设置此次请求中的messages
+ messages = req_messages.copy()
+
+ # 检查vision
+ for msg in messages:
+ if 'content' in msg and isinstance(msg['content'], list):
+ for me in msg['content']:
+ if me['type'] == 'image_base64':
+ me['image_url'] = {'url': me['image_base64']}
+ me['type'] = 'image_url'
+ del me['image_base64']
+
+ args['messages'] = messages
+
+ # 发送请求
+ resp = await self._req(query, args, extra_body=extra_args, remove_think=remove_think)
+
+ # 处理请求结果
+ message = await self._make_msg(resp)
+
+ # ModelScope uses streaming, usage info not available
+ usage_info = {}
+
+ return message, usage_info
+
+ async def _req_stream(
+ self,
+ args: dict,
+ extra_body: dict = {},
+ ) -> chat_completion.ChatCompletion:
+ async for chunk in await self.client.chat.completions.create(**args, extra_body=extra_body):
+ yield chunk
+
+ async def _closure_stream(
+ self,
+ query: pipeline_query.Query,
+ req_messages: list[dict],
+ use_model: requester.RuntimeLLMModel,
+ use_funcs: list[resource_tool.LLMTool] = None,
+ extra_args: dict[str, typing.Any] = {},
+ remove_think: bool = False,
+ ) -> provider_message.Message | typing.AsyncGenerator[provider_message.MessageChunk, None]:
+ self.client.api_key = use_model.provider.token_mgr.get_token()
+
+ args = {}
+ args['model'] = use_model.model_entity.name
+
+ if use_funcs:
+ tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
+
+ if tools:
+ args['tools'] = tools
+
+ # 设置此次请求中的messages
+ messages = req_messages.copy()
+
+ # 检查vision
+ for msg in messages:
+ if 'content' in msg and isinstance(msg['content'], list):
+ for me in msg['content']:
+ if me['type'] == 'image_base64':
+ me['image_url'] = {'url': me['image_base64']}
+ me['type'] = 'image_url'
+ del me['image_base64']
+
+ args['messages'] = messages
+ args['stream'] = True
+
+ # 流式处理状态
+ # tool_calls_map: dict[str, provider_message.ToolCall] = {}
+ chunk_idx = 0
+ thinking_started = False
+ thinking_ended = False
+ role = 'assistant' # 默认角色
+ # accumulated_reasoning = '' # 仅用于判断何时结束思维链
+
+ async for chunk in self._req_stream(args, extra_body=extra_args):
+ # 解析 chunk 数据
+ if hasattr(chunk, 'choices') and chunk.choices:
+ choice = chunk.choices[0]
+ delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
+ finish_reason = getattr(choice, 'finish_reason', None)
+ else:
+ delta = {}
+ finish_reason = None
+
+ # 从第一个 chunk 获取 role,后续使用这个 role
+ if 'role' in delta and delta['role']:
+ role = delta['role']
+
+ # 获取增量内容
+ delta_content = delta.get('content', '')
+ reasoning_content = delta.get('reasoning_content', '')
+
+ # 处理 reasoning_content
+ if reasoning_content:
+ # accumulated_reasoning += reasoning_content
+ # 如果设置了 remove_think,跳过 reasoning_content
+ if remove_think:
+ chunk_idx += 1
+ continue
+
+ # 第一次出现 reasoning_content,添加 开始标签
+ if not thinking_started:
+ thinking_started = True
+ delta_content = '\n' + reasoning_content
+ else:
+ # 继续输出 reasoning_content
+ delta_content = reasoning_content
+ elif thinking_started and not thinking_ended and delta_content:
+ # reasoning_content 结束,normal content 开始,添加 结束标签
+ thinking_ended = True
+ delta_content = '\n\n' + delta_content
+
+ # 处理 content 中已有的 标签(如果需要移除)
+ # if delta_content and remove_think and '' in delta_content:
+ # import re
+ #
+ # # 移除 标签及其内容
+ # delta_content = re.sub(r'.*?', '', delta_content, flags=re.DOTALL)
+
+ # 处理工具调用增量
+ if delta.get('tool_calls'):
+ for tool_call in delta['tool_calls']:
+ if tool_call['id'] != '':
+ tool_id = tool_call['id']
+ if tool_call['function']['name'] is not None:
+ tool_name = tool_call['function']['name']
+
+ if tool_call['type'] is None:
+ tool_call['type'] = 'function'
+ tool_call['id'] = tool_id
+ tool_call['function']['name'] = tool_name
+ tool_call['function']['arguments'] = (
+ '' if tool_call['function']['arguments'] is None else tool_call['function']['arguments']
+ )
+
+ # 跳过空的第一个 chunk(只有 role 没有内容)
+ if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'):
+ chunk_idx += 1
+ continue
+
+ # 构建 MessageChunk - 只包含增量内容
+ chunk_data = {
+ 'role': role,
+ 'content': delta_content if delta_content else None,
+ 'tool_calls': delta.get('tool_calls'),
+ 'is_final': bool(finish_reason),
+ }
+
+ # 移除 None 值
+ chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
+
+ yield provider_message.MessageChunk(**chunk_data)
+ chunk_idx += 1
+ # return
+
+ async def invoke_llm(
+ self,
+ query: pipeline_query.Query,
+ model: entities.LLMModelInfo,
+ messages: typing.List[provider_message.Message],
+ funcs: typing.List[resource_tool.LLMTool] = None,
+ extra_args: dict[str, typing.Any] = {},
+ remove_think: bool = False,
+ ) -> provider_message.Message:
+ req_messages = [] # req_messages 仅用于类内,外部同步由 query.messages 进行
+ for m in messages:
+ msg_dict = m.dict(exclude_none=True)
+ content = msg_dict.get('content')
+ if isinstance(content, list):
+ # 检查 content 列表中是否每个部分都是文本
+ if all(isinstance(part, dict) and part.get('type') == 'text' for part in content):
+ # 将所有文本部分合并为一个字符串
+ msg_dict['content'] = '\n'.join(part['text'] for part in content)
+ req_messages.append(msg_dict)
+
+ try:
+ return await self._closure(
+ query=query,
+ req_messages=req_messages,
+ use_model=model,
+ use_funcs=funcs,
+ extra_args=extra_args,
+ remove_think=remove_think,
+ )
+ except asyncio.TimeoutError:
+ raise errors.RequesterError('请求超时')
+ except openai.BadRequestError as e:
+ if 'context_length_exceeded' in e.message:
+ raise errors.RequesterError(f'上文过长,请重置会话: {e.message}')
+ else:
+ raise errors.RequesterError(f'请求参数错误: {e.message}')
+ except openai.AuthenticationError as e:
+ raise errors.RequesterError(f'无效的 api-key: {e.message}')
+ except openai.NotFoundError as e:
+ raise errors.RequesterError(f'请求路径错误: {e.message}')
+ except openai.RateLimitError as e:
+ raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}')
+ except openai.APIError as e:
+ raise errors.RequesterError(f'请求错误: {e.message}')
+
+ async def invoke_llm_stream(
+ self,
+ query: pipeline_query.Query,
+ model: requester.RuntimeLLMModel,
+ messages: typing.List[provider_message.Message],
+ funcs: typing.List[resource_tool.LLMTool] = None,
+ extra_args: dict[str, typing.Any] = {},
+ remove_think: bool = False,
+ ) -> provider_message.MessageChunk:
+ req_messages = [] # req_messages 仅用于类内,外部同步由 query.messages 进行
+ for m in messages:
+ msg_dict = m.dict(exclude_none=True)
+ content = msg_dict.get('content')
+ if isinstance(content, list):
+ # 检查 content 列表中是否每个部分都是文本
+ if all(isinstance(part, dict) and part.get('type') == 'text' for part in content):
+ # 将所有文本部分合并为一个字符串
+ msg_dict['content'] = '\n'.join(part['text'] for part in content)
+ req_messages.append(msg_dict)
+
+ try:
+ async for item in self._closure_stream(
+ query=query,
+ req_messages=req_messages,
+ use_model=model,
+ use_funcs=funcs,
+ extra_args=extra_args,
+ remove_think=remove_think,
+ ):
+ yield item
+
+ except asyncio.TimeoutError:
+ raise errors.RequesterError('请求超时')
+ except openai.BadRequestError as e:
+ if 'context_length_exceeded' in e.message:
+ raise errors.RequesterError(f'上文过长,请重置会话: {e.message}')
+ else:
+ raise errors.RequesterError(f'请求参数错误: {e.message}')
+ except openai.AuthenticationError as e:
+ raise errors.RequesterError(f'无效的 api-key: {e.message}')
+ except openai.NotFoundError as e:
+ raise errors.RequesterError(f'请求路径错误: {e.message}')
+ except openai.RateLimitError as e:
+ raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}')
+ except openai.APIError as e:
+ raise errors.RequesterError(f'请求错误: {e.message}')
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/modelscopechatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/modelscopechatcmpl.yaml
index 12d38201..8d22002d 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/modelscopechatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/modelscopechatcmpl.yaml
@@ -29,7 +29,10 @@ spec:
type: int
required: true
default: 120
- litellm_provider: openai
support_type:
- llm
provider_category: maas
+execution:
+ python:
+ path: ./modelscopechatcmpl.py
+ attr: ModelScopeChatCompletions
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/moonshotchatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/moonshotchatcmpl.py
new file mode 100644
index 00000000..b6852963
--- /dev/null
+++ b/src/langbot/pkg/provider/modelmgr/requesters/moonshotchatcmpl.py
@@ -0,0 +1,67 @@
+from __future__ import annotations
+
+import typing
+
+
+from . import chatcmpl
+from .. import requester
+import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
+import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
+import langbot_plugin.api.entities.builtin.provider.message as provider_message
+
+
+class MoonshotChatCompletions(chatcmpl.OpenAIChatCompletions):
+ """Moonshot ChatCompletion API 请求器"""
+
+ default_config: dict[str, typing.Any] = {
+ 'base_url': 'https://api.moonshot.cn/v1',
+ 'timeout': 120,
+ }
+
+ async def _closure(
+ self,
+ query: pipeline_query.Query,
+ req_messages: list[dict],
+ use_model: requester.RuntimeLLMModel,
+ use_funcs: list[resource_tool.LLMTool] = None,
+ extra_args: dict[str, typing.Any] = {},
+ remove_think: bool = False,
+ ) -> tuple[provider_message.Message, dict]:
+ self.client.api_key = use_model.provider.token_mgr.get_token()
+
+ args = {}
+ args['model'] = use_model.model_entity.name
+
+ if use_funcs:
+ tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
+
+ if tools:
+ args['tools'] = tools
+
+ # 设置此次请求中的messages
+ messages = req_messages
+
+ # deepseek 不支持多模态,把content都转换成纯文字
+ for m in messages:
+ if 'content' in m and isinstance(m['content'], list):
+ m['content'] = ' '.join([c['text'] for c in m['content']])
+
+ # 删除空的,不知道干嘛的,直接删了。
+ # messages = [m for m in messages if m["content"].strip() != "" and ('tool_calls' not in m or not m['tool_calls'])]
+
+ args['messages'] = messages
+
+ # 发送请求
+ resp = await self._req(args, extra_body=extra_args)
+
+ # 处理请求结果
+ message = await self._make_msg(resp, remove_think)
+
+ # Extract token usage from response
+ usage_info = {}
+ if hasattr(resp, 'usage') and resp.usage:
+ usage_info['input_tokens'] = resp.usage.prompt_tokens or 0
+ usage_info['output_tokens'] = resp.usage.completion_tokens or 0
+ usage_info['total_tokens'] = resp.usage.total_tokens or 0
+
+ return message, usage_info
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/moonshotchatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/moonshotchatcmpl.yaml
index 11bb5a05..7a7e3060 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/moonshotchatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/moonshotchatcmpl.yaml
@@ -22,7 +22,10 @@ spec:
type: integer
required: true
default: 120
- litellm_provider: moonshot
support_type:
- llm
provider_category: manufacturer
+execution:
+ python:
+ path: ./moonshotchatcmpl.py
+ attr: MoonshotChatCompletions
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/newapichatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/newapichatcmpl.py
new file mode 100644
index 00000000..3c2bd3fb
--- /dev/null
+++ b/src/langbot/pkg/provider/modelmgr/requesters/newapichatcmpl.py
@@ -0,0 +1,17 @@
+from __future__ import annotations
+
+import typing
+import openai
+
+from . import chatcmpl
+
+
+class NewAPIChatCompletions(chatcmpl.OpenAIChatCompletions):
+ """New API ChatCompletion API 请求器"""
+
+ client: openai.AsyncClient
+
+ default_config: dict[str, typing.Any] = {
+ 'base_url': 'http://localhost:3000/v1',
+ 'timeout': 120,
+ }
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/newapichatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/newapichatcmpl.yaml
index 2d51bf9f..e0f44e99 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/newapichatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/newapichatcmpl.yaml
@@ -22,8 +22,11 @@ spec:
type: integer
required: true
default: 120
- litellm_provider: openai
support_type:
- llm
- text-embedding
provider_category: maas
+execution:
+ python:
+ path: ./newapichatcmpl.py
+ attr: NewAPIChatCompletions
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/ollamachat.py b/src/langbot/pkg/provider/modelmgr/requesters/ollamachat.py
new file mode 100644
index 00000000..50f601d7
--- /dev/null
+++ b/src/langbot/pkg/provider/modelmgr/requesters/ollamachat.py
@@ -0,0 +1,314 @@
+from __future__ import annotations
+
+import asyncio
+import os
+import typing
+from typing import Union, Mapping, Any, AsyncIterator
+import uuid
+import json
+
+import ollama
+import httpx
+
+from .. import errors, requester
+import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
+import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
+import langbot_plugin.api.entities.builtin.provider.message as provider_message
+
+REQUESTER_NAME: str = 'ollama-chat'
+
+
+class OllamaChatCompletions(requester.ProviderAPIRequester):
+ """Ollama平台 ChatCompletion API请求器"""
+
+ client: ollama.AsyncClient
+
+ default_config: dict[str, typing.Any] = {
+ 'base_url': 'http://127.0.0.1:11434',
+ 'timeout': 120,
+ }
+
+ async def initialize(self):
+ os.environ['OLLAMA_HOST'] = self.requester_cfg['base_url']
+ self.client = ollama.AsyncClient(timeout=self.requester_cfg['timeout'])
+
+ def _infer_model_type(self, model_id: str) -> str:
+ normalized_model_id = (model_id or '').lower()
+ embedding_keywords = ('embedding', 'embed', 'bge-', 'e5-', 'm3e', 'gte-', 'text-embedding')
+ return 'embedding' if any(keyword in normalized_model_id for keyword in embedding_keywords) else 'llm'
+
+ def _infer_model_abilities(self, item: dict[str, typing.Any], model_id: str) -> list[str]:
+ normalized_model_id = (model_id or '').lower()
+ abilities: set[str] = set()
+ details = item.get('details', {}) or {}
+ families = details.get('families', []) or []
+ tokens = [normalized_model_id, str(details.get('family', '')).lower()]
+ tokens.extend(str(family).lower() for family in families)
+
+ if any(keyword in token for token in tokens for keyword in ('vision', 'vl', 'omni', 'llava', 'ocr')):
+ abilities.add('vision')
+ if any(keyword in token for token in tokens for keyword in ('tool', 'function')):
+ abilities.add('func_call')
+ return sorted(abilities)
+
+ async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]:
+ del api_key
+ models_url = f'{self.requester_cfg["base_url"].rstrip("/")}/api/tags'
+
+ async with httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']) as client:
+ response = await client.get(models_url)
+ response.raise_for_status()
+ payload = response.json()
+
+ models: list[dict[str, typing.Any]] = []
+ for item in payload.get('models', []):
+ model_id = item.get('model') or item.get('name')
+ if not model_id:
+ continue
+ models.append(
+ {
+ 'id': model_id,
+ 'name': item.get('name', model_id),
+ 'type': self._infer_model_type(model_id),
+ 'abilities': self._infer_model_abilities(item, model_id),
+ }
+ )
+
+ models.sort(key=lambda item: (item['type'] != 'llm', item['name'].lower()))
+ return {
+ 'models': models,
+ 'debug': {
+ 'request': {
+ 'method': 'GET',
+ 'url': models_url,
+ },
+ 'response': payload,
+ },
+ }
+
+ async def _req(
+ self,
+ args: dict,
+ ) -> Union[Mapping[str, Any], AsyncIterator[Mapping[str, Any]]]:
+ return await self.client.chat(**args)
+
+ async def _closure(
+ self,
+ query: pipeline_query.Query,
+ req_messages: list[dict],
+ use_model: requester.RuntimeLLMModel,
+ use_funcs: list[resource_tool.LLMTool] = None,
+ extra_args: dict[str, typing.Any] = {},
+ remove_think: bool = False,
+ ) -> provider_message.Message:
+ args = extra_args.copy()
+ args['model'] = use_model.model_entity.name
+
+ messages: list[dict] = req_messages.copy()
+ for msg in messages:
+ if 'content' in msg and isinstance(msg['content'], list):
+ text_content: list = []
+ image_urls: list = []
+ for me in msg['content']:
+ if me['type'] == 'text':
+ text_content.append(me['text'])
+ elif me['type'] == 'image_base64':
+ image_urls.append(me['image_base64'])
+
+ msg['content'] = '\n'.join(text_content)
+ msg['images'] = [url.split(',')[1] for url in image_urls]
+ if 'tool_calls' in msg: # LangBot 内部以 str 存储 tool_calls 的参数,这里需要转换为 dict
+ for tool_call in msg['tool_calls']:
+ tool_call['function']['arguments'] = json.loads(tool_call['function']['arguments'])
+ args['messages'] = messages
+
+ args['tools'] = []
+ if use_funcs:
+ tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
+ if tools:
+ args['tools'] = tools
+
+ resp = await self._req(args)
+ message: provider_message.Message = await self._make_msg(resp)
+ return message
+
+ async def _make_msg(self, chat_completions: ollama.ChatResponse) -> provider_message.Message:
+ message: ollama.Message = chat_completions.message
+ if message is None:
+ raise ValueError("chat_completions must contain a 'message' field")
+
+ ret_msg: provider_message.Message = None
+
+ if message.content is not None:
+ ret_msg = provider_message.Message(role='assistant', content=message.content)
+ if message.tool_calls is not None and len(message.tool_calls) > 0:
+ tool_calls: list[provider_message.ToolCall] = []
+
+ for tool_call in message.tool_calls:
+ tool_calls.append(
+ provider_message.ToolCall(
+ id=uuid.uuid4().hex,
+ type='function',
+ function=provider_message.FunctionCall(
+ name=tool_call.function.name,
+ arguments=json.dumps(tool_call.function.arguments),
+ ),
+ )
+ )
+ ret_msg.tool_calls = tool_calls
+
+ return ret_msg
+
+ async def _prepare_messages(
+ self,
+ messages: typing.List[provider_message.Message],
+ ) -> list[dict]:
+ """Prepare messages for Ollama API request."""
+ req_messages: list = []
+ for m in messages:
+ msg_dict: dict = m.dict(exclude_none=True)
+ content: Any = msg_dict.get('content')
+ if isinstance(content, list):
+ if all(isinstance(part, dict) and part.get('type') == 'text' for part in content):
+ msg_dict['content'] = '\n'.join(part['text'] for part in content)
+ req_messages.append(msg_dict)
+ return req_messages
+
+ async def invoke_llm(
+ self,
+ query: pipeline_query.Query,
+ model: requester.RuntimeLLMModel,
+ messages: typing.List[provider_message.Message],
+ funcs: typing.List[resource_tool.LLMTool] = None,
+ extra_args: dict[str, typing.Any] = {},
+ remove_think: bool = False,
+ ) -> provider_message.Message:
+ req_messages = await self._prepare_messages(messages)
+ try:
+ return await self._closure(
+ query=query,
+ req_messages=req_messages,
+ use_model=model,
+ use_funcs=funcs,
+ extra_args=extra_args,
+ remove_think=remove_think,
+ )
+ except asyncio.TimeoutError:
+ raise errors.RequesterError('请求超时')
+
+ async def invoke_llm_stream(
+ self,
+ query: pipeline_query.Query,
+ model: requester.RuntimeLLMModel,
+ messages: typing.List[provider_message.Message],
+ funcs: typing.List[resource_tool.LLMTool] = None,
+ extra_args: dict[str, typing.Any] = {},
+ remove_think: bool = False,
+ ) -> provider_message.MessageChunk:
+ req_messages = await self._prepare_messages(messages)
+
+ try:
+ args = extra_args.copy()
+ args['model'] = model.model_entity.name
+
+ # Process messages for Ollama format
+ msgs: list[dict] = req_messages.copy()
+ for msg in msgs:
+ if 'content' in msg and isinstance(msg['content'], list):
+ text_content: list = []
+ image_urls: list = []
+ for me in msg['content']:
+ if me['type'] == 'text':
+ text_content.append(me['text'])
+ elif me['type'] == 'image_base64':
+ image_urls.append(me['image_base64'])
+ msg['content'] = '\n'.join(text_content)
+ msg['images'] = [url.split(',')[1] for url in image_urls]
+ if 'tool_calls' in msg:
+ for tool_call in msg['tool_calls']:
+ tool_call['function']['arguments'] = json.loads(tool_call['function']['arguments'])
+ args['messages'] = msgs
+
+ args['tools'] = []
+ if funcs:
+ tools = await self.ap.tool_mgr.generate_tools_for_openai(funcs)
+ if tools:
+ args['tools'] = tools
+
+ args['stream'] = True
+
+ chunk_idx = 0
+ thinking_started = False
+ thinking_ended = False
+ role = 'assistant'
+
+ async for chunk in await self.client.chat(**args):
+ message: ollama.Message = chunk.message
+ done = chunk.done
+
+ delta_content = message.content or ''
+ reasoning_content = getattr(message, 'thinking', '') or ''
+
+ # Handle reasoning/thinking content
+ if reasoning_content:
+ if remove_think:
+ chunk_idx += 1
+ continue
+
+ if not thinking_started:
+ thinking_started = True
+ delta_content = '\n' + reasoning_content
+ else:
+ delta_content = reasoning_content
+ elif thinking_started and not thinking_ended and delta_content:
+ thinking_ended = True
+ delta_content = '\n\n' + delta_content
+
+ # Handle tool calls
+ tool_calls_data = None
+ if message.tool_calls:
+ tool_calls_data = []
+ for tc in message.tool_calls:
+ tool_calls_data.append(
+ {
+ 'id': uuid.uuid4().hex,
+ 'type': 'function',
+ 'function': {
+ 'name': tc.function.name,
+ 'arguments': json.dumps(tc.function.arguments),
+ },
+ }
+ )
+
+ # Skip empty first chunk
+ if chunk_idx == 0 and not delta_content and not reasoning_content and not tool_calls_data:
+ chunk_idx += 1
+ continue
+
+ chunk_data = {
+ 'role': role,
+ 'content': delta_content if delta_content else None,
+ 'tool_calls': tool_calls_data,
+ 'is_final': bool(done),
+ }
+ chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
+
+ yield provider_message.MessageChunk(**chunk_data)
+ chunk_idx += 1
+
+ except asyncio.TimeoutError:
+ raise errors.RequesterError('请求超时')
+
+ async def invoke_embedding(
+ self,
+ model: requester.RuntimeEmbeddingModel,
+ input_text: list[str],
+ extra_args: dict[str, typing.Any] = {},
+ ) -> list[list[float]]:
+ return (
+ await self.client.embed(
+ model=model.model_entity.name,
+ input=input_text,
+ **extra_args,
+ )
+ ).embeddings
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/ollamachat.yaml b/src/langbot/pkg/provider/modelmgr/requesters/ollamachat.yaml
index 24f4a0e9..a724f8f8 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/ollamachat.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/ollamachat.yaml
@@ -22,8 +22,11 @@ spec:
type: integer
required: true
default: 120
- litellm_provider: ollama
support_type:
- llm
- text-embedding
provider_category: self-hosted
+execution:
+ python:
+ path: ./ollamachat.py
+ attr: OllamaChatCompletions
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/openrouterchatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/openrouterchatcmpl.py
new file mode 100644
index 00000000..17b88431
--- /dev/null
+++ b/src/langbot/pkg/provider/modelmgr/requesters/openrouterchatcmpl.py
@@ -0,0 +1,25 @@
+from __future__ import annotations
+
+import typing
+import openai
+
+from . import modelscopechatcmpl
+
+
+class OpenRouterChatCompletions(modelscopechatcmpl.ModelScopeChatCompletions):
+ """OpenRouter ChatCompletion API 请求器"""
+
+ client: openai.AsyncClient
+
+ default_config: dict[str, typing.Any] = {
+ 'base_url': 'https://openrouter.ai/api/v1',
+ 'timeout': 120,
+ }
+
+ async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]:
+ original_base_url = self.requester_cfg.get('base_url', '')
+ self.requester_cfg['base_url'] = 'https://openrouter.ai/api/v1'
+ try:
+ return await super().scan_models(api_key)
+ finally:
+ self.requester_cfg['base_url'] = original_base_url
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/openrouterchatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/openrouterchatcmpl.yaml
index 9a386736..71064dc0 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/openrouterchatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/openrouterchatcmpl.yaml
@@ -22,9 +22,12 @@ spec:
type: integer
required: true
default: 120
- litellm_provider: openrouter
support_type:
- llm
- text-embedding
- rerank
provider_category: maas
+execution:
+ python:
+ path: ./openrouterchatcmpl.py
+ attr: OpenRouterChatCompletions
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/ppiochatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/ppiochatcmpl.py
new file mode 100644
index 00000000..1836bd62
--- /dev/null
+++ b/src/langbot/pkg/provider/modelmgr/requesters/ppiochatcmpl.py
@@ -0,0 +1,208 @@
+from __future__ import annotations
+
+import openai
+import typing
+
+from . import chatcmpl
+from .. import requester
+import openai.types.chat.chat_completion as chat_completion
+import re
+import langbot_plugin.api.entities.builtin.provider.message as provider_message
+import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
+import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
+
+
+class PPIOChatCompletions(chatcmpl.OpenAIChatCompletions):
+ """欧派云 ChatCompletion API 请求器"""
+
+ client: openai.AsyncClient
+
+ default_config: dict[str, typing.Any] = {
+ 'base_url': 'https://api.ppinfra.com/v3/openai',
+ 'timeout': 120,
+ }
+
+ is_think: bool = False
+
+ async def _make_msg(
+ self,
+ chat_completion: chat_completion.ChatCompletion,
+ remove_think: bool,
+ ) -> provider_message.Message:
+ chatcmpl_message = chat_completion.choices[0].message.model_dump()
+ # print(chatcmpl_message.keys(), chatcmpl_message.values())
+
+ # 确保 role 字段存在且不为 None
+ if 'role' not in chatcmpl_message or chatcmpl_message['role'] is None:
+ chatcmpl_message['role'] = 'assistant'
+
+ reasoning_content = chatcmpl_message['reasoning_content'] if 'reasoning_content' in chatcmpl_message else None
+
+ # deepseek的reasoner模型
+ chatcmpl_message['content'] = await self._process_thinking_content(
+ chatcmpl_message['content'], reasoning_content, remove_think
+ )
+
+ # 移除 reasoning_content 字段,避免传递给 Message
+ if 'reasoning_content' in chatcmpl_message:
+ del chatcmpl_message['reasoning_content']
+
+ message = provider_message.Message(**chatcmpl_message)
+
+ return message
+
+ async def _process_thinking_content(
+ self,
+ content: str,
+ reasoning_content: str = None,
+ remove_think: bool = False,
+ ) -> tuple[str, str]:
+ """处理思维链内容
+
+ Args:
+ content: 原始内容
+ reasoning_content: reasoning_content 字段内容
+ remove_think: 是否移除思维链
+
+ Returns:
+ 处理后的内容
+ """
+ if remove_think:
+ content = re.sub(r'.*?', '', content, flags=re.DOTALL)
+ else:
+ if reasoning_content is not None:
+ content = '\n' + reasoning_content + '\n\n' + content
+ return content
+
+ async def _make_msg_chunk(
+ self,
+ delta: dict[str, typing.Any],
+ idx: int,
+ ) -> provider_message.MessageChunk:
+ # 处理流式chunk和完整响应的差异
+ # print(chat_completion.choices[0])
+
+ # 确保 role 字段存在且不为 None
+ if 'role' not in delta or delta['role'] is None:
+ delta['role'] = 'assistant'
+
+ reasoning_content = delta['reasoning_content'] if 'reasoning_content' in delta else None
+
+ delta['content'] = '' if delta['content'] is None else delta['content']
+ # print(reasoning_content)
+
+ # deepseek的reasoner模型
+
+ if reasoning_content is not None:
+ delta['content'] += reasoning_content
+
+ message = provider_message.MessageChunk(**delta)
+
+ return message
+
+ async def _closure_stream(
+ self,
+ query: pipeline_query.Query,
+ req_messages: list[dict],
+ use_model: requester.RuntimeLLMModel,
+ use_funcs: list[resource_tool.LLMTool] = None,
+ extra_args: dict[str, typing.Any] = {},
+ remove_think: bool = False,
+ ) -> provider_message.Message | typing.AsyncGenerator[provider_message.MessageChunk, None]:
+ self.client.api_key = use_model.provider.token_mgr.get_token()
+
+ args = {}
+ args['model'] = use_model.model_entity.name
+
+ if use_funcs:
+ tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
+
+ if tools:
+ args['tools'] = tools
+
+ # 设置此次请求中的messages
+ messages = req_messages.copy()
+
+ # 检查vision
+ for msg in messages:
+ if 'content' in msg and isinstance(msg['content'], list):
+ for me in msg['content']:
+ if me['type'] == 'image_base64':
+ me['image_url'] = {'url': me['image_base64']}
+ me['type'] = 'image_url'
+ del me['image_base64']
+
+ args['messages'] = messages
+ args['stream'] = True
+
+ # tool_calls_map: dict[str, provider_message.ToolCall] = {}
+ chunk_idx = 0
+ thinking_started = False
+ thinking_ended = False
+ role = 'assistant' # 默认角色
+ async for chunk in self._req_stream(args, extra_body=extra_args):
+ # 解析 chunk 数据
+ if hasattr(chunk, 'choices') and chunk.choices:
+ choice = chunk.choices[0]
+ delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
+ finish_reason = getattr(choice, 'finish_reason', None)
+ else:
+ delta = {}
+ finish_reason = None
+
+ # 从第一个 chunk 获取 role,后续使用这个 role
+ if 'role' in delta and delta['role']:
+ role = delta['role']
+
+ # 获取增量内容
+ delta_content = delta.get('content', '')
+ # reasoning_content = delta.get('reasoning_content', '')
+
+ if remove_think:
+ if delta['content'] is not None:
+ if '' in delta['content'] and not thinking_started and not thinking_ended:
+ thinking_started = True
+ continue
+ elif delta['content'] == r'' and not thinking_ended:
+ thinking_ended = True
+ continue
+ elif thinking_ended and delta['content'] == '\n\n' and thinking_started:
+ thinking_started = False
+ continue
+ elif thinking_started and not thinking_ended:
+ continue
+
+ # delta_tool_calls = None
+ if delta.get('tool_calls'):
+ for tool_call in delta['tool_calls']:
+ if tool_call['id'] and tool_call['function']['name']:
+ tool_id = tool_call['id']
+ tool_name = tool_call['function']['name']
+
+ if tool_call['id'] is None:
+ tool_call['id'] = tool_id
+ if tool_call['function']['name'] is None:
+ tool_call['function']['name'] = tool_name
+ if tool_call['function']['arguments'] is None:
+ tool_call['function']['arguments'] = ''
+ if tool_call['type'] is None:
+ tool_call['type'] = 'function'
+
+ # 跳过空的第一个 chunk(只有 role 没有内容)
+ if chunk_idx == 0 and not delta_content and not delta.get('tool_calls'):
+ chunk_idx += 1
+ continue
+
+ # 构建 MessageChunk - 只包含增量内容
+ chunk_data = {
+ 'role': role,
+ 'content': delta_content if delta_content else None,
+ 'tool_calls': delta.get('tool_calls'),
+ 'is_final': bool(finish_reason),
+ }
+
+ # 移除 None 值
+ chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
+
+ yield provider_message.MessageChunk(**chunk_data)
+ chunk_idx += 1
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/ppiochatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/ppiochatcmpl.yaml
index 38fdb83e..9e8eb1b0 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/ppiochatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/ppiochatcmpl.yaml
@@ -29,8 +29,11 @@ spec:
type: int
required: true
default: 120
- litellm_provider: openai
support_type:
- llm
- text-embedding
provider_category: maas
+execution:
+ python:
+ path: ./ppiochatcmpl.py
+ attr: PPIOChatCompletions
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/qhaigcchatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/qhaigcchatcmpl.py
new file mode 100644
index 00000000..a68b6896
--- /dev/null
+++ b/src/langbot/pkg/provider/modelmgr/requesters/qhaigcchatcmpl.py
@@ -0,0 +1,17 @@
+from __future__ import annotations
+
+import openai
+import typing
+
+from . import chatcmpl
+
+
+class QHAIGCChatCompletions(chatcmpl.OpenAIChatCompletions):
+ """启航 AI ChatCompletion API 请求器"""
+
+ client: openai.AsyncClient
+
+ default_config: dict[str, typing.Any] = {
+ 'base_url': 'https://api.qhaigc.com/v1',
+ 'timeout': 120,
+ }
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/qhaigcchatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/qhaigcchatcmpl.yaml
index 1b2a25bf..46ae1fad 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/qhaigcchatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/qhaigcchatcmpl.yaml
@@ -29,8 +29,11 @@ spec:
type: int
required: true
default: 120
- litellm_provider: openai
support_type:
- llm
- text-embedding
provider_category: maas
+execution:
+ python:
+ path: ./qhaigcchatcmpl.py
+ attr: QHAIGCChatCompletions
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/shengsuanyun.py b/src/langbot/pkg/provider/modelmgr/requesters/shengsuanyun.py
new file mode 100644
index 00000000..122eaf7d
--- /dev/null
+++ b/src/langbot/pkg/provider/modelmgr/requesters/shengsuanyun.py
@@ -0,0 +1,32 @@
+from __future__ import annotations
+
+import openai
+import typing
+
+from . import chatcmpl
+import openai.types.chat.chat_completion as chat_completion
+
+
+class ShengSuanYunChatCompletions(chatcmpl.OpenAIChatCompletions):
+ """胜算云(ModelSpot.AI) ChatCompletion API 请求器"""
+
+ client: openai.AsyncClient
+
+ default_config: dict[str, typing.Any] = {
+ 'base_url': 'https://router.shengsuanyun.com/api/v1',
+ 'timeout': 120,
+ }
+
+ async def _req(
+ self,
+ args: dict,
+ extra_body: dict = {},
+ ) -> chat_completion.ChatCompletion:
+ return await self.client.chat.completions.create(
+ **args,
+ extra_body=extra_body,
+ extra_headers={
+ 'HTTP-Referer': 'https://langbot.app',
+ 'X-Title': 'LangBot',
+ },
+ )
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/shengsuanyun.yaml b/src/langbot/pkg/provider/modelmgr/requesters/shengsuanyun.yaml
index 55bf2a3f..77cf682c 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/shengsuanyun.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/shengsuanyun.yaml
@@ -29,8 +29,11 @@ spec:
type: int
required: true
default: 120
- litellm_provider: openai
support_type:
- llm
- text-embedding
provider_category: maas
+execution:
+ python:
+ path: ./shengsuanyun.py
+ attr: ShengSuanYunChatCompletions
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.py
new file mode 100644
index 00000000..3636d9d1
--- /dev/null
+++ b/src/langbot/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.py
@@ -0,0 +1,17 @@
+from __future__ import annotations
+
+import typing
+import openai
+
+from . import chatcmpl
+
+
+class SiliconFlowChatCompletions(chatcmpl.OpenAIChatCompletions):
+ """SiliconFlow ChatCompletion API 请求器"""
+
+ client: openai.AsyncClient
+
+ default_config: dict[str, typing.Any] = {
+ 'base_url': 'https://api.siliconflow.cn/v1',
+ 'timeout': 120,
+ }
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.yaml
index 07715db1..11a2ffa3 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.yaml
@@ -22,9 +22,12 @@ spec:
type: integer
required: true
default: 120
- litellm_provider: siliconflow
support_type:
- llm
- text-embedding
- rerank
provider_category: maas
+execution:
+ python:
+ path: ./siliconflowchatcmpl.py
+ attr: SiliconFlowChatCompletions
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/spacechatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/spacechatcmpl.py
new file mode 100644
index 00000000..91740a1f
--- /dev/null
+++ b/src/langbot/pkg/provider/modelmgr/requesters/spacechatcmpl.py
@@ -0,0 +1,17 @@
+from __future__ import annotations
+
+import typing
+import openai
+
+from . import chatcmpl
+
+
+class LangBotSpaceChatCompletions(chatcmpl.OpenAIChatCompletions):
+ """LangBot Space ChatCompletion API 请求器"""
+
+ client: openai.AsyncClient
+
+ default_config: dict[str, typing.Any] = {
+ 'base_url': 'https://api.langbot.cloud/v1',
+ 'timeout': 120,
+ }
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/spacechatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/spacechatcmpl.yaml
index 70faea5f..29c23a83 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/spacechatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/spacechatcmpl.yaml
@@ -22,8 +22,11 @@ spec:
type: integer
required: true
default: 120
- litellm_provider: openai
support_type:
- llm
- text-embedding
provider_category: maas
+execution:
+ python:
+ path: ./spacechatcmpl.py
+ attr: LangBotSpaceChatCompletions
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/tokenpony.yaml b/src/langbot/pkg/provider/modelmgr/requesters/tokenpony.yaml
index 712dd490..f160bdea 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/tokenpony.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/tokenpony.yaml
@@ -22,8 +22,11 @@ spec:
type: integer
required: true
default: 120
- litellm_provider: openai
support_type:
- llm
- text-embedding
provider_category: maas
+execution:
+ python:
+ path: ./tokenponychatcmpl.py
+ attr: TokenPonyChatCompletions
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/tokenponychatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/tokenponychatcmpl.py
new file mode 100644
index 00000000..92311454
--- /dev/null
+++ b/src/langbot/pkg/provider/modelmgr/requesters/tokenponychatcmpl.py
@@ -0,0 +1,17 @@
+from __future__ import annotations
+
+import typing
+import openai
+
+from . import chatcmpl
+
+
+class TokenPonyChatCompletions(chatcmpl.OpenAIChatCompletions):
+ """TokenPony ChatCompletion API 请求器"""
+
+ client: openai.AsyncClient
+
+ default_config: dict[str, typing.Any] = {
+ 'base_url': 'https://api.tokenpony.cn/v1',
+ 'timeout': 120,
+ }
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/volcarkchatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/volcarkchatcmpl.py
new file mode 100644
index 00000000..7eb68956
--- /dev/null
+++ b/src/langbot/pkg/provider/modelmgr/requesters/volcarkchatcmpl.py
@@ -0,0 +1,17 @@
+from __future__ import annotations
+
+import typing
+import openai
+
+from . import chatcmpl
+
+
+class VolcArkChatCompletions(chatcmpl.OpenAIChatCompletions):
+ """火山方舟大模型平台 ChatCompletion API 请求器"""
+
+ client: openai.AsyncClient
+
+ default_config: dict[str, typing.Any] = {
+ 'base_url': 'https://ark.cn-beijing.volces.com/api/v3',
+ 'timeout': 120,
+ }
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/volcarkchatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/volcarkchatcmpl.yaml
index 19f79f59..e5c82657 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/volcarkchatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/volcarkchatcmpl.yaml
@@ -22,7 +22,10 @@ spec:
type: integer
required: true
default: 120
- litellm_provider: openai
support_type:
- llm
provider_category: maas
+execution:
+ python:
+ path: ./volcarkchatcmpl.py
+ attr: VolcArkChatCompletions
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/voyageairerank.yaml b/src/langbot/pkg/provider/modelmgr/requesters/voyageairerank.yaml
index 0955f4a0..a47b8d47 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/voyageairerank.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/voyageairerank.yaml
@@ -22,7 +22,10 @@ spec:
type: integer
required: true
default: 120
- litellm_provider: voyage
support_type:
- rerank
provider_category: manufacturer
+execution:
+ python:
+ path: ./chatcmpl.py
+ attr: OpenAIChatCompletions
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/xaichatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/xaichatcmpl.py
new file mode 100644
index 00000000..db2022f1
--- /dev/null
+++ b/src/langbot/pkg/provider/modelmgr/requesters/xaichatcmpl.py
@@ -0,0 +1,17 @@
+from __future__ import annotations
+
+import typing
+import openai
+
+from . import chatcmpl
+
+
+class XaiChatCompletions(chatcmpl.OpenAIChatCompletions):
+ """xAI ChatCompletion API 请求器"""
+
+ client: openai.AsyncClient
+
+ default_config: dict[str, typing.Any] = {
+ 'base_url': 'https://api.x.ai/v1',
+ 'timeout': 120,
+ }
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/xaichatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/xaichatcmpl.yaml
index 76db8a2f..2e721d70 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/xaichatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/xaichatcmpl.yaml
@@ -22,7 +22,10 @@ spec:
type: integer
required: true
default: 120
- litellm_provider: xai
support_type:
- llm
provider_category: manufacturer
+execution:
+ python:
+ path: ./xaichatcmpl.py
+ attr: XaiChatCompletions
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.py
new file mode 100644
index 00000000..a1a07068
--- /dev/null
+++ b/src/langbot/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.py
@@ -0,0 +1,17 @@
+from __future__ import annotations
+
+import typing
+import openai
+
+from . import chatcmpl
+
+
+class ZhipuAIChatCompletions(chatcmpl.OpenAIChatCompletions):
+ """智谱AI ChatCompletion API 请求器"""
+
+ client: openai.AsyncClient
+
+ default_config: dict[str, typing.Any] = {
+ 'base_url': 'https://open.bigmodel.cn/api/paas/v4',
+ 'timeout': 120,
+ }
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.yaml
index 632ff1b4..a4ebb2ec 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.yaml
@@ -22,7 +22,10 @@ spec:
type: integer
required: true
default: 120
- litellm_provider: zhipu
support_type:
- llm
provider_category: manufacturer
+execution:
+ python:
+ path: ./zhipuaichatcmpl.py
+ attr: ZhipuAIChatCompletions