restore: restore deleted provider requester files

Restore individual provider requester implementations that were
removed in de61b5d3. These files coexist with the unified
litellmchat.py backend.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
huanghuoguoguo
2026-05-04 12:19:18 +08:00
parent d170bdd343
commit 8dd16aac51
51 changed files with 3311 additions and 28 deletions

View File

@@ -0,0 +1,17 @@
from __future__ import annotations
import typing
import openai
from . import chatcmpl
class AI302ChatCompletions(chatcmpl.OpenAIChatCompletions):
    """Requester for the 302.AI ChatCompletion API.

    No methods are defined here: all request handling comes from
    ``chatcmpl.OpenAIChatCompletions``. This subclass only supplies the
    302.AI connection defaults.
    """

    # Annotation only; no client object is created in this class body.
    client: openai.AsyncClient

    # Default requester configuration for 302.AI.
    # NOTE(review): ``timeout`` is presumably in seconds -- confirm against the base class.
    default_config: dict[str, typing.Any] = {
        'base_url': 'https://api.302.ai/v1',
        'timeout': 120,
    }

View File

@@ -22,9 +22,12 @@ spec:
type: integer
required: true
default: 120
litellm_provider: openai
support_type:
- llm
- text-embedding
- rerank
provider_category: maas
execution:
python:
path: ./302aichatcmpl.py
attr: AI302ChatCompletions

View File

@@ -0,0 +1,370 @@
from __future__ import annotations
import typing
import json
import platform
import socket
import anthropic
import httpx
from .. import errors, requester
from ....utils import image
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
import langbot_plugin.api.entities.builtin.provider.message as provider_message
class AnthropicMessages(requester.ProviderAPIRequester):
    """Requester for the Anthropic Messages API.

    Converts provider messages into Anthropic request payloads and converts
    Anthropic responses (plain or streamed) back into provider messages.
    """

    # Async Anthropic client, created by ``initialize``.
    client: anthropic.AsyncAnthropic

    default_config: dict[str, typing.Any] = {
        'base_url': 'https://api.anthropic.com',
        'timeout': 120,
    }

    # SDK errors that are translated into ``errors.RequesterError``.
    _HANDLED_ERRORS = (
        anthropic.AuthenticationError,
        anthropic.BadRequestError,
        anthropic.NotFoundError,
    )

    async def initialize(self):
        """Create the shared ``AsyncAnthropic`` client from ``self.requester_cfg``.

        The API key is left empty here; it is assigned on the client right
        before every request.
        """
        # Compatibility shim: Windows builds may lack TCP_KEEPINTVL / TCP_KEEPCNT.
        if platform.system() == 'Windows':
            if not hasattr(socket, 'TCP_KEEPINTVL'):
                socket.TCP_KEEPINTVL = 0
            if not hasattr(socket, 'TCP_KEEPCNT'):
                socket.TCP_KEEPCNT = 0

        httpx_client = anthropic._base_client.AsyncHttpxClientWrapper(
            base_url=self.requester_cfg['base_url'],
            # cast to a valid type because mypy doesn't understand our type narrowing
            timeout=typing.cast(httpx.Timeout, self.requester_cfg['timeout']),
            limits=anthropic._constants.DEFAULT_CONNECTION_LIMITS,
            follow_redirects=True,
            trust_env=True,
        )
        self.client = anthropic.AsyncAnthropic(
            api_key='',
            http_client=httpx_client,
            base_url=self.requester_cfg['base_url'],
        )

    @staticmethod
    def _to_requester_error(exc: Exception) -> errors.RequesterError:
        """Map one of ``_HANDLED_ERRORS`` to the ``RequesterError`` shown to users."""
        if isinstance(exc, anthropic.AuthenticationError):
            return errors.RequesterError(f'api-key 无效: {exc.message}')
        if isinstance(exc, anthropic.BadRequestError):
            return errors.RequesterError(str(exc.message))
        # Remaining case: anthropic.NotFoundError (unknown model or wrong URL).
        if 'model: ' in str(exc):
            return errors.RequesterError(f'模型无效: {exc.message}')
        return errors.RequesterError(f'请求地址无效: {exc.message}')

    async def _build_anthropic_args(
        self,
        model: requester.RuntimeLLMModel,
        messages: typing.List[provider_message.Message],
        funcs: typing.List[resource_tool.LLMTool] | None,
        extra_args: dict[str, typing.Any],
    ) -> dict[str, typing.Any]:
        """Build the keyword arguments for ``client.messages.create``.

        Args:
            model: Model whose entity name is sent as ``model``.
            messages: Conversation to send. The list is not modified.
            funcs: Tools offered to the model, if any.
            extra_args: Extra request arguments; copied, never mutated.

        Returns:
            A new dict containing ``model``, ``messages`` and, when
            applicable, ``system``, ``thinking`` and ``tools``.
        """
        args = extra_args.copy()
        args['model'] = model.model_entity.name

        # Work on a copy so that removing the system message below does not
        # alter the caller's list.
        remaining = list(messages)
        for idx, m in enumerate(remaining):
            if m.role == 'system':
                # The first system message is lifted into the top-level
                # ``system`` argument (only when its content is a string).
                system_message = remaining.pop(idx)
                if isinstance(system_message, provider_message.Message) and isinstance(
                    system_message.content, str
                ):
                    args['system'] = system_message.content
                break

        req_messages = []
        for m in remaining:
            if m.role == 'tool':
                # Tool results are sent as a user message holding a tool_result block.
                req_messages.append(
                    {
                        'role': 'user',
                        'content': [
                            {
                                'type': 'tool_result',
                                'tool_use_id': m.tool_call_id,
                                'is_error': False,  # always reported as success for now
                                'content': [{'type': 'text', 'text': m.content}],
                            }
                        ],
                    }
                )
                continue

            msg_dict = m.dict(exclude_none=True)
            if isinstance(m.content, str) and m.content.strip() != '':
                msg_dict['content'] = [{'type': 'text', 'text': m.content}]
            elif isinstance(m.content, list):
                for part_idx, ce in enumerate(m.content):
                    if ce.type == 'image_base64':
                        image_b64, image_format = await image.extract_b64_and_format(ce.image_base64)
                        msg_dict['content'][part_idx] = {
                            'type': 'image',
                            'source': {
                                'type': 'base64',
                                'media_type': f'image/{image_format}',
                                'data': image_b64,
                            },
                        }

            # An empty string cannot hold content blocks; use an empty list instead.
            if msg_dict.get('content') == '':
                msg_dict['content'] = []

            if m.tool_calls:
                # ``content`` may be absent (None is dropped by exclude_none)
                # or a blank string; tool_use blocks need a list to live in.
                if not isinstance(msg_dict.get('content'), list):
                    msg_dict['content'] = []
                for tool_call in m.tool_calls:
                    raw_arguments = tool_call.function.arguments
                    msg_dict['content'].append(
                        {
                            'type': 'tool_use',
                            'id': tool_call.id,
                            'name': tool_call.function.name,
                            # Empty arguments mean "no input"; json.loads('') would raise.
                            'input': json.loads(raw_arguments) if raw_arguments else {},
                        }
                    )
                msg_dict.pop('tool_calls', None)

            req_messages.append(msg_dict)

        args['messages'] = req_messages

        # Any ``thinking`` entry in the extra args is replaced by this fixed configuration.
        if 'thinking' in args:
            args['thinking'] = {'type': 'enabled', 'budget_tokens': 10000}

        if funcs:
            tools = await self.ap.tool_mgr.generate_tools_for_anthropic(funcs)
            if tools:
                args['tools'] = tools

        return args

    async def invoke_llm(
        self,
        query: pipeline_query.Query,
        model: requester.RuntimeLLMModel,
        messages: typing.List[provider_message.Message],
        funcs: typing.List[resource_tool.LLMTool] = None,
        extra_args: dict[str, typing.Any] = {},  # never mutated: copied before use
        remove_think: bool = False,
    ) -> provider_message.Message:
        """Send one non-streaming request and return the assistant message.

        Args:
            query: Current pipeline query (not read by this method).
            model: Model to call; also supplies the API token.
            messages: Conversation to send.
            funcs: Tools offered to the model, if any.
            extra_args: Extra request arguments merged into the call.
            remove_think: When True, thinking blocks are left out of the content.

        Returns:
            The response as a provider message. Thinking text, when kept, is
            wrapped in ``<think>`` tags and placed before the answer text.

        Raises:
            errors.RequesterError: On authentication, bad-request or not-found errors.
        """
        self.client.api_key = model.provider.token_mgr.get_token()
        args = await self._build_anthropic_args(model, messages, funcs, extra_args)

        try:
            resp = await self.client.messages.create(**args)
            assert type(resp) is anthropic.types.message.Message

            result: dict[str, typing.Any] = {'content': '', 'role': resp.role}
            for block in resp.content:
                if not remove_think and block.type == 'thinking':
                    result['content'] = '<think>\n' + block.thinking + '\n</think>\n' + result['content']
                elif block.type == 'text':
                    result['content'] += block.text
                elif block.type == 'tool_use':
                    assert type(block) is anthropic.types.tool_use_block.ToolUseBlock
                    result.setdefault('tool_calls', []).append(
                        provider_message.ToolCall(
                            id=block.id,
                            type='function',
                            function=provider_message.FunctionCall(
                                name=block.name, arguments=json.dumps(block.input)
                            ),
                        )
                    )
            return provider_message.Message(**result)
        except self._HANDLED_ERRORS as e:
            raise self._to_requester_error(e) from e

    async def invoke_llm_stream(
        self,
        query: pipeline_query.Query,
        model: requester.RuntimeLLMModel,
        messages: typing.List[provider_message.Message],
        funcs: typing.List[resource_tool.LLMTool] = None,
        extra_args: dict[str, typing.Any] = {},  # never mutated: copied before use
        remove_think: bool = False,
    ) -> typing.AsyncGenerator[provider_message.MessageChunk, None]:
        """Send one streaming request and yield the response as message chunks.

        Args:
            query: Current pipeline query (not read by this method).
            model: Model to call; also supplies the API token.
            messages: Conversation to send.
            funcs: Tools offered to the model, if any.
            extra_args: Extra request arguments merged into the call.
            remove_think: When True, thinking deltas are not yielded.

        Yields:
            ``MessageChunk`` objects carrying incremental text and/or one
            incremental tool call. ``is_final`` becomes True once the API
            reports a stop reason other than ``tool_use``.

        Raises:
            errors.RequesterError: On authentication, bad-request or not-found errors.
        """
        self.client.api_key = model.provider.token_mgr.get_token()
        args = await self._build_anthropic_args(model, messages, funcs, extra_args)
        args['stream'] = True

        try:
            role = 'assistant'  # role used for every yielded chunk
            open_tag_pending = False  # a thinking block started; '<think>' not yet emitted
            thinking_open = False  # '<think>' emitted and not yet closed
            close_tag_pending = False  # next text delta must be prefixed with '</think>'
            is_final = False
            tool_name = ''
            tool_id = ''

            async for chunk in await self.client.messages.create(**args):
                content = ''
                tool_calls = None

                if isinstance(chunk, anthropic.types.raw_content_block_start_event.RawContentBlockStartEvent):
                    block = chunk.content_block
                    if block.type == 'tool_use':
                        # Remember name/id: the following input_json_delta events do not carry them.
                        if block.name is not None:
                            tool_name = block.name
                        if block.id is not None:
                            tool_id = block.id
                    elif not remove_think:
                        if block.type == 'thinking':
                            open_tag_pending = True
                        elif block.type == 'text' and thinking_open:
                            # Close the tag only if one was actually opened.
                            thinking_open = False
                            close_tag_pending = True
                    continue
                elif isinstance(chunk, anthropic.types.raw_content_block_delta_event.RawContentBlockDeltaEvent):
                    delta = chunk.delta
                    if delta.type == 'thinking_delta':
                        if remove_think:
                            continue
                        content = delta.thinking
                        if open_tag_pending:
                            open_tag_pending = False
                            thinking_open = True
                            content = '<think>\n' + content
                    elif delta.type == 'text_delta':
                        content = delta.text
                        if close_tag_pending:
                            close_tag_pending = False
                            content = '\n</think>\n' + content
                    elif delta.type == 'input_json_delta':
                        tool_calls = [
                            {
                                'id': tool_id,
                                'function': {'name': tool_name, 'arguments': delta.partial_json},
                                'type': 'function',
                            }
                        ]
                elif isinstance(chunk, anthropic.types.raw_content_block_stop_event.RawContentBlockStopEvent):
                    continue  # end of one content block
                elif isinstance(chunk, anthropic.types.raw_message_delta_event.RawMessageDeltaEvent):
                    stop_reason = chunk.delta.stop_reason
                    # Any stop reason except a tool request ends the reply
                    # (end_turn, max_tokens, stop_sequence, ...).
                    if stop_reason is not None and stop_reason != 'tool_use':
                        is_final = True
                elif isinstance(chunk, anthropic.types.raw_message_stop_event.RawMessageStopEvent):
                    continue  # end of the whole message
                else:
                    self.ap.logger.debug(f'anthropic chunk: {chunk}')
                    continue

                yield provider_message.MessageChunk(
                    content=content,
                    role=role,
                    is_final=is_final,
                    tool_calls=tool_calls,
                )
        except self._HANDLED_ERRORS as e:
            raise self._to_requester_error(e) from e

View File

@@ -22,7 +22,10 @@ spec:
type: integer
required: true
default: 120
litellm_provider: anthropic
support_type:
- llm
provider_category: manufacturer
execution:
python:
path: ./anthropicmsgs.py
attr: AnthropicMessages

View File

@@ -0,0 +1,242 @@
from __future__ import annotations
import typing
import dashscope
import openai
from . import modelscopechatcmpl
from .. import requester
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
import langbot_plugin.api.entities.builtin.provider.message as provider_message
class BailianChatCompletions(modelscopechatcmpl.ModelScopeChatCompletions):
    """Requester for the Alibaba Cloud Bailian platform ChatCompletion API.

    Streaming requests normally go through the OpenAI-compatible endpoint.
    When a message carries an audio file, the native ``dashscope`` SDK is
    used instead, because the audio part cannot be sent through the
    OpenAI-style request.
    """

    client: openai.AsyncClient

    default_config: dict[str, typing.Any] = {
        'base_url': 'https://dashscope.aliyuncs.com/compatible-mode/v1',
        'timeout': 120,
    }

    # File extensions (lower-case, without the dot) forwarded as video parts.
    _BAILIAN_VIDEO_EXTENSIONS = frozenset({'mp4', 'avi', 'mkv', 'mov', 'flv', 'wmv'})
    # File extensions (lower-case, without the dot) forwarded as audio parts.
    _BAILIAN_AUDIO_EXTENSIONS = frozenset(
        {'aac', 'amr', 'aiff', 'flac', 'm4a', 'mp3', 'mpeg', 'ogg', 'opus', 'wav', 'webm', 'wma'}
    )

    def _bailian_prepare_messages(self, req_messages: list[dict]) -> tuple[list[dict], bool]:
        """Rewrite media parts in place and prune the history for media requests.

        Args:
            req_messages: Request messages as dicts. Content part dicts are
                modified in place; the list itself is copied.

        Returns:
            ``(messages, use_dashscope)`` where ``use_dashscope`` is True when
            at least one audio part was found.
        """
        messages = req_messages.copy()
        use_dashscope = False
        media_indexes: list[int] = []  # index of the owning message, one entry per media part

        for index, msg in enumerate(messages):
            parts = msg.get('content')
            if not isinstance(parts, list):
                continue
            for part in parts:
                part_type = part.get('type')
                if part_type == 'image_base64':
                    part['image_url'] = {'url': part['image_base64']}
                    part['type'] = 'image_url'
                    del part['image_base64']
                elif part_type == 'file_url' and '.' in (part.get('file_name') or ''):
                    extension = part['file_name'].lower().split('.')[-1]
                    if extension in self._BAILIAN_VIDEO_EXTENSIONS:
                        # Video understanding: sent as a ``video_url`` part.
                        part['type'] = 'video_url'
                        part['video_url'] = {'url': part['file_url']}
                        del part['file_url']
                        del part['file_name']
                        media_indexes.append(index)
                    elif extension in self._BAILIAN_AUDIO_EXTENSIONS:
                        # Audio recognition: the part keeps only the ``audio``
                        # key and is sent through the native dashscope SDK.
                        part['audio'] = part['file_url']
                        del part['file_url']
                        del part['type']
                        del part['file_name']
                        use_dashscope = True
                        media_indexes.append(index)

        if media_indexes:
            if len(media_indexes) > 1:
                # Keep only the last media message. It is excluded from the
                # drop set explicitly, so a message holding several media
                # parts is not removed by its own earlier entries.
                stale = set(media_indexes[:-1]) - {media_indexes[-1]}
                messages = [msg for index, msg in enumerate(messages) if index not in stale]
            # Media requests are sent without messages carrying ``resp_message_id``.
            messages = [msg for msg in messages if 'resp_message_id' not in msg]

        return messages, use_dashscope

    async def _closure_stream(
        self,
        query: pipeline_query.Query,
        req_messages: list[dict],
        use_model: requester.RuntimeLLMModel,
        use_funcs: list[resource_tool.LLMTool] = None,
        extra_args: dict[str, typing.Any] = {},  # passed through as extra_body; not mutated here
        remove_think: bool = False,
    ) -> typing.AsyncGenerator[provider_message.MessageChunk, None]:
        """Stream one completion and yield it as incremental ``MessageChunk`` objects.

        Args:
            query: Current pipeline query (not read by this method).
            req_messages: Request messages as dicts.
            use_model: Model to call; also supplies the API token.
            use_funcs: Tools offered to the model, if any.
            extra_args: Sent as ``extra_body`` on the OpenAI-compatible path.
            remove_think: When True, ``reasoning_content`` deltas are skipped.

        Yields:
            Chunks holding only the newly produced text and/or tool-call
            deltas. ``is_final`` is True once a finish reason is reported.
        """
        self.client.api_key = use_model.provider.token_mgr.get_token()

        args: dict[str, typing.Any] = {'model': use_model.model_entity.name}
        if use_funcs:
            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
            if tools:
                args['tools'] = tools

        messages, use_dashscope = self._bailian_prepare_messages(req_messages)
        args['messages'] = messages
        args['stream'] = True

        chunk_idx = 0
        thinking_started = False
        thinking_ended = False
        role = 'assistant'  # default until the stream reports one

        if use_dashscope:
            # NOTE(review): this SDK call and the loop below are synchronous,
            # so they run without awaiting while the response streams.
            response = dashscope.MultiModalConversation.call(
                api_key=use_model.provider.token_mgr.get_token(),
                model=use_model.model_entity.name,
                messages=messages,
                result_format='message',
                asr_options={
                    # 'language': 'zh' could be set here when the spoken language is known.
                    'enable_lid': True,
                    'enable_itn': False,
                },
                stream=True,
            )

            # Each chunk is treated as the full text so far; only the
            # not-yet-emitted suffix is forwarded.
            emitted_length = 0
            for res in response:
                output = res['output']
                if hasattr(output, 'choices') and output.choices:
                    choice = output.choices[0]
                    text_parts = choice['message'].content
                    cumulative_text = text_parts[0]['text'] if text_parts else ''
                    finish_reason = choice['finish_reason']
                else:
                    cumulative_text = ''
                    finish_reason = None

                # Skip an empty first chunk.
                if chunk_idx == 0 and not cumulative_text:
                    chunk_idx += 1
                    continue

                if len(cumulative_text) > emitted_length:
                    now_content = cumulative_text[emitted_length:]
                    emitted_length = len(cumulative_text)
                else:
                    # Empty or shorter chunk: emit nothing and keep the
                    # offset, so earlier text is never sent twice.
                    now_content = ''

                chunk_data = {
                    'role': role,
                    'is_final': bool(finish_reason) and finish_reason != 'null',
                }
                if now_content:
                    chunk_data['content'] = now_content
                yield provider_message.MessageChunk(**chunk_data)
                chunk_idx += 1
        else:
            # Last seen tool id/name, re-applied to continuation deltas.
            tool_id = ''
            tool_name = ''

            async for chunk in self._req_stream(args, extra_body=extra_args):
                if hasattr(chunk, 'choices') and chunk.choices:
                    choice = chunk.choices[0]
                    delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
                    finish_reason = getattr(choice, 'finish_reason', None)
                else:
                    delta = {}
                    finish_reason = None

                # Take the role from the stream once it is reported.
                if 'role' in delta and delta['role']:
                    role = delta['role']

                delta_content = delta.get('content', '')
                reasoning_content = delta.get('reasoning_content', '')

                if reasoning_content:
                    if remove_think:
                        chunk_idx += 1
                        continue
                    if not thinking_started:
                        # First reasoning delta opens the <think> section.
                        thinking_started = True
                        delta_content = '<think>\n' + reasoning_content
                    else:
                        delta_content = reasoning_content
                elif thinking_started and not thinking_ended and delta_content:
                    # First normal content after reasoning closes the section.
                    thinking_ended = True
                    delta_content = '\n</think>\n' + delta_content

                if delta.get('tool_calls'):
                    for tool_call in delta['tool_calls']:
                        # A missing id (None or '') marks a continuation delta:
                        # keep the id remembered from the opening delta.
                        if tool_call['id']:
                            tool_id = tool_call['id']
                        if tool_call['function']['name'] is not None:
                            tool_name = tool_call['function']['name']
                        if tool_call['type'] is None:
                            tool_call['type'] = 'function'
                        tool_call['id'] = tool_id
                        tool_call['function']['name'] = tool_name
                        tool_call['function']['arguments'] = tool_call['function']['arguments'] or ''

                # Skip an empty first chunk (role only, no payload).
                if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'):
                    chunk_idx += 1
                    continue

                chunk_data = {
                    'role': role,
                    'content': delta_content if delta_content else None,
                    'tool_calls': delta.get('tool_calls'),
                    'is_final': bool(finish_reason),
                }
                # Drop fields that carry no value.
                chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
                yield provider_message.MessageChunk(**chunk_data)
                chunk_idx += 1

View File

@@ -22,8 +22,11 @@ spec:
type: integer
required: true
default: 120
litellm_provider: openai
support_type:
- llm
- rerank
provider_category: maas
execution:
python:
path: ./bailianchatcmpl.py
attr: BailianChatCompletions

View File

@@ -0,0 +1,702 @@
from __future__ import annotations
import asyncio
import typing
import openai
import openai.types.chat.chat_completion as chat_completion_module
import httpx
from .. import errors, requester
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
import langbot_plugin.api.entities.builtin.provider.message as provider_message
class OpenAIChatCompletions(requester.ProviderAPIRequester):
    """Requester for the OpenAI ChatCompletion API."""

    # Annotation only; the client object is assigned elsewhere in the class.
    client: openai.AsyncClient

    # Default requester configuration for the official OpenAI endpoint.
    # NOTE(review): ``timeout`` is presumably in seconds -- confirm.
    default_config: dict[str, typing.Any] = {
        'base_url': 'https://api.openai.com/v1',
        'timeout': 120,
    }
async def initialize(self):
    """Create the async OpenAI client from ``self.requester_cfg``.

    Reads ``base_url`` and ``timeout`` from the requester configuration.
    The API key is left empty at construction time.
    """
    cfg = self.requester_cfg
    # Spaces are removed from the configured URL before it is used.
    endpoint = cfg['base_url'].replace(' ', '')
    request_timeout = cfg['timeout']

    # ``trust_env=True`` is passed so httpx honours environment settings.
    transport = httpx.AsyncClient(trust_env=True, timeout=request_timeout)

    self.client = openai.AsyncClient(
        api_key='',
        base_url=endpoint,
        timeout=request_timeout,
        http_client=transport,
    )
def _mask_api_key(self, api_key: str | None) -> str:
if not api_key:
return ''
if len(api_key) <= 8:
return '****'
return f'{api_key[:4]}...{api_key[-4:]}'
def _infer_model_type(self, model_id: str) -> str:
normalized_model_id = (model_id or '').lower()
embedding_keywords = (
'embedding',
'embed',
'bge-',
'e5-',
'm3e',
'gte-',
'multilingual-e5',
'text-embedding',
)
return 'embedding' if any(keyword in normalized_model_id for keyword in embedding_keywords) else 'llm'
def _infer_model_abilities(self, item: dict[str, typing.Any], model_id: str) -> list[str]:
normalized_model_id = (model_id or '').lower()
abilities: set[str] = set()
def _flatten(value: typing.Any) -> list[str]:
if value is None:
return []
if isinstance(value, str):
return [value.lower()]
if isinstance(value, dict):
flattened: list[str] = []
for nested_value in value.values():
flattened.extend(_flatten(nested_value))
return flattened
if isinstance(value, (list, tuple, set)):
flattened: list[str] = []
for nested_value in value:
flattened.extend(_flatten(nested_value))
return flattened
return [str(value).lower()]
capability_tokens = _flatten(item.get('capabilities'))
capability_tokens.extend(_flatten(item.get('modalities')))
capability_tokens.extend(_flatten(item.get('input_modalities')))
capability_tokens.extend(_flatten(item.get('output_modalities')))
capability_tokens.extend(_flatten(item.get('supported_generation_methods')))
capability_tokens.extend(_flatten(item.get('supported_parameters')))
capability_tokens.extend(_flatten(item.get('architecture')))
combined_tokens = capability_tokens + [normalized_model_id]
vision_keywords = (
'vision',
'image',
'file',
'video',
'multimodal',
'vl',
'ocr',
'omni',
)
function_call_keywords = (
'function',
'tool',
'tools',
'tool_choice',
'tool_call',
'tool-use',
'tool_use',
)
if any(any(keyword in token for keyword in vision_keywords) for token in combined_tokens):
abilities.add('vision')
if any(any(keyword in token for keyword in function_call_keywords) for token in combined_tokens):
abilities.add('func_call')
return sorted(abilities)
def _normalize_modalities(self, value: typing.Any) -> list[str]:
normalized: list[str] = []
def _collect(item: typing.Any):
if item is None:
return
if isinstance(item, str):
for part in item.replace('->', ',').replace('+', ',').split(','):
token = part.strip().lower()
if token and token not in normalized:
normalized.append(token)
return
if isinstance(item, dict):
for nested in item.values():
_collect(nested)
return
if isinstance(item, (list, tuple, set)):
for nested in item:
_collect(nested)
return
_collect(value)
return normalized
def _extract_scan_metadata(self, item: dict[str, typing.Any], model_id: str) -> dict[str, typing.Any]:
    """Extract optional descriptive metadata from one model listing entry.

    Args:
        item: One model entry from a model listing.
        model_id: The model's id; a ``name`` equal to it is not reported
            as a display name.

    Returns:
        A dict with the keys ``display_name``, ``description``,
        ``context_length``, ``owned_by``, ``input_modalities`` and
        ``output_modalities``. Missing or blank text fields and an
        unparsable context length are returned as ``None``; the
        modalities are lists, possibly empty.
    """

    def _non_blank(raw: typing.Any) -> str | None:
        # Keep only strings with visible content; the text is not stripped.
        return raw if isinstance(raw, str) and raw.strip() else None

    display_name = _non_blank(item.get('name'))
    if display_name == model_id:
        display_name = None

    # Fall back to ``top_provider.context_length`` when the top-level value is absent.
    context_length = item.get('context_length')
    top_provider = item.get('top_provider')
    if context_length is None and isinstance(top_provider, dict):
        context_length = top_provider.get('context_length')
    if not isinstance(context_length, int):
        try:
            context_length = None if context_length is None else int(context_length)
        except (TypeError, ValueError):
            context_length = None

    # Modalities: prefer the top-level field, fall back to ``architecture``.
    architecture = item.get('architecture')
    modalities: dict[str, list[str]] = {}
    for key in ('input_modalities', 'output_modalities'):
        found = self._normalize_modalities(item.get(key))
        if not found and isinstance(architecture, dict):
            found = self._normalize_modalities(architecture.get(key))
        modalities[key] = found

    return {
        'display_name': display_name,
        'description': _non_blank(item.get('description')),
        'context_length': context_length,
        'owned_by': _non_blank(item.get('owned_by')),
        'input_modalities': modalities['input_modalities'],
        'output_modalities': modalities['output_modalities'],
    }
async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]:
    """List the models offered by the configured endpoint.

    Sends ``GET {base_url}/models`` and turns every entry that has an
    ``id`` into a model description.

    Args:
        api_key: Optional key sent as a Bearer token.

    Returns:
        A dict with ``models`` (LLMs first, then sorted by lower-cased
        name) and ``debug`` (the request with a masked key, plus the raw
        response payload).

    Raises:
        httpx.HTTPStatusError: If the endpoint answers with an error status.
    """
    headers = {}
    if api_key:
        headers['Authorization'] = f'Bearer {api_key}'

    # Remove spaces the same way ``initialize`` does for the chat client,
    # so a URL that works for chatting also works for scanning.
    base_url = self.requester_cfg['base_url'].replace(' ', '').rstrip('/')
    models_url = f'{base_url}/models'

    async with httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']) as client:
        response = await client.get(models_url, headers=headers)
        response.raise_for_status()
        payload = response.json()

    # Tolerate unexpected payloads: a non-dict body, ``"data": null`` or
    # non-dict entries produce no models instead of an exception. The raw
    # payload is still returned under ``debug`` for inspection.
    entries = payload.get('data') if isinstance(payload, dict) else None
    if not isinstance(entries, list):
        entries = []

    models = []
    for item in entries:
        if not isinstance(item, dict):
            continue
        model_id = item.get('id')
        if not model_id:
            continue
        models.append(
            {
                'id': model_id,
                'name': model_id,
                'type': self._infer_model_type(model_id),
                'abilities': self._infer_model_abilities(item, model_id),
                **self._extract_scan_metadata(item, model_id),
            }
        )

    models.sort(key=lambda model: (model['type'] != 'llm', model['name'].lower()))

    return {
        'models': models,
        'debug': {
            'request': {
                'method': 'GET',
                'url': models_url,
                'headers': {
                    # Never echo the real key back to the caller.
                    'Authorization': f'Bearer {self._mask_api_key(api_key)}' if api_key else '',
                },
            },
            'response': payload,
        },
    }
async def _req(
    self,
    args: dict,
    extra_body: dict = {},
) -> chat_completion_module.ChatCompletion:
    """Send one chat-completion request and return the SDK response.

    Args:
        args: Keyword arguments for ``chat.completions.create``.
        extra_body: Passed through unchanged as ``extra_body``.

    Returns:
        Whatever ``chat.completions.create`` returns for these arguments.
    """
    completions_api = self.client.chat.completions
    response = await completions_api.create(**args, extra_body=extra_body)
    return response
async def _req_stream(
    self,
    args: dict,
    extra_body: dict = {},
):
    """Send one chat-completion request and re-yield the streamed chunks.

    Args:
        args: Keyword arguments for ``chat.completions.create``.
        extra_body: Passed through unchanged as ``extra_body``.

    Yields:
        Each chunk produced by the SDK stream, unmodified.
    """
    stream = await self.client.chat.completions.create(**args, extra_body=extra_body)
    async for piece in stream:
        yield piece
async def _make_msg(
    self,
    chat_completion: chat_completion_module.ChatCompletion,
    remove_think: bool = False,
) -> provider_message.Message:
    """Convert a ``ChatCompletion`` into a provider ``Message``.

    Args:
        chat_completion: The SDK response; only the first choice is used.
        remove_think: When True, chain-of-thought text is left out of the content.

    Returns:
        The first choice's message with its thinking content processed and
        the ``reasoning_content`` field removed.

    Raises:
        TypeError: If ``chat_completion`` is not a ``ChatCompletion``.
    """
    if not isinstance(chat_completion, chat_completion_module.ChatCompletion):
        # Preview via str(): slicing the object itself raises for values
        # such as None or a dict and would hide this message.
        preview = str(chat_completion)[:16]
        raise TypeError(f'Expected ChatCompletion, got {type(chat_completion).__name__}: {preview}')

    chatcmpl_message = chat_completion.choices[0].message.model_dump()

    # Make sure a role is always present.
    if 'role' not in chatcmpl_message or chatcmpl_message['role'] is None:
        chatcmpl_message['role'] = 'assistant'

    # Merge or strip the chain of thought.
    content = chatcmpl_message.get('content', '')
    reasoning_content = chatcmpl_message.get('reasoning_content', None)
    processed_content, _ = await self._process_thinking_content(
        content=content, reasoning_content=reasoning_content, remove_think=remove_think
    )
    chatcmpl_message['content'] = processed_content

    # ``reasoning_content`` must not be passed on to ``Message``.
    chatcmpl_message.pop('reasoning_content', None)

    return provider_message.Message(**chatcmpl_message)
async def _process_thinking_content(
self,
content: str,
reasoning_content: str = None,
remove_think: bool = False,
) -> tuple[str, str]:
"""处理思维链内容
Args:
content: 原始内容
reasoning_content: reasoning_content 字段内容
remove_think: 是否移除思维链
Returns:
(处理后的内容, 提取的思维链内容)
"""
thinking_content = ''
# 1. 从 reasoning_content 提取思维链
if reasoning_content:
thinking_content = reasoning_content
# 2. 从 content 中提取 <think> 标签内容
if content and '<think>' in content and '</think>' in content:
import re
think_pattern = r'<think>(.*?)</think>'
think_matches = re.findall(think_pattern, content, re.DOTALL)
if think_matches:
# 如果已有 reasoning_content则追加
if thinking_content:
thinking_content += '\n' + '\n'.join(think_matches)
else:
thinking_content = '\n'.join(think_matches)
# 移除 content 中的 <think> 标签
content = re.sub(think_pattern, '', content, flags=re.DOTALL).strip()
# 3. 根据 remove_think 参数决定是否保留思维链
if remove_think:
return content, ''
else:
# 如果有思维链内容,将其以 <think> 格式添加到 content 开头
if thinking_content:
content = f'<think>\n{thinking_content}\n</think>\n{content}'.strip()
return content, thinking_content
async def _closure_stream(
    self,
    query: pipeline_query.Query,
    req_messages: list[dict],
    use_model: requester.RuntimeLLMModel,
    use_funcs: list[resource_tool.LLMTool] = None,
    extra_args: dict[str, typing.Any] = {},
    remove_think: bool = False,
) -> provider_message.MessageChunk:
    """Stream one chat completion and yield it as incremental chunks.

    Args:
        query: Current pipeline query (not read by this method).
        req_messages: Request messages as dicts; ``image_base64`` parts
            are rewritten in place to ``image_url`` parts.
        use_model: Model to call; also supplies the API token.
        use_funcs: Tools offered to the model, if any.
        extra_args: Sent as ``extra_body``.
        remove_think: When True, ``reasoning_content`` deltas are skipped.

    Yields:
        ``MessageChunk`` objects holding only the newly received text
        and/or tool-call deltas. Reasoning text is wrapped in ``<think>``
        tags. ``is_final`` is True once a finish reason is reported.
    """
    self.client.api_key = use_model.provider.token_mgr.get_token()

    request: dict[str, typing.Any] = {'model': use_model.model_entity.name}
    if use_funcs:
        openai_tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
        if openai_tools:
            request['tools'] = openai_tools

    # Vision: convert image_base64 parts to the image_url form.
    outgoing = req_messages.copy()
    for message in outgoing:
        parts = message.get('content')
        if not isinstance(parts, list):
            continue
        for part in parts:
            if part['type'] != 'image_base64':
                continue
            part['image_url'] = {'url': part.pop('image_base64')}
            part['type'] = 'image_url'

    request['messages'] = outgoing
    request['stream'] = True

    # Streaming state.
    seen_chunks = 0
    reasoning_opened = False
    reasoning_closed = False
    role = 'assistant'  # default until the stream reports one
    last_tool_id = ''
    last_tool_name = ''

    async for chunk in self._req_stream(request, extra_body=extra_args):
        delta: dict = {}
        finish_reason = None
        if hasattr(chunk, 'choices') and chunk.choices:
            first_choice = chunk.choices[0]
            if hasattr(first_choice, 'delta'):
                delta = first_choice.delta.model_dump()
            finish_reason = getattr(first_choice, 'finish_reason', None)

        # Adopt the role once the stream reports a non-empty one.
        role = delta.get('role') or role

        text = delta.get('content', '')
        reasoning = delta.get('reasoning_content', '')

        if reasoning:
            if remove_think:
                seen_chunks += 1
                continue
            # The first reasoning delta opens the <think> section.
            text = reasoning if reasoning_opened else '<think>\n' + reasoning
            reasoning_opened = True
        elif reasoning_opened and not reasoning_closed and text:
            # First normal content after reasoning closes the section.
            reasoning_closed = True
            text = '\n</think>\n' + text

        tool_calls = delta.get('tool_calls')
        if tool_calls:
            for call in tool_calls:
                if call['id'] and call['function']['name']:
                    last_tool_id = call['id']
                    last_tool_name = call['function']['name']
                else:
                    # Continuation delta: re-apply the remembered id and name.
                    call['id'] = last_tool_id
                    call['function']['name'] = last_tool_name
                if call['type'] is None:
                    call['type'] = 'function'

        # Skip an empty first chunk (role only, no payload).
        if seen_chunks == 0 and not text and not reasoning and not tool_calls:
            seen_chunks += 1
            continue

        # Only fields that carry a value are passed on.
        fields: dict[str, typing.Any] = {'role': role, 'is_final': bool(finish_reason)}
        if text:
            fields['content'] = text
        if tool_calls is not None:
            fields['tool_calls'] = tool_calls

        yield provider_message.MessageChunk(**fields)
        seen_chunks += 1
async def _closure(
    self,
    query: pipeline_query.Query,
    req_messages: list[dict],
    use_model: requester.RuntimeLLMModel,
    use_funcs: list[resource_tool.LLMTool] = None,
    extra_args: dict[str, typing.Any] = {},
    remove_think: bool = False,
) -> tuple[provider_message.Message, dict]:
    """Send one non-streaming chat completion.

    Args:
        query: Current pipeline query (not read by this method).
        req_messages: Request messages as dicts; ``image_base64`` parts
            are rewritten in place to ``image_url`` parts.
        use_model: Model to call; also supplies the API token.
        use_funcs: Tools offered to the model, if any.
        extra_args: Sent as ``extra_body``.
        remove_think: Forwarded to ``_make_msg``.

    Returns:
        ``(message, usage_info)``. ``usage_info`` holds ``input_tokens``,
        ``output_tokens`` and ``total_tokens`` when the response reports
        usage, and is empty otherwise.
    """
    self.client.api_key = use_model.provider.token_mgr.get_token()

    request: dict[str, typing.Any] = {'model': use_model.model_entity.name}
    if use_funcs:
        openai_tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
        if openai_tools:
            request['tools'] = openai_tools

    # Vision: convert image_base64 parts to the image_url form.
    outgoing = req_messages.copy()
    for message in outgoing:
        parts = message.get('content')
        if not isinstance(parts, list):
            continue
        for part in parts:
            if part['type'] != 'image_base64':
                continue
            part['image_url'] = {'url': part.pop('image_base64')}
            part['type'] = 'image_url'
    request['messages'] = outgoing

    response = await self._req(request, extra_body=extra_args)
    message = await self._make_msg(response, remove_think)

    # Token usage; counters reported as None are treated as 0.
    usage_info: dict = {}
    usage = getattr(response, 'usage', None)
    if usage:
        usage_info['input_tokens'] = usage.prompt_tokens or 0
        usage_info['output_tokens'] = usage.completion_tokens or 0
        usage_info['total_tokens'] = usage.total_tokens or 0

    return message, usage_info
async def invoke_llm(
    self,
    query: pipeline_query.Query,
    model: requester.RuntimeLLMModel,
    messages: typing.List[provider_message.Message],
    funcs: typing.List[resource_tool.LLMTool] = None,
    extra_args: dict[str, typing.Any] = {},
    remove_think: bool = False,
) -> tuple[provider_message.Message, dict]:
    """Invoke the LLM and return the reply together with usage info.

    Args:
        query: Current pipeline query, forwarded to ``_closure``.
        model: Model to call.
        messages: Conversation to send.
        funcs: Tools offered to the model, if any.
        extra_args: Extra request arguments, forwarded to ``_closure``.
        remove_think: Whether chain-of-thought text is stripped.

    Returns:
        ``(message, usage_info)`` as produced by ``_closure``.

    Raises:
        errors.RequesterError: For timeouts and every OpenAI SDK error,
            with a message describing the failure category.
    """

    def _describe(exc: Exception) -> str:
        # Prefer the SDK's ``message`` attribute when the error has one.
        return str(exc.message) if hasattr(exc, 'message') else str(exc)

    # ``req_messages`` is local to the requester; the caller's messages are untouched.
    req_messages = []
    for m in messages:
        msg_dict = m.dict(exclude_none=True)
        content = msg_dict.get('content')
        # A content list made only of text parts is collapsed into one string.
        if isinstance(content, list) and all(
            isinstance(part, dict) and part.get('type') == 'text' for part in content
        ):
            msg_dict['content'] = '\n'.join(part['text'] for part in content)
        req_messages.append(msg_dict)

    try:
        msg, usage_info = await self._closure(
            query=query,
            req_messages=req_messages,
            use_model=model,
            use_funcs=funcs,
            extra_args=extra_args,
            remove_think=remove_think,
        )
        return msg, usage_info
    except (asyncio.TimeoutError, openai.APITimeoutError) as e:
        # The SDK reports timeouts as APITimeoutError, a subclass of
        # APIConnectionError, so it must be caught before that handler.
        raise errors.RequesterError('请求超时') from e
    except openai.BadRequestError as e:
        if 'context_length_exceeded' in str(e):
            raise errors.RequesterError(f'上文过长,请重置会话: {_describe(e)}') from e
        raise errors.RequesterError(f'请求参数错误: {_describe(e)}') from e
    except openai.AuthenticationError as e:
        raise errors.RequesterError(f'无效的 api-key: {_describe(e)}') from e
    except openai.NotFoundError as e:
        raise errors.RequesterError(f'请求路径错误: {_describe(e)}') from e
    except openai.RateLimitError as e:
        raise errors.RequesterError(f'请求过于频繁或余额不足: {_describe(e)}') from e
    except openai.APIConnectionError as e:
        raise errors.RequesterError(f'连接错误: {str(e)}') from e
    except openai.APIError as e:
        raise errors.RequesterError(f'请求错误: {_describe(e)}') from e
async def invoke_embedding(
    self,
    model: requester.RuntimeEmbeddingModel,
    input_text: list[str],
    extra_args: dict[str, typing.Any] = {},
) -> tuple[list[list[float]], dict]:
    """Call the embeddings endpoint and return ``(embeddings, usage_info)``.

    Args:
        model: Runtime embedding model; supplies the API token, the model name
            and optional model-level extra arguments.
        input_text: Texts to embed.
        extra_args: Per-call arguments; applied last, so they win over the
            model-level extra arguments.

    Returns:
        A tuple of the embedding vectors (one per response item) and a usage
        dict holding ``prompt_tokens`` / ``total_tokens`` when the response
        reports usage, otherwise an empty dict.

    Raises:
        errors.RequesterError: on timeout or on a bad-request API error.
    """
    self.client.api_key = model.provider.token_mgr.get_token()

    request_kwargs = {
        'model': model.model_entity.name,
        'input': input_text,
    }
    model_level_args = model.model_entity.extra_args
    if model_level_args:
        request_kwargs.update(model_level_args)
    request_kwargs.update(extra_args)

    try:
        response = await self.client.embeddings.create(**request_kwargs)

        # Usage is optional on the response object.
        usage_info = {}
        usage = getattr(response, 'usage', None)
        if usage:
            usage_info['prompt_tokens'] = usage.prompt_tokens or 0
            usage_info['total_tokens'] = usage.total_tokens or 0

        vectors = [entry.embedding for entry in response.data]
        return vectors, usage_info
    except asyncio.TimeoutError:
        raise errors.RequesterError('请求超时')
    except openai.BadRequestError as e:
        raise errors.RequesterError(f'请求参数错误: {e.message}')
async def invoke_llm_stream(
    self,
    query: pipeline_query.Query,
    model: requester.RuntimeLLMModel,
    messages: typing.List[provider_message.Message],
    funcs: typing.List[resource_tool.LLMTool] = None,
    extra_args: dict[str, typing.Any] = {},
    remove_think: bool = False,
) -> provider_message.MessageChunk:
    """Stream an LLM reply, yielding each chunk produced by ``_closure_stream``.

    Every message is serialised with ``dict(exclude_none=True)``. When a
    message's content is a list made up solely of text parts, the parts are
    collapsed into a single newline-joined string before the request is made.

    Raises:
        errors.RequesterError: on timeout or on any handled ``openai`` API error.
    """
    # This list is only used inside the class; external state is synced
    # through query.messages.
    payload = []
    for message in messages:
        serialised = message.dict(exclude_none=True)
        parts = serialised.get('content')
        if isinstance(parts, list) and all(
            isinstance(part, dict) and part.get('type') == 'text' for part in parts
        ):
            # Merge the text-only parts into one string.
            serialised['content'] = '\n'.join(part['text'] for part in parts)
        payload.append(serialised)

    try:
        chunk_stream = self._closure_stream(
            query=query,
            req_messages=payload,
            use_model=model,
            use_funcs=funcs,
            extra_args=extra_args,
            remove_think=remove_think,
        )
        async for chunk in chunk_stream:
            yield chunk
    except asyncio.TimeoutError:
        raise errors.RequesterError('请求超时')
    except openai.BadRequestError as e:
        if 'context_length_exceeded' not in e.message:
            raise errors.RequesterError(f'请求参数错误: {e.message}')
        raise errors.RequesterError(f'上文过长，请重置会话: {e.message}')
    except openai.AuthenticationError as e:
        raise errors.RequesterError(f'无效的 api-key: {e.message}')
    except openai.NotFoundError as e:
        raise errors.RequesterError(f'请求路径错误: {e.message}')
    except openai.RateLimitError as e:
        raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}')
    except openai.APIError as e:
        raise errors.RequesterError(f'请求错误: {e.message}')
async def invoke_rerank(
    self,
    model: requester.RuntimeRerankModel,
    query: str,
    documents: typing.List[str],
    extra_args: dict[str, typing.Any] = {},
) -> typing.List[dict]:
    """Call a standard ``/rerank`` endpoint and return min-max normalised results.

    The request follows the Jina/Cohere/SiliconFlow/Voyage/DashScope style.
    Model-level ``extra_args`` and the per-call ``extra_args`` are merged (the
    per-call values win) and interpreted as follows:

    - ``rerank_url``: full URL override
      (e.g. "https://dashscope.aliyuncs.com/compatible-api/v1/reranks").
    - ``rerank_path``: path appended to ``base_url`` instead of the default
      ``rerank`` (e.g. "reranks").
    - Any other field is merged into the request payload.

    Args:
        model: Runtime rerank model; supplies the API token and model name.
        query: The query the documents are ranked against.
        documents: Candidate documents; only the first 64 are sent.
        extra_args: Per-call overrides, see above.

    Returns:
        The result dicts extracted by ``_parse_rerank_response``. When the
        scores differ by more than 1e-6, each ``relevance_score`` is rescaled
        to the 0..1 range; otherwise the results are returned as received.

    Raises:
        errors.RequesterError: on HTTP error status, timeout, or any other
            failure while sending the request or processing the response.
    """
    api_key = model.provider.token_mgr.get_token()
    base_url = self.requester_cfg.get('base_url', '').rstrip('/')
    timeout = self.requester_cfg.get('timeout', 120)

    merged_args: dict[str, typing.Any] = {}
    if model.model_entity.extra_args:
        merged_args.update(model.model_entity.extra_args)
    if extra_args:
        merged_args.update(extra_args)

    # The routing keys are consumed here so they never leak into the payload.
    rerank_url = merged_args.pop('rerank_url', None)
    rerank_path = merged_args.pop('rerank_path', 'rerank')
    if not rerank_url:
        # Tolerate a configured path such as "/reranks" without emitting "//".
        rerank_url = f'{base_url}/{str(rerank_path).lstrip("/")}'

    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {api_key}',
    }
    payload = {
        'model': model.model_entity.name,
        'query': query,
        'documents': documents[:64],
        'top_n': min(len(documents), 64),
    }
    if merged_args:
        payload.update(merged_args)

    try:
        async with httpx.AsyncClient(trust_env=True, timeout=timeout) as client:
            resp = await client.post(rerank_url, headers=headers, json=payload)
            resp.raise_for_status()
            data = resp.json()

        results = self._parse_rerank_response(data)
        if results:
            # A missing score is treated as 0.0 both when collecting the range
            # and when rescaling, so an incomplete result cannot raise KeyError.
            scores = [r.get('relevance_score', 0.0) for r in results]
            min_score = min(scores)
            score_span = max(scores) - min_score
            if score_span > 1e-6:
                for r, score in zip(results, scores):
                    r['relevance_score'] = (score - min_score) / score_span
        return results
    except httpx.HTTPStatusError as e:
        raise errors.RequesterError(
            f'Rerank request failed: {e.response.status_code} - {e.response.text}'
        ) from e
    except httpx.TimeoutException as e:
        raise errors.RequesterError('Rerank request timed out') from e
    except Exception as e:
        # Boundary handler: everything else is surfaced as a requester error,
        # with the original exception kept as the cause.
        raise errors.RequesterError(f'Rerank request error: {str(e)}') from e
@staticmethod
def _parse_rerank_response(data: dict) -> typing.List[dict]:
"""Parse rerank response from various providers.
Handles:
- Jina/Cohere/SiliconFlow: {"results": [{"index", "relevance_score"}]}
- Voyage AI: {"data": [{"index", "relevance_score"}]}
- DashScope: {"output": {"results": [{"index", "relevance_score"}]}}
"""
if 'results' in data:
return data['results']
if 'data' in data:
return data['data']
if 'output' in data and isinstance(data['output'], dict):
return data['output'].get('results', [])
return []

View File

@@ -22,10 +22,12 @@ spec:
type: integer
required: true
default: 120
# LiteLLM provider prefix - when set, uses unified LiteLLMRequester
litellm_provider: openai
support_type:
- llm
- text-embedding
- rerank
provider_category: manufacturer
execution:
python:
path: ./chatcmpl.py
attr: OpenAIChatCompletions

View File

@@ -22,7 +22,10 @@ spec:
type: integer
required: true
default: 120
litellm_provider: cohere
support_type:
- rerank
provider_category: manufacturer
execution:
python:
path: ./chatcmpl.py
attr: OpenAIChatCompletions

View File

@@ -0,0 +1,17 @@
from __future__ import annotations
import typing
import openai
from . import chatcmpl
class CompShareChatCompletions(chatcmpl.OpenAIChatCompletions):
    """Requester for the CompShare ChatCompletion API."""

    client: openai.AsyncClient

    # Connection defaults for this provider.
    default_config: dict[str, typing.Any] = dict(
        base_url='https://api.modelverse.cn/v1',
        timeout=120,
    )

View File

@@ -22,7 +22,10 @@ spec:
type: integer
required: true
default: 120
litellm_provider: openai
support_type:
- llm
provider_category: maas
execution:
python:
path: ./compsharechatcmpl.py
attr: CompShareChatCompletions

View File

@@ -0,0 +1,67 @@
from __future__ import annotations
import typing
from . import chatcmpl
from .. import errors, requester
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
import langbot_plugin.api.entities.builtin.provider.message as provider_message
class DeepseekChatCompletions(chatcmpl.OpenAIChatCompletions):
    """Requester for the DeepSeek ChatCompletion API."""

    # Connection defaults for this provider.
    default_config: dict[str, typing.Any] = {
        'base_url': 'https://api.deepseek.com',
        'timeout': 120,
    }

    async def _closure(
        self,
        query: pipeline_query.Query,
        req_messages: list[dict],
        use_model: requester.RuntimeLLMModel,
        use_funcs: list[resource_tool.LLMTool] = None,
        extra_args: dict[str, typing.Any] = {},
        remove_think: bool = False,
    ) -> tuple[provider_message.Message, dict]:
        """Send one non-streaming chat request and return ``(message, usage_info)``.

        List-valued message contents are flattened in place to plain text
        (the text parts joined with single spaces) before sending.

        Raises:
            errors.RequesterError: when the request yields ``None``.
        """
        self.client.api_key = use_model.provider.token_mgr.get_token()

        request_args = {'model': use_model.model_entity.name}

        if use_funcs:
            tool_specs = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
            if tool_specs:
                request_args['tools'] = tool_specs

        # DeepSeek does not support multimodal input, so every list-valued
        # content is reduced to text only.
        for entry in req_messages:
            parts = entry.get('content')
            if isinstance(parts, list):
                entry['content'] = ' '.join(part['text'] for part in parts if 'text' in part)
        request_args['messages'] = req_messages

        # Send the request.
        resp = await self._req(request_args, extra_body=extra_args)
        if resp is None:
            raise errors.RequesterError('接口返回为空，请确定模型提供商服务是否正常')

        # Convert the response into a message.
        message = await self._make_msg(resp, remove_think)

        # Token usage is optional on the response object.
        usage_info = {}
        usage = getattr(resp, 'usage', None)
        if usage:
            usage_info = {
                'input_tokens': usage.prompt_tokens or 0,
                'output_tokens': usage.completion_tokens or 0,
                'total_tokens': usage.total_tokens or 0,
            }

        return message, usage_info

View File

@@ -22,7 +22,10 @@ spec:
type: integer
required: true
default: 120
litellm_provider: deepseek
support_type:
- llm
provider_category: manufacturer
execution:
python:
path: ./deepseekchatcmpl.py
attr: DeepseekChatCompletions

View File

@@ -0,0 +1,205 @@
from __future__ import annotations
import typing
import httpx
from . import chatcmpl
import uuid
from .. import requester
import langbot_plugin.api.entities.builtin.provider.message as provider_message
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
class GeminiChatCompletions(chatcmpl.OpenAIChatCompletions):
    """Requester for the Google Gemini API (OpenAI-compatible endpoint)."""

    # Connection defaults for this provider.
    default_config: dict[str, typing.Any] = {
        'base_url': 'https://generativelanguage.googleapis.com/v1beta/openai',
        'timeout': 120,
    }

    async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]:
        """List the models exposed by the Gemini ``v1beta/models`` endpoint.

        Pages are followed through ``nextPageToken`` until the token is empty.

        Args:
            api_key: Optional API key, sent as the ``key`` query parameter.

        Returns:
            A dict with ``models`` (LLMs first, then by lower-cased name) and
            ``debug`` (the request description with the key masked, plus the
            last response payload).

        Raises:
            httpx.HTTPStatusError: when a page request returns an error status.
        """
        models_url = 'https://generativelanguage.googleapis.com/v1beta/models'
        params = {'key': api_key} if api_key else {}
        all_models: list[dict[str, typing.Any]] = []
        next_page_token = ''
        last_payload: dict[str, typing.Any] = {}

        async with httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']) as client:
            while True:
                request_params = dict(params)
                if next_page_token:
                    request_params['pageToken'] = next_page_token

                response = await client.get(models_url, params=request_params)
                response.raise_for_status()
                payload = response.json()
                last_payload = payload

                for item in payload.get('models', []):
                    # `or ''` also covers an explicit null name in the payload.
                    model_name = item.get('name') or ''
                    model_id = model_name.replace('models/', '', 1)
                    if not model_id:
                        continue

                    # Embedding-only models are the ones that can embed but
                    # cannot generate content.
                    supported_methods = item.get('supportedGenerationMethods', []) or []
                    if 'embedContent' in supported_methods and 'generateContent' not in supported_methods:
                        model_type = 'embedding'
                    else:
                        model_type = 'llm'

                    all_models.append(
                        {
                            'id': model_id,
                            'name': model_id,
                            'type': model_type,
                            'abilities': self._infer_model_abilities(item, model_id),
                            'display_name': item.get('displayName') or None,
                            'description': item.get('description') or None,
                            'context_length': item.get('inputTokenLimit'),
                            'input_modalities': self._normalize_modalities(item.get('inputModalities')),
                            'output_modalities': self._normalize_modalities(item.get('outputModalities')),
                        }
                    )

                next_page_token = payload.get('nextPageToken', '')
                if not next_page_token:
                    break

        all_models.sort(key=lambda item: (item['type'] != 'llm', item['name'].lower()))

        return {
            'models': all_models,
            'debug': {
                'request': {
                    'method': 'GET',
                    'url': models_url,
                    'query': {'key': self._mask_api_key(api_key)} if api_key else {},
                },
                'response': last_payload,
            },
        }

    async def _closure_stream(
        self,
        query: pipeline_query.Query,
        req_messages: list[dict],
        use_model: requester.RuntimeLLMModel,
        use_funcs: list[resource_tool.LLMTool] = None,
        extra_args: dict[str, typing.Any] = {},
        remove_think: bool = False,
    ) -> typing.AsyncGenerator[provider_message.MessageChunk, None]:
        """Stream one chat request and yield incremental ``MessageChunk`` objects.

        ``reasoning_content`` deltas are wrapped in ``<think>`` / ``</think>``
        markers, or dropped entirely when ``remove_think`` is true. Tool-call
        deltas are completed with an id, a function name and a type before
        being forwarded.
        """
        self.client.api_key = use_model.provider.token_mgr.get_token()

        args = {}
        args['model'] = use_model.model_entity.name

        if use_funcs:
            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
            if tools:
                args['tools'] = tools

        # Messages for this request. The copy is shallow, so the content parts
        # rewritten below are shared with the caller's dicts.
        messages = req_messages.copy()

        # Vision: rewrite image_base64 parts into the image_url form.
        for msg in messages:
            if 'content' in msg and isinstance(msg['content'], list):
                for me in msg['content']:
                    if me['type'] == 'image_base64':
                        me['image_url'] = {'url': me['image_base64']}
                        me['type'] = 'image_url'
                        del me['image_base64']

        args['messages'] = messages
        args['stream'] = True

        # Streaming state.
        chunk_idx = 0
        thinking_started = False
        thinking_ended = False
        role = 'assistant'  # default role until a chunk supplies one
        tool_id = ''
        tool_name = ''

        async for chunk in self._req_stream(args, extra_body=extra_args):
            # Parse the chunk.
            if hasattr(chunk, 'choices') and chunk.choices:
                choice = chunk.choices[0]
                delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
                finish_reason = getattr(choice, 'finish_reason', None)
            else:
                delta = {}
                finish_reason = None

            # Remember the role once a chunk supplies it and reuse it afterwards.
            if 'role' in delta and delta['role']:
                role = delta['role']

            # Incremental content.
            delta_content = delta.get('content', '')
            reasoning_content = delta.get('reasoning_content', '')

            if reasoning_content:
                # With remove_think set, reasoning chunks are dropped.
                if remove_think:
                    chunk_idx += 1
                    continue

                if not thinking_started:
                    # First reasoning delta: open the <think> block.
                    thinking_started = True
                    delta_content = '<think>\n' + reasoning_content
                else:
                    delta_content = reasoning_content
            elif thinking_started and not thinking_ended and delta_content:
                # Reasoning is over and normal content starts: close the block.
                thinking_ended = True
                delta_content = '\n</think>\n' + delta_content

            # Complete the tool-call deltas.
            if delta.get('tool_calls'):
                for tool_call in delta['tool_calls']:
                    if tool_call['id']:
                        # Keep a provider-supplied id instead of overwriting
                        # it with the empty placeholder.
                        tool_id = tool_call['id']
                    elif tool_id == '':
                        # No id from the provider yet: mint one and reuse it.
                        tool_id = str(uuid.uuid4())
                    if tool_call['function']['name']:
                        tool_name = tool_call['function']['name']
                    tool_call['id'] = tool_id
                    tool_call['function']['name'] = tool_name
                    if tool_call['type'] is None:
                        tool_call['type'] = 'function'

            # Skip an empty first chunk (role only, no payload).
            if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'):
                chunk_idx += 1
                continue

            # Build the MessageChunk from the incremental data only.
            chunk_data = {
                'role': role,
                'content': delta_content if delta_content else None,
                'tool_calls': delta.get('tool_calls'),
                'is_final': bool(finish_reason),
            }
            # Drop the None values.
            chunk_data = {k: v for k, v in chunk_data.items() if v is not None}

            yield provider_message.MessageChunk(**chunk_data)
            chunk_idx += 1

View File

@@ -22,7 +22,10 @@ spec:
type: integer
required: true
default: 120
litellm_provider: gemini
support_type:
- llm
provider_category: manufacturer
execution:
python:
path: ./geminichatcmpl.py
attr: GeminiChatCompletions

View File

@@ -0,0 +1,15 @@
from __future__ import annotations
import typing
from . import ppiochatcmpl
class GiteeAIChatCompletions(ppiochatcmpl.PPIOChatCompletions):
    """Requester for the Gitee AI ChatCompletions API."""

    # Connection defaults for this provider.
    default_config: dict[str, typing.Any] = dict(
        base_url='https://ai.gitee.com/v1',
        timeout=120,
    )

View File

@@ -22,9 +22,12 @@ spec:
type: integer
required: true
default: 120
litellm_provider: openai
support_type:
- llm
- text-embedding
- rerank
provider_category: maas
execution:
python:
path: ./giteeaichatcmpl.py
attr: GiteeAIChatCompletions

View File

@@ -0,0 +1,208 @@
from __future__ import annotations
import openai
import typing
from . import chatcmpl
from .. import requester
import openai.types.chat.chat_completion as chat_completion
import re
import langbot_plugin.api.entities.builtin.provider.message as provider_message
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
class JieKouAIChatCompletions(chatcmpl.OpenAIChatCompletions):
    """Requester for the JieKou AI ChatCompletion API."""

    client: openai.AsyncClient

    # Connection defaults for this provider.
    default_config: dict[str, typing.Any] = {
        'base_url': 'https://api.jiekou.ai/openai',
        'timeout': 120,
    }

    is_think: bool = False

    async def _make_msg(
        self,
        chat_completion: chat_completion.ChatCompletion,
        remove_think: bool,
    ) -> provider_message.Message:
        """Convert the first choice of a completion into a ``Message``.

        The ``reasoning_content`` field, when present, is folded into the
        content (or discarded) by ``_process_thinking_content`` and removed
        before the ``Message`` is built.
        """
        chatcmpl_message = chat_completion.choices[0].message.model_dump()

        # Make sure the role is present and not None.
        if 'role' not in chatcmpl_message or chatcmpl_message['role'] is None:
            chatcmpl_message['role'] = 'assistant'

        # pop() both reads the field and keeps it from reaching Message(**...).
        reasoning_content = chatcmpl_message.pop('reasoning_content', None)

        chatcmpl_message['content'] = await self._process_thinking_content(
            chatcmpl_message['content'], reasoning_content, remove_think
        )

        return provider_message.Message(**chatcmpl_message)

    async def _process_thinking_content(
        self,
        content: str,
        reasoning_content: str = None,
        remove_think: bool = False,
    ) -> str | None:
        """Apply the chain-of-thought policy to a message content.

        Args:
            content: Original content; may be ``None`` (e.g. a reply that only
                carries tool calls).
            reasoning_content: Value of the ``reasoning_content`` field, if any.
            remove_think: Whether the chain of thought should be removed.

        Returns:
            With ``remove_think``: the content with every ``<think>...</think>``
            span stripped (``None`` stays ``None``). Otherwise, when
            ``reasoning_content`` is given: the reasoning wrapped in
            ``<think>`` tags followed by the content. Otherwise the content
            unchanged.
        """
        if remove_think:
            # re.sub() rejects None, so only filter real text.
            if content is not None:
                content = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL)
        elif reasoning_content is not None:
            # A None content contributes nothing after the closing tag.
            content = '<think>\n' + reasoning_content + '\n</think>\n' + (content or '')
        return content

    async def _make_msg_chunk(
        self,
        delta: dict[str, typing.Any],
        idx: int,
    ) -> provider_message.MessageChunk:
        """Build a ``MessageChunk`` from a streaming delta dict.

        The role defaults to ``assistant``, a missing or ``None`` content
        becomes an empty string, and ``reasoning_content`` (when present) is
        appended to the content. ``idx`` is accepted but not used here.
        """
        # Make sure the role is present and not None.
        if 'role' not in delta or delta['role'] is None:
            delta['role'] = 'assistant'

        reasoning_content = delta.get('reasoning_content')

        # .get() also covers deltas that carry no content key at all.
        current_content = delta.get('content')
        delta['content'] = '' if current_content is None else current_content

        if reasoning_content is not None:
            delta['content'] += reasoning_content

        return provider_message.MessageChunk(**delta)

    async def _closure_stream(
        self,
        query: pipeline_query.Query,
        req_messages: list[dict],
        use_model: requester.RuntimeLLMModel,
        use_funcs: list[resource_tool.LLMTool] = None,
        extra_args: dict[str, typing.Any] = {},
        remove_think: bool = False,
    ) -> provider_message.Message | typing.AsyncGenerator[provider_message.MessageChunk, None]:
        """Stream one chat request and yield incremental ``MessageChunk`` objects.

        With ``remove_think`` set, the chunks from the one containing
        ``<think>`` through the ``</think>`` chunk and the ``\\n\\n`` chunk that
        follows it are suppressed.
        """
        self.client.api_key = use_model.provider.token_mgr.get_token()

        args = {}
        args['model'] = use_model.model_entity.name

        if use_funcs:
            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
            if tools:
                args['tools'] = tools

        # Messages for this request. The copy is shallow, so the content parts
        # rewritten below are shared with the caller's dicts.
        messages = req_messages.copy()

        # Vision: rewrite image_base64 parts into the image_url form.
        for msg in messages:
            if 'content' in msg and isinstance(msg['content'], list):
                for me in msg['content']:
                    if me['type'] == 'image_base64':
                        me['image_url'] = {'url': me['image_base64']}
                        me['type'] = 'image_url'
                        del me['image_base64']

        args['messages'] = messages
        args['stream'] = True

        # Streaming state.
        chunk_idx = 0
        thinking_started = False
        thinking_ended = False
        role = 'assistant'  # default role until a chunk supplies one
        # Initialised up front so a continuation delta that arrives before any
        # id/name was seen cannot hit an unbound local.
        tool_id = ''
        tool_name = ''

        async for chunk in self._req_stream(args, extra_body=extra_args):
            # Parse the chunk.
            if hasattr(chunk, 'choices') and chunk.choices:
                choice = chunk.choices[0]
                delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
                finish_reason = getattr(choice, 'finish_reason', None)
            else:
                delta = {}
                finish_reason = None

            # Remember the role once a chunk supplies it and reuse it afterwards.
            if 'role' in delta and delta['role']:
                role = delta['role']

            # Incremental content.
            delta_content = delta.get('content', '')

            if remove_think:
                # .get() keeps chunks without choices (empty delta) from
                # raising KeyError; they simply bypass the filter.
                think_text = delta.get('content')
                if think_text is not None:
                    if '<think>' in think_text and not thinking_started and not thinking_ended:
                        thinking_started = True
                        continue
                    elif think_text == r'</think>' and not thinking_ended:
                        thinking_ended = True
                        continue
                    elif thinking_ended and think_text == '\n\n' and thinking_started:
                        thinking_started = False
                        continue
                    elif thinking_started and not thinking_ended:
                        continue

            # Complete the tool-call deltas from the last seen id and name.
            if delta.get('tool_calls'):
                for tool_call in delta['tool_calls']:
                    if tool_call['id'] and tool_call['function']['name']:
                        tool_id = tool_call['id']
                        tool_name = tool_call['function']['name']

                    if tool_call['id'] is None:
                        tool_call['id'] = tool_id
                    if tool_call['function']['name'] is None:
                        tool_call['function']['name'] = tool_name
                    if tool_call['function']['arguments'] is None:
                        tool_call['function']['arguments'] = ''
                    if tool_call['type'] is None:
                        tool_call['type'] = 'function'

            # Skip an empty first chunk (role only, no payload).
            if chunk_idx == 0 and not delta_content and not delta.get('tool_calls'):
                chunk_idx += 1
                continue

            # Build the MessageChunk from the incremental data only.
            chunk_data = {
                'role': role,
                'content': delta_content if delta_content else None,
                'tool_calls': delta.get('tool_calls'),
                'is_final': bool(finish_reason),
            }
            # Drop the None values.
            chunk_data = {k: v for k, v in chunk_data.items() if v is not None}

            yield provider_message.MessageChunk(**chunk_data)
            chunk_idx += 1

View File

@@ -29,8 +29,11 @@ spec:
type: int
required: true
default: 120
litellm_provider: openai
support_type:
- llm
- text-embedding
provider_category: maas
execution:
python:
path: ./jiekouaichatcmpl.py
attr: JieKouAIChatCompletions

View File

@@ -22,7 +22,10 @@ spec:
type: integer
required: true
default: 120
litellm_provider: jina
support_type:
- rerank
provider_category: manufacturer
execution:
python:
path: ./chatcmpl.py
attr: OpenAIChatCompletions

View File

@@ -0,0 +1,17 @@
from __future__ import annotations
import typing
import openai
from . import chatcmpl
class LmStudioChatCompletions(chatcmpl.OpenAIChatCompletions):
    """Requester for the LM Studio ChatCompletion API."""

    client: openai.AsyncClient

    # Connection defaults; the base URL points at a server on localhost.
    default_config: dict[str, typing.Any] = dict(
        base_url='http://127.0.0.1:1234/v1',
        timeout=120,
    )

View File

@@ -22,8 +22,11 @@ spec:
type: integer
required: true
default: 120
litellm_provider: openai
support_type:
- llm
- text-embedding
provider_category: self-hosted
execution:
python:
path: ./lmstudiochatcmpl.py
attr: LmStudioChatCompletions

View File

@@ -0,0 +1,561 @@
from __future__ import annotations
import asyncio
import typing
import openai
import openai.types.chat.chat_completion as chat_completion
import httpx
from .. import entities, errors, requester
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
import langbot_plugin.api.entities.builtin.provider.message as provider_message
class ModelScopeChatCompletions(requester.ProviderAPIRequester):
"""ModelScope ChatCompletion API 请求器"""
client: openai.AsyncClient
default_config: dict[str, typing.Any] = {
'base_url': 'https://api-inference.modelscope.cn/v1',
'timeout': 120,
}
async def initialize(self):
    """Create the OpenAI async client from ``requester_cfg``.

    The client starts with an empty API key; the request methods assign the
    real token before each call.
    """
    cfg = self.requester_cfg
    endpoint = cfg['base_url']
    request_timeout = cfg['timeout']

    # trust_env=True lets httpx read its settings from the environment.
    transport = httpx.AsyncClient(trust_env=True, timeout=cfg['timeout'])

    self.client = openai.AsyncClient(
        api_key='',
        base_url=endpoint,
        timeout=request_timeout,
        http_client=transport,
    )
def _mask_api_key(self, api_key: str | None) -> str:
if not api_key:
return ''
if len(api_key) <= 8:
return '****'
return f'{api_key[:4]}...{api_key[-4:]}'
def _infer_model_type(self, model_id: str) -> str:
normalized_model_id = (model_id or '').lower()
embedding_keywords = (
'embedding',
'embed',
'bge-',
'e5-',
'm3e',
'gte-',
'multilingual-e5',
'text-embedding',
)
return 'embedding' if any(keyword in normalized_model_id for keyword in embedding_keywords) else 'llm'
def _infer_model_abilities(self, item: dict[str, typing.Any], model_id: str) -> list[str]:
normalized_model_id = (model_id or '').lower()
abilities: set[str] = set()
def _flatten(value: typing.Any) -> list[str]:
if value is None:
return []
if isinstance(value, str):
return [value.lower()]
if isinstance(value, dict):
flattened: list[str] = []
for nested_value in value.values():
flattened.extend(_flatten(nested_value))
return flattened
if isinstance(value, (list, tuple, set)):
flattened: list[str] = []
for nested_value in value:
flattened.extend(_flatten(nested_value))
return flattened
return [str(value).lower()]
capability_tokens = _flatten(item.get('capabilities'))
capability_tokens.extend(_flatten(item.get('modalities')))
capability_tokens.extend(_flatten(item.get('input_modalities')))
capability_tokens.extend(_flatten(item.get('output_modalities')))
capability_tokens.extend(_flatten(item.get('supported_generation_methods')))
capability_tokens.extend(_flatten(item.get('supported_parameters')))
capability_tokens.extend(_flatten(item.get('architecture')))
combined_tokens = capability_tokens + [normalized_model_id]
vision_keywords = ('vision', 'image', 'file', 'video', 'multimodal', 'vl', 'ocr', 'omni')
function_call_keywords = ('function', 'tool', 'tools', 'tool_choice', 'tool_call', 'tool-use', 'tool_use')
if any(any(keyword in token for keyword in vision_keywords) for token in combined_tokens):
abilities.add('vision')
if any(any(keyword in token for keyword in function_call_keywords) for token in combined_tokens):
abilities.add('func_call')
return sorted(abilities)
def _normalize_modalities(self, value: typing.Any) -> list[str]:
normalized: list[str] = []
def _collect(item: typing.Any):
if item is None:
return
if isinstance(item, str):
for part in item.replace('->', ',').replace('+', ',').split(','):
token = part.strip().lower()
if token and token not in normalized:
normalized.append(token)
return
if isinstance(item, dict):
for nested in item.values():
_collect(nested)
return
if isinstance(item, (list, tuple, set)):
for nested in item:
_collect(nested)
return
_collect(value)
return normalized
def _extract_scan_metadata(self, item: dict[str, typing.Any], model_id: str) -> dict[str, typing.Any]:
display_name = item.get('name')
if not isinstance(display_name, str) or not display_name.strip() or display_name == model_id:
display_name = ''
description = item.get('description')
if not isinstance(description, str) or not description.strip():
description = ''
context_length = item.get('context_length')
if context_length is None and isinstance(item.get('top_provider'), dict):
context_length = item['top_provider'].get('context_length')
if not isinstance(context_length, int):
try:
context_length = int(context_length) if context_length is not None else None
except (TypeError, ValueError):
context_length = None
input_modalities = self._normalize_modalities(item.get('input_modalities'))
output_modalities = self._normalize_modalities(item.get('output_modalities'))
if isinstance(item.get('architecture'), dict):
if not input_modalities:
input_modalities = self._normalize_modalities(item['architecture'].get('input_modalities'))
if not output_modalities:
output_modalities = self._normalize_modalities(item['architecture'].get('output_modalities'))
owned_by = item.get('owned_by')
if not isinstance(owned_by, str) or not owned_by.strip():
owned_by = ''
return {
'display_name': display_name or None,
'description': description or None,
'context_length': context_length,
'owned_by': owned_by or None,
'input_modalities': input_modalities,
'output_modalities': output_modalities,
}
async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]:
    """Fetch ``{base_url}/models`` and describe every entry that has an id.

    Args:
        api_key: Optional key, sent as a Bearer token when given.

    Returns:
        A dict with ``models`` (LLMs first, then by lower-cased name) and
        ``debug`` (the request description with the key masked, plus the raw
        response payload).

    Raises:
        httpx.HTTPStatusError: when the endpoint returns an error status.
    """
    request_headers = {'Authorization': f'Bearer {api_key}'} if api_key else {}
    models_url = f'{self.requester_cfg["base_url"].rstrip("/")}/models'

    async with httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']) as client:
        response = await client.get(models_url, headers=request_headers)
        response.raise_for_status()
        payload = response.json()

    # Entries without an id are ignored.
    identified = [entry for entry in payload.get('data', []) if entry.get('id')]
    described = [
        {
            'id': entry.get('id'),
            'name': entry.get('id'),
            'type': self._infer_model_type(entry.get('id')),
            'abilities': self._infer_model_abilities(entry, entry.get('id')),
            **self._extract_scan_metadata(entry, entry.get('id')),
        }
        for entry in identified
    ]
    models = sorted(described, key=lambda model: (model['type'] != 'llm', model['name'].lower()))

    masked_auth = f'Bearer {self._mask_api_key(api_key)}' if api_key else ''
    return {
        'models': models,
        'debug': {
            'request': {
                'method': 'GET',
                'url': models_url,
                'headers': {
                    'Authorization': masked_auth,
                },
            },
            'response': payload,
        },
    }
async def _req(
self,
query: pipeline_query.Query,
args: dict,
extra_body: dict = {},
remove_think: bool = False,
) -> list[dict[str, typing.Any]]:
args['stream'] = True
chunk = None
pending_content = ''
tool_calls = []
resp_gen: openai.AsyncStream = await self.client.chat.completions.create(**args, extra_body=extra_body)
chunk_idx = 0
thinking_started = False
thinking_ended = False
tool_id = ''
tool_name = ''
message_delta = {}
async for chunk in resp_gen:
if not chunk or not chunk.id or not chunk.choices or not chunk.choices[0] or not chunk.choices[0].delta:
continue
delta = chunk.choices[0].delta.model_dump() if hasattr(chunk.choices[0], 'delta') else {}
reasoning_content = delta.get('reasoning_content')
# 处理 reasoning_content
if reasoning_content:
# accumulated_reasoning += reasoning_content
# 如果设置了 remove_think跳过 reasoning_content
if remove_think:
chunk_idx += 1
continue
# 第一次出现 reasoning_content添加 <think> 开始标签
if not thinking_started:
thinking_started = True
pending_content += '<think>\n' + reasoning_content
else:
# 继续输出 reasoning_content
pending_content += reasoning_content
elif thinking_started and not thinking_ended and delta.get('content'):
# reasoning_content 结束normal content 开始,添加 </think> 结束标签
thinking_ended = True
pending_content += '\n</think>\n' + delta.get('content')
if delta.get('content') is not None:
pending_content += delta.get('content')
if delta.get('tool_calls') is not None:
for tool_call in delta.get('tool_calls'):
if tool_call['id'] != '':
tool_id = tool_call['id']
if tool_call['function']['name'] is not None:
tool_name = tool_call['function']['name']
if tool_call['function']['arguments'] is None:
continue
tool_call['id'] = tool_id
tool_call['name'] = tool_name
for tc in tool_calls:
if tc['index'] == tool_call['index']:
tc['function']['arguments'] += tool_call['function']['arguments']
break
else:
tool_calls.append(tool_call)
if chunk.choices[0].finish_reason is not None:
break
message_delta['content'] = pending_content
message_delta['role'] = 'assistant'
message_delta['tool_calls'] = tool_calls if tool_calls else None
return [message_delta]
async def _make_msg(
    self,
    chat_completion: list[dict[str, typing.Any]],
) -> provider_message.Message:
    """Build a ``Message`` from the first assembled message dict.

    A missing or ``None`` role is filled in as ``'assistant'`` on that dict
    before the ``Message`` is constructed.
    """
    payload = chat_completion[0]

    # Guarantee a usable role.
    if payload.get('role') is None:
        payload['role'] = 'assistant'

    return provider_message.Message(**payload)
async def _closure(
    self,
    query: pipeline_query.Query,
    req_messages: list[dict],
    use_model: requester.RuntimeLLMModel,
    use_funcs: list[resource_tool.LLMTool] = None,
    extra_args: dict[str, typing.Any] = {},
    remove_think: bool = False,
) -> tuple[provider_message.Message, dict]:
    """Send one chat request and return ``(message, usage_info)``.

    The request goes through ``_req``, which streams and assembles the reply;
    no usage data is collected on that path, so ``usage_info`` is always an
    empty dict.
    """
    self.client.api_key = use_model.provider.token_mgr.get_token()

    request_args = {'model': use_model.model_entity.name}

    if use_funcs:
        tool_specs = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
        if tool_specs:
            request_args['tools'] = tool_specs

    # Messages for this request. The copy is shallow, so the content parts
    # rewritten below are shared with the caller's dicts.
    outgoing = req_messages.copy()

    # Vision: rewrite image_base64 parts into the image_url form.
    for entry in outgoing:
        parts = entry.get('content')
        if not isinstance(parts, list):
            continue
        for part in parts:
            if part['type'] != 'image_base64':
                continue
            part['image_url'] = {'url': part.pop('image_base64')}
            part['type'] = 'image_url'

    request_args['messages'] = outgoing

    # Send the request, then convert the assembled result.
    assembled = await self._req(query, request_args, extra_body=extra_args, remove_think=remove_think)
    message = await self._make_msg(assembled)

    # ModelScope uses streaming, usage info not available.
    return message, {}
async def _req_stream(
    self,
    args: dict,
    extra_body: dict = {},
) -> chat_completion.ChatCompletion:
    """Issue the chat request and re-yield every chunk of the response stream.

    ``args`` is passed to ``chat.completions.create`` unchanged; the awaited
    result is iterated asynchronously.
    """
    chunk_stream = await self.client.chat.completions.create(**args, extra_body=extra_body)
    async for piece in chunk_stream:
        yield piece
async def _closure_stream(
    self,
    query: pipeline_query.Query,
    req_messages: list[dict],
    use_model: requester.RuntimeLLMModel,
    use_funcs: list[resource_tool.LLMTool] = None,
    extra_args: dict[str, typing.Any] = {},
    remove_think: bool = False,
) -> provider_message.Message | typing.AsyncGenerator[provider_message.MessageChunk, None]:
    """Send a streaming chat completion request and yield incremental chunks.

    The client's API key is refreshed from the model's token manager, the
    request arguments are assembled (model name, optional tools, messages,
    ``stream=True``) and every streamed chunk is converted into a
    ``provider_message.MessageChunk`` that carries only the delta.

    Args:
        query: Not referenced by this method.
        req_messages: Message dicts to send. The list is shallow-copied, so
            ``image_base64`` parts are rewritten in place on the caller's dicts.
        use_model: Supplies the model name and the provider token manager.
        use_funcs: Optional tools; converted with
            ``self.ap.tool_mgr.generate_tools_for_openai``.
        extra_args: Forwarded to ``_req_stream`` as ``extra_body``.
        remove_think: When true, chunks carrying ``reasoning_content`` are dropped.

    Yields:
        ``provider_message.MessageChunk`` objects; ``is_final`` is true on the
        chunk that carries a ``finish_reason``.
    """
    self.client.api_key = use_model.provider.token_mgr.get_token()

    args = {}
    args['model'] = use_model.model_entity.name

    if use_funcs:
        tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
        if tools:
            args['tools'] = tools

    # Messages for this request.
    messages = req_messages.copy()

    # Vision: rewrite internal image_base64 parts into OpenAI-style image_url parts.
    for msg in messages:
        if 'content' in msg and isinstance(msg['content'], list):
            for me in msg['content']:
                if me['type'] == 'image_base64':
                    me['image_url'] = {'url': me['image_base64']}
                    me['type'] = 'image_url'
                    del me['image_base64']

    args['messages'] = messages
    args['stream'] = True

    # Streaming state.
    chunk_idx = 0
    thinking_started = False
    thinking_ended = False
    role = 'assistant'  # default role until a chunk supplies one
    # Last tool-call id / name seen. Continuation deltas may carry an empty or
    # missing id/name, in which case the remembered value is reused.
    tool_id = None
    tool_name = None

    async for chunk in self._req_stream(args, extra_body=extra_args):
        # Parse the chunk.
        if hasattr(chunk, 'choices') and chunk.choices:
            choice = chunk.choices[0]
            delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
            finish_reason = getattr(choice, 'finish_reason', None)
        else:
            delta = {}
            finish_reason = None

        # Take the role from the first chunk that has one and keep using it.
        if 'role' in delta and delta['role']:
            role = delta['role']

        # Incremental content.
        delta_content = delta.get('content', '')
        reasoning_content = delta.get('reasoning_content', '')

        if reasoning_content:
            # With remove_think set, reasoning chunks are skipped entirely.
            if remove_think:
                chunk_idx += 1
                continue

            if not thinking_started:
                # First reasoning chunk: open the <think> block.
                thinking_started = True
                delta_content = '<think>\n' + reasoning_content
            else:
                # Subsequent reasoning chunks are passed through as-is.
                delta_content = reasoning_content
        elif thinking_started and not thinking_ended and delta_content:
            # Reasoning finished and normal content begins: close the block.
            thinking_ended = True
            delta_content = '\n</think>\n' + delta_content

        # Tool-call deltas: fill in the fields that continuation chunks omit.
        if delta.get('tool_calls'):
            for tool_call in delta['tool_calls']:
                # Only a non-empty id starts/refreshes the remembered id; both
                # '' and None mean "same call as before".
                if tool_call['id']:
                    tool_id = tool_call['id']
                elif tool_id is not None:
                    tool_call['id'] = tool_id

                if tool_call['function']['name'] is not None:
                    tool_name = tool_call['function']['name']
                elif tool_name is not None:
                    tool_call['function']['name'] = tool_name

                if tool_call['type'] is None:
                    tool_call['type'] = 'function'
                tool_call['function']['arguments'] = (
                    '' if tool_call['function']['arguments'] is None else tool_call['function']['arguments']
                )

        # Skip an empty first chunk (role only, no payload).
        if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'):
            chunk_idx += 1
            continue

        # Build the MessageChunk from the delta only, dropping None fields.
        chunk_data = {
            'role': role,
            'content': delta_content if delta_content else None,
            'tool_calls': delta.get('tool_calls'),
            'is_final': bool(finish_reason),
        }
        chunk_data = {k: v for k, v in chunk_data.items() if v is not None}

        yield provider_message.MessageChunk(**chunk_data)
        chunk_idx += 1
async def invoke_llm(
    self,
    query: pipeline_query.Query,
    model: requester.RuntimeLLMModel,
    messages: typing.List[provider_message.Message],
    funcs: typing.List[resource_tool.LLMTool] = None,
    extra_args: dict[str, typing.Any] = {},
    remove_think: bool = False,
) -> provider_message.Message:
    """Run a non-streaming LLM request and translate SDK errors.

    Each message is dumped to a dict (``exclude_none=True``); content lists
    made only of text parts are joined into one newline-separated string. The
    prepared messages are then passed to ``self._closure``.

    Args:
        query: Forwarded to ``_closure``.
        model: Forwarded to ``_closure`` as ``use_model``.
        messages: Conversation messages to send.
        funcs: Optional tools, forwarded as ``use_funcs``.
        extra_args: Forwarded to ``_closure``.
        remove_think: Forwarded to ``_closure``.

    Returns:
        Whatever ``self._closure`` returns.
        NOTE(review): the visible end of ``_closure`` returns a
        ``(message, usage_info)`` pair, so the annotation may be stale — confirm.

    Raises:
        errors.RequesterError: On timeout or any ``openai.APIError``; the
            original exception is attached as ``__cause__``.
    """
    # req_messages is only used inside this class; external state is kept in query.messages.
    req_messages = []
    for m in messages:
        msg_dict = m.dict(exclude_none=True)
        content = msg_dict.get('content')
        if isinstance(content, list):
            # Collapse the list only when every part is a text part.
            if all(isinstance(part, dict) and part.get('type') == 'text' for part in content):
                msg_dict['content'] = '\n'.join(part['text'] for part in content)
        req_messages.append(msg_dict)

    try:
        return await self._closure(
            query=query,
            req_messages=req_messages,
            use_model=model,
            use_funcs=funcs,
            extra_args=extra_args,
            remove_think=remove_think,
        )
    except asyncio.TimeoutError as e:
        raise errors.RequesterError('请求超时') from e
    except openai.BadRequestError as e:
        if 'context_length_exceeded' in e.message:
            raise errors.RequesterError(f'上文过长,请重置会话: {e.message}') from e
        else:
            raise errors.RequesterError(f'请求参数错误: {e.message}') from e
    except openai.AuthenticationError as e:
        raise errors.RequesterError(f'无效的 api-key: {e.message}') from e
    except openai.NotFoundError as e:
        raise errors.RequesterError(f'请求路径错误: {e.message}') from e
    except openai.RateLimitError as e:
        raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}') from e
    except openai.APIError as e:
        raise errors.RequesterError(f'请求错误: {e.message}') from e
async def invoke_llm_stream(
    self,
    query: pipeline_query.Query,
    model: requester.RuntimeLLMModel,
    messages: typing.List[provider_message.Message],
    funcs: typing.List[resource_tool.LLMTool] = None,
    extra_args: dict[str, typing.Any] = {},
    remove_think: bool = False,
) -> typing.AsyncGenerator[provider_message.MessageChunk, None]:
    """Run a streaming LLM request and translate SDK errors.

    Messages are prepared exactly as in the non-streaming path (dumped with
    ``exclude_none=True``; all-text content lists are joined with newlines)
    and passed to ``self._closure_stream``, whose items are re-yielded.

    Args:
        query: Forwarded to ``_closure_stream``.
        model: Forwarded to ``_closure_stream`` as ``use_model``.
        messages: Conversation messages to send.
        funcs: Optional tools, forwarded as ``use_funcs``.
        extra_args: Forwarded to ``_closure_stream``.
        remove_think: Forwarded to ``_closure_stream``.

    Yields:
        Every item produced by ``self._closure_stream``.

    Raises:
        errors.RequesterError: On timeout or any ``openai.APIError``; the
            original exception is attached as ``__cause__``.
    """
    # req_messages is only used inside this class; external state is kept in query.messages.
    req_messages = []
    for m in messages:
        msg_dict = m.dict(exclude_none=True)
        content = msg_dict.get('content')
        if isinstance(content, list):
            # Collapse the list only when every part is a text part.
            if all(isinstance(part, dict) and part.get('type') == 'text' for part in content):
                msg_dict['content'] = '\n'.join(part['text'] for part in content)
        req_messages.append(msg_dict)

    try:
        async for item in self._closure_stream(
            query=query,
            req_messages=req_messages,
            use_model=model,
            use_funcs=funcs,
            extra_args=extra_args,
            remove_think=remove_think,
        ):
            yield item
    except asyncio.TimeoutError as e:
        raise errors.RequesterError('请求超时') from e
    except openai.BadRequestError as e:
        if 'context_length_exceeded' in e.message:
            raise errors.RequesterError(f'上文过长,请重置会话: {e.message}') from e
        else:
            raise errors.RequesterError(f'请求参数错误: {e.message}') from e
    except openai.AuthenticationError as e:
        raise errors.RequesterError(f'无效的 api-key: {e.message}') from e
    except openai.NotFoundError as e:
        raise errors.RequesterError(f'请求路径错误: {e.message}') from e
    except openai.RateLimitError as e:
        raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}') from e
    except openai.APIError as e:
        raise errors.RequesterError(f'请求错误: {e.message}') from e

View File

@@ -29,7 +29,10 @@ spec:
type: int
required: true
default: 120
litellm_provider: openai
support_type:
- llm
provider_category: maas
execution:
python:
path: ./modelscopechatcmpl.py
attr: ModelScopeChatCompletions

View File

@@ -0,0 +1,67 @@
from __future__ import annotations
import typing
from . import chatcmpl
from .. import requester
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
import langbot_plugin.api.entities.builtin.provider.message as provider_message
class MoonshotChatCompletions(chatcmpl.OpenAIChatCompletions):
    """Requester for the Moonshot ChatCompletion API."""

    # Default requester settings: endpoint base URL and timeout value.
    default_config: dict[str, typing.Any] = {
        'base_url': 'https://api.moonshot.cn/v1',
        'timeout': 120,
    }

    async def _closure(
        self,
        query: pipeline_query.Query,
        req_messages: list[dict],
        use_model: requester.RuntimeLLMModel,
        use_funcs: list[resource_tool.LLMTool] = None,
        extra_args: dict[str, typing.Any] = {},
        remove_think: bool = False,
    ) -> tuple[provider_message.Message, dict]:
        """Send one non-streaming chat completion request.

        Args:
            query: Not referenced by this method.
            req_messages: Message dicts to send. They are modified in place:
                list-valued ``content`` is flattened to a single string.
            use_model: Supplies the model name and the provider token manager.
            use_funcs: Optional tools; converted with
                ``self.ap.tool_mgr.generate_tools_for_openai``.
            extra_args: Forwarded to ``self._req`` as ``extra_body``.
            remove_think: Forwarded to ``self._make_msg``.

        Returns:
            ``(message, usage_info)``. ``usage_info`` holds ``input_tokens``,
            ``output_tokens`` and ``total_tokens`` when the response has a
            truthy ``usage`` attribute, and is empty otherwise.
        """
        self.client.api_key = use_model.provider.token_mgr.get_token()

        args = {}
        args['model'] = use_model.model_entity.name

        if use_funcs:
            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
            if tools:
                args['tools'] = tools

        # Messages for this request.
        messages = req_messages

        # Requests are sent as plain text: flatten multi-part content into one
        # string. Parts without a 'text' field (e.g. images) are dropped
        # instead of raising KeyError.
        for m in messages:
            if 'content' in m and isinstance(m['content'], list):
                m['content'] = ' '.join(c['text'] for c in m['content'] if 'text' in c)

        args['messages'] = messages

        # Send the request.
        resp = await self._req(args, extra_body=extra_args)

        # Convert the response into a message.
        message = await self._make_msg(resp, remove_think)

        # Extract token usage from the response.
        usage_info = {}
        if hasattr(resp, 'usage') and resp.usage:
            usage_info['input_tokens'] = resp.usage.prompt_tokens or 0
            usage_info['output_tokens'] = resp.usage.completion_tokens or 0
            usage_info['total_tokens'] = resp.usage.total_tokens or 0

        return message, usage_info

View File

@@ -22,7 +22,10 @@ spec:
type: integer
required: true
default: 120
litellm_provider: moonshot
support_type:
- llm
provider_category: manufacturer
execution:
python:
path: ./moonshotchatcmpl.py
attr: MoonshotChatCompletions

View File

@@ -0,0 +1,17 @@
from __future__ import annotations
import typing
import openai
from . import chatcmpl
class NewAPIChatCompletions(chatcmpl.OpenAIChatCompletions):
    """Requester for the New API ChatCompletion API."""

    # Annotation only; no client is assigned in this class body.
    client: openai.AsyncClient

    # Default requester settings: endpoint base URL and timeout value.
    default_config: dict[str, typing.Any] = dict(
        base_url='http://localhost:3000/v1',
        timeout=120,
    )

View File

@@ -22,8 +22,11 @@ spec:
type: integer
required: true
default: 120
litellm_provider: openai
support_type:
- llm
- text-embedding
provider_category: maas
execution:
python:
path: ./newapichatcmpl.py
attr: NewAPIChatCompletions

View File

@@ -0,0 +1,314 @@
from __future__ import annotations
import asyncio
import os
import typing
from typing import Union, Mapping, Any, AsyncIterator
import uuid
import json
import ollama
import httpx
from .. import errors, requester
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
import langbot_plugin.api.entities.builtin.provider.message as provider_message
# Module-level name constant. Not referenced in the visible part of this file;
# presumably the requester's registry identifier (TODO confirm).
REQUESTER_NAME: str = 'ollama-chat'


class OllamaChatCompletions(requester.ProviderAPIRequester):
    """Requester for the Ollama chat completion API."""

    # Async Ollama client; assigned in initialize().
    client: ollama.AsyncClient

    # Default requester settings: Ollama server base URL and timeout value.
    default_config: dict[str, typing.Any] = {
        'base_url': 'http://127.0.0.1:11434',
        'timeout': 120,
    }
async def initialize(self):
    """Create the async Ollama client from the requester configuration.

    The server address is handed to the client through its ``host`` argument
    rather than by overwriting the process-wide ``OLLAMA_HOST`` environment
    variable, so constructing this requester no longer changes global state.
    """
    self.client = ollama.AsyncClient(
        host=self.requester_cfg['base_url'],
        timeout=self.requester_cfg['timeout'],
    )
def _infer_model_type(self, model_id: str) -> str:
normalized_model_id = (model_id or '').lower()
embedding_keywords = ('embedding', 'embed', 'bge-', 'e5-', 'm3e', 'gte-', 'text-embedding')
return 'embedding' if any(keyword in normalized_model_id for keyword in embedding_keywords) else 'llm'
def _infer_model_abilities(self, item: dict[str, typing.Any], model_id: str) -> list[str]:
normalized_model_id = (model_id or '').lower()
abilities: set[str] = set()
details = item.get('details', {}) or {}
families = details.get('families', []) or []
tokens = [normalized_model_id, str(details.get('family', '')).lower()]
tokens.extend(str(family).lower() for family in families)
if any(keyword in token for token in tokens for keyword in ('vision', 'vl', 'omni', 'llava', 'ocr')):
abilities.add('vision')
if any(keyword in token for token in tokens for keyword in ('tool', 'function')):
abilities.add('func_call')
return sorted(abilities)
async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]:
    """List the models reported by the configured server's ``/api/tags`` endpoint.

    Args:
        api_key: Accepted for signature compatibility and discarded.

    Returns:
        A dict with ``models`` (entries holding ``id``, ``name``, inferred
        ``type`` and ``abilities``; LLMs first, then by lower-cased name) and
        ``debug`` (the request method/URL and the raw JSON payload).

    Raises:
        httpx.HTTPStatusError: If the response status is an error
            (raised by ``raise_for_status``).
    """
    del api_key

    models_url = f'{self.requester_cfg["base_url"].rstrip("/")}/api/tags'
    async with httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']) as client:
        response = await client.get(models_url)
        response.raise_for_status()
        payload = response.json()

    models: list[dict[str, typing.Any]] = []
    # ``or []`` tolerates an explicit null for the models field.
    for item in payload.get('models') or []:
        model_id = item.get('model') or item.get('name')
        if not model_id:
            continue
        models.append(
            {
                'id': model_id,
                # Fall back to the id when the name is missing OR null/empty,
                # so the sort key below never calls .lower() on None.
                'name': item.get('name') or model_id,
                'type': self._infer_model_type(model_id),
                'abilities': self._infer_model_abilities(item, model_id),
            }
        )

    # LLMs sort before other types; ties are ordered by lower-cased name.
    models.sort(key=lambda item: (item['type'] != 'llm', item['name'].lower()))
    return {
        'models': models,
        'debug': {
            'request': {
                'method': 'GET',
                'url': models_url,
            },
            'response': payload,
        },
    }
async def _req(
self,
args: dict,
) -> Union[Mapping[str, Any], AsyncIterator[Mapping[str, Any]]]:
return await self.client.chat(**args)
async def _closure(
self,
query: pipeline_query.Query,
req_messages: list[dict],
use_model: requester.RuntimeLLMModel,
use_funcs: list[resource_tool.LLMTool] = None,
extra_args: dict[str, typing.Any] = {},
remove_think: bool = False,
) -> provider_message.Message:
args = extra_args.copy()
args['model'] = use_model.model_entity.name
messages: list[dict] = req_messages.copy()
for msg in messages:
if 'content' in msg and isinstance(msg['content'], list):
text_content: list = []
image_urls: list = []
for me in msg['content']:
if me['type'] == 'text':
text_content.append(me['text'])
elif me['type'] == 'image_base64':
image_urls.append(me['image_base64'])
msg['content'] = '\n'.join(text_content)
msg['images'] = [url.split(',')[1] for url in image_urls]
if 'tool_calls' in msg: # LangBot 内部以 str 存储 tool_calls 的参数,这里需要转换为 dict
for tool_call in msg['tool_calls']:
tool_call['function']['arguments'] = json.loads(tool_call['function']['arguments'])
args['messages'] = messages
args['tools'] = []
if use_funcs:
tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
if tools:
args['tools'] = tools
resp = await self._req(args)
message: provider_message.Message = await self._make_msg(resp)
return message
async def _make_msg(self, chat_completions: ollama.ChatResponse) -> provider_message.Message:
    """Convert an Ollama chat response into an assistant message.

    Args:
        chat_completions: Response object; only its ``message`` attribute is read.

    Returns:
        An assistant ``provider_message.Message``. Any tool calls in the
        response are attached with freshly generated ids and JSON-encoded
        arguments.

    Raises:
        ValueError: If the response has no message.
    """
    message: ollama.Message = chat_completions.message
    if message is None:
        raise ValueError("chat_completions must contain a 'message' field")

    # Always build the message, even when content is None, so attaching tool
    # calls below cannot hit a None object and callers never receive None.
    ret_msg: provider_message.Message = provider_message.Message(
        role='assistant',
        content=message.content if message.content is not None else '',
    )

    if message.tool_calls:
        ret_msg.tool_calls = [
            provider_message.ToolCall(
                # The id is generated locally for each call.
                id=uuid.uuid4().hex,
                type='function',
                function=provider_message.FunctionCall(
                    name=tool_call.function.name,
                    arguments=json.dumps(tool_call.function.arguments),
                ),
            )
            for tool_call in message.tool_calls
        ]

    return ret_msg
async def _prepare_messages(
self,
messages: typing.List[provider_message.Message],
) -> list[dict]:
"""Prepare messages for Ollama API request."""
req_messages: list = []
for m in messages:
msg_dict: dict = m.dict(exclude_none=True)
content: Any = msg_dict.get('content')
if isinstance(content, list):
if all(isinstance(part, dict) and part.get('type') == 'text' for part in content):
msg_dict['content'] = '\n'.join(part['text'] for part in content)
req_messages.append(msg_dict)
return req_messages
async def invoke_llm(
    self,
    query: pipeline_query.Query,
    model: requester.RuntimeLLMModel,
    messages: typing.List[provider_message.Message],
    funcs: typing.List[resource_tool.LLMTool] = None,
    extra_args: dict[str, typing.Any] = {},
    remove_think: bool = False,
) -> provider_message.Message:
    """Run a non-streaming chat request against Ollama.

    Messages are prepared with ``self._prepare_messages`` and handed to
    ``self._closure``.

    Returns:
        The message returned by ``self._closure``.

    Raises:
        errors.RequesterError: On timeout. HTTP timeouts surfaced as
            ``httpx.TimeoutException`` are mapped as well as
            ``asyncio.TimeoutError``; the original exception is attached as
            ``__cause__``.
    """
    req_messages = await self._prepare_messages(messages)
    try:
        return await self._closure(
            query=query,
            req_messages=req_messages,
            use_model=model,
            use_funcs=funcs,
            extra_args=extra_args,
            remove_think=remove_think,
        )
    except (asyncio.TimeoutError, httpx.TimeoutException) as e:
        raise errors.RequesterError('请求超时') from e
async def invoke_llm_stream(
    self,
    query: pipeline_query.Query,
    model: requester.RuntimeLLMModel,
    messages: typing.List[provider_message.Message],
    funcs: typing.List[resource_tool.LLMTool] = None,
    extra_args: dict[str, typing.Any] = {},
    remove_think: bool = False,
) -> typing.AsyncGenerator[provider_message.MessageChunk, None]:
    """Run a streaming chat request against Ollama and yield message chunks.

    Args:
        query: Not referenced by this method.
        model: Supplies the model name.
        messages: Conversation messages; prepared with ``self._prepare_messages``.
        funcs: Optional tools; converted with
            ``self.ap.tool_mgr.generate_tools_for_openai``.
        extra_args: Copied and used as the base of the request arguments.
        remove_think: When true, chunks carrying ``thinking`` text are dropped.

    Yields:
        ``provider_message.MessageChunk`` objects holding only the increment;
        ``is_final`` mirrors the chunk's ``done`` flag. Thinking text is
        wrapped in ``<think>`` / ``</think>`` markers.

    Raises:
        errors.RequesterError: On timeout (``asyncio.TimeoutError`` or
            ``httpx.TimeoutException``); the original exception is attached as
            ``__cause__``.
    """
    req_messages = await self._prepare_messages(messages)
    try:
        args = extra_args.copy()
        args['model'] = model.model_entity.name

        # Convert messages to the format sent to Ollama.
        msgs: list[dict] = req_messages.copy()
        for msg in msgs:
            if 'content' in msg and isinstance(msg['content'], list):
                text_content: list = []
                image_urls: list = []
                for me in msg['content']:
                    if me['type'] == 'text':
                        text_content.append(me['text'])
                    elif me['type'] == 'image_base64':
                        image_urls.append(me['image_base64'])
                msg['content'] = '\n'.join(text_content)
                # Strip a leading "data:...;base64," prefix when present; a
                # bare base64 string (no comma) is passed through unchanged.
                msg['images'] = [url.split(',', 1)[-1] for url in image_urls]
            if 'tool_calls' in msg:
                # Arguments are stored as JSON strings internally; values that
                # are not strings are left as-is.
                for tool_call in msg['tool_calls']:
                    arguments = tool_call['function']['arguments']
                    if isinstance(arguments, str):
                        tool_call['function']['arguments'] = json.loads(arguments)
        args['messages'] = msgs

        args['tools'] = []
        if funcs:
            tools = await self.ap.tool_mgr.generate_tools_for_openai(funcs)
            if tools:
                args['tools'] = tools

        args['stream'] = True

        chunk_idx = 0
        thinking_started = False
        thinking_ended = False
        role = 'assistant'

        async for chunk in await self.client.chat(**args):
            message: ollama.Message = chunk.message
            done = chunk.done

            delta_content = message.content or ''
            reasoning_content = getattr(message, 'thinking', '') or ''

            # Reasoning / thinking text.
            if reasoning_content:
                if remove_think:
                    chunk_idx += 1
                    continue

                if not thinking_started:
                    thinking_started = True
                    delta_content = '<think>\n' + reasoning_content
                else:
                    delta_content = reasoning_content
            elif thinking_started and not thinking_ended and delta_content:
                # First normal content after thinking: close the block.
                thinking_ended = True
                delta_content = '\n</think>\n' + delta_content

            # Tool calls: ids are generated locally, arguments JSON-encoded.
            tool_calls_data = None
            if message.tool_calls:
                tool_calls_data = []
                for tc in message.tool_calls:
                    tool_calls_data.append(
                        {
                            'id': uuid.uuid4().hex,
                            'type': 'function',
                            'function': {
                                'name': tc.function.name,
                                'arguments': json.dumps(tc.function.arguments),
                            },
                        }
                    )

            # Skip an empty first chunk.
            if chunk_idx == 0 and not delta_content and not reasoning_content and not tool_calls_data:
                chunk_idx += 1
                continue

            chunk_data = {
                'role': role,
                'content': delta_content if delta_content else None,
                'tool_calls': tool_calls_data,
                'is_final': bool(done),
            }
            # Drop fields that are None.
            chunk_data = {k: v for k, v in chunk_data.items() if v is not None}

            yield provider_message.MessageChunk(**chunk_data)
            chunk_idx += 1

    except (asyncio.TimeoutError, httpx.TimeoutException) as e:
        raise errors.RequesterError('请求超时') from e
async def invoke_embedding(
    self,
    model: requester.RuntimeEmbeddingModel,
    input_text: list[str],
    extra_args: dict[str, typing.Any] = {},
) -> list[list[float]]:
    """Embed the given texts with ``self.client.embed``.

    Args:
        model: Supplies the model name.
        input_text: Texts passed as the ``input`` argument.
        extra_args: Extra keyword arguments forwarded to ``embed``.

    Returns:
        The ``embeddings`` attribute of the response.
    """
    response = await self.client.embed(
        model=model.model_entity.name,
        input=input_text,
        **extra_args,
    )
    return response.embeddings

View File

@@ -22,8 +22,11 @@ spec:
type: integer
required: true
default: 120
litellm_provider: ollama
support_type:
- llm
- text-embedding
provider_category: self-hosted
execution:
python:
path: ./ollamachat.py
attr: OllamaChatCompletions

View File

@@ -0,0 +1,25 @@
from __future__ import annotations
import typing
import openai
from . import modelscopechatcmpl
class OpenRouterChatCompletions(modelscopechatcmpl.ModelScopeChatCompletions):
    """Requester for the OpenRouter ChatCompletion API."""

    # Annotation only; no client is assigned in this class body.
    client: openai.AsyncClient

    # Default requester settings: endpoint base URL and timeout value.
    default_config: dict[str, typing.Any] = {
        'base_url': 'https://openrouter.ai/api/v1',
        'timeout': 120,
    }

    async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]:
        """Scan models with ``base_url`` temporarily forced to the OpenRouter endpoint.

        The configured ``base_url`` is swapped for
        ``https://openrouter.ai/api/v1`` while the parent implementation runs
        and the previous configuration is put back afterwards, including the
        case where no ``base_url`` key existed.

        Args:
            api_key: Forwarded to the parent ``scan_models``.

        Returns:
            The parent implementation's result.
        """
        had_base_url = 'base_url' in self.requester_cfg
        original_base_url = self.requester_cfg.get('base_url', '')
        self.requester_cfg['base_url'] = 'https://openrouter.ai/api/v1'
        try:
            return await super().scan_models(api_key)
        finally:
            if had_base_url:
                self.requester_cfg['base_url'] = original_base_url
            else:
                # Do not leave behind a key that was not there before the scan.
                self.requester_cfg.pop('base_url', None)

View File

@@ -22,9 +22,12 @@ spec:
type: integer
required: true
default: 120
litellm_provider: openrouter
support_type:
- llm
- text-embedding
- rerank
provider_category: maas
execution:
python:
path: ./openrouterchatcmpl.py
attr: OpenRouterChatCompletions

View File

@@ -0,0 +1,208 @@
from __future__ import annotations
import openai
import typing
from . import chatcmpl
from .. import requester
import openai.types.chat.chat_completion as chat_completion
import re
import langbot_plugin.api.entities.builtin.provider.message as provider_message
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
class PPIOChatCompletions(chatcmpl.OpenAIChatCompletions):
    """Requester for the PPIO ChatCompletion API."""

    # Annotation only; no client is assigned in this class body.
    client: openai.AsyncClient

    # Default requester settings: endpoint base URL and timeout value.
    default_config: dict[str, typing.Any] = {
        'base_url': 'https://api.ppinfra.com/v3/openai',
        'timeout': 120,
    }

    # Class-level flag, False by default. Not read by the methods visible in
    # this file; purpose unconfirmed (TODO confirm whether it is still used).
    is_think: bool = False
async def _make_msg(
    self,
    chat_completion: chat_completion.ChatCompletion,
    remove_think: bool,
) -> provider_message.Message:
    """Convert the first choice of a chat completion into a message.

    The choice's message is dumped to a dict, a missing or ``None`` role
    defaults to ``'assistant'``, the content is passed through
    ``self._process_thinking_content`` together with any
    ``reasoning_content``, and ``reasoning_content`` is removed before the
    ``provider_message.Message`` is built.
    """
    fields = chat_completion.choices[0].message.model_dump()

    # Guarantee a usable role.
    if fields.get('role') is None:
        fields['role'] = 'assistant'

    # Take reasoning_content out of the dict so it is not passed to Message.
    reasoning = fields.pop('reasoning_content', None)

    # Reasoner-style models: merge or strip the chain of thought.
    fields['content'] = await self._process_thinking_content(fields['content'], reasoning, remove_think)

    return provider_message.Message(**fields)
async def _process_thinking_content(
self,
content: str,
reasoning_content: str = None,
remove_think: bool = False,
) -> tuple[str, str]:
"""处理思维链内容
Args:
content: 原始内容
reasoning_content: reasoning_content 字段内容
remove_think: 是否移除思维链
Returns:
处理后的内容
"""
if remove_think:
content = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL)
else:
if reasoning_content is not None:
content = '<think>\n' + reasoning_content + '\n</think>\n' + content
return content
async def _make_msg_chunk(
    self,
    delta: dict[str, typing.Any],
    idx: int,
) -> provider_message.MessageChunk:
    """Build a message chunk from a streamed delta dict.

    The dict is modified in place: a missing or ``None`` role becomes
    ``'assistant'``, ``None`` content becomes ``''``, and any
    ``reasoning_content`` is appended to the content. ``idx`` is not used.
    """
    # Streamed chunks may omit the role; make sure one is present.
    if delta.get('role') is None:
        delta['role'] = 'assistant'

    reasoning = delta.get('reasoning_content')

    if delta['content'] is None:
        delta['content'] = ''

    # Reasoner-style models: append the reasoning text to the content.
    if reasoning is not None:
        delta['content'] += reasoning

    return provider_message.MessageChunk(**delta)
async def _closure_stream(
    self,
    query: pipeline_query.Query,
    req_messages: list[dict],
    use_model: requester.RuntimeLLMModel,
    use_funcs: list[resource_tool.LLMTool] = None,
    extra_args: dict[str, typing.Any] = {},
    remove_think: bool = False,
) -> provider_message.Message | typing.AsyncGenerator[provider_message.MessageChunk, None]:
    """Send a streaming chat completion request and yield incremental chunks.

    Args:
        query: Not referenced by this method.
        req_messages: Message dicts to send. The list is shallow-copied, so
            ``image_base64`` parts are rewritten in place on the caller's dicts.
        use_model: Supplies the model name and the provider token manager.
        use_funcs: Optional tools; converted with
            ``self.ap.tool_mgr.generate_tools_for_openai``.
        extra_args: Forwarded to ``self._req_stream`` as ``extra_body``.
        remove_think: When true, content chunks from the one containing
            ``<think>`` up to the ``</think>`` chunk and the ``'\\n\\n'`` chunk
            that follows it are dropped.

    Yields:
        ``provider_message.MessageChunk`` objects holding only the delta;
        ``is_final`` is true on the chunk that carries a ``finish_reason``.
    """
    self.client.api_key = use_model.provider.token_mgr.get_token()

    args = {}
    args['model'] = use_model.model_entity.name

    if use_funcs:
        tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
        if tools:
            args['tools'] = tools

    # Messages for this request.
    messages = req_messages.copy()

    # Vision: rewrite internal image_base64 parts into OpenAI-style image_url parts.
    for msg in messages:
        if 'content' in msg and isinstance(msg['content'], list):
            for me in msg['content']:
                if me['type'] == 'image_base64':
                    me['image_url'] = {'url': me['image_base64']}
                    me['type'] = 'image_url'
                    del me['image_base64']

    args['messages'] = messages
    args['stream'] = True

    chunk_idx = 0
    thinking_started = False
    thinking_ended = False
    role = 'assistant'  # default role until a chunk supplies one
    # Last complete tool-call id / name seen; reused for continuation deltas.
    # Initialised so a first delta without them cannot hit an unbound name.
    tool_id = None
    tool_name = None

    async for chunk in self._req_stream(args, extra_body=extra_args):
        # Parse the chunk.
        if hasattr(chunk, 'choices') and chunk.choices:
            choice = chunk.choices[0]
            delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
            finish_reason = getattr(choice, 'finish_reason', None)
        else:
            delta = {}
            finish_reason = None

        # Take the role from the first chunk that has one and keep using it.
        if 'role' in delta and delta['role']:
            role = delta['role']

        # Incremental content.
        delta_content = delta.get('content', '')

        if remove_think:
            # .get() because a chunk without choices yields an empty delta.
            raw_content = delta.get('content')
            if raw_content is not None:
                if '<think>' in raw_content and not thinking_started and not thinking_ended:
                    thinking_started = True
                    continue
                elif raw_content == r'</think>' and not thinking_ended:
                    thinking_ended = True
                    continue
                elif thinking_ended and raw_content == '\n\n' and thinking_started:
                    # Blank separator right after </think>; also ends skip mode.
                    thinking_started = False
                    continue
                elif thinking_started and not thinking_ended:
                    continue

        # Tool-call deltas: fill in the fields that continuation chunks omit.
        if delta.get('tool_calls'):
            for tool_call in delta['tool_calls']:
                if tool_call['id'] and tool_call['function']['name']:
                    tool_id = tool_call['id']
                    tool_name = tool_call['function']['name']

                if tool_call['id'] is None:
                    tool_call['id'] = tool_id
                if tool_call['function']['name'] is None:
                    tool_call['function']['name'] = tool_name
                if tool_call['function']['arguments'] is None:
                    tool_call['function']['arguments'] = ''
                if tool_call['type'] is None:
                    tool_call['type'] = 'function'

        # Skip an empty first chunk (role only, no payload).
        if chunk_idx == 0 and not delta_content and not delta.get('tool_calls'):
            chunk_idx += 1
            continue

        # Build the MessageChunk from the delta only, dropping None fields.
        chunk_data = {
            'role': role,
            'content': delta_content if delta_content else None,
            'tool_calls': delta.get('tool_calls'),
            'is_final': bool(finish_reason),
        }
        chunk_data = {k: v for k, v in chunk_data.items() if v is not None}

        yield provider_message.MessageChunk(**chunk_data)
        chunk_idx += 1

View File

@@ -29,8 +29,11 @@ spec:
type: int
required: true
default: 120
litellm_provider: openai
support_type:
- llm
- text-embedding
provider_category: maas
execution:
python:
path: ./ppiochatcmpl.py
attr: PPIOChatCompletions

View File

@@ -0,0 +1,17 @@
from __future__ import annotations
import openai
import typing
from . import chatcmpl
class QHAIGCChatCompletions(chatcmpl.OpenAIChatCompletions):
    """Requester for the QHAIGC (启航 AI) ChatCompletion API."""

    # Annotation only; no client is assigned in this class body.
    client: openai.AsyncClient

    # Default requester settings: endpoint base URL and timeout value.
    default_config: dict[str, typing.Any] = dict(
        base_url='https://api.qhaigc.com/v1',
        timeout=120,
    )

View File

@@ -29,8 +29,11 @@ spec:
type: int
required: true
default: 120
litellm_provider: openai
support_type:
- llm
- text-embedding
provider_category: maas
execution:
python:
path: ./qhaigcchatcmpl.py
attr: QHAIGCChatCompletions

View File

@@ -0,0 +1,32 @@
from __future__ import annotations
import openai
import typing
from . import chatcmpl
import openai.types.chat.chat_completion as chat_completion
class ShengSuanYunChatCompletions(chatcmpl.OpenAIChatCompletions):
    """Requester for the ShengSuanYun (ModelSpot.AI) ChatCompletion API."""

    # Annotation only; no client is assigned in this class body.
    client: openai.AsyncClient

    # Default requester settings: endpoint base URL and timeout value.
    default_config: dict[str, typing.Any] = dict(
        base_url='https://router.shengsuanyun.com/api/v1',
        timeout=120,
    )

    async def _req(
        self,
        args: dict,
        extra_body: dict = {},
    ) -> chat_completion.ChatCompletion:
        """Create a chat completion, adding fixed ``HTTP-Referer`` and ``X-Title`` headers.

        Args:
            args: Keyword arguments forwarded to ``client.chat.completions.create``.
            extra_body: Passed through as ``extra_body``.

        Returns:
            The awaited result of the ``create`` call.
        """
        # Fixed headers sent with every request from this requester.
        fixed_headers = {
            'HTTP-Referer': 'https://langbot.app',
            'X-Title': 'LangBot',
        }
        completion = await self.client.chat.completions.create(
            **args,
            extra_body=extra_body,
            extra_headers=fixed_headers,
        )
        return completion

View File

@@ -29,8 +29,11 @@ spec:
type: int
required: true
default: 120
litellm_provider: openai
support_type:
- llm
- text-embedding
provider_category: maas
execution:
python:
path: ./shengsuanyun.py
attr: ShengSuanYunChatCompletions

View File

@@ -0,0 +1,17 @@
from __future__ import annotations
import typing
import openai
from . import chatcmpl
class SiliconFlowChatCompletions(chatcmpl.OpenAIChatCompletions):
    """Requester for the SiliconFlow ChatCompletion API."""

    # Annotation only; no client is assigned in this class body.
    client: openai.AsyncClient

    # Default requester settings: endpoint base URL and timeout value.
    default_config: dict[str, typing.Any] = dict(
        base_url='https://api.siliconflow.cn/v1',
        timeout=120,
    )

View File

@@ -22,9 +22,12 @@ spec:
type: integer
required: true
default: 120
litellm_provider: siliconflow
support_type:
- llm
- text-embedding
- rerank
provider_category: maas
execution:
python:
path: ./siliconflowchatcmpl.py
attr: SiliconFlowChatCompletions

View File

@@ -0,0 +1,17 @@
from __future__ import annotations
import typing
import openai
from . import chatcmpl
class LangBotSpaceChatCompletions(chatcmpl.OpenAIChatCompletions):
    """Requester for the LangBot Space ChatCompletion API."""

    # Annotation only; no client is assigned in this class body.
    client: openai.AsyncClient

    # Default requester settings: endpoint base URL and timeout value.
    default_config: dict[str, typing.Any] = dict(
        base_url='https://api.langbot.cloud/v1',
        timeout=120,
    )

View File

@@ -22,8 +22,11 @@ spec:
type: integer
required: true
default: 120
litellm_provider: openai
support_type:
- llm
- text-embedding
provider_category: maas
execution:
python:
path: ./spacechatcmpl.py
attr: LangBotSpaceChatCompletions

View File

@@ -22,8 +22,11 @@ spec:
type: integer
required: true
default: 120
litellm_provider: openai
support_type:
- llm
- text-embedding
provider_category: maas
execution:
python:
path: ./tokenponychatcmpl.py
attr: TokenPonyChatCompletions

View File

@@ -0,0 +1,17 @@
from __future__ import annotations
import typing
import openai
from . import chatcmpl
class TokenPonyChatCompletions(chatcmpl.OpenAIChatCompletions):
    """Requester for the TokenPony ChatCompletion API."""

    # Annotation only; no client is assigned in this class body.
    client: openai.AsyncClient

    # Default requester settings: endpoint base URL and timeout value.
    default_config: dict[str, typing.Any] = dict(
        base_url='https://api.tokenpony.cn/v1',
        timeout=120,
    )

View File

@@ -0,0 +1,17 @@
from __future__ import annotations
import typing
import openai
from . import chatcmpl
class VolcArkChatCompletions(chatcmpl.OpenAIChatCompletions):
    """Requester for the Volcano Ark (火山方舟) model platform ChatCompletion API."""

    # Annotation only; no client is assigned in this class body.
    client: openai.AsyncClient

    # Default requester settings: endpoint base URL and timeout value.
    default_config: dict[str, typing.Any] = dict(
        base_url='https://ark.cn-beijing.volces.com/api/v3',
        timeout=120,
    )

View File

@@ -22,7 +22,10 @@ spec:
type: integer
required: true
default: 120
litellm_provider: openai
support_type:
- llm
provider_category: maas
execution:
python:
path: ./volcarkchatcmpl.py
attr: VolcArkChatCompletions

View File

@@ -22,7 +22,10 @@ spec:
type: integer
required: true
default: 120
litellm_provider: voyage
support_type:
- rerank
provider_category: manufacturer
execution:
python:
path: ./chatcmpl.py
attr: OpenAIChatCompletions

View File

@@ -0,0 +1,17 @@
from __future__ import annotations
import typing
import openai
from . import chatcmpl
class XaiChatCompletions(chatcmpl.OpenAIChatCompletions):
    """Requester for the xAI ChatCompletion API."""

    # Annotation only; no client is assigned in this class body.
    client: openai.AsyncClient

    # Default requester settings: endpoint base URL and timeout value.
    default_config: dict[str, typing.Any] = dict(
        base_url='https://api.x.ai/v1',
        timeout=120,
    )

View File

@@ -22,7 +22,10 @@ spec:
type: integer
required: true
default: 120
litellm_provider: xai
support_type:
- llm
provider_category: manufacturer
execution:
python:
path: ./xaichatcmpl.py
attr: XaiChatCompletions

View File

@@ -0,0 +1,17 @@
from __future__ import annotations
import typing
import openai
from . import chatcmpl
class ZhipuAIChatCompletions(chatcmpl.OpenAIChatCompletions):
    """Requester for the Zhipu AI (智谱AI) ChatCompletion API."""

    # Annotation only; no client is assigned in this class body.
    client: openai.AsyncClient

    # Default requester settings: endpoint base URL and timeout value.
    default_config: dict[str, typing.Any] = dict(
        base_url='https://open.bigmodel.cn/api/paas/v4',
        timeout=120,
    )

View File

@@ -22,7 +22,10 @@ spec:
type: integer
required: true
default: 120
litellm_provider: zhipu
support_type:
- llm
provider_category: manufacturer
execution:
python:
path: ./zhipuaichatcmpl.py
attr: ZhipuAIChatCompletions