mirror of
https://github.com/langbot-app/LangBot.git
synced 2026-06-02 03:55:55 +00:00
restore: restore deleted provider requester files
Restore individual provider requester implementations that were
removed in de61b5d3. These files coexist with the unified
litellmchat.py backend.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,17 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import typing
|
||||
import openai
|
||||
|
||||
from . import chatcmpl
|
||||
|
||||
|
||||
class AI302ChatCompletions(chatcmpl.OpenAIChatCompletions):
|
||||
"""302.AI ChatCompletion API 请求器"""
|
||||
|
||||
client: openai.AsyncClient
|
||||
|
||||
default_config: dict[str, typing.Any] = {
|
||||
'base_url': 'https://api.302.ai/v1',
|
||||
'timeout': 120,
|
||||
}
|
||||
@@ -22,9 +22,12 @@ spec:
|
||||
type: integer
|
||||
required: true
|
||||
default: 120
|
||||
litellm_provider: openai
|
||||
support_type:
|
||||
- llm
|
||||
- text-embedding
|
||||
- rerank
|
||||
provider_category: maas
|
||||
execution:
|
||||
python:
|
||||
path: ./302aichatcmpl.py
|
||||
attr: AI302ChatCompletions
|
||||
|
||||
370
src/langbot/pkg/provider/modelmgr/requesters/anthropicmsgs.py
Normal file
370
src/langbot/pkg/provider/modelmgr/requesters/anthropicmsgs.py
Normal file
@@ -0,0 +1,370 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import typing
|
||||
import json
|
||||
import platform
|
||||
import socket
|
||||
import anthropic
|
||||
import httpx
|
||||
|
||||
from .. import errors, requester
|
||||
|
||||
from ....utils import image
|
||||
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
|
||||
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
|
||||
import langbot_plugin.api.entities.builtin.provider.message as provider_message
|
||||
|
||||
|
||||
class AnthropicMessages(requester.ProviderAPIRequester):
|
||||
"""Anthropic Messages API 请求器"""
|
||||
|
||||
client: anthropic.AsyncAnthropic
|
||||
|
||||
default_config: dict[str, typing.Any] = {
|
||||
'base_url': 'https://api.anthropic.com',
|
||||
'timeout': 120,
|
||||
}
|
||||
|
||||
async def initialize(self):
|
||||
# 兼容 Windows 缺失 TCP_KEEPINTVL 和 TCP_KEEPCNT 的问题
|
||||
if platform.system() == 'Windows':
|
||||
if not hasattr(socket, 'TCP_KEEPINTVL'):
|
||||
socket.TCP_KEEPINTVL = 0
|
||||
if not hasattr(socket, 'TCP_KEEPCNT'):
|
||||
socket.TCP_KEEPCNT = 0
|
||||
httpx_client = anthropic._base_client.AsyncHttpxClientWrapper(
|
||||
base_url=self.requester_cfg['base_url'],
|
||||
# cast to a valid type because mypy doesn't understand our type narrowing
|
||||
timeout=typing.cast(httpx.Timeout, self.requester_cfg['timeout']),
|
||||
limits=anthropic._constants.DEFAULT_CONNECTION_LIMITS,
|
||||
follow_redirects=True,
|
||||
trust_env=True,
|
||||
)
|
||||
|
||||
self.client = anthropic.AsyncAnthropic(
|
||||
api_key='',
|
||||
http_client=httpx_client,
|
||||
base_url=self.requester_cfg['base_url'],
|
||||
)
|
||||
|
||||
async def invoke_llm(
|
||||
self,
|
||||
query: pipeline_query.Query,
|
||||
model: requester.RuntimeLLMModel,
|
||||
messages: typing.List[provider_message.Message],
|
||||
funcs: typing.List[resource_tool.LLMTool] = None,
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
remove_think: bool = False,
|
||||
) -> provider_message.Message:
|
||||
self.client.api_key = model.provider.token_mgr.get_token()
|
||||
|
||||
args = extra_args.copy()
|
||||
args['model'] = model.model_entity.name
|
||||
|
||||
# 处理消息
|
||||
|
||||
# system
|
||||
system_role_message = None
|
||||
|
||||
for i, m in enumerate(messages):
|
||||
if m.role == 'system':
|
||||
system_role_message = m
|
||||
|
||||
break
|
||||
|
||||
if system_role_message:
|
||||
messages.pop(i)
|
||||
|
||||
if isinstance(system_role_message, provider_message.Message) and isinstance(system_role_message.content, str):
|
||||
args['system'] = system_role_message.content
|
||||
|
||||
req_messages = []
|
||||
|
||||
for m in messages:
|
||||
if m.role == 'tool':
|
||||
tool_call_id = m.tool_call_id
|
||||
|
||||
req_messages.append(
|
||||
{
|
||||
'role': 'user',
|
||||
'content': [
|
||||
{
|
||||
'type': 'tool_result',
|
||||
'tool_use_id': tool_call_id,
|
||||
'is_error': False,
|
||||
'content': [{'type': 'text', 'text': m.content}],
|
||||
}
|
||||
],
|
||||
}
|
||||
)
|
||||
|
||||
continue
|
||||
|
||||
msg_dict = m.dict(exclude_none=True)
|
||||
|
||||
if isinstance(m.content, str) and m.content.strip() != '':
|
||||
msg_dict['content'] = [{'type': 'text', 'text': m.content}]
|
||||
elif isinstance(m.content, list):
|
||||
for i, ce in enumerate(m.content):
|
||||
if ce.type == 'image_base64':
|
||||
image_b64, image_format = await image.extract_b64_and_format(ce.image_base64)
|
||||
|
||||
alter_image_ele = {
|
||||
'type': 'image',
|
||||
'source': {
|
||||
'type': 'base64',
|
||||
'media_type': f'image/{image_format}',
|
||||
'data': image_b64,
|
||||
},
|
||||
}
|
||||
msg_dict['content'][i] = alter_image_ele
|
||||
|
||||
if m.tool_calls:
|
||||
for tool_call in m.tool_calls:
|
||||
msg_dict['content'].append(
|
||||
{
|
||||
'type': 'tool_use',
|
||||
'id': tool_call.id,
|
||||
'name': tool_call.function.name,
|
||||
'input': json.loads(tool_call.function.arguments),
|
||||
}
|
||||
)
|
||||
|
||||
del msg_dict['tool_calls']
|
||||
|
||||
req_messages.append(msg_dict)
|
||||
|
||||
args['messages'] = req_messages
|
||||
|
||||
if 'thinking' in args:
|
||||
args['thinking'] = {'type': 'enabled', 'budget_tokens': 10000}
|
||||
|
||||
if funcs:
|
||||
tools = await self.ap.tool_mgr.generate_tools_for_anthropic(funcs)
|
||||
|
||||
if tools:
|
||||
args['tools'] = tools
|
||||
|
||||
try:
|
||||
resp = await self.client.messages.create(**args)
|
||||
|
||||
args = {
|
||||
'content': '',
|
||||
'role': resp.role,
|
||||
}
|
||||
assert type(resp) is anthropic.types.message.Message
|
||||
|
||||
for block in resp.content:
|
||||
if not remove_think and block.type == 'thinking':
|
||||
args['content'] = '<think>\n' + block.thinking + '\n</think>\n' + args['content']
|
||||
elif block.type == 'text':
|
||||
args['content'] += block.text
|
||||
elif block.type == 'tool_use':
|
||||
assert type(block) is anthropic.types.tool_use_block.ToolUseBlock
|
||||
tool_call = provider_message.ToolCall(
|
||||
id=block.id,
|
||||
type='function',
|
||||
function=provider_message.FunctionCall(name=block.name, arguments=json.dumps(block.input)),
|
||||
)
|
||||
if 'tool_calls' not in args:
|
||||
args['tool_calls'] = []
|
||||
args['tool_calls'].append(tool_call)
|
||||
|
||||
return provider_message.Message(**args)
|
||||
except anthropic.AuthenticationError as e:
|
||||
raise errors.RequesterError(f'api-key 无效: {e.message}')
|
||||
except anthropic.BadRequestError as e:
|
||||
raise errors.RequesterError(str(e.message))
|
||||
except anthropic.NotFoundError as e:
|
||||
if 'model: ' in str(e):
|
||||
raise errors.RequesterError(f'模型无效: {e.message}')
|
||||
else:
|
||||
raise errors.RequesterError(f'请求地址无效: {e.message}')
|
||||
|
||||
async def invoke_llm_stream(
|
||||
self,
|
||||
query: pipeline_query.Query,
|
||||
model: requester.RuntimeLLMModel,
|
||||
messages: typing.List[provider_message.Message],
|
||||
funcs: typing.List[resource_tool.LLMTool] = None,
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
remove_think: bool = False,
|
||||
) -> provider_message.Message:
|
||||
self.client.api_key = model.provider.token_mgr.get_token()
|
||||
|
||||
args = extra_args.copy()
|
||||
args['model'] = model.model_entity.name
|
||||
args['stream'] = True
|
||||
|
||||
# 处理消息
|
||||
|
||||
# system
|
||||
system_role_message = None
|
||||
|
||||
for i, m in enumerate(messages):
|
||||
if m.role == 'system':
|
||||
system_role_message = m
|
||||
|
||||
break
|
||||
|
||||
if system_role_message:
|
||||
messages.pop(i)
|
||||
|
||||
if isinstance(system_role_message, provider_message.Message) and isinstance(system_role_message.content, str):
|
||||
args['system'] = system_role_message.content
|
||||
|
||||
req_messages = []
|
||||
|
||||
for m in messages:
|
||||
if m.role == 'tool':
|
||||
tool_call_id = m.tool_call_id
|
||||
|
||||
req_messages.append(
|
||||
{
|
||||
'role': 'user',
|
||||
'content': [
|
||||
{
|
||||
'type': 'tool_result',
|
||||
'tool_use_id': tool_call_id,
|
||||
'is_error': False, # 暂时直接写false
|
||||
'content': [
|
||||
{'type': 'text', 'text': m.content}
|
||||
], # 这里要是list包裹,应该是多个返回的情况?type类型好像也可以填其他的,暂时只写text
|
||||
}
|
||||
],
|
||||
}
|
||||
)
|
||||
|
||||
continue
|
||||
|
||||
msg_dict = m.dict(exclude_none=True)
|
||||
|
||||
if isinstance(m.content, str) and m.content.strip() != '':
|
||||
msg_dict['content'] = [{'type': 'text', 'text': m.content}]
|
||||
elif isinstance(m.content, list):
|
||||
for i, ce in enumerate(m.content):
|
||||
if ce.type == 'image_base64':
|
||||
image_b64, image_format = await image.extract_b64_and_format(ce.image_base64)
|
||||
|
||||
alter_image_ele = {
|
||||
'type': 'image',
|
||||
'source': {
|
||||
'type': 'base64',
|
||||
'media_type': f'image/{image_format}',
|
||||
'data': image_b64,
|
||||
},
|
||||
}
|
||||
msg_dict['content'][i] = alter_image_ele
|
||||
if isinstance(msg_dict['content'], str) and msg_dict['content'] == '':
|
||||
msg_dict['content'] = [] # 这里不知道为什么会莫名有个空导致content为字符
|
||||
if m.tool_calls:
|
||||
for tool_call in m.tool_calls:
|
||||
msg_dict['content'].append(
|
||||
{
|
||||
'type': 'tool_use',
|
||||
'id': tool_call.id,
|
||||
'name': tool_call.function.name,
|
||||
'input': json.loads(tool_call.function.arguments),
|
||||
}
|
||||
)
|
||||
|
||||
del msg_dict['tool_calls']
|
||||
|
||||
req_messages.append(msg_dict)
|
||||
if 'thinking' in args:
|
||||
args['thinking'] = {'type': 'enabled', 'budget_tokens': 10000}
|
||||
|
||||
args['messages'] = req_messages
|
||||
|
||||
if funcs:
|
||||
tools = await self.ap.tool_mgr.generate_tools_for_anthropic(funcs)
|
||||
|
||||
if tools:
|
||||
args['tools'] = tools
|
||||
|
||||
try:
|
||||
role = 'assistant' # 默认角色
|
||||
# chunk_idx = 0
|
||||
think_started = False
|
||||
think_ended = False
|
||||
finish_reason = False
|
||||
tool_name = ''
|
||||
tool_id = ''
|
||||
async for chunk in await self.client.messages.create(**args):
|
||||
content = ''
|
||||
tool_call = {'id': None, 'function': {'name': None, 'arguments': None}, 'type': 'function'}
|
||||
if isinstance(
|
||||
chunk, anthropic.types.raw_content_block_start_event.RawContentBlockStartEvent
|
||||
): # 记录开始
|
||||
if chunk.content_block.type == 'tool_use':
|
||||
if chunk.content_block.name is not None:
|
||||
tool_name = chunk.content_block.name
|
||||
if chunk.content_block.id is not None:
|
||||
tool_id = chunk.content_block.id
|
||||
|
||||
tool_call['function']['name'] = tool_name
|
||||
tool_call['function']['arguments'] = ''
|
||||
tool_call['id'] = tool_id
|
||||
|
||||
if not remove_think:
|
||||
if chunk.content_block.type == 'thinking' and not remove_think:
|
||||
think_started = True
|
||||
elif chunk.content_block.type == 'text' and chunk.index != 0 and not remove_think:
|
||||
think_ended = True
|
||||
continue
|
||||
elif isinstance(chunk, anthropic.types.raw_content_block_delta_event.RawContentBlockDeltaEvent):
|
||||
if chunk.delta.type == 'thinking_delta':
|
||||
if think_started:
|
||||
think_started = False
|
||||
content = '<think>\n' + chunk.delta.thinking
|
||||
elif remove_think:
|
||||
continue
|
||||
else:
|
||||
content = chunk.delta.thinking
|
||||
elif chunk.delta.type == 'text_delta':
|
||||
if think_ended:
|
||||
think_ended = False
|
||||
content = '\n</think>\n' + chunk.delta.text
|
||||
else:
|
||||
content = chunk.delta.text
|
||||
elif chunk.delta.type == 'input_json_delta':
|
||||
tool_call['function']['arguments'] = chunk.delta.partial_json
|
||||
tool_call['function']['name'] = tool_name
|
||||
tool_call['id'] = tool_id
|
||||
elif isinstance(chunk, anthropic.types.raw_content_block_stop_event.RawContentBlockStopEvent):
|
||||
continue # 记录raw_content_block结束的
|
||||
|
||||
elif isinstance(chunk, anthropic.types.raw_message_delta_event.RawMessageDeltaEvent):
|
||||
if chunk.delta.stop_reason == 'end_turn':
|
||||
finish_reason = True
|
||||
elif isinstance(chunk, anthropic.types.raw_message_stop_event.RawMessageStopEvent):
|
||||
continue # 这个好像是完全结束
|
||||
else:
|
||||
# print(chunk)
|
||||
self.ap.logger.debug(f'anthropic chunk: {chunk}')
|
||||
continue
|
||||
|
||||
args = {
|
||||
'content': content,
|
||||
'role': role,
|
||||
'is_final': finish_reason,
|
||||
'tool_calls': None if tool_call['id'] is None else [tool_call],
|
||||
}
|
||||
# if chunk_idx == 0:
|
||||
# chunk_idx += 1
|
||||
# continue
|
||||
|
||||
# assert type(chunk) is anthropic.types.message.Chunk
|
||||
|
||||
yield provider_message.MessageChunk(**args)
|
||||
|
||||
# return llm_entities.Message(**args)
|
||||
except anthropic.AuthenticationError as e:
|
||||
raise errors.RequesterError(f'api-key 无效: {e.message}')
|
||||
except anthropic.BadRequestError as e:
|
||||
raise errors.RequesterError(str(e.message))
|
||||
except anthropic.NotFoundError as e:
|
||||
if 'model: ' in str(e):
|
||||
raise errors.RequesterError(f'模型无效: {e.message}')
|
||||
else:
|
||||
raise errors.RequesterError(f'请求地址无效: {e.message}')
|
||||
@@ -22,7 +22,10 @@ spec:
|
||||
type: integer
|
||||
required: true
|
||||
default: 120
|
||||
litellm_provider: anthropic
|
||||
support_type:
|
||||
- llm
|
||||
provider_category: manufacturer
|
||||
execution:
|
||||
python:
|
||||
path: ./anthropicmsgs.py
|
||||
attr: AnthropicMessages
|
||||
|
||||
242
src/langbot/pkg/provider/modelmgr/requesters/bailianchatcmpl.py
Normal file
242
src/langbot/pkg/provider/modelmgr/requesters/bailianchatcmpl.py
Normal file
@@ -0,0 +1,242 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import typing
|
||||
import dashscope
|
||||
import openai
|
||||
|
||||
from . import modelscopechatcmpl
|
||||
from .. import requester
|
||||
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
|
||||
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
|
||||
import langbot_plugin.api.entities.builtin.provider.message as provider_message
|
||||
|
||||
|
||||
class BailianChatCompletions(modelscopechatcmpl.ModelScopeChatCompletions):
|
||||
"""阿里云百炼大模型平台 ChatCompletion API 请求器"""
|
||||
|
||||
client: openai.AsyncClient
|
||||
|
||||
default_config: dict[str, typing.Any] = {
|
||||
'base_url': 'https://dashscope.aliyuncs.com/compatible-mode/v1',
|
||||
'timeout': 120,
|
||||
}
|
||||
|
||||
async def _closure_stream(
|
||||
self,
|
||||
query: pipeline_query.Query,
|
||||
req_messages: list[dict],
|
||||
use_model: requester.RuntimeLLMModel,
|
||||
use_funcs: list[resource_tool.LLMTool] = None,
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
remove_think: bool = False,
|
||||
) -> provider_message.Message | typing.AsyncGenerator[provider_message.MessageChunk, None]:
|
||||
self.client.api_key = use_model.provider.token_mgr.get_token()
|
||||
|
||||
args = {}
|
||||
args['model'] = use_model.model_entity.name
|
||||
|
||||
if use_funcs:
|
||||
tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
|
||||
|
||||
if tools:
|
||||
args['tools'] = tools
|
||||
|
||||
# 设置此次请求中的messages
|
||||
messages = req_messages.copy()
|
||||
|
||||
is_use_dashscope_call = False # 是否使用阿里原生库调用
|
||||
is_enable_multi_model = True # 是否支持多轮对话
|
||||
use_time_num = 0 # 模型已调用次数,防止存在多文件时重复调用
|
||||
use_time_ids = [] # 已调用的ID列表
|
||||
message_id = 0 # 记录消息序号
|
||||
|
||||
for msg in messages:
|
||||
# print(msg)
|
||||
if 'content' in msg and isinstance(msg['content'], list):
|
||||
for me in msg['content']:
|
||||
if me['type'] == 'image_base64':
|
||||
me['image_url'] = {'url': me['image_base64']}
|
||||
me['type'] = 'image_url'
|
||||
del me['image_base64']
|
||||
elif me['type'] == 'file_url' and '.' in me.get('file_name', ''):
|
||||
# 1. 视频文件推理
|
||||
# https://bailian.console.aliyun.com/?tab=doc#/doc/?type=model&url=2845871
|
||||
file_type = me.get('file_name').lower().split('.')[-1]
|
||||
if file_type in ['mp4', 'avi', 'mkv', 'mov', 'flv', 'wmv']:
|
||||
me['type'] = 'video_url'
|
||||
me['video_url'] = {'url': me['file_url']}
|
||||
del me['file_url']
|
||||
del me['file_name']
|
||||
use_time_num += 1
|
||||
use_time_ids.append(message_id)
|
||||
is_enable_multi_model = False
|
||||
# 2. 语音文件识别, 无法通过openai的audio字段传递,暂时不支持
|
||||
# https://bailian.console.aliyun.com/?tab=doc#/doc/?type=model&url=2979031
|
||||
elif file_type in [
|
||||
'aac',
|
||||
'amr',
|
||||
'aiff',
|
||||
'flac',
|
||||
'm4a',
|
||||
'mp3',
|
||||
'mpeg',
|
||||
'ogg',
|
||||
'opus',
|
||||
'wav',
|
||||
'webm',
|
||||
'wma',
|
||||
]:
|
||||
me['audio'] = me['file_url']
|
||||
me['type'] = 'audio'
|
||||
del me['file_url']
|
||||
del me['type']
|
||||
del me['file_name']
|
||||
is_use_dashscope_call = True
|
||||
use_time_num += 1
|
||||
use_time_ids.append(message_id)
|
||||
is_enable_multi_model = False
|
||||
message_id += 1
|
||||
|
||||
# 使用列表推导式,保留不在 use_time_ids[:-1] 中的元素,仅保留最后一个多媒体消息
|
||||
if not is_enable_multi_model and use_time_num > 1:
|
||||
messages = [msg for idx, msg in enumerate(messages) if idx not in use_time_ids[:-1]]
|
||||
|
||||
if not is_enable_multi_model:
|
||||
messages = [msg for msg in messages if 'resp_message_id' not in msg]
|
||||
|
||||
args['messages'] = messages
|
||||
args['stream'] = True
|
||||
|
||||
# 流式处理状态
|
||||
# tool_calls_map: dict[str, provider_message.ToolCall] = {}
|
||||
chunk_idx = 0
|
||||
thinking_started = False
|
||||
thinking_ended = False
|
||||
role = 'assistant' # 默认角色
|
||||
|
||||
if is_use_dashscope_call:
|
||||
response = dashscope.MultiModalConversation.call(
|
||||
# 若没有配置环境变量,请用百炼API Key将下行替换为:api_key = "sk-xxx"
|
||||
api_key=use_model.provider.token_mgr.get_token(),
|
||||
model=use_model.model_entity.name,
|
||||
messages=messages,
|
||||
result_format='message',
|
||||
asr_options={
|
||||
# "language": "zh", # 可选,若已知音频的语种,可通过该参数指定待识别语种,以提升识别准确率
|
||||
'enable_lid': True,
|
||||
'enable_itn': False,
|
||||
},
|
||||
stream=True,
|
||||
)
|
||||
content_length_list = []
|
||||
previous_length = 0 # 记录上一次的内容长度
|
||||
for res in response:
|
||||
chunk = res['output']
|
||||
# 解析 chunk 数据
|
||||
if hasattr(chunk, 'choices') and chunk.choices:
|
||||
choice = chunk.choices[0]
|
||||
delta_content = choice['message'].content[0]['text']
|
||||
finish_reason = choice['finish_reason']
|
||||
content_length_list.append(len(delta_content))
|
||||
else:
|
||||
delta_content = ''
|
||||
finish_reason = None
|
||||
|
||||
# 跳过空的第一个 chunk(只有 role 没有内容)
|
||||
if chunk_idx == 0 and not delta_content:
|
||||
chunk_idx += 1
|
||||
continue
|
||||
|
||||
# 检查 content_length_list 是否有足够的数据
|
||||
if len(content_length_list) >= 2:
|
||||
now_content = delta_content[previous_length : content_length_list[-1]]
|
||||
previous_length = content_length_list[-1] # 更新上一次的长度
|
||||
else:
|
||||
now_content = delta_content # 第一次循环时直接使用 delta_content
|
||||
previous_length = len(delta_content) # 更新上一次的长度
|
||||
|
||||
# 构建 MessageChunk - 只包含增量内容
|
||||
chunk_data = {
|
||||
'role': role,
|
||||
'content': now_content if now_content else None,
|
||||
'is_final': bool(finish_reason) and finish_reason != 'null',
|
||||
}
|
||||
|
||||
# 移除 None 值
|
||||
chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
|
||||
yield provider_message.MessageChunk(**chunk_data)
|
||||
chunk_idx += 1
|
||||
else:
|
||||
async for chunk in self._req_stream(args, extra_body=extra_args):
|
||||
# 解析 chunk 数据
|
||||
if hasattr(chunk, 'choices') and chunk.choices:
|
||||
choice = chunk.choices[0]
|
||||
delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
|
||||
finish_reason = getattr(choice, 'finish_reason', None)
|
||||
else:
|
||||
delta = {}
|
||||
finish_reason = None
|
||||
|
||||
# 从第一个 chunk 获取 role,后续使用这个 role
|
||||
if 'role' in delta and delta['role']:
|
||||
role = delta['role']
|
||||
|
||||
# 获取增量内容
|
||||
delta_content = delta.get('content', '')
|
||||
reasoning_content = delta.get('reasoning_content', '')
|
||||
|
||||
# 处理 reasoning_content
|
||||
if reasoning_content:
|
||||
# accumulated_reasoning += reasoning_content
|
||||
# 如果设置了 remove_think,跳过 reasoning_content
|
||||
if remove_think:
|
||||
chunk_idx += 1
|
||||
continue
|
||||
|
||||
# 第一次出现 reasoning_content,添加 <think> 开始标签
|
||||
if not thinking_started:
|
||||
thinking_started = True
|
||||
delta_content = '<think>\n' + reasoning_content
|
||||
else:
|
||||
# 继续输出 reasoning_content
|
||||
delta_content = reasoning_content
|
||||
elif thinking_started and not thinking_ended and delta_content:
|
||||
# reasoning_content 结束,normal content 开始,添加 </think> 结束标签
|
||||
thinking_ended = True
|
||||
delta_content = '\n</think>\n' + delta_content
|
||||
|
||||
# 处理工具调用增量
|
||||
if delta.get('tool_calls'):
|
||||
for tool_call in delta['tool_calls']:
|
||||
if tool_call['id'] != '':
|
||||
tool_id = tool_call['id']
|
||||
if tool_call['function']['name'] is not None:
|
||||
tool_name = tool_call['function']['name']
|
||||
|
||||
if tool_call['type'] is None:
|
||||
tool_call['type'] = 'function'
|
||||
tool_call['id'] = tool_id
|
||||
tool_call['function']['name'] = tool_name
|
||||
tool_call['function']['arguments'] = (
|
||||
'' if tool_call['function']['arguments'] is None else tool_call['function']['arguments']
|
||||
)
|
||||
|
||||
# 跳过空的第一个 chunk(只有 role 没有内容)
|
||||
if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'):
|
||||
chunk_idx += 1
|
||||
continue
|
||||
|
||||
# 构建 MessageChunk - 只包含增量内容
|
||||
chunk_data = {
|
||||
'role': role,
|
||||
'content': delta_content if delta_content else None,
|
||||
'tool_calls': delta.get('tool_calls'),
|
||||
'is_final': bool(finish_reason),
|
||||
}
|
||||
|
||||
# 移除 None 值
|
||||
chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
|
||||
|
||||
yield provider_message.MessageChunk(**chunk_data)
|
||||
chunk_idx += 1
|
||||
# return
|
||||
@@ -22,8 +22,11 @@ spec:
|
||||
type: integer
|
||||
required: true
|
||||
default: 120
|
||||
litellm_provider: openai
|
||||
support_type:
|
||||
- llm
|
||||
- rerank
|
||||
provider_category: maas
|
||||
execution:
|
||||
python:
|
||||
path: ./bailianchatcmpl.py
|
||||
attr: BailianChatCompletions
|
||||
|
||||
702
src/langbot/pkg/provider/modelmgr/requesters/chatcmpl.py
Normal file
702
src/langbot/pkg/provider/modelmgr/requesters/chatcmpl.py
Normal file
@@ -0,0 +1,702 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import typing
|
||||
|
||||
import openai
|
||||
import openai.types.chat.chat_completion as chat_completion_module
|
||||
import httpx
|
||||
|
||||
from .. import errors, requester
|
||||
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
|
||||
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
|
||||
import langbot_plugin.api.entities.builtin.provider.message as provider_message
|
||||
|
||||
|
||||
class OpenAIChatCompletions(requester.ProviderAPIRequester):
|
||||
"""OpenAI ChatCompletion API 请求器"""
|
||||
|
||||
client: openai.AsyncClient
|
||||
|
||||
default_config: dict[str, typing.Any] = {
|
||||
'base_url': 'https://api.openai.com/v1',
|
||||
'timeout': 120,
|
||||
}
|
||||
|
||||
async def initialize(self):
|
||||
self.client = openai.AsyncClient(
|
||||
api_key='',
|
||||
base_url=self.requester_cfg['base_url'].replace(' ', ''),
|
||||
timeout=self.requester_cfg['timeout'],
|
||||
http_client=httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']),
|
||||
)
|
||||
|
||||
def _mask_api_key(self, api_key: str | None) -> str:
|
||||
if not api_key:
|
||||
return ''
|
||||
if len(api_key) <= 8:
|
||||
return '****'
|
||||
return f'{api_key[:4]}...{api_key[-4:]}'
|
||||
|
||||
def _infer_model_type(self, model_id: str) -> str:
|
||||
normalized_model_id = (model_id or '').lower()
|
||||
embedding_keywords = (
|
||||
'embedding',
|
||||
'embed',
|
||||
'bge-',
|
||||
'e5-',
|
||||
'm3e',
|
||||
'gte-',
|
||||
'multilingual-e5',
|
||||
'text-embedding',
|
||||
)
|
||||
return 'embedding' if any(keyword in normalized_model_id for keyword in embedding_keywords) else 'llm'
|
||||
|
||||
def _infer_model_abilities(self, item: dict[str, typing.Any], model_id: str) -> list[str]:
|
||||
normalized_model_id = (model_id or '').lower()
|
||||
abilities: set[str] = set()
|
||||
|
||||
def _flatten(value: typing.Any) -> list[str]:
|
||||
if value is None:
|
||||
return []
|
||||
if isinstance(value, str):
|
||||
return [value.lower()]
|
||||
if isinstance(value, dict):
|
||||
flattened: list[str] = []
|
||||
for nested_value in value.values():
|
||||
flattened.extend(_flatten(nested_value))
|
||||
return flattened
|
||||
if isinstance(value, (list, tuple, set)):
|
||||
flattened: list[str] = []
|
||||
for nested_value in value:
|
||||
flattened.extend(_flatten(nested_value))
|
||||
return flattened
|
||||
return [str(value).lower()]
|
||||
|
||||
capability_tokens = _flatten(item.get('capabilities'))
|
||||
capability_tokens.extend(_flatten(item.get('modalities')))
|
||||
capability_tokens.extend(_flatten(item.get('input_modalities')))
|
||||
capability_tokens.extend(_flatten(item.get('output_modalities')))
|
||||
capability_tokens.extend(_flatten(item.get('supported_generation_methods')))
|
||||
capability_tokens.extend(_flatten(item.get('supported_parameters')))
|
||||
capability_tokens.extend(_flatten(item.get('architecture')))
|
||||
|
||||
combined_tokens = capability_tokens + [normalized_model_id]
|
||||
|
||||
vision_keywords = (
|
||||
'vision',
|
||||
'image',
|
||||
'file',
|
||||
'video',
|
||||
'multimodal',
|
||||
'vl',
|
||||
'ocr',
|
||||
'omni',
|
||||
)
|
||||
function_call_keywords = (
|
||||
'function',
|
||||
'tool',
|
||||
'tools',
|
||||
'tool_choice',
|
||||
'tool_call',
|
||||
'tool-use',
|
||||
'tool_use',
|
||||
)
|
||||
|
||||
if any(any(keyword in token for keyword in vision_keywords) for token in combined_tokens):
|
||||
abilities.add('vision')
|
||||
|
||||
if any(any(keyword in token for keyword in function_call_keywords) for token in combined_tokens):
|
||||
abilities.add('func_call')
|
||||
|
||||
return sorted(abilities)
|
||||
|
||||
def _normalize_modalities(self, value: typing.Any) -> list[str]:
|
||||
normalized: list[str] = []
|
||||
|
||||
def _collect(item: typing.Any):
|
||||
if item is None:
|
||||
return
|
||||
if isinstance(item, str):
|
||||
for part in item.replace('->', ',').replace('+', ',').split(','):
|
||||
token = part.strip().lower()
|
||||
if token and token not in normalized:
|
||||
normalized.append(token)
|
||||
return
|
||||
if isinstance(item, dict):
|
||||
for nested in item.values():
|
||||
_collect(nested)
|
||||
return
|
||||
if isinstance(item, (list, tuple, set)):
|
||||
for nested in item:
|
||||
_collect(nested)
|
||||
return
|
||||
|
||||
_collect(value)
|
||||
return normalized
|
||||
|
||||
def _extract_scan_metadata(self, item: dict[str, typing.Any], model_id: str) -> dict[str, typing.Any]:
|
||||
display_name = item.get('name')
|
||||
if not isinstance(display_name, str) or not display_name.strip() or display_name == model_id:
|
||||
display_name = ''
|
||||
|
||||
description = item.get('description')
|
||||
if not isinstance(description, str) or not description.strip():
|
||||
description = ''
|
||||
|
||||
context_length = item.get('context_length')
|
||||
if context_length is None and isinstance(item.get('top_provider'), dict):
|
||||
context_length = item['top_provider'].get('context_length')
|
||||
|
||||
if not isinstance(context_length, int):
|
||||
try:
|
||||
context_length = int(context_length) if context_length is not None else None
|
||||
except (TypeError, ValueError):
|
||||
context_length = None
|
||||
|
||||
input_modalities = self._normalize_modalities(item.get('input_modalities'))
|
||||
output_modalities = self._normalize_modalities(item.get('output_modalities'))
|
||||
|
||||
if isinstance(item.get('architecture'), dict):
|
||||
if not input_modalities:
|
||||
input_modalities = self._normalize_modalities(item['architecture'].get('input_modalities'))
|
||||
if not output_modalities:
|
||||
output_modalities = self._normalize_modalities(item['architecture'].get('output_modalities'))
|
||||
|
||||
owned_by = item.get('owned_by')
|
||||
if not isinstance(owned_by, str) or not owned_by.strip():
|
||||
owned_by = ''
|
||||
|
||||
return {
|
||||
'display_name': display_name or None,
|
||||
'description': description or None,
|
||||
'context_length': context_length,
|
||||
'owned_by': owned_by or None,
|
||||
'input_modalities': input_modalities,
|
||||
'output_modalities': output_modalities,
|
||||
}
|
||||
|
||||
async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]:
|
||||
headers = {}
|
||||
if api_key:
|
||||
headers['Authorization'] = f'Bearer {api_key}'
|
||||
|
||||
models_url = f'{self.requester_cfg["base_url"].rstrip("/")}/models'
|
||||
async with httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']) as client:
|
||||
response = await client.get(models_url, headers=headers)
|
||||
response.raise_for_status()
|
||||
payload = response.json()
|
||||
|
||||
models = []
|
||||
for item in payload.get('data', []):
|
||||
model_id = item.get('id')
|
||||
if not model_id:
|
||||
continue
|
||||
models.append(
|
||||
{
|
||||
'id': model_id,
|
||||
'name': model_id,
|
||||
'type': self._infer_model_type(model_id),
|
||||
'abilities': self._infer_model_abilities(item, model_id),
|
||||
**self._extract_scan_metadata(item, model_id),
|
||||
}
|
||||
)
|
||||
|
||||
models.sort(key=lambda item: (item['type'] != 'llm', item['name'].lower()))
|
||||
return {
|
||||
'models': models,
|
||||
'debug': {
|
||||
'request': {
|
||||
'method': 'GET',
|
||||
'url': models_url,
|
||||
'headers': {
|
||||
'Authorization': f'Bearer {self._mask_api_key(api_key)}' if api_key else '',
|
||||
},
|
||||
},
|
||||
'response': payload,
|
||||
},
|
||||
}
|
||||
|
||||
async def _req(
|
||||
self,
|
||||
args: dict,
|
||||
extra_body: dict = {},
|
||||
) -> chat_completion_module.ChatCompletion:
|
||||
return await self.client.chat.completions.create(**args, extra_body=extra_body)
|
||||
|
||||
async def _req_stream(
|
||||
self,
|
||||
args: dict,
|
||||
extra_body: dict = {},
|
||||
):
|
||||
async for chunk in await self.client.chat.completions.create(**args, extra_body=extra_body):
|
||||
yield chunk
|
||||
|
||||
async def _make_msg(
|
||||
self,
|
||||
chat_completion: chat_completion_module.ChatCompletion,
|
||||
remove_think: bool = False,
|
||||
) -> provider_message.Message:
|
||||
if not isinstance(chat_completion, chat_completion_module.ChatCompletion):
|
||||
raise TypeError(f'Expected ChatCompletion, got {type(chat_completion).__name__}: {chat_completion[:16]}')
|
||||
|
||||
chatcmpl_message = chat_completion.choices[0].message.model_dump()
|
||||
|
||||
# 确保 role 字段存在且不为 None
|
||||
if 'role' not in chatcmpl_message or chatcmpl_message['role'] is None:
|
||||
chatcmpl_message['role'] = 'assistant'
|
||||
|
||||
# 处理思维链
|
||||
content = chatcmpl_message.get('content', '')
|
||||
reasoning_content = chatcmpl_message.get('reasoning_content', None)
|
||||
|
||||
processed_content, _ = await self._process_thinking_content(
|
||||
content=content, reasoning_content=reasoning_content, remove_think=remove_think
|
||||
)
|
||||
|
||||
chatcmpl_message['content'] = processed_content
|
||||
|
||||
# 移除 reasoning_content 字段,避免传递给 Message
|
||||
if 'reasoning_content' in chatcmpl_message:
|
||||
del chatcmpl_message['reasoning_content']
|
||||
|
||||
message = provider_message.Message(**chatcmpl_message)
|
||||
|
||||
return message
|
||||
|
||||
async def _process_thinking_content(
|
||||
self,
|
||||
content: str,
|
||||
reasoning_content: str = None,
|
||||
remove_think: bool = False,
|
||||
) -> tuple[str, str]:
|
||||
"""处理思维链内容
|
||||
|
||||
Args:
|
||||
content: 原始内容
|
||||
reasoning_content: reasoning_content 字段内容
|
||||
remove_think: 是否移除思维链
|
||||
|
||||
Returns:
|
||||
(处理后的内容, 提取的思维链内容)
|
||||
"""
|
||||
thinking_content = ''
|
||||
|
||||
# 1. 从 reasoning_content 提取思维链
|
||||
if reasoning_content:
|
||||
thinking_content = reasoning_content
|
||||
|
||||
# 2. 从 content 中提取 <think> 标签内容
|
||||
if content and '<think>' in content and '</think>' in content:
|
||||
import re
|
||||
|
||||
think_pattern = r'<think>(.*?)</think>'
|
||||
think_matches = re.findall(think_pattern, content, re.DOTALL)
|
||||
if think_matches:
|
||||
# 如果已有 reasoning_content,则追加
|
||||
if thinking_content:
|
||||
thinking_content += '\n' + '\n'.join(think_matches)
|
||||
else:
|
||||
thinking_content = '\n'.join(think_matches)
|
||||
# 移除 content 中的 <think> 标签
|
||||
content = re.sub(think_pattern, '', content, flags=re.DOTALL).strip()
|
||||
|
||||
# 3. 根据 remove_think 参数决定是否保留思维链
|
||||
if remove_think:
|
||||
return content, ''
|
||||
else:
|
||||
# 如果有思维链内容,将其以 <think> 格式添加到 content 开头
|
||||
if thinking_content:
|
||||
content = f'<think>\n{thinking_content}\n</think>\n{content}'.strip()
|
||||
return content, thinking_content
|
||||
|
||||
async def _closure_stream(
|
||||
self,
|
||||
query: pipeline_query.Query,
|
||||
req_messages: list[dict],
|
||||
use_model: requester.RuntimeLLMModel,
|
||||
use_funcs: list[resource_tool.LLMTool] = None,
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
remove_think: bool = False,
|
||||
) -> provider_message.MessageChunk:
|
||||
self.client.api_key = use_model.provider.token_mgr.get_token()
|
||||
|
||||
args = {}
|
||||
args['model'] = use_model.model_entity.name
|
||||
|
||||
if use_funcs:
|
||||
tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
|
||||
if tools:
|
||||
args['tools'] = tools
|
||||
|
||||
# 设置此次请求中的messages
|
||||
messages = req_messages.copy()
|
||||
|
||||
# 检查vision
|
||||
for msg in messages:
|
||||
if 'content' in msg and isinstance(msg['content'], list):
|
||||
for me in msg['content']:
|
||||
if me['type'] == 'image_base64':
|
||||
me['image_url'] = {'url': me['image_base64']}
|
||||
me['type'] = 'image_url'
|
||||
del me['image_base64']
|
||||
|
||||
args['messages'] = messages
|
||||
args['stream'] = True
|
||||
|
||||
# 流式处理状态
|
||||
# tool_calls_map: dict[str, provider_message.ToolCall] = {}
|
||||
chunk_idx = 0
|
||||
thinking_started = False
|
||||
thinking_ended = False
|
||||
role = 'assistant' # 默认角色
|
||||
tool_id = ''
|
||||
tool_name = ''
|
||||
# accumulated_reasoning = '' # 仅用于判断何时结束思维链
|
||||
|
||||
async for chunk in self._req_stream(args, extra_body=extra_args):
|
||||
# 解析 chunk 数据
|
||||
|
||||
if hasattr(chunk, 'choices') and chunk.choices:
|
||||
choice = chunk.choices[0]
|
||||
delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
|
||||
|
||||
finish_reason = getattr(choice, 'finish_reason', None)
|
||||
else:
|
||||
delta = {}
|
||||
finish_reason = None
|
||||
# 从第一个 chunk 获取 role,后续使用这个 role
|
||||
if 'role' in delta and delta['role']:
|
||||
role = delta['role']
|
||||
|
||||
# 获取增量内容
|
||||
delta_content = delta.get('content', '')
|
||||
reasoning_content = delta.get('reasoning_content', '')
|
||||
|
||||
# 处理 reasoning_content
|
||||
if reasoning_content:
|
||||
# accumulated_reasoning += reasoning_content
|
||||
# 如果设置了 remove_think,跳过 reasoning_content
|
||||
if remove_think:
|
||||
chunk_idx += 1
|
||||
continue
|
||||
|
||||
# 第一次出现 reasoning_content,添加 <think> 开始标签
|
||||
if not thinking_started:
|
||||
thinking_started = True
|
||||
delta_content = '<think>\n' + reasoning_content
|
||||
else:
|
||||
# 继续输出 reasoning_content
|
||||
delta_content = reasoning_content
|
||||
elif thinking_started and not thinking_ended and delta_content:
|
||||
# reasoning_content 结束,normal content 开始,添加 </think> 结束标签
|
||||
thinking_ended = True
|
||||
delta_content = '\n</think>\n' + delta_content
|
||||
|
||||
# 处理 content 中已有的 <think> 标签(如果需要移除)
|
||||
# if delta_content and remove_think and '<think>' in delta_content:
|
||||
# import re
|
||||
#
|
||||
# # 移除 <think> 标签及其内容
|
||||
# delta_content = re.sub(r'<think>.*?</think>', '', delta_content, flags=re.DOTALL)
|
||||
|
||||
# 处理工具调用增量
|
||||
# delta_tool_calls = None
|
||||
if delta.get('tool_calls'):
|
||||
for tool_call in delta['tool_calls']:
|
||||
if tool_call['id'] and tool_call['function']['name']:
|
||||
tool_id = tool_call['id']
|
||||
tool_name = tool_call['function']['name']
|
||||
else:
|
||||
tool_call['id'] = tool_id
|
||||
tool_call['function']['name'] = tool_name
|
||||
if tool_call['type'] is None:
|
||||
tool_call['type'] = 'function'
|
||||
|
||||
# 跳过空的第一个 chunk(只有 role 没有内容)
|
||||
if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'):
|
||||
chunk_idx += 1
|
||||
continue
|
||||
# 构建 MessageChunk - 只包含增量内容
|
||||
chunk_data = {
|
||||
'role': role,
|
||||
'content': delta_content if delta_content else None,
|
||||
'tool_calls': delta.get('tool_calls'),
|
||||
'is_final': bool(finish_reason),
|
||||
}
|
||||
|
||||
# 移除 None 值
|
||||
chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
|
||||
|
||||
yield provider_message.MessageChunk(**chunk_data)
|
||||
chunk_idx += 1
|
||||
|
||||
async def _closure(
|
||||
self,
|
||||
query: pipeline_query.Query,
|
||||
req_messages: list[dict],
|
||||
use_model: requester.RuntimeLLMModel,
|
||||
use_funcs: list[resource_tool.LLMTool] = None,
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
remove_think: bool = False,
|
||||
) -> tuple[provider_message.Message, dict]:
|
||||
self.client.api_key = use_model.provider.token_mgr.get_token()
|
||||
|
||||
args = {}
|
||||
args['model'] = use_model.model_entity.name
|
||||
|
||||
if use_funcs:
|
||||
tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
|
||||
|
||||
if tools:
|
||||
args['tools'] = tools
|
||||
|
||||
# 设置此次请求中的messages
|
||||
messages = req_messages.copy()
|
||||
|
||||
# 检查vision
|
||||
for msg in messages:
|
||||
if 'content' in msg and isinstance(msg['content'], list):
|
||||
for me in msg['content']:
|
||||
if me['type'] == 'image_base64':
|
||||
me['image_url'] = {'url': me['image_base64']}
|
||||
me['type'] = 'image_url'
|
||||
del me['image_base64']
|
||||
|
||||
args['messages'] = messages
|
||||
|
||||
# 发送请求
|
||||
|
||||
resp = await self._req(args, extra_body=extra_args)
|
||||
# 处理请求结果
|
||||
message = await self._make_msg(resp, remove_think)
|
||||
|
||||
# Extract token usage from response
|
||||
usage_info = {}
|
||||
if hasattr(resp, 'usage') and resp.usage:
|
||||
usage_info['input_tokens'] = resp.usage.prompt_tokens or 0
|
||||
usage_info['output_tokens'] = resp.usage.completion_tokens or 0
|
||||
usage_info['total_tokens'] = resp.usage.total_tokens or 0
|
||||
|
||||
return message, usage_info
|
||||
|
||||
async def invoke_llm(
|
||||
self,
|
||||
query: pipeline_query.Query,
|
||||
model: requester.RuntimeLLMModel,
|
||||
messages: typing.List[provider_message.Message],
|
||||
funcs: typing.List[resource_tool.LLMTool] = None,
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
remove_think: bool = False,
|
||||
) -> tuple[provider_message.Message, dict]:
|
||||
"""Invoke LLM and return message with usage info"""
|
||||
req_messages = [] # req_messages 仅用于类内,外部同步由 query.messages 进行
|
||||
for m in messages:
|
||||
msg_dict = m.dict(exclude_none=True)
|
||||
content = msg_dict.get('content')
|
||||
if isinstance(content, list):
|
||||
# 检查 content 列表中是否每个部分都是文本
|
||||
if all(isinstance(part, dict) and part.get('type') == 'text' for part in content):
|
||||
# 将所有文本部分合并为一个字符串
|
||||
msg_dict['content'] = '\n'.join(part['text'] for part in content)
|
||||
req_messages.append(msg_dict)
|
||||
|
||||
try:
|
||||
msg, usage_info = await self._closure(
|
||||
query=query,
|
||||
req_messages=req_messages,
|
||||
use_model=model,
|
||||
use_funcs=funcs,
|
||||
extra_args=extra_args,
|
||||
remove_think=remove_think,
|
||||
)
|
||||
return msg, usage_info
|
||||
except asyncio.TimeoutError:
|
||||
raise errors.RequesterError('请求超时')
|
||||
except openai.BadRequestError as e:
|
||||
error_message = str(e.message) if hasattr(e, 'message') else str(e)
|
||||
if 'context_length_exceeded' in str(e):
|
||||
raise errors.RequesterError(f'上文过长,请重置会话: {error_message}')
|
||||
else:
|
||||
raise errors.RequesterError(f'请求参数错误: {error_message}')
|
||||
except openai.AuthenticationError as e:
|
||||
error_message = str(e.message) if hasattr(e, 'message') else str(e)
|
||||
raise errors.RequesterError(f'无效的 api-key: {error_message}')
|
||||
except openai.NotFoundError as e:
|
||||
error_message = str(e.message) if hasattr(e, 'message') else str(e)
|
||||
raise errors.RequesterError(f'请求路径错误: {error_message}')
|
||||
except openai.RateLimitError as e:
|
||||
error_message = str(e.message) if hasattr(e, 'message') else str(e)
|
||||
raise errors.RequesterError(f'请求过于频繁或余额不足: {error_message}')
|
||||
except openai.APIConnectionError as e:
|
||||
error_message = f'连接错误: {str(e)}'
|
||||
raise errors.RequesterError(error_message)
|
||||
except openai.APIError as e:
|
||||
error_message = str(e.message) if hasattr(e, 'message') else str(e)
|
||||
raise errors.RequesterError(f'请求错误: {error_message}')
|
||||
|
||||
async def invoke_embedding(
|
||||
self,
|
||||
model: requester.RuntimeEmbeddingModel,
|
||||
input_text: list[str],
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
) -> tuple[list[list[float]], dict]:
|
||||
"""调用 Embedding API, returns (embeddings, usage_info)"""
|
||||
self.client.api_key = model.provider.token_mgr.get_token()
|
||||
|
||||
args = {
|
||||
'model': model.model_entity.name,
|
||||
'input': input_text,
|
||||
}
|
||||
|
||||
if model.model_entity.extra_args:
|
||||
args.update(model.model_entity.extra_args)
|
||||
|
||||
args.update(extra_args)
|
||||
|
||||
try:
|
||||
resp = await self.client.embeddings.create(**args)
|
||||
|
||||
# Extract usage info
|
||||
usage_info = {}
|
||||
if hasattr(resp, 'usage') and resp.usage:
|
||||
usage_info['prompt_tokens'] = resp.usage.prompt_tokens or 0
|
||||
usage_info['total_tokens'] = resp.usage.total_tokens or 0
|
||||
|
||||
return [d.embedding for d in resp.data], usage_info
|
||||
except asyncio.TimeoutError:
|
||||
raise errors.RequesterError('请求超时')
|
||||
except openai.BadRequestError as e:
|
||||
raise errors.RequesterError(f'请求参数错误: {e.message}')
|
||||
|
||||
async def invoke_llm_stream(
|
||||
self,
|
||||
query: pipeline_query.Query,
|
||||
model: requester.RuntimeLLMModel,
|
||||
messages: typing.List[provider_message.Message],
|
||||
funcs: typing.List[resource_tool.LLMTool] = None,
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
remove_think: bool = False,
|
||||
) -> provider_message.MessageChunk:
|
||||
req_messages = [] # req_messages 仅用于类内,外部同步由 query.messages 进行
|
||||
for m in messages:
|
||||
msg_dict = m.dict(exclude_none=True)
|
||||
content = msg_dict.get('content')
|
||||
if isinstance(content, list):
|
||||
# 检查 content 列表中是否每个部分都是文本
|
||||
if all(isinstance(part, dict) and part.get('type') == 'text' for part in content):
|
||||
# 将所有文本部分合并为一个字符串
|
||||
msg_dict['content'] = '\n'.join(part['text'] for part in content)
|
||||
req_messages.append(msg_dict)
|
||||
|
||||
try:
|
||||
async for item in self._closure_stream(
|
||||
query=query,
|
||||
req_messages=req_messages,
|
||||
use_model=model,
|
||||
use_funcs=funcs,
|
||||
extra_args=extra_args,
|
||||
remove_think=remove_think,
|
||||
):
|
||||
yield item
|
||||
|
||||
except asyncio.TimeoutError:
|
||||
raise errors.RequesterError('请求超时')
|
||||
except openai.BadRequestError as e:
|
||||
if 'context_length_exceeded' in e.message:
|
||||
raise errors.RequesterError(f'上文过长,请重置会话: {e.message}')
|
||||
else:
|
||||
raise errors.RequesterError(f'请求参数错误: {e.message}')
|
||||
except openai.AuthenticationError as e:
|
||||
raise errors.RequesterError(f'无效的 api-key: {e.message}')
|
||||
except openai.NotFoundError as e:
|
||||
raise errors.RequesterError(f'请求路径错误: {e.message}')
|
||||
except openai.RateLimitError as e:
|
||||
raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}')
|
||||
except openai.APIError as e:
|
||||
raise errors.RequesterError(f'请求错误: {e.message}')
|
||||
|
||||
async def invoke_rerank(
|
||||
self,
|
||||
model: requester.RuntimeRerankModel,
|
||||
query: str,
|
||||
documents: typing.List[str],
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
) -> typing.List[dict]:
|
||||
"""Standard /rerank endpoint (Jina/Cohere/SiliconFlow/Voyage/DashScope compatible)
|
||||
|
||||
Supports extra_args from model.extra_args:
|
||||
- rerank_url: full URL override (e.g. "https://dashscope.aliyuncs.com/compatible-api/v1/reranks")
|
||||
- rerank_path: path override appended to base_url (e.g. "reranks" instead of default "rerank")
|
||||
- Any other fields are merged into the request payload.
|
||||
"""
|
||||
api_key = model.provider.token_mgr.get_token()
|
||||
base_url = self.requester_cfg.get('base_url', '').rstrip('/')
|
||||
timeout = self.requester_cfg.get('timeout', 120)
|
||||
|
||||
merged_args = {}
|
||||
if model.model_entity.extra_args:
|
||||
merged_args.update(model.model_entity.extra_args)
|
||||
if extra_args:
|
||||
merged_args.update(extra_args)
|
||||
|
||||
rerank_url = merged_args.pop('rerank_url', None)
|
||||
rerank_path = merged_args.pop('rerank_path', 'rerank')
|
||||
if not rerank_url:
|
||||
rerank_url = f'{base_url}/{rerank_path}'
|
||||
|
||||
headers = {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': f'Bearer {api_key}',
|
||||
}
|
||||
|
||||
payload = {
|
||||
'model': model.model_entity.name,
|
||||
'query': query,
|
||||
'documents': documents[:64],
|
||||
'top_n': min(len(documents), 64),
|
||||
}
|
||||
|
||||
if merged_args:
|
||||
payload.update(merged_args)
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient(trust_env=True, timeout=timeout) as client:
|
||||
resp = await client.post(rerank_url, headers=headers, json=payload)
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
|
||||
results = self._parse_rerank_response(data)
|
||||
|
||||
if results:
|
||||
scores = [r.get('relevance_score', 0.0) for r in results]
|
||||
min_score = min(scores)
|
||||
max_score = max(scores)
|
||||
if max_score - min_score > 1e-6:
|
||||
for r in results:
|
||||
r['relevance_score'] = (r['relevance_score'] - min_score) / (max_score - min_score)
|
||||
|
||||
return results
|
||||
except httpx.HTTPStatusError as e:
|
||||
raise errors.RequesterError(f'Rerank request failed: {e.response.status_code} - {e.response.text}')
|
||||
except httpx.TimeoutException:
|
||||
raise errors.RequesterError('Rerank request timed out')
|
||||
except Exception as e:
|
||||
raise errors.RequesterError(f'Rerank request error: {str(e)}')
|
||||
|
||||
@staticmethod
|
||||
def _parse_rerank_response(data: dict) -> typing.List[dict]:
|
||||
"""Parse rerank response from various providers.
|
||||
|
||||
Handles:
|
||||
- Jina/Cohere/SiliconFlow: {"results": [{"index", "relevance_score"}]}
|
||||
- Voyage AI: {"data": [{"index", "relevance_score"}]}
|
||||
- DashScope: {"output": {"results": [{"index", "relevance_score"}]}}
|
||||
"""
|
||||
if 'results' in data:
|
||||
return data['results']
|
||||
if 'data' in data:
|
||||
return data['data']
|
||||
if 'output' in data and isinstance(data['output'], dict):
|
||||
return data['output'].get('results', [])
|
||||
return []
|
||||
@@ -22,10 +22,12 @@ spec:
|
||||
type: integer
|
||||
required: true
|
||||
default: 120
|
||||
# LiteLLM provider prefix - when set, uses unified LiteLLMRequester
|
||||
litellm_provider: openai
|
||||
support_type:
|
||||
- llm
|
||||
- text-embedding
|
||||
- rerank
|
||||
provider_category: manufacturer
|
||||
execution:
|
||||
python:
|
||||
path: ./chatcmpl.py
|
||||
attr: OpenAIChatCompletions
|
||||
|
||||
@@ -22,7 +22,10 @@ spec:
|
||||
type: integer
|
||||
required: true
|
||||
default: 120
|
||||
litellm_provider: cohere
|
||||
support_type:
|
||||
- rerank
|
||||
provider_category: manufacturer
|
||||
execution:
|
||||
python:
|
||||
path: ./chatcmpl.py
|
||||
attr: OpenAIChatCompletions
|
||||
|
||||
@@ -0,0 +1,17 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import typing
|
||||
import openai
|
||||
|
||||
from . import chatcmpl
|
||||
|
||||
|
||||
class CompShareChatCompletions(chatcmpl.OpenAIChatCompletions):
|
||||
"""CompShare ChatCompletion API 请求器"""
|
||||
|
||||
client: openai.AsyncClient
|
||||
|
||||
default_config: dict[str, typing.Any] = {
|
||||
'base_url': 'https://api.modelverse.cn/v1',
|
||||
'timeout': 120,
|
||||
}
|
||||
@@ -22,7 +22,10 @@ spec:
|
||||
type: integer
|
||||
required: true
|
||||
default: 120
|
||||
litellm_provider: openai
|
||||
support_type:
|
||||
- llm
|
||||
provider_category: maas
|
||||
execution:
|
||||
python:
|
||||
path: ./compsharechatcmpl.py
|
||||
attr: CompShareChatCompletions
|
||||
|
||||
@@ -0,0 +1,67 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import typing
|
||||
|
||||
from . import chatcmpl
|
||||
from .. import errors, requester
|
||||
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
|
||||
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
|
||||
import langbot_plugin.api.entities.builtin.provider.message as provider_message
|
||||
|
||||
|
||||
class DeepseekChatCompletions(chatcmpl.OpenAIChatCompletions):
|
||||
"""Deepseek ChatCompletion API 请求器"""
|
||||
|
||||
default_config: dict[str, typing.Any] = {
|
||||
'base_url': 'https://api.deepseek.com',
|
||||
'timeout': 120,
|
||||
}
|
||||
|
||||
async def _closure(
|
||||
self,
|
||||
query: pipeline_query.Query,
|
||||
req_messages: list[dict],
|
||||
use_model: requester.RuntimeLLMModel,
|
||||
use_funcs: list[resource_tool.LLMTool] = None,
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
remove_think: bool = False,
|
||||
) -> tuple[provider_message.Message, dict]:
|
||||
self.client.api_key = use_model.provider.token_mgr.get_token()
|
||||
|
||||
args = {}
|
||||
args['model'] = use_model.model_entity.name
|
||||
|
||||
if use_funcs:
|
||||
tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
|
||||
|
||||
if tools:
|
||||
args['tools'] = tools
|
||||
|
||||
# 设置此次请求中的messages
|
||||
messages = req_messages
|
||||
|
||||
# deepseek 不支持多模态,把content都转换成纯文字
|
||||
for m in messages:
|
||||
if 'content' in m and isinstance(m['content'], list):
|
||||
m['content'] = ' '.join([c['text'] for c in m['content'] if 'text' in c])
|
||||
|
||||
args['messages'] = messages
|
||||
|
||||
# 发送请求
|
||||
resp = await self._req(args, extra_body=extra_args)
|
||||
|
||||
# print(resp)
|
||||
|
||||
if resp is None:
|
||||
raise errors.RequesterError('接口返回为空,请确定模型提供商服务是否正常')
|
||||
# 处理请求结果
|
||||
message = await self._make_msg(resp, remove_think)
|
||||
|
||||
# Extract token usage from response
|
||||
usage_info = {}
|
||||
if hasattr(resp, 'usage') and resp.usage:
|
||||
usage_info['input_tokens'] = resp.usage.prompt_tokens or 0
|
||||
usage_info['output_tokens'] = resp.usage.completion_tokens or 0
|
||||
usage_info['total_tokens'] = resp.usage.total_tokens or 0
|
||||
|
||||
return message, usage_info
|
||||
@@ -22,7 +22,10 @@ spec:
|
||||
type: integer
|
||||
required: true
|
||||
default: 120
|
||||
litellm_provider: deepseek
|
||||
support_type:
|
||||
- llm
|
||||
provider_category: manufacturer
|
||||
execution:
|
||||
python:
|
||||
path: ./deepseekchatcmpl.py
|
||||
attr: DeepseekChatCompletions
|
||||
|
||||
205
src/langbot/pkg/provider/modelmgr/requesters/geminichatcmpl.py
Normal file
205
src/langbot/pkg/provider/modelmgr/requesters/geminichatcmpl.py
Normal file
@@ -0,0 +1,205 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import typing
|
||||
import httpx
|
||||
|
||||
from . import chatcmpl
|
||||
|
||||
import uuid
|
||||
|
||||
from .. import requester
|
||||
import langbot_plugin.api.entities.builtin.provider.message as provider_message
|
||||
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
|
||||
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
|
||||
|
||||
|
||||
class GeminiChatCompletions(chatcmpl.OpenAIChatCompletions):
|
||||
"""Google Gemini API 请求器"""
|
||||
|
||||
default_config: dict[str, typing.Any] = {
|
||||
'base_url': 'https://generativelanguage.googleapis.com/v1beta/openai',
|
||||
'timeout': 120,
|
||||
}
|
||||
|
||||
async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]:
|
||||
models_url = 'https://generativelanguage.googleapis.com/v1beta/models'
|
||||
params = {'key': api_key} if api_key else {}
|
||||
|
||||
all_models: list[dict[str, typing.Any]] = []
|
||||
next_page_token = ''
|
||||
last_payload: dict[str, typing.Any] = {}
|
||||
|
||||
async with httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']) as client:
|
||||
while True:
|
||||
request_params = dict(params)
|
||||
if next_page_token:
|
||||
request_params['pageToken'] = next_page_token
|
||||
|
||||
response = await client.get(models_url, params=request_params)
|
||||
response.raise_for_status()
|
||||
payload = response.json()
|
||||
last_payload = payload
|
||||
|
||||
for item in payload.get('models', []):
|
||||
model_name = item.get('name', '')
|
||||
model_id = model_name.replace('models/', '', 1)
|
||||
if not model_id:
|
||||
continue
|
||||
|
||||
supported_methods = item.get('supportedGenerationMethods', []) or []
|
||||
if 'embedContent' in supported_methods and 'generateContent' not in supported_methods:
|
||||
model_type = 'embedding'
|
||||
else:
|
||||
model_type = 'llm'
|
||||
|
||||
all_models.append(
|
||||
{
|
||||
'id': model_id,
|
||||
'name': model_id,
|
||||
'type': model_type,
|
||||
'abilities': self._infer_model_abilities(item, model_id),
|
||||
'display_name': item.get('displayName') or None,
|
||||
'description': item.get('description') or None,
|
||||
'context_length': item.get('inputTokenLimit'),
|
||||
'input_modalities': self._normalize_modalities(item.get('inputModalities')),
|
||||
'output_modalities': self._normalize_modalities(item.get('outputModalities')),
|
||||
}
|
||||
)
|
||||
|
||||
next_page_token = payload.get('nextPageToken', '')
|
||||
if not next_page_token:
|
||||
break
|
||||
|
||||
all_models.sort(key=lambda item: (item['type'] != 'llm', item['name'].lower()))
|
||||
return {
|
||||
'models': all_models,
|
||||
'debug': {
|
||||
'request': {
|
||||
'method': 'GET',
|
||||
'url': models_url,
|
||||
'query': {'key': self._mask_api_key(api_key)} if api_key else {},
|
||||
},
|
||||
'response': last_payload,
|
||||
},
|
||||
}
|
||||
|
||||
async def _closure_stream(
|
||||
self,
|
||||
query: pipeline_query.Query,
|
||||
req_messages: list[dict],
|
||||
use_model: requester.RuntimeLLMModel,
|
||||
use_funcs: list[resource_tool.LLMTool] = None,
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
remove_think: bool = False,
|
||||
) -> provider_message.MessageChunk:
|
||||
self.client.api_key = use_model.provider.token_mgr.get_token()
|
||||
|
||||
args = {}
|
||||
args['model'] = use_model.model_entity.name
|
||||
|
||||
if use_funcs:
|
||||
tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
|
||||
if tools:
|
||||
args['tools'] = tools
|
||||
|
||||
# 设置此次请求中的messages
|
||||
messages = req_messages.copy()
|
||||
|
||||
# 检查vision
|
||||
for msg in messages:
|
||||
if 'content' in msg and isinstance(msg['content'], list):
|
||||
for me in msg['content']:
|
||||
if me['type'] == 'image_base64':
|
||||
me['image_url'] = {'url': me['image_base64']}
|
||||
me['type'] = 'image_url'
|
||||
del me['image_base64']
|
||||
|
||||
args['messages'] = messages
|
||||
args['stream'] = True
|
||||
|
||||
# 流式处理状态
|
||||
# tool_calls_map: dict[str, provider_message.ToolCall] = {}
|
||||
chunk_idx = 0
|
||||
thinking_started = False
|
||||
thinking_ended = False
|
||||
role = 'assistant' # 默认角色
|
||||
tool_id = ''
|
||||
tool_name = ''
|
||||
# accumulated_reasoning = '' # 仅用于判断何时结束思维链
|
||||
|
||||
async for chunk in self._req_stream(args, extra_body=extra_args):
|
||||
# 解析 chunk 数据
|
||||
|
||||
if hasattr(chunk, 'choices') and chunk.choices:
|
||||
choice = chunk.choices[0]
|
||||
delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
|
||||
|
||||
finish_reason = getattr(choice, 'finish_reason', None)
|
||||
else:
|
||||
delta = {}
|
||||
finish_reason = None
|
||||
# 从第一个 chunk 获取 role,后续使用这个 role
|
||||
if 'role' in delta and delta['role']:
|
||||
role = delta['role']
|
||||
|
||||
# 获取增量内容
|
||||
delta_content = delta.get('content', '')
|
||||
reasoning_content = delta.get('reasoning_content', '')
|
||||
|
||||
# 处理 reasoning_content
|
||||
if reasoning_content:
|
||||
# accumulated_reasoning += reasoning_content
|
||||
# 如果设置了 remove_think,跳过 reasoning_content
|
||||
if remove_think:
|
||||
chunk_idx += 1
|
||||
continue
|
||||
|
||||
# 第一次出现 reasoning_content,添加 <think> 开始标签
|
||||
if not thinking_started:
|
||||
thinking_started = True
|
||||
delta_content = '<think>\n' + reasoning_content
|
||||
else:
|
||||
# 继续输出 reasoning_content
|
||||
delta_content = reasoning_content
|
||||
elif thinking_started and not thinking_ended and delta_content:
|
||||
# reasoning_content 结束,normal content 开始,添加 </think> 结束标签
|
||||
thinking_ended = True
|
||||
delta_content = '\n</think>\n' + delta_content
|
||||
|
||||
# 处理 content 中已有的 <think> 标签(如果需要移除)
|
||||
# if delta_content and remove_think and '<think>' in delta_content:
|
||||
# import re
|
||||
#
|
||||
# # 移除 <think> 标签及其内容
|
||||
# delta_content = re.sub(r'<think>.*?</think>', '', delta_content, flags=re.DOTALL)
|
||||
|
||||
# 处理工具调用增量
|
||||
# delta_tool_calls = None
|
||||
if delta.get('tool_calls'):
|
||||
for tool_call in delta['tool_calls']:
|
||||
if tool_call['id'] == '' and tool_id == '':
|
||||
tool_id = str(uuid.uuid4())
|
||||
if tool_call['function']['name']:
|
||||
tool_name = tool_call['function']['name']
|
||||
tool_call['id'] = tool_id
|
||||
tool_call['function']['name'] = tool_name
|
||||
if tool_call['type'] is None:
|
||||
tool_call['type'] = 'function'
|
||||
|
||||
# 跳过空的第一个 chunk(只有 role 没有内容)
|
||||
if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'):
|
||||
chunk_idx += 1
|
||||
continue
|
||||
# 构建 MessageChunk - 只包含增量内容
|
||||
chunk_data = {
|
||||
'role': role,
|
||||
'content': delta_content if delta_content else None,
|
||||
'tool_calls': delta.get('tool_calls'),
|
||||
'is_final': bool(finish_reason),
|
||||
}
|
||||
|
||||
# 移除 None 值
|
||||
chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
|
||||
|
||||
yield provider_message.MessageChunk(**chunk_data)
|
||||
chunk_idx += 1
|
||||
@@ -22,7 +22,10 @@ spec:
|
||||
type: integer
|
||||
required: true
|
||||
default: 120
|
||||
litellm_provider: gemini
|
||||
support_type:
|
||||
- llm
|
||||
provider_category: manufacturer
|
||||
execution:
|
||||
python:
|
||||
path: ./geminichatcmpl.py
|
||||
attr: GeminiChatCompletions
|
||||
|
||||
@@ -0,0 +1,15 @@
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
import typing
|
||||
|
||||
from . import ppiochatcmpl
|
||||
|
||||
|
||||
class GiteeAIChatCompletions(ppiochatcmpl.PPIOChatCompletions):
|
||||
"""Gitee AI ChatCompletions API 请求器"""
|
||||
|
||||
default_config: dict[str, typing.Any] = {
|
||||
'base_url': 'https://ai.gitee.com/v1',
|
||||
'timeout': 120,
|
||||
}
|
||||
@@ -22,9 +22,12 @@ spec:
|
||||
type: integer
|
||||
required: true
|
||||
default: 120
|
||||
litellm_provider: openai
|
||||
support_type:
|
||||
- llm
|
||||
- text-embedding
|
||||
- rerank
|
||||
provider_category: maas
|
||||
execution:
|
||||
python:
|
||||
path: ./giteeaichatcmpl.py
|
||||
attr: GiteeAIChatCompletions
|
||||
|
||||
208
src/langbot/pkg/provider/modelmgr/requesters/jiekouaichatcmpl.py
Normal file
208
src/langbot/pkg/provider/modelmgr/requesters/jiekouaichatcmpl.py
Normal file
@@ -0,0 +1,208 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import openai
|
||||
import typing
|
||||
|
||||
from . import chatcmpl
|
||||
from .. import requester
|
||||
import openai.types.chat.chat_completion as chat_completion
|
||||
import re
|
||||
import langbot_plugin.api.entities.builtin.provider.message as provider_message
|
||||
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
|
||||
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
|
||||
|
||||
|
||||
class JieKouAIChatCompletions(chatcmpl.OpenAIChatCompletions):
|
||||
"""接口 AI ChatCompletion API 请求器"""
|
||||
|
||||
client: openai.AsyncClient
|
||||
|
||||
default_config: dict[str, typing.Any] = {
|
||||
'base_url': 'https://api.jiekou.ai/openai',
|
||||
'timeout': 120,
|
||||
}
|
||||
|
||||
is_think: bool = False
|
||||
|
||||
async def _make_msg(
|
||||
self,
|
||||
chat_completion: chat_completion.ChatCompletion,
|
||||
remove_think: bool,
|
||||
) -> provider_message.Message:
|
||||
chatcmpl_message = chat_completion.choices[0].message.model_dump()
|
||||
# print(chatcmpl_message.keys(), chatcmpl_message.values())
|
||||
|
||||
# 确保 role 字段存在且不为 None
|
||||
if 'role' not in chatcmpl_message or chatcmpl_message['role'] is None:
|
||||
chatcmpl_message['role'] = 'assistant'
|
||||
|
||||
reasoning_content = chatcmpl_message['reasoning_content'] if 'reasoning_content' in chatcmpl_message else None
|
||||
|
||||
# deepseek的reasoner模型
|
||||
chatcmpl_message['content'] = await self._process_thinking_content(
|
||||
chatcmpl_message['content'], reasoning_content, remove_think
|
||||
)
|
||||
|
||||
# 移除 reasoning_content 字段,避免传递给 Message
|
||||
if 'reasoning_content' in chatcmpl_message:
|
||||
del chatcmpl_message['reasoning_content']
|
||||
|
||||
message = provider_message.Message(**chatcmpl_message)
|
||||
|
||||
return message
|
||||
|
||||
async def _process_thinking_content(
|
||||
self,
|
||||
content: str,
|
||||
reasoning_content: str = None,
|
||||
remove_think: bool = False,
|
||||
) -> tuple[str, str]:
|
||||
"""处理思维链内容
|
||||
|
||||
Args:
|
||||
content: 原始内容
|
||||
reasoning_content: reasoning_content 字段内容
|
||||
remove_think: 是否移除思维链
|
||||
|
||||
Returns:
|
||||
处理后的内容
|
||||
"""
|
||||
if remove_think:
|
||||
content = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL)
|
||||
else:
|
||||
if reasoning_content is not None:
|
||||
content = '<think>\n' + reasoning_content + '\n</think>\n' + content
|
||||
return content
|
||||
|
||||
async def _make_msg_chunk(
|
||||
self,
|
||||
delta: dict[str, typing.Any],
|
||||
idx: int,
|
||||
) -> provider_message.MessageChunk:
|
||||
# 处理流式chunk和完整响应的差异
|
||||
# print(chat_completion.choices[0])
|
||||
|
||||
# 确保 role 字段存在且不为 None
|
||||
if 'role' not in delta or delta['role'] is None:
|
||||
delta['role'] = 'assistant'
|
||||
|
||||
reasoning_content = delta['reasoning_content'] if 'reasoning_content' in delta else None
|
||||
|
||||
delta['content'] = '' if delta['content'] is None else delta['content']
|
||||
# print(reasoning_content)
|
||||
|
||||
# deepseek的reasoner模型
|
||||
|
||||
if reasoning_content is not None:
|
||||
delta['content'] += reasoning_content
|
||||
|
||||
message = provider_message.MessageChunk(**delta)
|
||||
|
||||
return message
|
||||
|
||||
async def _closure_stream(
|
||||
self,
|
||||
query: pipeline_query.Query,
|
||||
req_messages: list[dict],
|
||||
use_model: requester.RuntimeLLMModel,
|
||||
use_funcs: list[resource_tool.LLMTool] = None,
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
remove_think: bool = False,
|
||||
) -> provider_message.Message | typing.AsyncGenerator[provider_message.MessageChunk, None]:
|
||||
self.client.api_key = use_model.provider.token_mgr.get_token()
|
||||
|
||||
args = {}
|
||||
args['model'] = use_model.model_entity.name
|
||||
|
||||
if use_funcs:
|
||||
tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
|
||||
|
||||
if tools:
|
||||
args['tools'] = tools
|
||||
|
||||
# 设置此次请求中的messages
|
||||
messages = req_messages.copy()
|
||||
|
||||
# 检查vision
|
||||
for msg in messages:
|
||||
if 'content' in msg and isinstance(msg['content'], list):
|
||||
for me in msg['content']:
|
||||
if me['type'] == 'image_base64':
|
||||
me['image_url'] = {'url': me['image_base64']}
|
||||
me['type'] = 'image_url'
|
||||
del me['image_base64']
|
||||
|
||||
args['messages'] = messages
|
||||
args['stream'] = True
|
||||
|
||||
# tool_calls_map: dict[str, provider_message.ToolCall] = {}
|
||||
chunk_idx = 0
|
||||
thinking_started = False
|
||||
thinking_ended = False
|
||||
role = 'assistant' # 默认角色
|
||||
async for chunk in self._req_stream(args, extra_body=extra_args):
|
||||
# 解析 chunk 数据
|
||||
if hasattr(chunk, 'choices') and chunk.choices:
|
||||
choice = chunk.choices[0]
|
||||
delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
|
||||
finish_reason = getattr(choice, 'finish_reason', None)
|
||||
else:
|
||||
delta = {}
|
||||
finish_reason = None
|
||||
|
||||
# 从第一个 chunk 获取 role,后续使用这个 role
|
||||
if 'role' in delta and delta['role']:
|
||||
role = delta['role']
|
||||
|
||||
# 获取增量内容
|
||||
delta_content = delta.get('content', '')
|
||||
# reasoning_content = delta.get('reasoning_content', '')
|
||||
|
||||
if remove_think:
|
||||
if delta['content'] is not None:
|
||||
if '<think>' in delta['content'] and not thinking_started and not thinking_ended:
|
||||
thinking_started = True
|
||||
continue
|
||||
elif delta['content'] == r'</think>' and not thinking_ended:
|
||||
thinking_ended = True
|
||||
continue
|
||||
elif thinking_ended and delta['content'] == '\n\n' and thinking_started:
|
||||
thinking_started = False
|
||||
continue
|
||||
elif thinking_started and not thinking_ended:
|
||||
continue
|
||||
|
||||
# delta_tool_calls = None
|
||||
if delta.get('tool_calls'):
|
||||
for tool_call in delta['tool_calls']:
|
||||
if tool_call['id'] and tool_call['function']['name']:
|
||||
tool_id = tool_call['id']
|
||||
tool_name = tool_call['function']['name']
|
||||
|
||||
if tool_call['id'] is None:
|
||||
tool_call['id'] = tool_id
|
||||
if tool_call['function']['name'] is None:
|
||||
tool_call['function']['name'] = tool_name
|
||||
if tool_call['function']['arguments'] is None:
|
||||
tool_call['function']['arguments'] = ''
|
||||
if tool_call['type'] is None:
|
||||
tool_call['type'] = 'function'
|
||||
|
||||
# 跳过空的第一个 chunk(只有 role 没有内容)
|
||||
if chunk_idx == 0 and not delta_content and not delta.get('tool_calls'):
|
||||
chunk_idx += 1
|
||||
continue
|
||||
|
||||
# 构建 MessageChunk - 只包含增量内容
|
||||
chunk_data = {
|
||||
'role': role,
|
||||
'content': delta_content if delta_content else None,
|
||||
'tool_calls': delta.get('tool_calls'),
|
||||
'is_final': bool(finish_reason),
|
||||
}
|
||||
|
||||
# 移除 None 值
|
||||
chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
|
||||
|
||||
yield provider_message.MessageChunk(**chunk_data)
|
||||
chunk_idx += 1
|
||||
@@ -29,8 +29,11 @@ spec:
|
||||
type: int
|
||||
required: true
|
||||
default: 120
|
||||
litellm_provider: openai
|
||||
support_type:
|
||||
- llm
|
||||
- text-embedding
|
||||
provider_category: maas
|
||||
execution:
|
||||
python:
|
||||
path: ./jiekouaichatcmpl.py
|
||||
attr: JieKouAIChatCompletions
|
||||
|
||||
@@ -22,7 +22,10 @@ spec:
|
||||
type: integer
|
||||
required: true
|
||||
default: 120
|
||||
litellm_provider: jina
|
||||
support_type:
|
||||
- rerank
|
||||
provider_category: manufacturer
|
||||
execution:
|
||||
python:
|
||||
path: ./chatcmpl.py
|
||||
attr: OpenAIChatCompletions
|
||||
|
||||
@@ -0,0 +1,17 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import typing
|
||||
import openai
|
||||
|
||||
from . import chatcmpl
|
||||
|
||||
|
||||
class LmStudioChatCompletions(chatcmpl.OpenAIChatCompletions):
|
||||
"""LMStudio ChatCompletion API 请求器"""
|
||||
|
||||
client: openai.AsyncClient
|
||||
|
||||
default_config: dict[str, typing.Any] = {
|
||||
'base_url': 'http://127.0.0.1:1234/v1',
|
||||
'timeout': 120,
|
||||
}
|
||||
@@ -22,8 +22,11 @@ spec:
|
||||
type: integer
|
||||
required: true
|
||||
default: 120
|
||||
litellm_provider: openai
|
||||
support_type:
|
||||
- llm
|
||||
- text-embedding
|
||||
provider_category: self-hosted
|
||||
execution:
|
||||
python:
|
||||
path: ./lmstudiochatcmpl.py
|
||||
attr: LmStudioChatCompletions
|
||||
|
||||
@@ -0,0 +1,561 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import typing
|
||||
|
||||
import openai
|
||||
import openai.types.chat.chat_completion as chat_completion
|
||||
import httpx
|
||||
|
||||
from .. import entities, errors, requester
|
||||
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
|
||||
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
|
||||
import langbot_plugin.api.entities.builtin.provider.message as provider_message
|
||||
|
||||
|
||||
class ModelScopeChatCompletions(requester.ProviderAPIRequester):
|
||||
"""ModelScope ChatCompletion API 请求器"""
|
||||
|
||||
client: openai.AsyncClient
|
||||
|
||||
default_config: dict[str, typing.Any] = {
|
||||
'base_url': 'https://api-inference.modelscope.cn/v1',
|
||||
'timeout': 120,
|
||||
}
|
||||
|
||||
async def initialize(self):
|
||||
self.client = openai.AsyncClient(
|
||||
api_key='',
|
||||
base_url=self.requester_cfg['base_url'],
|
||||
timeout=self.requester_cfg['timeout'],
|
||||
http_client=httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']),
|
||||
)
|
||||
|
||||
def _mask_api_key(self, api_key: str | None) -> str:
|
||||
if not api_key:
|
||||
return ''
|
||||
if len(api_key) <= 8:
|
||||
return '****'
|
||||
return f'{api_key[:4]}...{api_key[-4:]}'
|
||||
|
||||
def _infer_model_type(self, model_id: str) -> str:
|
||||
normalized_model_id = (model_id or '').lower()
|
||||
embedding_keywords = (
|
||||
'embedding',
|
||||
'embed',
|
||||
'bge-',
|
||||
'e5-',
|
||||
'm3e',
|
||||
'gte-',
|
||||
'multilingual-e5',
|
||||
'text-embedding',
|
||||
)
|
||||
return 'embedding' if any(keyword in normalized_model_id for keyword in embedding_keywords) else 'llm'
|
||||
|
||||
def _infer_model_abilities(self, item: dict[str, typing.Any], model_id: str) -> list[str]:
|
||||
normalized_model_id = (model_id or '').lower()
|
||||
abilities: set[str] = set()
|
||||
|
||||
def _flatten(value: typing.Any) -> list[str]:
|
||||
if value is None:
|
||||
return []
|
||||
if isinstance(value, str):
|
||||
return [value.lower()]
|
||||
if isinstance(value, dict):
|
||||
flattened: list[str] = []
|
||||
for nested_value in value.values():
|
||||
flattened.extend(_flatten(nested_value))
|
||||
return flattened
|
||||
if isinstance(value, (list, tuple, set)):
|
||||
flattened: list[str] = []
|
||||
for nested_value in value:
|
||||
flattened.extend(_flatten(nested_value))
|
||||
return flattened
|
||||
return [str(value).lower()]
|
||||
|
||||
capability_tokens = _flatten(item.get('capabilities'))
|
||||
capability_tokens.extend(_flatten(item.get('modalities')))
|
||||
capability_tokens.extend(_flatten(item.get('input_modalities')))
|
||||
capability_tokens.extend(_flatten(item.get('output_modalities')))
|
||||
capability_tokens.extend(_flatten(item.get('supported_generation_methods')))
|
||||
capability_tokens.extend(_flatten(item.get('supported_parameters')))
|
||||
capability_tokens.extend(_flatten(item.get('architecture')))
|
||||
|
||||
combined_tokens = capability_tokens + [normalized_model_id]
|
||||
|
||||
vision_keywords = ('vision', 'image', 'file', 'video', 'multimodal', 'vl', 'ocr', 'omni')
|
||||
function_call_keywords = ('function', 'tool', 'tools', 'tool_choice', 'tool_call', 'tool-use', 'tool_use')
|
||||
|
||||
if any(any(keyword in token for keyword in vision_keywords) for token in combined_tokens):
|
||||
abilities.add('vision')
|
||||
|
||||
if any(any(keyword in token for keyword in function_call_keywords) for token in combined_tokens):
|
||||
abilities.add('func_call')
|
||||
|
||||
return sorted(abilities)
|
||||
|
||||
def _normalize_modalities(self, value: typing.Any) -> list[str]:
|
||||
normalized: list[str] = []
|
||||
|
||||
def _collect(item: typing.Any):
|
||||
if item is None:
|
||||
return
|
||||
if isinstance(item, str):
|
||||
for part in item.replace('->', ',').replace('+', ',').split(','):
|
||||
token = part.strip().lower()
|
||||
if token and token not in normalized:
|
||||
normalized.append(token)
|
||||
return
|
||||
if isinstance(item, dict):
|
||||
for nested in item.values():
|
||||
_collect(nested)
|
||||
return
|
||||
if isinstance(item, (list, tuple, set)):
|
||||
for nested in item:
|
||||
_collect(nested)
|
||||
return
|
||||
|
||||
_collect(value)
|
||||
return normalized
|
||||
|
||||
def _extract_scan_metadata(self, item: dict[str, typing.Any], model_id: str) -> dict[str, typing.Any]:
|
||||
display_name = item.get('name')
|
||||
if not isinstance(display_name, str) or not display_name.strip() or display_name == model_id:
|
||||
display_name = ''
|
||||
|
||||
description = item.get('description')
|
||||
if not isinstance(description, str) or not description.strip():
|
||||
description = ''
|
||||
|
||||
context_length = item.get('context_length')
|
||||
if context_length is None and isinstance(item.get('top_provider'), dict):
|
||||
context_length = item['top_provider'].get('context_length')
|
||||
|
||||
if not isinstance(context_length, int):
|
||||
try:
|
||||
context_length = int(context_length) if context_length is not None else None
|
||||
except (TypeError, ValueError):
|
||||
context_length = None
|
||||
|
||||
input_modalities = self._normalize_modalities(item.get('input_modalities'))
|
||||
output_modalities = self._normalize_modalities(item.get('output_modalities'))
|
||||
|
||||
if isinstance(item.get('architecture'), dict):
|
||||
if not input_modalities:
|
||||
input_modalities = self._normalize_modalities(item['architecture'].get('input_modalities'))
|
||||
if not output_modalities:
|
||||
output_modalities = self._normalize_modalities(item['architecture'].get('output_modalities'))
|
||||
|
||||
owned_by = item.get('owned_by')
|
||||
if not isinstance(owned_by, str) or not owned_by.strip():
|
||||
owned_by = ''
|
||||
|
||||
return {
|
||||
'display_name': display_name or None,
|
||||
'description': description or None,
|
||||
'context_length': context_length,
|
||||
'owned_by': owned_by or None,
|
||||
'input_modalities': input_modalities,
|
||||
'output_modalities': output_modalities,
|
||||
}
|
||||
|
||||
async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]:
|
||||
headers = {}
|
||||
if api_key:
|
||||
headers['Authorization'] = f'Bearer {api_key}'
|
||||
|
||||
models_url = f'{self.requester_cfg["base_url"].rstrip("/")}/models'
|
||||
async with httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']) as client:
|
||||
response = await client.get(models_url, headers=headers)
|
||||
response.raise_for_status()
|
||||
payload = response.json()
|
||||
|
||||
models = []
|
||||
for item in payload.get('data', []):
|
||||
model_id = item.get('id')
|
||||
if not model_id:
|
||||
continue
|
||||
models.append(
|
||||
{
|
||||
'id': model_id,
|
||||
'name': model_id,
|
||||
'type': self._infer_model_type(model_id),
|
||||
'abilities': self._infer_model_abilities(item, model_id),
|
||||
**self._extract_scan_metadata(item, model_id),
|
||||
}
|
||||
)
|
||||
|
||||
models.sort(key=lambda item: (item['type'] != 'llm', item['name'].lower()))
|
||||
return {
|
||||
'models': models,
|
||||
'debug': {
|
||||
'request': {
|
||||
'method': 'GET',
|
||||
'url': models_url,
|
||||
'headers': {
|
||||
'Authorization': f'Bearer {self._mask_api_key(api_key)}' if api_key else '',
|
||||
},
|
||||
},
|
||||
'response': payload,
|
||||
},
|
||||
}
|
||||
|
||||
async def _req(
|
||||
self,
|
||||
query: pipeline_query.Query,
|
||||
args: dict,
|
||||
extra_body: dict = {},
|
||||
remove_think: bool = False,
|
||||
) -> list[dict[str, typing.Any]]:
|
||||
args['stream'] = True
|
||||
|
||||
chunk = None
|
||||
|
||||
pending_content = ''
|
||||
|
||||
tool_calls = []
|
||||
|
||||
resp_gen: openai.AsyncStream = await self.client.chat.completions.create(**args, extra_body=extra_body)
|
||||
|
||||
chunk_idx = 0
|
||||
thinking_started = False
|
||||
thinking_ended = False
|
||||
tool_id = ''
|
||||
tool_name = ''
|
||||
message_delta = {}
|
||||
async for chunk in resp_gen:
|
||||
if not chunk or not chunk.id or not chunk.choices or not chunk.choices[0] or not chunk.choices[0].delta:
|
||||
continue
|
||||
|
||||
delta = chunk.choices[0].delta.model_dump() if hasattr(chunk.choices[0], 'delta') else {}
|
||||
reasoning_content = delta.get('reasoning_content')
|
||||
# 处理 reasoning_content
|
||||
if reasoning_content:
|
||||
# accumulated_reasoning += reasoning_content
|
||||
# 如果设置了 remove_think,跳过 reasoning_content
|
||||
if remove_think:
|
||||
chunk_idx += 1
|
||||
continue
|
||||
|
||||
# 第一次出现 reasoning_content,添加 <think> 开始标签
|
||||
if not thinking_started:
|
||||
thinking_started = True
|
||||
pending_content += '<think>\n' + reasoning_content
|
||||
else:
|
||||
# 继续输出 reasoning_content
|
||||
pending_content += reasoning_content
|
||||
elif thinking_started and not thinking_ended and delta.get('content'):
|
||||
# reasoning_content 结束,normal content 开始,添加 </think> 结束标签
|
||||
thinking_ended = True
|
||||
pending_content += '\n</think>\n' + delta.get('content')
|
||||
|
||||
if delta.get('content') is not None:
|
||||
pending_content += delta.get('content')
|
||||
|
||||
if delta.get('tool_calls') is not None:
|
||||
for tool_call in delta.get('tool_calls'):
|
||||
if tool_call['id'] != '':
|
||||
tool_id = tool_call['id']
|
||||
if tool_call['function']['name'] is not None:
|
||||
tool_name = tool_call['function']['name']
|
||||
if tool_call['function']['arguments'] is None:
|
||||
continue
|
||||
tool_call['id'] = tool_id
|
||||
tool_call['name'] = tool_name
|
||||
for tc in tool_calls:
|
||||
if tc['index'] == tool_call['index']:
|
||||
tc['function']['arguments'] += tool_call['function']['arguments']
|
||||
break
|
||||
else:
|
||||
tool_calls.append(tool_call)
|
||||
|
||||
if chunk.choices[0].finish_reason is not None:
|
||||
break
|
||||
message_delta['content'] = pending_content
|
||||
message_delta['role'] = 'assistant'
|
||||
|
||||
message_delta['tool_calls'] = tool_calls if tool_calls else None
|
||||
return [message_delta]
|
||||
|
||||
async def _make_msg(
|
||||
self,
|
||||
chat_completion: list[dict[str, typing.Any]],
|
||||
) -> provider_message.Message:
|
||||
chatcmpl_message = chat_completion[0]
|
||||
|
||||
# 确保 role 字段存在且不为 None
|
||||
if 'role' not in chatcmpl_message or chatcmpl_message['role'] is None:
|
||||
chatcmpl_message['role'] = 'assistant'
|
||||
|
||||
message = provider_message.Message(**chatcmpl_message)
|
||||
|
||||
return message
|
||||
|
||||
async def _closure(
|
||||
self,
|
||||
query: pipeline_query.Query,
|
||||
req_messages: list[dict],
|
||||
use_model: requester.RuntimeLLMModel,
|
||||
use_funcs: list[resource_tool.LLMTool] = None,
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
remove_think: bool = False,
|
||||
) -> tuple[provider_message.Message, dict]:
|
||||
self.client.api_key = use_model.provider.token_mgr.get_token()
|
||||
|
||||
args = {}
|
||||
args['model'] = use_model.model_entity.name
|
||||
|
||||
if use_funcs:
|
||||
tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
|
||||
|
||||
if tools:
|
||||
args['tools'] = tools
|
||||
|
||||
# 设置此次请求中的messages
|
||||
messages = req_messages.copy()
|
||||
|
||||
# 检查vision
|
||||
for msg in messages:
|
||||
if 'content' in msg and isinstance(msg['content'], list):
|
||||
for me in msg['content']:
|
||||
if me['type'] == 'image_base64':
|
||||
me['image_url'] = {'url': me['image_base64']}
|
||||
me['type'] = 'image_url'
|
||||
del me['image_base64']
|
||||
|
||||
args['messages'] = messages
|
||||
|
||||
# 发送请求
|
||||
resp = await self._req(query, args, extra_body=extra_args, remove_think=remove_think)
|
||||
|
||||
# 处理请求结果
|
||||
message = await self._make_msg(resp)
|
||||
|
||||
# ModelScope uses streaming, usage info not available
|
||||
usage_info = {}
|
||||
|
||||
return message, usage_info
|
||||
|
||||
async def _req_stream(
|
||||
self,
|
||||
args: dict,
|
||||
extra_body: dict = {},
|
||||
) -> chat_completion.ChatCompletion:
|
||||
async for chunk in await self.client.chat.completions.create(**args, extra_body=extra_body):
|
||||
yield chunk
|
||||
|
||||
async def _closure_stream(
|
||||
self,
|
||||
query: pipeline_query.Query,
|
||||
req_messages: list[dict],
|
||||
use_model: requester.RuntimeLLMModel,
|
||||
use_funcs: list[resource_tool.LLMTool] = None,
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
remove_think: bool = False,
|
||||
) -> provider_message.Message | typing.AsyncGenerator[provider_message.MessageChunk, None]:
|
||||
self.client.api_key = use_model.provider.token_mgr.get_token()
|
||||
|
||||
args = {}
|
||||
args['model'] = use_model.model_entity.name
|
||||
|
||||
if use_funcs:
|
||||
tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
|
||||
|
||||
if tools:
|
||||
args['tools'] = tools
|
||||
|
||||
# 设置此次请求中的messages
|
||||
messages = req_messages.copy()
|
||||
|
||||
# 检查vision
|
||||
for msg in messages:
|
||||
if 'content' in msg and isinstance(msg['content'], list):
|
||||
for me in msg['content']:
|
||||
if me['type'] == 'image_base64':
|
||||
me['image_url'] = {'url': me['image_base64']}
|
||||
me['type'] = 'image_url'
|
||||
del me['image_base64']
|
||||
|
||||
args['messages'] = messages
|
||||
args['stream'] = True
|
||||
|
||||
# 流式处理状态
|
||||
# tool_calls_map: dict[str, provider_message.ToolCall] = {}
|
||||
chunk_idx = 0
|
||||
thinking_started = False
|
||||
thinking_ended = False
|
||||
role = 'assistant' # 默认角色
|
||||
# accumulated_reasoning = '' # 仅用于判断何时结束思维链
|
||||
|
||||
async for chunk in self._req_stream(args, extra_body=extra_args):
|
||||
# 解析 chunk 数据
|
||||
if hasattr(chunk, 'choices') and chunk.choices:
|
||||
choice = chunk.choices[0]
|
||||
delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
|
||||
finish_reason = getattr(choice, 'finish_reason', None)
|
||||
else:
|
||||
delta = {}
|
||||
finish_reason = None
|
||||
|
||||
# 从第一个 chunk 获取 role,后续使用这个 role
|
||||
if 'role' in delta and delta['role']:
|
||||
role = delta['role']
|
||||
|
||||
# 获取增量内容
|
||||
delta_content = delta.get('content', '')
|
||||
reasoning_content = delta.get('reasoning_content', '')
|
||||
|
||||
# 处理 reasoning_content
|
||||
if reasoning_content:
|
||||
# accumulated_reasoning += reasoning_content
|
||||
# 如果设置了 remove_think,跳过 reasoning_content
|
||||
if remove_think:
|
||||
chunk_idx += 1
|
||||
continue
|
||||
|
||||
# 第一次出现 reasoning_content,添加 <think> 开始标签
|
||||
if not thinking_started:
|
||||
thinking_started = True
|
||||
delta_content = '<think>\n' + reasoning_content
|
||||
else:
|
||||
# 继续输出 reasoning_content
|
||||
delta_content = reasoning_content
|
||||
elif thinking_started and not thinking_ended and delta_content:
|
||||
# reasoning_content 结束,normal content 开始,添加 </think> 结束标签
|
||||
thinking_ended = True
|
||||
delta_content = '\n</think>\n' + delta_content
|
||||
|
||||
# 处理 content 中已有的 <think> 标签(如果需要移除)
|
||||
# if delta_content and remove_think and '<think>' in delta_content:
|
||||
# import re
|
||||
#
|
||||
# # 移除 <think> 标签及其内容
|
||||
# delta_content = re.sub(r'<think>.*?</think>', '', delta_content, flags=re.DOTALL)
|
||||
|
||||
# 处理工具调用增量
|
||||
if delta.get('tool_calls'):
|
||||
for tool_call in delta['tool_calls']:
|
||||
if tool_call['id'] != '':
|
||||
tool_id = tool_call['id']
|
||||
if tool_call['function']['name'] is not None:
|
||||
tool_name = tool_call['function']['name']
|
||||
|
||||
if tool_call['type'] is None:
|
||||
tool_call['type'] = 'function'
|
||||
tool_call['id'] = tool_id
|
||||
tool_call['function']['name'] = tool_name
|
||||
tool_call['function']['arguments'] = (
|
||||
'' if tool_call['function']['arguments'] is None else tool_call['function']['arguments']
|
||||
)
|
||||
|
||||
# 跳过空的第一个 chunk(只有 role 没有内容)
|
||||
if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'):
|
||||
chunk_idx += 1
|
||||
continue
|
||||
|
||||
# 构建 MessageChunk - 只包含增量内容
|
||||
chunk_data = {
|
||||
'role': role,
|
||||
'content': delta_content if delta_content else None,
|
||||
'tool_calls': delta.get('tool_calls'),
|
||||
'is_final': bool(finish_reason),
|
||||
}
|
||||
|
||||
# 移除 None 值
|
||||
chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
|
||||
|
||||
yield provider_message.MessageChunk(**chunk_data)
|
||||
chunk_idx += 1
|
||||
# return
|
||||
|
||||
async def invoke_llm(
|
||||
self,
|
||||
query: pipeline_query.Query,
|
||||
model: entities.LLMModelInfo,
|
||||
messages: typing.List[provider_message.Message],
|
||||
funcs: typing.List[resource_tool.LLMTool] = None,
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
remove_think: bool = False,
|
||||
) -> provider_message.Message:
|
||||
req_messages = [] # req_messages 仅用于类内,外部同步由 query.messages 进行
|
||||
for m in messages:
|
||||
msg_dict = m.dict(exclude_none=True)
|
||||
content = msg_dict.get('content')
|
||||
if isinstance(content, list):
|
||||
# 检查 content 列表中是否每个部分都是文本
|
||||
if all(isinstance(part, dict) and part.get('type') == 'text' for part in content):
|
||||
# 将所有文本部分合并为一个字符串
|
||||
msg_dict['content'] = '\n'.join(part['text'] for part in content)
|
||||
req_messages.append(msg_dict)
|
||||
|
||||
try:
|
||||
return await self._closure(
|
||||
query=query,
|
||||
req_messages=req_messages,
|
||||
use_model=model,
|
||||
use_funcs=funcs,
|
||||
extra_args=extra_args,
|
||||
remove_think=remove_think,
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
raise errors.RequesterError('请求超时')
|
||||
except openai.BadRequestError as e:
|
||||
if 'context_length_exceeded' in e.message:
|
||||
raise errors.RequesterError(f'上文过长,请重置会话: {e.message}')
|
||||
else:
|
||||
raise errors.RequesterError(f'请求参数错误: {e.message}')
|
||||
except openai.AuthenticationError as e:
|
||||
raise errors.RequesterError(f'无效的 api-key: {e.message}')
|
||||
except openai.NotFoundError as e:
|
||||
raise errors.RequesterError(f'请求路径错误: {e.message}')
|
||||
except openai.RateLimitError as e:
|
||||
raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}')
|
||||
except openai.APIError as e:
|
||||
raise errors.RequesterError(f'请求错误: {e.message}')
|
||||
|
||||
async def invoke_llm_stream(
|
||||
self,
|
||||
query: pipeline_query.Query,
|
||||
model: requester.RuntimeLLMModel,
|
||||
messages: typing.List[provider_message.Message],
|
||||
funcs: typing.List[resource_tool.LLMTool] = None,
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
remove_think: bool = False,
|
||||
) -> provider_message.MessageChunk:
|
||||
req_messages = [] # req_messages 仅用于类内,外部同步由 query.messages 进行
|
||||
for m in messages:
|
||||
msg_dict = m.dict(exclude_none=True)
|
||||
content = msg_dict.get('content')
|
||||
if isinstance(content, list):
|
||||
# 检查 content 列表中是否每个部分都是文本
|
||||
if all(isinstance(part, dict) and part.get('type') == 'text' for part in content):
|
||||
# 将所有文本部分合并为一个字符串
|
||||
msg_dict['content'] = '\n'.join(part['text'] for part in content)
|
||||
req_messages.append(msg_dict)
|
||||
|
||||
try:
|
||||
async for item in self._closure_stream(
|
||||
query=query,
|
||||
req_messages=req_messages,
|
||||
use_model=model,
|
||||
use_funcs=funcs,
|
||||
extra_args=extra_args,
|
||||
remove_think=remove_think,
|
||||
):
|
||||
yield item
|
||||
|
||||
except asyncio.TimeoutError:
|
||||
raise errors.RequesterError('请求超时')
|
||||
except openai.BadRequestError as e:
|
||||
if 'context_length_exceeded' in e.message:
|
||||
raise errors.RequesterError(f'上文过长,请重置会话: {e.message}')
|
||||
else:
|
||||
raise errors.RequesterError(f'请求参数错误: {e.message}')
|
||||
except openai.AuthenticationError as e:
|
||||
raise errors.RequesterError(f'无效的 api-key: {e.message}')
|
||||
except openai.NotFoundError as e:
|
||||
raise errors.RequesterError(f'请求路径错误: {e.message}')
|
||||
except openai.RateLimitError as e:
|
||||
raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}')
|
||||
except openai.APIError as e:
|
||||
raise errors.RequesterError(f'请求错误: {e.message}')
|
||||
@@ -29,7 +29,10 @@ spec:
|
||||
type: int
|
||||
required: true
|
||||
default: 120
|
||||
litellm_provider: openai
|
||||
support_type:
|
||||
- llm
|
||||
provider_category: maas
|
||||
execution:
|
||||
python:
|
||||
path: ./modelscopechatcmpl.py
|
||||
attr: ModelScopeChatCompletions
|
||||
|
||||
@@ -0,0 +1,67 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import typing
|
||||
|
||||
|
||||
from . import chatcmpl
|
||||
from .. import requester
|
||||
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
|
||||
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
|
||||
import langbot_plugin.api.entities.builtin.provider.message as provider_message
|
||||
|
||||
|
||||
class MoonshotChatCompletions(chatcmpl.OpenAIChatCompletions):
|
||||
"""Moonshot ChatCompletion API 请求器"""
|
||||
|
||||
default_config: dict[str, typing.Any] = {
|
||||
'base_url': 'https://api.moonshot.cn/v1',
|
||||
'timeout': 120,
|
||||
}
|
||||
|
||||
async def _closure(
|
||||
self,
|
||||
query: pipeline_query.Query,
|
||||
req_messages: list[dict],
|
||||
use_model: requester.RuntimeLLMModel,
|
||||
use_funcs: list[resource_tool.LLMTool] = None,
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
remove_think: bool = False,
|
||||
) -> tuple[provider_message.Message, dict]:
|
||||
self.client.api_key = use_model.provider.token_mgr.get_token()
|
||||
|
||||
args = {}
|
||||
args['model'] = use_model.model_entity.name
|
||||
|
||||
if use_funcs:
|
||||
tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
|
||||
|
||||
if tools:
|
||||
args['tools'] = tools
|
||||
|
||||
# 设置此次请求中的messages
|
||||
messages = req_messages
|
||||
|
||||
# deepseek 不支持多模态,把content都转换成纯文字
|
||||
for m in messages:
|
||||
if 'content' in m and isinstance(m['content'], list):
|
||||
m['content'] = ' '.join([c['text'] for c in m['content']])
|
||||
|
||||
# 删除空的,不知道干嘛的,直接删了。
|
||||
# messages = [m for m in messages if m["content"].strip() != "" and ('tool_calls' not in m or not m['tool_calls'])]
|
||||
|
||||
args['messages'] = messages
|
||||
|
||||
# 发送请求
|
||||
resp = await self._req(args, extra_body=extra_args)
|
||||
|
||||
# 处理请求结果
|
||||
message = await self._make_msg(resp, remove_think)
|
||||
|
||||
# Extract token usage from response
|
||||
usage_info = {}
|
||||
if hasattr(resp, 'usage') and resp.usage:
|
||||
usage_info['input_tokens'] = resp.usage.prompt_tokens or 0
|
||||
usage_info['output_tokens'] = resp.usage.completion_tokens or 0
|
||||
usage_info['total_tokens'] = resp.usage.total_tokens or 0
|
||||
|
||||
return message, usage_info
|
||||
@@ -22,7 +22,10 @@ spec:
|
||||
type: integer
|
||||
required: true
|
||||
default: 120
|
||||
litellm_provider: moonshot
|
||||
support_type:
|
||||
- llm
|
||||
provider_category: manufacturer
|
||||
execution:
|
||||
python:
|
||||
path: ./moonshotchatcmpl.py
|
||||
attr: MoonshotChatCompletions
|
||||
|
||||
@@ -0,0 +1,17 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import typing
|
||||
import openai
|
||||
|
||||
from . import chatcmpl
|
||||
|
||||
|
||||
class NewAPIChatCompletions(chatcmpl.OpenAIChatCompletions):
|
||||
"""New API ChatCompletion API 请求器"""
|
||||
|
||||
client: openai.AsyncClient
|
||||
|
||||
default_config: dict[str, typing.Any] = {
|
||||
'base_url': 'http://localhost:3000/v1',
|
||||
'timeout': 120,
|
||||
}
|
||||
@@ -22,8 +22,11 @@ spec:
|
||||
type: integer
|
||||
required: true
|
||||
default: 120
|
||||
litellm_provider: openai
|
||||
support_type:
|
||||
- llm
|
||||
- text-embedding
|
||||
provider_category: maas
|
||||
execution:
|
||||
python:
|
||||
path: ./newapichatcmpl.py
|
||||
attr: NewAPIChatCompletions
|
||||
|
||||
314
src/langbot/pkg/provider/modelmgr/requesters/ollamachat.py
Normal file
314
src/langbot/pkg/provider/modelmgr/requesters/ollamachat.py
Normal file
@@ -0,0 +1,314 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import typing
|
||||
from typing import Union, Mapping, Any, AsyncIterator
|
||||
import uuid
|
||||
import json
|
||||
|
||||
import ollama
|
||||
import httpx
|
||||
|
||||
from .. import errors, requester
|
||||
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
|
||||
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
|
||||
import langbot_plugin.api.entities.builtin.provider.message as provider_message
|
||||
|
||||
REQUESTER_NAME: str = 'ollama-chat'
|
||||
|
||||
|
||||
class OllamaChatCompletions(requester.ProviderAPIRequester):
|
||||
"""Ollama平台 ChatCompletion API请求器"""
|
||||
|
||||
client: ollama.AsyncClient
|
||||
|
||||
default_config: dict[str, typing.Any] = {
|
||||
'base_url': 'http://127.0.0.1:11434',
|
||||
'timeout': 120,
|
||||
}
|
||||
|
||||
async def initialize(self):
|
||||
os.environ['OLLAMA_HOST'] = self.requester_cfg['base_url']
|
||||
self.client = ollama.AsyncClient(timeout=self.requester_cfg['timeout'])
|
||||
|
||||
def _infer_model_type(self, model_id: str) -> str:
|
||||
normalized_model_id = (model_id or '').lower()
|
||||
embedding_keywords = ('embedding', 'embed', 'bge-', 'e5-', 'm3e', 'gte-', 'text-embedding')
|
||||
return 'embedding' if any(keyword in normalized_model_id for keyword in embedding_keywords) else 'llm'
|
||||
|
||||
def _infer_model_abilities(self, item: dict[str, typing.Any], model_id: str) -> list[str]:
|
||||
normalized_model_id = (model_id or '').lower()
|
||||
abilities: set[str] = set()
|
||||
details = item.get('details', {}) or {}
|
||||
families = details.get('families', []) or []
|
||||
tokens = [normalized_model_id, str(details.get('family', '')).lower()]
|
||||
tokens.extend(str(family).lower() for family in families)
|
||||
|
||||
if any(keyword in token for token in tokens for keyword in ('vision', 'vl', 'omni', 'llava', 'ocr')):
|
||||
abilities.add('vision')
|
||||
if any(keyword in token for token in tokens for keyword in ('tool', 'function')):
|
||||
abilities.add('func_call')
|
||||
return sorted(abilities)
|
||||
|
||||
async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]:
|
||||
del api_key
|
||||
models_url = f'{self.requester_cfg["base_url"].rstrip("/")}/api/tags'
|
||||
|
||||
async with httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']) as client:
|
||||
response = await client.get(models_url)
|
||||
response.raise_for_status()
|
||||
payload = response.json()
|
||||
|
||||
models: list[dict[str, typing.Any]] = []
|
||||
for item in payload.get('models', []):
|
||||
model_id = item.get('model') or item.get('name')
|
||||
if not model_id:
|
||||
continue
|
||||
models.append(
|
||||
{
|
||||
'id': model_id,
|
||||
'name': item.get('name', model_id),
|
||||
'type': self._infer_model_type(model_id),
|
||||
'abilities': self._infer_model_abilities(item, model_id),
|
||||
}
|
||||
)
|
||||
|
||||
models.sort(key=lambda item: (item['type'] != 'llm', item['name'].lower()))
|
||||
return {
|
||||
'models': models,
|
||||
'debug': {
|
||||
'request': {
|
||||
'method': 'GET',
|
||||
'url': models_url,
|
||||
},
|
||||
'response': payload,
|
||||
},
|
||||
}
|
||||
|
||||
async def _req(
|
||||
self,
|
||||
args: dict,
|
||||
) -> Union[Mapping[str, Any], AsyncIterator[Mapping[str, Any]]]:
|
||||
return await self.client.chat(**args)
|
||||
|
||||
async def _closure(
|
||||
self,
|
||||
query: pipeline_query.Query,
|
||||
req_messages: list[dict],
|
||||
use_model: requester.RuntimeLLMModel,
|
||||
use_funcs: list[resource_tool.LLMTool] = None,
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
remove_think: bool = False,
|
||||
) -> provider_message.Message:
|
||||
args = extra_args.copy()
|
||||
args['model'] = use_model.model_entity.name
|
||||
|
||||
messages: list[dict] = req_messages.copy()
|
||||
for msg in messages:
|
||||
if 'content' in msg and isinstance(msg['content'], list):
|
||||
text_content: list = []
|
||||
image_urls: list = []
|
||||
for me in msg['content']:
|
||||
if me['type'] == 'text':
|
||||
text_content.append(me['text'])
|
||||
elif me['type'] == 'image_base64':
|
||||
image_urls.append(me['image_base64'])
|
||||
|
||||
msg['content'] = '\n'.join(text_content)
|
||||
msg['images'] = [url.split(',')[1] for url in image_urls]
|
||||
if 'tool_calls' in msg: # LangBot 内部以 str 存储 tool_calls 的参数,这里需要转换为 dict
|
||||
for tool_call in msg['tool_calls']:
|
||||
tool_call['function']['arguments'] = json.loads(tool_call['function']['arguments'])
|
||||
args['messages'] = messages
|
||||
|
||||
args['tools'] = []
|
||||
if use_funcs:
|
||||
tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
|
||||
if tools:
|
||||
args['tools'] = tools
|
||||
|
||||
resp = await self._req(args)
|
||||
message: provider_message.Message = await self._make_msg(resp)
|
||||
return message
|
||||
|
||||
async def _make_msg(self, chat_completions: ollama.ChatResponse) -> provider_message.Message:
|
||||
message: ollama.Message = chat_completions.message
|
||||
if message is None:
|
||||
raise ValueError("chat_completions must contain a 'message' field")
|
||||
|
||||
ret_msg: provider_message.Message = None
|
||||
|
||||
if message.content is not None:
|
||||
ret_msg = provider_message.Message(role='assistant', content=message.content)
|
||||
if message.tool_calls is not None and len(message.tool_calls) > 0:
|
||||
tool_calls: list[provider_message.ToolCall] = []
|
||||
|
||||
for tool_call in message.tool_calls:
|
||||
tool_calls.append(
|
||||
provider_message.ToolCall(
|
||||
id=uuid.uuid4().hex,
|
||||
type='function',
|
||||
function=provider_message.FunctionCall(
|
||||
name=tool_call.function.name,
|
||||
arguments=json.dumps(tool_call.function.arguments),
|
||||
),
|
||||
)
|
||||
)
|
||||
ret_msg.tool_calls = tool_calls
|
||||
|
||||
return ret_msg
|
||||
|
||||
async def _prepare_messages(
|
||||
self,
|
||||
messages: typing.List[provider_message.Message],
|
||||
) -> list[dict]:
|
||||
"""Prepare messages for Ollama API request."""
|
||||
req_messages: list = []
|
||||
for m in messages:
|
||||
msg_dict: dict = m.dict(exclude_none=True)
|
||||
content: Any = msg_dict.get('content')
|
||||
if isinstance(content, list):
|
||||
if all(isinstance(part, dict) and part.get('type') == 'text' for part in content):
|
||||
msg_dict['content'] = '\n'.join(part['text'] for part in content)
|
||||
req_messages.append(msg_dict)
|
||||
return req_messages
|
||||
|
||||
async def invoke_llm(
|
||||
self,
|
||||
query: pipeline_query.Query,
|
||||
model: requester.RuntimeLLMModel,
|
||||
messages: typing.List[provider_message.Message],
|
||||
funcs: typing.List[resource_tool.LLMTool] = None,
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
remove_think: bool = False,
|
||||
) -> provider_message.Message:
|
||||
req_messages = await self._prepare_messages(messages)
|
||||
try:
|
||||
return await self._closure(
|
||||
query=query,
|
||||
req_messages=req_messages,
|
||||
use_model=model,
|
||||
use_funcs=funcs,
|
||||
extra_args=extra_args,
|
||||
remove_think=remove_think,
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
raise errors.RequesterError('请求超时')
|
||||
|
||||
async def invoke_llm_stream(
|
||||
self,
|
||||
query: pipeline_query.Query,
|
||||
model: requester.RuntimeLLMModel,
|
||||
messages: typing.List[provider_message.Message],
|
||||
funcs: typing.List[resource_tool.LLMTool] = None,
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
remove_think: bool = False,
|
||||
) -> provider_message.MessageChunk:
|
||||
req_messages = await self._prepare_messages(messages)
|
||||
|
||||
try:
|
||||
args = extra_args.copy()
|
||||
args['model'] = model.model_entity.name
|
||||
|
||||
# Process messages for Ollama format
|
||||
msgs: list[dict] = req_messages.copy()
|
||||
for msg in msgs:
|
||||
if 'content' in msg and isinstance(msg['content'], list):
|
||||
text_content: list = []
|
||||
image_urls: list = []
|
||||
for me in msg['content']:
|
||||
if me['type'] == 'text':
|
||||
text_content.append(me['text'])
|
||||
elif me['type'] == 'image_base64':
|
||||
image_urls.append(me['image_base64'])
|
||||
msg['content'] = '\n'.join(text_content)
|
||||
msg['images'] = [url.split(',')[1] for url in image_urls]
|
||||
if 'tool_calls' in msg:
|
||||
for tool_call in msg['tool_calls']:
|
||||
tool_call['function']['arguments'] = json.loads(tool_call['function']['arguments'])
|
||||
args['messages'] = msgs
|
||||
|
||||
args['tools'] = []
|
||||
if funcs:
|
||||
tools = await self.ap.tool_mgr.generate_tools_for_openai(funcs)
|
||||
if tools:
|
||||
args['tools'] = tools
|
||||
|
||||
args['stream'] = True
|
||||
|
||||
chunk_idx = 0
|
||||
thinking_started = False
|
||||
thinking_ended = False
|
||||
role = 'assistant'
|
||||
|
||||
async for chunk in await self.client.chat(**args):
|
||||
message: ollama.Message = chunk.message
|
||||
done = chunk.done
|
||||
|
||||
delta_content = message.content or ''
|
||||
reasoning_content = getattr(message, 'thinking', '') or ''
|
||||
|
||||
# Handle reasoning/thinking content
|
||||
if reasoning_content:
|
||||
if remove_think:
|
||||
chunk_idx += 1
|
||||
continue
|
||||
|
||||
if not thinking_started:
|
||||
thinking_started = True
|
||||
delta_content = '<think>\n' + reasoning_content
|
||||
else:
|
||||
delta_content = reasoning_content
|
||||
elif thinking_started and not thinking_ended and delta_content:
|
||||
thinking_ended = True
|
||||
delta_content = '\n</think>\n' + delta_content
|
||||
|
||||
# Handle tool calls
|
||||
tool_calls_data = None
|
||||
if message.tool_calls:
|
||||
tool_calls_data = []
|
||||
for tc in message.tool_calls:
|
||||
tool_calls_data.append(
|
||||
{
|
||||
'id': uuid.uuid4().hex,
|
||||
'type': 'function',
|
||||
'function': {
|
||||
'name': tc.function.name,
|
||||
'arguments': json.dumps(tc.function.arguments),
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
# Skip empty first chunk
|
||||
if chunk_idx == 0 and not delta_content and not reasoning_content and not tool_calls_data:
|
||||
chunk_idx += 1
|
||||
continue
|
||||
|
||||
chunk_data = {
|
||||
'role': role,
|
||||
'content': delta_content if delta_content else None,
|
||||
'tool_calls': tool_calls_data,
|
||||
'is_final': bool(done),
|
||||
}
|
||||
chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
|
||||
|
||||
yield provider_message.MessageChunk(**chunk_data)
|
||||
chunk_idx += 1
|
||||
|
||||
except asyncio.TimeoutError:
|
||||
raise errors.RequesterError('请求超时')
|
||||
|
||||
async def invoke_embedding(
|
||||
self,
|
||||
model: requester.RuntimeEmbeddingModel,
|
||||
input_text: list[str],
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
) -> list[list[float]]:
|
||||
return (
|
||||
await self.client.embed(
|
||||
model=model.model_entity.name,
|
||||
input=input_text,
|
||||
**extra_args,
|
||||
)
|
||||
).embeddings
|
||||
@@ -22,8 +22,11 @@ spec:
|
||||
type: integer
|
||||
required: true
|
||||
default: 120
|
||||
litellm_provider: ollama
|
||||
support_type:
|
||||
- llm
|
||||
- text-embedding
|
||||
provider_category: self-hosted
|
||||
execution:
|
||||
python:
|
||||
path: ./ollamachat.py
|
||||
attr: OllamaChatCompletions
|
||||
|
||||
@@ -0,0 +1,25 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import typing
|
||||
import openai
|
||||
|
||||
from . import modelscopechatcmpl
|
||||
|
||||
|
||||
class OpenRouterChatCompletions(modelscopechatcmpl.ModelScopeChatCompletions):
|
||||
"""OpenRouter ChatCompletion API 请求器"""
|
||||
|
||||
client: openai.AsyncClient
|
||||
|
||||
default_config: dict[str, typing.Any] = {
|
||||
'base_url': 'https://openrouter.ai/api/v1',
|
||||
'timeout': 120,
|
||||
}
|
||||
|
||||
async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]:
|
||||
original_base_url = self.requester_cfg.get('base_url', '')
|
||||
self.requester_cfg['base_url'] = 'https://openrouter.ai/api/v1'
|
||||
try:
|
||||
return await super().scan_models(api_key)
|
||||
finally:
|
||||
self.requester_cfg['base_url'] = original_base_url
|
||||
@@ -22,9 +22,12 @@ spec:
|
||||
type: integer
|
||||
required: true
|
||||
default: 120
|
||||
litellm_provider: openrouter
|
||||
support_type:
|
||||
- llm
|
||||
- text-embedding
|
||||
- rerank
|
||||
provider_category: maas
|
||||
execution:
|
||||
python:
|
||||
path: ./openrouterchatcmpl.py
|
||||
attr: OpenRouterChatCompletions
|
||||
|
||||
208
src/langbot/pkg/provider/modelmgr/requesters/ppiochatcmpl.py
Normal file
208
src/langbot/pkg/provider/modelmgr/requesters/ppiochatcmpl.py
Normal file
@@ -0,0 +1,208 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import openai
|
||||
import typing
|
||||
|
||||
from . import chatcmpl
|
||||
from .. import requester
|
||||
import openai.types.chat.chat_completion as chat_completion
|
||||
import re
|
||||
import langbot_plugin.api.entities.builtin.provider.message as provider_message
|
||||
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
|
||||
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
|
||||
|
||||
|
||||
class PPIOChatCompletions(chatcmpl.OpenAIChatCompletions):
|
||||
"""欧派云 ChatCompletion API 请求器"""
|
||||
|
||||
client: openai.AsyncClient
|
||||
|
||||
default_config: dict[str, typing.Any] = {
|
||||
'base_url': 'https://api.ppinfra.com/v3/openai',
|
||||
'timeout': 120,
|
||||
}
|
||||
|
||||
is_think: bool = False
|
||||
|
||||
async def _make_msg(
|
||||
self,
|
||||
chat_completion: chat_completion.ChatCompletion,
|
||||
remove_think: bool,
|
||||
) -> provider_message.Message:
|
||||
chatcmpl_message = chat_completion.choices[0].message.model_dump()
|
||||
# print(chatcmpl_message.keys(), chatcmpl_message.values())
|
||||
|
||||
# 确保 role 字段存在且不为 None
|
||||
if 'role' not in chatcmpl_message or chatcmpl_message['role'] is None:
|
||||
chatcmpl_message['role'] = 'assistant'
|
||||
|
||||
reasoning_content = chatcmpl_message['reasoning_content'] if 'reasoning_content' in chatcmpl_message else None
|
||||
|
||||
# deepseek的reasoner模型
|
||||
chatcmpl_message['content'] = await self._process_thinking_content(
|
||||
chatcmpl_message['content'], reasoning_content, remove_think
|
||||
)
|
||||
|
||||
# 移除 reasoning_content 字段,避免传递给 Message
|
||||
if 'reasoning_content' in chatcmpl_message:
|
||||
del chatcmpl_message['reasoning_content']
|
||||
|
||||
message = provider_message.Message(**chatcmpl_message)
|
||||
|
||||
return message
|
||||
|
||||
async def _process_thinking_content(
|
||||
self,
|
||||
content: str,
|
||||
reasoning_content: str = None,
|
||||
remove_think: bool = False,
|
||||
) -> tuple[str, str]:
|
||||
"""处理思维链内容
|
||||
|
||||
Args:
|
||||
content: 原始内容
|
||||
reasoning_content: reasoning_content 字段内容
|
||||
remove_think: 是否移除思维链
|
||||
|
||||
Returns:
|
||||
处理后的内容
|
||||
"""
|
||||
if remove_think:
|
||||
content = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL)
|
||||
else:
|
||||
if reasoning_content is not None:
|
||||
content = '<think>\n' + reasoning_content + '\n</think>\n' + content
|
||||
return content
|
||||
|
||||
async def _make_msg_chunk(
|
||||
self,
|
||||
delta: dict[str, typing.Any],
|
||||
idx: int,
|
||||
) -> provider_message.MessageChunk:
|
||||
# 处理流式chunk和完整响应的差异
|
||||
# print(chat_completion.choices[0])
|
||||
|
||||
# 确保 role 字段存在且不为 None
|
||||
if 'role' not in delta or delta['role'] is None:
|
||||
delta['role'] = 'assistant'
|
||||
|
||||
reasoning_content = delta['reasoning_content'] if 'reasoning_content' in delta else None
|
||||
|
||||
delta['content'] = '' if delta['content'] is None else delta['content']
|
||||
# print(reasoning_content)
|
||||
|
||||
# deepseek的reasoner模型
|
||||
|
||||
if reasoning_content is not None:
|
||||
delta['content'] += reasoning_content
|
||||
|
||||
message = provider_message.MessageChunk(**delta)
|
||||
|
||||
return message
|
||||
|
||||
async def _closure_stream(
|
||||
self,
|
||||
query: pipeline_query.Query,
|
||||
req_messages: list[dict],
|
||||
use_model: requester.RuntimeLLMModel,
|
||||
use_funcs: list[resource_tool.LLMTool] = None,
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
remove_think: bool = False,
|
||||
) -> provider_message.Message | typing.AsyncGenerator[provider_message.MessageChunk, None]:
|
||||
self.client.api_key = use_model.provider.token_mgr.get_token()
|
||||
|
||||
args = {}
|
||||
args['model'] = use_model.model_entity.name
|
||||
|
||||
if use_funcs:
|
||||
tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
|
||||
|
||||
if tools:
|
||||
args['tools'] = tools
|
||||
|
||||
# 设置此次请求中的messages
|
||||
messages = req_messages.copy()
|
||||
|
||||
# 检查vision
|
||||
for msg in messages:
|
||||
if 'content' in msg and isinstance(msg['content'], list):
|
||||
for me in msg['content']:
|
||||
if me['type'] == 'image_base64':
|
||||
me['image_url'] = {'url': me['image_base64']}
|
||||
me['type'] = 'image_url'
|
||||
del me['image_base64']
|
||||
|
||||
args['messages'] = messages
|
||||
args['stream'] = True
|
||||
|
||||
# tool_calls_map: dict[str, provider_message.ToolCall] = {}
|
||||
chunk_idx = 0
|
||||
thinking_started = False
|
||||
thinking_ended = False
|
||||
role = 'assistant' # 默认角色
|
||||
async for chunk in self._req_stream(args, extra_body=extra_args):
|
||||
# 解析 chunk 数据
|
||||
if hasattr(chunk, 'choices') and chunk.choices:
|
||||
choice = chunk.choices[0]
|
||||
delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
|
||||
finish_reason = getattr(choice, 'finish_reason', None)
|
||||
else:
|
||||
delta = {}
|
||||
finish_reason = None
|
||||
|
||||
# 从第一个 chunk 获取 role,后续使用这个 role
|
||||
if 'role' in delta and delta['role']:
|
||||
role = delta['role']
|
||||
|
||||
# 获取增量内容
|
||||
delta_content = delta.get('content', '')
|
||||
# reasoning_content = delta.get('reasoning_content', '')
|
||||
|
||||
if remove_think:
|
||||
if delta['content'] is not None:
|
||||
if '<think>' in delta['content'] and not thinking_started and not thinking_ended:
|
||||
thinking_started = True
|
||||
continue
|
||||
elif delta['content'] == r'</think>' and not thinking_ended:
|
||||
thinking_ended = True
|
||||
continue
|
||||
elif thinking_ended and delta['content'] == '\n\n' and thinking_started:
|
||||
thinking_started = False
|
||||
continue
|
||||
elif thinking_started and not thinking_ended:
|
||||
continue
|
||||
|
||||
# delta_tool_calls = None
|
||||
if delta.get('tool_calls'):
|
||||
for tool_call in delta['tool_calls']:
|
||||
if tool_call['id'] and tool_call['function']['name']:
|
||||
tool_id = tool_call['id']
|
||||
tool_name = tool_call['function']['name']
|
||||
|
||||
if tool_call['id'] is None:
|
||||
tool_call['id'] = tool_id
|
||||
if tool_call['function']['name'] is None:
|
||||
tool_call['function']['name'] = tool_name
|
||||
if tool_call['function']['arguments'] is None:
|
||||
tool_call['function']['arguments'] = ''
|
||||
if tool_call['type'] is None:
|
||||
tool_call['type'] = 'function'
|
||||
|
||||
# 跳过空的第一个 chunk(只有 role 没有内容)
|
||||
if chunk_idx == 0 and not delta_content and not delta.get('tool_calls'):
|
||||
chunk_idx += 1
|
||||
continue
|
||||
|
||||
# 构建 MessageChunk - 只包含增量内容
|
||||
chunk_data = {
|
||||
'role': role,
|
||||
'content': delta_content if delta_content else None,
|
||||
'tool_calls': delta.get('tool_calls'),
|
||||
'is_final': bool(finish_reason),
|
||||
}
|
||||
|
||||
# 移除 None 值
|
||||
chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
|
||||
|
||||
yield provider_message.MessageChunk(**chunk_data)
|
||||
chunk_idx += 1
|
||||
@@ -29,8 +29,11 @@ spec:
|
||||
type: int
|
||||
required: true
|
||||
default: 120
|
||||
litellm_provider: openai
|
||||
support_type:
|
||||
- llm
|
||||
- text-embedding
|
||||
provider_category: maas
|
||||
execution:
|
||||
python:
|
||||
path: ./ppiochatcmpl.py
|
||||
attr: PPIOChatCompletions
|
||||
|
||||
@@ -0,0 +1,17 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import openai
|
||||
import typing
|
||||
|
||||
from . import chatcmpl
|
||||
|
||||
|
||||
class QHAIGCChatCompletions(chatcmpl.OpenAIChatCompletions):
|
||||
"""启航 AI ChatCompletion API 请求器"""
|
||||
|
||||
client: openai.AsyncClient
|
||||
|
||||
default_config: dict[str, typing.Any] = {
|
||||
'base_url': 'https://api.qhaigc.com/v1',
|
||||
'timeout': 120,
|
||||
}
|
||||
@@ -29,8 +29,11 @@ spec:
|
||||
type: int
|
||||
required: true
|
||||
default: 120
|
||||
litellm_provider: openai
|
||||
support_type:
|
||||
- llm
|
||||
- text-embedding
|
||||
provider_category: maas
|
||||
execution:
|
||||
python:
|
||||
path: ./qhaigcchatcmpl.py
|
||||
attr: QHAIGCChatCompletions
|
||||
|
||||
32
src/langbot/pkg/provider/modelmgr/requesters/shengsuanyun.py
Normal file
32
src/langbot/pkg/provider/modelmgr/requesters/shengsuanyun.py
Normal file
@@ -0,0 +1,32 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import openai
|
||||
import typing
|
||||
|
||||
from . import chatcmpl
|
||||
import openai.types.chat.chat_completion as chat_completion
|
||||
|
||||
|
||||
class ShengSuanYunChatCompletions(chatcmpl.OpenAIChatCompletions):
|
||||
"""胜算云(ModelSpot.AI) ChatCompletion API 请求器"""
|
||||
|
||||
client: openai.AsyncClient
|
||||
|
||||
default_config: dict[str, typing.Any] = {
|
||||
'base_url': 'https://router.shengsuanyun.com/api/v1',
|
||||
'timeout': 120,
|
||||
}
|
||||
|
||||
async def _req(
|
||||
self,
|
||||
args: dict,
|
||||
extra_body: dict = {},
|
||||
) -> chat_completion.ChatCompletion:
|
||||
return await self.client.chat.completions.create(
|
||||
**args,
|
||||
extra_body=extra_body,
|
||||
extra_headers={
|
||||
'HTTP-Referer': 'https://langbot.app',
|
||||
'X-Title': 'LangBot',
|
||||
},
|
||||
)
|
||||
@@ -29,8 +29,11 @@ spec:
|
||||
type: int
|
||||
required: true
|
||||
default: 120
|
||||
litellm_provider: openai
|
||||
support_type:
|
||||
- llm
|
||||
- text-embedding
|
||||
provider_category: maas
|
||||
execution:
|
||||
python:
|
||||
path: ./shengsuanyun.py
|
||||
attr: ShengSuanYunChatCompletions
|
||||
|
||||
@@ -0,0 +1,17 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import typing
|
||||
import openai
|
||||
|
||||
from . import chatcmpl
|
||||
|
||||
|
||||
class SiliconFlowChatCompletions(chatcmpl.OpenAIChatCompletions):
|
||||
"""SiliconFlow ChatCompletion API 请求器"""
|
||||
|
||||
client: openai.AsyncClient
|
||||
|
||||
default_config: dict[str, typing.Any] = {
|
||||
'base_url': 'https://api.siliconflow.cn/v1',
|
||||
'timeout': 120,
|
||||
}
|
||||
@@ -22,9 +22,12 @@ spec:
|
||||
type: integer
|
||||
required: true
|
||||
default: 120
|
||||
litellm_provider: siliconflow
|
||||
support_type:
|
||||
- llm
|
||||
- text-embedding
|
||||
- rerank
|
||||
provider_category: maas
|
||||
execution:
|
||||
python:
|
||||
path: ./siliconflowchatcmpl.py
|
||||
attr: SiliconFlowChatCompletions
|
||||
|
||||
@@ -0,0 +1,17 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import typing
|
||||
import openai
|
||||
|
||||
from . import chatcmpl
|
||||
|
||||
|
||||
class LangBotSpaceChatCompletions(chatcmpl.OpenAIChatCompletions):
|
||||
"""LangBot Space ChatCompletion API 请求器"""
|
||||
|
||||
client: openai.AsyncClient
|
||||
|
||||
default_config: dict[str, typing.Any] = {
|
||||
'base_url': 'https://api.langbot.cloud/v1',
|
||||
'timeout': 120,
|
||||
}
|
||||
@@ -22,8 +22,11 @@ spec:
|
||||
type: integer
|
||||
required: true
|
||||
default: 120
|
||||
litellm_provider: openai
|
||||
support_type:
|
||||
- llm
|
||||
- text-embedding
|
||||
provider_category: maas
|
||||
execution:
|
||||
python:
|
||||
path: ./spacechatcmpl.py
|
||||
attr: LangBotSpaceChatCompletions
|
||||
|
||||
@@ -22,8 +22,11 @@ spec:
|
||||
type: integer
|
||||
required: true
|
||||
default: 120
|
||||
litellm_provider: openai
|
||||
support_type:
|
||||
- llm
|
||||
- text-embedding
|
||||
provider_category: maas
|
||||
execution:
|
||||
python:
|
||||
path: ./tokenponychatcmpl.py
|
||||
attr: TokenPonyChatCompletions
|
||||
|
||||
@@ -0,0 +1,17 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import typing
|
||||
import openai
|
||||
|
||||
from . import chatcmpl
|
||||
|
||||
|
||||
class TokenPonyChatCompletions(chatcmpl.OpenAIChatCompletions):
|
||||
"""TokenPony ChatCompletion API 请求器"""
|
||||
|
||||
client: openai.AsyncClient
|
||||
|
||||
default_config: dict[str, typing.Any] = {
|
||||
'base_url': 'https://api.tokenpony.cn/v1',
|
||||
'timeout': 120,
|
||||
}
|
||||
@@ -0,0 +1,17 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import typing
|
||||
import openai
|
||||
|
||||
from . import chatcmpl
|
||||
|
||||
|
||||
class VolcArkChatCompletions(chatcmpl.OpenAIChatCompletions):
|
||||
"""火山方舟大模型平台 ChatCompletion API 请求器"""
|
||||
|
||||
client: openai.AsyncClient
|
||||
|
||||
default_config: dict[str, typing.Any] = {
|
||||
'base_url': 'https://ark.cn-beijing.volces.com/api/v3',
|
||||
'timeout': 120,
|
||||
}
|
||||
@@ -22,7 +22,10 @@ spec:
|
||||
type: integer
|
||||
required: true
|
||||
default: 120
|
||||
litellm_provider: openai
|
||||
support_type:
|
||||
- llm
|
||||
provider_category: maas
|
||||
execution:
|
||||
python:
|
||||
path: ./volcarkchatcmpl.py
|
||||
attr: VolcArkChatCompletions
|
||||
|
||||
@@ -22,7 +22,10 @@ spec:
|
||||
type: integer
|
||||
required: true
|
||||
default: 120
|
||||
litellm_provider: voyage
|
||||
support_type:
|
||||
- rerank
|
||||
provider_category: manufacturer
|
||||
execution:
|
||||
python:
|
||||
path: ./chatcmpl.py
|
||||
attr: OpenAIChatCompletions
|
||||
|
||||
17
src/langbot/pkg/provider/modelmgr/requesters/xaichatcmpl.py
Normal file
17
src/langbot/pkg/provider/modelmgr/requesters/xaichatcmpl.py
Normal file
@@ -0,0 +1,17 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import typing
|
||||
import openai
|
||||
|
||||
from . import chatcmpl
|
||||
|
||||
|
||||
class XaiChatCompletions(chatcmpl.OpenAIChatCompletions):
|
||||
"""xAI ChatCompletion API 请求器"""
|
||||
|
||||
client: openai.AsyncClient
|
||||
|
||||
default_config: dict[str, typing.Any] = {
|
||||
'base_url': 'https://api.x.ai/v1',
|
||||
'timeout': 120,
|
||||
}
|
||||
@@ -22,7 +22,10 @@ spec:
|
||||
type: integer
|
||||
required: true
|
||||
default: 120
|
||||
litellm_provider: xai
|
||||
support_type:
|
||||
- llm
|
||||
provider_category: manufacturer
|
||||
execution:
|
||||
python:
|
||||
path: ./xaichatcmpl.py
|
||||
attr: XaiChatCompletions
|
||||
|
||||
@@ -0,0 +1,17 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import typing
|
||||
import openai
|
||||
|
||||
from . import chatcmpl
|
||||
|
||||
|
||||
class ZhipuAIChatCompletions(chatcmpl.OpenAIChatCompletions):
|
||||
"""智谱AI ChatCompletion API 请求器"""
|
||||
|
||||
client: openai.AsyncClient
|
||||
|
||||
default_config: dict[str, typing.Any] = {
|
||||
'base_url': 'https://open.bigmodel.cn/api/paas/v4',
|
||||
'timeout': 120,
|
||||
}
|
||||
@@ -22,7 +22,10 @@ spec:
|
||||
type: integer
|
||||
required: true
|
||||
default: 120
|
||||
litellm_provider: zhipu
|
||||
support_type:
|
||||
- llm
|
||||
provider_category: manufacturer
|
||||
execution:
|
||||
python:
|
||||
path: ./zhipuaichatcmpl.py
|
||||
attr: ZhipuAIChatCompletions
|
||||
|
||||
Reference in New Issue
Block a user