Merge branch 'rc/new-plugin' into refactor/new-plugin-system

This commit is contained in:
Junyan Qin
2025-08-24 21:40:02 +08:00
232 changed files with 11998 additions and 1440 deletions

View File

@@ -17,7 +17,7 @@ class LLMModelInfo(pydantic.BaseModel):
token_mgr: token.TokenManager
requester: requester.LLMAPIRequester
requester: requester.ProviderAPIRequester
tool_call_supported: typing.Optional[bool] = False

View File

@@ -20,13 +20,16 @@ class ModelManager:
llm_models: list[requester.RuntimeLLMModel]
embedding_models: list[requester.RuntimeEmbeddingModel]
requester_components: list[engine.Component]
requester_dict: dict[str, type[requester.LLMAPIRequester]] # cache
requester_dict: dict[str, type[requester.ProviderAPIRequester]] # cache
def __init__(self, ap: app.Application):
self.ap = ap
self.llm_models = []
self.embedding_models = []
self.requester_components = []
self.requester_dict = {}
@@ -34,7 +37,7 @@ class ModelManager:
self.requester_components = self.ap.discover.get_components_by_kind('LLMAPIRequester')
# forge requester class dict
requester_dict: dict[str, type[requester.LLMAPIRequester]] = {}
requester_dict: dict[str, type[requester.ProviderAPIRequester]] = {}
for component in self.requester_components:
requester_dict[component.metadata.name] = component.get_python_component_class()
@@ -47,13 +50,11 @@ class ModelManager:
self.ap.logger.info('Loading models from db...')
self.llm_models = []
self.embedding_models = []
# llm models
result = await self.ap.persistence_mgr.execute_async(sqlalchemy.select(persistence_model.LLMModel))
llm_models = result.all()
# load models
for llm_model in llm_models:
try:
await self.load_llm_model(llm_model)
@@ -62,11 +63,17 @@ class ModelManager:
except Exception as e:
self.ap.logger.error(f'Failed to load model {llm_model.uuid}: {e}\n{traceback.format_exc()}')
# embedding models
result = await self.ap.persistence_mgr.execute_async(sqlalchemy.select(persistence_model.EmbeddingModel))
embedding_models = result.all()
for embedding_model in embedding_models:
await self.load_embedding_model(embedding_model)
async def init_runtime_llm_model(
self,
model_info: persistence_model.LLMModel | sqlalchemy.Row[persistence_model.LLMModel] | dict,
):
"""初始化运行时模型"""
"""初始化运行时 LLM 模型"""
if isinstance(model_info, sqlalchemy.Row):
model_info = persistence_model.LLMModel(**model_info._mapping)
elif isinstance(model_info, dict):
@@ -90,31 +97,85 @@ class ModelManager:
return runtime_llm_model
async def init_runtime_embedding_model(
    self,
    model_info: persistence_model.EmbeddingModel | sqlalchemy.Row[persistence_model.EmbeddingModel] | dict,
):
    """Build a runtime embedding model from a persisted model record.

    Args:
        model_info: An ``EmbeddingModel`` entity, a SQLAlchemy row, or a dict
            of entity fields. Rows and dicts are converted to an entity first.

    Returns:
        A ``RuntimeEmbeddingModel`` bundling the entity, a ``TokenManager``
        built from the entity's uuid and api keys, and an initialized
        requester instance.

    Raises:
        KeyError: If ``model_info.requester`` is not a key of
            ``self.requester_dict``.
    """
    # Normalize the two alternative input shapes into an entity object.
    if isinstance(model_info, sqlalchemy.Row):
        entity_fields = model_info._mapping
        model_info = persistence_model.EmbeddingModel(**entity_fields)
    elif isinstance(model_info, dict):
        model_info = persistence_model.EmbeddingModel(**model_info)

    # Look up the requester class by name, then instantiate and initialize it.
    requester_cls = self.requester_dict[model_info.requester]
    requester_inst = requester_cls(ap=self.ap, config=model_info.requester_config)
    await requester_inst.initialize()

    key_manager = token.TokenManager(
        name=model_info.uuid,
        tokens=model_info.api_keys,
    )
    return requester.RuntimeEmbeddingModel(
        model_entity=model_info,
        token_mgr=key_manager,
        requester=requester_inst,
    )
# Load one LLM model: build its runtime wrapper and register it on this manager.
# NOTE(review): this span comes from a rendered diff; the two consecutive
# docstrings look like the pre-/post-change versions of one line — confirm
# against the real file.
async def load_llm_model(
self,
model_info: persistence_model.LLMModel | sqlalchemy.Row[persistence_model.LLMModel] | dict,
):
"""Load a model"""
"""Load an LLM model"""
# Delegate construction to init_runtime_llm_model, then append the result to self.llm_models.
runtime_llm_model = await self.init_runtime_llm_model(model_info)
self.llm_models.append(runtime_llm_model)
async def load_embedding_model(
    self,
    model_info: persistence_model.EmbeddingModel | sqlalchemy.Row[persistence_model.EmbeddingModel] | dict,
):
    """Load one embedding model and register it on this manager.

    Args:
        model_info: Passed unchanged to ``init_runtime_embedding_model``.
    """
    # Await first, then touch self.embedding_models, so the list attribute is
    # read only after initialization has finished.
    loaded_model = await self.init_runtime_embedding_model(model_info)
    self.embedding_models.append(loaded_model)
# Look up a loaded runtime LLM model by the uuid of its model entity.
# Scans self.llm_models in order and returns the first match; raises
# ValueError when nothing matches.
# NOTE(review): this span comes from a rendered diff; the duplicated
# docstrings and the two `raise` lines look like the pre-/post-change versions
# of the same statements — confirm which one the real file keeps.
async def get_model_by_uuid(self, uuid: str) -> requester.RuntimeLLMModel:
"""Get a model by uuid"""
"""Get an LLM model by uuid"""
for model in self.llm_models:
if model.model_entity.uuid == uuid:
return model
raise ValueError(f'model {uuid} not found')
raise ValueError(f'LLM model {uuid} not found')
async def get_embedding_model_by_uuid(self, uuid: str) -> requester.RuntimeEmbeddingModel:
    """Return the loaded embedding model whose entity has the given uuid.

    Args:
        uuid: Value compared against ``model_entity.uuid`` of each loaded model.

    Returns:
        The first matching entry of ``self.embedding_models``.

    Raises:
        ValueError: If no loaded embedding model has that uuid.
    """
    found = next(
        (candidate for candidate in self.embedding_models if candidate.model_entity.uuid == uuid),
        None,
    )
    if found is None:
        raise ValueError(f'Embedding model {uuid} not found')
    return found
# Remove a loaded LLM model from self.llm_models by the uuid of its entity.
# Nothing is raised when no model matches.
# NOTE(review): this span comes from a rendered diff; the two consecutive
# docstrings look like the pre-/post-change versions of one line — confirm.
async def remove_llm_model(self, model_uuid: str):
"""Remove a model"""
"""Remove an LLM model"""
for model in self.llm_models:
if model.model_entity.uuid == model_uuid:
self.llm_models.remove(model)
return
def get_available_requesters_info(self) -> list[dict]:
async def remove_embedding_model(self, model_uuid: str):
    """Unregister the first loaded embedding model matching ``model_uuid``.

    Does nothing when no loaded embedding model has that uuid.
    """
    loaded = self.embedding_models
    target = next(
        (entry for entry in loaded if entry.model_entity.uuid == model_uuid),
        None,
    )
    if target is not None:
        loaded.remove(target)
# Return the plain-dict form (to_plain_dict()) of the discovered requester
# components, optionally filtered by model type.
#   model_type: '' means "no filter"; otherwise only components whose
#   spec['support_type'] contains model_type are returned.
# NOTE(review): this span comes from a rendered diff. The bare `return` right
# after the docstring looks like the pre-change body; if it were really
# present, the filtering code below it would be unreachable — confirm.
# NOTE(review): component.spec['support_type'] is subscripted directly, so a
# component whose spec lacks that key would presumably raise — verify that
# every requester manifest declares support_type.
# NOTE(review): model_type has no default, while the old signature shown just
# above this block took no argument — check existing callers.
def get_available_requesters_info(self, model_type: str) -> list[dict]:
"""Get all available requesters"""
return [component.to_plain_dict() for component in self.requester_components]
if model_type != '':
return [
component.to_plain_dict()
for component in self.requester_components
if model_type in component.spec['support_type']
]
else:
return [component.to_plain_dict() for component in self.requester_components]
def get_available_requester_info_by_name(self, name: str) -> dict | None:
"""通过名称获取请求器信息"""

View File

@@ -20,22 +20,45 @@ class RuntimeLLMModel:
token_mgr: token.TokenManager
"""api key管理器"""
requester: LLMAPIRequester
requester: ProviderAPIRequester
"""请求器实例"""
def __init__(
self,
model_entity: persistence_model.LLMModel,
token_mgr: token.TokenManager,
requester: LLMAPIRequester,
requester: ProviderAPIRequester,
):
self.model_entity = model_entity
self.token_mgr = token_mgr
self.requester = requester
class LLMAPIRequester(metaclass=abc.ABCMeta):
"""LLM API请求器"""
class RuntimeEmbeddingModel:
    """Runtime embedding model.

    Bundles a persisted embedding-model entity with the api-key manager and
    the requester instance used for it.
    """

    # Model data (persisted entity).
    model_entity: persistence_model.EmbeddingModel
    # API key manager.
    token_mgr: token.TokenManager
    # Requester instance.
    requester: ProviderAPIRequester

    def __init__(
        self,
        model_entity: persistence_model.EmbeddingModel,
        token_mgr: token.TokenManager,
        requester: ProviderAPIRequester,
    ):
        """Store the three collaborators on the instance without modification."""
        self.model_entity, self.token_mgr, self.requester = (
            model_entity,
            token_mgr,
            requester,
        )
class ProviderAPIRequester(metaclass=abc.ABCMeta):
"""Provider API请求器"""
name: str = None
@@ -61,6 +84,7 @@ class LLMAPIRequester(metaclass=abc.ABCMeta):
messages: typing.List[provider_message.Message],
funcs: typing.List[resource_tool.LLMTool] = None,
extra_args: dict[str, typing.Any] = {},
remove_think: bool = False,
) -> provider_message.Message:
"""调用API
@@ -69,8 +93,50 @@ class LLMAPIRequester(metaclass=abc.ABCMeta):
messages (typing.List[llm_entities.Message]): 消息对象列表
funcs (typing.List[tools_entities.LLMFunction], optional): 使用的工具函数列表. Defaults to None.
extra_args (dict[str, typing.Any], optional): 额外的参数. Defaults to {}.
remove_think (bool, optional): 是否移除思考中的消息. Defaults to False.
Returns:
llm_entities.Message: 返回消息对象
"""
pass
async def invoke_llm_stream(
    self,
    query: pipeline_query.Query,
    model: RuntimeLLMModel,
    messages: typing.List[provider_message.Message],
    funcs: typing.List[resource_tool.LLMTool] = None,
    extra_args: dict[str, typing.Any] = {},
    remove_think: bool = False,
) -> provider_message.MessageChunk:
    """Call the API in streaming mode (base placeholder).

    The base version has an empty body and returns ``None``. The requester
    subclasses shown alongside it implement this method as async generators
    that yield ``MessageChunk`` objects.

    Args:
        query: Query object for this request.
        model (RuntimeLLMModel): Model information to use.
        messages (typing.List[provider_message.Message]): List of message objects.
        funcs (typing.List[resource_tool.LLMTool], optional): Tool functions to
            make available. Defaults to None.
        extra_args (dict[str, typing.Any], optional): Extra arguments. Defaults to {}.
        remove_think (bool, optional): Whether to remove the thinking part of
            the output. Defaults to False.

    Returns:
        typing.AsyncGenerator[provider_message.MessageChunk]: The message
        chunks, in implementing subclasses.
    """
    return None
async def invoke_embedding(
    self,
    model: RuntimeEmbeddingModel,
    input_text: typing.List[str],
    extra_args: dict[str, typing.Any] = {},
) -> typing.List[typing.List[float]]:
    """Call the Embedding API (base placeholder).

    The base version has an empty body and returns ``None``; requesters that
    support embeddings override it.

    Args:
        model (RuntimeEmbeddingModel): Model information to use.
        input_text (typing.List[str]): Input texts.
        extra_args (dict[str, typing.Any], optional): Extra arguments. Defaults to {}.

    Returns:
        typing.List[typing.List[float]]: The embedding vectors, in
        implementing subclasses.
    """
    return None

View File

@@ -7,7 +7,7 @@ from . import chatcmpl
class AI302ChatCompletions(chatcmpl.OpenAIChatCompletions):
"""302 AI ChatCompletion API 请求器"""
"""302.AI ChatCompletion API 请求器"""
client: openai.AsyncClient

View File

@@ -3,8 +3,8 @@ kind: LLMAPIRequester
metadata:
name: 302-ai-chat-completions
label:
en_US: 302 AI
zh_Hans: 302 AI
en_US: 302.AI
zh_Hans: 302.AI
icon: 302ai.png
spec:
config:
@@ -22,6 +22,9 @@ spec:
type: integer
required: true
default: 120
support_type:
- llm
- text-embedding
execution:
python:
path: ./302aichatcmpl.py

View File

@@ -15,13 +15,13 @@ import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
import langbot_plugin.api.entities.builtin.provider.message as provider_message
class AnthropicMessages(requester.LLMAPIRequester):
class AnthropicMessages(requester.ProviderAPIRequester):
"""Anthropic Messages API 请求器"""
client: anthropic.AsyncAnthropic
default_config: dict[str, typing.Any] = {
'base_url': 'https://api.anthropic.com/v1',
'base_url': 'https://api.anthropic.com',
'timeout': 120,
}
@@ -44,6 +44,7 @@ class AnthropicMessages(requester.LLMAPIRequester):
self.client = anthropic.AsyncAnthropic(
api_key='',
http_client=httpx_client,
base_url=self.requester_cfg['base_url'],
)
async def invoke_llm(
@@ -53,6 +54,7 @@ class AnthropicMessages(requester.LLMAPIRequester):
messages: typing.List[provider_message.Message],
funcs: typing.List[resource_tool.LLMTool] = None,
extra_args: dict[str, typing.Any] = {},
remove_think: bool = False,
) -> provider_message.Message:
self.client.api_key = model.token_mgr.get_token()
@@ -89,7 +91,8 @@ class AnthropicMessages(requester.LLMAPIRequester):
{
'type': 'tool_result',
'tool_use_id': tool_call_id,
'content': m.content,
'is_error': False,
'content': [{'type': 'text', 'text': m.content}],
}
],
}
@@ -133,6 +136,9 @@ class AnthropicMessages(requester.LLMAPIRequester):
args['messages'] = req_messages
if 'thinking' in args:
args['thinking'] = {'type': 'enabled', 'budget_tokens': 10000}
if funcs:
tools = await self.ap.tool_mgr.generate_tools_for_anthropic(funcs)
@@ -140,19 +146,17 @@ class AnthropicMessages(requester.LLMAPIRequester):
args['tools'] = tools
try:
# print(json.dumps(args, indent=4, ensure_ascii=False))
resp = await self.client.messages.create(**args)
args = {
'content': '',
'role': resp.role,
}
assert type(resp) is anthropic.types.message.Message
for block in resp.content:
if block.type == 'thinking':
args['content'] = '<think>' + block.thinking + '</think>\n' + args['content']
if not remove_think and block.type == 'thinking':
args['content'] = '<think>\n' + block.thinking + '\n</think>\n' + args['content']
elif block.type == 'text':
args['content'] += block.text
elif block.type == 'tool_use':
@@ -176,3 +180,191 @@ class AnthropicMessages(requester.LLMAPIRequester):
raise errors.RequesterError(f'模型无效: {e.message}')
else:
raise errors.RequesterError(f'请求地址无效: {e.message}')
# Stream a completion from the Anthropic Messages API and yield it as
# provider_message.MessageChunk objects.
#
# Steps visible in the body:
#   1. Set the client's api key from the model's token manager and start the
#      request args from a copy of extra_args (model name, stream=True).
#   2. Pull the first 'system' message out of `messages` into args['system'].
#   3. Convert the remaining messages to Anthropic content blocks
#      (tool results, text, base64 images, tool_use).
#   4. Iterate the stream events and yield one MessageChunk per relevant event.
# anthropic AuthenticationError / BadRequestError / NotFoundError are re-raised
# as errors.RequesterError.
#
# NOTE(review): the return annotation says provider_message.Message, but the
# body yields provider_message.MessageChunk objects.
# NOTE(review): messages.pop(i) removes the system message from the list object
# passed in by the caller — confirm callers do not reuse that list.
# NOTE(review): indentation is not preserved in this view, so nesting described
# in the comments below is inferred from syntax — confirm against the real file.
async def invoke_llm_stream(
self,
query: pipeline_query.Query,
model: requester.RuntimeLLMModel,
messages: typing.List[provider_message.Message],
funcs: typing.List[resource_tool.LLMTool] = None,
extra_args: dict[str, typing.Any] = {},
remove_think: bool = False,
) -> provider_message.Message:
self.client.api_key = model.token_mgr.get_token()
# Work on a copy so the (shared, mutable) extra_args default is never modified.
args = extra_args.copy()
args['model'] = model.model_entity.name
args['stream'] = True
# Process messages
# system
system_role_message = None
for i, m in enumerate(messages):
if m.role == 'system':
system_role_message = m
break
if system_role_message:
messages.pop(i)
# Only a plain-string system content is forwarded as args['system'].
if isinstance(system_role_message, provider_message.Message) and isinstance(system_role_message.content, str):
args['system'] = system_role_message.content
req_messages = []
for m in messages:
# Tool results are sent as a 'user' message carrying a tool_result block.
if m.role == 'tool':
tool_call_id = m.tool_call_id
req_messages.append(
{
'role': 'user',
'content': [
{
'type': 'tool_result',
'tool_use_id': tool_call_id,
'is_error': False, # hard-coded to False for now
'content': [
{'type': 'text', 'text': m.content}
], # wrapped in a list, presumably for multiple results; other block types seem possible, only text is written for now
}
],
}
)
continue
msg_dict = m.dict(exclude_none=True)
# Non-blank string content becomes a single text block.
if isinstance(m.content, str) and m.content.strip() != '':
msg_dict['content'] = [{'type': 'text', 'text': m.content}]
elif isinstance(m.content, list):
# Replace image_base64 elements in place with Anthropic 'image' blocks.
for i, ce in enumerate(m.content):
if ce.type == 'image_base64':
image_b64, image_format = await image.extract_b64_and_format(ce.image_base64)
alter_image_ele = {
'type': 'image',
'source': {
'type': 'base64',
'media_type': f'image/{image_format}',
'data': image_b64,
},
}
msg_dict['content'][i] = alter_image_ele
if isinstance(msg_dict['content'], str) and msg_dict['content'] == '':
msg_dict['content'] = [] # for some unknown reason an empty value shows up here and content ends up as a string
# Assistant tool calls are appended as tool_use blocks; the arguments JSON string is parsed into 'input'.
if m.tool_calls:
for tool_call in m.tool_calls:
msg_dict['content'].append(
{
'type': 'tool_use',
'id': tool_call.id,
'name': tool_call.function.name,
'input': json.loads(tool_call.function.arguments),
}
)
del msg_dict['tool_calls']
req_messages.append(msg_dict)
# Any caller-supplied 'thinking' value is replaced by this fixed configuration.
if 'thinking' in args:
args['thinking'] = {'type': 'enabled', 'budget_tokens': 10000}
args['messages'] = req_messages
if funcs:
tools = await self.ap.tool_mgr.generate_tools_for_anthropic(funcs)
if tools:
args['tools'] = tools
try:
role = 'assistant' # default role
# chunk_idx = 0
# think_started / think_ended are one-shot flags: set on a block-start event,
# consumed by the next matching delta to emit the <think> opening / closing marker.
think_started = False
think_ended = False
finish_reason = False
content = ''
tool_name = ''
tool_id = ''
async for chunk in await self.client.messages.create(**args):
# A fresh tool_call dict is created for every event; id stays None unless a branch below fills it in.
tool_call = {'id': None, 'function': {'name': None, 'arguments': None}, 'type': 'function'}
if isinstance(
chunk, anthropic.types.raw_content_block_start_event.RawContentBlockStartEvent
): # record the block start
if chunk.content_block.type == 'tool_use':
# Remember the tool name/id so later input_json_delta events can be tagged with them.
if chunk.content_block.name is not None:
tool_name = chunk.content_block.name
if chunk.content_block.id is not None:
tool_id = chunk.content_block.id
tool_call['function']['name'] = tool_name
tool_call['function']['arguments'] = ''
tool_call['id'] = tool_id
if not remove_think:
if chunk.content_block.type == 'thinking' and not remove_think:
think_started = True
elif chunk.content_block.type == 'text' and chunk.index != 0 and not remove_think:
think_ended = True
# NOTE(review): if this `continue` sits at the block-start level, the tool_call filled in above is never yielded — confirm.
continue
elif isinstance(chunk, anthropic.types.raw_content_block_delta_event.RawContentBlockDeltaEvent):
if chunk.delta.type == 'thinking_delta':
if think_started:
think_started = False
content = '<think>\n' + chunk.delta.thinking
elif remove_think:
continue
else:
content = chunk.delta.thinking
elif chunk.delta.type == 'text_delta':
if think_ended:
think_ended = False
content = '\n</think>\n' + chunk.delta.text
else:
content = chunk.delta.text
elif chunk.delta.type == 'input_json_delta':
# NOTE(review): `content` is not reassigned in this branch, so the chunk built below presumably carries the previous delta's text again — confirm.
tool_call['function']['arguments'] = chunk.delta.partial_json
tool_call['function']['name'] = tool_name
tool_call['id'] = tool_id
elif isinstance(chunk, anthropic.types.raw_content_block_stop_event.RawContentBlockStopEvent):
continue # marks the end of a raw content block
elif isinstance(chunk, anthropic.types.raw_message_delta_event.RawMessageDeltaEvent):
# Only stop_reason 'end_turn' marks the stream as final here.
# NOTE(review): `content` is not reassigned in this branch either — see the note on input_json_delta.
if chunk.delta.stop_reason == 'end_turn':
finish_reason = True
elif isinstance(chunk, anthropic.types.raw_message_stop_event.RawMessageStopEvent):
continue # this seems to be the very end of the message
else:
# print(chunk)
self.ap.logger.debug(f'anthropic chunk: {chunk}')
continue
# From here on `args` is rebound to the MessageChunk fields; the request args are no longer needed.
args = {
'content': content,
'role': role,
'is_final': finish_reason,
'tool_calls': None if tool_call['id'] is None else [tool_call],
}
# if chunk_idx == 0:
# chunk_idx += 1
# continue
# assert type(chunk) is anthropic.types.message.Chunk
yield provider_message.MessageChunk(**args)
# return llm_entities.Message(**args)
except anthropic.AuthenticationError as e:
raise errors.RequesterError(f'api-key 无效: {e.message}')
except anthropic.BadRequestError as e:
raise errors.RequesterError(str(e.message))
except anthropic.NotFoundError as e:
# A 'model: ' marker in the error text is treated as an invalid model; anything else as an invalid request URL.
if 'model: ' in str(e):
raise errors.RequesterError(f'模型无效: {e.message}')
else:
raise errors.RequesterError(f'请求地址无效: {e.message}')

View File

@@ -14,7 +14,7 @@ spec:
zh_Hans: 基础 URL
type: string
required: true
default: "https://api.anthropic.com/v1"
default: "https://api.anthropic.com"
- name: timeout
label:
en_US: Timeout
@@ -22,6 +22,8 @@ spec:
type: integer
required: true
default: 120
support_type:
- llm
execution:
python:
path: ./anthropicmsgs.py

View File

@@ -22,6 +22,8 @@ spec:
type: integer
required: true
default: 120
support_type:
- llm
execution:
python:
path: ./bailianchatcmpl.py

View File

@@ -13,7 +13,7 @@ import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
import langbot_plugin.api.entities.builtin.provider.message as provider_message
class OpenAIChatCompletions(requester.LLMAPIRequester):
class OpenAIChatCompletions(requester.ProviderAPIRequester):
"""OpenAI ChatCompletion API 请求器"""
client: openai.AsyncClient
@@ -38,9 +38,18 @@ class OpenAIChatCompletions(requester.LLMAPIRequester):
) -> chat_completion.ChatCompletion:
return await self.client.chat.completions.create(**args, extra_body=extra_body)
async def _req_stream(
    self,
    args: dict,
    extra_body: dict = {},
):
    """Open a chat-completions request and relay every streamed chunk.

    Args:
        args: Keyword arguments forwarded to ``chat.completions.create``.
        extra_body: Passed through as the ``extra_body`` argument.

    Yields:
        Each chunk produced by iterating the awaited ``create`` result.
    """
    stream = await self.client.chat.completions.create(**args, extra_body=extra_body)
    async for piece in stream:
        yield piece
async def _make_msg(
self,
chat_completion: chat_completion.ChatCompletion,
remove_think: bool = False,
) -> provider_message.Message:
chatcmpl_message = chat_completion.choices[0].message.model_dump()
@@ -48,16 +57,191 @@ class OpenAIChatCompletions(requester.LLMAPIRequester):
if 'role' not in chatcmpl_message or chatcmpl_message['role'] is None:
chatcmpl_message['role'] = 'assistant'
reasoning_content = chatcmpl_message['reasoning_content'] if 'reasoning_content' in chatcmpl_message else None
# 处理思维链
content = chatcmpl_message.get('content', '')
reasoning_content = chatcmpl_message.get('reasoning_content', None)
# deepseek的reasoner模型
if reasoning_content is not None:
chatcmpl_message['content'] = '<think>\n' + reasoning_content + '\n</think>\n' + chatcmpl_message['content']
processed_content, _ = await self._process_thinking_content(
content=content, reasoning_content=reasoning_content, remove_think=remove_think
)
chatcmpl_message['content'] = processed_content
# 移除 reasoning_content 字段,避免传递给 Message
if 'reasoning_content' in chatcmpl_message:
del chatcmpl_message['reasoning_content']
message = provider_message.Message(**chatcmpl_message)
return message
async def _process_thinking_content(
self,
content: str,
reasoning_content: str = None,
remove_think: bool = False,
) -> tuple[str, str]:
"""处理思维链内容
Args:
content: 原始内容
reasoning_content: reasoning_content 字段内容
remove_think: 是否移除思维链
Returns:
(处理后的内容, 提取的思维链内容)
"""
thinking_content = ''
# 1. 从 reasoning_content 提取思维链
if reasoning_content:
thinking_content = reasoning_content
# 2. 从 content 中提取 <think> 标签内容
if content and '<think>' in content and '</think>' in content:
import re
think_pattern = r'<think>(.*?)</think>'
think_matches = re.findall(think_pattern, content, re.DOTALL)
if think_matches:
# 如果已有 reasoning_content则追加
if thinking_content:
thinking_content += '\n' + '\n'.join(think_matches)
else:
thinking_content = '\n'.join(think_matches)
# 移除 content 中的 <think> 标签
content = re.sub(think_pattern, '', content, flags=re.DOTALL).strip()
# 3. 根据 remove_think 参数决定是否保留思维链
if remove_think:
return content, ''
else:
# 如果有思维链内容,将其以 <think> 格式添加到 content 开头
if thinking_content:
content = f'<think>\n{thinking_content}\n</think>\n{content}'.strip()
return content, thinking_content
# Send one streaming chat-completions request and yield incremental
# provider_message.MessageChunk objects.
#
# Steps visible in the body:
#   1. Set the client's api key and build the request args (model, tools,
#      messages, stream=True); extra_args is sent as the request's extra_body.
#   2. Rewrite image_base64 content parts into image_url parts.
#   3. For every streamed chunk, turn reasoning_content into <think>-wrapped
#      text (or skip it when remove_think is set), fill in missing tool-call
#      id/name/type on continuation deltas, and yield a MessageChunk.
#
# NOTE(review): the return annotation says MessageChunk, but this is an async
# generator that yields MessageChunk objects.
# NOTE(review): `query` is not referenced anywhere in the body.
# NOTE(review): indentation is not preserved in this view, so nesting described
# in the comments below is inferred from syntax — confirm against the real file.
async def _closure_stream(
self,
query: pipeline_query.Query,
req_messages: list[dict],
use_model: requester.RuntimeLLMModel,
use_funcs: list[resource_tool.LLMTool] = None,
extra_args: dict[str, typing.Any] = {},
remove_think: bool = False,
) -> provider_message.MessageChunk:
self.client.api_key = use_model.token_mgr.get_token()
args = {}
args['model'] = use_model.model_entity.name
if use_funcs:
tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
if tools:
args['tools'] = tools
# Set the messages for this request
# list.copy() is shallow: the dicts rewritten below are the same objects held by req_messages.
messages = req_messages.copy()
# Check for vision (image) content
for msg in messages:
if 'content' in msg and isinstance(msg['content'], list):
for me in msg['content']:
if me['type'] == 'image_base64':
me['image_url'] = {'url': me['image_base64']}
me['type'] = 'image_url'
del me['image_base64']
args['messages'] = messages
args['stream'] = True
# Streaming state
# tool_calls_map: dict[str, provider_message.ToolCall] = {}
chunk_idx = 0
thinking_started = False
thinking_ended = False
role = 'assistant' # default role
# Last seen tool-call id/name, reused for deltas that do not carry them.
tool_id = ''
tool_name = ''
# accumulated_reasoning = '' # only used to decide when the chain of thought ends
async for chunk in self._req_stream(args, extra_body=extra_args):
# Parse the chunk data
if hasattr(chunk, 'choices') and chunk.choices:
choice = chunk.choices[0]
delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
finish_reason = getattr(choice, 'finish_reason', None)
else:
delta = {}
finish_reason = None
# Take the role from the first chunk and reuse it afterwards
if 'role' in delta and delta['role']:
role = delta['role']
# Get the incremental content
delta_content = delta.get('content', '')
reasoning_content = delta.get('reasoning_content', '')
# Handle reasoning_content
if reasoning_content:
# accumulated_reasoning += reasoning_content
# If remove_think is set, skip reasoning_content
# NOTE(review): this skips the whole chunk, including any tool_calls or finish_reason it carries — confirm that is intended.
if remove_think:
chunk_idx += 1
continue
# First reasoning_content: add the opening <think> tag
if not thinking_started:
thinking_started = True
delta_content = '<think>\n' + reasoning_content
else:
# Keep emitting reasoning_content
delta_content = reasoning_content
elif thinking_started and not thinking_ended and delta_content:
# reasoning_content ended and normal content began: add the closing </think> tag
thinking_ended = True
delta_content = '\n</think>\n' + delta_content
# Handle <think> tags already present in content (if they need removing)
# if delta_content and remove_think and '<think>' in delta_content:
# import re
#
# # Remove <think> tags and their content
# delta_content = re.sub(r'<think>.*?</think>', '', delta_content, flags=re.DOTALL)
# Handle tool-call deltas
# delta_tool_calls = None
if delta.get('tool_calls'):
for tool_call in delta['tool_calls']:
# A delta carrying both id and name starts a new call; otherwise the remembered id/name are filled in.
if tool_call['id'] and tool_call['function']['name']:
tool_id = tool_call['id']
tool_name = tool_call['function']['name']
else:
tool_call['id'] = tool_id
tool_call['function']['name'] = tool_name
if tool_call['type'] is None:
tool_call['type'] = 'function'
# Skip an empty first chunk (role only, no content)
if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'):
chunk_idx += 1
continue
# Build the MessageChunk - incremental content only
chunk_data = {
'role': role,
'content': delta_content if delta_content else None,
'tool_calls': delta.get('tool_calls'),
'is_final': bool(finish_reason),
}
# Drop None values
chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
yield provider_message.MessageChunk(**chunk_data)
chunk_idx += 1
async def _closure(
self,
query: pipeline_query.Query,
@@ -65,6 +249,7 @@ class OpenAIChatCompletions(requester.LLMAPIRequester):
use_model: requester.RuntimeLLMModel,
use_funcs: list[resource_tool.LLMTool] = None,
extra_args: dict[str, typing.Any] = {},
remove_think: bool = False,
) -> provider_message.Message:
self.client.api_key = use_model.token_mgr.get_token()
@@ -92,10 +277,10 @@ class OpenAIChatCompletions(requester.LLMAPIRequester):
args['messages'] = messages
# 发送请求
resp = await self._req(args, extra_body=extra_args)
resp = await self._req(args, extra_body=extra_args)
# 处理请求结果
message = await self._make_msg(resp)
message = await self._make_msg(resp, remove_think)
return message
@@ -106,6 +291,7 @@ class OpenAIChatCompletions(requester.LLMAPIRequester):
messages: typing.List[provider_message.Message],
funcs: typing.List[resource_tool.LLMTool] = None,
extra_args: dict[str, typing.Any] = {},
remove_think: bool = False,
) -> provider_message.Message:
req_messages = [] # req_messages 仅用于类内,外部同步由 query.messages 进行
for m in messages:
@@ -119,13 +305,90 @@ class OpenAIChatCompletions(requester.LLMAPIRequester):
req_messages.append(msg_dict)
try:
return await self._closure(
msg = await self._closure(
query=query,
req_messages=req_messages,
use_model=model,
use_funcs=funcs,
extra_args=extra_args,
remove_think=remove_think,
)
return msg
except asyncio.TimeoutError:
raise errors.RequesterError('请求超时')
except openai.BadRequestError as e:
if 'context_length_exceeded' in e.message:
raise errors.RequesterError(f'上文过长,请重置会话: {e.message}')
else:
raise errors.RequesterError(f'请求参数错误: {e.message}')
except openai.AuthenticationError as e:
raise errors.RequesterError(f'无效的 api-key: {e.message}')
except openai.NotFoundError as e:
raise errors.RequesterError(f'请求路径错误: {e.message}')
except openai.RateLimitError as e:
raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}')
except openai.APIError as e:
raise errors.RequesterError(f'请求错误: {e.message}')
async def invoke_embedding(
    self,
    model: requester.RuntimeEmbeddingModel,
    input_text: list[str],
    extra_args: dict[str, typing.Any] = {},
) -> list[list[float]]:
    """Call the Embedding API.

    Args:
        model: Runtime embedding model; supplies the api key, the model name
            and optional model-level extra arguments.
        input_text: Texts to embed.
        extra_args: Per-call extra arguments; applied last, so they override
            both the base fields and the model-level extra arguments.

    Returns:
        One embedding vector per item of the response data.

    Raises:
        errors.RequesterError: On ``asyncio.TimeoutError`` or
            ``openai.BadRequestError``.
    """
    self.client.api_key = model.token_mgr.get_token()

    request_kwargs = dict(model=model.model_entity.name, input=input_text)
    # Model-level extras first, then the per-call ones.
    request_kwargs.update(model.model_entity.extra_args or {})
    request_kwargs.update(extra_args)

    try:
        response = await self.client.embeddings.create(**request_kwargs)
        vectors = []
        for item in response.data:
            vectors.append(item.embedding)
        return vectors
    except asyncio.TimeoutError:
        raise errors.RequesterError('请求超时')
    except openai.BadRequestError as e:
        raise errors.RequesterError(f'请求参数错误: {e.message}')
async def invoke_llm_stream(
self,
query: pipeline_query.Query,
model: requester.RuntimeLLMModel,
messages: typing.List[provider_message.Message],
funcs: typing.List[resource_tool.LLMTool] = None,
extra_args: dict[str, typing.Any] = {},
remove_think: bool = False,
) -> provider_message.MessageChunk:
req_messages = [] # req_messages 仅用于类内,外部同步由 query.messages 进行
for m in messages:
msg_dict = m.dict(exclude_none=True)
content = msg_dict.get('content')
if isinstance(content, list):
# 检查 content 列表中是否每个部分都是文本
if all(isinstance(part, dict) and part.get('type') == 'text' for part in content):
# 将所有文本部分合并为一个字符串
msg_dict['content'] = '\n'.join(part['text'] for part in content)
req_messages.append(msg_dict)
try:
async for item in self._closure_stream(
query=query,
req_messages=req_messages,
use_model=model,
use_funcs=funcs,
extra_args=extra_args,
remove_think=remove_think,
):
yield item
except asyncio.TimeoutError:
raise errors.RequesterError('请求超时')
except openai.BadRequestError as e:

View File

@@ -22,6 +22,9 @@ spec:
type: integer
required: true
default: 120
support_type:
- llm
- text-embedding
execution:
python:
path: ./chatcmpl.py

Binary file not shown.

After

Width:  |  Height:  |  Size: 59 KiB

View File

@@ -0,0 +1,17 @@
from __future__ import annotations
import typing
import openai
from . import chatcmpl
class CompShareChatCompletions(chatcmpl.OpenAIChatCompletions):
    """CompShare ChatCompletion API requester.

    Reuses the OpenAI chat-completions requester unchanged and only overrides
    the default configuration values.
    """

    client: openai.AsyncClient

    # Default base URL and timeout for this provider.
    default_config: dict[str, typing.Any] = dict(
        base_url='https://api.modelverse.cn/v1',
        timeout=120,
    )

View File

@@ -0,0 +1,30 @@
apiVersion: v1
kind: LLMAPIRequester
metadata:
name: compshare-chat-completions
label:
en_US: CompShare
zh_Hans: 优云智算
icon: compshare.png
spec:
config:
- name: base_url
label:
en_US: Base URL
zh_Hans: 基础 URL
type: string
required: true
default: "https://api.modelverse.cn/v1"
- name: timeout
label:
en_US: Timeout
zh_Hans: 超时时间
type: integer
required: true
default: 120
support_type:
- llm
execution:
python:
path: ./compsharechatcmpl.py
attr: CompShareChatCompletions

View File

@@ -24,6 +24,7 @@ class DeepseekChatCompletions(chatcmpl.OpenAIChatCompletions):
use_model: requester.RuntimeLLMModel,
use_funcs: list[resource_tool.LLMTool] = None,
extra_args: dict[str, typing.Any] = {},
remove_think: bool = False,
) -> provider_message.Message:
self.client.api_key = use_model.token_mgr.get_token()
@@ -49,10 +50,11 @@ class DeepseekChatCompletions(chatcmpl.OpenAIChatCompletions):
# 发送请求
resp = await self._req(args, extra_body=extra_args)
# print(resp)
if resp is None:
raise errors.RequesterError('接口返回为空,请确定模型提供商服务是否正常')
# 处理请求结果
message = await self._make_msg(resp)
message = await self._make_msg(resp, remove_think)
return message

View File

@@ -4,7 +4,7 @@ metadata:
name: deepseek-chat-completions
label:
en_US: DeepSeek
zh_Hans: 深度求索
zh_Hans: DeepSeek
icon: deepseek.svg
spec:
config:
@@ -22,6 +22,8 @@ spec:
type: integer
required: true
default: 120
support_type:
- llm
execution:
python:
path: ./deepseekchatcmpl.py

View File

@@ -4,6 +4,13 @@ import typing
from . import chatcmpl
import uuid
from .. import requester
import langbot_plugin.api.entities.builtin.provider.message as provider_message
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
class GeminiChatCompletions(chatcmpl.OpenAIChatCompletions):
"""Google Gemini API 请求器"""
@@ -12,3 +19,124 @@ class GeminiChatCompletions(chatcmpl.OpenAIChatCompletions):
'base_url': 'https://generativelanguage.googleapis.com/v1beta/openai',
'timeout': 120,
}
# Gemini override of the streaming request: sends one streaming
# chat-completions request and yields incremental
# provider_message.MessageChunk objects.
#
# The body matches the OpenAI requester's _closure_stream shown earlier in this
# change except for the tool-call handling, which generates a uuid4 id when the
# streamed tool call has an empty id.
#
# NOTE(review): the return annotation says MessageChunk, but this is an async
# generator that yields MessageChunk objects.
# NOTE(review): `query` is not referenced anywhere in the body.
# NOTE(review): indentation is not preserved in this view, so nesting described
# in the comments below is inferred from syntax — confirm against the real file.
async def _closure_stream(
self,
query: pipeline_query.Query,
req_messages: list[dict],
use_model: requester.RuntimeLLMModel,
use_funcs: list[resource_tool.LLMTool] = None,
extra_args: dict[str, typing.Any] = {},
remove_think: bool = False,
) -> provider_message.MessageChunk:
self.client.api_key = use_model.token_mgr.get_token()
args = {}
args['model'] = use_model.model_entity.name
if use_funcs:
tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
if tools:
args['tools'] = tools
# Set the messages for this request
# list.copy() is shallow: the dicts rewritten below are the same objects held by req_messages.
messages = req_messages.copy()
# Check for vision (image) content
for msg in messages:
if 'content' in msg and isinstance(msg['content'], list):
for me in msg['content']:
if me['type'] == 'image_base64':
me['image_url'] = {'url': me['image_base64']}
me['type'] = 'image_url'
del me['image_base64']
args['messages'] = messages
args['stream'] = True
# Streaming state
# tool_calls_map: dict[str, provider_message.ToolCall] = {}
chunk_idx = 0
thinking_started = False
thinking_ended = False
role = 'assistant' # default role
# Tool-call id/name remembered across chunks of this stream.
tool_id = ''
tool_name = ''
# accumulated_reasoning = '' # only used to decide when the chain of thought ends
async for chunk in self._req_stream(args, extra_body=extra_args):
# Parse the chunk data
if hasattr(chunk, 'choices') and chunk.choices:
choice = chunk.choices[0]
delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
finish_reason = getattr(choice, 'finish_reason', None)
else:
delta = {}
finish_reason = None
# Take the role from the first chunk and reuse it afterwards
if 'role' in delta and delta['role']:
role = delta['role']
# Get the incremental content
delta_content = delta.get('content', '')
reasoning_content = delta.get('reasoning_content', '')
# Handle reasoning_content
if reasoning_content:
# accumulated_reasoning += reasoning_content
# If remove_think is set, skip reasoning_content
# NOTE(review): this skips the whole chunk, including any tool_calls or finish_reason it carries — confirm that is intended.
if remove_think:
chunk_idx += 1
continue
# First reasoning_content: add the opening <think> tag
if not thinking_started:
thinking_started = True
delta_content = '<think>\n' + reasoning_content
else:
# Keep emitting reasoning_content
delta_content = reasoning_content
elif thinking_started and not thinking_ended and delta_content:
# reasoning_content ended and normal content began: add the closing </think> tag
thinking_ended = True
delta_content = '\n</think>\n' + delta_content
# Handle <think> tags already present in content (if they need removing)
# if delta_content and remove_think and '<think>' in delta_content:
# import re
#
# # Remove <think> tags and their content
# delta_content = re.sub(r'<think>.*?</think>', '', delta_content, flags=re.DOTALL)
# Handle tool-call deltas
# delta_tool_calls = None
if delta.get('tool_calls'):
for tool_call in delta['tool_calls']:
# An id is generated only while no id has been remembered yet for this stream.
# NOTE(review): tool_id is never reset, so every tool call in the same stream presumably ends up with the same id, and a non-empty id sent by the provider is overwritten below — confirm.
if tool_call['id'] == '' and tool_id == '':
tool_id = str(uuid.uuid4())
if tool_call['function']['name']:
tool_name = tool_call['function']['name']
tool_call['id'] = tool_id
tool_call['function']['name'] = tool_name
if tool_call['type'] is None:
tool_call['type'] = 'function'
# Skip an empty first chunk (role only, no content)
if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'):
chunk_idx += 1
continue
# Build the MessageChunk - incremental content only
chunk_data = {
'role': role,
'content': delta_content if delta_content else None,
'tool_calls': delta.get('tool_calls'),
'is_final': bool(finish_reason),
}
# Drop None values
chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
yield provider_message.MessageChunk(**chunk_data)
chunk_idx += 1

View File

@@ -22,6 +22,8 @@ spec:
type: integer
required: true
default: 120
support_type:
- llm
execution:
python:
path: ./geminichatcmpl.py

View File

@@ -3,49 +3,13 @@ from __future__ import annotations
import typing
from . import chatcmpl
from .. import requester
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
import langbot_plugin.api.entities.builtin.provider.message as provider_message
from . import ppiochatcmpl
class GiteeAIChatCompletions(chatcmpl.OpenAIChatCompletions):
class GiteeAIChatCompletions(ppiochatcmpl.PPIOChatCompletions):
"""Gitee AI ChatCompletions API 请求器"""
default_config: dict[str, typing.Any] = {
'base_url': 'https://ai.gitee.com/v1',
'timeout': 120,
}
async def _closure(
self,
query: pipeline_query.Query,
req_messages: list[dict],
use_model: requester.RuntimeLLMModel,
use_funcs: list[resource_tool.LLMTool] = None,
extra_args: dict[str, typing.Any] = {},
) -> provider_message.Message:
self.client.api_key = use_model.token_mgr.get_token()
args = {}
args['model'] = use_model.model_entity.name
if use_funcs:
tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
if tools:
args['tools'] = tools
# gitee 不支持多模态把content都转换成纯文字
for m in req_messages:
if 'content' in m and isinstance(m['content'], list):
m['content'] = ' '.join([c['text'] for c in m['content']])
args['messages'] = req_messages
resp = await self._req(args, extra_body=extra_args)
message = await self._make_msg(resp)
return message

View File

@@ -22,6 +22,9 @@ spec:
type: integer
required: true
default: 120
support_type:
- llm
- text-embedding
execution:
python:
path: ./giteeaichatcmpl.py

View File

@@ -22,6 +22,9 @@ spec:
type: integer
required: true
default: 120
support_type:
- llm
- text-embedding
execution:
python:
path: ./lmstudiochatcmpl.py

View File

@@ -5,7 +5,6 @@ import typing
import openai
import openai.types.chat.chat_completion as chat_completion
import openai.types.chat.chat_completion_message_tool_call as chat_completion_message_tool_call
import httpx
from .. import entities, errors, requester
@@ -14,7 +13,7 @@ import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
import langbot_plugin.api.entities.builtin.provider.message as provider_message
class ModelScopeChatCompletions(requester.LLMAPIRequester):
class ModelScopeChatCompletions(requester.ProviderAPIRequester):
"""ModelScope ChatCompletion API 请求器"""
client: openai.AsyncClient
@@ -34,9 +33,11 @@ class ModelScopeChatCompletions(requester.LLMAPIRequester):
async def _req(
self,
query: pipeline_query.Query,
args: dict,
extra_body: dict = {},
) -> chat_completion.ChatCompletion:
remove_think: bool = False,
) -> list[dict[str, typing.Any]]:
args['stream'] = True
chunk = None
@@ -47,73 +48,71 @@ class ModelScopeChatCompletions(requester.LLMAPIRequester):
resp_gen: openai.AsyncStream = await self.client.chat.completions.create(**args, extra_body=extra_body)
chunk_idx = 0
thinking_started = False
thinking_ended = False
tool_id = ''
tool_name = ''
message_delta = {}
async for chunk in resp_gen:
# print(chunk)
if not chunk or not chunk.id or not chunk.choices or not chunk.choices[0] or not chunk.choices[0].delta:
continue
if chunk.choices[0].delta.content is not None:
pending_content += chunk.choices[0].delta.content
delta = chunk.choices[0].delta.model_dump() if hasattr(chunk.choices[0], 'delta') else {}
reasoning_content = delta.get('reasoning_content')
# 处理 reasoning_content
if reasoning_content:
# accumulated_reasoning += reasoning_content
# 如果设置了 remove_think跳过 reasoning_content
if remove_think:
chunk_idx += 1
continue
if chunk.choices[0].delta.tool_calls is not None:
for tool_call in chunk.choices[0].delta.tool_calls:
if tool_call.function.arguments is None:
# 第一次出现 reasoning_content添加 <think> 开始标签
if not thinking_started:
thinking_started = True
pending_content += '<think>\n' + reasoning_content
else:
# 继续输出 reasoning_content
pending_content += reasoning_content
elif thinking_started and not thinking_ended and delta.get('content'):
# reasoning_content 结束normal content 开始,添加 </think> 结束标签
thinking_ended = True
pending_content += '\n</think>\n' + delta.get('content')
if delta.get('content') is not None:
pending_content += delta.get('content')
if delta.get('tool_calls') is not None:
for tool_call in delta.get('tool_calls'):
if tool_call['id'] != '':
tool_id = tool_call['id']
if tool_call['function']['name'] is not None:
tool_name = tool_call['function']['name']
if tool_call['function']['arguments'] is None:
continue
tool_call['id'] = tool_id
tool_call['name'] = tool_name
for tc in tool_calls:
if tc.index == tool_call.index:
tc.function.arguments += tool_call.function.arguments
if tc['index'] == tool_call['index']:
tc['function']['arguments'] += tool_call['function']['arguments']
break
else:
tool_calls.append(tool_call)
if chunk.choices[0].finish_reason is not None:
break
message_delta['content'] = pending_content
message_delta['role'] = 'assistant'
real_tool_calls = []
for tc in tool_calls:
function = chat_completion_message_tool_call.Function(
name=tc.function.name, arguments=tc.function.arguments
)
real_tool_calls.append(
chat_completion_message_tool_call.ChatCompletionMessageToolCall(
id=tc.id, function=function, type='function'
)
)
return (
chat_completion.ChatCompletion(
id=chunk.id,
object='chat.completion',
created=chunk.created,
choices=[
chat_completion.Choice(
index=0,
message=chat_completion.ChatCompletionMessage(
role='assistant',
content=pending_content,
tool_calls=real_tool_calls if len(real_tool_calls) > 0 else None,
),
finish_reason=chunk.choices[0].finish_reason
if hasattr(chunk.choices[0], 'finish_reason') and chunk.choices[0].finish_reason is not None
else 'stop',
logprobs=chunk.choices[0].logprobs,
)
],
model=chunk.model,
service_tier=chunk.service_tier if hasattr(chunk, 'service_tier') else None,
system_fingerprint=chunk.system_fingerprint if hasattr(chunk, 'system_fingerprint') else None,
usage=chunk.usage if hasattr(chunk, 'usage') else None,
)
if chunk
else None
)
message_delta['tool_calls'] = tool_calls if tool_calls else None
return [message_delta]
async def _make_msg(
self,
chat_completion: chat_completion.ChatCompletion,
chat_completion: list[dict[str, typing.Any]],
) -> provider_message.Message:
chatcmpl_message = chat_completion.choices[0].message.dict()
chatcmpl_message = chat_completion[0]
# 确保 role 字段存在且不为 None
if 'role' not in chatcmpl_message or chatcmpl_message['role'] is None:
@@ -130,6 +129,7 @@ class ModelScopeChatCompletions(requester.LLMAPIRequester):
use_model: requester.RuntimeLLMModel,
use_funcs: list[resource_tool.LLMTool] = None,
extra_args: dict[str, typing.Any] = {},
remove_think: bool = False,
) -> provider_message.Message:
self.client.api_key = use_model.token_mgr.get_token()
@@ -157,13 +157,145 @@ class ModelScopeChatCompletions(requester.LLMAPIRequester):
args['messages'] = messages
# 发送请求
resp = await self._req(args, extra_body=extra_args)
resp = await self._req(query, args, extra_body=extra_args, remove_think=remove_think)
# 处理请求结果
message = await self._make_msg(resp)
return message
async def _req_stream(
self,
args: dict,
extra_body: dict = {},
) -> chat_completion.ChatCompletion:
async for chunk in await self.client.chat.completions.create(**args, extra_body=extra_body):
yield chunk
async def _closure_stream(
    self,
    query: pipeline_query.Query,
    req_messages: list[dict],
    use_model: requester.RuntimeLLMModel,
    use_funcs: list[resource_tool.LLMTool] = None,
    extra_args: dict[str, typing.Any] = {},
    remove_think: bool = False,
) -> provider_message.Message | typing.AsyncGenerator[provider_message.MessageChunk, None]:
    """Stream one chat completion as incremental ``MessageChunk`` objects.

    Args:
        query: The pipeline query; not read by this method.
        req_messages: Request messages as plain dicts. ``image_base64``
            content parts are rewritten in place into ``image_url`` parts.
        use_model: Runtime model supplying the API token and the model name.
        use_funcs: Optional tools; converted with
            ``self.ap.tool_mgr.generate_tools_for_openai``.
        extra_args: Extra request-body fields passed to ``_req_stream``.
        remove_think: When true, chunks carrying ``reasoning_content`` are
            dropped instead of being wrapped in ``<think>`` tags.

    Yields:
        One ``provider_message.MessageChunk`` per non-empty delta; ``is_final``
        is true when the chunk has a finish reason.
    """
    self.client.api_key = use_model.token_mgr.get_token()

    args = {}
    args['model'] = use_model.model_entity.name

    if use_funcs:
        tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)

        if tools:
            args['tools'] = tools

    # Messages for this request (shallow copy: the message dicts are shared).
    messages = req_messages.copy()

    # Vision: convert image_base64 parts into the image_url form.
    for msg in messages:
        if 'content' in msg and isinstance(msg['content'], list):
            for me in msg['content']:
                if me['type'] == 'image_base64':
                    me['image_url'] = {'url': me['image_base64']}
                    me['type'] = 'image_url'
                    del me['image_base64']

    args['messages'] = messages
    args['stream'] = True

    # Streaming state.
    chunk_idx = 0
    thinking_started = False
    thinking_ended = False
    role = 'assistant'  # default role until a chunk provides one
    # Last seen tool-call id/name. Initialised up front so a first delta with
    # an empty id or name cannot hit an unbound local.
    tool_id = ''
    tool_name = ''

    async for chunk in self._req_stream(args, extra_body=extra_args):
        # Parse the chunk.
        if hasattr(chunk, 'choices') and chunk.choices:
            choice = chunk.choices[0]
            delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
            finish_reason = getattr(choice, 'finish_reason', None)
        else:
            delta = {}
            finish_reason = None

        # Take the role from whichever chunk carries it and reuse it afterwards.
        if 'role' in delta and delta['role']:
            role = delta['role']

        # Incremental content.
        delta_content = delta.get('content', '')
        reasoning_content = delta.get('reasoning_content', '')

        if reasoning_content:
            # With remove_think set, reasoning chunks are skipped entirely.
            if remove_think:
                chunk_idx += 1
                continue

            if not thinking_started:
                # First reasoning chunk: open the <think> block.
                thinking_started = True
                delta_content = '<think>\n' + reasoning_content
            else:
                # Subsequent reasoning chunks are emitted as-is.
                delta_content = reasoning_content
        elif thinking_started and not thinking_ended and delta_content:
            # Reasoning finished and normal content begins: close the block.
            thinking_ended = True
            delta_content = '\n</think>\n' + delta_content

        # Tool-call deltas: carry the id/name forward onto follow-up deltas.
        # Truthiness checks keep the remembered value when a later delta
        # reports the field as '' or None.
        # NOTE(review): a single id/name is tracked, so parallel tool calls in
        # one response would share them - confirm whether that can occur here.
        if delta.get('tool_calls'):
            for tool_call in delta['tool_calls']:
                if tool_call['id']:
                    tool_id = tool_call['id']
                if tool_call['function']['name']:
                    tool_name = tool_call['function']['name']

                if tool_call['type'] is None:
                    tool_call['type'] = 'function'
                tool_call['id'] = tool_id
                tool_call['function']['name'] = tool_name
                if tool_call['function']['arguments'] is None:
                    tool_call['function']['arguments'] = ''

        # Skip an empty first chunk (role only, no payload).
        if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'):
            chunk_idx += 1
            continue

        # Build the MessageChunk from the incremental data only.
        chunk_data = {
            'role': role,
            'content': delta_content if delta_content else None,
            'tool_calls': delta.get('tool_calls'),
            'is_final': bool(finish_reason),
        }

        # Drop None values.
        chunk_data = {k: v for k, v in chunk_data.items() if v is not None}

        yield provider_message.MessageChunk(**chunk_data)
        chunk_idx += 1
async def invoke_llm(
self,
query: pipeline_query.Query,
@@ -171,6 +303,7 @@ class ModelScopeChatCompletions(requester.LLMAPIRequester):
messages: typing.List[provider_message.Message],
funcs: typing.List[resource_tool.LLMTool] = None,
extra_args: dict[str, typing.Any] = {},
remove_think: bool = False,
) -> provider_message.Message:
req_messages = [] # req_messages 仅用于类内,外部同步由 query.messages 进行
for m in messages:
@@ -185,7 +318,12 @@ class ModelScopeChatCompletions(requester.LLMAPIRequester):
try:
return await self._closure(
query=query, req_messages=req_messages, use_model=model, use_funcs=funcs, extra_args=extra_args
query=query,
req_messages=req_messages,
use_model=model,
use_funcs=funcs,
extra_args=extra_args,
remove_think=remove_think,
)
except asyncio.TimeoutError:
raise errors.RequesterError('请求超时')
@@ -202,3 +340,50 @@ class ModelScopeChatCompletions(requester.LLMAPIRequester):
raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}')
except openai.APIError as e:
raise errors.RequesterError(f'请求错误: {e.message}')
async def invoke_llm_stream(
    self,
    query: pipeline_query.Query,
    model: requester.RuntimeLLMModel,
    messages: typing.List[provider_message.Message],
    funcs: typing.List[resource_tool.LLMTool] = None,
    extra_args: dict[str, typing.Any] = {},
    remove_think: bool = False,
) -> typing.AsyncGenerator[provider_message.MessageChunk, None]:
    """Stream a completion for ``messages`` and translate API errors.

    Each message is converted to a dict; a ``content`` list made only of text
    parts is collapsed into a single newline-joined string. The converted
    messages are then passed to ``_closure_stream``.

    Args:
        query: The pipeline query, forwarded to ``_closure_stream``.
        model: Runtime model to call.
        messages: Conversation messages to send.
        funcs: Optional tools made available to the model.
        extra_args: Extra request-body fields.
        remove_think: Forwarded to ``_closure_stream``.

    Yields:
        The chunks produced by ``_closure_stream``.

    Raises:
        errors.RequesterError: On timeout or on any of the handled
            ``openai`` API errors; the original exception is attached as
            ``__cause__``.
    """
    # req_messages is internal to this class; external sync goes through query.messages.
    req_messages = []
    for m in messages:
        msg_dict = m.dict(exclude_none=True)
        content = msg_dict.get('content')
        if isinstance(content, list):
            # Collapse the list only when every part is a text part.
            if all(isinstance(part, dict) and part.get('type') == 'text' for part in content):
                msg_dict['content'] = '\n'.join(part['text'] for part in content)
        req_messages.append(msg_dict)

    try:
        async for item in self._closure_stream(
            query=query,
            req_messages=req_messages,
            use_model=model,
            use_funcs=funcs,
            extra_args=extra_args,
            remove_think=remove_think,
        ):
            yield item
    except asyncio.TimeoutError as e:
        raise errors.RequesterError('请求超时') from e
    except openai.BadRequestError as e:
        if 'context_length_exceeded' in e.message:
            raise errors.RequesterError(f'上文过长,请重置会话: {e.message}') from e
        else:
            raise errors.RequesterError(f'请求参数错误: {e.message}') from e
    except openai.AuthenticationError as e:
        raise errors.RequesterError(f'无效的 api-key: {e.message}') from e
    except openai.NotFoundError as e:
        raise errors.RequesterError(f'请求路径错误: {e.message}') from e
    except openai.RateLimitError as e:
        raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}') from e
    except openai.APIError as e:
        raise errors.RequesterError(f'请求错误: {e.message}') from e

View File

@@ -29,6 +29,8 @@ spec:
type: int
required: true
default: 120
support_type:
- llm
execution:
python:
path: ./modelscopechatcmpl.py

View File

@@ -25,6 +25,7 @@ class MoonshotChatCompletions(chatcmpl.OpenAIChatCompletions):
use_model: requester.RuntimeLLMModel,
use_funcs: list[resource_tool.LLMTool] = None,
extra_args: dict[str, typing.Any] = {},
remove_think: bool = False,
) -> provider_message.Message:
self.client.api_key = use_model.token_mgr.get_token()
@@ -54,6 +55,6 @@ class MoonshotChatCompletions(chatcmpl.OpenAIChatCompletions):
resp = await self._req(args, extra_body=extra_args)
# 处理请求结果
message = await self._make_msg(resp)
message = await self._make_msg(resp, remove_think)
return message

View File

@@ -14,7 +14,7 @@ spec:
zh_Hans: 基础 URL
type: string
required: true
default: "https://api.moonshot.com/v1"
default: "https://api.moonshot.ai/v1"
- name: timeout
label:
en_US: Timeout
@@ -22,6 +22,8 @@ spec:
type: integer
required: true
default: 120
support_type:
- llm
execution:
python:
path: ./moonshotchatcmpl.py

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.4 KiB

View File

@@ -0,0 +1,17 @@
from __future__ import annotations
import typing
import openai
from . import chatcmpl
class NewAPIChatCompletions(chatcmpl.OpenAIChatCompletions):
    """Requester for the New API ChatCompletion API."""

    client: openai.AsyncClient

    # Defaults: a local endpoint URL and a timeout of 120.
    default_config: dict[str, typing.Any] = dict(
        base_url='http://localhost:3000/v1',
        timeout=120,
    )

View File

@@ -0,0 +1,31 @@
# Component manifest for the "New API" requester.
apiVersion: v1
kind: LLMAPIRequester
metadata:
  name: new-api-chat-completions
  label:
    en_US: New API
    zh_Hans: New API
  icon: newapi.png
spec:
  # Configuration fields of this requester.
  config:
    - name: base_url
      label:
        en_US: Base URL
        zh_Hans: 基础 URL
      type: string
      required: true
      # Same value as default_config['base_url'] in newapichatcmpl.py.
      default: "http://localhost:3000/v1"
    - name: timeout
      label:
        en_US: Timeout
        zh_Hans: 超时时间
      type: integer
      required: true
      default: 120
  # NOTE(review): presumably the model kinds this requester can serve - confirm against the loader.
  support_type:
    - llm
    - text-embedding
execution:
  # Python module and class that implement this component.
  python:
    path: ./newapichatcmpl.py
    attr: NewAPIChatCompletions

View File

@@ -17,7 +17,7 @@ import langbot_plugin.api.entities.builtin.provider.message as provider_message
REQUESTER_NAME: str = 'ollama-chat'
class OllamaChatCompletions(requester.LLMAPIRequester):
class OllamaChatCompletions(requester.ProviderAPIRequester):
"""Ollama平台 ChatCompletion API请求器"""
client: ollama.AsyncClient
@@ -44,6 +44,7 @@ class OllamaChatCompletions(requester.LLMAPIRequester):
use_model: requester.RuntimeLLMModel,
use_funcs: list[resource_tool.LLMTool] = None,
extra_args: dict[str, typing.Any] = {},
remove_think: bool = False,
) -> provider_message.Message:
args = extra_args.copy()
args['model'] = use_model.model_entity.name
@@ -110,6 +111,7 @@ class OllamaChatCompletions(requester.LLMAPIRequester):
messages: typing.List[provider_message.Message],
funcs: typing.List[resource_tool.LLMTool] = None,
extra_args: dict[str, typing.Any] = {},
remove_think: bool = False,
) -> provider_message.Message:
req_messages: list = []
for m in messages:
@@ -126,6 +128,21 @@ class OllamaChatCompletions(requester.LLMAPIRequester):
use_model=model,
use_funcs=funcs,
extra_args=extra_args,
remove_think=remove_think,
)
except asyncio.TimeoutError:
raise errors.RequesterError('请求超时')
async def invoke_embedding(
    self,
    model: requester.RuntimeEmbeddingModel,
    input_text: list[str],
    extra_args: dict[str, typing.Any] = {},
) -> list[list[float]]:
    """Embed ``input_text`` with the given model.

    Args:
        model: Runtime embedding model; ``model.model_entity.name`` is sent
            as the model name.
        input_text: Texts to embed, passed as ``input``.
        extra_args: Additional keyword arguments forwarded to
            ``self.client.embed``.

    Returns:
        The ``embeddings`` attribute of the client's response.
    """
    model_name = model.model_entity.name
    response = await self.client.embed(model=model_name, input=input_text, **extra_args)
    return response.embeddings

View File

@@ -22,6 +22,9 @@ spec:
type: integer
required: true
default: 120
support_type:
- llm
- text-embedding
execution:
python:
path: ./ollamachat.py

View File

@@ -22,6 +22,9 @@ spec:
type: integer
required: true
default: 120
support_type:
- llm
- text-embedding
execution:
python:
path: ./openrouterchatcmpl.py

View File

@@ -4,6 +4,12 @@ import openai
import typing
from . import chatcmpl
from .. import requester
import openai.types.chat.chat_completion as chat_completion
import re
import langbot_plugin.api.entities.builtin.provider.message as provider_message
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
class PPIOChatCompletions(chatcmpl.OpenAIChatCompletions):
@@ -15,3 +21,188 @@ class PPIOChatCompletions(chatcmpl.OpenAIChatCompletions):
'base_url': 'https://api.ppinfra.com/v3/openai',
'timeout': 120,
}
is_think: bool = False
async def _make_msg(
    self,
    chat_completion: chat_completion.ChatCompletion,
    remove_think: bool,
) -> provider_message.Message:
    """Convert the first choice of a completion into a ``Message``.

    Args:
        chat_completion: The completion response; only
            ``choices[0].message`` is read.
        remove_think: Passed to ``_process_thinking_content``.

    Returns:
        A ``provider_message.Message`` built from the dumped message fields,
        with ``role`` defaulting to ``'assistant'``, ``content`` replaced by
        the result of ``_process_thinking_content`` and ``reasoning_content``
        removed.
    """
    fields = chat_completion.choices[0].message.model_dump()

    # Make sure the role is present and not None.
    if fields.get('role') is None:
        fields['role'] = 'assistant'

    # Take reasoning_content out so it is not passed on to Message.
    reasoning = fields.pop('reasoning_content', None)

    fields['content'] = await self._process_thinking_content(fields['content'], reasoning, remove_think)

    return provider_message.Message(**fields)
async def _process_thinking_content(
self,
content: str,
reasoning_content: str = None,
remove_think: bool = False,
) -> tuple[str, str]:
"""处理思维链内容
Args:
content: 原始内容
reasoning_content: reasoning_content 字段内容
remove_think: 是否移除思维链
Returns:
处理后的内容
"""
if remove_think:
content = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL)
else:
if reasoning_content is not None:
content = '<think>\n' + reasoning_content + '\n</think>\n' + content
return content
async def _make_msg_chunk(
    self,
    delta: dict[str, typing.Any],
    idx: int,
) -> provider_message.MessageChunk:
    """Build a ``MessageChunk`` from one streaming delta.

    The delta dict is modified in place: ``role`` defaults to
    ``'assistant'``, a ``None`` content becomes ``''`` and any
    ``reasoning_content`` is appended to ``content``.

    Args:
        delta: The dumped delta of a streaming chunk.
        idx: Not used by this method.

    Returns:
        ``provider_message.MessageChunk(**delta)``.
    """
    # Make sure the role is present and not None.
    if delta.get('role') is None:
        delta['role'] = 'assistant'

    reasoning = delta.get('reasoning_content')

    if delta['content'] is None:
        delta['content'] = ''

    # Models that report reasoning separately: append it to the content.
    if reasoning is not None:
        delta['content'] += reasoning

    return provider_message.MessageChunk(**delta)
async def _closure_stream(
    self,
    query: pipeline_query.Query,
    req_messages: list[dict],
    use_model: requester.RuntimeLLMModel,
    use_funcs: list[resource_tool.LLMTool] = None,
    extra_args: dict[str, typing.Any] = {},
    remove_think: bool = False,
) -> provider_message.Message | typing.AsyncGenerator[provider_message.MessageChunk, None]:
    """Stream one chat completion as incremental ``MessageChunk`` objects.

    Args:
        query: The pipeline query; not read by this method.
        req_messages: Request messages as plain dicts. ``image_base64``
            content parts are rewritten in place into ``image_url`` parts.
        use_model: Runtime model supplying the API token and the model name.
        use_funcs: Optional tools; converted with
            ``self.ap.tool_mgr.generate_tools_for_openai``.
        extra_args: Extra request-body fields passed to ``_req_stream``.
        remove_think: When true, content chunks from the one containing
            ``<think>`` up to the ``</think>`` chunk (and one following
            ``'\\n\\n'`` chunk) are dropped.

    Yields:
        One ``provider_message.MessageChunk`` per remaining delta;
        ``is_final`` is true when the chunk has a finish reason.
    """
    self.client.api_key = use_model.token_mgr.get_token()

    args = {}
    args['model'] = use_model.model_entity.name

    if use_funcs:
        tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)

        if tools:
            args['tools'] = tools

    # Messages for this request (shallow copy: the message dicts are shared).
    messages = req_messages.copy()

    # Vision: convert image_base64 parts into the image_url form.
    for msg in messages:
        if 'content' in msg and isinstance(msg['content'], list):
            for me in msg['content']:
                if me['type'] == 'image_base64':
                    me['image_url'] = {'url': me['image_base64']}
                    me['type'] = 'image_url'
                    del me['image_base64']

    args['messages'] = messages
    args['stream'] = True

    chunk_idx = 0
    thinking_started = False
    thinking_ended = False
    role = 'assistant'  # default role until a chunk provides one
    # Last seen tool-call id/name. Initialised up front so a delta that omits
    # them before any full delta was seen cannot hit an unbound local.
    tool_id = ''
    tool_name = ''

    async for chunk in self._req_stream(args, extra_body=extra_args):
        # Parse the chunk.
        if hasattr(chunk, 'choices') and chunk.choices:
            choice = chunk.choices[0]
            delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
            finish_reason = getattr(choice, 'finish_reason', None)
        else:
            delta = {}
            finish_reason = None

        # Take the role from whichever chunk carries it and reuse it afterwards.
        if 'role' in delta and delta['role']:
            role = delta['role']

        # Incremental content. delta may be empty (chunk without choices), so
        # the key is always read with .get().
        delta_content = delta.get('content', '')

        if remove_think and delta_content is not None:
            if '<think>' in delta_content and not thinking_started and not thinking_ended:
                thinking_started = True
                continue
            elif delta_content == r'</think>' and not thinking_ended:
                thinking_ended = True
                continue
            elif thinking_ended and delta_content == '\n\n' and thinking_started:
                # Swallow the blank separator that follows </think>.
                thinking_started = False
                continue
            elif thinking_started and not thinking_ended:
                # Still inside the think block.
                continue

        # Tool-call deltas: remember id/name from a delta that carries both
        # and fill them into follow-up deltas that omit them.
        if delta.get('tool_calls'):
            for tool_call in delta['tool_calls']:
                if tool_call['id'] and tool_call['function']['name']:
                    tool_id = tool_call['id']
                    tool_name = tool_call['function']['name']

                if tool_call['id'] is None:
                    tool_call['id'] = tool_id
                if tool_call['function']['name'] is None:
                    tool_call['function']['name'] = tool_name
                if tool_call['function']['arguments'] is None:
                    tool_call['function']['arguments'] = ''
                if tool_call['type'] is None:
                    tool_call['type'] = 'function'

        # Skip an empty first chunk (role only, no payload).
        if chunk_idx == 0 and not delta_content and not delta.get('tool_calls'):
            chunk_idx += 1
            continue

        # Build the MessageChunk from the incremental data only.
        chunk_data = {
            'role': role,
            'content': delta_content if delta_content else None,
            'tool_calls': delta.get('tool_calls'),
            'is_final': bool(finish_reason),
        }

        # Drop None values.
        chunk_data = {k: v for k, v in chunk_data.items() if v is not None}

        yield provider_message.MessageChunk(**chunk_data)
        chunk_idx += 1

View File

@@ -29,6 +29,9 @@ spec:
type: int
required: true
default: 120
support_type:
- llm
- text-embedding
execution:
python:
path: ./ppiochatcmpl.py

Binary file not shown.

After

Width:  |  Height:  |  Size: 24 KiB

View File

@@ -0,0 +1,17 @@
from __future__ import annotations
import openai
import typing
from . import chatcmpl
class QHAIGCChatCompletions(chatcmpl.OpenAIChatCompletions):
    """Requester for the QH AI (启航 AI) ChatCompletion API."""

    client: openai.AsyncClient

    # The base URL matches the default declared for `base_url` in this
    # requester's YAML manifest (api.qhaigc.net); keep the two in sync.
    default_config: dict[str, typing.Any] = {
        'base_url': 'https://api.qhaigc.net/v1',
        'timeout': 120,
    }

View File

@@ -0,0 +1,38 @@
# Component manifest for the QH AI (启航 AI) requester.
apiVersion: v1
kind: LLMAPIRequester
metadata:
  name: qhaigc-chat-completions
  label:
    en_US: QH AI
    zh_Hans: 启航 AI
  icon: qhaigc.png
spec:
  # Configuration fields of this requester.
  config:
    - name: base_url
      label:
        en_US: Base URL
        zh_Hans: 基础 URL
      type: string
      required: true
      default: "https://api.qhaigc.net/v1"
    - name: args
      label:
        en_US: Args
        zh_Hans: 附加参数
      type: object
      required: true
      default: {}
    - name: timeout
      label:
        en_US: Timeout
        zh_Hans: 超时时间
      # Spelled `integer`, as in the other requester manifests' timeout fields.
      type: integer
      required: true
      default: 120
  # NOTE(review): presumably the model kinds this requester can serve - confirm against the loader.
  support_type:
    - llm
    - text-embedding
execution:
  # Python module and class that implement this component.
  python:
    path: ./qhaigcchatcmpl.py
    attr: QHAIGCChatCompletions

View File

@@ -0,0 +1,32 @@
from __future__ import annotations
import openai
import typing
from . import chatcmpl
import openai.types.chat.chat_completion as chat_completion
class ShengSuanYunChatCompletions(chatcmpl.OpenAIChatCompletions):
    """Requester for the ShengSuanYun (ModelSpot.AI) ChatCompletion API."""

    client: openai.AsyncClient

    default_config: dict[str, typing.Any] = dict(
        base_url='https://router.shengsuanyun.com/api/v1',
        timeout=120,
    )

    async def _req(
        self,
        args: dict,
        extra_body: dict = {},
    ) -> chat_completion.ChatCompletion:
        """Send one chat-completions request with LangBot identification headers.

        Args:
            args: Keyword arguments forwarded to
                ``self.client.chat.completions.create``.
            extra_body: Extra request-body fields forwarded as ``extra_body``.

        Returns:
            The result of the awaited ``create`` call.
        """
        # NOTE(review): confirm this signature matches how the parent class
        # invokes `_req` (positional arguments in particular).
        identification_headers = {
            'HTTP-Referer': 'https://langbot.app',
            'X-Title': 'LangBot',
        }
        completion = await self.client.chat.completions.create(
            **args,
            extra_body=extra_body,
            extra_headers=identification_headers,
        )
        return completion

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 7.4 KiB

View File

@@ -0,0 +1,38 @@
# Component manifest for the ShengSuanYun (胜算云) requester.
apiVersion: v1
kind: LLMAPIRequester
metadata:
  name: shengsuanyun-chat-completions
  label:
    en_US: ShengSuanYun
    zh_Hans: 胜算云
  icon: shengsuanyun.svg
spec:
  # Configuration fields of this requester.
  config:
    - name: base_url
      label:
        en_US: Base URL
        zh_Hans: 基础 URL
      type: string
      required: true
      # Same value as default_config['base_url'] in shengsuanyun.py.
      default: "https://router.shengsuanyun.com/api/v1"
    - name: args
      label:
        en_US: Args
        zh_Hans: 附加参数
      type: object
      required: true
      default: {}
    - name: timeout
      label:
        en_US: Timeout
        zh_Hans: 超时时间
      # Spelled `integer`, as in the other requester manifests' timeout fields.
      type: integer
      required: true
      default: 120
  # NOTE(review): presumably the model kinds this requester can serve - confirm against the loader.
  support_type:
    - llm
    - text-embedding
execution:
  # Python module and class that implement this component.
  python:
    path: ./shengsuanyun.py
    attr: ShengSuanYunChatCompletions

View File

@@ -22,6 +22,9 @@ spec:
type: integer
required: true
default: 120
support_type:
- llm
- text-embedding
execution:
python:
path: ./siliconflowchatcmpl.py

View File

@@ -22,6 +22,8 @@ spec:
type: integer
required: true
default: 120
support_type:
- llm
execution:
python:
path: ./volcarkchatcmpl.py

View File

@@ -22,6 +22,8 @@ spec:
type: integer
required: true
default: 120
support_type:
- llm
execution:
python:
path: ./xaichatcmpl.py

View File

@@ -22,6 +22,8 @@ spec:
type: integer
required: true
default: 120
support_type:
- llm
execution:
python:
path: ./zhipuaichatcmpl.py