From 8dd16aac5137038e95ea499a4219f5aab21215c2 Mon Sep 17 00:00:00 2001 From: huanghuoguoguo <1051233107@qq.com> Date: Mon, 4 May 2026 12:19:18 +0800 Subject: [PATCH] restore: restore deleted provider requester files Restore individual provider requester implementations that were removed in de61b5d3. These files coexist with the unified litellmchat.py backend. Co-Authored-By: Claude Opus 4.7 --- .../modelmgr/requesters/302aichatcmpl.py | 17 + .../modelmgr/requesters/302aichatcmpl.yaml | 5 +- .../modelmgr/requesters/anthropicmsgs.py | 370 +++++++++ .../modelmgr/requesters/anthropicmsgs.yaml | 5 +- .../modelmgr/requesters/bailianchatcmpl.py | 242 ++++++ .../modelmgr/requesters/bailianchatcmpl.yaml | 5 +- .../provider/modelmgr/requesters/chatcmpl.py | 702 ++++++++++++++++++ .../modelmgr/requesters/chatcmpl.yaml | 6 +- .../modelmgr/requesters/coherererank.yaml | 5 +- .../modelmgr/requesters/compsharechatcmpl.py | 17 + .../requesters/compsharechatcmpl.yaml | 5 +- .../modelmgr/requesters/deepseekchatcmpl.py | 67 ++ .../modelmgr/requesters/deepseekchatcmpl.yaml | 5 +- .../modelmgr/requesters/geminichatcmpl.py | 205 +++++ .../modelmgr/requesters/geminichatcmpl.yaml | 5 +- .../modelmgr/requesters/giteeaichatcmpl.py | 15 + .../modelmgr/requesters/giteeaichatcmpl.yaml | 5 +- .../modelmgr/requesters/jiekouaichatcmpl.py | 208 ++++++ .../modelmgr/requesters/jiekouaichatcmpl.yaml | 5 +- .../modelmgr/requesters/jinarerank.yaml | 5 +- .../modelmgr/requesters/lmstudiochatcmpl.py | 17 + .../modelmgr/requesters/lmstudiochatcmpl.yaml | 5 +- .../modelmgr/requesters/modelscopechatcmpl.py | 561 ++++++++++++++ .../requesters/modelscopechatcmpl.yaml | 5 +- .../modelmgr/requesters/moonshotchatcmpl.py | 67 ++ .../modelmgr/requesters/moonshotchatcmpl.yaml | 5 +- .../modelmgr/requesters/newapichatcmpl.py | 17 + .../modelmgr/requesters/newapichatcmpl.yaml | 5 +- .../modelmgr/requesters/ollamachat.py | 314 ++++++++ .../modelmgr/requesters/ollamachat.yaml | 5 +- .../modelmgr/requesters/openrouterchatcmpl.py | 25 + .../requesters/openrouterchatcmpl.yaml | 5 +- .../modelmgr/requesters/ppiochatcmpl.py | 208 ++++++ .../modelmgr/requesters/ppiochatcmpl.yaml | 5 +- .../modelmgr/requesters/qhaigcchatcmpl.py | 17 + .../modelmgr/requesters/qhaigcchatcmpl.yaml | 5 +- .../modelmgr/requesters/shengsuanyun.py | 32 + .../modelmgr/requesters/shengsuanyun.yaml | 5 +- .../requesters/siliconflowchatcmpl.py | 17 + .../requesters/siliconflowchatcmpl.yaml | 5 +- .../modelmgr/requesters/spacechatcmpl.py | 17 + .../modelmgr/requesters/spacechatcmpl.yaml | 5 +- .../modelmgr/requesters/tokenpony.yaml | 5 +- .../modelmgr/requesters/tokenponychatcmpl.py | 17 + .../modelmgr/requesters/volcarkchatcmpl.py | 17 + .../modelmgr/requesters/volcarkchatcmpl.yaml | 5 +- .../modelmgr/requesters/voyageairerank.yaml | 5 +- .../modelmgr/requesters/xaichatcmpl.py | 17 + .../modelmgr/requesters/xaichatcmpl.yaml | 5 +- .../modelmgr/requesters/zhipuaichatcmpl.py | 17 + .../modelmgr/requesters/zhipuaichatcmpl.yaml | 5 +- 51 files changed, 3311 insertions(+), 28 deletions(-) create mode 100644 src/langbot/pkg/provider/modelmgr/requesters/302aichatcmpl.py create mode 100644 src/langbot/pkg/provider/modelmgr/requesters/anthropicmsgs.py create mode 100644 src/langbot/pkg/provider/modelmgr/requesters/bailianchatcmpl.py create mode 100644 src/langbot/pkg/provider/modelmgr/requesters/chatcmpl.py create mode 100644 src/langbot/pkg/provider/modelmgr/requesters/compsharechatcmpl.py create mode 100644 src/langbot/pkg/provider/modelmgr/requesters/deepseekchatcmpl.py create mode 100644 src/langbot/pkg/provider/modelmgr/requesters/geminichatcmpl.py create mode 100644 src/langbot/pkg/provider/modelmgr/requesters/giteeaichatcmpl.py create mode 100644 src/langbot/pkg/provider/modelmgr/requesters/jiekouaichatcmpl.py create mode 100644 src/langbot/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.py create mode 100644 src/langbot/pkg/provider/modelmgr/requesters/modelscopechatcmpl.py create mode 100644 src/langbot/pkg/provider/modelmgr/requesters/moonshotchatcmpl.py create mode 100644 src/langbot/pkg/provider/modelmgr/requesters/newapichatcmpl.py create mode 100644 src/langbot/pkg/provider/modelmgr/requesters/ollamachat.py create mode 100644 src/langbot/pkg/provider/modelmgr/requesters/openrouterchatcmpl.py create mode 100644 src/langbot/pkg/provider/modelmgr/requesters/ppiochatcmpl.py create mode 100644 src/langbot/pkg/provider/modelmgr/requesters/qhaigcchatcmpl.py create mode 100644 src/langbot/pkg/provider/modelmgr/requesters/shengsuanyun.py create mode 100644 src/langbot/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.py create mode 100644 src/langbot/pkg/provider/modelmgr/requesters/spacechatcmpl.py create mode 100644 src/langbot/pkg/provider/modelmgr/requesters/tokenponychatcmpl.py create mode 100644 src/langbot/pkg/provider/modelmgr/requesters/volcarkchatcmpl.py create mode 100644 src/langbot/pkg/provider/modelmgr/requesters/xaichatcmpl.py create mode 100644 src/langbot/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.py diff --git a/src/langbot/pkg/provider/modelmgr/requesters/302aichatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/302aichatcmpl.py new file mode 100644 index 00000000..40a41718 --- /dev/null +++ b/src/langbot/pkg/provider/modelmgr/requesters/302aichatcmpl.py @@ -0,0 +1,17 @@ +from __future__ import annotations + +import typing +import openai + +from . import chatcmpl + + +class AI302ChatCompletions(chatcmpl.OpenAIChatCompletions): + """302.AI ChatCompletion API 请求器""" + + client: openai.AsyncClient + + default_config: dict[str, typing.Any] = { + 'base_url': 'https://api.302.ai/v1', + 'timeout': 120, + } diff --git a/src/langbot/pkg/provider/modelmgr/requesters/302aichatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/302aichatcmpl.yaml index 4f3acf31..e4f70cae 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/302aichatcmpl.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/302aichatcmpl.yaml @@ -22,9 +22,12 @@ spec: type: integer required: true default: 120 - litellm_provider: openai support_type: - llm - text-embedding - rerank provider_category: maas +execution: + python: + path: ./302aichatcmpl.py + attr: AI302ChatCompletions diff --git a/src/langbot/pkg/provider/modelmgr/requesters/anthropicmsgs.py b/src/langbot/pkg/provider/modelmgr/requesters/anthropicmsgs.py new file mode 100644 index 00000000..1428dc88 --- /dev/null +++ b/src/langbot/pkg/provider/modelmgr/requesters/anthropicmsgs.py @@ -0,0 +1,370 @@ +from __future__ import annotations + +import typing +import json +import platform +import socket +import anthropic +import httpx + +from .. import errors, requester + +from ....utils import image +import langbot_plugin.api.entities.builtin.resource.tool as resource_tool +import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query +import langbot_plugin.api.entities.builtin.provider.message as provider_message + + +class AnthropicMessages(requester.ProviderAPIRequester): + """Anthropic Messages API 请求器""" + + client: anthropic.AsyncAnthropic + + default_config: dict[str, typing.Any] = { + 'base_url': 'https://api.anthropic.com', + 'timeout': 120, + } + + async def initialize(self): + # 兼容 Windows 缺失 TCP_KEEPINTVL 和 TCP_KEEPCNT 的问题 + if platform.system() == 'Windows': + if not hasattr(socket, 'TCP_KEEPINTVL'): + socket.TCP_KEEPINTVL = 0 + if not hasattr(socket, 'TCP_KEEPCNT'): + socket.TCP_KEEPCNT = 0 + httpx_client = anthropic._base_client.AsyncHttpxClientWrapper( + base_url=self.requester_cfg['base_url'], + # cast to a valid type because mypy doesn't understand our type narrowing + timeout=typing.cast(httpx.Timeout, self.requester_cfg['timeout']), + limits=anthropic._constants.DEFAULT_CONNECTION_LIMITS, + follow_redirects=True, + trust_env=True, + ) + + self.client = anthropic.AsyncAnthropic( + api_key='', + http_client=httpx_client, + base_url=self.requester_cfg['base_url'], + ) + + async def invoke_llm( + self, + query: pipeline_query.Query, + model: requester.RuntimeLLMModel, + messages: typing.List[provider_message.Message], + funcs: typing.List[resource_tool.LLMTool] = None, + extra_args: dict[str, typing.Any] = {}, + remove_think: bool = False, + ) -> provider_message.Message: + self.client.api_key = model.provider.token_mgr.get_token() + + args = extra_args.copy() + args['model'] = model.model_entity.name + + # 处理消息 + + # system + system_role_message = None + + for i, m in enumerate(messages): + if m.role == 'system': + system_role_message = m + + break + + if system_role_message: + messages.pop(i) + + if isinstance(system_role_message, provider_message.Message) and isinstance(system_role_message.content, str): + args['system'] = system_role_message.content + + req_messages = [] + + for m in messages: + if m.role == 'tool': + tool_call_id = m.tool_call_id + + req_messages.append( + { + 'role': 'user', + 'content': [ + { + 'type': 'tool_result', + 'tool_use_id': tool_call_id, + 'is_error': False, + 'content': [{'type': 'text', 'text': m.content}], + } + ], + } + ) + + continue + + msg_dict = m.dict(exclude_none=True) + + if isinstance(m.content, str) and m.content.strip() != '': + msg_dict['content'] = [{'type': 'text', 'text': m.content}] + elif isinstance(m.content, list): + for i, ce in enumerate(m.content): + if ce.type == 'image_base64': + image_b64, image_format = await image.extract_b64_and_format(ce.image_base64) + + alter_image_ele = { + 'type': 'image', + 'source': { + 'type': 'base64', + 'media_type': f'image/{image_format}', + 'data': image_b64, + }, + } + msg_dict['content'][i] = alter_image_ele + + if m.tool_calls: + for tool_call in m.tool_calls: + msg_dict['content'].append( + { + 'type': 'tool_use', + 'id': tool_call.id, + 'name': tool_call.function.name, + 'input': json.loads(tool_call.function.arguments), + } + ) + + del msg_dict['tool_calls'] + + req_messages.append(msg_dict) + + args['messages'] = req_messages + + if 'thinking' in args: + args['thinking'] = {'type': 'enabled', 'budget_tokens': 10000} + + if funcs: + tools = await self.ap.tool_mgr.generate_tools_for_anthropic(funcs) + + if tools: + args['tools'] = tools + + try: + resp = await self.client.messages.create(**args) + + args = { + 'content': '', + 'role': resp.role, + } + assert type(resp) is anthropic.types.message.Message + + for block in resp.content: + if not remove_think and block.type == 'thinking': + args['content'] = '\n' + block.thinking + '\n\n' + args['content'] + elif block.type == 'text': + args['content'] += block.text + elif block.type == 'tool_use': + assert type(block) is anthropic.types.tool_use_block.ToolUseBlock + tool_call = provider_message.ToolCall( + id=block.id, + type='function', + function=provider_message.FunctionCall(name=block.name, arguments=json.dumps(block.input)), + ) + if 'tool_calls' not in args: + args['tool_calls'] = [] + args['tool_calls'].append(tool_call) + + return provider_message.Message(**args) + except anthropic.AuthenticationError as e: + raise errors.RequesterError(f'api-key 无效: {e.message}') + except anthropic.BadRequestError as e: + raise errors.RequesterError(str(e.message)) + except anthropic.NotFoundError as e: + if 'model: ' in str(e): + raise errors.RequesterError(f'模型无效: {e.message}') + else: + raise errors.RequesterError(f'请求地址无效: {e.message}') + + async def invoke_llm_stream( + self, + query: pipeline_query.Query, + model: requester.RuntimeLLMModel, + messages: typing.List[provider_message.Message], + funcs: typing.List[resource_tool.LLMTool] = None, + extra_args: dict[str, typing.Any] = {}, + remove_think: bool = False, + ) -> provider_message.Message: + self.client.api_key = model.provider.token_mgr.get_token() + + args = extra_args.copy() + args['model'] = model.model_entity.name + args['stream'] = True + + # 处理消息 + + # system + system_role_message = None + + for i, m in enumerate(messages): + if m.role == 'system': + system_role_message = m + + break + + if system_role_message: + messages.pop(i) + + if isinstance(system_role_message, provider_message.Message) and isinstance(system_role_message.content, str): + args['system'] = system_role_message.content + + req_messages = [] + + for m in messages: + if m.role == 'tool': + tool_call_id = m.tool_call_id + + req_messages.append( + { + 'role': 'user', + 'content': [ + { + 'type': 'tool_result', + 'tool_use_id': tool_call_id, + 'is_error': False, # 暂时直接写false + 'content': [ + {'type': 'text', 'text': m.content} + ], # 这里要是list包裹,应该是多个返回的情况?type类型好像也可以填其他的,暂时只写text + } + ], + } + ) + + continue + + msg_dict = m.dict(exclude_none=True) + + if isinstance(m.content, str) and m.content.strip() != '': + msg_dict['content'] = [{'type': 'text', 'text': m.content}] + elif isinstance(m.content, list): + for i, ce in enumerate(m.content): + if ce.type == 'image_base64': + image_b64, image_format = await image.extract_b64_and_format(ce.image_base64) + + alter_image_ele = { + 'type': 'image', + 'source': { + 'type': 'base64', + 'media_type': f'image/{image_format}', + 'data': image_b64, + }, + } + msg_dict['content'][i] = alter_image_ele + if isinstance(msg_dict['content'], str) and msg_dict['content'] == '': + msg_dict['content'] = [] # 这里不知道为什么会莫名有个空导致content为字符 + if m.tool_calls: + for tool_call in m.tool_calls: + msg_dict['content'].append( + { + 'type': 'tool_use', + 'id': tool_call.id, + 'name': tool_call.function.name, + 'input': json.loads(tool_call.function.arguments), + } + ) + + del msg_dict['tool_calls'] + + req_messages.append(msg_dict) + if 'thinking' in args: + args['thinking'] = {'type': 'enabled', 'budget_tokens': 10000} + + args['messages'] = req_messages + + if funcs: + tools = await self.ap.tool_mgr.generate_tools_for_anthropic(funcs) + + if tools: + args['tools'] = tools + + try: + role = 'assistant' # 默认角色 + # chunk_idx = 0 + think_started = False + think_ended = False + finish_reason = False + tool_name = '' + tool_id = '' + async for chunk in await self.client.messages.create(**args): + content = '' + tool_call = {'id': None, 'function': {'name': None, 'arguments': None}, 'type': 'function'} + if isinstance( + chunk, anthropic.types.raw_content_block_start_event.RawContentBlockStartEvent + ): # 记录开始 + if chunk.content_block.type == 'tool_use': + if chunk.content_block.name is not None: + tool_name = chunk.content_block.name + if chunk.content_block.id is not None: + tool_id = chunk.content_block.id + + tool_call['function']['name'] = tool_name + tool_call['function']['arguments'] = '' + tool_call['id'] = tool_id + + if not remove_think: + if chunk.content_block.type == 'thinking' and not remove_think: + think_started = True + elif chunk.content_block.type == 'text' and chunk.index != 0 and not remove_think: + think_ended = True + continue + elif isinstance(chunk, anthropic.types.raw_content_block_delta_event.RawContentBlockDeltaEvent): + if chunk.delta.type == 'thinking_delta': + if think_started: + think_started = False + content = '\n' + chunk.delta.thinking + elif remove_think: + continue + else: + content = chunk.delta.thinking + elif chunk.delta.type == 'text_delta': + if think_ended: + think_ended = False + content = '\n\n' + chunk.delta.text + else: + content = chunk.delta.text + elif chunk.delta.type == 'input_json_delta': + tool_call['function']['arguments'] = chunk.delta.partial_json + tool_call['function']['name'] = tool_name + tool_call['id'] = tool_id + elif isinstance(chunk, anthropic.types.raw_content_block_stop_event.RawContentBlockStopEvent): + continue # 记录raw_content_block结束的 + + elif isinstance(chunk, anthropic.types.raw_message_delta_event.RawMessageDeltaEvent): + if chunk.delta.stop_reason == 'end_turn': + finish_reason = True + elif isinstance(chunk, anthropic.types.raw_message_stop_event.RawMessageStopEvent): + continue # 这个好像是完全结束 + else: + # print(chunk) + self.ap.logger.debug(f'anthropic chunk: {chunk}') + continue + + args = { + 'content': content, + 'role': role, + 'is_final': finish_reason, + 'tool_calls': None if tool_call['id'] is None else [tool_call], + } + # if chunk_idx == 0: + # chunk_idx += 1 + # continue + + # assert type(chunk) is anthropic.types.message.Chunk + + yield provider_message.MessageChunk(**args) + + # return llm_entities.Message(**args) + except anthropic.AuthenticationError as e: + raise errors.RequesterError(f'api-key 无效: {e.message}') + except anthropic.BadRequestError as e: + raise errors.RequesterError(str(e.message)) + except anthropic.NotFoundError as e: + if 'model: ' in str(e): + raise errors.RequesterError(f'模型无效: {e.message}') + else: + raise errors.RequesterError(f'请求地址无效: {e.message}') diff --git a/src/langbot/pkg/provider/modelmgr/requesters/anthropicmsgs.yaml b/src/langbot/pkg/provider/modelmgr/requesters/anthropicmsgs.yaml index c4785fd8..0ef60d3e 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/anthropicmsgs.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/anthropicmsgs.yaml @@ -22,7 +22,10 @@ spec: type: integer required: true default: 120 - litellm_provider: anthropic support_type: - llm provider_category: manufacturer +execution: + python: + path: ./anthropicmsgs.py + attr: AnthropicMessages diff --git a/src/langbot/pkg/provider/modelmgr/requesters/bailianchatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/bailianchatcmpl.py new file mode 100644 index 00000000..9da6e1b4 --- /dev/null +++ b/src/langbot/pkg/provider/modelmgr/requesters/bailianchatcmpl.py @@ -0,0 +1,242 @@ +from __future__ import annotations + +import typing +import dashscope +import openai + +from . import modelscopechatcmpl +from .. import requester +import langbot_plugin.api.entities.builtin.resource.tool as resource_tool +import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query +import langbot_plugin.api.entities.builtin.provider.message as provider_message + + +class BailianChatCompletions(modelscopechatcmpl.ModelScopeChatCompletions): + """阿里云百炼大模型平台 ChatCompletion API 请求器""" + + client: openai.AsyncClient + + default_config: dict[str, typing.Any] = { + 'base_url': 'https://dashscope.aliyuncs.com/compatible-mode/v1', + 'timeout': 120, + } + + async def _closure_stream( + self, + query: pipeline_query.Query, + req_messages: list[dict], + use_model: requester.RuntimeLLMModel, + use_funcs: list[resource_tool.LLMTool] = None, + extra_args: dict[str, typing.Any] = {}, + remove_think: bool = False, + ) -> provider_message.Message | typing.AsyncGenerator[provider_message.MessageChunk, None]: + self.client.api_key = use_model.provider.token_mgr.get_token() + + args = {} + args['model'] = use_model.model_entity.name + + if use_funcs: + tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs) + + if tools: + args['tools'] = tools + + # 设置此次请求中的messages + messages = req_messages.copy() + + is_use_dashscope_call = False # 是否使用阿里原生库调用 + is_enable_multi_model = True # 是否支持多轮对话 + use_time_num = 0 # 模型已调用次数,防止存在多文件时重复调用 + use_time_ids = [] # 已调用的ID列表 + message_id = 0 # 记录消息序号 + + for msg in messages: + # print(msg) + if 'content' in msg and isinstance(msg['content'], list): + for me in msg['content']: + if me['type'] == 'image_base64': + me['image_url'] = {'url': me['image_base64']} + me['type'] = 'image_url' + del me['image_base64'] + elif me['type'] == 'file_url' and '.' in me.get('file_name', ''): + # 1. 视频文件推理 + # https://bailian.console.aliyun.com/?tab=doc#/doc/?type=model&url=2845871 + file_type = me.get('file_name').lower().split('.')[-1] + if file_type in ['mp4', 'avi', 'mkv', 'mov', 'flv', 'wmv']: + me['type'] = 'video_url' + me['video_url'] = {'url': me['file_url']} + del me['file_url'] + del me['file_name'] + use_time_num += 1 + use_time_ids.append(message_id) + is_enable_multi_model = False + # 2. 语音文件识别, 无法通过openai的audio字段传递,暂时不支持 + # https://bailian.console.aliyun.com/?tab=doc#/doc/?type=model&url=2979031 + elif file_type in [ + 'aac', + 'amr', + 'aiff', + 'flac', + 'm4a', + 'mp3', + 'mpeg', + 'ogg', + 'opus', + 'wav', + 'webm', + 'wma', + ]: + me['audio'] = me['file_url'] + me['type'] = 'audio' + del me['file_url'] + del me['type'] + del me['file_name'] + is_use_dashscope_call = True + use_time_num += 1 + use_time_ids.append(message_id) + is_enable_multi_model = False + message_id += 1 + + # 使用列表推导式,保留不在 use_time_ids[:-1] 中的元素,仅保留最后一个多媒体消息 + if not is_enable_multi_model and use_time_num > 1: + messages = [msg for idx, msg in enumerate(messages) if idx not in use_time_ids[:-1]] + + if not is_enable_multi_model: + messages = [msg for msg in messages if 'resp_message_id' not in msg] + + args['messages'] = messages + args['stream'] = True + + # 流式处理状态 + # tool_calls_map: dict[str, provider_message.ToolCall] = {} + chunk_idx = 0 + thinking_started = False + thinking_ended = False + role = 'assistant' # 默认角色 + + if is_use_dashscope_call: + response = dashscope.MultiModalConversation.call( + # 若没有配置环境变量,请用百炼API Key将下行替换为:api_key = "sk-xxx" + api_key=use_model.provider.token_mgr.get_token(), + model=use_model.model_entity.name, + messages=messages, + result_format='message', + asr_options={ + # "language": "zh", # 可选,若已知音频的语种,可通过该参数指定待识别语种,以提升识别准确率 + 'enable_lid': True, + 'enable_itn': False, + }, + stream=True, + ) + content_length_list = [] + previous_length = 0 # 记录上一次的内容长度 + for res in response: + chunk = res['output'] + # 解析 chunk 数据 + if hasattr(chunk, 'choices') and chunk.choices: + choice = chunk.choices[0] + delta_content = choice['message'].content[0]['text'] + finish_reason = choice['finish_reason'] + content_length_list.append(len(delta_content)) + else: + delta_content = '' + finish_reason = None + + # 跳过空的第一个 chunk(只有 role 没有内容) + if chunk_idx == 0 and not delta_content: + chunk_idx += 1 + continue + + # 检查 content_length_list 是否有足够的数据 + if len(content_length_list) >= 2: + now_content = delta_content[previous_length : content_length_list[-1]] + previous_length = content_length_list[-1] # 更新上一次的长度 + else: + now_content = delta_content # 第一次循环时直接使用 delta_content + previous_length = len(delta_content) # 更新上一次的长度 + + # 构建 MessageChunk - 只包含增量内容 + chunk_data = { + 'role': role, + 'content': now_content if now_content else None, + 'is_final': bool(finish_reason) and finish_reason != 'null', + } + + # 移除 None 值 + chunk_data = {k: v for k, v in chunk_data.items() if v is not None} + yield provider_message.MessageChunk(**chunk_data) + chunk_idx += 1 + else: + async for chunk in self._req_stream(args, extra_body=extra_args): + # 解析 chunk 数据 + if hasattr(chunk, 'choices') and chunk.choices: + choice = chunk.choices[0] + delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {} + finish_reason = getattr(choice, 'finish_reason', None) + else: + delta = {} + finish_reason = None + + # 从第一个 chunk 获取 role,后续使用这个 role + if 'role' in delta and delta['role']: + role = delta['role'] + + # 获取增量内容 + delta_content = delta.get('content', '') + reasoning_content = delta.get('reasoning_content', '') + + # 处理 reasoning_content + if reasoning_content: + # accumulated_reasoning += reasoning_content + # 如果设置了 remove_think,跳过 reasoning_content + if remove_think: + chunk_idx += 1 + continue + + # 第一次出现 reasoning_content,添加 开始标签 + if not thinking_started: + thinking_started = True + delta_content = '\n' + reasoning_content + else: + # 继续输出 reasoning_content + delta_content = reasoning_content + elif thinking_started and not thinking_ended and delta_content: + # reasoning_content 结束,normal content 开始,添加 结束标签 + thinking_ended = True + delta_content = '\n\n' + delta_content + + # 处理工具调用增量 + if delta.get('tool_calls'): + for tool_call in delta['tool_calls']: + if tool_call['id'] != '': + tool_id = tool_call['id'] + if tool_call['function']['name'] is not None: + tool_name = tool_call['function']['name'] + + if tool_call['type'] is None: + tool_call['type'] = 'function' + tool_call['id'] = tool_id + tool_call['function']['name'] = tool_name + tool_call['function']['arguments'] = ( + '' if tool_call['function']['arguments'] is None else tool_call['function']['arguments'] + ) + + # 跳过空的第一个 chunk(只有 role 没有内容) + if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'): + chunk_idx += 1 + continue + + # 构建 MessageChunk - 只包含增量内容 + chunk_data = { + 'role': role, + 'content': delta_content if delta_content else None, + 'tool_calls': delta.get('tool_calls'), + 'is_final': bool(finish_reason), + } + + # 移除 None 值 + chunk_data = {k: v for k, v in chunk_data.items() if v is not None} + + yield provider_message.MessageChunk(**chunk_data) + chunk_idx += 1 + # return diff --git a/src/langbot/pkg/provider/modelmgr/requesters/bailianchatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/bailianchatcmpl.yaml index ad2c28db..fc5998c4 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/bailianchatcmpl.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/bailianchatcmpl.yaml @@ -22,8 +22,11 @@ spec: type: integer required: true default: 120 - litellm_provider: openai support_type: - llm - rerank provider_category: maas +execution: + python: + path: ./bailianchatcmpl.py + attr: BailianChatCompletions diff --git a/src/langbot/pkg/provider/modelmgr/requesters/chatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/chatcmpl.py new file mode 100644 index 00000000..da24bda0 --- /dev/null +++ b/src/langbot/pkg/provider/modelmgr/requesters/chatcmpl.py @@ -0,0 +1,702 @@ +from __future__ import annotations + +import asyncio +import typing + +import openai +import openai.types.chat.chat_completion as chat_completion_module +import httpx + +from .. import errors, requester +import langbot_plugin.api.entities.builtin.resource.tool as resource_tool +import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query +import langbot_plugin.api.entities.builtin.provider.message as provider_message + + +class OpenAIChatCompletions(requester.ProviderAPIRequester): + """OpenAI ChatCompletion API 请求器""" + + client: openai.AsyncClient + + default_config: dict[str, typing.Any] = { + 'base_url': 'https://api.openai.com/v1', + 'timeout': 120, + } + + async def initialize(self): + self.client = openai.AsyncClient( + api_key='', + base_url=self.requester_cfg['base_url'].replace(' ', ''), + timeout=self.requester_cfg['timeout'], + http_client=httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']), + ) + + def _mask_api_key(self, api_key: str | None) -> str: + if not api_key: + return '' + if len(api_key) <= 8: + return '****' + return f'{api_key[:4]}...{api_key[-4:]}' + + def _infer_model_type(self, model_id: str) -> str: + normalized_model_id = (model_id or '').lower() + embedding_keywords = ( + 'embedding', + 'embed', + 'bge-', + 'e5-', + 'm3e', + 'gte-', + 'multilingual-e5', + 'text-embedding', + ) + return 'embedding' if any(keyword in normalized_model_id for keyword in embedding_keywords) else 'llm' + + def _infer_model_abilities(self, item: dict[str, typing.Any], model_id: str) -> list[str]: + normalized_model_id = (model_id or '').lower() + abilities: set[str] = set() + + def _flatten(value: typing.Any) -> list[str]: + if value is None: + return [] + if isinstance(value, str): + return [value.lower()] + if isinstance(value, dict): + flattened: list[str] = [] + for nested_value in value.values(): + flattened.extend(_flatten(nested_value)) + return flattened + if isinstance(value, (list, tuple, set)): + flattened: list[str] = [] + for nested_value in value: + flattened.extend(_flatten(nested_value)) + return flattened + return [str(value).lower()] + + capability_tokens = _flatten(item.get('capabilities')) + capability_tokens.extend(_flatten(item.get('modalities'))) + capability_tokens.extend(_flatten(item.get('input_modalities'))) + capability_tokens.extend(_flatten(item.get('output_modalities'))) + capability_tokens.extend(_flatten(item.get('supported_generation_methods'))) + capability_tokens.extend(_flatten(item.get('supported_parameters'))) + capability_tokens.extend(_flatten(item.get('architecture'))) + + combined_tokens = capability_tokens + [normalized_model_id] + + vision_keywords = ( + 'vision', + 'image', + 'file', + 'video', + 'multimodal', + 'vl', + 'ocr', + 'omni', + ) + function_call_keywords = ( + 'function', + 'tool', + 'tools', + 'tool_choice', + 'tool_call', + 'tool-use', + 'tool_use', + ) + + if any(any(keyword in token for keyword in vision_keywords) for token in combined_tokens): + abilities.add('vision') + + if any(any(keyword in token for keyword in function_call_keywords) for token in combined_tokens): + abilities.add('func_call') + + return sorted(abilities) + + def _normalize_modalities(self, value: typing.Any) -> list[str]: + normalized: list[str] = [] + + def _collect(item: typing.Any): + if item is None: + return + if isinstance(item, str): + for part in item.replace('->', ',').replace('+', ',').split(','): + token = part.strip().lower() + if token and token not in normalized: + normalized.append(token) + return + if isinstance(item, dict): + for nested in item.values(): + _collect(nested) + return + if isinstance(item, (list, tuple, set)): + for nested in item: + _collect(nested) + return + + _collect(value) + return normalized + + def _extract_scan_metadata(self, item: dict[str, typing.Any], model_id: str) -> dict[str, typing.Any]: + display_name = item.get('name') + if not isinstance(display_name, str) or not display_name.strip() or display_name == model_id: + display_name = '' + + description = item.get('description') + if not isinstance(description, str) or not description.strip(): + description = '' + + context_length = item.get('context_length') + if context_length is None and isinstance(item.get('top_provider'), dict): + context_length = item['top_provider'].get('context_length') + + if not isinstance(context_length, int): + try: + context_length = int(context_length) if context_length is not None else None + except (TypeError, ValueError): + context_length = None + + input_modalities = self._normalize_modalities(item.get('input_modalities')) + output_modalities = self._normalize_modalities(item.get('output_modalities')) + + if isinstance(item.get('architecture'), dict): + if not input_modalities: + input_modalities = self._normalize_modalities(item['architecture'].get('input_modalities')) + if not output_modalities: + output_modalities = self._normalize_modalities(item['architecture'].get('output_modalities')) + + owned_by = item.get('owned_by') + if not isinstance(owned_by, str) or not owned_by.strip(): + owned_by = '' + + return { + 'display_name': display_name or None, + 'description': description or None, + 'context_length': context_length, + 'owned_by': owned_by or None, + 'input_modalities': input_modalities, + 'output_modalities': output_modalities, + } + + async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]: + headers = {} + if api_key: + headers['Authorization'] = f'Bearer {api_key}' + + models_url = f'{self.requester_cfg["base_url"].rstrip("/")}/models' + async with httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']) as client: + response = await client.get(models_url, headers=headers) + response.raise_for_status() + payload = response.json() + + models = [] + for item in payload.get('data', []): + model_id = item.get('id') + if not model_id: + continue + models.append( + { + 'id': model_id, + 'name': model_id, + 'type': self._infer_model_type(model_id), + 'abilities': self._infer_model_abilities(item, model_id), + **self._extract_scan_metadata(item, model_id), + } + ) + + models.sort(key=lambda item: (item['type'] != 'llm', item['name'].lower())) + return { + 'models': models, + 'debug': { + 'request': { + 'method': 'GET', + 'url': models_url, + 'headers': { + 'Authorization': f'Bearer {self._mask_api_key(api_key)}' if api_key else '', + }, + }, + 'response': payload, + }, + } + + async def _req( + self, + args: dict, + extra_body: dict = {}, + ) -> chat_completion_module.ChatCompletion: + return await self.client.chat.completions.create(**args, extra_body=extra_body) + + async def _req_stream( + self, + args: dict, + extra_body: dict = {}, + ): + async for chunk in await self.client.chat.completions.create(**args, extra_body=extra_body): + yield chunk + + async def _make_msg( + self, + chat_completion: chat_completion_module.ChatCompletion, + remove_think: bool = False, + ) -> provider_message.Message: + if not isinstance(chat_completion, chat_completion_module.ChatCompletion): + raise TypeError(f'Expected ChatCompletion, got {type(chat_completion).__name__}: {chat_completion[:16]}') + + chatcmpl_message = chat_completion.choices[0].message.model_dump() + + # 确保 role 字段存在且不为 None + if 'role' not in chatcmpl_message or chatcmpl_message['role'] is None: + chatcmpl_message['role'] = 'assistant' + + # 处理思维链 + content = chatcmpl_message.get('content', '') + reasoning_content = chatcmpl_message.get('reasoning_content', None) + + processed_content, _ = await self._process_thinking_content( + content=content, reasoning_content=reasoning_content, remove_think=remove_think + ) + + chatcmpl_message['content'] = processed_content + + # 移除 reasoning_content 字段,避免传递给 Message + if 'reasoning_content' in chatcmpl_message: + del chatcmpl_message['reasoning_content'] + + message = provider_message.Message(**chatcmpl_message) + + return message + + async def _process_thinking_content( + self, + content: str, + reasoning_content: str = None, + remove_think: bool = False, + ) -> tuple[str, str]: + """处理思维链内容 + + Args: + content: 原始内容 + reasoning_content: reasoning_content 字段内容 + remove_think: 是否移除思维链 + + Returns: + (处理后的内容, 提取的思维链内容) + """ + thinking_content = '' + + # 1. 从 reasoning_content 提取思维链 + if reasoning_content: + thinking_content = reasoning_content + + # 2. 从 content 中提取 标签内容 + if content and '' in content and '' in content: + import re + + think_pattern = r'(.*?)' + think_matches = re.findall(think_pattern, content, re.DOTALL) + if think_matches: + # 如果已有 reasoning_content,则追加 + if thinking_content: + thinking_content += '\n' + '\n'.join(think_matches) + else: + thinking_content = '\n'.join(think_matches) + # 移除 content 中的 标签 + content = re.sub(think_pattern, '', content, flags=re.DOTALL).strip() + + # 3. 根据 remove_think 参数决定是否保留思维链 + if remove_think: + return content, '' + else: + # 如果有思维链内容,将其以 格式添加到 content 开头 + if thinking_content: + content = f'\n{thinking_content}\n\n{content}'.strip() + return content, thinking_content + + async def _closure_stream( + self, + query: pipeline_query.Query, + req_messages: list[dict], + use_model: requester.RuntimeLLMModel, + use_funcs: list[resource_tool.LLMTool] = None, + extra_args: dict[str, typing.Any] = {}, + remove_think: bool = False, + ) -> provider_message.MessageChunk: + self.client.api_key = use_model.provider.token_mgr.get_token() + + args = {} + args['model'] = use_model.model_entity.name + + if use_funcs: + tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs) + if tools: + args['tools'] = tools + + # 设置此次请求中的messages + messages = req_messages.copy() + + # 检查vision + for msg in messages: + if 'content' in msg and isinstance(msg['content'], list): + for me in msg['content']: + if me['type'] == 'image_base64': + me['image_url'] = {'url': me['image_base64']} + me['type'] = 'image_url' + del me['image_base64'] + + args['messages'] = messages + args['stream'] = True + + # 流式处理状态 + # tool_calls_map: dict[str, provider_message.ToolCall] = {} + chunk_idx = 0 + thinking_started = False + thinking_ended = False + role = 'assistant' # 默认角色 + tool_id = '' + tool_name = '' + # accumulated_reasoning = '' # 仅用于判断何时结束思维链 + + async for chunk in self._req_stream(args, extra_body=extra_args): + # 解析 chunk 数据 + + if hasattr(chunk, 'choices') and chunk.choices: + choice = chunk.choices[0] + delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {} + + finish_reason = getattr(choice, 'finish_reason', None) + else: + delta = {} + finish_reason = None + # 从第一个 chunk 获取 role,后续使用这个 role + if 'role' in delta and delta['role']: + role = delta['role'] + + # 获取增量内容 + delta_content = delta.get('content', '') + reasoning_content = delta.get('reasoning_content', '') + + # 处理 reasoning_content + if reasoning_content: + # accumulated_reasoning += reasoning_content + # 如果设置了 remove_think,跳过 reasoning_content + if remove_think: + chunk_idx += 1 + continue + + # 第一次出现 reasoning_content,添加 开始标签 + if not thinking_started: + thinking_started = True + delta_content = '\n' + reasoning_content + else: + # 继续输出 reasoning_content + delta_content = reasoning_content + elif thinking_started and not thinking_ended and delta_content: + # reasoning_content 结束,normal content 开始,添加 结束标签 + thinking_ended = True + delta_content = '\n\n' + delta_content + + # 处理 content 中已有的 标签(如果需要移除) + # if delta_content and remove_think and '' in delta_content: + # import re + # + # # 移除 标签及其内容 + # delta_content = re.sub(r'.*?', '', delta_content, flags=re.DOTALL) + + # 处理工具调用增量 + # delta_tool_calls = None + if delta.get('tool_calls'): + for tool_call in delta['tool_calls']: + if tool_call['id'] and tool_call['function']['name']: + tool_id = tool_call['id'] + tool_name = tool_call['function']['name'] + else: + tool_call['id'] = tool_id + tool_call['function']['name'] = tool_name + if tool_call['type'] is None: + tool_call['type'] = 'function' + + # 跳过空的第一个 chunk(只有 role 没有内容) + if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'): + chunk_idx += 1 + continue + # 构建 MessageChunk - 只包含增量内容 + chunk_data = { + 'role': role, + 'content': delta_content if delta_content else None, + 'tool_calls': delta.get('tool_calls'), + 'is_final': bool(finish_reason), + } + + # 移除 None 值 + chunk_data = {k: v for k, v in chunk_data.items() if v is not None} + + yield provider_message.MessageChunk(**chunk_data) + chunk_idx += 1 + + async def _closure( + self, + query: pipeline_query.Query, + req_messages: list[dict], + use_model: requester.RuntimeLLMModel, + use_funcs: list[resource_tool.LLMTool] = None, + extra_args: dict[str, typing.Any] = {}, + remove_think: bool = False, + ) -> tuple[provider_message.Message, dict]: + self.client.api_key = use_model.provider.token_mgr.get_token() + + args = {} + args['model'] = use_model.model_entity.name + + if use_funcs: + tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs) + + if tools: + args['tools'] = tools + + # 设置此次请求中的messages + messages = req_messages.copy() + + # 检查vision + for msg in messages: + if 'content' in msg and isinstance(msg['content'], list): + for me in msg['content']: + if me['type'] == 'image_base64': + me['image_url'] = {'url': me['image_base64']} + me['type'] = 'image_url' + del me['image_base64'] + + args['messages'] = messages + + # 发送请求 + + resp = await self._req(args, extra_body=extra_args) + # 处理请求结果 + message = await self._make_msg(resp, remove_think) + + # Extract token usage from response + usage_info = {} + if hasattr(resp, 'usage') and resp.usage: + usage_info['input_tokens'] = resp.usage.prompt_tokens or 0 + usage_info['output_tokens'] = resp.usage.completion_tokens or 0 + usage_info['total_tokens'] = resp.usage.total_tokens or 0 + + return message, usage_info + + async def invoke_llm( + self, + query: pipeline_query.Query, + model: requester.RuntimeLLMModel, + messages: typing.List[provider_message.Message], + funcs: typing.List[resource_tool.LLMTool] = None, + extra_args: dict[str, typing.Any] = {}, + remove_think: bool = False, + ) -> tuple[provider_message.Message, dict]: + """Invoke LLM and return message with usage info""" + req_messages = [] # req_messages 仅用于类内,外部同步由 query.messages 进行 + for m in messages: + msg_dict = m.dict(exclude_none=True) + content = msg_dict.get('content') + if isinstance(content, list): + # 检查 content 列表中是否每个部分都是文本 + if all(isinstance(part, dict) and part.get('type') == 'text' for part in content): + # 将所有文本部分合并为一个字符串 + msg_dict['content'] = '\n'.join(part['text'] for part in content) + req_messages.append(msg_dict) + + try: + msg, usage_info = await self._closure( + query=query, + req_messages=req_messages, + use_model=model, + use_funcs=funcs, + extra_args=extra_args, + remove_think=remove_think, + ) + return msg, usage_info + except asyncio.TimeoutError: + raise errors.RequesterError('请求超时') + except openai.BadRequestError as e: + error_message = str(e.message) if hasattr(e, 'message') else str(e) + if 'context_length_exceeded' in str(e): + raise errors.RequesterError(f'上文过长,请重置会话: {error_message}') + else: + raise errors.RequesterError(f'请求参数错误: {error_message}') + except openai.AuthenticationError as e: + error_message = str(e.message) if hasattr(e, 'message') else str(e) + raise errors.RequesterError(f'无效的 api-key: {error_message}') + except openai.NotFoundError as e: + error_message = str(e.message) if hasattr(e, 'message') else str(e) + raise errors.RequesterError(f'请求路径错误: {error_message}') + except openai.RateLimitError as e: + error_message = str(e.message) if hasattr(e, 'message') else str(e) + raise errors.RequesterError(f'请求过于频繁或余额不足: {error_message}') + except openai.APIConnectionError as e: + error_message = f'连接错误: {str(e)}' + raise errors.RequesterError(error_message) + except openai.APIError as e: + error_message = str(e.message) if hasattr(e, 'message') else str(e) + raise errors.RequesterError(f'请求错误: {error_message}') + + async def invoke_embedding( + self, + model: requester.RuntimeEmbeddingModel, + input_text: list[str], + extra_args: dict[str, typing.Any] = {}, + ) -> tuple[list[list[float]], dict]: + """调用 Embedding API, returns (embeddings, usage_info)""" + self.client.api_key = model.provider.token_mgr.get_token() + + args = { + 'model': model.model_entity.name, + 'input': input_text, + } + + if model.model_entity.extra_args: + args.update(model.model_entity.extra_args) + + args.update(extra_args) + + try: + resp = await self.client.embeddings.create(**args) + + # Extract usage info + usage_info = {} + if hasattr(resp, 'usage') and resp.usage: + usage_info['prompt_tokens'] = resp.usage.prompt_tokens or 0 + usage_info['total_tokens'] = resp.usage.total_tokens or 0 + + return [d.embedding for d in resp.data], usage_info + except asyncio.TimeoutError: + raise errors.RequesterError('请求超时') + except openai.BadRequestError as e: + raise errors.RequesterError(f'请求参数错误: {e.message}') + + async def invoke_llm_stream( + self, + query: pipeline_query.Query, + model: requester.RuntimeLLMModel, + messages: typing.List[provider_message.Message], + funcs: typing.List[resource_tool.LLMTool] = None, + extra_args: dict[str, typing.Any] = {}, + remove_think: bool = False, + ) -> provider_message.MessageChunk: + req_messages = [] # req_messages 仅用于类内,外部同步由 query.messages 进行 + for m in messages: + msg_dict = m.dict(exclude_none=True) + content = msg_dict.get('content') + if isinstance(content, list): + # 检查 content 列表中是否每个部分都是文本 + if all(isinstance(part, dict) and part.get('type') == 'text' for part in content): + # 将所有文本部分合并为一个字符串 + msg_dict['content'] = '\n'.join(part['text'] for part in content) + req_messages.append(msg_dict) + + try: + async for item in self._closure_stream( + query=query, + req_messages=req_messages, + use_model=model, + use_funcs=funcs, + extra_args=extra_args, + remove_think=remove_think, + ): + yield item + + except asyncio.TimeoutError: + raise errors.RequesterError('请求超时') + except openai.BadRequestError as e: + if 'context_length_exceeded' in e.message: + raise errors.RequesterError(f'上文过长,请重置会话: {e.message}') + else: + raise errors.RequesterError(f'请求参数错误: {e.message}') + except openai.AuthenticationError as e: + raise errors.RequesterError(f'无效的 api-key: {e.message}') + except openai.NotFoundError as e: + raise errors.RequesterError(f'请求路径错误: {e.message}') + except openai.RateLimitError as e: + raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}') + except openai.APIError as e: + raise errors.RequesterError(f'请求错误: {e.message}') + + async def invoke_rerank( + self, + model: requester.RuntimeRerankModel, + query: str, + documents: typing.List[str], + extra_args: dict[str, typing.Any] = {}, + ) -> typing.List[dict]: + """Standard /rerank endpoint (Jina/Cohere/SiliconFlow/Voyage/DashScope compatible) + + Supports extra_args from model.extra_args: + - rerank_url: full URL override (e.g. "https://dashscope.aliyuncs.com/compatible-api/v1/reranks") + - rerank_path: path override appended to base_url (e.g. "reranks" instead of default "rerank") + - Any other fields are merged into the request payload. + """ + api_key = model.provider.token_mgr.get_token() + base_url = self.requester_cfg.get('base_url', '').rstrip('/') + timeout = self.requester_cfg.get('timeout', 120) + + merged_args = {} + if model.model_entity.extra_args: + merged_args.update(model.model_entity.extra_args) + if extra_args: + merged_args.update(extra_args) + + rerank_url = merged_args.pop('rerank_url', None) + rerank_path = merged_args.pop('rerank_path', 'rerank') + if not rerank_url: + rerank_url = f'{base_url}/{rerank_path}' + + headers = { + 'Content-Type': 'application/json', + 'Authorization': f'Bearer {api_key}', + } + + payload = { + 'model': model.model_entity.name, + 'query': query, + 'documents': documents[:64], + 'top_n': min(len(documents), 64), + } + + if merged_args: + payload.update(merged_args) + + try: + async with httpx.AsyncClient(trust_env=True, timeout=timeout) as client: + resp = await client.post(rerank_url, headers=headers, json=payload) + resp.raise_for_status() + data = resp.json() + + results = self._parse_rerank_response(data) + + if results: + scores = [r.get('relevance_score', 0.0) for r in results] + min_score = min(scores) + max_score = max(scores) + if max_score - min_score > 1e-6: + for r in results: + r['relevance_score'] = (r['relevance_score'] - min_score) / (max_score - min_score) + + return results + except httpx.HTTPStatusError as e: + raise errors.RequesterError(f'Rerank request failed: {e.response.status_code} - {e.response.text}') + except httpx.TimeoutException: + raise errors.RequesterError('Rerank request timed out') + except Exception as e: + raise errors.RequesterError(f'Rerank request error: {str(e)}') + + @staticmethod + def _parse_rerank_response(data: dict) -> typing.List[dict]: + """Parse rerank response from various providers. + + Handles: + - Jina/Cohere/SiliconFlow: {"results": [{"index", "relevance_score"}]} + - Voyage AI: {"data": [{"index", "relevance_score"}]} + - DashScope: {"output": {"results": [{"index", "relevance_score"}]}} + """ + if 'results' in data: + return data['results'] + if 'data' in data: + return data['data'] + if 'output' in data and isinstance(data['output'], dict): + return data['output'].get('results', []) + return [] diff --git a/src/langbot/pkg/provider/modelmgr/requesters/chatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/chatcmpl.yaml index 8004a7b2..21bd6a05 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/chatcmpl.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/chatcmpl.yaml @@ -22,10 +22,12 @@ spec: type: integer required: true default: 120 - # LiteLLM provider prefix - when set, uses unified LiteLLMRequester - litellm_provider: openai support_type: - llm - text-embedding - rerank provider_category: manufacturer +execution: + python: + path: ./chatcmpl.py + attr: OpenAIChatCompletions diff --git a/src/langbot/pkg/provider/modelmgr/requesters/coherererank.yaml b/src/langbot/pkg/provider/modelmgr/requesters/coherererank.yaml index 6ac8052d..f1ca209b 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/coherererank.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/coherererank.yaml @@ -22,7 +22,10 @@ spec: type: integer required: true default: 120 - litellm_provider: cohere support_type: - rerank provider_category: manufacturer +execution: + python: + path: ./chatcmpl.py + attr: OpenAIChatCompletions diff --git a/src/langbot/pkg/provider/modelmgr/requesters/compsharechatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/compsharechatcmpl.py new file mode 100644 index 00000000..d272e721 --- /dev/null +++ b/src/langbot/pkg/provider/modelmgr/requesters/compsharechatcmpl.py @@ -0,0 +1,17 @@ +from __future__ import annotations + +import typing +import openai + +from . import chatcmpl + + +class CompShareChatCompletions(chatcmpl.OpenAIChatCompletions): + """CompShare ChatCompletion API 请求器""" + + client: openai.AsyncClient + + default_config: dict[str, typing.Any] = { + 'base_url': 'https://api.modelverse.cn/v1', + 'timeout': 120, + } diff --git a/src/langbot/pkg/provider/modelmgr/requesters/compsharechatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/compsharechatcmpl.yaml index f122dffe..92fcafdc 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/compsharechatcmpl.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/compsharechatcmpl.yaml @@ -22,7 +22,10 @@ spec: type: integer required: true default: 120 - litellm_provider: openai support_type: - llm provider_category: maas +execution: + python: + path: ./compsharechatcmpl.py + attr: CompShareChatCompletions diff --git a/src/langbot/pkg/provider/modelmgr/requesters/deepseekchatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/deepseekchatcmpl.py new file mode 100644 index 00000000..5bcbd40c --- /dev/null +++ b/src/langbot/pkg/provider/modelmgr/requesters/deepseekchatcmpl.py @@ -0,0 +1,67 @@ +from __future__ import annotations + +import typing + +from . import chatcmpl +from .. import errors, requester +import langbot_plugin.api.entities.builtin.resource.tool as resource_tool +import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query +import langbot_plugin.api.entities.builtin.provider.message as provider_message + + +class DeepseekChatCompletions(chatcmpl.OpenAIChatCompletions): + """Deepseek ChatCompletion API 请求器""" + + default_config: dict[str, typing.Any] = { + 'base_url': 'https://api.deepseek.com', + 'timeout': 120, + } + + async def _closure( + self, + query: pipeline_query.Query, + req_messages: list[dict], + use_model: requester.RuntimeLLMModel, + use_funcs: list[resource_tool.LLMTool] = None, + extra_args: dict[str, typing.Any] = {}, + remove_think: bool = False, + ) -> tuple[provider_message.Message, dict]: + self.client.api_key = use_model.provider.token_mgr.get_token() + + args = {} + args['model'] = use_model.model_entity.name + + if use_funcs: + tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs) + + if tools: + args['tools'] = tools + + # 设置此次请求中的messages + messages = req_messages + + # deepseek 不支持多模态,把content都转换成纯文字 + for m in messages: + if 'content' in m and isinstance(m['content'], list): + m['content'] = ' '.join([c['text'] for c in m['content'] if 'text' in c]) + + args['messages'] = messages + + # 发送请求 + resp = await self._req(args, extra_body=extra_args) + + # print(resp) + + if resp is None: + raise errors.RequesterError('接口返回为空,请确定模型提供商服务是否正常') + # 处理请求结果 + message = await self._make_msg(resp, remove_think) + + # Extract token usage from response + usage_info = {} + if hasattr(resp, 'usage') and resp.usage: + usage_info['input_tokens'] = resp.usage.prompt_tokens or 0 + usage_info['output_tokens'] = resp.usage.completion_tokens or 0 + usage_info['total_tokens'] = resp.usage.total_tokens or 0 + + return message, usage_info diff --git a/src/langbot/pkg/provider/modelmgr/requesters/deepseekchatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/deepseekchatcmpl.yaml index 8a9dbca7..8ef1fcf9 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/deepseekchatcmpl.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/deepseekchatcmpl.yaml @@ -22,7 +22,10 @@ spec: type: integer required: true default: 120 - litellm_provider: deepseek support_type: - llm provider_category: manufacturer +execution: + python: + path: ./deepseekchatcmpl.py + attr: DeepseekChatCompletions diff --git a/src/langbot/pkg/provider/modelmgr/requesters/geminichatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/geminichatcmpl.py new file mode 100644 index 00000000..956b49f6 --- /dev/null +++ b/src/langbot/pkg/provider/modelmgr/requesters/geminichatcmpl.py @@ -0,0 +1,205 @@ +from __future__ import annotations + +import typing +import httpx + +from . import chatcmpl + +import uuid + +from .. import requester +import langbot_plugin.api.entities.builtin.provider.message as provider_message +import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query +import langbot_plugin.api.entities.builtin.resource.tool as resource_tool + + +class GeminiChatCompletions(chatcmpl.OpenAIChatCompletions): + """Google Gemini API 请求器""" + + default_config: dict[str, typing.Any] = { + 'base_url': 'https://generativelanguage.googleapis.com/v1beta/openai', + 'timeout': 120, + } + + async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]: + models_url = 'https://generativelanguage.googleapis.com/v1beta/models' + params = {'key': api_key} if api_key else {} + + all_models: list[dict[str, typing.Any]] = [] + next_page_token = '' + last_payload: dict[str, typing.Any] = {} + + async with httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']) as client: + while True: + request_params = dict(params) + if next_page_token: + request_params['pageToken'] = next_page_token + + response = await client.get(models_url, params=request_params) + response.raise_for_status() + payload = response.json() + last_payload = payload + + for item in payload.get('models', []): + model_name = item.get('name', '') + model_id = model_name.replace('models/', '', 1) + if not model_id: + continue + + supported_methods = item.get('supportedGenerationMethods', []) or [] + if 'embedContent' in supported_methods and 'generateContent' not in supported_methods: + model_type = 'embedding' + else: + model_type = 'llm' + + all_models.append( + { + 'id': model_id, + 'name': model_id, + 'type': model_type, + 'abilities': self._infer_model_abilities(item, model_id), + 'display_name': item.get('displayName') or None, + 'description': item.get('description') or None, + 'context_length': item.get('inputTokenLimit'), + 'input_modalities': self._normalize_modalities(item.get('inputModalities')), + 'output_modalities': self._normalize_modalities(item.get('outputModalities')), + } + ) + + next_page_token = payload.get('nextPageToken', '') + if not next_page_token: + break + + all_models.sort(key=lambda item: (item['type'] != 'llm', item['name'].lower())) + return { + 'models': all_models, + 'debug': { + 'request': { + 'method': 'GET', + 'url': models_url, + 'query': {'key': self._mask_api_key(api_key)} if api_key else {}, + }, + 'response': last_payload, + }, + } + + async def _closure_stream( + self, + query: pipeline_query.Query, + req_messages: list[dict], + use_model: requester.RuntimeLLMModel, + use_funcs: list[resource_tool.LLMTool] = None, + extra_args: dict[str, typing.Any] = {}, + remove_think: bool = False, + ) -> provider_message.MessageChunk: + self.client.api_key = use_model.provider.token_mgr.get_token() + + args = {} + args['model'] = use_model.model_entity.name + + if use_funcs: + tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs) + if tools: + args['tools'] = tools + + # 设置此次请求中的messages + messages = req_messages.copy() + + # 检查vision + for msg in messages: + if 'content' in msg and isinstance(msg['content'], list): + for me in msg['content']: + if me['type'] == 'image_base64': + me['image_url'] = {'url': me['image_base64']} + me['type'] = 'image_url' + del me['image_base64'] + + args['messages'] = messages + args['stream'] = True + + # 流式处理状态 + # tool_calls_map: dict[str, provider_message.ToolCall] = {} + chunk_idx = 0 + thinking_started = False + thinking_ended = False + role = 'assistant' # 默认角色 + tool_id = '' + tool_name = '' + # accumulated_reasoning = '' # 仅用于判断何时结束思维链 + + async for chunk in self._req_stream(args, extra_body=extra_args): + # 解析 chunk 数据 + + if hasattr(chunk, 'choices') and chunk.choices: + choice = chunk.choices[0] + delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {} + + finish_reason = getattr(choice, 'finish_reason', None) + else: + delta = {} + finish_reason = None + # 从第一个 chunk 获取 role,后续使用这个 role + if 'role' in delta and delta['role']: + role = delta['role'] + + # 获取增量内容 + delta_content = delta.get('content', '') + reasoning_content = delta.get('reasoning_content', '') + + # 处理 reasoning_content + if reasoning_content: + # accumulated_reasoning += reasoning_content + # 如果设置了 remove_think,跳过 reasoning_content + if remove_think: + chunk_idx += 1 + continue + + # 第一次出现 reasoning_content,添加 开始标签 + if not thinking_started: + thinking_started = True + delta_content = '\n' + reasoning_content + else: + # 继续输出 reasoning_content + delta_content = reasoning_content + elif thinking_started and not thinking_ended and delta_content: + # reasoning_content 结束,normal content 开始,添加 结束标签 + thinking_ended = True + delta_content = '\n\n' + delta_content + + # 处理 content 中已有的 标签(如果需要移除) + # if delta_content and remove_think and '' in delta_content: + # import re + # + # # 移除 标签及其内容 + # delta_content = re.sub(r'.*?', '', delta_content, flags=re.DOTALL) + + # 处理工具调用增量 + # delta_tool_calls = None + if delta.get('tool_calls'): + for tool_call in delta['tool_calls']: + if tool_call['id'] == '' and tool_id == '': + tool_id = str(uuid.uuid4()) + if tool_call['function']['name']: + tool_name = tool_call['function']['name'] + tool_call['id'] = tool_id + tool_call['function']['name'] = tool_name + if tool_call['type'] is None: + tool_call['type'] = 'function' + + # 跳过空的第一个 chunk(只有 role 没有内容) + if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'): + chunk_idx += 1 + continue + # 构建 MessageChunk - 只包含增量内容 + chunk_data = { + 'role': role, + 'content': delta_content if delta_content else None, + 'tool_calls': delta.get('tool_calls'), + 'is_final': bool(finish_reason), + } + + # 移除 None 值 + chunk_data = {k: v for k, v in chunk_data.items() if v is not None} + + yield provider_message.MessageChunk(**chunk_data) + chunk_idx += 1 diff --git a/src/langbot/pkg/provider/modelmgr/requesters/geminichatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/geminichatcmpl.yaml index 53d71dc9..fdebe9b9 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/geminichatcmpl.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/geminichatcmpl.yaml @@ -22,7 +22,10 @@ spec: type: integer required: true default: 120 - litellm_provider: gemini support_type: - llm provider_category: manufacturer +execution: + python: + path: ./geminichatcmpl.py + attr: GeminiChatCompletions diff --git a/src/langbot/pkg/provider/modelmgr/requesters/giteeaichatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/giteeaichatcmpl.py new file mode 100644 index 00000000..4e295e9f --- /dev/null +++ b/src/langbot/pkg/provider/modelmgr/requesters/giteeaichatcmpl.py @@ -0,0 +1,15 @@ +from __future__ import annotations + + +import typing + +from . import ppiochatcmpl + + +class GiteeAIChatCompletions(ppiochatcmpl.PPIOChatCompletions): + """Gitee AI ChatCompletions API 请求器""" + + default_config: dict[str, typing.Any] = { + 'base_url': 'https://ai.gitee.com/v1', + 'timeout': 120, + } diff --git a/src/langbot/pkg/provider/modelmgr/requesters/giteeaichatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/giteeaichatcmpl.yaml index 1f208534..b7b158a7 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/giteeaichatcmpl.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/giteeaichatcmpl.yaml @@ -22,9 +22,12 @@ spec: type: integer required: true default: 120 - litellm_provider: openai support_type: - llm - text-embedding - rerank provider_category: maas +execution: + python: + path: ./giteeaichatcmpl.py + attr: GiteeAIChatCompletions diff --git a/src/langbot/pkg/provider/modelmgr/requesters/jiekouaichatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/jiekouaichatcmpl.py new file mode 100644 index 00000000..305ae21f --- /dev/null +++ b/src/langbot/pkg/provider/modelmgr/requesters/jiekouaichatcmpl.py @@ -0,0 +1,208 @@ +from __future__ import annotations + +import openai +import typing + +from . import chatcmpl +from .. import requester +import openai.types.chat.chat_completion as chat_completion +import re +import langbot_plugin.api.entities.builtin.provider.message as provider_message +import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query +import langbot_plugin.api.entities.builtin.resource.tool as resource_tool + + +class JieKouAIChatCompletions(chatcmpl.OpenAIChatCompletions): + """接口 AI ChatCompletion API 请求器""" + + client: openai.AsyncClient + + default_config: dict[str, typing.Any] = { + 'base_url': 'https://api.jiekou.ai/openai', + 'timeout': 120, + } + + is_think: bool = False + + async def _make_msg( + self, + chat_completion: chat_completion.ChatCompletion, + remove_think: bool, + ) -> provider_message.Message: + chatcmpl_message = chat_completion.choices[0].message.model_dump() + # print(chatcmpl_message.keys(), chatcmpl_message.values()) + + # 确保 role 字段存在且不为 None + if 'role' not in chatcmpl_message or chatcmpl_message['role'] is None: + chatcmpl_message['role'] = 'assistant' + + reasoning_content = chatcmpl_message['reasoning_content'] if 'reasoning_content' in chatcmpl_message else None + + # deepseek的reasoner模型 + chatcmpl_message['content'] = await self._process_thinking_content( + chatcmpl_message['content'], reasoning_content, remove_think + ) + + # 移除 reasoning_content 字段,避免传递给 Message + if 'reasoning_content' in chatcmpl_message: + del chatcmpl_message['reasoning_content'] + + message = provider_message.Message(**chatcmpl_message) + + return message + + async def _process_thinking_content( + self, + content: str, + reasoning_content: str = None, + remove_think: bool = False, + ) -> tuple[str, str]: + """处理思维链内容 + + Args: + content: 原始内容 + reasoning_content: reasoning_content 字段内容 + remove_think: 是否移除思维链 + + Returns: + 处理后的内容 + """ + if remove_think: + content = re.sub(r'.*?', '', content, flags=re.DOTALL) + else: + if reasoning_content is not None: + content = '\n' + reasoning_content + '\n\n' + content + return content + + async def _make_msg_chunk( + self, + delta: dict[str, typing.Any], + idx: int, + ) -> provider_message.MessageChunk: + # 处理流式chunk和完整响应的差异 + # print(chat_completion.choices[0]) + + # 确保 role 字段存在且不为 None + if 'role' not in delta or delta['role'] is None: + delta['role'] = 'assistant' + + reasoning_content = delta['reasoning_content'] if 'reasoning_content' in delta else None + + delta['content'] = '' if delta['content'] is None else delta['content'] + # print(reasoning_content) + + # deepseek的reasoner模型 + + if reasoning_content is not None: + delta['content'] += reasoning_content + + message = provider_message.MessageChunk(**delta) + + return message + + async def _closure_stream( + self, + query: pipeline_query.Query, + req_messages: list[dict], + use_model: requester.RuntimeLLMModel, + use_funcs: list[resource_tool.LLMTool] = None, + extra_args: dict[str, typing.Any] = {}, + remove_think: bool = False, + ) -> provider_message.Message | typing.AsyncGenerator[provider_message.MessageChunk, None]: + self.client.api_key = use_model.provider.token_mgr.get_token() + + args = {} + args['model'] = use_model.model_entity.name + + if use_funcs: + tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs) + + if tools: + args['tools'] = tools + + # 设置此次请求中的messages + messages = req_messages.copy() + + # 检查vision + for msg in messages: + if 'content' in msg and isinstance(msg['content'], list): + for me in msg['content']: + if me['type'] == 'image_base64': + me['image_url'] = {'url': me['image_base64']} + me['type'] = 'image_url' + del me['image_base64'] + + args['messages'] = messages + args['stream'] = True + + # tool_calls_map: dict[str, provider_message.ToolCall] = {} + chunk_idx = 0 + thinking_started = False + thinking_ended = False + role = 'assistant' # 默认角色 + async for chunk in self._req_stream(args, extra_body=extra_args): + # 解析 chunk 数据 + if hasattr(chunk, 'choices') and chunk.choices: + choice = chunk.choices[0] + delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {} + finish_reason = getattr(choice, 'finish_reason', None) + else: + delta = {} + finish_reason = None + + # 从第一个 chunk 获取 role,后续使用这个 role + if 'role' in delta and delta['role']: + role = delta['role'] + + # 获取增量内容 + delta_content = delta.get('content', '') + # reasoning_content = delta.get('reasoning_content', '') + + if remove_think: + if delta['content'] is not None: + if '' in delta['content'] and not thinking_started and not thinking_ended: + thinking_started = True + continue + elif delta['content'] == r'' and not thinking_ended: + thinking_ended = True + continue + elif thinking_ended and delta['content'] == '\n\n' and thinking_started: + thinking_started = False + continue + elif thinking_started and not thinking_ended: + continue + + # delta_tool_calls = None + if delta.get('tool_calls'): + for tool_call in delta['tool_calls']: + if tool_call['id'] and tool_call['function']['name']: + tool_id = tool_call['id'] + tool_name = tool_call['function']['name'] + + if tool_call['id'] is None: + tool_call['id'] = tool_id + if tool_call['function']['name'] is None: + tool_call['function']['name'] = tool_name + if tool_call['function']['arguments'] is None: + tool_call['function']['arguments'] = '' + if tool_call['type'] is None: + tool_call['type'] = 'function' + + # 跳过空的第一个 chunk(只有 role 没有内容) + if chunk_idx == 0 and not delta_content and not delta.get('tool_calls'): + chunk_idx += 1 + continue + + # 构建 MessageChunk - 只包含增量内容 + chunk_data = { + 'role': role, + 'content': delta_content if delta_content else None, + 'tool_calls': delta.get('tool_calls'), + 'is_final': bool(finish_reason), + } + + # 移除 None 值 + chunk_data = {k: v for k, v in chunk_data.items() if v is not None} + + yield provider_message.MessageChunk(**chunk_data) + chunk_idx += 1 diff --git a/src/langbot/pkg/provider/modelmgr/requesters/jiekouaichatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/jiekouaichatcmpl.yaml index 121fbae1..3c791d73 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/jiekouaichatcmpl.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/jiekouaichatcmpl.yaml @@ -29,8 +29,11 @@ spec: type: int required: true default: 120 - litellm_provider: openai support_type: - llm - text-embedding provider_category: maas +execution: + python: + path: ./jiekouaichatcmpl.py + attr: JieKouAIChatCompletions diff --git a/src/langbot/pkg/provider/modelmgr/requesters/jinarerank.yaml b/src/langbot/pkg/provider/modelmgr/requesters/jinarerank.yaml index 2329655d..3b448e38 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/jinarerank.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/jinarerank.yaml @@ -22,7 +22,10 @@ spec: type: integer required: true default: 120 - litellm_provider: jina support_type: - rerank provider_category: manufacturer +execution: + python: + path: ./chatcmpl.py + attr: OpenAIChatCompletions diff --git a/src/langbot/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.py new file mode 100644 index 00000000..c9060c1b --- /dev/null +++ b/src/langbot/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.py @@ -0,0 +1,17 @@ +from __future__ import annotations + +import typing +import openai + +from . import chatcmpl + + +class LmStudioChatCompletions(chatcmpl.OpenAIChatCompletions): + """LMStudio ChatCompletion API 请求器""" + + client: openai.AsyncClient + + default_config: dict[str, typing.Any] = { + 'base_url': 'http://127.0.0.1:1234/v1', + 'timeout': 120, + } diff --git a/src/langbot/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.yaml index b1b5bfac..81dc82cf 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.yaml @@ -22,8 +22,11 @@ spec: type: integer required: true default: 120 - litellm_provider: openai support_type: - llm - text-embedding provider_category: self-hosted +execution: + python: + path: ./lmstudiochatcmpl.py + attr: LmStudioChatCompletions diff --git a/src/langbot/pkg/provider/modelmgr/requesters/modelscopechatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/modelscopechatcmpl.py new file mode 100644 index 00000000..ed5d8795 --- /dev/null +++ b/src/langbot/pkg/provider/modelmgr/requesters/modelscopechatcmpl.py @@ -0,0 +1,561 @@ +from __future__ import annotations + +import asyncio +import typing + +import openai +import openai.types.chat.chat_completion as chat_completion +import httpx + +from .. import entities, errors, requester +import langbot_plugin.api.entities.builtin.resource.tool as resource_tool +import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query +import langbot_plugin.api.entities.builtin.provider.message as provider_message + + +class ModelScopeChatCompletions(requester.ProviderAPIRequester): + """ModelScope ChatCompletion API 请求器""" + + client: openai.AsyncClient + + default_config: dict[str, typing.Any] = { + 'base_url': 'https://api-inference.modelscope.cn/v1', + 'timeout': 120, + } + + async def initialize(self): + self.client = openai.AsyncClient( + api_key='', + base_url=self.requester_cfg['base_url'], + timeout=self.requester_cfg['timeout'], + http_client=httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']), + ) + + def _mask_api_key(self, api_key: str | None) -> str: + if not api_key: + return '' + if len(api_key) <= 8: + return '****' + return f'{api_key[:4]}...{api_key[-4:]}' + + def _infer_model_type(self, model_id: str) -> str: + normalized_model_id = (model_id or '').lower() + embedding_keywords = ( + 'embedding', + 'embed', + 'bge-', + 'e5-', + 'm3e', + 'gte-', + 'multilingual-e5', + 'text-embedding', + ) + return 'embedding' if any(keyword in normalized_model_id for keyword in embedding_keywords) else 'llm' + + def _infer_model_abilities(self, item: dict[str, typing.Any], model_id: str) -> list[str]: + normalized_model_id = (model_id or '').lower() + abilities: set[str] = set() + + def _flatten(value: typing.Any) -> list[str]: + if value is None: + return [] + if isinstance(value, str): + return [value.lower()] + if isinstance(value, dict): + flattened: list[str] = [] + for nested_value in value.values(): + flattened.extend(_flatten(nested_value)) + return flattened + if isinstance(value, (list, tuple, set)): + flattened: list[str] = [] + for nested_value in value: + flattened.extend(_flatten(nested_value)) + return flattened + return [str(value).lower()] + + capability_tokens = _flatten(item.get('capabilities')) + capability_tokens.extend(_flatten(item.get('modalities'))) + capability_tokens.extend(_flatten(item.get('input_modalities'))) + capability_tokens.extend(_flatten(item.get('output_modalities'))) + capability_tokens.extend(_flatten(item.get('supported_generation_methods'))) + capability_tokens.extend(_flatten(item.get('supported_parameters'))) + capability_tokens.extend(_flatten(item.get('architecture'))) + + combined_tokens = capability_tokens + [normalized_model_id] + + vision_keywords = ('vision', 'image', 'file', 'video', 'multimodal', 'vl', 'ocr', 'omni') + function_call_keywords = ('function', 'tool', 'tools', 'tool_choice', 'tool_call', 'tool-use', 'tool_use') + + if any(any(keyword in token for keyword in vision_keywords) for token in combined_tokens): + abilities.add('vision') + + if any(any(keyword in token for keyword in function_call_keywords) for token in combined_tokens): + abilities.add('func_call') + + return sorted(abilities) + + def _normalize_modalities(self, value: typing.Any) -> list[str]: + normalized: list[str] = [] + + def _collect(item: typing.Any): + if item is None: + return + if isinstance(item, str): + for part in item.replace('->', ',').replace('+', ',').split(','): + token = part.strip().lower() + if token and token not in normalized: + normalized.append(token) + return + if isinstance(item, dict): + for nested in item.values(): + _collect(nested) + return + if isinstance(item, (list, tuple, set)): + for nested in item: + _collect(nested) + return + + _collect(value) + return normalized + + def _extract_scan_metadata(self, item: dict[str, typing.Any], model_id: str) -> dict[str, typing.Any]: + display_name = item.get('name') + if not isinstance(display_name, str) or not display_name.strip() or display_name == model_id: + display_name = '' + + description = item.get('description') + if not isinstance(description, str) or not description.strip(): + description = '' + + context_length = item.get('context_length') + if context_length is None and isinstance(item.get('top_provider'), dict): + context_length = item['top_provider'].get('context_length') + + if not isinstance(context_length, int): + try: + context_length = int(context_length) if context_length is not None else None + except (TypeError, ValueError): + context_length = None + + input_modalities = self._normalize_modalities(item.get('input_modalities')) + output_modalities = self._normalize_modalities(item.get('output_modalities')) + + if isinstance(item.get('architecture'), dict): + if not input_modalities: + input_modalities = self._normalize_modalities(item['architecture'].get('input_modalities')) + if not output_modalities: + output_modalities = self._normalize_modalities(item['architecture'].get('output_modalities')) + + owned_by = item.get('owned_by') + if not isinstance(owned_by, str) or not owned_by.strip(): + owned_by = '' + + return { + 'display_name': display_name or None, + 'description': description or None, + 'context_length': context_length, + 'owned_by': owned_by or None, + 'input_modalities': input_modalities, + 'output_modalities': output_modalities, + } + + async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]: + headers = {} + if api_key: + headers['Authorization'] = f'Bearer {api_key}' + + models_url = f'{self.requester_cfg["base_url"].rstrip("/")}/models' + async with httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']) as client: + response = await client.get(models_url, headers=headers) + response.raise_for_status() + payload = response.json() + + models = [] + for item in payload.get('data', []): + model_id = item.get('id') + if not model_id: + continue + models.append( + { + 'id': model_id, + 'name': model_id, + 'type': self._infer_model_type(model_id), + 'abilities': self._infer_model_abilities(item, model_id), + **self._extract_scan_metadata(item, model_id), + } + ) + + models.sort(key=lambda item: (item['type'] != 'llm', item['name'].lower())) + return { + 'models': models, + 'debug': { + 'request': { + 'method': 'GET', + 'url': models_url, + 'headers': { + 'Authorization': f'Bearer {self._mask_api_key(api_key)}' if api_key else '', + }, + }, + 'response': payload, + }, + } + + async def _req( + self, + query: pipeline_query.Query, + args: dict, + extra_body: dict = {}, + remove_think: bool = False, + ) -> list[dict[str, typing.Any]]: + args['stream'] = True + + chunk = None + + pending_content = '' + + tool_calls = [] + + resp_gen: openai.AsyncStream = await self.client.chat.completions.create(**args, extra_body=extra_body) + + chunk_idx = 0 + thinking_started = False + thinking_ended = False + tool_id = '' + tool_name = '' + message_delta = {} + async for chunk in resp_gen: + if not chunk or not chunk.id or not chunk.choices or not chunk.choices[0] or not chunk.choices[0].delta: + continue + + delta = chunk.choices[0].delta.model_dump() if hasattr(chunk.choices[0], 'delta') else {} + reasoning_content = delta.get('reasoning_content') + # 处理 reasoning_content + if reasoning_content: + # accumulated_reasoning += reasoning_content + # 如果设置了 remove_think,跳过 reasoning_content + if remove_think: + chunk_idx += 1 + continue + + # 第一次出现 reasoning_content,添加 开始标签 + if not thinking_started: + thinking_started = True + pending_content += '\n' + reasoning_content + else: + # 继续输出 reasoning_content + pending_content += reasoning_content + elif thinking_started and not thinking_ended and delta.get('content'): + # reasoning_content 结束,normal content 开始,添加 结束标签 + thinking_ended = True + pending_content += '\n\n' + delta.get('content') + + if delta.get('content') is not None: + pending_content += delta.get('content') + + if delta.get('tool_calls') is not None: + for tool_call in delta.get('tool_calls'): + if tool_call['id'] != '': + tool_id = tool_call['id'] + if tool_call['function']['name'] is not None: + tool_name = tool_call['function']['name'] + if tool_call['function']['arguments'] is None: + continue + tool_call['id'] = tool_id + tool_call['name'] = tool_name + for tc in tool_calls: + if tc['index'] == tool_call['index']: + tc['function']['arguments'] += tool_call['function']['arguments'] + break + else: + tool_calls.append(tool_call) + + if chunk.choices[0].finish_reason is not None: + break + message_delta['content'] = pending_content + message_delta['role'] = 'assistant' + + message_delta['tool_calls'] = tool_calls if tool_calls else None + return [message_delta] + + async def _make_msg( + self, + chat_completion: list[dict[str, typing.Any]], + ) -> provider_message.Message: + chatcmpl_message = chat_completion[0] + + # 确保 role 字段存在且不为 None + if 'role' not in chatcmpl_message or chatcmpl_message['role'] is None: + chatcmpl_message['role'] = 'assistant' + + message = provider_message.Message(**chatcmpl_message) + + return message + + async def _closure( + self, + query: pipeline_query.Query, + req_messages: list[dict], + use_model: requester.RuntimeLLMModel, + use_funcs: list[resource_tool.LLMTool] = None, + extra_args: dict[str, typing.Any] = {}, + remove_think: bool = False, + ) -> tuple[provider_message.Message, dict]: + self.client.api_key = use_model.provider.token_mgr.get_token() + + args = {} + args['model'] = use_model.model_entity.name + + if use_funcs: + tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs) + + if tools: + args['tools'] = tools + + # 设置此次请求中的messages + messages = req_messages.copy() + + # 检查vision + for msg in messages: + if 'content' in msg and isinstance(msg['content'], list): + for me in msg['content']: + if me['type'] == 'image_base64': + me['image_url'] = {'url': me['image_base64']} + me['type'] = 'image_url' + del me['image_base64'] + + args['messages'] = messages + + # 发送请求 + resp = await self._req(query, args, extra_body=extra_args, remove_think=remove_think) + + # 处理请求结果 + message = await self._make_msg(resp) + + # ModelScope uses streaming, usage info not available + usage_info = {} + + return message, usage_info + + async def _req_stream( + self, + args: dict, + extra_body: dict = {}, + ) -> chat_completion.ChatCompletion: + async for chunk in await self.client.chat.completions.create(**args, extra_body=extra_body): + yield chunk + + async def _closure_stream( + self, + query: pipeline_query.Query, + req_messages: list[dict], + use_model: requester.RuntimeLLMModel, + use_funcs: list[resource_tool.LLMTool] = None, + extra_args: dict[str, typing.Any] = {}, + remove_think: bool = False, + ) -> provider_message.Message | typing.AsyncGenerator[provider_message.MessageChunk, None]: + self.client.api_key = use_model.provider.token_mgr.get_token() + + args = {} + args['model'] = use_model.model_entity.name + + if use_funcs: + tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs) + + if tools: + args['tools'] = tools + + # 设置此次请求中的messages + messages = req_messages.copy() + + # 检查vision + for msg in messages: + if 'content' in msg and isinstance(msg['content'], list): + for me in msg['content']: + if me['type'] == 'image_base64': + me['image_url'] = {'url': me['image_base64']} + me['type'] = 'image_url' + del me['image_base64'] + + args['messages'] = messages + args['stream'] = True + + # 流式处理状态 + # tool_calls_map: dict[str, provider_message.ToolCall] = {} + chunk_idx = 0 + thinking_started = False + thinking_ended = False + role = 'assistant' # 默认角色 + # accumulated_reasoning = '' # 仅用于判断何时结束思维链 + + async for chunk in self._req_stream(args, extra_body=extra_args): + # 解析 chunk 数据 + if hasattr(chunk, 'choices') and chunk.choices: + choice = chunk.choices[0] + delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {} + finish_reason = getattr(choice, 'finish_reason', None) + else: + delta = {} + finish_reason = None + + # 从第一个 chunk 获取 role,后续使用这个 role + if 'role' in delta and delta['role']: + role = delta['role'] + + # 获取增量内容 + delta_content = delta.get('content', '') + reasoning_content = delta.get('reasoning_content', '') + + # 处理 reasoning_content + if reasoning_content: + # accumulated_reasoning += reasoning_content + # 如果设置了 remove_think,跳过 reasoning_content + if remove_think: + chunk_idx += 1 + continue + + # 第一次出现 reasoning_content,添加 开始标签 + if not thinking_started: + thinking_started = True + delta_content = '\n' + reasoning_content + else: + # 继续输出 reasoning_content + delta_content = reasoning_content + elif thinking_started and not thinking_ended and delta_content: + # reasoning_content 结束,normal content 开始,添加 结束标签 + thinking_ended = True + delta_content = '\n\n' + delta_content + + # 处理 content 中已有的 标签(如果需要移除) + # if delta_content and remove_think and '' in delta_content: + # import re + # + # # 移除 标签及其内容 + # delta_content = re.sub(r'.*?', '', delta_content, flags=re.DOTALL) + + # 处理工具调用增量 + if delta.get('tool_calls'): + for tool_call in delta['tool_calls']: + if tool_call['id'] != '': + tool_id = tool_call['id'] + if tool_call['function']['name'] is not None: + tool_name = tool_call['function']['name'] + + if tool_call['type'] is None: + tool_call['type'] = 'function' + tool_call['id'] = tool_id + tool_call['function']['name'] = tool_name + tool_call['function']['arguments'] = ( + '' if tool_call['function']['arguments'] is None else tool_call['function']['arguments'] + ) + + # 跳过空的第一个 chunk(只有 role 没有内容) + if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'): + chunk_idx += 1 + continue + + # 构建 MessageChunk - 只包含增量内容 + chunk_data = { + 'role': role, + 'content': delta_content if delta_content else None, + 'tool_calls': delta.get('tool_calls'), + 'is_final': bool(finish_reason), + } + + # 移除 None 值 + chunk_data = {k: v for k, v in chunk_data.items() if v is not None} + + yield provider_message.MessageChunk(**chunk_data) + chunk_idx += 1 + # return + + async def invoke_llm( + self, + query: pipeline_query.Query, + model: entities.LLMModelInfo, + messages: typing.List[provider_message.Message], + funcs: typing.List[resource_tool.LLMTool] = None, + extra_args: dict[str, typing.Any] = {}, + remove_think: bool = False, + ) -> provider_message.Message: + req_messages = [] # req_messages 仅用于类内,外部同步由 query.messages 进行 + for m in messages: + msg_dict = m.dict(exclude_none=True) + content = msg_dict.get('content') + if isinstance(content, list): + # 检查 content 列表中是否每个部分都是文本 + if all(isinstance(part, dict) and part.get('type') == 'text' for part in content): + # 将所有文本部分合并为一个字符串 + msg_dict['content'] = '\n'.join(part['text'] for part in content) + req_messages.append(msg_dict) + + try: + return await self._closure( + query=query, + req_messages=req_messages, + use_model=model, + use_funcs=funcs, + extra_args=extra_args, + remove_think=remove_think, + ) + except asyncio.TimeoutError: + raise errors.RequesterError('请求超时') + except openai.BadRequestError as e: + if 'context_length_exceeded' in e.message: + raise errors.RequesterError(f'上文过长,请重置会话: {e.message}') + else: + raise errors.RequesterError(f'请求参数错误: {e.message}') + except openai.AuthenticationError as e: + raise errors.RequesterError(f'无效的 api-key: {e.message}') + except openai.NotFoundError as e: + raise errors.RequesterError(f'请求路径错误: {e.message}') + except openai.RateLimitError as e: + raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}') + except openai.APIError as e: + raise errors.RequesterError(f'请求错误: {e.message}') + + async def invoke_llm_stream( + self, + query: pipeline_query.Query, + model: requester.RuntimeLLMModel, + messages: typing.List[provider_message.Message], + funcs: typing.List[resource_tool.LLMTool] = None, + extra_args: dict[str, typing.Any] = {}, + remove_think: bool = False, + ) -> provider_message.MessageChunk: + req_messages = [] # req_messages 仅用于类内,外部同步由 query.messages 进行 + for m in messages: + msg_dict = m.dict(exclude_none=True) + content = msg_dict.get('content') + if isinstance(content, list): + # 检查 content 列表中是否每个部分都是文本 + if all(isinstance(part, dict) and part.get('type') == 'text' for part in content): + # 将所有文本部分合并为一个字符串 + msg_dict['content'] = '\n'.join(part['text'] for part in content) + req_messages.append(msg_dict) + + try: + async for item in self._closure_stream( + query=query, + req_messages=req_messages, + use_model=model, + use_funcs=funcs, + extra_args=extra_args, + remove_think=remove_think, + ): + yield item + + except asyncio.TimeoutError: + raise errors.RequesterError('请求超时') + except openai.BadRequestError as e: + if 'context_length_exceeded' in e.message: + raise errors.RequesterError(f'上文过长,请重置会话: {e.message}') + else: + raise errors.RequesterError(f'请求参数错误: {e.message}') + except openai.AuthenticationError as e: + raise errors.RequesterError(f'无效的 api-key: {e.message}') + except openai.NotFoundError as e: + raise errors.RequesterError(f'请求路径错误: {e.message}') + except openai.RateLimitError as e: + raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}') + except openai.APIError as e: + raise errors.RequesterError(f'请求错误: {e.message}') diff --git a/src/langbot/pkg/provider/modelmgr/requesters/modelscopechatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/modelscopechatcmpl.yaml index 12d38201..8d22002d 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/modelscopechatcmpl.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/modelscopechatcmpl.yaml @@ -29,7 +29,10 @@ spec: type: int required: true default: 120 - litellm_provider: openai support_type: - llm provider_category: maas +execution: + python: + path: ./modelscopechatcmpl.py + attr: ModelScopeChatCompletions diff --git a/src/langbot/pkg/provider/modelmgr/requesters/moonshotchatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/moonshotchatcmpl.py new file mode 100644 index 00000000..b6852963 --- /dev/null +++ b/src/langbot/pkg/provider/modelmgr/requesters/moonshotchatcmpl.py @@ -0,0 +1,67 @@ +from __future__ import annotations + +import typing + + +from . import chatcmpl +from .. import requester +import langbot_plugin.api.entities.builtin.resource.tool as resource_tool +import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query +import langbot_plugin.api.entities.builtin.provider.message as provider_message + + +class MoonshotChatCompletions(chatcmpl.OpenAIChatCompletions): + """Moonshot ChatCompletion API 请求器""" + + default_config: dict[str, typing.Any] = { + 'base_url': 'https://api.moonshot.cn/v1', + 'timeout': 120, + } + + async def _closure( + self, + query: pipeline_query.Query, + req_messages: list[dict], + use_model: requester.RuntimeLLMModel, + use_funcs: list[resource_tool.LLMTool] = None, + extra_args: dict[str, typing.Any] = {}, + remove_think: bool = False, + ) -> tuple[provider_message.Message, dict]: + self.client.api_key = use_model.provider.token_mgr.get_token() + + args = {} + args['model'] = use_model.model_entity.name + + if use_funcs: + tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs) + + if tools: + args['tools'] = tools + + # 设置此次请求中的messages + messages = req_messages + + # deepseek 不支持多模态,把content都转换成纯文字 + for m in messages: + if 'content' in m and isinstance(m['content'], list): + m['content'] = ' '.join([c['text'] for c in m['content']]) + + # 删除空的,不知道干嘛的,直接删了。 + # messages = [m for m in messages if m["content"].strip() != "" and ('tool_calls' not in m or not m['tool_calls'])] + + args['messages'] = messages + + # 发送请求 + resp = await self._req(args, extra_body=extra_args) + + # 处理请求结果 + message = await self._make_msg(resp, remove_think) + + # Extract token usage from response + usage_info = {} + if hasattr(resp, 'usage') and resp.usage: + usage_info['input_tokens'] = resp.usage.prompt_tokens or 0 + usage_info['output_tokens'] = resp.usage.completion_tokens or 0 + usage_info['total_tokens'] = resp.usage.total_tokens or 0 + + return message, usage_info diff --git a/src/langbot/pkg/provider/modelmgr/requesters/moonshotchatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/moonshotchatcmpl.yaml index 11bb5a05..7a7e3060 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/moonshotchatcmpl.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/moonshotchatcmpl.yaml @@ -22,7 +22,10 @@ spec: type: integer required: true default: 120 - litellm_provider: moonshot support_type: - llm provider_category: manufacturer +execution: + python: + path: ./moonshotchatcmpl.py + attr: MoonshotChatCompletions diff --git a/src/langbot/pkg/provider/modelmgr/requesters/newapichatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/newapichatcmpl.py new file mode 100644 index 00000000..3c2bd3fb --- /dev/null +++ b/src/langbot/pkg/provider/modelmgr/requesters/newapichatcmpl.py @@ -0,0 +1,17 @@ +from __future__ import annotations + +import typing +import openai + +from . import chatcmpl + + +class NewAPIChatCompletions(chatcmpl.OpenAIChatCompletions): + """New API ChatCompletion API 请求器""" + + client: openai.AsyncClient + + default_config: dict[str, typing.Any] = { + 'base_url': 'http://localhost:3000/v1', + 'timeout': 120, + } diff --git a/src/langbot/pkg/provider/modelmgr/requesters/newapichatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/newapichatcmpl.yaml index 2d51bf9f..e0f44e99 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/newapichatcmpl.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/newapichatcmpl.yaml @@ -22,8 +22,11 @@ spec: type: integer required: true default: 120 - litellm_provider: openai support_type: - llm - text-embedding provider_category: maas +execution: + python: + path: ./newapichatcmpl.py + attr: NewAPIChatCompletions diff --git a/src/langbot/pkg/provider/modelmgr/requesters/ollamachat.py b/src/langbot/pkg/provider/modelmgr/requesters/ollamachat.py new file mode 100644 index 00000000..50f601d7 --- /dev/null +++ b/src/langbot/pkg/provider/modelmgr/requesters/ollamachat.py @@ -0,0 +1,314 @@ +from __future__ import annotations + +import asyncio +import os +import typing +from typing import Union, Mapping, Any, AsyncIterator +import uuid +import json + +import ollama +import httpx + +from .. import errors, requester +import langbot_plugin.api.entities.builtin.resource.tool as resource_tool +import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query +import langbot_plugin.api.entities.builtin.provider.message as provider_message + +REQUESTER_NAME: str = 'ollama-chat' + + +class OllamaChatCompletions(requester.ProviderAPIRequester): + """Ollama平台 ChatCompletion API请求器""" + + client: ollama.AsyncClient + + default_config: dict[str, typing.Any] = { + 'base_url': 'http://127.0.0.1:11434', + 'timeout': 120, + } + + async def initialize(self): + os.environ['OLLAMA_HOST'] = self.requester_cfg['base_url'] + self.client = ollama.AsyncClient(timeout=self.requester_cfg['timeout']) + + def _infer_model_type(self, model_id: str) -> str: + normalized_model_id = (model_id or '').lower() + embedding_keywords = ('embedding', 'embed', 'bge-', 'e5-', 'm3e', 'gte-', 'text-embedding') + return 'embedding' if any(keyword in normalized_model_id for keyword in embedding_keywords) else 'llm' + + def _infer_model_abilities(self, item: dict[str, typing.Any], model_id: str) -> list[str]: + normalized_model_id = (model_id or '').lower() + abilities: set[str] = set() + details = item.get('details', {}) or {} + families = details.get('families', []) or [] + tokens = [normalized_model_id, str(details.get('family', '')).lower()] + tokens.extend(str(family).lower() for family in families) + + if any(keyword in token for token in tokens for keyword in ('vision', 'vl', 'omni', 'llava', 'ocr')): + abilities.add('vision') + if any(keyword in token for token in tokens for keyword in ('tool', 'function')): + abilities.add('func_call') + return sorted(abilities) + + async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]: + del api_key + models_url = f'{self.requester_cfg["base_url"].rstrip("/")}/api/tags' + + async with httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']) as client: + response = await client.get(models_url) + response.raise_for_status() + payload = response.json() + + models: list[dict[str, typing.Any]] = [] + for item in payload.get('models', []): + model_id = item.get('model') or item.get('name') + if not model_id: + continue + models.append( + { + 'id': model_id, + 'name': item.get('name', model_id), + 'type': self._infer_model_type(model_id), + 'abilities': self._infer_model_abilities(item, model_id), + } + ) + + models.sort(key=lambda item: (item['type'] != 'llm', item['name'].lower())) + return { + 'models': models, + 'debug': { + 'request': { + 'method': 'GET', + 'url': models_url, + }, + 'response': payload, + }, + } + + async def _req( + self, + args: dict, + ) -> Union[Mapping[str, Any], AsyncIterator[Mapping[str, Any]]]: + return await self.client.chat(**args) + + async def _closure( + self, + query: pipeline_query.Query, + req_messages: list[dict], + use_model: requester.RuntimeLLMModel, + use_funcs: list[resource_tool.LLMTool] = None, + extra_args: dict[str, typing.Any] = {}, + remove_think: bool = False, + ) -> provider_message.Message: + args = extra_args.copy() + args['model'] = use_model.model_entity.name + + messages: list[dict] = req_messages.copy() + for msg in messages: + if 'content' in msg and isinstance(msg['content'], list): + text_content: list = [] + image_urls: list = [] + for me in msg['content']: + if me['type'] == 'text': + text_content.append(me['text']) + elif me['type'] == 'image_base64': + image_urls.append(me['image_base64']) + + msg['content'] = '\n'.join(text_content) + msg['images'] = [url.split(',')[1] for url in image_urls] + if 'tool_calls' in msg: # LangBot 内部以 str 存储 tool_calls 的参数,这里需要转换为 dict + for tool_call in msg['tool_calls']: + tool_call['function']['arguments'] = json.loads(tool_call['function']['arguments']) + args['messages'] = messages + + args['tools'] = [] + if use_funcs: + tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs) + if tools: + args['tools'] = tools + + resp = await self._req(args) + message: provider_message.Message = await self._make_msg(resp) + return message + + async def _make_msg(self, chat_completions: ollama.ChatResponse) -> provider_message.Message: + message: ollama.Message = chat_completions.message + if message is None: + raise ValueError("chat_completions must contain a 'message' field") + + ret_msg: provider_message.Message = None + + if message.content is not None: + ret_msg = provider_message.Message(role='assistant', content=message.content) + if message.tool_calls is not None and len(message.tool_calls) > 0: + tool_calls: list[provider_message.ToolCall] = [] + + for tool_call in message.tool_calls: + tool_calls.append( + provider_message.ToolCall( + id=uuid.uuid4().hex, + type='function', + function=provider_message.FunctionCall( + name=tool_call.function.name, + arguments=json.dumps(tool_call.function.arguments), + ), + ) + ) + ret_msg.tool_calls = tool_calls + + return ret_msg + + async def _prepare_messages( + self, + messages: typing.List[provider_message.Message], + ) -> list[dict]: + """Prepare messages for Ollama API request.""" + req_messages: list = [] + for m in messages: + msg_dict: dict = m.dict(exclude_none=True) + content: Any = msg_dict.get('content') + if isinstance(content, list): + if all(isinstance(part, dict) and part.get('type') == 'text' for part in content): + msg_dict['content'] = '\n'.join(part['text'] for part in content) + req_messages.append(msg_dict) + return req_messages + + async def invoke_llm( + self, + query: pipeline_query.Query, + model: requester.RuntimeLLMModel, + messages: typing.List[provider_message.Message], + funcs: typing.List[resource_tool.LLMTool] = None, + extra_args: dict[str, typing.Any] = {}, + remove_think: bool = False, + ) -> provider_message.Message: + req_messages = await self._prepare_messages(messages) + try: + return await self._closure( + query=query, + req_messages=req_messages, + use_model=model, + use_funcs=funcs, + extra_args=extra_args, + remove_think=remove_think, + ) + except asyncio.TimeoutError: + raise errors.RequesterError('请求超时') + + async def invoke_llm_stream( + self, + query: pipeline_query.Query, + model: requester.RuntimeLLMModel, + messages: typing.List[provider_message.Message], + funcs: typing.List[resource_tool.LLMTool] = None, + extra_args: dict[str, typing.Any] = {}, + remove_think: bool = False, + ) -> provider_message.MessageChunk: + req_messages = await self._prepare_messages(messages) + + try: + args = extra_args.copy() + args['model'] = model.model_entity.name + + # Process messages for Ollama format + msgs: list[dict] = req_messages.copy() + for msg in msgs: + if 'content' in msg and isinstance(msg['content'], list): + text_content: list = [] + image_urls: list = [] + for me in msg['content']: + if me['type'] == 'text': + text_content.append(me['text']) + elif me['type'] == 'image_base64': + image_urls.append(me['image_base64']) + msg['content'] = '\n'.join(text_content) + msg['images'] = [url.split(',')[1] for url in image_urls] + if 'tool_calls' in msg: + for tool_call in msg['tool_calls']: + tool_call['function']['arguments'] = json.loads(tool_call['function']['arguments']) + args['messages'] = msgs + + args['tools'] = [] + if funcs: + tools = await self.ap.tool_mgr.generate_tools_for_openai(funcs) + if tools: + args['tools'] = tools + + args['stream'] = True + + chunk_idx = 0 + thinking_started = False + thinking_ended = False + role = 'assistant' + + async for chunk in await self.client.chat(**args): + message: ollama.Message = chunk.message + done = chunk.done + + delta_content = message.content or '' + reasoning_content = getattr(message, 'thinking', '') or '' + + # Handle reasoning/thinking content + if reasoning_content: + if remove_think: + chunk_idx += 1 + continue + + if not thinking_started: + thinking_started = True + delta_content = '\n' + reasoning_content + else: + delta_content = reasoning_content + elif thinking_started and not thinking_ended and delta_content: + thinking_ended = True + delta_content = '\n\n' + delta_content + + # Handle tool calls + tool_calls_data = None + if message.tool_calls: + tool_calls_data = [] + for tc in message.tool_calls: + tool_calls_data.append( + { + 'id': uuid.uuid4().hex, + 'type': 'function', + 'function': { + 'name': tc.function.name, + 'arguments': json.dumps(tc.function.arguments), + }, + } + ) + + # Skip empty first chunk + if chunk_idx == 0 and not delta_content and not reasoning_content and not tool_calls_data: + chunk_idx += 1 + continue + + chunk_data = { + 'role': role, + 'content': delta_content if delta_content else None, + 'tool_calls': tool_calls_data, + 'is_final': bool(done), + } + chunk_data = {k: v for k, v in chunk_data.items() if v is not None} + + yield provider_message.MessageChunk(**chunk_data) + chunk_idx += 1 + + except asyncio.TimeoutError: + raise errors.RequesterError('请求超时') + + async def invoke_embedding( + self, + model: requester.RuntimeEmbeddingModel, + input_text: list[str], + extra_args: dict[str, typing.Any] = {}, + ) -> list[list[float]]: + return ( + await self.client.embed( + model=model.model_entity.name, + input=input_text, + **extra_args, + ) + ).embeddings diff --git a/src/langbot/pkg/provider/modelmgr/requesters/ollamachat.yaml b/src/langbot/pkg/provider/modelmgr/requesters/ollamachat.yaml index 24f4a0e9..a724f8f8 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/ollamachat.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/ollamachat.yaml @@ -22,8 +22,11 @@ spec: type: integer required: true default: 120 - litellm_provider: ollama support_type: - llm - text-embedding provider_category: self-hosted +execution: + python: + path: ./ollamachat.py + attr: OllamaChatCompletions diff --git a/src/langbot/pkg/provider/modelmgr/requesters/openrouterchatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/openrouterchatcmpl.py new file mode 100644 index 00000000..17b88431 --- /dev/null +++ b/src/langbot/pkg/provider/modelmgr/requesters/openrouterchatcmpl.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +import typing +import openai + +from . import modelscopechatcmpl + + +class OpenRouterChatCompletions(modelscopechatcmpl.ModelScopeChatCompletions): + """OpenRouter ChatCompletion API 请求器""" + + client: openai.AsyncClient + + default_config: dict[str, typing.Any] = { + 'base_url': 'https://openrouter.ai/api/v1', + 'timeout': 120, + } + + async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]: + original_base_url = self.requester_cfg.get('base_url', '') + self.requester_cfg['base_url'] = 'https://openrouter.ai/api/v1' + try: + return await super().scan_models(api_key) + finally: + self.requester_cfg['base_url'] = original_base_url diff --git a/src/langbot/pkg/provider/modelmgr/requesters/openrouterchatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/openrouterchatcmpl.yaml index 9a386736..71064dc0 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/openrouterchatcmpl.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/openrouterchatcmpl.yaml @@ -22,9 +22,12 @@ spec: type: integer required: true default: 120 - litellm_provider: openrouter support_type: - llm - text-embedding - rerank provider_category: maas +execution: + python: + path: ./openrouterchatcmpl.py + attr: OpenRouterChatCompletions diff --git a/src/langbot/pkg/provider/modelmgr/requesters/ppiochatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/ppiochatcmpl.py new file mode 100644 index 00000000..1836bd62 --- /dev/null +++ b/src/langbot/pkg/provider/modelmgr/requesters/ppiochatcmpl.py @@ -0,0 +1,208 @@ +from __future__ import annotations + +import openai +import typing + +from . import chatcmpl +from .. import requester +import openai.types.chat.chat_completion as chat_completion +import re +import langbot_plugin.api.entities.builtin.provider.message as provider_message +import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query +import langbot_plugin.api.entities.builtin.resource.tool as resource_tool + + +class PPIOChatCompletions(chatcmpl.OpenAIChatCompletions): + """欧派云 ChatCompletion API 请求器""" + + client: openai.AsyncClient + + default_config: dict[str, typing.Any] = { + 'base_url': 'https://api.ppinfra.com/v3/openai', + 'timeout': 120, + } + + is_think: bool = False + + async def _make_msg( + self, + chat_completion: chat_completion.ChatCompletion, + remove_think: bool, + ) -> provider_message.Message: + chatcmpl_message = chat_completion.choices[0].message.model_dump() + # print(chatcmpl_message.keys(), chatcmpl_message.values()) + + # 确保 role 字段存在且不为 None + if 'role' not in chatcmpl_message or chatcmpl_message['role'] is None: + chatcmpl_message['role'] = 'assistant' + + reasoning_content = chatcmpl_message['reasoning_content'] if 'reasoning_content' in chatcmpl_message else None + + # deepseek的reasoner模型 + chatcmpl_message['content'] = await self._process_thinking_content( + chatcmpl_message['content'], reasoning_content, remove_think + ) + + # 移除 reasoning_content 字段,避免传递给 Message + if 'reasoning_content' in chatcmpl_message: + del chatcmpl_message['reasoning_content'] + + message = provider_message.Message(**chatcmpl_message) + + return message + + async def _process_thinking_content( + self, + content: str, + reasoning_content: str = None, + remove_think: bool = False, + ) -> tuple[str, str]: + """处理思维链内容 + + Args: + content: 原始内容 + reasoning_content: reasoning_content 字段内容 + remove_think: 是否移除思维链 + + Returns: + 处理后的内容 + """ + if remove_think: + content = re.sub(r'.*?', '', content, flags=re.DOTALL) + else: + if reasoning_content is not None: + content = '\n' + reasoning_content + '\n\n' + content + return content + + async def _make_msg_chunk( + self, + delta: dict[str, typing.Any], + idx: int, + ) -> provider_message.MessageChunk: + # 处理流式chunk和完整响应的差异 + # print(chat_completion.choices[0]) + + # 确保 role 字段存在且不为 None + if 'role' not in delta or delta['role'] is None: + delta['role'] = 'assistant' + + reasoning_content = delta['reasoning_content'] if 'reasoning_content' in delta else None + + delta['content'] = '' if delta['content'] is None else delta['content'] + # print(reasoning_content) + + # deepseek的reasoner模型 + + if reasoning_content is not None: + delta['content'] += reasoning_content + + message = provider_message.MessageChunk(**delta) + + return message + + async def _closure_stream( + self, + query: pipeline_query.Query, + req_messages: list[dict], + use_model: requester.RuntimeLLMModel, + use_funcs: list[resource_tool.LLMTool] = None, + extra_args: dict[str, typing.Any] = {}, + remove_think: bool = False, + ) -> provider_message.Message | typing.AsyncGenerator[provider_message.MessageChunk, None]: + self.client.api_key = use_model.provider.token_mgr.get_token() + + args = {} + args['model'] = use_model.model_entity.name + + if use_funcs: + tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs) + + if tools: + args['tools'] = tools + + # 设置此次请求中的messages + messages = req_messages.copy() + + # 检查vision + for msg in messages: + if 'content' in msg and isinstance(msg['content'], list): + for me in msg['content']: + if me['type'] == 'image_base64': + me['image_url'] = {'url': me['image_base64']} + me['type'] = 'image_url' + del me['image_base64'] + + args['messages'] = messages + args['stream'] = True + + # tool_calls_map: dict[str, provider_message.ToolCall] = {} + chunk_idx = 0 + thinking_started = False + thinking_ended = False + role = 'assistant' # 默认角色 + async for chunk in self._req_stream(args, extra_body=extra_args): + # 解析 chunk 数据 + if hasattr(chunk, 'choices') and chunk.choices: + choice = chunk.choices[0] + delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {} + finish_reason = getattr(choice, 'finish_reason', None) + else: + delta = {} + finish_reason = None + + # 从第一个 chunk 获取 role,后续使用这个 role + if 'role' in delta and delta['role']: + role = delta['role'] + + # 获取增量内容 + delta_content = delta.get('content', '') + # reasoning_content = delta.get('reasoning_content', '') + + if remove_think: + if delta['content'] is not None: + if '' in delta['content'] and not thinking_started and not thinking_ended: + thinking_started = True + continue + elif delta['content'] == r'' and not thinking_ended: + thinking_ended = True + continue + elif thinking_ended and delta['content'] == '\n\n' and thinking_started: + thinking_started = False + continue + elif thinking_started and not thinking_ended: + continue + + # delta_tool_calls = None + if delta.get('tool_calls'): + for tool_call in delta['tool_calls']: + if tool_call['id'] and tool_call['function']['name']: + tool_id = tool_call['id'] + tool_name = tool_call['function']['name'] + + if tool_call['id'] is None: + tool_call['id'] = tool_id + if tool_call['function']['name'] is None: + tool_call['function']['name'] = tool_name + if tool_call['function']['arguments'] is None: + tool_call['function']['arguments'] = '' + if tool_call['type'] is None: + tool_call['type'] = 'function' + + # 跳过空的第一个 chunk(只有 role 没有内容) + if chunk_idx == 0 and not delta_content and not delta.get('tool_calls'): + chunk_idx += 1 + continue + + # 构建 MessageChunk - 只包含增量内容 + chunk_data = { + 'role': role, + 'content': delta_content if delta_content else None, + 'tool_calls': delta.get('tool_calls'), + 'is_final': bool(finish_reason), + } + + # 移除 None 值 + chunk_data = {k: v for k, v in chunk_data.items() if v is not None} + + yield provider_message.MessageChunk(**chunk_data) + chunk_idx += 1 diff --git a/src/langbot/pkg/provider/modelmgr/requesters/ppiochatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/ppiochatcmpl.yaml index 38fdb83e..9e8eb1b0 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/ppiochatcmpl.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/ppiochatcmpl.yaml @@ -29,8 +29,11 @@ spec: type: int required: true default: 120 - litellm_provider: openai support_type: - llm - text-embedding provider_category: maas +execution: + python: + path: ./ppiochatcmpl.py + attr: PPIOChatCompletions diff --git a/src/langbot/pkg/provider/modelmgr/requesters/qhaigcchatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/qhaigcchatcmpl.py new file mode 100644 index 00000000..a68b6896 --- /dev/null +++ b/src/langbot/pkg/provider/modelmgr/requesters/qhaigcchatcmpl.py @@ -0,0 +1,17 @@ +from __future__ import annotations + +import openai +import typing + +from . import chatcmpl + + +class QHAIGCChatCompletions(chatcmpl.OpenAIChatCompletions): + """启航 AI ChatCompletion API 请求器""" + + client: openai.AsyncClient + + default_config: dict[str, typing.Any] = { + 'base_url': 'https://api.qhaigc.com/v1', + 'timeout': 120, + } diff --git a/src/langbot/pkg/provider/modelmgr/requesters/qhaigcchatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/qhaigcchatcmpl.yaml index 1b2a25bf..46ae1fad 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/qhaigcchatcmpl.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/qhaigcchatcmpl.yaml @@ -29,8 +29,11 @@ spec: type: int required: true default: 120 - litellm_provider: openai support_type: - llm - text-embedding provider_category: maas +execution: + python: + path: ./qhaigcchatcmpl.py + attr: QHAIGCChatCompletions diff --git a/src/langbot/pkg/provider/modelmgr/requesters/shengsuanyun.py b/src/langbot/pkg/provider/modelmgr/requesters/shengsuanyun.py new file mode 100644 index 00000000..122eaf7d --- /dev/null +++ b/src/langbot/pkg/provider/modelmgr/requesters/shengsuanyun.py @@ -0,0 +1,32 @@ +from __future__ import annotations + +import openai +import typing + +from . import chatcmpl +import openai.types.chat.chat_completion as chat_completion + + +class ShengSuanYunChatCompletions(chatcmpl.OpenAIChatCompletions): + """胜算云(ModelSpot.AI) ChatCompletion API 请求器""" + + client: openai.AsyncClient + + default_config: dict[str, typing.Any] = { + 'base_url': 'https://router.shengsuanyun.com/api/v1', + 'timeout': 120, + } + + async def _req( + self, + args: dict, + extra_body: dict = {}, + ) -> chat_completion.ChatCompletion: + return await self.client.chat.completions.create( + **args, + extra_body=extra_body, + extra_headers={ + 'HTTP-Referer': 'https://langbot.app', + 'X-Title': 'LangBot', + }, + ) diff --git a/src/langbot/pkg/provider/modelmgr/requesters/shengsuanyun.yaml b/src/langbot/pkg/provider/modelmgr/requesters/shengsuanyun.yaml index 55bf2a3f..77cf682c 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/shengsuanyun.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/shengsuanyun.yaml @@ -29,8 +29,11 @@ spec: type: int required: true default: 120 - litellm_provider: openai support_type: - llm - text-embedding provider_category: maas +execution: + python: + path: ./shengsuanyun.py + attr: ShengSuanYunChatCompletions diff --git a/src/langbot/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.py new file mode 100644 index 00000000..3636d9d1 --- /dev/null +++ b/src/langbot/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.py @@ -0,0 +1,17 @@ +from __future__ import annotations + +import typing +import openai + +from . import chatcmpl + + +class SiliconFlowChatCompletions(chatcmpl.OpenAIChatCompletions): + """SiliconFlow ChatCompletion API 请求器""" + + client: openai.AsyncClient + + default_config: dict[str, typing.Any] = { + 'base_url': 'https://api.siliconflow.cn/v1', + 'timeout': 120, + } diff --git a/src/langbot/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.yaml index 07715db1..11a2ffa3 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.yaml @@ -22,9 +22,12 @@ spec: type: integer required: true default: 120 - litellm_provider: siliconflow support_type: - llm - text-embedding - rerank provider_category: maas +execution: + python: + path: ./siliconflowchatcmpl.py + attr: SiliconFlowChatCompletions diff --git a/src/langbot/pkg/provider/modelmgr/requesters/spacechatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/spacechatcmpl.py new file mode 100644 index 00000000..91740a1f --- /dev/null +++ b/src/langbot/pkg/provider/modelmgr/requesters/spacechatcmpl.py @@ -0,0 +1,17 @@ +from __future__ import annotations + +import typing +import openai + +from . import chatcmpl + + +class LangBotSpaceChatCompletions(chatcmpl.OpenAIChatCompletions): + """LangBot Space ChatCompletion API 请求器""" + + client: openai.AsyncClient + + default_config: dict[str, typing.Any] = { + 'base_url': 'https://api.langbot.cloud/v1', + 'timeout': 120, + } diff --git a/src/langbot/pkg/provider/modelmgr/requesters/spacechatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/spacechatcmpl.yaml index 70faea5f..29c23a83 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/spacechatcmpl.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/spacechatcmpl.yaml @@ -22,8 +22,11 @@ spec: type: integer required: true default: 120 - litellm_provider: openai support_type: - llm - text-embedding provider_category: maas +execution: + python: + path: ./spacechatcmpl.py + attr: LangBotSpaceChatCompletions diff --git a/src/langbot/pkg/provider/modelmgr/requesters/tokenpony.yaml b/src/langbot/pkg/provider/modelmgr/requesters/tokenpony.yaml index 712dd490..f160bdea 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/tokenpony.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/tokenpony.yaml @@ -22,8 +22,11 @@ spec: type: integer required: true default: 120 - litellm_provider: openai support_type: - llm - text-embedding provider_category: maas +execution: + python: + path: ./tokenponychatcmpl.py + attr: TokenPonyChatCompletions diff --git a/src/langbot/pkg/provider/modelmgr/requesters/tokenponychatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/tokenponychatcmpl.py new file mode 100644 index 00000000..92311454 --- /dev/null +++ b/src/langbot/pkg/provider/modelmgr/requesters/tokenponychatcmpl.py @@ -0,0 +1,17 @@ +from __future__ import annotations + +import typing +import openai + +from . import chatcmpl + + +class TokenPonyChatCompletions(chatcmpl.OpenAIChatCompletions): + """TokenPony ChatCompletion API 请求器""" + + client: openai.AsyncClient + + default_config: dict[str, typing.Any] = { + 'base_url': 'https://api.tokenpony.cn/v1', + 'timeout': 120, + } diff --git a/src/langbot/pkg/provider/modelmgr/requesters/volcarkchatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/volcarkchatcmpl.py new file mode 100644 index 00000000..7eb68956 --- /dev/null +++ b/src/langbot/pkg/provider/modelmgr/requesters/volcarkchatcmpl.py @@ -0,0 +1,17 @@ +from __future__ import annotations + +import typing +import openai + +from . import chatcmpl + + +class VolcArkChatCompletions(chatcmpl.OpenAIChatCompletions): + """火山方舟大模型平台 ChatCompletion API 请求器""" + + client: openai.AsyncClient + + default_config: dict[str, typing.Any] = { + 'base_url': 'https://ark.cn-beijing.volces.com/api/v3', + 'timeout': 120, + } diff --git a/src/langbot/pkg/provider/modelmgr/requesters/volcarkchatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/volcarkchatcmpl.yaml index 19f79f59..e5c82657 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/volcarkchatcmpl.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/volcarkchatcmpl.yaml @@ -22,7 +22,10 @@ spec: type: integer required: true default: 120 - litellm_provider: openai support_type: - llm provider_category: maas +execution: + python: + path: ./volcarkchatcmpl.py + attr: VolcArkChatCompletions diff --git a/src/langbot/pkg/provider/modelmgr/requesters/voyageairerank.yaml b/src/langbot/pkg/provider/modelmgr/requesters/voyageairerank.yaml index 0955f4a0..a47b8d47 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/voyageairerank.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/voyageairerank.yaml @@ -22,7 +22,10 @@ spec: type: integer required: true default: 120 - litellm_provider: voyage support_type: - rerank provider_category: manufacturer +execution: + python: + path: ./chatcmpl.py + attr: OpenAIChatCompletions diff --git a/src/langbot/pkg/provider/modelmgr/requesters/xaichatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/xaichatcmpl.py new file mode 100644 index 00000000..db2022f1 --- /dev/null +++ b/src/langbot/pkg/provider/modelmgr/requesters/xaichatcmpl.py @@ -0,0 +1,17 @@ +from __future__ import annotations + +import typing +import openai + +from . import chatcmpl + + +class XaiChatCompletions(chatcmpl.OpenAIChatCompletions): + """xAI ChatCompletion API 请求器""" + + client: openai.AsyncClient + + default_config: dict[str, typing.Any] = { + 'base_url': 'https://api.x.ai/v1', + 'timeout': 120, + } diff --git a/src/langbot/pkg/provider/modelmgr/requesters/xaichatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/xaichatcmpl.yaml index 76db8a2f..2e721d70 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/xaichatcmpl.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/xaichatcmpl.yaml @@ -22,7 +22,10 @@ spec: type: integer required: true default: 120 - litellm_provider: xai support_type: - llm provider_category: manufacturer +execution: + python: + path: ./xaichatcmpl.py + attr: XaiChatCompletions diff --git a/src/langbot/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.py new file mode 100644 index 00000000..a1a07068 --- /dev/null +++ b/src/langbot/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.py @@ -0,0 +1,17 @@ +from __future__ import annotations + +import typing +import openai + +from . import chatcmpl + + +class ZhipuAIChatCompletions(chatcmpl.OpenAIChatCompletions): + """智谱AI ChatCompletion API 请求器""" + + client: openai.AsyncClient + + default_config: dict[str, typing.Any] = { + 'base_url': 'https://open.bigmodel.cn/api/paas/v4', + 'timeout': 120, + } diff --git a/src/langbot/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.yaml index 632ff1b4..a4ebb2ec 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.yaml @@ -22,7 +22,10 @@ spec: type: integer required: true default: 120 - litellm_provider: zhipu support_type: - llm provider_category: manufacturer +execution: + python: + path: ./zhipuaichatcmpl.py + attr: ZhipuAIChatCompletions