diff --git a/src/langbot/pkg/provider/modelmgr/requesters/302aichatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/302aichatcmpl.py deleted file mode 100644 index 40a41718..00000000 --- a/src/langbot/pkg/provider/modelmgr/requesters/302aichatcmpl.py +++ /dev/null @@ -1,17 +0,0 @@ -from __future__ import annotations - -import typing -import openai - -from . import chatcmpl - - -class AI302ChatCompletions(chatcmpl.OpenAIChatCompletions): - """302.AI ChatCompletion API 请求器""" - - client: openai.AsyncClient - - default_config: dict[str, typing.Any] = { - 'base_url': 'https://api.302.ai/v1', - 'timeout': 120, - } diff --git a/src/langbot/pkg/provider/modelmgr/requesters/302aichatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/302aichatcmpl.yaml index e4f70cae..1d8f9a47 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/302aichatcmpl.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/302aichatcmpl.yaml @@ -7,6 +7,7 @@ metadata: zh_Hans: 302.AI icon: 302ai.png spec: + litellm_provider: openai config: - name: base_url label: diff --git a/src/langbot/pkg/provider/modelmgr/requesters/anthropicmsgs.py b/src/langbot/pkg/provider/modelmgr/requesters/anthropicmsgs.py deleted file mode 100644 index 1428dc88..00000000 --- a/src/langbot/pkg/provider/modelmgr/requesters/anthropicmsgs.py +++ /dev/null @@ -1,370 +0,0 @@ -from __future__ import annotations - -import typing -import json -import platform -import socket -import anthropic -import httpx - -from .. import errors, requester - -from ....utils import image -import langbot_plugin.api.entities.builtin.resource.tool as resource_tool -import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query -import langbot_plugin.api.entities.builtin.provider.message as provider_message - - -class AnthropicMessages(requester.ProviderAPIRequester): - """Anthropic Messages API 请求器""" - - client: anthropic.AsyncAnthropic - - default_config: dict[str, typing.Any] = { - 'base_url': 'https://api.anthropic.com', - 'timeout': 120, - } - - async def initialize(self): - # 兼容 Windows 缺失 TCP_KEEPINTVL 和 TCP_KEEPCNT 的问题 - if platform.system() == 'Windows': - if not hasattr(socket, 'TCP_KEEPINTVL'): - socket.TCP_KEEPINTVL = 0 - if not hasattr(socket, 'TCP_KEEPCNT'): - socket.TCP_KEEPCNT = 0 - httpx_client = anthropic._base_client.AsyncHttpxClientWrapper( - base_url=self.requester_cfg['base_url'], - # cast to a valid type because mypy doesn't understand our type narrowing - timeout=typing.cast(httpx.Timeout, self.requester_cfg['timeout']), - limits=anthropic._constants.DEFAULT_CONNECTION_LIMITS, - follow_redirects=True, - trust_env=True, - ) - - self.client = anthropic.AsyncAnthropic( - api_key='', - http_client=httpx_client, - base_url=self.requester_cfg['base_url'], - ) - - async def invoke_llm( - self, - query: pipeline_query.Query, - model: requester.RuntimeLLMModel, - messages: typing.List[provider_message.Message], - funcs: typing.List[resource_tool.LLMTool] = None, - extra_args: dict[str, typing.Any] = {}, - remove_think: bool = False, - ) -> provider_message.Message: - self.client.api_key = model.provider.token_mgr.get_token() - - args = extra_args.copy() - args['model'] = model.model_entity.name - - # 处理消息 - - # system - system_role_message = None - - for i, m in enumerate(messages): - if m.role == 'system': - system_role_message = m - - break - - if system_role_message: - messages.pop(i) - - if isinstance(system_role_message, provider_message.Message) and isinstance(system_role_message.content, str): - args['system'] = system_role_message.content - - req_messages = [] - - for m in messages: - if m.role == 'tool': - tool_call_id = m.tool_call_id - - req_messages.append( - { - 'role': 'user', - 'content': [ - { - 'type': 'tool_result', - 'tool_use_id': tool_call_id, - 'is_error': False, - 'content': [{'type': 'text', 'text': m.content}], - } - ], - } - ) - - continue - - msg_dict = m.dict(exclude_none=True) - - if isinstance(m.content, str) and m.content.strip() != '': - msg_dict['content'] = [{'type': 'text', 'text': m.content}] - elif isinstance(m.content, list): - for i, ce in enumerate(m.content): - if ce.type == 'image_base64': - image_b64, image_format = await image.extract_b64_and_format(ce.image_base64) - - alter_image_ele = { - 'type': 'image', - 'source': { - 'type': 'base64', - 'media_type': f'image/{image_format}', - 'data': image_b64, - }, - } - msg_dict['content'][i] = alter_image_ele - - if m.tool_calls: - for tool_call in m.tool_calls: - msg_dict['content'].append( - { - 'type': 'tool_use', - 'id': tool_call.id, - 'name': tool_call.function.name, - 'input': json.loads(tool_call.function.arguments), - } - ) - - del msg_dict['tool_calls'] - - req_messages.append(msg_dict) - - args['messages'] = req_messages - - if 'thinking' in args: - args['thinking'] = {'type': 'enabled', 'budget_tokens': 10000} - - if funcs: - tools = await self.ap.tool_mgr.generate_tools_for_anthropic(funcs) - - if tools: - args['tools'] = tools - - try: - resp = await self.client.messages.create(**args) - - args = { - 'content': '', - 'role': resp.role, - } - assert type(resp) is anthropic.types.message.Message - - for block in resp.content: - if not remove_think and block.type == 'thinking': - args['content'] = '\n' + block.thinking + '\n\n' + args['content'] - elif block.type == 'text': - args['content'] += block.text - elif block.type == 'tool_use': - assert type(block) is anthropic.types.tool_use_block.ToolUseBlock - tool_call = provider_message.ToolCall( - id=block.id, - type='function', - function=provider_message.FunctionCall(name=block.name, arguments=json.dumps(block.input)), - ) - if 'tool_calls' not in args: - args['tool_calls'] = [] - args['tool_calls'].append(tool_call) - - return provider_message.Message(**args) - except anthropic.AuthenticationError as e: - raise errors.RequesterError(f'api-key 无效: {e.message}') - except anthropic.BadRequestError as e: - raise errors.RequesterError(str(e.message)) - except anthropic.NotFoundError as e: - if 'model: ' in str(e): - raise errors.RequesterError(f'模型无效: {e.message}') - else: - raise errors.RequesterError(f'请求地址无效: {e.message}') - - async def invoke_llm_stream( - self, - query: pipeline_query.Query, - model: requester.RuntimeLLMModel, - messages: typing.List[provider_message.Message], - funcs: typing.List[resource_tool.LLMTool] = None, - extra_args: dict[str, typing.Any] = {}, - remove_think: bool = False, - ) -> provider_message.Message: - self.client.api_key = model.provider.token_mgr.get_token() - - args = extra_args.copy() - args['model'] = model.model_entity.name - args['stream'] = True - - # 处理消息 - - # system - system_role_message = None - - for i, m in enumerate(messages): - if m.role == 'system': - system_role_message = m - - break - - if system_role_message: - messages.pop(i) - - if isinstance(system_role_message, provider_message.Message) and isinstance(system_role_message.content, str): - args['system'] = system_role_message.content - - req_messages = [] - - for m in messages: - if m.role == 'tool': - tool_call_id = m.tool_call_id - - req_messages.append( - { - 'role': 'user', - 'content': [ - { - 'type': 'tool_result', - 'tool_use_id': tool_call_id, - 'is_error': False, # 暂时直接写false - 'content': [ - {'type': 'text', 'text': m.content} - ], # 这里要是list包裹,应该是多个返回的情况?type类型好像也可以填其他的,暂时只写text - } - ], - } - ) - - continue - - msg_dict = m.dict(exclude_none=True) - - if isinstance(m.content, str) and m.content.strip() != '': - msg_dict['content'] = [{'type': 'text', 'text': m.content}] - elif isinstance(m.content, list): - for i, ce in enumerate(m.content): - if ce.type == 'image_base64': - image_b64, image_format = await image.extract_b64_and_format(ce.image_base64) - - alter_image_ele = { - 'type': 'image', - 'source': { - 'type': 'base64', - 'media_type': f'image/{image_format}', - 'data': image_b64, - }, - } - msg_dict['content'][i] = alter_image_ele - if isinstance(msg_dict['content'], str) and msg_dict['content'] == '': - msg_dict['content'] = [] # 这里不知道为什么会莫名有个空导致content为字符 - if m.tool_calls: - for tool_call in m.tool_calls: - msg_dict['content'].append( - { - 'type': 'tool_use', - 'id': tool_call.id, - 'name': tool_call.function.name, - 'input': json.loads(tool_call.function.arguments), - } - ) - - del msg_dict['tool_calls'] - - req_messages.append(msg_dict) - if 'thinking' in args: - args['thinking'] = {'type': 'enabled', 'budget_tokens': 10000} - - args['messages'] = req_messages - - if funcs: - tools = await self.ap.tool_mgr.generate_tools_for_anthropic(funcs) - - if tools: - args['tools'] = tools - - try: - role = 'assistant' # 默认角色 - # chunk_idx = 0 - think_started = False - think_ended = False - finish_reason = False - tool_name = '' - tool_id = '' - async for chunk in await self.client.messages.create(**args): - content = '' - tool_call = {'id': None, 'function': {'name': None, 'arguments': None}, 'type': 'function'} - if isinstance( - chunk, anthropic.types.raw_content_block_start_event.RawContentBlockStartEvent - ): # 记录开始 - if chunk.content_block.type == 'tool_use': - if chunk.content_block.name is not None: - tool_name = chunk.content_block.name - if chunk.content_block.id is not None: - tool_id = chunk.content_block.id - - tool_call['function']['name'] = tool_name - tool_call['function']['arguments'] = '' - tool_call['id'] = tool_id - - if not remove_think: - if chunk.content_block.type == 'thinking' and not remove_think: - think_started = True - elif chunk.content_block.type == 'text' and chunk.index != 0 and not remove_think: - think_ended = True - continue - elif isinstance(chunk, anthropic.types.raw_content_block_delta_event.RawContentBlockDeltaEvent): - if chunk.delta.type == 'thinking_delta': - if think_started: - think_started = False - content = '\n' + chunk.delta.thinking - elif remove_think: - continue - else: - content = chunk.delta.thinking - elif chunk.delta.type == 'text_delta': - if think_ended: - think_ended = False - content = '\n\n' + chunk.delta.text - else: - content = chunk.delta.text - elif chunk.delta.type == 'input_json_delta': - tool_call['function']['arguments'] = chunk.delta.partial_json - tool_call['function']['name'] = tool_name - tool_call['id'] = tool_id - elif isinstance(chunk, anthropic.types.raw_content_block_stop_event.RawContentBlockStopEvent): - continue # 记录raw_content_block结束的 - - elif isinstance(chunk, anthropic.types.raw_message_delta_event.RawMessageDeltaEvent): - if chunk.delta.stop_reason == 'end_turn': - finish_reason = True - elif isinstance(chunk, anthropic.types.raw_message_stop_event.RawMessageStopEvent): - continue # 这个好像是完全结束 - else: - # print(chunk) - self.ap.logger.debug(f'anthropic chunk: {chunk}') - continue - - args = { - 'content': content, - 'role': role, - 'is_final': finish_reason, - 'tool_calls': None if tool_call['id'] is None else [tool_call], - } - # if chunk_idx == 0: - # chunk_idx += 1 - # continue - - # assert type(chunk) is anthropic.types.message.Chunk - - yield provider_message.MessageChunk(**args) - - # return llm_entities.Message(**args) - except anthropic.AuthenticationError as e: - raise errors.RequesterError(f'api-key 无效: {e.message}') - except anthropic.BadRequestError as e: - raise errors.RequesterError(str(e.message)) - except anthropic.NotFoundError as e: - if 'model: ' in str(e): - raise errors.RequesterError(f'模型无效: {e.message}') - else: - raise errors.RequesterError(f'请求地址无效: {e.message}') diff --git a/src/langbot/pkg/provider/modelmgr/requesters/anthropicmsgs.yaml b/src/langbot/pkg/provider/modelmgr/requesters/anthropicmsgs.yaml index 0ef60d3e..811ff6a2 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/anthropicmsgs.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/anthropicmsgs.yaml @@ -7,6 +7,7 @@ metadata: zh_Hans: Anthropic icon: anthropic.svg spec: + litellm_provider: anthropic config: - name: base_url label: @@ -24,6 +25,8 @@ spec: default: 120 support_type: - llm + - text-embedding + - rerank provider_category: manufacturer execution: python: diff --git a/src/langbot/pkg/provider/modelmgr/requesters/baidu.svg b/src/langbot/pkg/provider/modelmgr/requesters/baidu.svg new file mode 100644 index 00000000..a541c95e --- /dev/null +++ b/src/langbot/pkg/provider/modelmgr/requesters/baidu.svg @@ -0,0 +1,5 @@ + + + Baidu + ERNIE + diff --git a/src/langbot/pkg/provider/modelmgr/requesters/baiduchatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/baiduchatcmpl.yaml new file mode 100644 index 00000000..55e7c328 --- /dev/null +++ b/src/langbot/pkg/provider/modelmgr/requesters/baiduchatcmpl.yaml @@ -0,0 +1,30 @@ +apiVersion: v1 +kind: LLMAPIRequester +metadata: + name: baidu-chat-completions + label: + en_US: Baidu ERNIE + zh_Hans: 百度文心一言 + icon: baidu.svg +spec: + litellm_provider: openai + config: + - name: base_url + label: + en_US: Base URL + zh_Hans: 基础 URL + type: string + required: true + default: https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop + - name: timeout + label: + en_US: Timeout + zh_Hans: 超时时间 + type: integer + required: true + default: 120 + support_type: + - llm + - text-embedding + - rerank + provider_category: manufacturer diff --git a/src/langbot/pkg/provider/modelmgr/requesters/bailianchatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/bailianchatcmpl.py deleted file mode 100644 index 9da6e1b4..00000000 --- a/src/langbot/pkg/provider/modelmgr/requesters/bailianchatcmpl.py +++ /dev/null @@ -1,242 +0,0 @@ -from __future__ import annotations - -import typing -import dashscope -import openai - -from . import modelscopechatcmpl -from .. import requester -import langbot_plugin.api.entities.builtin.resource.tool as resource_tool -import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query -import langbot_plugin.api.entities.builtin.provider.message as provider_message - - -class BailianChatCompletions(modelscopechatcmpl.ModelScopeChatCompletions): - """阿里云百炼大模型平台 ChatCompletion API 请求器""" - - client: openai.AsyncClient - - default_config: dict[str, typing.Any] = { - 'base_url': 'https://dashscope.aliyuncs.com/compatible-mode/v1', - 'timeout': 120, - } - - async def _closure_stream( - self, - query: pipeline_query.Query, - req_messages: list[dict], - use_model: requester.RuntimeLLMModel, - use_funcs: list[resource_tool.LLMTool] = None, - extra_args: dict[str, typing.Any] = {}, - remove_think: bool = False, - ) -> provider_message.Message | typing.AsyncGenerator[provider_message.MessageChunk, None]: - self.client.api_key = use_model.provider.token_mgr.get_token() - - args = {} - args['model'] = use_model.model_entity.name - - if use_funcs: - tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs) - - if tools: - args['tools'] = tools - - # 设置此次请求中的messages - messages = req_messages.copy() - - is_use_dashscope_call = False # 是否使用阿里原生库调用 - is_enable_multi_model = True # 是否支持多轮对话 - use_time_num = 0 # 模型已调用次数,防止存在多文件时重复调用 - use_time_ids = [] # 已调用的ID列表 - message_id = 0 # 记录消息序号 - - for msg in messages: - # print(msg) - if 'content' in msg and isinstance(msg['content'], list): - for me in msg['content']: - if me['type'] == 'image_base64': - me['image_url'] = {'url': me['image_base64']} - me['type'] = 'image_url' - del me['image_base64'] - elif me['type'] == 'file_url' and '.' in me.get('file_name', ''): - # 1. 视频文件推理 - # https://bailian.console.aliyun.com/?tab=doc#/doc/?type=model&url=2845871 - file_type = me.get('file_name').lower().split('.')[-1] - if file_type in ['mp4', 'avi', 'mkv', 'mov', 'flv', 'wmv']: - me['type'] = 'video_url' - me['video_url'] = {'url': me['file_url']} - del me['file_url'] - del me['file_name'] - use_time_num += 1 - use_time_ids.append(message_id) - is_enable_multi_model = False - # 2. 语音文件识别, 无法通过openai的audio字段传递,暂时不支持 - # https://bailian.console.aliyun.com/?tab=doc#/doc/?type=model&url=2979031 - elif file_type in [ - 'aac', - 'amr', - 'aiff', - 'flac', - 'm4a', - 'mp3', - 'mpeg', - 'ogg', - 'opus', - 'wav', - 'webm', - 'wma', - ]: - me['audio'] = me['file_url'] - me['type'] = 'audio' - del me['file_url'] - del me['type'] - del me['file_name'] - is_use_dashscope_call = True - use_time_num += 1 - use_time_ids.append(message_id) - is_enable_multi_model = False - message_id += 1 - - # 使用列表推导式,保留不在 use_time_ids[:-1] 中的元素,仅保留最后一个多媒体消息 - if not is_enable_multi_model and use_time_num > 1: - messages = [msg for idx, msg in enumerate(messages) if idx not in use_time_ids[:-1]] - - if not is_enable_multi_model: - messages = [msg for msg in messages if 'resp_message_id' not in msg] - - args['messages'] = messages - args['stream'] = True - - # 流式处理状态 - # tool_calls_map: dict[str, provider_message.ToolCall] = {} - chunk_idx = 0 - thinking_started = False - thinking_ended = False - role = 'assistant' # 默认角色 - - if is_use_dashscope_call: - response = dashscope.MultiModalConversation.call( - # 若没有配置环境变量,请用百炼API Key将下行替换为:api_key = "sk-xxx" - api_key=use_model.provider.token_mgr.get_token(), - model=use_model.model_entity.name, - messages=messages, - result_format='message', - asr_options={ - # "language": "zh", # 可选,若已知音频的语种,可通过该参数指定待识别语种,以提升识别准确率 - 'enable_lid': True, - 'enable_itn': False, - }, - stream=True, - ) - content_length_list = [] - previous_length = 0 # 记录上一次的内容长度 - for res in response: - chunk = res['output'] - # 解析 chunk 数据 - if hasattr(chunk, 'choices') and chunk.choices: - choice = chunk.choices[0] - delta_content = choice['message'].content[0]['text'] - finish_reason = choice['finish_reason'] - content_length_list.append(len(delta_content)) - else: - delta_content = '' - finish_reason = None - - # 跳过空的第一个 chunk(只有 role 没有内容) - if chunk_idx == 0 and not delta_content: - chunk_idx += 1 - continue - - # 检查 content_length_list 是否有足够的数据 - if len(content_length_list) >= 2: - now_content = delta_content[previous_length : content_length_list[-1]] - previous_length = content_length_list[-1] # 更新上一次的长度 - else: - now_content = delta_content # 第一次循环时直接使用 delta_content - previous_length = len(delta_content) # 更新上一次的长度 - - # 构建 MessageChunk - 只包含增量内容 - chunk_data = { - 'role': role, - 'content': now_content if now_content else None, - 'is_final': bool(finish_reason) and finish_reason != 'null', - } - - # 移除 None 值 - chunk_data = {k: v for k, v in chunk_data.items() if v is not None} - yield provider_message.MessageChunk(**chunk_data) - chunk_idx += 1 - else: - async for chunk in self._req_stream(args, extra_body=extra_args): - # 解析 chunk 数据 - if hasattr(chunk, 'choices') and chunk.choices: - choice = chunk.choices[0] - delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {} - finish_reason = getattr(choice, 'finish_reason', None) - else: - delta = {} - finish_reason = None - - # 从第一个 chunk 获取 role,后续使用这个 role - if 'role' in delta and delta['role']: - role = delta['role'] - - # 获取增量内容 - delta_content = delta.get('content', '') - reasoning_content = delta.get('reasoning_content', '') - - # 处理 reasoning_content - if reasoning_content: - # accumulated_reasoning += reasoning_content - # 如果设置了 remove_think,跳过 reasoning_content - if remove_think: - chunk_idx += 1 - continue - - # 第一次出现 reasoning_content,添加 开始标签 - if not thinking_started: - thinking_started = True - delta_content = '\n' + reasoning_content - else: - # 继续输出 reasoning_content - delta_content = reasoning_content - elif thinking_started and not thinking_ended and delta_content: - # reasoning_content 结束,normal content 开始,添加 结束标签 - thinking_ended = True - delta_content = '\n\n' + delta_content - - # 处理工具调用增量 - if delta.get('tool_calls'): - for tool_call in delta['tool_calls']: - if tool_call['id'] != '': - tool_id = tool_call['id'] - if tool_call['function']['name'] is not None: - tool_name = tool_call['function']['name'] - - if tool_call['type'] is None: - tool_call['type'] = 'function' - tool_call['id'] = tool_id - tool_call['function']['name'] = tool_name - tool_call['function']['arguments'] = ( - '' if tool_call['function']['arguments'] is None else tool_call['function']['arguments'] - ) - - # 跳过空的第一个 chunk(只有 role 没有内容) - if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'): - chunk_idx += 1 - continue - - # 构建 MessageChunk - 只包含增量内容 - chunk_data = { - 'role': role, - 'content': delta_content if delta_content else None, - 'tool_calls': delta.get('tool_calls'), - 'is_final': bool(finish_reason), - } - - # 移除 None 值 - chunk_data = {k: v for k, v in chunk_data.items() if v is not None} - - yield provider_message.MessageChunk(**chunk_data) - chunk_idx += 1 - # return diff --git a/src/langbot/pkg/provider/modelmgr/requesters/bailianchatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/bailianchatcmpl.yaml index fc5998c4..15203876 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/bailianchatcmpl.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/bailianchatcmpl.yaml @@ -7,6 +7,7 @@ metadata: zh_Hans: 阿里云百炼 icon: bailian.png spec: + litellm_provider: openai config: - name: base_url label: @@ -24,6 +25,7 @@ spec: default: 120 support_type: - llm + - text-embedding - rerank provider_category: maas execution: diff --git a/src/langbot/pkg/provider/modelmgr/requesters/chatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/chatcmpl.py deleted file mode 100644 index da24bda0..00000000 --- a/src/langbot/pkg/provider/modelmgr/requesters/chatcmpl.py +++ /dev/null @@ -1,702 +0,0 @@ -from __future__ import annotations - -import asyncio -import typing - -import openai -import openai.types.chat.chat_completion as chat_completion_module -import httpx - -from .. import errors, requester -import langbot_plugin.api.entities.builtin.resource.tool as resource_tool -import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query -import langbot_plugin.api.entities.builtin.provider.message as provider_message - - -class OpenAIChatCompletions(requester.ProviderAPIRequester): - """OpenAI ChatCompletion API 请求器""" - - client: openai.AsyncClient - - default_config: dict[str, typing.Any] = { - 'base_url': 'https://api.openai.com/v1', - 'timeout': 120, - } - - async def initialize(self): - self.client = openai.AsyncClient( - api_key='', - base_url=self.requester_cfg['base_url'].replace(' ', ''), - timeout=self.requester_cfg['timeout'], - http_client=httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']), - ) - - def _mask_api_key(self, api_key: str | None) -> str: - if not api_key: - return '' - if len(api_key) <= 8: - return '****' - return f'{api_key[:4]}...{api_key[-4:]}' - - def _infer_model_type(self, model_id: str) -> str: - normalized_model_id = (model_id or '').lower() - embedding_keywords = ( - 'embedding', - 'embed', - 'bge-', - 'e5-', - 'm3e', - 'gte-', - 'multilingual-e5', - 'text-embedding', - ) - return 'embedding' if any(keyword in normalized_model_id for keyword in embedding_keywords) else 'llm' - - def _infer_model_abilities(self, item: dict[str, typing.Any], model_id: str) -> list[str]: - normalized_model_id = (model_id or '').lower() - abilities: set[str] = set() - - def _flatten(value: typing.Any) -> list[str]: - if value is None: - return [] - if isinstance(value, str): - return [value.lower()] - if isinstance(value, dict): - flattened: list[str] = [] - for nested_value in value.values(): - flattened.extend(_flatten(nested_value)) - return flattened - if isinstance(value, (list, tuple, set)): - flattened: list[str] = [] - for nested_value in value: - flattened.extend(_flatten(nested_value)) - return flattened - return [str(value).lower()] - - capability_tokens = _flatten(item.get('capabilities')) - capability_tokens.extend(_flatten(item.get('modalities'))) - capability_tokens.extend(_flatten(item.get('input_modalities'))) - capability_tokens.extend(_flatten(item.get('output_modalities'))) - capability_tokens.extend(_flatten(item.get('supported_generation_methods'))) - capability_tokens.extend(_flatten(item.get('supported_parameters'))) - capability_tokens.extend(_flatten(item.get('architecture'))) - - combined_tokens = capability_tokens + [normalized_model_id] - - vision_keywords = ( - 'vision', - 'image', - 'file', - 'video', - 'multimodal', - 'vl', - 'ocr', - 'omni', - ) - function_call_keywords = ( - 'function', - 'tool', - 'tools', - 'tool_choice', - 'tool_call', - 'tool-use', - 'tool_use', - ) - - if any(any(keyword in token for keyword in vision_keywords) for token in combined_tokens): - abilities.add('vision') - - if any(any(keyword in token for keyword in function_call_keywords) for token in combined_tokens): - abilities.add('func_call') - - return sorted(abilities) - - def _normalize_modalities(self, value: typing.Any) -> list[str]: - normalized: list[str] = [] - - def _collect(item: typing.Any): - if item is None: - return - if isinstance(item, str): - for part in item.replace('->', ',').replace('+', ',').split(','): - token = part.strip().lower() - if token and token not in normalized: - normalized.append(token) - return - if isinstance(item, dict): - for nested in item.values(): - _collect(nested) - return - if isinstance(item, (list, tuple, set)): - for nested in item: - _collect(nested) - return - - _collect(value) - return normalized - - def _extract_scan_metadata(self, item: dict[str, typing.Any], model_id: str) -> dict[str, typing.Any]: - display_name = item.get('name') - if not isinstance(display_name, str) or not display_name.strip() or display_name == model_id: - display_name = '' - - description = item.get('description') - if not isinstance(description, str) or not description.strip(): - description = '' - - context_length = item.get('context_length') - if context_length is None and isinstance(item.get('top_provider'), dict): - context_length = item['top_provider'].get('context_length') - - if not isinstance(context_length, int): - try: - context_length = int(context_length) if context_length is not None else None - except (TypeError, ValueError): - context_length = None - - input_modalities = self._normalize_modalities(item.get('input_modalities')) - output_modalities = self._normalize_modalities(item.get('output_modalities')) - - if isinstance(item.get('architecture'), dict): - if not input_modalities: - input_modalities = self._normalize_modalities(item['architecture'].get('input_modalities')) - if not output_modalities: - output_modalities = self._normalize_modalities(item['architecture'].get('output_modalities')) - - owned_by = item.get('owned_by') - if not isinstance(owned_by, str) or not owned_by.strip(): - owned_by = '' - - return { - 'display_name': display_name or None, - 'description': description or None, - 'context_length': context_length, - 'owned_by': owned_by or None, - 'input_modalities': input_modalities, - 'output_modalities': output_modalities, - } - - async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]: - headers = {} - if api_key: - headers['Authorization'] = f'Bearer {api_key}' - - models_url = f'{self.requester_cfg["base_url"].rstrip("/")}/models' - async with httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']) as client: - response = await client.get(models_url, headers=headers) - response.raise_for_status() - payload = response.json() - - models = [] - for item in payload.get('data', []): - model_id = item.get('id') - if not model_id: - continue - models.append( - { - 'id': model_id, - 'name': model_id, - 'type': self._infer_model_type(model_id), - 'abilities': self._infer_model_abilities(item, model_id), - **self._extract_scan_metadata(item, model_id), - } - ) - - models.sort(key=lambda item: (item['type'] != 'llm', item['name'].lower())) - return { - 'models': models, - 'debug': { - 'request': { - 'method': 'GET', - 'url': models_url, - 'headers': { - 'Authorization': f'Bearer {self._mask_api_key(api_key)}' if api_key else '', - }, - }, - 'response': payload, - }, - } - - async def _req( - self, - args: dict, - extra_body: dict = {}, - ) -> chat_completion_module.ChatCompletion: - return await self.client.chat.completions.create(**args, extra_body=extra_body) - - async def _req_stream( - self, - args: dict, - extra_body: dict = {}, - ): - async for chunk in await self.client.chat.completions.create(**args, extra_body=extra_body): - yield chunk - - async def _make_msg( - self, - chat_completion: chat_completion_module.ChatCompletion, - remove_think: bool = False, - ) -> provider_message.Message: - if not isinstance(chat_completion, chat_completion_module.ChatCompletion): - raise TypeError(f'Expected ChatCompletion, got {type(chat_completion).__name__}: {chat_completion[:16]}') - - chatcmpl_message = chat_completion.choices[0].message.model_dump() - - # 确保 role 字段存在且不为 None - if 'role' not in chatcmpl_message or chatcmpl_message['role'] is None: - chatcmpl_message['role'] = 'assistant' - - # 处理思维链 - content = chatcmpl_message.get('content', '') - reasoning_content = chatcmpl_message.get('reasoning_content', None) - - processed_content, _ = await self._process_thinking_content( - content=content, reasoning_content=reasoning_content, remove_think=remove_think - ) - - chatcmpl_message['content'] = processed_content - - # 移除 reasoning_content 字段,避免传递给 Message - if 'reasoning_content' in chatcmpl_message: - del chatcmpl_message['reasoning_content'] - - message = provider_message.Message(**chatcmpl_message) - - return message - - async def _process_thinking_content( - self, - content: str, - reasoning_content: str = None, - remove_think: bool = False, - ) -> tuple[str, str]: - """处理思维链内容 - - Args: - content: 原始内容 - reasoning_content: reasoning_content 字段内容 - remove_think: 是否移除思维链 - - Returns: - (处理后的内容, 提取的思维链内容) - """ - thinking_content = '' - - # 1. 从 reasoning_content 提取思维链 - if reasoning_content: - thinking_content = reasoning_content - - # 2. 从 content 中提取 标签内容 - if content and '' in content and '' in content: - import re - - think_pattern = r'(.*?)' - think_matches = re.findall(think_pattern, content, re.DOTALL) - if think_matches: - # 如果已有 reasoning_content,则追加 - if thinking_content: - thinking_content += '\n' + '\n'.join(think_matches) - else: - thinking_content = '\n'.join(think_matches) - # 移除 content 中的 标签 - content = re.sub(think_pattern, '', content, flags=re.DOTALL).strip() - - # 3. 根据 remove_think 参数决定是否保留思维链 - if remove_think: - return content, '' - else: - # 如果有思维链内容,将其以 格式添加到 content 开头 - if thinking_content: - content = f'\n{thinking_content}\n\n{content}'.strip() - return content, thinking_content - - async def _closure_stream( - self, - query: pipeline_query.Query, - req_messages: list[dict], - use_model: requester.RuntimeLLMModel, - use_funcs: list[resource_tool.LLMTool] = None, - extra_args: dict[str, typing.Any] = {}, - remove_think: bool = False, - ) -> provider_message.MessageChunk: - self.client.api_key = use_model.provider.token_mgr.get_token() - - args = {} - args['model'] = use_model.model_entity.name - - if use_funcs: - tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs) - if tools: - args['tools'] = tools - - # 设置此次请求中的messages - messages = req_messages.copy() - - # 检查vision - for msg in messages: - if 'content' in msg and isinstance(msg['content'], list): - for me in msg['content']: - if me['type'] == 'image_base64': - me['image_url'] = {'url': me['image_base64']} - me['type'] = 'image_url' - del me['image_base64'] - - args['messages'] = messages - args['stream'] = True - - # 流式处理状态 - # tool_calls_map: dict[str, provider_message.ToolCall] = {} - chunk_idx = 0 - thinking_started = False - thinking_ended = False - role = 'assistant' # 默认角色 - tool_id = '' - tool_name = '' - # accumulated_reasoning = '' # 仅用于判断何时结束思维链 - - async for chunk in self._req_stream(args, extra_body=extra_args): - # 解析 chunk 数据 - - if hasattr(chunk, 'choices') and chunk.choices: - choice = chunk.choices[0] - delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {} - - finish_reason = getattr(choice, 'finish_reason', None) - else: - delta = {} - finish_reason = None - # 从第一个 chunk 获取 role,后续使用这个 role - if 'role' in delta and delta['role']: - role = delta['role'] - - # 获取增量内容 - delta_content = delta.get('content', '') - reasoning_content = delta.get('reasoning_content', '') - - # 处理 reasoning_content - if reasoning_content: - # accumulated_reasoning += reasoning_content - # 如果设置了 remove_think,跳过 reasoning_content - if remove_think: - chunk_idx += 1 - continue - - # 第一次出现 reasoning_content,添加 开始标签 - if not thinking_started: - thinking_started = True - delta_content = '\n' + reasoning_content - else: - # 继续输出 reasoning_content - delta_content = reasoning_content - elif thinking_started and not thinking_ended and delta_content: - # reasoning_content 结束,normal content 开始,添加 结束标签 - thinking_ended = True - delta_content = '\n\n' + delta_content - - # 处理 content 中已有的 标签(如果需要移除) - # if delta_content and remove_think and '' in delta_content: - # import re - # - # # 移除 标签及其内容 - # delta_content = re.sub(r'.*?', '', delta_content, flags=re.DOTALL) - - # 处理工具调用增量 - # delta_tool_calls = None - if delta.get('tool_calls'): - for tool_call in delta['tool_calls']: - if tool_call['id'] and tool_call['function']['name']: - tool_id = tool_call['id'] - tool_name = tool_call['function']['name'] - else: - tool_call['id'] = tool_id - tool_call['function']['name'] = tool_name - if tool_call['type'] is None: - tool_call['type'] = 'function' - - # 跳过空的第一个 chunk(只有 role 没有内容) - if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'): - chunk_idx += 1 - continue - # 构建 MessageChunk - 只包含增量内容 - chunk_data = { - 'role': role, - 'content': delta_content if delta_content else None, - 'tool_calls': delta.get('tool_calls'), - 'is_final': bool(finish_reason), - } - - # 移除 None 值 - chunk_data = {k: v for k, v in chunk_data.items() if v is not None} - - yield provider_message.MessageChunk(**chunk_data) - chunk_idx += 1 - - async def _closure( - self, - query: pipeline_query.Query, - req_messages: list[dict], - use_model: requester.RuntimeLLMModel, - use_funcs: list[resource_tool.LLMTool] = None, - extra_args: dict[str, typing.Any] = {}, - remove_think: bool = False, - ) -> tuple[provider_message.Message, dict]: - self.client.api_key = use_model.provider.token_mgr.get_token() - - args = {} - args['model'] = use_model.model_entity.name - - if use_funcs: - tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs) - - if tools: - args['tools'] = tools - - # 设置此次请求中的messages - messages = req_messages.copy() - - # 检查vision - for msg in messages: - if 'content' in msg and isinstance(msg['content'], list): - for me in msg['content']: - if me['type'] == 'image_base64': - me['image_url'] = {'url': me['image_base64']} - me['type'] = 'image_url' - del me['image_base64'] - - args['messages'] = messages - - # 发送请求 - - resp = await self._req(args, extra_body=extra_args) - # 处理请求结果 - message = await self._make_msg(resp, remove_think) - - # Extract token usage from response - usage_info = {} - if hasattr(resp, 'usage') and resp.usage: - usage_info['input_tokens'] = resp.usage.prompt_tokens or 0 - usage_info['output_tokens'] = resp.usage.completion_tokens or 0 - usage_info['total_tokens'] = resp.usage.total_tokens or 0 - - return message, usage_info - - async def invoke_llm( - self, - query: pipeline_query.Query, - model: requester.RuntimeLLMModel, - messages: typing.List[provider_message.Message], - funcs: typing.List[resource_tool.LLMTool] = None, - extra_args: dict[str, typing.Any] = {}, - remove_think: bool = False, - ) -> tuple[provider_message.Message, dict]: - """Invoke LLM and return message with usage info""" - req_messages = [] # req_messages 仅用于类内,外部同步由 query.messages 进行 - for m in messages: - msg_dict = m.dict(exclude_none=True) - content = msg_dict.get('content') - if isinstance(content, list): - # 检查 content 列表中是否每个部分都是文本 - if all(isinstance(part, dict) and part.get('type') == 'text' for part in content): - # 将所有文本部分合并为一个字符串 - msg_dict['content'] = '\n'.join(part['text'] for part in content) - req_messages.append(msg_dict) - - try: - msg, usage_info = await self._closure( - query=query, - req_messages=req_messages, - use_model=model, - use_funcs=funcs, - extra_args=extra_args, - remove_think=remove_think, - ) - return msg, usage_info - except asyncio.TimeoutError: - raise errors.RequesterError('请求超时') - except openai.BadRequestError as e: - error_message = str(e.message) if hasattr(e, 'message') else str(e) - if 'context_length_exceeded' in str(e): - raise errors.RequesterError(f'上文过长,请重置会话: {error_message}') - else: - raise errors.RequesterError(f'请求参数错误: {error_message}') - except openai.AuthenticationError as e: - error_message = str(e.message) if hasattr(e, 'message') else str(e) - raise errors.RequesterError(f'无效的 api-key: {error_message}') - except openai.NotFoundError as e: - error_message = str(e.message) if hasattr(e, 'message') else str(e) - raise errors.RequesterError(f'请求路径错误: {error_message}') - except openai.RateLimitError as e: - error_message = str(e.message) if hasattr(e, 'message') else str(e) - raise errors.RequesterError(f'请求过于频繁或余额不足: {error_message}') - except openai.APIConnectionError as e: - error_message = f'连接错误: {str(e)}' - raise errors.RequesterError(error_message) - except openai.APIError as e: - error_message = str(e.message) if hasattr(e, 'message') else str(e) - raise errors.RequesterError(f'请求错误: {error_message}') - - async def invoke_embedding( - self, - model: requester.RuntimeEmbeddingModel, - input_text: list[str], - extra_args: dict[str, typing.Any] = {}, - ) -> tuple[list[list[float]], dict]: - """调用 Embedding API, returns (embeddings, usage_info)""" - self.client.api_key = model.provider.token_mgr.get_token() - - args = { - 'model': model.model_entity.name, - 'input': input_text, - } - - if model.model_entity.extra_args: - args.update(model.model_entity.extra_args) - - args.update(extra_args) - - try: - resp = await self.client.embeddings.create(**args) - - # Extract usage info - usage_info = {} - if hasattr(resp, 'usage') and resp.usage: - usage_info['prompt_tokens'] = resp.usage.prompt_tokens or 0 - usage_info['total_tokens'] = resp.usage.total_tokens or 0 - - return [d.embedding for d in resp.data], usage_info - except asyncio.TimeoutError: - raise errors.RequesterError('请求超时') - except openai.BadRequestError as e: - raise errors.RequesterError(f'请求参数错误: {e.message}') - - async def invoke_llm_stream( - self, - query: pipeline_query.Query, - model: requester.RuntimeLLMModel, - messages: typing.List[provider_message.Message], - funcs: typing.List[resource_tool.LLMTool] = None, - extra_args: dict[str, typing.Any] = {}, - remove_think: bool = False, - ) -> provider_message.MessageChunk: - req_messages = [] # req_messages 仅用于类内,外部同步由 query.messages 进行 - for m in messages: - msg_dict = m.dict(exclude_none=True) - content = msg_dict.get('content') - if isinstance(content, list): - # 检查 content 列表中是否每个部分都是文本 - if all(isinstance(part, dict) and part.get('type') == 'text' for part in content): - # 将所有文本部分合并为一个字符串 - msg_dict['content'] = '\n'.join(part['text'] for part in content) - req_messages.append(msg_dict) - - try: - async for item in self._closure_stream( - query=query, - req_messages=req_messages, - use_model=model, - use_funcs=funcs, - extra_args=extra_args, - remove_think=remove_think, - ): - yield item - - except asyncio.TimeoutError: - raise errors.RequesterError('请求超时') - except openai.BadRequestError as e: - if 'context_length_exceeded' in e.message: - raise errors.RequesterError(f'上文过长,请重置会话: {e.message}') - else: - raise errors.RequesterError(f'请求参数错误: {e.message}') - except openai.AuthenticationError as e: - raise errors.RequesterError(f'无效的 api-key: {e.message}') - except openai.NotFoundError as e: - raise errors.RequesterError(f'请求路径错误: {e.message}') - except openai.RateLimitError as e: - raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}') - except openai.APIError as e: - raise errors.RequesterError(f'请求错误: {e.message}') - - async def invoke_rerank( - self, - model: requester.RuntimeRerankModel, - query: str, - documents: typing.List[str], - extra_args: dict[str, typing.Any] = {}, - ) -> typing.List[dict]: - """Standard /rerank endpoint (Jina/Cohere/SiliconFlow/Voyage/DashScope compatible) - - Supports extra_args from model.extra_args: - - rerank_url: full URL override (e.g. "https://dashscope.aliyuncs.com/compatible-api/v1/reranks") - - rerank_path: path override appended to base_url (e.g. "reranks" instead of default "rerank") - - Any other fields are merged into the request payload. - """ - api_key = model.provider.token_mgr.get_token() - base_url = self.requester_cfg.get('base_url', '').rstrip('/') - timeout = self.requester_cfg.get('timeout', 120) - - merged_args = {} - if model.model_entity.extra_args: - merged_args.update(model.model_entity.extra_args) - if extra_args: - merged_args.update(extra_args) - - rerank_url = merged_args.pop('rerank_url', None) - rerank_path = merged_args.pop('rerank_path', 'rerank') - if not rerank_url: - rerank_url = f'{base_url}/{rerank_path}' - - headers = { - 'Content-Type': 'application/json', - 'Authorization': f'Bearer {api_key}', - } - - payload = { - 'model': model.model_entity.name, - 'query': query, - 'documents': documents[:64], - 'top_n': min(len(documents), 64), - } - - if merged_args: - payload.update(merged_args) - - try: - async with httpx.AsyncClient(trust_env=True, timeout=timeout) as client: - resp = await client.post(rerank_url, headers=headers, json=payload) - resp.raise_for_status() - data = resp.json() - - results = self._parse_rerank_response(data) - - if results: - scores = [r.get('relevance_score', 0.0) for r in results] - min_score = min(scores) - max_score = max(scores) - if max_score - min_score > 1e-6: - for r in results: - r['relevance_score'] = (r['relevance_score'] - min_score) / (max_score - min_score) - - return results - except httpx.HTTPStatusError as e: - raise errors.RequesterError(f'Rerank request failed: {e.response.status_code} - {e.response.text}') - except httpx.TimeoutException: - raise errors.RequesterError('Rerank request timed out') - except Exception as e: - raise errors.RequesterError(f'Rerank request error: {str(e)}') - - @staticmethod - def _parse_rerank_response(data: dict) -> typing.List[dict]: - """Parse rerank response from various providers. - - Handles: - - Jina/Cohere/SiliconFlow: {"results": [{"index", "relevance_score"}]} - - Voyage AI: {"data": [{"index", "relevance_score"}]} - - DashScope: {"output": {"results": [{"index", "relevance_score"}]}} - """ - if 'results' in data: - return data['results'] - if 'data' in data: - return data['data'] - if 'output' in data and isinstance(data['output'], dict): - return data['output'].get('results', []) - return [] diff --git a/src/langbot/pkg/provider/modelmgr/requesters/chatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/chatcmpl.yaml index 21bd6a05..b77a352c 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/chatcmpl.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/chatcmpl.yaml @@ -7,6 +7,7 @@ metadata: zh_Hans: OpenAI icon: openai.svg spec: + litellm_provider: openai config: - name: base_url label: diff --git a/src/langbot/pkg/provider/modelmgr/requesters/coherererank.yaml b/src/langbot/pkg/provider/modelmgr/requesters/coherererank.yaml index f1ca209b..504442ea 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/coherererank.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/coherererank.yaml @@ -7,6 +7,7 @@ metadata: zh_Hans: Cohere icon: cohere.svg spec: + litellm_provider: cohere config: - name: base_url label: diff --git a/src/langbot/pkg/provider/modelmgr/requesters/compsharechatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/compsharechatcmpl.py deleted file mode 100644 index d272e721..00000000 --- a/src/langbot/pkg/provider/modelmgr/requesters/compsharechatcmpl.py +++ /dev/null @@ -1,17 +0,0 @@ -from __future__ import annotations - -import typing -import openai - -from . import chatcmpl - - -class CompShareChatCompletions(chatcmpl.OpenAIChatCompletions): - """CompShare ChatCompletion API 请求器""" - - client: openai.AsyncClient - - default_config: dict[str, typing.Any] = { - 'base_url': 'https://api.modelverse.cn/v1', - 'timeout': 120, - } diff --git a/src/langbot/pkg/provider/modelmgr/requesters/compsharechatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/compsharechatcmpl.yaml index 92fcafdc..8958eb41 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/compsharechatcmpl.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/compsharechatcmpl.yaml @@ -7,6 +7,7 @@ metadata: zh_Hans: 优云智算 icon: compshare.png spec: + litellm_provider: openai config: - name: base_url label: @@ -24,6 +25,8 @@ spec: default: 120 support_type: - llm + - text-embedding + - rerank provider_category: maas execution: python: diff --git a/src/langbot/pkg/provider/modelmgr/requesters/deepseekchatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/deepseekchatcmpl.py deleted file mode 100644 index 5bcbd40c..00000000 --- a/src/langbot/pkg/provider/modelmgr/requesters/deepseekchatcmpl.py +++ /dev/null @@ -1,67 +0,0 @@ -from __future__ import annotations - -import typing - -from . import chatcmpl -from .. import errors, requester -import langbot_plugin.api.entities.builtin.resource.tool as resource_tool -import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query -import langbot_plugin.api.entities.builtin.provider.message as provider_message - - -class DeepseekChatCompletions(chatcmpl.OpenAIChatCompletions): - """Deepseek ChatCompletion API 请求器""" - - default_config: dict[str, typing.Any] = { - 'base_url': 'https://api.deepseek.com', - 'timeout': 120, - } - - async def _closure( - self, - query: pipeline_query.Query, - req_messages: list[dict], - use_model: requester.RuntimeLLMModel, - use_funcs: list[resource_tool.LLMTool] = None, - extra_args: dict[str, typing.Any] = {}, - remove_think: bool = False, - ) -> tuple[provider_message.Message, dict]: - self.client.api_key = use_model.provider.token_mgr.get_token() - - args = {} - args['model'] = use_model.model_entity.name - - if use_funcs: - tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs) - - if tools: - args['tools'] = tools - - # 设置此次请求中的messages - messages = req_messages - - # deepseek 不支持多模态,把content都转换成纯文字 - for m in messages: - if 'content' in m and isinstance(m['content'], list): - m['content'] = ' '.join([c['text'] for c in m['content'] if 'text' in c]) - - args['messages'] = messages - - # 发送请求 - resp = await self._req(args, extra_body=extra_args) - - # print(resp) - - if resp is None: - raise errors.RequesterError('接口返回为空,请确定模型提供商服务是否正常') - # 处理请求结果 - message = await self._make_msg(resp, remove_think) - - # Extract token usage from response - usage_info = {} - if hasattr(resp, 'usage') and resp.usage: - usage_info['input_tokens'] = resp.usage.prompt_tokens or 0 - usage_info['output_tokens'] = resp.usage.completion_tokens or 0 - usage_info['total_tokens'] = resp.usage.total_tokens or 0 - - return message, usage_info diff --git a/src/langbot/pkg/provider/modelmgr/requesters/deepseekchatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/deepseekchatcmpl.yaml index 8ef1fcf9..c8da83c7 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/deepseekchatcmpl.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/deepseekchatcmpl.yaml @@ -7,6 +7,7 @@ metadata: zh_Hans: DeepSeek icon: deepseek.svg spec: + litellm_provider: deepseek config: - name: base_url label: @@ -24,6 +25,8 @@ spec: default: 120 support_type: - llm + - text-embedding + - rerank provider_category: manufacturer execution: python: diff --git a/src/langbot/pkg/provider/modelmgr/requesters/doubao.svg b/src/langbot/pkg/provider/modelmgr/requesters/doubao.svg new file mode 100644 index 00000000..e47c7232 --- /dev/null +++ b/src/langbot/pkg/provider/modelmgr/requesters/doubao.svg @@ -0,0 +1,4 @@ + + + 豆包 + diff --git a/src/langbot/pkg/provider/modelmgr/requesters/doubaochatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/doubaochatcmpl.yaml new file mode 100644 index 00000000..0b608b96 --- /dev/null +++ b/src/langbot/pkg/provider/modelmgr/requesters/doubaochatcmpl.yaml @@ -0,0 +1,30 @@ +apiVersion: v1 +kind: LLMAPIRequester +metadata: + name: doubao-chat-completions + label: + en_US: ByteDance Doubao + zh_Hans: 字节豆包 + icon: doubao.svg +spec: + litellm_provider: openai + config: + - name: base_url + label: + en_US: Base URL + zh_Hans: 基础 URL + type: string + required: true + default: https://ark.cn-beijing.volces.com/api/v3 + - name: timeout + label: + en_US: Timeout + zh_Hans: 超时时间 + type: integer + required: true + default: 120 + support_type: + - llm + - text-embedding + - rerank + provider_category: manufacturer diff --git a/src/langbot/pkg/provider/modelmgr/requesters/geminichatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/geminichatcmpl.py deleted file mode 100644 index 956b49f6..00000000 --- a/src/langbot/pkg/provider/modelmgr/requesters/geminichatcmpl.py +++ /dev/null @@ -1,205 +0,0 @@ -from __future__ import annotations - -import typing -import httpx - -from . import chatcmpl - -import uuid - -from .. import requester -import langbot_plugin.api.entities.builtin.provider.message as provider_message -import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query -import langbot_plugin.api.entities.builtin.resource.tool as resource_tool - - -class GeminiChatCompletions(chatcmpl.OpenAIChatCompletions): - """Google Gemini API 请求器""" - - default_config: dict[str, typing.Any] = { - 'base_url': 'https://generativelanguage.googleapis.com/v1beta/openai', - 'timeout': 120, - } - - async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]: - models_url = 'https://generativelanguage.googleapis.com/v1beta/models' - params = {'key': api_key} if api_key else {} - - all_models: list[dict[str, typing.Any]] = [] - next_page_token = '' - last_payload: dict[str, typing.Any] = {} - - async with httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']) as client: - while True: - request_params = dict(params) - if next_page_token: - request_params['pageToken'] = next_page_token - - response = await client.get(models_url, params=request_params) - response.raise_for_status() - payload = response.json() - last_payload = payload - - for item in payload.get('models', []): - model_name = item.get('name', '') - model_id = model_name.replace('models/', '', 1) - if not model_id: - continue - - supported_methods = item.get('supportedGenerationMethods', []) or [] - if 'embedContent' in supported_methods and 'generateContent' not in supported_methods: - model_type = 'embedding' - else: - model_type = 'llm' - - all_models.append( - { - 'id': model_id, - 'name': model_id, - 'type': model_type, - 'abilities': self._infer_model_abilities(item, model_id), - 'display_name': item.get('displayName') or None, - 'description': item.get('description') or None, - 'context_length': item.get('inputTokenLimit'), - 'input_modalities': self._normalize_modalities(item.get('inputModalities')), - 'output_modalities': self._normalize_modalities(item.get('outputModalities')), - } - ) - - next_page_token = payload.get('nextPageToken', '') - if not next_page_token: - break - - all_models.sort(key=lambda item: (item['type'] != 'llm', item['name'].lower())) - return { - 'models': all_models, - 'debug': { - 'request': { - 'method': 'GET', - 'url': models_url, - 'query': {'key': self._mask_api_key(api_key)} if api_key else {}, - }, - 'response': last_payload, - }, - } - - async def _closure_stream( - self, - query: pipeline_query.Query, - req_messages: list[dict], - use_model: requester.RuntimeLLMModel, - use_funcs: list[resource_tool.LLMTool] = None, - extra_args: dict[str, typing.Any] = {}, - remove_think: bool = False, - ) -> provider_message.MessageChunk: - self.client.api_key = use_model.provider.token_mgr.get_token() - - args = {} - args['model'] = use_model.model_entity.name - - if use_funcs: - tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs) - if tools: - args['tools'] = tools - - # 设置此次请求中的messages - messages = req_messages.copy() - - # 检查vision - for msg in messages: - if 'content' in msg and isinstance(msg['content'], list): - for me in msg['content']: - if me['type'] == 'image_base64': - me['image_url'] = {'url': me['image_base64']} - me['type'] = 'image_url' - del me['image_base64'] - - args['messages'] = messages - args['stream'] = True - - # 流式处理状态 - # tool_calls_map: dict[str, provider_message.ToolCall] = {} - chunk_idx = 0 - thinking_started = False - thinking_ended = False - role = 'assistant' # 默认角色 - tool_id = '' - tool_name = '' - # accumulated_reasoning = '' # 仅用于判断何时结束思维链 - - async for chunk in self._req_stream(args, extra_body=extra_args): - # 解析 chunk 数据 - - if hasattr(chunk, 'choices') and chunk.choices: - choice = chunk.choices[0] - delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {} - - finish_reason = getattr(choice, 'finish_reason', None) - else: - delta = {} - finish_reason = None - # 从第一个 chunk 获取 role,后续使用这个 role - if 'role' in delta and delta['role']: - role = delta['role'] - - # 获取增量内容 - delta_content = delta.get('content', '') - reasoning_content = delta.get('reasoning_content', '') - - # 处理 reasoning_content - if reasoning_content: - # accumulated_reasoning += reasoning_content - # 如果设置了 remove_think,跳过 reasoning_content - if remove_think: - chunk_idx += 1 - continue - - # 第一次出现 reasoning_content,添加 开始标签 - if not thinking_started: - thinking_started = True - delta_content = '\n' + reasoning_content - else: - # 继续输出 reasoning_content - delta_content = reasoning_content - elif thinking_started and not thinking_ended and delta_content: - # reasoning_content 结束,normal content 开始,添加 结束标签 - thinking_ended = True - delta_content = '\n\n' + delta_content - - # 处理 content 中已有的 标签(如果需要移除) - # if delta_content and remove_think and '' in delta_content: - # import re - # - # # 移除 标签及其内容 - # delta_content = re.sub(r'.*?', '', delta_content, flags=re.DOTALL) - - # 处理工具调用增量 - # delta_tool_calls = None - if delta.get('tool_calls'): - for tool_call in delta['tool_calls']: - if tool_call['id'] == '' and tool_id == '': - tool_id = str(uuid.uuid4()) - if tool_call['function']['name']: - tool_name = tool_call['function']['name'] - tool_call['id'] = tool_id - tool_call['function']['name'] = tool_name - if tool_call['type'] is None: - tool_call['type'] = 'function' - - # 跳过空的第一个 chunk(只有 role 没有内容) - if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'): - chunk_idx += 1 - continue - # 构建 MessageChunk - 只包含增量内容 - chunk_data = { - 'role': role, - 'content': delta_content if delta_content else None, - 'tool_calls': delta.get('tool_calls'), - 'is_final': bool(finish_reason), - } - - # 移除 None 值 - chunk_data = {k: v for k, v in chunk_data.items() if v is not None} - - yield provider_message.MessageChunk(**chunk_data) - chunk_idx += 1 diff --git a/src/langbot/pkg/provider/modelmgr/requesters/geminichatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/geminichatcmpl.yaml index fdebe9b9..7c7ca308 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/geminichatcmpl.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/geminichatcmpl.yaml @@ -7,6 +7,7 @@ metadata: zh_Hans: Google Gemini icon: gemini.svg spec: + litellm_provider: gemini config: - name: base_url label: @@ -24,6 +25,8 @@ spec: default: 120 support_type: - llm + - text-embedding + - rerank provider_category: manufacturer execution: python: diff --git a/src/langbot/pkg/provider/modelmgr/requesters/giteeaichatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/giteeaichatcmpl.py deleted file mode 100644 index 4e295e9f..00000000 --- a/src/langbot/pkg/provider/modelmgr/requesters/giteeaichatcmpl.py +++ /dev/null @@ -1,15 +0,0 @@ -from __future__ import annotations - - -import typing - -from . import ppiochatcmpl - - -class GiteeAIChatCompletions(ppiochatcmpl.PPIOChatCompletions): - """Gitee AI ChatCompletions API 请求器""" - - default_config: dict[str, typing.Any] = { - 'base_url': 'https://ai.gitee.com/v1', - 'timeout': 120, - } diff --git a/src/langbot/pkg/provider/modelmgr/requesters/giteeaichatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/giteeaichatcmpl.yaml index b7b158a7..f898889a 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/giteeaichatcmpl.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/giteeaichatcmpl.yaml @@ -7,6 +7,7 @@ metadata: zh_Hans: Gitee AI icon: giteeai.svg spec: + litellm_provider: openai config: - name: base_url label: diff --git a/src/langbot/pkg/provider/modelmgr/requesters/groq.svg b/src/langbot/pkg/provider/modelmgr/requesters/groq.svg new file mode 100644 index 00000000..7c84ba68 --- /dev/null +++ b/src/langbot/pkg/provider/modelmgr/requesters/groq.svg @@ -0,0 +1,4 @@ + + + Groq + diff --git a/src/langbot/pkg/provider/modelmgr/requesters/groqchatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/groqchatcmpl.yaml new file mode 100644 index 00000000..74632d36 --- /dev/null +++ b/src/langbot/pkg/provider/modelmgr/requesters/groqchatcmpl.yaml @@ -0,0 +1,30 @@ +apiVersion: v1 +kind: LLMAPIRequester +metadata: + name: groq-chat-completions + label: + en_US: Groq + zh_Hans: Groq + icon: groq.svg +spec: + litellm_provider: groq + config: + - name: base_url + label: + en_US: Base URL + zh_Hans: 基础 URL + type: string + required: true + default: https://api.groq.com/openai/v1 + - name: timeout + label: + en_US: Timeout + zh_Hans: 超时时间 + type: integer + required: true + default: 120 + support_type: + - llm + - text-embedding + - rerank + provider_category: manufacturer diff --git a/src/langbot/pkg/provider/modelmgr/requesters/iflytek.svg b/src/langbot/pkg/provider/modelmgr/requesters/iflytek.svg new file mode 100644 index 00000000..7498b149 --- /dev/null +++ b/src/langbot/pkg/provider/modelmgr/requesters/iflytek.svg @@ -0,0 +1,5 @@ + + + iFlytek + Spark + diff --git a/src/langbot/pkg/provider/modelmgr/requesters/iflytekchatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/iflytekchatcmpl.yaml new file mode 100644 index 00000000..a02f38d1 --- /dev/null +++ b/src/langbot/pkg/provider/modelmgr/requesters/iflytekchatcmpl.yaml @@ -0,0 +1,30 @@ +apiVersion: v1 +kind: LLMAPIRequester +metadata: + name: iflytek-chat-completions + label: + en_US: iFlytek Spark + zh_Hans: 讯飞星火 + icon: iflytek.svg +spec: + litellm_provider: openai + config: + - name: base_url + label: + en_US: Base URL + zh_Hans: 基础 URL + type: string + required: true + default: https://spark-api-open.xf-yun.com/v1 + - name: timeout + label: + en_US: Timeout + zh_Hans: 超时时间 + type: integer + required: true + default: 120 + support_type: + - llm + - text-embedding + - rerank + provider_category: manufacturer diff --git a/src/langbot/pkg/provider/modelmgr/requesters/jiekouaichatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/jiekouaichatcmpl.py deleted file mode 100644 index 305ae21f..00000000 --- a/src/langbot/pkg/provider/modelmgr/requesters/jiekouaichatcmpl.py +++ /dev/null @@ -1,208 +0,0 @@ -from __future__ import annotations - -import openai -import typing - -from . import chatcmpl -from .. import requester -import openai.types.chat.chat_completion as chat_completion -import re -import langbot_plugin.api.entities.builtin.provider.message as provider_message -import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query -import langbot_plugin.api.entities.builtin.resource.tool as resource_tool - - -class JieKouAIChatCompletions(chatcmpl.OpenAIChatCompletions): - """接口 AI ChatCompletion API 请求器""" - - client: openai.AsyncClient - - default_config: dict[str, typing.Any] = { - 'base_url': 'https://api.jiekou.ai/openai', - 'timeout': 120, - } - - is_think: bool = False - - async def _make_msg( - self, - chat_completion: chat_completion.ChatCompletion, - remove_think: bool, - ) -> provider_message.Message: - chatcmpl_message = chat_completion.choices[0].message.model_dump() - # print(chatcmpl_message.keys(), chatcmpl_message.values()) - - # 确保 role 字段存在且不为 None - if 'role' not in chatcmpl_message or chatcmpl_message['role'] is None: - chatcmpl_message['role'] = 'assistant' - - reasoning_content = chatcmpl_message['reasoning_content'] if 'reasoning_content' in chatcmpl_message else None - - # deepseek的reasoner模型 - chatcmpl_message['content'] = await self._process_thinking_content( - chatcmpl_message['content'], reasoning_content, remove_think - ) - - # 移除 reasoning_content 字段,避免传递给 Message - if 'reasoning_content' in chatcmpl_message: - del chatcmpl_message['reasoning_content'] - - message = provider_message.Message(**chatcmpl_message) - - return message - - async def _process_thinking_content( - self, - content: str, - reasoning_content: str = None, - remove_think: bool = False, - ) -> tuple[str, str]: - """处理思维链内容 - - Args: - content: 原始内容 - reasoning_content: reasoning_content 字段内容 - remove_think: 是否移除思维链 - - Returns: - 处理后的内容 - """ - if remove_think: - content = re.sub(r'.*?', '', content, flags=re.DOTALL) - else: - if reasoning_content is not None: - content = '\n' + reasoning_content + '\n\n' + content - return content - - async def _make_msg_chunk( - self, - delta: dict[str, typing.Any], - idx: int, - ) -> provider_message.MessageChunk: - # 处理流式chunk和完整响应的差异 - # print(chat_completion.choices[0]) - - # 确保 role 字段存在且不为 None - if 'role' not in delta or delta['role'] is None: - delta['role'] = 'assistant' - - reasoning_content = delta['reasoning_content'] if 'reasoning_content' in delta else None - - delta['content'] = '' if delta['content'] is None else delta['content'] - # print(reasoning_content) - - # deepseek的reasoner模型 - - if reasoning_content is not None: - delta['content'] += reasoning_content - - message = provider_message.MessageChunk(**delta) - - return message - - async def _closure_stream( - self, - query: pipeline_query.Query, - req_messages: list[dict], - use_model: requester.RuntimeLLMModel, - use_funcs: list[resource_tool.LLMTool] = None, - extra_args: dict[str, typing.Any] = {}, - remove_think: bool = False, - ) -> provider_message.Message | typing.AsyncGenerator[provider_message.MessageChunk, None]: - self.client.api_key = use_model.provider.token_mgr.get_token() - - args = {} - args['model'] = use_model.model_entity.name - - if use_funcs: - tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs) - - if tools: - args['tools'] = tools - - # 设置此次请求中的messages - messages = req_messages.copy() - - # 检查vision - for msg in messages: - if 'content' in msg and isinstance(msg['content'], list): - for me in msg['content']: - if me['type'] == 'image_base64': - me['image_url'] = {'url': me['image_base64']} - me['type'] = 'image_url' - del me['image_base64'] - - args['messages'] = messages - args['stream'] = True - - # tool_calls_map: dict[str, provider_message.ToolCall] = {} - chunk_idx = 0 - thinking_started = False - thinking_ended = False - role = 'assistant' # 默认角色 - async for chunk in self._req_stream(args, extra_body=extra_args): - # 解析 chunk 数据 - if hasattr(chunk, 'choices') and chunk.choices: - choice = chunk.choices[0] - delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {} - finish_reason = getattr(choice, 'finish_reason', None) - else: - delta = {} - finish_reason = None - - # 从第一个 chunk 获取 role,后续使用这个 role - if 'role' in delta and delta['role']: - role = delta['role'] - - # 获取增量内容 - delta_content = delta.get('content', '') - # reasoning_content = delta.get('reasoning_content', '') - - if remove_think: - if delta['content'] is not None: - if '' in delta['content'] and not thinking_started and not thinking_ended: - thinking_started = True - continue - elif delta['content'] == r'' and not thinking_ended: - thinking_ended = True - continue - elif thinking_ended and delta['content'] == '\n\n' and thinking_started: - thinking_started = False - continue - elif thinking_started and not thinking_ended: - continue - - # delta_tool_calls = None - if delta.get('tool_calls'): - for tool_call in delta['tool_calls']: - if tool_call['id'] and tool_call['function']['name']: - tool_id = tool_call['id'] - tool_name = tool_call['function']['name'] - - if tool_call['id'] is None: - tool_call['id'] = tool_id - if tool_call['function']['name'] is None: - tool_call['function']['name'] = tool_name - if tool_call['function']['arguments'] is None: - tool_call['function']['arguments'] = '' - if tool_call['type'] is None: - tool_call['type'] = 'function' - - # 跳过空的第一个 chunk(只有 role 没有内容) - if chunk_idx == 0 and not delta_content and not delta.get('tool_calls'): - chunk_idx += 1 - continue - - # 构建 MessageChunk - 只包含增量内容 - chunk_data = { - 'role': role, - 'content': delta_content if delta_content else None, - 'tool_calls': delta.get('tool_calls'), - 'is_final': bool(finish_reason), - } - - # 移除 None 值 - chunk_data = {k: v for k, v in chunk_data.items() if v is not None} - - yield provider_message.MessageChunk(**chunk_data) - chunk_idx += 1 diff --git a/src/langbot/pkg/provider/modelmgr/requesters/jiekouaichatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/jiekouaichatcmpl.yaml index 3c791d73..60ed9840 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/jiekouaichatcmpl.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/jiekouaichatcmpl.yaml @@ -7,6 +7,7 @@ metadata: zh_Hans: 接口 AI icon: jiekouai.png spec: + litellm_provider: openai config: - name: base_url label: diff --git a/src/langbot/pkg/provider/modelmgr/requesters/jinarerank.yaml b/src/langbot/pkg/provider/modelmgr/requesters/jinarerank.yaml index 3b448e38..87c6b72d 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/jinarerank.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/jinarerank.yaml @@ -7,6 +7,7 @@ metadata: zh_Hans: Jina icon: jina.svg spec: + litellm_provider: openai config: - name: base_url label: diff --git a/src/langbot/pkg/provider/modelmgr/requesters/litellmchat.py b/src/langbot/pkg/provider/modelmgr/requesters/litellmchat.py index ae776e4d..46f81179 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/litellmchat.py +++ b/src/langbot/pkg/provider/modelmgr/requesters/litellmchat.py @@ -152,6 +152,10 @@ class LiteLLMRequester(requester.ProviderAPIRequester): args['stream'] = True args['stream_options'] = {'include_usage': True} self._build_common_args(args) + + # Apply model-level extra_args first, then call-level extra_args + if model.model_entity.extra_args: + args.update(model.model_entity.extra_args) args.update(extra_args) if funcs: @@ -239,9 +243,15 @@ class LiteLLMRequester(requester.ProviderAPIRequester): delta_content = delta.get('content', '') reasoning_content = delta.get('reasoning_content', '') + # Handle reasoning_content based on remove_think flag if reasoning_content: - chunk_idx += 1 - continue + if remove_think: + # Skip reasoning content when remove_think is True + chunk_idx += 1 + continue + else: + # Use reasoning_content as the displayed content + delta_content = reasoning_content if chunk_idx == 0 and not delta_content and not delta.get('tool_calls'): chunk_idx += 1 diff --git a/src/langbot/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.py deleted file mode 100644 index c9060c1b..00000000 --- a/src/langbot/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.py +++ /dev/null @@ -1,17 +0,0 @@ -from __future__ import annotations - -import typing -import openai - -from . import chatcmpl - - -class LmStudioChatCompletions(chatcmpl.OpenAIChatCompletions): - """LMStudio ChatCompletion API 请求器""" - - client: openai.AsyncClient - - default_config: dict[str, typing.Any] = { - 'base_url': 'http://127.0.0.1:1234/v1', - 'timeout': 120, - } diff --git a/src/langbot/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.yaml index 81dc82cf..11570903 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.yaml @@ -7,6 +7,7 @@ metadata: zh_Hans: LM Studio icon: lmstudio.webp spec: + litellm_provider: openai config: - name: base_url label: diff --git a/src/langbot/pkg/provider/modelmgr/requesters/mimo.svg b/src/langbot/pkg/provider/modelmgr/requesters/mimo.svg new file mode 100644 index 00000000..5d9b21dc --- /dev/null +++ b/src/langbot/pkg/provider/modelmgr/requesters/mimo.svg @@ -0,0 +1,4 @@ + + + MiMo + diff --git a/src/langbot/pkg/provider/modelmgr/requesters/mimochatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/mimochatcmpl.yaml new file mode 100644 index 00000000..a871145b --- /dev/null +++ b/src/langbot/pkg/provider/modelmgr/requesters/mimochatcmpl.yaml @@ -0,0 +1,30 @@ +apiVersion: v1 +kind: LLMAPIRequester +metadata: + name: mimo-chat-completions + label: + en_US: Xiaomi MiMo + zh_Hans: 小米 MiMo + icon: mimo.svg +spec: + litellm_provider: openai + config: + - name: base_url + label: + en_US: Base URL + zh_Hans: 基础 URL + type: string + required: true + default: https://api.xiaomimimo.com/v1 + - name: timeout + label: + en_US: Timeout + zh_Hans: 超时时间 + type: integer + required: true + default: 120 + support_type: + - llm + - text-embedding + - rerank + provider_category: manufacturer diff --git a/src/langbot/pkg/provider/modelmgr/requesters/minimax.svg b/src/langbot/pkg/provider/modelmgr/requesters/minimax.svg new file mode 100644 index 00000000..1afeadc3 --- /dev/null +++ b/src/langbot/pkg/provider/modelmgr/requesters/minimax.svg @@ -0,0 +1,4 @@ + + + MiniMax + diff --git a/src/langbot/pkg/provider/modelmgr/requesters/minimaxchatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/minimaxchatcmpl.yaml new file mode 100644 index 00000000..c92bce69 --- /dev/null +++ b/src/langbot/pkg/provider/modelmgr/requesters/minimaxchatcmpl.yaml @@ -0,0 +1,30 @@ +apiVersion: v1 +kind: LLMAPIRequester +metadata: + name: minimax-chat-completions + label: + en_US: MiniMax + zh_Hans: MiniMax + icon: minimax.svg +spec: + litellm_provider: openai + config: + - name: base_url + label: + en_US: Base URL + zh_Hans: 基础 URL + type: string + required: true + default: https://api.minimax.chat/v1 + - name: timeout + label: + en_US: Timeout + zh_Hans: 超时时间 + type: integer + required: true + default: 120 + support_type: + - llm + - text-embedding + - rerank + provider_category: manufacturer diff --git a/src/langbot/pkg/provider/modelmgr/requesters/mistral.svg b/src/langbot/pkg/provider/modelmgr/requesters/mistral.svg new file mode 100644 index 00000000..853022d9 --- /dev/null +++ b/src/langbot/pkg/provider/modelmgr/requesters/mistral.svg @@ -0,0 +1,5 @@ + + + Mistral + AI + diff --git a/src/langbot/pkg/provider/modelmgr/requesters/mistralchatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/mistralchatcmpl.yaml new file mode 100644 index 00000000..7d66f599 --- /dev/null +++ b/src/langbot/pkg/provider/modelmgr/requesters/mistralchatcmpl.yaml @@ -0,0 +1,30 @@ +apiVersion: v1 +kind: LLMAPIRequester +metadata: + name: mistral-chat-completions + label: + en_US: Mistral AI + zh_Hans: Mistral AI + icon: mistral.svg +spec: + litellm_provider: mistral + config: + - name: base_url + label: + en_US: Base URL + zh_Hans: 基础 URL + type: string + required: true + default: https://api.mistral.ai/v1 + - name: timeout + label: + en_US: Timeout + zh_Hans: 超时时间 + type: integer + required: true + default: 120 + support_type: + - llm + - text-embedding + - rerank + provider_category: manufacturer diff --git a/src/langbot/pkg/provider/modelmgr/requesters/modelscopechatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/modelscopechatcmpl.py deleted file mode 100644 index ed5d8795..00000000 --- a/src/langbot/pkg/provider/modelmgr/requesters/modelscopechatcmpl.py +++ /dev/null @@ -1,561 +0,0 @@ -from __future__ import annotations - -import asyncio -import typing - -import openai -import openai.types.chat.chat_completion as chat_completion -import httpx - -from .. import entities, errors, requester -import langbot_plugin.api.entities.builtin.resource.tool as resource_tool -import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query -import langbot_plugin.api.entities.builtin.provider.message as provider_message - - -class ModelScopeChatCompletions(requester.ProviderAPIRequester): - """ModelScope ChatCompletion API 请求器""" - - client: openai.AsyncClient - - default_config: dict[str, typing.Any] = { - 'base_url': 'https://api-inference.modelscope.cn/v1', - 'timeout': 120, - } - - async def initialize(self): - self.client = openai.AsyncClient( - api_key='', - base_url=self.requester_cfg['base_url'], - timeout=self.requester_cfg['timeout'], - http_client=httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']), - ) - - def _mask_api_key(self, api_key: str | None) -> str: - if not api_key: - return '' - if len(api_key) <= 8: - return '****' - return f'{api_key[:4]}...{api_key[-4:]}' - - def _infer_model_type(self, model_id: str) -> str: - normalized_model_id = (model_id or '').lower() - embedding_keywords = ( - 'embedding', - 'embed', - 'bge-', - 'e5-', - 'm3e', - 'gte-', - 'multilingual-e5', - 'text-embedding', - ) - return 'embedding' if any(keyword in normalized_model_id for keyword in embedding_keywords) else 'llm' - - def _infer_model_abilities(self, item: dict[str, typing.Any], model_id: str) -> list[str]: - normalized_model_id = (model_id or '').lower() - abilities: set[str] = set() - - def _flatten(value: typing.Any) -> list[str]: - if value is None: - return [] - if isinstance(value, str): - return [value.lower()] - if isinstance(value, dict): - flattened: list[str] = [] - for nested_value in value.values(): - flattened.extend(_flatten(nested_value)) - return flattened - if isinstance(value, (list, tuple, set)): - flattened: list[str] = [] - for nested_value in value: - flattened.extend(_flatten(nested_value)) - return flattened - return [str(value).lower()] - - capability_tokens = _flatten(item.get('capabilities')) - capability_tokens.extend(_flatten(item.get('modalities'))) - capability_tokens.extend(_flatten(item.get('input_modalities'))) - capability_tokens.extend(_flatten(item.get('output_modalities'))) - capability_tokens.extend(_flatten(item.get('supported_generation_methods'))) - capability_tokens.extend(_flatten(item.get('supported_parameters'))) - capability_tokens.extend(_flatten(item.get('architecture'))) - - combined_tokens = capability_tokens + [normalized_model_id] - - vision_keywords = ('vision', 'image', 'file', 'video', 'multimodal', 'vl', 'ocr', 'omni') - function_call_keywords = ('function', 'tool', 'tools', 'tool_choice', 'tool_call', 'tool-use', 'tool_use') - - if any(any(keyword in token for keyword in vision_keywords) for token in combined_tokens): - abilities.add('vision') - - if any(any(keyword in token for keyword in function_call_keywords) for token in combined_tokens): - abilities.add('func_call') - - return sorted(abilities) - - def _normalize_modalities(self, value: typing.Any) -> list[str]: - normalized: list[str] = [] - - def _collect(item: typing.Any): - if item is None: - return - if isinstance(item, str): - for part in item.replace('->', ',').replace('+', ',').split(','): - token = part.strip().lower() - if token and token not in normalized: - normalized.append(token) - return - if isinstance(item, dict): - for nested in item.values(): - _collect(nested) - return - if isinstance(item, (list, tuple, set)): - for nested in item: - _collect(nested) - return - - _collect(value) - return normalized - - def _extract_scan_metadata(self, item: dict[str, typing.Any], model_id: str) -> dict[str, typing.Any]: - display_name = item.get('name') - if not isinstance(display_name, str) or not display_name.strip() or display_name == model_id: - display_name = '' - - description = item.get('description') - if not isinstance(description, str) or not description.strip(): - description = '' - - context_length = item.get('context_length') - if context_length is None and isinstance(item.get('top_provider'), dict): - context_length = item['top_provider'].get('context_length') - - if not isinstance(context_length, int): - try: - context_length = int(context_length) if context_length is not None else None - except (TypeError, ValueError): - context_length = None - - input_modalities = self._normalize_modalities(item.get('input_modalities')) - output_modalities = self._normalize_modalities(item.get('output_modalities')) - - if isinstance(item.get('architecture'), dict): - if not input_modalities: - input_modalities = self._normalize_modalities(item['architecture'].get('input_modalities')) - if not output_modalities: - output_modalities = self._normalize_modalities(item['architecture'].get('output_modalities')) - - owned_by = item.get('owned_by') - if not isinstance(owned_by, str) or not owned_by.strip(): - owned_by = '' - - return { - 'display_name': display_name or None, - 'description': description or None, - 'context_length': context_length, - 'owned_by': owned_by or None, - 'input_modalities': input_modalities, - 'output_modalities': output_modalities, - } - - async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]: - headers = {} - if api_key: - headers['Authorization'] = f'Bearer {api_key}' - - models_url = f'{self.requester_cfg["base_url"].rstrip("/")}/models' - async with httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']) as client: - response = await client.get(models_url, headers=headers) - response.raise_for_status() - payload = response.json() - - models = [] - for item in payload.get('data', []): - model_id = item.get('id') - if not model_id: - continue - models.append( - { - 'id': model_id, - 'name': model_id, - 'type': self._infer_model_type(model_id), - 'abilities': self._infer_model_abilities(item, model_id), - **self._extract_scan_metadata(item, model_id), - } - ) - - models.sort(key=lambda item: (item['type'] != 'llm', item['name'].lower())) - return { - 'models': models, - 'debug': { - 'request': { - 'method': 'GET', - 'url': models_url, - 'headers': { - 'Authorization': f'Bearer {self._mask_api_key(api_key)}' if api_key else '', - }, - }, - 'response': payload, - }, - } - - async def _req( - self, - query: pipeline_query.Query, - args: dict, - extra_body: dict = {}, - remove_think: bool = False, - ) -> list[dict[str, typing.Any]]: - args['stream'] = True - - chunk = None - - pending_content = '' - - tool_calls = [] - - resp_gen: openai.AsyncStream = await self.client.chat.completions.create(**args, extra_body=extra_body) - - chunk_idx = 0 - thinking_started = False - thinking_ended = False - tool_id = '' - tool_name = '' - message_delta = {} - async for chunk in resp_gen: - if not chunk or not chunk.id or not chunk.choices or not chunk.choices[0] or not chunk.choices[0].delta: - continue - - delta = chunk.choices[0].delta.model_dump() if hasattr(chunk.choices[0], 'delta') else {} - reasoning_content = delta.get('reasoning_content') - # 处理 reasoning_content - if reasoning_content: - # accumulated_reasoning += reasoning_content - # 如果设置了 remove_think,跳过 reasoning_content - if remove_think: - chunk_idx += 1 - continue - - # 第一次出现 reasoning_content,添加 开始标签 - if not thinking_started: - thinking_started = True - pending_content += '\n' + reasoning_content - else: - # 继续输出 reasoning_content - pending_content += reasoning_content - elif thinking_started and not thinking_ended and delta.get('content'): - # reasoning_content 结束,normal content 开始,添加 结束标签 - thinking_ended = True - pending_content += '\n\n' + delta.get('content') - - if delta.get('content') is not None: - pending_content += delta.get('content') - - if delta.get('tool_calls') is not None: - for tool_call in delta.get('tool_calls'): - if tool_call['id'] != '': - tool_id = tool_call['id'] - if tool_call['function']['name'] is not None: - tool_name = tool_call['function']['name'] - if tool_call['function']['arguments'] is None: - continue - tool_call['id'] = tool_id - tool_call['name'] = tool_name - for tc in tool_calls: - if tc['index'] == tool_call['index']: - tc['function']['arguments'] += tool_call['function']['arguments'] - break - else: - tool_calls.append(tool_call) - - if chunk.choices[0].finish_reason is not None: - break - message_delta['content'] = pending_content - message_delta['role'] = 'assistant' - - message_delta['tool_calls'] = tool_calls if tool_calls else None - return [message_delta] - - async def _make_msg( - self, - chat_completion: list[dict[str, typing.Any]], - ) -> provider_message.Message: - chatcmpl_message = chat_completion[0] - - # 确保 role 字段存在且不为 None - if 'role' not in chatcmpl_message or chatcmpl_message['role'] is None: - chatcmpl_message['role'] = 'assistant' - - message = provider_message.Message(**chatcmpl_message) - - return message - - async def _closure( - self, - query: pipeline_query.Query, - req_messages: list[dict], - use_model: requester.RuntimeLLMModel, - use_funcs: list[resource_tool.LLMTool] = None, - extra_args: dict[str, typing.Any] = {}, - remove_think: bool = False, - ) -> tuple[provider_message.Message, dict]: - self.client.api_key = use_model.provider.token_mgr.get_token() - - args = {} - args['model'] = use_model.model_entity.name - - if use_funcs: - tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs) - - if tools: - args['tools'] = tools - - # 设置此次请求中的messages - messages = req_messages.copy() - - # 检查vision - for msg in messages: - if 'content' in msg and isinstance(msg['content'], list): - for me in msg['content']: - if me['type'] == 'image_base64': - me['image_url'] = {'url': me['image_base64']} - me['type'] = 'image_url' - del me['image_base64'] - - args['messages'] = messages - - # 发送请求 - resp = await self._req(query, args, extra_body=extra_args, remove_think=remove_think) - - # 处理请求结果 - message = await self._make_msg(resp) - - # ModelScope uses streaming, usage info not available - usage_info = {} - - return message, usage_info - - async def _req_stream( - self, - args: dict, - extra_body: dict = {}, - ) -> chat_completion.ChatCompletion: - async for chunk in await self.client.chat.completions.create(**args, extra_body=extra_body): - yield chunk - - async def _closure_stream( - self, - query: pipeline_query.Query, - req_messages: list[dict], - use_model: requester.RuntimeLLMModel, - use_funcs: list[resource_tool.LLMTool] = None, - extra_args: dict[str, typing.Any] = {}, - remove_think: bool = False, - ) -> provider_message.Message | typing.AsyncGenerator[provider_message.MessageChunk, None]: - self.client.api_key = use_model.provider.token_mgr.get_token() - - args = {} - args['model'] = use_model.model_entity.name - - if use_funcs: - tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs) - - if tools: - args['tools'] = tools - - # 设置此次请求中的messages - messages = req_messages.copy() - - # 检查vision - for msg in messages: - if 'content' in msg and isinstance(msg['content'], list): - for me in msg['content']: - if me['type'] == 'image_base64': - me['image_url'] = {'url': me['image_base64']} - me['type'] = 'image_url' - del me['image_base64'] - - args['messages'] = messages - args['stream'] = True - - # 流式处理状态 - # tool_calls_map: dict[str, provider_message.ToolCall] = {} - chunk_idx = 0 - thinking_started = False - thinking_ended = False - role = 'assistant' # 默认角色 - # accumulated_reasoning = '' # 仅用于判断何时结束思维链 - - async for chunk in self._req_stream(args, extra_body=extra_args): - # 解析 chunk 数据 - if hasattr(chunk, 'choices') and chunk.choices: - choice = chunk.choices[0] - delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {} - finish_reason = getattr(choice, 'finish_reason', None) - else: - delta = {} - finish_reason = None - - # 从第一个 chunk 获取 role,后续使用这个 role - if 'role' in delta and delta['role']: - role = delta['role'] - - # 获取增量内容 - delta_content = delta.get('content', '') - reasoning_content = delta.get('reasoning_content', '') - - # 处理 reasoning_content - if reasoning_content: - # accumulated_reasoning += reasoning_content - # 如果设置了 remove_think,跳过 reasoning_content - if remove_think: - chunk_idx += 1 - continue - - # 第一次出现 reasoning_content,添加 开始标签 - if not thinking_started: - thinking_started = True - delta_content = '\n' + reasoning_content - else: - # 继续输出 reasoning_content - delta_content = reasoning_content - elif thinking_started and not thinking_ended and delta_content: - # reasoning_content 结束,normal content 开始,添加 结束标签 - thinking_ended = True - delta_content = '\n\n' + delta_content - - # 处理 content 中已有的 标签(如果需要移除) - # if delta_content and remove_think and '' in delta_content: - # import re - # - # # 移除 标签及其内容 - # delta_content = re.sub(r'.*?', '', delta_content, flags=re.DOTALL) - - # 处理工具调用增量 - if delta.get('tool_calls'): - for tool_call in delta['tool_calls']: - if tool_call['id'] != '': - tool_id = tool_call['id'] - if tool_call['function']['name'] is not None: - tool_name = tool_call['function']['name'] - - if tool_call['type'] is None: - tool_call['type'] = 'function' - tool_call['id'] = tool_id - tool_call['function']['name'] = tool_name - tool_call['function']['arguments'] = ( - '' if tool_call['function']['arguments'] is None else tool_call['function']['arguments'] - ) - - # 跳过空的第一个 chunk(只有 role 没有内容) - if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'): - chunk_idx += 1 - continue - - # 构建 MessageChunk - 只包含增量内容 - chunk_data = { - 'role': role, - 'content': delta_content if delta_content else None, - 'tool_calls': delta.get('tool_calls'), - 'is_final': bool(finish_reason), - } - - # 移除 None 值 - chunk_data = {k: v for k, v in chunk_data.items() if v is not None} - - yield provider_message.MessageChunk(**chunk_data) - chunk_idx += 1 - # return - - async def invoke_llm( - self, - query: pipeline_query.Query, - model: entities.LLMModelInfo, - messages: typing.List[provider_message.Message], - funcs: typing.List[resource_tool.LLMTool] = None, - extra_args: dict[str, typing.Any] = {}, - remove_think: bool = False, - ) -> provider_message.Message: - req_messages = [] # req_messages 仅用于类内,外部同步由 query.messages 进行 - for m in messages: - msg_dict = m.dict(exclude_none=True) - content = msg_dict.get('content') - if isinstance(content, list): - # 检查 content 列表中是否每个部分都是文本 - if all(isinstance(part, dict) and part.get('type') == 'text' for part in content): - # 将所有文本部分合并为一个字符串 - msg_dict['content'] = '\n'.join(part['text'] for part in content) - req_messages.append(msg_dict) - - try: - return await self._closure( - query=query, - req_messages=req_messages, - use_model=model, - use_funcs=funcs, - extra_args=extra_args, - remove_think=remove_think, - ) - except asyncio.TimeoutError: - raise errors.RequesterError('请求超时') - except openai.BadRequestError as e: - if 'context_length_exceeded' in e.message: - raise errors.RequesterError(f'上文过长,请重置会话: {e.message}') - else: - raise errors.RequesterError(f'请求参数错误: {e.message}') - except openai.AuthenticationError as e: - raise errors.RequesterError(f'无效的 api-key: {e.message}') - except openai.NotFoundError as e: - raise errors.RequesterError(f'请求路径错误: {e.message}') - except openai.RateLimitError as e: - raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}') - except openai.APIError as e: - raise errors.RequesterError(f'请求错误: {e.message}') - - async def invoke_llm_stream( - self, - query: pipeline_query.Query, - model: requester.RuntimeLLMModel, - messages: typing.List[provider_message.Message], - funcs: typing.List[resource_tool.LLMTool] = None, - extra_args: dict[str, typing.Any] = {}, - remove_think: bool = False, - ) -> provider_message.MessageChunk: - req_messages = [] # req_messages 仅用于类内,外部同步由 query.messages 进行 - for m in messages: - msg_dict = m.dict(exclude_none=True) - content = msg_dict.get('content') - if isinstance(content, list): - # 检查 content 列表中是否每个部分都是文本 - if all(isinstance(part, dict) and part.get('type') == 'text' for part in content): - # 将所有文本部分合并为一个字符串 - msg_dict['content'] = '\n'.join(part['text'] for part in content) - req_messages.append(msg_dict) - - try: - async for item in self._closure_stream( - query=query, - req_messages=req_messages, - use_model=model, - use_funcs=funcs, - extra_args=extra_args, - remove_think=remove_think, - ): - yield item - - except asyncio.TimeoutError: - raise errors.RequesterError('请求超时') - except openai.BadRequestError as e: - if 'context_length_exceeded' in e.message: - raise errors.RequesterError(f'上文过长,请重置会话: {e.message}') - else: - raise errors.RequesterError(f'请求参数错误: {e.message}') - except openai.AuthenticationError as e: - raise errors.RequesterError(f'无效的 api-key: {e.message}') - except openai.NotFoundError as e: - raise errors.RequesterError(f'请求路径错误: {e.message}') - except openai.RateLimitError as e: - raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}') - except openai.APIError as e: - raise errors.RequesterError(f'请求错误: {e.message}') diff --git a/src/langbot/pkg/provider/modelmgr/requesters/modelscopechatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/modelscopechatcmpl.yaml index 8d22002d..35705c2e 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/modelscopechatcmpl.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/modelscopechatcmpl.yaml @@ -7,6 +7,7 @@ metadata: zh_Hans: 魔搭社区 icon: modelscope.svg spec: + litellm_provider: openai config: - name: base_url label: @@ -31,6 +32,8 @@ spec: default: 120 support_type: - llm + - text-embedding + - rerank provider_category: maas execution: python: diff --git a/src/langbot/pkg/provider/modelmgr/requesters/moonshotchatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/moonshotchatcmpl.py deleted file mode 100644 index b6852963..00000000 --- a/src/langbot/pkg/provider/modelmgr/requesters/moonshotchatcmpl.py +++ /dev/null @@ -1,67 +0,0 @@ -from __future__ import annotations - -import typing - - -from . import chatcmpl -from .. import requester -import langbot_plugin.api.entities.builtin.resource.tool as resource_tool -import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query -import langbot_plugin.api.entities.builtin.provider.message as provider_message - - -class MoonshotChatCompletions(chatcmpl.OpenAIChatCompletions): - """Moonshot ChatCompletion API 请求器""" - - default_config: dict[str, typing.Any] = { - 'base_url': 'https://api.moonshot.cn/v1', - 'timeout': 120, - } - - async def _closure( - self, - query: pipeline_query.Query, - req_messages: list[dict], - use_model: requester.RuntimeLLMModel, - use_funcs: list[resource_tool.LLMTool] = None, - extra_args: dict[str, typing.Any] = {}, - remove_think: bool = False, - ) -> tuple[provider_message.Message, dict]: - self.client.api_key = use_model.provider.token_mgr.get_token() - - args = {} - args['model'] = use_model.model_entity.name - - if use_funcs: - tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs) - - if tools: - args['tools'] = tools - - # 设置此次请求中的messages - messages = req_messages - - # deepseek 不支持多模态,把content都转换成纯文字 - for m in messages: - if 'content' in m and isinstance(m['content'], list): - m['content'] = ' '.join([c['text'] for c in m['content']]) - - # 删除空的,不知道干嘛的,直接删了。 - # messages = [m for m in messages if m["content"].strip() != "" and ('tool_calls' not in m or not m['tool_calls'])] - - args['messages'] = messages - - # 发送请求 - resp = await self._req(args, extra_body=extra_args) - - # 处理请求结果 - message = await self._make_msg(resp, remove_think) - - # Extract token usage from response - usage_info = {} - if hasattr(resp, 'usage') and resp.usage: - usage_info['input_tokens'] = resp.usage.prompt_tokens or 0 - usage_info['output_tokens'] = resp.usage.completion_tokens or 0 - usage_info['total_tokens'] = resp.usage.total_tokens or 0 - - return message, usage_info diff --git a/src/langbot/pkg/provider/modelmgr/requesters/moonshotchatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/moonshotchatcmpl.yaml index 7a7e3060..81a59e8f 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/moonshotchatcmpl.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/moonshotchatcmpl.yaml @@ -7,6 +7,7 @@ metadata: zh_Hans: 月之暗面 icon: moonshot.png spec: + litellm_provider: openai config: - name: base_url label: @@ -24,6 +25,8 @@ spec: default: 120 support_type: - llm + - text-embedding + - rerank provider_category: manufacturer execution: python: diff --git a/src/langbot/pkg/provider/modelmgr/requesters/newapichatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/newapichatcmpl.py deleted file mode 100644 index 3c2bd3fb..00000000 --- a/src/langbot/pkg/provider/modelmgr/requesters/newapichatcmpl.py +++ /dev/null @@ -1,17 +0,0 @@ -from __future__ import annotations - -import typing -import openai - -from . import chatcmpl - - -class NewAPIChatCompletions(chatcmpl.OpenAIChatCompletions): - """New API ChatCompletion API 请求器""" - - client: openai.AsyncClient - - default_config: dict[str, typing.Any] = { - 'base_url': 'http://localhost:3000/v1', - 'timeout': 120, - } diff --git a/src/langbot/pkg/provider/modelmgr/requesters/newapichatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/newapichatcmpl.yaml index e0f44e99..9eaf182c 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/newapichatcmpl.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/newapichatcmpl.yaml @@ -7,6 +7,7 @@ metadata: zh_Hans: New API icon: newapi.png spec: + litellm_provider: openai config: - name: base_url label: diff --git a/src/langbot/pkg/provider/modelmgr/requesters/ollamachat.py b/src/langbot/pkg/provider/modelmgr/requesters/ollamachat.py deleted file mode 100644 index 50f601d7..00000000 --- a/src/langbot/pkg/provider/modelmgr/requesters/ollamachat.py +++ /dev/null @@ -1,314 +0,0 @@ -from __future__ import annotations - -import asyncio -import os -import typing -from typing import Union, Mapping, Any, AsyncIterator -import uuid -import json - -import ollama -import httpx - -from .. import errors, requester -import langbot_plugin.api.entities.builtin.resource.tool as resource_tool -import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query -import langbot_plugin.api.entities.builtin.provider.message as provider_message - -REQUESTER_NAME: str = 'ollama-chat' - - -class OllamaChatCompletions(requester.ProviderAPIRequester): - """Ollama平台 ChatCompletion API请求器""" - - client: ollama.AsyncClient - - default_config: dict[str, typing.Any] = { - 'base_url': 'http://127.0.0.1:11434', - 'timeout': 120, - } - - async def initialize(self): - os.environ['OLLAMA_HOST'] = self.requester_cfg['base_url'] - self.client = ollama.AsyncClient(timeout=self.requester_cfg['timeout']) - - def _infer_model_type(self, model_id: str) -> str: - normalized_model_id = (model_id or '').lower() - embedding_keywords = ('embedding', 'embed', 'bge-', 'e5-', 'm3e', 'gte-', 'text-embedding') - return 'embedding' if any(keyword in normalized_model_id for keyword in embedding_keywords) else 'llm' - - def _infer_model_abilities(self, item: dict[str, typing.Any], model_id: str) -> list[str]: - normalized_model_id = (model_id or '').lower() - abilities: set[str] = set() - details = item.get('details', {}) or {} - families = details.get('families', []) or [] - tokens = [normalized_model_id, str(details.get('family', '')).lower()] - tokens.extend(str(family).lower() for family in families) - - if any(keyword in token for token in tokens for keyword in ('vision', 'vl', 'omni', 'llava', 'ocr')): - abilities.add('vision') - if any(keyword in token for token in tokens for keyword in ('tool', 'function')): - abilities.add('func_call') - return sorted(abilities) - - async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]: - del api_key - models_url = f'{self.requester_cfg["base_url"].rstrip("/")}/api/tags' - - async with httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']) as client: - response = await client.get(models_url) - response.raise_for_status() - payload = response.json() - - models: list[dict[str, typing.Any]] = [] - for item in payload.get('models', []): - model_id = item.get('model') or item.get('name') - if not model_id: - continue - models.append( - { - 'id': model_id, - 'name': item.get('name', model_id), - 'type': self._infer_model_type(model_id), - 'abilities': self._infer_model_abilities(item, model_id), - } - ) - - models.sort(key=lambda item: (item['type'] != 'llm', item['name'].lower())) - return { - 'models': models, - 'debug': { - 'request': { - 'method': 'GET', - 'url': models_url, - }, - 'response': payload, - }, - } - - async def _req( - self, - args: dict, - ) -> Union[Mapping[str, Any], AsyncIterator[Mapping[str, Any]]]: - return await self.client.chat(**args) - - async def _closure( - self, - query: pipeline_query.Query, - req_messages: list[dict], - use_model: requester.RuntimeLLMModel, - use_funcs: list[resource_tool.LLMTool] = None, - extra_args: dict[str, typing.Any] = {}, - remove_think: bool = False, - ) -> provider_message.Message: - args = extra_args.copy() - args['model'] = use_model.model_entity.name - - messages: list[dict] = req_messages.copy() - for msg in messages: - if 'content' in msg and isinstance(msg['content'], list): - text_content: list = [] - image_urls: list = [] - for me in msg['content']: - if me['type'] == 'text': - text_content.append(me['text']) - elif me['type'] == 'image_base64': - image_urls.append(me['image_base64']) - - msg['content'] = '\n'.join(text_content) - msg['images'] = [url.split(',')[1] for url in image_urls] - if 'tool_calls' in msg: # LangBot 内部以 str 存储 tool_calls 的参数,这里需要转换为 dict - for tool_call in msg['tool_calls']: - tool_call['function']['arguments'] = json.loads(tool_call['function']['arguments']) - args['messages'] = messages - - args['tools'] = [] - if use_funcs: - tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs) - if tools: - args['tools'] = tools - - resp = await self._req(args) - message: provider_message.Message = await self._make_msg(resp) - return message - - async def _make_msg(self, chat_completions: ollama.ChatResponse) -> provider_message.Message: - message: ollama.Message = chat_completions.message - if message is None: - raise ValueError("chat_completions must contain a 'message' field") - - ret_msg: provider_message.Message = None - - if message.content is not None: - ret_msg = provider_message.Message(role='assistant', content=message.content) - if message.tool_calls is not None and len(message.tool_calls) > 0: - tool_calls: list[provider_message.ToolCall] = [] - - for tool_call in message.tool_calls: - tool_calls.append( - provider_message.ToolCall( - id=uuid.uuid4().hex, - type='function', - function=provider_message.FunctionCall( - name=tool_call.function.name, - arguments=json.dumps(tool_call.function.arguments), - ), - ) - ) - ret_msg.tool_calls = tool_calls - - return ret_msg - - async def _prepare_messages( - self, - messages: typing.List[provider_message.Message], - ) -> list[dict]: - """Prepare messages for Ollama API request.""" - req_messages: list = [] - for m in messages: - msg_dict: dict = m.dict(exclude_none=True) - content: Any = msg_dict.get('content') - if isinstance(content, list): - if all(isinstance(part, dict) and part.get('type') == 'text' for part in content): - msg_dict['content'] = '\n'.join(part['text'] for part in content) - req_messages.append(msg_dict) - return req_messages - - async def invoke_llm( - self, - query: pipeline_query.Query, - model: requester.RuntimeLLMModel, - messages: typing.List[provider_message.Message], - funcs: typing.List[resource_tool.LLMTool] = None, - extra_args: dict[str, typing.Any] = {}, - remove_think: bool = False, - ) -> provider_message.Message: - req_messages = await self._prepare_messages(messages) - try: - return await self._closure( - query=query, - req_messages=req_messages, - use_model=model, - use_funcs=funcs, - extra_args=extra_args, - remove_think=remove_think, - ) - except asyncio.TimeoutError: - raise errors.RequesterError('请求超时') - - async def invoke_llm_stream( - self, - query: pipeline_query.Query, - model: requester.RuntimeLLMModel, - messages: typing.List[provider_message.Message], - funcs: typing.List[resource_tool.LLMTool] = None, - extra_args: dict[str, typing.Any] = {}, - remove_think: bool = False, - ) -> provider_message.MessageChunk: - req_messages = await self._prepare_messages(messages) - - try: - args = extra_args.copy() - args['model'] = model.model_entity.name - - # Process messages for Ollama format - msgs: list[dict] = req_messages.copy() - for msg in msgs: - if 'content' in msg and isinstance(msg['content'], list): - text_content: list = [] - image_urls: list = [] - for me in msg['content']: - if me['type'] == 'text': - text_content.append(me['text']) - elif me['type'] == 'image_base64': - image_urls.append(me['image_base64']) - msg['content'] = '\n'.join(text_content) - msg['images'] = [url.split(',')[1] for url in image_urls] - if 'tool_calls' in msg: - for tool_call in msg['tool_calls']: - tool_call['function']['arguments'] = json.loads(tool_call['function']['arguments']) - args['messages'] = msgs - - args['tools'] = [] - if funcs: - tools = await self.ap.tool_mgr.generate_tools_for_openai(funcs) - if tools: - args['tools'] = tools - - args['stream'] = True - - chunk_idx = 0 - thinking_started = False - thinking_ended = False - role = 'assistant' - - async for chunk in await self.client.chat(**args): - message: ollama.Message = chunk.message - done = chunk.done - - delta_content = message.content or '' - reasoning_content = getattr(message, 'thinking', '') or '' - - # Handle reasoning/thinking content - if reasoning_content: - if remove_think: - chunk_idx += 1 - continue - - if not thinking_started: - thinking_started = True - delta_content = '\n' + reasoning_content - else: - delta_content = reasoning_content - elif thinking_started and not thinking_ended and delta_content: - thinking_ended = True - delta_content = '\n\n' + delta_content - - # Handle tool calls - tool_calls_data = None - if message.tool_calls: - tool_calls_data = [] - for tc in message.tool_calls: - tool_calls_data.append( - { - 'id': uuid.uuid4().hex, - 'type': 'function', - 'function': { - 'name': tc.function.name, - 'arguments': json.dumps(tc.function.arguments), - }, - } - ) - - # Skip empty first chunk - if chunk_idx == 0 and not delta_content and not reasoning_content and not tool_calls_data: - chunk_idx += 1 - continue - - chunk_data = { - 'role': role, - 'content': delta_content if delta_content else None, - 'tool_calls': tool_calls_data, - 'is_final': bool(done), - } - chunk_data = {k: v for k, v in chunk_data.items() if v is not None} - - yield provider_message.MessageChunk(**chunk_data) - chunk_idx += 1 - - except asyncio.TimeoutError: - raise errors.RequesterError('请求超时') - - async def invoke_embedding( - self, - model: requester.RuntimeEmbeddingModel, - input_text: list[str], - extra_args: dict[str, typing.Any] = {}, - ) -> list[list[float]]: - return ( - await self.client.embed( - model=model.model_entity.name, - input=input_text, - **extra_args, - ) - ).embeddings diff --git a/src/langbot/pkg/provider/modelmgr/requesters/ollamachat.yaml b/src/langbot/pkg/provider/modelmgr/requesters/ollamachat.yaml index a724f8f8..51bb6332 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/ollamachat.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/ollamachat.yaml @@ -7,6 +7,7 @@ metadata: zh_Hans: Ollama icon: ollama.svg spec: + litellm_provider: ollama config: - name: base_url label: diff --git a/src/langbot/pkg/provider/modelmgr/requesters/openrouterchatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/openrouterchatcmpl.py deleted file mode 100644 index 17b88431..00000000 --- a/src/langbot/pkg/provider/modelmgr/requesters/openrouterchatcmpl.py +++ /dev/null @@ -1,25 +0,0 @@ -from __future__ import annotations - -import typing -import openai - -from . import modelscopechatcmpl - - -class OpenRouterChatCompletions(modelscopechatcmpl.ModelScopeChatCompletions): - """OpenRouter ChatCompletion API 请求器""" - - client: openai.AsyncClient - - default_config: dict[str, typing.Any] = { - 'base_url': 'https://openrouter.ai/api/v1', - 'timeout': 120, - } - - async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]: - original_base_url = self.requester_cfg.get('base_url', '') - self.requester_cfg['base_url'] = 'https://openrouter.ai/api/v1' - try: - return await super().scan_models(api_key) - finally: - self.requester_cfg['base_url'] = original_base_url diff --git a/src/langbot/pkg/provider/modelmgr/requesters/openrouterchatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/openrouterchatcmpl.yaml index 71064dc0..296a8811 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/openrouterchatcmpl.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/openrouterchatcmpl.yaml @@ -7,6 +7,7 @@ metadata: zh_Hans: OpenRouter icon: openrouter.svg spec: + litellm_provider: openai config: - name: base_url label: diff --git a/src/langbot/pkg/provider/modelmgr/requesters/ppiochatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/ppiochatcmpl.py deleted file mode 100644 index 1836bd62..00000000 --- a/src/langbot/pkg/provider/modelmgr/requesters/ppiochatcmpl.py +++ /dev/null @@ -1,208 +0,0 @@ -from __future__ import annotations - -import openai -import typing - -from . import chatcmpl -from .. import requester -import openai.types.chat.chat_completion as chat_completion -import re -import langbot_plugin.api.entities.builtin.provider.message as provider_message -import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query -import langbot_plugin.api.entities.builtin.resource.tool as resource_tool - - -class PPIOChatCompletions(chatcmpl.OpenAIChatCompletions): - """欧派云 ChatCompletion API 请求器""" - - client: openai.AsyncClient - - default_config: dict[str, typing.Any] = { - 'base_url': 'https://api.ppinfra.com/v3/openai', - 'timeout': 120, - } - - is_think: bool = False - - async def _make_msg( - self, - chat_completion: chat_completion.ChatCompletion, - remove_think: bool, - ) -> provider_message.Message: - chatcmpl_message = chat_completion.choices[0].message.model_dump() - # print(chatcmpl_message.keys(), chatcmpl_message.values()) - - # 确保 role 字段存在且不为 None - if 'role' not in chatcmpl_message or chatcmpl_message['role'] is None: - chatcmpl_message['role'] = 'assistant' - - reasoning_content = chatcmpl_message['reasoning_content'] if 'reasoning_content' in chatcmpl_message else None - - # deepseek的reasoner模型 - chatcmpl_message['content'] = await self._process_thinking_content( - chatcmpl_message['content'], reasoning_content, remove_think - ) - - # 移除 reasoning_content 字段,避免传递给 Message - if 'reasoning_content' in chatcmpl_message: - del chatcmpl_message['reasoning_content'] - - message = provider_message.Message(**chatcmpl_message) - - return message - - async def _process_thinking_content( - self, - content: str, - reasoning_content: str = None, - remove_think: bool = False, - ) -> tuple[str, str]: - """处理思维链内容 - - Args: - content: 原始内容 - reasoning_content: reasoning_content 字段内容 - remove_think: 是否移除思维链 - - Returns: - 处理后的内容 - """ - if remove_think: - content = re.sub(r'.*?', '', content, flags=re.DOTALL) - else: - if reasoning_content is not None: - content = '\n' + reasoning_content + '\n\n' + content - return content - - async def _make_msg_chunk( - self, - delta: dict[str, typing.Any], - idx: int, - ) -> provider_message.MessageChunk: - # 处理流式chunk和完整响应的差异 - # print(chat_completion.choices[0]) - - # 确保 role 字段存在且不为 None - if 'role' not in delta or delta['role'] is None: - delta['role'] = 'assistant' - - reasoning_content = delta['reasoning_content'] if 'reasoning_content' in delta else None - - delta['content'] = '' if delta['content'] is None else delta['content'] - # print(reasoning_content) - - # deepseek的reasoner模型 - - if reasoning_content is not None: - delta['content'] += reasoning_content - - message = provider_message.MessageChunk(**delta) - - return message - - async def _closure_stream( - self, - query: pipeline_query.Query, - req_messages: list[dict], - use_model: requester.RuntimeLLMModel, - use_funcs: list[resource_tool.LLMTool] = None, - extra_args: dict[str, typing.Any] = {}, - remove_think: bool = False, - ) -> provider_message.Message | typing.AsyncGenerator[provider_message.MessageChunk, None]: - self.client.api_key = use_model.provider.token_mgr.get_token() - - args = {} - args['model'] = use_model.model_entity.name - - if use_funcs: - tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs) - - if tools: - args['tools'] = tools - - # 设置此次请求中的messages - messages = req_messages.copy() - - # 检查vision - for msg in messages: - if 'content' in msg and isinstance(msg['content'], list): - for me in msg['content']: - if me['type'] == 'image_base64': - me['image_url'] = {'url': me['image_base64']} - me['type'] = 'image_url' - del me['image_base64'] - - args['messages'] = messages - args['stream'] = True - - # tool_calls_map: dict[str, provider_message.ToolCall] = {} - chunk_idx = 0 - thinking_started = False - thinking_ended = False - role = 'assistant' # 默认角色 - async for chunk in self._req_stream(args, extra_body=extra_args): - # 解析 chunk 数据 - if hasattr(chunk, 'choices') and chunk.choices: - choice = chunk.choices[0] - delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {} - finish_reason = getattr(choice, 'finish_reason', None) - else: - delta = {} - finish_reason = None - - # 从第一个 chunk 获取 role,后续使用这个 role - if 'role' in delta and delta['role']: - role = delta['role'] - - # 获取增量内容 - delta_content = delta.get('content', '') - # reasoning_content = delta.get('reasoning_content', '') - - if remove_think: - if delta['content'] is not None: - if '' in delta['content'] and not thinking_started and not thinking_ended: - thinking_started = True - continue - elif delta['content'] == r'' and not thinking_ended: - thinking_ended = True - continue - elif thinking_ended and delta['content'] == '\n\n' and thinking_started: - thinking_started = False - continue - elif thinking_started and not thinking_ended: - continue - - # delta_tool_calls = None - if delta.get('tool_calls'): - for tool_call in delta['tool_calls']: - if tool_call['id'] and tool_call['function']['name']: - tool_id = tool_call['id'] - tool_name = tool_call['function']['name'] - - if tool_call['id'] is None: - tool_call['id'] = tool_id - if tool_call['function']['name'] is None: - tool_call['function']['name'] = tool_name - if tool_call['function']['arguments'] is None: - tool_call['function']['arguments'] = '' - if tool_call['type'] is None: - tool_call['type'] = 'function' - - # 跳过空的第一个 chunk(只有 role 没有内容) - if chunk_idx == 0 and not delta_content and not delta.get('tool_calls'): - chunk_idx += 1 - continue - - # 构建 MessageChunk - 只包含增量内容 - chunk_data = { - 'role': role, - 'content': delta_content if delta_content else None, - 'tool_calls': delta.get('tool_calls'), - 'is_final': bool(finish_reason), - } - - # 移除 None 值 - chunk_data = {k: v for k, v in chunk_data.items() if v is not None} - - yield provider_message.MessageChunk(**chunk_data) - chunk_idx += 1 diff --git a/src/langbot/pkg/provider/modelmgr/requesters/ppiochatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/ppiochatcmpl.yaml index 9e8eb1b0..46a0dbae 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/ppiochatcmpl.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/ppiochatcmpl.yaml @@ -7,6 +7,7 @@ metadata: zh_Hans: 派欧云 icon: ppio.svg spec: + litellm_provider: openai config: - name: base_url label: diff --git a/src/langbot/pkg/provider/modelmgr/requesters/qhaigcchatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/qhaigcchatcmpl.py deleted file mode 100644 index a68b6896..00000000 --- a/src/langbot/pkg/provider/modelmgr/requesters/qhaigcchatcmpl.py +++ /dev/null @@ -1,17 +0,0 @@ -from __future__ import annotations - -import openai -import typing - -from . import chatcmpl - - -class QHAIGCChatCompletions(chatcmpl.OpenAIChatCompletions): - """启航 AI ChatCompletion API 请求器""" - - client: openai.AsyncClient - - default_config: dict[str, typing.Any] = { - 'base_url': 'https://api.qhaigc.com/v1', - 'timeout': 120, - } diff --git a/src/langbot/pkg/provider/modelmgr/requesters/qhaigcchatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/qhaigcchatcmpl.yaml index 46ae1fad..d5e59d6e 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/qhaigcchatcmpl.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/qhaigcchatcmpl.yaml @@ -7,6 +7,7 @@ metadata: zh_Hans: 启航 AI icon: qhaigc.png spec: + litellm_provider: openai config: - name: base_url label: diff --git a/src/langbot/pkg/provider/modelmgr/requesters/shengsuanyun.py b/src/langbot/pkg/provider/modelmgr/requesters/shengsuanyun.py deleted file mode 100644 index 122eaf7d..00000000 --- a/src/langbot/pkg/provider/modelmgr/requesters/shengsuanyun.py +++ /dev/null @@ -1,32 +0,0 @@ -from __future__ import annotations - -import openai -import typing - -from . import chatcmpl -import openai.types.chat.chat_completion as chat_completion - - -class ShengSuanYunChatCompletions(chatcmpl.OpenAIChatCompletions): - """胜算云(ModelSpot.AI) ChatCompletion API 请求器""" - - client: openai.AsyncClient - - default_config: dict[str, typing.Any] = { - 'base_url': 'https://router.shengsuanyun.com/api/v1', - 'timeout': 120, - } - - async def _req( - self, - args: dict, - extra_body: dict = {}, - ) -> chat_completion.ChatCompletion: - return await self.client.chat.completions.create( - **args, - extra_body=extra_body, - extra_headers={ - 'HTTP-Referer': 'https://langbot.app', - 'X-Title': 'LangBot', - }, - ) diff --git a/src/langbot/pkg/provider/modelmgr/requesters/shengsuanyun.yaml b/src/langbot/pkg/provider/modelmgr/requesters/shengsuanyun.yaml index 77cf682c..7e0797f0 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/shengsuanyun.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/shengsuanyun.yaml @@ -7,6 +7,7 @@ metadata: zh_Hans: 胜算云 icon: shengsuanyun.svg spec: + litellm_provider: openai config: - name: base_url label: diff --git a/src/langbot/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.py deleted file mode 100644 index 3636d9d1..00000000 --- a/src/langbot/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.py +++ /dev/null @@ -1,17 +0,0 @@ -from __future__ import annotations - -import typing -import openai - -from . import chatcmpl - - -class SiliconFlowChatCompletions(chatcmpl.OpenAIChatCompletions): - """SiliconFlow ChatCompletion API 请求器""" - - client: openai.AsyncClient - - default_config: dict[str, typing.Any] = { - 'base_url': 'https://api.siliconflow.cn/v1', - 'timeout': 120, - } diff --git a/src/langbot/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.yaml index 11a2ffa3..915cd537 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.yaml @@ -7,6 +7,7 @@ metadata: zh_Hans: 硅基流动 icon: siliconflow.svg spec: + litellm_provider: openai config: - name: base_url label: diff --git a/src/langbot/pkg/provider/modelmgr/requesters/spacechatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/spacechatcmpl.py deleted file mode 100644 index 91740a1f..00000000 --- a/src/langbot/pkg/provider/modelmgr/requesters/spacechatcmpl.py +++ /dev/null @@ -1,17 +0,0 @@ -from __future__ import annotations - -import typing -import openai - -from . import chatcmpl - - -class LangBotSpaceChatCompletions(chatcmpl.OpenAIChatCompletions): - """LangBot Space ChatCompletion API 请求器""" - - client: openai.AsyncClient - - default_config: dict[str, typing.Any] = { - 'base_url': 'https://api.langbot.cloud/v1', - 'timeout': 120, - } diff --git a/src/langbot/pkg/provider/modelmgr/requesters/spacechatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/spacechatcmpl.yaml index 29c23a83..5dd2693e 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/spacechatcmpl.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/spacechatcmpl.yaml @@ -7,6 +7,7 @@ metadata: zh_Hans: Space icon: space.webp spec: + litellm_provider: openai config: - name: base_url label: diff --git a/src/langbot/pkg/provider/modelmgr/requesters/tencent.svg b/src/langbot/pkg/provider/modelmgr/requesters/tencent.svg new file mode 100644 index 00000000..de32c1bf --- /dev/null +++ b/src/langbot/pkg/provider/modelmgr/requesters/tencent.svg @@ -0,0 +1,5 @@ + + + Tencent + Hunyuan + diff --git a/src/langbot/pkg/provider/modelmgr/requesters/tencentchatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/tencentchatcmpl.yaml new file mode 100644 index 00000000..98130765 --- /dev/null +++ b/src/langbot/pkg/provider/modelmgr/requesters/tencentchatcmpl.yaml @@ -0,0 +1,30 @@ +apiVersion: v1 +kind: LLMAPIRequester +metadata: + name: tencent-chat-completions + label: + en_US: Tencent Hunyuan + zh_Hans: 腾讯混元 + icon: tencent.svg +spec: + litellm_provider: openai + config: + - name: base_url + label: + en_US: Base URL + zh_Hans: 基础 URL + type: string + required: true + default: https://hunyuan.tencentcloudapi.com/v1 + - name: timeout + label: + en_US: Timeout + zh_Hans: 超时时间 + type: integer + required: true + default: 120 + support_type: + - llm + - text-embedding + - rerank + provider_category: manufacturer diff --git a/src/langbot/pkg/provider/modelmgr/requesters/together.svg b/src/langbot/pkg/provider/modelmgr/requesters/together.svg new file mode 100644 index 00000000..b6ce0f80 --- /dev/null +++ b/src/langbot/pkg/provider/modelmgr/requesters/together.svg @@ -0,0 +1,5 @@ + + + Together + AI + diff --git a/src/langbot/pkg/provider/modelmgr/requesters/togetherchatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/togetherchatcmpl.yaml new file mode 100644 index 00000000..90fe61c7 --- /dev/null +++ b/src/langbot/pkg/provider/modelmgr/requesters/togetherchatcmpl.yaml @@ -0,0 +1,30 @@ +apiVersion: v1 +kind: LLMAPIRequester +metadata: + name: together-chat-completions + label: + en_US: Together AI + zh_Hans: Together AI + icon: together.svg +spec: + litellm_provider: together_ai + config: + - name: base_url + label: + en_US: Base URL + zh_Hans: 基础 URL + type: string + required: true + default: https://api.together.xyz/v1 + - name: timeout + label: + en_US: Timeout + zh_Hans: 超时时间 + type: integer + required: true + default: 120 + support_type: + - llm + - text-embedding + - rerank + provider_category: manufacturer diff --git a/src/langbot/pkg/provider/modelmgr/requesters/tokenpony.yaml b/src/langbot/pkg/provider/modelmgr/requesters/tokenpony.yaml index f160bdea..89afb1f7 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/tokenpony.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/tokenpony.yaml @@ -7,6 +7,7 @@ metadata: zh_Hans: 小马算力 icon: tokenpony.svg spec: + litellm_provider: openai config: - name: base_url label: diff --git a/src/langbot/pkg/provider/modelmgr/requesters/tokenponychatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/tokenponychatcmpl.py deleted file mode 100644 index 92311454..00000000 --- a/src/langbot/pkg/provider/modelmgr/requesters/tokenponychatcmpl.py +++ /dev/null @@ -1,17 +0,0 @@ -from __future__ import annotations - -import typing -import openai - -from . import chatcmpl - - -class TokenPonyChatCompletions(chatcmpl.OpenAIChatCompletions): - """TokenPony ChatCompletion API 请求器""" - - client: openai.AsyncClient - - default_config: dict[str, typing.Any] = { - 'base_url': 'https://api.tokenpony.cn/v1', - 'timeout': 120, - } diff --git a/src/langbot/pkg/provider/modelmgr/requesters/volcarkchatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/volcarkchatcmpl.py deleted file mode 100644 index 7eb68956..00000000 --- a/src/langbot/pkg/provider/modelmgr/requesters/volcarkchatcmpl.py +++ /dev/null @@ -1,17 +0,0 @@ -from __future__ import annotations - -import typing -import openai - -from . import chatcmpl - - -class VolcArkChatCompletions(chatcmpl.OpenAIChatCompletions): - """火山方舟大模型平台 ChatCompletion API 请求器""" - - client: openai.AsyncClient - - default_config: dict[str, typing.Any] = { - 'base_url': 'https://ark.cn-beijing.volces.com/api/v3', - 'timeout': 120, - } diff --git a/src/langbot/pkg/provider/modelmgr/requesters/volcarkchatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/volcarkchatcmpl.yaml index e5c82657..5e4ab111 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/volcarkchatcmpl.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/volcarkchatcmpl.yaml @@ -7,6 +7,7 @@ metadata: zh_Hans: 火山方舟 icon: volcark.svg spec: + litellm_provider: openai config: - name: base_url label: @@ -24,6 +25,8 @@ spec: default: 120 support_type: - llm + - text-embedding + - rerank provider_category: maas execution: python: diff --git a/src/langbot/pkg/provider/modelmgr/requesters/voyageairerank.yaml b/src/langbot/pkg/provider/modelmgr/requesters/voyageairerank.yaml index a47b8d47..f35f949c 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/voyageairerank.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/voyageairerank.yaml @@ -7,6 +7,7 @@ metadata: zh_Hans: Voyage AI icon: voyageai.svg spec: + litellm_provider: openai config: - name: base_url label: diff --git a/src/langbot/pkg/provider/modelmgr/requesters/xaichatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/xaichatcmpl.py deleted file mode 100644 index db2022f1..00000000 --- a/src/langbot/pkg/provider/modelmgr/requesters/xaichatcmpl.py +++ /dev/null @@ -1,17 +0,0 @@ -from __future__ import annotations - -import typing -import openai - -from . import chatcmpl - - -class XaiChatCompletions(chatcmpl.OpenAIChatCompletions): - """xAI ChatCompletion API 请求器""" - - client: openai.AsyncClient - - default_config: dict[str, typing.Any] = { - 'base_url': 'https://api.x.ai/v1', - 'timeout': 120, - } diff --git a/src/langbot/pkg/provider/modelmgr/requesters/xaichatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/xaichatcmpl.yaml index 2e721d70..379a9da0 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/xaichatcmpl.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/xaichatcmpl.yaml @@ -7,6 +7,7 @@ metadata: zh_Hans: xAI icon: xai.svg spec: + litellm_provider: openai config: - name: base_url label: @@ -24,6 +25,8 @@ spec: default: 120 support_type: - llm + - text-embedding + - rerank provider_category: manufacturer execution: python: diff --git a/src/langbot/pkg/provider/modelmgr/requesters/yi.svg b/src/langbot/pkg/provider/modelmgr/requesters/yi.svg new file mode 100644 index 00000000..8dc5e827 --- /dev/null +++ b/src/langbot/pkg/provider/modelmgr/requesters/yi.svg @@ -0,0 +1,5 @@ + + + 01.AI + Yi + diff --git a/src/langbot/pkg/provider/modelmgr/requesters/yichatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/yichatcmpl.yaml new file mode 100644 index 00000000..2617fc30 --- /dev/null +++ b/src/langbot/pkg/provider/modelmgr/requesters/yichatcmpl.yaml @@ -0,0 +1,30 @@ +apiVersion: v1 +kind: LLMAPIRequester +metadata: + name: yi-chat-completions + label: + en_US: 01.AI Yi + zh_Hans: 零一万物 + icon: yi.svg +spec: + litellm_provider: openai + config: + - name: base_url + label: + en_US: Base URL + zh_Hans: 基础 URL + type: string + required: true + default: https://api.lingyiwanwu.com/v1 + - name: timeout + label: + en_US: Timeout + zh_Hans: 超时时间 + type: integer + required: true + default: 120 + support_type: + - llm + - text-embedding + - rerank + provider_category: manufacturer diff --git a/src/langbot/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.py deleted file mode 100644 index a1a07068..00000000 --- a/src/langbot/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.py +++ /dev/null @@ -1,17 +0,0 @@ -from __future__ import annotations - -import typing -import openai - -from . import chatcmpl - - -class ZhipuAIChatCompletions(chatcmpl.OpenAIChatCompletions): - """智谱AI ChatCompletion API 请求器""" - - client: openai.AsyncClient - - default_config: dict[str, typing.Any] = { - 'base_url': 'https://open.bigmodel.cn/api/paas/v4', - 'timeout': 120, - } diff --git a/src/langbot/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.yaml index a4ebb2ec..31bec259 100644 --- a/src/langbot/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.yaml +++ b/src/langbot/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.yaml @@ -7,6 +7,7 @@ metadata: zh_Hans: 智谱 AI icon: zhipuai.svg spec: + litellm_provider: openai config: - name: base_url label: @@ -24,6 +25,8 @@ spec: default: 120 support_type: - llm + - text-embedding + - rerank provider_category: manufacturer execution: python: diff --git a/web/src/app/home/components/models-dialog/component/provider-form/ProviderForm.tsx b/web/src/app/home/components/models-dialog/component/provider-form/ProviderForm.tsx index c596037a..91dac280 100644 --- a/web/src/app/home/components/models-dialog/component/provider-form/ProviderForm.tsx +++ b/web/src/app/home/components/models-dialog/component/provider-form/ProviderForm.tsx @@ -1,4 +1,4 @@ -import { useEffect, useState } from 'react'; +import { useEffect, useState, useRef, useCallback } from 'react'; import { httpClient } from '@/app/infra/http/HttpClient'; import { zodResolver } from '@hookform/resolvers/zod'; @@ -16,19 +16,12 @@ import { FormMessage, } from '@/components/ui/form'; import { Input } from '@/components/ui/input'; -import { - Select, - SelectContent, - SelectGroup, - SelectItem, - SelectLabel, - SelectTrigger, - SelectValue, -} from '@/components/ui/select'; import { DialogFooter } from '@/components/ui/dialog'; import { toast } from 'sonner'; import { extractI18nObject } from '@/i18n/I18nProvider'; import { CustomApiError } from '@/app/infra/entities/common'; +import { cn } from '@/lib/utils'; +import { Check, ChevronDown, Search } from 'lucide-react'; const getFormSchema = (t: (key: string) => string) => z.object({ @@ -71,6 +64,10 @@ export default function ProviderForm({ description: string; }[] >([]); + const [searchQuery, setSearchQuery] = useState(''); + const [isOpen, setIsOpen] = useState(false); + const dropdownRef = useRef(null); + const searchInputRef = useRef(null); useEffect(() => { async function init() { @@ -82,6 +79,54 @@ export default function ProviderForm({ init(); }, [providerId]); + // Close dropdown when clicking outside + useEffect(() => { + function handleClickOutside(event: MouseEvent) { + if ( + dropdownRef.current && + !dropdownRef.current.contains(event.target as Node) + ) { + setIsOpen(false); + setSearchQuery(''); + } + } + document.addEventListener('mousedown', handleClickOutside); + return () => document.removeEventListener('mousedown', handleClickOutside); + }, []); + + // Focus search input when dropdown opens + useEffect(() => { + if (isOpen && searchInputRef.current) { + searchInputRef.current.focus(); + } + }, [isOpen]); + + // Filter requesters based on search query + const filteredRequesters = requesterList.filter( + (r) => + r.label.toLowerCase().includes(searchQuery.toLowerCase()) || + r.value.toLowerCase().includes(searchQuery.toLowerCase()), + ); + + // Group filtered requesters by category + const groupedRequesters = { + builtin: filteredRequesters.filter((r) => r.category === 'builtin'), + manufacturer: filteredRequesters.filter( + (r) => r.category === 'manufacturer', + ), + maas: filteredRequesters.filter((r) => r.category === 'maas'), + 'self-hosted': filteredRequesters.filter( + (r) => r.category === 'self-hosted', + ), + }; + + const categoryLabels: Record = { + builtin: t('models.builtin'), + manufacturer: t('models.modelManufacturer'), + maas: t('models.aggregationPlatform'), + 'self-hosted': t('models.selfDeployed'), + }; + async function loadRequesters() { const resp = await httpClient.getProviderRequesters(); setRequesterList( @@ -168,17 +213,16 @@ export default function ProviderForm({ {t('models.requester')} * - + + + + {/* Dropdown */} + {isOpen && ( +
+ {/* Search input */} +
+ + setSearchQuery(e.target.value)} + className="flex h-10 w-full rounded-md bg-transparent py-3 text-sm outline-none placeholder:text-muted-foreground" + /> +
+ + {/* Options list */} +
+ {Object.entries(groupedRequesters).map( + ([category, items]) => { + if (items.length === 0) return null; + return ( +
+
+ {categoryLabels[category]} +
+ {items.map((r) => ( + + ))} +
+ ); + }, + )} + {filteredRequesters.length === 0 && ( +
+ No results found. +
+ )} +
+
+ )} + {selectedRequester?.description && (

diff --git a/web/src/i18n/locales/en-US.ts b/web/src/i18n/locales/en-US.ts index 2313b723..2e723f85 100644 --- a/web/src/i18n/locales/en-US.ts +++ b/web/src/i18n/locales/en-US.ts @@ -258,6 +258,7 @@ const enUS = { selectProvider: 'Select Provider', requester: 'Provider Type', selectRequester: 'Select Provider Type', + searchProviders: 'Search providers...', langbotModelsDescription: 'Cloud models powered by LangBot Space', credits: 'Credits', loginWithSpace: 'Login with Space', diff --git a/web/src/i18n/locales/zh-Hans.ts b/web/src/i18n/locales/zh-Hans.ts index 5c885d5d..6271fb58 100644 --- a/web/src/i18n/locales/zh-Hans.ts +++ b/web/src/i18n/locales/zh-Hans.ts @@ -248,6 +248,7 @@ const zhHans = { selectProvider: '选择供应商', requester: '供应商类型', selectRequester: '选择供应商类型', + searchProviders: '搜索供应商...', langbotModelsDescription: 'LangBot Space 提供的云端模型', credits: '积分', loginWithSpace: '通过 Space 登录',