restore: restore deleted provider requester files

Restore individual provider requester implementations that were removed in de61b5d3. These files coexist with the unified litellmchat.py backend. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-02 03:55:55 +00:00 · 2026-05-04 12:19:18 +08:00
parent d170bdd343
commit 8dd16aac51
51 changed files with 3311 additions and 28 deletions
--- a/src/langbot/pkg/provider/modelmgr/requesters/302aichatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/302aichatcmpl.py
@@ -0,0 +1,17 @@
+from __future__ import annotations
+
+import typing
+import openai
+
+from . import chatcmpl
+
+
+class AI302ChatCompletions(chatcmpl.OpenAIChatCompletions):
+    """302.AI ChatCompletion API 请求器"""
+
+    client: openai.AsyncClient
+
+    default_config: dict[str, typing.Any] = {
+        'base_url': 'https://api.302.ai/v1',
+        'timeout': 120,
+    }
--- a/src/langbot/pkg/provider/modelmgr/requesters/302aichatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/302aichatcmpl.yaml
@@ -22,9 +22,12 @@ spec:
    type: integer
    required: true
    default: 120
-  litellm_provider: openai
  support_type:
  - llm
  - text-embedding
  - rerank
  provider_category: maas
+execution:
+  python:
+    path: ./302aichatcmpl.py
+    attr: AI302ChatCompletions
--- a/src/langbot/pkg/provider/modelmgr/requesters/anthropicmsgs.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/anthropicmsgs.py
@@ -0,0 +1,370 @@
+from __future__ import annotations
+
+import typing
+import json
+import platform
+import socket
+import anthropic
+import httpx
+
+from .. import errors, requester
+
+from ....utils import image
+import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
+import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
+import langbot_plugin.api.entities.builtin.provider.message as provider_message
+
+
+class AnthropicMessages(requester.ProviderAPIRequester):
+    """Anthropic Messages API 请求器"""
+
+    client: anthropic.AsyncAnthropic
+
+    default_config: dict[str, typing.Any] = {
+        'base_url': 'https://api.anthropic.com',
+        'timeout': 120,
+    }
+
+    async def initialize(self):
+        # 兼容 Windows 缺失 TCP_KEEPINTVL 和 TCP_KEEPCNT 的问题
+        if platform.system() == 'Windows':
+            if not hasattr(socket, 'TCP_KEEPINTVL'):
+                socket.TCP_KEEPINTVL = 0
+            if not hasattr(socket, 'TCP_KEEPCNT'):
+                socket.TCP_KEEPCNT = 0
+        httpx_client = anthropic._base_client.AsyncHttpxClientWrapper(
+            base_url=self.requester_cfg['base_url'],
+            # cast to a valid type because mypy doesn't understand our type narrowing
+            timeout=typing.cast(httpx.Timeout, self.requester_cfg['timeout']),
+            limits=anthropic._constants.DEFAULT_CONNECTION_LIMITS,
+            follow_redirects=True,
+            trust_env=True,
+        )
+
+        self.client = anthropic.AsyncAnthropic(
+            api_key='',
+            http_client=httpx_client,
+            base_url=self.requester_cfg['base_url'],
+        )
+
+    async def invoke_llm(
+        self,
+        query: pipeline_query.Query,
+        model: requester.RuntimeLLMModel,
+        messages: typing.List[provider_message.Message],
+        funcs: typing.List[resource_tool.LLMTool] = None,
+        extra_args: dict[str, typing.Any] = {},
+        remove_think: bool = False,
+    ) -> provider_message.Message:
+        self.client.api_key = model.provider.token_mgr.get_token()
+
+        args = extra_args.copy()
+        args['model'] = model.model_entity.name
+
+        # 处理消息
+
+        # system
+        system_role_message = None
+
+        for i, m in enumerate(messages):
+            if m.role == 'system':
+                system_role_message = m
+
+                break
+
+        if system_role_message:
+            messages.pop(i)
+
+        if isinstance(system_role_message, provider_message.Message) and isinstance(system_role_message.content, str):
+            args['system'] = system_role_message.content
+
+        req_messages = []
+
+        for m in messages:
+            if m.role == 'tool':
+                tool_call_id = m.tool_call_id
+
+                req_messages.append(
+                    {
+                        'role': 'user',
+                        'content': [
+                            {
+                                'type': 'tool_result',
+                                'tool_use_id': tool_call_id,
+                                'is_error': False,
+                                'content': [{'type': 'text', 'text': m.content}],
+                            }
+                        ],
+                    }
+                )
+
+                continue
+
+            msg_dict = m.dict(exclude_none=True)
+
+            if isinstance(m.content, str) and m.content.strip() != '':
+                msg_dict['content'] = [{'type': 'text', 'text': m.content}]
+            elif isinstance(m.content, list):
+                for i, ce in enumerate(m.content):
+                    if ce.type == 'image_base64':
+                        image_b64, image_format = await image.extract_b64_and_format(ce.image_base64)
+
+                        alter_image_ele = {
+                            'type': 'image',
+                            'source': {
+                                'type': 'base64',
+                                'media_type': f'image/{image_format}',
+                                'data': image_b64,
+                            },
+                        }
+                        msg_dict['content'][i] = alter_image_ele
+
+            if m.tool_calls:
+                for tool_call in m.tool_calls:
+                    msg_dict['content'].append(
+                        {
+                            'type': 'tool_use',
+                            'id': tool_call.id,
+                            'name': tool_call.function.name,
+                            'input': json.loads(tool_call.function.arguments),
+                        }
+                    )
+
+                del msg_dict['tool_calls']
+
+            req_messages.append(msg_dict)
+
+        args['messages'] = req_messages
+
+        if 'thinking' in args:
+            args['thinking'] = {'type': 'enabled', 'budget_tokens': 10000}
+
+        if funcs:
+            tools = await self.ap.tool_mgr.generate_tools_for_anthropic(funcs)
+
+            if tools:
+                args['tools'] = tools
+
+        try:
+            resp = await self.client.messages.create(**args)
+
+            args = {
+                'content': '',
+                'role': resp.role,
+            }
+            assert type(resp) is anthropic.types.message.Message
+
+            for block in resp.content:
+                if not remove_think and block.type == 'thinking':
+                    args['content'] = '<think>\n' + block.thinking + '\n</think>\n' + args['content']
+                elif block.type == 'text':
+                    args['content'] += block.text
+                elif block.type == 'tool_use':
+                    assert type(block) is anthropic.types.tool_use_block.ToolUseBlock
+                    tool_call = provider_message.ToolCall(
+                        id=block.id,
+                        type='function',
+                        function=provider_message.FunctionCall(name=block.name, arguments=json.dumps(block.input)),
+                    )
+                    if 'tool_calls' not in args:
+                        args['tool_calls'] = []
+                    args['tool_calls'].append(tool_call)
+
+            return provider_message.Message(**args)
+        except anthropic.AuthenticationError as e:
+            raise errors.RequesterError(f'api-key 无效: {e.message}')
+        except anthropic.BadRequestError as e:
+            raise errors.RequesterError(str(e.message))
+        except anthropic.NotFoundError as e:
+            if 'model: ' in str(e):
+                raise errors.RequesterError(f'模型无效: {e.message}')
+            else:
+                raise errors.RequesterError(f'请求地址无效: {e.message}')
+
+    async def invoke_llm_stream(
+        self,
+        query: pipeline_query.Query,
+        model: requester.RuntimeLLMModel,
+        messages: typing.List[provider_message.Message],
+        funcs: typing.List[resource_tool.LLMTool] = None,
+        extra_args: dict[str, typing.Any] = {},
+        remove_think: bool = False,
+    ) -> provider_message.Message:
+        self.client.api_key = model.provider.token_mgr.get_token()
+
+        args = extra_args.copy()
+        args['model'] = model.model_entity.name
+        args['stream'] = True
+
+        # 处理消息
+
+        # system
+        system_role_message = None
+
+        for i, m in enumerate(messages):
+            if m.role == 'system':
+                system_role_message = m
+
+                break
+
+        if system_role_message:
+            messages.pop(i)
+
+        if isinstance(system_role_message, provider_message.Message) and isinstance(system_role_message.content, str):
+            args['system'] = system_role_message.content
+
+        req_messages = []
+
+        for m in messages:
+            if m.role == 'tool':
+                tool_call_id = m.tool_call_id
+
+                req_messages.append(
+                    {
+                        'role': 'user',
+                        'content': [
+                            {
+                                'type': 'tool_result',
+                                'tool_use_id': tool_call_id,
+                                'is_error': False,  # 暂时直接写false
+                                'content': [
+                                    {'type': 'text', 'text': m.content}
+                                ],  # 这里要是list包裹，应该是多个返回的情况？type类型好像也可以填其他的，暂时只写text
+                            }
+                        ],
+                    }
+                )
+
+                continue
+
+            msg_dict = m.dict(exclude_none=True)
+
+            if isinstance(m.content, str) and m.content.strip() != '':
+                msg_dict['content'] = [{'type': 'text', 'text': m.content}]
+            elif isinstance(m.content, list):
+                for i, ce in enumerate(m.content):
+                    if ce.type == 'image_base64':
+                        image_b64, image_format = await image.extract_b64_and_format(ce.image_base64)
+
+                        alter_image_ele = {
+                            'type': 'image',
+                            'source': {
+                                'type': 'base64',
+                                'media_type': f'image/{image_format}',
+                                'data': image_b64,
+                            },
+                        }
+                        msg_dict['content'][i] = alter_image_ele
+            if isinstance(msg_dict['content'], str) and msg_dict['content'] == '':
+                msg_dict['content'] = []  # 这里不知道为什么会莫名有个空导致content为字符
+            if m.tool_calls:
+                for tool_call in m.tool_calls:
+                    msg_dict['content'].append(
+                        {
+                            'type': 'tool_use',
+                            'id': tool_call.id,
+                            'name': tool_call.function.name,
+                            'input': json.loads(tool_call.function.arguments),
+                        }
+                    )
+
+                del msg_dict['tool_calls']
+
+            req_messages.append(msg_dict)
+        if 'thinking' in args:
+            args['thinking'] = {'type': 'enabled', 'budget_tokens': 10000}
+
+        args['messages'] = req_messages
+
+        if funcs:
+            tools = await self.ap.tool_mgr.generate_tools_for_anthropic(funcs)
+
+            if tools:
+                args['tools'] = tools
+
+        try:
+            role = 'assistant'  # 默认角色
+            # chunk_idx = 0
+            think_started = False
+            think_ended = False
+            finish_reason = False
+            tool_name = ''
+            tool_id = ''
+            async for chunk in await self.client.messages.create(**args):
+                content = ''
+                tool_call = {'id': None, 'function': {'name': None, 'arguments': None}, 'type': 'function'}
+                if isinstance(
+                    chunk, anthropic.types.raw_content_block_start_event.RawContentBlockStartEvent
+                ):  # 记录开始
+                    if chunk.content_block.type == 'tool_use':
+                        if chunk.content_block.name is not None:
+                            tool_name = chunk.content_block.name
+                        if chunk.content_block.id is not None:
+                            tool_id = chunk.content_block.id
+
+                        tool_call['function']['name'] = tool_name
+                        tool_call['function']['arguments'] = ''
+                        tool_call['id'] = tool_id
+
+                    if not remove_think:
+                        if chunk.content_block.type == 'thinking' and not remove_think:
+                            think_started = True
+                        elif chunk.content_block.type == 'text' and chunk.index != 0 and not remove_think:
+                            think_ended = True
+                        continue
+                elif isinstance(chunk, anthropic.types.raw_content_block_delta_event.RawContentBlockDeltaEvent):
+                    if chunk.delta.type == 'thinking_delta':
+                        if think_started:
+                            think_started = False
+                            content = '<think>\n' + chunk.delta.thinking
+                        elif remove_think:
+                            continue
+                        else:
+                            content = chunk.delta.thinking
+                    elif chunk.delta.type == 'text_delta':
+                        if think_ended:
+                            think_ended = False
+                            content = '\n</think>\n' + chunk.delta.text
+                        else:
+                            content = chunk.delta.text
+                    elif chunk.delta.type == 'input_json_delta':
+                        tool_call['function']['arguments'] = chunk.delta.partial_json
+                        tool_call['function']['name'] = tool_name
+                        tool_call['id'] = tool_id
+                elif isinstance(chunk, anthropic.types.raw_content_block_stop_event.RawContentBlockStopEvent):
+                    continue  # 记录raw_content_block结束的
+
+                elif isinstance(chunk, anthropic.types.raw_message_delta_event.RawMessageDeltaEvent):
+                    if chunk.delta.stop_reason == 'end_turn':
+                        finish_reason = True
+                elif isinstance(chunk, anthropic.types.raw_message_stop_event.RawMessageStopEvent):
+                    continue  # 这个好像是完全结束
+                else:
+                    # print(chunk)
+                    self.ap.logger.debug(f'anthropic chunk: {chunk}')
+                    continue
+
+                args = {
+                    'content': content,
+                    'role': role,
+                    'is_final': finish_reason,
+                    'tool_calls': None if tool_call['id'] is None else [tool_call],
+                }
+                # if chunk_idx == 0:
+                #     chunk_idx += 1
+                #     continue
+
+                # assert type(chunk) is anthropic.types.message.Chunk
+
+                yield provider_message.MessageChunk(**args)
+
+            # return llm_entities.Message(**args)
+        except anthropic.AuthenticationError as e:
+            raise errors.RequesterError(f'api-key 无效: {e.message}')
+        except anthropic.BadRequestError as e:
+            raise errors.RequesterError(str(e.message))
+        except anthropic.NotFoundError as e:
+            if 'model: ' in str(e):
+                raise errors.RequesterError(f'模型无效: {e.message}')
+            else:
+                raise errors.RequesterError(f'请求地址无效: {e.message}')
--- a/src/langbot/pkg/provider/modelmgr/requesters/anthropicmsgs.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/anthropicmsgs.yaml
@@ -22,7 +22,10 @@ spec:
    type: integer
    required: true
    default: 120
-  litellm_provider: anthropic
  support_type:
  - llm
  provider_category: manufacturer
+execution:
+  python:
+    path: ./anthropicmsgs.py
+    attr: AnthropicMessages
--- a/src/langbot/pkg/provider/modelmgr/requesters/bailianchatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/bailianchatcmpl.py
@@ -0,0 +1,242 @@
+from __future__ import annotations
+
+import typing
+import dashscope
+import openai
+
+from . import modelscopechatcmpl
+from .. import requester
+import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
+import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
+import langbot_plugin.api.entities.builtin.provider.message as provider_message
+
+
+class BailianChatCompletions(modelscopechatcmpl.ModelScopeChatCompletions):
+    """阿里云百炼大模型平台 ChatCompletion API 请求器"""
+
+    client: openai.AsyncClient
+
+    default_config: dict[str, typing.Any] = {
+        'base_url': 'https://dashscope.aliyuncs.com/compatible-mode/v1',
+        'timeout': 120,
+    }
+
+    async def _closure_stream(
+        self,
+        query: pipeline_query.Query,
+        req_messages: list[dict],
+        use_model: requester.RuntimeLLMModel,
+        use_funcs: list[resource_tool.LLMTool] = None,
+        extra_args: dict[str, typing.Any] = {},
+        remove_think: bool = False,
+    ) -> provider_message.Message | typing.AsyncGenerator[provider_message.MessageChunk, None]:
+        self.client.api_key = use_model.provider.token_mgr.get_token()
+
+        args = {}
+        args['model'] = use_model.model_entity.name
+
+        if use_funcs:
+            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
+
+            if tools:
+                args['tools'] = tools
+
+        # 设置此次请求中的messages
+        messages = req_messages.copy()
+
+        is_use_dashscope_call = False  # 是否使用阿里原生库调用
+        is_enable_multi_model = True  # 是否支持多轮对话
+        use_time_num = 0  # 模型已调用次数，防止存在多文件时重复调用
+        use_time_ids = []  # 已调用的ID列表
+        message_id = 0  # 记录消息序号
+
+        for msg in messages:
+            # print(msg)
+            if 'content' in msg and isinstance(msg['content'], list):
+                for me in msg['content']:
+                    if me['type'] == 'image_base64':
+                        me['image_url'] = {'url': me['image_base64']}
+                        me['type'] = 'image_url'
+                        del me['image_base64']
+                    elif me['type'] == 'file_url' and '.' in me.get('file_name', ''):
+                        # 1. 视频文件推理
+                        # https://bailian.console.aliyun.com/?tab=doc#/doc/?type=model&url=2845871
+                        file_type = me.get('file_name').lower().split('.')[-1]
+                        if file_type in ['mp4', 'avi', 'mkv', 'mov', 'flv', 'wmv']:
+                            me['type'] = 'video_url'
+                            me['video_url'] = {'url': me['file_url']}
+                            del me['file_url']
+                            del me['file_name']
+                            use_time_num += 1
+                            use_time_ids.append(message_id)
+                            is_enable_multi_model = False
+                        # 2. 语音文件识别, 无法通过openai的audio字段传递，暂时不支持
+                        # https://bailian.console.aliyun.com/?tab=doc#/doc/?type=model&url=2979031
+                        elif file_type in [
+                            'aac',
+                            'amr',
+                            'aiff',
+                            'flac',
+                            'm4a',
+                            'mp3',
+                            'mpeg',
+                            'ogg',
+                            'opus',
+                            'wav',
+                            'webm',
+                            'wma',
+                        ]:
+                            me['audio'] = me['file_url']
+                            me['type'] = 'audio'
+                            del me['file_url']
+                            del me['type']
+                            del me['file_name']
+                            is_use_dashscope_call = True
+                            use_time_num += 1
+                            use_time_ids.append(message_id)
+                            is_enable_multi_model = False
+            message_id += 1
+
+        # 使用列表推导式，保留不在 use_time_ids[:-1] 中的元素，仅保留最后一个多媒体消息
+        if not is_enable_multi_model and use_time_num > 1:
+            messages = [msg for idx, msg in enumerate(messages) if idx not in use_time_ids[:-1]]
+
+        if not is_enable_multi_model:
+            messages = [msg for msg in messages if 'resp_message_id' not in msg]
+
+        args['messages'] = messages
+        args['stream'] = True
+
+        # 流式处理状态
+        # tool_calls_map: dict[str, provider_message.ToolCall] = {}
+        chunk_idx = 0
+        thinking_started = False
+        thinking_ended = False
+        role = 'assistant'  # 默认角色
+
+        if is_use_dashscope_call:
+            response = dashscope.MultiModalConversation.call(
+                # 若没有配置环境变量，请用百炼API Key将下行替换为：api_key = "sk-xxx"
+                api_key=use_model.provider.token_mgr.get_token(),
+                model=use_model.model_entity.name,
+                messages=messages,
+                result_format='message',
+                asr_options={
+                    # "language": "zh", # 可选，若已知音频的语种，可通过该参数指定待识别语种，以提升识别准确率
+                    'enable_lid': True,
+                    'enable_itn': False,
+                },
+                stream=True,
+            )
+            content_length_list = []
+            previous_length = 0  # 记录上一次的内容长度
+            for res in response:
+                chunk = res['output']
+                # 解析 chunk 数据
+                if hasattr(chunk, 'choices') and chunk.choices:
+                    choice = chunk.choices[0]
+                    delta_content = choice['message'].content[0]['text']
+                    finish_reason = choice['finish_reason']
+                    content_length_list.append(len(delta_content))
+                else:
+                    delta_content = ''
+                    finish_reason = None
+
+                # 跳过空的第一个 chunk（只有 role 没有内容）
+                if chunk_idx == 0 and not delta_content:
+                    chunk_idx += 1
+                    continue
+
+                # 检查 content_length_list 是否有足够的数据
+                if len(content_length_list) >= 2:
+                    now_content = delta_content[previous_length : content_length_list[-1]]
+                    previous_length = content_length_list[-1]  # 更新上一次的长度
+                else:
+                    now_content = delta_content  # 第一次循环时直接使用 delta_content
+                    previous_length = len(delta_content)  # 更新上一次的长度
+
+                # 构建 MessageChunk - 只包含增量内容
+                chunk_data = {
+                    'role': role,
+                    'content': now_content if now_content else None,
+                    'is_final': bool(finish_reason) and finish_reason != 'null',
+                }
+
+                # 移除 None 值
+                chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
+                yield provider_message.MessageChunk(**chunk_data)
+                chunk_idx += 1
+        else:
+            async for chunk in self._req_stream(args, extra_body=extra_args):
+                # 解析 chunk 数据
+                if hasattr(chunk, 'choices') and chunk.choices:
+                    choice = chunk.choices[0]
+                    delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
+                    finish_reason = getattr(choice, 'finish_reason', None)
+                else:
+                    delta = {}
+                    finish_reason = None
+
+                # 从第一个 chunk 获取 role，后续使用这个 role
+                if 'role' in delta and delta['role']:
+                    role = delta['role']
+
+                # 获取增量内容
+                delta_content = delta.get('content', '')
+                reasoning_content = delta.get('reasoning_content', '')
+
+                # 处理 reasoning_content
+                if reasoning_content:
+                    # accumulated_reasoning += reasoning_content
+                    # 如果设置了 remove_think，跳过 reasoning_content
+                    if remove_think:
+                        chunk_idx += 1
+                        continue
+
+                    # 第一次出现 reasoning_content，添加 <think> 开始标签
+                    if not thinking_started:
+                        thinking_started = True
+                        delta_content = '<think>\n' + reasoning_content
+                    else:
+                        # 继续输出 reasoning_content
+                        delta_content = reasoning_content
+                elif thinking_started and not thinking_ended and delta_content:
+                    # reasoning_content 结束，normal content 开始，添加 </think> 结束标签
+                    thinking_ended = True
+                    delta_content = '\n</think>\n' + delta_content
+
+                # 处理工具调用增量
+                if delta.get('tool_calls'):
+                    for tool_call in delta['tool_calls']:
+                        if tool_call['id'] != '':
+                            tool_id = tool_call['id']
+                        if tool_call['function']['name'] is not None:
+                            tool_name = tool_call['function']['name']
+
+                        if tool_call['type'] is None:
+                            tool_call['type'] = 'function'
+                        tool_call['id'] = tool_id
+                        tool_call['function']['name'] = tool_name
+                        tool_call['function']['arguments'] = (
+                            '' if tool_call['function']['arguments'] is None else tool_call['function']['arguments']
+                        )
+
+                # 跳过空的第一个 chunk（只有 role 没有内容）
+                if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'):
+                    chunk_idx += 1
+                    continue
+
+                # 构建 MessageChunk - 只包含增量内容
+                chunk_data = {
+                    'role': role,
+                    'content': delta_content if delta_content else None,
+                    'tool_calls': delta.get('tool_calls'),
+                    'is_final': bool(finish_reason),
+                }
+
+                # 移除 None 值
+                chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
+
+                yield provider_message.MessageChunk(**chunk_data)
+                chunk_idx += 1
+                # return
--- a/src/langbot/pkg/provider/modelmgr/requesters/bailianchatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/bailianchatcmpl.yaml
@@ -22,8 +22,11 @@ spec:
    type: integer
    required: true
    default: 120
-  litellm_provider: openai
  support_type:
  - llm
  - rerank
  provider_category: maas
+execution:
+  python:
+    path: ./bailianchatcmpl.py
+    attr: BailianChatCompletions
--- a/src/langbot/pkg/provider/modelmgr/requesters/chatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/chatcmpl.py
@@ -0,0 +1,702 @@
+from __future__ import annotations
+
+import asyncio
+import typing
+
+import openai
+import openai.types.chat.chat_completion as chat_completion_module
+import httpx
+
+from .. import errors, requester
+import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
+import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
+import langbot_plugin.api.entities.builtin.provider.message as provider_message
+
+
+class OpenAIChatCompletions(requester.ProviderAPIRequester):
+    """OpenAI ChatCompletion API 请求器"""
+
+    client: openai.AsyncClient
+
+    default_config: dict[str, typing.Any] = {
+        'base_url': 'https://api.openai.com/v1',
+        'timeout': 120,
+    }
+
+    async def initialize(self):
+        self.client = openai.AsyncClient(
+            api_key='',
+            base_url=self.requester_cfg['base_url'].replace(' ', ''),
+            timeout=self.requester_cfg['timeout'],
+            http_client=httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']),
+        )
+
+    def _mask_api_key(self, api_key: str | None) -> str:
+        if not api_key:
+            return ''
+        if len(api_key) <= 8:
+            return '****'
+        return f'{api_key[:4]}...{api_key[-4:]}'
+
+    def _infer_model_type(self, model_id: str) -> str:
+        normalized_model_id = (model_id or '').lower()
+        embedding_keywords = (
+            'embedding',
+            'embed',
+            'bge-',
+            'e5-',
+            'm3e',
+            'gte-',
+            'multilingual-e5',
+            'text-embedding',
+        )
+        return 'embedding' if any(keyword in normalized_model_id for keyword in embedding_keywords) else 'llm'
+
+    def _infer_model_abilities(self, item: dict[str, typing.Any], model_id: str) -> list[str]:
+        normalized_model_id = (model_id or '').lower()
+        abilities: set[str] = set()
+
+        def _flatten(value: typing.Any) -> list[str]:
+            if value is None:
+                return []
+            if isinstance(value, str):
+                return [value.lower()]
+            if isinstance(value, dict):
+                flattened: list[str] = []
+                for nested_value in value.values():
+                    flattened.extend(_flatten(nested_value))
+                return flattened
+            if isinstance(value, (list, tuple, set)):
+                flattened: list[str] = []
+                for nested_value in value:
+                    flattened.extend(_flatten(nested_value))
+                return flattened
+            return [str(value).lower()]
+
+        capability_tokens = _flatten(item.get('capabilities'))
+        capability_tokens.extend(_flatten(item.get('modalities')))
+        capability_tokens.extend(_flatten(item.get('input_modalities')))
+        capability_tokens.extend(_flatten(item.get('output_modalities')))
+        capability_tokens.extend(_flatten(item.get('supported_generation_methods')))
+        capability_tokens.extend(_flatten(item.get('supported_parameters')))
+        capability_tokens.extend(_flatten(item.get('architecture')))
+
+        combined_tokens = capability_tokens + [normalized_model_id]
+
+        vision_keywords = (
+            'vision',
+            'image',
+            'file',
+            'video',
+            'multimodal',
+            'vl',
+            'ocr',
+            'omni',
+        )
+        function_call_keywords = (
+            'function',
+            'tool',
+            'tools',
+            'tool_choice',
+            'tool_call',
+            'tool-use',
+            'tool_use',
+        )
+
+        if any(any(keyword in token for keyword in vision_keywords) for token in combined_tokens):
+            abilities.add('vision')
+
+        if any(any(keyword in token for keyword in function_call_keywords) for token in combined_tokens):
+            abilities.add('func_call')
+
+        return sorted(abilities)
+
+    def _normalize_modalities(self, value: typing.Any) -> list[str]:
+        normalized: list[str] = []
+
+        def _collect(item: typing.Any):
+            if item is None:
+                return
+            if isinstance(item, str):
+                for part in item.replace('->', ',').replace('+', ',').split(','):
+                    token = part.strip().lower()
+                    if token and token not in normalized:
+                        normalized.append(token)
+                return
+            if isinstance(item, dict):
+                for nested in item.values():
+                    _collect(nested)
+                return
+            if isinstance(item, (list, tuple, set)):
+                for nested in item:
+                    _collect(nested)
+                return
+
+        _collect(value)
+        return normalized
+
+    def _extract_scan_metadata(self, item: dict[str, typing.Any], model_id: str) -> dict[str, typing.Any]:
+        display_name = item.get('name')
+        if not isinstance(display_name, str) or not display_name.strip() or display_name == model_id:
+            display_name = ''
+
+        description = item.get('description')
+        if not isinstance(description, str) or not description.strip():
+            description = ''
+
+        context_length = item.get('context_length')
+        if context_length is None and isinstance(item.get('top_provider'), dict):
+            context_length = item['top_provider'].get('context_length')
+
+        if not isinstance(context_length, int):
+            try:
+                context_length = int(context_length) if context_length is not None else None
+            except (TypeError, ValueError):
+                context_length = None
+
+        input_modalities = self._normalize_modalities(item.get('input_modalities'))
+        output_modalities = self._normalize_modalities(item.get('output_modalities'))
+
+        if isinstance(item.get('architecture'), dict):
+            if not input_modalities:
+                input_modalities = self._normalize_modalities(item['architecture'].get('input_modalities'))
+            if not output_modalities:
+                output_modalities = self._normalize_modalities(item['architecture'].get('output_modalities'))
+
+        owned_by = item.get('owned_by')
+        if not isinstance(owned_by, str) or not owned_by.strip():
+            owned_by = ''
+
+        return {
+            'display_name': display_name or None,
+            'description': description or None,
+            'context_length': context_length,
+            'owned_by': owned_by or None,
+            'input_modalities': input_modalities,
+            'output_modalities': output_modalities,
+        }
+
+    async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]:
+        headers = {}
+        if api_key:
+            headers['Authorization'] = f'Bearer {api_key}'
+
+        models_url = f'{self.requester_cfg["base_url"].rstrip("/")}/models'
+        async with httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']) as client:
+            response = await client.get(models_url, headers=headers)
+            response.raise_for_status()
+            payload = response.json()
+
+        models = []
+        for item in payload.get('data', []):
+            model_id = item.get('id')
+            if not model_id:
+                continue
+            models.append(
+                {
+                    'id': model_id,
+                    'name': model_id,
+                    'type': self._infer_model_type(model_id),
+                    'abilities': self._infer_model_abilities(item, model_id),
+                    **self._extract_scan_metadata(item, model_id),
+                }
+            )
+
+        models.sort(key=lambda item: (item['type'] != 'llm', item['name'].lower()))
+        return {
+            'models': models,
+            'debug': {
+                'request': {
+                    'method': 'GET',
+                    'url': models_url,
+                    'headers': {
+                        'Authorization': f'Bearer {self._mask_api_key(api_key)}' if api_key else '',
+                    },
+                },
+                'response': payload,
+            },
+        }
+
+    async def _req(
+        self,
+        args: dict,
+        extra_body: dict = {},
+    ) -> chat_completion_module.ChatCompletion:
+        return await self.client.chat.completions.create(**args, extra_body=extra_body)
+
+    async def _req_stream(
+        self,
+        args: dict,
+        extra_body: dict = {},
+    ):
+        async for chunk in await self.client.chat.completions.create(**args, extra_body=extra_body):
+            yield chunk
+
+    async def _make_msg(
+        self,
+        chat_completion: chat_completion_module.ChatCompletion,
+        remove_think: bool = False,
+    ) -> provider_message.Message:
+        if not isinstance(chat_completion, chat_completion_module.ChatCompletion):
+            raise TypeError(f'Expected ChatCompletion, got {type(chat_completion).__name__}: {chat_completion[:16]}')
+
+        chatcmpl_message = chat_completion.choices[0].message.model_dump()
+
+        # 确保 role 字段存在且不为 None
+        if 'role' not in chatcmpl_message or chatcmpl_message['role'] is None:
+            chatcmpl_message['role'] = 'assistant'
+
+        # 处理思维链
+        content = chatcmpl_message.get('content', '')
+        reasoning_content = chatcmpl_message.get('reasoning_content', None)
+
+        processed_content, _ = await self._process_thinking_content(
+            content=content, reasoning_content=reasoning_content, remove_think=remove_think
+        )
+
+        chatcmpl_message['content'] = processed_content
+
+        # 移除 reasoning_content 字段，避免传递给 Message
+        if 'reasoning_content' in chatcmpl_message:
+            del chatcmpl_message['reasoning_content']
+
+        message = provider_message.Message(**chatcmpl_message)
+
+        return message
+
+    async def _process_thinking_content(
+        self,
+        content: str,
+        reasoning_content: str = None,
+        remove_think: bool = False,
+    ) -> tuple[str, str]:
+        """处理思维链内容
+
+        Args:
+            content: 原始内容
+            reasoning_content: reasoning_content 字段内容
+            remove_think: 是否移除思维链
+
+        Returns:
+            (处理后的内容, 提取的思维链内容)
+        """
+        thinking_content = ''
+
+        # 1. 从 reasoning_content 提取思维链
+        if reasoning_content:
+            thinking_content = reasoning_content
+
+        # 2. 从 content 中提取 <think> 标签内容
+        if content and '<think>' in content and '</think>' in content:
+            import re
+
+            think_pattern = r'<think>(.*?)</think>'
+            think_matches = re.findall(think_pattern, content, re.DOTALL)
+            if think_matches:
+                # 如果已有 reasoning_content，则追加
+                if thinking_content:
+                    thinking_content += '\n' + '\n'.join(think_matches)
+                else:
+                    thinking_content = '\n'.join(think_matches)
+                # 移除 content 中的 <think> 标签
+                content = re.sub(think_pattern, '', content, flags=re.DOTALL).strip()
+
+        # 3. 根据 remove_think 参数决定是否保留思维链
+        if remove_think:
+            return content, ''
+        else:
+            # 如果有思维链内容，将其以 <think> 格式添加到 content 开头
+            if thinking_content:
+                content = f'<think>\n{thinking_content}\n</think>\n{content}'.strip()
+            return content, thinking_content
+
+    async def _closure_stream(
+        self,
+        query: pipeline_query.Query,
+        req_messages: list[dict],
+        use_model: requester.RuntimeLLMModel,
+        use_funcs: list[resource_tool.LLMTool] = None,
+        extra_args: dict[str, typing.Any] = {},
+        remove_think: bool = False,
+    ) -> provider_message.MessageChunk:
+        self.client.api_key = use_model.provider.token_mgr.get_token()
+
+        args = {}
+        args['model'] = use_model.model_entity.name
+
+        if use_funcs:
+            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
+            if tools:
+                args['tools'] = tools
+
+        # 设置此次请求中的messages
+        messages = req_messages.copy()
+
+        # 检查vision
+        for msg in messages:
+            if 'content' in msg and isinstance(msg['content'], list):
+                for me in msg['content']:
+                    if me['type'] == 'image_base64':
+                        me['image_url'] = {'url': me['image_base64']}
+                        me['type'] = 'image_url'
+                        del me['image_base64']
+
+        args['messages'] = messages
+        args['stream'] = True
+
+        # 流式处理状态
+        # tool_calls_map: dict[str, provider_message.ToolCall] = {}
+        chunk_idx = 0
+        thinking_started = False
+        thinking_ended = False
+        role = 'assistant'  # 默认角色
+        tool_id = ''
+        tool_name = ''
+        # accumulated_reasoning = ''  # 仅用于判断何时结束思维链
+
+        async for chunk in self._req_stream(args, extra_body=extra_args):
+            # 解析 chunk 数据
+
+            if hasattr(chunk, 'choices') and chunk.choices:
+                choice = chunk.choices[0]
+                delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
+
+                finish_reason = getattr(choice, 'finish_reason', None)
+            else:
+                delta = {}
+                finish_reason = None
+            # 从第一个 chunk 获取 role，后续使用这个 role
+            if 'role' in delta and delta['role']:
+                role = delta['role']
+
+            # 获取增量内容
+            delta_content = delta.get('content', '')
+            reasoning_content = delta.get('reasoning_content', '')
+
+            # 处理 reasoning_content
+            if reasoning_content:
+                # accumulated_reasoning += reasoning_content
+                # 如果设置了 remove_think，跳过 reasoning_content
+                if remove_think:
+                    chunk_idx += 1
+                    continue
+
+                # 第一次出现 reasoning_content，添加 <think> 开始标签
+                if not thinking_started:
+                    thinking_started = True
+                    delta_content = '<think>\n' + reasoning_content
+                else:
+                    # 继续输出 reasoning_content
+                    delta_content = reasoning_content
+            elif thinking_started and not thinking_ended and delta_content:
+                # reasoning_content 结束，normal content 开始，添加 </think> 结束标签
+                thinking_ended = True
+                delta_content = '\n</think>\n' + delta_content
+
+            # 处理 content 中已有的 <think> 标签（如果需要移除）
+            # if delta_content and remove_think and '<think>' in delta_content:
+            #     import re
+            #
+            #     # 移除 <think> 标签及其内容
+            #     delta_content = re.sub(r'<think>.*?</think>', '', delta_content, flags=re.DOTALL)
+
+            # 处理工具调用增量
+            # delta_tool_calls = None
+            if delta.get('tool_calls'):
+                for tool_call in delta['tool_calls']:
+                    if tool_call['id'] and tool_call['function']['name']:
+                        tool_id = tool_call['id']
+                        tool_name = tool_call['function']['name']
+                    else:
+                        tool_call['id'] = tool_id
+                        tool_call['function']['name'] = tool_name
+                    if tool_call['type'] is None:
+                        tool_call['type'] = 'function'
+
+            # 跳过空的第一个 chunk（只有 role 没有内容）
+            if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'):
+                chunk_idx += 1
+                continue
+            # 构建 MessageChunk - 只包含增量内容
+            chunk_data = {
+                'role': role,
+                'content': delta_content if delta_content else None,
+                'tool_calls': delta.get('tool_calls'),
+                'is_final': bool(finish_reason),
+            }
+
+            # 移除 None 值
+            chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
+
+            yield provider_message.MessageChunk(**chunk_data)
+            chunk_idx += 1
+
+    async def _closure(
+        self,
+        query: pipeline_query.Query,
+        req_messages: list[dict],
+        use_model: requester.RuntimeLLMModel,
+        use_funcs: list[resource_tool.LLMTool] = None,
+        extra_args: dict[str, typing.Any] = {},
+        remove_think: bool = False,
+    ) -> tuple[provider_message.Message, dict]:
+        self.client.api_key = use_model.provider.token_mgr.get_token()
+
+        args = {}
+        args['model'] = use_model.model_entity.name
+
+        if use_funcs:
+            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
+
+            if tools:
+                args['tools'] = tools
+
+        # 设置此次请求中的messages
+        messages = req_messages.copy()
+
+        # 检查vision
+        for msg in messages:
+            if 'content' in msg and isinstance(msg['content'], list):
+                for me in msg['content']:
+                    if me['type'] == 'image_base64':
+                        me['image_url'] = {'url': me['image_base64']}
+                        me['type'] = 'image_url'
+                        del me['image_base64']
+
+        args['messages'] = messages
+
+        # 发送请求
+
+        resp = await self._req(args, extra_body=extra_args)
+        # 处理请求结果
+        message = await self._make_msg(resp, remove_think)
+
+        # Extract token usage from response
+        usage_info = {}
+        if hasattr(resp, 'usage') and resp.usage:
+            usage_info['input_tokens'] = resp.usage.prompt_tokens or 0
+            usage_info['output_tokens'] = resp.usage.completion_tokens or 0
+            usage_info['total_tokens'] = resp.usage.total_tokens or 0
+
+        return message, usage_info
+
+    async def invoke_llm(
+        self,
+        query: pipeline_query.Query,
+        model: requester.RuntimeLLMModel,
+        messages: typing.List[provider_message.Message],
+        funcs: typing.List[resource_tool.LLMTool] = None,
+        extra_args: dict[str, typing.Any] = {},
+        remove_think: bool = False,
+    ) -> tuple[provider_message.Message, dict]:
+        """Invoke LLM and return message with usage info"""
+        req_messages = []  # req_messages 仅用于类内，外部同步由 query.messages 进行
+        for m in messages:
+            msg_dict = m.dict(exclude_none=True)
+            content = msg_dict.get('content')
+            if isinstance(content, list):
+                # 检查 content 列表中是否每个部分都是文本
+                if all(isinstance(part, dict) and part.get('type') == 'text' for part in content):
+                    # 将所有文本部分合并为一个字符串
+                    msg_dict['content'] = '\n'.join(part['text'] for part in content)
+            req_messages.append(msg_dict)
+
+        try:
+            msg, usage_info = await self._closure(
+                query=query,
+                req_messages=req_messages,
+                use_model=model,
+                use_funcs=funcs,
+                extra_args=extra_args,
+                remove_think=remove_think,
+            )
+            return msg, usage_info
+        except asyncio.TimeoutError:
+            raise errors.RequesterError('请求超时')
+        except openai.BadRequestError as e:
+            error_message = str(e.message) if hasattr(e, 'message') else str(e)
+            if 'context_length_exceeded' in str(e):
+                raise errors.RequesterError(f'上文过长，请重置会话: {error_message}')
+            else:
+                raise errors.RequesterError(f'请求参数错误: {error_message}')
+        except openai.AuthenticationError as e:
+            error_message = str(e.message) if hasattr(e, 'message') else str(e)
+            raise errors.RequesterError(f'无效的 api-key: {error_message}')
+        except openai.NotFoundError as e:
+            error_message = str(e.message) if hasattr(e, 'message') else str(e)
+            raise errors.RequesterError(f'请求路径错误: {error_message}')
+        except openai.RateLimitError as e:
+            error_message = str(e.message) if hasattr(e, 'message') else str(e)
+            raise errors.RequesterError(f'请求过于频繁或余额不足: {error_message}')
+        except openai.APIConnectionError as e:
+            error_message = f'连接错误: {str(e)}'
+            raise errors.RequesterError(error_message)
+        except openai.APIError as e:
+            error_message = str(e.message) if hasattr(e, 'message') else str(e)
+            raise errors.RequesterError(f'请求错误: {error_message}')
+
+    async def invoke_embedding(
+        self,
+        model: requester.RuntimeEmbeddingModel,
+        input_text: list[str],
+        extra_args: dict[str, typing.Any] = {},
+    ) -> tuple[list[list[float]], dict]:
+        """调用 Embedding API, returns (embeddings, usage_info)"""
+        self.client.api_key = model.provider.token_mgr.get_token()
+
+        args = {
+            'model': model.model_entity.name,
+            'input': input_text,
+        }
+
+        if model.model_entity.extra_args:
+            args.update(model.model_entity.extra_args)
+
+        args.update(extra_args)
+
+        try:
+            resp = await self.client.embeddings.create(**args)
+
+            # Extract usage info
+            usage_info = {}
+            if hasattr(resp, 'usage') and resp.usage:
+                usage_info['prompt_tokens'] = resp.usage.prompt_tokens or 0
+                usage_info['total_tokens'] = resp.usage.total_tokens or 0
+
+            return [d.embedding for d in resp.data], usage_info
+        except asyncio.TimeoutError:
+            raise errors.RequesterError('请求超时')
+        except openai.BadRequestError as e:
+            raise errors.RequesterError(f'请求参数错误: {e.message}')
+
+    async def invoke_llm_stream(
+        self,
+        query: pipeline_query.Query,
+        model: requester.RuntimeLLMModel,
+        messages: typing.List[provider_message.Message],
+        funcs: typing.List[resource_tool.LLMTool] = None,
+        extra_args: dict[str, typing.Any] = {},
+        remove_think: bool = False,
+    ) -> provider_message.MessageChunk:
+        req_messages = []  # req_messages 仅用于类内，外部同步由 query.messages 进行
+        for m in messages:
+            msg_dict = m.dict(exclude_none=True)
+            content = msg_dict.get('content')
+            if isinstance(content, list):
+                # 检查 content 列表中是否每个部分都是文本
+                if all(isinstance(part, dict) and part.get('type') == 'text' for part in content):
+                    # 将所有文本部分合并为一个字符串
+                    msg_dict['content'] = '\n'.join(part['text'] for part in content)
+            req_messages.append(msg_dict)
+
+        try:
+            async for item in self._closure_stream(
+                query=query,
+                req_messages=req_messages,
+                use_model=model,
+                use_funcs=funcs,
+                extra_args=extra_args,
+                remove_think=remove_think,
+            ):
+                yield item
+
+        except asyncio.TimeoutError:
+            raise errors.RequesterError('请求超时')
+        except openai.BadRequestError as e:
+            if 'context_length_exceeded' in e.message:
+                raise errors.RequesterError(f'上文过长，请重置会话: {e.message}')
+            else:
+                raise errors.RequesterError(f'请求参数错误: {e.message}')
+        except openai.AuthenticationError as e:
+            raise errors.RequesterError(f'无效的 api-key: {e.message}')
+        except openai.NotFoundError as e:
+            raise errors.RequesterError(f'请求路径错误: {e.message}')
+        except openai.RateLimitError as e:
+            raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}')
+        except openai.APIError as e:
+            raise errors.RequesterError(f'请求错误: {e.message}')
+
+    async def invoke_rerank(
+        self,
+        model: requester.RuntimeRerankModel,
+        query: str,
+        documents: typing.List[str],
+        extra_args: dict[str, typing.Any] = {},
+    ) -> typing.List[dict]:
+        """Standard /rerank endpoint (Jina/Cohere/SiliconFlow/Voyage/DashScope compatible)
+
+        Supports extra_args from model.extra_args:
+        - rerank_url: full URL override (e.g. "https://dashscope.aliyuncs.com/compatible-api/v1/reranks")
+        - rerank_path: path override appended to base_url (e.g. "reranks" instead of default "rerank")
+        - Any other fields are merged into the request payload.
+        """
+        api_key = model.provider.token_mgr.get_token()
+        base_url = self.requester_cfg.get('base_url', '').rstrip('/')
+        timeout = self.requester_cfg.get('timeout', 120)
+
+        merged_args = {}
+        if model.model_entity.extra_args:
+            merged_args.update(model.model_entity.extra_args)
+        if extra_args:
+            merged_args.update(extra_args)
+
+        rerank_url = merged_args.pop('rerank_url', None)
+        rerank_path = merged_args.pop('rerank_path', 'rerank')
+        if not rerank_url:
+            rerank_url = f'{base_url}/{rerank_path}'
+
+        headers = {
+            'Content-Type': 'application/json',
+            'Authorization': f'Bearer {api_key}',
+        }
+
+        payload = {
+            'model': model.model_entity.name,
+            'query': query,
+            'documents': documents[:64],
+            'top_n': min(len(documents), 64),
+        }
+
+        if merged_args:
+            payload.update(merged_args)
+
+        try:
+            async with httpx.AsyncClient(trust_env=True, timeout=timeout) as client:
+                resp = await client.post(rerank_url, headers=headers, json=payload)
+                resp.raise_for_status()
+                data = resp.json()
+
+            results = self._parse_rerank_response(data)
+
+            if results:
+                scores = [r.get('relevance_score', 0.0) for r in results]
+                min_score = min(scores)
+                max_score = max(scores)
+                if max_score - min_score > 1e-6:
+                    for r in results:
+                        r['relevance_score'] = (r['relevance_score'] - min_score) / (max_score - min_score)
+
+            return results
+        except httpx.HTTPStatusError as e:
+            raise errors.RequesterError(f'Rerank request failed: {e.response.status_code} - {e.response.text}')
+        except httpx.TimeoutException:
+            raise errors.RequesterError('Rerank request timed out')
+        except Exception as e:
+            raise errors.RequesterError(f'Rerank request error: {str(e)}')
+
+    @staticmethod
+    def _parse_rerank_response(data: dict) -> typing.List[dict]:
+        """Parse rerank response from various providers.
+
+        Handles:
+        - Jina/Cohere/SiliconFlow: {"results": [{"index", "relevance_score"}]}
+        - Voyage AI: {"data": [{"index", "relevance_score"}]}
+        - DashScope: {"output": {"results": [{"index", "relevance_score"}]}}
+        """
+        if 'results' in data:
+            return data['results']
+        if 'data' in data:
+            return data['data']
+        if 'output' in data and isinstance(data['output'], dict):
+            return data['output'].get('results', [])
+        return []
--- a/src/langbot/pkg/provider/modelmgr/requesters/chatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/chatcmpl.yaml
@@ -22,10 +22,12 @@ spec:
    type: integer
    required: true
    default: 120
-  # LiteLLM provider prefix - when set, uses unified LiteLLMRequester
-  litellm_provider: openai
  support_type:
  - llm
  - text-embedding
  - rerank
  provider_category: manufacturer
+execution:
+  python:
+    path: ./chatcmpl.py
+    attr: OpenAIChatCompletions
--- a/src/langbot/pkg/provider/modelmgr/requesters/coherererank.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/coherererank.yaml
@@ -22,7 +22,10 @@ spec:
    type: integer
    required: true
    default: 120
-  litellm_provider: cohere
  support_type:
  - rerank
  provider_category: manufacturer
+execution:
+  python:
+    path: ./chatcmpl.py
+    attr: OpenAIChatCompletions
--- a/src/langbot/pkg/provider/modelmgr/requesters/compsharechatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/compsharechatcmpl.py
@@ -0,0 +1,17 @@
+from __future__ import annotations
+
+import typing
+import openai
+
+from . import chatcmpl
+
+
+class CompShareChatCompletions(chatcmpl.OpenAIChatCompletions):
+    """CompShare ChatCompletion API 请求器"""
+
+    client: openai.AsyncClient
+
+    default_config: dict[str, typing.Any] = {
+        'base_url': 'https://api.modelverse.cn/v1',
+        'timeout': 120,
+    }
--- a/src/langbot/pkg/provider/modelmgr/requesters/compsharechatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/compsharechatcmpl.yaml
@@ -22,7 +22,10 @@ spec:
    type: integer
    required: true
    default: 120
-  litellm_provider: openai
  support_type:
  - llm
  provider_category: maas
+execution:
+  python:
+    path: ./compsharechatcmpl.py
+    attr: CompShareChatCompletions
--- a/src/langbot/pkg/provider/modelmgr/requesters/deepseekchatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/deepseekchatcmpl.py
@@ -0,0 +1,67 @@
+from __future__ import annotations
+
+import typing
+
+from . import chatcmpl
+from .. import errors, requester
+import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
+import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
+import langbot_plugin.api.entities.builtin.provider.message as provider_message
+
+
+class DeepseekChatCompletions(chatcmpl.OpenAIChatCompletions):
+    """Deepseek ChatCompletion API 请求器"""
+
+    default_config: dict[str, typing.Any] = {
+        'base_url': 'https://api.deepseek.com',
+        'timeout': 120,
+    }
+
+    async def _closure(
+        self,
+        query: pipeline_query.Query,
+        req_messages: list[dict],
+        use_model: requester.RuntimeLLMModel,
+        use_funcs: list[resource_tool.LLMTool] = None,
+        extra_args: dict[str, typing.Any] = {},
+        remove_think: bool = False,
+    ) -> tuple[provider_message.Message, dict]:
+        self.client.api_key = use_model.provider.token_mgr.get_token()
+
+        args = {}
+        args['model'] = use_model.model_entity.name
+
+        if use_funcs:
+            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
+
+            if tools:
+                args['tools'] = tools
+
+        # 设置此次请求中的messages
+        messages = req_messages
+
+        # deepseek 不支持多模态，把content都转换成纯文字
+        for m in messages:
+            if 'content' in m and isinstance(m['content'], list):
+                m['content'] = ' '.join([c['text'] for c in m['content'] if 'text' in c])
+
+        args['messages'] = messages
+
+        # 发送请求
+        resp = await self._req(args, extra_body=extra_args)
+
+        # print(resp)
+
+        if resp is None:
+            raise errors.RequesterError('接口返回为空，请确定模型提供商服务是否正常')
+        # 处理请求结果
+        message = await self._make_msg(resp, remove_think)
+
+        # Extract token usage from response
+        usage_info = {}
+        if hasattr(resp, 'usage') and resp.usage:
+            usage_info['input_tokens'] = resp.usage.prompt_tokens or 0
+            usage_info['output_tokens'] = resp.usage.completion_tokens or 0
+            usage_info['total_tokens'] = resp.usage.total_tokens or 0
+
+        return message, usage_info
--- a/src/langbot/pkg/provider/modelmgr/requesters/deepseekchatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/deepseekchatcmpl.yaml
@@ -22,7 +22,10 @@ spec:
    type: integer
    required: true
    default: 120
-  litellm_provider: deepseek
  support_type:
  - llm
  provider_category: manufacturer
+execution:
+  python:
+    path: ./deepseekchatcmpl.py
+    attr: DeepseekChatCompletions
--- a/src/langbot/pkg/provider/modelmgr/requesters/geminichatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/geminichatcmpl.py
@@ -0,0 +1,205 @@
+from __future__ import annotations
+
+import typing
+import httpx
+
+from . import chatcmpl
+
+import uuid
+
+from .. import requester
+import langbot_plugin.api.entities.builtin.provider.message as provider_message
+import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
+import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
+
+
+class GeminiChatCompletions(chatcmpl.OpenAIChatCompletions):
+    """Google Gemini API 请求器"""
+
+    default_config: dict[str, typing.Any] = {
+        'base_url': 'https://generativelanguage.googleapis.com/v1beta/openai',
+        'timeout': 120,
+    }
+
+    async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]:
+        models_url = 'https://generativelanguage.googleapis.com/v1beta/models'
+        params = {'key': api_key} if api_key else {}
+
+        all_models: list[dict[str, typing.Any]] = []
+        next_page_token = ''
+        last_payload: dict[str, typing.Any] = {}
+
+        async with httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']) as client:
+            while True:
+                request_params = dict(params)
+                if next_page_token:
+                    request_params['pageToken'] = next_page_token
+
+                response = await client.get(models_url, params=request_params)
+                response.raise_for_status()
+                payload = response.json()
+                last_payload = payload
+
+                for item in payload.get('models', []):
+                    model_name = item.get('name', '')
+                    model_id = model_name.replace('models/', '', 1)
+                    if not model_id:
+                        continue
+
+                    supported_methods = item.get('supportedGenerationMethods', []) or []
+                    if 'embedContent' in supported_methods and 'generateContent' not in supported_methods:
+                        model_type = 'embedding'
+                    else:
+                        model_type = 'llm'
+
+                    all_models.append(
+                        {
+                            'id': model_id,
+                            'name': model_id,
+                            'type': model_type,
+                            'abilities': self._infer_model_abilities(item, model_id),
+                            'display_name': item.get('displayName') or None,
+                            'description': item.get('description') or None,
+                            'context_length': item.get('inputTokenLimit'),
+                            'input_modalities': self._normalize_modalities(item.get('inputModalities')),
+                            'output_modalities': self._normalize_modalities(item.get('outputModalities')),
+                        }
+                    )
+
+                next_page_token = payload.get('nextPageToken', '')
+                if not next_page_token:
+                    break
+
+        all_models.sort(key=lambda item: (item['type'] != 'llm', item['name'].lower()))
+        return {
+            'models': all_models,
+            'debug': {
+                'request': {
+                    'method': 'GET',
+                    'url': models_url,
+                    'query': {'key': self._mask_api_key(api_key)} if api_key else {},
+                },
+                'response': last_payload,
+            },
+        }
+
+    async def _closure_stream(
+        self,
+        query: pipeline_query.Query,
+        req_messages: list[dict],
+        use_model: requester.RuntimeLLMModel,
+        use_funcs: list[resource_tool.LLMTool] = None,
+        extra_args: dict[str, typing.Any] = {},
+        remove_think: bool = False,
+    ) -> provider_message.MessageChunk:
+        self.client.api_key = use_model.provider.token_mgr.get_token()
+
+        args = {}
+        args['model'] = use_model.model_entity.name
+
+        if use_funcs:
+            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
+            if tools:
+                args['tools'] = tools
+
+        # 设置此次请求中的messages
+        messages = req_messages.copy()
+
+        # 检查vision
+        for msg in messages:
+            if 'content' in msg and isinstance(msg['content'], list):
+                for me in msg['content']:
+                    if me['type'] == 'image_base64':
+                        me['image_url'] = {'url': me['image_base64']}
+                        me['type'] = 'image_url'
+                        del me['image_base64']
+
+        args['messages'] = messages
+        args['stream'] = True
+
+        # 流式处理状态
+        # tool_calls_map: dict[str, provider_message.ToolCall] = {}
+        chunk_idx = 0
+        thinking_started = False
+        thinking_ended = False
+        role = 'assistant'  # 默认角色
+        tool_id = ''
+        tool_name = ''
+        # accumulated_reasoning = ''  # 仅用于判断何时结束思维链
+
+        async for chunk in self._req_stream(args, extra_body=extra_args):
+            # 解析 chunk 数据
+
+            if hasattr(chunk, 'choices') and chunk.choices:
+                choice = chunk.choices[0]
+                delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
+
+                finish_reason = getattr(choice, 'finish_reason', None)
+            else:
+                delta = {}
+                finish_reason = None
+            # 从第一个 chunk 获取 role，后续使用这个 role
+            if 'role' in delta and delta['role']:
+                role = delta['role']
+
+            # 获取增量内容
+            delta_content = delta.get('content', '')
+            reasoning_content = delta.get('reasoning_content', '')
+
+            # 处理 reasoning_content
+            if reasoning_content:
+                # accumulated_reasoning += reasoning_content
+                # 如果设置了 remove_think，跳过 reasoning_content
+                if remove_think:
+                    chunk_idx += 1
+                    continue
+
+                # 第一次出现 reasoning_content，添加 <think> 开始标签
+                if not thinking_started:
+                    thinking_started = True
+                    delta_content = '<think>\n' + reasoning_content
+                else:
+                    # 继续输出 reasoning_content
+                    delta_content = reasoning_content
+            elif thinking_started and not thinking_ended and delta_content:
+                # reasoning_content 结束，normal content 开始，添加 </think> 结束标签
+                thinking_ended = True
+                delta_content = '\n</think>\n' + delta_content
+
+            # 处理 content 中已有的 <think> 标签（如果需要移除）
+            # if delta_content and remove_think and '<think>' in delta_content:
+            #     import re
+            #
+            #     # 移除 <think> 标签及其内容
+            #     delta_content = re.sub(r'<think>.*?</think>', '', delta_content, flags=re.DOTALL)
+
+            # 处理工具调用增量
+            # delta_tool_calls = None
+            if delta.get('tool_calls'):
+                for tool_call in delta['tool_calls']:
+                    if tool_call['id'] == '' and tool_id == '':
+                        tool_id = str(uuid.uuid4())
+                    if tool_call['function']['name']:
+                        tool_name = tool_call['function']['name']
+                    tool_call['id'] = tool_id
+                    tool_call['function']['name'] = tool_name
+                    if tool_call['type'] is None:
+                        tool_call['type'] = 'function'
+
+            # 跳过空的第一个 chunk（只有 role 没有内容）
+            if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'):
+                chunk_idx += 1
+                continue
+            # 构建 MessageChunk - 只包含增量内容
+            chunk_data = {
+                'role': role,
+                'content': delta_content if delta_content else None,
+                'tool_calls': delta.get('tool_calls'),
+                'is_final': bool(finish_reason),
+            }
+
+            # 移除 None 值
+            chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
+
+            yield provider_message.MessageChunk(**chunk_data)
+            chunk_idx += 1
--- a/src/langbot/pkg/provider/modelmgr/requesters/geminichatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/geminichatcmpl.yaml
@@ -22,7 +22,10 @@ spec:
    type: integer
    required: true
    default: 120
-  litellm_provider: gemini
  support_type:
  - llm
  provider_category: manufacturer
+execution:
+  python:
+    path: ./geminichatcmpl.py
+    attr: GeminiChatCompletions
--- a/src/langbot/pkg/provider/modelmgr/requesters/giteeaichatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/giteeaichatcmpl.py
@@ -0,0 +1,15 @@
+from __future__ import annotations
+
+
+import typing
+
+from . import ppiochatcmpl
+
+
+class GiteeAIChatCompletions(ppiochatcmpl.PPIOChatCompletions):
+    """Gitee AI ChatCompletions API 请求器"""
+
+    default_config: dict[str, typing.Any] = {
+        'base_url': 'https://ai.gitee.com/v1',
+        'timeout': 120,
+    }
--- a/src/langbot/pkg/provider/modelmgr/requesters/giteeaichatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/giteeaichatcmpl.yaml
@@ -22,9 +22,12 @@ spec:
    type: integer
    required: true
    default: 120
-  litellm_provider: openai
  support_type:
  - llm
  - text-embedding
  - rerank
  provider_category: maas
+execution:
+  python:
+    path: ./giteeaichatcmpl.py
+    attr: GiteeAIChatCompletions
--- a/src/langbot/pkg/provider/modelmgr/requesters/jiekouaichatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/jiekouaichatcmpl.py
@@ -0,0 +1,208 @@
+from __future__ import annotations
+
+import openai
+import typing
+
+from . import chatcmpl
+from .. import requester
+import openai.types.chat.chat_completion as chat_completion
+import re
+import langbot_plugin.api.entities.builtin.provider.message as provider_message
+import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
+import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
+
+
+class JieKouAIChatCompletions(chatcmpl.OpenAIChatCompletions):
+    """接口 AI ChatCompletion API 请求器"""
+
+    client: openai.AsyncClient
+
+    default_config: dict[str, typing.Any] = {
+        'base_url': 'https://api.jiekou.ai/openai',
+        'timeout': 120,
+    }
+
+    is_think: bool = False
+
+    async def _make_msg(
+        self,
+        chat_completion: chat_completion.ChatCompletion,
+        remove_think: bool,
+    ) -> provider_message.Message:
+        chatcmpl_message = chat_completion.choices[0].message.model_dump()
+        # print(chatcmpl_message.keys(), chatcmpl_message.values())
+
+        # 确保 role 字段存在且不为 None
+        if 'role' not in chatcmpl_message or chatcmpl_message['role'] is None:
+            chatcmpl_message['role'] = 'assistant'
+
+        reasoning_content = chatcmpl_message['reasoning_content'] if 'reasoning_content' in chatcmpl_message else None
+
+        # deepseek的reasoner模型
+        chatcmpl_message['content'] = await self._process_thinking_content(
+            chatcmpl_message['content'], reasoning_content, remove_think
+        )
+
+        # 移除 reasoning_content 字段，避免传递给 Message
+        if 'reasoning_content' in chatcmpl_message:
+            del chatcmpl_message['reasoning_content']
+
+        message = provider_message.Message(**chatcmpl_message)
+
+        return message
+
+    async def _process_thinking_content(
+        self,
+        content: str,
+        reasoning_content: str = None,
+        remove_think: bool = False,
+    ) -> tuple[str, str]:
+        """处理思维链内容
+
+        Args:
+            content: 原始内容
+            reasoning_content: reasoning_content 字段内容
+            remove_think: 是否移除思维链
+
+        Returns:
+            处理后的内容
+        """
+        if remove_think:
+            content = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL)
+        else:
+            if reasoning_content is not None:
+                content = '<think>\n' + reasoning_content + '\n</think>\n' + content
+        return content
+
+    async def _make_msg_chunk(
+        self,
+        delta: dict[str, typing.Any],
+        idx: int,
+    ) -> provider_message.MessageChunk:
+        # 处理流式chunk和完整响应的差异
+        # print(chat_completion.choices[0])
+
+        # 确保 role 字段存在且不为 None
+        if 'role' not in delta or delta['role'] is None:
+            delta['role'] = 'assistant'
+
+        reasoning_content = delta['reasoning_content'] if 'reasoning_content' in delta else None
+
+        delta['content'] = '' if delta['content'] is None else delta['content']
+        # print(reasoning_content)
+
+        # deepseek的reasoner模型
+
+        if reasoning_content is not None:
+            delta['content'] += reasoning_content
+
+        message = provider_message.MessageChunk(**delta)
+
+        return message
+
+    async def _closure_stream(
+        self,
+        query: pipeline_query.Query,
+        req_messages: list[dict],
+        use_model: requester.RuntimeLLMModel,
+        use_funcs: list[resource_tool.LLMTool] = None,
+        extra_args: dict[str, typing.Any] = {},
+        remove_think: bool = False,
+    ) -> provider_message.Message | typing.AsyncGenerator[provider_message.MessageChunk, None]:
+        self.client.api_key = use_model.provider.token_mgr.get_token()
+
+        args = {}
+        args['model'] = use_model.model_entity.name
+
+        if use_funcs:
+            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
+
+            if tools:
+                args['tools'] = tools
+
+        # 设置此次请求中的messages
+        messages = req_messages.copy()
+
+        # 检查vision
+        for msg in messages:
+            if 'content' in msg and isinstance(msg['content'], list):
+                for me in msg['content']:
+                    if me['type'] == 'image_base64':
+                        me['image_url'] = {'url': me['image_base64']}
+                        me['type'] = 'image_url'
+                        del me['image_base64']
+
+        args['messages'] = messages
+        args['stream'] = True
+
+        # tool_calls_map: dict[str, provider_message.ToolCall] = {}
+        chunk_idx = 0
+        thinking_started = False
+        thinking_ended = False
+        role = 'assistant'  # 默认角色
+        async for chunk in self._req_stream(args, extra_body=extra_args):
+            # 解析 chunk 数据
+            if hasattr(chunk, 'choices') and chunk.choices:
+                choice = chunk.choices[0]
+                delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
+                finish_reason = getattr(choice, 'finish_reason', None)
+            else:
+                delta = {}
+                finish_reason = None
+
+            # 从第一个 chunk 获取 role，后续使用这个 role
+            if 'role' in delta and delta['role']:
+                role = delta['role']
+
+            # 获取增量内容
+            delta_content = delta.get('content', '')
+            # reasoning_content = delta.get('reasoning_content', '')
+
+            if remove_think:
+                if delta['content'] is not None:
+                    if '<think>' in delta['content'] and not thinking_started and not thinking_ended:
+                        thinking_started = True
+                        continue
+                    elif delta['content'] == r'</think>' and not thinking_ended:
+                        thinking_ended = True
+                        continue
+                    elif thinking_ended and delta['content'] == '\n\n' and thinking_started:
+                        thinking_started = False
+                        continue
+                    elif thinking_started and not thinking_ended:
+                        continue
+
+            # delta_tool_calls = None
+            if delta.get('tool_calls'):
+                for tool_call in delta['tool_calls']:
+                    if tool_call['id'] and tool_call['function']['name']:
+                        tool_id = tool_call['id']
+                        tool_name = tool_call['function']['name']
+
+                    if tool_call['id'] is None:
+                        tool_call['id'] = tool_id
+                    if tool_call['function']['name'] is None:
+                        tool_call['function']['name'] = tool_name
+                    if tool_call['function']['arguments'] is None:
+                        tool_call['function']['arguments'] = ''
+                    if tool_call['type'] is None:
+                        tool_call['type'] = 'function'
+
+            # 跳过空的第一个 chunk（只有 role 没有内容）
+            if chunk_idx == 0 and not delta_content and not delta.get('tool_calls'):
+                chunk_idx += 1
+                continue
+
+            # 构建 MessageChunk - 只包含增量内容
+            chunk_data = {
+                'role': role,
+                'content': delta_content if delta_content else None,
+                'tool_calls': delta.get('tool_calls'),
+                'is_final': bool(finish_reason),
+            }
+
+            # 移除 None 值
+            chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
+
+            yield provider_message.MessageChunk(**chunk_data)
+            chunk_idx += 1
--- a/src/langbot/pkg/provider/modelmgr/requesters/jiekouaichatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/jiekouaichatcmpl.yaml
@@ -29,8 +29,11 @@ spec:
    type: int
    required: true
    default: 120
-  litellm_provider: openai
  support_type:
  - llm
  - text-embedding
  provider_category: maas
+execution:
+  python:
+    path: ./jiekouaichatcmpl.py
+    attr: JieKouAIChatCompletions
--- a/src/langbot/pkg/provider/modelmgr/requesters/jinarerank.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/jinarerank.yaml
@@ -22,7 +22,10 @@ spec:
    type: integer
    required: true
    default: 120
-  litellm_provider: jina
  support_type:
  - rerank
  provider_category: manufacturer
+execution:
+  python:
+    path: ./chatcmpl.py
+    attr: OpenAIChatCompletions
--- a/src/langbot/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.py
@@ -0,0 +1,17 @@
+from __future__ import annotations
+
+import typing
+import openai
+
+from . import chatcmpl
+
+
+class LmStudioChatCompletions(chatcmpl.OpenAIChatCompletions):
+    """LMStudio ChatCompletion API 请求器"""
+
+    client: openai.AsyncClient
+
+    default_config: dict[str, typing.Any] = {
+        'base_url': 'http://127.0.0.1:1234/v1',
+        'timeout': 120,
+    }
--- a/src/langbot/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.yaml
@@ -22,8 +22,11 @@ spec:
    type: integer
    required: true
    default: 120
-  litellm_provider: openai
  support_type:
  - llm
  - text-embedding
  provider_category: self-hosted
+execution:
+  python:
+    path: ./lmstudiochatcmpl.py
+    attr: LmStudioChatCompletions
--- a/src/langbot/pkg/provider/modelmgr/requesters/modelscopechatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/modelscopechatcmpl.py
@@ -0,0 +1,561 @@
+from __future__ import annotations
+
+import asyncio
+import typing
+
+import openai
+import openai.types.chat.chat_completion as chat_completion
+import httpx
+
+from .. import entities, errors, requester
+import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
+import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
+import langbot_plugin.api.entities.builtin.provider.message as provider_message
+
+
+class ModelScopeChatCompletions(requester.ProviderAPIRequester):
+    """ModelScope ChatCompletion API 请求器"""
+
+    client: openai.AsyncClient
+
+    default_config: dict[str, typing.Any] = {
+        'base_url': 'https://api-inference.modelscope.cn/v1',
+        'timeout': 120,
+    }
+
+    async def initialize(self):
+        self.client = openai.AsyncClient(
+            api_key='',
+            base_url=self.requester_cfg['base_url'],
+            timeout=self.requester_cfg['timeout'],
+            http_client=httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']),
+        )
+
+    def _mask_api_key(self, api_key: str | None) -> str:
+        if not api_key:
+            return ''
+        if len(api_key) <= 8:
+            return '****'
+        return f'{api_key[:4]}...{api_key[-4:]}'
+
+    def _infer_model_type(self, model_id: str) -> str:
+        normalized_model_id = (model_id or '').lower()
+        embedding_keywords = (
+            'embedding',
+            'embed',
+            'bge-',
+            'e5-',
+            'm3e',
+            'gte-',
+            'multilingual-e5',
+            'text-embedding',
+        )
+        return 'embedding' if any(keyword in normalized_model_id for keyword in embedding_keywords) else 'llm'
+
+    def _infer_model_abilities(self, item: dict[str, typing.Any], model_id: str) -> list[str]:
+        normalized_model_id = (model_id or '').lower()
+        abilities: set[str] = set()
+
+        def _flatten(value: typing.Any) -> list[str]:
+            if value is None:
+                return []
+            if isinstance(value, str):
+                return [value.lower()]
+            if isinstance(value, dict):
+                flattened: list[str] = []
+                for nested_value in value.values():
+                    flattened.extend(_flatten(nested_value))
+                return flattened
+            if isinstance(value, (list, tuple, set)):
+                flattened: list[str] = []
+                for nested_value in value:
+                    flattened.extend(_flatten(nested_value))
+                return flattened
+            return [str(value).lower()]
+
+        capability_tokens = _flatten(item.get('capabilities'))
+        capability_tokens.extend(_flatten(item.get('modalities')))
+        capability_tokens.extend(_flatten(item.get('input_modalities')))
+        capability_tokens.extend(_flatten(item.get('output_modalities')))
+        capability_tokens.extend(_flatten(item.get('supported_generation_methods')))
+        capability_tokens.extend(_flatten(item.get('supported_parameters')))
+        capability_tokens.extend(_flatten(item.get('architecture')))
+
+        combined_tokens = capability_tokens + [normalized_model_id]
+
+        vision_keywords = ('vision', 'image', 'file', 'video', 'multimodal', 'vl', 'ocr', 'omni')
+        function_call_keywords = ('function', 'tool', 'tools', 'tool_choice', 'tool_call', 'tool-use', 'tool_use')
+
+        if any(any(keyword in token for keyword in vision_keywords) for token in combined_tokens):
+            abilities.add('vision')
+
+        if any(any(keyword in token for keyword in function_call_keywords) for token in combined_tokens):
+            abilities.add('func_call')
+
+        return sorted(abilities)
+
+    def _normalize_modalities(self, value: typing.Any) -> list[str]:
+        normalized: list[str] = []
+
+        def _collect(item: typing.Any):
+            if item is None:
+                return
+            if isinstance(item, str):
+                for part in item.replace('->', ',').replace('+', ',').split(','):
+                    token = part.strip().lower()
+                    if token and token not in normalized:
+                        normalized.append(token)
+                return
+            if isinstance(item, dict):
+                for nested in item.values():
+                    _collect(nested)
+                return
+            if isinstance(item, (list, tuple, set)):
+                for nested in item:
+                    _collect(nested)
+                return
+
+        _collect(value)
+        return normalized
+
+    def _extract_scan_metadata(self, item: dict[str, typing.Any], model_id: str) -> dict[str, typing.Any]:
+        display_name = item.get('name')
+        if not isinstance(display_name, str) or not display_name.strip() or display_name == model_id:
+            display_name = ''
+
+        description = item.get('description')
+        if not isinstance(description, str) or not description.strip():
+            description = ''
+
+        context_length = item.get('context_length')
+        if context_length is None and isinstance(item.get('top_provider'), dict):
+            context_length = item['top_provider'].get('context_length')
+
+        if not isinstance(context_length, int):
+            try:
+                context_length = int(context_length) if context_length is not None else None
+            except (TypeError, ValueError):
+                context_length = None
+
+        input_modalities = self._normalize_modalities(item.get('input_modalities'))
+        output_modalities = self._normalize_modalities(item.get('output_modalities'))
+
+        if isinstance(item.get('architecture'), dict):
+            if not input_modalities:
+                input_modalities = self._normalize_modalities(item['architecture'].get('input_modalities'))
+            if not output_modalities:
+                output_modalities = self._normalize_modalities(item['architecture'].get('output_modalities'))
+
+        owned_by = item.get('owned_by')
+        if not isinstance(owned_by, str) or not owned_by.strip():
+            owned_by = ''
+
+        return {
+            'display_name': display_name or None,
+            'description': description or None,
+            'context_length': context_length,
+            'owned_by': owned_by or None,
+            'input_modalities': input_modalities,
+            'output_modalities': output_modalities,
+        }
+
+    async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]:
+        headers = {}
+        if api_key:
+            headers['Authorization'] = f'Bearer {api_key}'
+
+        models_url = f'{self.requester_cfg["base_url"].rstrip("/")}/models'
+        async with httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']) as client:
+            response = await client.get(models_url, headers=headers)
+            response.raise_for_status()
+            payload = response.json()
+
+        models = []
+        for item in payload.get('data', []):
+            model_id = item.get('id')
+            if not model_id:
+                continue
+            models.append(
+                {
+                    'id': model_id,
+                    'name': model_id,
+                    'type': self._infer_model_type(model_id),
+                    'abilities': self._infer_model_abilities(item, model_id),
+                    **self._extract_scan_metadata(item, model_id),
+                }
+            )
+
+        models.sort(key=lambda item: (item['type'] != 'llm', item['name'].lower()))
+        return {
+            'models': models,
+            'debug': {
+                'request': {
+                    'method': 'GET',
+                    'url': models_url,
+                    'headers': {
+                        'Authorization': f'Bearer {self._mask_api_key(api_key)}' if api_key else '',
+                    },
+                },
+                'response': payload,
+            },
+        }
+
+    async def _req(
+        self,
+        query: pipeline_query.Query,
+        args: dict,
+        extra_body: dict = {},
+        remove_think: bool = False,
+    ) -> list[dict[str, typing.Any]]:
+        args['stream'] = True
+
+        chunk = None
+
+        pending_content = ''
+
+        tool_calls = []
+
+        resp_gen: openai.AsyncStream = await self.client.chat.completions.create(**args, extra_body=extra_body)
+
+        chunk_idx = 0
+        thinking_started = False
+        thinking_ended = False
+        tool_id = ''
+        tool_name = ''
+        message_delta = {}
+        async for chunk in resp_gen:
+            if not chunk or not chunk.id or not chunk.choices or not chunk.choices[0] or not chunk.choices[0].delta:
+                continue
+
+            delta = chunk.choices[0].delta.model_dump() if hasattr(chunk.choices[0], 'delta') else {}
+            reasoning_content = delta.get('reasoning_content')
+            # 处理 reasoning_content
+            if reasoning_content:
+                # accumulated_reasoning += reasoning_content
+                # 如果设置了 remove_think，跳过 reasoning_content
+                if remove_think:
+                    chunk_idx += 1
+                    continue
+
+                # 第一次出现 reasoning_content，添加 <think> 开始标签
+                if not thinking_started:
+                    thinking_started = True
+                    pending_content += '<think>\n' + reasoning_content
+                else:
+                    # 继续输出 reasoning_content
+                    pending_content += reasoning_content
+            elif thinking_started and not thinking_ended and delta.get('content'):
+                # reasoning_content 结束，normal content 开始，添加 </think> 结束标签
+                thinking_ended = True
+                pending_content += '\n</think>\n' + delta.get('content')
+
+            if delta.get('content') is not None:
+                pending_content += delta.get('content')
+
+            if delta.get('tool_calls') is not None:
+                for tool_call in delta.get('tool_calls'):
+                    if tool_call['id'] != '':
+                        tool_id = tool_call['id']
+                    if tool_call['function']['name'] is not None:
+                        tool_name = tool_call['function']['name']
+                    if tool_call['function']['arguments'] is None:
+                        continue
+                    tool_call['id'] = tool_id
+                    tool_call['name'] = tool_name
+                    for tc in tool_calls:
+                        if tc['index'] == tool_call['index']:
+                            tc['function']['arguments'] += tool_call['function']['arguments']
+                            break
+                    else:
+                        tool_calls.append(tool_call)
+
+            if chunk.choices[0].finish_reason is not None:
+                break
+        message_delta['content'] = pending_content
+        message_delta['role'] = 'assistant'
+
+        message_delta['tool_calls'] = tool_calls if tool_calls else None
+        return [message_delta]
+
+    async def _make_msg(
+        self,
+        chat_completion: list[dict[str, typing.Any]],
+    ) -> provider_message.Message:
+        chatcmpl_message = chat_completion[0]
+
+        # 确保 role 字段存在且不为 None
+        if 'role' not in chatcmpl_message or chatcmpl_message['role'] is None:
+            chatcmpl_message['role'] = 'assistant'
+
+        message = provider_message.Message(**chatcmpl_message)
+
+        return message
+
+    async def _closure(
+        self,
+        query: pipeline_query.Query,
+        req_messages: list[dict],
+        use_model: requester.RuntimeLLMModel,
+        use_funcs: list[resource_tool.LLMTool] = None,
+        extra_args: dict[str, typing.Any] = {},
+        remove_think: bool = False,
+    ) -> tuple[provider_message.Message, dict]:
+        self.client.api_key = use_model.provider.token_mgr.get_token()
+
+        args = {}
+        args['model'] = use_model.model_entity.name
+
+        if use_funcs:
+            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
+
+            if tools:
+                args['tools'] = tools
+
+        # 设置此次请求中的messages
+        messages = req_messages.copy()
+
+        # 检查vision
+        for msg in messages:
+            if 'content' in msg and isinstance(msg['content'], list):
+                for me in msg['content']:
+                    if me['type'] == 'image_base64':
+                        me['image_url'] = {'url': me['image_base64']}
+                        me['type'] = 'image_url'
+                        del me['image_base64']
+
+        args['messages'] = messages
+
+        # 发送请求
+        resp = await self._req(query, args, extra_body=extra_args, remove_think=remove_think)
+
+        # 处理请求结果
+        message = await self._make_msg(resp)
+
+        # ModelScope uses streaming, usage info not available
+        usage_info = {}
+
+        return message, usage_info
+
+    async def _req_stream(
+        self,
+        args: dict,
+        extra_body: dict = {},
+    ) -> chat_completion.ChatCompletion:
+        async for chunk in await self.client.chat.completions.create(**args, extra_body=extra_body):
+            yield chunk
+
+    async def _closure_stream(
+        self,
+        query: pipeline_query.Query,
+        req_messages: list[dict],
+        use_model: requester.RuntimeLLMModel,
+        use_funcs: list[resource_tool.LLMTool] = None,
+        extra_args: dict[str, typing.Any] = {},
+        remove_think: bool = False,
+    ) -> provider_message.Message | typing.AsyncGenerator[provider_message.MessageChunk, None]:
+        self.client.api_key = use_model.provider.token_mgr.get_token()
+
+        args = {}
+        args['model'] = use_model.model_entity.name
+
+        if use_funcs:
+            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
+
+            if tools:
+                args['tools'] = tools
+
+        # 设置此次请求中的messages
+        messages = req_messages.copy()
+
+        # 检查vision
+        for msg in messages:
+            if 'content' in msg and isinstance(msg['content'], list):
+                for me in msg['content']:
+                    if me['type'] == 'image_base64':
+                        me['image_url'] = {'url': me['image_base64']}
+                        me['type'] = 'image_url'
+                        del me['image_base64']
+
+        args['messages'] = messages
+        args['stream'] = True
+
+        # 流式处理状态
+        # tool_calls_map: dict[str, provider_message.ToolCall] = {}
+        chunk_idx = 0
+        thinking_started = False
+        thinking_ended = False
+        role = 'assistant'  # 默认角色
+        # accumulated_reasoning = ''  # 仅用于判断何时结束思维链
+
+        async for chunk in self._req_stream(args, extra_body=extra_args):
+            # 解析 chunk 数据
+            if hasattr(chunk, 'choices') and chunk.choices:
+                choice = chunk.choices[0]
+                delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
+                finish_reason = getattr(choice, 'finish_reason', None)
+            else:
+                delta = {}
+                finish_reason = None
+
+            # 从第一个 chunk 获取 role，后续使用这个 role
+            if 'role' in delta and delta['role']:
+                role = delta['role']
+
+            # 获取增量内容
+            delta_content = delta.get('content', '')
+            reasoning_content = delta.get('reasoning_content', '')
+
+            # 处理 reasoning_content
+            if reasoning_content:
+                # accumulated_reasoning += reasoning_content
+                # 如果设置了 remove_think，跳过 reasoning_content
+                if remove_think:
+                    chunk_idx += 1
+                    continue
+
+                # 第一次出现 reasoning_content，添加 <think> 开始标签
+                if not thinking_started:
+                    thinking_started = True
+                    delta_content = '<think>\n' + reasoning_content
+                else:
+                    # 继续输出 reasoning_content
+                    delta_content = reasoning_content
+            elif thinking_started and not thinking_ended and delta_content:
+                # reasoning_content 结束，normal content 开始，添加 </think> 结束标签
+                thinking_ended = True
+                delta_content = '\n</think>\n' + delta_content
+
+            # 处理 content 中已有的 <think> 标签（如果需要移除）
+            # if delta_content and remove_think and '<think>' in delta_content:
+            #     import re
+            #
+            #     # 移除 <think> 标签及其内容
+            #     delta_content = re.sub(r'<think>.*?</think>', '', delta_content, flags=re.DOTALL)
+
+            # 处理工具调用增量
+            if delta.get('tool_calls'):
+                for tool_call in delta['tool_calls']:
+                    if tool_call['id'] != '':
+                        tool_id = tool_call['id']
+                    if tool_call['function']['name'] is not None:
+                        tool_name = tool_call['function']['name']
+
+                    if tool_call['type'] is None:
+                        tool_call['type'] = 'function'
+                    tool_call['id'] = tool_id
+                    tool_call['function']['name'] = tool_name
+                    tool_call['function']['arguments'] = (
+                        '' if tool_call['function']['arguments'] is None else tool_call['function']['arguments']
+                    )
+
+            # 跳过空的第一个 chunk（只有 role 没有内容）
+            if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'):
+                chunk_idx += 1
+                continue
+
+            # 构建 MessageChunk - 只包含增量内容
+            chunk_data = {
+                'role': role,
+                'content': delta_content if delta_content else None,
+                'tool_calls': delta.get('tool_calls'),
+                'is_final': bool(finish_reason),
+            }
+
+            # 移除 None 值
+            chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
+
+            yield provider_message.MessageChunk(**chunk_data)
+            chunk_idx += 1
+            # return
+
+    async def invoke_llm(
+        self,
+        query: pipeline_query.Query,
+        model: entities.LLMModelInfo,
+        messages: typing.List[provider_message.Message],
+        funcs: typing.List[resource_tool.LLMTool] = None,
+        extra_args: dict[str, typing.Any] = {},
+        remove_think: bool = False,
+    ) -> provider_message.Message:
+        req_messages = []  # req_messages 仅用于类内，外部同步由 query.messages 进行
+        for m in messages:
+            msg_dict = m.dict(exclude_none=True)
+            content = msg_dict.get('content')
+            if isinstance(content, list):
+                # 检查 content 列表中是否每个部分都是文本
+                if all(isinstance(part, dict) and part.get('type') == 'text' for part in content):
+                    # 将所有文本部分合并为一个字符串
+                    msg_dict['content'] = '\n'.join(part['text'] for part in content)
+            req_messages.append(msg_dict)
+
+        try:
+            return await self._closure(
+                query=query,
+                req_messages=req_messages,
+                use_model=model,
+                use_funcs=funcs,
+                extra_args=extra_args,
+                remove_think=remove_think,
+            )
+        except asyncio.TimeoutError:
+            raise errors.RequesterError('请求超时')
+        except openai.BadRequestError as e:
+            if 'context_length_exceeded' in e.message:
+                raise errors.RequesterError(f'上文过长，请重置会话: {e.message}')
+            else:
+                raise errors.RequesterError(f'请求参数错误: {e.message}')
+        except openai.AuthenticationError as e:
+            raise errors.RequesterError(f'无效的 api-key: {e.message}')
+        except openai.NotFoundError as e:
+            raise errors.RequesterError(f'请求路径错误: {e.message}')
+        except openai.RateLimitError as e:
+            raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}')
+        except openai.APIError as e:
+            raise errors.RequesterError(f'请求错误: {e.message}')
+
+    async def invoke_llm_stream(
+        self,
+        query: pipeline_query.Query,
+        model: requester.RuntimeLLMModel,
+        messages: typing.List[provider_message.Message],
+        funcs: typing.List[resource_tool.LLMTool] = None,
+        extra_args: dict[str, typing.Any] = {},
+        remove_think: bool = False,
+    ) -> provider_message.MessageChunk:
+        req_messages = []  # req_messages 仅用于类内，外部同步由 query.messages 进行
+        for m in messages:
+            msg_dict = m.dict(exclude_none=True)
+            content = msg_dict.get('content')
+            if isinstance(content, list):
+                # 检查 content 列表中是否每个部分都是文本
+                if all(isinstance(part, dict) and part.get('type') == 'text' for part in content):
+                    # 将所有文本部分合并为一个字符串
+                    msg_dict['content'] = '\n'.join(part['text'] for part in content)
+            req_messages.append(msg_dict)
+
+        try:
+            async for item in self._closure_stream(
+                query=query,
+                req_messages=req_messages,
+                use_model=model,
+                use_funcs=funcs,
+                extra_args=extra_args,
+                remove_think=remove_think,
+            ):
+                yield item
+
+        except asyncio.TimeoutError:
+            raise errors.RequesterError('请求超时')
+        except openai.BadRequestError as e:
+            if 'context_length_exceeded' in e.message:
+                raise errors.RequesterError(f'上文过长，请重置会话: {e.message}')
+            else:
+                raise errors.RequesterError(f'请求参数错误: {e.message}')
+        except openai.AuthenticationError as e:
+            raise errors.RequesterError(f'无效的 api-key: {e.message}')
+        except openai.NotFoundError as e:
+            raise errors.RequesterError(f'请求路径错误: {e.message}')
+        except openai.RateLimitError as e:
+            raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}')
+        except openai.APIError as e:
+            raise errors.RequesterError(f'请求错误: {e.message}')
--- a/src/langbot/pkg/provider/modelmgr/requesters/modelscopechatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/modelscopechatcmpl.yaml
@@ -29,7 +29,10 @@ spec:
    type: int
    required: true
    default: 120
-  litellm_provider: openai
  support_type:
  - llm
  provider_category: maas
+execution:
+  python:
+    path: ./modelscopechatcmpl.py
+    attr: ModelScopeChatCompletions
--- a/src/langbot/pkg/provider/modelmgr/requesters/moonshotchatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/moonshotchatcmpl.py
@@ -0,0 +1,67 @@
+from __future__ import annotations
+
+import typing
+
+
+from . import chatcmpl
+from .. import requester
+import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
+import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
+import langbot_plugin.api.entities.builtin.provider.message as provider_message
+
+
+class MoonshotChatCompletions(chatcmpl.OpenAIChatCompletions):
+    """Moonshot ChatCompletion API 请求器"""
+
+    default_config: dict[str, typing.Any] = {
+        'base_url': 'https://api.moonshot.cn/v1',
+        'timeout': 120,
+    }
+
+    async def _closure(
+        self,
+        query: pipeline_query.Query,
+        req_messages: list[dict],
+        use_model: requester.RuntimeLLMModel,
+        use_funcs: list[resource_tool.LLMTool] = None,
+        extra_args: dict[str, typing.Any] = {},
+        remove_think: bool = False,
+    ) -> tuple[provider_message.Message, dict]:
+        self.client.api_key = use_model.provider.token_mgr.get_token()
+
+        args = {}
+        args['model'] = use_model.model_entity.name
+
+        if use_funcs:
+            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
+
+            if tools:
+                args['tools'] = tools
+
+        # 设置此次请求中的messages
+        messages = req_messages
+
+        # deepseek 不支持多模态，把content都转换成纯文字
+        for m in messages:
+            if 'content' in m and isinstance(m['content'], list):
+                m['content'] = ' '.join([c['text'] for c in m['content']])
+
+        # 删除空的，不知道干嘛的，直接删了。
+        # messages = [m for m in messages if m["content"].strip() != "" and ('tool_calls' not in m or not m['tool_calls'])]
+
+        args['messages'] = messages
+
+        # 发送请求
+        resp = await self._req(args, extra_body=extra_args)
+
+        # 处理请求结果
+        message = await self._make_msg(resp, remove_think)
+
+        # Extract token usage from response
+        usage_info = {}
+        if hasattr(resp, 'usage') and resp.usage:
+            usage_info['input_tokens'] = resp.usage.prompt_tokens or 0
+            usage_info['output_tokens'] = resp.usage.completion_tokens or 0
+            usage_info['total_tokens'] = resp.usage.total_tokens or 0
+
+        return message, usage_info
--- a/src/langbot/pkg/provider/modelmgr/requesters/moonshotchatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/moonshotchatcmpl.yaml
@@ -22,7 +22,10 @@ spec:
    type: integer
    required: true
    default: 120
-  litellm_provider: moonshot
  support_type:
  - llm
  provider_category: manufacturer
+execution:
+  python:
+    path: ./moonshotchatcmpl.py
+    attr: MoonshotChatCompletions
--- a/src/langbot/pkg/provider/modelmgr/requesters/newapichatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/newapichatcmpl.py
@@ -0,0 +1,17 @@
+from __future__ import annotations
+
+import typing
+import openai
+
+from . import chatcmpl
+
+
+class NewAPIChatCompletions(chatcmpl.OpenAIChatCompletions):
+    """New API ChatCompletion API 请求器"""
+
+    client: openai.AsyncClient
+
+    default_config: dict[str, typing.Any] = {
+        'base_url': 'http://localhost:3000/v1',
+        'timeout': 120,
+    }
--- a/src/langbot/pkg/provider/modelmgr/requesters/newapichatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/newapichatcmpl.yaml
@@ -22,8 +22,11 @@ spec:
    type: integer
    required: true
    default: 120
-  litellm_provider: openai
  support_type:
  - llm
  - text-embedding
  provider_category: maas
+execution:
+  python:
+    path: ./newapichatcmpl.py
+    attr: NewAPIChatCompletions
--- a/src/langbot/pkg/provider/modelmgr/requesters/ollamachat.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/ollamachat.py
@@ -0,0 +1,314 @@
+from __future__ import annotations
+
+import asyncio
+import os
+import typing
+from typing import Union, Mapping, Any, AsyncIterator
+import uuid
+import json
+
+import ollama
+import httpx
+
+from .. import errors, requester
+import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
+import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
+import langbot_plugin.api.entities.builtin.provider.message as provider_message
+
+REQUESTER_NAME: str = 'ollama-chat'
+
+
+class OllamaChatCompletions(requester.ProviderAPIRequester):
+    """Ollama平台 ChatCompletion API请求器"""
+
+    client: ollama.AsyncClient
+
+    default_config: dict[str, typing.Any] = {
+        'base_url': 'http://127.0.0.1:11434',
+        'timeout': 120,
+    }
+
+    async def initialize(self):
+        os.environ['OLLAMA_HOST'] = self.requester_cfg['base_url']
+        self.client = ollama.AsyncClient(timeout=self.requester_cfg['timeout'])
+
+    def _infer_model_type(self, model_id: str) -> str:
+        normalized_model_id = (model_id or '').lower()
+        embedding_keywords = ('embedding', 'embed', 'bge-', 'e5-', 'm3e', 'gte-', 'text-embedding')
+        return 'embedding' if any(keyword in normalized_model_id for keyword in embedding_keywords) else 'llm'
+
+    def _infer_model_abilities(self, item: dict[str, typing.Any], model_id: str) -> list[str]:
+        normalized_model_id = (model_id or '').lower()
+        abilities: set[str] = set()
+        details = item.get('details', {}) or {}
+        families = details.get('families', []) or []
+        tokens = [normalized_model_id, str(details.get('family', '')).lower()]
+        tokens.extend(str(family).lower() for family in families)
+
+        if any(keyword in token for token in tokens for keyword in ('vision', 'vl', 'omni', 'llava', 'ocr')):
+            abilities.add('vision')
+        if any(keyword in token for token in tokens for keyword in ('tool', 'function')):
+            abilities.add('func_call')
+        return sorted(abilities)
+
+    async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]:
+        del api_key
+        models_url = f'{self.requester_cfg["base_url"].rstrip("/")}/api/tags'
+
+        async with httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']) as client:
+            response = await client.get(models_url)
+            response.raise_for_status()
+            payload = response.json()
+
+        models: list[dict[str, typing.Any]] = []
+        for item in payload.get('models', []):
+            model_id = item.get('model') or item.get('name')
+            if not model_id:
+                continue
+            models.append(
+                {
+                    'id': model_id,
+                    'name': item.get('name', model_id),
+                    'type': self._infer_model_type(model_id),
+                    'abilities': self._infer_model_abilities(item, model_id),
+                }
+            )
+
+        models.sort(key=lambda item: (item['type'] != 'llm', item['name'].lower()))
+        return {
+            'models': models,
+            'debug': {
+                'request': {
+                    'method': 'GET',
+                    'url': models_url,
+                },
+                'response': payload,
+            },
+        }
+
+    async def _req(
+        self,
+        args: dict,
+    ) -> Union[Mapping[str, Any], AsyncIterator[Mapping[str, Any]]]:
+        return await self.client.chat(**args)
+
+    async def _closure(
+        self,
+        query: pipeline_query.Query,
+        req_messages: list[dict],
+        use_model: requester.RuntimeLLMModel,
+        use_funcs: list[resource_tool.LLMTool] = None,
+        extra_args: dict[str, typing.Any] = {},
+        remove_think: bool = False,
+    ) -> provider_message.Message:
+        args = extra_args.copy()
+        args['model'] = use_model.model_entity.name
+
+        messages: list[dict] = req_messages.copy()
+        for msg in messages:
+            if 'content' in msg and isinstance(msg['content'], list):
+                text_content: list = []
+                image_urls: list = []
+                for me in msg['content']:
+                    if me['type'] == 'text':
+                        text_content.append(me['text'])
+                    elif me['type'] == 'image_base64':
+                        image_urls.append(me['image_base64'])
+
+                msg['content'] = '\n'.join(text_content)
+                msg['images'] = [url.split(',')[1] for url in image_urls]
+            if 'tool_calls' in msg:  # LangBot 内部以 str 存储 tool_calls 的参数，这里需要转换为 dict
+                for tool_call in msg['tool_calls']:
+                    tool_call['function']['arguments'] = json.loads(tool_call['function']['arguments'])
+        args['messages'] = messages
+
+        args['tools'] = []
+        if use_funcs:
+            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
+            if tools:
+                args['tools'] = tools
+
+        resp = await self._req(args)
+        message: provider_message.Message = await self._make_msg(resp)
+        return message
+
+    async def _make_msg(self, chat_completions: ollama.ChatResponse) -> provider_message.Message:
+        message: ollama.Message = chat_completions.message
+        if message is None:
+            raise ValueError("chat_completions must contain a 'message' field")
+
+        ret_msg: provider_message.Message = None
+
+        if message.content is not None:
+            ret_msg = provider_message.Message(role='assistant', content=message.content)
+        if message.tool_calls is not None and len(message.tool_calls) > 0:
+            tool_calls: list[provider_message.ToolCall] = []
+
+            for tool_call in message.tool_calls:
+                tool_calls.append(
+                    provider_message.ToolCall(
+                        id=uuid.uuid4().hex,
+                        type='function',
+                        function=provider_message.FunctionCall(
+                            name=tool_call.function.name,
+                            arguments=json.dumps(tool_call.function.arguments),
+                        ),
+                    )
+                )
+            ret_msg.tool_calls = tool_calls
+
+        return ret_msg
+
+    async def _prepare_messages(
+        self,
+        messages: typing.List[provider_message.Message],
+    ) -> list[dict]:
+        """Prepare messages for Ollama API request."""
+        req_messages: list = []
+        for m in messages:
+            msg_dict: dict = m.dict(exclude_none=True)
+            content: Any = msg_dict.get('content')
+            if isinstance(content, list):
+                if all(isinstance(part, dict) and part.get('type') == 'text' for part in content):
+                    msg_dict['content'] = '\n'.join(part['text'] for part in content)
+            req_messages.append(msg_dict)
+        return req_messages
+
+    async def invoke_llm(
+        self,
+        query: pipeline_query.Query,
+        model: requester.RuntimeLLMModel,
+        messages: typing.List[provider_message.Message],
+        funcs: typing.List[resource_tool.LLMTool] = None,
+        extra_args: dict[str, typing.Any] = {},
+        remove_think: bool = False,
+    ) -> provider_message.Message:
+        req_messages = await self._prepare_messages(messages)
+        try:
+            return await self._closure(
+                query=query,
+                req_messages=req_messages,
+                use_model=model,
+                use_funcs=funcs,
+                extra_args=extra_args,
+                remove_think=remove_think,
+            )
+        except asyncio.TimeoutError:
+            raise errors.RequesterError('请求超时')
+
+    async def invoke_llm_stream(
+        self,
+        query: pipeline_query.Query,
+        model: requester.RuntimeLLMModel,
+        messages: typing.List[provider_message.Message],
+        funcs: typing.List[resource_tool.LLMTool] = None,
+        extra_args: dict[str, typing.Any] = {},
+        remove_think: bool = False,
+    ) -> provider_message.MessageChunk:
+        req_messages = await self._prepare_messages(messages)
+
+        try:
+            args = extra_args.copy()
+            args['model'] = model.model_entity.name
+
+            # Process messages for Ollama format
+            msgs: list[dict] = req_messages.copy()
+            for msg in msgs:
+                if 'content' in msg and isinstance(msg['content'], list):
+                    text_content: list = []
+                    image_urls: list = []
+                    for me in msg['content']:
+                        if me['type'] == 'text':
+                            text_content.append(me['text'])
+                        elif me['type'] == 'image_base64':
+                            image_urls.append(me['image_base64'])
+                    msg['content'] = '\n'.join(text_content)
+                    msg['images'] = [url.split(',')[1] for url in image_urls]
+                if 'tool_calls' in msg:
+                    for tool_call in msg['tool_calls']:
+                        tool_call['function']['arguments'] = json.loads(tool_call['function']['arguments'])
+            args['messages'] = msgs
+
+            args['tools'] = []
+            if funcs:
+                tools = await self.ap.tool_mgr.generate_tools_for_openai(funcs)
+                if tools:
+                    args['tools'] = tools
+
+            args['stream'] = True
+
+            chunk_idx = 0
+            thinking_started = False
+            thinking_ended = False
+            role = 'assistant'
+
+            async for chunk in await self.client.chat(**args):
+                message: ollama.Message = chunk.message
+                done = chunk.done
+
+                delta_content = message.content or ''
+                reasoning_content = getattr(message, 'thinking', '') or ''
+
+                # Handle reasoning/thinking content
+                if reasoning_content:
+                    if remove_think:
+                        chunk_idx += 1
+                        continue
+
+                    if not thinking_started:
+                        thinking_started = True
+                        delta_content = '<think>\n' + reasoning_content
+                    else:
+                        delta_content = reasoning_content
+                elif thinking_started and not thinking_ended and delta_content:
+                    thinking_ended = True
+                    delta_content = '\n</think>\n' + delta_content
+
+                # Handle tool calls
+                tool_calls_data = None
+                if message.tool_calls:
+                    tool_calls_data = []
+                    for tc in message.tool_calls:
+                        tool_calls_data.append(
+                            {
+                                'id': uuid.uuid4().hex,
+                                'type': 'function',
+                                'function': {
+                                    'name': tc.function.name,
+                                    'arguments': json.dumps(tc.function.arguments),
+                                },
+                            }
+                        )
+
+                # Skip empty first chunk
+                if chunk_idx == 0 and not delta_content and not reasoning_content and not tool_calls_data:
+                    chunk_idx += 1
+                    continue
+
+                chunk_data = {
+                    'role': role,
+                    'content': delta_content if delta_content else None,
+                    'tool_calls': tool_calls_data,
+                    'is_final': bool(done),
+                }
+                chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
+
+                yield provider_message.MessageChunk(**chunk_data)
+                chunk_idx += 1
+
+        except asyncio.TimeoutError:
+            raise errors.RequesterError('请求超时')
+
+    async def invoke_embedding(
+        self,
+        model: requester.RuntimeEmbeddingModel,
+        input_text: list[str],
+        extra_args: dict[str, typing.Any] = {},
+    ) -> list[list[float]]:
+        return (
+            await self.client.embed(
+                model=model.model_entity.name,
+                input=input_text,
+                **extra_args,
+            )
+        ).embeddings
--- a/src/langbot/pkg/provider/modelmgr/requesters/ollamachat.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/ollamachat.yaml
@@ -22,8 +22,11 @@ spec:
    type: integer
    required: true
    default: 120
-  litellm_provider: ollama
  support_type:
  - llm
  - text-embedding
  provider_category: self-hosted
+execution:
+  python:
+    path: ./ollamachat.py
+    attr: OllamaChatCompletions
--- a/src/langbot/pkg/provider/modelmgr/requesters/openrouterchatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/openrouterchatcmpl.py
@@ -0,0 +1,25 @@
+from __future__ import annotations
+
+import typing
+import openai
+
+from . import modelscopechatcmpl
+
+
+class OpenRouterChatCompletions(modelscopechatcmpl.ModelScopeChatCompletions):
+    """OpenRouter ChatCompletion API 请求器"""
+
+    client: openai.AsyncClient
+
+    default_config: dict[str, typing.Any] = {
+        'base_url': 'https://openrouter.ai/api/v1',
+        'timeout': 120,
+    }
+
+    async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]:
+        original_base_url = self.requester_cfg.get('base_url', '')
+        self.requester_cfg['base_url'] = 'https://openrouter.ai/api/v1'
+        try:
+            return await super().scan_models(api_key)
+        finally:
+            self.requester_cfg['base_url'] = original_base_url
--- a/src/langbot/pkg/provider/modelmgr/requesters/openrouterchatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/openrouterchatcmpl.yaml
@@ -22,9 +22,12 @@ spec:
    type: integer
    required: true
    default: 120
-  litellm_provider: openrouter
  support_type:
  - llm
  - text-embedding
  - rerank
  provider_category: maas
+execution:
+  python:
+    path: ./openrouterchatcmpl.py
+    attr: OpenRouterChatCompletions
--- a/src/langbot/pkg/provider/modelmgr/requesters/ppiochatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/ppiochatcmpl.py
@@ -0,0 +1,208 @@
+from __future__ import annotations
+
+import openai
+import typing
+
+from . import chatcmpl
+from .. import requester
+import openai.types.chat.chat_completion as chat_completion
+import re
+import langbot_plugin.api.entities.builtin.provider.message as provider_message
+import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
+import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
+
+
+class PPIOChatCompletions(chatcmpl.OpenAIChatCompletions):
+    """欧派云 ChatCompletion API 请求器"""
+
+    client: openai.AsyncClient
+
+    default_config: dict[str, typing.Any] = {
+        'base_url': 'https://api.ppinfra.com/v3/openai',
+        'timeout': 120,
+    }
+
+    is_think: bool = False
+
+    async def _make_msg(
+        self,
+        chat_completion: chat_completion.ChatCompletion,
+        remove_think: bool,
+    ) -> provider_message.Message:
+        chatcmpl_message = chat_completion.choices[0].message.model_dump()
+        # print(chatcmpl_message.keys(), chatcmpl_message.values())
+
+        # 确保 role 字段存在且不为 None
+        if 'role' not in chatcmpl_message or chatcmpl_message['role'] is None:
+            chatcmpl_message['role'] = 'assistant'
+
+        reasoning_content = chatcmpl_message['reasoning_content'] if 'reasoning_content' in chatcmpl_message else None
+
+        # deepseek的reasoner模型
+        chatcmpl_message['content'] = await self._process_thinking_content(
+            chatcmpl_message['content'], reasoning_content, remove_think
+        )
+
+        # 移除 reasoning_content 字段，避免传递给 Message
+        if 'reasoning_content' in chatcmpl_message:
+            del chatcmpl_message['reasoning_content']
+
+        message = provider_message.Message(**chatcmpl_message)
+
+        return message
+
+    async def _process_thinking_content(
+        self,
+        content: str,
+        reasoning_content: str = None,
+        remove_think: bool = False,
+    ) -> tuple[str, str]:
+        """处理思维链内容
+
+        Args:
+            content: 原始内容
+            reasoning_content: reasoning_content 字段内容
+            remove_think: 是否移除思维链
+
+        Returns:
+            处理后的内容
+        """
+        if remove_think:
+            content = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL)
+        else:
+            if reasoning_content is not None:
+                content = '<think>\n' + reasoning_content + '\n</think>\n' + content
+        return content
+
+    async def _make_msg_chunk(
+        self,
+        delta: dict[str, typing.Any],
+        idx: int,
+    ) -> provider_message.MessageChunk:
+        # 处理流式chunk和完整响应的差异
+        # print(chat_completion.choices[0])
+
+        # 确保 role 字段存在且不为 None
+        if 'role' not in delta or delta['role'] is None:
+            delta['role'] = 'assistant'
+
+        reasoning_content = delta['reasoning_content'] if 'reasoning_content' in delta else None
+
+        delta['content'] = '' if delta['content'] is None else delta['content']
+        # print(reasoning_content)
+
+        # deepseek的reasoner模型
+
+        if reasoning_content is not None:
+            delta['content'] += reasoning_content
+
+        message = provider_message.MessageChunk(**delta)
+
+        return message
+
+    async def _closure_stream(
+        self,
+        query: pipeline_query.Query,
+        req_messages: list[dict],
+        use_model: requester.RuntimeLLMModel,
+        use_funcs: list[resource_tool.LLMTool] = None,
+        extra_args: dict[str, typing.Any] = {},
+        remove_think: bool = False,
+    ) -> provider_message.Message | typing.AsyncGenerator[provider_message.MessageChunk, None]:
+        self.client.api_key = use_model.provider.token_mgr.get_token()
+
+        args = {}
+        args['model'] = use_model.model_entity.name
+
+        if use_funcs:
+            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
+
+            if tools:
+                args['tools'] = tools
+
+        # 设置此次请求中的messages
+        messages = req_messages.copy()
+
+        # 检查vision
+        for msg in messages:
+            if 'content' in msg and isinstance(msg['content'], list):
+                for me in msg['content']:
+                    if me['type'] == 'image_base64':
+                        me['image_url'] = {'url': me['image_base64']}
+                        me['type'] = 'image_url'
+                        del me['image_base64']
+
+        args['messages'] = messages
+        args['stream'] = True
+
+        # tool_calls_map: dict[str, provider_message.ToolCall] = {}
+        chunk_idx = 0
+        thinking_started = False
+        thinking_ended = False
+        role = 'assistant'  # 默认角色
+        async for chunk in self._req_stream(args, extra_body=extra_args):
+            # 解析 chunk 数据
+            if hasattr(chunk, 'choices') and chunk.choices:
+                choice = chunk.choices[0]
+                delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
+                finish_reason = getattr(choice, 'finish_reason', None)
+            else:
+                delta = {}
+                finish_reason = None
+
+            # 从第一个 chunk 获取 role，后续使用这个 role
+            if 'role' in delta and delta['role']:
+                role = delta['role']
+
+            # 获取增量内容
+            delta_content = delta.get('content', '')
+            # reasoning_content = delta.get('reasoning_content', '')
+
+            if remove_think:
+                if delta['content'] is not None:
+                    if '<think>' in delta['content'] and not thinking_started and not thinking_ended:
+                        thinking_started = True
+                        continue
+                    elif delta['content'] == r'</think>' and not thinking_ended:
+                        thinking_ended = True
+                        continue
+                    elif thinking_ended and delta['content'] == '\n\n' and thinking_started:
+                        thinking_started = False
+                        continue
+                    elif thinking_started and not thinking_ended:
+                        continue
+
+            # delta_tool_calls = None
+            if delta.get('tool_calls'):
+                for tool_call in delta['tool_calls']:
+                    if tool_call['id'] and tool_call['function']['name']:
+                        tool_id = tool_call['id']
+                        tool_name = tool_call['function']['name']
+
+                    if tool_call['id'] is None:
+                        tool_call['id'] = tool_id
+                    if tool_call['function']['name'] is None:
+                        tool_call['function']['name'] = tool_name
+                    if tool_call['function']['arguments'] is None:
+                        tool_call['function']['arguments'] = ''
+                    if tool_call['type'] is None:
+                        tool_call['type'] = 'function'
+
+            # 跳过空的第一个 chunk（只有 role 没有内容）
+            if chunk_idx == 0 and not delta_content and not delta.get('tool_calls'):
+                chunk_idx += 1
+                continue
+
+            # 构建 MessageChunk - 只包含增量内容
+            chunk_data = {
+                'role': role,
+                'content': delta_content if delta_content else None,
+                'tool_calls': delta.get('tool_calls'),
+                'is_final': bool(finish_reason),
+            }
+
+            # 移除 None 值
+            chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
+
+            yield provider_message.MessageChunk(**chunk_data)
+            chunk_idx += 1
--- a/src/langbot/pkg/provider/modelmgr/requesters/ppiochatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/ppiochatcmpl.yaml
@@ -29,8 +29,11 @@ spec:
    type: int
    required: true
    default: 120
-  litellm_provider: openai
  support_type:
  - llm
  - text-embedding
  provider_category: maas
+execution:
+  python:
+    path: ./ppiochatcmpl.py
+    attr: PPIOChatCompletions
--- a/src/langbot/pkg/provider/modelmgr/requesters/qhaigcchatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/qhaigcchatcmpl.py
@@ -0,0 +1,17 @@
+from __future__ import annotations
+
+import openai
+import typing
+
+from . import chatcmpl
+
+
+class QHAIGCChatCompletions(chatcmpl.OpenAIChatCompletions):
+    """启航 AI ChatCompletion API 请求器"""
+
+    client: openai.AsyncClient
+
+    default_config: dict[str, typing.Any] = {
+        'base_url': 'https://api.qhaigc.com/v1',
+        'timeout': 120,
+    }
--- a/src/langbot/pkg/provider/modelmgr/requesters/qhaigcchatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/qhaigcchatcmpl.yaml
@@ -29,8 +29,11 @@ spec:
    type: int
    required: true
    default: 120
-  litellm_provider: openai
  support_type:
  - llm
  - text-embedding
  provider_category: maas
+execution:
+  python:
+    path: ./qhaigcchatcmpl.py
+    attr: QHAIGCChatCompletions
--- a/src/langbot/pkg/provider/modelmgr/requesters/shengsuanyun.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/shengsuanyun.py
@@ -0,0 +1,32 @@
+from __future__ import annotations
+
+import openai
+import typing
+
+from . import chatcmpl
+import openai.types.chat.chat_completion as chat_completion
+
+
+class ShengSuanYunChatCompletions(chatcmpl.OpenAIChatCompletions):
+    """胜算云(ModelSpot.AI) ChatCompletion API 请求器"""
+
+    client: openai.AsyncClient
+
+    default_config: dict[str, typing.Any] = {
+        'base_url': 'https://router.shengsuanyun.com/api/v1',
+        'timeout': 120,
+    }
+
+    async def _req(
+        self,
+        args: dict,
+        extra_body: dict = {},
+    ) -> chat_completion.ChatCompletion:
+        return await self.client.chat.completions.create(
+            **args,
+            extra_body=extra_body,
+            extra_headers={
+                'HTTP-Referer': 'https://langbot.app',
+                'X-Title': 'LangBot',
+            },
+        )
--- a/src/langbot/pkg/provider/modelmgr/requesters/shengsuanyun.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/shengsuanyun.yaml
@@ -29,8 +29,11 @@ spec:
    type: int
    required: true
    default: 120
-  litellm_provider: openai
  support_type:
  - llm
  - text-embedding
  provider_category: maas
+execution:
+  python:
+    path: ./shengsuanyun.py
+    attr: ShengSuanYunChatCompletions
--- a/src/langbot/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.py
@@ -0,0 +1,17 @@
+from __future__ import annotations
+
+import typing
+import openai
+
+from . import chatcmpl
+
+
+class SiliconFlowChatCompletions(chatcmpl.OpenAIChatCompletions):
+    """SiliconFlow ChatCompletion API 请求器"""
+
+    client: openai.AsyncClient
+
+    default_config: dict[str, typing.Any] = {
+        'base_url': 'https://api.siliconflow.cn/v1',
+        'timeout': 120,
+    }
--- a/src/langbot/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.yaml
@@ -22,9 +22,12 @@ spec:
    type: integer
    required: true
    default: 120
-  litellm_provider: siliconflow
  support_type:
  - llm
  - text-embedding
  - rerank
  provider_category: maas
+execution:
+  python:
+    path: ./siliconflowchatcmpl.py
+    attr: SiliconFlowChatCompletions
--- a/src/langbot/pkg/provider/modelmgr/requesters/spacechatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/spacechatcmpl.py
@@ -0,0 +1,17 @@
+from __future__ import annotations
+
+import typing
+import openai
+
+from . import chatcmpl
+
+
+class LangBotSpaceChatCompletions(chatcmpl.OpenAIChatCompletions):
+    """LangBot Space ChatCompletion API 请求器"""
+
+    client: openai.AsyncClient
+
+    default_config: dict[str, typing.Any] = {
+        'base_url': 'https://api.langbot.cloud/v1',
+        'timeout': 120,
+    }
--- a/src/langbot/pkg/provider/modelmgr/requesters/spacechatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/spacechatcmpl.yaml
@@ -22,8 +22,11 @@ spec:
    type: integer
    required: true
    default: 120
-  litellm_provider: openai
  support_type:
  - llm
  - text-embedding
  provider_category: maas
+execution:
+  python:
+    path: ./spacechatcmpl.py
+    attr: LangBotSpaceChatCompletions
--- a/src/langbot/pkg/provider/modelmgr/requesters/tokenpony.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/tokenpony.yaml
@@ -22,8 +22,11 @@ spec:
    type: integer
    required: true
    default: 120
-  litellm_provider: openai
  support_type:
  - llm
  - text-embedding
  provider_category: maas
+execution:
+  python:
+    path: ./tokenponychatcmpl.py
+    attr: TokenPonyChatCompletions
--- a/src/langbot/pkg/provider/modelmgr/requesters/tokenponychatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/tokenponychatcmpl.py
@@ -0,0 +1,17 @@
+from __future__ import annotations
+
+import typing
+import openai
+
+from . import chatcmpl
+
+
+class TokenPonyChatCompletions(chatcmpl.OpenAIChatCompletions):
+    """TokenPony ChatCompletion API 请求器"""
+
+    client: openai.AsyncClient
+
+    default_config: dict[str, typing.Any] = {
+        'base_url': 'https://api.tokenpony.cn/v1',
+        'timeout': 120,
+    }
--- a/src/langbot/pkg/provider/modelmgr/requesters/volcarkchatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/volcarkchatcmpl.py
@@ -0,0 +1,17 @@
+from __future__ import annotations
+
+import typing
+import openai
+
+from . import chatcmpl
+
+
+class VolcArkChatCompletions(chatcmpl.OpenAIChatCompletions):
+    """火山方舟大模型平台 ChatCompletion API 请求器"""
+
+    client: openai.AsyncClient
+
+    default_config: dict[str, typing.Any] = {
+        'base_url': 'https://ark.cn-beijing.volces.com/api/v3',
+        'timeout': 120,
+    }
--- a/src/langbot/pkg/provider/modelmgr/requesters/volcarkchatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/volcarkchatcmpl.yaml
@@ -22,7 +22,10 @@ spec:
    type: integer
    required: true
    default: 120
-  litellm_provider: openai
  support_type:
  - llm
  provider_category: maas
+execution:
+  python:
+    path: ./volcarkchatcmpl.py
+    attr: VolcArkChatCompletions
--- a/src/langbot/pkg/provider/modelmgr/requesters/voyageairerank.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/voyageairerank.yaml
@@ -22,7 +22,10 @@ spec:
    type: integer
    required: true
    default: 120
-  litellm_provider: voyage
  support_type:
  - rerank
  provider_category: manufacturer
+execution:
+  python:
+    path: ./chatcmpl.py
+    attr: OpenAIChatCompletions
--- a/src/langbot/pkg/provider/modelmgr/requesters/xaichatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/xaichatcmpl.py
@@ -0,0 +1,17 @@
+from __future__ import annotations
+
+import typing
+import openai
+
+from . import chatcmpl
+
+
+class XaiChatCompletions(chatcmpl.OpenAIChatCompletions):
+    """xAI ChatCompletion API 请求器"""
+
+    client: openai.AsyncClient
+
+    default_config: dict[str, typing.Any] = {
+        'base_url': 'https://api.x.ai/v1',
+        'timeout': 120,
+    }
--- a/src/langbot/pkg/provider/modelmgr/requesters/xaichatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/xaichatcmpl.yaml
@@ -22,7 +22,10 @@ spec:
    type: integer
    required: true
    default: 120
-  litellm_provider: xai
  support_type:
  - llm
  provider_category: manufacturer
+execution:
+  python:
+    path: ./xaichatcmpl.py
+    attr: XaiChatCompletions
--- a/src/langbot/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.py
@@ -0,0 +1,17 @@
+from __future__ import annotations
+
+import typing
+import openai
+
+from . import chatcmpl
+
+
+class ZhipuAIChatCompletions(chatcmpl.OpenAIChatCompletions):
+    """智谱AI ChatCompletion API 请求器"""
+
+    client: openai.AsyncClient
+
+    default_config: dict[str, typing.Any] = {
+        'base_url': 'https://open.bigmodel.cn/api/paas/v4',
+        'timeout': 120,
+    }
--- a/src/langbot/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.yaml
@@ -22,7 +22,10 @@ spec:
    type: integer
    required: true
    default: 120
-  litellm_provider: zhipu
  support_type:
  - llm
  provider_category: manufacturer
+execution:
+  python:
+    path: ./zhipuaichatcmpl.py
+    attr: ZhipuAIChatCompletions