diff --git a/src/langbot/pkg/provider/modelmgr/requesters/302aichatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/302aichatcmpl.py
deleted file mode 100644
index 40a41718..00000000
--- a/src/langbot/pkg/provider/modelmgr/requesters/302aichatcmpl.py
+++ /dev/null
@@ -1,17 +0,0 @@
-from __future__ import annotations
-
-import typing
-import openai
-
-from . import chatcmpl
-
-
-class AI302ChatCompletions(chatcmpl.OpenAIChatCompletions):
-    """302.AI ChatCompletion API 请求器"""
-
-    client: openai.AsyncClient
-
-    default_config: dict[str, typing.Any] = {
-        'base_url': 'https://api.302.ai/v1',
-        'timeout': 120,
-    }
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/302aichatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/302aichatcmpl.yaml
index e4f70cae..1d8f9a47 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/302aichatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/302aichatcmpl.yaml
@@ -7,6 +7,7 @@ metadata:
     zh_Hans: 302.AI
   icon: 302ai.png
 spec:
+  litellm_provider: openai
   config:
   - name: base_url
     label:
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/anthropicmsgs.py b/src/langbot/pkg/provider/modelmgr/requesters/anthropicmsgs.py
deleted file mode 100644
index 1428dc88..00000000
--- a/src/langbot/pkg/provider/modelmgr/requesters/anthropicmsgs.py
+++ /dev/null
@@ -1,370 +0,0 @@
-from __future__ import annotations
-
-import typing
-import json
-import platform
-import socket
-import anthropic
-import httpx
-
-from .. import errors, requester
-
-from ....utils import image
-import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
-import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
-import langbot_plugin.api.entities.builtin.provider.message as provider_message
-
-
-class AnthropicMessages(requester.ProviderAPIRequester):
-    """Anthropic Messages API 请求器"""
-
-    client: anthropic.AsyncAnthropic
-
-    default_config: dict[str, typing.Any] = {
-        'base_url': 'https://api.anthropic.com',
-        'timeout': 120,
-    }
-
-    async def initialize(self):
-        # 兼容 Windows 缺失 TCP_KEEPINTVL 和 TCP_KEEPCNT 的问题
-        if platform.system() == 'Windows':
-            if not hasattr(socket, 'TCP_KEEPINTVL'):
-                socket.TCP_KEEPINTVL = 0
-            if not hasattr(socket, 'TCP_KEEPCNT'):
-                socket.TCP_KEEPCNT = 0
-        httpx_client = anthropic._base_client.AsyncHttpxClientWrapper(
-            base_url=self.requester_cfg['base_url'],
-            # cast to a valid type because mypy doesn't understand our type narrowing
-            timeout=typing.cast(httpx.Timeout, self.requester_cfg['timeout']),
-            limits=anthropic._constants.DEFAULT_CONNECTION_LIMITS,
-            follow_redirects=True,
-            trust_env=True,
-        )
-
-        self.client = anthropic.AsyncAnthropic(
-            api_key='',
-            http_client=httpx_client,
-            base_url=self.requester_cfg['base_url'],
-        )
-
-    async def invoke_llm(
-        self,
-        query: pipeline_query.Query,
-        model: requester.RuntimeLLMModel,
-        messages: typing.List[provider_message.Message],
-        funcs: typing.List[resource_tool.LLMTool] = None,
-        extra_args: dict[str, typing.Any] = {},
-        remove_think: bool = False,
-    ) -> provider_message.Message:
-        self.client.api_key = model.provider.token_mgr.get_token()
-
-        args = extra_args.copy()
-        args['model'] = model.model_entity.name
-
-        # 处理消息
-
-        # system
-        system_role_message = None
-
-        for i, m in enumerate(messages):
-            if m.role == 'system':
-                system_role_message = m
-
-                break
-
-        if system_role_message:
-            messages.pop(i)
-
-        if isinstance(system_role_message, provider_message.Message) and isinstance(system_role_message.content, str):
-            args['system'] = system_role_message.content
-
-        req_messages = []
-
-        for m in messages:
-            if m.role == 'tool':
-                tool_call_id = m.tool_call_id
-
-                req_messages.append(
-                    {
-                        'role': 'user',
-                        'content': [
-                            {
-                                'type': 'tool_result',
-                                'tool_use_id': tool_call_id,
-                                'is_error': False,
-                                'content': [{'type': 'text', 'text': m.content}],
-                            }
-                        ],
-                    }
-                )
-
-                continue
-
-            msg_dict = m.dict(exclude_none=True)
-
-            if isinstance(m.content, str) and m.content.strip() != '':
-                msg_dict['content'] = [{'type': 'text', 'text': m.content}]
-            elif isinstance(m.content, list):
-                for i, ce in enumerate(m.content):
-                    if ce.type == 'image_base64':
-                        image_b64, image_format = await image.extract_b64_and_format(ce.image_base64)
-
-                        alter_image_ele = {
-                            'type': 'image',
-                            'source': {
-                                'type': 'base64',
-                                'media_type': f'image/{image_format}',
-                                'data': image_b64,
-                            },
-                        }
-                        msg_dict['content'][i] = alter_image_ele
-
-            if m.tool_calls:
-                for tool_call in m.tool_calls:
-                    msg_dict['content'].append(
-                        {
-                            'type': 'tool_use',
-                            'id': tool_call.id,
-                            'name': tool_call.function.name,
-                            'input': json.loads(tool_call.function.arguments),
-                        }
-                    )
-
-                del msg_dict['tool_calls']
-
-            req_messages.append(msg_dict)
-
-        args['messages'] = req_messages
-
-        if 'thinking' in args:
-            args['thinking'] = {'type': 'enabled', 'budget_tokens': 10000}
-
-        if funcs:
-            tools = await self.ap.tool_mgr.generate_tools_for_anthropic(funcs)
-
-            if tools:
-                args['tools'] = tools
-
-        try:
-            resp = await self.client.messages.create(**args)
-
-            args = {
-                'content': '',
-                'role': resp.role,
-            }
-            assert type(resp) is anthropic.types.message.Message
-
-            for block in resp.content:
-                if not remove_think and block.type == 'thinking':
-                    args['content'] = '<think>\n' + block.thinking + '\n</think>\n' + args['content']
-                elif block.type == 'text':
-                    args['content'] += block.text
-                elif block.type == 'tool_use':
-                    assert type(block) is anthropic.types.tool_use_block.ToolUseBlock
-                    tool_call = provider_message.ToolCall(
-                        id=block.id,
-                        type='function',
-                        function=provider_message.FunctionCall(name=block.name, arguments=json.dumps(block.input)),
-                    )
-                    if 'tool_calls' not in args:
-                        args['tool_calls'] = []
-                    args['tool_calls'].append(tool_call)
-
-            return provider_message.Message(**args)
-        except anthropic.AuthenticationError as e:
-            raise errors.RequesterError(f'api-key 无效: {e.message}')
-        except anthropic.BadRequestError as e:
-            raise errors.RequesterError(str(e.message))
-        except anthropic.NotFoundError as e:
-            if 'model: ' in str(e):
-                raise errors.RequesterError(f'模型无效: {e.message}')
-            else:
-                raise errors.RequesterError(f'请求地址无效: {e.message}')
-
-    async def invoke_llm_stream(
-        self,
-        query: pipeline_query.Query,
-        model: requester.RuntimeLLMModel,
-        messages: typing.List[provider_message.Message],
-        funcs: typing.List[resource_tool.LLMTool] = None,
-        extra_args: dict[str, typing.Any] = {},
-        remove_think: bool = False,
-    ) -> provider_message.Message:
-        self.client.api_key = model.provider.token_mgr.get_token()
-
-        args = extra_args.copy()
-        args['model'] = model.model_entity.name
-        args['stream'] = True
-
-        # 处理消息
-
-        # system
-        system_role_message = None
-
-        for i, m in enumerate(messages):
-            if m.role == 'system':
-                system_role_message = m
-
-                break
-
-        if system_role_message:
-            messages.pop(i)
-
-        if isinstance(system_role_message, provider_message.Message) and isinstance(system_role_message.content, str):
-            args['system'] = system_role_message.content
-
-        req_messages = []
-
-        for m in messages:
-            if m.role == 'tool':
-                tool_call_id = m.tool_call_id
-
-                req_messages.append(
-                    {
-                        'role': 'user',
-                        'content': [
-                            {
-                                'type': 'tool_result',
-                                'tool_use_id': tool_call_id,
-                                'is_error': False,  # 暂时直接写false
-                                'content': [
-                                    {'type': 'text', 'text': m.content}
-                                ],  # 这里要是list包裹，应该是多个返回的情况？type类型好像也可以填其他的，暂时只写text
-                            }
-                        ],
-                    }
-                )
-
-                continue
-
-            msg_dict = m.dict(exclude_none=True)
-
-            if isinstance(m.content, str) and m.content.strip() != '':
-                msg_dict['content'] = [{'type': 'text', 'text': m.content}]
-            elif isinstance(m.content, list):
-                for i, ce in enumerate(m.content):
-                    if ce.type == 'image_base64':
-                        image_b64, image_format = await image.extract_b64_and_format(ce.image_base64)
-
-                        alter_image_ele = {
-                            'type': 'image',
-                            'source': {
-                                'type': 'base64',
-                                'media_type': f'image/{image_format}',
-                                'data': image_b64,
-                            },
-                        }
-                        msg_dict['content'][i] = alter_image_ele
-            if isinstance(msg_dict['content'], str) and msg_dict['content'] == '':
-                msg_dict['content'] = []  # 这里不知道为什么会莫名有个空导致content为字符
-            if m.tool_calls:
-                for tool_call in m.tool_calls:
-                    msg_dict['content'].append(
-                        {
-                            'type': 'tool_use',
-                            'id': tool_call.id,
-                            'name': tool_call.function.name,
-                            'input': json.loads(tool_call.function.arguments),
-                        }
-                    )
-
-                del msg_dict['tool_calls']
-
-            req_messages.append(msg_dict)
-        if 'thinking' in args:
-            args['thinking'] = {'type': 'enabled', 'budget_tokens': 10000}
-
-        args['messages'] = req_messages
-
-        if funcs:
-            tools = await self.ap.tool_mgr.generate_tools_for_anthropic(funcs)
-
-            if tools:
-                args['tools'] = tools
-
-        try:
-            role = 'assistant'  # 默认角色
-            # chunk_idx = 0
-            think_started = False
-            think_ended = False
-            finish_reason = False
-            tool_name = ''
-            tool_id = ''
-            async for chunk in await self.client.messages.create(**args):
-                content = ''
-                tool_call = {'id': None, 'function': {'name': None, 'arguments': None}, 'type': 'function'}
-                if isinstance(
-                    chunk, anthropic.types.raw_content_block_start_event.RawContentBlockStartEvent
-                ):  # 记录开始
-                    if chunk.content_block.type == 'tool_use':
-                        if chunk.content_block.name is not None:
-                            tool_name = chunk.content_block.name
-                        if chunk.content_block.id is not None:
-                            tool_id = chunk.content_block.id
-
-                        tool_call['function']['name'] = tool_name
-                        tool_call['function']['arguments'] = ''
-                        tool_call['id'] = tool_id
-
-                    if not remove_think:
-                        if chunk.content_block.type == 'thinking' and not remove_think:
-                            think_started = True
-                        elif chunk.content_block.type == 'text' and chunk.index != 0 and not remove_think:
-                            think_ended = True
-                        continue
-                elif isinstance(chunk, anthropic.types.raw_content_block_delta_event.RawContentBlockDeltaEvent):
-                    if chunk.delta.type == 'thinking_delta':
-                        if think_started:
-                            think_started = False
-                            content = '<think>\n' + chunk.delta.thinking
-                        elif remove_think:
-                            continue
-                        else:
-                            content = chunk.delta.thinking
-                    elif chunk.delta.type == 'text_delta':
-                        if think_ended:
-                            think_ended = False
-                            content = '\n</think>\n' + chunk.delta.text
-                        else:
-                            content = chunk.delta.text
-                    elif chunk.delta.type == 'input_json_delta':
-                        tool_call['function']['arguments'] = chunk.delta.partial_json
-                        tool_call['function']['name'] = tool_name
-                        tool_call['id'] = tool_id
-                elif isinstance(chunk, anthropic.types.raw_content_block_stop_event.RawContentBlockStopEvent):
-                    continue  # 记录raw_content_block结束的
-
-                elif isinstance(chunk, anthropic.types.raw_message_delta_event.RawMessageDeltaEvent):
-                    if chunk.delta.stop_reason == 'end_turn':
-                        finish_reason = True
-                elif isinstance(chunk, anthropic.types.raw_message_stop_event.RawMessageStopEvent):
-                    continue  # 这个好像是完全结束
-                else:
-                    # print(chunk)
-                    self.ap.logger.debug(f'anthropic chunk: {chunk}')
-                    continue
-
-                args = {
-                    'content': content,
-                    'role': role,
-                    'is_final': finish_reason,
-                    'tool_calls': None if tool_call['id'] is None else [tool_call],
-                }
-                # if chunk_idx == 0:
-                #     chunk_idx += 1
-                #     continue
-
-                # assert type(chunk) is anthropic.types.message.Chunk
-
-                yield provider_message.MessageChunk(**args)
-
-            # return llm_entities.Message(**args)
-        except anthropic.AuthenticationError as e:
-            raise errors.RequesterError(f'api-key 无效: {e.message}')
-        except anthropic.BadRequestError as e:
-            raise errors.RequesterError(str(e.message))
-        except anthropic.NotFoundError as e:
-            if 'model: ' in str(e):
-                raise errors.RequesterError(f'模型无效: {e.message}')
-            else:
-                raise errors.RequesterError(f'请求地址无效: {e.message}')
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/anthropicmsgs.yaml b/src/langbot/pkg/provider/modelmgr/requesters/anthropicmsgs.yaml
index 0ef60d3e..811ff6a2 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/anthropicmsgs.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/anthropicmsgs.yaml
@@ -7,6 +7,7 @@ metadata:
     zh_Hans: Anthropic
   icon: anthropic.svg
 spec:
+  litellm_provider: anthropic
   config:
   - name: base_url
     label:
@@ -24,6 +25,8 @@ spec:
     default: 120
   support_type:
   - llm
+  - text-embedding
+  - rerank
   provider_category: manufacturer
 execution:
   python:
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/baidu.svg b/src/langbot/pkg/provider/modelmgr/requesters/baidu.svg
new file mode 100644
index 00000000..a541c95e
--- /dev/null
+++ b/src/langbot/pkg/provider/modelmgr/requesters/baidu.svg
@@ -0,0 +1,5 @@
+<svg width="60" height="50" viewBox="0 0 60 50" xmlns="http://www.w3.org/2000/svg">
+  <rect width="60" height="50" rx="8" fill="#2932E1"/>
+  <text x="30" y="28" font-family="Arial, sans-serif" font-size="10" font-weight="bold" fill="white" text-anchor="middle">Baidu</text>
+  <text x="30" y="40" font-family="Arial, sans-serif" font-size="8" fill="white" text-anchor="middle">ERNIE</text>
+</svg>
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/baiduchatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/baiduchatcmpl.yaml
new file mode 100644
index 00000000..55e7c328
--- /dev/null
+++ b/src/langbot/pkg/provider/modelmgr/requesters/baiduchatcmpl.yaml
@@ -0,0 +1,30 @@
+apiVersion: v1
+kind: LLMAPIRequester
+metadata:
+  name: baidu-chat-completions
+  label:
+    en_US: Baidu ERNIE
+    zh_Hans: 百度文心一言
+  icon: baidu.svg
+spec:
+  litellm_provider: openai
+  config:
+  - name: base_url
+    label:
+      en_US: Base URL
+      zh_Hans: 基础 URL
+    type: string
+    required: true
+    default: https://qianfan.baidubce.com/v2  # Qianfan V2 is OpenAI-compatible; the legacy wenxinworkshop RPC API is not
+  - name: timeout
+    label:
+      en_US: Timeout
+      zh_Hans: 超时时间
+    type: integer
+    required: true
+    default: 120
+  support_type:
+  - llm
+  - text-embedding
+  - rerank
+  provider_category: manufacturer
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/bailianchatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/bailianchatcmpl.py
deleted file mode 100644
index 9da6e1b4..00000000
--- a/src/langbot/pkg/provider/modelmgr/requesters/bailianchatcmpl.py
+++ /dev/null
@@ -1,242 +0,0 @@
-from __future__ import annotations
-
-import typing
-import dashscope
-import openai
-
-from . import modelscopechatcmpl
-from .. import requester
-import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
-import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
-import langbot_plugin.api.entities.builtin.provider.message as provider_message
-
-
-class BailianChatCompletions(modelscopechatcmpl.ModelScopeChatCompletions):
-    """阿里云百炼大模型平台 ChatCompletion API 请求器"""
-
-    client: openai.AsyncClient
-
-    default_config: dict[str, typing.Any] = {
-        'base_url': 'https://dashscope.aliyuncs.com/compatible-mode/v1',
-        'timeout': 120,
-    }
-
-    async def _closure_stream(
-        self,
-        query: pipeline_query.Query,
-        req_messages: list[dict],
-        use_model: requester.RuntimeLLMModel,
-        use_funcs: list[resource_tool.LLMTool] = None,
-        extra_args: dict[str, typing.Any] = {},
-        remove_think: bool = False,
-    ) -> provider_message.Message | typing.AsyncGenerator[provider_message.MessageChunk, None]:
-        self.client.api_key = use_model.provider.token_mgr.get_token()
-
-        args = {}
-        args['model'] = use_model.model_entity.name
-
-        if use_funcs:
-            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
-
-            if tools:
-                args['tools'] = tools
-
-        # 设置此次请求中的messages
-        messages = req_messages.copy()
-
-        is_use_dashscope_call = False  # 是否使用阿里原生库调用
-        is_enable_multi_model = True  # 是否支持多轮对话
-        use_time_num = 0  # 模型已调用次数，防止存在多文件时重复调用
-        use_time_ids = []  # 已调用的ID列表
-        message_id = 0  # 记录消息序号
-
-        for msg in messages:
-            # print(msg)
-            if 'content' in msg and isinstance(msg['content'], list):
-                for me in msg['content']:
-                    if me['type'] == 'image_base64':
-                        me['image_url'] = {'url': me['image_base64']}
-                        me['type'] = 'image_url'
-                        del me['image_base64']
-                    elif me['type'] == 'file_url' and '.' in me.get('file_name', ''):
-                        # 1. 视频文件推理
-                        # https://bailian.console.aliyun.com/?tab=doc#/doc/?type=model&url=2845871
-                        file_type = me.get('file_name').lower().split('.')[-1]
-                        if file_type in ['mp4', 'avi', 'mkv', 'mov', 'flv', 'wmv']:
-                            me['type'] = 'video_url'
-                            me['video_url'] = {'url': me['file_url']}
-                            del me['file_url']
-                            del me['file_name']
-                            use_time_num += 1
-                            use_time_ids.append(message_id)
-                            is_enable_multi_model = False
-                        # 2. 语音文件识别, 无法通过openai的audio字段传递，暂时不支持
-                        # https://bailian.console.aliyun.com/?tab=doc#/doc/?type=model&url=2979031
-                        elif file_type in [
-                            'aac',
-                            'amr',
-                            'aiff',
-                            'flac',
-                            'm4a',
-                            'mp3',
-                            'mpeg',
-                            'ogg',
-                            'opus',
-                            'wav',
-                            'webm',
-                            'wma',
-                        ]:
-                            me['audio'] = me['file_url']
-                            me['type'] = 'audio'
-                            del me['file_url']
-                            del me['type']
-                            del me['file_name']
-                            is_use_dashscope_call = True
-                            use_time_num += 1
-                            use_time_ids.append(message_id)
-                            is_enable_multi_model = False
-            message_id += 1
-
-        # 使用列表推导式，保留不在 use_time_ids[:-1] 中的元素，仅保留最后一个多媒体消息
-        if not is_enable_multi_model and use_time_num > 1:
-            messages = [msg for idx, msg in enumerate(messages) if idx not in use_time_ids[:-1]]
-
-        if not is_enable_multi_model:
-            messages = [msg for msg in messages if 'resp_message_id' not in msg]
-
-        args['messages'] = messages
-        args['stream'] = True
-
-        # 流式处理状态
-        # tool_calls_map: dict[str, provider_message.ToolCall] = {}
-        chunk_idx = 0
-        thinking_started = False
-        thinking_ended = False
-        role = 'assistant'  # 默认角色
-
-        if is_use_dashscope_call:
-            response = dashscope.MultiModalConversation.call(
-                # 若没有配置环境变量，请用百炼API Key将下行替换为：api_key = "sk-xxx"
-                api_key=use_model.provider.token_mgr.get_token(),
-                model=use_model.model_entity.name,
-                messages=messages,
-                result_format='message',
-                asr_options={
-                    # "language": "zh", # 可选，若已知音频的语种，可通过该参数指定待识别语种，以提升识别准确率
-                    'enable_lid': True,
-                    'enable_itn': False,
-                },
-                stream=True,
-            )
-            content_length_list = []
-            previous_length = 0  # 记录上一次的内容长度
-            for res in response:
-                chunk = res['output']
-                # 解析 chunk 数据
-                if hasattr(chunk, 'choices') and chunk.choices:
-                    choice = chunk.choices[0]
-                    delta_content = choice['message'].content[0]['text']
-                    finish_reason = choice['finish_reason']
-                    content_length_list.append(len(delta_content))
-                else:
-                    delta_content = ''
-                    finish_reason = None
-
-                # 跳过空的第一个 chunk（只有 role 没有内容）
-                if chunk_idx == 0 and not delta_content:
-                    chunk_idx += 1
-                    continue
-
-                # 检查 content_length_list 是否有足够的数据
-                if len(content_length_list) >= 2:
-                    now_content = delta_content[previous_length : content_length_list[-1]]
-                    previous_length = content_length_list[-1]  # 更新上一次的长度
-                else:
-                    now_content = delta_content  # 第一次循环时直接使用 delta_content
-                    previous_length = len(delta_content)  # 更新上一次的长度
-
-                # 构建 MessageChunk - 只包含增量内容
-                chunk_data = {
-                    'role': role,
-                    'content': now_content if now_content else None,
-                    'is_final': bool(finish_reason) and finish_reason != 'null',
-                }
-
-                # 移除 None 值
-                chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
-                yield provider_message.MessageChunk(**chunk_data)
-                chunk_idx += 1
-        else:
-            async for chunk in self._req_stream(args, extra_body=extra_args):
-                # 解析 chunk 数据
-                if hasattr(chunk, 'choices') and chunk.choices:
-                    choice = chunk.choices[0]
-                    delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
-                    finish_reason = getattr(choice, 'finish_reason', None)
-                else:
-                    delta = {}
-                    finish_reason = None
-
-                # 从第一个 chunk 获取 role，后续使用这个 role
-                if 'role' in delta and delta['role']:
-                    role = delta['role']
-
-                # 获取增量内容
-                delta_content = delta.get('content', '')
-                reasoning_content = delta.get('reasoning_content', '')
-
-                # 处理 reasoning_content
-                if reasoning_content:
-                    # accumulated_reasoning += reasoning_content
-                    # 如果设置了 remove_think，跳过 reasoning_content
-                    if remove_think:
-                        chunk_idx += 1
-                        continue
-
-                    # 第一次出现 reasoning_content，添加 <think> 开始标签
-                    if not thinking_started:
-                        thinking_started = True
-                        delta_content = '<think>\n' + reasoning_content
-                    else:
-                        # 继续输出 reasoning_content
-                        delta_content = reasoning_content
-                elif thinking_started and not thinking_ended and delta_content:
-                    # reasoning_content 结束，normal content 开始，添加 </think> 结束标签
-                    thinking_ended = True
-                    delta_content = '\n</think>\n' + delta_content
-
-                # 处理工具调用增量
-                if delta.get('tool_calls'):
-                    for tool_call in delta['tool_calls']:
-                        if tool_call['id'] != '':
-                            tool_id = tool_call['id']
-                        if tool_call['function']['name'] is not None:
-                            tool_name = tool_call['function']['name']
-
-                        if tool_call['type'] is None:
-                            tool_call['type'] = 'function'
-                        tool_call['id'] = tool_id
-                        tool_call['function']['name'] = tool_name
-                        tool_call['function']['arguments'] = (
-                            '' if tool_call['function']['arguments'] is None else tool_call['function']['arguments']
-                        )
-
-                # 跳过空的第一个 chunk（只有 role 没有内容）
-                if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'):
-                    chunk_idx += 1
-                    continue
-
-                # 构建 MessageChunk - 只包含增量内容
-                chunk_data = {
-                    'role': role,
-                    'content': delta_content if delta_content else None,
-                    'tool_calls': delta.get('tool_calls'),
-                    'is_final': bool(finish_reason),
-                }
-
-                # 移除 None 值
-                chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
-
-                yield provider_message.MessageChunk(**chunk_data)
-                chunk_idx += 1
-                # return
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/bailianchatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/bailianchatcmpl.yaml
index fc5998c4..15203876 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/bailianchatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/bailianchatcmpl.yaml
@@ -7,6 +7,7 @@ metadata:
     zh_Hans: 阿里云百炼
   icon: bailian.png
 spec:
+  litellm_provider: openai
   config:
   - name: base_url
     label:
@@ -24,6 +25,7 @@ spec:
     default: 120
   support_type:
   - llm
+  - text-embedding
   - rerank
   provider_category: maas
 execution:
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/chatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/chatcmpl.py
deleted file mode 100644
index da24bda0..00000000
--- a/src/langbot/pkg/provider/modelmgr/requesters/chatcmpl.py
+++ /dev/null
@@ -1,702 +0,0 @@
-from __future__ import annotations
-
-import asyncio
-import typing
-
-import openai
-import openai.types.chat.chat_completion as chat_completion_module
-import httpx
-
-from .. import errors, requester
-import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
-import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
-import langbot_plugin.api.entities.builtin.provider.message as provider_message
-
-
-class OpenAIChatCompletions(requester.ProviderAPIRequester):
-    """OpenAI ChatCompletion API 请求器"""
-
-    client: openai.AsyncClient
-
-    default_config: dict[str, typing.Any] = {
-        'base_url': 'https://api.openai.com/v1',
-        'timeout': 120,
-    }
-
-    async def initialize(self):
-        self.client = openai.AsyncClient(
-            api_key='',
-            base_url=self.requester_cfg['base_url'].replace(' ', ''),
-            timeout=self.requester_cfg['timeout'],
-            http_client=httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']),
-        )
-
-    def _mask_api_key(self, api_key: str | None) -> str:
-        if not api_key:
-            return ''
-        if len(api_key) <= 8:
-            return '****'
-        return f'{api_key[:4]}...{api_key[-4:]}'
-
-    def _infer_model_type(self, model_id: str) -> str:
-        normalized_model_id = (model_id or '').lower()
-        embedding_keywords = (
-            'embedding',
-            'embed',
-            'bge-',
-            'e5-',
-            'm3e',
-            'gte-',
-            'multilingual-e5',
-            'text-embedding',
-        )
-        return 'embedding' if any(keyword in normalized_model_id for keyword in embedding_keywords) else 'llm'
-
-    def _infer_model_abilities(self, item: dict[str, typing.Any], model_id: str) -> list[str]:
-        normalized_model_id = (model_id or '').lower()
-        abilities: set[str] = set()
-
-        def _flatten(value: typing.Any) -> list[str]:
-            if value is None:
-                return []
-            if isinstance(value, str):
-                return [value.lower()]
-            if isinstance(value, dict):
-                flattened: list[str] = []
-                for nested_value in value.values():
-                    flattened.extend(_flatten(nested_value))
-                return flattened
-            if isinstance(value, (list, tuple, set)):
-                flattened: list[str] = []
-                for nested_value in value:
-                    flattened.extend(_flatten(nested_value))
-                return flattened
-            return [str(value).lower()]
-
-        capability_tokens = _flatten(item.get('capabilities'))
-        capability_tokens.extend(_flatten(item.get('modalities')))
-        capability_tokens.extend(_flatten(item.get('input_modalities')))
-        capability_tokens.extend(_flatten(item.get('output_modalities')))
-        capability_tokens.extend(_flatten(item.get('supported_generation_methods')))
-        capability_tokens.extend(_flatten(item.get('supported_parameters')))
-        capability_tokens.extend(_flatten(item.get('architecture')))
-
-        combined_tokens = capability_tokens + [normalized_model_id]
-
-        vision_keywords = (
-            'vision',
-            'image',
-            'file',
-            'video',
-            'multimodal',
-            'vl',
-            'ocr',
-            'omni',
-        )
-        function_call_keywords = (
-            'function',
-            'tool',
-            'tools',
-            'tool_choice',
-            'tool_call',
-            'tool-use',
-            'tool_use',
-        )
-
-        if any(any(keyword in token for keyword in vision_keywords) for token in combined_tokens):
-            abilities.add('vision')
-
-        if any(any(keyword in token for keyword in function_call_keywords) for token in combined_tokens):
-            abilities.add('func_call')
-
-        return sorted(abilities)
-
-    def _normalize_modalities(self, value: typing.Any) -> list[str]:
-        normalized: list[str] = []
-
-        def _collect(item: typing.Any):
-            if item is None:
-                return
-            if isinstance(item, str):
-                for part in item.replace('->', ',').replace('+', ',').split(','):
-                    token = part.strip().lower()
-                    if token and token not in normalized:
-                        normalized.append(token)
-                return
-            if isinstance(item, dict):
-                for nested in item.values():
-                    _collect(nested)
-                return
-            if isinstance(item, (list, tuple, set)):
-                for nested in item:
-                    _collect(nested)
-                return
-
-        _collect(value)
-        return normalized
-
-    def _extract_scan_metadata(self, item: dict[str, typing.Any], model_id: str) -> dict[str, typing.Any]:
-        display_name = item.get('name')
-        if not isinstance(display_name, str) or not display_name.strip() or display_name == model_id:
-            display_name = ''
-
-        description = item.get('description')
-        if not isinstance(description, str) or not description.strip():
-            description = ''
-
-        context_length = item.get('context_length')
-        if context_length is None and isinstance(item.get('top_provider'), dict):
-            context_length = item['top_provider'].get('context_length')
-
-        if not isinstance(context_length, int):
-            try:
-                context_length = int(context_length) if context_length is not None else None
-            except (TypeError, ValueError):
-                context_length = None
-
-        input_modalities = self._normalize_modalities(item.get('input_modalities'))
-        output_modalities = self._normalize_modalities(item.get('output_modalities'))
-
-        if isinstance(item.get('architecture'), dict):
-            if not input_modalities:
-                input_modalities = self._normalize_modalities(item['architecture'].get('input_modalities'))
-            if not output_modalities:
-                output_modalities = self._normalize_modalities(item['architecture'].get('output_modalities'))
-
-        owned_by = item.get('owned_by')
-        if not isinstance(owned_by, str) or not owned_by.strip():
-            owned_by = ''
-
-        return {
-            'display_name': display_name or None,
-            'description': description or None,
-            'context_length': context_length,
-            'owned_by': owned_by or None,
-            'input_modalities': input_modalities,
-            'output_modalities': output_modalities,
-        }
-
-    async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]:
-        headers = {}
-        if api_key:
-            headers['Authorization'] = f'Bearer {api_key}'
-
-        models_url = f'{self.requester_cfg["base_url"].rstrip("/")}/models'
-        async with httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']) as client:
-            response = await client.get(models_url, headers=headers)
-            response.raise_for_status()
-            payload = response.json()
-
-        models = []
-        for item in payload.get('data', []):
-            model_id = item.get('id')
-            if not model_id:
-                continue
-            models.append(
-                {
-                    'id': model_id,
-                    'name': model_id,
-                    'type': self._infer_model_type(model_id),
-                    'abilities': self._infer_model_abilities(item, model_id),
-                    **self._extract_scan_metadata(item, model_id),
-                }
-            )
-
-        models.sort(key=lambda item: (item['type'] != 'llm', item['name'].lower()))
-        return {
-            'models': models,
-            'debug': {
-                'request': {
-                    'method': 'GET',
-                    'url': models_url,
-                    'headers': {
-                        'Authorization': f'Bearer {self._mask_api_key(api_key)}' if api_key else '',
-                    },
-                },
-                'response': payload,
-            },
-        }
-
-    async def _req(
-        self,
-        args: dict,
-        extra_body: dict = {},
-    ) -> chat_completion_module.ChatCompletion:
-        return await self.client.chat.completions.create(**args, extra_body=extra_body)
-
-    async def _req_stream(
-        self,
-        args: dict,
-        extra_body: dict = {},
-    ):
-        async for chunk in await self.client.chat.completions.create(**args, extra_body=extra_body):
-            yield chunk
-
-    async def _make_msg(
-        self,
-        chat_completion: chat_completion_module.ChatCompletion,
-        remove_think: bool = False,
-    ) -> provider_message.Message:
-        if not isinstance(chat_completion, chat_completion_module.ChatCompletion):
-            raise TypeError(f'Expected ChatCompletion, got {type(chat_completion).__name__}: {chat_completion[:16]}')
-
-        chatcmpl_message = chat_completion.choices[0].message.model_dump()
-
-        # 确保 role 字段存在且不为 None
-        if 'role' not in chatcmpl_message or chatcmpl_message['role'] is None:
-            chatcmpl_message['role'] = 'assistant'
-
-        # 处理思维链
-        content = chatcmpl_message.get('content', '')
-        reasoning_content = chatcmpl_message.get('reasoning_content', None)
-
-        processed_content, _ = await self._process_thinking_content(
-            content=content, reasoning_content=reasoning_content, remove_think=remove_think
-        )
-
-        chatcmpl_message['content'] = processed_content
-
-        # 移除 reasoning_content 字段，避免传递给 Message
-        if 'reasoning_content' in chatcmpl_message:
-            del chatcmpl_message['reasoning_content']
-
-        message = provider_message.Message(**chatcmpl_message)
-
-        return message
-
-    async def _process_thinking_content(
-        self,
-        content: str,
-        reasoning_content: str = None,
-        remove_think: bool = False,
-    ) -> tuple[str, str]:
-        """处理思维链内容
-
-        Args:
-            content: 原始内容
-            reasoning_content: reasoning_content 字段内容
-            remove_think: 是否移除思维链
-
-        Returns:
-            (处理后的内容, 提取的思维链内容)
-        """
-        thinking_content = ''
-
-        # 1. 从 reasoning_content 提取思维链
-        if reasoning_content:
-            thinking_content = reasoning_content
-
-        # 2. 从 content 中提取 <think> 标签内容
-        if content and '<think>' in content and '</think>' in content:
-            import re
-
-            think_pattern = r'<think>(.*?)</think>'
-            think_matches = re.findall(think_pattern, content, re.DOTALL)
-            if think_matches:
-                # 如果已有 reasoning_content，则追加
-                if thinking_content:
-                    thinking_content += '\n' + '\n'.join(think_matches)
-                else:
-                    thinking_content = '\n'.join(think_matches)
-                # 移除 content 中的 <think> 标签
-                content = re.sub(think_pattern, '', content, flags=re.DOTALL).strip()
-
-        # 3. 根据 remove_think 参数决定是否保留思维链
-        if remove_think:
-            return content, ''
-        else:
-            # 如果有思维链内容，将其以 <think> 格式添加到 content 开头
-            if thinking_content:
-                content = f'<think>\n{thinking_content}\n</think>\n{content}'.strip()
-            return content, thinking_content
-
-    async def _closure_stream(
-        self,
-        query: pipeline_query.Query,
-        req_messages: list[dict],
-        use_model: requester.RuntimeLLMModel,
-        use_funcs: list[resource_tool.LLMTool] = None,
-        extra_args: dict[str, typing.Any] = {},
-        remove_think: bool = False,
-    ) -> provider_message.MessageChunk:
-        self.client.api_key = use_model.provider.token_mgr.get_token()
-
-        args = {}
-        args['model'] = use_model.model_entity.name
-
-        if use_funcs:
-            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
-            if tools:
-                args['tools'] = tools
-
-        # 设置此次请求中的messages
-        messages = req_messages.copy()
-
-        # 检查vision
-        for msg in messages:
-            if 'content' in msg and isinstance(msg['content'], list):
-                for me in msg['content']:
-                    if me['type'] == 'image_base64':
-                        me['image_url'] = {'url': me['image_base64']}
-                        me['type'] = 'image_url'
-                        del me['image_base64']
-
-        args['messages'] = messages
-        args['stream'] = True
-
-        # 流式处理状态
-        # tool_calls_map: dict[str, provider_message.ToolCall] = {}
-        chunk_idx = 0
-        thinking_started = False
-        thinking_ended = False
-        role = 'assistant'  # 默认角色
-        tool_id = ''
-        tool_name = ''
-        # accumulated_reasoning = ''  # 仅用于判断何时结束思维链
-
-        async for chunk in self._req_stream(args, extra_body=extra_args):
-            # 解析 chunk 数据
-
-            if hasattr(chunk, 'choices') and chunk.choices:
-                choice = chunk.choices[0]
-                delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
-
-                finish_reason = getattr(choice, 'finish_reason', None)
-            else:
-                delta = {}
-                finish_reason = None
-            # 从第一个 chunk 获取 role，后续使用这个 role
-            if 'role' in delta and delta['role']:
-                role = delta['role']
-
-            # 获取增量内容
-            delta_content = delta.get('content', '')
-            reasoning_content = delta.get('reasoning_content', '')
-
-            # 处理 reasoning_content
-            if reasoning_content:
-                # accumulated_reasoning += reasoning_content
-                # 如果设置了 remove_think，跳过 reasoning_content
-                if remove_think:
-                    chunk_idx += 1
-                    continue
-
-                # 第一次出现 reasoning_content，添加 <think> 开始标签
-                if not thinking_started:
-                    thinking_started = True
-                    delta_content = '<think>\n' + reasoning_content
-                else:
-                    # 继续输出 reasoning_content
-                    delta_content = reasoning_content
-            elif thinking_started and not thinking_ended and delta_content:
-                # reasoning_content 结束，normal content 开始，添加 </think> 结束标签
-                thinking_ended = True
-                delta_content = '\n</think>\n' + delta_content
-
-            # 处理 content 中已有的 <think> 标签（如果需要移除）
-            # if delta_content and remove_think and '<think>' in delta_content:
-            #     import re
-            #
-            #     # 移除 <think> 标签及其内容
-            #     delta_content = re.sub(r'<think>.*?</think>', '', delta_content, flags=re.DOTALL)
-
-            # 处理工具调用增量
-            # delta_tool_calls = None
-            if delta.get('tool_calls'):
-                for tool_call in delta['tool_calls']:
-                    if tool_call['id'] and tool_call['function']['name']:
-                        tool_id = tool_call['id']
-                        tool_name = tool_call['function']['name']
-                    else:
-                        tool_call['id'] = tool_id
-                        tool_call['function']['name'] = tool_name
-                    if tool_call['type'] is None:
-                        tool_call['type'] = 'function'
-
-            # 跳过空的第一个 chunk（只有 role 没有内容）
-            if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'):
-                chunk_idx += 1
-                continue
-            # 构建 MessageChunk - 只包含增量内容
-            chunk_data = {
-                'role': role,
-                'content': delta_content if delta_content else None,
-                'tool_calls': delta.get('tool_calls'),
-                'is_final': bool(finish_reason),
-            }
-
-            # 移除 None 值
-            chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
-
-            yield provider_message.MessageChunk(**chunk_data)
-            chunk_idx += 1
-
-    async def _closure(
-        self,
-        query: pipeline_query.Query,
-        req_messages: list[dict],
-        use_model: requester.RuntimeLLMModel,
-        use_funcs: list[resource_tool.LLMTool] = None,
-        extra_args: dict[str, typing.Any] = {},
-        remove_think: bool = False,
-    ) -> tuple[provider_message.Message, dict]:
-        self.client.api_key = use_model.provider.token_mgr.get_token()
-
-        args = {}
-        args['model'] = use_model.model_entity.name
-
-        if use_funcs:
-            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
-
-            if tools:
-                args['tools'] = tools
-
-        # 设置此次请求中的messages
-        messages = req_messages.copy()
-
-        # 检查vision
-        for msg in messages:
-            if 'content' in msg and isinstance(msg['content'], list):
-                for me in msg['content']:
-                    if me['type'] == 'image_base64':
-                        me['image_url'] = {'url': me['image_base64']}
-                        me['type'] = 'image_url'
-                        del me['image_base64']
-
-        args['messages'] = messages
-
-        # 发送请求
-
-        resp = await self._req(args, extra_body=extra_args)
-        # 处理请求结果
-        message = await self._make_msg(resp, remove_think)
-
-        # Extract token usage from response
-        usage_info = {}
-        if hasattr(resp, 'usage') and resp.usage:
-            usage_info['input_tokens'] = resp.usage.prompt_tokens or 0
-            usage_info['output_tokens'] = resp.usage.completion_tokens or 0
-            usage_info['total_tokens'] = resp.usage.total_tokens or 0
-
-        return message, usage_info
-
-    async def invoke_llm(
-        self,
-        query: pipeline_query.Query,
-        model: requester.RuntimeLLMModel,
-        messages: typing.List[provider_message.Message],
-        funcs: typing.List[resource_tool.LLMTool] = None,
-        extra_args: dict[str, typing.Any] = {},
-        remove_think: bool = False,
-    ) -> tuple[provider_message.Message, dict]:
-        """Invoke LLM and return message with usage info"""
-        req_messages = []  # req_messages 仅用于类内，外部同步由 query.messages 进行
-        for m in messages:
-            msg_dict = m.dict(exclude_none=True)
-            content = msg_dict.get('content')
-            if isinstance(content, list):
-                # 检查 content 列表中是否每个部分都是文本
-                if all(isinstance(part, dict) and part.get('type') == 'text' for part in content):
-                    # 将所有文本部分合并为一个字符串
-                    msg_dict['content'] = '\n'.join(part['text'] for part in content)
-            req_messages.append(msg_dict)
-
-        try:
-            msg, usage_info = await self._closure(
-                query=query,
-                req_messages=req_messages,
-                use_model=model,
-                use_funcs=funcs,
-                extra_args=extra_args,
-                remove_think=remove_think,
-            )
-            return msg, usage_info
-        except asyncio.TimeoutError:
-            raise errors.RequesterError('请求超时')
-        except openai.BadRequestError as e:
-            error_message = str(e.message) if hasattr(e, 'message') else str(e)
-            if 'context_length_exceeded' in str(e):
-                raise errors.RequesterError(f'上文过长，请重置会话: {error_message}')
-            else:
-                raise errors.RequesterError(f'请求参数错误: {error_message}')
-        except openai.AuthenticationError as e:
-            error_message = str(e.message) if hasattr(e, 'message') else str(e)
-            raise errors.RequesterError(f'无效的 api-key: {error_message}')
-        except openai.NotFoundError as e:
-            error_message = str(e.message) if hasattr(e, 'message') else str(e)
-            raise errors.RequesterError(f'请求路径错误: {error_message}')
-        except openai.RateLimitError as e:
-            error_message = str(e.message) if hasattr(e, 'message') else str(e)
-            raise errors.RequesterError(f'请求过于频繁或余额不足: {error_message}')
-        except openai.APIConnectionError as e:
-            error_message = f'连接错误: {str(e)}'
-            raise errors.RequesterError(error_message)
-        except openai.APIError as e:
-            error_message = str(e.message) if hasattr(e, 'message') else str(e)
-            raise errors.RequesterError(f'请求错误: {error_message}')
-
-    async def invoke_embedding(
-        self,
-        model: requester.RuntimeEmbeddingModel,
-        input_text: list[str],
-        extra_args: dict[str, typing.Any] = {},
-    ) -> tuple[list[list[float]], dict]:
-        """调用 Embedding API, returns (embeddings, usage_info)"""
-        self.client.api_key = model.provider.token_mgr.get_token()
-
-        args = {
-            'model': model.model_entity.name,
-            'input': input_text,
-        }
-
-        if model.model_entity.extra_args:
-            args.update(model.model_entity.extra_args)
-
-        args.update(extra_args)
-
-        try:
-            resp = await self.client.embeddings.create(**args)
-
-            # Extract usage info
-            usage_info = {}
-            if hasattr(resp, 'usage') and resp.usage:
-                usage_info['prompt_tokens'] = resp.usage.prompt_tokens or 0
-                usage_info['total_tokens'] = resp.usage.total_tokens or 0
-
-            return [d.embedding for d in resp.data], usage_info
-        except asyncio.TimeoutError:
-            raise errors.RequesterError('请求超时')
-        except openai.BadRequestError as e:
-            raise errors.RequesterError(f'请求参数错误: {e.message}')
-
-    async def invoke_llm_stream(
-        self,
-        query: pipeline_query.Query,
-        model: requester.RuntimeLLMModel,
-        messages: typing.List[provider_message.Message],
-        funcs: typing.List[resource_tool.LLMTool] = None,
-        extra_args: dict[str, typing.Any] = {},
-        remove_think: bool = False,
-    ) -> provider_message.MessageChunk:
-        req_messages = []  # req_messages 仅用于类内，外部同步由 query.messages 进行
-        for m in messages:
-            msg_dict = m.dict(exclude_none=True)
-            content = msg_dict.get('content')
-            if isinstance(content, list):
-                # 检查 content 列表中是否每个部分都是文本
-                if all(isinstance(part, dict) and part.get('type') == 'text' for part in content):
-                    # 将所有文本部分合并为一个字符串
-                    msg_dict['content'] = '\n'.join(part['text'] for part in content)
-            req_messages.append(msg_dict)
-
-        try:
-            async for item in self._closure_stream(
-                query=query,
-                req_messages=req_messages,
-                use_model=model,
-                use_funcs=funcs,
-                extra_args=extra_args,
-                remove_think=remove_think,
-            ):
-                yield item
-
-        except asyncio.TimeoutError:
-            raise errors.RequesterError('请求超时')
-        except openai.BadRequestError as e:
-            if 'context_length_exceeded' in e.message:
-                raise errors.RequesterError(f'上文过长，请重置会话: {e.message}')
-            else:
-                raise errors.RequesterError(f'请求参数错误: {e.message}')
-        except openai.AuthenticationError as e:
-            raise errors.RequesterError(f'无效的 api-key: {e.message}')
-        except openai.NotFoundError as e:
-            raise errors.RequesterError(f'请求路径错误: {e.message}')
-        except openai.RateLimitError as e:
-            raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}')
-        except openai.APIError as e:
-            raise errors.RequesterError(f'请求错误: {e.message}')
-
-    async def invoke_rerank(
-        self,
-        model: requester.RuntimeRerankModel,
-        query: str,
-        documents: typing.List[str],
-        extra_args: dict[str, typing.Any] = {},
-    ) -> typing.List[dict]:
-        """Standard /rerank endpoint (Jina/Cohere/SiliconFlow/Voyage/DashScope compatible)
-
-        Supports extra_args from model.extra_args:
-        - rerank_url: full URL override (e.g. "https://dashscope.aliyuncs.com/compatible-api/v1/reranks")
-        - rerank_path: path override appended to base_url (e.g. "reranks" instead of default "rerank")
-        - Any other fields are merged into the request payload.
-        """
-        api_key = model.provider.token_mgr.get_token()
-        base_url = self.requester_cfg.get('base_url', '').rstrip('/')
-        timeout = self.requester_cfg.get('timeout', 120)
-
-        merged_args = {}
-        if model.model_entity.extra_args:
-            merged_args.update(model.model_entity.extra_args)
-        if extra_args:
-            merged_args.update(extra_args)
-
-        rerank_url = merged_args.pop('rerank_url', None)
-        rerank_path = merged_args.pop('rerank_path', 'rerank')
-        if not rerank_url:
-            rerank_url = f'{base_url}/{rerank_path}'
-
-        headers = {
-            'Content-Type': 'application/json',
-            'Authorization': f'Bearer {api_key}',
-        }
-
-        payload = {
-            'model': model.model_entity.name,
-            'query': query,
-            'documents': documents[:64],
-            'top_n': min(len(documents), 64),
-        }
-
-        if merged_args:
-            payload.update(merged_args)
-
-        try:
-            async with httpx.AsyncClient(trust_env=True, timeout=timeout) as client:
-                resp = await client.post(rerank_url, headers=headers, json=payload)
-                resp.raise_for_status()
-                data = resp.json()
-
-            results = self._parse_rerank_response(data)
-
-            if results:
-                scores = [r.get('relevance_score', 0.0) for r in results]
-                min_score = min(scores)
-                max_score = max(scores)
-                if max_score - min_score > 1e-6:
-                    for r in results:
-                        r['relevance_score'] = (r['relevance_score'] - min_score) / (max_score - min_score)
-
-            return results
-        except httpx.HTTPStatusError as e:
-            raise errors.RequesterError(f'Rerank request failed: {e.response.status_code} - {e.response.text}')
-        except httpx.TimeoutException:
-            raise errors.RequesterError('Rerank request timed out')
-        except Exception as e:
-            raise errors.RequesterError(f'Rerank request error: {str(e)}')
-
-    @staticmethod
-    def _parse_rerank_response(data: dict) -> typing.List[dict]:
-        """Parse rerank response from various providers.
-
-        Handles:
-        - Jina/Cohere/SiliconFlow: {"results": [{"index", "relevance_score"}]}
-        - Voyage AI: {"data": [{"index", "relevance_score"}]}
-        - DashScope: {"output": {"results": [{"index", "relevance_score"}]}}
-        """
-        if 'results' in data:
-            return data['results']
-        if 'data' in data:
-            return data['data']
-        if 'output' in data and isinstance(data['output'], dict):
-            return data['output'].get('results', [])
-        return []
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/chatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/chatcmpl.yaml
index 21bd6a05..b77a352c 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/chatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/chatcmpl.yaml
@@ -7,6 +7,7 @@ metadata:
     zh_Hans: OpenAI
   icon: openai.svg
 spec:
+  litellm_provider: openai
   config:
   - name: base_url
     label:
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/coherererank.yaml b/src/langbot/pkg/provider/modelmgr/requesters/coherererank.yaml
index f1ca209b..504442ea 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/coherererank.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/coherererank.yaml
@@ -7,6 +7,7 @@ metadata:
     zh_Hans: Cohere
   icon: cohere.svg
 spec:
+  litellm_provider: cohere
   config:
   - name: base_url
     label:
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/compsharechatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/compsharechatcmpl.py
deleted file mode 100644
index d272e721..00000000
--- a/src/langbot/pkg/provider/modelmgr/requesters/compsharechatcmpl.py
+++ /dev/null
@@ -1,17 +0,0 @@
-from __future__ import annotations
-
-import typing
-import openai
-
-from . import chatcmpl
-
-
-class CompShareChatCompletions(chatcmpl.OpenAIChatCompletions):
-    """CompShare ChatCompletion API 请求器"""
-
-    client: openai.AsyncClient
-
-    default_config: dict[str, typing.Any] = {
-        'base_url': 'https://api.modelverse.cn/v1',
-        'timeout': 120,
-    }
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/compsharechatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/compsharechatcmpl.yaml
index 92fcafdc..8958eb41 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/compsharechatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/compsharechatcmpl.yaml
@@ -7,6 +7,7 @@ metadata:
     zh_Hans: 优云智算
   icon: compshare.png
 spec:
+  litellm_provider: openai
   config:
   - name: base_url
     label:
@@ -24,6 +25,8 @@ spec:
     default: 120
   support_type:
   - llm
+  - text-embedding
+  - rerank
   provider_category: maas
 execution:
   python:
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/deepseekchatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/deepseekchatcmpl.py
deleted file mode 100644
index 5bcbd40c..00000000
--- a/src/langbot/pkg/provider/modelmgr/requesters/deepseekchatcmpl.py
+++ /dev/null
@@ -1,67 +0,0 @@
-from __future__ import annotations
-
-import typing
-
-from . import chatcmpl
-from .. import errors, requester
-import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
-import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
-import langbot_plugin.api.entities.builtin.provider.message as provider_message
-
-
-class DeepseekChatCompletions(chatcmpl.OpenAIChatCompletions):
-    """Deepseek ChatCompletion API 请求器"""
-
-    default_config: dict[str, typing.Any] = {
-        'base_url': 'https://api.deepseek.com',
-        'timeout': 120,
-    }
-
-    async def _closure(
-        self,
-        query: pipeline_query.Query,
-        req_messages: list[dict],
-        use_model: requester.RuntimeLLMModel,
-        use_funcs: list[resource_tool.LLMTool] = None,
-        extra_args: dict[str, typing.Any] = {},
-        remove_think: bool = False,
-    ) -> tuple[provider_message.Message, dict]:
-        self.client.api_key = use_model.provider.token_mgr.get_token()
-
-        args = {}
-        args['model'] = use_model.model_entity.name
-
-        if use_funcs:
-            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
-
-            if tools:
-                args['tools'] = tools
-
-        # 设置此次请求中的messages
-        messages = req_messages
-
-        # deepseek 不支持多模态，把content都转换成纯文字
-        for m in messages:
-            if 'content' in m and isinstance(m['content'], list):
-                m['content'] = ' '.join([c['text'] for c in m['content'] if 'text' in c])
-
-        args['messages'] = messages
-
-        # 发送请求
-        resp = await self._req(args, extra_body=extra_args)
-
-        # print(resp)
-
-        if resp is None:
-            raise errors.RequesterError('接口返回为空，请确定模型提供商服务是否正常')
-        # 处理请求结果
-        message = await self._make_msg(resp, remove_think)
-
-        # Extract token usage from response
-        usage_info = {}
-        if hasattr(resp, 'usage') and resp.usage:
-            usage_info['input_tokens'] = resp.usage.prompt_tokens or 0
-            usage_info['output_tokens'] = resp.usage.completion_tokens or 0
-            usage_info['total_tokens'] = resp.usage.total_tokens or 0
-
-        return message, usage_info
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/deepseekchatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/deepseekchatcmpl.yaml
index 8ef1fcf9..c8da83c7 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/deepseekchatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/deepseekchatcmpl.yaml
@@ -7,6 +7,7 @@ metadata:
     zh_Hans: DeepSeek
   icon: deepseek.svg
 spec:
+  litellm_provider: deepseek
   config:
   - name: base_url
     label:
@@ -24,6 +25,8 @@ spec:
     default: 120
   support_type:
   - llm
+  - text-embedding
+  - rerank
   provider_category: manufacturer
 execution:
   python:
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/doubao.svg b/src/langbot/pkg/provider/modelmgr/requesters/doubao.svg
new file mode 100644
index 00000000..e47c7232
--- /dev/null
+++ b/src/langbot/pkg/provider/modelmgr/requesters/doubao.svg
@@ -0,0 +1,4 @@
+<svg width="60" height="50" viewBox="0 0 60 50" xmlns="http://www.w3.org/2000/svg">
+  <rect width="60" height="50" rx="8" fill="#3B82F6"/>
+  <text x="30" y="32" font-family="Arial, sans-serif" font-size="12" font-weight="bold" fill="white" text-anchor="middle">豆包</text>
+</svg>
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/doubaochatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/doubaochatcmpl.yaml
new file mode 100644
index 00000000..0b608b96
--- /dev/null
+++ b/src/langbot/pkg/provider/modelmgr/requesters/doubaochatcmpl.yaml
@@ -0,0 +1,30 @@
+apiVersion: v1
+kind: LLMAPIRequester
+metadata:
+  name: doubao-chat-completions
+  label:
+    en_US: ByteDance Doubao
+    zh_Hans: 字节豆包
+  icon: doubao.svg
+spec:
+  litellm_provider: openai
+  config:
+  - name: base_url
+    label:
+      en_US: Base URL
+      zh_Hans: 基础 URL
+    type: string
+    required: true
+    default: https://ark.cn-beijing.volces.com/api/v3
+  - name: timeout
+    label:
+      en_US: Timeout
+      zh_Hans: 超时时间
+    type: integer
+    required: true
+    default: 120
+  support_type:
+  - llm
+  - text-embedding
+  - rerank
+  provider_category: manufacturer
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/geminichatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/geminichatcmpl.py
deleted file mode 100644
index 956b49f6..00000000
--- a/src/langbot/pkg/provider/modelmgr/requesters/geminichatcmpl.py
+++ /dev/null
@@ -1,205 +0,0 @@
-from __future__ import annotations
-
-import typing
-import httpx
-
-from . import chatcmpl
-
-import uuid
-
-from .. import requester
-import langbot_plugin.api.entities.builtin.provider.message as provider_message
-import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
-import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
-
-
-class GeminiChatCompletions(chatcmpl.OpenAIChatCompletions):
-    """Google Gemini API 请求器"""
-
-    default_config: dict[str, typing.Any] = {
-        'base_url': 'https://generativelanguage.googleapis.com/v1beta/openai',
-        'timeout': 120,
-    }
-
-    async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]:
-        models_url = 'https://generativelanguage.googleapis.com/v1beta/models'
-        params = {'key': api_key} if api_key else {}
-
-        all_models: list[dict[str, typing.Any]] = []
-        next_page_token = ''
-        last_payload: dict[str, typing.Any] = {}
-
-        async with httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']) as client:
-            while True:
-                request_params = dict(params)
-                if next_page_token:
-                    request_params['pageToken'] = next_page_token
-
-                response = await client.get(models_url, params=request_params)
-                response.raise_for_status()
-                payload = response.json()
-                last_payload = payload
-
-                for item in payload.get('models', []):
-                    model_name = item.get('name', '')
-                    model_id = model_name.replace('models/', '', 1)
-                    if not model_id:
-                        continue
-
-                    supported_methods = item.get('supportedGenerationMethods', []) or []
-                    if 'embedContent' in supported_methods and 'generateContent' not in supported_methods:
-                        model_type = 'embedding'
-                    else:
-                        model_type = 'llm'
-
-                    all_models.append(
-                        {
-                            'id': model_id,
-                            'name': model_id,
-                            'type': model_type,
-                            'abilities': self._infer_model_abilities(item, model_id),
-                            'display_name': item.get('displayName') or None,
-                            'description': item.get('description') or None,
-                            'context_length': item.get('inputTokenLimit'),
-                            'input_modalities': self._normalize_modalities(item.get('inputModalities')),
-                            'output_modalities': self._normalize_modalities(item.get('outputModalities')),
-                        }
-                    )
-
-                next_page_token = payload.get('nextPageToken', '')
-                if not next_page_token:
-                    break
-
-        all_models.sort(key=lambda item: (item['type'] != 'llm', item['name'].lower()))
-        return {
-            'models': all_models,
-            'debug': {
-                'request': {
-                    'method': 'GET',
-                    'url': models_url,
-                    'query': {'key': self._mask_api_key(api_key)} if api_key else {},
-                },
-                'response': last_payload,
-            },
-        }
-
-    async def _closure_stream(
-        self,
-        query: pipeline_query.Query,
-        req_messages: list[dict],
-        use_model: requester.RuntimeLLMModel,
-        use_funcs: list[resource_tool.LLMTool] = None,
-        extra_args: dict[str, typing.Any] = {},
-        remove_think: bool = False,
-    ) -> provider_message.MessageChunk:
-        self.client.api_key = use_model.provider.token_mgr.get_token()
-
-        args = {}
-        args['model'] = use_model.model_entity.name
-
-        if use_funcs:
-            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
-            if tools:
-                args['tools'] = tools
-
-        # 设置此次请求中的messages
-        messages = req_messages.copy()
-
-        # 检查vision
-        for msg in messages:
-            if 'content' in msg and isinstance(msg['content'], list):
-                for me in msg['content']:
-                    if me['type'] == 'image_base64':
-                        me['image_url'] = {'url': me['image_base64']}
-                        me['type'] = 'image_url'
-                        del me['image_base64']
-
-        args['messages'] = messages
-        args['stream'] = True
-
-        # 流式处理状态
-        # tool_calls_map: dict[str, provider_message.ToolCall] = {}
-        chunk_idx = 0
-        thinking_started = False
-        thinking_ended = False
-        role = 'assistant'  # 默认角色
-        tool_id = ''
-        tool_name = ''
-        # accumulated_reasoning = ''  # 仅用于判断何时结束思维链
-
-        async for chunk in self._req_stream(args, extra_body=extra_args):
-            # 解析 chunk 数据
-
-            if hasattr(chunk, 'choices') and chunk.choices:
-                choice = chunk.choices[0]
-                delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
-
-                finish_reason = getattr(choice, 'finish_reason', None)
-            else:
-                delta = {}
-                finish_reason = None
-            # 从第一个 chunk 获取 role，后续使用这个 role
-            if 'role' in delta and delta['role']:
-                role = delta['role']
-
-            # 获取增量内容
-            delta_content = delta.get('content', '')
-            reasoning_content = delta.get('reasoning_content', '')
-
-            # 处理 reasoning_content
-            if reasoning_content:
-                # accumulated_reasoning += reasoning_content
-                # 如果设置了 remove_think，跳过 reasoning_content
-                if remove_think:
-                    chunk_idx += 1
-                    continue
-
-                # 第一次出现 reasoning_content，添加 <think> 开始标签
-                if not thinking_started:
-                    thinking_started = True
-                    delta_content = '<think>\n' + reasoning_content
-                else:
-                    # 继续输出 reasoning_content
-                    delta_content = reasoning_content
-            elif thinking_started and not thinking_ended and delta_content:
-                # reasoning_content 结束，normal content 开始，添加 </think> 结束标签
-                thinking_ended = True
-                delta_content = '\n</think>\n' + delta_content
-
-            # 处理 content 中已有的 <think> 标签（如果需要移除）
-            # if delta_content and remove_think and '<think>' in delta_content:
-            #     import re
-            #
-            #     # 移除 <think> 标签及其内容
-            #     delta_content = re.sub(r'<think>.*?</think>', '', delta_content, flags=re.DOTALL)
-
-            # 处理工具调用增量
-            # delta_tool_calls = None
-            if delta.get('tool_calls'):
-                for tool_call in delta['tool_calls']:
-                    if tool_call['id'] == '' and tool_id == '':
-                        tool_id = str(uuid.uuid4())
-                    if tool_call['function']['name']:
-                        tool_name = tool_call['function']['name']
-                    tool_call['id'] = tool_id
-                    tool_call['function']['name'] = tool_name
-                    if tool_call['type'] is None:
-                        tool_call['type'] = 'function'
-
-            # 跳过空的第一个 chunk（只有 role 没有内容）
-            if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'):
-                chunk_idx += 1
-                continue
-            # 构建 MessageChunk - 只包含增量内容
-            chunk_data = {
-                'role': role,
-                'content': delta_content if delta_content else None,
-                'tool_calls': delta.get('tool_calls'),
-                'is_final': bool(finish_reason),
-            }
-
-            # 移除 None 值
-            chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
-
-            yield provider_message.MessageChunk(**chunk_data)
-            chunk_idx += 1
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/geminichatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/geminichatcmpl.yaml
index fdebe9b9..7c7ca308 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/geminichatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/geminichatcmpl.yaml
@@ -7,6 +7,7 @@ metadata:
     zh_Hans: Google Gemini
   icon: gemini.svg
 spec:
+  litellm_provider: gemini
   config:
   - name: base_url
     label:
@@ -24,6 +25,8 @@ spec:
     default: 120
   support_type:
   - llm
+  - text-embedding
+  - rerank
   provider_category: manufacturer
 execution:
   python:
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/giteeaichatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/giteeaichatcmpl.py
deleted file mode 100644
index 4e295e9f..00000000
--- a/src/langbot/pkg/provider/modelmgr/requesters/giteeaichatcmpl.py
+++ /dev/null
@@ -1,15 +0,0 @@
-from __future__ import annotations
-
-
-import typing
-
-from . import ppiochatcmpl
-
-
-class GiteeAIChatCompletions(ppiochatcmpl.PPIOChatCompletions):
-    """Gitee AI ChatCompletions API 请求器"""
-
-    default_config: dict[str, typing.Any] = {
-        'base_url': 'https://ai.gitee.com/v1',
-        'timeout': 120,
-    }
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/giteeaichatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/giteeaichatcmpl.yaml
index b7b158a7..f898889a 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/giteeaichatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/giteeaichatcmpl.yaml
@@ -7,6 +7,7 @@ metadata:
     zh_Hans: Gitee AI
   icon: giteeai.svg
 spec:
+  litellm_provider: openai
   config:
   - name: base_url
     label:
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/groq.svg b/src/langbot/pkg/provider/modelmgr/requesters/groq.svg
new file mode 100644
index 00000000..7c84ba68
--- /dev/null
+++ b/src/langbot/pkg/provider/modelmgr/requesters/groq.svg
@@ -0,0 +1,4 @@
+<svg width="60" height="50" viewBox="0 0 60 50" xmlns="http://www.w3.org/2000/svg">
+  <rect width="60" height="50" rx="8" fill="#F97316"/>
+  <text x="30" y="32" font-family="Arial, sans-serif" font-size="14" font-weight="bold" fill="white" text-anchor="middle">Groq</text>
+</svg>
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/groqchatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/groqchatcmpl.yaml
new file mode 100644
index 00000000..74632d36
--- /dev/null
+++ b/src/langbot/pkg/provider/modelmgr/requesters/groqchatcmpl.yaml
@@ -0,0 +1,30 @@
+apiVersion: v1
+kind: LLMAPIRequester
+metadata:
+  name: groq-chat-completions
+  label:
+    en_US: Groq
+    zh_Hans: Groq
+  icon: groq.svg
+spec:
+  litellm_provider: groq
+  config:
+  - name: base_url
+    label:
+      en_US: Base URL
+      zh_Hans: 基础 URL
+    type: string
+    required: true
+    default: https://api.groq.com/openai/v1
+  - name: timeout
+    label:
+      en_US: Timeout
+      zh_Hans: 超时时间
+    type: integer
+    required: true
+    default: 120
+  support_type:
+  - llm
+  - text-embedding
+  - rerank
+  provider_category: manufacturer
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/iflytek.svg b/src/langbot/pkg/provider/modelmgr/requesters/iflytek.svg
new file mode 100644
index 00000000..7498b149
--- /dev/null
+++ b/src/langbot/pkg/provider/modelmgr/requesters/iflytek.svg
@@ -0,0 +1,5 @@
+<svg width="60" height="50" viewBox="0 0 60 50" xmlns="http://www.w3.org/2000/svg">
+  <rect width="60" height="50" rx="8" fill="#0066FF"/>
+  <text x="30" y="28" font-family="Arial, sans-serif" font-size="10" font-weight="bold" fill="white" text-anchor="middle">iFlytek</text>
+  <text x="30" y="40" font-family="Arial, sans-serif" font-size="8" fill="white" text-anchor="middle">Spark</text>
+</svg>
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/iflytekchatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/iflytekchatcmpl.yaml
new file mode 100644
index 00000000..a02f38d1
--- /dev/null
+++ b/src/langbot/pkg/provider/modelmgr/requesters/iflytekchatcmpl.yaml
@@ -0,0 +1,30 @@
+apiVersion: v1
+kind: LLMAPIRequester
+metadata:
+  name: iflytek-chat-completions
+  label:
+    en_US: iFlytek Spark
+    zh_Hans: 讯飞星火
+  icon: iflytek.svg
+spec:
+  litellm_provider: openai
+  config:
+  - name: base_url
+    label:
+      en_US: Base URL
+      zh_Hans: 基础 URL
+    type: string
+    required: true
+    default: https://spark-api-open.xf-yun.com/v1
+  - name: timeout
+    label:
+      en_US: Timeout
+      zh_Hans: 超时时间
+    type: integer
+    required: true
+    default: 120
+  support_type:
+  - llm
+  - text-embedding
+  - rerank
+  provider_category: manufacturer
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/jiekouaichatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/jiekouaichatcmpl.py
deleted file mode 100644
index 305ae21f..00000000
--- a/src/langbot/pkg/provider/modelmgr/requesters/jiekouaichatcmpl.py
+++ /dev/null
@@ -1,208 +0,0 @@
-from __future__ import annotations
-
-import openai
-import typing
-
-from . import chatcmpl
-from .. import requester
-import openai.types.chat.chat_completion as chat_completion
-import re
-import langbot_plugin.api.entities.builtin.provider.message as provider_message
-import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
-import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
-
-
-class JieKouAIChatCompletions(chatcmpl.OpenAIChatCompletions):
-    """接口 AI ChatCompletion API 请求器"""
-
-    client: openai.AsyncClient
-
-    default_config: dict[str, typing.Any] = {
-        'base_url': 'https://api.jiekou.ai/openai',
-        'timeout': 120,
-    }
-
-    is_think: bool = False
-
-    async def _make_msg(
-        self,
-        chat_completion: chat_completion.ChatCompletion,
-        remove_think: bool,
-    ) -> provider_message.Message:
-        chatcmpl_message = chat_completion.choices[0].message.model_dump()
-        # print(chatcmpl_message.keys(), chatcmpl_message.values())
-
-        # 确保 role 字段存在且不为 None
-        if 'role' not in chatcmpl_message or chatcmpl_message['role'] is None:
-            chatcmpl_message['role'] = 'assistant'
-
-        reasoning_content = chatcmpl_message['reasoning_content'] if 'reasoning_content' in chatcmpl_message else None
-
-        # deepseek的reasoner模型
-        chatcmpl_message['content'] = await self._process_thinking_content(
-            chatcmpl_message['content'], reasoning_content, remove_think
-        )
-
-        # 移除 reasoning_content 字段，避免传递给 Message
-        if 'reasoning_content' in chatcmpl_message:
-            del chatcmpl_message['reasoning_content']
-
-        message = provider_message.Message(**chatcmpl_message)
-
-        return message
-
-    async def _process_thinking_content(
-        self,
-        content: str,
-        reasoning_content: str = None,
-        remove_think: bool = False,
-    ) -> tuple[str, str]:
-        """处理思维链内容
-
-        Args:
-            content: 原始内容
-            reasoning_content: reasoning_content 字段内容
-            remove_think: 是否移除思维链
-
-        Returns:
-            处理后的内容
-        """
-        if remove_think:
-            content = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL)
-        else:
-            if reasoning_content is not None:
-                content = '<think>\n' + reasoning_content + '\n</think>\n' + content
-        return content
-
-    async def _make_msg_chunk(
-        self,
-        delta: dict[str, typing.Any],
-        idx: int,
-    ) -> provider_message.MessageChunk:
-        # 处理流式chunk和完整响应的差异
-        # print(chat_completion.choices[0])
-
-        # 确保 role 字段存在且不为 None
-        if 'role' not in delta or delta['role'] is None:
-            delta['role'] = 'assistant'
-
-        reasoning_content = delta['reasoning_content'] if 'reasoning_content' in delta else None
-
-        delta['content'] = '' if delta['content'] is None else delta['content']
-        # print(reasoning_content)
-
-        # deepseek的reasoner模型
-
-        if reasoning_content is not None:
-            delta['content'] += reasoning_content
-
-        message = provider_message.MessageChunk(**delta)
-
-        return message
-
-    async def _closure_stream(
-        self,
-        query: pipeline_query.Query,
-        req_messages: list[dict],
-        use_model: requester.RuntimeLLMModel,
-        use_funcs: list[resource_tool.LLMTool] = None,
-        extra_args: dict[str, typing.Any] = {},
-        remove_think: bool = False,
-    ) -> provider_message.Message | typing.AsyncGenerator[provider_message.MessageChunk, None]:
-        self.client.api_key = use_model.provider.token_mgr.get_token()
-
-        args = {}
-        args['model'] = use_model.model_entity.name
-
-        if use_funcs:
-            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
-
-            if tools:
-                args['tools'] = tools
-
-        # 设置此次请求中的messages
-        messages = req_messages.copy()
-
-        # 检查vision
-        for msg in messages:
-            if 'content' in msg and isinstance(msg['content'], list):
-                for me in msg['content']:
-                    if me['type'] == 'image_base64':
-                        me['image_url'] = {'url': me['image_base64']}
-                        me['type'] = 'image_url'
-                        del me['image_base64']
-
-        args['messages'] = messages
-        args['stream'] = True
-
-        # tool_calls_map: dict[str, provider_message.ToolCall] = {}
-        chunk_idx = 0
-        thinking_started = False
-        thinking_ended = False
-        role = 'assistant'  # 默认角色
-        async for chunk in self._req_stream(args, extra_body=extra_args):
-            # 解析 chunk 数据
-            if hasattr(chunk, 'choices') and chunk.choices:
-                choice = chunk.choices[0]
-                delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
-                finish_reason = getattr(choice, 'finish_reason', None)
-            else:
-                delta = {}
-                finish_reason = None
-
-            # 从第一个 chunk 获取 role，后续使用这个 role
-            if 'role' in delta and delta['role']:
-                role = delta['role']
-
-            # 获取增量内容
-            delta_content = delta.get('content', '')
-            # reasoning_content = delta.get('reasoning_content', '')
-
-            if remove_think:
-                if delta['content'] is not None:
-                    if '<think>' in delta['content'] and not thinking_started and not thinking_ended:
-                        thinking_started = True
-                        continue
-                    elif delta['content'] == r'</think>' and not thinking_ended:
-                        thinking_ended = True
-                        continue
-                    elif thinking_ended and delta['content'] == '\n\n' and thinking_started:
-                        thinking_started = False
-                        continue
-                    elif thinking_started and not thinking_ended:
-                        continue
-
-            # delta_tool_calls = None
-            if delta.get('tool_calls'):
-                for tool_call in delta['tool_calls']:
-                    if tool_call['id'] and tool_call['function']['name']:
-                        tool_id = tool_call['id']
-                        tool_name = tool_call['function']['name']
-
-                    if tool_call['id'] is None:
-                        tool_call['id'] = tool_id
-                    if tool_call['function']['name'] is None:
-                        tool_call['function']['name'] = tool_name
-                    if tool_call['function']['arguments'] is None:
-                        tool_call['function']['arguments'] = ''
-                    if tool_call['type'] is None:
-                        tool_call['type'] = 'function'
-
-            # 跳过空的第一个 chunk（只有 role 没有内容）
-            if chunk_idx == 0 and not delta_content and not delta.get('tool_calls'):
-                chunk_idx += 1
-                continue
-
-            # 构建 MessageChunk - 只包含增量内容
-            chunk_data = {
-                'role': role,
-                'content': delta_content if delta_content else None,
-                'tool_calls': delta.get('tool_calls'),
-                'is_final': bool(finish_reason),
-            }
-
-            # 移除 None 值
-            chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
-
-            yield provider_message.MessageChunk(**chunk_data)
-            chunk_idx += 1
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/jiekouaichatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/jiekouaichatcmpl.yaml
index 3c791d73..60ed9840 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/jiekouaichatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/jiekouaichatcmpl.yaml
@@ -7,6 +7,7 @@ metadata:
     zh_Hans: 接口 AI
   icon: jiekouai.png
 spec:
+  litellm_provider: openai
   config:
   - name: base_url
     label:
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/jinarerank.yaml b/src/langbot/pkg/provider/modelmgr/requesters/jinarerank.yaml
index 3b448e38..87c6b72d 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/jinarerank.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/jinarerank.yaml
@@ -7,6 +7,7 @@ metadata:
     zh_Hans: Jina
   icon: jina.svg
 spec:
+  litellm_provider: openai
   config:
   - name: base_url
     label:
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/litellmchat.py b/src/langbot/pkg/provider/modelmgr/requesters/litellmchat.py
index ae776e4d..46f81179 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/litellmchat.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/litellmchat.py
@@ -152,6 +152,10 @@ class LiteLLMRequester(requester.ProviderAPIRequester):
             args['stream'] = True
             args['stream_options'] = {'include_usage': True}
         self._build_common_args(args)
+
+        # Apply model-level extra_args first, then call-level extra_args
+        if model.model_entity.extra_args:
+            args.update(model.model_entity.extra_args)
         args.update(extra_args)
 
         if funcs:
@@ -239,9 +243,15 @@ class LiteLLMRequester(requester.ProviderAPIRequester):
                 delta_content = delta.get('content', '')
                 reasoning_content = delta.get('reasoning_content', '')
 
+                # Handle reasoning_content based on remove_think flag
                 if reasoning_content:
-                    chunk_idx += 1
-                    continue
+                    if remove_think:
+                        # Skip reasoning content when remove_think is True
+                        chunk_idx += 1
+                        continue
+                    else:
+                        # Use reasoning_content as the displayed content
+                        delta_content = reasoning_content
 
                 if chunk_idx == 0 and not delta_content and not delta.get('tool_calls'):
                     chunk_idx += 1
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.py
deleted file mode 100644
index c9060c1b..00000000
--- a/src/langbot/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.py
+++ /dev/null
@@ -1,17 +0,0 @@
-from __future__ import annotations
-
-import typing
-import openai
-
-from . import chatcmpl
-
-
-class LmStudioChatCompletions(chatcmpl.OpenAIChatCompletions):
-    """LMStudio ChatCompletion API 请求器"""
-
-    client: openai.AsyncClient
-
-    default_config: dict[str, typing.Any] = {
-        'base_url': 'http://127.0.0.1:1234/v1',
-        'timeout': 120,
-    }
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.yaml
index 81dc82cf..11570903 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/lmstudiochatcmpl.yaml
@@ -7,6 +7,7 @@ metadata:
     zh_Hans: LM Studio
   icon: lmstudio.webp
 spec:
+  litellm_provider: openai
   config:
   - name: base_url
     label:
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/mimo.svg b/src/langbot/pkg/provider/modelmgr/requesters/mimo.svg
new file mode 100644
index 00000000..5d9b21dc
--- /dev/null
+++ b/src/langbot/pkg/provider/modelmgr/requesters/mimo.svg
@@ -0,0 +1,4 @@
+<svg width="60" height="50" viewBox="0 0 60 50" xmlns="http://www.w3.org/2000/svg">
+  <rect width="60" height="50" rx="8" fill="#FF6700"/>
+  <text x="30" y="32" font-family="Arial, sans-serif" font-size="18" font-weight="bold" fill="white" text-anchor="middle">MiMo</text>
+</svg>
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/mimochatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/mimochatcmpl.yaml
new file mode 100644
index 00000000..a871145b
--- /dev/null
+++ b/src/langbot/pkg/provider/modelmgr/requesters/mimochatcmpl.yaml
@@ -0,0 +1,30 @@
+apiVersion: v1
+kind: LLMAPIRequester
+metadata:
+  name: mimo-chat-completions
+  label:
+    en_US: Xiaomi MiMo
+    zh_Hans: 小米 MiMo
+  icon: mimo.svg
+spec:
+  litellm_provider: openai
+  config:
+  - name: base_url
+    label:
+      en_US: Base URL
+      zh_Hans: 基础 URL
+    type: string
+    required: true
+    default: https://api.xiaomimimo.com/v1
+  - name: timeout
+    label:
+      en_US: Timeout
+      zh_Hans: 超时时间
+    type: integer
+    required: true
+    default: 120
+  support_type:
+  - llm
+  - text-embedding
+  - rerank
+  provider_category: manufacturer
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/minimax.svg b/src/langbot/pkg/provider/modelmgr/requesters/minimax.svg
new file mode 100644
index 00000000..1afeadc3
--- /dev/null
+++ b/src/langbot/pkg/provider/modelmgr/requesters/minimax.svg
@@ -0,0 +1,4 @@
+<svg width="60" height="50" viewBox="0 0 60 50" xmlns="http://www.w3.org/2000/svg">
+  <rect width="60" height="50" rx="8" fill="#4F46E5"/>
+  <text x="30" y="32" font-family="Arial, sans-serif" font-size="12" font-weight="bold" fill="white" text-anchor="middle">MiniMax</text>
+</svg>
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/minimaxchatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/minimaxchatcmpl.yaml
new file mode 100644
index 00000000..c92bce69
--- /dev/null
+++ b/src/langbot/pkg/provider/modelmgr/requesters/minimaxchatcmpl.yaml
@@ -0,0 +1,30 @@
+apiVersion: v1
+kind: LLMAPIRequester
+metadata:
+  name: minimax-chat-completions
+  label:
+    en_US: MiniMax
+    zh_Hans: MiniMax
+  icon: minimax.svg
+spec:
+  litellm_provider: openai
+  config:
+  - name: base_url
+    label:
+      en_US: Base URL
+      zh_Hans: 基础 URL
+    type: string
+    required: true
+    default: https://api.minimax.chat/v1
+  - name: timeout
+    label:
+      en_US: Timeout
+      zh_Hans: 超时时间
+    type: integer
+    required: true
+    default: 120
+  support_type:
+  - llm
+  - text-embedding
+  - rerank
+  provider_category: manufacturer
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/mistral.svg b/src/langbot/pkg/provider/modelmgr/requesters/mistral.svg
new file mode 100644
index 00000000..853022d9
--- /dev/null
+++ b/src/langbot/pkg/provider/modelmgr/requesters/mistral.svg
@@ -0,0 +1,5 @@
+<svg width="60" height="50" viewBox="0 0 60 50" xmlns="http://www.w3.org/2000/svg">
+  <rect width="60" height="50" rx="8" fill="#FF6B35"/>
+  <text x="30" y="28" font-family="Arial, sans-serif" font-size="10" font-weight="bold" fill="white" text-anchor="middle">Mistral</text>
+  <text x="30" y="40" font-family="Arial, sans-serif" font-size="8" fill="white" text-anchor="middle">AI</text>
+</svg>
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/mistralchatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/mistralchatcmpl.yaml
new file mode 100644
index 00000000..7d66f599
--- /dev/null
+++ b/src/langbot/pkg/provider/modelmgr/requesters/mistralchatcmpl.yaml
@@ -0,0 +1,30 @@
+apiVersion: v1
+kind: LLMAPIRequester
+metadata:
+  name: mistral-chat-completions
+  label:
+    en_US: Mistral AI
+    zh_Hans: Mistral AI
+  icon: mistral.svg
+spec:
+  litellm_provider: mistral
+  config:
+  - name: base_url
+    label:
+      en_US: Base URL
+      zh_Hans: 基础 URL
+    type: string
+    required: true
+    default: https://api.mistral.ai/v1
+  - name: timeout
+    label:
+      en_US: Timeout
+      zh_Hans: 超时时间
+    type: integer
+    required: true
+    default: 120
+  support_type:
+  - llm
+  - text-embedding
+  - rerank
+  provider_category: manufacturer
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/modelscopechatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/modelscopechatcmpl.py
deleted file mode 100644
index ed5d8795..00000000
--- a/src/langbot/pkg/provider/modelmgr/requesters/modelscopechatcmpl.py
+++ /dev/null
@@ -1,561 +0,0 @@
-from __future__ import annotations
-
-import asyncio
-import typing
-
-import openai
-import openai.types.chat.chat_completion as chat_completion
-import httpx
-
-from .. import entities, errors, requester
-import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
-import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
-import langbot_plugin.api.entities.builtin.provider.message as provider_message
-
-
-class ModelScopeChatCompletions(requester.ProviderAPIRequester):
-    """ModelScope ChatCompletion API 请求器"""
-
-    client: openai.AsyncClient
-
-    default_config: dict[str, typing.Any] = {
-        'base_url': 'https://api-inference.modelscope.cn/v1',
-        'timeout': 120,
-    }
-
-    async def initialize(self):
-        self.client = openai.AsyncClient(
-            api_key='',
-            base_url=self.requester_cfg['base_url'],
-            timeout=self.requester_cfg['timeout'],
-            http_client=httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']),
-        )
-
-    def _mask_api_key(self, api_key: str | None) -> str:
-        if not api_key:
-            return ''
-        if len(api_key) <= 8:
-            return '****'
-        return f'{api_key[:4]}...{api_key[-4:]}'
-
-    def _infer_model_type(self, model_id: str) -> str:
-        normalized_model_id = (model_id or '').lower()
-        embedding_keywords = (
-            'embedding',
-            'embed',
-            'bge-',
-            'e5-',
-            'm3e',
-            'gte-',
-            'multilingual-e5',
-            'text-embedding',
-        )
-        return 'embedding' if any(keyword in normalized_model_id for keyword in embedding_keywords) else 'llm'
-
-    def _infer_model_abilities(self, item: dict[str, typing.Any], model_id: str) -> list[str]:
-        normalized_model_id = (model_id or '').lower()
-        abilities: set[str] = set()
-
-        def _flatten(value: typing.Any) -> list[str]:
-            if value is None:
-                return []
-            if isinstance(value, str):
-                return [value.lower()]
-            if isinstance(value, dict):
-                flattened: list[str] = []
-                for nested_value in value.values():
-                    flattened.extend(_flatten(nested_value))
-                return flattened
-            if isinstance(value, (list, tuple, set)):
-                flattened: list[str] = []
-                for nested_value in value:
-                    flattened.extend(_flatten(nested_value))
-                return flattened
-            return [str(value).lower()]
-
-        capability_tokens = _flatten(item.get('capabilities'))
-        capability_tokens.extend(_flatten(item.get('modalities')))
-        capability_tokens.extend(_flatten(item.get('input_modalities')))
-        capability_tokens.extend(_flatten(item.get('output_modalities')))
-        capability_tokens.extend(_flatten(item.get('supported_generation_methods')))
-        capability_tokens.extend(_flatten(item.get('supported_parameters')))
-        capability_tokens.extend(_flatten(item.get('architecture')))
-
-        combined_tokens = capability_tokens + [normalized_model_id]
-
-        vision_keywords = ('vision', 'image', 'file', 'video', 'multimodal', 'vl', 'ocr', 'omni')
-        function_call_keywords = ('function', 'tool', 'tools', 'tool_choice', 'tool_call', 'tool-use', 'tool_use')
-
-        if any(any(keyword in token for keyword in vision_keywords) for token in combined_tokens):
-            abilities.add('vision')
-
-        if any(any(keyword in token for keyword in function_call_keywords) for token in combined_tokens):
-            abilities.add('func_call')
-
-        return sorted(abilities)
-
-    def _normalize_modalities(self, value: typing.Any) -> list[str]:
-        normalized: list[str] = []
-
-        def _collect(item: typing.Any):
-            if item is None:
-                return
-            if isinstance(item, str):
-                for part in item.replace('->', ',').replace('+', ',').split(','):
-                    token = part.strip().lower()
-                    if token and token not in normalized:
-                        normalized.append(token)
-                return
-            if isinstance(item, dict):
-                for nested in item.values():
-                    _collect(nested)
-                return
-            if isinstance(item, (list, tuple, set)):
-                for nested in item:
-                    _collect(nested)
-                return
-
-        _collect(value)
-        return normalized
-
-    def _extract_scan_metadata(self, item: dict[str, typing.Any], model_id: str) -> dict[str, typing.Any]:
-        display_name = item.get('name')
-        if not isinstance(display_name, str) or not display_name.strip() or display_name == model_id:
-            display_name = ''
-
-        description = item.get('description')
-        if not isinstance(description, str) or not description.strip():
-            description = ''
-
-        context_length = item.get('context_length')
-        if context_length is None and isinstance(item.get('top_provider'), dict):
-            context_length = item['top_provider'].get('context_length')
-
-        if not isinstance(context_length, int):
-            try:
-                context_length = int(context_length) if context_length is not None else None
-            except (TypeError, ValueError):
-                context_length = None
-
-        input_modalities = self._normalize_modalities(item.get('input_modalities'))
-        output_modalities = self._normalize_modalities(item.get('output_modalities'))
-
-        if isinstance(item.get('architecture'), dict):
-            if not input_modalities:
-                input_modalities = self._normalize_modalities(item['architecture'].get('input_modalities'))
-            if not output_modalities:
-                output_modalities = self._normalize_modalities(item['architecture'].get('output_modalities'))
-
-        owned_by = item.get('owned_by')
-        if not isinstance(owned_by, str) or not owned_by.strip():
-            owned_by = ''
-
-        return {
-            'display_name': display_name or None,
-            'description': description or None,
-            'context_length': context_length,
-            'owned_by': owned_by or None,
-            'input_modalities': input_modalities,
-            'output_modalities': output_modalities,
-        }
-
-    async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]:
-        headers = {}
-        if api_key:
-            headers['Authorization'] = f'Bearer {api_key}'
-
-        models_url = f'{self.requester_cfg["base_url"].rstrip("/")}/models'
-        async with httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']) as client:
-            response = await client.get(models_url, headers=headers)
-            response.raise_for_status()
-            payload = response.json()
-
-        models = []
-        for item in payload.get('data', []):
-            model_id = item.get('id')
-            if not model_id:
-                continue
-            models.append(
-                {
-                    'id': model_id,
-                    'name': model_id,
-                    'type': self._infer_model_type(model_id),
-                    'abilities': self._infer_model_abilities(item, model_id),
-                    **self._extract_scan_metadata(item, model_id),
-                }
-            )
-
-        models.sort(key=lambda item: (item['type'] != 'llm', item['name'].lower()))
-        return {
-            'models': models,
-            'debug': {
-                'request': {
-                    'method': 'GET',
-                    'url': models_url,
-                    'headers': {
-                        'Authorization': f'Bearer {self._mask_api_key(api_key)}' if api_key else '',
-                    },
-                },
-                'response': payload,
-            },
-        }
-
-    async def _req(
-        self,
-        query: pipeline_query.Query,
-        args: dict,
-        extra_body: dict = {},
-        remove_think: bool = False,
-    ) -> list[dict[str, typing.Any]]:
-        args['stream'] = True
-
-        chunk = None
-
-        pending_content = ''
-
-        tool_calls = []
-
-        resp_gen: openai.AsyncStream = await self.client.chat.completions.create(**args, extra_body=extra_body)
-
-        chunk_idx = 0
-        thinking_started = False
-        thinking_ended = False
-        tool_id = ''
-        tool_name = ''
-        message_delta = {}
-        async for chunk in resp_gen:
-            if not chunk or not chunk.id or not chunk.choices or not chunk.choices[0] or not chunk.choices[0].delta:
-                continue
-
-            delta = chunk.choices[0].delta.model_dump() if hasattr(chunk.choices[0], 'delta') else {}
-            reasoning_content = delta.get('reasoning_content')
-            # 处理 reasoning_content
-            if reasoning_content:
-                # accumulated_reasoning += reasoning_content
-                # 如果设置了 remove_think，跳过 reasoning_content
-                if remove_think:
-                    chunk_idx += 1
-                    continue
-
-                # 第一次出现 reasoning_content，添加 <think> 开始标签
-                if not thinking_started:
-                    thinking_started = True
-                    pending_content += '<think>\n' + reasoning_content
-                else:
-                    # 继续输出 reasoning_content
-                    pending_content += reasoning_content
-            elif thinking_started and not thinking_ended and delta.get('content'):
-                # reasoning_content 结束，normal content 开始，添加 </think> 结束标签
-                thinking_ended = True
-                pending_content += '\n</think>\n' + delta.get('content')
-
-            if delta.get('content') is not None:
-                pending_content += delta.get('content')
-
-            if delta.get('tool_calls') is not None:
-                for tool_call in delta.get('tool_calls'):
-                    if tool_call['id'] != '':
-                        tool_id = tool_call['id']
-                    if tool_call['function']['name'] is not None:
-                        tool_name = tool_call['function']['name']
-                    if tool_call['function']['arguments'] is None:
-                        continue
-                    tool_call['id'] = tool_id
-                    tool_call['name'] = tool_name
-                    for tc in tool_calls:
-                        if tc['index'] == tool_call['index']:
-                            tc['function']['arguments'] += tool_call['function']['arguments']
-                            break
-                    else:
-                        tool_calls.append(tool_call)
-
-            if chunk.choices[0].finish_reason is not None:
-                break
-        message_delta['content'] = pending_content
-        message_delta['role'] = 'assistant'
-
-        message_delta['tool_calls'] = tool_calls if tool_calls else None
-        return [message_delta]
-
-    async def _make_msg(
-        self,
-        chat_completion: list[dict[str, typing.Any]],
-    ) -> provider_message.Message:
-        chatcmpl_message = chat_completion[0]
-
-        # 确保 role 字段存在且不为 None
-        if 'role' not in chatcmpl_message or chatcmpl_message['role'] is None:
-            chatcmpl_message['role'] = 'assistant'
-
-        message = provider_message.Message(**chatcmpl_message)
-
-        return message
-
-    async def _closure(
-        self,
-        query: pipeline_query.Query,
-        req_messages: list[dict],
-        use_model: requester.RuntimeLLMModel,
-        use_funcs: list[resource_tool.LLMTool] = None,
-        extra_args: dict[str, typing.Any] = {},
-        remove_think: bool = False,
-    ) -> tuple[provider_message.Message, dict]:
-        self.client.api_key = use_model.provider.token_mgr.get_token()
-
-        args = {}
-        args['model'] = use_model.model_entity.name
-
-        if use_funcs:
-            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
-
-            if tools:
-                args['tools'] = tools
-
-        # 设置此次请求中的messages
-        messages = req_messages.copy()
-
-        # 检查vision
-        for msg in messages:
-            if 'content' in msg and isinstance(msg['content'], list):
-                for me in msg['content']:
-                    if me['type'] == 'image_base64':
-                        me['image_url'] = {'url': me['image_base64']}
-                        me['type'] = 'image_url'
-                        del me['image_base64']
-
-        args['messages'] = messages
-
-        # 发送请求
-        resp = await self._req(query, args, extra_body=extra_args, remove_think=remove_think)
-
-        # 处理请求结果
-        message = await self._make_msg(resp)
-
-        # ModelScope uses streaming, usage info not available
-        usage_info = {}
-
-        return message, usage_info
-
-    async def _req_stream(
-        self,
-        args: dict,
-        extra_body: dict = {},
-    ) -> chat_completion.ChatCompletion:
-        async for chunk in await self.client.chat.completions.create(**args, extra_body=extra_body):
-            yield chunk
-
-    async def _closure_stream(
-        self,
-        query: pipeline_query.Query,
-        req_messages: list[dict],
-        use_model: requester.RuntimeLLMModel,
-        use_funcs: list[resource_tool.LLMTool] = None,
-        extra_args: dict[str, typing.Any] = {},
-        remove_think: bool = False,
-    ) -> provider_message.Message | typing.AsyncGenerator[provider_message.MessageChunk, None]:
-        self.client.api_key = use_model.provider.token_mgr.get_token()
-
-        args = {}
-        args['model'] = use_model.model_entity.name
-
-        if use_funcs:
-            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
-
-            if tools:
-                args['tools'] = tools
-
-        # 设置此次请求中的messages
-        messages = req_messages.copy()
-
-        # 检查vision
-        for msg in messages:
-            if 'content' in msg and isinstance(msg['content'], list):
-                for me in msg['content']:
-                    if me['type'] == 'image_base64':
-                        me['image_url'] = {'url': me['image_base64']}
-                        me['type'] = 'image_url'
-                        del me['image_base64']
-
-        args['messages'] = messages
-        args['stream'] = True
-
-        # 流式处理状态
-        # tool_calls_map: dict[str, provider_message.ToolCall] = {}
-        chunk_idx = 0
-        thinking_started = False
-        thinking_ended = False
-        role = 'assistant'  # 默认角色
-        # accumulated_reasoning = ''  # 仅用于判断何时结束思维链
-
-        async for chunk in self._req_stream(args, extra_body=extra_args):
-            # 解析 chunk 数据
-            if hasattr(chunk, 'choices') and chunk.choices:
-                choice = chunk.choices[0]
-                delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
-                finish_reason = getattr(choice, 'finish_reason', None)
-            else:
-                delta = {}
-                finish_reason = None
-
-            # 从第一个 chunk 获取 role，后续使用这个 role
-            if 'role' in delta and delta['role']:
-                role = delta['role']
-
-            # 获取增量内容
-            delta_content = delta.get('content', '')
-            reasoning_content = delta.get('reasoning_content', '')
-
-            # 处理 reasoning_content
-            if reasoning_content:
-                # accumulated_reasoning += reasoning_content
-                # 如果设置了 remove_think，跳过 reasoning_content
-                if remove_think:
-                    chunk_idx += 1
-                    continue
-
-                # 第一次出现 reasoning_content，添加 <think> 开始标签
-                if not thinking_started:
-                    thinking_started = True
-                    delta_content = '<think>\n' + reasoning_content
-                else:
-                    # 继续输出 reasoning_content
-                    delta_content = reasoning_content
-            elif thinking_started and not thinking_ended and delta_content:
-                # reasoning_content 结束，normal content 开始，添加 </think> 结束标签
-                thinking_ended = True
-                delta_content = '\n</think>\n' + delta_content
-
-            # 处理 content 中已有的 <think> 标签（如果需要移除）
-            # if delta_content and remove_think and '<think>' in delta_content:
-            #     import re
-            #
-            #     # 移除 <think> 标签及其内容
-            #     delta_content = re.sub(r'<think>.*?</think>', '', delta_content, flags=re.DOTALL)
-
-            # 处理工具调用增量
-            if delta.get('tool_calls'):
-                for tool_call in delta['tool_calls']:
-                    if tool_call['id'] != '':
-                        tool_id = tool_call['id']
-                    if tool_call['function']['name'] is not None:
-                        tool_name = tool_call['function']['name']
-
-                    if tool_call['type'] is None:
-                        tool_call['type'] = 'function'
-                    tool_call['id'] = tool_id
-                    tool_call['function']['name'] = tool_name
-                    tool_call['function']['arguments'] = (
-                        '' if tool_call['function']['arguments'] is None else tool_call['function']['arguments']
-                    )
-
-            # 跳过空的第一个 chunk（只有 role 没有内容）
-            if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'):
-                chunk_idx += 1
-                continue
-
-            # 构建 MessageChunk - 只包含增量内容
-            chunk_data = {
-                'role': role,
-                'content': delta_content if delta_content else None,
-                'tool_calls': delta.get('tool_calls'),
-                'is_final': bool(finish_reason),
-            }
-
-            # 移除 None 值
-            chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
-
-            yield provider_message.MessageChunk(**chunk_data)
-            chunk_idx += 1
-            # return
-
-    async def invoke_llm(
-        self,
-        query: pipeline_query.Query,
-        model: entities.LLMModelInfo,
-        messages: typing.List[provider_message.Message],
-        funcs: typing.List[resource_tool.LLMTool] = None,
-        extra_args: dict[str, typing.Any] = {},
-        remove_think: bool = False,
-    ) -> provider_message.Message:
-        req_messages = []  # req_messages 仅用于类内，外部同步由 query.messages 进行
-        for m in messages:
-            msg_dict = m.dict(exclude_none=True)
-            content = msg_dict.get('content')
-            if isinstance(content, list):
-                # 检查 content 列表中是否每个部分都是文本
-                if all(isinstance(part, dict) and part.get('type') == 'text' for part in content):
-                    # 将所有文本部分合并为一个字符串
-                    msg_dict['content'] = '\n'.join(part['text'] for part in content)
-            req_messages.append(msg_dict)
-
-        try:
-            return await self._closure(
-                query=query,
-                req_messages=req_messages,
-                use_model=model,
-                use_funcs=funcs,
-                extra_args=extra_args,
-                remove_think=remove_think,
-            )
-        except asyncio.TimeoutError:
-            raise errors.RequesterError('请求超时')
-        except openai.BadRequestError as e:
-            if 'context_length_exceeded' in e.message:
-                raise errors.RequesterError(f'上文过长，请重置会话: {e.message}')
-            else:
-                raise errors.RequesterError(f'请求参数错误: {e.message}')
-        except openai.AuthenticationError as e:
-            raise errors.RequesterError(f'无效的 api-key: {e.message}')
-        except openai.NotFoundError as e:
-            raise errors.RequesterError(f'请求路径错误: {e.message}')
-        except openai.RateLimitError as e:
-            raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}')
-        except openai.APIError as e:
-            raise errors.RequesterError(f'请求错误: {e.message}')
-
-    async def invoke_llm_stream(
-        self,
-        query: pipeline_query.Query,
-        model: requester.RuntimeLLMModel,
-        messages: typing.List[provider_message.Message],
-        funcs: typing.List[resource_tool.LLMTool] = None,
-        extra_args: dict[str, typing.Any] = {},
-        remove_think: bool = False,
-    ) -> provider_message.MessageChunk:
-        req_messages = []  # req_messages 仅用于类内，外部同步由 query.messages 进行
-        for m in messages:
-            msg_dict = m.dict(exclude_none=True)
-            content = msg_dict.get('content')
-            if isinstance(content, list):
-                # 检查 content 列表中是否每个部分都是文本
-                if all(isinstance(part, dict) and part.get('type') == 'text' for part in content):
-                    # 将所有文本部分合并为一个字符串
-                    msg_dict['content'] = '\n'.join(part['text'] for part in content)
-            req_messages.append(msg_dict)
-
-        try:
-            async for item in self._closure_stream(
-                query=query,
-                req_messages=req_messages,
-                use_model=model,
-                use_funcs=funcs,
-                extra_args=extra_args,
-                remove_think=remove_think,
-            ):
-                yield item
-
-        except asyncio.TimeoutError:
-            raise errors.RequesterError('请求超时')
-        except openai.BadRequestError as e:
-            if 'context_length_exceeded' in e.message:
-                raise errors.RequesterError(f'上文过长，请重置会话: {e.message}')
-            else:
-                raise errors.RequesterError(f'请求参数错误: {e.message}')
-        except openai.AuthenticationError as e:
-            raise errors.RequesterError(f'无效的 api-key: {e.message}')
-        except openai.NotFoundError as e:
-            raise errors.RequesterError(f'请求路径错误: {e.message}')
-        except openai.RateLimitError as e:
-            raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}')
-        except openai.APIError as e:
-            raise errors.RequesterError(f'请求错误: {e.message}')
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/modelscopechatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/modelscopechatcmpl.yaml
index 8d22002d..35705c2e 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/modelscopechatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/modelscopechatcmpl.yaml
@@ -7,6 +7,7 @@ metadata:
     zh_Hans: 魔搭社区
   icon: modelscope.svg
 spec:
+  litellm_provider: openai
   config:
   - name: base_url
     label:
@@ -31,6 +32,8 @@ spec:
     default: 120
   support_type:
   - llm
+  - text-embedding
+  - rerank
   provider_category: maas
 execution:
   python:
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/moonshotchatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/moonshotchatcmpl.py
deleted file mode 100644
index b6852963..00000000
--- a/src/langbot/pkg/provider/modelmgr/requesters/moonshotchatcmpl.py
+++ /dev/null
@@ -1,67 +0,0 @@
-from __future__ import annotations
-
-import typing
-
-
-from . import chatcmpl
-from .. import requester
-import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
-import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
-import langbot_plugin.api.entities.builtin.provider.message as provider_message
-
-
-class MoonshotChatCompletions(chatcmpl.OpenAIChatCompletions):
-    """Moonshot ChatCompletion API 请求器"""
-
-    default_config: dict[str, typing.Any] = {
-        'base_url': 'https://api.moonshot.cn/v1',
-        'timeout': 120,
-    }
-
-    async def _closure(
-        self,
-        query: pipeline_query.Query,
-        req_messages: list[dict],
-        use_model: requester.RuntimeLLMModel,
-        use_funcs: list[resource_tool.LLMTool] = None,
-        extra_args: dict[str, typing.Any] = {},
-        remove_think: bool = False,
-    ) -> tuple[provider_message.Message, dict]:
-        self.client.api_key = use_model.provider.token_mgr.get_token()
-
-        args = {}
-        args['model'] = use_model.model_entity.name
-
-        if use_funcs:
-            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
-
-            if tools:
-                args['tools'] = tools
-
-        # 设置此次请求中的messages
-        messages = req_messages
-
-        # deepseek 不支持多模态，把content都转换成纯文字
-        for m in messages:
-            if 'content' in m and isinstance(m['content'], list):
-                m['content'] = ' '.join([c['text'] for c in m['content']])
-
-        # 删除空的，不知道干嘛的，直接删了。
-        # messages = [m for m in messages if m["content"].strip() != "" and ('tool_calls' not in m or not m['tool_calls'])]
-
-        args['messages'] = messages
-
-        # 发送请求
-        resp = await self._req(args, extra_body=extra_args)
-
-        # 处理请求结果
-        message = await self._make_msg(resp, remove_think)
-
-        # Extract token usage from response
-        usage_info = {}
-        if hasattr(resp, 'usage') and resp.usage:
-            usage_info['input_tokens'] = resp.usage.prompt_tokens or 0
-            usage_info['output_tokens'] = resp.usage.completion_tokens or 0
-            usage_info['total_tokens'] = resp.usage.total_tokens or 0
-
-        return message, usage_info
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/moonshotchatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/moonshotchatcmpl.yaml
index 7a7e3060..81a59e8f 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/moonshotchatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/moonshotchatcmpl.yaml
@@ -7,6 +7,7 @@ metadata:
     zh_Hans: 月之暗面
   icon: moonshot.png
 spec:
+  litellm_provider: openai
   config:
   - name: base_url
     label:
@@ -24,6 +25,8 @@ spec:
     default: 120
   support_type:
   - llm
+  - text-embedding
+  - rerank
   provider_category: manufacturer
 execution:
   python:
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/newapichatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/newapichatcmpl.py
deleted file mode 100644
index 3c2bd3fb..00000000
--- a/src/langbot/pkg/provider/modelmgr/requesters/newapichatcmpl.py
+++ /dev/null
@@ -1,17 +0,0 @@
-from __future__ import annotations
-
-import typing
-import openai
-
-from . import chatcmpl
-
-
-class NewAPIChatCompletions(chatcmpl.OpenAIChatCompletions):
-    """New API ChatCompletion API 请求器"""
-
-    client: openai.AsyncClient
-
-    default_config: dict[str, typing.Any] = {
-        'base_url': 'http://localhost:3000/v1',
-        'timeout': 120,
-    }
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/newapichatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/newapichatcmpl.yaml
index e0f44e99..9eaf182c 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/newapichatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/newapichatcmpl.yaml
@@ -7,6 +7,7 @@ metadata:
     zh_Hans: New API
   icon: newapi.png
 spec:
+  litellm_provider: openai
   config:
   - name: base_url
     label:
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/ollamachat.py b/src/langbot/pkg/provider/modelmgr/requesters/ollamachat.py
deleted file mode 100644
index 50f601d7..00000000
--- a/src/langbot/pkg/provider/modelmgr/requesters/ollamachat.py
+++ /dev/null
@@ -1,314 +0,0 @@
-from __future__ import annotations
-
-import asyncio
-import os
-import typing
-from typing import Union, Mapping, Any, AsyncIterator
-import uuid
-import json
-
-import ollama
-import httpx
-
-from .. import errors, requester
-import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
-import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
-import langbot_plugin.api.entities.builtin.provider.message as provider_message
-
-REQUESTER_NAME: str = 'ollama-chat'
-
-
-class OllamaChatCompletions(requester.ProviderAPIRequester):
-    """Ollama平台 ChatCompletion API请求器"""
-
-    client: ollama.AsyncClient
-
-    default_config: dict[str, typing.Any] = {
-        'base_url': 'http://127.0.0.1:11434',
-        'timeout': 120,
-    }
-
-    async def initialize(self):
-        os.environ['OLLAMA_HOST'] = self.requester_cfg['base_url']
-        self.client = ollama.AsyncClient(timeout=self.requester_cfg['timeout'])
-
-    def _infer_model_type(self, model_id: str) -> str:
-        normalized_model_id = (model_id or '').lower()
-        embedding_keywords = ('embedding', 'embed', 'bge-', 'e5-', 'm3e', 'gte-', 'text-embedding')
-        return 'embedding' if any(keyword in normalized_model_id for keyword in embedding_keywords) else 'llm'
-
-    def _infer_model_abilities(self, item: dict[str, typing.Any], model_id: str) -> list[str]:
-        normalized_model_id = (model_id or '').lower()
-        abilities: set[str] = set()
-        details = item.get('details', {}) or {}
-        families = details.get('families', []) or []
-        tokens = [normalized_model_id, str(details.get('family', '')).lower()]
-        tokens.extend(str(family).lower() for family in families)
-
-        if any(keyword in token for token in tokens for keyword in ('vision', 'vl', 'omni', 'llava', 'ocr')):
-            abilities.add('vision')
-        if any(keyword in token for token in tokens for keyword in ('tool', 'function')):
-            abilities.add('func_call')
-        return sorted(abilities)
-
-    async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]:
-        del api_key
-        models_url = f'{self.requester_cfg["base_url"].rstrip("/")}/api/tags'
-
-        async with httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']) as client:
-            response = await client.get(models_url)
-            response.raise_for_status()
-            payload = response.json()
-
-        models: list[dict[str, typing.Any]] = []
-        for item in payload.get('models', []):
-            model_id = item.get('model') or item.get('name')
-            if not model_id:
-                continue
-            models.append(
-                {
-                    'id': model_id,
-                    'name': item.get('name', model_id),
-                    'type': self._infer_model_type(model_id),
-                    'abilities': self._infer_model_abilities(item, model_id),
-                }
-            )
-
-        models.sort(key=lambda item: (item['type'] != 'llm', item['name'].lower()))
-        return {
-            'models': models,
-            'debug': {
-                'request': {
-                    'method': 'GET',
-                    'url': models_url,
-                },
-                'response': payload,
-            },
-        }
-
-    async def _req(
-        self,
-        args: dict,
-    ) -> Union[Mapping[str, Any], AsyncIterator[Mapping[str, Any]]]:
-        return await self.client.chat(**args)
-
-    async def _closure(
-        self,
-        query: pipeline_query.Query,
-        req_messages: list[dict],
-        use_model: requester.RuntimeLLMModel,
-        use_funcs: list[resource_tool.LLMTool] = None,
-        extra_args: dict[str, typing.Any] = {},
-        remove_think: bool = False,
-    ) -> provider_message.Message:
-        args = extra_args.copy()
-        args['model'] = use_model.model_entity.name
-
-        messages: list[dict] = req_messages.copy()
-        for msg in messages:
-            if 'content' in msg and isinstance(msg['content'], list):
-                text_content: list = []
-                image_urls: list = []
-                for me in msg['content']:
-                    if me['type'] == 'text':
-                        text_content.append(me['text'])
-                    elif me['type'] == 'image_base64':
-                        image_urls.append(me['image_base64'])
-
-                msg['content'] = '\n'.join(text_content)
-                msg['images'] = [url.split(',')[1] for url in image_urls]
-            if 'tool_calls' in msg:  # LangBot 内部以 str 存储 tool_calls 的参数，这里需要转换为 dict
-                for tool_call in msg['tool_calls']:
-                    tool_call['function']['arguments'] = json.loads(tool_call['function']['arguments'])
-        args['messages'] = messages
-
-        args['tools'] = []
-        if use_funcs:
-            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
-            if tools:
-                args['tools'] = tools
-
-        resp = await self._req(args)
-        message: provider_message.Message = await self._make_msg(resp)
-        return message
-
-    async def _make_msg(self, chat_completions: ollama.ChatResponse) -> provider_message.Message:
-        message: ollama.Message = chat_completions.message
-        if message is None:
-            raise ValueError("chat_completions must contain a 'message' field")
-
-        ret_msg: provider_message.Message = None
-
-        if message.content is not None:
-            ret_msg = provider_message.Message(role='assistant', content=message.content)
-        if message.tool_calls is not None and len(message.tool_calls) > 0:
-            tool_calls: list[provider_message.ToolCall] = []
-
-            for tool_call in message.tool_calls:
-                tool_calls.append(
-                    provider_message.ToolCall(
-                        id=uuid.uuid4().hex,
-                        type='function',
-                        function=provider_message.FunctionCall(
-                            name=tool_call.function.name,
-                            arguments=json.dumps(tool_call.function.arguments),
-                        ),
-                    )
-                )
-            ret_msg.tool_calls = tool_calls
-
-        return ret_msg
-
-    async def _prepare_messages(
-        self,
-        messages: typing.List[provider_message.Message],
-    ) -> list[dict]:
-        """Prepare messages for Ollama API request."""
-        req_messages: list = []
-        for m in messages:
-            msg_dict: dict = m.dict(exclude_none=True)
-            content: Any = msg_dict.get('content')
-            if isinstance(content, list):
-                if all(isinstance(part, dict) and part.get('type') == 'text' for part in content):
-                    msg_dict['content'] = '\n'.join(part['text'] for part in content)
-            req_messages.append(msg_dict)
-        return req_messages
-
-    async def invoke_llm(
-        self,
-        query: pipeline_query.Query,
-        model: requester.RuntimeLLMModel,
-        messages: typing.List[provider_message.Message],
-        funcs: typing.List[resource_tool.LLMTool] = None,
-        extra_args: dict[str, typing.Any] = {},
-        remove_think: bool = False,
-    ) -> provider_message.Message:
-        req_messages = await self._prepare_messages(messages)
-        try:
-            return await self._closure(
-                query=query,
-                req_messages=req_messages,
-                use_model=model,
-                use_funcs=funcs,
-                extra_args=extra_args,
-                remove_think=remove_think,
-            )
-        except asyncio.TimeoutError:
-            raise errors.RequesterError('请求超时')
-
-    async def invoke_llm_stream(
-        self,
-        query: pipeline_query.Query,
-        model: requester.RuntimeLLMModel,
-        messages: typing.List[provider_message.Message],
-        funcs: typing.List[resource_tool.LLMTool] = None,
-        extra_args: dict[str, typing.Any] = {},
-        remove_think: bool = False,
-    ) -> provider_message.MessageChunk:
-        req_messages = await self._prepare_messages(messages)
-
-        try:
-            args = extra_args.copy()
-            args['model'] = model.model_entity.name
-
-            # Process messages for Ollama format
-            msgs: list[dict] = req_messages.copy()
-            for msg in msgs:
-                if 'content' in msg and isinstance(msg['content'], list):
-                    text_content: list = []
-                    image_urls: list = []
-                    for me in msg['content']:
-                        if me['type'] == 'text':
-                            text_content.append(me['text'])
-                        elif me['type'] == 'image_base64':
-                            image_urls.append(me['image_base64'])
-                    msg['content'] = '\n'.join(text_content)
-                    msg['images'] = [url.split(',')[1] for url in image_urls]
-                if 'tool_calls' in msg:
-                    for tool_call in msg['tool_calls']:
-                        tool_call['function']['arguments'] = json.loads(tool_call['function']['arguments'])
-            args['messages'] = msgs
-
-            args['tools'] = []
-            if funcs:
-                tools = await self.ap.tool_mgr.generate_tools_for_openai(funcs)
-                if tools:
-                    args['tools'] = tools
-
-            args['stream'] = True
-
-            chunk_idx = 0
-            thinking_started = False
-            thinking_ended = False
-            role = 'assistant'
-
-            async for chunk in await self.client.chat(**args):
-                message: ollama.Message = chunk.message
-                done = chunk.done
-
-                delta_content = message.content or ''
-                reasoning_content = getattr(message, 'thinking', '') or ''
-
-                # Handle reasoning/thinking content
-                if reasoning_content:
-                    if remove_think:
-                        chunk_idx += 1
-                        continue
-
-                    if not thinking_started:
-                        thinking_started = True
-                        delta_content = '<think>\n' + reasoning_content
-                    else:
-                        delta_content = reasoning_content
-                elif thinking_started and not thinking_ended and delta_content:
-                    thinking_ended = True
-                    delta_content = '\n</think>\n' + delta_content
-
-                # Handle tool calls
-                tool_calls_data = None
-                if message.tool_calls:
-                    tool_calls_data = []
-                    for tc in message.tool_calls:
-                        tool_calls_data.append(
-                            {
-                                'id': uuid.uuid4().hex,
-                                'type': 'function',
-                                'function': {
-                                    'name': tc.function.name,
-                                    'arguments': json.dumps(tc.function.arguments),
-                                },
-                            }
-                        )
-
-                # Skip empty first chunk
-                if chunk_idx == 0 and not delta_content and not reasoning_content and not tool_calls_data:
-                    chunk_idx += 1
-                    continue
-
-                chunk_data = {
-                    'role': role,
-                    'content': delta_content if delta_content else None,
-                    'tool_calls': tool_calls_data,
-                    'is_final': bool(done),
-                }
-                chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
-
-                yield provider_message.MessageChunk(**chunk_data)
-                chunk_idx += 1
-
-        except asyncio.TimeoutError:
-            raise errors.RequesterError('请求超时')
-
-    async def invoke_embedding(
-        self,
-        model: requester.RuntimeEmbeddingModel,
-        input_text: list[str],
-        extra_args: dict[str, typing.Any] = {},
-    ) -> list[list[float]]:
-        return (
-            await self.client.embed(
-                model=model.model_entity.name,
-                input=input_text,
-                **extra_args,
-            )
-        ).embeddings
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/ollamachat.yaml b/src/langbot/pkg/provider/modelmgr/requesters/ollamachat.yaml
index a724f8f8..51bb6332 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/ollamachat.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/ollamachat.yaml
@@ -7,6 +7,7 @@ metadata:
     zh_Hans: Ollama
   icon: ollama.svg
 spec:
+  litellm_provider: ollama
   config:
   - name: base_url
     label:
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/openrouterchatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/openrouterchatcmpl.py
deleted file mode 100644
index 17b88431..00000000
--- a/src/langbot/pkg/provider/modelmgr/requesters/openrouterchatcmpl.py
+++ /dev/null
@@ -1,25 +0,0 @@
-from __future__ import annotations
-
-import typing
-import openai
-
-from . import modelscopechatcmpl
-
-
-class OpenRouterChatCompletions(modelscopechatcmpl.ModelScopeChatCompletions):
-    """OpenRouter ChatCompletion API 请求器"""
-
-    client: openai.AsyncClient
-
-    default_config: dict[str, typing.Any] = {
-        'base_url': 'https://openrouter.ai/api/v1',
-        'timeout': 120,
-    }
-
-    async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]:
-        original_base_url = self.requester_cfg.get('base_url', '')
-        self.requester_cfg['base_url'] = 'https://openrouter.ai/api/v1'
-        try:
-            return await super().scan_models(api_key)
-        finally:
-            self.requester_cfg['base_url'] = original_base_url
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/openrouterchatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/openrouterchatcmpl.yaml
index 71064dc0..296a8811 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/openrouterchatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/openrouterchatcmpl.yaml
@@ -7,6 +7,7 @@ metadata:
     zh_Hans: OpenRouter
   icon: openrouter.svg
 spec:
+  litellm_provider: openai
   config:
   - name: base_url
     label:
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/ppiochatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/ppiochatcmpl.py
deleted file mode 100644
index 1836bd62..00000000
--- a/src/langbot/pkg/provider/modelmgr/requesters/ppiochatcmpl.py
+++ /dev/null
@@ -1,208 +0,0 @@
-from __future__ import annotations
-
-import openai
-import typing
-
-from . import chatcmpl
-from .. import requester
-import openai.types.chat.chat_completion as chat_completion
-import re
-import langbot_plugin.api.entities.builtin.provider.message as provider_message
-import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
-import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
-
-
-class PPIOChatCompletions(chatcmpl.OpenAIChatCompletions):
-    """欧派云 ChatCompletion API 请求器"""
-
-    client: openai.AsyncClient
-
-    default_config: dict[str, typing.Any] = {
-        'base_url': 'https://api.ppinfra.com/v3/openai',
-        'timeout': 120,
-    }
-
-    is_think: bool = False
-
-    async def _make_msg(
-        self,
-        chat_completion: chat_completion.ChatCompletion,
-        remove_think: bool,
-    ) -> provider_message.Message:
-        chatcmpl_message = chat_completion.choices[0].message.model_dump()
-        # print(chatcmpl_message.keys(), chatcmpl_message.values())
-
-        # 确保 role 字段存在且不为 None
-        if 'role' not in chatcmpl_message or chatcmpl_message['role'] is None:
-            chatcmpl_message['role'] = 'assistant'
-
-        reasoning_content = chatcmpl_message['reasoning_content'] if 'reasoning_content' in chatcmpl_message else None
-
-        # deepseek的reasoner模型
-        chatcmpl_message['content'] = await self._process_thinking_content(
-            chatcmpl_message['content'], reasoning_content, remove_think
-        )
-
-        # 移除 reasoning_content 字段，避免传递给 Message
-        if 'reasoning_content' in chatcmpl_message:
-            del chatcmpl_message['reasoning_content']
-
-        message = provider_message.Message(**chatcmpl_message)
-
-        return message
-
-    async def _process_thinking_content(
-        self,
-        content: str,
-        reasoning_content: str = None,
-        remove_think: bool = False,
-    ) -> tuple[str, str]:
-        """处理思维链内容
-
-        Args:
-            content: 原始内容
-            reasoning_content: reasoning_content 字段内容
-            remove_think: 是否移除思维链
-
-        Returns:
-            处理后的内容
-        """
-        if remove_think:
-            content = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL)
-        else:
-            if reasoning_content is not None:
-                content = '<think>\n' + reasoning_content + '\n</think>\n' + content
-        return content
-
-    async def _make_msg_chunk(
-        self,
-        delta: dict[str, typing.Any],
-        idx: int,
-    ) -> provider_message.MessageChunk:
-        # 处理流式chunk和完整响应的差异
-        # print(chat_completion.choices[0])
-
-        # 确保 role 字段存在且不为 None
-        if 'role' not in delta or delta['role'] is None:
-            delta['role'] = 'assistant'
-
-        reasoning_content = delta['reasoning_content'] if 'reasoning_content' in delta else None
-
-        delta['content'] = '' if delta['content'] is None else delta['content']
-        # print(reasoning_content)
-
-        # deepseek的reasoner模型
-
-        if reasoning_content is not None:
-            delta['content'] += reasoning_content
-
-        message = provider_message.MessageChunk(**delta)
-
-        return message
-
-    async def _closure_stream(
-        self,
-        query: pipeline_query.Query,
-        req_messages: list[dict],
-        use_model: requester.RuntimeLLMModel,
-        use_funcs: list[resource_tool.LLMTool] = None,
-        extra_args: dict[str, typing.Any] = {},
-        remove_think: bool = False,
-    ) -> provider_message.Message | typing.AsyncGenerator[provider_message.MessageChunk, None]:
-        self.client.api_key = use_model.provider.token_mgr.get_token()
-
-        args = {}
-        args['model'] = use_model.model_entity.name
-
-        if use_funcs:
-            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
-
-            if tools:
-                args['tools'] = tools
-
-        # 设置此次请求中的messages
-        messages = req_messages.copy()
-
-        # 检查vision
-        for msg in messages:
-            if 'content' in msg and isinstance(msg['content'], list):
-                for me in msg['content']:
-                    if me['type'] == 'image_base64':
-                        me['image_url'] = {'url': me['image_base64']}
-                        me['type'] = 'image_url'
-                        del me['image_base64']
-
-        args['messages'] = messages
-        args['stream'] = True
-
-        # tool_calls_map: dict[str, provider_message.ToolCall] = {}
-        chunk_idx = 0
-        thinking_started = False
-        thinking_ended = False
-        role = 'assistant'  # 默认角色
-        async for chunk in self._req_stream(args, extra_body=extra_args):
-            # 解析 chunk 数据
-            if hasattr(chunk, 'choices') and chunk.choices:
-                choice = chunk.choices[0]
-                delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
-                finish_reason = getattr(choice, 'finish_reason', None)
-            else:
-                delta = {}
-                finish_reason = None
-
-            # 从第一个 chunk 获取 role，后续使用这个 role
-            if 'role' in delta and delta['role']:
-                role = delta['role']
-
-            # 获取增量内容
-            delta_content = delta.get('content', '')
-            # reasoning_content = delta.get('reasoning_content', '')
-
-            if remove_think:
-                if delta['content'] is not None:
-                    if '<think>' in delta['content'] and not thinking_started and not thinking_ended:
-                        thinking_started = True
-                        continue
-                    elif delta['content'] == r'</think>' and not thinking_ended:
-                        thinking_ended = True
-                        continue
-                    elif thinking_ended and delta['content'] == '\n\n' and thinking_started:
-                        thinking_started = False
-                        continue
-                    elif thinking_started and not thinking_ended:
-                        continue
-
-            # delta_tool_calls = None
-            if delta.get('tool_calls'):
-                for tool_call in delta['tool_calls']:
-                    if tool_call['id'] and tool_call['function']['name']:
-                        tool_id = tool_call['id']
-                        tool_name = tool_call['function']['name']
-
-                    if tool_call['id'] is None:
-                        tool_call['id'] = tool_id
-                    if tool_call['function']['name'] is None:
-                        tool_call['function']['name'] = tool_name
-                    if tool_call['function']['arguments'] is None:
-                        tool_call['function']['arguments'] = ''
-                    if tool_call['type'] is None:
-                        tool_call['type'] = 'function'
-
-            # 跳过空的第一个 chunk（只有 role 没有内容）
-            if chunk_idx == 0 and not delta_content and not delta.get('tool_calls'):
-                chunk_idx += 1
-                continue
-
-            # 构建 MessageChunk - 只包含增量内容
-            chunk_data = {
-                'role': role,
-                'content': delta_content if delta_content else None,
-                'tool_calls': delta.get('tool_calls'),
-                'is_final': bool(finish_reason),
-            }
-
-            # 移除 None 值
-            chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
-
-            yield provider_message.MessageChunk(**chunk_data)
-            chunk_idx += 1
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/ppiochatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/ppiochatcmpl.yaml
index 9e8eb1b0..46a0dbae 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/ppiochatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/ppiochatcmpl.yaml
@@ -7,6 +7,7 @@ metadata:
     zh_Hans: 派欧云
   icon: ppio.svg
 spec:
+  litellm_provider: openai
   config:
   - name: base_url
     label:
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/qhaigcchatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/qhaigcchatcmpl.py
deleted file mode 100644
index a68b6896..00000000
--- a/src/langbot/pkg/provider/modelmgr/requesters/qhaigcchatcmpl.py
+++ /dev/null
@@ -1,17 +0,0 @@
-from __future__ import annotations
-
-import openai
-import typing
-
-from . import chatcmpl
-
-
-class QHAIGCChatCompletions(chatcmpl.OpenAIChatCompletions):
-    """启航 AI ChatCompletion API 请求器"""
-
-    client: openai.AsyncClient
-
-    default_config: dict[str, typing.Any] = {
-        'base_url': 'https://api.qhaigc.com/v1',
-        'timeout': 120,
-    }
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/qhaigcchatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/qhaigcchatcmpl.yaml
index 46ae1fad..d5e59d6e 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/qhaigcchatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/qhaigcchatcmpl.yaml
@@ -7,6 +7,7 @@ metadata:
     zh_Hans: 启航 AI
   icon: qhaigc.png
 spec:
+  litellm_provider: openai
   config:
   - name: base_url
     label:
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/shengsuanyun.py b/src/langbot/pkg/provider/modelmgr/requesters/shengsuanyun.py
deleted file mode 100644
index 122eaf7d..00000000
--- a/src/langbot/pkg/provider/modelmgr/requesters/shengsuanyun.py
+++ /dev/null
@@ -1,32 +0,0 @@
-from __future__ import annotations
-
-import openai
-import typing
-
-from . import chatcmpl
-import openai.types.chat.chat_completion as chat_completion
-
-
-class ShengSuanYunChatCompletions(chatcmpl.OpenAIChatCompletions):
-    """胜算云(ModelSpot.AI) ChatCompletion API 请求器"""
-
-    client: openai.AsyncClient
-
-    default_config: dict[str, typing.Any] = {
-        'base_url': 'https://router.shengsuanyun.com/api/v1',
-        'timeout': 120,
-    }
-
-    async def _req(
-        self,
-        args: dict,
-        extra_body: dict = {},
-    ) -> chat_completion.ChatCompletion:
-        return await self.client.chat.completions.create(
-            **args,
-            extra_body=extra_body,
-            extra_headers={
-                'HTTP-Referer': 'https://langbot.app',
-                'X-Title': 'LangBot',
-            },
-        )
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/shengsuanyun.yaml b/src/langbot/pkg/provider/modelmgr/requesters/shengsuanyun.yaml
index 77cf682c..7e0797f0 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/shengsuanyun.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/shengsuanyun.yaml
@@ -7,6 +7,7 @@ metadata:
     zh_Hans: 胜算云
   icon: shengsuanyun.svg
 spec:
+  litellm_provider: openai
   config:
   - name: base_url
     label:
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.py
deleted file mode 100644
index 3636d9d1..00000000
--- a/src/langbot/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.py
+++ /dev/null
@@ -1,17 +0,0 @@
-from __future__ import annotations
-
-import typing
-import openai
-
-from . import chatcmpl
-
-
-class SiliconFlowChatCompletions(chatcmpl.OpenAIChatCompletions):
-    """SiliconFlow ChatCompletion API 请求器"""
-
-    client: openai.AsyncClient
-
-    default_config: dict[str, typing.Any] = {
-        'base_url': 'https://api.siliconflow.cn/v1',
-        'timeout': 120,
-    }
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.yaml
index 11a2ffa3..915cd537 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/siliconflowchatcmpl.yaml
@@ -7,6 +7,7 @@ metadata:
     zh_Hans: 硅基流动
   icon: siliconflow.svg
 spec:
+  litellm_provider: openai
   config:
   - name: base_url
     label:
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/spacechatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/spacechatcmpl.py
deleted file mode 100644
index 91740a1f..00000000
--- a/src/langbot/pkg/provider/modelmgr/requesters/spacechatcmpl.py
+++ /dev/null
@@ -1,17 +0,0 @@
-from __future__ import annotations
-
-import typing
-import openai
-
-from . import chatcmpl
-
-
-class LangBotSpaceChatCompletions(chatcmpl.OpenAIChatCompletions):
-    """LangBot Space ChatCompletion API 请求器"""
-
-    client: openai.AsyncClient
-
-    default_config: dict[str, typing.Any] = {
-        'base_url': 'https://api.langbot.cloud/v1',
-        'timeout': 120,
-    }
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/spacechatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/spacechatcmpl.yaml
index 29c23a83..5dd2693e 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/spacechatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/spacechatcmpl.yaml
@@ -7,6 +7,7 @@ metadata:
     zh_Hans: Space
   icon: space.webp
 spec:
+  litellm_provider: openai
   config:
   - name: base_url
     label:
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/tencent.svg b/src/langbot/pkg/provider/modelmgr/requesters/tencent.svg
new file mode 100644
index 00000000..de32c1bf
--- /dev/null
+++ b/src/langbot/pkg/provider/modelmgr/requesters/tencent.svg
@@ -0,0 +1,5 @@
+<svg width="60" height="50" viewBox="0 0 60 50" xmlns="http://www.w3.org/2000/svg">
+  <rect width="60" height="50" rx="8" fill="#0052D9"/>
+  <text x="30" y="28" font-family="Arial, sans-serif" font-size="10" font-weight="bold" fill="white" text-anchor="middle">Tencent</text>
+  <text x="30" y="40" font-family="Arial, sans-serif" font-size="8" fill="white" text-anchor="middle">Hunyuan</text>
+</svg>
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/tencentchatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/tencentchatcmpl.yaml
new file mode 100644
index 00000000..98130765
--- /dev/null
+++ b/src/langbot/pkg/provider/modelmgr/requesters/tencentchatcmpl.yaml
@@ -0,0 +1,30 @@
+apiVersion: v1
+kind: LLMAPIRequester
+metadata:
+  name: tencent-chat-completions
+  label:
+    en_US: Tencent Hunyuan
+    zh_Hans: 腾讯混元
+  icon: tencent.svg
+spec:
+  litellm_provider: openai
+  config:
+  - name: base_url
+    label:
+      en_US: Base URL
+      zh_Hans: 基础 URL
+    type: string
+    required: true
+    default: https://api.hunyuan.cloud.tencent.com/v1  # OpenAI-compatible endpoint; hunyuan.tencentcloudapi.com is the signed native API
+  - name: timeout
+    label:
+      en_US: Timeout
+      zh_Hans: 超时时间
+    type: integer
+    required: true
+    default: 120
+  support_type:
+  - llm
+  - text-embedding
+  - rerank
+  provider_category: manufacturer
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/together.svg b/src/langbot/pkg/provider/modelmgr/requesters/together.svg
new file mode 100644
index 00000000..b6ce0f80
--- /dev/null
+++ b/src/langbot/pkg/provider/modelmgr/requesters/together.svg
@@ -0,0 +1,5 @@
+<svg width="60" height="50" viewBox="0 0 60 50" xmlns="http://www.w3.org/2000/svg">
+  <rect width="60" height="50" rx="8" fill="#8B5CF6"/>
+  <text x="30" y="28" font-family="Arial, sans-serif" font-size="10" font-weight="bold" fill="white" text-anchor="middle">Together</text>
+  <text x="30" y="40" font-family="Arial, sans-serif" font-size="8" fill="white" text-anchor="middle">AI</text>
+</svg>
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/togetherchatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/togetherchatcmpl.yaml
new file mode 100644
index 00000000..90fe61c7
--- /dev/null
+++ b/src/langbot/pkg/provider/modelmgr/requesters/togetherchatcmpl.yaml
@@ -0,0 +1,30 @@
+apiVersion: v1
+kind: LLMAPIRequester
+metadata:
+  name: together-chat-completions
+  label:
+    en_US: Together AI
+    zh_Hans: Together AI
+  icon: together.svg
+spec:
+  litellm_provider: together_ai
+  config:
+  - name: base_url
+    label:
+      en_US: Base URL
+      zh_Hans: 基础 URL
+    type: string
+    required: true
+    default: https://api.together.xyz/v1
+  - name: timeout
+    label:
+      en_US: Timeout
+      zh_Hans: 超时时间
+    type: integer
+    required: true
+    default: 120
+  support_type:
+  - llm
+  - text-embedding
+  - rerank
+  provider_category: manufacturer
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/tokenpony.yaml b/src/langbot/pkg/provider/modelmgr/requesters/tokenpony.yaml
index f160bdea..89afb1f7 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/tokenpony.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/tokenpony.yaml
@@ -7,6 +7,7 @@ metadata:
     zh_Hans: 小马算力
   icon: tokenpony.svg
 spec:
+  litellm_provider: openai
   config:
   - name: base_url
     label:
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/tokenponychatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/tokenponychatcmpl.py
deleted file mode 100644
index 92311454..00000000
--- a/src/langbot/pkg/provider/modelmgr/requesters/tokenponychatcmpl.py
+++ /dev/null
@@ -1,17 +0,0 @@
-from __future__ import annotations
-
-import typing
-import openai
-
-from . import chatcmpl
-
-
-class TokenPonyChatCompletions(chatcmpl.OpenAIChatCompletions):
-    """TokenPony ChatCompletion API 请求器"""
-
-    client: openai.AsyncClient
-
-    default_config: dict[str, typing.Any] = {
-        'base_url': 'https://api.tokenpony.cn/v1',
-        'timeout': 120,
-    }
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/volcarkchatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/volcarkchatcmpl.py
deleted file mode 100644
index 7eb68956..00000000
--- a/src/langbot/pkg/provider/modelmgr/requesters/volcarkchatcmpl.py
+++ /dev/null
@@ -1,17 +0,0 @@
-from __future__ import annotations
-
-import typing
-import openai
-
-from . import chatcmpl
-
-
-class VolcArkChatCompletions(chatcmpl.OpenAIChatCompletions):
-    """火山方舟大模型平台 ChatCompletion API 请求器"""
-
-    client: openai.AsyncClient
-
-    default_config: dict[str, typing.Any] = {
-        'base_url': 'https://ark.cn-beijing.volces.com/api/v3',
-        'timeout': 120,
-    }
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/volcarkchatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/volcarkchatcmpl.yaml
index e5c82657..5e4ab111 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/volcarkchatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/volcarkchatcmpl.yaml
@@ -7,6 +7,7 @@ metadata:
     zh_Hans: 火山方舟
   icon: volcark.svg
 spec:
+  litellm_provider: openai
   config:
   - name: base_url
     label:
@@ -24,6 +25,8 @@ spec:
     default: 120
   support_type:
   - llm
+  - text-embedding
+  - rerank
   provider_category: maas
 execution:
   python:
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/voyageairerank.yaml b/src/langbot/pkg/provider/modelmgr/requesters/voyageairerank.yaml
index a47b8d47..f35f949c 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/voyageairerank.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/voyageairerank.yaml
@@ -7,6 +7,7 @@ metadata:
     zh_Hans: Voyage AI
   icon: voyageai.svg
 spec:
+  litellm_provider: openai
   config:
   - name: base_url
     label:
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/xaichatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/xaichatcmpl.py
deleted file mode 100644
index db2022f1..00000000
--- a/src/langbot/pkg/provider/modelmgr/requesters/xaichatcmpl.py
+++ /dev/null
@@ -1,17 +0,0 @@
-from __future__ import annotations
-
-import typing
-import openai
-
-from . import chatcmpl
-
-
-class XaiChatCompletions(chatcmpl.OpenAIChatCompletions):
-    """xAI ChatCompletion API 请求器"""
-
-    client: openai.AsyncClient
-
-    default_config: dict[str, typing.Any] = {
-        'base_url': 'https://api.x.ai/v1',
-        'timeout': 120,
-    }
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/xaichatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/xaichatcmpl.yaml
index 2e721d70..379a9da0 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/xaichatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/xaichatcmpl.yaml
@@ -7,6 +7,7 @@ metadata:
     zh_Hans: xAI
   icon: xai.svg
 spec:
+  litellm_provider: openai
   config:
   - name: base_url
     label:
@@ -24,6 +25,8 @@ spec:
     default: 120
   support_type:
   - llm
+  - text-embedding
+  - rerank
   provider_category: manufacturer
 execution:
   python:
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/yi.svg b/src/langbot/pkg/provider/modelmgr/requesters/yi.svg
new file mode 100644
index 00000000..8dc5e827
--- /dev/null
+++ b/src/langbot/pkg/provider/modelmgr/requesters/yi.svg
@@ -0,0 +1,5 @@
+<svg width="60" height="50" viewBox="0 0 60 50" xmlns="http://www.w3.org/2000/svg">
+  <rect width="60" height="50" rx="8" fill="#10B981"/>
+  <text x="30" y="28" font-family="Arial, sans-serif" font-size="10" font-weight="bold" fill="white" text-anchor="middle">01.AI</text>
+  <text x="30" y="40" font-family="Arial, sans-serif" font-size="8" fill="white" text-anchor="middle">Yi</text>
+</svg>
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/yichatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/yichatcmpl.yaml
new file mode 100644
index 00000000..2617fc30
--- /dev/null
+++ b/src/langbot/pkg/provider/modelmgr/requesters/yichatcmpl.yaml
@@ -0,0 +1,30 @@
+apiVersion: v1
+kind: LLMAPIRequester
+metadata:
+  name: yi-chat-completions
+  label:
+    en_US: 01.AI Yi
+    zh_Hans: 零一万物
+  icon: yi.svg
+spec:
+  litellm_provider: openai
+  config:
+  - name: base_url
+    label:
+      en_US: Base URL
+      zh_Hans: 基础 URL
+    type: string
+    required: true
+    default: https://api.lingyiwanwu.com/v1
+  - name: timeout
+    label:
+      en_US: Timeout
+      zh_Hans: 超时时间
+    type: integer
+    required: true
+    default: 120
+  support_type:
+  - llm
+  - text-embedding
+  - rerank
+  provider_category: manufacturer
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.py b/src/langbot/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.py
deleted file mode 100644
index a1a07068..00000000
--- a/src/langbot/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.py
+++ /dev/null
@@ -1,17 +0,0 @@
-from __future__ import annotations
-
-import typing
-import openai
-
-from . import chatcmpl
-
-
-class ZhipuAIChatCompletions(chatcmpl.OpenAIChatCompletions):
-    """智谱AI ChatCompletion API 请求器"""
-
-    client: openai.AsyncClient
-
-    default_config: dict[str, typing.Any] = {
-        'base_url': 'https://open.bigmodel.cn/api/paas/v4',
-        'timeout': 120,
-    }
diff --git a/src/langbot/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.yaml b/src/langbot/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.yaml
index a4ebb2ec..31bec259 100644
--- a/src/langbot/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.yaml
+++ b/src/langbot/pkg/provider/modelmgr/requesters/zhipuaichatcmpl.yaml
@@ -7,6 +7,7 @@ metadata:
     zh_Hans: 智谱 AI
   icon: zhipuai.svg
 spec:
+  litellm_provider: openai
   config:
   - name: base_url
     label:
@@ -24,6 +25,8 @@ spec:
     default: 120
   support_type:
   - llm
+  - text-embedding
+  - rerank
   provider_category: manufacturer
 execution:
   python:
diff --git a/web/src/app/home/components/models-dialog/component/provider-form/ProviderForm.tsx b/web/src/app/home/components/models-dialog/component/provider-form/ProviderForm.tsx
index c596037a..91dac280 100644
--- a/web/src/app/home/components/models-dialog/component/provider-form/ProviderForm.tsx
+++ b/web/src/app/home/components/models-dialog/component/provider-form/ProviderForm.tsx
@@ -1,4 +1,4 @@
-import { useEffect, useState } from 'react';
+import { useEffect, useState, useRef } from 'react';
 import { httpClient } from '@/app/infra/http/HttpClient';
 
 import { zodResolver } from '@hookform/resolvers/zod';
@@ -16,19 +16,12 @@ import {
   FormMessage,
 } from '@/components/ui/form';
 import { Input } from '@/components/ui/input';
-import {
-  Select,
-  SelectContent,
-  SelectGroup,
-  SelectItem,
-  SelectLabel,
-  SelectTrigger,
-  SelectValue,
-} from '@/components/ui/select';
 import { DialogFooter } from '@/components/ui/dialog';
 import { toast } from 'sonner';
 import { extractI18nObject } from '@/i18n/I18nProvider';
 import { CustomApiError } from '@/app/infra/entities/common';
+import { cn } from '@/lib/utils';
+import { Check, ChevronDown, Search } from 'lucide-react';
 
 const getFormSchema = (t: (key: string) => string) =>
   z.object({
@@ -71,6 +64,10 @@ export default function ProviderForm({
       description: string;
     }[]
   >([]);
+  const [searchQuery, setSearchQuery] = useState('');
+  const [isOpen, setIsOpen] = useState(false);
+  const dropdownRef = useRef<HTMLDivElement>(null);
+  const searchInputRef = useRef<HTMLInputElement>(null);
 
   useEffect(() => {
     async function init() {
@@ -82,6 +79,54 @@ export default function ProviderForm({
     init();
   }, [providerId]);
 
+  // Close dropdown when clicking outside
+  useEffect(() => {
+    function handleClickOutside(event: MouseEvent) {
+      if (
+        dropdownRef.current &&
+        !dropdownRef.current.contains(event.target as Node)
+      ) {
+        setIsOpen(false);
+        setSearchQuery('');
+      }
+    }
+    document.addEventListener('mousedown', handleClickOutside);
+    return () => document.removeEventListener('mousedown', handleClickOutside);
+  }, []);
+
+  // Focus search input when dropdown opens
+  useEffect(() => {
+    if (isOpen && searchInputRef.current) {
+      searchInputRef.current.focus();
+    }
+  }, [isOpen]);
+
+  // Filter requesters based on search query
+  const filteredRequesters = requesterList.filter(
+    (r) =>
+      r.label.toLowerCase().includes(searchQuery.toLowerCase()) ||
+      r.value.toLowerCase().includes(searchQuery.toLowerCase()),
+  );
+
+  // Group filtered requesters by category
+  const groupedRequesters = {
+    builtin: filteredRequesters.filter((r) => r.category === 'builtin'),
+    manufacturer: filteredRequesters.filter(
+      (r) => r.category === 'manufacturer',
+    ),
+    maas: filteredRequesters.filter((r) => r.category === 'maas'),
+    'self-hosted': filteredRequesters.filter(
+      (r) => r.category === 'self-hosted',
+    ),
+  };
+
+  const categoryLabels: Record<string, string> = {
+    builtin: t('models.builtin'),
+    manufacturer: t('models.modelManufacturer'),
+    maas: t('models.aggregationPlatform'),
+    'self-hosted': t('models.selfDeployed'),
+  };
+
   async function loadRequesters() {
     const resp = await httpClient.getProviderRequesters();
     setRequesterList(
@@ -168,17 +213,16 @@ export default function ProviderForm({
                   {t('models.requester')}
                   <span className="text-red-500">*</span>
                 </FormLabel>
-                <Select
-                  onValueChange={(v) => {
-                    field.onChange(v);
-                    const req = requesterList.find((r) => r.value === v);
-                    if (req && (!providerId || !form.getValues('base_url'))) {
-                      form.setValue('base_url', req.defaultUrl);
-                    }
-                  }}
-                  value={field.value}
-                >
-                  <SelectTrigger className="bg-background">
+                <div ref={dropdownRef} className="relative">
+                  {/* Trigger button */}
+                  <button
+                    type="button"
+                    onClick={() => setIsOpen(!isOpen)}
+                    className={cn(
+                      'flex h-10 w-full items-center justify-between rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background placeholder:text-muted-foreground focus:outline-none focus:ring-2 focus:ring-ring focus:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50',
+                      isOpen && 'ring-2 ring-ring ring-offset-2',
+                    )}
+                  >
                     {selectedRequester ? (
                       <div className="flex items-center gap-2">
                         <img
@@ -191,90 +235,102 @@ export default function ProviderForm({
                         <span>{selectedRequester.label}</span>
                       </div>
                     ) : (
-                      <SelectValue placeholder={t('models.selectRequester')} />
+                      <span className="text-muted-foreground">
+                        {t('models.selectRequester')}
+                      </span>
                     )}
-                  </SelectTrigger>
-                  <SelectContent>
-                    <SelectGroup>
-                      <SelectLabel>{t('models.builtin')}</SelectLabel>
-                      {requesterList
-                        .filter((r) => r.category === 'builtin')
-                        .map((r) => (
-                          <SelectItem key={r.value} value={r.value}>
-                            <div className="flex items-center gap-2">
-                              <img
-                                src={httpClient.getProviderRequesterIconURL(
-                                  r.value,
-                                )}
-                                alt={r.label}
-                                className="h-5 w-5 rounded"
-                              />
-                              <span>{r.label}</span>
-                            </div>
-                          </SelectItem>
-                        ))}
-                    </SelectGroup>
-                    <SelectGroup>
-                      <SelectLabel>{t('models.modelManufacturer')}</SelectLabel>
-                      {requesterList
-                        .filter((r) => r.category === 'manufacturer')
-                        .map((r) => (
-                          <SelectItem key={r.value} value={r.value}>
-                            <div className="flex items-center gap-2">
-                              <img
-                                src={httpClient.getProviderRequesterIconURL(
-                                  r.value,
-                                )}
-                                alt={r.label}
-                                className="h-5 w-5 rounded"
-                              />
-                              <span>{r.label}</span>
-                            </div>
-                          </SelectItem>
-                        ))}
-                    </SelectGroup>
-                    <SelectGroup>
-                      <SelectLabel>
-                        {t('models.aggregationPlatform')}
-                      </SelectLabel>
-                      {requesterList
-                        .filter((r) => r.category === 'maas')
-                        .map((r) => (
-                          <SelectItem key={r.value} value={r.value}>
-                            <div className="flex items-center gap-2">
-                              <img
-                                src={httpClient.getProviderRequesterIconURL(
-                                  r.value,
-                                )}
-                                alt={r.label}
-                                className="h-5 w-5 rounded"
-                              />
-                              <span>{r.label}</span>
-                            </div>
-                          </SelectItem>
-                        ))}
-                    </SelectGroup>
-                    <SelectGroup>
-                      <SelectLabel>{t('models.selfDeployed')}</SelectLabel>
-                      {requesterList
-                        .filter((r) => r.category === 'self-hosted')
-                        .map((r) => (
-                          <SelectItem key={r.value} value={r.value}>
-                            <div className="flex items-center gap-2">
-                              <img
-                                src={httpClient.getProviderRequesterIconURL(
-                                  r.value,
-                                )}
-                                alt={r.label}
-                                className="h-5 w-5 rounded"
-                              />
-                              <span>{r.label}</span>
-                            </div>
-                          </SelectItem>
-                        ))}
-                    </SelectGroup>
-                  </SelectContent>
-                </Select>
+                    <ChevronDown
+                      className={cn(
+                        'h-4 w-4 opacity-50 transition-transform',
+                        isOpen && 'rotate-180',
+                      )}
+                    />
+                  </button>
+
+                  {/* Dropdown */}
+                  {isOpen && (
+                    <div className="absolute z-50 mt-1 w-full rounded-md border bg-popover text-popover-foreground shadow-md animate-in fade-in-0 zoom-in-95">
+                      {/* Search input */}
+                      <div className="flex items-center border-b px-3">
+                        <Search className="mr-2 h-4 w-4 shrink-0 opacity-50" />
+                        <input
+                          ref={searchInputRef}
+                          type="text"
+                          placeholder={
+                            t('models.searchProviders') || 'Search providers...'
+                          }
+                          value={searchQuery}
+                          onChange={(e) => setSearchQuery(e.target.value)}
+                          className="flex h-10 w-full rounded-md bg-transparent py-3 text-sm outline-none placeholder:text-muted-foreground"
+                        />
+                      </div>
+
+                      {/* Options list */}
+                      <div className="max-h-[300px] overflow-y-auto p-1">
+                        {Object.entries(groupedRequesters).map(
+                          ([category, items]) => {
+                            if (items.length === 0) return null;
+                            return (
+                              <div key={category}>
+                                <div className="py-1.5 px-2 text-xs font-semibold text-muted-foreground">
+                                  {categoryLabels[category]}
+                                </div>
+                                {items.map((r) => (
+                                  <button
+                                    key={r.value}
+                                    type="button"
+                                    onClick={() => {
+                                      field.onChange(r.value);
+                                      const req = requesterList.find(
+                                        (req) => req.value === r.value,
+                                      );
+                                      if (
+                                        req &&
+                                        (!providerId ||
+                                          !form.getValues('base_url'))
+                                      ) {
+                                        form.setValue(
+                                          'base_url',
+                                          req.defaultUrl,
+                                        );
+                                      }
+                                      setIsOpen(false);
+                                      setSearchQuery('');
+                                    }}
+                                    className={cn(
+                                      'flex w-full items-center gap-2 rounded-sm px-2 py-1.5 text-sm outline-none hover:bg-accent hover:text-accent-foreground cursor-pointer',
+                                      field.value === r.value &&
+                                        'bg-accent text-accent-foreground',
+                                    )}
+                                  >
+                                    <img
+                                      src={httpClient.getProviderRequesterIconURL(
+                                        r.value,
+                                      )}
+                                      alt={r.label}
+                                      className="h-5 w-5 rounded"
+                                    />
+                                    <span className="flex-1 text-left">
+                                      {r.label}
+                                    </span>
+                                    {field.value === r.value && (
+                                      <Check className="h-4 w-4" />
+                                    )}
+                                  </button>
+                                ))}
+                              </div>
+                            );
+                          },
+                        )}
+                        {filteredRequesters.length === 0 && (
+                          <div className="py-6 text-center text-sm text-muted-foreground">
+                            No results found.
+                          </div>
+                        )}
+                      </div>
+                    </div>
+                  )}
+                </div>
                 <FormMessage />
                 {selectedRequester?.description && (
                   <p className="text-sm text-muted-foreground">
diff --git a/web/src/i18n/locales/en-US.ts b/web/src/i18n/locales/en-US.ts
index 2313b723..2e723f85 100644
--- a/web/src/i18n/locales/en-US.ts
+++ b/web/src/i18n/locales/en-US.ts
@@ -258,6 +258,7 @@ const enUS = {
     selectProvider: 'Select Provider',
     requester: 'Provider Type',
     selectRequester: 'Select Provider Type',
+    searchProviders: 'Search providers...',
     langbotModelsDescription: 'Cloud models powered by LangBot Space',
     credits: 'Credits',
     loginWithSpace: 'Login with Space',
diff --git a/web/src/i18n/locales/zh-Hans.ts b/web/src/i18n/locales/zh-Hans.ts
index 5c885d5d..6271fb58 100644
--- a/web/src/i18n/locales/zh-Hans.ts
+++ b/web/src/i18n/locales/zh-Hans.ts
@@ -248,6 +248,7 @@ const zhHans = {
     selectProvider: '选择供应商',
     requester: '供应商类型',
     selectRequester: '选择供应商类型',
+    searchProviders: '搜索供应商...',
     langbotModelsDescription: 'LangBot Space 提供的云端模型',
     credits: '积分',
     loginWithSpace: '通过 Space 登录',