Merge branch 'rc/new-plugin' into refactor/new-plugin-system

2026-07-24 13:26:08 +00:00 · 2025-08-24 21:40:02 +08:00
parent f2d5c21712 83ff64698b
commit 64764c412b
232 changed files with 11998 additions and 1440 deletions
@@ -17,7 +17,7 @@ class LLMModelInfo(pydantic.BaseModel):

    token_mgr: token.TokenManager

-    requester: requester.LLMAPIRequester
+    requester: requester.ProviderAPIRequester

    tool_call_supported: typing.Optional[bool] = False

@@ -20,13 +20,16 @@ class ModelManager:

    llm_models: list[requester.RuntimeLLMModel]

+    embedding_models: list[requester.RuntimeEmbeddingModel]
+
    requester_components: list[engine.Component]

-    requester_dict: dict[str, type[requester.LLMAPIRequester]]  # cache
+    requester_dict: dict[str, type[requester.ProviderAPIRequester]]  # cache

    def __init__(self, ap: app.Application):
        self.ap = ap
        self.llm_models = []
+        self.embedding_models = []
        self.requester_components = []
        self.requester_dict = {}

@@ -34,7 +37,7 @@ class ModelManager:
        self.requester_components = self.ap.discover.get_components_by_kind('LLMAPIRequester')

        # forge requester class dict
-        requester_dict: dict[str, type[requester.LLMAPIRequester]] = {}
+        requester_dict: dict[str, type[requester.ProviderAPIRequester]] = {}
        for component in self.requester_components:
            requester_dict[component.metadata.name] = component.get_python_component_class()

@@ -47,13 +50,11 @@ class ModelManager:
        self.ap.logger.info('Loading models from db...')

        self.llm_models = []
+        self.embedding_models = []

        # llm models
        result = await self.ap.persistence_mgr.execute_async(sqlalchemy.select(persistence_model.LLMModel))
-
        llm_models = result.all()
-
-        # load models
        for llm_model in llm_models:
            try:
                await self.load_llm_model(llm_model)
@@ -62,11 +63,17 @@ class ModelManager:
            except Exception as e:
                self.ap.logger.error(f'Failed to load model {llm_model.uuid}: {e}\n{traceback.format_exc()}')

+        # embedding models
+        result = await self.ap.persistence_mgr.execute_async(sqlalchemy.select(persistence_model.EmbeddingModel))
+        embedding_models = result.all()
+        for embedding_model in embedding_models:
+            await self.load_embedding_model(embedding_model)
+
    async def init_runtime_llm_model(
        self,
        model_info: persistence_model.LLMModel | sqlalchemy.Row[persistence_model.LLMModel] | dict,
    ):
-        """初始化运行时模型"""
+        """初始化运行时 LLM 模型"""
        if isinstance(model_info, sqlalchemy.Row):
            model_info = persistence_model.LLMModel(**model_info._mapping)
        elif isinstance(model_info, dict):
@@ -90,31 +97,85 @@ class ModelManager:

        return runtime_llm_model

+    async def init_runtime_embedding_model(
+        self,
+        model_info: persistence_model.EmbeddingModel | sqlalchemy.Row[persistence_model.EmbeddingModel] | dict,
+    ):
+        """Initialize a runtime embedding model from a persisted model record.
+
+        Args:
+            model_info: An ``EmbeddingModel`` entity, a SQLAlchemy row, or a dict.
+                Rows and dicts are first converted into an ``EmbeddingModel``.
+
+        Returns:
+            A ``requester.RuntimeEmbeddingModel`` holding the entity, a
+            ``TokenManager`` built from the entity's ``uuid`` and ``api_keys``,
+            and an initialized requester instance.
+
+        Raises:
+            KeyError: If ``model_info.requester`` is not a key of ``self.requester_dict``.
+        """
+        if isinstance(model_info, sqlalchemy.Row):
+            model_info = persistence_model.EmbeddingModel(**model_info._mapping)
+        elif isinstance(model_info, dict):
+            model_info = persistence_model.EmbeddingModel(**model_info)
+
+        # Look up the requester class by name and instantiate it with this model's config.
+        requester_inst = self.requester_dict[model_info.requester](ap=self.ap, config=model_info.requester_config)
+
+        await requester_inst.initialize()
+
+        runtime_embedding_model = requester.RuntimeEmbeddingModel(
+            model_entity=model_info,
+            token_mgr=token.TokenManager(
+                name=model_info.uuid,
+                tokens=model_info.api_keys,
+            ),
+            requester=requester_inst,
+        )
+
+        return runtime_embedding_model
+
    async def load_llm_model(
        self,
        model_info: persistence_model.LLMModel | sqlalchemy.Row[persistence_model.LLMModel] | dict,
    ):
-        """加载模型"""
+        """Load an LLM model: build its runtime wrapper and append it to ``self.llm_models``."""
        runtime_llm_model = await self.init_runtime_llm_model(model_info)
        self.llm_models.append(runtime_llm_model)

+    async def load_embedding_model(
+        self,
+        model_info: persistence_model.EmbeddingModel | sqlalchemy.Row[persistence_model.EmbeddingModel] | dict,
+    ):
+        """Load an embedding model: build its runtime wrapper and append it to ``self.embedding_models``."""
+        runtime_embedding_model = await self.init_runtime_embedding_model(model_info)
+        self.embedding_models.append(runtime_embedding_model)
+
    async def get_model_by_uuid(self, uuid: str) -> requester.RuntimeLLMModel:
-        """通过uuid获取模型"""
+        """Return the loaded LLM model whose entity uuid equals ``uuid``; raise ``ValueError`` if none matches."""
        for model in self.llm_models:
            if model.model_entity.uuid == uuid:
                return model
-        raise ValueError(f'model {uuid} not found')
+        raise ValueError(f'LLM model {uuid} not found')
+
+    async def get_embedding_model_by_uuid(self, uuid: str) -> requester.RuntimeEmbeddingModel:
+        """Return the loaded embedding model whose entity uuid equals ``uuid``; raise ``ValueError`` if none matches."""
+        for model in self.embedding_models:
+            if model.model_entity.uuid == uuid:
+                return model
+        raise ValueError(f'Embedding model {uuid} not found')

    async def remove_llm_model(self, model_uuid: str):
-        """移除模型"""
+        """Remove the first loaded LLM model whose entity uuid equals ``model_uuid``; do nothing if none matches."""
        for model in self.llm_models:
            if model.model_entity.uuid == model_uuid:
                self.llm_models.remove(model)
                return

-    def get_available_requesters_info(self) -> list[dict]:
+    async def remove_embedding_model(self, model_uuid: str):
+        """Remove the first loaded embedding model whose entity uuid equals ``model_uuid``; do nothing if none matches."""
+        for model in self.embedding_models:
+            if model.model_entity.uuid == model_uuid:
+                # Returning right after the removal keeps the in-loop mutation safe.
+                self.embedding_models.remove(model)
+                return
+
+    def get_available_requesters_info(self, model_type: str = '') -> list[dict]:
        """Return plain-dict info for the available requester components.

        Args:
            model_type: Model type to filter by (the manifests in this change
                declare ``llm`` and ``text-embedding``). The default ``''`` applies
                no filter, which keeps the previous zero-argument call working.

        Returns:
            ``to_plain_dict()`` of every component when ``model_type`` is ``''``;
            otherwise only of components whose ``spec['support_type']`` contains
            ``model_type``. A component without ``support_type`` matches no type.
        """
-        return [component.to_plain_dict() for component in self.requester_components]
+        if model_type == '':
+            return [component.to_plain_dict() for component in self.requester_components]
+
+        # A manifest without `support_type` must not crash the listing with KeyError.
+        return [
+            component.to_plain_dict()
+            for component in self.requester_components
+            if model_type in component.spec.get('support_type', [])
+        ]

    def get_available_requester_info_by_name(self, name: str) -> dict | None:
        """通过名称获取请求器信息"""
@@ -20,22 +20,45 @@ class RuntimeLLMModel:
    token_mgr: token.TokenManager
    """api key管理器"""

-    requester: LLMAPIRequester
+    requester: ProviderAPIRequester
    """请求器实例"""

    def __init__(
        self,
        model_entity: persistence_model.LLMModel,
        token_mgr: token.TokenManager,
-        requester: LLMAPIRequester,
+        requester: ProviderAPIRequester,
    ):
        # Store the model record, its API-key manager and its requester instance unchanged.
        self.model_entity = model_entity
        self.token_mgr = token_mgr
        self.requester = requester


-class LLMAPIRequester(metaclass=abc.ABCMeta):
-    """LLM API请求器"""
+class RuntimeEmbeddingModel:
+    """Runtime embedding model: a persisted embedding model record bundled with its token manager and requester."""
+
+    # Persisted model record.
+    model_entity: persistence_model.EmbeddingModel
+    """模型数据"""
+
+    # API-key manager.
+    token_mgr: token.TokenManager
+    """api key管理器"""
+
+    # Requester instance used to call the provider.
+    requester: ProviderAPIRequester
+    """请求器实例"""
+
+    def __init__(
+        self,
+        model_entity: persistence_model.EmbeddingModel,
+        token_mgr: token.TokenManager,
+        requester: ProviderAPIRequester,
+    ):
+        # Store the three collaborators unchanged; no validation happens here.
+        self.model_entity = model_entity
+        self.token_mgr = token_mgr
+        self.requester = requester
+
+
+class ProviderAPIRequester(metaclass=abc.ABCMeta):
+    """Provider API请求器"""

    name: str = None

@@ -61,6 +84,7 @@ class LLMAPIRequester(metaclass=abc.ABCMeta):
        messages: typing.List[provider_message.Message],
        funcs: typing.List[resource_tool.LLMTool] = None,
        extra_args: dict[str, typing.Any] = {},
+        remove_think: bool = False,
    ) -> provider_message.Message:
        """调用API

@@ -69,8 +93,50 @@ class LLMAPIRequester(metaclass=abc.ABCMeta):
            messages (typing.List[llm_entities.Message]): 消息对象列表
            funcs (typing.List[tools_entities.LLMFunction], optional): 使用的工具函数列表. Defaults to None.
            extra_args (dict[str, typing.Any], optional): 额外的参数. Defaults to {}.
+            remove_think (bool, optional): 是否移除思考中的消息. Defaults to False.

        Returns:
            llm_entities.Message: 返回消息对象
        """
        pass
+
+    async def invoke_llm_stream(
+        self,
+        query: pipeline_query.Query,
+        model: RuntimeLLMModel,
+        messages: typing.List[provider_message.Message],
+        funcs: typing.List[resource_tool.LLMTool] = None,
+        extra_args: dict[str, typing.Any] = {},
+        remove_think: bool = False,
+    ) -> provider_message.MessageChunk:
+        """Call the API in streaming mode.
+
+        The base implementation does nothing (``pass``).
+        NOTE(review): the annotation names ``MessageChunk`` while the Returns
+        section describes an async generator of chunks - confirm which is intended.
+
+        Args:
+            query (pipeline_query.Query): Pipeline query object for this call.
+            model (RuntimeLLMModel): Model information to use.
+            messages (typing.List[provider_message.Message]): List of message objects.
+            funcs (typing.List[resource_tool.LLMTool], optional): Tool functions to offer. Defaults to None.
+            extra_args (dict[str, typing.Any], optional): Extra arguments. Defaults to {}.
+            remove_think (bool, optional): Whether to remove thinking content from the output. Defaults to False.
+
+        Returns:
+            typing.AsyncGenerator[provider_message.MessageChunk]: The message chunks.
+        """
+        pass
+
+    async def invoke_embedding(
+        self,
+        model: RuntimeEmbeddingModel,
+        input_text: typing.List[str],
+        extra_args: dict[str, typing.Any] = {},
+    ) -> typing.List[typing.List[float]]:
+        """Call the embedding API.
+
+        The base implementation does nothing (``pass``).
+
+        Args:
+            model (RuntimeEmbeddingModel): Model information to use.
+            input_text (typing.List[str]): Input texts.
+            extra_args (dict[str, typing.Any], optional): Extra arguments. Defaults to {}.
+
+        Returns:
+            typing.List[typing.List[float]]: The returned embedding vectors.
+        """
+        pass
@@ -7,7 +7,7 @@ from . import chatcmpl


 class AI302ChatCompletions(chatcmpl.OpenAIChatCompletions):
-    """302 AI ChatCompletion API 请求器"""
+    """302.AI ChatCompletion API 请求器"""

    client: openai.AsyncClient

@@ -3,8 +3,8 @@ kind: LLMAPIRequester
 metadata:
  name: 302-ai-chat-completions
  label:
-    en_US: 302 AI
-    zh_Hans: 302 AI
+    en_US: 302.AI
+    zh_Hans: 302.AI
  icon: 302ai.png
 spec:
  config:
@@ -22,6 +22,9 @@ spec:
      type: integer
      required: true
      default: 120
+  support_type:
+    - llm
+    - text-embedding
 execution:
  python:
    path: ./302aichatcmpl.py
@@ -15,13 +15,13 @@ import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
 import langbot_plugin.api.entities.builtin.provider.message as provider_message


-class AnthropicMessages(requester.LLMAPIRequester):
+class AnthropicMessages(requester.ProviderAPIRequester):
    """Anthropic Messages API 请求器"""

    client: anthropic.AsyncAnthropic

    default_config: dict[str, typing.Any] = {
-        'base_url': 'https://api.anthropic.com/v1',
+        'base_url': 'https://api.anthropic.com',
        'timeout': 120,
    }

@@ -44,6 +44,7 @@ class AnthropicMessages(requester.LLMAPIRequester):
        self.client = anthropic.AsyncAnthropic(
            api_key='',
            http_client=httpx_client,
+            base_url=self.requester_cfg['base_url'],
        )

    async def invoke_llm(
@@ -53,6 +54,7 @@ class AnthropicMessages(requester.LLMAPIRequester):
        messages: typing.List[provider_message.Message],
        funcs: typing.List[resource_tool.LLMTool] = None,
        extra_args: dict[str, typing.Any] = {},
+        remove_think: bool = False,
    ) -> provider_message.Message:
        self.client.api_key = model.token_mgr.get_token()

@@ -89,7 +91,8 @@ class AnthropicMessages(requester.LLMAPIRequester):
                            {
                                'type': 'tool_result',
                                'tool_use_id': tool_call_id,
-                                'content': m.content,
+                                'is_error': False,
+                                'content': [{'type': 'text', 'text': m.content}],
                            }
                        ],
                    }
@@ -133,6 +136,9 @@ class AnthropicMessages(requester.LLMAPIRequester):

        args['messages'] = req_messages

+        if 'thinking' in args:
+            args['thinking'] = {'type': 'enabled', 'budget_tokens': 10000}
+
        if funcs:
            tools = await self.ap.tool_mgr.generate_tools_for_anthropic(funcs)

@@ -140,19 +146,17 @@ class AnthropicMessages(requester.LLMAPIRequester):
                args['tools'] = tools

        try:
-            # print(json.dumps(args, indent=4, ensure_ascii=False))
            resp = await self.client.messages.create(**args)

            args = {
                'content': '',
                'role': resp.role,
            }
-
            assert type(resp) is anthropic.types.message.Message

            for block in resp.content:
-                if block.type == 'thinking':
-                    args['content'] = '<think>' + block.thinking + '</think>\n' + args['content']
+                if not remove_think and block.type == 'thinking':
+                    args['content'] = '<think>\n' + block.thinking + '\n</think>\n' + args['content']
                elif block.type == 'text':
                    args['content'] += block.text
                elif block.type == 'tool_use':
@@ -176,3 +180,191 @@ class AnthropicMessages(requester.LLMAPIRequester):
                raise errors.RequesterError(f'模型无效: {e.message}')
            else:
                raise errors.RequesterError(f'请求地址无效: {e.message}')
+
+    async def invoke_llm_stream(
+        self,
+        query: pipeline_query.Query,
+        model: requester.RuntimeLLMModel,
+        messages: typing.List[provider_message.Message],
+        funcs: typing.List[resource_tool.LLMTool] = None,
+        extra_args: dict[str, typing.Any] = {},
+        remove_think: bool = False,
+    ) -> typing.AsyncGenerator[provider_message.MessageChunk, None]:
+        """Stream a reply from the Anthropic Messages API as incremental chunks.
+
+        Args:
+            query: Pipeline query for this request (not read by this method).
+            model: Runtime LLM model; supplies the API key and the model name.
+            messages: Conversation messages. The first ``system`` message, if any,
+                is sent through the ``system`` request field instead of the
+                message list. The caller's list is not modified.
+            funcs: Tools offered to the model, if any.
+            extra_args: Extra request arguments; copied before use.
+            remove_think: When True, thinking deltas are dropped and no
+                ``<think>`` markers are emitted.
+
+        Yields:
+            provider_message.MessageChunk: Chunks built from stream events.
+            ``content`` holds only the text that arrived with that event, so
+            consumers can concatenate chunk contents without duplicates.
+
+        Raises:
+            errors.RequesterError: On authentication, bad-request or not-found errors.
+        """
+        self.client.api_key = model.token_mgr.get_token()
+
+        args = extra_args.copy()
+        args['model'] = model.model_entity.name
+        args['stream'] = True
+
+        # Split off the first system message: it is sent as a separate request field.
+        system_role_message = None
+        system_index = None
+
+        for idx, m in enumerate(messages):
+            if m.role == 'system':
+                system_role_message = m
+                system_index = idx
+                break
+
+        if system_role_message:
+            # Filter into a new list so the caller's list keeps its system message
+            # (popping in place would lose the system prompt on a retry).
+            messages = [m for idx, m in enumerate(messages) if idx != system_index]
+
+        if isinstance(system_role_message, provider_message.Message) and isinstance(system_role_message.content, str):
+            args['system'] = system_role_message.content
+
+        req_messages = []
+
+        for m in messages:
+            if m.role == 'tool':
+                # Tool results are sent as a user message holding one tool_result block.
+                req_messages.append(
+                    {
+                        'role': 'user',
+                        'content': [
+                            {
+                                'type': 'tool_result',
+                                'tool_use_id': m.tool_call_id,
+                                'is_error': False,  # always reported as success for now
+                                # Wrapped in a list of blocks; only text blocks are produced for now.
+                                'content': [{'type': 'text', 'text': m.content}],
+                            }
+                        ],
+                    }
+                )
+
+                continue
+
+            msg_dict = m.dict(exclude_none=True)
+
+            if isinstance(m.content, str) and m.content.strip() != '':
+                msg_dict['content'] = [{'type': 'text', 'text': m.content}]
+            elif isinstance(m.content, list):
+                for ce_idx, ce in enumerate(m.content):
+                    if ce.type == 'image_base64':
+                        image_b64, image_format = await image.extract_b64_and_format(ce.image_base64)
+
+                        msg_dict['content'][ce_idx] = {
+                            'type': 'image',
+                            'source': {
+                                'type': 'base64',
+                                'media_type': f'image/{image_format}',
+                                'data': image_b64,
+                            },
+                        }
+
+            # `exclude_none=True` drops a None content, and an empty string can also
+            # show up here; normalise both to an empty block list so tool_use blocks
+            # can be appended below.
+            if msg_dict.get('content') in (None, ''):
+                msg_dict['content'] = []
+
+            if m.tool_calls:
+                for tool_call in m.tool_calls:
+                    msg_dict['content'].append(
+                        {
+                            'type': 'tool_use',
+                            'id': tool_call.id,
+                            'name': tool_call.function.name,
+                            'input': json.loads(tool_call.function.arguments),
+                        }
+                    )
+
+                del msg_dict['tool_calls']
+
+            req_messages.append(msg_dict)
+
+        # Any `thinking` entry passed in extra_args is replaced by this fixed configuration.
+        if 'thinking' in args:
+            args['thinking'] = {'type': 'enabled', 'budget_tokens': 10000}
+
+        args['messages'] = req_messages
+
+        if funcs:
+            tools = await self.ap.tool_mgr.generate_tools_for_anthropic(funcs)
+
+            if tools:
+                args['tools'] = tools
+
+        try:
+            role = 'assistant'  # default role
+            think_started = False
+            think_ended = False
+            finish_reason = False
+            tool_name = ''
+            tool_id = ''
+            async for chunk in await self.client.messages.create(**args):
+                # Reset per event: otherwise the previous text delta would be emitted
+                # again on tool-argument and message-delta chunks.
+                content = ''
+                tool_call = {'id': None, 'function': {'name': None, 'arguments': None}, 'type': 'function'}
+                if isinstance(chunk, anthropic.types.raw_content_block_start_event.RawContentBlockStartEvent):
+                    # Start of a content block: remember tool identity / thinking state.
+                    if chunk.content_block.type == 'tool_use':
+                        if chunk.content_block.name is not None:
+                            tool_name = chunk.content_block.name
+                        if chunk.content_block.id is not None:
+                            tool_id = chunk.content_block.id
+
+                        tool_call['function']['name'] = tool_name
+                        tool_call['function']['arguments'] = ''
+                        tool_call['id'] = tool_id
+
+                    if not remove_think:
+                        if chunk.content_block.type == 'thinking':
+                            think_started = True
+                        elif chunk.content_block.type == 'text' and chunk.index != 0:
+                            # A text block that is not the first block follows the thinking block.
+                            think_ended = True
+                        continue
+                elif isinstance(chunk, anthropic.types.raw_content_block_delta_event.RawContentBlockDeltaEvent):
+                    if chunk.delta.type == 'thinking_delta':
+                        if think_started:
+                            think_started = False
+                            content = '<think>\n' + chunk.delta.thinking
+                        elif remove_think:
+                            continue
+                        else:
+                            content = chunk.delta.thinking
+                    elif chunk.delta.type == 'text_delta':
+                        if think_ended:
+                            think_ended = False
+                            content = '\n</think>\n' + chunk.delta.text
+                        else:
+                            content = chunk.delta.text
+                    elif chunk.delta.type == 'input_json_delta':
+                        # Tool arguments arrive as partial JSON; re-attach the remembered id/name.
+                        tool_call['function']['arguments'] = chunk.delta.partial_json
+                        tool_call['function']['name'] = tool_name
+                        tool_call['id'] = tool_id
+                elif isinstance(chunk, anthropic.types.raw_content_block_stop_event.RawContentBlockStopEvent):
+                    continue  # end of a content block: nothing to emit
+
+                elif isinstance(chunk, anthropic.types.raw_message_delta_event.RawMessageDeltaEvent):
+                    # NOTE(review): only `end_turn` marks the stream final; other stop
+                    # reasons never set is_final - confirm this is intended.
+                    if chunk.delta.stop_reason == 'end_turn':
+                        finish_reason = True
+                elif isinstance(chunk, anthropic.types.raw_message_stop_event.RawMessageStopEvent):
+                    continue  # appears to be the end of the whole message
+                else:
+                    self.ap.logger.debug(f'anthropic chunk: {chunk}')
+                    continue
+
+                yield provider_message.MessageChunk(
+                    content=content,
+                    role=role,
+                    is_final=finish_reason,
+                    tool_calls=None if tool_call['id'] is None else [tool_call],
+                )
+        except anthropic.AuthenticationError as e:
+            raise errors.RequesterError(f'api-key 无效: {e.message}') from e
+        except anthropic.BadRequestError as e:
+            raise errors.RequesterError(str(e.message)) from e
+        except anthropic.NotFoundError as e:
+            if 'model: ' in str(e):
+                raise errors.RequesterError(f'模型无效: {e.message}') from e
+            else:
+                raise errors.RequesterError(f'请求地址无效: {e.message}') from e
@@ -14,7 +14,7 @@ spec:
        zh_Hans: 基础 URL
      type: string
      required: true
-      default: "https://api.anthropic.com/v1"
+      default: "https://api.anthropic.com"
    - name: timeout
      label:
        en_US: Timeout
@@ -22,6 +22,8 @@ spec:
      type: integer
      required: true
      default: 120
+  support_type:
+    - llm
 execution:
  python:
    path: ./anthropicmsgs.py
@@ -22,6 +22,8 @@ spec:
      type: integer
      required: true
      default: 120
+  support_type:
+    - llm
 execution:
  python:
    path: ./bailianchatcmpl.py
@@ -13,7 +13,7 @@ import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
 import langbot_plugin.api.entities.builtin.provider.message as provider_message


-class OpenAIChatCompletions(requester.LLMAPIRequester):
+class OpenAIChatCompletions(requester.ProviderAPIRequester):
    """OpenAI ChatCompletion API 请求器"""

    client: openai.AsyncClient
@@ -38,9 +38,18 @@ class OpenAIChatCompletions(requester.LLMAPIRequester):
    ) -> chat_completion.ChatCompletion:
        return await self.client.chat.completions.create(**args, extra_body=extra_body)

+    async def _req_stream(
+        self,
+        args: dict,
+        extra_body: dict = {},
+    ):
+        """Send a chat-completions request and re-yield every chunk of the response.
+
+        Args:
+            args: Keyword arguments for ``chat.completions.create``; the caller in
+                this class sets ``stream=True`` before calling.
+            extra_body: Passed through as ``extra_body``. The shared default dict is
+                only read here, never mutated.
+
+        Yields:
+            Each chunk produced by iterating the awaited ``create`` call.
+        """
+        async for chunk in await self.client.chat.completions.create(**args, extra_body=extra_body):
+            yield chunk
+
    async def _make_msg(
        self,
        chat_completion: chat_completion.ChatCompletion,
+        remove_think: bool = False,
    ) -> provider_message.Message:
        chatcmpl_message = chat_completion.choices[0].message.model_dump()

@@ -48,16 +57,191 @@ class OpenAIChatCompletions(requester.LLMAPIRequester):
        if 'role' not in chatcmpl_message or chatcmpl_message['role'] is None:
            chatcmpl_message['role'] = 'assistant'

-        reasoning_content = chatcmpl_message['reasoning_content'] if 'reasoning_content' in chatcmpl_message else None
+        # 处理思维链
+        content = chatcmpl_message.get('content', '')
+        reasoning_content = chatcmpl_message.get('reasoning_content', None)

-        # deepseek的reasoner模型
-        if reasoning_content is not None:
-            chatcmpl_message['content'] = '<think>\n' + reasoning_content + '\n</think>\n' + chatcmpl_message['content']
+        processed_content, _ = await self._process_thinking_content(
+            content=content, reasoning_content=reasoning_content, remove_think=remove_think
+        )
+
+        chatcmpl_message['content'] = processed_content
+
+        # 移除 reasoning_content 字段，避免传递给 Message
+        if 'reasoning_content' in chatcmpl_message:
+            del chatcmpl_message['reasoning_content']

        message = provider_message.Message(**chatcmpl_message)

        return message

+    async def _process_thinking_content(
+        self,
+        content: str,
+        reasoning_content: str = None,
+        remove_think: bool = False,
+    ) -> tuple[str, str]:
+        """Merge or strip chain-of-thought text in a completion's content.
+
+        Thinking text is collected from ``reasoning_content`` and from any
+        ``<think>...</think>`` spans inside ``content``; those spans are removed
+        from ``content``.
+
+        Args:
+            content: Original message content; may be None or empty.
+            reasoning_content: Value of the ``reasoning_content`` field, if any.
+            remove_think: Whether to drop the thinking text from the result.
+
+        Returns:
+            ``(processed_content, thinking_content)``. With ``remove_think`` the
+            second item is ``''``. Otherwise, when thinking text exists, it is
+            prepended to the content as one ``<think>`` block.
+        """
+        thinking_content = reasoning_content or ''
+
+        # Pull inline <think> spans out of the content and append them to the thinking text.
+        if content and '<think>' in content and '</think>' in content:
+            import re
+
+            think_pattern = r'<think>(.*?)</think>'
+            think_matches = re.findall(think_pattern, content, re.DOTALL)
+            if think_matches:
+                extracted = '\n'.join(think_matches)
+                thinking_content = f'{thinking_content}\n{extracted}' if thinking_content else extracted
+                content = re.sub(think_pattern, '', content, flags=re.DOTALL).strip()
+
+        if remove_think:
+            return content, ''
+
+        if thinking_content:
+            # `content` can be None (e.g. a reply carrying only reasoning); without
+            # the fallback the f-string would render the literal text "None".
+            content = f'<think>\n{thinking_content}\n</think>\n{content or ""}'.strip()
+        return content, thinking_content
+
+    async def _closure_stream(
+        self,
+        query: pipeline_query.Query,
+        req_messages: list[dict],
+        use_model: requester.RuntimeLLMModel,
+        use_funcs: list[resource_tool.LLMTool] = None,
+        extra_args: dict[str, typing.Any] = {},
+        remove_think: bool = False,
+    ) -> provider_message.MessageChunk:
+        """Run one streaming chat-completions request and yield incremental chunks.
+
+        This is an async generator: each yielded ``MessageChunk`` carries only the
+        delta of the corresponding stream chunk. ``reasoning_content`` deltas are
+        wrapped in ``<think>`` / ``</think>`` markers, or skipped entirely when
+        ``remove_think`` is True.
+
+        Args:
+            query: Pipeline query for this request (not read by this method).
+            req_messages: Request messages as plain dicts.
+            use_model: Runtime model; supplies the API key and the model name.
+            use_funcs: Tools offered to the model, if any.
+            extra_args: Sent to the API as ``extra_body``.
+            remove_think: Whether to drop reasoning deltas.
+
+        Yields:
+            provider_message.MessageChunk: role, optional content, optional
+            tool_calls and ``is_final`` (True once a finish reason is present).
+        """
+        self.client.api_key = use_model.token_mgr.get_token()
+
+        args = {}
+        args['model'] = use_model.model_entity.name
+
+        if use_funcs:
+            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
+            if tools:
+                args['tools'] = tools
+
+        # Messages for this request.
+        # NOTE(review): this is a shallow copy, so the image rewrite below mutates
+        # the dicts that `req_messages` also holds.
+        messages = req_messages.copy()
+
+        # Vision: convert `image_base64` parts into OpenAI-style `image_url` parts.
+        for msg in messages:
+            if 'content' in msg and isinstance(msg['content'], list):
+                for me in msg['content']:
+                    if me['type'] == 'image_base64':
+                        me['image_url'] = {'url': me['image_base64']}
+                        me['type'] = 'image_url'
+                        del me['image_base64']
+
+        args['messages'] = messages
+        args['stream'] = True
+
+        # Streaming state.
+        # tool_calls_map: dict[str, provider_message.ToolCall] = {}
+        chunk_idx = 0
+        thinking_started = False
+        thinking_ended = False
+        role = 'assistant'  # default role
+        tool_id = ''
+        tool_name = ''
+        # accumulated_reasoning = ''  # only used to decide when the reasoning ends
+
+        async for chunk in self._req_stream(args, extra_body=extra_args):
+            # Parse the chunk.
+
+            if hasattr(chunk, 'choices') and chunk.choices:
+                choice = chunk.choices[0]
+                delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
+
+                finish_reason = getattr(choice, 'finish_reason', None)
+            else:
+                delta = {}
+                finish_reason = None
+            # Take the role from the first chunk that carries one and reuse it afterwards.
+            if 'role' in delta and delta['role']:
+                role = delta['role']
+
+            # Incremental content.
+            delta_content = delta.get('content', '')
+            reasoning_content = delta.get('reasoning_content', '')
+
+            # Handle reasoning_content.
+            if reasoning_content:
+                # accumulated_reasoning += reasoning_content
+                # With remove_think set, skip reasoning chunks entirely.
+                if remove_think:
+                    chunk_idx += 1
+                    continue
+
+                # First reasoning delta: prepend the opening <think> tag.
+                if not thinking_started:
+                    thinking_started = True
+                    delta_content = '<think>\n' + reasoning_content
+                else:
+                    # Keep streaming the reasoning text.
+                    delta_content = reasoning_content
+            elif thinking_started and not thinking_ended and delta_content:
+                # Reasoning is over and normal content begins: prepend the closing </think> tag.
+                # NOTE(review): if no normal content follows the reasoning, the closing
+                # tag is never emitted.
+                thinking_ended = True
+                delta_content = '\n</think>\n' + delta_content
+
+            # Handling of <think> tags already present in content (if they should be removed):
+            # if delta_content and remove_think and '<think>' in delta_content:
+            #     import re
+            #
+            #     # strip <think> tags and their content
+            #     delta_content = re.sub(r'<think>.*?</think>', '', delta_content, flags=re.DOTALL)
+
+            # Tool-call deltas: remember id/name when a delta carries both and
+            # fill them back in on deltas that do not.
+            # delta_tool_calls = None
+            if delta.get('tool_calls'):
+                for tool_call in delta['tool_calls']:
+                    if tool_call['id'] and tool_call['function']['name']:
+                        tool_id = tool_call['id']
+                        tool_name = tool_call['function']['name']
+                    else:
+                        tool_call['id'] = tool_id
+                        tool_call['function']['name'] = tool_name
+                    if tool_call['type'] is None:
+                        tool_call['type'] = 'function'
+
+            # Skip an empty first chunk (role only, no content).
+            if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'):
+                chunk_idx += 1
+                continue
+            # Build the MessageChunk - incremental content only.
+            chunk_data = {
+                'role': role,
+                'content': delta_content if delta_content else None,
+                'tool_calls': delta.get('tool_calls'),
+                'is_final': bool(finish_reason),
+            }
+
+            # Drop None values.
+            chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
+
+            yield provider_message.MessageChunk(**chunk_data)
+            chunk_idx += 1
+
    async def _closure(
        self,
        query: pipeline_query.Query,
@@ -65,6 +249,7 @@ class OpenAIChatCompletions(requester.LLMAPIRequester):
        use_model: requester.RuntimeLLMModel,
        use_funcs: list[resource_tool.LLMTool] = None,
        extra_args: dict[str, typing.Any] = {},
+        remove_think: bool = False,
    ) -> provider_message.Message:
        self.client.api_key = use_model.token_mgr.get_token()

@@ -92,10 +277,10 @@ class OpenAIChatCompletions(requester.LLMAPIRequester):
        args['messages'] = messages

        # 发送请求
-        resp = await self._req(args, extra_body=extra_args)

+        resp = await self._req(args, extra_body=extra_args)
        # 处理请求结果
-        message = await self._make_msg(resp)
+        message = await self._make_msg(resp, remove_think)

        return message

@@ -106,6 +291,7 @@ class OpenAIChatCompletions(requester.LLMAPIRequester):
        messages: typing.List[provider_message.Message],
        funcs: typing.List[resource_tool.LLMTool] = None,
        extra_args: dict[str, typing.Any] = {},
+        remove_think: bool = False,
    ) -> provider_message.Message:
        req_messages = []  # req_messages 仅用于类内，外部同步由 query.messages 进行
        for m in messages:
@@ -119,13 +305,90 @@ class OpenAIChatCompletions(requester.LLMAPIRequester):
            req_messages.append(msg_dict)

        try:
-            return await self._closure(
+            msg = await self._closure(
                query=query,
                req_messages=req_messages,
                use_model=model,
                use_funcs=funcs,
                extra_args=extra_args,
+                remove_think=remove_think,
            )
+            return msg
+        except asyncio.TimeoutError:
+            raise errors.RequesterError('请求超时')
+        except openai.BadRequestError as e:
+            if 'context_length_exceeded' in e.message:
+                raise errors.RequesterError(f'上文过长，请重置会话: {e.message}')
+            else:
+                raise errors.RequesterError(f'请求参数错误: {e.message}')
+        except openai.AuthenticationError as e:
+            raise errors.RequesterError(f'无效的 api-key: {e.message}')
+        except openai.NotFoundError as e:
+            raise errors.RequesterError(f'请求路径错误: {e.message}')
+        except openai.RateLimitError as e:
+            raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}')
+        except openai.APIError as e:
+            raise errors.RequesterError(f'请求错误: {e.message}')
+
+    async def invoke_embedding(
+        self,
+        model: requester.RuntimeEmbeddingModel,
+        input_text: list[str],
+        extra_args: dict[str, typing.Any] = {},
+    ) -> list[list[float]]:
+        """Call the embeddings endpoint and return the ``embedding`` of each returned item.
+
+        Request arguments are layered: ``model``/``input`` first, then the model
+        entity's ``extra_args`` (only when non-empty), then the per-call
+        ``extra_args``; later layers override earlier ones.
+
+        Raises:
+            errors.RequesterError: on ``asyncio.TimeoutError`` or
+                ``openai.BadRequestError``. Any other exception propagates unchanged.
+        """
+        self.client.api_key = model.token_mgr.get_token()
+
+        request_args = {'model': model.model_entity.name, 'input': input_text}
+
+        entity_extra = model.model_entity.extra_args
+        if entity_extra:
+            request_args.update(entity_extra)
+        request_args.update(extra_args)
+
+        try:
+            response = await self.client.embeddings.create(**request_args)
+        except asyncio.TimeoutError:
+            raise errors.RequesterError('请求超时')
+        except openai.BadRequestError as e:
+            raise errors.RequesterError(f'请求参数错误: {e.message}')
+
+        # One vector per entry of the response's ``data`` list, in response order.
+        return [item.embedding for item in response.data]
+
+    async def invoke_llm_stream(
+        self,
+        query: pipeline_query.Query,
+        model: requester.RuntimeLLMModel,
+        messages: typing.List[provider_message.Message],
+        funcs: typing.List[resource_tool.LLMTool] = None,
+        extra_args: dict[str, typing.Any] = {},
+        remove_think: bool = False,
+    ) -> provider_message.MessageChunk:
+        req_messages = []  # req_messages 仅用于类内，外部同步由 query.messages 进行
+        for m in messages:
+            msg_dict = m.dict(exclude_none=True)
+            content = msg_dict.get('content')
+            if isinstance(content, list):
+                # 检查 content 列表中是否每个部分都是文本
+                if all(isinstance(part, dict) and part.get('type') == 'text' for part in content):
+                    # 将所有文本部分合并为一个字符串
+                    msg_dict['content'] = '\n'.join(part['text'] for part in content)
+            req_messages.append(msg_dict)
+
+        try:
+            async for item in self._closure_stream(
+                query=query,
+                req_messages=req_messages,
+                use_model=model,
+                use_funcs=funcs,
+                extra_args=extra_args,
+                remove_think=remove_think,
+            ):
+                yield item
+
        except asyncio.TimeoutError:
            raise errors.RequesterError('请求超时')
        except openai.BadRequestError as e:
@@ -22,6 +22,9 @@ spec:
      type: integer
      required: true
      default: 120
+  support_type:
+    - llm
+    - text-embedding
 execution:
  python:
    path: ./chatcmpl.py
@@ -0,0 +1,17 @@
+from __future__ import annotations
+
+import typing
+import openai
+
+from . import chatcmpl
+
+
+class CompShareChatCompletions(chatcmpl.OpenAIChatCompletions):
+    """Requester for the CompShare ChatCompletion API.
+
+    The class body only re-declares the ``client`` annotation and overrides
+    ``default_config``; all request logic is inherited from
+    ``chatcmpl.OpenAIChatCompletions``.
+    """
+
+    client: openai.AsyncClient
+
+    # Defaults used when the user does not override them; the values mirror the
+    # defaults declared in the requester's YAML manifest.
+    # NOTE(review): timeout is presumably in seconds - confirm against the base class.
+    default_config: dict[str, typing.Any] = {
+        'base_url': 'https://api.modelverse.cn/v1',
+        'timeout': 120,
+    }
@@ -0,0 +1,30 @@
+apiVersion: v1
+kind: LLMAPIRequester
+metadata:
+  name: compshare-chat-completions
+  label:
+    en_US: CompShare
+    zh_Hans: 优云智算
+  icon: compshare.png
+spec:
+  config:
+    - name: base_url
+      label:
+        en_US: Base URL
+        zh_Hans: 基础 URL
+      type: string
+      required: true
+      default: "https://api.modelverse.cn/v1"
+    - name: timeout
+      label:
+        en_US: Timeout
+        zh_Hans: 超时时间
+      type: integer
+      required: true
+      default: 120
+  support_type:
+    - llm
+execution:
+  python:
+    path: ./compsharechatcmpl.py
+    attr: CompShareChatCompletions
@@ -24,6 +24,7 @@ class DeepseekChatCompletions(chatcmpl.OpenAIChatCompletions):
        use_model: requester.RuntimeLLMModel,
        use_funcs: list[resource_tool.LLMTool] = None,
        extra_args: dict[str, typing.Any] = {},
+        remove_think: bool = False,
    ) -> provider_message.Message:
        self.client.api_key = use_model.token_mgr.get_token()

@@ -49,10 +50,11 @@ class DeepseekChatCompletions(chatcmpl.OpenAIChatCompletions):
        # 发送请求
        resp = await self._req(args, extra_body=extra_args)

+        # print(resp)
+
        if resp is None:
            raise errors.RequesterError('接口返回为空，请确定模型提供商服务是否正常')
-
        # 处理请求结果
-        message = await self._make_msg(resp)
+        message = await self._make_msg(resp, remove_think)

        return message
@@ -4,7 +4,7 @@ metadata:
  name: deepseek-chat-completions
  label:
    en_US: DeepSeek
-    zh_Hans: 深度求索
+    zh_Hans: DeepSeek
  icon: deepseek.svg
 spec:
  config:
@@ -22,6 +22,8 @@ spec:
      type: integer
      required: true
      default: 120
+  support_type:
+    - llm
 execution:
  python:
    path: ./deepseekchatcmpl.py
@@ -4,6 +4,13 @@ import typing

 from . import chatcmpl

+import uuid
+
+from .. import requester
+import langbot_plugin.api.entities.builtin.provider.message as provider_message
+import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
+import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
+

 class GeminiChatCompletions(chatcmpl.OpenAIChatCompletions):
    """Google Gemini API 请求器"""
@@ -12,3 +19,124 @@ class GeminiChatCompletions(chatcmpl.OpenAIChatCompletions):
        'base_url': 'https://generativelanguage.googleapis.com/v1beta/openai',
        'timeout': 120,
    }
+
+    async def _closure_stream(
+        self,
+        query: pipeline_query.Query,
+        req_messages: list[dict],
+        use_model: requester.RuntimeLLMModel,
+        use_funcs: list[resource_tool.LLMTool] = None,
+        extra_args: dict[str, typing.Any] = {},
+        remove_think: bool = False,
+    ) -> provider_message.MessageChunk:
+        """Stream one chat completion and yield it as incremental ``MessageChunk`` objects.
+
+        This is an async generator: each yielded chunk carries only the delta of
+        the corresponding upstream chunk (role, optional content, optional tool
+        calls) plus ``is_final``, which is true when the upstream chunk has a
+        ``finish_reason``.
+
+        Args:
+            query: The pipeline query; not read by this method.
+            req_messages: Request messages as dicts. ``image_base64`` content
+                parts are rewritten in place to ``image_url`` parts.
+            use_model: Supplies the API token and the model name.
+            use_funcs: Tools to expose to the model, if any.
+            extra_args: Passed to ``_req_stream`` as ``extra_body``.
+            remove_think: When true, chunks carrying ``reasoning_content`` are
+                dropped entirely; otherwise reasoning is emitted as content
+                wrapped in ``<think>`` ... ``</think>``.
+        """
+        self.client.api_key = use_model.token_mgr.get_token()
+
+        args = {}
+        args['model'] = use_model.model_entity.name
+
+        if use_funcs:
+            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
+            if tools:
+                args['tools'] = tools
+
+        # Messages for this request (shallow copy: the dicts themselves are shared).
+        messages = req_messages.copy()
+
+        # Vision: convert image_base64 parts into the image_url shape.
+        for msg in messages:
+            if 'content' in msg and isinstance(msg['content'], list):
+                for me in msg['content']:
+                    if me['type'] == 'image_base64':
+                        me['image_url'] = {'url': me['image_base64']}
+                        me['type'] = 'image_url'
+                        del me['image_base64']
+
+        args['messages'] = messages
+        args['stream'] = True
+
+        # Streaming state carried across chunks.
+        chunk_idx = 0
+        thinking_started = False
+        thinking_ended = False
+        role = 'assistant'  # default role until a chunk provides one
+        tool_id = ''
+        tool_name = ''
+
+        async for chunk in self._req_stream(args, extra_body=extra_args):
+            # Parse the chunk.
+            if hasattr(chunk, 'choices') and chunk.choices:
+                choice = chunk.choices[0]
+                delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
+
+                finish_reason = getattr(choice, 'finish_reason', None)
+            else:
+                delta = {}
+                finish_reason = None
+            # Remember the role once a chunk carries it and reuse it afterwards.
+            if 'role' in delta and delta['role']:
+                role = delta['role']
+
+            # Incremental content.
+            delta_content = delta.get('content', '')
+            reasoning_content = delta.get('reasoning_content', '')
+
+            # Handle reasoning_content.
+            if reasoning_content:
+                # With remove_think set, skip reasoning chunks altogether.
+                if remove_think:
+                    chunk_idx += 1
+                    continue
+
+                # First reasoning chunk: open the <think> tag.
+                if not thinking_started:
+                    thinking_started = True
+                    delta_content = '<think>\n' + reasoning_content
+                else:
+                    # Subsequent reasoning chunks are emitted as-is.
+                    delta_content = reasoning_content
+            elif thinking_started and not thinking_ended and delta_content:
+                # Reasoning is over and normal content begins: close the tag.
+                thinking_ended = True
+                delta_content = '\n</think>\n' + delta_content
+
+            # Handle incremental tool calls.
+            if delta.get('tool_calls'):
+                for tool_call in delta['tool_calls']:
+                    if tool_call['id']:
+                        # Keep an id supplied by the provider instead of overwriting it.
+                        tool_id = tool_call['id']
+                    elif not tool_id:
+                        # Missing id ('' or None): synthesize one and reuse it afterwards.
+                        tool_id = str(uuid.uuid4())
+                    if tool_call['function']['name']:
+                        tool_name = tool_call['function']['name']
+                    tool_call['id'] = tool_id
+                    tool_call['function']['name'] = tool_name
+                    if tool_call['type'] is None:
+                        tool_call['type'] = 'function'
+
+            # Skip an empty first chunk (role only, no payload).
+            if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'):
+                chunk_idx += 1
+                continue
+            # Build the MessageChunk - incremental content only.
+            chunk_data = {
+                'role': role,
+                'content': delta_content if delta_content else None,
+                'tool_calls': delta.get('tool_calls'),
+                'is_final': bool(finish_reason),
+            }
+
+            # Drop None values.
+            chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
+
+            yield provider_message.MessageChunk(**chunk_data)
+            chunk_idx += 1
@@ -22,6 +22,8 @@ spec:
      type: integer
      required: true
      default: 120
+  support_type:
+    - llm
 execution:
  python:
    path: ./geminichatcmpl.py
@@ -3,49 +3,13 @@ from __future__ import annotations

 import typing

-from . import chatcmpl
-from .. import requester
-import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
-import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
-import langbot_plugin.api.entities.builtin.provider.message as provider_message
+from . import ppiochatcmpl


-class GiteeAIChatCompletions(chatcmpl.OpenAIChatCompletions):
+class GiteeAIChatCompletions(ppiochatcmpl.PPIOChatCompletions):
    """Gitee AI ChatCompletions API 请求器"""

    default_config: dict[str, typing.Any] = {
        'base_url': 'https://ai.gitee.com/v1',
        'timeout': 120,
    }
-
-    async def _closure(
-        self,
-        query: pipeline_query.Query,
-        req_messages: list[dict],
-        use_model: requester.RuntimeLLMModel,
-        use_funcs: list[resource_tool.LLMTool] = None,
-        extra_args: dict[str, typing.Any] = {},
-    ) -> provider_message.Message:
-        self.client.api_key = use_model.token_mgr.get_token()
-
-        args = {}
-        args['model'] = use_model.model_entity.name
-
-        if use_funcs:
-            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
-
-            if tools:
-                args['tools'] = tools
-
-        # gitee 不支持多模态，把content都转换成纯文字
-        for m in req_messages:
-            if 'content' in m and isinstance(m['content'], list):
-                m['content'] = ' '.join([c['text'] for c in m['content']])
-
-        args['messages'] = req_messages
-
-        resp = await self._req(args, extra_body=extra_args)
-
-        message = await self._make_msg(resp)
-
-        return message
@@ -22,6 +22,9 @@ spec:
      type: integer
      required: true
      default: 120
+  support_type:
+    - llm
+    - text-embedding
 execution:
  python:
    path: ./giteeaichatcmpl.py
@@ -22,6 +22,9 @@ spec:
      type: integer
      required: true
      default: 120
+  support_type:
+    - llm
+    - text-embedding
 execution:
  python:
    path: ./lmstudiochatcmpl.py
@@ -5,7 +5,6 @@ import typing

 import openai
 import openai.types.chat.chat_completion as chat_completion
-import openai.types.chat.chat_completion_message_tool_call as chat_completion_message_tool_call
 import httpx

 from .. import entities, errors, requester
@@ -14,7 +13,7 @@ import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
 import langbot_plugin.api.entities.builtin.provider.message as provider_message


-class ModelScopeChatCompletions(requester.LLMAPIRequester):
+class ModelScopeChatCompletions(requester.ProviderAPIRequester):
    """ModelScope ChatCompletion API 请求器"""

    client: openai.AsyncClient
@@ -34,9 +33,11 @@ class ModelScopeChatCompletions(requester.LLMAPIRequester):

    async def _req(
        self,
+        query: pipeline_query.Query,
        args: dict,
        extra_body: dict = {},
-    ) -> chat_completion.ChatCompletion:
+        remove_think: bool = False,
+    ) -> list[dict[str, typing.Any]]:
+        """Run a streamed completion and fold all chunks into a single message dict.
+
+        ``args['stream']`` is forced to ``True``. Content deltas are concatenated,
+        tool-call argument fragments are merged by ``index``, and the result is
+        returned as a one-element list holding a dict with ``content``, ``role``
+        and ``tool_calls`` (``None`` when no tool call was collected).
+
+        When ``remove_think`` is true, chunks carrying ``reasoning_content`` are
+        skipped; otherwise reasoning is wrapped in ``<think>`` ... ``</think>``
+        ahead of the normal content.
+        """
        args['stream'] = True

        chunk = None
@@ -47,73 +48,71 @@ class ModelScopeChatCompletions(requester.LLMAPIRequester):

        resp_gen: openai.AsyncStream = await self.client.chat.completions.create(**args, extra_body=extra_body)

+        chunk_idx = 0
+        thinking_started = False
+        thinking_ended = False
+        tool_id = ''
+        tool_name = ''
+        message_delta = {}
        async for chunk in resp_gen:
-            # print(chunk)
            if not chunk or not chunk.id or not chunk.choices or not chunk.choices[0] or not chunk.choices[0].delta:
                continue

-            if chunk.choices[0].delta.content is not None:
-                pending_content += chunk.choices[0].delta.content
+            delta = chunk.choices[0].delta.model_dump() if hasattr(chunk.choices[0], 'delta') else {}
+            reasoning_content = delta.get('reasoning_content')
+            # Handle reasoning_content.
+            if reasoning_content:
+                # With remove_think set, skip reasoning chunks altogether.
+                if remove_think:
+                    chunk_idx += 1
+                    continue

-            if chunk.choices[0].delta.tool_calls is not None:
-                for tool_call in chunk.choices[0].delta.tool_calls:
-                    if tool_call.function.arguments is None:
+                # First reasoning chunk: open the <think> tag.
+                if not thinking_started:
+                    thinking_started = True
+                    pending_content += '<think>\n' + reasoning_content
+                else:
+                    # Subsequent reasoning chunks are appended as-is.
+                    pending_content += reasoning_content
+            elif thinking_started and not thinking_ended and delta.get('content'):
+                # Reasoning is over and normal content begins: close the tag only.
+                # The content itself is appended once by the check below; adding
+                # it here as well would duplicate the first content chunk.
+                thinking_ended = True
+                pending_content += '\n</think>\n'
+
+            if delta.get('content') is not None:
+                pending_content += delta.get('content')
+
+            if delta.get('tool_calls') is not None:
+                for tool_call in delta.get('tool_calls'):
+                    if tool_call['id'] != '':
+                        tool_id = tool_call['id']
+                    if tool_call['function']['name'] is not None:
+                        tool_name = tool_call['function']['name']
+                    if tool_call['function']['arguments'] is None:
                        continue
+                    tool_call['id'] = tool_id
+                    # NOTE(review): this writes a top-level 'name' key, whereas the
+                    # streaming path writes tool_call['function']['name'] - confirm intent.
+                    tool_call['name'] = tool_name
                    for tc in tool_calls:
-                        if tc.index == tool_call.index:
-                            tc.function.arguments += tool_call.function.arguments
+                        if tc['index'] == tool_call['index']:
+                            tc['function']['arguments'] += tool_call['function']['arguments']
                            break
                    else:
                        tool_calls.append(tool_call)

            if chunk.choices[0].finish_reason is not None:
                break
+        message_delta['content'] = pending_content
+        message_delta['role'] = 'assistant'

-        real_tool_calls = []
-
-        for tc in tool_calls:
-            function = chat_completion_message_tool_call.Function(
-                name=tc.function.name, arguments=tc.function.arguments
-            )
-            real_tool_calls.append(
-                chat_completion_message_tool_call.ChatCompletionMessageToolCall(
-                    id=tc.id, function=function, type='function'
-                )
-            )
-
-        return (
-            chat_completion.ChatCompletion(
-                id=chunk.id,
-                object='chat.completion',
-                created=chunk.created,
-                choices=[
-                    chat_completion.Choice(
-                        index=0,
-                        message=chat_completion.ChatCompletionMessage(
-                            role='assistant',
-                            content=pending_content,
-                            tool_calls=real_tool_calls if len(real_tool_calls) > 0 else None,
-                        ),
-                        finish_reason=chunk.choices[0].finish_reason
-                        if hasattr(chunk.choices[0], 'finish_reason') and chunk.choices[0].finish_reason is not None
-                        else 'stop',
-                        logprobs=chunk.choices[0].logprobs,
-                    )
-                ],
-                model=chunk.model,
-                service_tier=chunk.service_tier if hasattr(chunk, 'service_tier') else None,
-                system_fingerprint=chunk.system_fingerprint if hasattr(chunk, 'system_fingerprint') else None,
-                usage=chunk.usage if hasattr(chunk, 'usage') else None,
-            )
-            if chunk
-            else None
-        )
+        message_delta['tool_calls'] = tool_calls if tool_calls else None
+        return [message_delta]

    async def _make_msg(
        self,
-        chat_completion: chat_completion.ChatCompletion,
+        chat_completion: list[dict[str, typing.Any]],
    ) -> provider_message.Message:
-        chatcmpl_message = chat_completion.choices[0].message.dict()
+        chatcmpl_message = chat_completion[0]

        # 确保 role 字段存在且不为 None
        if 'role' not in chatcmpl_message or chatcmpl_message['role'] is None:
@@ -130,6 +129,7 @@ class ModelScopeChatCompletions(requester.LLMAPIRequester):
        use_model: requester.RuntimeLLMModel,
        use_funcs: list[resource_tool.LLMTool] = None,
        extra_args: dict[str, typing.Any] = {},
+        remove_think: bool = False,
    ) -> provider_message.Message:
        self.client.api_key = use_model.token_mgr.get_token()

@@ -157,13 +157,145 @@ class ModelScopeChatCompletions(requester.LLMAPIRequester):
        args['messages'] = messages

        # 发送请求
-        resp = await self._req(args, extra_body=extra_args)
+        resp = await self._req(query, args, extra_body=extra_args, remove_think=remove_think)

        # 处理请求结果
        message = await self._make_msg(resp)

        return message

+    async def _req_stream(
+        self,
+        args: dict,
+        extra_body: dict = {},
+    ) -> chat_completion.ChatCompletion:
+        """Yield each chunk from ``self.client.chat.completions.create`` unchanged.
+
+        ``args`` is expanded into keyword arguments and ``extra_body`` is passed
+        through as-is.
+
+        NOTE(review): this is an async generator, so the declared return type does
+        not describe what callers iterate over. ``args`` presumably has to contain
+        ``stream=True`` for the awaited call to be iterable; ``_closure_stream``
+        sets it before calling this method.
+        """
+        async for chunk in await self.client.chat.completions.create(**args, extra_body=extra_body):
+            yield chunk
+
+    async def _closure_stream(
+        self,
+        query: pipeline_query.Query,
+        req_messages: list[dict],
+        use_model: requester.RuntimeLLMModel,
+        use_funcs: list[resource_tool.LLMTool] = None,
+        extra_args: dict[str, typing.Any] = {},
+        remove_think: bool = False,
+    ) -> provider_message.Message | typing.AsyncGenerator[provider_message.MessageChunk, None]:
+        """Stream one chat completion and yield it as incremental ``MessageChunk`` objects.
+
+        This is an async generator: each yielded chunk carries only the delta of
+        the corresponding upstream chunk (role, optional content, optional tool
+        calls) plus ``is_final``, which is true when the upstream chunk has a
+        ``finish_reason``.
+
+        Args:
+            query: The pipeline query; not read by this method.
+            req_messages: Request messages as dicts. ``image_base64`` content
+                parts are rewritten in place to ``image_url`` parts.
+            use_model: Supplies the API token and the model name.
+            use_funcs: Tools to expose to the model, if any.
+            extra_args: Passed to ``_req_stream`` as ``extra_body``.
+            remove_think: When true, chunks carrying ``reasoning_content`` are
+                dropped entirely; otherwise reasoning is emitted as content
+                wrapped in ``<think>`` ... ``</think>``.
+        """
+        self.client.api_key = use_model.token_mgr.get_token()
+
+        args = {}
+        args['model'] = use_model.model_entity.name
+
+        if use_funcs:
+            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
+
+            if tools:
+                args['tools'] = tools
+
+        # Messages for this request (shallow copy: the dicts themselves are shared).
+        messages = req_messages.copy()
+
+        # Vision: convert image_base64 parts into the image_url shape.
+        for msg in messages:
+            if 'content' in msg and isinstance(msg['content'], list):
+                for me in msg['content']:
+                    if me['type'] == 'image_base64':
+                        me['image_url'] = {'url': me['image_base64']}
+                        me['type'] = 'image_url'
+                        del me['image_base64']
+
+        args['messages'] = messages
+        args['stream'] = True
+
+        # Streaming state carried across chunks.
+        chunk_idx = 0
+        thinking_started = False
+        thinking_ended = False
+        role = 'assistant'  # default role until a chunk provides one
+        # Initialised up front so a first tool-call delta without an id or name
+        # cannot hit an unbound local below.
+        tool_id = ''
+        tool_name = ''
+
+        async for chunk in self._req_stream(args, extra_body=extra_args):
+            # Parse the chunk.
+            if hasattr(chunk, 'choices') and chunk.choices:
+                choice = chunk.choices[0]
+                delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
+                finish_reason = getattr(choice, 'finish_reason', None)
+            else:
+                delta = {}
+                finish_reason = None
+
+            # Remember the role once a chunk carries it and reuse it afterwards.
+            if 'role' in delta and delta['role']:
+                role = delta['role']
+
+            # Incremental content.
+            delta_content = delta.get('content', '')
+            reasoning_content = delta.get('reasoning_content', '')
+
+            # Handle reasoning_content.
+            if reasoning_content:
+                # With remove_think set, skip reasoning chunks altogether.
+                if remove_think:
+                    chunk_idx += 1
+                    continue
+
+                # First reasoning chunk: open the <think> tag.
+                if not thinking_started:
+                    thinking_started = True
+                    delta_content = '<think>\n' + reasoning_content
+                else:
+                    # Subsequent reasoning chunks are emitted as-is.
+                    delta_content = reasoning_content
+            elif thinking_started and not thinking_ended and delta_content:
+                # Reasoning is over and normal content begins: close the tag.
+                thinking_ended = True
+                delta_content = '\n</think>\n' + delta_content
+
+            # Handle incremental tool calls.
+            if delta.get('tool_calls'):
+                for tool_call in delta['tool_calls']:
+                    # Only a non-empty id/name replaces the remembered value, so
+                    # follow-up deltas carrying '' or None keep the earlier one.
+                    if tool_call['id']:
+                        tool_id = tool_call['id']
+                    if tool_call['function']['name']:
+                        tool_name = tool_call['function']['name']
+
+                    if tool_call['type'] is None:
+                        tool_call['type'] = 'function'
+                    tool_call['id'] = tool_id
+                    tool_call['function']['name'] = tool_name
+                    tool_call['function']['arguments'] = (
+                        '' if tool_call['function']['arguments'] is None else tool_call['function']['arguments']
+                    )
+
+            # Skip an empty first chunk (role only, no payload).
+            if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'):
+                chunk_idx += 1
+                continue
+
+            # Build the MessageChunk - incremental content only.
+            chunk_data = {
+                'role': role,
+                'content': delta_content if delta_content else None,
+                'tool_calls': delta.get('tool_calls'),
+                'is_final': bool(finish_reason),
+            }
+
+            # Drop None values.
+            chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
+
+            yield provider_message.MessageChunk(**chunk_data)
+            chunk_idx += 1
+
+
    async def invoke_llm(
        self,
        query: pipeline_query.Query,
@@ -171,6 +303,7 @@ class ModelScopeChatCompletions(requester.LLMAPIRequester):
        messages: typing.List[provider_message.Message],
        funcs: typing.List[resource_tool.LLMTool] = None,
        extra_args: dict[str, typing.Any] = {},
+        remove_think: bool = False,
    ) -> provider_message.Message:
        req_messages = []  # req_messages 仅用于类内，外部同步由 query.messages 进行
        for m in messages:
@@ -185,7 +318,12 @@ class ModelScopeChatCompletions(requester.LLMAPIRequester):

        try:
            return await self._closure(
-                query=query, req_messages=req_messages, use_model=model, use_funcs=funcs, extra_args=extra_args
+                query=query,
+                req_messages=req_messages,
+                use_model=model,
+                use_funcs=funcs,
+                extra_args=extra_args,
+                remove_think=remove_think,
            )
        except asyncio.TimeoutError:
            raise errors.RequesterError('请求超时')
@@ -202,3 +340,50 @@ class ModelScopeChatCompletions(requester.LLMAPIRequester):
            raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}')
        except openai.APIError as e:
            raise errors.RequesterError(f'请求错误: {e.message}')
+
+    async def invoke_llm_stream(
+        self,
+        query: pipeline_query.Query,
+        model: requester.RuntimeLLMModel,
+        messages: typing.List[provider_message.Message],
+        funcs: typing.List[resource_tool.LLMTool] = None,
+        extra_args: dict[str, typing.Any] = {},
+        remove_think: bool = False,
+    ) -> provider_message.MessageChunk:
+        """Stream a completion, yielding every chunk produced by ``_closure_stream``.
+
+        Each message is dumped to a dict with ``None`` fields excluded. A
+        list-valued ``content`` made up solely of ``text`` parts is collapsed into
+        one newline-joined string before the request is issued.
+
+        Raises:
+            errors.RequesterError: wraps ``asyncio.TimeoutError`` and the
+                ``openai`` error classes handled below.
+        """
+        # req_messages is only used inside this class; external state is synced via query.messages.
+        req_messages = []
+        for message in messages:
+            payload = message.dict(exclude_none=True)
+            parts = payload.get('content')
+            # True only when content is a list and every part is a text part.
+            text_only = isinstance(parts, list) and all(
+                isinstance(part, dict) and part.get('type') == 'text' for part in parts
+            )
+            if text_only:
+                # Merge all text parts into a single string.
+                payload['content'] = '\n'.join(part['text'] for part in parts)
+            req_messages.append(payload)
+
+        # Creating the async generator runs no code yet; iteration happens inside the try.
+        stream = self._closure_stream(
+            query=query,
+            req_messages=req_messages,
+            use_model=model,
+            use_funcs=funcs,
+            extra_args=extra_args,
+            remove_think=remove_think,
+        )
+
+        try:
+            async for item in stream:
+                yield item
+        except asyncio.TimeoutError:
+            raise errors.RequesterError('请求超时')
+        except openai.BadRequestError as e:
+            if 'context_length_exceeded' not in e.message:
+                raise errors.RequesterError(f'请求参数错误: {e.message}')
+            raise errors.RequesterError(f'上文过长，请重置会话: {e.message}')
+        except openai.AuthenticationError as e:
+            raise errors.RequesterError(f'无效的 api-key: {e.message}')
+        except openai.NotFoundError as e:
+            raise errors.RequesterError(f'请求路径错误: {e.message}')
+        except openai.RateLimitError as e:
+            raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}')
+        except openai.APIError as e:
+            raise errors.RequesterError(f'请求错误: {e.message}')
@@ -29,6 +29,8 @@ spec:
      type: int
      required: true
      default: 120
+  support_type:
+    - llm
 execution:
  python:
    path: ./modelscopechatcmpl.py
@@ -25,6 +25,7 @@ class MoonshotChatCompletions(chatcmpl.OpenAIChatCompletions):
        use_model: requester.RuntimeLLMModel,
        use_funcs: list[resource_tool.LLMTool] = None,
        extra_args: dict[str, typing.Any] = {},
+        remove_think: bool = False,
    ) -> provider_message.Message:
        self.client.api_key = use_model.token_mgr.get_token()

@@ -54,6 +55,6 @@ class MoonshotChatCompletions(chatcmpl.OpenAIChatCompletions):
        resp = await self._req(args, extra_body=extra_args)

        # 处理请求结果
-        message = await self._make_msg(resp)
+        message = await self._make_msg(resp, remove_think)

        return message
@@ -14,7 +14,7 @@ spec:
        zh_Hans: 基础 URL
      type: string
      required: true
-      default: "https://api.moonshot.com/v1"
+      default: "https://api.moonshot.ai/v1"
    - name: timeout
      label:
        en_US: Timeout
@@ -22,6 +22,8 @@ spec:
      type: integer
      required: true
      default: 120
+  support_type:
+    - llm
 execution:
  python:
    path: ./moonshotchatcmpl.py
@@ -0,0 +1,17 @@
+from __future__ import annotations
+
+import typing
+import openai
+
+from . import chatcmpl
+
+
+class NewAPIChatCompletions(chatcmpl.OpenAIChatCompletions):
+    """Requester for the New API ChatCompletion API.
+
+    The class body only re-declares the ``client`` annotation and overrides
+    ``default_config``; all request logic is inherited from
+    ``chatcmpl.OpenAIChatCompletions``.
+    """
+
+    client: openai.AsyncClient
+
+    # Defaults used when the user does not override them; the values mirror the
+    # defaults declared in the requester's YAML manifest.
+    # NOTE(review): timeout is presumably in seconds - confirm against the base class.
+    default_config: dict[str, typing.Any] = {
+        'base_url': 'http://localhost:3000/v1',
+        'timeout': 120,
+    }
@@ -0,0 +1,31 @@
+apiVersion: v1
+kind: LLMAPIRequester
+metadata:
+  name: new-api-chat-completions
+  label:
+    en_US: New API
+    zh_Hans: New API
+  icon: newapi.png
+spec:
+  config:
+    - name: base_url
+      label:
+        en_US: Base URL
+        zh_Hans: 基础 URL
+      type: string
+      required: true
+      default: "http://localhost:3000/v1"
+    - name: timeout
+      label:
+        en_US: Timeout
+        zh_Hans: 超时时间
+      type: integer
+      required: true
+      default: 120
+  support_type:
+    - llm
+    - text-embedding
+execution:
+  python:
+    path: ./newapichatcmpl.py
+    attr: NewAPIChatCompletions
@@ -17,7 +17,7 @@ import langbot_plugin.api.entities.builtin.provider.message as provider_message
 REQUESTER_NAME: str = 'ollama-chat'


-class OllamaChatCompletions(requester.LLMAPIRequester):
+class OllamaChatCompletions(requester.ProviderAPIRequester):
    """Ollama平台 ChatCompletion API请求器"""

    client: ollama.AsyncClient
@@ -44,6 +44,7 @@ class OllamaChatCompletions(requester.LLMAPIRequester):
        use_model: requester.RuntimeLLMModel,
        use_funcs: list[resource_tool.LLMTool] = None,
        extra_args: dict[str, typing.Any] = {},
+        remove_think: bool = False,
    ) -> provider_message.Message:
        args = extra_args.copy()
        args['model'] = use_model.model_entity.name
@@ -110,6 +111,7 @@ class OllamaChatCompletions(requester.LLMAPIRequester):
        messages: typing.List[provider_message.Message],
        funcs: typing.List[resource_tool.LLMTool] = None,
        extra_args: dict[str, typing.Any] = {},
+        remove_think: bool = False,
    ) -> provider_message.Message:
        req_messages: list = []
        for m in messages:
@@ -126,6 +128,21 @@ class OllamaChatCompletions(requester.LLMAPIRequester):
                use_model=model,
                use_funcs=funcs,
                extra_args=extra_args,
+                remove_think=remove_think,
            )
        except asyncio.TimeoutError:
            raise errors.RequesterError('请求超时')
+
+    async def invoke_embedding(
+        self,
+        model: requester.RuntimeEmbeddingModel,
+        input_text: list[str],
+        extra_args: dict[str, typing.Any] = {},
+    ) -> list[list[float]]:
+        """Request embeddings for ``input_text`` from the client.
+
+        ``extra_args`` is forwarded to ``client.embed`` as keyword arguments.
+        """
+        embed_kwargs = {'model': model.model_entity.name, 'input': input_text}
+        response = await self.client.embed(**embed_kwargs, **extra_args)
+        return response.embeddings
@@ -22,6 +22,9 @@ spec:
      type: integer
      required: true
      default: 120
+  support_type:
+    - llm
+    - text-embedding
 execution:
  python:
    path: ./ollamachat.py
@@ -22,6 +22,9 @@ spec:
      type: integer
      required: true
      default: 120
+  support_type:
+    - llm
+    - text-embedding
 execution:
  python:
    path: ./openrouterchatcmpl.py
@@ -4,6 +4,12 @@ import openai
 import typing

 from . import chatcmpl
+from .. import requester
+import openai.types.chat.chat_completion as chat_completion
+import re
+import langbot_plugin.api.entities.builtin.provider.message as provider_message
+import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
+import langbot_plugin.api.entities.builtin.resource.tool as resource_tool


 class PPIOChatCompletions(chatcmpl.OpenAIChatCompletions):
@@ -15,3 +21,188 @@ class PPIOChatCompletions(chatcmpl.OpenAIChatCompletions):
        'base_url': 'https://api.ppinfra.com/v3/openai',
        'timeout': 120,
    }
+
+    is_think: bool = False
+
+    async def _make_msg(
+        self,
+        chat_completion: chat_completion.ChatCompletion,
+        remove_think: bool,
+    ) -> provider_message.Message:
+        """Convert the first choice of a completion into a provider Message.
+
+        Args:
+            chat_completion: Completed (non-streaming) response.
+            remove_think: Forwarded to ``_process_thinking_content``.
+
+        Returns:
+            Message built from the dumped choice, without ``reasoning_content``.
+        """
+        msg_fields = chat_completion.choices[0].message.model_dump()
+
+        # Fall back to the assistant role when it is missing or None.
+        if msg_fields.get('role') is None:
+            msg_fields['role'] = 'assistant'
+
+        # Take reasoning_content out so it is never passed on to Message.
+        reasoning = msg_fields.pop('reasoning_content', None)
+        msg_fields['content'] = await self._process_thinking_content(
+            msg_fields['content'], reasoning, remove_think
+        )
+        return provider_message.Message(**msg_fields)
+
+    async def _process_thinking_content(
+        self,
+        content: str,
+        reasoning_content: str = None,
+        remove_think: bool = False,
+    ) -> str:
+        """Strip or prepend chain-of-thought text.
+
+        Args:
+            content: Original content; ``None`` is tolerated.
+            reasoning_content: Value of the ``reasoning_content`` field, if any.
+            remove_think: Remove ``<think>...</think>`` spans instead of adding one.
+
+        Returns:
+            The processed content; ``None`` is passed through when nothing is added.
+        """
+        if remove_think:
+            # re.sub() raises TypeError on None, so leave empty/None content as is.
+            return re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL) if content else content
+        if reasoning_content is not None:
+            return '<think>\n' + reasoning_content + '\n</think>\n' + (content or '')
+        return content
+
+    async def _make_msg_chunk(
+        self,
+        delta: dict[str, typing.Any],
+        idx: int,
+    ) -> provider_message.MessageChunk:
+        """Build a MessageChunk from one streamed delta dict.
+
+        ``delta`` is updated in place: ``role`` defaults to ``'assistant'``,
+        a missing or ``None`` ``content`` becomes ``''``, and any
+        ``reasoning_content`` is appended to ``content``.
+
+        Args:
+            delta: Delta fields of a streamed chunk (presumably a dumped choice delta).
+            idx: Chunk index; not used by this implementation.
+        """
+        if delta.get('role') is None:
+            delta['role'] = 'assistant'
+
+        # .get() rather than [] so a delta without a 'content' key cannot raise.
+        content = delta.get('content')
+        delta['content'] = '' if content is None else content
+        reasoning_content = delta.get('reasoning_content')
+        if reasoning_content is not None:
+            delta['content'] += reasoning_content
+        return provider_message.MessageChunk(**delta)
+
+    async def _closure_stream(
+        self,
+        query: pipeline_query.Query,
+        req_messages: list[dict],
+        use_model: requester.RuntimeLLMModel,
+        use_funcs: list[resource_tool.LLMTool] = None,
+        extra_args: dict[str, typing.Any] = {},
+        remove_think: bool = False,
+    ) -> provider_message.Message | typing.AsyncGenerator[provider_message.MessageChunk, None]:
+        """Stream a chat completion and yield one MessageChunk per usable delta.
+
+        With ``remove_think``, chunks from the one containing ``<think>`` through the
+        one equal to ``</think>`` (plus one following double-newline chunk) are skipped.
+        """
+        self.client.api_key = use_model.token_mgr.get_token()
+
+        args = {}
+        args['model'] = use_model.model_entity.name
+
+        if use_funcs:
+            tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
+            if tools:
+                args['tools'] = tools
+
+        # Messages sent with this request
+        messages = req_messages.copy()
+
+        # Vision: rewrite image_base64 parts into image_url parts (dicts edited in place)
+        for msg in messages:
+            if 'content' in msg and isinstance(msg['content'], list):
+                for me in msg['content']:
+                    if me['type'] == 'image_base64':
+                        me['image_url'] = {'url': me['image_base64']}
+                        me['type'] = 'image_url'
+                        del me['image_base64']
+
+        args['messages'] = messages
+        args['stream'] = True
+
+        chunk_idx = 0
+        thinking_started = False
+        thinking_ended = False
+        role = 'assistant'  # default role
+        # Last seen tool-call id/name, reused for fragments whose id/name is None.
+        tool_id = tool_name = None
+        async for chunk in self._req_stream(args, extra_body=extra_args):
+            # Parse the chunk; chunks without choices yield an empty delta
+            if hasattr(chunk, 'choices') and chunk.choices:
+                choice = chunk.choices[0]
+                delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
+                finish_reason = getattr(choice, 'finish_reason', None)
+            else:
+                delta = {}
+                finish_reason = None
+
+            # Take the role from the first chunk that carries one and keep using it
+            if 'role' in delta and delta['role']:
+                role = delta['role']
+
+            # Incremental content; '' when the key is absent, may be None when present
+            delta_content = delta.get('content', '')
+
+            if remove_think:
+                if delta_content is not None:
+                    if '<think>' in delta_content and not thinking_started and not thinking_ended:
+                        thinking_started = True
+                        continue
+                    elif delta_content == r'</think>' and not thinking_ended:
+                        thinking_ended = True
+                        continue
+                    elif thinking_ended and delta_content == '\n\n' and thinking_started:
+                        thinking_started = False
+                        continue
+                    elif thinking_started and not thinking_ended:
+                        continue
+
+            if delta.get('tool_calls'):
+                for tool_call in delta['tool_calls']:
+                    if tool_call['id'] and tool_call['function']['name']:
+                        tool_id = tool_call['id']
+                        tool_name = tool_call['function']['name']
+                    if tool_call['id'] is None:
+                        tool_call['id'] = tool_id
+                    if tool_call['function']['name'] is None:
+                        tool_call['function']['name'] = tool_name
+                    if tool_call['function']['arguments'] is None:
+                        tool_call['function']['arguments'] = ''
+                    if tool_call['type'] is None:
+                        tool_call['type'] = 'function'
+
+            # Skip an empty first chunk (role only, no content or tool calls)
+            if chunk_idx == 0 and not delta_content and not delta.get('tool_calls'):
+                chunk_idx += 1
+                continue
+
+            # Build the MessageChunk from the incremental data only
+            chunk_data = {
+                'role': role,
+                'content': delta_content if delta_content else None,
+                'tool_calls': delta.get('tool_calls'),
+                'is_final': bool(finish_reason),
+            }
+            # Drop None values
+            chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
+            yield provider_message.MessageChunk(**chunk_data)
+            chunk_idx += 1
@@ -29,6 +29,9 @@ spec:
      type: int
      required: true
      default: 120
+  support_type:
+    - llm
+    - text-embedding
 execution:
  python:
    path: ./ppiochatcmpl.py
@@ -0,0 +1,17 @@
+from __future__ import annotations
+
+import openai
+import typing
+
+from . import chatcmpl
+
+
+class QHAIGCChatCompletions(chatcmpl.OpenAIChatCompletions):
+    """Requester for the QH AI ChatCompletion API."""
+
+    client: openai.AsyncClient
+    # base_url uses the same host as the default in the qhaigc-chat-completions manifest.
+    default_config: dict[str, typing.Any] = {
+        'base_url': 'https://api.qhaigc.net/v1',
+        'timeout': 120,
+    }
@@ -0,0 +1,38 @@
+apiVersion: v1
+kind: LLMAPIRequester
+metadata:
+  name: qhaigc-chat-completions
+  label:
+    en_US: QH AI
+    zh_Hans: 启航 AI
+  icon: qhaigc.png
+spec:
+  config:
+    - name: base_url
+      label:
+        en_US: Base URL
+        zh_Hans: 基础 URL
+      type: string
+      required: true
+      default: "https://api.qhaigc.net/v1"
+    - name: args
+      label:
+        en_US: Args
+        zh_Hans: 附加参数
+      type: object
+      required: true
+      default: {}
+    - name: timeout
+      label:
+        en_US: Timeout
+        zh_Hans: 超时时间
+      type: integer
+      required: true
+      default: 120
+  support_type:
+    - llm
+    - text-embedding
+execution:
+  python:
+    path: ./qhaigcchatcmpl.py
+    attr: QHAIGCChatCompletions
@@ -0,0 +1,32 @@
+from __future__ import annotations
+
+import openai
+import typing
+
+from . import chatcmpl
+import openai.types.chat.chat_completion as chat_completion
+
+
+class ShengSuanYunChatCompletions(chatcmpl.OpenAIChatCompletions):
+    """Requester for the ShengSuanYun (ModelSpot.AI) ChatCompletion API."""
+
+    client: openai.AsyncClient
+
+    default_config: dict[str, typing.Any] = {
+        'base_url': 'https://router.shengsuanyun.com/api/v1',
+        'timeout': 120,
+    }
+
+    async def _req(
+        self,
+        args: dict,
+        extra_body: dict = {},
+    ) -> chat_completion.ChatCompletion:
+        """Create a chat completion, sending HTTP-Referer and X-Title headers naming LangBot."""
+        langbot_headers = {
+            'HTTP-Referer': 'https://langbot.app',
+            'X-Title': 'LangBot',
+        }
+        return await self.client.chat.completions.create(
+            **args, extra_body=extra_body, extra_headers=langbot_headers
+        )
@@ -0,0 +1,38 @@
+apiVersion: v1
+kind: LLMAPIRequester
+metadata:
+  name: shengsuanyun-chat-completions
+  label:
+    en_US: ShengSuanYun
+    zh_Hans: 胜算云
+  icon: shengsuanyun.svg
+spec:
+  config:
+    - name: base_url
+      label:
+        en_US: Base URL
+        zh_Hans: 基础 URL
+      type: string
+      required: true
+      default: "https://router.shengsuanyun.com/api/v1"
+    - name: args
+      label:
+        en_US: Args
+        zh_Hans: 附加参数
+      type: object
+      required: true
+      default: {}
+    - name: timeout
+      label:
+        en_US: Timeout
+        zh_Hans: 超时时间
+      type: integer
+      required: true
+      default: 120
+  support_type:
+    - llm
+    - text-embedding
+execution:
+  python:
+    path: ./shengsuanyun.py
+    attr: ShengSuanYunChatCompletions
@@ -22,6 +22,9 @@ spec:
      type: integer
      required: true
      default: 120
+  support_type:
+    - llm
+    - text-embedding
 execution:
  python:
    path: ./siliconflowchatcmpl.py
@@ -22,6 +22,8 @@ spec:
      type: integer
      required: true
      default: 120
+  support_type:
+    - llm
 execution:
  python:
    path: ./volcarkchatcmpl.py
@@ -22,6 +22,8 @@ spec:
      type: integer
      required: true
      default: 120
+  support_type:
+    - llm
 execution:
  python:
    path: ./xaichatcmpl.py
@@ -22,6 +22,8 @@ spec:
      type: integer
      required: true
      default: 120
+  support_type:
+    - llm
 execution:
  python:
    path: ./zhipuaichatcmpl.py