diff --git a/pkg/provider/modelmgr/requesters/chatcmpl.py b/pkg/provider/modelmgr/requesters/chatcmpl.py index adeaa251..2d2a0b7e 100644 --- a/pkg/provider/modelmgr/requesters/chatcmpl.py +++ b/pkg/provider/modelmgr/requesters/chatcmpl.py @@ -160,7 +160,7 @@ class OpenAIChatCompletions(requester.ProviderAPIRequester): thinking_started = False thinking_ended = False role = 'assistant' # 默认角色 - accumulated_reasoning = '' # 仅用于判断何时结束思维链 + # accumulated_reasoning = '' # 仅用于判断何时结束思维链 async for chunk in self._req_stream(args, extra_body=extra_args): # 解析 chunk 数据 @@ -182,7 +182,7 @@ class OpenAIChatCompletions(requester.ProviderAPIRequester): # 处理 reasoning_content if reasoning_content: - accumulated_reasoning += reasoning_content + # accumulated_reasoning += reasoning_content # 如果设置了 remove_think,跳过 reasoning_content if remove_think: chunk_idx += 1 @@ -289,6 +289,7 @@ class OpenAIChatCompletions(requester.ProviderAPIRequester): # 发送请求 resp = await self._req(args, extra_body=extra_args) + print(resp) # 处理请求结果 message = await self._make_msg(resp, remove_think) diff --git a/pkg/provider/modelmgr/requesters/giteeaichatcmpl.py b/pkg/provider/modelmgr/requesters/giteeaichatcmpl.py index 0ff49798..f8cf15ca 100644 --- a/pkg/provider/modelmgr/requesters/giteeaichatcmpl.py +++ b/pkg/provider/modelmgr/requesters/giteeaichatcmpl.py @@ -3,7 +3,7 @@ from __future__ import annotations import typing -from . import chatcmpl +from . import ppiochatcmpl from .. import requester from ....core import entities as core_entities from ... import entities as llm_entities @@ -12,7 +12,7 @@ import re import openai.types.chat.chat_completion as chat_completion -class GiteeAIChatCompletions(chatcmpl.OpenAIChatCompletions): +class GiteeAIChatCompletions(ppiochatcmpl.PPIOChatCompletions): """Gitee AI ChatCompletions API 请求器""" default_config: dict[str, typing.Any] = { @@ -20,181 +20,3 @@ class GiteeAIChatCompletions(chatcmpl.OpenAIChatCompletions): 'timeout': 120, } - async def _closure( - self, - query: core_entities.Query, - req_messages: list[dict], - use_model: requester.RuntimeLLMModel, - use_funcs: list[tools_entities.LLMFunction] = None, - extra_args: dict[str, typing.Any] = {}, - remove_think: bool = False, - ) -> llm_entities.Message: - self.client.api_key = use_model.token_mgr.get_token() - - args = {} - args['model'] = use_model.model_entity.name - - if use_funcs: - tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs) - - if tools: - args['tools'] = tools - - # gitee 不支持多模态,把content都转换成纯文字 - for m in req_messages: - if 'content' in m and isinstance(m['content'], list): - m['content'] = ' '.join([c['text'] for c in m['content']]) - - args['messages'] = req_messages - - resp = await self._req(args, extra_body=extra_args) - - - message = await self._make_msg(resp, remove_think) - - return message - - async def _make_msg( - self, - chat_completion: chat_completion.ChatCompletion, - remove_think: bool, - ) -> llm_entities.Message: - chatcmpl_message = chat_completion.choices[0].message.model_dump() - # print(chatcmpl_message.keys(), chatcmpl_message.values()) - - # 确保 role 字段存在且不为 None - if 'role' not in chatcmpl_message or chatcmpl_message['role'] is None: - chatcmpl_message['role'] = 'assistant' - - reasoning_content = chatcmpl_message['reasoning_content'] if 'reasoning_content' in chatcmpl_message else None - - # deepseek的reasoner模型 - if remove_think: - chatcmpl_message['content'] = re.sub( - r'.*?', '', chatcmpl_message['content'], flags=re.DOTALL - ) - else: - if reasoning_content is not None: - chatcmpl_message['content'] = ( - '\n' + reasoning_content + '\n\n' + chatcmpl_message['content'] - ) - - message = llm_entities.Message(**chatcmpl_message) - - return message - - async def _make_msg_chunk( - self, - delta: dict[str, typing.Any], - idx: int, - ) -> llm_entities.MessageChunk: - # 处理流式chunk和完整响应的差异 - # print(chat_completion.choices[0]) - - - # 确保 role 字段存在且不为 None - if 'role' not in delta or delta['role'] is None: - delta['role'] = 'assistant' - - reasoning_content = delta['reasoning_content'] if 'reasoning_content' in delta else None - - delta['content'] = '' if delta['content'] is None else delta['content'] - # print(reasoning_content) - - # deepseek的reasoner模型 - - if reasoning_content is not None: - delta['content'] += reasoning_content - - message = llm_entities.MessageChunk(**delta) - - return message - - async def _closure_stream( - self, - query: core_entities.Query, - req_messages: list[dict], - use_model: requester.RuntimeLLMModel, - use_funcs: list[tools_entities.LLMFunction] = None, - extra_args: dict[str, typing.Any] = {}, - remove_think: bool = False, - ) -> llm_entities.Message | typing.AsyncGenerator[llm_entities.MessageChunk, None]: - self.client.api_key = use_model.token_mgr.get_token() - - args = {} - args['model'] = use_model.model_entity.name - - if use_funcs: - tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs) - - if tools: - args['tools'] = tools - - # 设置此次请求中的messages - messages = req_messages.copy() - - # 检查vision - for msg in messages: - if 'content' in msg and isinstance(msg['content'], list): - for me in msg['content']: - if me['type'] == 'image_base64': - me['image_url'] = {'url': me['image_base64']} - me['type'] = 'image_url' - del me['image_base64'] - - args['messages'] = messages - - current_content = '' - args['stream'] = True - chunk_idx = 0 - is_think = False - tool_calls_map: dict[str, llm_entities.ToolCall] = {} - async for chunk in self._req_stream(args, extra_body=extra_args): - # 处理流式消息 - if hasattr(chunk, 'choices'): - # 完整响应模式 - if chunk.choices: - choice = chunk.choices[0] - delta = choice.delta.model_dump() if hasattr(choice, 'delta') else choice.message.model_dump() - else: - continue - else: - # 流式chunk模式 - delta = chunk.delta.model_dump() if hasattr(chunk, 'delta') else {} - if remove_think: - print(delta) - if delta['content'] == '': - is_think = True - continue - elif delta['content'] == r'': - is_think = False - continue - elif is_think or delta['content'] == '\n\n': - continue - - delta_message = await self._make_msg_chunk(delta, chunk_idx) - if delta_message.content: - current_content += delta_message.content - delta_message.content = current_content - # delta_message.all_content = current_content - if delta_message.tool_calls: - for tool_call in delta_message.tool_calls: - if tool_call.id not in tool_calls_map: - tool_calls_map[tool_call.id] = llm_entities.ToolCall( - id=tool_call.id, - type=tool_call.type, - function=llm_entities.FunctionCall( - name=tool_call.function.name if tool_call.function else '', arguments='' - ), - ) - if tool_call.function and tool_call.function.arguments: - # 流式处理中,工具调用参数可能分多个chunk返回,需要追加而不是覆盖 - tool_calls_map[tool_call.id].function.arguments += tool_call.function.arguments - - chunk_idx += 1 - chunk_choices = getattr(chunk, 'choices', None) - if chunk_choices and getattr(chunk_choices[0], 'finish_reason', None): - delta_message.is_final = True - delta_message.content = current_content - - yield delta_message diff --git a/pkg/provider/modelmgr/requesters/modelscopechatcmpl.py b/pkg/provider/modelmgr/requesters/modelscopechatcmpl.py index 0007623e..c526313a 100644 --- a/pkg/provider/modelmgr/requesters/modelscopechatcmpl.py +++ b/pkg/provider/modelmgr/requesters/modelscopechatcmpl.py @@ -36,6 +36,7 @@ class ModelScopeChatCompletions(requester.ProviderAPIRequester): self, args: dict, extra_body: dict = {}, + remove_think:bool = False, ) -> chat_completion.ChatCompletion: args['stream'] = True @@ -47,11 +48,35 @@ class ModelScopeChatCompletions(requester.ProviderAPIRequester): resp_gen: openai.AsyncStream = await self.client.chat.completions.create(**args, extra_body=extra_body) + chunk_idx = 0 + thinking_started = False + thinking_ended = False async for chunk in resp_gen: # print(chunk) if not chunk or not chunk.id or not chunk.choices or not chunk.choices[0] or not chunk.choices[0].delta: continue + reasoning_content = chunk.choices[0].delta.reasoning_content + # 处理 reasoning_content + if reasoning_content: + # accumulated_reasoning += reasoning_content + # 如果设置了 remove_think,跳过 reasoning_content + if remove_think: + chunk_idx += 1 + continue + + # 第一次出现 reasoning_content,添加 开始标签 + if not thinking_started: + thinking_started = True + pending_content += '\n' + reasoning_content + else: + # 继续输出 reasoning_content + pending_content += reasoning_content + elif thinking_started and not thinking_ended and chunk.choices[0].delta.content: + # reasoning_content 结束,normal content 开始,添加 结束标签 + thinking_ended = True + pending_content += '\n\n' + chunk.choices[0].delta.content + if chunk.choices[0].delta.content is not None: pending_content += chunk.choices[0].delta.content @@ -130,6 +155,7 @@ class ModelScopeChatCompletions(requester.ProviderAPIRequester): use_model: requester.RuntimeLLMModel, use_funcs: list[tools_entities.LLMFunction] = None, extra_args: dict[str, typing.Any] = {}, + remove_think:bool = False, ) -> llm_entities.Message: self.client.api_key = use_model.token_mgr.get_token() @@ -157,7 +183,7 @@ class ModelScopeChatCompletions(requester.ProviderAPIRequester): args['messages'] = messages # 发送请求 - resp = await self._req(args, extra_body=extra_args) + resp = await self._req(args, extra_body=extra_args, remove_think=remove_think) # 处理请求结果 message = await self._make_msg(resp) @@ -172,41 +198,6 @@ class ModelScopeChatCompletions(requester.ProviderAPIRequester): async for chunk in await self.client.chat.completions.create(**args, extra_body=extra_body): yield chunk - async def _make_msg_chunk(self, - delta: dict[str, typing.Any], - idx: int, - is_content: bool, - is_think: bool, - ) -> llm_entities.MessageChunk: - # 处理流式chunk和完整响应的差异 - # print(chat_completion.choices[0]) - - if 'role' not in delta or delta['role'] is None: - delta['role'] = 'assistant' - - reasoning_content = delta['reasoning_content'] - - delta['content'] = '' if delta['content'] is None else delta['content'] - # print(reasoning_content) - - # deepseek的reasoner模型 - - if reasoning_content is not None and idx == 0: - delta['content'] += f'\n{reasoning_content}' - is_think = True - elif reasoning_content is None and idx != 0: - if is_content: - delta['content'] = delta['content'] - elif is_think: - delta['content'] = f'\n\n\n{delta["content"]}' - is_content = True - is_think = False - elif reasoning_content is not None: - delta['content'] = reasoning_content - - message = llm_entities.MessageChunk(**delta) - - return message, is_content, is_think async def _closure_stream( self, @@ -250,7 +241,7 @@ class ModelScopeChatCompletions(requester.ProviderAPIRequester): thinking_started = False thinking_ended = False role = 'assistant' # 默认角色 - accumulated_reasoning = '' # 仅用于判断何时结束思维链 + # accumulated_reasoning = '' # 仅用于判断何时结束思维链 async for chunk in self._req_stream(args, extra_body=extra_args): # 解析 chunk 数据 @@ -272,7 +263,7 @@ class ModelScopeChatCompletions(requester.ProviderAPIRequester): # 处理 reasoning_content if reasoning_content: - accumulated_reasoning += reasoning_content + # accumulated_reasoning += reasoning_content # 如果设置了 remove_think,跳过 reasoning_content if remove_think: chunk_idx += 1 @@ -365,7 +356,7 @@ class ModelScopeChatCompletions(requester.ProviderAPIRequester): try: return await self._closure( - query=query, req_messages=req_messages, use_model=model, use_funcs=funcs, extra_args=extra_args + query=query, req_messages=req_messages, use_model=model, use_funcs=funcs, extra_args=extra_args, remove_think=remove_think ) except asyncio.TimeoutError: raise errors.RequesterError('请求超时') diff --git a/pkg/provider/modelmgr/requesters/ppiochatcmpl.py b/pkg/provider/modelmgr/requesters/ppiochatcmpl.py index 49f03143..967bb676 100644 --- a/pkg/provider/modelmgr/requesters/ppiochatcmpl.py +++ b/pkg/provider/modelmgr/requesters/ppiochatcmpl.py @@ -39,20 +39,45 @@ class PPIOChatCompletions(chatcmpl.OpenAIChatCompletions): reasoning_content = chatcmpl_message['reasoning_content'] if 'reasoning_content' in chatcmpl_message else None # deepseek的reasoner模型 - if remove_think: - chatcmpl_message['content'] = re.sub( - r'.*?', '', chatcmpl_message['content'], flags=re.DOTALL - ) - else: - if reasoning_content is not None: - chatcmpl_message['content'] = ( - '\n' + reasoning_content + '\n\n' + chatcmpl_message['content'] - ) + chatcmpl_message["content"] = await self._process_thinking_content( + chatcmpl_message['content'],reasoning_content,remove_think) + + # 移除 reasoning_content 字段,避免传递给 Message + if 'reasoning_content' in chatcmpl_message: + del chatcmpl_message['reasoning_content'] + message = llm_entities.Message(**chatcmpl_message) return message + async def _process_thinking_content( + self, + content: str, + reasoning_content: str = None, + remove_think: bool = False, + ) -> tuple[str, str]: + """处理思维链内容 + + Args: + content: 原始内容 + reasoning_content: reasoning_content 字段内容 + remove_think: 是否移除思维链 + + Returns: + 处理后的内容 + """ + if remove_think: + content = re.sub( + r'.*?', '', content, flags=re.DOTALL + ) + else: + if reasoning_content is not None: + content = ( + '\n' + reasoning_content + '\n\n' + content + ) + return content + async def _make_msg_chunk( self, delta: dict[str, typing.Any], @@ -119,7 +144,6 @@ class PPIOChatCompletions(chatcmpl.OpenAIChatCompletions): thinking_started = False thinking_ended = False role = 'assistant' # 默认角色 - accumulated_reasoning = '' # 仅用于判断何时结束思维链 async for chunk in self._req_stream(args, extra_body=extra_args): # 解析 chunk 数据 if hasattr(chunk, 'choices') and chunk.choices: @@ -140,14 +164,18 @@ class PPIOChatCompletions(chatcmpl.OpenAIChatCompletions): if remove_think: if delta['content'] is not None: - if '' in delta['content']: - is_think = True + if '' in delta['content'] and not thinking_started and not thinking_ended: + thinking_started = True continue - elif delta['content'] == r'': - is_think = False + elif delta['content'] == r'' and not thinking_ended: + thinking_ended = True continue - elif is_think or delta['content'] == '\n\n': + elif thinking_ended and delta['content'] == '\n\n' and thinking_started: + thinking_started = False continue + elif thinking_started and not thinking_ended: + continue + delta_tool_calls = None if delta.get('tool_calls'):