diff --git a/pkg/provider/modelmgr/requester.py b/pkg/provider/modelmgr/requester.py
index 6352b6c5..6af8ba70 100644
--- a/pkg/provider/modelmgr/requester.py
+++ b/pkg/provider/modelmgr/requester.py
@@ -84,6 +84,7 @@ class ProviderAPIRequester(metaclass=abc.ABCMeta):
messages: typing.List[llm_entities.Message],
funcs: typing.List[tools_entities.LLMFunction] = None,
extra_args: dict[str, typing.Any] = {},
+ remove_think: bool = False,
) -> llm_entities.Message:
"""调用API
@@ -92,6 +93,7 @@ class ProviderAPIRequester(metaclass=abc.ABCMeta):
messages (typing.List[llm_entities.Message]): 消息对象列表
funcs (typing.List[tools_entities.LLMFunction], optional): 使用的工具函数列表. Defaults to None.
extra_args (dict[str, typing.Any], optional): 额外的参数. Defaults to {}.
+ remove_think (bool, optional): 是否移除思考中的消息. Defaults to False.
Returns:
llm_entities.Message: 返回消息对象
@@ -105,6 +107,7 @@ class ProviderAPIRequester(metaclass=abc.ABCMeta):
messages: typing.List[llm_entities.Message],
funcs: typing.List[tools_entities.LLMFunction] = None,
extra_args: dict[str, typing.Any] = {},
+ remove_think: bool = False,
) -> llm_entities.MessageChunk:
"""调用API
@@ -113,6 +116,7 @@ class ProviderAPIRequester(metaclass=abc.ABCMeta):
messages (typing.List[llm_entities.Message]): 消息对象列表
funcs (typing.List[tools_entities.LLMFunction], optional): 使用的工具函数列表. Defaults to None.
extra_args (dict[str, typing.Any], optional): 额外的参数. Defaults to {}.
+ remove_think (bool, optional): 是否移除思考中的消息. Defaults to False.
Returns:
typing.AsyncGenerator[llm_entities.MessageChunk]: 返回消息对象
diff --git a/pkg/provider/modelmgr/requesters/anthropicmsgs.py b/pkg/provider/modelmgr/requesters/anthropicmsgs.py
index b195ae51..75f2bf7e 100644
--- a/pkg/provider/modelmgr/requesters/anthropicmsgs.py
+++ b/pkg/provider/modelmgr/requesters/anthropicmsgs.py
@@ -53,6 +53,7 @@ class AnthropicMessages(requester.ProviderAPIRequester):
messages: typing.List[llm_entities.Message],
funcs: typing.List[tools_entities.LLMFunction] = None,
extra_args: dict[str, typing.Any] = {},
+ remove_think: bool = False,
) -> llm_entities.Message:
self.client.api_key = model.token_mgr.get_token()
@@ -151,7 +152,7 @@ class AnthropicMessages(requester.ProviderAPIRequester):
assert type(resp) is anthropic.types.message.Message
for block in resp.content:
- if block.type == 'thinking':
+ if not remove_think and block.type == 'thinking':
args['content'] = '' + block.thinking + '\n' + args['content']
elif block.type == 'text':
args['content'] += block.text
diff --git a/pkg/provider/modelmgr/requesters/chatcmpl.py b/pkg/provider/modelmgr/requesters/chatcmpl.py
index 51ea864b..04e7da20 100644
--- a/pkg/provider/modelmgr/requesters/chatcmpl.py
+++ b/pkg/provider/modelmgr/requesters/chatcmpl.py
@@ -51,7 +51,7 @@ class OpenAIChatCompletions(requester.ProviderAPIRequester):
async def _make_msg(
self,
chat_completion: chat_completion.ChatCompletion,
- pipeline_config: dict[str, typing.Any] = {'trigger': {'misc': {'remove_think': False}}},
+ remove_think: bool = False,
) -> llm_entities.Message:
chatcmpl_message = chat_completion.choices[0].message.model_dump()
# print(chatcmpl_message.keys(),chatcmpl_message.values())
@@ -63,7 +63,7 @@ class OpenAIChatCompletions(requester.ProviderAPIRequester):
reasoning_content = chatcmpl_message['reasoning_content'] if 'reasoning_content' in chatcmpl_message else None
# deepseek的reasoner模型
- if pipeline_config['trigger'].get('misc', '').get('remove_think'):
+ if remove_think:
pass
else:
if reasoning_content is not None:
@@ -77,7 +77,7 @@ class OpenAIChatCompletions(requester.ProviderAPIRequester):
async def _make_msg_chunk(
self,
- pipeline_config: dict[str, typing.Any],
+ remove_think: bool,
chat_completion: chat_completion.ChatCompletion,
idx: int,
) -> llm_entities.MessageChunk:
@@ -102,7 +102,7 @@ class OpenAIChatCompletions(requester.ProviderAPIRequester):
# print(reasoning_content)
# deepseek的reasoner模型
- if pipeline_config['trigger'].get('misc', '').get('remove_think'):
+ if remove_think:
if reasoning_content is not None:
pass
else:
@@ -130,6 +130,7 @@ class OpenAIChatCompletions(requester.ProviderAPIRequester):
use_model: requester.RuntimeLLMModel,
use_funcs: list[tools_entities.LLMFunction] = None,
extra_args: dict[str, typing.Any] = {},
+ remove_think: bool = False,
) -> llm_entities.MessageChunk:
self.client.api_key = use_model.token_mgr.get_token()
@@ -161,10 +162,9 @@ class OpenAIChatCompletions(requester.ProviderAPIRequester):
chunk_idx = 0
self.is_content = False
tool_calls_map: dict[str, llm_entities.ToolCall] = {}
- pipeline_config = query.pipeline_config
async for chunk in self._req_stream(args, extra_body=extra_args):
# 处理流式消息
- delta_message = await self._make_msg_chunk(pipeline_config, chunk, chunk_idx)
+ delta_message = await self._make_msg_chunk(remove_think, chunk, chunk_idx)
if delta_message.content:
current_content += delta_message.content
delta_message.content = current_content
@@ -199,6 +199,7 @@ class OpenAIChatCompletions(requester.ProviderAPIRequester):
use_model: requester.RuntimeLLMModel,
use_funcs: list[tools_entities.LLMFunction] = None,
extra_args: dict[str, typing.Any] = {},
+ remove_think: bool = False,
) -> llm_entities.Message:
self.client.api_key = use_model.token_mgr.get_token()
@@ -229,8 +230,7 @@ class OpenAIChatCompletions(requester.ProviderAPIRequester):
resp = await self._req(args, extra_body=extra_args)
# 处理请求结果
- pipeline_config = query.pipeline_config
- message = await self._make_msg(resp, pipeline_config)
+ message = await self._make_msg(resp, remove_think)
return message
diff --git a/pkg/provider/modelmgr/requesters/deepseekchatcmpl.py b/pkg/provider/modelmgr/requesters/deepseekchatcmpl.py
index d75d0fb6..4866caf4 100644
--- a/pkg/provider/modelmgr/requesters/deepseekchatcmpl.py
+++ b/pkg/provider/modelmgr/requesters/deepseekchatcmpl.py
@@ -24,6 +24,7 @@ class DeepseekChatCompletions(chatcmpl.OpenAIChatCompletions):
use_model: requester.RuntimeLLMModel,
use_funcs: list[tools_entities.LLMFunction] = None,
extra_args: dict[str, typing.Any] = {},
+ remove_think: bool = False,
) -> llm_entities.Message:
self.client.api_key = use_model.token_mgr.get_token()
@@ -53,8 +54,7 @@ class DeepseekChatCompletions(chatcmpl.OpenAIChatCompletions):
if resp is None:
raise errors.RequesterError('接口返回为空,请确定模型提供商服务是否正常')
- pipeline_config = query.pipeline_config
# 处理请求结果
- message = await self._make_msg(resp, pipeline_config)
+ message = await self._make_msg(resp, remove_think)
return message
diff --git a/pkg/provider/modelmgr/requesters/giteeaichatcmpl.py b/pkg/provider/modelmgr/requesters/giteeaichatcmpl.py
index 7ac9fa1a..a8d6eb16 100644
--- a/pkg/provider/modelmgr/requesters/giteeaichatcmpl.py
+++ b/pkg/provider/modelmgr/requesters/giteeaichatcmpl.py
@@ -28,6 +28,7 @@ class GiteeAIChatCompletions(chatcmpl.OpenAIChatCompletions):
use_model: requester.RuntimeLLMModel,
use_funcs: list[tools_entities.LLMFunction] = None,
extra_args: dict[str, typing.Any] = {},
+ remove_think: bool = False,
) -> llm_entities.Message:
self.client.api_key = use_model.token_mgr.get_token()
@@ -49,16 +50,15 @@ class GiteeAIChatCompletions(chatcmpl.OpenAIChatCompletions):
resp = await self._req(args, extra_body=extra_args)
- pipeline_config = query.pipeline_config
- message = await self._make_msg(resp, pipeline_config)
+ message = await self._make_msg(resp, remove_think)
return message
async def _make_msg(
self,
chat_completion: chat_completion.ChatCompletion,
- pipeline_config: dict[str, typing.Any] = {'trigger': {'misc': {'remove_think': False}}},
+ remove_think: bool = False,
) -> llm_entities.Message:
chatcmpl_message = chat_completion.choices[0].message.model_dump()
# print(chatcmpl_message.keys(), chatcmpl_message.values())
@@ -70,7 +70,7 @@ class GiteeAIChatCompletions(chatcmpl.OpenAIChatCompletions):
reasoning_content = chatcmpl_message['reasoning_content'] if 'reasoning_content' in chatcmpl_message else None
# deepseek的reasoner模型
- if pipeline_config['trigger'].get('misc', '').get('remove_think'):
+ if remove_think:
chatcmpl_message['content'] = re.sub(
r'.*?', '', chatcmpl_message['content'], flags=re.DOTALL
)
@@ -86,7 +86,7 @@ class GiteeAIChatCompletions(chatcmpl.OpenAIChatCompletions):
async def _make_msg_chunk(
self,
- pipeline_config: dict[str, typing.Any],
+ remove_think: bool,
chat_completion: chat_completion.ChatCompletion,
idx: int,
) -> llm_entities.MessageChunk:
@@ -110,7 +110,7 @@ class GiteeAIChatCompletions(chatcmpl.OpenAIChatCompletions):
# print(reasoning_content)
# deepseek的reasoner模型
- if pipeline_config['trigger'].get('misc', '').get('remove_think'):
+ if remove_think:
if delta['content'] == '':
self.is_think = True
delta['content'] = ''
@@ -136,6 +136,7 @@ class GiteeAIChatCompletions(chatcmpl.OpenAIChatCompletions):
use_model: requester.RuntimeLLMModel,
use_funcs: list[tools_entities.LLMFunction] = None,
extra_args: dict[str, typing.Any] = {},
+ remove_think: bool = False,
) -> llm_entities.Message | typing.AsyncGenerator[llm_entities.MessageChunk, None]:
self.client.api_key = use_model.token_mgr.get_token()
@@ -167,10 +168,9 @@ class GiteeAIChatCompletions(chatcmpl.OpenAIChatCompletions):
chunk_idx = 0
self.is_content = False
tool_calls_map: dict[str, llm_entities.ToolCall] = {}
- pipeline_config = query.pipeline_config
async for chunk in self._req_stream(args, extra_body=extra_args):
# 处理流式消息
- delta_message = await self._make_msg_chunk(pipeline_config, chunk, chunk_idx)
+ delta_message = await self._make_msg_chunk(remove_think, chunk, chunk_idx)
if delta_message.content:
current_content += delta_message.content
delta_message.content = current_content
diff --git a/pkg/provider/modelmgr/requesters/modelscopechatcmpl.py b/pkg/provider/modelmgr/requesters/modelscopechatcmpl.py
index 04987c19..7895a87e 100644
--- a/pkg/provider/modelmgr/requesters/modelscopechatcmpl.py
+++ b/pkg/provider/modelmgr/requesters/modelscopechatcmpl.py
@@ -174,7 +174,7 @@ class ModelScopeChatCompletions(requester.ProviderAPIRequester):
async def _make_msg_chunk(
self,
- pipeline_config: dict[str, typing.Any],
+ remove_think: bool,
chat_completion: chat_completion.ChatCompletion,
idx: int,
) -> llm_entities.MessageChunk:
@@ -199,7 +199,7 @@ class ModelScopeChatCompletions(requester.ProviderAPIRequester):
# print(reasoning_content)
# deepseek的reasoner模型
- if pipeline_config['trigger'].get('misc', '').get('remove_think'):
+ if remove_think:
if reasoning_content is not None:
pass
else:
@@ -227,6 +227,7 @@ class ModelScopeChatCompletions(requester.ProviderAPIRequester):
use_model: requester.RuntimeLLMModel,
use_funcs: list[tools_entities.LLMFunction] = None,
extra_args: dict[str, typing.Any] = {},
+ remove_think: bool = False,
) -> llm_entities.Message | typing.AsyncGenerator[llm_entities.MessageChunk, None]:
self.client.api_key = use_model.token_mgr.get_token()
@@ -258,10 +259,9 @@ class ModelScopeChatCompletions(requester.ProviderAPIRequester):
chunk_idx = 0
self.is_content = False
tool_calls_map: dict[str, llm_entities.ToolCall] = {}
- pipeline_config = query.pipeline_config
async for chunk in self._req_stream(args, extra_body=extra_args):
# 处理流式消息
- delta_message = await self._make_msg_chunk(pipeline_config, chunk, chunk_idx)
+ delta_message = await self._make_msg_chunk(remove_think, chunk, chunk_idx)
if delta_message.content:
current_content += delta_message.content
delta_message.content = current_content
@@ -296,6 +296,7 @@ class ModelScopeChatCompletions(requester.ProviderAPIRequester):
messages: typing.List[llm_entities.Message],
funcs: typing.List[tools_entities.LLMFunction] = None,
extra_args: dict[str, typing.Any] = {},
+ remove_think: bool = False,
) -> llm_entities.Message:
req_messages = [] # req_messages 仅用于类内,外部同步由 query.messages 进行
for m in messages:
@@ -335,6 +336,7 @@ class ModelScopeChatCompletions(requester.ProviderAPIRequester):
messages: typing.List[llm_entities.Message],
funcs: typing.List[tools_entities.LLMFunction] = None,
extra_args: dict[str, typing.Any] = {},
+ remove_think: bool = False,
) -> llm_entities.MessageChunk:
req_messages = [] # req_messages 仅用于类内,外部同步由 query.messages 进行
for m in messages:
@@ -354,6 +356,7 @@ class ModelScopeChatCompletions(requester.ProviderAPIRequester):
use_model=model,
use_funcs=funcs,
extra_args=extra_args,
+ remove_think=remove_think,
):
yield item
diff --git a/pkg/provider/modelmgr/requesters/moonshotchatcmpl.py b/pkg/provider/modelmgr/requesters/moonshotchatcmpl.py
index f3621a09..b8c0e950 100644
--- a/pkg/provider/modelmgr/requesters/moonshotchatcmpl.py
+++ b/pkg/provider/modelmgr/requesters/moonshotchatcmpl.py
@@ -25,6 +25,7 @@ class MoonshotChatCompletions(chatcmpl.OpenAIChatCompletions):
use_model: requester.RuntimeLLMModel,
use_funcs: list[tools_entities.LLMFunction] = None,
extra_args: dict[str, typing.Any] = {},
+ remove_think: bool = False,
) -> llm_entities.Message:
self.client.api_key = use_model.token_mgr.get_token()
@@ -54,6 +55,6 @@ class MoonshotChatCompletions(chatcmpl.OpenAIChatCompletions):
resp = await self._req(args, extra_body=extra_args)
# 处理请求结果
- message = await self._make_msg(resp)
+ message = await self._make_msg(resp, remove_think)
return message
diff --git a/pkg/provider/modelmgr/requesters/ollamachat.py b/pkg/provider/modelmgr/requesters/ollamachat.py
index 9e6f5a77..0a8943c0 100644
--- a/pkg/provider/modelmgr/requesters/ollamachat.py
+++ b/pkg/provider/modelmgr/requesters/ollamachat.py
@@ -110,6 +110,7 @@ class OllamaChatCompletions(requester.ProviderAPIRequester):
messages: typing.List[llm_entities.Message],
funcs: typing.List[tools_entities.LLMFunction] = None,
extra_args: dict[str, typing.Any] = {},
+ remove_think: bool = False,
) -> llm_entities.Message:
req_messages: list = []
for m in messages:
@@ -126,6 +127,7 @@ class OllamaChatCompletions(requester.ProviderAPIRequester):
use_model=model,
use_funcs=funcs,
extra_args=extra_args,
+ remove_think=remove_think,
)
except asyncio.TimeoutError:
raise errors.RequesterError('请求超时')
diff --git a/pkg/provider/modelmgr/requesters/ppiochatcmpl.py b/pkg/provider/modelmgr/requesters/ppiochatcmpl.py
index 46da6e01..ca49df10 100644
--- a/pkg/provider/modelmgr/requesters/ppiochatcmpl.py
+++ b/pkg/provider/modelmgr/requesters/ppiochatcmpl.py
@@ -27,7 +27,7 @@ class PPIOChatCompletions(chatcmpl.OpenAIChatCompletions):
async def _make_msg(
self,
chat_completion: chat_completion.ChatCompletion,
- pipeline_config: dict[str, typing.Any] = {'trigger': {'misc': {'remove_think': False}}},
+ remove_think: bool = False,
) -> llm_entities.Message:
chatcmpl_message = chat_completion.choices[0].message.model_dump()
# print(chatcmpl_message.keys(), chatcmpl_message.values())
@@ -39,7 +39,7 @@ class PPIOChatCompletions(chatcmpl.OpenAIChatCompletions):
reasoning_content = chatcmpl_message['reasoning_content'] if 'reasoning_content' in chatcmpl_message else None
# deepseek的reasoner模型
- if pipeline_config['trigger'].get('misc', '').get('remove_think'):
+ if remove_think:
chatcmpl_message['content'] = re.sub(
r'.*?', '', chatcmpl_message['content'], flags=re.DOTALL
)
@@ -55,7 +55,7 @@ class PPIOChatCompletions(chatcmpl.OpenAIChatCompletions):
async def _make_msg_chunk(
self,
- pipeline_config: dict[str, typing.Any],
+ remove_think: bool,
chat_completion: chat_completion.ChatCompletion,
idx: int,
) -> llm_entities.MessageChunk:
@@ -79,7 +79,7 @@ class PPIOChatCompletions(chatcmpl.OpenAIChatCompletions):
# print(reasoning_content)
# deepseek的reasoner模型
- if pipeline_config['trigger'].get('misc', '').get('remove_think'):
+ if remove_think:
if '' in delta['content']:
self.is_think = True
delta['content'] = ''
@@ -105,6 +105,7 @@ class PPIOChatCompletions(chatcmpl.OpenAIChatCompletions):
use_model: requester.RuntimeLLMModel,
use_funcs: list[tools_entities.LLMFunction] = None,
extra_args: dict[str, typing.Any] = {},
+ remove_think: bool = False,
) -> llm_entities.Message | typing.AsyncGenerator[llm_entities.MessageChunk, None]:
self.client.api_key = use_model.token_mgr.get_token()
@@ -136,10 +137,9 @@ class PPIOChatCompletions(chatcmpl.OpenAIChatCompletions):
chunk_idx = 0
self.is_content = False
tool_calls_map: dict[str, llm_entities.ToolCall] = {}
- pipeline_config = query.pipeline_config
async for chunk in self._req_stream(args, extra_body=extra_args):
# 处理流式消息
- delta_message = await self._make_msg_chunk(pipeline_config, chunk, chunk_idx)
+ delta_message = await self._make_msg_chunk(remove_think, chunk, chunk_idx)
if delta_message.content:
current_content += delta_message.content
delta_message.content = current_content
diff --git a/pkg/provider/runners/localagent.py b/pkg/provider/runners/localagent.py
index 0d7bdd0a..03a9b43b 100644
--- a/pkg/provider/runners/localagent.py
+++ b/pkg/provider/runners/localagent.py
@@ -92,9 +92,11 @@ class LocalAgentRunner(runner.RequestRunner):
is_stream = query.adapter.is_stream_output_supported()
try:
is_stream = await query.adapter.is_stream_output_supported()
-
except AttributeError:
is_stream = False
+
+ remove_think = (self.pipeline_config['trigger'].get('misc') or {}).get('remove_think', False)
+
if not is_stream:
# 非流式输出,直接请求
@@ -105,6 +107,7 @@ class LocalAgentRunner(runner.RequestRunner):
req_messages,
query.use_funcs,
extra_args=query.use_llm_model.model_entity.extra_args,
+ remove_think=remove_think,
)
yield msg
final_msg = msg
@@ -118,6 +121,7 @@ class LocalAgentRunner(runner.RequestRunner):
req_messages,
query.use_funcs,
extra_args=query.use_llm_model.model_entity.extra_args,
+ remove_think=remove_think,
):
msg_idx = msg_idx + 1
if msg_idx % 8 == 0 or msg.is_final: