mirror of
https://github.com/langbot-app/LangBot.git
synced 2026-06-07 14:26:03 +00:00
Merge branch 'rc/new-plugin' into refactor/new-plugin-system
This commit is contained in:
@@ -17,7 +17,7 @@ class LLMModelInfo(pydantic.BaseModel):
|
||||
|
||||
token_mgr: token.TokenManager
|
||||
|
||||
requester: requester.LLMAPIRequester
|
||||
requester: requester.ProviderAPIRequester
|
||||
|
||||
tool_call_supported: typing.Optional[bool] = False
|
||||
|
||||
|
||||
@@ -20,13 +20,16 @@ class ModelManager:
|
||||
|
||||
llm_models: list[requester.RuntimeLLMModel]
|
||||
|
||||
embedding_models: list[requester.RuntimeEmbeddingModel]
|
||||
|
||||
requester_components: list[engine.Component]
|
||||
|
||||
requester_dict: dict[str, type[requester.LLMAPIRequester]] # cache
|
||||
requester_dict: dict[str, type[requester.ProviderAPIRequester]] # cache
|
||||
|
||||
def __init__(self, ap: app.Application):
|
||||
self.ap = ap
|
||||
self.llm_models = []
|
||||
self.embedding_models = []
|
||||
self.requester_components = []
|
||||
self.requester_dict = {}
|
||||
|
||||
@@ -34,7 +37,7 @@ class ModelManager:
|
||||
self.requester_components = self.ap.discover.get_components_by_kind('LLMAPIRequester')
|
||||
|
||||
# forge requester class dict
|
||||
requester_dict: dict[str, type[requester.LLMAPIRequester]] = {}
|
||||
requester_dict: dict[str, type[requester.ProviderAPIRequester]] = {}
|
||||
for component in self.requester_components:
|
||||
requester_dict[component.metadata.name] = component.get_python_component_class()
|
||||
|
||||
@@ -47,13 +50,11 @@ class ModelManager:
|
||||
self.ap.logger.info('Loading models from db...')
|
||||
|
||||
self.llm_models = []
|
||||
self.embedding_models = []
|
||||
|
||||
# llm models
|
||||
result = await self.ap.persistence_mgr.execute_async(sqlalchemy.select(persistence_model.LLMModel))
|
||||
|
||||
llm_models = result.all()
|
||||
|
||||
# load models
|
||||
for llm_model in llm_models:
|
||||
try:
|
||||
await self.load_llm_model(llm_model)
|
||||
@@ -62,11 +63,17 @@ class ModelManager:
|
||||
except Exception as e:
|
||||
self.ap.logger.error(f'Failed to load model {llm_model.uuid}: {e}\n{traceback.format_exc()}')
|
||||
|
||||
# embedding models
|
||||
result = await self.ap.persistence_mgr.execute_async(sqlalchemy.select(persistence_model.EmbeddingModel))
|
||||
embedding_models = result.all()
|
||||
for embedding_model in embedding_models:
|
||||
await self.load_embedding_model(embedding_model)
|
||||
|
||||
async def init_runtime_llm_model(
|
||||
self,
|
||||
model_info: persistence_model.LLMModel | sqlalchemy.Row[persistence_model.LLMModel] | dict,
|
||||
):
|
||||
"""初始化运行时模型"""
|
||||
"""初始化运行时 LLM 模型"""
|
||||
if isinstance(model_info, sqlalchemy.Row):
|
||||
model_info = persistence_model.LLMModel(**model_info._mapping)
|
||||
elif isinstance(model_info, dict):
|
||||
@@ -90,31 +97,85 @@ class ModelManager:
|
||||
|
||||
return runtime_llm_model
|
||||
|
||||
async def init_runtime_embedding_model(
|
||||
self,
|
||||
model_info: persistence_model.EmbeddingModel | sqlalchemy.Row[persistence_model.EmbeddingModel] | dict,
|
||||
):
|
||||
"""初始化运行时 Embedding 模型"""
|
||||
if isinstance(model_info, sqlalchemy.Row):
|
||||
model_info = persistence_model.EmbeddingModel(**model_info._mapping)
|
||||
elif isinstance(model_info, dict):
|
||||
model_info = persistence_model.EmbeddingModel(**model_info)
|
||||
|
||||
requester_inst = self.requester_dict[model_info.requester](ap=self.ap, config=model_info.requester_config)
|
||||
|
||||
await requester_inst.initialize()
|
||||
|
||||
runtime_embedding_model = requester.RuntimeEmbeddingModel(
|
||||
model_entity=model_info,
|
||||
token_mgr=token.TokenManager(
|
||||
name=model_info.uuid,
|
||||
tokens=model_info.api_keys,
|
||||
),
|
||||
requester=requester_inst,
|
||||
)
|
||||
|
||||
return runtime_embedding_model
|
||||
|
||||
async def load_llm_model(
|
||||
self,
|
||||
model_info: persistence_model.LLMModel | sqlalchemy.Row[persistence_model.LLMModel] | dict,
|
||||
):
|
||||
"""加载模型"""
|
||||
"""加载 LLM 模型"""
|
||||
runtime_llm_model = await self.init_runtime_llm_model(model_info)
|
||||
self.llm_models.append(runtime_llm_model)
|
||||
|
||||
async def load_embedding_model(
|
||||
self,
|
||||
model_info: persistence_model.EmbeddingModel | sqlalchemy.Row[persistence_model.EmbeddingModel] | dict,
|
||||
):
|
||||
"""加载 Embedding 模型"""
|
||||
runtime_embedding_model = await self.init_runtime_embedding_model(model_info)
|
||||
self.embedding_models.append(runtime_embedding_model)
|
||||
|
||||
async def get_model_by_uuid(self, uuid: str) -> requester.RuntimeLLMModel:
|
||||
"""通过uuid获取模型"""
|
||||
"""通过uuid获取 LLM 模型"""
|
||||
for model in self.llm_models:
|
||||
if model.model_entity.uuid == uuid:
|
||||
return model
|
||||
raise ValueError(f'model {uuid} not found')
|
||||
raise ValueError(f'LLM model {uuid} not found')
|
||||
|
||||
async def get_embedding_model_by_uuid(self, uuid: str) -> requester.RuntimeEmbeddingModel:
|
||||
"""通过uuid获取 Embedding 模型"""
|
||||
for model in self.embedding_models:
|
||||
if model.model_entity.uuid == uuid:
|
||||
return model
|
||||
raise ValueError(f'Embedding model {uuid} not found')
|
||||
|
||||
async def remove_llm_model(self, model_uuid: str):
|
||||
"""移除模型"""
|
||||
"""移除 LLM 模型"""
|
||||
for model in self.llm_models:
|
||||
if model.model_entity.uuid == model_uuid:
|
||||
self.llm_models.remove(model)
|
||||
return
|
||||
|
||||
def get_available_requesters_info(self) -> list[dict]:
|
||||
async def remove_embedding_model(self, model_uuid: str):
|
||||
"""移除 Embedding 模型"""
|
||||
for model in self.embedding_models:
|
||||
if model.model_entity.uuid == model_uuid:
|
||||
self.embedding_models.remove(model)
|
||||
return
|
||||
|
||||
def get_available_requesters_info(self, model_type: str) -> list[dict]:
|
||||
"""获取所有可用的请求器"""
|
||||
return [component.to_plain_dict() for component in self.requester_components]
|
||||
if model_type != '':
|
||||
return [
|
||||
component.to_plain_dict()
|
||||
for component in self.requester_components
|
||||
if model_type in component.spec['support_type']
|
||||
]
|
||||
else:
|
||||
return [component.to_plain_dict() for component in self.requester_components]
|
||||
|
||||
def get_available_requester_info_by_name(self, name: str) -> dict | None:
|
||||
"""通过名称获取请求器信息"""
|
||||
|
||||
@@ -20,22 +20,45 @@ class RuntimeLLMModel:
|
||||
token_mgr: token.TokenManager
|
||||
"""api key管理器"""
|
||||
|
||||
requester: LLMAPIRequester
|
||||
requester: ProviderAPIRequester
|
||||
"""请求器实例"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model_entity: persistence_model.LLMModel,
|
||||
token_mgr: token.TokenManager,
|
||||
requester: LLMAPIRequester,
|
||||
requester: ProviderAPIRequester,
|
||||
):
|
||||
self.model_entity = model_entity
|
||||
self.token_mgr = token_mgr
|
||||
self.requester = requester
|
||||
|
||||
|
||||
class LLMAPIRequester(metaclass=abc.ABCMeta):
|
||||
"""LLM API请求器"""
|
||||
class RuntimeEmbeddingModel:
|
||||
"""运行时 Embedding 模型"""
|
||||
|
||||
model_entity: persistence_model.EmbeddingModel
|
||||
"""模型数据"""
|
||||
|
||||
token_mgr: token.TokenManager
|
||||
"""api key管理器"""
|
||||
|
||||
requester: ProviderAPIRequester
|
||||
"""请求器实例"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model_entity: persistence_model.EmbeddingModel,
|
||||
token_mgr: token.TokenManager,
|
||||
requester: ProviderAPIRequester,
|
||||
):
|
||||
self.model_entity = model_entity
|
||||
self.token_mgr = token_mgr
|
||||
self.requester = requester
|
||||
|
||||
|
||||
class ProviderAPIRequester(metaclass=abc.ABCMeta):
|
||||
"""Provider API请求器"""
|
||||
|
||||
name: str = None
|
||||
|
||||
@@ -61,6 +84,7 @@ class LLMAPIRequester(metaclass=abc.ABCMeta):
|
||||
messages: typing.List[provider_message.Message],
|
||||
funcs: typing.List[resource_tool.LLMTool] = None,
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
remove_think: bool = False,
|
||||
) -> provider_message.Message:
|
||||
"""调用API
|
||||
|
||||
@@ -69,8 +93,50 @@ class LLMAPIRequester(metaclass=abc.ABCMeta):
|
||||
messages (typing.List[llm_entities.Message]): 消息对象列表
|
||||
funcs (typing.List[tools_entities.LLMFunction], optional): 使用的工具函数列表. Defaults to None.
|
||||
extra_args (dict[str, typing.Any], optional): 额外的参数. Defaults to {}.
|
||||
remove_think (bool, optional): 是否移思考中的消息. Defaults to False.
|
||||
|
||||
Returns:
|
||||
llm_entities.Message: 返回消息对象
|
||||
"""
|
||||
pass
|
||||
|
||||
async def invoke_llm_stream(
|
||||
self,
|
||||
query: pipeline_query.Query,
|
||||
model: RuntimeLLMModel,
|
||||
messages: typing.List[provider_message.Message],
|
||||
funcs: typing.List[resource_tool.LLMTool] = None,
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
remove_think: bool = False,
|
||||
) -> provider_message.MessageChunk:
|
||||
"""调用API
|
||||
|
||||
Args:
|
||||
model (RuntimeLLMModel): 使用的模型信息
|
||||
messages (typing.List[provider_message.Message]): 消息对象列表
|
||||
funcs (typing.List[resource_tool.LLMTool], optional): 使用的工具函数列表. Defaults to None.
|
||||
extra_args (dict[str, typing.Any], optional): 额外的参数. Defaults to {}.
|
||||
remove_think (bool, optional): 是否移除思考中的消息. Defaults to False.
|
||||
|
||||
Returns:
|
||||
typing.AsyncGenerator[provider_message.MessageChunk]: 返回消息对象
|
||||
"""
|
||||
pass
|
||||
|
||||
async def invoke_embedding(
|
||||
self,
|
||||
model: RuntimeEmbeddingModel,
|
||||
input_text: typing.List[str],
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
) -> typing.List[typing.List[float]]:
|
||||
"""调用 Embedding API
|
||||
|
||||
Args:
|
||||
model (RuntimeEmbeddingModel): 使用的模型信息
|
||||
input_text (typing.List[str]): 输入文本
|
||||
extra_args (dict[str, typing.Any], optional): 额外的参数. Defaults to {}.
|
||||
|
||||
Returns:
|
||||
typing.List[typing.List[float]]: 返回的 embedding 向量
|
||||
"""
|
||||
pass
|
||||
|
||||
@@ -7,7 +7,7 @@ from . import chatcmpl
|
||||
|
||||
|
||||
class AI302ChatCompletions(chatcmpl.OpenAIChatCompletions):
|
||||
"""302 AI ChatCompletion API 请求器"""
|
||||
"""302.AI ChatCompletion API 请求器"""
|
||||
|
||||
client: openai.AsyncClient
|
||||
|
||||
|
||||
@@ -3,8 +3,8 @@ kind: LLMAPIRequester
|
||||
metadata:
|
||||
name: 302-ai-chat-completions
|
||||
label:
|
||||
en_US: 302 AI
|
||||
zh_Hans: 302 AI
|
||||
en_US: 302.AI
|
||||
zh_Hans: 302.AI
|
||||
icon: 302ai.png
|
||||
spec:
|
||||
config:
|
||||
@@ -22,6 +22,9 @@ spec:
|
||||
type: integer
|
||||
required: true
|
||||
default: 120
|
||||
support_type:
|
||||
- llm
|
||||
- text-embedding
|
||||
execution:
|
||||
python:
|
||||
path: ./302aichatcmpl.py
|
||||
|
||||
@@ -15,13 +15,13 @@ import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
|
||||
import langbot_plugin.api.entities.builtin.provider.message as provider_message
|
||||
|
||||
|
||||
class AnthropicMessages(requester.LLMAPIRequester):
|
||||
class AnthropicMessages(requester.ProviderAPIRequester):
|
||||
"""Anthropic Messages API 请求器"""
|
||||
|
||||
client: anthropic.AsyncAnthropic
|
||||
|
||||
default_config: dict[str, typing.Any] = {
|
||||
'base_url': 'https://api.anthropic.com/v1',
|
||||
'base_url': 'https://api.anthropic.com',
|
||||
'timeout': 120,
|
||||
}
|
||||
|
||||
@@ -44,6 +44,7 @@ class AnthropicMessages(requester.LLMAPIRequester):
|
||||
self.client = anthropic.AsyncAnthropic(
|
||||
api_key='',
|
||||
http_client=httpx_client,
|
||||
base_url=self.requester_cfg['base_url'],
|
||||
)
|
||||
|
||||
async def invoke_llm(
|
||||
@@ -53,6 +54,7 @@ class AnthropicMessages(requester.LLMAPIRequester):
|
||||
messages: typing.List[provider_message.Message],
|
||||
funcs: typing.List[resource_tool.LLMTool] = None,
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
remove_think: bool = False,
|
||||
) -> provider_message.Message:
|
||||
self.client.api_key = model.token_mgr.get_token()
|
||||
|
||||
@@ -89,7 +91,8 @@ class AnthropicMessages(requester.LLMAPIRequester):
|
||||
{
|
||||
'type': 'tool_result',
|
||||
'tool_use_id': tool_call_id,
|
||||
'content': m.content,
|
||||
'is_error': False,
|
||||
'content': [{'type': 'text', 'text': m.content}],
|
||||
}
|
||||
],
|
||||
}
|
||||
@@ -133,6 +136,9 @@ class AnthropicMessages(requester.LLMAPIRequester):
|
||||
|
||||
args['messages'] = req_messages
|
||||
|
||||
if 'thinking' in args:
|
||||
args['thinking'] = {'type': 'enabled', 'budget_tokens': 10000}
|
||||
|
||||
if funcs:
|
||||
tools = await self.ap.tool_mgr.generate_tools_for_anthropic(funcs)
|
||||
|
||||
@@ -140,19 +146,17 @@ class AnthropicMessages(requester.LLMAPIRequester):
|
||||
args['tools'] = tools
|
||||
|
||||
try:
|
||||
# print(json.dumps(args, indent=4, ensure_ascii=False))
|
||||
resp = await self.client.messages.create(**args)
|
||||
|
||||
args = {
|
||||
'content': '',
|
||||
'role': resp.role,
|
||||
}
|
||||
|
||||
assert type(resp) is anthropic.types.message.Message
|
||||
|
||||
for block in resp.content:
|
||||
if block.type == 'thinking':
|
||||
args['content'] = '<think>' + block.thinking + '</think>\n' + args['content']
|
||||
if not remove_think and block.type == 'thinking':
|
||||
args['content'] = '<think>\n' + block.thinking + '\n</think>\n' + args['content']
|
||||
elif block.type == 'text':
|
||||
args['content'] += block.text
|
||||
elif block.type == 'tool_use':
|
||||
@@ -176,3 +180,191 @@ class AnthropicMessages(requester.LLMAPIRequester):
|
||||
raise errors.RequesterError(f'模型无效: {e.message}')
|
||||
else:
|
||||
raise errors.RequesterError(f'请求地址无效: {e.message}')
|
||||
|
||||
async def invoke_llm_stream(
|
||||
self,
|
||||
query: pipeline_query.Query,
|
||||
model: requester.RuntimeLLMModel,
|
||||
messages: typing.List[provider_message.Message],
|
||||
funcs: typing.List[resource_tool.LLMTool] = None,
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
remove_think: bool = False,
|
||||
) -> provider_message.Message:
|
||||
self.client.api_key = model.token_mgr.get_token()
|
||||
|
||||
args = extra_args.copy()
|
||||
args['model'] = model.model_entity.name
|
||||
args['stream'] = True
|
||||
|
||||
# 处理消息
|
||||
|
||||
# system
|
||||
system_role_message = None
|
||||
|
||||
for i, m in enumerate(messages):
|
||||
if m.role == 'system':
|
||||
system_role_message = m
|
||||
|
||||
break
|
||||
|
||||
if system_role_message:
|
||||
messages.pop(i)
|
||||
|
||||
if isinstance(system_role_message, provider_message.Message) and isinstance(system_role_message.content, str):
|
||||
args['system'] = system_role_message.content
|
||||
|
||||
req_messages = []
|
||||
|
||||
for m in messages:
|
||||
if m.role == 'tool':
|
||||
tool_call_id = m.tool_call_id
|
||||
|
||||
req_messages.append(
|
||||
{
|
||||
'role': 'user',
|
||||
'content': [
|
||||
{
|
||||
'type': 'tool_result',
|
||||
'tool_use_id': tool_call_id,
|
||||
'is_error': False, # 暂时直接写false
|
||||
'content': [
|
||||
{'type': 'text', 'text': m.content}
|
||||
], # 这里要是list包裹,应该是多个返回的情况?type类型好像也可以填其他的,暂时只写text
|
||||
}
|
||||
],
|
||||
}
|
||||
)
|
||||
|
||||
continue
|
||||
|
||||
msg_dict = m.dict(exclude_none=True)
|
||||
|
||||
if isinstance(m.content, str) and m.content.strip() != '':
|
||||
msg_dict['content'] = [{'type': 'text', 'text': m.content}]
|
||||
elif isinstance(m.content, list):
|
||||
for i, ce in enumerate(m.content):
|
||||
if ce.type == 'image_base64':
|
||||
image_b64, image_format = await image.extract_b64_and_format(ce.image_base64)
|
||||
|
||||
alter_image_ele = {
|
||||
'type': 'image',
|
||||
'source': {
|
||||
'type': 'base64',
|
||||
'media_type': f'image/{image_format}',
|
||||
'data': image_b64,
|
||||
},
|
||||
}
|
||||
msg_dict['content'][i] = alter_image_ele
|
||||
if isinstance(msg_dict['content'], str) and msg_dict['content'] == '':
|
||||
msg_dict['content'] = [] # 这里不知道为什么会莫名有个空导致content为字符
|
||||
if m.tool_calls:
|
||||
for tool_call in m.tool_calls:
|
||||
msg_dict['content'].append(
|
||||
{
|
||||
'type': 'tool_use',
|
||||
'id': tool_call.id,
|
||||
'name': tool_call.function.name,
|
||||
'input': json.loads(tool_call.function.arguments),
|
||||
}
|
||||
)
|
||||
|
||||
del msg_dict['tool_calls']
|
||||
|
||||
req_messages.append(msg_dict)
|
||||
if 'thinking' in args:
|
||||
args['thinking'] = {'type': 'enabled', 'budget_tokens': 10000}
|
||||
|
||||
args['messages'] = req_messages
|
||||
|
||||
if funcs:
|
||||
tools = await self.ap.tool_mgr.generate_tools_for_anthropic(funcs)
|
||||
|
||||
if tools:
|
||||
args['tools'] = tools
|
||||
|
||||
try:
|
||||
role = 'assistant' # 默认角色
|
||||
# chunk_idx = 0
|
||||
think_started = False
|
||||
think_ended = False
|
||||
finish_reason = False
|
||||
content = ''
|
||||
tool_name = ''
|
||||
tool_id = ''
|
||||
async for chunk in await self.client.messages.create(**args):
|
||||
tool_call = {'id': None, 'function': {'name': None, 'arguments': None}, 'type': 'function'}
|
||||
if isinstance(
|
||||
chunk, anthropic.types.raw_content_block_start_event.RawContentBlockStartEvent
|
||||
): # 记录开始
|
||||
if chunk.content_block.type == 'tool_use':
|
||||
if chunk.content_block.name is not None:
|
||||
tool_name = chunk.content_block.name
|
||||
if chunk.content_block.id is not None:
|
||||
tool_id = chunk.content_block.id
|
||||
|
||||
tool_call['function']['name'] = tool_name
|
||||
tool_call['function']['arguments'] = ''
|
||||
tool_call['id'] = tool_id
|
||||
|
||||
if not remove_think:
|
||||
if chunk.content_block.type == 'thinking' and not remove_think:
|
||||
think_started = True
|
||||
elif chunk.content_block.type == 'text' and chunk.index != 0 and not remove_think:
|
||||
think_ended = True
|
||||
continue
|
||||
elif isinstance(chunk, anthropic.types.raw_content_block_delta_event.RawContentBlockDeltaEvent):
|
||||
if chunk.delta.type == 'thinking_delta':
|
||||
if think_started:
|
||||
think_started = False
|
||||
content = '<think>\n' + chunk.delta.thinking
|
||||
elif remove_think:
|
||||
continue
|
||||
else:
|
||||
content = chunk.delta.thinking
|
||||
elif chunk.delta.type == 'text_delta':
|
||||
if think_ended:
|
||||
think_ended = False
|
||||
content = '\n</think>\n' + chunk.delta.text
|
||||
else:
|
||||
content = chunk.delta.text
|
||||
elif chunk.delta.type == 'input_json_delta':
|
||||
tool_call['function']['arguments'] = chunk.delta.partial_json
|
||||
tool_call['function']['name'] = tool_name
|
||||
tool_call['id'] = tool_id
|
||||
elif isinstance(chunk, anthropic.types.raw_content_block_stop_event.RawContentBlockStopEvent):
|
||||
continue # 记录raw_content_block结束的
|
||||
|
||||
elif isinstance(chunk, anthropic.types.raw_message_delta_event.RawMessageDeltaEvent):
|
||||
if chunk.delta.stop_reason == 'end_turn':
|
||||
finish_reason = True
|
||||
elif isinstance(chunk, anthropic.types.raw_message_stop_event.RawMessageStopEvent):
|
||||
continue # 这个好像是完全结束
|
||||
else:
|
||||
# print(chunk)
|
||||
self.ap.logger.debug(f'anthropic chunk: {chunk}')
|
||||
continue
|
||||
|
||||
args = {
|
||||
'content': content,
|
||||
'role': role,
|
||||
'is_final': finish_reason,
|
||||
'tool_calls': None if tool_call['id'] is None else [tool_call],
|
||||
}
|
||||
# if chunk_idx == 0:
|
||||
# chunk_idx += 1
|
||||
# continue
|
||||
|
||||
# assert type(chunk) is anthropic.types.message.Chunk
|
||||
|
||||
yield provider_message.MessageChunk(**args)
|
||||
|
||||
# return llm_entities.Message(**args)
|
||||
except anthropic.AuthenticationError as e:
|
||||
raise errors.RequesterError(f'api-key 无效: {e.message}')
|
||||
except anthropic.BadRequestError as e:
|
||||
raise errors.RequesterError(str(e.message))
|
||||
except anthropic.NotFoundError as e:
|
||||
if 'model: ' in str(e):
|
||||
raise errors.RequesterError(f'模型无效: {e.message}')
|
||||
else:
|
||||
raise errors.RequesterError(f'请求地址无效: {e.message}')
|
||||
|
||||
@@ -14,7 +14,7 @@ spec:
|
||||
zh_Hans: 基础 URL
|
||||
type: string
|
||||
required: true
|
||||
default: "https://api.anthropic.com/v1"
|
||||
default: "https://api.anthropic.com"
|
||||
- name: timeout
|
||||
label:
|
||||
en_US: Timeout
|
||||
@@ -22,6 +22,8 @@ spec:
|
||||
type: integer
|
||||
required: true
|
||||
default: 120
|
||||
support_type:
|
||||
- llm
|
||||
execution:
|
||||
python:
|
||||
path: ./anthropicmsgs.py
|
||||
|
||||
@@ -22,6 +22,8 @@ spec:
|
||||
type: integer
|
||||
required: true
|
||||
default: 120
|
||||
support_type:
|
||||
- llm
|
||||
execution:
|
||||
python:
|
||||
path: ./bailianchatcmpl.py
|
||||
|
||||
@@ -13,7 +13,7 @@ import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
|
||||
import langbot_plugin.api.entities.builtin.provider.message as provider_message
|
||||
|
||||
|
||||
class OpenAIChatCompletions(requester.LLMAPIRequester):
|
||||
class OpenAIChatCompletions(requester.ProviderAPIRequester):
|
||||
"""OpenAI ChatCompletion API 请求器"""
|
||||
|
||||
client: openai.AsyncClient
|
||||
@@ -38,9 +38,18 @@ class OpenAIChatCompletions(requester.LLMAPIRequester):
|
||||
) -> chat_completion.ChatCompletion:
|
||||
return await self.client.chat.completions.create(**args, extra_body=extra_body)
|
||||
|
||||
async def _req_stream(
|
||||
self,
|
||||
args: dict,
|
||||
extra_body: dict = {},
|
||||
):
|
||||
async for chunk in await self.client.chat.completions.create(**args, extra_body=extra_body):
|
||||
yield chunk
|
||||
|
||||
async def _make_msg(
|
||||
self,
|
||||
chat_completion: chat_completion.ChatCompletion,
|
||||
remove_think: bool = False,
|
||||
) -> provider_message.Message:
|
||||
chatcmpl_message = chat_completion.choices[0].message.model_dump()
|
||||
|
||||
@@ -48,16 +57,191 @@ class OpenAIChatCompletions(requester.LLMAPIRequester):
|
||||
if 'role' not in chatcmpl_message or chatcmpl_message['role'] is None:
|
||||
chatcmpl_message['role'] = 'assistant'
|
||||
|
||||
reasoning_content = chatcmpl_message['reasoning_content'] if 'reasoning_content' in chatcmpl_message else None
|
||||
# 处理思维链
|
||||
content = chatcmpl_message.get('content', '')
|
||||
reasoning_content = chatcmpl_message.get('reasoning_content', None)
|
||||
|
||||
# deepseek的reasoner模型
|
||||
if reasoning_content is not None:
|
||||
chatcmpl_message['content'] = '<think>\n' + reasoning_content + '\n</think>\n' + chatcmpl_message['content']
|
||||
processed_content, _ = await self._process_thinking_content(
|
||||
content=content, reasoning_content=reasoning_content, remove_think=remove_think
|
||||
)
|
||||
|
||||
chatcmpl_message['content'] = processed_content
|
||||
|
||||
# 移除 reasoning_content 字段,避免传递给 Message
|
||||
if 'reasoning_content' in chatcmpl_message:
|
||||
del chatcmpl_message['reasoning_content']
|
||||
|
||||
message = provider_message.Message(**chatcmpl_message)
|
||||
|
||||
return message
|
||||
|
||||
async def _process_thinking_content(
|
||||
self,
|
||||
content: str,
|
||||
reasoning_content: str = None,
|
||||
remove_think: bool = False,
|
||||
) -> tuple[str, str]:
|
||||
"""处理思维链内容
|
||||
|
||||
Args:
|
||||
content: 原始内容
|
||||
reasoning_content: reasoning_content 字段内容
|
||||
remove_think: 是否移除思维链
|
||||
|
||||
Returns:
|
||||
(处理后的内容, 提取的思维链内容)
|
||||
"""
|
||||
thinking_content = ''
|
||||
|
||||
# 1. 从 reasoning_content 提取思维链
|
||||
if reasoning_content:
|
||||
thinking_content = reasoning_content
|
||||
|
||||
# 2. 从 content 中提取 <think> 标签内容
|
||||
if content and '<think>' in content and '</think>' in content:
|
||||
import re
|
||||
|
||||
think_pattern = r'<think>(.*?)</think>'
|
||||
think_matches = re.findall(think_pattern, content, re.DOTALL)
|
||||
if think_matches:
|
||||
# 如果已有 reasoning_content,则追加
|
||||
if thinking_content:
|
||||
thinking_content += '\n' + '\n'.join(think_matches)
|
||||
else:
|
||||
thinking_content = '\n'.join(think_matches)
|
||||
# 移除 content 中的 <think> 标签
|
||||
content = re.sub(think_pattern, '', content, flags=re.DOTALL).strip()
|
||||
|
||||
# 3. 根据 remove_think 参数决定是否保留思维链
|
||||
if remove_think:
|
||||
return content, ''
|
||||
else:
|
||||
# 如果有思维链内容,将其以 <think> 格式添加到 content 开头
|
||||
if thinking_content:
|
||||
content = f'<think>\n{thinking_content}\n</think>\n{content}'.strip()
|
||||
return content, thinking_content
|
||||
|
||||
async def _closure_stream(
|
||||
self,
|
||||
query: pipeline_query.Query,
|
||||
req_messages: list[dict],
|
||||
use_model: requester.RuntimeLLMModel,
|
||||
use_funcs: list[resource_tool.LLMTool] = None,
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
remove_think: bool = False,
|
||||
) -> provider_message.MessageChunk:
|
||||
self.client.api_key = use_model.token_mgr.get_token()
|
||||
|
||||
args = {}
|
||||
args['model'] = use_model.model_entity.name
|
||||
|
||||
if use_funcs:
|
||||
tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
|
||||
if tools:
|
||||
args['tools'] = tools
|
||||
|
||||
# 设置此次请求中的messages
|
||||
messages = req_messages.copy()
|
||||
|
||||
# 检查vision
|
||||
for msg in messages:
|
||||
if 'content' in msg and isinstance(msg['content'], list):
|
||||
for me in msg['content']:
|
||||
if me['type'] == 'image_base64':
|
||||
me['image_url'] = {'url': me['image_base64']}
|
||||
me['type'] = 'image_url'
|
||||
del me['image_base64']
|
||||
|
||||
args['messages'] = messages
|
||||
args['stream'] = True
|
||||
|
||||
# 流式处理状态
|
||||
# tool_calls_map: dict[str, provider_message.ToolCall] = {}
|
||||
chunk_idx = 0
|
||||
thinking_started = False
|
||||
thinking_ended = False
|
||||
role = 'assistant' # 默认角色
|
||||
tool_id = ''
|
||||
tool_name = ''
|
||||
# accumulated_reasoning = '' # 仅用于判断何时结束思维链
|
||||
|
||||
async for chunk in self._req_stream(args, extra_body=extra_args):
|
||||
# 解析 chunk 数据
|
||||
|
||||
if hasattr(chunk, 'choices') and chunk.choices:
|
||||
choice = chunk.choices[0]
|
||||
delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
|
||||
|
||||
finish_reason = getattr(choice, 'finish_reason', None)
|
||||
else:
|
||||
delta = {}
|
||||
finish_reason = None
|
||||
# 从第一个 chunk 获取 role,后续使用这个 role
|
||||
if 'role' in delta and delta['role']:
|
||||
role = delta['role']
|
||||
|
||||
# 获取增量内容
|
||||
delta_content = delta.get('content', '')
|
||||
reasoning_content = delta.get('reasoning_content', '')
|
||||
|
||||
# 处理 reasoning_content
|
||||
if reasoning_content:
|
||||
# accumulated_reasoning += reasoning_content
|
||||
# 如果设置了 remove_think,跳过 reasoning_content
|
||||
if remove_think:
|
||||
chunk_idx += 1
|
||||
continue
|
||||
|
||||
# 第一次出现 reasoning_content,添加 <think> 开始标签
|
||||
if not thinking_started:
|
||||
thinking_started = True
|
||||
delta_content = '<think>\n' + reasoning_content
|
||||
else:
|
||||
# 继续输出 reasoning_content
|
||||
delta_content = reasoning_content
|
||||
elif thinking_started and not thinking_ended and delta_content:
|
||||
# reasoning_content 结束,normal content 开始,添加 </think> 结束标签
|
||||
thinking_ended = True
|
||||
delta_content = '\n</think>\n' + delta_content
|
||||
|
||||
# 处理 content 中已有的 <think> 标签(如果需要移除)
|
||||
# if delta_content and remove_think and '<think>' in delta_content:
|
||||
# import re
|
||||
#
|
||||
# # 移除 <think> 标签及其内容
|
||||
# delta_content = re.sub(r'<think>.*?</think>', '', delta_content, flags=re.DOTALL)
|
||||
|
||||
# 处理工具调用增量
|
||||
# delta_tool_calls = None
|
||||
if delta.get('tool_calls'):
|
||||
for tool_call in delta['tool_calls']:
|
||||
if tool_call['id'] and tool_call['function']['name']:
|
||||
tool_id = tool_call['id']
|
||||
tool_name = tool_call['function']['name']
|
||||
else:
|
||||
tool_call['id'] = tool_id
|
||||
tool_call['function']['name'] = tool_name
|
||||
if tool_call['type'] is None:
|
||||
tool_call['type'] = 'function'
|
||||
|
||||
# 跳过空的第一个 chunk(只有 role 没有内容)
|
||||
if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'):
|
||||
chunk_idx += 1
|
||||
continue
|
||||
# 构建 MessageChunk - 只包含增量内容
|
||||
chunk_data = {
|
||||
'role': role,
|
||||
'content': delta_content if delta_content else None,
|
||||
'tool_calls': delta.get('tool_calls'),
|
||||
'is_final': bool(finish_reason),
|
||||
}
|
||||
|
||||
# 移除 None 值
|
||||
chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
|
||||
|
||||
yield provider_message.MessageChunk(**chunk_data)
|
||||
chunk_idx += 1
|
||||
|
||||
async def _closure(
|
||||
self,
|
||||
query: pipeline_query.Query,
|
||||
@@ -65,6 +249,7 @@ class OpenAIChatCompletions(requester.LLMAPIRequester):
|
||||
use_model: requester.RuntimeLLMModel,
|
||||
use_funcs: list[resource_tool.LLMTool] = None,
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
remove_think: bool = False,
|
||||
) -> provider_message.Message:
|
||||
self.client.api_key = use_model.token_mgr.get_token()
|
||||
|
||||
@@ -92,10 +277,10 @@ class OpenAIChatCompletions(requester.LLMAPIRequester):
|
||||
args['messages'] = messages
|
||||
|
||||
# 发送请求
|
||||
resp = await self._req(args, extra_body=extra_args)
|
||||
|
||||
resp = await self._req(args, extra_body=extra_args)
|
||||
# 处理请求结果
|
||||
message = await self._make_msg(resp)
|
||||
message = await self._make_msg(resp, remove_think)
|
||||
|
||||
return message
|
||||
|
||||
@@ -106,6 +291,7 @@ class OpenAIChatCompletions(requester.LLMAPIRequester):
|
||||
messages: typing.List[provider_message.Message],
|
||||
funcs: typing.List[resource_tool.LLMTool] = None,
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
remove_think: bool = False,
|
||||
) -> provider_message.Message:
|
||||
req_messages = [] # req_messages 仅用于类内,外部同步由 query.messages 进行
|
||||
for m in messages:
|
||||
@@ -119,13 +305,90 @@ class OpenAIChatCompletions(requester.LLMAPIRequester):
|
||||
req_messages.append(msg_dict)
|
||||
|
||||
try:
|
||||
return await self._closure(
|
||||
msg = await self._closure(
|
||||
query=query,
|
||||
req_messages=req_messages,
|
||||
use_model=model,
|
||||
use_funcs=funcs,
|
||||
extra_args=extra_args,
|
||||
remove_think=remove_think,
|
||||
)
|
||||
return msg
|
||||
except asyncio.TimeoutError:
|
||||
raise errors.RequesterError('请求超时')
|
||||
except openai.BadRequestError as e:
|
||||
if 'context_length_exceeded' in e.message:
|
||||
raise errors.RequesterError(f'上文过长,请重置会话: {e.message}')
|
||||
else:
|
||||
raise errors.RequesterError(f'请求参数错误: {e.message}')
|
||||
except openai.AuthenticationError as e:
|
||||
raise errors.RequesterError(f'无效的 api-key: {e.message}')
|
||||
except openai.NotFoundError as e:
|
||||
raise errors.RequesterError(f'请求路径错误: {e.message}')
|
||||
except openai.RateLimitError as e:
|
||||
raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}')
|
||||
except openai.APIError as e:
|
||||
raise errors.RequesterError(f'请求错误: {e.message}')
|
||||
|
||||
async def invoke_embedding(
|
||||
self,
|
||||
model: requester.RuntimeEmbeddingModel,
|
||||
input_text: list[str],
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
) -> list[list[float]]:
|
||||
"""调用 Embedding API"""
|
||||
self.client.api_key = model.token_mgr.get_token()
|
||||
|
||||
args = {
|
||||
'model': model.model_entity.name,
|
||||
'input': input_text,
|
||||
}
|
||||
|
||||
if model.model_entity.extra_args:
|
||||
args.update(model.model_entity.extra_args)
|
||||
|
||||
args.update(extra_args)
|
||||
|
||||
try:
|
||||
resp = await self.client.embeddings.create(**args)
|
||||
|
||||
return [d.embedding for d in resp.data]
|
||||
except asyncio.TimeoutError:
|
||||
raise errors.RequesterError('请求超时')
|
||||
except openai.BadRequestError as e:
|
||||
raise errors.RequesterError(f'请求参数错误: {e.message}')
|
||||
|
||||
async def invoke_llm_stream(
|
||||
self,
|
||||
query: pipeline_query.Query,
|
||||
model: requester.RuntimeLLMModel,
|
||||
messages: typing.List[provider_message.Message],
|
||||
funcs: typing.List[resource_tool.LLMTool] = None,
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
remove_think: bool = False,
|
||||
) -> provider_message.MessageChunk:
|
||||
req_messages = [] # req_messages 仅用于类内,外部同步由 query.messages 进行
|
||||
for m in messages:
|
||||
msg_dict = m.dict(exclude_none=True)
|
||||
content = msg_dict.get('content')
|
||||
if isinstance(content, list):
|
||||
# 检查 content 列表中是否每个部分都是文本
|
||||
if all(isinstance(part, dict) and part.get('type') == 'text' for part in content):
|
||||
# 将所有文本部分合并为一个字符串
|
||||
msg_dict['content'] = '\n'.join(part['text'] for part in content)
|
||||
req_messages.append(msg_dict)
|
||||
|
||||
try:
|
||||
async for item in self._closure_stream(
|
||||
query=query,
|
||||
req_messages=req_messages,
|
||||
use_model=model,
|
||||
use_funcs=funcs,
|
||||
extra_args=extra_args,
|
||||
remove_think=remove_think,
|
||||
):
|
||||
yield item
|
||||
|
||||
except asyncio.TimeoutError:
|
||||
raise errors.RequesterError('请求超时')
|
||||
except openai.BadRequestError as e:
|
||||
|
||||
@@ -22,6 +22,9 @@ spec:
|
||||
type: integer
|
||||
required: true
|
||||
default: 120
|
||||
support_type:
|
||||
- llm
|
||||
- text-embedding
|
||||
execution:
|
||||
python:
|
||||
path: ./chatcmpl.py
|
||||
|
||||
BIN
pkg/provider/modelmgr/requesters/compshare.png
Normal file
BIN
pkg/provider/modelmgr/requesters/compshare.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 59 KiB |
17
pkg/provider/modelmgr/requesters/compsharechatcmpl.py
Normal file
17
pkg/provider/modelmgr/requesters/compsharechatcmpl.py
Normal file
@@ -0,0 +1,17 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import typing
|
||||
import openai
|
||||
|
||||
from . import chatcmpl
|
||||
|
||||
|
||||
class CompShareChatCompletions(chatcmpl.OpenAIChatCompletions):
|
||||
"""CompShare ChatCompletion API 请求器"""
|
||||
|
||||
client: openai.AsyncClient
|
||||
|
||||
default_config: dict[str, typing.Any] = {
|
||||
'base_url': 'https://api.modelverse.cn/v1',
|
||||
'timeout': 120,
|
||||
}
|
||||
30
pkg/provider/modelmgr/requesters/compsharechatcmpl.yaml
Normal file
30
pkg/provider/modelmgr/requesters/compsharechatcmpl.yaml
Normal file
@@ -0,0 +1,30 @@
|
||||
apiVersion: v1
|
||||
kind: LLMAPIRequester
|
||||
metadata:
|
||||
name: compshare-chat-completions
|
||||
label:
|
||||
en_US: CompShare
|
||||
zh_Hans: 优云智算
|
||||
icon: compshare.png
|
||||
spec:
|
||||
config:
|
||||
- name: base_url
|
||||
label:
|
||||
en_US: Base URL
|
||||
zh_Hans: 基础 URL
|
||||
type: string
|
||||
required: true
|
||||
default: "https://api.modelverse.cn/v1"
|
||||
- name: timeout
|
||||
label:
|
||||
en_US: Timeout
|
||||
zh_Hans: 超时时间
|
||||
type: integer
|
||||
required: true
|
||||
default: 120
|
||||
support_type:
|
||||
- llm
|
||||
execution:
|
||||
python:
|
||||
path: ./compsharechatcmpl.py
|
||||
attr: CompShareChatCompletions
|
||||
@@ -24,6 +24,7 @@ class DeepseekChatCompletions(chatcmpl.OpenAIChatCompletions):
|
||||
use_model: requester.RuntimeLLMModel,
|
||||
use_funcs: list[resource_tool.LLMTool] = None,
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
remove_think: bool = False,
|
||||
) -> provider_message.Message:
|
||||
self.client.api_key = use_model.token_mgr.get_token()
|
||||
|
||||
@@ -49,10 +50,11 @@ class DeepseekChatCompletions(chatcmpl.OpenAIChatCompletions):
|
||||
# 发送请求
|
||||
resp = await self._req(args, extra_body=extra_args)
|
||||
|
||||
# print(resp)
|
||||
|
||||
if resp is None:
|
||||
raise errors.RequesterError('接口返回为空,请确定模型提供商服务是否正常')
|
||||
|
||||
# 处理请求结果
|
||||
message = await self._make_msg(resp)
|
||||
message = await self._make_msg(resp, remove_think)
|
||||
|
||||
return message
|
||||
|
||||
@@ -4,7 +4,7 @@ metadata:
|
||||
name: deepseek-chat-completions
|
||||
label:
|
||||
en_US: DeepSeek
|
||||
zh_Hans: 深度求索
|
||||
zh_Hans: DeepSeek
|
||||
icon: deepseek.svg
|
||||
spec:
|
||||
config:
|
||||
@@ -22,6 +22,8 @@ spec:
|
||||
type: integer
|
||||
required: true
|
||||
default: 120
|
||||
support_type:
|
||||
- llm
|
||||
execution:
|
||||
python:
|
||||
path: ./deepseekchatcmpl.py
|
||||
|
||||
@@ -4,6 +4,13 @@ import typing
|
||||
|
||||
from . import chatcmpl
|
||||
|
||||
import uuid
|
||||
|
||||
from .. import requester
|
||||
import langbot_plugin.api.entities.builtin.provider.message as provider_message
|
||||
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
|
||||
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
|
||||
|
||||
|
||||
class GeminiChatCompletions(chatcmpl.OpenAIChatCompletions):
|
||||
"""Google Gemini API 请求器"""
|
||||
@@ -12,3 +19,124 @@ class GeminiChatCompletions(chatcmpl.OpenAIChatCompletions):
|
||||
'base_url': 'https://generativelanguage.googleapis.com/v1beta/openai',
|
||||
'timeout': 120,
|
||||
}
|
||||
|
||||
async def _closure_stream(
|
||||
self,
|
||||
query: pipeline_query.Query,
|
||||
req_messages: list[dict],
|
||||
use_model: requester.RuntimeLLMModel,
|
||||
use_funcs: list[resource_tool.LLMTool] = None,
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
remove_think: bool = False,
|
||||
) -> provider_message.MessageChunk:
|
||||
self.client.api_key = use_model.token_mgr.get_token()
|
||||
|
||||
args = {}
|
||||
args['model'] = use_model.model_entity.name
|
||||
|
||||
if use_funcs:
|
||||
tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
|
||||
if tools:
|
||||
args['tools'] = tools
|
||||
|
||||
# 设置此次请求中的messages
|
||||
messages = req_messages.copy()
|
||||
|
||||
# 检查vision
|
||||
for msg in messages:
|
||||
if 'content' in msg and isinstance(msg['content'], list):
|
||||
for me in msg['content']:
|
||||
if me['type'] == 'image_base64':
|
||||
me['image_url'] = {'url': me['image_base64']}
|
||||
me['type'] = 'image_url'
|
||||
del me['image_base64']
|
||||
|
||||
args['messages'] = messages
|
||||
args['stream'] = True
|
||||
|
||||
# 流式处理状态
|
||||
# tool_calls_map: dict[str, provider_message.ToolCall] = {}
|
||||
chunk_idx = 0
|
||||
thinking_started = False
|
||||
thinking_ended = False
|
||||
role = 'assistant' # 默认角色
|
||||
tool_id = ''
|
||||
tool_name = ''
|
||||
# accumulated_reasoning = '' # 仅用于判断何时结束思维链
|
||||
|
||||
async for chunk in self._req_stream(args, extra_body=extra_args):
|
||||
# 解析 chunk 数据
|
||||
|
||||
if hasattr(chunk, 'choices') and chunk.choices:
|
||||
choice = chunk.choices[0]
|
||||
delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
|
||||
|
||||
finish_reason = getattr(choice, 'finish_reason', None)
|
||||
else:
|
||||
delta = {}
|
||||
finish_reason = None
|
||||
# 从第一个 chunk 获取 role,后续使用这个 role
|
||||
if 'role' in delta and delta['role']:
|
||||
role = delta['role']
|
||||
|
||||
# 获取增量内容
|
||||
delta_content = delta.get('content', '')
|
||||
reasoning_content = delta.get('reasoning_content', '')
|
||||
|
||||
# 处理 reasoning_content
|
||||
if reasoning_content:
|
||||
# accumulated_reasoning += reasoning_content
|
||||
# 如果设置了 remove_think,跳过 reasoning_content
|
||||
if remove_think:
|
||||
chunk_idx += 1
|
||||
continue
|
||||
|
||||
# 第一次出现 reasoning_content,添加 <think> 开始标签
|
||||
if not thinking_started:
|
||||
thinking_started = True
|
||||
delta_content = '<think>\n' + reasoning_content
|
||||
else:
|
||||
# 继续输出 reasoning_content
|
||||
delta_content = reasoning_content
|
||||
elif thinking_started and not thinking_ended and delta_content:
|
||||
# reasoning_content 结束,normal content 开始,添加 </think> 结束标签
|
||||
thinking_ended = True
|
||||
delta_content = '\n</think>\n' + delta_content
|
||||
|
||||
# 处理 content 中已有的 <think> 标签(如果需要移除)
|
||||
# if delta_content and remove_think and '<think>' in delta_content:
|
||||
# import re
|
||||
#
|
||||
# # 移除 <think> 标签及其内容
|
||||
# delta_content = re.sub(r'<think>.*?</think>', '', delta_content, flags=re.DOTALL)
|
||||
|
||||
# 处理工具调用增量
|
||||
# delta_tool_calls = None
|
||||
if delta.get('tool_calls'):
|
||||
for tool_call in delta['tool_calls']:
|
||||
if tool_call['id'] == '' and tool_id == '':
|
||||
tool_id = str(uuid.uuid4())
|
||||
if tool_call['function']['name']:
|
||||
tool_name = tool_call['function']['name']
|
||||
tool_call['id'] = tool_id
|
||||
tool_call['function']['name'] = tool_name
|
||||
if tool_call['type'] is None:
|
||||
tool_call['type'] = 'function'
|
||||
|
||||
# 跳过空的第一个 chunk(只有 role 没有内容)
|
||||
if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'):
|
||||
chunk_idx += 1
|
||||
continue
|
||||
# 构建 MessageChunk - 只包含增量内容
|
||||
chunk_data = {
|
||||
'role': role,
|
||||
'content': delta_content if delta_content else None,
|
||||
'tool_calls': delta.get('tool_calls'),
|
||||
'is_final': bool(finish_reason),
|
||||
}
|
||||
|
||||
# 移除 None 值
|
||||
chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
|
||||
|
||||
yield provider_message.MessageChunk(**chunk_data)
|
||||
chunk_idx += 1
|
||||
|
||||
@@ -22,6 +22,8 @@ spec:
|
||||
type: integer
|
||||
required: true
|
||||
default: 120
|
||||
support_type:
|
||||
- llm
|
||||
execution:
|
||||
python:
|
||||
path: ./geminichatcmpl.py
|
||||
|
||||
@@ -3,49 +3,13 @@ from __future__ import annotations
|
||||
|
||||
import typing
|
||||
|
||||
from . import chatcmpl
|
||||
from .. import requester
|
||||
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
|
||||
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
|
||||
import langbot_plugin.api.entities.builtin.provider.message as provider_message
|
||||
from . import ppiochatcmpl
|
||||
|
||||
|
||||
class GiteeAIChatCompletions(chatcmpl.OpenAIChatCompletions):
|
||||
class GiteeAIChatCompletions(ppiochatcmpl.PPIOChatCompletions):
|
||||
"""Gitee AI ChatCompletions API 请求器"""
|
||||
|
||||
default_config: dict[str, typing.Any] = {
|
||||
'base_url': 'https://ai.gitee.com/v1',
|
||||
'timeout': 120,
|
||||
}
|
||||
|
||||
async def _closure(
|
||||
self,
|
||||
query: pipeline_query.Query,
|
||||
req_messages: list[dict],
|
||||
use_model: requester.RuntimeLLMModel,
|
||||
use_funcs: list[resource_tool.LLMTool] = None,
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
) -> provider_message.Message:
|
||||
self.client.api_key = use_model.token_mgr.get_token()
|
||||
|
||||
args = {}
|
||||
args['model'] = use_model.model_entity.name
|
||||
|
||||
if use_funcs:
|
||||
tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
|
||||
|
||||
if tools:
|
||||
args['tools'] = tools
|
||||
|
||||
# gitee 不支持多模态,把content都转换成纯文字
|
||||
for m in req_messages:
|
||||
if 'content' in m and isinstance(m['content'], list):
|
||||
m['content'] = ' '.join([c['text'] for c in m['content']])
|
||||
|
||||
args['messages'] = req_messages
|
||||
|
||||
resp = await self._req(args, extra_body=extra_args)
|
||||
|
||||
message = await self._make_msg(resp)
|
||||
|
||||
return message
|
||||
|
||||
@@ -22,6 +22,9 @@ spec:
|
||||
type: integer
|
||||
required: true
|
||||
default: 120
|
||||
support_type:
|
||||
- llm
|
||||
- text-embedding
|
||||
execution:
|
||||
python:
|
||||
path: ./giteeaichatcmpl.py
|
||||
|
||||
@@ -22,6 +22,9 @@ spec:
|
||||
type: integer
|
||||
required: true
|
||||
default: 120
|
||||
support_type:
|
||||
- llm
|
||||
- text-embedding
|
||||
execution:
|
||||
python:
|
||||
path: ./lmstudiochatcmpl.py
|
||||
|
||||
@@ -5,7 +5,6 @@ import typing
|
||||
|
||||
import openai
|
||||
import openai.types.chat.chat_completion as chat_completion
|
||||
import openai.types.chat.chat_completion_message_tool_call as chat_completion_message_tool_call
|
||||
import httpx
|
||||
|
||||
from .. import entities, errors, requester
|
||||
@@ -14,7 +13,7 @@ import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
|
||||
import langbot_plugin.api.entities.builtin.provider.message as provider_message
|
||||
|
||||
|
||||
class ModelScopeChatCompletions(requester.LLMAPIRequester):
|
||||
class ModelScopeChatCompletions(requester.ProviderAPIRequester):
|
||||
"""ModelScope ChatCompletion API 请求器"""
|
||||
|
||||
client: openai.AsyncClient
|
||||
@@ -34,9 +33,11 @@ class ModelScopeChatCompletions(requester.LLMAPIRequester):
|
||||
|
||||
async def _req(
|
||||
self,
|
||||
query: pipeline_query.Query,
|
||||
args: dict,
|
||||
extra_body: dict = {},
|
||||
) -> chat_completion.ChatCompletion:
|
||||
remove_think: bool = False,
|
||||
) -> list[dict[str, typing.Any]]:
|
||||
args['stream'] = True
|
||||
|
||||
chunk = None
|
||||
@@ -47,73 +48,71 @@ class ModelScopeChatCompletions(requester.LLMAPIRequester):
|
||||
|
||||
resp_gen: openai.AsyncStream = await self.client.chat.completions.create(**args, extra_body=extra_body)
|
||||
|
||||
chunk_idx = 0
|
||||
thinking_started = False
|
||||
thinking_ended = False
|
||||
tool_id = ''
|
||||
tool_name = ''
|
||||
message_delta = {}
|
||||
async for chunk in resp_gen:
|
||||
# print(chunk)
|
||||
if not chunk or not chunk.id or not chunk.choices or not chunk.choices[0] or not chunk.choices[0].delta:
|
||||
continue
|
||||
|
||||
if chunk.choices[0].delta.content is not None:
|
||||
pending_content += chunk.choices[0].delta.content
|
||||
delta = chunk.choices[0].delta.model_dump() if hasattr(chunk.choices[0], 'delta') else {}
|
||||
reasoning_content = delta.get('reasoning_content')
|
||||
# 处理 reasoning_content
|
||||
if reasoning_content:
|
||||
# accumulated_reasoning += reasoning_content
|
||||
# 如果设置了 remove_think,跳过 reasoning_content
|
||||
if remove_think:
|
||||
chunk_idx += 1
|
||||
continue
|
||||
|
||||
if chunk.choices[0].delta.tool_calls is not None:
|
||||
for tool_call in chunk.choices[0].delta.tool_calls:
|
||||
if tool_call.function.arguments is None:
|
||||
# 第一次出现 reasoning_content,添加 <think> 开始标签
|
||||
if not thinking_started:
|
||||
thinking_started = True
|
||||
pending_content += '<think>\n' + reasoning_content
|
||||
else:
|
||||
# 继续输出 reasoning_content
|
||||
pending_content += reasoning_content
|
||||
elif thinking_started and not thinking_ended and delta.get('content'):
|
||||
# reasoning_content 结束,normal content 开始,添加 </think> 结束标签
|
||||
thinking_ended = True
|
||||
pending_content += '\n</think>\n' + delta.get('content')
|
||||
|
||||
if delta.get('content') is not None:
|
||||
pending_content += delta.get('content')
|
||||
|
||||
if delta.get('tool_calls') is not None:
|
||||
for tool_call in delta.get('tool_calls'):
|
||||
if tool_call['id'] != '':
|
||||
tool_id = tool_call['id']
|
||||
if tool_call['function']['name'] is not None:
|
||||
tool_name = tool_call['function']['name']
|
||||
if tool_call['function']['arguments'] is None:
|
||||
continue
|
||||
tool_call['id'] = tool_id
|
||||
tool_call['name'] = tool_name
|
||||
for tc in tool_calls:
|
||||
if tc.index == tool_call.index:
|
||||
tc.function.arguments += tool_call.function.arguments
|
||||
if tc['index'] == tool_call['index']:
|
||||
tc['function']['arguments'] += tool_call['function']['arguments']
|
||||
break
|
||||
else:
|
||||
tool_calls.append(tool_call)
|
||||
|
||||
if chunk.choices[0].finish_reason is not None:
|
||||
break
|
||||
message_delta['content'] = pending_content
|
||||
message_delta['role'] = 'assistant'
|
||||
|
||||
real_tool_calls = []
|
||||
|
||||
for tc in tool_calls:
|
||||
function = chat_completion_message_tool_call.Function(
|
||||
name=tc.function.name, arguments=tc.function.arguments
|
||||
)
|
||||
real_tool_calls.append(
|
||||
chat_completion_message_tool_call.ChatCompletionMessageToolCall(
|
||||
id=tc.id, function=function, type='function'
|
||||
)
|
||||
)
|
||||
|
||||
return (
|
||||
chat_completion.ChatCompletion(
|
||||
id=chunk.id,
|
||||
object='chat.completion',
|
||||
created=chunk.created,
|
||||
choices=[
|
||||
chat_completion.Choice(
|
||||
index=0,
|
||||
message=chat_completion.ChatCompletionMessage(
|
||||
role='assistant',
|
||||
content=pending_content,
|
||||
tool_calls=real_tool_calls if len(real_tool_calls) > 0 else None,
|
||||
),
|
||||
finish_reason=chunk.choices[0].finish_reason
|
||||
if hasattr(chunk.choices[0], 'finish_reason') and chunk.choices[0].finish_reason is not None
|
||||
else 'stop',
|
||||
logprobs=chunk.choices[0].logprobs,
|
||||
)
|
||||
],
|
||||
model=chunk.model,
|
||||
service_tier=chunk.service_tier if hasattr(chunk, 'service_tier') else None,
|
||||
system_fingerprint=chunk.system_fingerprint if hasattr(chunk, 'system_fingerprint') else None,
|
||||
usage=chunk.usage if hasattr(chunk, 'usage') else None,
|
||||
)
|
||||
if chunk
|
||||
else None
|
||||
)
|
||||
message_delta['tool_calls'] = tool_calls if tool_calls else None
|
||||
return [message_delta]
|
||||
|
||||
async def _make_msg(
|
||||
self,
|
||||
chat_completion: chat_completion.ChatCompletion,
|
||||
chat_completion: list[dict[str, typing.Any]],
|
||||
) -> provider_message.Message:
|
||||
chatcmpl_message = chat_completion.choices[0].message.dict()
|
||||
chatcmpl_message = chat_completion[0]
|
||||
|
||||
# 确保 role 字段存在且不为 None
|
||||
if 'role' not in chatcmpl_message or chatcmpl_message['role'] is None:
|
||||
@@ -130,6 +129,7 @@ class ModelScopeChatCompletions(requester.LLMAPIRequester):
|
||||
use_model: requester.RuntimeLLMModel,
|
||||
use_funcs: list[resource_tool.LLMTool] = None,
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
remove_think: bool = False,
|
||||
) -> provider_message.Message:
|
||||
self.client.api_key = use_model.token_mgr.get_token()
|
||||
|
||||
@@ -157,13 +157,145 @@ class ModelScopeChatCompletions(requester.LLMAPIRequester):
|
||||
args['messages'] = messages
|
||||
|
||||
# 发送请求
|
||||
resp = await self._req(args, extra_body=extra_args)
|
||||
resp = await self._req(query, args, extra_body=extra_args, remove_think=remove_think)
|
||||
|
||||
# 处理请求结果
|
||||
message = await self._make_msg(resp)
|
||||
|
||||
return message
|
||||
|
||||
async def _req_stream(
|
||||
self,
|
||||
args: dict,
|
||||
extra_body: dict = {},
|
||||
) -> chat_completion.ChatCompletion:
|
||||
async for chunk in await self.client.chat.completions.create(**args, extra_body=extra_body):
|
||||
yield chunk
|
||||
|
||||
async def _closure_stream(
|
||||
self,
|
||||
query: pipeline_query.Query,
|
||||
req_messages: list[dict],
|
||||
use_model: requester.RuntimeLLMModel,
|
||||
use_funcs: list[resource_tool.LLMTool] = None,
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
remove_think: bool = False,
|
||||
) -> provider_message.Message | typing.AsyncGenerator[provider_message.MessageChunk, None]:
|
||||
self.client.api_key = use_model.token_mgr.get_token()
|
||||
|
||||
args = {}
|
||||
args['model'] = use_model.model_entity.name
|
||||
|
||||
if use_funcs:
|
||||
tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
|
||||
|
||||
if tools:
|
||||
args['tools'] = tools
|
||||
|
||||
# 设置此次请求中的messages
|
||||
messages = req_messages.copy()
|
||||
|
||||
# 检查vision
|
||||
for msg in messages:
|
||||
if 'content' in msg and isinstance(msg['content'], list):
|
||||
for me in msg['content']:
|
||||
if me['type'] == 'image_base64':
|
||||
me['image_url'] = {'url': me['image_base64']}
|
||||
me['type'] = 'image_url'
|
||||
del me['image_base64']
|
||||
|
||||
args['messages'] = messages
|
||||
args['stream'] = True
|
||||
|
||||
# 流式处理状态
|
||||
# tool_calls_map: dict[str, provider_message.ToolCall] = {}
|
||||
chunk_idx = 0
|
||||
thinking_started = False
|
||||
thinking_ended = False
|
||||
role = 'assistant' # 默认角色
|
||||
# accumulated_reasoning = '' # 仅用于判断何时结束思维链
|
||||
|
||||
async for chunk in self._req_stream(args, extra_body=extra_args):
|
||||
# 解析 chunk 数据
|
||||
if hasattr(chunk, 'choices') and chunk.choices:
|
||||
choice = chunk.choices[0]
|
||||
delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
|
||||
finish_reason = getattr(choice, 'finish_reason', None)
|
||||
else:
|
||||
delta = {}
|
||||
finish_reason = None
|
||||
|
||||
# 从第一个 chunk 获取 role,后续使用这个 role
|
||||
if 'role' in delta and delta['role']:
|
||||
role = delta['role']
|
||||
|
||||
# 获取增量内容
|
||||
delta_content = delta.get('content', '')
|
||||
reasoning_content = delta.get('reasoning_content', '')
|
||||
|
||||
# 处理 reasoning_content
|
||||
if reasoning_content:
|
||||
# accumulated_reasoning += reasoning_content
|
||||
# 如果设置了 remove_think,跳过 reasoning_content
|
||||
if remove_think:
|
||||
chunk_idx += 1
|
||||
continue
|
||||
|
||||
# 第一次出现 reasoning_content,添加 <think> 开始标签
|
||||
if not thinking_started:
|
||||
thinking_started = True
|
||||
delta_content = '<think>\n' + reasoning_content
|
||||
else:
|
||||
# 继续输出 reasoning_content
|
||||
delta_content = reasoning_content
|
||||
elif thinking_started and not thinking_ended and delta_content:
|
||||
# reasoning_content 结束,normal content 开始,添加 </think> 结束标签
|
||||
thinking_ended = True
|
||||
delta_content = '\n</think>\n' + delta_content
|
||||
|
||||
# 处理 content 中已有的 <think> 标签(如果需要移除)
|
||||
# if delta_content and remove_think and '<think>' in delta_content:
|
||||
# import re
|
||||
#
|
||||
# # 移除 <think> 标签及其内容
|
||||
# delta_content = re.sub(r'<think>.*?</think>', '', delta_content, flags=re.DOTALL)
|
||||
|
||||
# 处理工具调用增量
|
||||
if delta.get('tool_calls'):
|
||||
for tool_call in delta['tool_calls']:
|
||||
if tool_call['id'] != '':
|
||||
tool_id = tool_call['id']
|
||||
if tool_call['function']['name'] is not None:
|
||||
tool_name = tool_call['function']['name']
|
||||
|
||||
if tool_call['type'] is None:
|
||||
tool_call['type'] = 'function'
|
||||
tool_call['id'] = tool_id
|
||||
tool_call['function']['name'] = tool_name
|
||||
tool_call['function']['arguments'] = (
|
||||
'' if tool_call['function']['arguments'] is None else tool_call['function']['arguments']
|
||||
)
|
||||
|
||||
# 跳过空的第一个 chunk(只有 role 没有内容)
|
||||
if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'):
|
||||
chunk_idx += 1
|
||||
continue
|
||||
|
||||
# 构建 MessageChunk - 只包含增量内容
|
||||
chunk_data = {
|
||||
'role': role,
|
||||
'content': delta_content if delta_content else None,
|
||||
'tool_calls': delta.get('tool_calls'),
|
||||
'is_final': bool(finish_reason),
|
||||
}
|
||||
|
||||
# 移除 None 值
|
||||
chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
|
||||
|
||||
yield provider_message.MessageChunk(**chunk_data)
|
||||
chunk_idx += 1
|
||||
# return
|
||||
|
||||
async def invoke_llm(
|
||||
self,
|
||||
query: pipeline_query.Query,
|
||||
@@ -171,6 +303,7 @@ class ModelScopeChatCompletions(requester.LLMAPIRequester):
|
||||
messages: typing.List[provider_message.Message],
|
||||
funcs: typing.List[resource_tool.LLMTool] = None,
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
remove_think: bool = False,
|
||||
) -> provider_message.Message:
|
||||
req_messages = [] # req_messages 仅用于类内,外部同步由 query.messages 进行
|
||||
for m in messages:
|
||||
@@ -185,7 +318,12 @@ class ModelScopeChatCompletions(requester.LLMAPIRequester):
|
||||
|
||||
try:
|
||||
return await self._closure(
|
||||
query=query, req_messages=req_messages, use_model=model, use_funcs=funcs, extra_args=extra_args
|
||||
query=query,
|
||||
req_messages=req_messages,
|
||||
use_model=model,
|
||||
use_funcs=funcs,
|
||||
extra_args=extra_args,
|
||||
remove_think=remove_think,
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
raise errors.RequesterError('请求超时')
|
||||
@@ -202,3 +340,50 @@ class ModelScopeChatCompletions(requester.LLMAPIRequester):
|
||||
raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}')
|
||||
except openai.APIError as e:
|
||||
raise errors.RequesterError(f'请求错误: {e.message}')
|
||||
|
||||
async def invoke_llm_stream(
|
||||
self,
|
||||
query: pipeline_query.Query,
|
||||
model: requester.RuntimeLLMModel,
|
||||
messages: typing.List[provider_message.Message],
|
||||
funcs: typing.List[resource_tool.LLMTool] = None,
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
remove_think: bool = False,
|
||||
) -> provider_message.MessageChunk:
|
||||
req_messages = [] # req_messages 仅用于类内,外部同步由 query.messages 进行
|
||||
for m in messages:
|
||||
msg_dict = m.dict(exclude_none=True)
|
||||
content = msg_dict.get('content')
|
||||
if isinstance(content, list):
|
||||
# 检查 content 列表中是否每个部分都是文本
|
||||
if all(isinstance(part, dict) and part.get('type') == 'text' for part in content):
|
||||
# 将所有文本部分合并为一个字符串
|
||||
msg_dict['content'] = '\n'.join(part['text'] for part in content)
|
||||
req_messages.append(msg_dict)
|
||||
|
||||
try:
|
||||
async for item in self._closure_stream(
|
||||
query=query,
|
||||
req_messages=req_messages,
|
||||
use_model=model,
|
||||
use_funcs=funcs,
|
||||
extra_args=extra_args,
|
||||
remove_think=remove_think,
|
||||
):
|
||||
yield item
|
||||
|
||||
except asyncio.TimeoutError:
|
||||
raise errors.RequesterError('请求超时')
|
||||
except openai.BadRequestError as e:
|
||||
if 'context_length_exceeded' in e.message:
|
||||
raise errors.RequesterError(f'上文过长,请重置会话: {e.message}')
|
||||
else:
|
||||
raise errors.RequesterError(f'请求参数错误: {e.message}')
|
||||
except openai.AuthenticationError as e:
|
||||
raise errors.RequesterError(f'无效的 api-key: {e.message}')
|
||||
except openai.NotFoundError as e:
|
||||
raise errors.RequesterError(f'请求路径错误: {e.message}')
|
||||
except openai.RateLimitError as e:
|
||||
raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}')
|
||||
except openai.APIError as e:
|
||||
raise errors.RequesterError(f'请求错误: {e.message}')
|
||||
|
||||
@@ -29,6 +29,8 @@ spec:
|
||||
type: int
|
||||
required: true
|
||||
default: 120
|
||||
support_type:
|
||||
- llm
|
||||
execution:
|
||||
python:
|
||||
path: ./modelscopechatcmpl.py
|
||||
|
||||
@@ -25,6 +25,7 @@ class MoonshotChatCompletions(chatcmpl.OpenAIChatCompletions):
|
||||
use_model: requester.RuntimeLLMModel,
|
||||
use_funcs: list[resource_tool.LLMTool] = None,
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
remove_think: bool = False,
|
||||
) -> provider_message.Message:
|
||||
self.client.api_key = use_model.token_mgr.get_token()
|
||||
|
||||
@@ -54,6 +55,6 @@ class MoonshotChatCompletions(chatcmpl.OpenAIChatCompletions):
|
||||
resp = await self._req(args, extra_body=extra_args)
|
||||
|
||||
# 处理请求结果
|
||||
message = await self._make_msg(resp)
|
||||
message = await self._make_msg(resp, remove_think)
|
||||
|
||||
return message
|
||||
|
||||
@@ -14,7 +14,7 @@ spec:
|
||||
zh_Hans: 基础 URL
|
||||
type: string
|
||||
required: true
|
||||
default: "https://api.moonshot.com/v1"
|
||||
default: "https://api.moonshot.ai/v1"
|
||||
- name: timeout
|
||||
label:
|
||||
en_US: Timeout
|
||||
@@ -22,6 +22,8 @@ spec:
|
||||
type: integer
|
||||
required: true
|
||||
default: 120
|
||||
support_type:
|
||||
- llm
|
||||
execution:
|
||||
python:
|
||||
path: ./moonshotchatcmpl.py
|
||||
|
||||
BIN
pkg/provider/modelmgr/requesters/newapi.png
Normal file
BIN
pkg/provider/modelmgr/requesters/newapi.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 9.4 KiB |
17
pkg/provider/modelmgr/requesters/newapichatcmpl.py
Normal file
17
pkg/provider/modelmgr/requesters/newapichatcmpl.py
Normal file
@@ -0,0 +1,17 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import typing
|
||||
import openai
|
||||
|
||||
from . import chatcmpl
|
||||
|
||||
|
||||
class NewAPIChatCompletions(chatcmpl.OpenAIChatCompletions):
|
||||
"""New API ChatCompletion API 请求器"""
|
||||
|
||||
client: openai.AsyncClient
|
||||
|
||||
default_config: dict[str, typing.Any] = {
|
||||
'base_url': 'http://localhost:3000/v1',
|
||||
'timeout': 120,
|
||||
}
|
||||
31
pkg/provider/modelmgr/requesters/newapichatcmpl.yaml
Normal file
31
pkg/provider/modelmgr/requesters/newapichatcmpl.yaml
Normal file
@@ -0,0 +1,31 @@
|
||||
apiVersion: v1
|
||||
kind: LLMAPIRequester
|
||||
metadata:
|
||||
name: new-api-chat-completions
|
||||
label:
|
||||
en_US: New API
|
||||
zh_Hans: New API
|
||||
icon: newapi.png
|
||||
spec:
|
||||
config:
|
||||
- name: base_url
|
||||
label:
|
||||
en_US: Base URL
|
||||
zh_Hans: 基础 URL
|
||||
type: string
|
||||
required: true
|
||||
default: "http://localhost:3000/v1"
|
||||
- name: timeout
|
||||
label:
|
||||
en_US: Timeout
|
||||
zh_Hans: 超时时间
|
||||
type: integer
|
||||
required: true
|
||||
default: 120
|
||||
support_type:
|
||||
- llm
|
||||
- text-embedding
|
||||
execution:
|
||||
python:
|
||||
path: ./newapichatcmpl.py
|
||||
attr: NewAPIChatCompletions
|
||||
@@ -17,7 +17,7 @@ import langbot_plugin.api.entities.builtin.provider.message as provider_message
|
||||
REQUESTER_NAME: str = 'ollama-chat'
|
||||
|
||||
|
||||
class OllamaChatCompletions(requester.LLMAPIRequester):
|
||||
class OllamaChatCompletions(requester.ProviderAPIRequester):
|
||||
"""Ollama平台 ChatCompletion API请求器"""
|
||||
|
||||
client: ollama.AsyncClient
|
||||
@@ -44,6 +44,7 @@ class OllamaChatCompletions(requester.LLMAPIRequester):
|
||||
use_model: requester.RuntimeLLMModel,
|
||||
use_funcs: list[resource_tool.LLMTool] = None,
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
remove_think: bool = False,
|
||||
) -> provider_message.Message:
|
||||
args = extra_args.copy()
|
||||
args['model'] = use_model.model_entity.name
|
||||
@@ -110,6 +111,7 @@ class OllamaChatCompletions(requester.LLMAPIRequester):
|
||||
messages: typing.List[provider_message.Message],
|
||||
funcs: typing.List[resource_tool.LLMTool] = None,
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
remove_think: bool = False,
|
||||
) -> provider_message.Message:
|
||||
req_messages: list = []
|
||||
for m in messages:
|
||||
@@ -126,6 +128,21 @@ class OllamaChatCompletions(requester.LLMAPIRequester):
|
||||
use_model=model,
|
||||
use_funcs=funcs,
|
||||
extra_args=extra_args,
|
||||
remove_think=remove_think,
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
raise errors.RequesterError('请求超时')
|
||||
|
||||
async def invoke_embedding(
|
||||
self,
|
||||
model: requester.RuntimeEmbeddingModel,
|
||||
input_text: list[str],
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
) -> list[list[float]]:
|
||||
return (
|
||||
await self.client.embed(
|
||||
model=model.model_entity.name,
|
||||
input=input_text,
|
||||
**extra_args,
|
||||
)
|
||||
).embeddings
|
||||
|
||||
@@ -22,6 +22,9 @@ spec:
|
||||
type: integer
|
||||
required: true
|
||||
default: 120
|
||||
support_type:
|
||||
- llm
|
||||
- text-embedding
|
||||
execution:
|
||||
python:
|
||||
path: ./ollamachat.py
|
||||
|
||||
@@ -22,6 +22,9 @@ spec:
|
||||
type: integer
|
||||
required: true
|
||||
default: 120
|
||||
support_type:
|
||||
- llm
|
||||
- text-embedding
|
||||
execution:
|
||||
python:
|
||||
path: ./openrouterchatcmpl.py
|
||||
|
||||
@@ -4,6 +4,12 @@ import openai
|
||||
import typing
|
||||
|
||||
from . import chatcmpl
|
||||
from .. import requester
|
||||
import openai.types.chat.chat_completion as chat_completion
|
||||
import re
|
||||
import langbot_plugin.api.entities.builtin.provider.message as provider_message
|
||||
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
|
||||
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
|
||||
|
||||
|
||||
class PPIOChatCompletions(chatcmpl.OpenAIChatCompletions):
|
||||
@@ -15,3 +21,188 @@ class PPIOChatCompletions(chatcmpl.OpenAIChatCompletions):
|
||||
'base_url': 'https://api.ppinfra.com/v3/openai',
|
||||
'timeout': 120,
|
||||
}
|
||||
|
||||
is_think: bool = False
|
||||
|
||||
async def _make_msg(
|
||||
self,
|
||||
chat_completion: chat_completion.ChatCompletion,
|
||||
remove_think: bool,
|
||||
) -> provider_message.Message:
|
||||
chatcmpl_message = chat_completion.choices[0].message.model_dump()
|
||||
# print(chatcmpl_message.keys(), chatcmpl_message.values())
|
||||
|
||||
# 确保 role 字段存在且不为 None
|
||||
if 'role' not in chatcmpl_message or chatcmpl_message['role'] is None:
|
||||
chatcmpl_message['role'] = 'assistant'
|
||||
|
||||
reasoning_content = chatcmpl_message['reasoning_content'] if 'reasoning_content' in chatcmpl_message else None
|
||||
|
||||
# deepseek的reasoner模型
|
||||
chatcmpl_message['content'] = await self._process_thinking_content(
|
||||
chatcmpl_message['content'], reasoning_content, remove_think
|
||||
)
|
||||
|
||||
# 移除 reasoning_content 字段,避免传递给 Message
|
||||
if 'reasoning_content' in chatcmpl_message:
|
||||
del chatcmpl_message['reasoning_content']
|
||||
|
||||
message = provider_message.Message(**chatcmpl_message)
|
||||
|
||||
return message
|
||||
|
||||
async def _process_thinking_content(
|
||||
self,
|
||||
content: str,
|
||||
reasoning_content: str = None,
|
||||
remove_think: bool = False,
|
||||
) -> tuple[str, str]:
|
||||
"""处理思维链内容
|
||||
|
||||
Args:
|
||||
content: 原始内容
|
||||
reasoning_content: reasoning_content 字段内容
|
||||
remove_think: 是否移除思维链
|
||||
|
||||
Returns:
|
||||
处理后的内容
|
||||
"""
|
||||
if remove_think:
|
||||
content = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL)
|
||||
else:
|
||||
if reasoning_content is not None:
|
||||
content = '<think>\n' + reasoning_content + '\n</think>\n' + content
|
||||
return content
|
||||
|
||||
async def _make_msg_chunk(
|
||||
self,
|
||||
delta: dict[str, typing.Any],
|
||||
idx: int,
|
||||
) -> provider_message.MessageChunk:
|
||||
# 处理流式chunk和完整响应的差异
|
||||
# print(chat_completion.choices[0])
|
||||
|
||||
# 确保 role 字段存在且不为 None
|
||||
if 'role' not in delta or delta['role'] is None:
|
||||
delta['role'] = 'assistant'
|
||||
|
||||
reasoning_content = delta['reasoning_content'] if 'reasoning_content' in delta else None
|
||||
|
||||
delta['content'] = '' if delta['content'] is None else delta['content']
|
||||
# print(reasoning_content)
|
||||
|
||||
# deepseek的reasoner模型
|
||||
|
||||
if reasoning_content is not None:
|
||||
delta['content'] += reasoning_content
|
||||
|
||||
message = provider_message.MessageChunk(**delta)
|
||||
|
||||
return message
|
||||
|
||||
async def _closure_stream(
|
||||
self,
|
||||
query: pipeline_query.Query,
|
||||
req_messages: list[dict],
|
||||
use_model: requester.RuntimeLLMModel,
|
||||
use_funcs: list[resource_tool.LLMTool] = None,
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
remove_think: bool = False,
|
||||
) -> provider_message.Message | typing.AsyncGenerator[provider_message.MessageChunk, None]:
|
||||
self.client.api_key = use_model.token_mgr.get_token()
|
||||
|
||||
args = {}
|
||||
args['model'] = use_model.model_entity.name
|
||||
|
||||
if use_funcs:
|
||||
tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
|
||||
|
||||
if tools:
|
||||
args['tools'] = tools
|
||||
|
||||
# 设置此次请求中的messages
|
||||
messages = req_messages.copy()
|
||||
|
||||
# 检查vision
|
||||
for msg in messages:
|
||||
if 'content' in msg and isinstance(msg['content'], list):
|
||||
for me in msg['content']:
|
||||
if me['type'] == 'image_base64':
|
||||
me['image_url'] = {'url': me['image_base64']}
|
||||
me['type'] = 'image_url'
|
||||
del me['image_base64']
|
||||
|
||||
args['messages'] = messages
|
||||
args['stream'] = True
|
||||
|
||||
# tool_calls_map: dict[str, provider_message.ToolCall] = {}
|
||||
chunk_idx = 0
|
||||
thinking_started = False
|
||||
thinking_ended = False
|
||||
role = 'assistant' # 默认角色
|
||||
async for chunk in self._req_stream(args, extra_body=extra_args):
|
||||
# 解析 chunk 数据
|
||||
if hasattr(chunk, 'choices') and chunk.choices:
|
||||
choice = chunk.choices[0]
|
||||
delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
|
||||
finish_reason = getattr(choice, 'finish_reason', None)
|
||||
else:
|
||||
delta = {}
|
||||
finish_reason = None
|
||||
|
||||
# 从第一个 chunk 获取 role,后续使用这个 role
|
||||
if 'role' in delta and delta['role']:
|
||||
role = delta['role']
|
||||
|
||||
# 获取增量内容
|
||||
delta_content = delta.get('content', '')
|
||||
# reasoning_content = delta.get('reasoning_content', '')
|
||||
|
||||
if remove_think:
|
||||
if delta['content'] is not None:
|
||||
if '<think>' in delta['content'] and not thinking_started and not thinking_ended:
|
||||
thinking_started = True
|
||||
continue
|
||||
elif delta['content'] == r'</think>' and not thinking_ended:
|
||||
thinking_ended = True
|
||||
continue
|
||||
elif thinking_ended and delta['content'] == '\n\n' and thinking_started:
|
||||
thinking_started = False
|
||||
continue
|
||||
elif thinking_started and not thinking_ended:
|
||||
continue
|
||||
|
||||
# delta_tool_calls = None
|
||||
if delta.get('tool_calls'):
|
||||
for tool_call in delta['tool_calls']:
|
||||
if tool_call['id'] and tool_call['function']['name']:
|
||||
tool_id = tool_call['id']
|
||||
tool_name = tool_call['function']['name']
|
||||
|
||||
if tool_call['id'] is None:
|
||||
tool_call['id'] = tool_id
|
||||
if tool_call['function']['name'] is None:
|
||||
tool_call['function']['name'] = tool_name
|
||||
if tool_call['function']['arguments'] is None:
|
||||
tool_call['function']['arguments'] = ''
|
||||
if tool_call['type'] is None:
|
||||
tool_call['type'] = 'function'
|
||||
|
||||
# 跳过空的第一个 chunk(只有 role 没有内容)
|
||||
if chunk_idx == 0 and not delta_content and not delta.get('tool_calls'):
|
||||
chunk_idx += 1
|
||||
continue
|
||||
|
||||
# 构建 MessageChunk - 只包含增量内容
|
||||
chunk_data = {
|
||||
'role': role,
|
||||
'content': delta_content if delta_content else None,
|
||||
'tool_calls': delta.get('tool_calls'),
|
||||
'is_final': bool(finish_reason),
|
||||
}
|
||||
|
||||
# 移除 None 值
|
||||
chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
|
||||
|
||||
yield provider_message.MessageChunk(**chunk_data)
|
||||
chunk_idx += 1
|
||||
|
||||
@@ -29,6 +29,9 @@ spec:
|
||||
type: int
|
||||
required: true
|
||||
default: 120
|
||||
support_type:
|
||||
- llm
|
||||
- text-embedding
|
||||
execution:
|
||||
python:
|
||||
path: ./ppiochatcmpl.py
|
||||
|
||||
BIN
pkg/provider/modelmgr/requesters/qhaigc.png
Normal file
BIN
pkg/provider/modelmgr/requesters/qhaigc.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 24 KiB |
17
pkg/provider/modelmgr/requesters/qhaigcchatcmpl.py
Normal file
17
pkg/provider/modelmgr/requesters/qhaigcchatcmpl.py
Normal file
@@ -0,0 +1,17 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import openai
|
||||
import typing
|
||||
|
||||
from . import chatcmpl
|
||||
|
||||
|
||||
class QHAIGCChatCompletions(chatcmpl.OpenAIChatCompletions):
|
||||
"""启航 AI ChatCompletion API 请求器"""
|
||||
|
||||
client: openai.AsyncClient
|
||||
|
||||
default_config: dict[str, typing.Any] = {
|
||||
'base_url': 'https://api.qhaigc.com/v1',
|
||||
'timeout': 120,
|
||||
}
|
||||
38
pkg/provider/modelmgr/requesters/qhaigcchatcmpl.yaml
Normal file
38
pkg/provider/modelmgr/requesters/qhaigcchatcmpl.yaml
Normal file
@@ -0,0 +1,38 @@
|
||||
apiVersion: v1
|
||||
kind: LLMAPIRequester
|
||||
metadata:
|
||||
name: qhaigc-chat-completions
|
||||
label:
|
||||
en_US: QH AI
|
||||
zh_Hans: 启航 AI
|
||||
icon: qhaigc.png
|
||||
spec:
|
||||
config:
|
||||
- name: base_url
|
||||
label:
|
||||
en_US: Base URL
|
||||
zh_Hans: 基础 URL
|
||||
type: string
|
||||
required: true
|
||||
default: "https://api.qhaigc.net/v1"
|
||||
- name: args
|
||||
label:
|
||||
en_US: Args
|
||||
zh_Hans: 附加参数
|
||||
type: object
|
||||
required: true
|
||||
default: {}
|
||||
- name: timeout
|
||||
label:
|
||||
en_US: Timeout
|
||||
zh_Hans: 超时时间
|
||||
type: int
|
||||
required: true
|
||||
default: 120
|
||||
support_type:
|
||||
- llm
|
||||
- text-embedding
|
||||
execution:
|
||||
python:
|
||||
path: ./qhaigcchatcmpl.py
|
||||
attr: QHAIGCChatCompletions
|
||||
32
pkg/provider/modelmgr/requesters/shengsuanyun.py
Normal file
32
pkg/provider/modelmgr/requesters/shengsuanyun.py
Normal file
@@ -0,0 +1,32 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import openai
|
||||
import typing
|
||||
|
||||
from . import chatcmpl
|
||||
import openai.types.chat.chat_completion as chat_completion
|
||||
|
||||
|
||||
class ShengSuanYunChatCompletions(chatcmpl.OpenAIChatCompletions):
|
||||
"""胜算云(ModelSpot.AI) ChatCompletion API 请求器"""
|
||||
|
||||
client: openai.AsyncClient
|
||||
|
||||
default_config: dict[str, typing.Any] = {
|
||||
'base_url': 'https://router.shengsuanyun.com/api/v1',
|
||||
'timeout': 120,
|
||||
}
|
||||
|
||||
async def _req(
|
||||
self,
|
||||
args: dict,
|
||||
extra_body: dict = {},
|
||||
) -> chat_completion.ChatCompletion:
|
||||
return await self.client.chat.completions.create(
|
||||
**args,
|
||||
extra_body=extra_body,
|
||||
extra_headers={
|
||||
'HTTP-Referer': 'https://langbot.app',
|
||||
'X-Title': 'LangBot',
|
||||
},
|
||||
)
|
||||
1
pkg/provider/modelmgr/requesters/shengsuanyun.svg
Normal file
1
pkg/provider/modelmgr/requesters/shengsuanyun.svg
Normal file
File diff suppressed because one or more lines are too long
|
After Width: | Height: | Size: 7.4 KiB |
38
pkg/provider/modelmgr/requesters/shengsuanyun.yaml
Normal file
38
pkg/provider/modelmgr/requesters/shengsuanyun.yaml
Normal file
@@ -0,0 +1,38 @@
|
||||
apiVersion: v1
|
||||
kind: LLMAPIRequester
|
||||
metadata:
|
||||
name: shengsuanyun-chat-completions
|
||||
label:
|
||||
en_US: ShengSuanYun
|
||||
zh_Hans: 胜算云
|
||||
icon: shengsuanyun.svg
|
||||
spec:
|
||||
config:
|
||||
- name: base_url
|
||||
label:
|
||||
en_US: Base URL
|
||||
zh_Hans: 基础 URL
|
||||
type: string
|
||||
required: true
|
||||
default: "https://router.shengsuanyun.com/api/v1"
|
||||
- name: args
|
||||
label:
|
||||
en_US: Args
|
||||
zh_Hans: 附加参数
|
||||
type: object
|
||||
required: true
|
||||
default: {}
|
||||
- name: timeout
|
||||
label:
|
||||
en_US: Timeout
|
||||
zh_Hans: 超时时间
|
||||
type: int
|
||||
required: true
|
||||
default: 120
|
||||
support_type:
|
||||
- llm
|
||||
- text-embedding
|
||||
execution:
|
||||
python:
|
||||
path: ./shengsuanyun.py
|
||||
attr: ShengSuanYunChatCompletions
|
||||
@@ -22,6 +22,9 @@ spec:
|
||||
type: integer
|
||||
required: true
|
||||
default: 120
|
||||
support_type:
|
||||
- llm
|
||||
- text-embedding
|
||||
execution:
|
||||
python:
|
||||
path: ./siliconflowchatcmpl.py
|
||||
|
||||
@@ -22,6 +22,8 @@ spec:
|
||||
type: integer
|
||||
required: true
|
||||
default: 120
|
||||
support_type:
|
||||
- llm
|
||||
execution:
|
||||
python:
|
||||
path: ./volcarkchatcmpl.py
|
||||
|
||||
@@ -22,6 +22,8 @@ spec:
|
||||
type: integer
|
||||
required: true
|
||||
default: 120
|
||||
support_type:
|
||||
- llm
|
||||
execution:
|
||||
python:
|
||||
path: ./xaichatcmpl.py
|
||||
|
||||
@@ -22,6 +22,8 @@ spec:
|
||||
type: integer
|
||||
required: true
|
||||
default: 120
|
||||
support_type:
|
||||
- llm
|
||||
execution:
|
||||
python:
|
||||
path: ./zhipuaichatcmpl.py
|
||||
|
||||
@@ -102,8 +102,14 @@ class DashScopeAPIRunner(runner.RequestRunner):
|
||||
plain_text = '' # 用户输入的纯文本信息
|
||||
image_ids = [] # 用户输入的图片ID列表 (暂不支持)
|
||||
|
||||
plain_text, image_ids = await self._preprocess_user_message(query)
|
||||
think_start = False
|
||||
think_end = False
|
||||
|
||||
plain_text, image_ids = await self._preprocess_user_message(query)
|
||||
has_thoughts = True # 获取思考过程
|
||||
remove_think = self.pipeline_config['output'].get('misc', '').get('remove-think')
|
||||
if remove_think:
|
||||
has_thoughts = False
|
||||
# 发送对话请求
|
||||
response = dashscope.Application.call(
|
||||
api_key=self.api_key, # 智能体应用的API Key
|
||||
@@ -112,43 +118,108 @@ class DashScopeAPIRunner(runner.RequestRunner):
|
||||
stream=True, # 流式输出
|
||||
incremental_output=True, # 增量输出,使用流式输出需要开启增量输出
|
||||
session_id=query.session.using_conversation.uuid, # 会话ID用于,多轮对话
|
||||
has_thoughts=has_thoughts,
|
||||
# rag_options={ # 主要用于文件交互,暂不支持
|
||||
# "session_file_ids": ["FILE_ID1"], # FILE_ID1 替换为实际的临时文件ID,逗号隔开多个
|
||||
# }
|
||||
)
|
||||
idx_chunk = 0
|
||||
try:
|
||||
is_stream = await query.adapter.is_stream_output_supported()
|
||||
|
||||
for chunk in response:
|
||||
if chunk.get('status_code') != 200:
|
||||
raise DashscopeAPIError(
|
||||
f'Dashscope API 请求失败: status_code={chunk.get("status_code")} message={chunk.get("message")} request_id={chunk.get("request_id")} '
|
||||
)
|
||||
if not chunk:
|
||||
continue
|
||||
except AttributeError:
|
||||
is_stream = False
|
||||
if is_stream:
|
||||
for chunk in response:
|
||||
if chunk.get('status_code') != 200:
|
||||
raise DashscopeAPIError(
|
||||
f'Dashscope API 请求失败: status_code={chunk.get("status_code")} message={chunk.get("message")} request_id={chunk.get("request_id")} '
|
||||
)
|
||||
if not chunk:
|
||||
continue
|
||||
idx_chunk += 1
|
||||
# 获取流式传输的output
|
||||
stream_output = chunk.get('output', {})
|
||||
stream_think = stream_output.get('thoughts', [])
|
||||
if stream_think[0].get('thought'):
|
||||
if not think_start:
|
||||
think_start = True
|
||||
pending_content += f'<think>\n{stream_think[0].get("thought")}'
|
||||
else:
|
||||
# 继续输出 reasoning_content
|
||||
pending_content += stream_think[0].get('thought')
|
||||
elif stream_think[0].get('thought') == '' and not think_end:
|
||||
think_end = True
|
||||
pending_content += '\n</think>\n'
|
||||
if stream_output.get('text') is not None:
|
||||
pending_content += stream_output.get('text')
|
||||
# 是否是流式最后一个chunk
|
||||
is_final = False if stream_output.get('finish_reason', False) == 'null' else True
|
||||
|
||||
# 获取流式传输的output
|
||||
stream_output = chunk.get('output', {})
|
||||
if stream_output.get('text') is not None:
|
||||
pending_content += stream_output.get('text')
|
||||
# 获取模型传出的参考资料列表
|
||||
references_dict_list = stream_output.get('doc_references', [])
|
||||
|
||||
# 保存当前会话的session_id用于下次对话的语境
|
||||
query.session.using_conversation.uuid = stream_output.get('session_id')
|
||||
# 从模型传出的参考资料信息中提取用于替换的字典
|
||||
if references_dict_list is not None:
|
||||
for doc in references_dict_list:
|
||||
if doc.get('index_id') is not None:
|
||||
references_dict[doc.get('index_id')] = doc.get('doc_name')
|
||||
|
||||
# 获取模型传出的参考资料列表
|
||||
references_dict_list = stream_output.get('doc_references', [])
|
||||
# 将参考资料替换到文本中
|
||||
pending_content = self._replace_references(pending_content, references_dict)
|
||||
|
||||
# 从模型传出的参考资料信息中提取用于替换的字典
|
||||
if references_dict_list is not None:
|
||||
for doc in references_dict_list:
|
||||
if doc.get('index_id') is not None:
|
||||
references_dict[doc.get('index_id')] = doc.get('doc_name')
|
||||
if idx_chunk % 8 == 0 or is_final:
|
||||
yield provider_message.MessageChunk(
|
||||
role='assistant',
|
||||
content=pending_content,
|
||||
is_final=is_final,
|
||||
)
|
||||
# 保存当前会话的session_id用于下次对话的语境
|
||||
query.session.using_conversation.uuid = stream_output.get('session_id')
|
||||
else:
|
||||
for chunk in response:
|
||||
if chunk.get('status_code') != 200:
|
||||
raise DashscopeAPIError(
|
||||
f'Dashscope API 请求失败: status_code={chunk.get("status_code")} message={chunk.get("message")} request_id={chunk.get("request_id")} '
|
||||
)
|
||||
if not chunk:
|
||||
continue
|
||||
idx_chunk += 1
|
||||
# 获取流式传输的output
|
||||
stream_output = chunk.get('output', {})
|
||||
stream_think = stream_output.get('thoughts', [])
|
||||
if stream_think[0].get('thought'):
|
||||
if not think_start:
|
||||
think_start = True
|
||||
pending_content += f'<think>\n{stream_think[0].get("thought")}'
|
||||
else:
|
||||
# 继续输出 reasoning_content
|
||||
pending_content += stream_think[0].get('thought')
|
||||
elif stream_think[0].get('thought') == '' and not think_end:
|
||||
think_end = True
|
||||
pending_content += '\n</think>\n'
|
||||
if stream_output.get('text') is not None:
|
||||
pending_content += stream_output.get('text')
|
||||
|
||||
# 将参考资料替换到文本中
|
||||
pending_content = self._replace_references(pending_content, references_dict)
|
||||
# 保存当前会话的session_id用于下次对话的语境
|
||||
query.session.using_conversation.uuid = stream_output.get('session_id')
|
||||
|
||||
yield provider_message.Message(
|
||||
role='assistant',
|
||||
content=pending_content,
|
||||
)
|
||||
# 获取模型传出的参考资料列表
|
||||
references_dict_list = stream_output.get('doc_references', [])
|
||||
|
||||
# 从模型传出的参考资料信息中提取用于替换的字典
|
||||
if references_dict_list is not None:
|
||||
for doc in references_dict_list:
|
||||
if doc.get('index_id') is not None:
|
||||
references_dict[doc.get('index_id')] = doc.get('doc_name')
|
||||
|
||||
# 将参考资料替换到文本中
|
||||
pending_content = self._replace_references(pending_content, references_dict)
|
||||
|
||||
yield provider_message.Message(
|
||||
role='assistant',
|
||||
content=pending_content,
|
||||
)
|
||||
|
||||
async def _workflow_messages(
|
||||
self, query: pipeline_query.Query
|
||||
@@ -176,52 +247,108 @@ class DashScopeAPIRunner(runner.RequestRunner):
|
||||
incremental_output=True, # 增量输出,使用流式输出需要开启增量输出
|
||||
session_id=query.session.using_conversation.uuid, # 会话ID用于,多轮对话
|
||||
biz_params=biz_params, # 工作流应用的自定义输入参数传递
|
||||
flow_stream_mode='message_format', # 消息模式,输出/结束节点的流式结果
|
||||
# rag_options={ # 主要用于文件交互,暂不支持
|
||||
# "session_file_ids": ["FILE_ID1"], # FILE_ID1 替换为实际的临时文件ID,逗号隔开多个
|
||||
# }
|
||||
)
|
||||
|
||||
# 处理API返回的流式输出
|
||||
for chunk in response:
|
||||
if chunk.get('status_code') != 200:
|
||||
raise DashscopeAPIError(
|
||||
f'Dashscope API 请求失败: status_code={chunk.get("status_code")} message={chunk.get("message")} request_id={chunk.get("request_id")} '
|
||||
)
|
||||
if not chunk:
|
||||
continue
|
||||
try:
|
||||
is_stream = await query.adapter.is_stream_output_supported()
|
||||
|
||||
# 获取流式传输的output
|
||||
stream_output = chunk.get('output', {})
|
||||
if stream_output.get('text') is not None:
|
||||
pending_content += stream_output.get('text')
|
||||
except AttributeError:
|
||||
is_stream = False
|
||||
idx_chunk = 0
|
||||
if is_stream:
|
||||
for chunk in response:
|
||||
if chunk.get('status_code') != 200:
|
||||
raise DashscopeAPIError(
|
||||
f'Dashscope API 请求失败: status_code={chunk.get("status_code")} message={chunk.get("message")} request_id={chunk.get("request_id")} '
|
||||
)
|
||||
if not chunk:
|
||||
continue
|
||||
idx_chunk += 1
|
||||
# 获取流式传输的output
|
||||
stream_output = chunk.get('output', {})
|
||||
if stream_output.get('workflow_message') is not None:
|
||||
pending_content += stream_output.get('workflow_message').get('message').get('content')
|
||||
# if stream_output.get('text') is not None:
|
||||
# pending_content += stream_output.get('text')
|
||||
|
||||
# 保存当前会话的session_id用于下次对话的语境
|
||||
query.session.using_conversation.uuid = stream_output.get('session_id')
|
||||
is_final = False if stream_output.get('finish_reason', False) == 'null' else True
|
||||
|
||||
# 获取模型传出的参考资料列表
|
||||
references_dict_list = stream_output.get('doc_references', [])
|
||||
# 获取模型传出的参考资料列表
|
||||
references_dict_list = stream_output.get('doc_references', [])
|
||||
|
||||
# 从模型传出的参考资料信息中提取用于替换的字典
|
||||
if references_dict_list is not None:
|
||||
for doc in references_dict_list:
|
||||
if doc.get('index_id') is not None:
|
||||
references_dict[doc.get('index_id')] = doc.get('doc_name')
|
||||
# 从模型传出的参考资料信息中提取用于替换的字典
|
||||
if references_dict_list is not None:
|
||||
for doc in references_dict_list:
|
||||
if doc.get('index_id') is not None:
|
||||
references_dict[doc.get('index_id')] = doc.get('doc_name')
|
||||
|
||||
# 将参考资料替换到文本中
|
||||
pending_content = self._replace_references(pending_content, references_dict)
|
||||
# 将参考资料替换到文本中
|
||||
pending_content = self._replace_references(pending_content, references_dict)
|
||||
if idx_chunk % 8 == 0 or is_final:
|
||||
yield provider_message.MessageChunk(
|
||||
role='assistant',
|
||||
content=pending_content,
|
||||
is_final=is_final,
|
||||
)
|
||||
|
||||
yield provider_message.Message(
|
||||
role='assistant',
|
||||
content=pending_content,
|
||||
)
|
||||
# 保存当前会话的session_id用于下次对话的语境
|
||||
query.session.using_conversation.uuid = stream_output.get('session_id')
|
||||
|
||||
else:
|
||||
for chunk in response:
|
||||
if chunk.get('status_code') != 200:
|
||||
raise DashscopeAPIError(
|
||||
f'Dashscope API 请求失败: status_code={chunk.get("status_code")} message={chunk.get("message")} request_id={chunk.get("request_id")} '
|
||||
)
|
||||
if not chunk:
|
||||
continue
|
||||
|
||||
# 获取流式传输的output
|
||||
stream_output = chunk.get('output', {})
|
||||
if stream_output.get('text') is not None:
|
||||
pending_content += stream_output.get('text')
|
||||
|
||||
is_final = False if stream_output.get('finish_reason', False) == 'null' else True
|
||||
|
||||
# 保存当前会话的session_id用于下次对话的语境
|
||||
query.session.using_conversation.uuid = stream_output.get('session_id')
|
||||
|
||||
# 获取模型传出的参考资料列表
|
||||
references_dict_list = stream_output.get('doc_references', [])
|
||||
|
||||
# 从模型传出的参考资料信息中提取用于替换的字典
|
||||
if references_dict_list is not None:
|
||||
for doc in references_dict_list:
|
||||
if doc.get('index_id') is not None:
|
||||
references_dict[doc.get('index_id')] = doc.get('doc_name')
|
||||
|
||||
# 将参考资料替换到文本中
|
||||
pending_content = self._replace_references(pending_content, references_dict)
|
||||
|
||||
yield provider_message.Message(
|
||||
role='assistant',
|
||||
content=pending_content,
|
||||
)
|
||||
|
||||
async def run(self, query: pipeline_query.Query) -> typing.AsyncGenerator[provider_message.Message, None]:
|
||||
"""运行"""
|
||||
msg_seq = 0
|
||||
if self.app_type == 'agent':
|
||||
async for msg in self._agent_messages(query):
|
||||
if isinstance(msg, provider_message.MessageChunk):
|
||||
msg_seq += 1
|
||||
msg.msg_sequence = msg_seq
|
||||
yield msg
|
||||
elif self.app_type == 'workflow':
|
||||
async for msg in self._workflow_messages(query):
|
||||
if isinstance(msg, provider_message.MessageChunk):
|
||||
msg_seq += 1
|
||||
msg.msg_sequence = msg_seq
|
||||
yield msg
|
||||
else:
|
||||
raise DashscopeAPIError(f'不支持的 Dashscope 应用类型: {self.app_type}')
|
||||
|
||||
@@ -3,7 +3,6 @@ from __future__ import annotations
|
||||
import typing
|
||||
import json
|
||||
import uuid
|
||||
import re
|
||||
import base64
|
||||
|
||||
|
||||
@@ -38,29 +37,38 @@ class DifyServiceAPIRunner(runner.RequestRunner):
|
||||
base_url=self.pipeline_config['ai']['dify-service-api']['base-url'],
|
||||
)
|
||||
|
||||
def _try_convert_thinking(self, resp_text: str) -> str:
|
||||
"""尝试转换 Dify 的思考提示"""
|
||||
if not resp_text.startswith(
|
||||
'<details style="color:gray;background-color: #f8f8f8;padding: 8px;border-radius: 4px;" open> <summary> Thinking... </summary>'
|
||||
):
|
||||
return resp_text
|
||||
def _process_thinking_content(
|
||||
self,
|
||||
content: str,
|
||||
) -> tuple[str, str]:
|
||||
"""处理思维链内容
|
||||
|
||||
if self.pipeline_config['ai']['dify-service-api']['thinking-convert'] == 'original':
|
||||
return resp_text
|
||||
Args:
|
||||
content: 原始内容
|
||||
Returns:
|
||||
(处理后的内容, 提取的思维链内容)
|
||||
"""
|
||||
remove_think = self.pipeline_config['output'].get('misc', '').get('remove-think')
|
||||
thinking_content = ''
|
||||
# 从 content 中提取 <think> 标签内容
|
||||
if content and '<think>' in content and '</think>' in content:
|
||||
import re
|
||||
|
||||
if self.pipeline_config['ai']['dify-service-api']['thinking-convert'] == 'remove':
|
||||
return re.sub(
|
||||
r'<details style="color:gray;background-color: #f8f8f8;padding: 8px;border-radius: 4px;" open> <summary> Thinking... </summary>.*?</details>',
|
||||
'',
|
||||
resp_text,
|
||||
flags=re.DOTALL,
|
||||
)
|
||||
think_pattern = r'<think>(.*?)</think>'
|
||||
think_matches = re.findall(think_pattern, content, re.DOTALL)
|
||||
if think_matches:
|
||||
thinking_content = '\n'.join(think_matches)
|
||||
# 移除 content 中的 <think> 标签
|
||||
content = re.sub(think_pattern, '', content, flags=re.DOTALL).strip()
|
||||
|
||||
if self.pipeline_config['ai']['dify-service-api']['thinking-convert'] == 'plain':
|
||||
pattern = r'<details style="color:gray;background-color: #f8f8f8;padding: 8px;border-radius: 4px;" open> <summary> Thinking... </summary>(.*?)</details>'
|
||||
thinking_text = re.search(pattern, resp_text, flags=re.DOTALL)
|
||||
content_text = re.sub(pattern, '', resp_text, flags=re.DOTALL)
|
||||
return f'<think>{thinking_text.group(1)}</think>\n{content_text}'
|
||||
# 3. 根据 remove_think 参数决定是否保留思维链
|
||||
if remove_think:
|
||||
return content, ''
|
||||
else:
|
||||
# 如果有思维链内容,将其以 <think> 格式添加到 content 开头
|
||||
if thinking_content:
|
||||
content = f'<think>\n{thinking_content}\n</think>\n{content}'.strip()
|
||||
return content, thinking_content
|
||||
|
||||
async def _preprocess_user_message(self, query: pipeline_query.Query) -> tuple[str, list[str]]:
|
||||
"""预处理用户消息,提取纯文本,并将图片上传到 Dify 服务
|
||||
@@ -134,17 +142,20 @@ class DifyServiceAPIRunner(runner.RequestRunner):
|
||||
if mode == 'workflow':
|
||||
if chunk['event'] == 'node_finished':
|
||||
if chunk['data']['node_type'] == 'answer':
|
||||
content, _ = self._process_thinking_content(chunk['data']['outputs']['answer'])
|
||||
|
||||
yield provider_message.Message(
|
||||
role='assistant',
|
||||
content=self._try_convert_thinking(chunk['data']['outputs']['answer']),
|
||||
content=content,
|
||||
)
|
||||
elif mode == 'basic':
|
||||
if chunk['event'] == 'message':
|
||||
basic_mode_pending_chunk += chunk['answer']
|
||||
elif chunk['event'] == 'message_end':
|
||||
content, _ = self._process_thinking_content(basic_mode_pending_chunk)
|
||||
yield provider_message.Message(
|
||||
role='assistant',
|
||||
content=self._try_convert_thinking(basic_mode_pending_chunk),
|
||||
content=content,
|
||||
)
|
||||
basic_mode_pending_chunk = ''
|
||||
|
||||
@@ -195,14 +206,15 @@ class DifyServiceAPIRunner(runner.RequestRunner):
|
||||
if chunk['event'] in ignored_events:
|
||||
continue
|
||||
|
||||
if chunk['event'] == 'agent_message':
|
||||
if chunk['event'] == 'agent_message' or chunk['event'] == 'message':
|
||||
pending_agent_message += chunk['answer']
|
||||
else:
|
||||
if pending_agent_message.strip() != '':
|
||||
pending_agent_message = pending_agent_message.replace('</details>Action:', '</details>')
|
||||
content, _ = self._process_thinking_content(pending_agent_message)
|
||||
yield provider_message.Message(
|
||||
role='assistant',
|
||||
content=self._try_convert_thinking(pending_agent_message),
|
||||
content=content,
|
||||
)
|
||||
pending_agent_message = ''
|
||||
|
||||
@@ -312,26 +324,352 @@ class DifyServiceAPIRunner(runner.RequestRunner):
|
||||
elif chunk['event'] == 'workflow_finished':
|
||||
if chunk['data']['error']:
|
||||
raise errors.DifyAPIError(chunk['data']['error'])
|
||||
content, _ = self._process_thinking_content(chunk['data']['outputs']['summary'])
|
||||
|
||||
msg = provider_message.Message(
|
||||
role='assistant',
|
||||
content=chunk['data']['outputs']['summary'],
|
||||
content=content,
|
||||
)
|
||||
|
||||
yield msg
|
||||
|
||||
async def _chat_messages_chunk(
|
||||
self, query: pipeline_query.Query
|
||||
) -> typing.AsyncGenerator[provider_message.MessageChunk, None]:
|
||||
"""调用聊天助手"""
|
||||
cov_id = query.session.using_conversation.uuid or ''
|
||||
query.variables['conversation_id'] = cov_id
|
||||
|
||||
plain_text, image_ids = await self._preprocess_user_message(query)
|
||||
|
||||
files = [
|
||||
{
|
||||
'type': 'image',
|
||||
'transfer_method': 'local_file',
|
||||
'upload_file_id': image_id,
|
||||
}
|
||||
for image_id in image_ids
|
||||
]
|
||||
|
||||
basic_mode_pending_chunk = ''
|
||||
|
||||
inputs = {}
|
||||
|
||||
inputs.update(query.variables)
|
||||
message_idx = 0
|
||||
|
||||
chunk = None # 初始化chunk变量,防止在没有响应时引用错误
|
||||
|
||||
is_final = False
|
||||
think_start = False
|
||||
think_end = False
|
||||
|
||||
remove_think = self.pipeline_config['output'].get('misc', '').get('remove-think')
|
||||
|
||||
async for chunk in self.dify_client.chat_messages(
|
||||
inputs=inputs,
|
||||
query=plain_text,
|
||||
user=f'{query.session.launcher_type.value}_{query.session.launcher_id}',
|
||||
conversation_id=cov_id,
|
||||
files=files,
|
||||
timeout=120,
|
||||
):
|
||||
self.ap.logger.debug('dify-chat-chunk: ' + str(chunk))
|
||||
|
||||
# if chunk['event'] == 'workflow_started':
|
||||
# mode = 'workflow'
|
||||
# if mode == 'workflow':
|
||||
# elif mode == 'basic':
|
||||
# 因为都只是返回的 message也没有工具调用什么的,暂时不分类
|
||||
if chunk['event'] == 'message':
|
||||
message_idx += 1
|
||||
if remove_think:
|
||||
if '<think>' in chunk['answer'] and not think_start:
|
||||
think_start = True
|
||||
continue
|
||||
if '</think>' in chunk['answer'] and not think_end:
|
||||
import re
|
||||
|
||||
content = re.sub(r'^\n</think>', '', chunk['answer'])
|
||||
basic_mode_pending_chunk += content
|
||||
think_end = True
|
||||
elif think_end:
|
||||
basic_mode_pending_chunk += chunk['answer']
|
||||
if think_start:
|
||||
continue
|
||||
|
||||
else:
|
||||
basic_mode_pending_chunk += chunk['answer']
|
||||
|
||||
if chunk['event'] == 'message_end':
|
||||
is_final = True
|
||||
|
||||
if is_final or message_idx % 8 == 0:
|
||||
# content, _ = self._process_thinking_content(basic_mode_pending_chunk)
|
||||
yield provider_message.MessageChunk(
|
||||
role='assistant',
|
||||
content=basic_mode_pending_chunk,
|
||||
is_final=is_final,
|
||||
)
|
||||
|
||||
if chunk is None:
|
||||
raise errors.DifyAPIError('Dify API 没有返回任何响应,请检查网络连接和API配置')
|
||||
|
||||
query.session.using_conversation.uuid = chunk['conversation_id']
|
||||
|
||||
async def _agent_chat_messages_chunk(
|
||||
self, query: pipeline_query.Query
|
||||
) -> typing.AsyncGenerator[provider_message.MessageChunk, None]:
|
||||
"""调用聊天助手"""
|
||||
cov_id = query.session.using_conversation.uuid or ''
|
||||
query.variables['conversation_id'] = cov_id
|
||||
|
||||
plain_text, image_ids = await self._preprocess_user_message(query)
|
||||
|
||||
files = [
|
||||
{
|
||||
'type': 'image',
|
||||
'transfer_method': 'local_file',
|
||||
'upload_file_id': image_id,
|
||||
}
|
||||
for image_id in image_ids
|
||||
]
|
||||
|
||||
ignored_events = []
|
||||
|
||||
inputs = {}
|
||||
|
||||
inputs.update(query.variables)
|
||||
|
||||
pending_agent_message = ''
|
||||
|
||||
chunk = None # 初始化chunk变量,防止在没有响应时引用错误
|
||||
message_idx = 0
|
||||
is_final = False
|
||||
think_start = False
|
||||
think_end = False
|
||||
|
||||
remove_think = self.pipeline_config['output'].get('misc', '').get('remove-think')
|
||||
|
||||
async for chunk in self.dify_client.chat_messages(
|
||||
inputs=inputs,
|
||||
query=plain_text,
|
||||
user=f'{query.session.launcher_type.value}_{query.session.launcher_id}',
|
||||
response_mode='streaming',
|
||||
conversation_id=cov_id,
|
||||
files=files,
|
||||
timeout=120,
|
||||
):
|
||||
self.ap.logger.debug('dify-agent-chunk: ' + str(chunk))
|
||||
|
||||
if chunk['event'] in ignored_events:
|
||||
continue
|
||||
|
||||
if chunk['event'] == 'agent_message':
|
||||
message_idx += 1
|
||||
if remove_think:
|
||||
if '<think>' in chunk['answer'] and not think_start:
|
||||
think_start = True
|
||||
continue
|
||||
if '</think>' in chunk['answer'] and not think_end:
|
||||
import re
|
||||
|
||||
content = re.sub(r'^\n</think>', '', chunk['answer'])
|
||||
pending_agent_message += content
|
||||
think_end = True
|
||||
elif think_end or not think_start:
|
||||
pending_agent_message += chunk['answer']
|
||||
if think_start:
|
||||
continue
|
||||
|
||||
else:
|
||||
pending_agent_message += chunk['answer']
|
||||
elif chunk['event'] == 'message_end':
|
||||
is_final = True
|
||||
else:
|
||||
if chunk['event'] == 'agent_thought':
|
||||
if chunk['tool'] != '' and chunk['observation'] != '': # 工具调用结果,跳过
|
||||
continue
|
||||
message_idx += 1
|
||||
if chunk['tool']:
|
||||
msg = provider_message.MessageChunk(
|
||||
role='assistant',
|
||||
tool_calls=[
|
||||
provider_message.ToolCall(
|
||||
id=chunk['id'],
|
||||
type='function',
|
||||
function=provider_message.FunctionCall(
|
||||
name=chunk['tool'],
|
||||
arguments=json.dumps({}),
|
||||
),
|
||||
)
|
||||
],
|
||||
)
|
||||
yield msg
|
||||
if chunk['event'] == 'message_file':
|
||||
message_idx += 1
|
||||
if chunk['type'] == 'image' and chunk['belongs_to'] == 'assistant':
|
||||
base_url = self.dify_client.base_url
|
||||
|
||||
if base_url.endswith('/v1'):
|
||||
base_url = base_url[:-3]
|
||||
|
||||
image_url = base_url + chunk['url']
|
||||
|
||||
yield provider_message.MessageChunk(
|
||||
role='assistant',
|
||||
content=[provider_message.ContentElement.from_image_url(image_url)],
|
||||
is_final=is_final,
|
||||
)
|
||||
|
||||
if chunk['event'] == 'error':
|
||||
raise errors.DifyAPIError('dify 服务错误: ' + chunk['message'])
|
||||
if message_idx % 8 == 0 or is_final:
|
||||
yield provider_message.MessageChunk(
|
||||
role='assistant',
|
||||
content=pending_agent_message,
|
||||
is_final=is_final,
|
||||
)
|
||||
|
||||
if chunk is None:
|
||||
raise errors.DifyAPIError('Dify API 没有返回任何响应,请检查网络连接和API配置')
|
||||
|
||||
query.session.using_conversation.uuid = chunk['conversation_id']
|
||||
|
||||
async def _workflow_messages_chunk(
|
||||
self, query: pipeline_query.Query
|
||||
) -> typing.AsyncGenerator[provider_message.MessageChunk, None]:
|
||||
"""调用工作流"""
|
||||
|
||||
if not query.session.using_conversation.uuid:
|
||||
query.session.using_conversation.uuid = str(uuid.uuid4())
|
||||
|
||||
query.variables['conversation_id'] = query.session.using_conversation.uuid
|
||||
|
||||
plain_text, image_ids = await self._preprocess_user_message(query)
|
||||
|
||||
files = [
|
||||
{
|
||||
'type': 'image',
|
||||
'transfer_method': 'local_file',
|
||||
'upload_file_id': image_id,
|
||||
}
|
||||
for image_id in image_ids
|
||||
]
|
||||
|
||||
ignored_events = ['workflow_started']
|
||||
|
||||
inputs = { # these variables are legacy variables, we need to keep them for compatibility
|
||||
'langbot_user_message_text': plain_text,
|
||||
'langbot_session_id': query.variables['session_id'],
|
||||
'langbot_conversation_id': query.variables['conversation_id'],
|
||||
'langbot_msg_create_time': query.variables['msg_create_time'],
|
||||
}
|
||||
|
||||
inputs.update(query.variables)
|
||||
messsage_idx = 0
|
||||
is_final = False
|
||||
think_start = False
|
||||
think_end = False
|
||||
workflow_contents = ''
|
||||
|
||||
remove_think = self.pipeline_config['output'].get('misc', '').get('remove-think')
|
||||
async for chunk in self.dify_client.workflow_run(
|
||||
inputs=inputs,
|
||||
user=f'{query.session.launcher_type.value}_{query.session.launcher_id}',
|
||||
files=files,
|
||||
timeout=120,
|
||||
):
|
||||
self.ap.logger.debug('dify-workflow-chunk: ' + str(chunk))
|
||||
if chunk['event'] in ignored_events:
|
||||
continue
|
||||
if chunk['event'] == 'workflow_finished':
|
||||
is_final = True
|
||||
if chunk['data']['error']:
|
||||
raise errors.DifyAPIError(chunk['data']['error'])
|
||||
|
||||
if chunk['event'] == 'text_chunk':
|
||||
messsage_idx += 1
|
||||
if remove_think:
|
||||
if '<think>' in chunk['data']['text'] and not think_start:
|
||||
think_start = True
|
||||
continue
|
||||
if '</think>' in chunk['data']['text'] and not think_end:
|
||||
import re
|
||||
|
||||
content = re.sub(r'^\n</think>', '', chunk['data']['text'])
|
||||
workflow_contents += content
|
||||
think_end = True
|
||||
elif think_end:
|
||||
workflow_contents += chunk['data']['text']
|
||||
if think_start:
|
||||
continue
|
||||
|
||||
else:
|
||||
workflow_contents += chunk['data']['text']
|
||||
|
||||
if chunk['event'] == 'node_started':
|
||||
if chunk['data']['node_type'] == 'start' or chunk['data']['node_type'] == 'end':
|
||||
continue
|
||||
messsage_idx += 1
|
||||
msg = provider_message.MessageChunk(
|
||||
role='assistant',
|
||||
content=None,
|
||||
tool_calls=[
|
||||
provider_message.ToolCall(
|
||||
id=chunk['data']['node_id'],
|
||||
type='function',
|
||||
function=provider_message.FunctionCall(
|
||||
name=chunk['data']['title'],
|
||||
arguments=json.dumps({}),
|
||||
),
|
||||
)
|
||||
],
|
||||
)
|
||||
|
||||
yield msg
|
||||
|
||||
if messsage_idx % 8 == 0 or is_final:
|
||||
yield provider_message.MessageChunk(
|
||||
role='assistant',
|
||||
content=workflow_contents,
|
||||
is_final=is_final,
|
||||
)
|
||||
|
||||
async def run(self, query: pipeline_query.Query) -> typing.AsyncGenerator[provider_message.Message, None]:
|
||||
"""运行请求"""
|
||||
if self.pipeline_config['ai']['dify-service-api']['app-type'] == 'chat':
|
||||
async for msg in self._chat_messages(query):
|
||||
yield msg
|
||||
elif self.pipeline_config['ai']['dify-service-api']['app-type'] == 'agent':
|
||||
async for msg in self._agent_chat_messages(query):
|
||||
yield msg
|
||||
elif self.pipeline_config['ai']['dify-service-api']['app-type'] == 'workflow':
|
||||
async for msg in self._workflow_messages(query):
|
||||
yield msg
|
||||
if await query.adapter.is_stream_output_supported():
|
||||
msg_idx = 0
|
||||
if self.pipeline_config['ai']['dify-service-api']['app-type'] == 'chat':
|
||||
async for msg in self._chat_messages_chunk(query):
|
||||
msg_idx += 1
|
||||
msg.msg_sequence = msg_idx
|
||||
yield msg
|
||||
elif self.pipeline_config['ai']['dify-service-api']['app-type'] == 'agent':
|
||||
async for msg in self._agent_chat_messages_chunk(query):
|
||||
msg_idx += 1
|
||||
msg.msg_sequence = msg_idx
|
||||
yield msg
|
||||
elif self.pipeline_config['ai']['dify-service-api']['app-type'] == 'workflow':
|
||||
async for msg in self._workflow_messages_chunk(query):
|
||||
msg_idx += 1
|
||||
msg.msg_sequence = msg_idx
|
||||
yield msg
|
||||
else:
|
||||
raise errors.DifyAPIError(
|
||||
f'不支持的 Dify 应用类型: {self.pipeline_config["ai"]["dify-service-api"]["app-type"]}'
|
||||
)
|
||||
else:
|
||||
raise errors.DifyAPIError(
|
||||
f'不支持的 Dify 应用类型: {self.pipeline_config["ai"]["dify-service-api"]["app-type"]}'
|
||||
)
|
||||
if self.pipeline_config['ai']['dify-service-api']['app-type'] == 'chat':
|
||||
async for msg in self._chat_messages(query):
|
||||
yield msg
|
||||
elif self.pipeline_config['ai']['dify-service-api']['app-type'] == 'agent':
|
||||
async for msg in self._agent_chat_messages(query):
|
||||
yield msg
|
||||
elif self.pipeline_config['ai']['dify-service-api']['app-type'] == 'workflow':
|
||||
async for msg in self._workflow_messages(query):
|
||||
yield msg
|
||||
else:
|
||||
raise errors.DifyAPIError(
|
||||
f'不支持的 Dify 应用类型: {self.pipeline_config["ai"]["dify-service-api"]["app-type"]}'
|
||||
)
|
||||
|
||||
@@ -1,39 +1,179 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import copy
|
||||
import typing
|
||||
|
||||
from .. import runner
|
||||
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
|
||||
import langbot_plugin.api.entities.builtin.provider.message as provider_message
|
||||
|
||||
|
||||
rag_combined_prompt_template = """
|
||||
The following are relevant context entries retrieved from the knowledge base.
|
||||
Please use them to answer the user's message.
|
||||
Respond in the same language as the user's input.
|
||||
|
||||
<context>
|
||||
{rag_context}
|
||||
</context>
|
||||
|
||||
<user_message>
|
||||
{user_message}
|
||||
</user_message>
|
||||
"""
|
||||
|
||||
|
||||
@runner.runner_class('local-agent')
|
||||
class LocalAgentRunner(runner.RequestRunner):
|
||||
"""本地Agent请求运行器"""
|
||||
|
||||
async def run(self, query: pipeline_query.Query) -> typing.AsyncGenerator[provider_message.Message, None]:
|
||||
class ToolCallTracker:
|
||||
"""工具调用追踪器"""
|
||||
|
||||
def __init__(self):
|
||||
self.active_calls: dict[str, dict] = {}
|
||||
self.completed_calls: list[provider_message.ToolCall] = []
|
||||
|
||||
async def run(
|
||||
self, query: pipeline_query.Query
|
||||
) -> typing.AsyncGenerator[provider_message.Message | provider_message.MessageChunk, None]:
|
||||
"""运行请求"""
|
||||
pending_tool_calls = []
|
||||
|
||||
req_messages = query.prompt.messages.copy() + query.messages.copy() + [query.user_message]
|
||||
kb_uuid = query.pipeline_config['ai']['local-agent']['knowledge-base']
|
||||
|
||||
use_llm_model = await self.ap.model_mgr.get_model_by_uuid(query.use_llm_model_uuid)
|
||||
if kb_uuid == '__none__':
|
||||
kb_uuid = None
|
||||
|
||||
# 首次请求
|
||||
msg = await use_llm_model.requester.invoke_llm(
|
||||
query,
|
||||
use_llm_model,
|
||||
req_messages,
|
||||
query.use_funcs,
|
||||
extra_args=use_llm_model.model_entity.extra_args,
|
||||
)
|
||||
user_message = copy.deepcopy(query.user_message)
|
||||
|
||||
yield msg
|
||||
user_message_text = ''
|
||||
|
||||
pending_tool_calls = msg.tool_calls
|
||||
if isinstance(user_message.content, str):
|
||||
user_message_text = user_message.content
|
||||
elif isinstance(user_message.content, list):
|
||||
for ce in user_message.content:
|
||||
if ce.type == 'text':
|
||||
user_message_text += ce.text
|
||||
break
|
||||
|
||||
req_messages.append(msg)
|
||||
if kb_uuid and user_message_text:
|
||||
# only support text for now
|
||||
kb = await self.ap.rag_mgr.get_knowledge_base_by_uuid(kb_uuid)
|
||||
|
||||
if not kb:
|
||||
self.ap.logger.warning(f'Knowledge base {kb_uuid} not found')
|
||||
raise ValueError(f'Knowledge base {kb_uuid} not found')
|
||||
|
||||
result = await kb.retrieve(user_message_text, kb.knowledge_base_entity.top_k)
|
||||
|
||||
final_user_message_text = ''
|
||||
|
||||
if result:
|
||||
rag_context = '\n\n'.join(
|
||||
f'[{i + 1}] {entry.metadata.get("text", "")}' for i, entry in enumerate(result)
|
||||
)
|
||||
final_user_message_text = rag_combined_prompt_template.format(
|
||||
rag_context=rag_context, user_message=user_message_text
|
||||
)
|
||||
|
||||
else:
|
||||
final_user_message_text = user_message_text
|
||||
|
||||
self.ap.logger.debug(f'Final user message text: {final_user_message_text}')
|
||||
|
||||
for ce in user_message.content:
|
||||
if ce.type == 'text':
|
||||
ce.text = final_user_message_text
|
||||
break
|
||||
|
||||
req_messages = query.prompt.messages.copy() + query.messages.copy() + [user_message]
|
||||
|
||||
try:
|
||||
is_stream = await query.adapter.is_stream_output_supported()
|
||||
except AttributeError:
|
||||
is_stream = False
|
||||
|
||||
remove_think = self.pipeline_config['output'].get('misc', '').get('remove-think')
|
||||
|
||||
if not is_stream:
|
||||
# 非流式输出,直接请求
|
||||
|
||||
msg = await query.use_llm_model.requester.invoke_llm(
|
||||
query,
|
||||
query.use_llm_model,
|
||||
req_messages,
|
||||
query.use_funcs,
|
||||
extra_args=query.use_llm_model.model_entity.extra_args,
|
||||
remove_think=remove_think,
|
||||
)
|
||||
yield msg
|
||||
final_msg = msg
|
||||
else:
|
||||
# 流式输出,需要处理工具调用
|
||||
tool_calls_map: dict[str, provider_message.ToolCall] = {}
|
||||
msg_idx = 0
|
||||
accumulated_content = '' # 从开始累积的所有内容
|
||||
last_role = 'assistant'
|
||||
msg_sequence = 1
|
||||
async for msg in query.use_llm_model.requester.invoke_llm_stream(
|
||||
query,
|
||||
query.use_llm_model,
|
||||
req_messages,
|
||||
query.use_funcs,
|
||||
extra_args=query.use_llm_model.model_entity.extra_args,
|
||||
remove_think=remove_think,
|
||||
):
|
||||
msg_idx = msg_idx + 1
|
||||
|
||||
# 记录角色
|
||||
if msg.role:
|
||||
last_role = msg.role
|
||||
|
||||
# 累积内容
|
||||
if msg.content:
|
||||
accumulated_content += msg.content
|
||||
|
||||
# 处理工具调用
|
||||
if msg.tool_calls:
|
||||
for tool_call in msg.tool_calls:
|
||||
if tool_call.id not in tool_calls_map:
|
||||
tool_calls_map[tool_call.id] = provider_message.ToolCall(
|
||||
id=tool_call.id,
|
||||
type=tool_call.type,
|
||||
function=provider_message.FunctionCall(
|
||||
name=tool_call.function.name if tool_call.function else '', arguments=''
|
||||
),
|
||||
)
|
||||
if tool_call.function and tool_call.function.arguments:
|
||||
# 流式处理中,工具调用参数可能分多个chunk返回,需要追加而不是覆盖
|
||||
tool_calls_map[tool_call.id].function.arguments += tool_call.function.arguments
|
||||
# continue
|
||||
# 每8个chunk或最后一个chunk时,输出所有累积的内容
|
||||
if msg_idx % 8 == 0 or msg.is_final:
|
||||
msg_sequence += 1
|
||||
yield provider_message.MessageChunk(
|
||||
role=last_role,
|
||||
content=accumulated_content, # 输出所有累积内容
|
||||
tool_calls=list(tool_calls_map.values()) if (tool_calls_map and msg.is_final) else None,
|
||||
is_final=msg.is_final,
|
||||
msg_sequence=msg_sequence,
|
||||
)
|
||||
|
||||
# 创建最终消息用于后续处理
|
||||
final_msg = provider_message.MessageChunk(
|
||||
role=last_role,
|
||||
content=accumulated_content,
|
||||
tool_calls=list(tool_calls_map.values()) if tool_calls_map else None,
|
||||
msg_sequence=msg_sequence,
|
||||
)
|
||||
|
||||
pending_tool_calls = final_msg.tool_calls
|
||||
first_content = final_msg.content
|
||||
if isinstance(final_msg, provider_message.MessageChunk):
|
||||
first_end_sequence = final_msg.msg_sequence
|
||||
|
||||
req_messages.append(final_msg)
|
||||
|
||||
# 持续请求,只要还有待处理的工具调用就继续处理调用
|
||||
while pending_tool_calls:
|
||||
@@ -43,13 +183,19 @@ class LocalAgentRunner(runner.RequestRunner):
|
||||
|
||||
parameters = json.loads(func.arguments)
|
||||
|
||||
func_ret = await self.ap.tool_mgr.execute_func_call(func.name, parameters)
|
||||
|
||||
msg = provider_message.Message(
|
||||
role='tool',
|
||||
content=json.dumps(func_ret, ensure_ascii=False),
|
||||
tool_call_id=tool_call.id,
|
||||
)
|
||||
func_ret = await self.ap.tool_mgr.execute_func_call(query, func.name, parameters)
|
||||
if is_stream:
|
||||
msg = provider_message.MessageChunk(
|
||||
role='tool',
|
||||
content=json.dumps(func_ret, ensure_ascii=False),
|
||||
tool_call_id=tool_call.id,
|
||||
)
|
||||
else:
|
||||
msg = provider_message.Message(
|
||||
role='tool',
|
||||
content=json.dumps(func_ret, ensure_ascii=False),
|
||||
tool_call_id=tool_call.id,
|
||||
)
|
||||
|
||||
yield msg
|
||||
|
||||
@@ -62,17 +208,81 @@ class LocalAgentRunner(runner.RequestRunner):
|
||||
|
||||
req_messages.append(err_msg)
|
||||
|
||||
# 处理完所有调用,再次请求
|
||||
msg = await use_llm_model.requester.invoke_llm(
|
||||
query,
|
||||
use_llm_model,
|
||||
req_messages,
|
||||
query.use_funcs,
|
||||
extra_args=use_llm_model.model_entity.extra_args,
|
||||
)
|
||||
if is_stream:
|
||||
tool_calls_map = {}
|
||||
msg_idx = 0
|
||||
accumulated_content = '' # 从开始累积的所有内容
|
||||
last_role = 'assistant'
|
||||
msg_sequence = first_end_sequence
|
||||
|
||||
yield msg
|
||||
async for msg in query.use_llm_model.requester.invoke_llm_stream(
|
||||
query,
|
||||
query.use_llm_model,
|
||||
req_messages,
|
||||
query.use_funcs,
|
||||
extra_args=query.use_llm_model.model_entity.extra_args,
|
||||
remove_think=remove_think,
|
||||
):
|
||||
msg_idx += 1
|
||||
|
||||
pending_tool_calls = msg.tool_calls
|
||||
# 记录角色
|
||||
if msg.role:
|
||||
last_role = msg.role
|
||||
|
||||
req_messages.append(msg)
|
||||
# 第一次请求工具调用时的内容
|
||||
if msg_idx == 1:
|
||||
accumulated_content = first_content if first_content is not None else accumulated_content
|
||||
|
||||
# 累积内容
|
||||
if msg.content:
|
||||
accumulated_content += msg.content
|
||||
|
||||
# 处理工具调用
|
||||
if msg.tool_calls:
|
||||
for tool_call in msg.tool_calls:
|
||||
if tool_call.id not in tool_calls_map:
|
||||
tool_calls_map[tool_call.id] = provider_message.ToolCall(
|
||||
id=tool_call.id,
|
||||
type=tool_call.type,
|
||||
function=provider_message.FunctionCall(
|
||||
name=tool_call.function.name if tool_call.function else '', arguments=''
|
||||
),
|
||||
)
|
||||
if tool_call.function and tool_call.function.arguments:
|
||||
# 流式处理中,工具调用参数可能分多个chunk返回,需要追加而不是覆盖
|
||||
tool_calls_map[tool_call.id].function.arguments += tool_call.function.arguments
|
||||
|
||||
# 每8个chunk或最后一个chunk时,输出所有累积的内容
|
||||
if msg_idx % 8 == 0 or msg.is_final:
|
||||
msg_sequence += 1
|
||||
yield provider_message.MessageChunk(
|
||||
role=last_role,
|
||||
content=accumulated_content, # 输出所有累积内容
|
||||
tool_calls=list(tool_calls_map.values()) if (tool_calls_map and msg.is_final) else None,
|
||||
is_final=msg.is_final,
|
||||
msg_sequence=msg_sequence,
|
||||
)
|
||||
|
||||
final_msg = provider_message.MessageChunk(
|
||||
role=last_role,
|
||||
content=accumulated_content,
|
||||
tool_calls=list(tool_calls_map.values()) if tool_calls_map else None,
|
||||
msg_sequence=msg_sequence,
|
||||
)
|
||||
else:
|
||||
# 处理完所有调用,再次请求
|
||||
msg = await query.use_llm_model.requester.invoke_llm(
|
||||
query,
|
||||
query.use_llm_model,
|
||||
req_messages,
|
||||
query.use_funcs,
|
||||
extra_args=query.use_llm_model.model_entity.extra_args,
|
||||
remove_think=remove_think,
|
||||
)
|
||||
|
||||
yield msg
|
||||
final_msg = msg
|
||||
|
||||
pending_tool_calls = final_msg.tool_calls
|
||||
|
||||
req_messages.append(final_msg)
|
||||
|
||||
Reference in New Issue
Block a user