Files
LangBot/src/langbot/pkg/provider/modelmgr/requesters/chatcmpl.py

703 lines
27 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from __future__ import annotations
import asyncio
import typing
import openai
import openai.types.chat.chat_completion as chat_completion_module
import httpx
from .. import errors, requester
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
import langbot_plugin.api.entities.builtin.provider.message as provider_message
class OpenAIChatCompletions(requester.ProviderAPIRequester):
"""OpenAI ChatCompletion API 请求器"""
client: openai.AsyncClient
default_config: dict[str, typing.Any] = {
'base_url': 'https://api.openai.com/v1',
'timeout': 120,
}
async def initialize(self):
self.client = openai.AsyncClient(
api_key=self.init_api_key,
base_url=self.requester_cfg['base_url'].replace(' ', ''),
timeout=self.requester_cfg['timeout'],
http_client=httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']),
)
def _mask_api_key(self, api_key: str | None) -> str:
if not api_key:
return ''
if len(api_key) <= 8:
return '****'
return f'{api_key[:4]}...{api_key[-4:]}'
def _infer_model_type(self, model_id: str) -> str:
normalized_model_id = (model_id or '').lower()
embedding_keywords = (
'embedding',
'embed',
'bge-',
'e5-',
'm3e',
'gte-',
'multilingual-e5',
'text-embedding',
)
return 'embedding' if any(keyword in normalized_model_id for keyword in embedding_keywords) else 'llm'
def _infer_model_abilities(self, item: dict[str, typing.Any], model_id: str) -> list[str]:
normalized_model_id = (model_id or '').lower()
abilities: set[str] = set()
def _flatten(value: typing.Any) -> list[str]:
if value is None:
return []
if isinstance(value, str):
return [value.lower()]
if isinstance(value, dict):
flattened: list[str] = []
for nested_value in value.values():
flattened.extend(_flatten(nested_value))
return flattened
if isinstance(value, (list, tuple, set)):
flattened: list[str] = []
for nested_value in value:
flattened.extend(_flatten(nested_value))
return flattened
return [str(value).lower()]
capability_tokens = _flatten(item.get('capabilities'))
capability_tokens.extend(_flatten(item.get('modalities')))
capability_tokens.extend(_flatten(item.get('input_modalities')))
capability_tokens.extend(_flatten(item.get('output_modalities')))
capability_tokens.extend(_flatten(item.get('supported_generation_methods')))
capability_tokens.extend(_flatten(item.get('supported_parameters')))
capability_tokens.extend(_flatten(item.get('architecture')))
combined_tokens = capability_tokens + [normalized_model_id]
vision_keywords = (
'vision',
'image',
'file',
'video',
'multimodal',
'vl',
'ocr',
'omni',
)
function_call_keywords = (
'function',
'tool',
'tools',
'tool_choice',
'tool_call',
'tool-use',
'tool_use',
)
if any(any(keyword in token for keyword in vision_keywords) for token in combined_tokens):
abilities.add('vision')
if any(any(keyword in token for keyword in function_call_keywords) for token in combined_tokens):
abilities.add('func_call')
return sorted(abilities)
def _normalize_modalities(self, value: typing.Any) -> list[str]:
normalized: list[str] = []
def _collect(item: typing.Any):
if item is None:
return
if isinstance(item, str):
for part in item.replace('->', ',').replace('+', ',').split(','):
token = part.strip().lower()
if token and token not in normalized:
normalized.append(token)
return
if isinstance(item, dict):
for nested in item.values():
_collect(nested)
return
if isinstance(item, (list, tuple, set)):
for nested in item:
_collect(nested)
return
_collect(value)
return normalized
def _extract_scan_metadata(self, item: dict[str, typing.Any], model_id: str) -> dict[str, typing.Any]:
display_name = item.get('name')
if not isinstance(display_name, str) or not display_name.strip() or display_name == model_id:
display_name = ''
description = item.get('description')
if not isinstance(description, str) or not description.strip():
description = ''
context_length = item.get('context_length')
if context_length is None and isinstance(item.get('top_provider'), dict):
context_length = item['top_provider'].get('context_length')
if not isinstance(context_length, int):
try:
context_length = int(context_length) if context_length is not None else None
except (TypeError, ValueError):
context_length = None
input_modalities = self._normalize_modalities(item.get('input_modalities'))
output_modalities = self._normalize_modalities(item.get('output_modalities'))
if isinstance(item.get('architecture'), dict):
if not input_modalities:
input_modalities = self._normalize_modalities(item['architecture'].get('input_modalities'))
if not output_modalities:
output_modalities = self._normalize_modalities(item['architecture'].get('output_modalities'))
owned_by = item.get('owned_by')
if not isinstance(owned_by, str) or not owned_by.strip():
owned_by = ''
return {
'display_name': display_name or None,
'description': description or None,
'context_length': context_length,
'owned_by': owned_by or None,
'input_modalities': input_modalities,
'output_modalities': output_modalities,
}
async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]:
headers = {}
if api_key:
headers['Authorization'] = f'Bearer {api_key}'
models_url = f'{self.requester_cfg["base_url"].rstrip("/")}/models'
async with httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']) as client:
response = await client.get(models_url, headers=headers)
response.raise_for_status()
payload = response.json()
models = []
for item in payload.get('data', []):
model_id = item.get('id')
if not model_id:
continue
models.append(
{
'id': model_id,
'name': model_id,
'type': self._infer_model_type(model_id),
'abilities': self._infer_model_abilities(item, model_id),
**self._extract_scan_metadata(item, model_id),
}
)
models.sort(key=lambda item: (item['type'] != 'llm', item['name'].lower()))
return {
'models': models,
'debug': {
'request': {
'method': 'GET',
'url': models_url,
'headers': {
'Authorization': f'Bearer {self._mask_api_key(api_key)}' if api_key else '',
},
},
'response': payload,
},
}
async def _req(
self,
args: dict,
extra_body: dict = {},
) -> chat_completion_module.ChatCompletion:
return await self.client.chat.completions.create(**args, extra_body=extra_body)
async def _req_stream(
self,
args: dict,
extra_body: dict = {},
):
async for chunk in await self.client.chat.completions.create(**args, extra_body=extra_body):
yield chunk
async def _make_msg(
self,
chat_completion: chat_completion_module.ChatCompletion,
remove_think: bool = False,
) -> provider_message.Message:
if not isinstance(chat_completion, chat_completion_module.ChatCompletion):
raise TypeError(f'Expected ChatCompletion, got {type(chat_completion).__name__}: {chat_completion[:16]}')
chatcmpl_message = chat_completion.choices[0].message.model_dump()
# 确保 role 字段存在且不为 None
if 'role' not in chatcmpl_message or chatcmpl_message['role'] is None:
chatcmpl_message['role'] = 'assistant'
# 处理思维链
content = chatcmpl_message.get('content', '')
reasoning_content = chatcmpl_message.get('reasoning_content', None)
processed_content, _ = await self._process_thinking_content(
content=content, reasoning_content=reasoning_content, remove_think=remove_think
)
chatcmpl_message['content'] = processed_content
# 移除 reasoning_content 字段,避免传递给 Message
if 'reasoning_content' in chatcmpl_message:
del chatcmpl_message['reasoning_content']
message = provider_message.Message(**chatcmpl_message)
return message
async def _process_thinking_content(
self,
content: str,
reasoning_content: str = None,
remove_think: bool = False,
) -> tuple[str, str]:
"""处理思维链内容
Args:
content: 原始内容
reasoning_content: reasoning_content 字段内容
remove_think: 是否移除思维链
Returns:
(处理后的内容, 提取的思维链内容)
"""
thinking_content = ''
# 1. 从 reasoning_content 提取思维链
if reasoning_content:
thinking_content = reasoning_content
# 2. 从 content 中提取 <think> 标签内容
if content and '<think>' in content and '</think>' in content:
import re
think_pattern = r'<think>(.*?)</think>'
think_matches = re.findall(think_pattern, content, re.DOTALL)
if think_matches:
# 如果已有 reasoning_content则追加
if thinking_content:
thinking_content += '\n' + '\n'.join(think_matches)
else:
thinking_content = '\n'.join(think_matches)
# 移除 content 中的 <think> 标签
content = re.sub(think_pattern, '', content, flags=re.DOTALL).strip()
# 3. 根据 remove_think 参数决定是否保留思维链
if remove_think:
return content, ''
else:
# 如果有思维链内容,将其以 <think> 格式添加到 content 开头
if thinking_content:
content = f'<think>\n{thinking_content}\n</think>\n{content}'.strip()
return content, thinking_content
async def _closure_stream(
self,
query: pipeline_query.Query,
req_messages: list[dict],
use_model: requester.RuntimeLLMModel,
use_funcs: list[resource_tool.LLMTool] = None,
extra_args: dict[str, typing.Any] = {},
remove_think: bool = False,
) -> provider_message.MessageChunk:
self.client.api_key = use_model.provider.token_mgr.get_token()
args = {}
args['model'] = use_model.model_entity.name
if use_funcs:
tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
if tools:
args['tools'] = tools
# 设置此次请求中的messages
messages = req_messages.copy()
# 检查vision
for msg in messages:
if 'content' in msg and isinstance(msg['content'], list):
for me in msg['content']:
if me['type'] == 'image_base64':
me['image_url'] = {'url': me['image_base64']}
me['type'] = 'image_url'
del me['image_base64']
args['messages'] = messages
args['stream'] = True
# 流式处理状态
# tool_calls_map: dict[str, provider_message.ToolCall] = {}
chunk_idx = 0
thinking_started = False
thinking_ended = False
role = 'assistant' # 默认角色
tool_id = ''
tool_name = ''
# accumulated_reasoning = '' # 仅用于判断何时结束思维链
async for chunk in self._req_stream(args, extra_body=extra_args):
# 解析 chunk 数据
if hasattr(chunk, 'choices') and chunk.choices:
choice = chunk.choices[0]
delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
finish_reason = getattr(choice, 'finish_reason', None)
else:
delta = {}
finish_reason = None
# 从第一个 chunk 获取 role后续使用这个 role
if 'role' in delta and delta['role']:
role = delta['role']
# 获取增量内容
delta_content = delta.get('content', '')
reasoning_content = delta.get('reasoning_content', '')
# 处理 reasoning_content
if reasoning_content:
# accumulated_reasoning += reasoning_content
# 如果设置了 remove_think跳过 reasoning_content
if remove_think:
chunk_idx += 1
continue
# 第一次出现 reasoning_content添加 <think> 开始标签
if not thinking_started:
thinking_started = True
delta_content = '<think>\n' + reasoning_content
else:
# 继续输出 reasoning_content
delta_content = reasoning_content
elif thinking_started and not thinking_ended and delta_content:
# reasoning_content 结束normal content 开始,添加 </think> 结束标签
thinking_ended = True
delta_content = '\n</think>\n' + delta_content
# 处理 content 中已有的 <think> 标签(如果需要移除)
# if delta_content and remove_think and '<think>' in delta_content:
# import re
#
# # 移除 <think> 标签及其内容
# delta_content = re.sub(r'<think>.*?</think>', '', delta_content, flags=re.DOTALL)
# 处理工具调用增量
# delta_tool_calls = None
if delta.get('tool_calls'):
for tool_call in delta['tool_calls']:
if tool_call['id'] and tool_call['function']['name']:
tool_id = tool_call['id']
tool_name = tool_call['function']['name']
else:
tool_call['id'] = tool_id
tool_call['function']['name'] = tool_name
if tool_call['type'] is None:
tool_call['type'] = 'function'
# 跳过空的第一个 chunk只有 role 没有内容)
if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'):
chunk_idx += 1
continue
# 构建 MessageChunk - 只包含增量内容
chunk_data = {
'role': role,
'content': delta_content if delta_content else None,
'tool_calls': delta.get('tool_calls'),
'is_final': bool(finish_reason),
}
# 移除 None 值
chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
yield provider_message.MessageChunk(**chunk_data)
chunk_idx += 1
async def _closure(
self,
query: pipeline_query.Query,
req_messages: list[dict],
use_model: requester.RuntimeLLMModel,
use_funcs: list[resource_tool.LLMTool] = None,
extra_args: dict[str, typing.Any] = {},
remove_think: bool = False,
) -> tuple[provider_message.Message, dict]:
self.client.api_key = use_model.provider.token_mgr.get_token()
args = {}
args['model'] = use_model.model_entity.name
if use_funcs:
tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
if tools:
args['tools'] = tools
# 设置此次请求中的messages
messages = req_messages.copy()
# 检查vision
for msg in messages:
if 'content' in msg and isinstance(msg['content'], list):
for me in msg['content']:
if me['type'] == 'image_base64':
me['image_url'] = {'url': me['image_base64']}
me['type'] = 'image_url'
del me['image_base64']
args['messages'] = messages
# 发送请求
resp = await self._req(args, extra_body=extra_args)
# 处理请求结果
message = await self._make_msg(resp, remove_think)
# Extract token usage from response
usage_info = {}
if hasattr(resp, 'usage') and resp.usage:
usage_info['input_tokens'] = resp.usage.prompt_tokens or 0
usage_info['output_tokens'] = resp.usage.completion_tokens or 0
usage_info['total_tokens'] = resp.usage.total_tokens or 0
return message, usage_info
async def invoke_llm(
self,
query: pipeline_query.Query,
model: requester.RuntimeLLMModel,
messages: typing.List[provider_message.Message],
funcs: typing.List[resource_tool.LLMTool] = None,
extra_args: dict[str, typing.Any] = {},
remove_think: bool = False,
) -> tuple[provider_message.Message, dict]:
"""Invoke LLM and return message with usage info"""
req_messages = [] # req_messages 仅用于类内,外部同步由 query.messages 进行
for m in messages:
msg_dict = m.dict(exclude_none=True)
content = msg_dict.get('content')
if isinstance(content, list):
# 检查 content 列表中是否每个部分都是文本
if all(isinstance(part, dict) and part.get('type') == 'text' for part in content):
# 将所有文本部分合并为一个字符串
msg_dict['content'] = '\n'.join(part['text'] for part in content)
req_messages.append(msg_dict)
try:
msg, usage_info = await self._closure(
query=query,
req_messages=req_messages,
use_model=model,
use_funcs=funcs,
extra_args=extra_args,
remove_think=remove_think,
)
return msg, usage_info
except asyncio.TimeoutError:
raise errors.RequesterError('请求超时')
except openai.BadRequestError as e:
error_message = str(e.message) if hasattr(e, 'message') else str(e)
if 'context_length_exceeded' in str(e):
raise errors.RequesterError(f'上文过长,请重置会话: {error_message}')
else:
raise errors.RequesterError(f'请求参数错误: {error_message}')
except openai.AuthenticationError as e:
error_message = str(e.message) if hasattr(e, 'message') else str(e)
raise errors.RequesterError(f'无效的 api-key: {error_message}')
except openai.NotFoundError as e:
error_message = str(e.message) if hasattr(e, 'message') else str(e)
raise errors.RequesterError(f'请求路径错误: {error_message}')
except openai.RateLimitError as e:
error_message = str(e.message) if hasattr(e, 'message') else str(e)
raise errors.RequesterError(f'请求过于频繁或余额不足: {error_message}')
except openai.APIConnectionError as e:
error_message = f'连接错误: {str(e)}'
raise errors.RequesterError(error_message)
except openai.APIError as e:
error_message = str(e.message) if hasattr(e, 'message') else str(e)
raise errors.RequesterError(f'请求错误: {error_message}')
async def invoke_embedding(
self,
model: requester.RuntimeEmbeddingModel,
input_text: list[str],
extra_args: dict[str, typing.Any] = {},
) -> tuple[list[list[float]], dict]:
"""调用 Embedding API, returns (embeddings, usage_info)"""
self.client.api_key = model.provider.token_mgr.get_token()
args = {
'model': model.model_entity.name,
'input': input_text,
}
if model.model_entity.extra_args:
args.update(model.model_entity.extra_args)
args.update(extra_args)
try:
resp = await self.client.embeddings.create(**args)
# Extract usage info
usage_info = {}
if hasattr(resp, 'usage') and resp.usage:
usage_info['prompt_tokens'] = resp.usage.prompt_tokens or 0
usage_info['total_tokens'] = resp.usage.total_tokens or 0
return [d.embedding for d in resp.data], usage_info
except asyncio.TimeoutError:
raise errors.RequesterError('请求超时')
except openai.BadRequestError as e:
raise errors.RequesterError(f'请求参数错误: {e.message}')
async def invoke_llm_stream(
self,
query: pipeline_query.Query,
model: requester.RuntimeLLMModel,
messages: typing.List[provider_message.Message],
funcs: typing.List[resource_tool.LLMTool] = None,
extra_args: dict[str, typing.Any] = {},
remove_think: bool = False,
) -> provider_message.MessageChunk:
req_messages = [] # req_messages 仅用于类内,外部同步由 query.messages 进行
for m in messages:
msg_dict = m.dict(exclude_none=True)
content = msg_dict.get('content')
if isinstance(content, list):
# 检查 content 列表中是否每个部分都是文本
if all(isinstance(part, dict) and part.get('type') == 'text' for part in content):
# 将所有文本部分合并为一个字符串
msg_dict['content'] = '\n'.join(part['text'] for part in content)
req_messages.append(msg_dict)
try:
async for item in self._closure_stream(
query=query,
req_messages=req_messages,
use_model=model,
use_funcs=funcs,
extra_args=extra_args,
remove_think=remove_think,
):
yield item
except asyncio.TimeoutError:
raise errors.RequesterError('请求超时')
except openai.BadRequestError as e:
if 'context_length_exceeded' in e.message:
raise errors.RequesterError(f'上文过长,请重置会话: {e.message}')
else:
raise errors.RequesterError(f'请求参数错误: {e.message}')
except openai.AuthenticationError as e:
raise errors.RequesterError(f'无效的 api-key: {e.message}')
except openai.NotFoundError as e:
raise errors.RequesterError(f'请求路径错误: {e.message}')
except openai.RateLimitError as e:
raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}')
except openai.APIError as e:
raise errors.RequesterError(f'请求错误: {e.message}')
async def invoke_rerank(
self,
model: requester.RuntimeRerankModel,
query: str,
documents: typing.List[str],
extra_args: dict[str, typing.Any] = {},
) -> typing.List[dict]:
"""Standard /rerank endpoint (Jina/Cohere/SiliconFlow/Voyage/DashScope compatible)
Supports extra_args from model.extra_args:
- rerank_url: full URL override (e.g. "https://dashscope.aliyuncs.com/compatible-api/v1/reranks")
- rerank_path: path override appended to base_url (e.g. "reranks" instead of default "rerank")
- Any other fields are merged into the request payload.
"""
api_key = model.provider.token_mgr.get_token()
base_url = self.requester_cfg.get('base_url', '').rstrip('/')
timeout = self.requester_cfg.get('timeout', 120)
merged_args = {}
if model.model_entity.extra_args:
merged_args.update(model.model_entity.extra_args)
if extra_args:
merged_args.update(extra_args)
rerank_url = merged_args.pop('rerank_url', None)
rerank_path = merged_args.pop('rerank_path', 'rerank')
if not rerank_url:
rerank_url = f'{base_url}/{rerank_path}'
headers = {
'Content-Type': 'application/json',
'Authorization': f'Bearer {api_key}',
}
payload = {
'model': model.model_entity.name,
'query': query,
'documents': documents[:64],
'top_n': min(len(documents), 64),
}
if merged_args:
payload.update(merged_args)
try:
async with httpx.AsyncClient(trust_env=True, timeout=timeout) as client:
resp = await client.post(rerank_url, headers=headers, json=payload)
resp.raise_for_status()
data = resp.json()
results = self._parse_rerank_response(data)
if results:
scores = [r.get('relevance_score', 0.0) for r in results]
min_score = min(scores)
max_score = max(scores)
if max_score - min_score > 1e-6:
for r in results:
r['relevance_score'] = (r['relevance_score'] - min_score) / (max_score - min_score)
return results
except httpx.HTTPStatusError as e:
raise errors.RequesterError(f'Rerank request failed: {e.response.status_code} - {e.response.text}')
except httpx.TimeoutException:
raise errors.RequesterError('Rerank request timed out')
except Exception as e:
raise errors.RequesterError(f'Rerank request error: {str(e)}')
@staticmethod
def _parse_rerank_response(data: dict) -> typing.List[dict]:
"""Parse rerank response from various providers.
Handles:
- Jina/Cohere/SiliconFlow: {"results": [{"index", "relevance_score"}]}
- Voyage AI: {"data": [{"index", "relevance_score"}]}
- DashScope: {"output": {"results": [{"index", "relevance_score"}]}}
"""
if 'results' in data:
return data['results']
if 'data' in data:
return data['data']
if 'output' in data and isinstance(data['output'], dict):
return data['output'].get('results', [])
return []