Compare commits
9 Commits
master
...
feat/litel
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
573e1fe36e | ||
|
|
7fb3cfa638 | ||
|
|
39673444d2 | ||
|
|
d450226701 | ||
|
|
926e0c0854 | ||
|
|
89bcf82518 | ||
|
|
7ea1ce2fd3 | ||
|
|
31ad85517b | ||
|
|
a62fce1cf7 |
@@ -79,6 +79,7 @@ dependencies = [
|
|||||||
"pymilvus>=2.6.4",
|
"pymilvus>=2.6.4",
|
||||||
"pgvector>=0.4.1",
|
"pgvector>=0.4.1",
|
||||||
"botocore>=1.42.39",
|
"botocore>=1.42.39",
|
||||||
|
"litellm>=1.0.0",
|
||||||
]
|
]
|
||||||
keywords = [
|
keywords = [
|
||||||
"bot",
|
"bot",
|
||||||
|
|||||||
@@ -46,6 +46,30 @@ class MonitoringRouterGroup(group.RouterGroup):
|
|||||||
|
|
||||||
return self.success(data=metrics)
|
return self.success(data=metrics)
|
||||||
|
|
||||||
|
@self.route('/token-statistics', methods=['GET'], auth_type=group.AuthType.USER_TOKEN)
|
||||||
|
async def get_token_statistics() -> str:
|
||||||
|
"""Get detailed token usage statistics (summary, per-model, timeseries)."""
|
||||||
|
bot_ids = quart.request.args.getlist('botId')
|
||||||
|
pipeline_ids = quart.request.args.getlist('pipelineId')
|
||||||
|
start_time_str = quart.request.args.get('startTime')
|
||||||
|
end_time_str = quart.request.args.get('endTime')
|
||||||
|
bucket = quart.request.args.get('bucket', 'hour')
|
||||||
|
if bucket not in ('hour', 'day'):
|
||||||
|
bucket = 'hour'
|
||||||
|
|
||||||
|
start_time = parse_iso_datetime(start_time_str)
|
||||||
|
end_time = parse_iso_datetime(end_time_str)
|
||||||
|
|
||||||
|
stats = await self.ap.monitoring_service.get_token_statistics(
|
||||||
|
bot_ids=bot_ids if bot_ids else None,
|
||||||
|
pipeline_ids=pipeline_ids if pipeline_ids else None,
|
||||||
|
start_time=start_time,
|
||||||
|
end_time=end_time,
|
||||||
|
bucket=bucket,
|
||||||
|
)
|
||||||
|
|
||||||
|
return self.success(data=stats)
|
||||||
|
|
||||||
@self.route('/messages', methods=['GET'], auth_type=group.AuthType.USER_TOKEN)
|
@self.route('/messages', methods=['GET'], auth_type=group.AuthType.USER_TOKEN)
|
||||||
async def get_messages() -> str:
|
async def get_messages() -> str:
|
||||||
"""Get message logs"""
|
"""Get message logs"""
|
||||||
|
|||||||
@@ -472,6 +472,179 @@ class MonitoringService:
|
|||||||
'active_sessions': active_sessions,
|
'active_sessions': active_sessions,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async def get_token_statistics(
|
||||||
|
self,
|
||||||
|
bot_ids: list[str] | None = None,
|
||||||
|
pipeline_ids: list[str] | None = None,
|
||||||
|
start_time: datetime.datetime | None = None,
|
||||||
|
end_time: datetime.datetime | None = None,
|
||||||
|
bucket: str = 'hour',
|
||||||
|
) -> dict:
|
||||||
|
"""Get detailed token usage statistics for production observability.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
- summary: aggregate token counters and call/latency stats over the window
|
||||||
|
- by_model: per-model token + call breakdown (sorted by total tokens desc)
|
||||||
|
- timeseries: token usage bucketed by `bucket` ('hour' or 'day')
|
||||||
|
|
||||||
|
Only successful LLM calls are counted toward token totals; error calls are
|
||||||
|
reported separately so a spike in failures is visible without polluting
|
||||||
|
token accounting.
|
||||||
|
"""
|
||||||
|
LLMCall = persistence_monitoring.MonitoringLLMCall
|
||||||
|
|
||||||
|
conditions = []
|
||||||
|
if bot_ids:
|
||||||
|
conditions.append(LLMCall.bot_id.in_(bot_ids))
|
||||||
|
if pipeline_ids:
|
||||||
|
conditions.append(LLMCall.pipeline_id.in_(pipeline_ids))
|
||||||
|
if start_time:
|
||||||
|
conditions.append(LLMCall.timestamp >= start_time)
|
||||||
|
if end_time:
|
||||||
|
conditions.append(LLMCall.timestamp <= end_time)
|
||||||
|
|
||||||
|
def _apply(query):
|
||||||
|
if conditions:
|
||||||
|
query = query.where(sqlalchemy.and_(*conditions))
|
||||||
|
return query
|
||||||
|
|
||||||
|
# ---- Summary aggregates ----
|
||||||
|
summary_query = _apply(
|
||||||
|
sqlalchemy.select(
|
||||||
|
sqlalchemy.func.count(LLMCall.id),
|
||||||
|
sqlalchemy.func.coalesce(sqlalchemy.func.sum(LLMCall.input_tokens), 0),
|
||||||
|
sqlalchemy.func.coalesce(sqlalchemy.func.sum(LLMCall.output_tokens), 0),
|
||||||
|
sqlalchemy.func.coalesce(sqlalchemy.func.sum(LLMCall.total_tokens), 0),
|
||||||
|
sqlalchemy.func.coalesce(sqlalchemy.func.sum(LLMCall.duration), 0),
|
||||||
|
sqlalchemy.func.coalesce(sqlalchemy.func.sum(LLMCall.cost), 0.0),
|
||||||
|
sqlalchemy.func.sum(sqlalchemy.case((LLMCall.status == 'success', 1), else_=0)),
|
||||||
|
sqlalchemy.func.sum(sqlalchemy.case((LLMCall.status == 'error', 1), else_=0)),
|
||||||
|
# Count of successful calls that nonetheless recorded zero tokens —
|
||||||
|
# a data-quality signal that usage reporting may be broken upstream.
|
||||||
|
sqlalchemy.func.sum(
|
||||||
|
sqlalchemy.case(
|
||||||
|
(sqlalchemy.and_(LLMCall.status == 'success', LLMCall.total_tokens == 0), 1),
|
||||||
|
else_=0,
|
||||||
|
)
|
||||||
|
),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
summary_result = await self.ap.persistence_mgr.execute_async(summary_query)
|
||||||
|
row = summary_result.first()
|
||||||
|
(
|
||||||
|
total_calls,
|
||||||
|
total_input_tokens,
|
||||||
|
total_output_tokens,
|
||||||
|
total_tokens,
|
||||||
|
total_duration,
|
||||||
|
total_cost,
|
||||||
|
success_calls,
|
||||||
|
error_calls,
|
||||||
|
zero_token_success_calls,
|
||||||
|
) = row if row else (0, 0, 0, 0, 0, 0.0, 0, 0, 0)
|
||||||
|
|
||||||
|
total_calls = total_calls or 0
|
||||||
|
success_calls = success_calls or 0
|
||||||
|
error_calls = error_calls or 0
|
||||||
|
zero_token_success_calls = zero_token_success_calls or 0
|
||||||
|
|
||||||
|
summary = {
|
||||||
|
'total_calls': total_calls,
|
||||||
|
'success_calls': success_calls,
|
||||||
|
'error_calls': error_calls,
|
||||||
|
'total_input_tokens': int(total_input_tokens or 0),
|
||||||
|
'total_output_tokens': int(total_output_tokens or 0),
|
||||||
|
'total_tokens': int(total_tokens or 0),
|
||||||
|
'total_cost': round(float(total_cost or 0.0), 6),
|
||||||
|
'avg_tokens_per_call': int((total_tokens or 0) / total_calls) if total_calls > 0 else 0,
|
||||||
|
'avg_duration_ms': int((total_duration or 0) / total_calls) if total_calls > 0 else 0,
|
||||||
|
'avg_tokens_per_second': round((total_output_tokens or 0) / (total_duration / 1000), 2)
|
||||||
|
if total_duration and total_duration > 0
|
||||||
|
else 0,
|
||||||
|
'zero_token_success_calls': zero_token_success_calls,
|
||||||
|
}
|
||||||
|
|
||||||
|
# ---- Per-model breakdown ----
|
||||||
|
by_model_query = _apply(
|
||||||
|
sqlalchemy.select(
|
||||||
|
LLMCall.model_name,
|
||||||
|
sqlalchemy.func.count(LLMCall.id),
|
||||||
|
sqlalchemy.func.coalesce(sqlalchemy.func.sum(LLMCall.input_tokens), 0),
|
||||||
|
sqlalchemy.func.coalesce(sqlalchemy.func.sum(LLMCall.output_tokens), 0),
|
||||||
|
sqlalchemy.func.coalesce(sqlalchemy.func.sum(LLMCall.total_tokens), 0),
|
||||||
|
sqlalchemy.func.coalesce(sqlalchemy.func.sum(LLMCall.duration), 0),
|
||||||
|
sqlalchemy.func.coalesce(sqlalchemy.func.sum(LLMCall.cost), 0.0),
|
||||||
|
sqlalchemy.func.sum(sqlalchemy.case((LLMCall.status == 'error', 1), else_=0)),
|
||||||
|
).group_by(LLMCall.model_name)
|
||||||
|
)
|
||||||
|
by_model_result = await self.ap.persistence_mgr.execute_async(by_model_query)
|
||||||
|
by_model = []
|
||||||
|
for mrow in by_model_result.all():
|
||||||
|
(
|
||||||
|
model_name,
|
||||||
|
m_calls,
|
||||||
|
m_in,
|
||||||
|
m_out,
|
||||||
|
m_total,
|
||||||
|
m_duration,
|
||||||
|
m_cost,
|
||||||
|
m_errors,
|
||||||
|
) = mrow
|
||||||
|
m_calls = m_calls or 0
|
||||||
|
by_model.append(
|
||||||
|
{
|
||||||
|
'model_name': model_name,
|
||||||
|
'calls': m_calls,
|
||||||
|
'error_calls': m_errors or 0,
|
||||||
|
'input_tokens': int(m_in or 0),
|
||||||
|
'output_tokens': int(m_out or 0),
|
||||||
|
'total_tokens': int(m_total or 0),
|
||||||
|
'cost': round(float(m_cost or 0.0), 6),
|
||||||
|
'avg_tokens_per_call': int((m_total or 0) / m_calls) if m_calls > 0 else 0,
|
||||||
|
'avg_duration_ms': int((m_duration or 0) / m_calls) if m_calls > 0 else 0,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
by_model.sort(key=lambda x: x['total_tokens'], reverse=True)
|
||||||
|
|
||||||
|
# ---- Time-bucketed series ----
|
||||||
|
# Use a DB-agnostic bucketing approach: fetch (timestamp, tokens) rows and
|
||||||
|
# aggregate in Python. The window is bounded by the time filter, so this is
|
||||||
|
# cheap for typical dashboard ranges (hours/days).
|
||||||
|
series_query = _apply(
|
||||||
|
sqlalchemy.select(
|
||||||
|
LLMCall.timestamp,
|
||||||
|
LLMCall.input_tokens,
|
||||||
|
LLMCall.output_tokens,
|
||||||
|
LLMCall.total_tokens,
|
||||||
|
).order_by(LLMCall.timestamp.asc())
|
||||||
|
)
|
||||||
|
series_result = await self.ap.persistence_mgr.execute_async(series_query)
|
||||||
|
|
||||||
|
bucket_fmt = '%Y-%m-%d %H:00' if bucket == 'hour' else '%Y-%m-%d'
|
||||||
|
buckets: dict[str, dict] = {}
|
||||||
|
for srow in series_result.all():
|
||||||
|
ts, s_in, s_out, s_total = srow
|
||||||
|
if ts is None:
|
||||||
|
continue
|
||||||
|
key = ts.strftime(bucket_fmt)
|
||||||
|
b = buckets.setdefault(
|
||||||
|
key,
|
||||||
|
{'bucket': key, 'input_tokens': 0, 'output_tokens': 0, 'total_tokens': 0, 'calls': 0},
|
||||||
|
)
|
||||||
|
b['input_tokens'] += int(s_in or 0)
|
||||||
|
b['output_tokens'] += int(s_out or 0)
|
||||||
|
b['total_tokens'] += int(s_total or 0)
|
||||||
|
b['calls'] += 1
|
||||||
|
|
||||||
|
timeseries = [buckets[k] for k in sorted(buckets.keys())]
|
||||||
|
|
||||||
|
return {
|
||||||
|
'summary': summary,
|
||||||
|
'by_model': by_model,
|
||||||
|
'timeseries': timeseries,
|
||||||
|
'bucket': bucket,
|
||||||
|
}
|
||||||
|
|
||||||
async def get_messages(
|
async def get_messages(
|
||||||
self,
|
self,
|
||||||
bot_ids: list[str] | None = None,
|
bot_ids: list[str] | None = None,
|
||||||
|
|||||||
@@ -42,6 +42,7 @@ required_deps = {
|
|||||||
'telegramify_markdown': 'telegramify-markdown',
|
'telegramify_markdown': 'telegramify-markdown',
|
||||||
'slack_sdk': 'slack_sdk',
|
'slack_sdk': 'slack_sdk',
|
||||||
'asyncpg': 'asyncpg',
|
'asyncpg': 'asyncpg',
|
||||||
|
'litellm': 'litellm',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -109,7 +109,7 @@ class PreProcessor(stage.PipelineStage):
|
|||||||
if llm_model:
|
if llm_model:
|
||||||
query.use_llm_model_uuid = llm_model.model_entity.uuid
|
query.use_llm_model_uuid = llm_model.model_entity.uuid
|
||||||
|
|
||||||
if llm_model.model_entity.abilities.__contains__('func_call'):
|
if 'func_call' in (llm_model.model_entity.abilities or []):
|
||||||
# Get bound plugins and MCP servers for filtering tools
|
# Get bound plugins and MCP servers for filtering tools
|
||||||
bound_plugins = query.variables.get('_pipeline_bound_plugins', None)
|
bound_plugins = query.variables.get('_pipeline_bound_plugins', None)
|
||||||
bound_mcp_servers = query.variables.get('_pipeline_bound_mcp_servers', None)
|
bound_mcp_servers = query.variables.get('_pipeline_bound_mcp_servers', None)
|
||||||
@@ -159,11 +159,7 @@ class PreProcessor(stage.PipelineStage):
|
|||||||
|
|
||||||
# Check if this model supports vision, if not, remove all images
|
# Check if this model supports vision, if not, remove all images
|
||||||
# TODO this checking should be performed in runner, and in this stage, the image should be reserved
|
# TODO this checking should be performed in runner, and in this stage, the image should be reserved
|
||||||
if (
|
if selected_runner == 'local-agent' and llm_model and 'vision' not in (llm_model.model_entity.abilities or []):
|
||||||
selected_runner == 'local-agent'
|
|
||||||
and llm_model
|
|
||||||
and not llm_model.model_entity.abilities.__contains__('vision')
|
|
||||||
):
|
|
||||||
for msg in query.messages:
|
for msg in query.messages:
|
||||||
if isinstance(msg.content, list):
|
if isinstance(msg.content, list):
|
||||||
for me in msg.content:
|
for me in msg.content:
|
||||||
@@ -181,7 +177,7 @@ class PreProcessor(stage.PipelineStage):
|
|||||||
plain_text += me.text
|
plain_text += me.text
|
||||||
elif isinstance(me, platform_message.Image):
|
elif isinstance(me, platform_message.Image):
|
||||||
if selected_runner != 'local-agent' or (
|
if selected_runner != 'local-agent' or (
|
||||||
llm_model and llm_model.model_entity.abilities.__contains__('vision')
|
llm_model and 'vision' in (llm_model.model_entity.abilities or [])
|
||||||
):
|
):
|
||||||
if me.base64 is not None:
|
if me.base64 is not None:
|
||||||
content_list.append(provider_message.ContentElement.from_image_base64(me.base64))
|
content_list.append(provider_message.ContentElement.from_image_base64(me.base64))
|
||||||
@@ -202,7 +198,7 @@ class PreProcessor(stage.PipelineStage):
|
|||||||
content_list.append(provider_message.ContentElement.from_text(msg.text))
|
content_list.append(provider_message.ContentElement.from_text(msg.text))
|
||||||
elif isinstance(msg, platform_message.Image):
|
elif isinstance(msg, platform_message.Image):
|
||||||
if selected_runner != 'local-agent' or (
|
if selected_runner != 'local-agent' or (
|
||||||
llm_model and llm_model.model_entity.abilities.__contains__('vision')
|
llm_model and 'vision' in (llm_model.model_entity.abilities or [])
|
||||||
):
|
):
|
||||||
if msg.base64 is not None:
|
if msg.base64 is not None:
|
||||||
content_list.append(provider_message.ContentElement.from_image_base64(msg.base64))
|
content_list.append(provider_message.ContentElement.from_image_base64(msg.base64))
|
||||||
|
|||||||
@@ -37,11 +37,41 @@ class ModelManager:
|
|||||||
self.requester_components = []
|
self.requester_components = []
|
||||||
self.requester_dict = {}
|
self.requester_dict = {}
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _get_litellm_provider_from_manifest(component: engine.Component | None) -> str | None:
|
||||||
|
if component is None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
spec = getattr(component, 'spec', None) or {}
|
||||||
|
litellm_provider = None
|
||||||
|
|
||||||
|
if isinstance(spec, dict):
|
||||||
|
litellm_provider = spec.get('litellm_provider')
|
||||||
|
else:
|
||||||
|
getter = getattr(spec, 'get', None)
|
||||||
|
if callable(getter):
|
||||||
|
try:
|
||||||
|
litellm_provider = getter('litellm_provider')
|
||||||
|
except Exception:
|
||||||
|
litellm_provider = None
|
||||||
|
|
||||||
|
if isinstance(litellm_provider, str) and litellm_provider:
|
||||||
|
return litellm_provider
|
||||||
|
return None
|
||||||
|
|
||||||
async def initialize(self):
|
async def initialize(self):
|
||||||
self.requester_components = self.ap.discover.get_components_by_kind('LLMAPIRequester')
|
self.requester_components = self.ap.discover.get_components_by_kind('LLMAPIRequester')
|
||||||
|
|
||||||
requester_dict: dict[str, type[requester.ProviderAPIRequester]] = {}
|
requester_dict: dict[str, type[requester.ProviderAPIRequester]] = {}
|
||||||
for component in self.requester_components:
|
for component in self.requester_components:
|
||||||
|
# Skip components that use litellm_provider (they will use litellmchat.py instead)
|
||||||
|
litellm_provider = self._get_litellm_provider_from_manifest(component)
|
||||||
|
if litellm_provider:
|
||||||
|
self.ap.logger.debug(
|
||||||
|
f'Skipping Python class loading for {component.metadata.name} '
|
||||||
|
f'(uses litellm_provider={litellm_provider})'
|
||||||
|
)
|
||||||
|
continue
|
||||||
requester_dict[component.metadata.name] = component.get_python_component_class()
|
requester_dict[component.metadata.name] = component.get_python_component_class()
|
||||||
|
|
||||||
self.requester_dict = requester_dict
|
self.requester_dict = requester_dict
|
||||||
@@ -294,13 +324,37 @@ class ModelManager:
|
|||||||
else:
|
else:
|
||||||
provider_entity = provider_info
|
provider_entity = provider_info
|
||||||
|
|
||||||
|
# Get requester manifest to check for litellm_provider
|
||||||
|
requester_manifest = self.get_available_requester_manifest_by_name(provider_entity.requester)
|
||||||
|
litellm_provider = self._get_litellm_provider_from_manifest(requester_manifest)
|
||||||
|
|
||||||
|
# Build config from base_url
|
||||||
|
config = {'base_url': provider_entity.base_url}
|
||||||
|
|
||||||
|
# Check if requester manifest specifies litellm_provider
|
||||||
|
if litellm_provider:
|
||||||
|
from .requesters import litellmchat
|
||||||
|
|
||||||
|
# Use unified LiteLLMRequester with provider prefix
|
||||||
|
# Map litellm_provider (YAML spec) to custom_llm_provider (config)
|
||||||
|
config['custom_llm_provider'] = litellm_provider
|
||||||
|
requester_inst = litellmchat.LiteLLMRequester(
|
||||||
|
ap=self.ap,
|
||||||
|
config=config,
|
||||||
|
)
|
||||||
|
self.ap.logger.debug(
|
||||||
|
f'Using LiteLLMRequester for {provider_entity.requester} '
|
||||||
|
f'with custom_llm_provider={config["custom_llm_provider"]}'
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
# Use original requester class (for backward compatibility)
|
||||||
if provider_entity.requester not in self.requester_dict:
|
if provider_entity.requester not in self.requester_dict:
|
||||||
raise provider_errors.RequesterNotFoundError(provider_entity.requester)
|
raise provider_errors.RequesterNotFoundError(provider_entity.requester)
|
||||||
|
|
||||||
requester_inst = self.requester_dict[provider_entity.requester](
|
requester_inst = self.requester_dict[provider_entity.requester](
|
||||||
ap=self.ap,
|
ap=self.ap,
|
||||||
config={'base_url': provider_entity.base_url},
|
config=config,
|
||||||
)
|
)
|
||||||
|
|
||||||
await requester_inst.initialize()
|
await requester_inst.initialize()
|
||||||
|
|
||||||
token_mgr = token.TokenManager(name=provider_entity.uuid, tokens=provider_entity.api_keys or [])
|
token_mgr = token.TokenManager(name=provider_entity.uuid, tokens=provider_entity.api_keys or [])
|
||||||
|
|||||||
@@ -67,8 +67,8 @@ class RuntimeProvider:
|
|||||||
if isinstance(result, tuple):
|
if isinstance(result, tuple):
|
||||||
msg, usage_info = result
|
msg, usage_info = result
|
||||||
if usage_info:
|
if usage_info:
|
||||||
input_tokens = usage_info.get('input_tokens', 0)
|
input_tokens = usage_info.get('prompt_tokens', 0)
|
||||||
output_tokens = usage_info.get('output_tokens', 0)
|
output_tokens = usage_info.get('completion_tokens', 0)
|
||||||
return msg
|
return msg
|
||||||
else:
|
else:
|
||||||
return result
|
return result
|
||||||
@@ -128,7 +128,6 @@ class RuntimeProvider:
|
|||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
status = 'success'
|
status = 'success'
|
||||||
error_message = None
|
error_message = None
|
||||||
# Note: Stream doesn't easily provide token counts, set to 0
|
|
||||||
input_tokens = 0
|
input_tokens = 0
|
||||||
output_tokens = 0
|
output_tokens = 0
|
||||||
|
|
||||||
@@ -143,6 +142,15 @@ class RuntimeProvider:
|
|||||||
remove_think=remove_think,
|
remove_think=remove_think,
|
||||||
):
|
):
|
||||||
yield chunk
|
yield chunk
|
||||||
|
# Extract usage from stream if available (stored by LiteLLM requester)
|
||||||
|
if query:
|
||||||
|
if query.variables is None:
|
||||||
|
query.variables = {}
|
||||||
|
if '_stream_usage' in query.variables:
|
||||||
|
usage_info = query.variables['_stream_usage']
|
||||||
|
input_tokens = usage_info.get('prompt_tokens', 0)
|
||||||
|
output_tokens = usage_info.get('completion_tokens', 0)
|
||||||
|
del query.variables['_stream_usage']
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
status = 'error'
|
status = 'error'
|
||||||
error_message = str(e)
|
error_message = str(e)
|
||||||
|
|||||||
@@ -1,17 +0,0 @@
|
|||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import typing
|
|
||||||
import openai
|
|
||||||
|
|
||||||
from . import chatcmpl
|
|
||||||
|
|
||||||
|
|
||||||
class AI302ChatCompletions(chatcmpl.OpenAIChatCompletions):
|
|
||||||
"""302.AI ChatCompletion API 请求器"""
|
|
||||||
|
|
||||||
client: openai.AsyncClient
|
|
||||||
|
|
||||||
default_config: dict[str, typing.Any] = {
|
|
||||||
'base_url': 'https://api.302.ai/v1',
|
|
||||||
'timeout': 120,
|
|
||||||
}
|
|
||||||
@@ -7,6 +7,7 @@ metadata:
|
|||||||
zh_Hans: 302.AI
|
zh_Hans: 302.AI
|
||||||
icon: 302ai.png
|
icon: 302ai.png
|
||||||
spec:
|
spec:
|
||||||
|
litellm_provider: openai
|
||||||
config:
|
config:
|
||||||
- name: base_url
|
- name: base_url
|
||||||
label:
|
label:
|
||||||
|
|||||||
@@ -1,370 +0,0 @@
|
|||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import typing
|
|
||||||
import json
|
|
||||||
import platform
|
|
||||||
import socket
|
|
||||||
import anthropic
|
|
||||||
import httpx
|
|
||||||
|
|
||||||
from .. import errors, requester
|
|
||||||
|
|
||||||
from ....utils import image
|
|
||||||
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
|
|
||||||
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
|
|
||||||
import langbot_plugin.api.entities.builtin.provider.message as provider_message
|
|
||||||
|
|
||||||
|
|
||||||
class AnthropicMessages(requester.ProviderAPIRequester):
|
|
||||||
"""Anthropic Messages API 请求器"""
|
|
||||||
|
|
||||||
client: anthropic.AsyncAnthropic
|
|
||||||
|
|
||||||
default_config: dict[str, typing.Any] = {
|
|
||||||
'base_url': 'https://api.anthropic.com',
|
|
||||||
'timeout': 120,
|
|
||||||
}
|
|
||||||
|
|
||||||
async def initialize(self):
|
|
||||||
# 兼容 Windows 缺失 TCP_KEEPINTVL 和 TCP_KEEPCNT 的问题
|
|
||||||
if platform.system() == 'Windows':
|
|
||||||
if not hasattr(socket, 'TCP_KEEPINTVL'):
|
|
||||||
socket.TCP_KEEPINTVL = 0
|
|
||||||
if not hasattr(socket, 'TCP_KEEPCNT'):
|
|
||||||
socket.TCP_KEEPCNT = 0
|
|
||||||
httpx_client = anthropic._base_client.AsyncHttpxClientWrapper(
|
|
||||||
base_url=self.requester_cfg['base_url'],
|
|
||||||
# cast to a valid type because mypy doesn't understand our type narrowing
|
|
||||||
timeout=typing.cast(httpx.Timeout, self.requester_cfg['timeout']),
|
|
||||||
limits=anthropic._constants.DEFAULT_CONNECTION_LIMITS,
|
|
||||||
follow_redirects=True,
|
|
||||||
trust_env=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
self.client = anthropic.AsyncAnthropic(
|
|
||||||
api_key='',
|
|
||||||
http_client=httpx_client,
|
|
||||||
base_url=self.requester_cfg['base_url'],
|
|
||||||
)
|
|
||||||
|
|
||||||
async def invoke_llm(
|
|
||||||
self,
|
|
||||||
query: pipeline_query.Query,
|
|
||||||
model: requester.RuntimeLLMModel,
|
|
||||||
messages: typing.List[provider_message.Message],
|
|
||||||
funcs: typing.List[resource_tool.LLMTool] = None,
|
|
||||||
extra_args: dict[str, typing.Any] = {},
|
|
||||||
remove_think: bool = False,
|
|
||||||
) -> provider_message.Message:
|
|
||||||
self.client.api_key = model.provider.token_mgr.get_token()
|
|
||||||
|
|
||||||
args = extra_args.copy()
|
|
||||||
args['model'] = model.model_entity.name
|
|
||||||
|
|
||||||
# 处理消息
|
|
||||||
|
|
||||||
# system
|
|
||||||
system_role_message = None
|
|
||||||
|
|
||||||
for i, m in enumerate(messages):
|
|
||||||
if m.role == 'system':
|
|
||||||
system_role_message = m
|
|
||||||
|
|
||||||
break
|
|
||||||
|
|
||||||
if system_role_message:
|
|
||||||
messages.pop(i)
|
|
||||||
|
|
||||||
if isinstance(system_role_message, provider_message.Message) and isinstance(system_role_message.content, str):
|
|
||||||
args['system'] = system_role_message.content
|
|
||||||
|
|
||||||
req_messages = []
|
|
||||||
|
|
||||||
for m in messages:
|
|
||||||
if m.role == 'tool':
|
|
||||||
tool_call_id = m.tool_call_id
|
|
||||||
|
|
||||||
req_messages.append(
|
|
||||||
{
|
|
||||||
'role': 'user',
|
|
||||||
'content': [
|
|
||||||
{
|
|
||||||
'type': 'tool_result',
|
|
||||||
'tool_use_id': tool_call_id,
|
|
||||||
'is_error': False,
|
|
||||||
'content': [{'type': 'text', 'text': m.content}],
|
|
||||||
}
|
|
||||||
],
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
continue
|
|
||||||
|
|
||||||
msg_dict = m.dict(exclude_none=True)
|
|
||||||
|
|
||||||
if isinstance(m.content, str) and m.content.strip() != '':
|
|
||||||
msg_dict['content'] = [{'type': 'text', 'text': m.content}]
|
|
||||||
elif isinstance(m.content, list):
|
|
||||||
for i, ce in enumerate(m.content):
|
|
||||||
if ce.type == 'image_base64':
|
|
||||||
image_b64, image_format = await image.extract_b64_and_format(ce.image_base64)
|
|
||||||
|
|
||||||
alter_image_ele = {
|
|
||||||
'type': 'image',
|
|
||||||
'source': {
|
|
||||||
'type': 'base64',
|
|
||||||
'media_type': f'image/{image_format}',
|
|
||||||
'data': image_b64,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
msg_dict['content'][i] = alter_image_ele
|
|
||||||
|
|
||||||
if m.tool_calls:
|
|
||||||
for tool_call in m.tool_calls:
|
|
||||||
msg_dict['content'].append(
|
|
||||||
{
|
|
||||||
'type': 'tool_use',
|
|
||||||
'id': tool_call.id,
|
|
||||||
'name': tool_call.function.name,
|
|
||||||
'input': json.loads(tool_call.function.arguments),
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
del msg_dict['tool_calls']
|
|
||||||
|
|
||||||
req_messages.append(msg_dict)
|
|
||||||
|
|
||||||
args['messages'] = req_messages
|
|
||||||
|
|
||||||
if 'thinking' in args:
|
|
||||||
args['thinking'] = {'type': 'enabled', 'budget_tokens': 10000}
|
|
||||||
|
|
||||||
if funcs:
|
|
||||||
tools = await self.ap.tool_mgr.generate_tools_for_anthropic(funcs)
|
|
||||||
|
|
||||||
if tools:
|
|
||||||
args['tools'] = tools
|
|
||||||
|
|
||||||
try:
|
|
||||||
resp = await self.client.messages.create(**args)
|
|
||||||
|
|
||||||
args = {
|
|
||||||
'content': '',
|
|
||||||
'role': resp.role,
|
|
||||||
}
|
|
||||||
assert type(resp) is anthropic.types.message.Message
|
|
||||||
|
|
||||||
for block in resp.content:
|
|
||||||
if not remove_think and block.type == 'thinking':
|
|
||||||
args['content'] = '<think>\n' + block.thinking + '\n</think>\n' + args['content']
|
|
||||||
elif block.type == 'text':
|
|
||||||
args['content'] += block.text
|
|
||||||
elif block.type == 'tool_use':
|
|
||||||
assert type(block) is anthropic.types.tool_use_block.ToolUseBlock
|
|
||||||
tool_call = provider_message.ToolCall(
|
|
||||||
id=block.id,
|
|
||||||
type='function',
|
|
||||||
function=provider_message.FunctionCall(name=block.name, arguments=json.dumps(block.input)),
|
|
||||||
)
|
|
||||||
if 'tool_calls' not in args:
|
|
||||||
args['tool_calls'] = []
|
|
||||||
args['tool_calls'].append(tool_call)
|
|
||||||
|
|
||||||
return provider_message.Message(**args)
|
|
||||||
except anthropic.AuthenticationError as e:
|
|
||||||
raise errors.RequesterError(f'api-key 无效: {e.message}')
|
|
||||||
except anthropic.BadRequestError as e:
|
|
||||||
raise errors.RequesterError(str(e.message))
|
|
||||||
except anthropic.NotFoundError as e:
|
|
||||||
if 'model: ' in str(e):
|
|
||||||
raise errors.RequesterError(f'模型无效: {e.message}')
|
|
||||||
else:
|
|
||||||
raise errors.RequesterError(f'请求地址无效: {e.message}')
|
|
||||||
|
|
||||||
async def invoke_llm_stream(
|
|
||||||
self,
|
|
||||||
query: pipeline_query.Query,
|
|
||||||
model: requester.RuntimeLLMModel,
|
|
||||||
messages: typing.List[provider_message.Message],
|
|
||||||
funcs: typing.List[resource_tool.LLMTool] = None,
|
|
||||||
extra_args: dict[str, typing.Any] = {},
|
|
||||||
remove_think: bool = False,
|
|
||||||
) -> provider_message.Message:
|
|
||||||
self.client.api_key = model.provider.token_mgr.get_token()
|
|
||||||
|
|
||||||
args = extra_args.copy()
|
|
||||||
args['model'] = model.model_entity.name
|
|
||||||
args['stream'] = True
|
|
||||||
|
|
||||||
# 处理消息
|
|
||||||
|
|
||||||
# system
|
|
||||||
system_role_message = None
|
|
||||||
|
|
||||||
for i, m in enumerate(messages):
|
|
||||||
if m.role == 'system':
|
|
||||||
system_role_message = m
|
|
||||||
|
|
||||||
break
|
|
||||||
|
|
||||||
if system_role_message:
|
|
||||||
messages.pop(i)
|
|
||||||
|
|
||||||
if isinstance(system_role_message, provider_message.Message) and isinstance(system_role_message.content, str):
|
|
||||||
args['system'] = system_role_message.content
|
|
||||||
|
|
||||||
req_messages = []
|
|
||||||
|
|
||||||
for m in messages:
|
|
||||||
if m.role == 'tool':
|
|
||||||
tool_call_id = m.tool_call_id
|
|
||||||
|
|
||||||
req_messages.append(
|
|
||||||
{
|
|
||||||
'role': 'user',
|
|
||||||
'content': [
|
|
||||||
{
|
|
||||||
'type': 'tool_result',
|
|
||||||
'tool_use_id': tool_call_id,
|
|
||||||
'is_error': False, # 暂时直接写false
|
|
||||||
'content': [
|
|
||||||
{'type': 'text', 'text': m.content}
|
|
||||||
], # 这里要是list包裹,应该是多个返回的情况?type类型好像也可以填其他的,暂时只写text
|
|
||||||
}
|
|
||||||
],
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
continue
|
|
||||||
|
|
||||||
msg_dict = m.dict(exclude_none=True)
|
|
||||||
|
|
||||||
if isinstance(m.content, str) and m.content.strip() != '':
|
|
||||||
msg_dict['content'] = [{'type': 'text', 'text': m.content}]
|
|
||||||
elif isinstance(m.content, list):
|
|
||||||
for i, ce in enumerate(m.content):
|
|
||||||
if ce.type == 'image_base64':
|
|
||||||
image_b64, image_format = await image.extract_b64_and_format(ce.image_base64)
|
|
||||||
|
|
||||||
alter_image_ele = {
|
|
||||||
'type': 'image',
|
|
||||||
'source': {
|
|
||||||
'type': 'base64',
|
|
||||||
'media_type': f'image/{image_format}',
|
|
||||||
'data': image_b64,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
msg_dict['content'][i] = alter_image_ele
|
|
||||||
if isinstance(msg_dict['content'], str) and msg_dict['content'] == '':
|
|
||||||
msg_dict['content'] = [] # 这里不知道为什么会莫名有个空导致content为字符
|
|
||||||
if m.tool_calls:
|
|
||||||
for tool_call in m.tool_calls:
|
|
||||||
msg_dict['content'].append(
|
|
||||||
{
|
|
||||||
'type': 'tool_use',
|
|
||||||
'id': tool_call.id,
|
|
||||||
'name': tool_call.function.name,
|
|
||||||
'input': json.loads(tool_call.function.arguments),
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
del msg_dict['tool_calls']
|
|
||||||
|
|
||||||
req_messages.append(msg_dict)
|
|
||||||
if 'thinking' in args:
|
|
||||||
args['thinking'] = {'type': 'enabled', 'budget_tokens': 10000}
|
|
||||||
|
|
||||||
args['messages'] = req_messages
|
|
||||||
|
|
||||||
if funcs:
|
|
||||||
tools = await self.ap.tool_mgr.generate_tools_for_anthropic(funcs)
|
|
||||||
|
|
||||||
if tools:
|
|
||||||
args['tools'] = tools
|
|
||||||
|
|
||||||
try:
|
|
||||||
role = 'assistant' # 默认角色
|
|
||||||
# chunk_idx = 0
|
|
||||||
think_started = False
|
|
||||||
think_ended = False
|
|
||||||
finish_reason = False
|
|
||||||
tool_name = ''
|
|
||||||
tool_id = ''
|
|
||||||
async for chunk in await self.client.messages.create(**args):
|
|
||||||
content = ''
|
|
||||||
tool_call = {'id': None, 'function': {'name': None, 'arguments': None}, 'type': 'function'}
|
|
||||||
if isinstance(
|
|
||||||
chunk, anthropic.types.raw_content_block_start_event.RawContentBlockStartEvent
|
|
||||||
): # 记录开始
|
|
||||||
if chunk.content_block.type == 'tool_use':
|
|
||||||
if chunk.content_block.name is not None:
|
|
||||||
tool_name = chunk.content_block.name
|
|
||||||
if chunk.content_block.id is not None:
|
|
||||||
tool_id = chunk.content_block.id
|
|
||||||
|
|
||||||
tool_call['function']['name'] = tool_name
|
|
||||||
tool_call['function']['arguments'] = ''
|
|
||||||
tool_call['id'] = tool_id
|
|
||||||
|
|
||||||
if not remove_think:
|
|
||||||
if chunk.content_block.type == 'thinking' and not remove_think:
|
|
||||||
think_started = True
|
|
||||||
elif chunk.content_block.type == 'text' and chunk.index != 0 and not remove_think:
|
|
||||||
think_ended = True
|
|
||||||
continue
|
|
||||||
elif isinstance(chunk, anthropic.types.raw_content_block_delta_event.RawContentBlockDeltaEvent):
|
|
||||||
if chunk.delta.type == 'thinking_delta':
|
|
||||||
if think_started:
|
|
||||||
think_started = False
|
|
||||||
content = '<think>\n' + chunk.delta.thinking
|
|
||||||
elif remove_think:
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
content = chunk.delta.thinking
|
|
||||||
elif chunk.delta.type == 'text_delta':
|
|
||||||
if think_ended:
|
|
||||||
think_ended = False
|
|
||||||
content = '\n</think>\n' + chunk.delta.text
|
|
||||||
else:
|
|
||||||
content = chunk.delta.text
|
|
||||||
elif chunk.delta.type == 'input_json_delta':
|
|
||||||
tool_call['function']['arguments'] = chunk.delta.partial_json
|
|
||||||
tool_call['function']['name'] = tool_name
|
|
||||||
tool_call['id'] = tool_id
|
|
||||||
elif isinstance(chunk, anthropic.types.raw_content_block_stop_event.RawContentBlockStopEvent):
|
|
||||||
continue # 记录raw_content_block结束的
|
|
||||||
|
|
||||||
elif isinstance(chunk, anthropic.types.raw_message_delta_event.RawMessageDeltaEvent):
|
|
||||||
if chunk.delta.stop_reason == 'end_turn':
|
|
||||||
finish_reason = True
|
|
||||||
elif isinstance(chunk, anthropic.types.raw_message_stop_event.RawMessageStopEvent):
|
|
||||||
continue # 这个好像是完全结束
|
|
||||||
else:
|
|
||||||
# print(chunk)
|
|
||||||
self.ap.logger.debug(f'anthropic chunk: {chunk}')
|
|
||||||
continue
|
|
||||||
|
|
||||||
args = {
|
|
||||||
'content': content,
|
|
||||||
'role': role,
|
|
||||||
'is_final': finish_reason,
|
|
||||||
'tool_calls': None if tool_call['id'] is None else [tool_call],
|
|
||||||
}
|
|
||||||
# if chunk_idx == 0:
|
|
||||||
# chunk_idx += 1
|
|
||||||
# continue
|
|
||||||
|
|
||||||
# assert type(chunk) is anthropic.types.message.Chunk
|
|
||||||
|
|
||||||
yield provider_message.MessageChunk(**args)
|
|
||||||
|
|
||||||
# return llm_entities.Message(**args)
|
|
||||||
except anthropic.AuthenticationError as e:
|
|
||||||
raise errors.RequesterError(f'api-key 无效: {e.message}')
|
|
||||||
except anthropic.BadRequestError as e:
|
|
||||||
raise errors.RequesterError(str(e.message))
|
|
||||||
except anthropic.NotFoundError as e:
|
|
||||||
if 'model: ' in str(e):
|
|
||||||
raise errors.RequesterError(f'模型无效: {e.message}')
|
|
||||||
else:
|
|
||||||
raise errors.RequesterError(f'请求地址无效: {e.message}')
|
|
||||||
@@ -7,6 +7,7 @@ metadata:
|
|||||||
zh_Hans: Anthropic
|
zh_Hans: Anthropic
|
||||||
icon: anthropic.svg
|
icon: anthropic.svg
|
||||||
spec:
|
spec:
|
||||||
|
litellm_provider: anthropic
|
||||||
config:
|
config:
|
||||||
- name: base_url
|
- name: base_url
|
||||||
label:
|
label:
|
||||||
@@ -24,6 +25,8 @@ spec:
|
|||||||
default: 120
|
default: 120
|
||||||
support_type:
|
support_type:
|
||||||
- llm
|
- llm
|
||||||
|
- text-embedding
|
||||||
|
- rerank
|
||||||
provider_category: manufacturer
|
provider_category: manufacturer
|
||||||
execution:
|
execution:
|
||||||
python:
|
python:
|
||||||
|
|||||||
5
src/langbot/pkg/provider/modelmgr/requesters/baidu.svg
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
<svg width="60" height="50" viewBox="0 0 60 50" xmlns="http://www.w3.org/2000/svg">
|
||||||
|
<rect width="60" height="50" rx="8" fill="#2932E1"/>
|
||||||
|
<text x="30" y="28" font-family="Arial, sans-serif" font-size="10" font-weight="bold" fill="white" text-anchor="middle">Baidu</text>
|
||||||
|
<text x="30" y="40" font-family="Arial, sans-serif" font-size="8" fill="white" text-anchor="middle">ERNIE</text>
|
||||||
|
</svg>
|
||||||
|
After Width: | Height: | Size: 396 B |
@@ -0,0 +1,30 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: LLMAPIRequester
|
||||||
|
metadata:
|
||||||
|
name: baidu-chat-completions
|
||||||
|
label:
|
||||||
|
en_US: Baidu ERNIE
|
||||||
|
zh_Hans: 百度文心一言
|
||||||
|
icon: baidu.svg
|
||||||
|
spec:
|
||||||
|
litellm_provider: openai
|
||||||
|
config:
|
||||||
|
- name: base_url
|
||||||
|
label:
|
||||||
|
en_US: Base URL
|
||||||
|
zh_Hans: 基础 URL
|
||||||
|
type: string
|
||||||
|
required: true
|
||||||
|
default: https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop
|
||||||
|
- name: timeout
|
||||||
|
label:
|
||||||
|
en_US: Timeout
|
||||||
|
zh_Hans: 超时时间
|
||||||
|
type: integer
|
||||||
|
required: true
|
||||||
|
default: 120
|
||||||
|
support_type:
|
||||||
|
- llm
|
||||||
|
- text-embedding
|
||||||
|
- rerank
|
||||||
|
provider_category: manufacturer
|
||||||
@@ -1,242 +0,0 @@
|
|||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import typing
|
|
||||||
import dashscope
|
|
||||||
import openai
|
|
||||||
|
|
||||||
from . import modelscopechatcmpl
|
|
||||||
from .. import requester
|
|
||||||
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
|
|
||||||
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
|
|
||||||
import langbot_plugin.api.entities.builtin.provider.message as provider_message
|
|
||||||
|
|
||||||
|
|
||||||
class BailianChatCompletions(modelscopechatcmpl.ModelScopeChatCompletions):
|
|
||||||
"""阿里云百炼大模型平台 ChatCompletion API 请求器"""
|
|
||||||
|
|
||||||
client: openai.AsyncClient
|
|
||||||
|
|
||||||
default_config: dict[str, typing.Any] = {
|
|
||||||
'base_url': 'https://dashscope.aliyuncs.com/compatible-mode/v1',
|
|
||||||
'timeout': 120,
|
|
||||||
}
|
|
||||||
|
|
||||||
async def _closure_stream(
|
|
||||||
self,
|
|
||||||
query: pipeline_query.Query,
|
|
||||||
req_messages: list[dict],
|
|
||||||
use_model: requester.RuntimeLLMModel,
|
|
||||||
use_funcs: list[resource_tool.LLMTool] = None,
|
|
||||||
extra_args: dict[str, typing.Any] = {},
|
|
||||||
remove_think: bool = False,
|
|
||||||
) -> provider_message.Message | typing.AsyncGenerator[provider_message.MessageChunk, None]:
|
|
||||||
self.client.api_key = use_model.provider.token_mgr.get_token()
|
|
||||||
|
|
||||||
args = {}
|
|
||||||
args['model'] = use_model.model_entity.name
|
|
||||||
|
|
||||||
if use_funcs:
|
|
||||||
tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
|
|
||||||
|
|
||||||
if tools:
|
|
||||||
args['tools'] = tools
|
|
||||||
|
|
||||||
# 设置此次请求中的messages
|
|
||||||
messages = req_messages.copy()
|
|
||||||
|
|
||||||
is_use_dashscope_call = False # 是否使用阿里原生库调用
|
|
||||||
is_enable_multi_model = True # 是否支持多轮对话
|
|
||||||
use_time_num = 0 # 模型已调用次数,防止存在多文件时重复调用
|
|
||||||
use_time_ids = [] # 已调用的ID列表
|
|
||||||
message_id = 0 # 记录消息序号
|
|
||||||
|
|
||||||
for msg in messages:
|
|
||||||
# print(msg)
|
|
||||||
if 'content' in msg and isinstance(msg['content'], list):
|
|
||||||
for me in msg['content']:
|
|
||||||
if me['type'] == 'image_base64':
|
|
||||||
me['image_url'] = {'url': me['image_base64']}
|
|
||||||
me['type'] = 'image_url'
|
|
||||||
del me['image_base64']
|
|
||||||
elif me['type'] == 'file_url' and '.' in me.get('file_name', ''):
|
|
||||||
# 1. 视频文件推理
|
|
||||||
# https://bailian.console.aliyun.com/?tab=doc#/doc/?type=model&url=2845871
|
|
||||||
file_type = me.get('file_name').lower().split('.')[-1]
|
|
||||||
if file_type in ['mp4', 'avi', 'mkv', 'mov', 'flv', 'wmv']:
|
|
||||||
me['type'] = 'video_url'
|
|
||||||
me['video_url'] = {'url': me['file_url']}
|
|
||||||
del me['file_url']
|
|
||||||
del me['file_name']
|
|
||||||
use_time_num += 1
|
|
||||||
use_time_ids.append(message_id)
|
|
||||||
is_enable_multi_model = False
|
|
||||||
# 2. 语音文件识别, 无法通过openai的audio字段传递,暂时不支持
|
|
||||||
# https://bailian.console.aliyun.com/?tab=doc#/doc/?type=model&url=2979031
|
|
||||||
elif file_type in [
|
|
||||||
'aac',
|
|
||||||
'amr',
|
|
||||||
'aiff',
|
|
||||||
'flac',
|
|
||||||
'm4a',
|
|
||||||
'mp3',
|
|
||||||
'mpeg',
|
|
||||||
'ogg',
|
|
||||||
'opus',
|
|
||||||
'wav',
|
|
||||||
'webm',
|
|
||||||
'wma',
|
|
||||||
]:
|
|
||||||
me['audio'] = me['file_url']
|
|
||||||
me['type'] = 'audio'
|
|
||||||
del me['file_url']
|
|
||||||
del me['type']
|
|
||||||
del me['file_name']
|
|
||||||
is_use_dashscope_call = True
|
|
||||||
use_time_num += 1
|
|
||||||
use_time_ids.append(message_id)
|
|
||||||
is_enable_multi_model = False
|
|
||||||
message_id += 1
|
|
||||||
|
|
||||||
# 使用列表推导式,保留不在 use_time_ids[:-1] 中的元素,仅保留最后一个多媒体消息
|
|
||||||
if not is_enable_multi_model and use_time_num > 1:
|
|
||||||
messages = [msg for idx, msg in enumerate(messages) if idx not in use_time_ids[:-1]]
|
|
||||||
|
|
||||||
if not is_enable_multi_model:
|
|
||||||
messages = [msg for msg in messages if 'resp_message_id' not in msg]
|
|
||||||
|
|
||||||
args['messages'] = messages
|
|
||||||
args['stream'] = True
|
|
||||||
|
|
||||||
# 流式处理状态
|
|
||||||
# tool_calls_map: dict[str, provider_message.ToolCall] = {}
|
|
||||||
chunk_idx = 0
|
|
||||||
thinking_started = False
|
|
||||||
thinking_ended = False
|
|
||||||
role = 'assistant' # 默认角色
|
|
||||||
|
|
||||||
if is_use_dashscope_call:
|
|
||||||
response = dashscope.MultiModalConversation.call(
|
|
||||||
# 若没有配置环境变量,请用百炼API Key将下行替换为:api_key = "sk-xxx"
|
|
||||||
api_key=use_model.provider.token_mgr.get_token(),
|
|
||||||
model=use_model.model_entity.name,
|
|
||||||
messages=messages,
|
|
||||||
result_format='message',
|
|
||||||
asr_options={
|
|
||||||
# "language": "zh", # 可选,若已知音频的语种,可通过该参数指定待识别语种,以提升识别准确率
|
|
||||||
'enable_lid': True,
|
|
||||||
'enable_itn': False,
|
|
||||||
},
|
|
||||||
stream=True,
|
|
||||||
)
|
|
||||||
content_length_list = []
|
|
||||||
previous_length = 0 # 记录上一次的内容长度
|
|
||||||
for res in response:
|
|
||||||
chunk = res['output']
|
|
||||||
# 解析 chunk 数据
|
|
||||||
if hasattr(chunk, 'choices') and chunk.choices:
|
|
||||||
choice = chunk.choices[0]
|
|
||||||
delta_content = choice['message'].content[0]['text']
|
|
||||||
finish_reason = choice['finish_reason']
|
|
||||||
content_length_list.append(len(delta_content))
|
|
||||||
else:
|
|
||||||
delta_content = ''
|
|
||||||
finish_reason = None
|
|
||||||
|
|
||||||
# 跳过空的第一个 chunk(只有 role 没有内容)
|
|
||||||
if chunk_idx == 0 and not delta_content:
|
|
||||||
chunk_idx += 1
|
|
||||||
continue
|
|
||||||
|
|
||||||
# 检查 content_length_list 是否有足够的数据
|
|
||||||
if len(content_length_list) >= 2:
|
|
||||||
now_content = delta_content[previous_length : content_length_list[-1]]
|
|
||||||
previous_length = content_length_list[-1] # 更新上一次的长度
|
|
||||||
else:
|
|
||||||
now_content = delta_content # 第一次循环时直接使用 delta_content
|
|
||||||
previous_length = len(delta_content) # 更新上一次的长度
|
|
||||||
|
|
||||||
# 构建 MessageChunk - 只包含增量内容
|
|
||||||
chunk_data = {
|
|
||||||
'role': role,
|
|
||||||
'content': now_content if now_content else None,
|
|
||||||
'is_final': bool(finish_reason) and finish_reason != 'null',
|
|
||||||
}
|
|
||||||
|
|
||||||
# 移除 None 值
|
|
||||||
chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
|
|
||||||
yield provider_message.MessageChunk(**chunk_data)
|
|
||||||
chunk_idx += 1
|
|
||||||
else:
|
|
||||||
async for chunk in self._req_stream(args, extra_body=extra_args):
|
|
||||||
# 解析 chunk 数据
|
|
||||||
if hasattr(chunk, 'choices') and chunk.choices:
|
|
||||||
choice = chunk.choices[0]
|
|
||||||
delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
|
|
||||||
finish_reason = getattr(choice, 'finish_reason', None)
|
|
||||||
else:
|
|
||||||
delta = {}
|
|
||||||
finish_reason = None
|
|
||||||
|
|
||||||
# 从第一个 chunk 获取 role,后续使用这个 role
|
|
||||||
if 'role' in delta and delta['role']:
|
|
||||||
role = delta['role']
|
|
||||||
|
|
||||||
# 获取增量内容
|
|
||||||
delta_content = delta.get('content', '')
|
|
||||||
reasoning_content = delta.get('reasoning_content', '')
|
|
||||||
|
|
||||||
# 处理 reasoning_content
|
|
||||||
if reasoning_content:
|
|
||||||
# accumulated_reasoning += reasoning_content
|
|
||||||
# 如果设置了 remove_think,跳过 reasoning_content
|
|
||||||
if remove_think:
|
|
||||||
chunk_idx += 1
|
|
||||||
continue
|
|
||||||
|
|
||||||
# 第一次出现 reasoning_content,添加 <think> 开始标签
|
|
||||||
if not thinking_started:
|
|
||||||
thinking_started = True
|
|
||||||
delta_content = '<think>\n' + reasoning_content
|
|
||||||
else:
|
|
||||||
# 继续输出 reasoning_content
|
|
||||||
delta_content = reasoning_content
|
|
||||||
elif thinking_started and not thinking_ended and delta_content:
|
|
||||||
# reasoning_content 结束,normal content 开始,添加 </think> 结束标签
|
|
||||||
thinking_ended = True
|
|
||||||
delta_content = '\n</think>\n' + delta_content
|
|
||||||
|
|
||||||
# 处理工具调用增量
|
|
||||||
if delta.get('tool_calls'):
|
|
||||||
for tool_call in delta['tool_calls']:
|
|
||||||
if tool_call['id'] != '':
|
|
||||||
tool_id = tool_call['id']
|
|
||||||
if tool_call['function']['name'] is not None:
|
|
||||||
tool_name = tool_call['function']['name']
|
|
||||||
|
|
||||||
if tool_call['type'] is None:
|
|
||||||
tool_call['type'] = 'function'
|
|
||||||
tool_call['id'] = tool_id
|
|
||||||
tool_call['function']['name'] = tool_name
|
|
||||||
tool_call['function']['arguments'] = (
|
|
||||||
'' if tool_call['function']['arguments'] is None else tool_call['function']['arguments']
|
|
||||||
)
|
|
||||||
|
|
||||||
# 跳过空的第一个 chunk(只有 role 没有内容)
|
|
||||||
if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'):
|
|
||||||
chunk_idx += 1
|
|
||||||
continue
|
|
||||||
|
|
||||||
# 构建 MessageChunk - 只包含增量内容
|
|
||||||
chunk_data = {
|
|
||||||
'role': role,
|
|
||||||
'content': delta_content if delta_content else None,
|
|
||||||
'tool_calls': delta.get('tool_calls'),
|
|
||||||
'is_final': bool(finish_reason),
|
|
||||||
}
|
|
||||||
|
|
||||||
# 移除 None 值
|
|
||||||
chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
|
|
||||||
|
|
||||||
yield provider_message.MessageChunk(**chunk_data)
|
|
||||||
chunk_idx += 1
|
|
||||||
# return
|
|
||||||
@@ -7,6 +7,7 @@ metadata:
|
|||||||
zh_Hans: 阿里云百炼
|
zh_Hans: 阿里云百炼
|
||||||
icon: bailian.png
|
icon: bailian.png
|
||||||
spec:
|
spec:
|
||||||
|
litellm_provider: openai
|
||||||
config:
|
config:
|
||||||
- name: base_url
|
- name: base_url
|
||||||
label:
|
label:
|
||||||
@@ -24,6 +25,7 @@ spec:
|
|||||||
default: 120
|
default: 120
|
||||||
support_type:
|
support_type:
|
||||||
- llm
|
- llm
|
||||||
|
- text-embedding
|
||||||
- rerank
|
- rerank
|
||||||
provider_category: maas
|
provider_category: maas
|
||||||
execution:
|
execution:
|
||||||
|
|||||||
@@ -1,702 +0,0 @@
|
|||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import typing
|
|
||||||
|
|
||||||
import openai
|
|
||||||
import openai.types.chat.chat_completion as chat_completion_module
|
|
||||||
import httpx
|
|
||||||
|
|
||||||
from .. import errors, requester
|
|
||||||
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
|
|
||||||
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
|
|
||||||
import langbot_plugin.api.entities.builtin.provider.message as provider_message
|
|
||||||
|
|
||||||
|
|
||||||
class OpenAIChatCompletions(requester.ProviderAPIRequester):
|
|
||||||
"""OpenAI ChatCompletion API 请求器"""
|
|
||||||
|
|
||||||
client: openai.AsyncClient
|
|
||||||
|
|
||||||
default_config: dict[str, typing.Any] = {
|
|
||||||
'base_url': 'https://api.openai.com/v1',
|
|
||||||
'timeout': 120,
|
|
||||||
}
|
|
||||||
|
|
||||||
async def initialize(self):
|
|
||||||
self.client = openai.AsyncClient(
|
|
||||||
api_key=self.init_api_key,
|
|
||||||
base_url=self.requester_cfg['base_url'].replace(' ', ''),
|
|
||||||
timeout=self.requester_cfg['timeout'],
|
|
||||||
http_client=httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']),
|
|
||||||
)
|
|
||||||
|
|
||||||
def _mask_api_key(self, api_key: str | None) -> str:
|
|
||||||
if not api_key:
|
|
||||||
return ''
|
|
||||||
if len(api_key) <= 8:
|
|
||||||
return '****'
|
|
||||||
return f'{api_key[:4]}...{api_key[-4:]}'
|
|
||||||
|
|
||||||
def _infer_model_type(self, model_id: str) -> str:
|
|
||||||
normalized_model_id = (model_id or '').lower()
|
|
||||||
embedding_keywords = (
|
|
||||||
'embedding',
|
|
||||||
'embed',
|
|
||||||
'bge-',
|
|
||||||
'e5-',
|
|
||||||
'm3e',
|
|
||||||
'gte-',
|
|
||||||
'multilingual-e5',
|
|
||||||
'text-embedding',
|
|
||||||
)
|
|
||||||
return 'embedding' if any(keyword in normalized_model_id for keyword in embedding_keywords) else 'llm'
|
|
||||||
|
|
||||||
def _infer_model_abilities(self, item: dict[str, typing.Any], model_id: str) -> list[str]:
|
|
||||||
normalized_model_id = (model_id or '').lower()
|
|
||||||
abilities: set[str] = set()
|
|
||||||
|
|
||||||
def _flatten(value: typing.Any) -> list[str]:
|
|
||||||
if value is None:
|
|
||||||
return []
|
|
||||||
if isinstance(value, str):
|
|
||||||
return [value.lower()]
|
|
||||||
if isinstance(value, dict):
|
|
||||||
flattened: list[str] = []
|
|
||||||
for nested_value in value.values():
|
|
||||||
flattened.extend(_flatten(nested_value))
|
|
||||||
return flattened
|
|
||||||
if isinstance(value, (list, tuple, set)):
|
|
||||||
flattened: list[str] = []
|
|
||||||
for nested_value in value:
|
|
||||||
flattened.extend(_flatten(nested_value))
|
|
||||||
return flattened
|
|
||||||
return [str(value).lower()]
|
|
||||||
|
|
||||||
capability_tokens = _flatten(item.get('capabilities'))
|
|
||||||
capability_tokens.extend(_flatten(item.get('modalities')))
|
|
||||||
capability_tokens.extend(_flatten(item.get('input_modalities')))
|
|
||||||
capability_tokens.extend(_flatten(item.get('output_modalities')))
|
|
||||||
capability_tokens.extend(_flatten(item.get('supported_generation_methods')))
|
|
||||||
capability_tokens.extend(_flatten(item.get('supported_parameters')))
|
|
||||||
capability_tokens.extend(_flatten(item.get('architecture')))
|
|
||||||
|
|
||||||
combined_tokens = capability_tokens + [normalized_model_id]
|
|
||||||
|
|
||||||
vision_keywords = (
|
|
||||||
'vision',
|
|
||||||
'image',
|
|
||||||
'file',
|
|
||||||
'video',
|
|
||||||
'multimodal',
|
|
||||||
'vl',
|
|
||||||
'ocr',
|
|
||||||
'omni',
|
|
||||||
)
|
|
||||||
function_call_keywords = (
|
|
||||||
'function',
|
|
||||||
'tool',
|
|
||||||
'tools',
|
|
||||||
'tool_choice',
|
|
||||||
'tool_call',
|
|
||||||
'tool-use',
|
|
||||||
'tool_use',
|
|
||||||
)
|
|
||||||
|
|
||||||
if any(any(keyword in token for keyword in vision_keywords) for token in combined_tokens):
|
|
||||||
abilities.add('vision')
|
|
||||||
|
|
||||||
if any(any(keyword in token for keyword in function_call_keywords) for token in combined_tokens):
|
|
||||||
abilities.add('func_call')
|
|
||||||
|
|
||||||
return sorted(abilities)
|
|
||||||
|
|
||||||
def _normalize_modalities(self, value: typing.Any) -> list[str]:
|
|
||||||
normalized: list[str] = []
|
|
||||||
|
|
||||||
def _collect(item: typing.Any):
|
|
||||||
if item is None:
|
|
||||||
return
|
|
||||||
if isinstance(item, str):
|
|
||||||
for part in item.replace('->', ',').replace('+', ',').split(','):
|
|
||||||
token = part.strip().lower()
|
|
||||||
if token and token not in normalized:
|
|
||||||
normalized.append(token)
|
|
||||||
return
|
|
||||||
if isinstance(item, dict):
|
|
||||||
for nested in item.values():
|
|
||||||
_collect(nested)
|
|
||||||
return
|
|
||||||
if isinstance(item, (list, tuple, set)):
|
|
||||||
for nested in item:
|
|
||||||
_collect(nested)
|
|
||||||
return
|
|
||||||
|
|
||||||
_collect(value)
|
|
||||||
return normalized
|
|
||||||
|
|
||||||
def _extract_scan_metadata(self, item: dict[str, typing.Any], model_id: str) -> dict[str, typing.Any]:
|
|
||||||
display_name = item.get('name')
|
|
||||||
if not isinstance(display_name, str) or not display_name.strip() or display_name == model_id:
|
|
||||||
display_name = ''
|
|
||||||
|
|
||||||
description = item.get('description')
|
|
||||||
if not isinstance(description, str) or not description.strip():
|
|
||||||
description = ''
|
|
||||||
|
|
||||||
context_length = item.get('context_length')
|
|
||||||
if context_length is None and isinstance(item.get('top_provider'), dict):
|
|
||||||
context_length = item['top_provider'].get('context_length')
|
|
||||||
|
|
||||||
if not isinstance(context_length, int):
|
|
||||||
try:
|
|
||||||
context_length = int(context_length) if context_length is not None else None
|
|
||||||
except (TypeError, ValueError):
|
|
||||||
context_length = None
|
|
||||||
|
|
||||||
input_modalities = self._normalize_modalities(item.get('input_modalities'))
|
|
||||||
output_modalities = self._normalize_modalities(item.get('output_modalities'))
|
|
||||||
|
|
||||||
if isinstance(item.get('architecture'), dict):
|
|
||||||
if not input_modalities:
|
|
||||||
input_modalities = self._normalize_modalities(item['architecture'].get('input_modalities'))
|
|
||||||
if not output_modalities:
|
|
||||||
output_modalities = self._normalize_modalities(item['architecture'].get('output_modalities'))
|
|
||||||
|
|
||||||
owned_by = item.get('owned_by')
|
|
||||||
if not isinstance(owned_by, str) or not owned_by.strip():
|
|
||||||
owned_by = ''
|
|
||||||
|
|
||||||
return {
|
|
||||||
'display_name': display_name or None,
|
|
||||||
'description': description or None,
|
|
||||||
'context_length': context_length,
|
|
||||||
'owned_by': owned_by or None,
|
|
||||||
'input_modalities': input_modalities,
|
|
||||||
'output_modalities': output_modalities,
|
|
||||||
}
|
|
||||||
|
|
||||||
async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]:
|
|
||||||
headers = {}
|
|
||||||
if api_key:
|
|
||||||
headers['Authorization'] = f'Bearer {api_key}'
|
|
||||||
|
|
||||||
models_url = f'{self.requester_cfg["base_url"].rstrip("/")}/models'
|
|
||||||
async with httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']) as client:
|
|
||||||
response = await client.get(models_url, headers=headers)
|
|
||||||
response.raise_for_status()
|
|
||||||
payload = response.json()
|
|
||||||
|
|
||||||
models = []
|
|
||||||
for item in payload.get('data', []):
|
|
||||||
model_id = item.get('id')
|
|
||||||
if not model_id:
|
|
||||||
continue
|
|
||||||
models.append(
|
|
||||||
{
|
|
||||||
'id': model_id,
|
|
||||||
'name': model_id,
|
|
||||||
'type': self._infer_model_type(model_id),
|
|
||||||
'abilities': self._infer_model_abilities(item, model_id),
|
|
||||||
**self._extract_scan_metadata(item, model_id),
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
models.sort(key=lambda item: (item['type'] != 'llm', item['name'].lower()))
|
|
||||||
return {
|
|
||||||
'models': models,
|
|
||||||
'debug': {
|
|
||||||
'request': {
|
|
||||||
'method': 'GET',
|
|
||||||
'url': models_url,
|
|
||||||
'headers': {
|
|
||||||
'Authorization': f'Bearer {self._mask_api_key(api_key)}' if api_key else '',
|
|
||||||
},
|
|
||||||
},
|
|
||||||
'response': payload,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
async def _req(
|
|
||||||
self,
|
|
||||||
args: dict,
|
|
||||||
extra_body: dict = {},
|
|
||||||
) -> chat_completion_module.ChatCompletion:
|
|
||||||
return await self.client.chat.completions.create(**args, extra_body=extra_body)
|
|
||||||
|
|
||||||
async def _req_stream(
|
|
||||||
self,
|
|
||||||
args: dict,
|
|
||||||
extra_body: dict = {},
|
|
||||||
):
|
|
||||||
async for chunk in await self.client.chat.completions.create(**args, extra_body=extra_body):
|
|
||||||
yield chunk
|
|
||||||
|
|
||||||
async def _make_msg(
|
|
||||||
self,
|
|
||||||
chat_completion: chat_completion_module.ChatCompletion,
|
|
||||||
remove_think: bool = False,
|
|
||||||
) -> provider_message.Message:
|
|
||||||
if not isinstance(chat_completion, chat_completion_module.ChatCompletion):
|
|
||||||
raise TypeError(f'Expected ChatCompletion, got {type(chat_completion).__name__}: {chat_completion[:16]}')
|
|
||||||
|
|
||||||
chatcmpl_message = chat_completion.choices[0].message.model_dump()
|
|
||||||
|
|
||||||
# 确保 role 字段存在且不为 None
|
|
||||||
if 'role' not in chatcmpl_message or chatcmpl_message['role'] is None:
|
|
||||||
chatcmpl_message['role'] = 'assistant'
|
|
||||||
|
|
||||||
# 处理思维链
|
|
||||||
content = chatcmpl_message.get('content', '')
|
|
||||||
reasoning_content = chatcmpl_message.get('reasoning_content', None)
|
|
||||||
|
|
||||||
processed_content, _ = await self._process_thinking_content(
|
|
||||||
content=content, reasoning_content=reasoning_content, remove_think=remove_think
|
|
||||||
)
|
|
||||||
|
|
||||||
chatcmpl_message['content'] = processed_content
|
|
||||||
|
|
||||||
# 移除 reasoning_content 字段,避免传递给 Message
|
|
||||||
if 'reasoning_content' in chatcmpl_message:
|
|
||||||
del chatcmpl_message['reasoning_content']
|
|
||||||
|
|
||||||
message = provider_message.Message(**chatcmpl_message)
|
|
||||||
|
|
||||||
return message
|
|
||||||
|
|
||||||
async def _process_thinking_content(
|
|
||||||
self,
|
|
||||||
content: str,
|
|
||||||
reasoning_content: str = None,
|
|
||||||
remove_think: bool = False,
|
|
||||||
) -> tuple[str, str]:
|
|
||||||
"""处理思维链内容
|
|
||||||
|
|
||||||
Args:
|
|
||||||
content: 原始内容
|
|
||||||
reasoning_content: reasoning_content 字段内容
|
|
||||||
remove_think: 是否移除思维链
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
(处理后的内容, 提取的思维链内容)
|
|
||||||
"""
|
|
||||||
thinking_content = ''
|
|
||||||
|
|
||||||
# 1. 从 reasoning_content 提取思维链
|
|
||||||
if reasoning_content:
|
|
||||||
thinking_content = reasoning_content
|
|
||||||
|
|
||||||
# 2. 从 content 中提取 <think> 标签内容
|
|
||||||
if content and '<think>' in content and '</think>' in content:
|
|
||||||
import re
|
|
||||||
|
|
||||||
think_pattern = r'<think>(.*?)</think>'
|
|
||||||
think_matches = re.findall(think_pattern, content, re.DOTALL)
|
|
||||||
if think_matches:
|
|
||||||
# 如果已有 reasoning_content,则追加
|
|
||||||
if thinking_content:
|
|
||||||
thinking_content += '\n' + '\n'.join(think_matches)
|
|
||||||
else:
|
|
||||||
thinking_content = '\n'.join(think_matches)
|
|
||||||
# 移除 content 中的 <think> 标签
|
|
||||||
content = re.sub(think_pattern, '', content, flags=re.DOTALL).strip()
|
|
||||||
|
|
||||||
# 3. 根据 remove_think 参数决定是否保留思维链
|
|
||||||
if remove_think:
|
|
||||||
return content, ''
|
|
||||||
else:
|
|
||||||
# 如果有思维链内容,将其以 <think> 格式添加到 content 开头
|
|
||||||
if thinking_content:
|
|
||||||
content = f'<think>\n{thinking_content}\n</think>\n{content}'.strip()
|
|
||||||
return content, thinking_content
|
|
||||||
|
|
||||||
async def _closure_stream(
|
|
||||||
self,
|
|
||||||
query: pipeline_query.Query,
|
|
||||||
req_messages: list[dict],
|
|
||||||
use_model: requester.RuntimeLLMModel,
|
|
||||||
use_funcs: list[resource_tool.LLMTool] = None,
|
|
||||||
extra_args: dict[str, typing.Any] = {},
|
|
||||||
remove_think: bool = False,
|
|
||||||
) -> provider_message.MessageChunk:
|
|
||||||
self.client.api_key = use_model.provider.token_mgr.get_token()
|
|
||||||
|
|
||||||
args = {}
|
|
||||||
args['model'] = use_model.model_entity.name
|
|
||||||
|
|
||||||
if use_funcs:
|
|
||||||
tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
|
|
||||||
if tools:
|
|
||||||
args['tools'] = tools
|
|
||||||
|
|
||||||
# 设置此次请求中的messages
|
|
||||||
messages = req_messages.copy()
|
|
||||||
|
|
||||||
# 检查vision
|
|
||||||
for msg in messages:
|
|
||||||
if 'content' in msg and isinstance(msg['content'], list):
|
|
||||||
for me in msg['content']:
|
|
||||||
if me['type'] == 'image_base64':
|
|
||||||
me['image_url'] = {'url': me['image_base64']}
|
|
||||||
me['type'] = 'image_url'
|
|
||||||
del me['image_base64']
|
|
||||||
|
|
||||||
args['messages'] = messages
|
|
||||||
args['stream'] = True
|
|
||||||
|
|
||||||
# 流式处理状态
|
|
||||||
# tool_calls_map: dict[str, provider_message.ToolCall] = {}
|
|
||||||
chunk_idx = 0
|
|
||||||
thinking_started = False
|
|
||||||
thinking_ended = False
|
|
||||||
role = 'assistant' # 默认角色
|
|
||||||
tool_id = ''
|
|
||||||
tool_name = ''
|
|
||||||
# accumulated_reasoning = '' # 仅用于判断何时结束思维链
|
|
||||||
|
|
||||||
async for chunk in self._req_stream(args, extra_body=extra_args):
|
|
||||||
# 解析 chunk 数据
|
|
||||||
|
|
||||||
if hasattr(chunk, 'choices') and chunk.choices:
|
|
||||||
choice = chunk.choices[0]
|
|
||||||
delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
|
|
||||||
|
|
||||||
finish_reason = getattr(choice, 'finish_reason', None)
|
|
||||||
else:
|
|
||||||
delta = {}
|
|
||||||
finish_reason = None
|
|
||||||
# 从第一个 chunk 获取 role,后续使用这个 role
|
|
||||||
if 'role' in delta and delta['role']:
|
|
||||||
role = delta['role']
|
|
||||||
|
|
||||||
# 获取增量内容
|
|
||||||
delta_content = delta.get('content', '')
|
|
||||||
reasoning_content = delta.get('reasoning_content', '')
|
|
||||||
|
|
||||||
# 处理 reasoning_content
|
|
||||||
if reasoning_content:
|
|
||||||
# accumulated_reasoning += reasoning_content
|
|
||||||
# 如果设置了 remove_think,跳过 reasoning_content
|
|
||||||
if remove_think:
|
|
||||||
chunk_idx += 1
|
|
||||||
continue
|
|
||||||
|
|
||||||
# 第一次出现 reasoning_content,添加 <think> 开始标签
|
|
||||||
if not thinking_started:
|
|
||||||
thinking_started = True
|
|
||||||
delta_content = '<think>\n' + reasoning_content
|
|
||||||
else:
|
|
||||||
# 继续输出 reasoning_content
|
|
||||||
delta_content = reasoning_content
|
|
||||||
elif thinking_started and not thinking_ended and delta_content:
|
|
||||||
# reasoning_content 结束,normal content 开始,添加 </think> 结束标签
|
|
||||||
thinking_ended = True
|
|
||||||
delta_content = '\n</think>\n' + delta_content
|
|
||||||
|
|
||||||
# 处理 content 中已有的 <think> 标签(如果需要移除)
|
|
||||||
# if delta_content and remove_think and '<think>' in delta_content:
|
|
||||||
# import re
|
|
||||||
#
|
|
||||||
# # 移除 <think> 标签及其内容
|
|
||||||
# delta_content = re.sub(r'<think>.*?</think>', '', delta_content, flags=re.DOTALL)
|
|
||||||
|
|
||||||
# 处理工具调用增量
|
|
||||||
# delta_tool_calls = None
|
|
||||||
if delta.get('tool_calls'):
|
|
||||||
for tool_call in delta['tool_calls']:
|
|
||||||
if tool_call['id'] and tool_call['function']['name']:
|
|
||||||
tool_id = tool_call['id']
|
|
||||||
tool_name = tool_call['function']['name']
|
|
||||||
else:
|
|
||||||
tool_call['id'] = tool_id
|
|
||||||
tool_call['function']['name'] = tool_name
|
|
||||||
if tool_call['type'] is None:
|
|
||||||
tool_call['type'] = 'function'
|
|
||||||
|
|
||||||
# 跳过空的第一个 chunk(只有 role 没有内容)
|
|
||||||
if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'):
|
|
||||||
chunk_idx += 1
|
|
||||||
continue
|
|
||||||
# 构建 MessageChunk - 只包含增量内容
|
|
||||||
chunk_data = {
|
|
||||||
'role': role,
|
|
||||||
'content': delta_content if delta_content else None,
|
|
||||||
'tool_calls': delta.get('tool_calls'),
|
|
||||||
'is_final': bool(finish_reason),
|
|
||||||
}
|
|
||||||
|
|
||||||
# 移除 None 值
|
|
||||||
chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
|
|
||||||
|
|
||||||
yield provider_message.MessageChunk(**chunk_data)
|
|
||||||
chunk_idx += 1
|
|
||||||
|
|
||||||
async def _closure(
|
|
||||||
self,
|
|
||||||
query: pipeline_query.Query,
|
|
||||||
req_messages: list[dict],
|
|
||||||
use_model: requester.RuntimeLLMModel,
|
|
||||||
use_funcs: list[resource_tool.LLMTool] = None,
|
|
||||||
extra_args: dict[str, typing.Any] = {},
|
|
||||||
remove_think: bool = False,
|
|
||||||
) -> tuple[provider_message.Message, dict]:
|
|
||||||
self.client.api_key = use_model.provider.token_mgr.get_token()
|
|
||||||
|
|
||||||
args = {}
|
|
||||||
args['model'] = use_model.model_entity.name
|
|
||||||
|
|
||||||
if use_funcs:
|
|
||||||
tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
|
|
||||||
|
|
||||||
if tools:
|
|
||||||
args['tools'] = tools
|
|
||||||
|
|
||||||
# 设置此次请求中的messages
|
|
||||||
messages = req_messages.copy()
|
|
||||||
|
|
||||||
# 检查vision
|
|
||||||
for msg in messages:
|
|
||||||
if 'content' in msg and isinstance(msg['content'], list):
|
|
||||||
for me in msg['content']:
|
|
||||||
if me['type'] == 'image_base64':
|
|
||||||
me['image_url'] = {'url': me['image_base64']}
|
|
||||||
me['type'] = 'image_url'
|
|
||||||
del me['image_base64']
|
|
||||||
|
|
||||||
args['messages'] = messages
|
|
||||||
|
|
||||||
# 发送请求
|
|
||||||
|
|
||||||
resp = await self._req(args, extra_body=extra_args)
|
|
||||||
# 处理请求结果
|
|
||||||
message = await self._make_msg(resp, remove_think)
|
|
||||||
|
|
||||||
# Extract token usage from response
|
|
||||||
usage_info = {}
|
|
||||||
if hasattr(resp, 'usage') and resp.usage:
|
|
||||||
usage_info['input_tokens'] = resp.usage.prompt_tokens or 0
|
|
||||||
usage_info['output_tokens'] = resp.usage.completion_tokens or 0
|
|
||||||
usage_info['total_tokens'] = resp.usage.total_tokens or 0
|
|
||||||
|
|
||||||
return message, usage_info
|
|
||||||
|
|
||||||
async def invoke_llm(
|
|
||||||
self,
|
|
||||||
query: pipeline_query.Query,
|
|
||||||
model: requester.RuntimeLLMModel,
|
|
||||||
messages: typing.List[provider_message.Message],
|
|
||||||
funcs: typing.List[resource_tool.LLMTool] = None,
|
|
||||||
extra_args: dict[str, typing.Any] = {},
|
|
||||||
remove_think: bool = False,
|
|
||||||
) -> tuple[provider_message.Message, dict]:
|
|
||||||
"""Invoke LLM and return message with usage info"""
|
|
||||||
req_messages = [] # req_messages 仅用于类内,外部同步由 query.messages 进行
|
|
||||||
for m in messages:
|
|
||||||
msg_dict = m.dict(exclude_none=True)
|
|
||||||
content = msg_dict.get('content')
|
|
||||||
if isinstance(content, list):
|
|
||||||
# 检查 content 列表中是否每个部分都是文本
|
|
||||||
if all(isinstance(part, dict) and part.get('type') == 'text' for part in content):
|
|
||||||
# 将所有文本部分合并为一个字符串
|
|
||||||
msg_dict['content'] = '\n'.join(part['text'] for part in content)
|
|
||||||
req_messages.append(msg_dict)
|
|
||||||
|
|
||||||
try:
|
|
||||||
msg, usage_info = await self._closure(
|
|
||||||
query=query,
|
|
||||||
req_messages=req_messages,
|
|
||||||
use_model=model,
|
|
||||||
use_funcs=funcs,
|
|
||||||
extra_args=extra_args,
|
|
||||||
remove_think=remove_think,
|
|
||||||
)
|
|
||||||
return msg, usage_info
|
|
||||||
except asyncio.TimeoutError:
|
|
||||||
raise errors.RequesterError('请求超时')
|
|
||||||
except openai.BadRequestError as e:
|
|
||||||
error_message = str(e.message) if hasattr(e, 'message') else str(e)
|
|
||||||
if 'context_length_exceeded' in str(e):
|
|
||||||
raise errors.RequesterError(f'上文过长,请重置会话: {error_message}')
|
|
||||||
else:
|
|
||||||
raise errors.RequesterError(f'请求参数错误: {error_message}')
|
|
||||||
except openai.AuthenticationError as e:
|
|
||||||
error_message = str(e.message) if hasattr(e, 'message') else str(e)
|
|
||||||
raise errors.RequesterError(f'无效的 api-key: {error_message}')
|
|
||||||
except openai.NotFoundError as e:
|
|
||||||
error_message = str(e.message) if hasattr(e, 'message') else str(e)
|
|
||||||
raise errors.RequesterError(f'请求路径错误: {error_message}')
|
|
||||||
except openai.RateLimitError as e:
|
|
||||||
error_message = str(e.message) if hasattr(e, 'message') else str(e)
|
|
||||||
raise errors.RequesterError(f'请求过于频繁或余额不足: {error_message}')
|
|
||||||
except openai.APIConnectionError as e:
|
|
||||||
error_message = f'连接错误: {str(e)}'
|
|
||||||
raise errors.RequesterError(error_message)
|
|
||||||
except openai.APIError as e:
|
|
||||||
error_message = str(e.message) if hasattr(e, 'message') else str(e)
|
|
||||||
raise errors.RequesterError(f'请求错误: {error_message}')
|
|
||||||
|
|
||||||
async def invoke_embedding(
|
|
||||||
self,
|
|
||||||
model: requester.RuntimeEmbeddingModel,
|
|
||||||
input_text: list[str],
|
|
||||||
extra_args: dict[str, typing.Any] = {},
|
|
||||||
) -> tuple[list[list[float]], dict]:
|
|
||||||
"""调用 Embedding API, returns (embeddings, usage_info)"""
|
|
||||||
self.client.api_key = model.provider.token_mgr.get_token()
|
|
||||||
|
|
||||||
args = {
|
|
||||||
'model': model.model_entity.name,
|
|
||||||
'input': input_text,
|
|
||||||
}
|
|
||||||
|
|
||||||
if model.model_entity.extra_args:
|
|
||||||
args.update(model.model_entity.extra_args)
|
|
||||||
|
|
||||||
args.update(extra_args)
|
|
||||||
|
|
||||||
try:
|
|
||||||
resp = await self.client.embeddings.create(**args)
|
|
||||||
|
|
||||||
# Extract usage info
|
|
||||||
usage_info = {}
|
|
||||||
if hasattr(resp, 'usage') and resp.usage:
|
|
||||||
usage_info['prompt_tokens'] = resp.usage.prompt_tokens or 0
|
|
||||||
usage_info['total_tokens'] = resp.usage.total_tokens or 0
|
|
||||||
|
|
||||||
return [d.embedding for d in resp.data], usage_info
|
|
||||||
except asyncio.TimeoutError:
|
|
||||||
raise errors.RequesterError('请求超时')
|
|
||||||
except openai.BadRequestError as e:
|
|
||||||
raise errors.RequesterError(f'请求参数错误: {e.message}')
|
|
||||||
|
|
||||||
async def invoke_llm_stream(
|
|
||||||
self,
|
|
||||||
query: pipeline_query.Query,
|
|
||||||
model: requester.RuntimeLLMModel,
|
|
||||||
messages: typing.List[provider_message.Message],
|
|
||||||
funcs: typing.List[resource_tool.LLMTool] = None,
|
|
||||||
extra_args: dict[str, typing.Any] = {},
|
|
||||||
remove_think: bool = False,
|
|
||||||
) -> provider_message.MessageChunk:
|
|
||||||
req_messages = [] # req_messages 仅用于类内,外部同步由 query.messages 进行
|
|
||||||
for m in messages:
|
|
||||||
msg_dict = m.dict(exclude_none=True)
|
|
||||||
content = msg_dict.get('content')
|
|
||||||
if isinstance(content, list):
|
|
||||||
# 检查 content 列表中是否每个部分都是文本
|
|
||||||
if all(isinstance(part, dict) and part.get('type') == 'text' for part in content):
|
|
||||||
# 将所有文本部分合并为一个字符串
|
|
||||||
msg_dict['content'] = '\n'.join(part['text'] for part in content)
|
|
||||||
req_messages.append(msg_dict)
|
|
||||||
|
|
||||||
try:
|
|
||||||
async for item in self._closure_stream(
|
|
||||||
query=query,
|
|
||||||
req_messages=req_messages,
|
|
||||||
use_model=model,
|
|
||||||
use_funcs=funcs,
|
|
||||||
extra_args=extra_args,
|
|
||||||
remove_think=remove_think,
|
|
||||||
):
|
|
||||||
yield item
|
|
||||||
|
|
||||||
except asyncio.TimeoutError:
|
|
||||||
raise errors.RequesterError('请求超时')
|
|
||||||
except openai.BadRequestError as e:
|
|
||||||
if 'context_length_exceeded' in e.message:
|
|
||||||
raise errors.RequesterError(f'上文过长,请重置会话: {e.message}')
|
|
||||||
else:
|
|
||||||
raise errors.RequesterError(f'请求参数错误: {e.message}')
|
|
||||||
except openai.AuthenticationError as e:
|
|
||||||
raise errors.RequesterError(f'无效的 api-key: {e.message}')
|
|
||||||
except openai.NotFoundError as e:
|
|
||||||
raise errors.RequesterError(f'请求路径错误: {e.message}')
|
|
||||||
except openai.RateLimitError as e:
|
|
||||||
raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}')
|
|
||||||
except openai.APIError as e:
|
|
||||||
raise errors.RequesterError(f'请求错误: {e.message}')
|
|
||||||
|
|
||||||
async def invoke_rerank(
|
|
||||||
self,
|
|
||||||
model: requester.RuntimeRerankModel,
|
|
||||||
query: str,
|
|
||||||
documents: typing.List[str],
|
|
||||||
extra_args: dict[str, typing.Any] = {},
|
|
||||||
) -> typing.List[dict]:
|
|
||||||
"""Standard /rerank endpoint (Jina/Cohere/SiliconFlow/Voyage/DashScope compatible)
|
|
||||||
|
|
||||||
Supports extra_args from model.extra_args:
|
|
||||||
- rerank_url: full URL override (e.g. "https://dashscope.aliyuncs.com/compatible-api/v1/reranks")
|
|
||||||
- rerank_path: path override appended to base_url (e.g. "reranks" instead of default "rerank")
|
|
||||||
- Any other fields are merged into the request payload.
|
|
||||||
"""
|
|
||||||
api_key = model.provider.token_mgr.get_token()
|
|
||||||
base_url = self.requester_cfg.get('base_url', '').rstrip('/')
|
|
||||||
timeout = self.requester_cfg.get('timeout', 120)
|
|
||||||
|
|
||||||
merged_args = {}
|
|
||||||
if model.model_entity.extra_args:
|
|
||||||
merged_args.update(model.model_entity.extra_args)
|
|
||||||
if extra_args:
|
|
||||||
merged_args.update(extra_args)
|
|
||||||
|
|
||||||
rerank_url = merged_args.pop('rerank_url', None)
|
|
||||||
rerank_path = merged_args.pop('rerank_path', 'rerank')
|
|
||||||
if not rerank_url:
|
|
||||||
rerank_url = f'{base_url}/{rerank_path}'
|
|
||||||
|
|
||||||
headers = {
|
|
||||||
'Content-Type': 'application/json',
|
|
||||||
'Authorization': f'Bearer {api_key}',
|
|
||||||
}
|
|
||||||
|
|
||||||
payload = {
|
|
||||||
'model': model.model_entity.name,
|
|
||||||
'query': query,
|
|
||||||
'documents': documents[:64],
|
|
||||||
'top_n': min(len(documents), 64),
|
|
||||||
}
|
|
||||||
|
|
||||||
if merged_args:
|
|
||||||
payload.update(merged_args)
|
|
||||||
|
|
||||||
try:
|
|
||||||
async with httpx.AsyncClient(trust_env=True, timeout=timeout) as client:
|
|
||||||
resp = await client.post(rerank_url, headers=headers, json=payload)
|
|
||||||
resp.raise_for_status()
|
|
||||||
data = resp.json()
|
|
||||||
|
|
||||||
results = self._parse_rerank_response(data)
|
|
||||||
|
|
||||||
if results:
|
|
||||||
scores = [r.get('relevance_score', 0.0) for r in results]
|
|
||||||
min_score = min(scores)
|
|
||||||
max_score = max(scores)
|
|
||||||
if max_score - min_score > 1e-6:
|
|
||||||
for r in results:
|
|
||||||
r['relevance_score'] = (r['relevance_score'] - min_score) / (max_score - min_score)
|
|
||||||
|
|
||||||
return results
|
|
||||||
except httpx.HTTPStatusError as e:
|
|
||||||
raise errors.RequesterError(f'Rerank request failed: {e.response.status_code} - {e.response.text}')
|
|
||||||
except httpx.TimeoutException:
|
|
||||||
raise errors.RequesterError('Rerank request timed out')
|
|
||||||
except Exception as e:
|
|
||||||
raise errors.RequesterError(f'Rerank request error: {str(e)}')
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _parse_rerank_response(data: dict) -> typing.List[dict]:
|
|
||||||
"""Parse rerank response from various providers.
|
|
||||||
|
|
||||||
Handles:
|
|
||||||
- Jina/Cohere/SiliconFlow: {"results": [{"index", "relevance_score"}]}
|
|
||||||
- Voyage AI: {"data": [{"index", "relevance_score"}]}
|
|
||||||
- DashScope: {"output": {"results": [{"index", "relevance_score"}]}}
|
|
||||||
"""
|
|
||||||
if 'results' in data:
|
|
||||||
return data['results']
|
|
||||||
if 'data' in data:
|
|
||||||
return data['data']
|
|
||||||
if 'output' in data and isinstance(data['output'], dict):
|
|
||||||
return data['output'].get('results', [])
|
|
||||||
return []
|
|
||||||
@@ -7,6 +7,7 @@ metadata:
|
|||||||
zh_Hans: OpenAI
|
zh_Hans: OpenAI
|
||||||
icon: openai.svg
|
icon: openai.svg
|
||||||
spec:
|
spec:
|
||||||
|
litellm_provider: openai
|
||||||
config:
|
config:
|
||||||
- name: base_url
|
- name: base_url
|
||||||
label:
|
label:
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ metadata:
|
|||||||
zh_Hans: Cohere
|
zh_Hans: Cohere
|
||||||
icon: cohere.svg
|
icon: cohere.svg
|
||||||
spec:
|
spec:
|
||||||
|
litellm_provider: cohere
|
||||||
config:
|
config:
|
||||||
- name: base_url
|
- name: base_url
|
||||||
label:
|
label:
|
||||||
|
|||||||
@@ -1,17 +0,0 @@
|
|||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import typing
|
|
||||||
import openai
|
|
||||||
|
|
||||||
from . import chatcmpl
|
|
||||||
|
|
||||||
|
|
||||||
class CompShareChatCompletions(chatcmpl.OpenAIChatCompletions):
|
|
||||||
"""CompShare ChatCompletion API 请求器"""
|
|
||||||
|
|
||||||
client: openai.AsyncClient
|
|
||||||
|
|
||||||
default_config: dict[str, typing.Any] = {
|
|
||||||
'base_url': 'https://api.modelverse.cn/v1',
|
|
||||||
'timeout': 120,
|
|
||||||
}
|
|
||||||
@@ -7,6 +7,7 @@ metadata:
|
|||||||
zh_Hans: 优云智算
|
zh_Hans: 优云智算
|
||||||
icon: compshare.png
|
icon: compshare.png
|
||||||
spec:
|
spec:
|
||||||
|
litellm_provider: openai
|
||||||
config:
|
config:
|
||||||
- name: base_url
|
- name: base_url
|
||||||
label:
|
label:
|
||||||
@@ -24,6 +25,8 @@ spec:
|
|||||||
default: 120
|
default: 120
|
||||||
support_type:
|
support_type:
|
||||||
- llm
|
- llm
|
||||||
|
- text-embedding
|
||||||
|
- rerank
|
||||||
provider_category: maas
|
provider_category: maas
|
||||||
execution:
|
execution:
|
||||||
python:
|
python:
|
||||||
|
|||||||
@@ -1,67 +0,0 @@
|
|||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import typing
|
|
||||||
|
|
||||||
from . import chatcmpl
|
|
||||||
from .. import errors, requester
|
|
||||||
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
|
|
||||||
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
|
|
||||||
import langbot_plugin.api.entities.builtin.provider.message as provider_message
|
|
||||||
|
|
||||||
|
|
||||||
class DeepseekChatCompletions(chatcmpl.OpenAIChatCompletions):
|
|
||||||
"""Deepseek ChatCompletion API 请求器"""
|
|
||||||
|
|
||||||
default_config: dict[str, typing.Any] = {
|
|
||||||
'base_url': 'https://api.deepseek.com',
|
|
||||||
'timeout': 120,
|
|
||||||
}
|
|
||||||
|
|
||||||
async def _closure(
|
|
||||||
self,
|
|
||||||
query: pipeline_query.Query,
|
|
||||||
req_messages: list[dict],
|
|
||||||
use_model: requester.RuntimeLLMModel,
|
|
||||||
use_funcs: list[resource_tool.LLMTool] = None,
|
|
||||||
extra_args: dict[str, typing.Any] = {},
|
|
||||||
remove_think: bool = False,
|
|
||||||
) -> tuple[provider_message.Message, dict]:
|
|
||||||
self.client.api_key = use_model.provider.token_mgr.get_token()
|
|
||||||
|
|
||||||
args = {}
|
|
||||||
args['model'] = use_model.model_entity.name
|
|
||||||
|
|
||||||
if use_funcs:
|
|
||||||
tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
|
|
||||||
|
|
||||||
if tools:
|
|
||||||
args['tools'] = tools
|
|
||||||
|
|
||||||
# 设置此次请求中的messages
|
|
||||||
messages = req_messages
|
|
||||||
|
|
||||||
# deepseek 不支持多模态,把content都转换成纯文字
|
|
||||||
for m in messages:
|
|
||||||
if 'content' in m and isinstance(m['content'], list):
|
|
||||||
m['content'] = ' '.join([c['text'] for c in m['content'] if 'text' in c])
|
|
||||||
|
|
||||||
args['messages'] = messages
|
|
||||||
|
|
||||||
# 发送请求
|
|
||||||
resp = await self._req(args, extra_body=extra_args)
|
|
||||||
|
|
||||||
# print(resp)
|
|
||||||
|
|
||||||
if resp is None:
|
|
||||||
raise errors.RequesterError('接口返回为空,请确定模型提供商服务是否正常')
|
|
||||||
# 处理请求结果
|
|
||||||
message = await self._make_msg(resp, remove_think)
|
|
||||||
|
|
||||||
# Extract token usage from response
|
|
||||||
usage_info = {}
|
|
||||||
if hasattr(resp, 'usage') and resp.usage:
|
|
||||||
usage_info['input_tokens'] = resp.usage.prompt_tokens or 0
|
|
||||||
usage_info['output_tokens'] = resp.usage.completion_tokens or 0
|
|
||||||
usage_info['total_tokens'] = resp.usage.total_tokens or 0
|
|
||||||
|
|
||||||
return message, usage_info
|
|
||||||
@@ -7,6 +7,7 @@ metadata:
|
|||||||
zh_Hans: DeepSeek
|
zh_Hans: DeepSeek
|
||||||
icon: deepseek.svg
|
icon: deepseek.svg
|
||||||
spec:
|
spec:
|
||||||
|
litellm_provider: deepseek
|
||||||
config:
|
config:
|
||||||
- name: base_url
|
- name: base_url
|
||||||
label:
|
label:
|
||||||
@@ -24,6 +25,8 @@ spec:
|
|||||||
default: 120
|
default: 120
|
||||||
support_type:
|
support_type:
|
||||||
- llm
|
- llm
|
||||||
|
- text-embedding
|
||||||
|
- rerank
|
||||||
provider_category: manufacturer
|
provider_category: manufacturer
|
||||||
execution:
|
execution:
|
||||||
python:
|
python:
|
||||||
|
|||||||
4
src/langbot/pkg/provider/modelmgr/requesters/doubao.svg
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
<svg width="60" height="50" viewBox="0 0 60 50" xmlns="http://www.w3.org/2000/svg">
|
||||||
|
<rect width="60" height="50" rx="8" fill="#3B82F6"/>
|
||||||
|
<text x="30" y="32" font-family="Arial, sans-serif" font-size="12" font-weight="bold" fill="white" text-anchor="middle">豆包</text>
|
||||||
|
</svg>
|
||||||
|
After Width: | Height: | Size: 282 B |
@@ -0,0 +1,30 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: LLMAPIRequester
|
||||||
|
metadata:
|
||||||
|
name: doubao-chat-completions
|
||||||
|
label:
|
||||||
|
en_US: ByteDance Doubao
|
||||||
|
zh_Hans: 字节豆包
|
||||||
|
icon: doubao.svg
|
||||||
|
spec:
|
||||||
|
litellm_provider: openai
|
||||||
|
config:
|
||||||
|
- name: base_url
|
||||||
|
label:
|
||||||
|
en_US: Base URL
|
||||||
|
zh_Hans: 基础 URL
|
||||||
|
type: string
|
||||||
|
required: true
|
||||||
|
default: https://ark.cn-beijing.volces.com/api/v3
|
||||||
|
- name: timeout
|
||||||
|
label:
|
||||||
|
en_US: Timeout
|
||||||
|
zh_Hans: 超时时间
|
||||||
|
type: integer
|
||||||
|
required: true
|
||||||
|
default: 120
|
||||||
|
support_type:
|
||||||
|
- llm
|
||||||
|
- text-embedding
|
||||||
|
- rerank
|
||||||
|
provider_category: manufacturer
|
||||||
@@ -1,205 +0,0 @@
|
|||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import typing
|
|
||||||
import httpx
|
|
||||||
|
|
||||||
from . import chatcmpl
|
|
||||||
|
|
||||||
import uuid
|
|
||||||
|
|
||||||
from .. import requester
|
|
||||||
import langbot_plugin.api.entities.builtin.provider.message as provider_message
|
|
||||||
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
|
|
||||||
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
|
|
||||||
|
|
||||||
|
|
||||||
class GeminiChatCompletions(chatcmpl.OpenAIChatCompletions):
|
|
||||||
"""Google Gemini API 请求器"""
|
|
||||||
|
|
||||||
default_config: dict[str, typing.Any] = {
|
|
||||||
'base_url': 'https://generativelanguage.googleapis.com/v1beta/openai',
|
|
||||||
'timeout': 120,
|
|
||||||
}
|
|
||||||
|
|
||||||
async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]:
|
|
||||||
models_url = 'https://generativelanguage.googleapis.com/v1beta/models'
|
|
||||||
params = {'key': api_key} if api_key else {}
|
|
||||||
|
|
||||||
all_models: list[dict[str, typing.Any]] = []
|
|
||||||
next_page_token = ''
|
|
||||||
last_payload: dict[str, typing.Any] = {}
|
|
||||||
|
|
||||||
async with httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']) as client:
|
|
||||||
while True:
|
|
||||||
request_params = dict(params)
|
|
||||||
if next_page_token:
|
|
||||||
request_params['pageToken'] = next_page_token
|
|
||||||
|
|
||||||
response = await client.get(models_url, params=request_params)
|
|
||||||
response.raise_for_status()
|
|
||||||
payload = response.json()
|
|
||||||
last_payload = payload
|
|
||||||
|
|
||||||
for item in payload.get('models', []):
|
|
||||||
model_name = item.get('name', '')
|
|
||||||
model_id = model_name.replace('models/', '', 1)
|
|
||||||
if not model_id:
|
|
||||||
continue
|
|
||||||
|
|
||||||
supported_methods = item.get('supportedGenerationMethods', []) or []
|
|
||||||
if 'embedContent' in supported_methods and 'generateContent' not in supported_methods:
|
|
||||||
model_type = 'embedding'
|
|
||||||
else:
|
|
||||||
model_type = 'llm'
|
|
||||||
|
|
||||||
all_models.append(
|
|
||||||
{
|
|
||||||
'id': model_id,
|
|
||||||
'name': model_id,
|
|
||||||
'type': model_type,
|
|
||||||
'abilities': self._infer_model_abilities(item, model_id),
|
|
||||||
'display_name': item.get('displayName') or None,
|
|
||||||
'description': item.get('description') or None,
|
|
||||||
'context_length': item.get('inputTokenLimit'),
|
|
||||||
'input_modalities': self._normalize_modalities(item.get('inputModalities')),
|
|
||||||
'output_modalities': self._normalize_modalities(item.get('outputModalities')),
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
next_page_token = payload.get('nextPageToken', '')
|
|
||||||
if not next_page_token:
|
|
||||||
break
|
|
||||||
|
|
||||||
all_models.sort(key=lambda item: (item['type'] != 'llm', item['name'].lower()))
|
|
||||||
return {
|
|
||||||
'models': all_models,
|
|
||||||
'debug': {
|
|
||||||
'request': {
|
|
||||||
'method': 'GET',
|
|
||||||
'url': models_url,
|
|
||||||
'query': {'key': self._mask_api_key(api_key)} if api_key else {},
|
|
||||||
},
|
|
||||||
'response': last_payload,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
async def _closure_stream(
|
|
||||||
self,
|
|
||||||
query: pipeline_query.Query,
|
|
||||||
req_messages: list[dict],
|
|
||||||
use_model: requester.RuntimeLLMModel,
|
|
||||||
use_funcs: list[resource_tool.LLMTool] = None,
|
|
||||||
extra_args: dict[str, typing.Any] = {},
|
|
||||||
remove_think: bool = False,
|
|
||||||
) -> provider_message.MessageChunk:
|
|
||||||
self.client.api_key = use_model.provider.token_mgr.get_token()
|
|
||||||
|
|
||||||
args = {}
|
|
||||||
args['model'] = use_model.model_entity.name
|
|
||||||
|
|
||||||
if use_funcs:
|
|
||||||
tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
|
|
||||||
if tools:
|
|
||||||
args['tools'] = tools
|
|
||||||
|
|
||||||
# 设置此次请求中的messages
|
|
||||||
messages = req_messages.copy()
|
|
||||||
|
|
||||||
# 检查vision
|
|
||||||
for msg in messages:
|
|
||||||
if 'content' in msg and isinstance(msg['content'], list):
|
|
||||||
for me in msg['content']:
|
|
||||||
if me['type'] == 'image_base64':
|
|
||||||
me['image_url'] = {'url': me['image_base64']}
|
|
||||||
me['type'] = 'image_url'
|
|
||||||
del me['image_base64']
|
|
||||||
|
|
||||||
args['messages'] = messages
|
|
||||||
args['stream'] = True
|
|
||||||
|
|
||||||
# 流式处理状态
|
|
||||||
# tool_calls_map: dict[str, provider_message.ToolCall] = {}
|
|
||||||
chunk_idx = 0
|
|
||||||
thinking_started = False
|
|
||||||
thinking_ended = False
|
|
||||||
role = 'assistant' # 默认角色
|
|
||||||
tool_id = ''
|
|
||||||
tool_name = ''
|
|
||||||
# accumulated_reasoning = '' # 仅用于判断何时结束思维链
|
|
||||||
|
|
||||||
async for chunk in self._req_stream(args, extra_body=extra_args):
|
|
||||||
# 解析 chunk 数据
|
|
||||||
|
|
||||||
if hasattr(chunk, 'choices') and chunk.choices:
|
|
||||||
choice = chunk.choices[0]
|
|
||||||
delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
|
|
||||||
|
|
||||||
finish_reason = getattr(choice, 'finish_reason', None)
|
|
||||||
else:
|
|
||||||
delta = {}
|
|
||||||
finish_reason = None
|
|
||||||
# 从第一个 chunk 获取 role,后续使用这个 role
|
|
||||||
if 'role' in delta and delta['role']:
|
|
||||||
role = delta['role']
|
|
||||||
|
|
||||||
# 获取增量内容
|
|
||||||
delta_content = delta.get('content', '')
|
|
||||||
reasoning_content = delta.get('reasoning_content', '')
|
|
||||||
|
|
||||||
# 处理 reasoning_content
|
|
||||||
if reasoning_content:
|
|
||||||
# accumulated_reasoning += reasoning_content
|
|
||||||
# 如果设置了 remove_think,跳过 reasoning_content
|
|
||||||
if remove_think:
|
|
||||||
chunk_idx += 1
|
|
||||||
continue
|
|
||||||
|
|
||||||
# 第一次出现 reasoning_content,添加 <think> 开始标签
|
|
||||||
if not thinking_started:
|
|
||||||
thinking_started = True
|
|
||||||
delta_content = '<think>\n' + reasoning_content
|
|
||||||
else:
|
|
||||||
# 继续输出 reasoning_content
|
|
||||||
delta_content = reasoning_content
|
|
||||||
elif thinking_started and not thinking_ended and delta_content:
|
|
||||||
# reasoning_content 结束,normal content 开始,添加 </think> 结束标签
|
|
||||||
thinking_ended = True
|
|
||||||
delta_content = '\n</think>\n' + delta_content
|
|
||||||
|
|
||||||
# 处理 content 中已有的 <think> 标签(如果需要移除)
|
|
||||||
# if delta_content and remove_think and '<think>' in delta_content:
|
|
||||||
# import re
|
|
||||||
#
|
|
||||||
# # 移除 <think> 标签及其内容
|
|
||||||
# delta_content = re.sub(r'<think>.*?</think>', '', delta_content, flags=re.DOTALL)
|
|
||||||
|
|
||||||
# 处理工具调用增量
|
|
||||||
# delta_tool_calls = None
|
|
||||||
if delta.get('tool_calls'):
|
|
||||||
for tool_call in delta['tool_calls']:
|
|
||||||
if tool_call['id'] == '' and tool_id == '':
|
|
||||||
tool_id = str(uuid.uuid4())
|
|
||||||
if tool_call['function']['name']:
|
|
||||||
tool_name = tool_call['function']['name']
|
|
||||||
tool_call['id'] = tool_id
|
|
||||||
tool_call['function']['name'] = tool_name
|
|
||||||
if tool_call['type'] is None:
|
|
||||||
tool_call['type'] = 'function'
|
|
||||||
|
|
||||||
# 跳过空的第一个 chunk(只有 role 没有内容)
|
|
||||||
if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'):
|
|
||||||
chunk_idx += 1
|
|
||||||
continue
|
|
||||||
# 构建 MessageChunk - 只包含增量内容
|
|
||||||
chunk_data = {
|
|
||||||
'role': role,
|
|
||||||
'content': delta_content if delta_content else None,
|
|
||||||
'tool_calls': delta.get('tool_calls'),
|
|
||||||
'is_final': bool(finish_reason),
|
|
||||||
}
|
|
||||||
|
|
||||||
# 移除 None 值
|
|
||||||
chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
|
|
||||||
|
|
||||||
yield provider_message.MessageChunk(**chunk_data)
|
|
||||||
chunk_idx += 1
|
|
||||||
@@ -7,6 +7,7 @@ metadata:
|
|||||||
zh_Hans: Google Gemini
|
zh_Hans: Google Gemini
|
||||||
icon: gemini.svg
|
icon: gemini.svg
|
||||||
spec:
|
spec:
|
||||||
|
litellm_provider: gemini
|
||||||
config:
|
config:
|
||||||
- name: base_url
|
- name: base_url
|
||||||
label:
|
label:
|
||||||
@@ -24,6 +25,8 @@ spec:
|
|||||||
default: 120
|
default: 120
|
||||||
support_type:
|
support_type:
|
||||||
- llm
|
- llm
|
||||||
|
- text-embedding
|
||||||
|
- rerank
|
||||||
provider_category: manufacturer
|
provider_category: manufacturer
|
||||||
execution:
|
execution:
|
||||||
python:
|
python:
|
||||||
|
|||||||
@@ -1,15 +0,0 @@
|
|||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
|
|
||||||
import typing
|
|
||||||
|
|
||||||
from . import ppiochatcmpl
|
|
||||||
|
|
||||||
|
|
||||||
class GiteeAIChatCompletions(ppiochatcmpl.PPIOChatCompletions):
|
|
||||||
"""Gitee AI ChatCompletions API 请求器"""
|
|
||||||
|
|
||||||
default_config: dict[str, typing.Any] = {
|
|
||||||
'base_url': 'https://ai.gitee.com/v1',
|
|
||||||
'timeout': 120,
|
|
||||||
}
|
|
||||||
@@ -7,6 +7,7 @@ metadata:
|
|||||||
zh_Hans: Gitee AI
|
zh_Hans: Gitee AI
|
||||||
icon: giteeai.svg
|
icon: giteeai.svg
|
||||||
spec:
|
spec:
|
||||||
|
litellm_provider: openai
|
||||||
config:
|
config:
|
||||||
- name: base_url
|
- name: base_url
|
||||||
label:
|
label:
|
||||||
|
|||||||
4
src/langbot/pkg/provider/modelmgr/requesters/groq.svg
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
<svg width="60" height="50" viewBox="0 0 60 50" xmlns="http://www.w3.org/2000/svg">
|
||||||
|
<rect width="60" height="50" rx="8" fill="#F97316"/>
|
||||||
|
<text x="30" y="32" font-family="Arial, sans-serif" font-size="14" font-weight="bold" fill="white" text-anchor="middle">Groq</text>
|
||||||
|
</svg>
|
||||||
|
After Width: | Height: | Size: 280 B |
@@ -0,0 +1,30 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: LLMAPIRequester
|
||||||
|
metadata:
|
||||||
|
name: groq-chat-completions
|
||||||
|
label:
|
||||||
|
en_US: Groq
|
||||||
|
zh_Hans: Groq
|
||||||
|
icon: groq.svg
|
||||||
|
spec:
|
||||||
|
litellm_provider: groq
|
||||||
|
config:
|
||||||
|
- name: base_url
|
||||||
|
label:
|
||||||
|
en_US: Base URL
|
||||||
|
zh_Hans: 基础 URL
|
||||||
|
type: string
|
||||||
|
required: true
|
||||||
|
default: https://api.groq.com/openai/v1
|
||||||
|
- name: timeout
|
||||||
|
label:
|
||||||
|
en_US: Timeout
|
||||||
|
zh_Hans: 超时时间
|
||||||
|
type: integer
|
||||||
|
required: true
|
||||||
|
default: 120
|
||||||
|
support_type:
|
||||||
|
- llm
|
||||||
|
- text-embedding
|
||||||
|
- rerank
|
||||||
|
provider_category: manufacturer
|
||||||
5
src/langbot/pkg/provider/modelmgr/requesters/iflytek.svg
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
<svg width="60" height="50" viewBox="0 0 60 50" xmlns="http://www.w3.org/2000/svg">
|
||||||
|
<rect width="60" height="50" rx="8" fill="#0066FF"/>
|
||||||
|
<text x="30" y="28" font-family="Arial, sans-serif" font-size="10" font-weight="bold" fill="white" text-anchor="middle">iFlytek</text>
|
||||||
|
<text x="30" y="40" font-family="Arial, sans-serif" font-size="8" fill="white" text-anchor="middle">Spark</text>
|
||||||
|
</svg>
|
||||||
|
After Width: | Height: | Size: 398 B |
@@ -0,0 +1,30 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: LLMAPIRequester
|
||||||
|
metadata:
|
||||||
|
name: iflytek-chat-completions
|
||||||
|
label:
|
||||||
|
en_US: iFlytek Spark
|
||||||
|
zh_Hans: 讯飞星火
|
||||||
|
icon: iflytek.svg
|
||||||
|
spec:
|
||||||
|
litellm_provider: openai
|
||||||
|
config:
|
||||||
|
- name: base_url
|
||||||
|
label:
|
||||||
|
en_US: Base URL
|
||||||
|
zh_Hans: 基础 URL
|
||||||
|
type: string
|
||||||
|
required: true
|
||||||
|
default: https://spark-api-open.xf-yun.com/v1
|
||||||
|
- name: timeout
|
||||||
|
label:
|
||||||
|
en_US: Timeout
|
||||||
|
zh_Hans: 超时时间
|
||||||
|
type: integer
|
||||||
|
required: true
|
||||||
|
default: 120
|
||||||
|
support_type:
|
||||||
|
- llm
|
||||||
|
- text-embedding
|
||||||
|
- rerank
|
||||||
|
provider_category: manufacturer
|
||||||
@@ -1,208 +0,0 @@
|
|||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import openai
|
|
||||||
import typing
|
|
||||||
|
|
||||||
from . import chatcmpl
|
|
||||||
from .. import requester
|
|
||||||
import openai.types.chat.chat_completion as chat_completion
|
|
||||||
import re
|
|
||||||
import langbot_plugin.api.entities.builtin.provider.message as provider_message
|
|
||||||
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
|
|
||||||
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
|
|
||||||
|
|
||||||
|
|
||||||
class JieKouAIChatCompletions(chatcmpl.OpenAIChatCompletions):
|
|
||||||
"""接口 AI ChatCompletion API 请求器"""
|
|
||||||
|
|
||||||
client: openai.AsyncClient
|
|
||||||
|
|
||||||
default_config: dict[str, typing.Any] = {
|
|
||||||
'base_url': 'https://api.jiekou.ai/openai',
|
|
||||||
'timeout': 120,
|
|
||||||
}
|
|
||||||
|
|
||||||
is_think: bool = False
|
|
||||||
|
|
||||||
async def _make_msg(
|
|
||||||
self,
|
|
||||||
chat_completion: chat_completion.ChatCompletion,
|
|
||||||
remove_think: bool,
|
|
||||||
) -> provider_message.Message:
|
|
||||||
chatcmpl_message = chat_completion.choices[0].message.model_dump()
|
|
||||||
# print(chatcmpl_message.keys(), chatcmpl_message.values())
|
|
||||||
|
|
||||||
# 确保 role 字段存在且不为 None
|
|
||||||
if 'role' not in chatcmpl_message or chatcmpl_message['role'] is None:
|
|
||||||
chatcmpl_message['role'] = 'assistant'
|
|
||||||
|
|
||||||
reasoning_content = chatcmpl_message['reasoning_content'] if 'reasoning_content' in chatcmpl_message else None
|
|
||||||
|
|
||||||
# deepseek的reasoner模型
|
|
||||||
chatcmpl_message['content'] = await self._process_thinking_content(
|
|
||||||
chatcmpl_message['content'], reasoning_content, remove_think
|
|
||||||
)
|
|
||||||
|
|
||||||
# 移除 reasoning_content 字段,避免传递给 Message
|
|
||||||
if 'reasoning_content' in chatcmpl_message:
|
|
||||||
del chatcmpl_message['reasoning_content']
|
|
||||||
|
|
||||||
message = provider_message.Message(**chatcmpl_message)
|
|
||||||
|
|
||||||
return message
|
|
||||||
|
|
||||||
async def _process_thinking_content(
|
|
||||||
self,
|
|
||||||
content: str,
|
|
||||||
reasoning_content: str = None,
|
|
||||||
remove_think: bool = False,
|
|
||||||
) -> tuple[str, str]:
|
|
||||||
"""处理思维链内容
|
|
||||||
|
|
||||||
Args:
|
|
||||||
content: 原始内容
|
|
||||||
reasoning_content: reasoning_content 字段内容
|
|
||||||
remove_think: 是否移除思维链
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
处理后的内容
|
|
||||||
"""
|
|
||||||
if remove_think:
|
|
||||||
content = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL)
|
|
||||||
else:
|
|
||||||
if reasoning_content is not None:
|
|
||||||
content = '<think>\n' + reasoning_content + '\n</think>\n' + content
|
|
||||||
return content
|
|
||||||
|
|
||||||
async def _make_msg_chunk(
|
|
||||||
self,
|
|
||||||
delta: dict[str, typing.Any],
|
|
||||||
idx: int,
|
|
||||||
) -> provider_message.MessageChunk:
|
|
||||||
# 处理流式chunk和完整响应的差异
|
|
||||||
# print(chat_completion.choices[0])
|
|
||||||
|
|
||||||
# 确保 role 字段存在且不为 None
|
|
||||||
if 'role' not in delta or delta['role'] is None:
|
|
||||||
delta['role'] = 'assistant'
|
|
||||||
|
|
||||||
reasoning_content = delta['reasoning_content'] if 'reasoning_content' in delta else None
|
|
||||||
|
|
||||||
delta['content'] = '' if delta['content'] is None else delta['content']
|
|
||||||
# print(reasoning_content)
|
|
||||||
|
|
||||||
# deepseek的reasoner模型
|
|
||||||
|
|
||||||
if reasoning_content is not None:
|
|
||||||
delta['content'] += reasoning_content
|
|
||||||
|
|
||||||
message = provider_message.MessageChunk(**delta)
|
|
||||||
|
|
||||||
return message
|
|
||||||
|
|
||||||
async def _closure_stream(
|
|
||||||
self,
|
|
||||||
query: pipeline_query.Query,
|
|
||||||
req_messages: list[dict],
|
|
||||||
use_model: requester.RuntimeLLMModel,
|
|
||||||
use_funcs: list[resource_tool.LLMTool] = None,
|
|
||||||
extra_args: dict[str, typing.Any] = {},
|
|
||||||
remove_think: bool = False,
|
|
||||||
) -> provider_message.Message | typing.AsyncGenerator[provider_message.MessageChunk, None]:
|
|
||||||
self.client.api_key = use_model.provider.token_mgr.get_token()
|
|
||||||
|
|
||||||
args = {}
|
|
||||||
args['model'] = use_model.model_entity.name
|
|
||||||
|
|
||||||
if use_funcs:
|
|
||||||
tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
|
|
||||||
|
|
||||||
if tools:
|
|
||||||
args['tools'] = tools
|
|
||||||
|
|
||||||
# 设置此次请求中的messages
|
|
||||||
messages = req_messages.copy()
|
|
||||||
|
|
||||||
# 检查vision
|
|
||||||
for msg in messages:
|
|
||||||
if 'content' in msg and isinstance(msg['content'], list):
|
|
||||||
for me in msg['content']:
|
|
||||||
if me['type'] == 'image_base64':
|
|
||||||
me['image_url'] = {'url': me['image_base64']}
|
|
||||||
me['type'] = 'image_url'
|
|
||||||
del me['image_base64']
|
|
||||||
|
|
||||||
args['messages'] = messages
|
|
||||||
args['stream'] = True
|
|
||||||
|
|
||||||
# tool_calls_map: dict[str, provider_message.ToolCall] = {}
|
|
||||||
chunk_idx = 0
|
|
||||||
thinking_started = False
|
|
||||||
thinking_ended = False
|
|
||||||
role = 'assistant' # 默认角色
|
|
||||||
async for chunk in self._req_stream(args, extra_body=extra_args):
|
|
||||||
# 解析 chunk 数据
|
|
||||||
if hasattr(chunk, 'choices') and chunk.choices:
|
|
||||||
choice = chunk.choices[0]
|
|
||||||
delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
|
|
||||||
finish_reason = getattr(choice, 'finish_reason', None)
|
|
||||||
else:
|
|
||||||
delta = {}
|
|
||||||
finish_reason = None
|
|
||||||
|
|
||||||
# 从第一个 chunk 获取 role,后续使用这个 role
|
|
||||||
if 'role' in delta and delta['role']:
|
|
||||||
role = delta['role']
|
|
||||||
|
|
||||||
# 获取增量内容
|
|
||||||
delta_content = delta.get('content', '')
|
|
||||||
# reasoning_content = delta.get('reasoning_content', '')
|
|
||||||
|
|
||||||
if remove_think:
|
|
||||||
if delta['content'] is not None:
|
|
||||||
if '<think>' in delta['content'] and not thinking_started and not thinking_ended:
|
|
||||||
thinking_started = True
|
|
||||||
continue
|
|
||||||
elif delta['content'] == r'</think>' and not thinking_ended:
|
|
||||||
thinking_ended = True
|
|
||||||
continue
|
|
||||||
elif thinking_ended and delta['content'] == '\n\n' and thinking_started:
|
|
||||||
thinking_started = False
|
|
||||||
continue
|
|
||||||
elif thinking_started and not thinking_ended:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# delta_tool_calls = None
|
|
||||||
if delta.get('tool_calls'):
|
|
||||||
for tool_call in delta['tool_calls']:
|
|
||||||
if tool_call['id'] and tool_call['function']['name']:
|
|
||||||
tool_id = tool_call['id']
|
|
||||||
tool_name = tool_call['function']['name']
|
|
||||||
|
|
||||||
if tool_call['id'] is None:
|
|
||||||
tool_call['id'] = tool_id
|
|
||||||
if tool_call['function']['name'] is None:
|
|
||||||
tool_call['function']['name'] = tool_name
|
|
||||||
if tool_call['function']['arguments'] is None:
|
|
||||||
tool_call['function']['arguments'] = ''
|
|
||||||
if tool_call['type'] is None:
|
|
||||||
tool_call['type'] = 'function'
|
|
||||||
|
|
||||||
# 跳过空的第一个 chunk(只有 role 没有内容)
|
|
||||||
if chunk_idx == 0 and not delta_content and not delta.get('tool_calls'):
|
|
||||||
chunk_idx += 1
|
|
||||||
continue
|
|
||||||
|
|
||||||
# 构建 MessageChunk - 只包含增量内容
|
|
||||||
chunk_data = {
|
|
||||||
'role': role,
|
|
||||||
'content': delta_content if delta_content else None,
|
|
||||||
'tool_calls': delta.get('tool_calls'),
|
|
||||||
'is_final': bool(finish_reason),
|
|
||||||
}
|
|
||||||
|
|
||||||
# 移除 None 值
|
|
||||||
chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
|
|
||||||
|
|
||||||
yield provider_message.MessageChunk(**chunk_data)
|
|
||||||
chunk_idx += 1
|
|
||||||
@@ -7,6 +7,7 @@ metadata:
|
|||||||
zh_Hans: 接口 AI
|
zh_Hans: 接口 AI
|
||||||
icon: jiekouai.png
|
icon: jiekouai.png
|
||||||
spec:
|
spec:
|
||||||
|
litellm_provider: openai
|
||||||
config:
|
config:
|
||||||
- name: base_url
|
- name: base_url
|
||||||
label:
|
label:
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ metadata:
|
|||||||
zh_Hans: Jina
|
zh_Hans: Jina
|
||||||
icon: jina.svg
|
icon: jina.svg
|
||||||
spec:
|
spec:
|
||||||
|
litellm_provider: openai
|
||||||
config:
|
config:
|
||||||
- name: base_url
|
- name: base_url
|
||||||
label:
|
label:
|
||||||
|
|||||||
571
src/langbot/pkg/provider/modelmgr/requesters/litellmchat.py
Normal file
@@ -0,0 +1,571 @@
|
|||||||
|
"""LiteLLM unified requester for chat, embedding, and rerank."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import typing
|
||||||
|
|
||||||
|
import litellm
|
||||||
|
from litellm import acompletion, aembedding, arerank
|
||||||
|
|
||||||
|
from .. import errors, requester
|
||||||
|
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
|
||||||
|
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
|
||||||
|
import langbot_plugin.api.entities.builtin.provider.message as provider_message
|
||||||
|
|
||||||
|
|
||||||
|
class LiteLLMRequester(requester.ProviderAPIRequester):
|
||||||
|
"""LiteLLM unified API requester supporting chat, embedding, and rerank."""
|
||||||
|
|
||||||
|
_EMBEDDING_MODEL_HINTS = ('embedding', 'embed', 'bge-', 'e5-', 'm3e', 'gte-', 'text-embedding')
|
||||||
|
_RERANK_MODEL_HINTS = ('rerank', 're-rank', 're_rank')
|
||||||
|
|
||||||
|
default_config: dict[str, typing.Any] = {
|
||||||
|
'base_url': '',
|
||||||
|
'timeout': 120,
|
||||||
|
'custom_llm_provider': '',
|
||||||
|
'drop_params': False,
|
||||||
|
'num_retries': 0,
|
||||||
|
'api_version': '',
|
||||||
|
}
|
||||||
|
|
||||||
|
async def initialize(self):
|
||||||
|
"""Initialize LiteLLM client settings."""
|
||||||
|
# LiteLLM doesn't require explicit client initialization
|
||||||
|
# Configuration is passed per-request via litellm params
|
||||||
|
pass
|
||||||
|
|
||||||
|
def _build_litellm_model_name(self, model_name: str, custom_llm_provider: str | None = None) -> str:
|
||||||
|
"""Build LiteLLM model name with provider prefix if needed."""
|
||||||
|
provider = custom_llm_provider or self.requester_cfg.get('custom_llm_provider', '')
|
||||||
|
if provider:
|
||||||
|
# LiteLLM format: provider/model_name
|
||||||
|
if model_name.startswith(f'{provider}/'):
|
||||||
|
return model_name
|
||||||
|
return f'{provider}/{model_name}'
|
||||||
|
# If no custom provider, assume model_name already includes prefix or is OpenAI-compatible
|
||||||
|
return model_name
|
||||||
|
|
||||||
|
def _get_custom_llm_provider(self) -> str | None:
|
||||||
|
return self.requester_cfg.get('custom_llm_provider') or None
|
||||||
|
|
||||||
|
def _safe_litellm_bool_helper(self, helper_name: str, model_name: str) -> bool:
|
||||||
|
"""Call a LiteLLM boolean capability helper without letting metadata gaps fail requests."""
|
||||||
|
helper = getattr(litellm, helper_name, None)
|
||||||
|
if not callable(helper):
|
||||||
|
return False
|
||||||
|
|
||||||
|
provider = self._get_custom_llm_provider()
|
||||||
|
candidates: list[tuple[str, str | None]] = [(model_name, provider)]
|
||||||
|
litellm_model_name = self._build_litellm_model_name(model_name)
|
||||||
|
if litellm_model_name != model_name:
|
||||||
|
candidates.append((litellm_model_name, None))
|
||||||
|
|
||||||
|
for candidate_model, candidate_provider in candidates:
|
||||||
|
try:
|
||||||
|
if bool(helper(model=candidate_model, custom_llm_provider=candidate_provider)):
|
||||||
|
return True
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
return False
|
||||||
|
|
||||||
|
def _safe_context_length(self, model_name: str) -> int | None:
|
||||||
|
helper = getattr(litellm, 'get_max_tokens', None)
|
||||||
|
if not callable(helper):
|
||||||
|
return None
|
||||||
|
|
||||||
|
candidates = [model_name]
|
||||||
|
litellm_model_name = self._build_litellm_model_name(model_name)
|
||||||
|
if litellm_model_name != model_name:
|
||||||
|
candidates.append(litellm_model_name)
|
||||||
|
|
||||||
|
for candidate in candidates:
|
||||||
|
try:
|
||||||
|
max_tokens = helper(candidate)
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
if isinstance(max_tokens, int) and max_tokens > 0:
|
||||||
|
return max_tokens
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _supports_function_calling(self, model_name: str) -> bool:
|
||||||
|
return self._safe_litellm_bool_helper('supports_function_calling', model_name)
|
||||||
|
|
||||||
|
def _supports_vision(self, model_name: str) -> bool:
|
||||||
|
return self._safe_litellm_bool_helper('supports_vision', model_name)
|
||||||
|
|
||||||
|
def _infer_model_type(self, model_id: str) -> str:
|
||||||
|
normalized_id = (model_id or '').lower()
|
||||||
|
if any(kw in normalized_id for kw in self._RERANK_MODEL_HINTS):
|
||||||
|
return 'rerank'
|
||||||
|
if any(kw in normalized_id for kw in self._EMBEDDING_MODEL_HINTS):
|
||||||
|
return 'embedding'
|
||||||
|
return 'llm'
|
||||||
|
|
||||||
|
def _enrich_scanned_model(self, model_id: str) -> dict[str, typing.Any]:
|
||||||
|
model_type = self._infer_model_type(model_id)
|
||||||
|
scanned_model: dict[str, typing.Any] = {
|
||||||
|
'id': model_id,
|
||||||
|
'name': model_id,
|
||||||
|
'type': model_type,
|
||||||
|
}
|
||||||
|
|
||||||
|
if model_type == 'llm':
|
||||||
|
abilities = []
|
||||||
|
if self._supports_function_calling(model_id):
|
||||||
|
abilities.append('func_call')
|
||||||
|
if self._supports_vision(model_id):
|
||||||
|
abilities.append('vision')
|
||||||
|
scanned_model['abilities'] = abilities
|
||||||
|
|
||||||
|
context_length = self._safe_context_length(model_id)
|
||||||
|
if context_length is not None:
|
||||||
|
scanned_model['context_length'] = context_length
|
||||||
|
|
||||||
|
return scanned_model
|
||||||
|
|
||||||
|
def _convert_messages(self, messages: typing.List[provider_message.Message]) -> list[dict]:
|
||||||
|
"""Convert LangBot messages to LiteLLM/OpenAI format."""
|
||||||
|
req_messages = []
|
||||||
|
for m in messages:
|
||||||
|
msg_dict = m.dict(exclude_none=True)
|
||||||
|
content = msg_dict.get('content')
|
||||||
|
|
||||||
|
if isinstance(content, list):
|
||||||
|
for part in content:
|
||||||
|
if isinstance(part, dict) and part.get('type') == 'image_base64':
|
||||||
|
part['image_url'] = {'url': part['image_base64']}
|
||||||
|
part['type'] = 'image_url'
|
||||||
|
del part['image_base64']
|
||||||
|
|
||||||
|
req_messages.append(msg_dict)
|
||||||
|
|
||||||
|
return req_messages
|
||||||
|
|
||||||
|
def _process_thinking_content(self, content: str, reasoning_content: str | None, remove_think: bool) -> str:
|
||||||
|
"""Process thinking/reasoning content.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
content: The main content from response
|
||||||
|
reasoning_content: Separate reasoning content from model
|
||||||
|
remove_think: If True, remove thinking markers; if False, preserve them
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Processed content string
|
||||||
|
"""
|
||||||
|
# Extract and handle thinking tags
|
||||||
|
if content and 'CRETIRE_REASONING_BEGINk' in content and 'CRETIRE_REASONING_ENDk' in content:
|
||||||
|
import re
|
||||||
|
|
||||||
|
think_pattern = r'CRETIRE_REASONING_BEGINk(.*?)CRETIRE_REASONING_ENDk'
|
||||||
|
|
||||||
|
if remove_think:
|
||||||
|
# Remove thinking tags and their content from output
|
||||||
|
content = re.sub(think_pattern, '', content, flags=re.DOTALL).strip()
|
||||||
|
# else: preserve thinking content as-is
|
||||||
|
|
||||||
|
# Handle separate reasoning_content field
|
||||||
|
# Currently we don't include reasoning_content in user-facing output regardless of remove_think
|
||||||
|
# because it's typically internal model reasoning, not user-visible thinking
|
||||||
|
return content or ''
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _normalize_usage(usage: typing.Any) -> dict:
|
||||||
|
"""Normalize a LiteLLM/OpenAI usage object into a plain token dict.
|
||||||
|
|
||||||
|
Handles several real-world shapes returned by different upstreams:
|
||||||
|
- object with ``prompt_tokens`` / ``completion_tokens`` / ``total_tokens`` attrs
|
||||||
|
- dict with the same keys
|
||||||
|
- missing ``total_tokens`` (derived from prompt + completion)
|
||||||
|
- ``None`` / partially-populated usage (defaults to 0)
|
||||||
|
"""
|
||||||
|
if usage is None:
|
||||||
|
return {'prompt_tokens': 0, 'completion_tokens': 0, 'total_tokens': 0}
|
||||||
|
|
||||||
|
def _get(key: str) -> typing.Any:
|
||||||
|
if isinstance(usage, dict):
|
||||||
|
return usage.get(key)
|
||||||
|
return getattr(usage, key, None)
|
||||||
|
|
||||||
|
prompt_tokens = _get('prompt_tokens') or 0
|
||||||
|
completion_tokens = _get('completion_tokens') or 0
|
||||||
|
total_tokens = _get('total_tokens') or 0
|
||||||
|
|
||||||
|
# Some providers omit total_tokens in streaming usage; derive it.
|
||||||
|
if not total_tokens:
|
||||||
|
total_tokens = prompt_tokens + completion_tokens
|
||||||
|
|
||||||
|
return {
|
||||||
|
'prompt_tokens': int(prompt_tokens),
|
||||||
|
'completion_tokens': int(completion_tokens),
|
||||||
|
'total_tokens': int(total_tokens),
|
||||||
|
}
|
||||||
|
|
||||||
|
def _extract_usage(self, response) -> dict:
|
||||||
|
"""Extract usage info from a non-streaming LiteLLM response."""
|
||||||
|
return self._normalize_usage(getattr(response, 'usage', None))
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _as_dict(value: typing.Any) -> dict:
|
||||||
|
if value is None:
|
||||||
|
return {}
|
||||||
|
if isinstance(value, dict):
|
||||||
|
return value
|
||||||
|
if hasattr(value, 'model_dump'):
|
||||||
|
return value.model_dump()
|
||||||
|
return {}
|
||||||
|
|
||||||
|
def _normalize_stream_tool_calls(
|
||||||
|
self,
|
||||||
|
raw_tool_calls: typing.Any,
|
||||||
|
tool_call_state: dict[int, dict[str, str]],
|
||||||
|
) -> list[dict] | None:
|
||||||
|
"""Fill OpenAI-style streaming tool-call deltas so MessageChunk can validate them."""
|
||||||
|
if not raw_tool_calls:
|
||||||
|
return None
|
||||||
|
|
||||||
|
normalized = []
|
||||||
|
for fallback_index, raw_tool_call in enumerate(raw_tool_calls):
|
||||||
|
tool_call = self._as_dict(raw_tool_call)
|
||||||
|
index = tool_call.get('index')
|
||||||
|
if not isinstance(index, int):
|
||||||
|
index = fallback_index
|
||||||
|
|
||||||
|
state = tool_call_state.setdefault(index, {'id': '', 'type': 'function', 'name': ''})
|
||||||
|
if tool_call.get('id'):
|
||||||
|
state['id'] = tool_call['id']
|
||||||
|
if tool_call.get('type'):
|
||||||
|
state['type'] = tool_call['type']
|
||||||
|
|
||||||
|
function = self._as_dict(tool_call.get('function'))
|
||||||
|
if function.get('name'):
|
||||||
|
state['name'] = function['name']
|
||||||
|
|
||||||
|
arguments = function.get('arguments')
|
||||||
|
if arguments is None:
|
||||||
|
arguments = ''
|
||||||
|
elif not isinstance(arguments, str):
|
||||||
|
arguments = str(arguments)
|
||||||
|
|
||||||
|
if not state['id'] or not state['name']:
|
||||||
|
continue
|
||||||
|
|
||||||
|
normalized.append(
|
||||||
|
{
|
||||||
|
'id': state['id'],
|
||||||
|
'type': state['type'] or 'function',
|
||||||
|
'function': {
|
||||||
|
'name': state['name'],
|
||||||
|
'arguments': arguments,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
return normalized or None
|
||||||
|
|
||||||
|
def _build_common_args(self, args: dict, include_retry_params: bool = True) -> dict:
|
||||||
|
"""Apply common requester config to args dict."""
|
||||||
|
if self.requester_cfg.get('base_url'):
|
||||||
|
args['api_base'] = self.requester_cfg['base_url']
|
||||||
|
if self.requester_cfg.get('timeout'):
|
||||||
|
args['timeout'] = self.requester_cfg['timeout']
|
||||||
|
if include_retry_params:
|
||||||
|
if self.requester_cfg.get('drop_params'):
|
||||||
|
args['drop_params'] = self.requester_cfg['drop_params']
|
||||||
|
if self.requester_cfg.get('num_retries'):
|
||||||
|
args['num_retries'] = self.requester_cfg['num_retries']
|
||||||
|
if self.requester_cfg.get('api_version'):
|
||||||
|
args['api_version'] = self.requester_cfg['api_version']
|
||||||
|
return args
|
||||||
|
|
||||||
|
def _handle_litellm_error(self, e: Exception) -> None:
|
||||||
|
"""Convert LiteLLM exceptions to RequesterError. Never returns, always raises."""
|
||||||
|
# Check more specific exceptions first (they inherit from base exceptions)
|
||||||
|
if isinstance(e, litellm.ContextWindowExceededError):
|
||||||
|
raise errors.RequesterError(f'上下文长度超限: {str(e)}')
|
||||||
|
if isinstance(e, litellm.BadRequestError):
|
||||||
|
raise errors.RequesterError(f'请求参数错误: {str(e)}')
|
||||||
|
if isinstance(e, litellm.AuthenticationError):
|
||||||
|
raise errors.RequesterError(f'API key 无效: {str(e)}')
|
||||||
|
if isinstance(e, litellm.NotFoundError):
|
||||||
|
raise errors.RequesterError(f'模型或路径无效: {str(e)}')
|
||||||
|
if isinstance(e, litellm.RateLimitError):
|
||||||
|
raise errors.RequesterError(f'请求过于频繁或余额不足: {str(e)}')
|
||||||
|
if isinstance(e, litellm.Timeout):
|
||||||
|
raise errors.RequesterError(f'请求超时: {str(e)}')
|
||||||
|
if isinstance(e, litellm.APIConnectionError):
|
||||||
|
raise errors.RequesterError(f'连接错误: {str(e)}')
|
||||||
|
if isinstance(e, litellm.APIError):
|
||||||
|
raise errors.RequesterError(f'API 错误: {str(e)}')
|
||||||
|
raise errors.RequesterError(f'未知错误: {str(e)}')
|
||||||
|
|
||||||
|
async def _build_completion_args(
|
||||||
|
self,
|
||||||
|
model: requester.RuntimeLLMModel,
|
||||||
|
messages: typing.List[provider_message.Message],
|
||||||
|
funcs: typing.List[resource_tool.LLMTool] = None,
|
||||||
|
extra_args: dict[str, typing.Any] = {},
|
||||||
|
stream: bool = False,
|
||||||
|
) -> dict:
|
||||||
|
"""Build common completion arguments for invoke_llm and invoke_llm_stream."""
|
||||||
|
req_messages = self._convert_messages(messages)
|
||||||
|
model_name = self._build_litellm_model_name(model.model_entity.name)
|
||||||
|
api_key = model.provider.token_mgr.get_token()
|
||||||
|
|
||||||
|
args = {
|
||||||
|
'model': model_name,
|
||||||
|
'messages': req_messages,
|
||||||
|
'api_key': api_key,
|
||||||
|
}
|
||||||
|
if stream:
|
||||||
|
args['stream'] = True
|
||||||
|
args['stream_options'] = {'include_usage': True}
|
||||||
|
self._build_common_args(args)
|
||||||
|
|
||||||
|
# Apply model-level extra_args first, then call-level extra_args
|
||||||
|
if model.model_entity.extra_args:
|
||||||
|
args.update(model.model_entity.extra_args)
|
||||||
|
args.update(extra_args)
|
||||||
|
|
||||||
|
if funcs:
|
||||||
|
tools = await self.ap.tool_mgr.generate_tools_for_openai(funcs)
|
||||||
|
if tools:
|
||||||
|
args['tools'] = tools
|
||||||
|
args.setdefault('tool_choice', 'auto')
|
||||||
|
|
||||||
|
return args
|
||||||
|
|
||||||
|
async def invoke_llm(
|
||||||
|
self,
|
||||||
|
query: pipeline_query.Query,
|
||||||
|
model: requester.RuntimeLLMModel,
|
||||||
|
messages: typing.List[provider_message.Message],
|
||||||
|
funcs: typing.List[resource_tool.LLMTool] = None,
|
||||||
|
extra_args: dict[str, typing.Any] = {},
|
||||||
|
remove_think: bool = False,
|
||||||
|
) -> tuple[provider_message.Message, dict]:
|
||||||
|
"""Invoke LLM and return message with usage info."""
|
||||||
|
args = await self._build_completion_args(model, messages, funcs, extra_args, stream=False)
|
||||||
|
|
||||||
|
try:
|
||||||
|
response = await acompletion(**args)
|
||||||
|
|
||||||
|
message_data = response.choices[0].message.model_dump()
|
||||||
|
if 'role' not in message_data or message_data['role'] is None:
|
||||||
|
message_data['role'] = 'assistant'
|
||||||
|
|
||||||
|
content = message_data.get('content', '')
|
||||||
|
reasoning_content = message_data.get('reasoning_content', None)
|
||||||
|
message_data['content'] = self._process_thinking_content(content, reasoning_content, remove_think)
|
||||||
|
|
||||||
|
if 'reasoning_content' in message_data:
|
||||||
|
del message_data['reasoning_content']
|
||||||
|
|
||||||
|
message = provider_message.Message(**message_data)
|
||||||
|
usage_info = self._extract_usage(response)
|
||||||
|
|
||||||
|
return message, usage_info
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self._handle_litellm_error(e)
|
||||||
|
|
||||||
|
async def invoke_llm_stream(
|
||||||
|
self,
|
||||||
|
query: pipeline_query.Query,
|
||||||
|
model: requester.RuntimeLLMModel,
|
||||||
|
messages: typing.List[provider_message.Message],
|
||||||
|
funcs: typing.List[resource_tool.LLMTool] = None,
|
||||||
|
extra_args: dict[str, typing.Any] = {},
|
||||||
|
remove_think: bool = False,
|
||||||
|
) -> provider_message.MessageChunk:
|
||||||
|
"""Invoke LLM streaming and yield chunks."""
|
||||||
|
args = await self._build_completion_args(model, messages, funcs, extra_args, stream=True)
|
||||||
|
|
||||||
|
chunk_idx = 0
|
||||||
|
role = 'assistant'
|
||||||
|
tool_call_state: dict[int, dict[str, str]] = {}
|
||||||
|
|
||||||
|
try:
|
||||||
|
response = await acompletion(**args)
|
||||||
|
async for chunk in response:
|
||||||
|
# Capture usage whenever a chunk carries it.
|
||||||
|
#
|
||||||
|
# Important: many OpenAI-compatible gateways (e.g. new-api) and
|
||||||
|
# providers send the final usage payload in a chunk that STILL
|
||||||
|
# contains a (empty-delta) choice, not an empty `choices` list.
|
||||||
|
# The previous implementation only captured usage when `choices`
|
||||||
|
# was empty, so streamed calls always recorded 0 tokens.
|
||||||
|
# We therefore capture usage independently of `choices`, and then
|
||||||
|
# fall through to also process any content this chunk may carry.
|
||||||
|
if getattr(chunk, 'usage', None):
|
||||||
|
usage_info = self._normalize_usage(chunk.usage)
|
||||||
|
if query is not None:
|
||||||
|
if query.variables is None:
|
||||||
|
query.variables = {}
|
||||||
|
query.variables['_stream_usage'] = usage_info
|
||||||
|
|
||||||
|
if not hasattr(chunk, 'choices') or not chunk.choices:
|
||||||
|
continue
|
||||||
|
|
||||||
|
choice = chunk.choices[0]
|
||||||
|
delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
|
||||||
|
finish_reason = getattr(choice, 'finish_reason', None)
|
||||||
|
|
||||||
|
if 'role' in delta and delta['role']:
|
||||||
|
role = delta['role']
|
||||||
|
|
||||||
|
delta_content = delta.get('content', '')
|
||||||
|
reasoning_content = delta.get('reasoning_content', '')
|
||||||
|
|
||||||
|
# Handle reasoning_content based on remove_think flag
|
||||||
|
if reasoning_content:
|
||||||
|
if remove_think:
|
||||||
|
# Skip reasoning content when remove_think is True
|
||||||
|
chunk_idx += 1
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
# Use reasoning_content as the displayed content
|
||||||
|
delta_content = reasoning_content
|
||||||
|
|
||||||
|
tool_calls = self._normalize_stream_tool_calls(delta.get('tool_calls'), tool_call_state)
|
||||||
|
|
||||||
|
if chunk_idx == 0 and not delta_content and not tool_calls:
|
||||||
|
chunk_idx += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
chunk_data = {
|
||||||
|
'role': role,
|
||||||
|
'content': delta_content if delta_content else None,
|
||||||
|
'tool_calls': tool_calls,
|
||||||
|
'is_final': bool(finish_reason),
|
||||||
|
}
|
||||||
|
|
||||||
|
chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
|
||||||
|
yield provider_message.MessageChunk(**chunk_data)
|
||||||
|
chunk_idx += 1
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self._handle_litellm_error(e)
|
||||||
|
|
||||||
|
async def invoke_embedding(
|
||||||
|
self,
|
||||||
|
model: requester.RuntimeEmbeddingModel,
|
||||||
|
input_text: list[str],
|
||||||
|
extra_args: dict[str, typing.Any] = {},
|
||||||
|
) -> tuple[list[list[float]], dict]:
|
||||||
|
"""Invoke embedding and return vectors with usage info."""
|
||||||
|
model_name = self._build_litellm_model_name(model.model_entity.name)
|
||||||
|
api_key = model.provider.token_mgr.get_token()
|
||||||
|
|
||||||
|
args = {
|
||||||
|
'model': model_name,
|
||||||
|
'input': input_text,
|
||||||
|
'api_key': api_key,
|
||||||
|
}
|
||||||
|
self._build_common_args(args, include_retry_params=False)
|
||||||
|
|
||||||
|
if model.model_entity.extra_args:
|
||||||
|
args.update(model.model_entity.extra_args)
|
||||||
|
|
||||||
|
args.update(extra_args)
|
||||||
|
|
||||||
|
try:
|
||||||
|
response = await aembedding(**args)
|
||||||
|
|
||||||
|
embeddings = [d.embedding for d in response.data]
|
||||||
|
usage_info = self._extract_usage(response)
|
||||||
|
|
||||||
|
return embeddings, usage_info
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self._handle_litellm_error(e)
|
||||||
|
|
||||||
|
async def invoke_rerank(
|
||||||
|
self,
|
||||||
|
model: requester.RuntimeRerankModel,
|
||||||
|
query: str,
|
||||||
|
documents: typing.List[str],
|
||||||
|
extra_args: dict[str, typing.Any] = {},
|
||||||
|
) -> typing.List[dict]:
|
||||||
|
"""Invoke rerank and return relevance scores."""
|
||||||
|
model_name = self._build_litellm_model_name(model.model_entity.name)
|
||||||
|
api_key = model.provider.token_mgr.get_token()
|
||||||
|
|
||||||
|
args = {
|
||||||
|
'model': model_name,
|
||||||
|
'query': query,
|
||||||
|
'documents': documents,
|
||||||
|
'api_key': api_key,
|
||||||
|
'top_n': min(len(documents), 64),
|
||||||
|
}
|
||||||
|
self._build_common_args(args, include_retry_params=False)
|
||||||
|
|
||||||
|
if model.model_entity.extra_args:
|
||||||
|
args.update(model.model_entity.extra_args)
|
||||||
|
|
||||||
|
args.update(extra_args)
|
||||||
|
|
||||||
|
try:
|
||||||
|
response = await arerank(**args)
|
||||||
|
|
||||||
|
results = []
|
||||||
|
for r in response.results:
|
||||||
|
results.append(
|
||||||
|
{
|
||||||
|
'index': r.get('index', 0),
|
||||||
|
'relevance_score': r.get('relevance_score', 0.0),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
if results:
|
||||||
|
scores = [r['relevance_score'] for r in results]
|
||||||
|
min_score = min(scores)
|
||||||
|
max_score = max(scores)
|
||||||
|
if max_score - min_score > 1e-6:
|
||||||
|
for r in results:
|
||||||
|
r['relevance_score'] = (r['relevance_score'] - min_score) / (max_score - min_score)
|
||||||
|
|
||||||
|
return results
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self._handle_litellm_error(e)
|
||||||
|
|
||||||
|
async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]:
|
||||||
|
"""Scan models supported by the provider."""
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
base_url = self.requester_cfg.get('base_url', '').rstrip('/')
|
||||||
|
timeout = self.requester_cfg.get('timeout', 120)
|
||||||
|
|
||||||
|
if not base_url:
|
||||||
|
raise errors.RequesterError('Base URL required for model scanning')
|
||||||
|
|
||||||
|
headers = {}
|
||||||
|
if api_key:
|
||||||
|
headers['Authorization'] = f'Bearer {api_key}'
|
||||||
|
|
||||||
|
models_url = f'{base_url}/models'
|
||||||
|
|
||||||
|
try:
|
||||||
|
async with httpx.AsyncClient(trust_env=True, timeout=timeout) as client:
|
||||||
|
response = await client.get(models_url, headers=headers)
|
||||||
|
response.raise_for_status()
|
||||||
|
payload = response.json()
|
||||||
|
|
||||||
|
models = []
|
||||||
|
for item in payload.get('data', []):
|
||||||
|
model_id = item.get('id')
|
||||||
|
if not model_id:
|
||||||
|
continue
|
||||||
|
|
||||||
|
models.append(self._enrich_scanned_model(model_id))
|
||||||
|
|
||||||
|
models.sort(key=lambda x: (x['type'] != 'llm', x['name'].lower()))
|
||||||
|
|
||||||
|
return {'models': models}
|
||||||
|
|
||||||
|
except httpx.HTTPStatusError as e:
|
||||||
|
raise errors.RequesterError(f'Model scan failed: {e.response.status_code}')
|
||||||
|
except httpx.TimeoutException:
|
||||||
|
raise errors.RequesterError('Model scan timeout')
|
||||||
|
except Exception as e:
|
||||||
|
raise errors.RequesterError(f'Model scan error: {str(e)}')
|
||||||
@@ -0,0 +1,64 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: LLMAPIRequester
|
||||||
|
metadata:
|
||||||
|
name: litellm-chat
|
||||||
|
label:
|
||||||
|
en_US: LiteLLM (Unified)
|
||||||
|
zh_Hans: LiteLLM (统一请求器)
|
||||||
|
icon: litellm.svg
|
||||||
|
spec:
|
||||||
|
config:
|
||||||
|
- name: base_url
|
||||||
|
label:
|
||||||
|
en_US: Base URL
|
||||||
|
zh_Hans: 基础 URL
|
||||||
|
type: string
|
||||||
|
required: false
|
||||||
|
default: ''
|
||||||
|
- name: timeout
|
||||||
|
label:
|
||||||
|
en_US: Timeout
|
||||||
|
zh_Hans: 超时时间
|
||||||
|
type: integer
|
||||||
|
required: true
|
||||||
|
default: 120
|
||||||
|
- name: custom_llm_provider
|
||||||
|
label:
|
||||||
|
en_US: Custom Provider
|
||||||
|
zh_Hans: 自定义 Provider
|
||||||
|
type: string
|
||||||
|
required: false
|
||||||
|
default: ''
|
||||||
|
description:
|
||||||
|
en_US: Force provider type (e.g., anthropic, openai, gemini)
|
||||||
|
zh_Hans: 强制指定 provider 类型(如 anthropic, openai, gemini)
|
||||||
|
- name: drop_params
|
||||||
|
label:
|
||||||
|
en_US: Drop Unsupported Params
|
||||||
|
zh_Hans: 丢弃不支持参数
|
||||||
|
type: boolean
|
||||||
|
required: false
|
||||||
|
default: false
|
||||||
|
- name: num_retries
|
||||||
|
label:
|
||||||
|
en_US: Number of Retries
|
||||||
|
zh_Hans: 重试次数
|
||||||
|
type: integer
|
||||||
|
required: false
|
||||||
|
default: 0
|
||||||
|
- name: api_version
|
||||||
|
label:
|
||||||
|
en_US: API Version
|
||||||
|
zh_Hans: API 版本
|
||||||
|
type: string
|
||||||
|
required: false
|
||||||
|
default: ''
|
||||||
|
support_type:
|
||||||
|
- llm
|
||||||
|
- text-embedding
|
||||||
|
- rerank
|
||||||
|
provider_category: unified
|
||||||
|
execution:
|
||||||
|
python:
|
||||||
|
path: ./litellmchat.py
|
||||||
|
attr: LiteLLMRequester
|
||||||
@@ -1,17 +0,0 @@
|
|||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import typing
|
|
||||||
import openai
|
|
||||||
|
|
||||||
from . import chatcmpl
|
|
||||||
|
|
||||||
|
|
||||||
class LmStudioChatCompletions(chatcmpl.OpenAIChatCompletions):
|
|
||||||
"""LMStudio ChatCompletion API 请求器"""
|
|
||||||
|
|
||||||
client: openai.AsyncClient
|
|
||||||
|
|
||||||
default_config: dict[str, typing.Any] = {
|
|
||||||
'base_url': 'http://127.0.0.1:1234/v1',
|
|
||||||
'timeout': 120,
|
|
||||||
}
|
|
||||||
@@ -7,6 +7,7 @@ metadata:
|
|||||||
zh_Hans: LM Studio
|
zh_Hans: LM Studio
|
||||||
icon: lmstudio.webp
|
icon: lmstudio.webp
|
||||||
spec:
|
spec:
|
||||||
|
litellm_provider: openai
|
||||||
config:
|
config:
|
||||||
- name: base_url
|
- name: base_url
|
||||||
label:
|
label:
|
||||||
|
|||||||
4
src/langbot/pkg/provider/modelmgr/requesters/mimo.svg
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
<svg width="60" height="50" viewBox="0 0 60 50" xmlns="http://www.w3.org/2000/svg">
|
||||||
|
<rect width="60" height="50" rx="8" fill="#FF6700"/>
|
||||||
|
<text x="30" y="32" font-family="Arial, sans-serif" font-size="18" font-weight="bold" fill="white" text-anchor="middle">MiMo</text>
|
||||||
|
</svg>
|
||||||
|
After Width: | Height: | Size: 280 B |
@@ -0,0 +1,30 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: LLMAPIRequester
|
||||||
|
metadata:
|
||||||
|
name: mimo-chat-completions
|
||||||
|
label:
|
||||||
|
en_US: Xiaomi MiMo
|
||||||
|
zh_Hans: 小米 MiMo
|
||||||
|
icon: mimo.svg
|
||||||
|
spec:
|
||||||
|
litellm_provider: openai
|
||||||
|
config:
|
||||||
|
- name: base_url
|
||||||
|
label:
|
||||||
|
en_US: Base URL
|
||||||
|
zh_Hans: 基础 URL
|
||||||
|
type: string
|
||||||
|
required: true
|
||||||
|
default: https://api.xiaomimimo.com/v1
|
||||||
|
- name: timeout
|
||||||
|
label:
|
||||||
|
en_US: Timeout
|
||||||
|
zh_Hans: 超时时间
|
||||||
|
type: integer
|
||||||
|
required: true
|
||||||
|
default: 120
|
||||||
|
support_type:
|
||||||
|
- llm
|
||||||
|
- text-embedding
|
||||||
|
- rerank
|
||||||
|
provider_category: manufacturer
|
||||||
4
src/langbot/pkg/provider/modelmgr/requesters/minimax.svg
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
<svg width="60" height="50" viewBox="0 0 60 50" xmlns="http://www.w3.org/2000/svg">
|
||||||
|
<rect width="60" height="50" rx="8" fill="#4F46E5"/>
|
||||||
|
<text x="30" y="32" font-family="Arial, sans-serif" font-size="12" font-weight="bold" fill="white" text-anchor="middle">MiniMax</text>
|
||||||
|
</svg>
|
||||||
|
After Width: | Height: | Size: 283 B |
@@ -0,0 +1,30 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: LLMAPIRequester
|
||||||
|
metadata:
|
||||||
|
name: minimax-chat-completions
|
||||||
|
label:
|
||||||
|
en_US: MiniMax
|
||||||
|
zh_Hans: MiniMax
|
||||||
|
icon: minimax.svg
|
||||||
|
spec:
|
||||||
|
litellm_provider: openai
|
||||||
|
config:
|
||||||
|
- name: base_url
|
||||||
|
label:
|
||||||
|
en_US: Base URL
|
||||||
|
zh_Hans: 基础 URL
|
||||||
|
type: string
|
||||||
|
required: true
|
||||||
|
default: https://api.minimax.chat/v1
|
||||||
|
- name: timeout
|
||||||
|
label:
|
||||||
|
en_US: Timeout
|
||||||
|
zh_Hans: 超时时间
|
||||||
|
type: integer
|
||||||
|
required: true
|
||||||
|
default: 120
|
||||||
|
support_type:
|
||||||
|
- llm
|
||||||
|
- text-embedding
|
||||||
|
- rerank
|
||||||
|
provider_category: manufacturer
|
||||||
5
src/langbot/pkg/provider/modelmgr/requesters/mistral.svg
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
<svg width="60" height="50" viewBox="0 0 60 50" xmlns="http://www.w3.org/2000/svg">
|
||||||
|
<rect width="60" height="50" rx="8" fill="#FF6B35"/>
|
||||||
|
<text x="30" y="28" font-family="Arial, sans-serif" font-size="10" font-weight="bold" fill="white" text-anchor="middle">Mistral</text>
|
||||||
|
<text x="30" y="40" font-family="Arial, sans-serif" font-size="8" fill="white" text-anchor="middle">AI</text>
|
||||||
|
</svg>
|
||||||
|
After Width: | Height: | Size: 395 B |
@@ -0,0 +1,30 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: LLMAPIRequester
|
||||||
|
metadata:
|
||||||
|
name: mistral-chat-completions
|
||||||
|
label:
|
||||||
|
en_US: Mistral AI
|
||||||
|
zh_Hans: Mistral AI
|
||||||
|
icon: mistral.svg
|
||||||
|
spec:
|
||||||
|
litellm_provider: mistral
|
||||||
|
config:
|
||||||
|
- name: base_url
|
||||||
|
label:
|
||||||
|
en_US: Base URL
|
||||||
|
zh_Hans: 基础 URL
|
||||||
|
type: string
|
||||||
|
required: true
|
||||||
|
default: https://api.mistral.ai/v1
|
||||||
|
- name: timeout
|
||||||
|
label:
|
||||||
|
en_US: Timeout
|
||||||
|
zh_Hans: 超时时间
|
||||||
|
type: integer
|
||||||
|
required: true
|
||||||
|
default: 120
|
||||||
|
support_type:
|
||||||
|
- llm
|
||||||
|
- text-embedding
|
||||||
|
- rerank
|
||||||
|
provider_category: manufacturer
|
||||||
@@ -1,561 +0,0 @@
|
|||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import typing
|
|
||||||
|
|
||||||
import openai
|
|
||||||
import openai.types.chat.chat_completion as chat_completion
|
|
||||||
import httpx
|
|
||||||
|
|
||||||
from .. import entities, errors, requester
|
|
||||||
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
|
|
||||||
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
|
|
||||||
import langbot_plugin.api.entities.builtin.provider.message as provider_message
|
|
||||||
|
|
||||||
|
|
||||||
class ModelScopeChatCompletions(requester.ProviderAPIRequester):
|
|
||||||
"""ModelScope ChatCompletion API 请求器"""
|
|
||||||
|
|
||||||
client: openai.AsyncClient
|
|
||||||
|
|
||||||
default_config: dict[str, typing.Any] = {
|
|
||||||
'base_url': 'https://api-inference.modelscope.cn/v1',
|
|
||||||
'timeout': 120,
|
|
||||||
}
|
|
||||||
|
|
||||||
async def initialize(self):
|
|
||||||
self.client = openai.AsyncClient(
|
|
||||||
api_key=self.init_api_key,
|
|
||||||
base_url=self.requester_cfg['base_url'],
|
|
||||||
timeout=self.requester_cfg['timeout'],
|
|
||||||
http_client=httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']),
|
|
||||||
)
|
|
||||||
|
|
||||||
def _mask_api_key(self, api_key: str | None) -> str:
|
|
||||||
if not api_key:
|
|
||||||
return ''
|
|
||||||
if len(api_key) <= 8:
|
|
||||||
return '****'
|
|
||||||
return f'{api_key[:4]}...{api_key[-4:]}'
|
|
||||||
|
|
||||||
def _infer_model_type(self, model_id: str) -> str:
|
|
||||||
normalized_model_id = (model_id or '').lower()
|
|
||||||
embedding_keywords = (
|
|
||||||
'embedding',
|
|
||||||
'embed',
|
|
||||||
'bge-',
|
|
||||||
'e5-',
|
|
||||||
'm3e',
|
|
||||||
'gte-',
|
|
||||||
'multilingual-e5',
|
|
||||||
'text-embedding',
|
|
||||||
)
|
|
||||||
return 'embedding' if any(keyword in normalized_model_id for keyword in embedding_keywords) else 'llm'
|
|
||||||
|
|
||||||
def _infer_model_abilities(self, item: dict[str, typing.Any], model_id: str) -> list[str]:
|
|
||||||
normalized_model_id = (model_id or '').lower()
|
|
||||||
abilities: set[str] = set()
|
|
||||||
|
|
||||||
def _flatten(value: typing.Any) -> list[str]:
|
|
||||||
if value is None:
|
|
||||||
return []
|
|
||||||
if isinstance(value, str):
|
|
||||||
return [value.lower()]
|
|
||||||
if isinstance(value, dict):
|
|
||||||
flattened: list[str] = []
|
|
||||||
for nested_value in value.values():
|
|
||||||
flattened.extend(_flatten(nested_value))
|
|
||||||
return flattened
|
|
||||||
if isinstance(value, (list, tuple, set)):
|
|
||||||
flattened: list[str] = []
|
|
||||||
for nested_value in value:
|
|
||||||
flattened.extend(_flatten(nested_value))
|
|
||||||
return flattened
|
|
||||||
return [str(value).lower()]
|
|
||||||
|
|
||||||
capability_tokens = _flatten(item.get('capabilities'))
|
|
||||||
capability_tokens.extend(_flatten(item.get('modalities')))
|
|
||||||
capability_tokens.extend(_flatten(item.get('input_modalities')))
|
|
||||||
capability_tokens.extend(_flatten(item.get('output_modalities')))
|
|
||||||
capability_tokens.extend(_flatten(item.get('supported_generation_methods')))
|
|
||||||
capability_tokens.extend(_flatten(item.get('supported_parameters')))
|
|
||||||
capability_tokens.extend(_flatten(item.get('architecture')))
|
|
||||||
|
|
||||||
combined_tokens = capability_tokens + [normalized_model_id]
|
|
||||||
|
|
||||||
vision_keywords = ('vision', 'image', 'file', 'video', 'multimodal', 'vl', 'ocr', 'omni')
|
|
||||||
function_call_keywords = ('function', 'tool', 'tools', 'tool_choice', 'tool_call', 'tool-use', 'tool_use')
|
|
||||||
|
|
||||||
if any(any(keyword in token for keyword in vision_keywords) for token in combined_tokens):
|
|
||||||
abilities.add('vision')
|
|
||||||
|
|
||||||
if any(any(keyword in token for keyword in function_call_keywords) for token in combined_tokens):
|
|
||||||
abilities.add('func_call')
|
|
||||||
|
|
||||||
return sorted(abilities)
|
|
||||||
|
|
||||||
def _normalize_modalities(self, value: typing.Any) -> list[str]:
|
|
||||||
normalized: list[str] = []
|
|
||||||
|
|
||||||
def _collect(item: typing.Any):
|
|
||||||
if item is None:
|
|
||||||
return
|
|
||||||
if isinstance(item, str):
|
|
||||||
for part in item.replace('->', ',').replace('+', ',').split(','):
|
|
||||||
token = part.strip().lower()
|
|
||||||
if token and token not in normalized:
|
|
||||||
normalized.append(token)
|
|
||||||
return
|
|
||||||
if isinstance(item, dict):
|
|
||||||
for nested in item.values():
|
|
||||||
_collect(nested)
|
|
||||||
return
|
|
||||||
if isinstance(item, (list, tuple, set)):
|
|
||||||
for nested in item:
|
|
||||||
_collect(nested)
|
|
||||||
return
|
|
||||||
|
|
||||||
_collect(value)
|
|
||||||
return normalized
|
|
||||||
|
|
||||||
def _extract_scan_metadata(self, item: dict[str, typing.Any], model_id: str) -> dict[str, typing.Any]:
|
|
||||||
display_name = item.get('name')
|
|
||||||
if not isinstance(display_name, str) or not display_name.strip() or display_name == model_id:
|
|
||||||
display_name = ''
|
|
||||||
|
|
||||||
description = item.get('description')
|
|
||||||
if not isinstance(description, str) or not description.strip():
|
|
||||||
description = ''
|
|
||||||
|
|
||||||
context_length = item.get('context_length')
|
|
||||||
if context_length is None and isinstance(item.get('top_provider'), dict):
|
|
||||||
context_length = item['top_provider'].get('context_length')
|
|
||||||
|
|
||||||
if not isinstance(context_length, int):
|
|
||||||
try:
|
|
||||||
context_length = int(context_length) if context_length is not None else None
|
|
||||||
except (TypeError, ValueError):
|
|
||||||
context_length = None
|
|
||||||
|
|
||||||
input_modalities = self._normalize_modalities(item.get('input_modalities'))
|
|
||||||
output_modalities = self._normalize_modalities(item.get('output_modalities'))
|
|
||||||
|
|
||||||
if isinstance(item.get('architecture'), dict):
|
|
||||||
if not input_modalities:
|
|
||||||
input_modalities = self._normalize_modalities(item['architecture'].get('input_modalities'))
|
|
||||||
if not output_modalities:
|
|
||||||
output_modalities = self._normalize_modalities(item['architecture'].get('output_modalities'))
|
|
||||||
|
|
||||||
owned_by = item.get('owned_by')
|
|
||||||
if not isinstance(owned_by, str) or not owned_by.strip():
|
|
||||||
owned_by = ''
|
|
||||||
|
|
||||||
return {
|
|
||||||
'display_name': display_name or None,
|
|
||||||
'description': description or None,
|
|
||||||
'context_length': context_length,
|
|
||||||
'owned_by': owned_by or None,
|
|
||||||
'input_modalities': input_modalities,
|
|
||||||
'output_modalities': output_modalities,
|
|
||||||
}
|
|
||||||
|
|
||||||
async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]:
|
|
||||||
headers = {}
|
|
||||||
if api_key:
|
|
||||||
headers['Authorization'] = f'Bearer {api_key}'
|
|
||||||
|
|
||||||
models_url = f'{self.requester_cfg["base_url"].rstrip("/")}/models'
|
|
||||||
async with httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']) as client:
|
|
||||||
response = await client.get(models_url, headers=headers)
|
|
||||||
response.raise_for_status()
|
|
||||||
payload = response.json()
|
|
||||||
|
|
||||||
models = []
|
|
||||||
for item in payload.get('data', []):
|
|
||||||
model_id = item.get('id')
|
|
||||||
if not model_id:
|
|
||||||
continue
|
|
||||||
models.append(
|
|
||||||
{
|
|
||||||
'id': model_id,
|
|
||||||
'name': model_id,
|
|
||||||
'type': self._infer_model_type(model_id),
|
|
||||||
'abilities': self._infer_model_abilities(item, model_id),
|
|
||||||
**self._extract_scan_metadata(item, model_id),
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
models.sort(key=lambda item: (item['type'] != 'llm', item['name'].lower()))
|
|
||||||
return {
|
|
||||||
'models': models,
|
|
||||||
'debug': {
|
|
||||||
'request': {
|
|
||||||
'method': 'GET',
|
|
||||||
'url': models_url,
|
|
||||||
'headers': {
|
|
||||||
'Authorization': f'Bearer {self._mask_api_key(api_key)}' if api_key else '',
|
|
||||||
},
|
|
||||||
},
|
|
||||||
'response': payload,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
async def _req(
|
|
||||||
self,
|
|
||||||
query: pipeline_query.Query,
|
|
||||||
args: dict,
|
|
||||||
extra_body: dict = {},
|
|
||||||
remove_think: bool = False,
|
|
||||||
) -> list[dict[str, typing.Any]]:
|
|
||||||
args['stream'] = True
|
|
||||||
|
|
||||||
chunk = None
|
|
||||||
|
|
||||||
pending_content = ''
|
|
||||||
|
|
||||||
tool_calls = []
|
|
||||||
|
|
||||||
resp_gen: openai.AsyncStream = await self.client.chat.completions.create(**args, extra_body=extra_body)
|
|
||||||
|
|
||||||
chunk_idx = 0
|
|
||||||
thinking_started = False
|
|
||||||
thinking_ended = False
|
|
||||||
tool_id = ''
|
|
||||||
tool_name = ''
|
|
||||||
message_delta = {}
|
|
||||||
async for chunk in resp_gen:
|
|
||||||
if not chunk or not chunk.id or not chunk.choices or not chunk.choices[0] or not chunk.choices[0].delta:
|
|
||||||
continue
|
|
||||||
|
|
||||||
delta = chunk.choices[0].delta.model_dump() if hasattr(chunk.choices[0], 'delta') else {}
|
|
||||||
reasoning_content = delta.get('reasoning_content')
|
|
||||||
# 处理 reasoning_content
|
|
||||||
if reasoning_content:
|
|
||||||
# accumulated_reasoning += reasoning_content
|
|
||||||
# 如果设置了 remove_think,跳过 reasoning_content
|
|
||||||
if remove_think:
|
|
||||||
chunk_idx += 1
|
|
||||||
continue
|
|
||||||
|
|
||||||
# 第一次出现 reasoning_content,添加 <think> 开始标签
|
|
||||||
if not thinking_started:
|
|
||||||
thinking_started = True
|
|
||||||
pending_content += '<think>\n' + reasoning_content
|
|
||||||
else:
|
|
||||||
# 继续输出 reasoning_content
|
|
||||||
pending_content += reasoning_content
|
|
||||||
elif thinking_started and not thinking_ended and delta.get('content'):
|
|
||||||
# reasoning_content 结束,normal content 开始,添加 </think> 结束标签
|
|
||||||
thinking_ended = True
|
|
||||||
pending_content += '\n</think>\n' + delta.get('content')
|
|
||||||
|
|
||||||
if delta.get('content') is not None:
|
|
||||||
pending_content += delta.get('content')
|
|
||||||
|
|
||||||
if delta.get('tool_calls') is not None:
|
|
||||||
for tool_call in delta.get('tool_calls'):
|
|
||||||
if tool_call['id'] != '':
|
|
||||||
tool_id = tool_call['id']
|
|
||||||
if tool_call['function']['name'] is not None:
|
|
||||||
tool_name = tool_call['function']['name']
|
|
||||||
if tool_call['function']['arguments'] is None:
|
|
||||||
continue
|
|
||||||
tool_call['id'] = tool_id
|
|
||||||
tool_call['name'] = tool_name
|
|
||||||
for tc in tool_calls:
|
|
||||||
if tc['index'] == tool_call['index']:
|
|
||||||
tc['function']['arguments'] += tool_call['function']['arguments']
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
tool_calls.append(tool_call)
|
|
||||||
|
|
||||||
if chunk.choices[0].finish_reason is not None:
|
|
||||||
break
|
|
||||||
message_delta['content'] = pending_content
|
|
||||||
message_delta['role'] = 'assistant'
|
|
||||||
|
|
||||||
message_delta['tool_calls'] = tool_calls if tool_calls else None
|
|
||||||
return [message_delta]
|
|
||||||
|
|
||||||
async def _make_msg(
|
|
||||||
self,
|
|
||||||
chat_completion: list[dict[str, typing.Any]],
|
|
||||||
) -> provider_message.Message:
|
|
||||||
chatcmpl_message = chat_completion[0]
|
|
||||||
|
|
||||||
# 确保 role 字段存在且不为 None
|
|
||||||
if 'role' not in chatcmpl_message or chatcmpl_message['role'] is None:
|
|
||||||
chatcmpl_message['role'] = 'assistant'
|
|
||||||
|
|
||||||
message = provider_message.Message(**chatcmpl_message)
|
|
||||||
|
|
||||||
return message
|
|
||||||
|
|
||||||
async def _closure(
|
|
||||||
self,
|
|
||||||
query: pipeline_query.Query,
|
|
||||||
req_messages: list[dict],
|
|
||||||
use_model: requester.RuntimeLLMModel,
|
|
||||||
use_funcs: list[resource_tool.LLMTool] = None,
|
|
||||||
extra_args: dict[str, typing.Any] = {},
|
|
||||||
remove_think: bool = False,
|
|
||||||
) -> tuple[provider_message.Message, dict]:
|
|
||||||
self.client.api_key = use_model.provider.token_mgr.get_token()
|
|
||||||
|
|
||||||
args = {}
|
|
||||||
args['model'] = use_model.model_entity.name
|
|
||||||
|
|
||||||
if use_funcs:
|
|
||||||
tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
|
|
||||||
|
|
||||||
if tools:
|
|
||||||
args['tools'] = tools
|
|
||||||
|
|
||||||
# 设置此次请求中的messages
|
|
||||||
messages = req_messages.copy()
|
|
||||||
|
|
||||||
# 检查vision
|
|
||||||
for msg in messages:
|
|
||||||
if 'content' in msg and isinstance(msg['content'], list):
|
|
||||||
for me in msg['content']:
|
|
||||||
if me['type'] == 'image_base64':
|
|
||||||
me['image_url'] = {'url': me['image_base64']}
|
|
||||||
me['type'] = 'image_url'
|
|
||||||
del me['image_base64']
|
|
||||||
|
|
||||||
args['messages'] = messages
|
|
||||||
|
|
||||||
# 发送请求
|
|
||||||
resp = await self._req(query, args, extra_body=extra_args, remove_think=remove_think)
|
|
||||||
|
|
||||||
# 处理请求结果
|
|
||||||
message = await self._make_msg(resp)
|
|
||||||
|
|
||||||
# ModelScope uses streaming, usage info not available
|
|
||||||
usage_info = {}
|
|
||||||
|
|
||||||
return message, usage_info
|
|
||||||
|
|
||||||
async def _req_stream(
|
|
||||||
self,
|
|
||||||
args: dict,
|
|
||||||
extra_body: dict = {},
|
|
||||||
) -> chat_completion.ChatCompletion:
|
|
||||||
async for chunk in await self.client.chat.completions.create(**args, extra_body=extra_body):
|
|
||||||
yield chunk
|
|
||||||
|
|
||||||
async def _closure_stream(
|
|
||||||
self,
|
|
||||||
query: pipeline_query.Query,
|
|
||||||
req_messages: list[dict],
|
|
||||||
use_model: requester.RuntimeLLMModel,
|
|
||||||
use_funcs: list[resource_tool.LLMTool] = None,
|
|
||||||
extra_args: dict[str, typing.Any] = {},
|
|
||||||
remove_think: bool = False,
|
|
||||||
) -> provider_message.Message | typing.AsyncGenerator[provider_message.MessageChunk, None]:
|
|
||||||
self.client.api_key = use_model.provider.token_mgr.get_token()
|
|
||||||
|
|
||||||
args = {}
|
|
||||||
args['model'] = use_model.model_entity.name
|
|
||||||
|
|
||||||
if use_funcs:
|
|
||||||
tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
|
|
||||||
|
|
||||||
if tools:
|
|
||||||
args['tools'] = tools
|
|
||||||
|
|
||||||
# 设置此次请求中的messages
|
|
||||||
messages = req_messages.copy()
|
|
||||||
|
|
||||||
# 检查vision
|
|
||||||
for msg in messages:
|
|
||||||
if 'content' in msg and isinstance(msg['content'], list):
|
|
||||||
for me in msg['content']:
|
|
||||||
if me['type'] == 'image_base64':
|
|
||||||
me['image_url'] = {'url': me['image_base64']}
|
|
||||||
me['type'] = 'image_url'
|
|
||||||
del me['image_base64']
|
|
||||||
|
|
||||||
args['messages'] = messages
|
|
||||||
args['stream'] = True
|
|
||||||
|
|
||||||
# 流式处理状态
|
|
||||||
# tool_calls_map: dict[str, provider_message.ToolCall] = {}
|
|
||||||
chunk_idx = 0
|
|
||||||
thinking_started = False
|
|
||||||
thinking_ended = False
|
|
||||||
role = 'assistant' # 默认角色
|
|
||||||
# accumulated_reasoning = '' # 仅用于判断何时结束思维链
|
|
||||||
|
|
||||||
async for chunk in self._req_stream(args, extra_body=extra_args):
|
|
||||||
# 解析 chunk 数据
|
|
||||||
if hasattr(chunk, 'choices') and chunk.choices:
|
|
||||||
choice = chunk.choices[0]
|
|
||||||
delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
|
|
||||||
finish_reason = getattr(choice, 'finish_reason', None)
|
|
||||||
else:
|
|
||||||
delta = {}
|
|
||||||
finish_reason = None
|
|
||||||
|
|
||||||
# 从第一个 chunk 获取 role,后续使用这个 role
|
|
||||||
if 'role' in delta and delta['role']:
|
|
||||||
role = delta['role']
|
|
||||||
|
|
||||||
# 获取增量内容
|
|
||||||
delta_content = delta.get('content', '')
|
|
||||||
reasoning_content = delta.get('reasoning_content', '')
|
|
||||||
|
|
||||||
# 处理 reasoning_content
|
|
||||||
if reasoning_content:
|
|
||||||
# accumulated_reasoning += reasoning_content
|
|
||||||
# 如果设置了 remove_think,跳过 reasoning_content
|
|
||||||
if remove_think:
|
|
||||||
chunk_idx += 1
|
|
||||||
continue
|
|
||||||
|
|
||||||
# 第一次出现 reasoning_content,添加 <think> 开始标签
|
|
||||||
if not thinking_started:
|
|
||||||
thinking_started = True
|
|
||||||
delta_content = '<think>\n' + reasoning_content
|
|
||||||
else:
|
|
||||||
# 继续输出 reasoning_content
|
|
||||||
delta_content = reasoning_content
|
|
||||||
elif thinking_started and not thinking_ended and delta_content:
|
|
||||||
# reasoning_content 结束,normal content 开始,添加 </think> 结束标签
|
|
||||||
thinking_ended = True
|
|
||||||
delta_content = '\n</think>\n' + delta_content
|
|
||||||
|
|
||||||
# 处理 content 中已有的 <think> 标签(如果需要移除)
|
|
||||||
# if delta_content and remove_think and '<think>' in delta_content:
|
|
||||||
# import re
|
|
||||||
#
|
|
||||||
# # 移除 <think> 标签及其内容
|
|
||||||
# delta_content = re.sub(r'<think>.*?</think>', '', delta_content, flags=re.DOTALL)
|
|
||||||
|
|
||||||
# 处理工具调用增量
|
|
||||||
if delta.get('tool_calls'):
|
|
||||||
for tool_call in delta['tool_calls']:
|
|
||||||
if tool_call['id'] != '':
|
|
||||||
tool_id = tool_call['id']
|
|
||||||
if tool_call['function']['name'] is not None:
|
|
||||||
tool_name = tool_call['function']['name']
|
|
||||||
|
|
||||||
if tool_call['type'] is None:
|
|
||||||
tool_call['type'] = 'function'
|
|
||||||
tool_call['id'] = tool_id
|
|
||||||
tool_call['function']['name'] = tool_name
|
|
||||||
tool_call['function']['arguments'] = (
|
|
||||||
'' if tool_call['function']['arguments'] is None else tool_call['function']['arguments']
|
|
||||||
)
|
|
||||||
|
|
||||||
# 跳过空的第一个 chunk(只有 role 没有内容)
|
|
||||||
if chunk_idx == 0 and not delta_content and not reasoning_content and not delta.get('tool_calls'):
|
|
||||||
chunk_idx += 1
|
|
||||||
continue
|
|
||||||
|
|
||||||
# 构建 MessageChunk - 只包含增量内容
|
|
||||||
chunk_data = {
|
|
||||||
'role': role,
|
|
||||||
'content': delta_content if delta_content else None,
|
|
||||||
'tool_calls': delta.get('tool_calls'),
|
|
||||||
'is_final': bool(finish_reason),
|
|
||||||
}
|
|
||||||
|
|
||||||
# 移除 None 值
|
|
||||||
chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
|
|
||||||
|
|
||||||
yield provider_message.MessageChunk(**chunk_data)
|
|
||||||
chunk_idx += 1
|
|
||||||
# return
|
|
||||||
|
|
||||||
async def invoke_llm(
|
|
||||||
self,
|
|
||||||
query: pipeline_query.Query,
|
|
||||||
model: entities.LLMModelInfo,
|
|
||||||
messages: typing.List[provider_message.Message],
|
|
||||||
funcs: typing.List[resource_tool.LLMTool] = None,
|
|
||||||
extra_args: dict[str, typing.Any] = {},
|
|
||||||
remove_think: bool = False,
|
|
||||||
) -> provider_message.Message:
|
|
||||||
req_messages = [] # req_messages 仅用于类内,外部同步由 query.messages 进行
|
|
||||||
for m in messages:
|
|
||||||
msg_dict = m.dict(exclude_none=True)
|
|
||||||
content = msg_dict.get('content')
|
|
||||||
if isinstance(content, list):
|
|
||||||
# 检查 content 列表中是否每个部分都是文本
|
|
||||||
if all(isinstance(part, dict) and part.get('type') == 'text' for part in content):
|
|
||||||
# 将所有文本部分合并为一个字符串
|
|
||||||
msg_dict['content'] = '\n'.join(part['text'] for part in content)
|
|
||||||
req_messages.append(msg_dict)
|
|
||||||
|
|
||||||
try:
|
|
||||||
return await self._closure(
|
|
||||||
query=query,
|
|
||||||
req_messages=req_messages,
|
|
||||||
use_model=model,
|
|
||||||
use_funcs=funcs,
|
|
||||||
extra_args=extra_args,
|
|
||||||
remove_think=remove_think,
|
|
||||||
)
|
|
||||||
except asyncio.TimeoutError:
|
|
||||||
raise errors.RequesterError('请求超时')
|
|
||||||
except openai.BadRequestError as e:
|
|
||||||
if 'context_length_exceeded' in e.message:
|
|
||||||
raise errors.RequesterError(f'上文过长,请重置会话: {e.message}')
|
|
||||||
else:
|
|
||||||
raise errors.RequesterError(f'请求参数错误: {e.message}')
|
|
||||||
except openai.AuthenticationError as e:
|
|
||||||
raise errors.RequesterError(f'无效的 api-key: {e.message}')
|
|
||||||
except openai.NotFoundError as e:
|
|
||||||
raise errors.RequesterError(f'请求路径错误: {e.message}')
|
|
||||||
except openai.RateLimitError as e:
|
|
||||||
raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}')
|
|
||||||
except openai.APIError as e:
|
|
||||||
raise errors.RequesterError(f'请求错误: {e.message}')
|
|
||||||
|
|
||||||
async def invoke_llm_stream(
|
|
||||||
self,
|
|
||||||
query: pipeline_query.Query,
|
|
||||||
model: requester.RuntimeLLMModel,
|
|
||||||
messages: typing.List[provider_message.Message],
|
|
||||||
funcs: typing.List[resource_tool.LLMTool] = None,
|
|
||||||
extra_args: dict[str, typing.Any] = {},
|
|
||||||
remove_think: bool = False,
|
|
||||||
) -> provider_message.MessageChunk:
|
|
||||||
req_messages = [] # req_messages 仅用于类内,外部同步由 query.messages 进行
|
|
||||||
for m in messages:
|
|
||||||
msg_dict = m.dict(exclude_none=True)
|
|
||||||
content = msg_dict.get('content')
|
|
||||||
if isinstance(content, list):
|
|
||||||
# 检查 content 列表中是否每个部分都是文本
|
|
||||||
if all(isinstance(part, dict) and part.get('type') == 'text' for part in content):
|
|
||||||
# 将所有文本部分合并为一个字符串
|
|
||||||
msg_dict['content'] = '\n'.join(part['text'] for part in content)
|
|
||||||
req_messages.append(msg_dict)
|
|
||||||
|
|
||||||
try:
|
|
||||||
async for item in self._closure_stream(
|
|
||||||
query=query,
|
|
||||||
req_messages=req_messages,
|
|
||||||
use_model=model,
|
|
||||||
use_funcs=funcs,
|
|
||||||
extra_args=extra_args,
|
|
||||||
remove_think=remove_think,
|
|
||||||
):
|
|
||||||
yield item
|
|
||||||
|
|
||||||
except asyncio.TimeoutError:
|
|
||||||
raise errors.RequesterError('请求超时')
|
|
||||||
except openai.BadRequestError as e:
|
|
||||||
if 'context_length_exceeded' in e.message:
|
|
||||||
raise errors.RequesterError(f'上文过长,请重置会话: {e.message}')
|
|
||||||
else:
|
|
||||||
raise errors.RequesterError(f'请求参数错误: {e.message}')
|
|
||||||
except openai.AuthenticationError as e:
|
|
||||||
raise errors.RequesterError(f'无效的 api-key: {e.message}')
|
|
||||||
except openai.NotFoundError as e:
|
|
||||||
raise errors.RequesterError(f'请求路径错误: {e.message}')
|
|
||||||
except openai.RateLimitError as e:
|
|
||||||
raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}')
|
|
||||||
except openai.APIError as e:
|
|
||||||
raise errors.RequesterError(f'请求错误: {e.message}')
|
|
||||||
@@ -7,6 +7,7 @@ metadata:
|
|||||||
zh_Hans: 魔搭社区
|
zh_Hans: 魔搭社区
|
||||||
icon: modelscope.svg
|
icon: modelscope.svg
|
||||||
spec:
|
spec:
|
||||||
|
litellm_provider: openai
|
||||||
config:
|
config:
|
||||||
- name: base_url
|
- name: base_url
|
||||||
label:
|
label:
|
||||||
@@ -31,6 +32,8 @@ spec:
|
|||||||
default: 120
|
default: 120
|
||||||
support_type:
|
support_type:
|
||||||
- llm
|
- llm
|
||||||
|
- text-embedding
|
||||||
|
- rerank
|
||||||
provider_category: maas
|
provider_category: maas
|
||||||
execution:
|
execution:
|
||||||
python:
|
python:
|
||||||
|
|||||||
@@ -1,67 +0,0 @@
|
|||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import typing
|
|
||||||
|
|
||||||
|
|
||||||
from . import chatcmpl
|
|
||||||
from .. import requester
|
|
||||||
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
|
|
||||||
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
|
|
||||||
import langbot_plugin.api.entities.builtin.provider.message as provider_message
|
|
||||||
|
|
||||||
|
|
||||||
class MoonshotChatCompletions(chatcmpl.OpenAIChatCompletions):
|
|
||||||
"""Moonshot ChatCompletion API 请求器"""
|
|
||||||
|
|
||||||
default_config: dict[str, typing.Any] = {
|
|
||||||
'base_url': 'https://api.moonshot.cn/v1',
|
|
||||||
'timeout': 120,
|
|
||||||
}
|
|
||||||
|
|
||||||
async def _closure(
|
|
||||||
self,
|
|
||||||
query: pipeline_query.Query,
|
|
||||||
req_messages: list[dict],
|
|
||||||
use_model: requester.RuntimeLLMModel,
|
|
||||||
use_funcs: list[resource_tool.LLMTool] = None,
|
|
||||||
extra_args: dict[str, typing.Any] = {},
|
|
||||||
remove_think: bool = False,
|
|
||||||
) -> tuple[provider_message.Message, dict]:
|
|
||||||
self.client.api_key = use_model.provider.token_mgr.get_token()
|
|
||||||
|
|
||||||
args = {}
|
|
||||||
args['model'] = use_model.model_entity.name
|
|
||||||
|
|
||||||
if use_funcs:
|
|
||||||
tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
|
|
||||||
|
|
||||||
if tools:
|
|
||||||
args['tools'] = tools
|
|
||||||
|
|
||||||
# 设置此次请求中的messages
|
|
||||||
messages = req_messages
|
|
||||||
|
|
||||||
# deepseek 不支持多模态,把content都转换成纯文字
|
|
||||||
for m in messages:
|
|
||||||
if 'content' in m and isinstance(m['content'], list):
|
|
||||||
m['content'] = ' '.join([c['text'] for c in m['content']])
|
|
||||||
|
|
||||||
# 删除空的,不知道干嘛的,直接删了。
|
|
||||||
# messages = [m for m in messages if m["content"].strip() != "" and ('tool_calls' not in m or not m['tool_calls'])]
|
|
||||||
|
|
||||||
args['messages'] = messages
|
|
||||||
|
|
||||||
# 发送请求
|
|
||||||
resp = await self._req(args, extra_body=extra_args)
|
|
||||||
|
|
||||||
# 处理请求结果
|
|
||||||
message = await self._make_msg(resp, remove_think)
|
|
||||||
|
|
||||||
# Extract token usage from response
|
|
||||||
usage_info = {}
|
|
||||||
if hasattr(resp, 'usage') and resp.usage:
|
|
||||||
usage_info['input_tokens'] = resp.usage.prompt_tokens or 0
|
|
||||||
usage_info['output_tokens'] = resp.usage.completion_tokens or 0
|
|
||||||
usage_info['total_tokens'] = resp.usage.total_tokens or 0
|
|
||||||
|
|
||||||
return message, usage_info
|
|
||||||
@@ -7,6 +7,7 @@ metadata:
|
|||||||
zh_Hans: 月之暗面
|
zh_Hans: 月之暗面
|
||||||
icon: moonshot.png
|
icon: moonshot.png
|
||||||
spec:
|
spec:
|
||||||
|
litellm_provider: openai
|
||||||
config:
|
config:
|
||||||
- name: base_url
|
- name: base_url
|
||||||
label:
|
label:
|
||||||
@@ -24,6 +25,8 @@ spec:
|
|||||||
default: 120
|
default: 120
|
||||||
support_type:
|
support_type:
|
||||||
- llm
|
- llm
|
||||||
|
- text-embedding
|
||||||
|
- rerank
|
||||||
provider_category: manufacturer
|
provider_category: manufacturer
|
||||||
execution:
|
execution:
|
||||||
python:
|
python:
|
||||||
|
|||||||
@@ -1,17 +0,0 @@
|
|||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import typing
|
|
||||||
import openai
|
|
||||||
|
|
||||||
from . import chatcmpl
|
|
||||||
|
|
||||||
|
|
||||||
class NewAPIChatCompletions(chatcmpl.OpenAIChatCompletions):
|
|
||||||
"""New API ChatCompletion API 请求器"""
|
|
||||||
|
|
||||||
client: openai.AsyncClient
|
|
||||||
|
|
||||||
default_config: dict[str, typing.Any] = {
|
|
||||||
'base_url': 'http://localhost:3000/v1',
|
|
||||||
'timeout': 120,
|
|
||||||
}
|
|
||||||
@@ -7,6 +7,7 @@ metadata:
|
|||||||
zh_Hans: New API
|
zh_Hans: New API
|
||||||
icon: newapi.png
|
icon: newapi.png
|
||||||
spec:
|
spec:
|
||||||
|
litellm_provider: openai
|
||||||
config:
|
config:
|
||||||
- name: base_url
|
- name: base_url
|
||||||
label:
|
label:
|
||||||
|
|||||||
@@ -1,314 +0,0 @@
|
|||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import os
|
|
||||||
import typing
|
|
||||||
from typing import Union, Mapping, Any, AsyncIterator
|
|
||||||
import uuid
|
|
||||||
import json
|
|
||||||
|
|
||||||
import ollama
|
|
||||||
import httpx
|
|
||||||
|
|
||||||
from .. import errors, requester
|
|
||||||
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
|
|
||||||
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
|
|
||||||
import langbot_plugin.api.entities.builtin.provider.message as provider_message
|
|
||||||
|
|
||||||
REQUESTER_NAME: str = 'ollama-chat'
|
|
||||||
|
|
||||||
|
|
||||||
class OllamaChatCompletions(requester.ProviderAPIRequester):
|
|
||||||
"""Ollama平台 ChatCompletion API请求器"""
|
|
||||||
|
|
||||||
client: ollama.AsyncClient
|
|
||||||
|
|
||||||
default_config: dict[str, typing.Any] = {
|
|
||||||
'base_url': 'http://127.0.0.1:11434',
|
|
||||||
'timeout': 120,
|
|
||||||
}
|
|
||||||
|
|
||||||
async def initialize(self):
|
|
||||||
os.environ['OLLAMA_HOST'] = self.requester_cfg['base_url']
|
|
||||||
self.client = ollama.AsyncClient(timeout=self.requester_cfg['timeout'])
|
|
||||||
|
|
||||||
def _infer_model_type(self, model_id: str) -> str:
|
|
||||||
normalized_model_id = (model_id or '').lower()
|
|
||||||
embedding_keywords = ('embedding', 'embed', 'bge-', 'e5-', 'm3e', 'gte-', 'text-embedding')
|
|
||||||
return 'embedding' if any(keyword in normalized_model_id for keyword in embedding_keywords) else 'llm'
|
|
||||||
|
|
||||||
def _infer_model_abilities(self, item: dict[str, typing.Any], model_id: str) -> list[str]:
|
|
||||||
normalized_model_id = (model_id or '').lower()
|
|
||||||
abilities: set[str] = set()
|
|
||||||
details = item.get('details', {}) or {}
|
|
||||||
families = details.get('families', []) or []
|
|
||||||
tokens = [normalized_model_id, str(details.get('family', '')).lower()]
|
|
||||||
tokens.extend(str(family).lower() for family in families)
|
|
||||||
|
|
||||||
if any(keyword in token for token in tokens for keyword in ('vision', 'vl', 'omni', 'llava', 'ocr')):
|
|
||||||
abilities.add('vision')
|
|
||||||
if any(keyword in token for token in tokens for keyword in ('tool', 'function')):
|
|
||||||
abilities.add('func_call')
|
|
||||||
return sorted(abilities)
|
|
||||||
|
|
||||||
async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]:
|
|
||||||
del api_key
|
|
||||||
models_url = f'{self.requester_cfg["base_url"].rstrip("/")}/api/tags'
|
|
||||||
|
|
||||||
async with httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']) as client:
|
|
||||||
response = await client.get(models_url)
|
|
||||||
response.raise_for_status()
|
|
||||||
payload = response.json()
|
|
||||||
|
|
||||||
models: list[dict[str, typing.Any]] = []
|
|
||||||
for item in payload.get('models', []):
|
|
||||||
model_id = item.get('model') or item.get('name')
|
|
||||||
if not model_id:
|
|
||||||
continue
|
|
||||||
models.append(
|
|
||||||
{
|
|
||||||
'id': model_id,
|
|
||||||
'name': item.get('name', model_id),
|
|
||||||
'type': self._infer_model_type(model_id),
|
|
||||||
'abilities': self._infer_model_abilities(item, model_id),
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
models.sort(key=lambda item: (item['type'] != 'llm', item['name'].lower()))
|
|
||||||
return {
|
|
||||||
'models': models,
|
|
||||||
'debug': {
|
|
||||||
'request': {
|
|
||||||
'method': 'GET',
|
|
||||||
'url': models_url,
|
|
||||||
},
|
|
||||||
'response': payload,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
async def _req(
|
|
||||||
self,
|
|
||||||
args: dict,
|
|
||||||
) -> Union[Mapping[str, Any], AsyncIterator[Mapping[str, Any]]]:
|
|
||||||
return await self.client.chat(**args)
|
|
||||||
|
|
||||||
async def _closure(
|
|
||||||
self,
|
|
||||||
query: pipeline_query.Query,
|
|
||||||
req_messages: list[dict],
|
|
||||||
use_model: requester.RuntimeLLMModel,
|
|
||||||
use_funcs: list[resource_tool.LLMTool] = None,
|
|
||||||
extra_args: dict[str, typing.Any] = {},
|
|
||||||
remove_think: bool = False,
|
|
||||||
) -> provider_message.Message:
|
|
||||||
args = extra_args.copy()
|
|
||||||
args['model'] = use_model.model_entity.name
|
|
||||||
|
|
||||||
messages: list[dict] = req_messages.copy()
|
|
||||||
for msg in messages:
|
|
||||||
if 'content' in msg and isinstance(msg['content'], list):
|
|
||||||
text_content: list = []
|
|
||||||
image_urls: list = []
|
|
||||||
for me in msg['content']:
|
|
||||||
if me['type'] == 'text':
|
|
||||||
text_content.append(me['text'])
|
|
||||||
elif me['type'] == 'image_base64':
|
|
||||||
image_urls.append(me['image_base64'])
|
|
||||||
|
|
||||||
msg['content'] = '\n'.join(text_content)
|
|
||||||
msg['images'] = [url.split(',')[1] for url in image_urls]
|
|
||||||
if 'tool_calls' in msg: # LangBot 内部以 str 存储 tool_calls 的参数,这里需要转换为 dict
|
|
||||||
for tool_call in msg['tool_calls']:
|
|
||||||
tool_call['function']['arguments'] = json.loads(tool_call['function']['arguments'])
|
|
||||||
args['messages'] = messages
|
|
||||||
|
|
||||||
args['tools'] = []
|
|
||||||
if use_funcs:
|
|
||||||
tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
|
|
||||||
if tools:
|
|
||||||
args['tools'] = tools
|
|
||||||
|
|
||||||
resp = await self._req(args)
|
|
||||||
message: provider_message.Message = await self._make_msg(resp)
|
|
||||||
return message
|
|
||||||
|
|
||||||
async def _make_msg(self, chat_completions: ollama.ChatResponse) -> provider_message.Message:
|
|
||||||
message: ollama.Message = chat_completions.message
|
|
||||||
if message is None:
|
|
||||||
raise ValueError("chat_completions must contain a 'message' field")
|
|
||||||
|
|
||||||
ret_msg: provider_message.Message = None
|
|
||||||
|
|
||||||
if message.content is not None:
|
|
||||||
ret_msg = provider_message.Message(role='assistant', content=message.content)
|
|
||||||
if message.tool_calls is not None and len(message.tool_calls) > 0:
|
|
||||||
tool_calls: list[provider_message.ToolCall] = []
|
|
||||||
|
|
||||||
for tool_call in message.tool_calls:
|
|
||||||
tool_calls.append(
|
|
||||||
provider_message.ToolCall(
|
|
||||||
id=uuid.uuid4().hex,
|
|
||||||
type='function',
|
|
||||||
function=provider_message.FunctionCall(
|
|
||||||
name=tool_call.function.name,
|
|
||||||
arguments=json.dumps(tool_call.function.arguments),
|
|
||||||
),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
ret_msg.tool_calls = tool_calls
|
|
||||||
|
|
||||||
return ret_msg
|
|
||||||
|
|
||||||
async def _prepare_messages(
|
|
||||||
self,
|
|
||||||
messages: typing.List[provider_message.Message],
|
|
||||||
) -> list[dict]:
|
|
||||||
"""Prepare messages for Ollama API request."""
|
|
||||||
req_messages: list = []
|
|
||||||
for m in messages:
|
|
||||||
msg_dict: dict = m.dict(exclude_none=True)
|
|
||||||
content: Any = msg_dict.get('content')
|
|
||||||
if isinstance(content, list):
|
|
||||||
if all(isinstance(part, dict) and part.get('type') == 'text' for part in content):
|
|
||||||
msg_dict['content'] = '\n'.join(part['text'] for part in content)
|
|
||||||
req_messages.append(msg_dict)
|
|
||||||
return req_messages
|
|
||||||
|
|
||||||
async def invoke_llm(
|
|
||||||
self,
|
|
||||||
query: pipeline_query.Query,
|
|
||||||
model: requester.RuntimeLLMModel,
|
|
||||||
messages: typing.List[provider_message.Message],
|
|
||||||
funcs: typing.List[resource_tool.LLMTool] = None,
|
|
||||||
extra_args: dict[str, typing.Any] = {},
|
|
||||||
remove_think: bool = False,
|
|
||||||
) -> provider_message.Message:
|
|
||||||
req_messages = await self._prepare_messages(messages)
|
|
||||||
try:
|
|
||||||
return await self._closure(
|
|
||||||
query=query,
|
|
||||||
req_messages=req_messages,
|
|
||||||
use_model=model,
|
|
||||||
use_funcs=funcs,
|
|
||||||
extra_args=extra_args,
|
|
||||||
remove_think=remove_think,
|
|
||||||
)
|
|
||||||
except asyncio.TimeoutError:
|
|
||||||
raise errors.RequesterError('请求超时')
|
|
||||||
|
|
||||||
async def invoke_llm_stream(
|
|
||||||
self,
|
|
||||||
query: pipeline_query.Query,
|
|
||||||
model: requester.RuntimeLLMModel,
|
|
||||||
messages: typing.List[provider_message.Message],
|
|
||||||
funcs: typing.List[resource_tool.LLMTool] = None,
|
|
||||||
extra_args: dict[str, typing.Any] = {},
|
|
||||||
remove_think: bool = False,
|
|
||||||
) -> provider_message.MessageChunk:
|
|
||||||
req_messages = await self._prepare_messages(messages)
|
|
||||||
|
|
||||||
try:
|
|
||||||
args = extra_args.copy()
|
|
||||||
args['model'] = model.model_entity.name
|
|
||||||
|
|
||||||
# Process messages for Ollama format
|
|
||||||
msgs: list[dict] = req_messages.copy()
|
|
||||||
for msg in msgs:
|
|
||||||
if 'content' in msg and isinstance(msg['content'], list):
|
|
||||||
text_content: list = []
|
|
||||||
image_urls: list = []
|
|
||||||
for me in msg['content']:
|
|
||||||
if me['type'] == 'text':
|
|
||||||
text_content.append(me['text'])
|
|
||||||
elif me['type'] == 'image_base64':
|
|
||||||
image_urls.append(me['image_base64'])
|
|
||||||
msg['content'] = '\n'.join(text_content)
|
|
||||||
msg['images'] = [url.split(',')[1] for url in image_urls]
|
|
||||||
if 'tool_calls' in msg:
|
|
||||||
for tool_call in msg['tool_calls']:
|
|
||||||
tool_call['function']['arguments'] = json.loads(tool_call['function']['arguments'])
|
|
||||||
args['messages'] = msgs
|
|
||||||
|
|
||||||
args['tools'] = []
|
|
||||||
if funcs:
|
|
||||||
tools = await self.ap.tool_mgr.generate_tools_for_openai(funcs)
|
|
||||||
if tools:
|
|
||||||
args['tools'] = tools
|
|
||||||
|
|
||||||
args['stream'] = True
|
|
||||||
|
|
||||||
chunk_idx = 0
|
|
||||||
thinking_started = False
|
|
||||||
thinking_ended = False
|
|
||||||
role = 'assistant'
|
|
||||||
|
|
||||||
async for chunk in await self.client.chat(**args):
|
|
||||||
message: ollama.Message = chunk.message
|
|
||||||
done = chunk.done
|
|
||||||
|
|
||||||
delta_content = message.content or ''
|
|
||||||
reasoning_content = getattr(message, 'thinking', '') or ''
|
|
||||||
|
|
||||||
# Handle reasoning/thinking content
|
|
||||||
if reasoning_content:
|
|
||||||
if remove_think:
|
|
||||||
chunk_idx += 1
|
|
||||||
continue
|
|
||||||
|
|
||||||
if not thinking_started:
|
|
||||||
thinking_started = True
|
|
||||||
delta_content = '<think>\n' + reasoning_content
|
|
||||||
else:
|
|
||||||
delta_content = reasoning_content
|
|
||||||
elif thinking_started and not thinking_ended and delta_content:
|
|
||||||
thinking_ended = True
|
|
||||||
delta_content = '\n</think>\n' + delta_content
|
|
||||||
|
|
||||||
# Handle tool calls
|
|
||||||
tool_calls_data = None
|
|
||||||
if message.tool_calls:
|
|
||||||
tool_calls_data = []
|
|
||||||
for tc in message.tool_calls:
|
|
||||||
tool_calls_data.append(
|
|
||||||
{
|
|
||||||
'id': uuid.uuid4().hex,
|
|
||||||
'type': 'function',
|
|
||||||
'function': {
|
|
||||||
'name': tc.function.name,
|
|
||||||
'arguments': json.dumps(tc.function.arguments),
|
|
||||||
},
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
# Skip empty first chunk
|
|
||||||
if chunk_idx == 0 and not delta_content and not reasoning_content and not tool_calls_data:
|
|
||||||
chunk_idx += 1
|
|
||||||
continue
|
|
||||||
|
|
||||||
chunk_data = {
|
|
||||||
'role': role,
|
|
||||||
'content': delta_content if delta_content else None,
|
|
||||||
'tool_calls': tool_calls_data,
|
|
||||||
'is_final': bool(done),
|
|
||||||
}
|
|
||||||
chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
|
|
||||||
|
|
||||||
yield provider_message.MessageChunk(**chunk_data)
|
|
||||||
chunk_idx += 1
|
|
||||||
|
|
||||||
except asyncio.TimeoutError:
|
|
||||||
raise errors.RequesterError('请求超时')
|
|
||||||
|
|
||||||
async def invoke_embedding(
|
|
||||||
self,
|
|
||||||
model: requester.RuntimeEmbeddingModel,
|
|
||||||
input_text: list[str],
|
|
||||||
extra_args: dict[str, typing.Any] = {},
|
|
||||||
) -> list[list[float]]:
|
|
||||||
return (
|
|
||||||
await self.client.embed(
|
|
||||||
model=model.model_entity.name,
|
|
||||||
input=input_text,
|
|
||||||
**extra_args,
|
|
||||||
)
|
|
||||||
).embeddings
|
|
||||||
@@ -7,6 +7,7 @@ metadata:
|
|||||||
zh_Hans: Ollama
|
zh_Hans: Ollama
|
||||||
icon: ollama.svg
|
icon: ollama.svg
|
||||||
spec:
|
spec:
|
||||||
|
litellm_provider: ollama
|
||||||
config:
|
config:
|
||||||
- name: base_url
|
- name: base_url
|
||||||
label:
|
label:
|
||||||
|
|||||||
@@ -1,25 +0,0 @@
|
|||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import typing
|
|
||||||
import openai
|
|
||||||
|
|
||||||
from . import modelscopechatcmpl
|
|
||||||
|
|
||||||
|
|
||||||
class OpenRouterChatCompletions(modelscopechatcmpl.ModelScopeChatCompletions):
|
|
||||||
"""OpenRouter ChatCompletion API 请求器"""
|
|
||||||
|
|
||||||
client: openai.AsyncClient
|
|
||||||
|
|
||||||
default_config: dict[str, typing.Any] = {
|
|
||||||
'base_url': 'https://openrouter.ai/api/v1',
|
|
||||||
'timeout': 120,
|
|
||||||
}
|
|
||||||
|
|
||||||
async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]:
|
|
||||||
original_base_url = self.requester_cfg.get('base_url', '')
|
|
||||||
self.requester_cfg['base_url'] = 'https://openrouter.ai/api/v1'
|
|
||||||
try:
|
|
||||||
return await super().scan_models(api_key)
|
|
||||||
finally:
|
|
||||||
self.requester_cfg['base_url'] = original_base_url
|
|
||||||
@@ -7,6 +7,7 @@ metadata:
|
|||||||
zh_Hans: OpenRouter
|
zh_Hans: OpenRouter
|
||||||
icon: openrouter.svg
|
icon: openrouter.svg
|
||||||
spec:
|
spec:
|
||||||
|
litellm_provider: openai
|
||||||
config:
|
config:
|
||||||
- name: base_url
|
- name: base_url
|
||||||
label:
|
label:
|
||||||
|
|||||||
@@ -1,208 +0,0 @@
|
|||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import openai
|
|
||||||
import typing
|
|
||||||
|
|
||||||
from . import chatcmpl
|
|
||||||
from .. import requester
|
|
||||||
import openai.types.chat.chat_completion as chat_completion
|
|
||||||
import re
|
|
||||||
import langbot_plugin.api.entities.builtin.provider.message as provider_message
|
|
||||||
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
|
|
||||||
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
|
|
||||||
|
|
||||||
|
|
||||||
class PPIOChatCompletions(chatcmpl.OpenAIChatCompletions):
|
|
||||||
"""欧派云 ChatCompletion API 请求器"""
|
|
||||||
|
|
||||||
client: openai.AsyncClient
|
|
||||||
|
|
||||||
default_config: dict[str, typing.Any] = {
|
|
||||||
'base_url': 'https://api.ppinfra.com/v3/openai',
|
|
||||||
'timeout': 120,
|
|
||||||
}
|
|
||||||
|
|
||||||
is_think: bool = False
|
|
||||||
|
|
||||||
async def _make_msg(
|
|
||||||
self,
|
|
||||||
chat_completion: chat_completion.ChatCompletion,
|
|
||||||
remove_think: bool,
|
|
||||||
) -> provider_message.Message:
|
|
||||||
chatcmpl_message = chat_completion.choices[0].message.model_dump()
|
|
||||||
# print(chatcmpl_message.keys(), chatcmpl_message.values())
|
|
||||||
|
|
||||||
# 确保 role 字段存在且不为 None
|
|
||||||
if 'role' not in chatcmpl_message or chatcmpl_message['role'] is None:
|
|
||||||
chatcmpl_message['role'] = 'assistant'
|
|
||||||
|
|
||||||
reasoning_content = chatcmpl_message['reasoning_content'] if 'reasoning_content' in chatcmpl_message else None
|
|
||||||
|
|
||||||
# deepseek的reasoner模型
|
|
||||||
chatcmpl_message['content'] = await self._process_thinking_content(
|
|
||||||
chatcmpl_message['content'], reasoning_content, remove_think
|
|
||||||
)
|
|
||||||
|
|
||||||
# 移除 reasoning_content 字段,避免传递给 Message
|
|
||||||
if 'reasoning_content' in chatcmpl_message:
|
|
||||||
del chatcmpl_message['reasoning_content']
|
|
||||||
|
|
||||||
message = provider_message.Message(**chatcmpl_message)
|
|
||||||
|
|
||||||
return message
|
|
||||||
|
|
||||||
async def _process_thinking_content(
|
|
||||||
self,
|
|
||||||
content: str,
|
|
||||||
reasoning_content: str = None,
|
|
||||||
remove_think: bool = False,
|
|
||||||
) -> tuple[str, str]:
|
|
||||||
"""处理思维链内容
|
|
||||||
|
|
||||||
Args:
|
|
||||||
content: 原始内容
|
|
||||||
reasoning_content: reasoning_content 字段内容
|
|
||||||
remove_think: 是否移除思维链
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
处理后的内容
|
|
||||||
"""
|
|
||||||
if remove_think:
|
|
||||||
content = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL)
|
|
||||||
else:
|
|
||||||
if reasoning_content is not None:
|
|
||||||
content = '<think>\n' + reasoning_content + '\n</think>\n' + content
|
|
||||||
return content
|
|
||||||
|
|
||||||
async def _make_msg_chunk(
|
|
||||||
self,
|
|
||||||
delta: dict[str, typing.Any],
|
|
||||||
idx: int,
|
|
||||||
) -> provider_message.MessageChunk:
|
|
||||||
# 处理流式chunk和完整响应的差异
|
|
||||||
# print(chat_completion.choices[0])
|
|
||||||
|
|
||||||
# 确保 role 字段存在且不为 None
|
|
||||||
if 'role' not in delta or delta['role'] is None:
|
|
||||||
delta['role'] = 'assistant'
|
|
||||||
|
|
||||||
reasoning_content = delta['reasoning_content'] if 'reasoning_content' in delta else None
|
|
||||||
|
|
||||||
delta['content'] = '' if delta['content'] is None else delta['content']
|
|
||||||
# print(reasoning_content)
|
|
||||||
|
|
||||||
# deepseek的reasoner模型
|
|
||||||
|
|
||||||
if reasoning_content is not None:
|
|
||||||
delta['content'] += reasoning_content
|
|
||||||
|
|
||||||
message = provider_message.MessageChunk(**delta)
|
|
||||||
|
|
||||||
return message
|
|
||||||
|
|
||||||
async def _closure_stream(
|
|
||||||
self,
|
|
||||||
query: pipeline_query.Query,
|
|
||||||
req_messages: list[dict],
|
|
||||||
use_model: requester.RuntimeLLMModel,
|
|
||||||
use_funcs: list[resource_tool.LLMTool] = None,
|
|
||||||
extra_args: dict[str, typing.Any] = {},
|
|
||||||
remove_think: bool = False,
|
|
||||||
) -> provider_message.Message | typing.AsyncGenerator[provider_message.MessageChunk, None]:
|
|
||||||
self.client.api_key = use_model.provider.token_mgr.get_token()
|
|
||||||
|
|
||||||
args = {}
|
|
||||||
args['model'] = use_model.model_entity.name
|
|
||||||
|
|
||||||
if use_funcs:
|
|
||||||
tools = await self.ap.tool_mgr.generate_tools_for_openai(use_funcs)
|
|
||||||
|
|
||||||
if tools:
|
|
||||||
args['tools'] = tools
|
|
||||||
|
|
||||||
# 设置此次请求中的messages
|
|
||||||
messages = req_messages.copy()
|
|
||||||
|
|
||||||
# 检查vision
|
|
||||||
for msg in messages:
|
|
||||||
if 'content' in msg and isinstance(msg['content'], list):
|
|
||||||
for me in msg['content']:
|
|
||||||
if me['type'] == 'image_base64':
|
|
||||||
me['image_url'] = {'url': me['image_base64']}
|
|
||||||
me['type'] = 'image_url'
|
|
||||||
del me['image_base64']
|
|
||||||
|
|
||||||
args['messages'] = messages
|
|
||||||
args['stream'] = True
|
|
||||||
|
|
||||||
# tool_calls_map: dict[str, provider_message.ToolCall] = {}
|
|
||||||
chunk_idx = 0
|
|
||||||
thinking_started = False
|
|
||||||
thinking_ended = False
|
|
||||||
role = 'assistant' # 默认角色
|
|
||||||
async for chunk in self._req_stream(args, extra_body=extra_args):
|
|
||||||
# 解析 chunk 数据
|
|
||||||
if hasattr(chunk, 'choices') and chunk.choices:
|
|
||||||
choice = chunk.choices[0]
|
|
||||||
delta = choice.delta.model_dump() if hasattr(choice, 'delta') else {}
|
|
||||||
finish_reason = getattr(choice, 'finish_reason', None)
|
|
||||||
else:
|
|
||||||
delta = {}
|
|
||||||
finish_reason = None
|
|
||||||
|
|
||||||
# 从第一个 chunk 获取 role,后续使用这个 role
|
|
||||||
if 'role' in delta and delta['role']:
|
|
||||||
role = delta['role']
|
|
||||||
|
|
||||||
# 获取增量内容
|
|
||||||
delta_content = delta.get('content', '')
|
|
||||||
# reasoning_content = delta.get('reasoning_content', '')
|
|
||||||
|
|
||||||
if remove_think:
|
|
||||||
if delta['content'] is not None:
|
|
||||||
if '<think>' in delta['content'] and not thinking_started and not thinking_ended:
|
|
||||||
thinking_started = True
|
|
||||||
continue
|
|
||||||
elif delta['content'] == r'</think>' and not thinking_ended:
|
|
||||||
thinking_ended = True
|
|
||||||
continue
|
|
||||||
elif thinking_ended and delta['content'] == '\n\n' and thinking_started:
|
|
||||||
thinking_started = False
|
|
||||||
continue
|
|
||||||
elif thinking_started and not thinking_ended:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# delta_tool_calls = None
|
|
||||||
if delta.get('tool_calls'):
|
|
||||||
for tool_call in delta['tool_calls']:
|
|
||||||
if tool_call['id'] and tool_call['function']['name']:
|
|
||||||
tool_id = tool_call['id']
|
|
||||||
tool_name = tool_call['function']['name']
|
|
||||||
|
|
||||||
if tool_call['id'] is None:
|
|
||||||
tool_call['id'] = tool_id
|
|
||||||
if tool_call['function']['name'] is None:
|
|
||||||
tool_call['function']['name'] = tool_name
|
|
||||||
if tool_call['function']['arguments'] is None:
|
|
||||||
tool_call['function']['arguments'] = ''
|
|
||||||
if tool_call['type'] is None:
|
|
||||||
tool_call['type'] = 'function'
|
|
||||||
|
|
||||||
# 跳过空的第一个 chunk(只有 role 没有内容)
|
|
||||||
if chunk_idx == 0 and not delta_content and not delta.get('tool_calls'):
|
|
||||||
chunk_idx += 1
|
|
||||||
continue
|
|
||||||
|
|
||||||
# 构建 MessageChunk - 只包含增量内容
|
|
||||||
chunk_data = {
|
|
||||||
'role': role,
|
|
||||||
'content': delta_content if delta_content else None,
|
|
||||||
'tool_calls': delta.get('tool_calls'),
|
|
||||||
'is_final': bool(finish_reason),
|
|
||||||
}
|
|
||||||
|
|
||||||
# 移除 None 值
|
|
||||||
chunk_data = {k: v for k, v in chunk_data.items() if v is not None}
|
|
||||||
|
|
||||||
yield provider_message.MessageChunk(**chunk_data)
|
|
||||||
chunk_idx += 1
|
|
||||||
@@ -7,6 +7,7 @@ metadata:
|
|||||||
zh_Hans: 派欧云
|
zh_Hans: 派欧云
|
||||||
icon: ppio.svg
|
icon: ppio.svg
|
||||||
spec:
|
spec:
|
||||||
|
litellm_provider: openai
|
||||||
config:
|
config:
|
||||||
- name: base_url
|
- name: base_url
|
||||||
label:
|
label:
|
||||||
|
|||||||
@@ -1,17 +0,0 @@
|
|||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import openai
|
|
||||||
import typing
|
|
||||||
|
|
||||||
from . import chatcmpl
|
|
||||||
|
|
||||||
|
|
||||||
class QHAIGCChatCompletions(chatcmpl.OpenAIChatCompletions):
|
|
||||||
"""启航 AI ChatCompletion API 请求器"""
|
|
||||||
|
|
||||||
client: openai.AsyncClient
|
|
||||||
|
|
||||||
default_config: dict[str, typing.Any] = {
|
|
||||||
'base_url': 'https://api.qhaigc.com/v1',
|
|
||||||
'timeout': 120,
|
|
||||||
}
|
|
||||||
@@ -7,6 +7,7 @@ metadata:
|
|||||||
zh_Hans: 启航 AI
|
zh_Hans: 启航 AI
|
||||||
icon: qhaigc.png
|
icon: qhaigc.png
|
||||||
spec:
|
spec:
|
||||||
|
litellm_provider: openai
|
||||||
config:
|
config:
|
||||||
- name: base_url
|
- name: base_url
|
||||||
label:
|
label:
|
||||||
|
|||||||
@@ -2,19 +2,16 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import typing
|
import typing
|
||||||
|
|
||||||
import openai
|
from . import litellmchat
|
||||||
|
|
||||||
from . import chatcmpl
|
|
||||||
|
|
||||||
|
|
||||||
class QiniuChatCompletions(chatcmpl.OpenAIChatCompletions):
|
class QiniuChatCompletions(litellmchat.LiteLLMRequester):
|
||||||
"""七牛云 ChatCompletion API 请求器"""
|
"""七牛云 ChatCompletion API 请求器"""
|
||||||
|
|
||||||
client: openai.AsyncClient
|
|
||||||
|
|
||||||
default_config: dict[str, typing.Any] = {
|
default_config: dict[str, typing.Any] = {
|
||||||
'base_url': 'https://api.qnaigc.com/v1',
|
'base_url': 'https://api.qnaigc.com/v1',
|
||||||
'timeout': 120,
|
'timeout': 120,
|
||||||
|
'custom_llm_provider': 'openai',
|
||||||
}
|
}
|
||||||
|
|
||||||
async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]:
|
async def scan_models(self, api_key: str | None = None) -> dict[str, typing.Any]:
|
||||||
|
|||||||
@@ -1,32 +0,0 @@
|
|||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import openai
|
|
||||||
import typing
|
|
||||||
|
|
||||||
from . import chatcmpl
|
|
||||||
import openai.types.chat.chat_completion as chat_completion
|
|
||||||
|
|
||||||
|
|
||||||
class ShengSuanYunChatCompletions(chatcmpl.OpenAIChatCompletions):
|
|
||||||
"""胜算云(ModelSpot.AI) ChatCompletion API 请求器"""
|
|
||||||
|
|
||||||
client: openai.AsyncClient
|
|
||||||
|
|
||||||
default_config: dict[str, typing.Any] = {
|
|
||||||
'base_url': 'https://router.shengsuanyun.com/api/v1',
|
|
||||||
'timeout': 120,
|
|
||||||
}
|
|
||||||
|
|
||||||
async def _req(
|
|
||||||
self,
|
|
||||||
args: dict,
|
|
||||||
extra_body: dict = {},
|
|
||||||
) -> chat_completion.ChatCompletion:
|
|
||||||
return await self.client.chat.completions.create(
|
|
||||||
**args,
|
|
||||||
extra_body=extra_body,
|
|
||||||
extra_headers={
|
|
||||||
'HTTP-Referer': 'https://langbot.app',
|
|
||||||
'X-Title': 'LangBot',
|
|
||||||
},
|
|
||||||
)
|
|
||||||
@@ -7,6 +7,7 @@ metadata:
|
|||||||
zh_Hans: 胜算云
|
zh_Hans: 胜算云
|
||||||
icon: shengsuanyun.svg
|
icon: shengsuanyun.svg
|
||||||
spec:
|
spec:
|
||||||
|
litellm_provider: openai
|
||||||
config:
|
config:
|
||||||
- name: base_url
|
- name: base_url
|
||||||
label:
|
label:
|
||||||
|
|||||||
@@ -1,17 +0,0 @@
|
|||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import typing
|
|
||||||
import openai
|
|
||||||
|
|
||||||
from . import chatcmpl
|
|
||||||
|
|
||||||
|
|
||||||
class SiliconFlowChatCompletions(chatcmpl.OpenAIChatCompletions):
|
|
||||||
"""SiliconFlow ChatCompletion API 请求器"""
|
|
||||||
|
|
||||||
client: openai.AsyncClient
|
|
||||||
|
|
||||||
default_config: dict[str, typing.Any] = {
|
|
||||||
'base_url': 'https://api.siliconflow.cn/v1',
|
|
||||||
'timeout': 120,
|
|
||||||
}
|
|
||||||
@@ -7,6 +7,7 @@ metadata:
|
|||||||
zh_Hans: 硅基流动
|
zh_Hans: 硅基流动
|
||||||
icon: siliconflow.svg
|
icon: siliconflow.svg
|
||||||
spec:
|
spec:
|
||||||
|
litellm_provider: openai
|
||||||
config:
|
config:
|
||||||
- name: base_url
|
- name: base_url
|
||||||
label:
|
label:
|
||||||
|
|||||||
@@ -1,17 +0,0 @@
|
|||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import typing
|
|
||||||
import openai
|
|
||||||
|
|
||||||
from . import chatcmpl
|
|
||||||
|
|
||||||
|
|
||||||
class LangBotSpaceChatCompletions(chatcmpl.OpenAIChatCompletions):
|
|
||||||
"""LangBot Space ChatCompletion API 请求器"""
|
|
||||||
|
|
||||||
client: openai.AsyncClient
|
|
||||||
|
|
||||||
default_config: dict[str, typing.Any] = {
|
|
||||||
'base_url': 'https://api.langbot.cloud/v1',
|
|
||||||
'timeout': 120,
|
|
||||||
}
|
|
||||||
@@ -7,6 +7,7 @@ metadata:
|
|||||||
zh_Hans: Space
|
zh_Hans: Space
|
||||||
icon: space.webp
|
icon: space.webp
|
||||||
spec:
|
spec:
|
||||||
|
litellm_provider: openai
|
||||||
config:
|
config:
|
||||||
- name: base_url
|
- name: base_url
|
||||||
label:
|
label:
|
||||||
|
|||||||
5
src/langbot/pkg/provider/modelmgr/requesters/tencent.svg
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
<svg width="60" height="50" viewBox="0 0 60 50" xmlns="http://www.w3.org/2000/svg">
|
||||||
|
<rect width="60" height="50" rx="8" fill="#0052D9"/>
|
||||||
|
<text x="30" y="28" font-family="Arial, sans-serif" font-size="10" font-weight="bold" fill="white" text-anchor="middle">Tencent</text>
|
||||||
|
<text x="30" y="40" font-family="Arial, sans-serif" font-size="8" fill="white" text-anchor="middle">Hunyuan</text>
|
||||||
|
</svg>
|
||||||
|
After Width: | Height: | Size: 400 B |
@@ -0,0 +1,30 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: LLMAPIRequester
|
||||||
|
metadata:
|
||||||
|
name: tencent-chat-completions
|
||||||
|
label:
|
||||||
|
en_US: Tencent Hunyuan
|
||||||
|
zh_Hans: 腾讯混元
|
||||||
|
icon: tencent.svg
|
||||||
|
spec:
|
||||||
|
litellm_provider: openai
|
||||||
|
config:
|
||||||
|
- name: base_url
|
||||||
|
label:
|
||||||
|
en_US: Base URL
|
||||||
|
zh_Hans: 基础 URL
|
||||||
|
type: string
|
||||||
|
required: true
|
||||||
|
default: https://hunyuan.tencentcloudapi.com/v1
|
||||||
|
- name: timeout
|
||||||
|
label:
|
||||||
|
en_US: Timeout
|
||||||
|
zh_Hans: 超时时间
|
||||||
|
type: integer
|
||||||
|
required: true
|
||||||
|
default: 120
|
||||||
|
support_type:
|
||||||
|
- llm
|
||||||
|
- text-embedding
|
||||||
|
- rerank
|
||||||
|
provider_category: manufacturer
|
||||||
@@ -0,0 +1,5 @@
|
|||||||
|
<svg width="60" height="50" viewBox="0 0 60 50" xmlns="http://www.w3.org/2000/svg">
|
||||||
|
<rect width="60" height="50" rx="8" fill="#8B5CF6"/>
|
||||||
|
<text x="30" y="28" font-family="Arial, sans-serif" font-size="10" font-weight="bold" fill="white" text-anchor="middle">Together</text>
|
||||||
|
<text x="30" y="40" font-family="Arial, sans-serif" font-size="8" fill="white" text-anchor="middle">AI</text>
|
||||||
|
</svg>
|
||||||
|
After Width: | Height: | Size: 396 B |
@@ -0,0 +1,30 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: LLMAPIRequester
|
||||||
|
metadata:
|
||||||
|
name: together-chat-completions
|
||||||
|
label:
|
||||||
|
en_US: Together AI
|
||||||
|
zh_Hans: Together AI
|
||||||
|
icon: together.svg
|
||||||
|
spec:
|
||||||
|
litellm_provider: together_ai
|
||||||
|
config:
|
||||||
|
- name: base_url
|
||||||
|
label:
|
||||||
|
en_US: Base URL
|
||||||
|
zh_Hans: 基础 URL
|
||||||
|
type: string
|
||||||
|
required: true
|
||||||
|
default: https://api.together.xyz/v1
|
||||||
|
- name: timeout
|
||||||
|
label:
|
||||||
|
en_US: Timeout
|
||||||
|
zh_Hans: 超时时间
|
||||||
|
type: integer
|
||||||
|
required: true
|
||||||
|
default: 120
|
||||||
|
support_type:
|
||||||
|
- llm
|
||||||
|
- text-embedding
|
||||||
|
- rerank
|
||||||
|
provider_category: manufacturer
|
||||||
@@ -7,6 +7,7 @@ metadata:
|
|||||||
zh_Hans: 小马算力
|
zh_Hans: 小马算力
|
||||||
icon: tokenpony.svg
|
icon: tokenpony.svg
|
||||||
spec:
|
spec:
|
||||||
|
litellm_provider: openai
|
||||||
config:
|
config:
|
||||||
- name: base_url
|
- name: base_url
|
||||||
label:
|
label:
|
||||||
|
|||||||
@@ -1,17 +0,0 @@
|
|||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import typing
|
|
||||||
import openai
|
|
||||||
|
|
||||||
from . import chatcmpl
|
|
||||||
|
|
||||||
|
|
||||||
class TokenPonyChatCompletions(chatcmpl.OpenAIChatCompletions):
|
|
||||||
"""TokenPony ChatCompletion API 请求器"""
|
|
||||||
|
|
||||||
client: openai.AsyncClient
|
|
||||||
|
|
||||||
default_config: dict[str, typing.Any] = {
|
|
||||||
'base_url': 'https://api.tokenpony.cn/v1',
|
|
||||||
'timeout': 120,
|
|
||||||
}
|
|
||||||
@@ -1,17 +0,0 @@
|
|||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import typing
|
|
||||||
import openai
|
|
||||||
|
|
||||||
from . import chatcmpl
|
|
||||||
|
|
||||||
|
|
||||||
class VolcArkChatCompletions(chatcmpl.OpenAIChatCompletions):
|
|
||||||
"""火山方舟大模型平台 ChatCompletion API 请求器"""
|
|
||||||
|
|
||||||
client: openai.AsyncClient
|
|
||||||
|
|
||||||
default_config: dict[str, typing.Any] = {
|
|
||||||
'base_url': 'https://ark.cn-beijing.volces.com/api/v3',
|
|
||||||
'timeout': 120,
|
|
||||||
}
|
|
||||||
@@ -7,6 +7,7 @@ metadata:
|
|||||||
zh_Hans: 火山方舟
|
zh_Hans: 火山方舟
|
||||||
icon: volcark.svg
|
icon: volcark.svg
|
||||||
spec:
|
spec:
|
||||||
|
litellm_provider: openai
|
||||||
config:
|
config:
|
||||||
- name: base_url
|
- name: base_url
|
||||||
label:
|
label:
|
||||||
@@ -24,6 +25,8 @@ spec:
|
|||||||
default: 120
|
default: 120
|
||||||
support_type:
|
support_type:
|
||||||
- llm
|
- llm
|
||||||
|
- text-embedding
|
||||||
|
- rerank
|
||||||
provider_category: maas
|
provider_category: maas
|
||||||
execution:
|
execution:
|
||||||
python:
|
python:
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ metadata:
|
|||||||
zh_Hans: Voyage AI
|
zh_Hans: Voyage AI
|
||||||
icon: voyageai.svg
|
icon: voyageai.svg
|
||||||
spec:
|
spec:
|
||||||
|
litellm_provider: openai
|
||||||
config:
|
config:
|
||||||
- name: base_url
|
- name: base_url
|
||||||
label:
|
label:
|
||||||
|
|||||||
@@ -1,17 +0,0 @@
|
|||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import typing
|
|
||||||
import openai
|
|
||||||
|
|
||||||
from . import chatcmpl
|
|
||||||
|
|
||||||
|
|
||||||
class XaiChatCompletions(chatcmpl.OpenAIChatCompletions):
|
|
||||||
"""xAI ChatCompletion API 请求器"""
|
|
||||||
|
|
||||||
client: openai.AsyncClient
|
|
||||||
|
|
||||||
default_config: dict[str, typing.Any] = {
|
|
||||||
'base_url': 'https://api.x.ai/v1',
|
|
||||||
'timeout': 120,
|
|
||||||
}
|
|
||||||
@@ -7,6 +7,7 @@ metadata:
|
|||||||
zh_Hans: xAI
|
zh_Hans: xAI
|
||||||
icon: xai.svg
|
icon: xai.svg
|
||||||
spec:
|
spec:
|
||||||
|
litellm_provider: openai
|
||||||
config:
|
config:
|
||||||
- name: base_url
|
- name: base_url
|
||||||
label:
|
label:
|
||||||
@@ -24,6 +25,8 @@ spec:
|
|||||||
default: 120
|
default: 120
|
||||||
support_type:
|
support_type:
|
||||||
- llm
|
- llm
|
||||||
|
- text-embedding
|
||||||
|
- rerank
|
||||||
provider_category: manufacturer
|
provider_category: manufacturer
|
||||||
execution:
|
execution:
|
||||||
python:
|
python:
|
||||||
|
|||||||
5
src/langbot/pkg/provider/modelmgr/requesters/yi.svg
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
<svg width="60" height="50" viewBox="0 0 60 50" xmlns="http://www.w3.org/2000/svg">
|
||||||
|
<rect width="60" height="50" rx="8" fill="#10B981"/>
|
||||||
|
<text x="30" y="28" font-family="Arial, sans-serif" font-size="10" font-weight="bold" fill="white" text-anchor="middle">01.AI</text>
|
||||||
|
<text x="30" y="40" font-family="Arial, sans-serif" font-size="8" fill="white" text-anchor="middle">Yi</text>
|
||||||
|
</svg>
|
||||||
|
After Width: | Height: | Size: 393 B |
30
src/langbot/pkg/provider/modelmgr/requesters/yichatcmpl.yaml
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: LLMAPIRequester
|
||||||
|
metadata:
|
||||||
|
name: yi-chat-completions
|
||||||
|
label:
|
||||||
|
en_US: 01.AI Yi
|
||||||
|
zh_Hans: 零一万物
|
||||||
|
icon: yi.svg
|
||||||
|
spec:
|
||||||
|
litellm_provider: openai
|
||||||
|
config:
|
||||||
|
- name: base_url
|
||||||
|
label:
|
||||||
|
en_US: Base URL
|
||||||
|
zh_Hans: 基础 URL
|
||||||
|
type: string
|
||||||
|
required: true
|
||||||
|
default: https://api.lingyiwanwu.com/v1
|
||||||
|
- name: timeout
|
||||||
|
label:
|
||||||
|
en_US: Timeout
|
||||||
|
zh_Hans: 超时时间
|
||||||
|
type: integer
|
||||||
|
required: true
|
||||||
|
default: 120
|
||||||
|
support_type:
|
||||||
|
- llm
|
||||||
|
- text-embedding
|
||||||
|
- rerank
|
||||||
|
provider_category: manufacturer
|
||||||
@@ -1,17 +0,0 @@
|
|||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import typing
|
|
||||||
import openai
|
|
||||||
|
|
||||||
from . import chatcmpl
|
|
||||||
|
|
||||||
|
|
||||||
class ZhipuAIChatCompletions(chatcmpl.OpenAIChatCompletions):
|
|
||||||
"""智谱AI ChatCompletion API 请求器"""
|
|
||||||
|
|
||||||
client: openai.AsyncClient
|
|
||||||
|
|
||||||
default_config: dict[str, typing.Any] = {
|
|
||||||
'base_url': 'https://open.bigmodel.cn/api/paas/v4',
|
|
||||||
'timeout': 120,
|
|
||||||
}
|
|
||||||
@@ -7,6 +7,7 @@ metadata:
|
|||||||
zh_Hans: 智谱 AI
|
zh_Hans: 智谱 AI
|
||||||
icon: zhipuai.svg
|
icon: zhipuai.svg
|
||||||
spec:
|
spec:
|
||||||
|
litellm_provider: openai
|
||||||
config:
|
config:
|
||||||
- name: base_url
|
- name: base_url
|
||||||
label:
|
label:
|
||||||
@@ -24,6 +25,8 @@ spec:
|
|||||||
default: 120
|
default: 120
|
||||||
support_type:
|
support_type:
|
||||||
- llm
|
- llm
|
||||||
|
- text-embedding
|
||||||
|
- rerank
|
||||||
provider_category: manufacturer
|
provider_category: manufacturer
|
||||||
execution:
|
execution:
|
||||||
python:
|
python:
|
||||||
|
|||||||
@@ -41,6 +41,64 @@ SANDBOX_EXEC_SYSTEM_GUIDANCE = (
|
|||||||
MAX_TOOL_CALL_ROUNDS = 128
|
MAX_TOOL_CALL_ROUNDS = 128
|
||||||
|
|
||||||
|
|
||||||
|
def _model_has_ability(model: modelmgr_requester.RuntimeLLMModel, ability: str) -> bool:
|
||||||
|
return ability in (model.model_entity.abilities or [])
|
||||||
|
|
||||||
|
|
||||||
|
class _StreamAccumulator:
|
||||||
|
"""Accumulate streamed content and fragmented OpenAI-style tool calls."""
|
||||||
|
|
||||||
|
def __init__(self, msg_sequence: int = 0, initial_content: str | None = None):
|
||||||
|
self.tool_calls_map: dict[str, provider_message.ToolCall] = {}
|
||||||
|
self.msg_idx = 0
|
||||||
|
self.accumulated_content = initial_content or ''
|
||||||
|
self.last_role = 'assistant'
|
||||||
|
self.msg_sequence = msg_sequence
|
||||||
|
|
||||||
|
def add(self, msg: provider_message.MessageChunk) -> provider_message.MessageChunk | None:
|
||||||
|
self.msg_idx += 1
|
||||||
|
|
||||||
|
if msg.role:
|
||||||
|
self.last_role = msg.role
|
||||||
|
|
||||||
|
if msg.content:
|
||||||
|
self.accumulated_content += msg.content
|
||||||
|
|
||||||
|
if msg.tool_calls:
|
||||||
|
for tool_call in msg.tool_calls:
|
||||||
|
if tool_call.id not in self.tool_calls_map:
|
||||||
|
self.tool_calls_map[tool_call.id] = provider_message.ToolCall(
|
||||||
|
id=tool_call.id,
|
||||||
|
type=tool_call.type,
|
||||||
|
function=provider_message.FunctionCall(
|
||||||
|
name=tool_call.function.name if tool_call.function else '',
|
||||||
|
arguments='',
|
||||||
|
),
|
||||||
|
)
|
||||||
|
if tool_call.function and tool_call.function.arguments:
|
||||||
|
self.tool_calls_map[tool_call.id].function.arguments += tool_call.function.arguments
|
||||||
|
|
||||||
|
if self.msg_idx % 8 == 0 or msg.is_final:
|
||||||
|
self.msg_sequence += 1
|
||||||
|
return provider_message.MessageChunk(
|
||||||
|
role=self.last_role,
|
||||||
|
content=self.accumulated_content,
|
||||||
|
tool_calls=list(self.tool_calls_map.values()) if (self.tool_calls_map and msg.is_final) else None,
|
||||||
|
is_final=msg.is_final,
|
||||||
|
msg_sequence=self.msg_sequence,
|
||||||
|
)
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
def final_message(self) -> provider_message.MessageChunk:
|
||||||
|
return provider_message.MessageChunk(
|
||||||
|
role=self.last_role,
|
||||||
|
content=self.accumulated_content,
|
||||||
|
tool_calls=list(self.tool_calls_map.values()) if self.tool_calls_map else None,
|
||||||
|
msg_sequence=self.msg_sequence,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@runner.runner_class('local-agent')
|
@runner.runner_class('local-agent')
|
||||||
class LocalAgentRunner(runner.RequestRunner):
|
class LocalAgentRunner(runner.RequestRunner):
|
||||||
"""Local agent request runner"""
|
"""Local agent request runner"""
|
||||||
@@ -105,7 +163,7 @@ class LocalAgentRunner(runner.RequestRunner):
|
|||||||
query,
|
query,
|
||||||
model,
|
model,
|
||||||
messages,
|
messages,
|
||||||
funcs if model.model_entity.abilities.__contains__('func_call') else [],
|
funcs if _model_has_ability(model, 'func_call') else [],
|
||||||
extra_args=model.model_entity.extra_args,
|
extra_args=model.model_entity.extra_args,
|
||||||
remove_think=remove_think,
|
remove_think=remove_think,
|
||||||
)
|
)
|
||||||
@@ -135,7 +193,7 @@ class LocalAgentRunner(runner.RequestRunner):
|
|||||||
query,
|
query,
|
||||||
model,
|
model,
|
||||||
messages,
|
messages,
|
||||||
funcs if model.model_entity.abilities.__contains__('func_call') else [],
|
funcs if _model_has_ability(model, 'func_call') else [],
|
||||||
extra_args=model.model_entity.extra_args,
|
extra_args=model.model_entity.extra_args,
|
||||||
remove_think=remove_think,
|
remove_think=remove_think,
|
||||||
)
|
)
|
||||||
@@ -302,11 +360,7 @@ class LocalAgentRunner(runner.RequestRunner):
|
|||||||
final_msg = msg
|
final_msg = msg
|
||||||
else:
|
else:
|
||||||
# Streaming: invoke with fallback
|
# Streaming: invoke with fallback
|
||||||
tool_calls_map: dict[str, provider_message.ToolCall] = {}
|
stream_accumulator = _StreamAccumulator(msg_sequence=1)
|
||||||
msg_idx = 0
|
|
||||||
accumulated_content = ''
|
|
||||||
last_role = 'assistant'
|
|
||||||
msg_sequence = 1
|
|
||||||
|
|
||||||
stream_src, use_llm_model = await self._invoke_stream_with_fallback(
|
stream_src, use_llm_model = await self._invoke_stream_with_fallback(
|
||||||
query,
|
query,
|
||||||
@@ -316,44 +370,12 @@ class LocalAgentRunner(runner.RequestRunner):
|
|||||||
remove_think,
|
remove_think,
|
||||||
)
|
)
|
||||||
async for msg in stream_src:
|
async for msg in stream_src:
|
||||||
msg_idx = msg_idx + 1
|
chunk = stream_accumulator.add(msg)
|
||||||
|
if chunk:
|
||||||
if msg.role:
|
yield chunk
|
||||||
last_role = msg.role
|
|
||||||
|
|
||||||
if msg.content:
|
|
||||||
accumulated_content += msg.content
|
|
||||||
|
|
||||||
if msg.tool_calls:
|
|
||||||
for tool_call in msg.tool_calls:
|
|
||||||
if tool_call.id not in tool_calls_map:
|
|
||||||
tool_calls_map[tool_call.id] = provider_message.ToolCall(
|
|
||||||
id=tool_call.id,
|
|
||||||
type=tool_call.type,
|
|
||||||
function=provider_message.FunctionCall(
|
|
||||||
name=tool_call.function.name if tool_call.function else '', arguments=''
|
|
||||||
),
|
|
||||||
)
|
|
||||||
if tool_call.function and tool_call.function.arguments:
|
|
||||||
tool_calls_map[tool_call.id].function.arguments += tool_call.function.arguments
|
|
||||||
|
|
||||||
if msg_idx % 8 == 0 or msg.is_final:
|
|
||||||
msg_sequence += 1
|
|
||||||
yield provider_message.MessageChunk(
|
|
||||||
role=last_role,
|
|
||||||
content=accumulated_content,
|
|
||||||
tool_calls=list(tool_calls_map.values()) if (tool_calls_map and msg.is_final) else None,
|
|
||||||
is_final=msg.is_final,
|
|
||||||
msg_sequence=msg_sequence,
|
|
||||||
)
|
|
||||||
initial_response_emitted = True
|
initial_response_emitted = True
|
||||||
|
|
||||||
final_msg = provider_message.MessageChunk(
|
final_msg = stream_accumulator.final_message()
|
||||||
role=last_role,
|
|
||||||
content=accumulated_content,
|
|
||||||
tool_calls=list(tool_calls_map.values()) if tool_calls_map else None,
|
|
||||||
msg_sequence=msg_sequence,
|
|
||||||
)
|
|
||||||
|
|
||||||
pending_tool_calls = final_msg.tool_calls
|
pending_tool_calls = final_msg.tool_calls
|
||||||
first_content = final_msg.content
|
first_content = final_msg.content
|
||||||
@@ -438,69 +460,32 @@ class LocalAgentRunner(runner.RequestRunner):
|
|||||||
)
|
)
|
||||||
|
|
||||||
if is_stream:
|
if is_stream:
|
||||||
tool_calls_map = {}
|
stream_accumulator = _StreamAccumulator(
|
||||||
msg_idx = 0
|
msg_sequence=first_end_sequence,
|
||||||
accumulated_content = ''
|
initial_content=first_content,
|
||||||
last_role = 'assistant'
|
)
|
||||||
msg_sequence = first_end_sequence
|
|
||||||
|
|
||||||
tool_stream_src = use_llm_model.provider.invoke_llm_stream(
|
tool_stream_src = use_llm_model.provider.invoke_llm_stream(
|
||||||
query,
|
query,
|
||||||
use_llm_model,
|
use_llm_model,
|
||||||
req_messages,
|
req_messages,
|
||||||
query.use_funcs if use_llm_model.model_entity.abilities.__contains__('func_call') else [],
|
query.use_funcs if _model_has_ability(use_llm_model, 'func_call') else [],
|
||||||
extra_args=use_llm_model.model_entity.extra_args,
|
extra_args=use_llm_model.model_entity.extra_args,
|
||||||
remove_think=remove_think,
|
remove_think=remove_think,
|
||||||
)
|
)
|
||||||
async for msg in tool_stream_src:
|
async for msg in tool_stream_src:
|
||||||
msg_idx += 1
|
chunk = stream_accumulator.add(msg)
|
||||||
|
if chunk:
|
||||||
|
yield chunk
|
||||||
|
|
||||||
if msg.role:
|
final_msg = stream_accumulator.final_message()
|
||||||
last_role = msg.role
|
|
||||||
|
|
||||||
# Prepend first-round content on first chunk of tool-call round
|
|
||||||
if msg_idx == 1:
|
|
||||||
accumulated_content = first_content if first_content is not None else accumulated_content
|
|
||||||
|
|
||||||
if msg.content:
|
|
||||||
accumulated_content += msg.content
|
|
||||||
|
|
||||||
if msg.tool_calls:
|
|
||||||
for tool_call in msg.tool_calls:
|
|
||||||
if tool_call.id not in tool_calls_map:
|
|
||||||
tool_calls_map[tool_call.id] = provider_message.ToolCall(
|
|
||||||
id=tool_call.id,
|
|
||||||
type=tool_call.type,
|
|
||||||
function=provider_message.FunctionCall(
|
|
||||||
name=tool_call.function.name if tool_call.function else '', arguments=''
|
|
||||||
),
|
|
||||||
)
|
|
||||||
if tool_call.function and tool_call.function.arguments:
|
|
||||||
tool_calls_map[tool_call.id].function.arguments += tool_call.function.arguments
|
|
||||||
|
|
||||||
if msg_idx % 8 == 0 or msg.is_final:
|
|
||||||
msg_sequence += 1
|
|
||||||
yield provider_message.MessageChunk(
|
|
||||||
role=last_role,
|
|
||||||
content=accumulated_content,
|
|
||||||
tool_calls=list(tool_calls_map.values()) if (tool_calls_map and msg.is_final) else None,
|
|
||||||
is_final=msg.is_final,
|
|
||||||
msg_sequence=msg_sequence,
|
|
||||||
)
|
|
||||||
|
|
||||||
final_msg = provider_message.MessageChunk(
|
|
||||||
role=last_role,
|
|
||||||
content=accumulated_content,
|
|
||||||
tool_calls=list(tool_calls_map.values()) if tool_calls_map else None,
|
|
||||||
msg_sequence=msg_sequence,
|
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
# Non-streaming: use committed model directly (no fallback in tool loop)
|
# Non-streaming: use committed model directly (no fallback in tool loop)
|
||||||
msg = await use_llm_model.provider.invoke_llm(
|
msg = await use_llm_model.provider.invoke_llm(
|
||||||
query,
|
query,
|
||||||
use_llm_model,
|
use_llm_model,
|
||||||
req_messages,
|
req_messages,
|
||||||
query.use_funcs if use_llm_model.model_entity.abilities.__contains__('func_call') else [],
|
query.use_funcs if _model_has_ability(use_llm_model, 'func_call') else [],
|
||||||
extra_args=use_llm_model.model_entity.extra_args,
|
extra_args=use_llm_model.model_entity.extra_args,
|
||||||
remove_think=remove_think,
|
remove_think=remove_think,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -83,19 +83,6 @@ class ToolManager:
|
|||||||
|
|
||||||
return tools
|
return tools
|
||||||
|
|
||||||
async def generate_tools_for_anthropic(self, use_funcs: list[resource_tool.LLMTool]) -> list:
|
|
||||||
tools = []
|
|
||||||
|
|
||||||
for function in use_funcs:
|
|
||||||
function_schema = {
|
|
||||||
'name': function.name,
|
|
||||||
'description': function.description,
|
|
||||||
'input_schema': function.parameters,
|
|
||||||
}
|
|
||||||
tools.append(function_schema)
|
|
||||||
|
|
||||||
return tools
|
|
||||||
|
|
||||||
async def execute_func_call(self, name: str, parameters: dict, query: pipeline_query.Query) -> typing.Any:
|
async def execute_func_call(self, name: str, parameters: dict, query: pipeline_query.Query) -> typing.Any:
|
||||||
if await self.native_tool_loader.has_tool(name):
|
if await self.native_tool_loader.has_tool(name):
|
||||||
return await self.native_tool_loader.invoke_tool(name, parameters, query)
|
return await self.native_tool_loader.invoke_tool(name, parameters, query)
|
||||||
|
|||||||
@@ -1 +1 @@
|
|||||||
|
"""Provider requester tests"""
|
||||||
|
|||||||
@@ -1,32 +0,0 @@
|
|||||||
"""Tests for AnthropicMessages requester.
|
|
||||||
|
|
||||||
Tests config and pure utility methods.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from unittest.mock import MagicMock
|
|
||||||
|
|
||||||
|
|
||||||
class TestAnthropicMessagesConfig:
|
|
||||||
"""Tests for default config."""
|
|
||||||
|
|
||||||
def test_default_config_values(self):
|
|
||||||
"""Check default_config."""
|
|
||||||
from langbot.pkg.provider.modelmgr.requesters.anthropicmsgs import AnthropicMessages
|
|
||||||
|
|
||||||
assert AnthropicMessages.default_config['base_url'] == 'https://api.anthropic.com'
|
|
||||||
assert AnthropicMessages.default_config['timeout'] == 120
|
|
||||||
|
|
||||||
def test_config_override(self):
|
|
||||||
"""Config can override defaults."""
|
|
||||||
from langbot.pkg.provider.modelmgr.requesters.anthropicmsgs import AnthropicMessages
|
|
||||||
|
|
||||||
mock_app = MagicMock()
|
|
||||||
req = AnthropicMessages(mock_app, {
|
|
||||||
'base_url': 'https://custom.anthropic.com',
|
|
||||||
'timeout': 60,
|
|
||||||
})
|
|
||||||
|
|
||||||
assert req.requester_cfg['base_url'] == 'https://custom.anthropic.com'
|
|
||||||
assert req.requester_cfg['timeout'] == 60
|
|
||||||
@@ -1,247 +0,0 @@
|
|||||||
"""Tests for requester error handling - direct import version.
|
|
||||||
|
|
||||||
Tests error handling branches by importing real packages and mocking
|
|
||||||
only the necessary dependencies.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
from unittest.mock import AsyncMock, MagicMock
|
|
||||||
import pytest
|
|
||||||
import openai # Import real openai package
|
|
||||||
|
|
||||||
from langbot.pkg.provider.modelmgr.errors import RequesterError
|
|
||||||
|
|
||||||
|
|
||||||
class TestInvokeLLMErrorHandling:
|
|
||||||
"""Tests for invoke_llm error handling branches."""
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def mock_app(self):
|
|
||||||
"""Create mock Application."""
|
|
||||||
app = MagicMock()
|
|
||||||
app.tool_mgr = MagicMock()
|
|
||||||
app.tool_mgr.generate_tools_for_openai = AsyncMock(return_value=[])
|
|
||||||
return app
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def mock_model(self):
|
|
||||||
"""Create mock RuntimeLLMModel."""
|
|
||||||
model = MagicMock()
|
|
||||||
model.model_entity = MagicMock()
|
|
||||||
model.model_entity.name = 'gpt-4'
|
|
||||||
model.provider = MagicMock()
|
|
||||||
model.provider.token_mgr = MagicMock()
|
|
||||||
model.provider.token_mgr.get_token = MagicMock(return_value='test-key')
|
|
||||||
return model
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def mock_message(self):
|
|
||||||
"""Create mock provider message."""
|
|
||||||
msg = MagicMock()
|
|
||||||
msg.dict = MagicMock(return_value={'role': 'user', 'content': 'test'})
|
|
||||||
return msg
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def requester_with_mocked_client(self, mock_app):
|
|
||||||
"""Create requester with mocked OpenAI client."""
|
|
||||||
from langbot.pkg.provider.modelmgr.requesters.chatcmpl import OpenAIChatCompletions
|
|
||||||
|
|
||||||
req = OpenAIChatCompletions(mock_app, {
|
|
||||||
'base_url': 'https://api.openai.com/v1',
|
|
||||||
'timeout': 120,
|
|
||||||
})
|
|
||||||
|
|
||||||
# Replace client with mock
|
|
||||||
req.client = MagicMock()
|
|
||||||
req.client.chat = MagicMock()
|
|
||||||
req.client.chat.completions = MagicMock()
|
|
||||||
req.client.chat.completions.create = AsyncMock()
|
|
||||||
|
|
||||||
return req
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_timeout_error(self, requester_with_mocked_client, mock_model, mock_message):
|
|
||||||
"""TimeoutError is wrapped as RequesterError."""
|
|
||||||
requester_with_mocked_client.client.chat.completions.create = AsyncMock(
|
|
||||||
side_effect=asyncio.TimeoutError()
|
|
||||||
)
|
|
||||||
|
|
||||||
with pytest.raises(RequesterError) as exc:
|
|
||||||
await requester_with_mocked_client.invoke_llm(
|
|
||||||
query=None,
|
|
||||||
model=mock_model,
|
|
||||||
messages=[mock_message],
|
|
||||||
)
|
|
||||||
|
|
||||||
assert '超时' in str(exc.value)
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_bad_request_context_length(self, requester_with_mocked_client, mock_model, mock_message):
|
|
||||||
"""BadRequestError with context_length_exceeded has special message."""
|
|
||||||
error = openai.BadRequestError(
|
|
||||||
message='context_length_exceeded: max 4096',
|
|
||||||
response=MagicMock(status_code=400),
|
|
||||||
body={}
|
|
||||||
)
|
|
||||||
requester_with_mocked_client.client.chat.completions.create = AsyncMock(
|
|
||||||
side_effect=error
|
|
||||||
)
|
|
||||||
|
|
||||||
with pytest.raises(RequesterError) as exc:
|
|
||||||
await requester_with_mocked_client.invoke_llm(
|
|
||||||
query=None,
|
|
||||||
model=mock_model,
|
|
||||||
messages=[mock_message],
|
|
||||||
)
|
|
||||||
|
|
||||||
assert '上文过长' in str(exc.value)
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_authentication_error(self, requester_with_mocked_client, mock_model, mock_message):
|
|
||||||
"""AuthenticationError shows invalid api-key message."""
|
|
||||||
error = openai.AuthenticationError(
|
|
||||||
message='Invalid API key',
|
|
||||||
response=MagicMock(status_code=401),
|
|
||||||
body={}
|
|
||||||
)
|
|
||||||
requester_with_mocked_client.client.chat.completions.create = AsyncMock(
|
|
||||||
side_effect=error
|
|
||||||
)
|
|
||||||
|
|
||||||
with pytest.raises(RequesterError) as exc:
|
|
||||||
await requester_with_mocked_client.invoke_llm(
|
|
||||||
query=None,
|
|
||||||
model=mock_model,
|
|
||||||
messages=[mock_message],
|
|
||||||
)
|
|
||||||
|
|
||||||
assert 'api-key' in str(exc.value).lower() or '无效' in str(exc.value)
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_rate_limit_error(self, requester_with_mocked_client, mock_model, mock_message):
|
|
||||||
"""RateLimitError shows rate limit message."""
|
|
||||||
error = openai.RateLimitError(
|
|
||||||
message='Rate limit exceeded',
|
|
||||||
response=MagicMock(status_code=429),
|
|
||||||
body={}
|
|
||||||
)
|
|
||||||
requester_with_mocked_client.client.chat.completions.create = AsyncMock(
|
|
||||||
side_effect=error
|
|
||||||
)
|
|
||||||
|
|
||||||
with pytest.raises(RequesterError) as exc:
|
|
||||||
await requester_with_mocked_client.invoke_llm(
|
|
||||||
query=None,
|
|
||||||
model=mock_model,
|
|
||||||
messages=[mock_message],
|
|
||||||
)
|
|
||||||
|
|
||||||
assert '频繁' in str(exc.value) or '余额' in str(exc.value)
|
|
||||||
|
|
||||||
|
|
||||||
class TestInvokeEmbeddingErrorHandling:
|
|
||||||
"""Tests for invoke_embedding error handling."""
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def mock_app(self):
|
|
||||||
return MagicMock()
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def mock_embedding_model(self):
|
|
||||||
model = MagicMock()
|
|
||||||
model.model_entity = MagicMock()
|
|
||||||
model.model_entity.name = 'text-embedding-ada-002'
|
|
||||||
model.model_entity.extra_args = {}
|
|
||||||
model.provider = MagicMock()
|
|
||||||
model.provider.token_mgr = MagicMock()
|
|
||||||
model.provider.token_mgr.get_token = MagicMock(return_value='test-key')
|
|
||||||
return model
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def requester_with_mocked_client(self, mock_app):
|
|
||||||
from langbot.pkg.provider.modelmgr.requesters.chatcmpl import OpenAIChatCompletions
|
|
||||||
|
|
||||||
req = OpenAIChatCompletions(mock_app, {})
|
|
||||||
req.client = MagicMock()
|
|
||||||
req.client.embeddings = MagicMock()
|
|
||||||
req.client.embeddings.create = AsyncMock()
|
|
||||||
|
|
||||||
return req
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_embedding_timeout_error(self, requester_with_mocked_client, mock_embedding_model):
|
|
||||||
"""TimeoutError in embedding request."""
|
|
||||||
requester_with_mocked_client.client.embeddings.create = AsyncMock(
|
|
||||||
side_effect=asyncio.TimeoutError()
|
|
||||||
)
|
|
||||||
|
|
||||||
with pytest.raises(RequesterError) as exc:
|
|
||||||
await requester_with_mocked_client.invoke_embedding(
|
|
||||||
model=mock_embedding_model,
|
|
||||||
input_text=['test'],
|
|
||||||
)
|
|
||||||
|
|
||||||
assert '超时' in str(exc.value)
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_embedding_bad_request_error(self, requester_with_mocked_client, mock_embedding_model):
|
|
||||||
"""BadRequestError in embedding request."""
|
|
||||||
error = openai.BadRequestError(
|
|
||||||
message='Invalid model',
|
|
||||||
response=MagicMock(status_code=400),
|
|
||||||
body={}
|
|
||||||
)
|
|
||||||
requester_with_mocked_client.client.embeddings.create = AsyncMock(
|
|
||||||
side_effect=error
|
|
||||||
)
|
|
||||||
|
|
||||||
with pytest.raises(RequesterError) as exc:
|
|
||||||
await requester_with_mocked_client.invoke_embedding(
|
|
||||||
model=mock_embedding_model,
|
|
||||||
input_text=['test'],
|
|
||||||
)
|
|
||||||
|
|
||||||
assert '参数' in str(exc.value)
|
|
||||||
|
|
||||||
|
|
||||||
class TestRequesterErrorClass:
|
|
||||||
"""Tests for RequesterError."""
|
|
||||||
|
|
||||||
def test_error_message_prefix(self):
|
|
||||||
"""RequesterError has '模型请求失败' prefix."""
|
|
||||||
from langbot.pkg.provider.modelmgr.errors import RequesterError
|
|
||||||
|
|
||||||
error = RequesterError('test error')
|
|
||||||
assert '模型请求失败' in str(error)
|
|
||||||
|
|
||||||
def test_error_is_exception(self):
|
|
||||||
"""RequesterError inherits Exception."""
|
|
||||||
from langbot.pkg.provider.modelmgr.errors import RequesterError
|
|
||||||
|
|
||||||
error = RequesterError('test')
|
|
||||||
assert isinstance(error, Exception)
|
|
||||||
|
|
||||||
|
|
||||||
class TestDefaultConfig:
|
|
||||||
"""Tests for requester default config."""
|
|
||||||
|
|
||||||
def test_default_config(self):
|
|
||||||
"""Check default_config values."""
|
|
||||||
from langbot.pkg.provider.modelmgr.requesters.chatcmpl import OpenAIChatCompletions
|
|
||||||
|
|
||||||
assert OpenAIChatCompletions.default_config['base_url'] == 'https://api.openai.com/v1'
|
|
||||||
assert OpenAIChatCompletions.default_config['timeout'] == 120
|
|
||||||
|
|
||||||
def test_config_override(self):
|
|
||||||
"""Config overrides defaults."""
|
|
||||||
from langbot.pkg.provider.modelmgr.requesters.chatcmpl import OpenAIChatCompletions
|
|
||||||
|
|
||||||
req = OpenAIChatCompletions(MagicMock(), {
|
|
||||||
'base_url': 'https://custom.com/v1',
|
|
||||||
'timeout': 60,
|
|
||||||
})
|
|
||||||
|
|
||||||
assert req.requester_cfg['base_url'] == 'https://custom.com/v1'
|
|
||||||
assert req.requester_cfg['timeout'] == 60
|
|
||||||
@@ -1,340 +0,0 @@
|
|||||||
"""Tests for requester pure utility functions.
|
|
||||||
|
|
||||||
Tests the helper methods in OpenAIChatCompletions that don't require network calls.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from unittest.mock import MagicMock
|
|
||||||
|
|
||||||
from tests.utils.import_isolation import isolated_sys_modules
|
|
||||||
|
|
||||||
|
|
||||||
class TestMaskApiKey:
|
|
||||||
"""Tests for _mask_api_key method."""
|
|
||||||
|
|
||||||
def _create_requester_with_mocks(self):
|
|
||||||
"""Create requester instance with mocked dependencies."""
|
|
||||||
mocks = {
|
|
||||||
'langbot.pkg.core.app': MagicMock(),
|
|
||||||
'langbot_plugin.api.entities.builtin.resource.tool': MagicMock(),
|
|
||||||
'langbot_plugin.api.entities.builtin.pipeline.query': MagicMock(),
|
|
||||||
'langbot_plugin.api.entities.builtin.provider.message': MagicMock(),
|
|
||||||
'langbot.pkg.provider.modelmgr.errors': MagicMock(),
|
|
||||||
}
|
|
||||||
|
|
||||||
with isolated_sys_modules(mocks):
|
|
||||||
from langbot.pkg.provider.modelmgr.requesters.chatcmpl import OpenAIChatCompletions
|
|
||||||
|
|
||||||
mock_app = MagicMock()
|
|
||||||
requester = OpenAIChatCompletions(mock_app, {})
|
|
||||||
return requester
|
|
||||||
|
|
||||||
def test_mask_api_key_full(self):
|
|
||||||
"""Mask a full API key."""
|
|
||||||
requester = self._create_requester_with_mocks()
|
|
||||||
|
|
||||||
result = requester._mask_api_key('sk-1234567890abcdef')
|
|
||||||
assert result == 'sk-1...cdef'
|
|
||||||
|
|
||||||
def test_mask_api_key_short(self):
|
|
||||||
"""Mask a short API key (<=8 chars)."""
|
|
||||||
requester = self._create_requester_with_mocks()
|
|
||||||
|
|
||||||
result = requester._mask_api_key('short')
|
|
||||||
assert result == '****'
|
|
||||||
|
|
||||||
def test_mask_api_key_empty(self):
|
|
||||||
"""Empty API key returns empty string."""
|
|
||||||
requester = self._create_requester_with_mocks()
|
|
||||||
|
|
||||||
result = requester._mask_api_key('')
|
|
||||||
assert result == ''
|
|
||||||
|
|
||||||
def test_mask_api_key_none(self):
|
|
||||||
"""None API key returns empty string."""
|
|
||||||
requester = self._create_requester_with_mocks()
|
|
||||||
|
|
||||||
result = requester._mask_api_key(None)
|
|
||||||
assert result == ''
|
|
||||||
|
|
||||||
def test_mask_api_key_exact_8_chars(self):
|
|
||||||
"""API key with exactly 8 chars is masked as **** (<=8 threshold)."""
|
|
||||||
requester = self._create_requester_with_mocks()
|
|
||||||
|
|
||||||
result = requester._mask_api_key('12345678')
|
|
||||||
assert result == '****' # <= 8 chars gets masked
|
|
||||||
|
|
||||||
|
|
||||||
class TestInferModelType:
|
|
||||||
"""Tests for _infer_model_type method."""
|
|
||||||
|
|
||||||
def _create_requester_with_mocks(self):
|
|
||||||
mocks = {
|
|
||||||
'langbot.pkg.core.app': MagicMock(),
|
|
||||||
'langbot_plugin.api.entities.builtin.resource.tool': MagicMock(),
|
|
||||||
'langbot_plugin.api.entities.builtin.pipeline.query': MagicMock(),
|
|
||||||
'langbot_plugin.api.entities.builtin.provider.message': MagicMock(),
|
|
||||||
'langbot.pkg.provider.modelmgr.errors': MagicMock(),
|
|
||||||
}
|
|
||||||
|
|
||||||
with isolated_sys_modules(mocks):
|
|
||||||
from langbot.pkg.provider.modelmgr.requesters.chatcmpl import OpenAIChatCompletions
|
|
||||||
|
|
||||||
mock_app = MagicMock()
|
|
||||||
requester = OpenAIChatCompletions(mock_app, {})
|
|
||||||
return requester
|
|
||||||
|
|
||||||
def test_infer_embedding_from_name(self):
|
|
||||||
"""Infer embedding type from model name."""
|
|
||||||
requester = self._create_requester_with_mocks()
|
|
||||||
|
|
||||||
assert requester._infer_model_type('text-embedding-ada-002') == 'embedding'
|
|
||||||
assert requester._infer_model_type('bge-large-en') == 'embedding'
|
|
||||||
assert requester._infer_model_type('e5-base') == 'embedding'
|
|
||||||
assert requester._infer_model_type('m3e-base') == 'embedding'
|
|
||||||
|
|
||||||
def test_infer_llm_from_name(self):
|
|
||||||
"""Infer LLM type from model name."""
|
|
||||||
requester = self._create_requester_with_mocks()
|
|
||||||
|
|
||||||
assert requester._infer_model_type('gpt-4') == 'llm'
|
|
||||||
assert requester._infer_model_type('claude-3-opus') == 'llm'
|
|
||||||
assert requester._infer_model_type('llama-2-70b') == 'llm'
|
|
||||||
|
|
||||||
def test_infer_model_type_none_id(self):
|
|
||||||
"""Handle None model_id."""
|
|
||||||
requester = self._create_requester_with_mocks()
|
|
||||||
|
|
||||||
result = requester._infer_model_type(None)
|
|
||||||
assert result == 'llm' # Default
|
|
||||||
|
|
||||||
def test_infer_model_type_empty_id(self):
|
|
||||||
"""Handle empty model_id."""
|
|
||||||
requester = self._create_requester_with_mocks()
|
|
||||||
|
|
||||||
result = requester._infer_model_type('')
|
|
||||||
assert result == 'llm' # Default
|
|
||||||
|
|
||||||
|
|
||||||
class TestNormalizeModalities:
|
|
||||||
"""Tests for _normalize_modalities method."""
|
|
||||||
|
|
||||||
def _create_requester_with_mocks(self):
|
|
||||||
mocks = {
|
|
||||||
'langbot.pkg.core.app': MagicMock(),
|
|
||||||
'langbot_plugin.api.entities.builtin.resource.tool': MagicMock(),
|
|
||||||
'langbot_plugin.api.entities.builtin.pipeline.query': MagicMock(),
|
|
||||||
'langbot_plugin.api.entities.builtin.provider.message': MagicMock(),
|
|
||||||
'langbot.pkg.provider.modelmgr.errors': MagicMock(),
|
|
||||||
}
|
|
||||||
|
|
||||||
with isolated_sys_modules(mocks):
|
|
||||||
from langbot.pkg.provider.modelmgr.requesters.chatcmpl import OpenAIChatCompletions
|
|
||||||
|
|
||||||
mock_app = MagicMock()
|
|
||||||
requester = OpenAIChatCompletions(mock_app, {})
|
|
||||||
return requester
|
|
||||||
|
|
||||||
def test_normalize_string_modality(self):
|
|
||||||
"""Normalize single string modality."""
|
|
||||||
requester = self._create_requester_with_mocks()
|
|
||||||
|
|
||||||
result = requester._normalize_modalities('text,image')
|
|
||||||
assert result == ['text', 'image']
|
|
||||||
|
|
||||||
def test_normalize_list_modalities(self):
|
|
||||||
"""Normalize list of modalities."""
|
|
||||||
requester = self._create_requester_with_mocks()
|
|
||||||
|
|
||||||
result = requester._normalize_modalities(['text', 'image', 'audio'])
|
|
||||||
assert result == ['text', 'image', 'audio']
|
|
||||||
|
|
||||||
def test_normalize_dict_modalities(self):
|
|
||||||
"""Normalize dict with nested modalities."""
|
|
||||||
requester = self._create_requester_with_mocks()
|
|
||||||
|
|
||||||
result = requester._normalize_modalities({'input': ['text'], 'output': ['text', 'image']})
|
|
||||||
assert result == ['text', 'image']
|
|
||||||
|
|
||||||
def test_normalize_none(self):
|
|
||||||
"""Handle None input."""
|
|
||||||
requester = self._create_requester_with_mocks()
|
|
||||||
|
|
||||||
result = requester._normalize_modalities(None)
|
|
||||||
assert result == []
|
|
||||||
|
|
||||||
def test_normalize_arrow_separator(self):
|
|
||||||
"""Handle arrow separator in modality string."""
|
|
||||||
requester = self._create_requester_with_mocks()
|
|
||||||
|
|
||||||
result = requester._normalize_modalities('text->image')
|
|
||||||
assert result == ['text', 'image']
|
|
||||||
|
|
||||||
|
|
||||||
class TestParseRerankResponse:
|
|
||||||
"""Tests for _parse_rerank_response static method."""
|
|
||||||
|
|
||||||
def test_parse_cohere_jina_format(self):
|
|
||||||
"""Parse Cohere/Jina/SiliconFlow format."""
|
|
||||||
from langbot.pkg.provider.modelmgr.requesters.chatcmpl import OpenAIChatCompletions
|
|
||||||
|
|
||||||
data = {
|
|
||||||
'results': [
|
|
||||||
{'index': 0, 'relevance_score': 0.95},
|
|
||||||
{'index': 1, 'relevance_score': 0.80},
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
result = OpenAIChatCompletions._parse_rerank_response(data)
|
|
||||||
assert result == [
|
|
||||||
{'index': 0, 'relevance_score': 0.95},
|
|
||||||
{'index': 1, 'relevance_score': 0.80},
|
|
||||||
]
|
|
||||||
|
|
||||||
def test_parse_voyage_format(self):
|
|
||||||
"""Parse Voyage AI format."""
|
|
||||||
from langbot.pkg.provider.modelmgr.requesters.chatcmpl import OpenAIChatCompletions
|
|
||||||
|
|
||||||
data = {
|
|
||||||
'data': [
|
|
||||||
{'index': 0, 'relevance_score': 0.90},
|
|
||||||
{'index': 2, 'relevance_score': 0.75},
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
result = OpenAIChatCompletions._parse_rerank_response(data)
|
|
||||||
assert result == [
|
|
||||||
{'index': 0, 'relevance_score': 0.90},
|
|
||||||
{'index': 2, 'relevance_score': 0.75},
|
|
||||||
]
|
|
||||||
|
|
||||||
def test_parse_dashscope_format(self):
|
|
||||||
"""Parse DashScope format."""
|
|
||||||
from langbot.pkg.provider.modelmgr.requesters.chatcmpl import OpenAIChatCompletions
|
|
||||||
|
|
||||||
data = {
|
|
||||||
'output': {
|
|
||||||
'results': [
|
|
||||||
{'index': 0, 'relevance_score': 0.85},
|
|
||||||
]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
result = OpenAIChatCompletions._parse_rerank_response(data)
|
|
||||||
assert result == [{'index': 0, 'relevance_score': 0.85}]
|
|
||||||
|
|
||||||
def test_parse_unknown_format(self):
|
|
||||||
"""Handle unknown format returns empty list."""
|
|
||||||
from langbot.pkg.provider.modelmgr.requesters.chatcmpl import OpenAIChatCompletions
|
|
||||||
|
|
||||||
data = {'unknown_key': 'value'}
|
|
||||||
|
|
||||||
result = OpenAIChatCompletions._parse_rerank_response(data)
|
|
||||||
assert result == []
|
|
||||||
|
|
||||||
def test_parse_empty_results(self):
|
|
||||||
"""Handle empty results."""
|
|
||||||
from langbot.pkg.provider.modelmgr.requesters.chatcmpl import OpenAIChatCompletions
|
|
||||||
|
|
||||||
data = {'results': []}
|
|
||||||
|
|
||||||
result = OpenAIChatCompletions._parse_rerank_response(data)
|
|
||||||
assert result == []
|
|
||||||
|
|
||||||
|
|
||||||
class TestExtractScanMetadata:
|
|
||||||
"""Tests for _extract_scan_metadata method."""
|
|
||||||
|
|
||||||
def _create_requester_with_mocks(self):
|
|
||||||
mocks = {
|
|
||||||
'langbot.pkg.core.app': MagicMock(),
|
|
||||||
'langbot_plugin.api.entities.builtin.resource.tool': MagicMock(),
|
|
||||||
'langbot_plugin.api.entities.builtin.pipeline.query': MagicMock(),
|
|
||||||
'langbot_plugin.api.entities.builtin.provider.message': MagicMock(),
|
|
||||||
'langbot.pkg.provider.modelmgr.errors': MagicMock(),
|
|
||||||
}
|
|
||||||
|
|
||||||
with isolated_sys_modules(mocks):
|
|
||||||
from langbot.pkg.provider.modelmgr.requesters.chatcmpl import OpenAIChatCompletions
|
|
||||||
|
|
||||||
mock_app = MagicMock()
|
|
||||||
requester = OpenAIChatCompletions(mock_app, {})
|
|
||||||
return requester
|
|
||||||
|
|
||||||
def test_extract_basic_metadata(self):
|
|
||||||
"""Extract basic model metadata."""
|
|
||||||
requester = self._create_requester_with_mocks()
|
|
||||||
|
|
||||||
item = {
|
|
||||||
'id': 'gpt-4',
|
|
||||||
'name': 'GPT-4 Turbo',
|
|
||||||
'description': 'Most capable GPT-4 model',
|
|
||||||
'context_length': 128000,
|
|
||||||
'owned_by': 'openai',
|
|
||||||
}
|
|
||||||
|
|
||||||
result = requester._extract_scan_metadata(item, 'gpt-4')
|
|
||||||
|
|
||||||
assert result['display_name'] == 'GPT-4 Turbo'
|
|
||||||
assert result['description'] == 'Most capable GPT-4 model'
|
|
||||||
assert result['context_length'] == 128000
|
|
||||||
assert result['owned_by'] == 'openai'
|
|
||||||
|
|
||||||
def test_extract_metadata_missing_fields(self):
|
|
||||||
"""Handle missing metadata fields."""
|
|
||||||
requester = self._create_requester_with_mocks()
|
|
||||||
|
|
||||||
item = {'id': 'unknown-model'}
|
|
||||||
|
|
||||||
result = requester._extract_scan_metadata(item, 'unknown-model')
|
|
||||||
|
|
||||||
assert result['display_name'] is None
|
|
||||||
assert result['description'] is None
|
|
||||||
assert result['context_length'] is None
|
|
||||||
assert result['owned_by'] is None
|
|
||||||
|
|
||||||
def test_extract_metadata_top_provider_context(self):
|
|
||||||
"""Extract context_length from top_provider."""
|
|
||||||
requester = self._create_requester_with_mocks()
|
|
||||||
|
|
||||||
item = {
|
|
||||||
'id': 'model',
|
|
||||||
'top_provider': {
|
|
||||||
'context_length': 4096,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
result = requester._extract_scan_metadata(item, 'model')
|
|
||||||
|
|
||||||
assert result['context_length'] == 4096
|
|
||||||
|
|
||||||
def test_extract_metadata_empty_strings(self):
|
|
||||||
"""Handle empty string values."""
|
|
||||||
requester = self._create_requester_with_mocks()
|
|
||||||
|
|
||||||
item = {
|
|
||||||
'id': 'model',
|
|
||||||
'name': '', # Empty name
|
|
||||||
'description': ' ', # Whitespace only
|
|
||||||
'owned_by': '',
|
|
||||||
}
|
|
||||||
|
|
||||||
result = requester._extract_scan_metadata(item, 'model')
|
|
||||||
|
|
||||||
assert result['display_name'] is None
|
|
||||||
assert result['description'] is None
|
|
||||||
assert result['owned_by'] is None
|
|
||||||
|
|
||||||
def test_extract_metadata_name_matches_id(self):
|
|
||||||
"""When name equals id, display_name is None."""
|
|
||||||
requester = self._create_requester_with_mocks()
|
|
||||||
|
|
||||||
item = {
|
|
||||||
'id': 'gpt-4',
|
|
||||||
'name': 'gpt-4', # Same as id
|
|
||||||
}
|
|
||||||
|
|
||||||
result = requester._extract_scan_metadata(item, 'gpt-4')
|
|
||||||
|
|
||||||
assert result['display_name'] is None
|
|
||||||
@@ -1,264 +0,0 @@
|
|||||||
"""Tests for OllamaChatCompletions requester.
|
|
||||||
|
|
||||||
Tests model inference, payload construction, and error handling.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
from unittest.mock import AsyncMock, MagicMock
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from langbot.pkg.provider.modelmgr.errors import RequesterError
|
|
||||||
|
|
||||||
|
|
||||||
class TestOllamaRequesterConfig:
|
|
||||||
"""Tests for default config."""
|
|
||||||
|
|
||||||
def test_default_config_values(self):
|
|
||||||
"""Check default_config."""
|
|
||||||
from langbot.pkg.provider.modelmgr.requesters.ollamachat import OllamaChatCompletions
|
|
||||||
|
|
||||||
assert OllamaChatCompletions.default_config['base_url'] == 'http://127.0.0.1:11434'
|
|
||||||
assert OllamaChatCompletions.default_config['timeout'] == 120
|
|
||||||
|
|
||||||
def test_config_override(self):
|
|
||||||
"""Config can override defaults."""
|
|
||||||
from langbot.pkg.provider.modelmgr.requesters.ollamachat import OllamaChatCompletions
|
|
||||||
|
|
||||||
mock_app = MagicMock()
|
|
||||||
req = OllamaChatCompletions(mock_app, {
|
|
||||||
'base_url': 'http://custom.ollama:11434',
|
|
||||||
'timeout': 300,
|
|
||||||
})
|
|
||||||
|
|
||||||
assert req.requester_cfg['base_url'] == 'http://custom.ollama:11434'
|
|
||||||
assert req.requester_cfg['timeout'] == 300
|
|
||||||
|
|
||||||
|
|
||||||
class TestOllamaInferModelType:
|
|
||||||
"""Tests for _infer_model_type pure function."""
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def requester(self):
|
|
||||||
from langbot.pkg.provider.modelmgr.requesters.ollamachat import OllamaChatCompletions
|
|
||||||
|
|
||||||
return OllamaChatCompletions(MagicMock(), {})
|
|
||||||
|
|
||||||
def test_infer_embedding_from_name(self, requester):
|
|
||||||
"""Embedding keywords return 'embedding'."""
|
|
||||||
assert requester._infer_model_type('nomic-embed-text') == 'embedding'
|
|
||||||
assert requester._infer_model_type('bge-large') == 'embedding'
|
|
||||||
assert requester._infer_model_type('text-embedding') == 'embedding'
|
|
||||||
|
|
||||||
def test_infer_llm_from_name(self, requester):
|
|
||||||
"""Non-embedding keywords return 'llm'."""
|
|
||||||
assert requester._infer_model_type('llama2') == 'llm'
|
|
||||||
assert requester._infer_model_type('mistral') == 'llm'
|
|
||||||
assert requester._infer_model_type('codellama') == 'llm'
|
|
||||||
|
|
||||||
def test_infer_model_type_none(self, requester):
|
|
||||||
"""None model_id returns 'llm'."""
|
|
||||||
assert requester._infer_model_type(None) == 'llm'
|
|
||||||
|
|
||||||
def test_infer_model_type_empty(self, requester):
|
|
||||||
"""Empty model_id returns 'llm'."""
|
|
||||||
assert requester._infer_model_type('') == 'llm'
|
|
||||||
|
|
||||||
|
|
||||||
class TestOllamaInferModelAbilities:
|
|
||||||
"""Tests for _infer_model_abilities pure function."""
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def requester(self):
|
|
||||||
from langbot.pkg.provider.modelmgr.requesters.ollamachat import OllamaChatCompletions
|
|
||||||
|
|
||||||
return OllamaChatCompletions(MagicMock(), {})
|
|
||||||
|
|
||||||
def test_infer_vision_ability(self, requester):
|
|
||||||
"""Vision keywords add 'vision' ability."""
|
|
||||||
item = {
|
|
||||||
'details': {
|
|
||||||
'family': 'llava',
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
abilities = requester._infer_model_abilities(item, 'llava-v1.5')
|
|
||||||
assert 'vision' in abilities
|
|
||||||
|
|
||||||
def test_infer_vision_from_model_id(self, requester):
|
|
||||||
"""Vision keywords in model_id add 'vision' ability."""
|
|
||||||
item = {}
|
|
||||||
abilities = requester._infer_model_abilities(item, 'llava-7b')
|
|
||||||
assert 'vision' in abilities
|
|
||||||
|
|
||||||
def test_infer_func_call_ability(self, requester):
|
|
||||||
"""Tool/function keywords add 'func_call' ability."""
|
|
||||||
item = {
|
|
||||||
'details': {
|
|
||||||
'families': ['tools'],
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
abilities = requester._infer_model_abilities(item, 'model')
|
|
||||||
assert 'func_call' in abilities
|
|
||||||
|
|
||||||
def test_infer_no_abilities(self, requester):
|
|
||||||
"""No matching keywords returns empty abilities."""
|
|
||||||
item = {
|
|
||||||
'details': {
|
|
||||||
'family': 'llama',
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
abilities = requester._infer_model_abilities(item, 'llama-2')
|
|
||||||
assert len(abilities) == 0
|
|
||||||
|
|
||||||
def test_infer_multiple_abilities(self, requester):
|
|
||||||
"""Multiple keywords can add multiple abilities."""
|
|
||||||
item = {
|
|
||||||
'details': {
|
|
||||||
'family': 'vision',
|
|
||||||
'families': ['tools'],
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
abilities = requester._infer_model_abilities(item, 'vision-tool-model')
|
|
||||||
assert 'vision' in abilities
|
|
||||||
assert 'func_call' in abilities
|
|
||||||
|
|
||||||
|
|
||||||
class TestOllamaMakeMessage:
|
|
||||||
"""Tests for _make_msg response parsing."""
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def requester(self):
|
|
||||||
from langbot.pkg.provider.modelmgr.requesters.ollamachat import OllamaChatCompletions
|
|
||||||
|
|
||||||
return OllamaChatCompletions(MagicMock(), {})
|
|
||||||
|
|
||||||
def _create_ollama_response(self, content, tool_calls=None):
|
|
||||||
"""Helper to create mock ollama response."""
|
|
||||||
import ollama
|
|
||||||
|
|
||||||
mock_response = MagicMock(spec=ollama.ChatResponse)
|
|
||||||
mock_message = MagicMock(spec=ollama.Message)
|
|
||||||
mock_message.content = content
|
|
||||||
mock_message.tool_calls = tool_calls
|
|
||||||
mock_response.message = mock_message
|
|
||||||
|
|
||||||
return mock_response
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_make_msg_text_content(self, requester):
|
|
||||||
"""Text content is extracted."""
|
|
||||||
mock_response = self._create_ollama_response('Hello world')
|
|
||||||
|
|
||||||
result = await requester._make_msg(mock_response)
|
|
||||||
|
|
||||||
assert result.content == 'Hello world'
|
|
||||||
assert result.role == 'assistant'
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_make_msg_with_tool_calls(self, requester):
|
|
||||||
"""Tool calls are parsed."""
|
|
||||||
mock_tool_call = MagicMock()
|
|
||||||
mock_tool_call.function = MagicMock()
|
|
||||||
mock_tool_call.function.name = 'get_weather'
|
|
||||||
mock_tool_call.function.arguments = {'location': 'Beijing'}
|
|
||||||
|
|
||||||
mock_response = self._create_ollama_response('', tool_calls=[mock_tool_call])
|
|
||||||
|
|
||||||
result = await requester._make_msg(mock_response)
|
|
||||||
|
|
||||||
assert result.tool_calls is not None
|
|
||||||
assert len(result.tool_calls) == 1
|
|
||||||
assert result.tool_calls[0].function.name == 'get_weather'
|
|
||||||
# Arguments should be JSON string
|
|
||||||
assert isinstance(result.tool_calls[0].function.arguments, str)
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_make_msg_empty_message_raises(self, requester):
|
|
||||||
"""Empty message raises ValueError."""
|
|
||||||
mock_response = MagicMock()
|
|
||||||
mock_response.message = None
|
|
||||||
|
|
||||||
with pytest.raises(ValueError, match='message'):
|
|
||||||
await requester._make_msg(mock_response)
|
|
||||||
|
|
||||||
|
|
||||||
class TestOllamaErrorHandling:
|
|
||||||
"""Tests for error handling branches."""
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def mock_app(self):
|
|
||||||
app = MagicMock()
|
|
||||||
app.tool_mgr = MagicMock()
|
|
||||||
app.tool_mgr.generate_tools_for_openai = AsyncMock(return_value=[])
|
|
||||||
return app
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def requester_with_mocked_client(self, mock_app):
|
|
||||||
from langbot.pkg.provider.modelmgr.requesters.ollamachat import OllamaChatCompletions
|
|
||||||
|
|
||||||
req = OllamaChatCompletions(mock_app, {})
|
|
||||||
req.client = MagicMock()
|
|
||||||
req.client.chat = AsyncMock()
|
|
||||||
|
|
||||||
return req
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def mock_model(self):
|
|
||||||
model = MagicMock()
|
|
||||||
model.model_entity = MagicMock()
|
|
||||||
model.model_entity.name = 'llama2'
|
|
||||||
model.provider = MagicMock()
|
|
||||||
model.provider.token_mgr = MagicMock()
|
|
||||||
model.provider.token_mgr.get_token = MagicMock(return_value='')
|
|
||||||
return model
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def mock_message(self):
|
|
||||||
msg = MagicMock()
|
|
||||||
msg.role = 'user'
|
|
||||||
msg.content = 'test'
|
|
||||||
msg.dict = MagicMock(return_value={'role': 'user', 'content': 'test'})
|
|
||||||
return msg
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_timeout_error(self, requester_with_mocked_client, mock_model, mock_message):
|
|
||||||
"""TimeoutError is converted to RequesterError."""
|
|
||||||
requester_with_mocked_client.client.chat = AsyncMock(side_effect=asyncio.TimeoutError())
|
|
||||||
|
|
||||||
with pytest.raises(RequesterError) as exc:
|
|
||||||
await requester_with_mocked_client.invoke_llm(
|
|
||||||
query=None,
|
|
||||||
model=mock_model,
|
|
||||||
messages=[mock_message],
|
|
||||||
)
|
|
||||||
|
|
||||||
assert '超时' in str(exc.value)
|
|
||||||
|
|
||||||
|
|
||||||
class TestOllamaScanModels:
|
|
||||||
"""Tests for scan_models method."""
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def mock_app(self):
|
|
||||||
return MagicMock()
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def requester(self, mock_app):
|
|
||||||
from langbot.pkg.provider.modelmgr.requesters.ollamachat import OllamaChatCompletions
|
|
||||||
|
|
||||||
req = OllamaChatCompletions(mock_app, {
|
|
||||||
'base_url': 'http://127.0.0.1:11434',
|
|
||||||
'timeout': 120,
|
|
||||||
})
|
|
||||||
return req
|
|
||||||
|
|
||||||
def test_requester_name_constant(self):
|
|
||||||
"""REQUESTER_NAME constant exists."""
|
|
||||||
from langbot.pkg.provider.modelmgr.requesters.ollamachat import REQUESTER_NAME
|
|
||||||
|
|
||||||
assert REQUESTER_NAME == 'ollama-chat'
|
|
||||||
916
tests/unit_tests/provider/test_litellmchat.py
Normal file
@@ -0,0 +1,916 @@
|
|||||||
|
"""
|
||||||
|
Tests for LiteLLMRequester - unified requester for chat, embedding, and rerank.
|
||||||
|
|
||||||
|
These tests verify:
|
||||||
|
- Parameter building and LiteLLM API calls
|
||||||
|
- Response processing and usage extraction
|
||||||
|
- Error handling and exception translation
|
||||||
|
- Model name building with provider prefix
|
||||||
|
"""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from unittest.mock import Mock, AsyncMock, patch
|
||||||
|
|
||||||
|
import litellm
|
||||||
|
|
||||||
|
from langbot.pkg.provider.modelmgr.requesters import litellmchat
|
||||||
|
from langbot.pkg.provider.modelmgr import errors
|
||||||
|
|
||||||
|
|
||||||
|
class MockRuntimeModel:
|
||||||
|
"""Mock RuntimeLLMModel for testing"""
|
||||||
|
|
||||||
|
def __init__(self, model_name: str = 'gpt-4o', api_key: str = 'test-key'):
|
||||||
|
self.model_entity = Mock()
|
||||||
|
self.model_entity.name = model_name
|
||||||
|
self.model_entity.extra_args = {}
|
||||||
|
self.provider = Mock()
|
||||||
|
self.provider.token_mgr = Mock()
|
||||||
|
self.provider.token_mgr.get_token = Mock(return_value=api_key)
|
||||||
|
|
||||||
|
|
||||||
|
class MockRuntimeEmbeddingModel:
|
||||||
|
"""Mock RuntimeEmbeddingModel for testing"""
|
||||||
|
|
||||||
|
def __init__(self, model_name: str = 'text-embedding-3-small', api_key: str = 'test-key'):
|
||||||
|
self.model_entity = Mock()
|
||||||
|
self.model_entity.name = model_name
|
||||||
|
self.model_entity.extra_args = {}
|
||||||
|
self.provider = Mock()
|
||||||
|
self.provider.token_mgr = Mock()
|
||||||
|
self.provider.token_mgr.get_token = Mock(return_value=api_key)
|
||||||
|
|
||||||
|
|
||||||
|
class MockRuntimeRerankModel:
|
||||||
|
"""Mock RuntimeRerankModel for testing"""
|
||||||
|
|
||||||
|
def __init__(self, model_name: str = 'cohere/rerank-english-v3.0', api_key: str = 'test-key'):
|
||||||
|
self.model_entity = Mock()
|
||||||
|
self.model_entity.name = model_name
|
||||||
|
self.model_entity.extra_args = {}
|
||||||
|
self.provider = Mock()
|
||||||
|
self.provider.token_mgr = Mock()
|
||||||
|
self.provider.token_mgr.get_token = Mock(return_value=api_key)
|
||||||
|
|
||||||
|
|
||||||
|
class TestBuildLiteLLMModelName:
|
||||||
|
"""Test _build_litellm_model_name method"""
|
||||||
|
|
||||||
|
def test_no_provider_prefix(self):
|
||||||
|
"""Test model name without provider prefix"""
|
||||||
|
requester = litellmchat.LiteLLMRequester(ap=Mock(), config={'custom_llm_provider': ''})
|
||||||
|
result = requester._build_litellm_model_name('gpt-4o')
|
||||||
|
assert result == 'gpt-4o'
|
||||||
|
|
||||||
|
def test_with_provider_prefix(self):
|
||||||
|
"""Test model name with provider prefix"""
|
||||||
|
requester = litellmchat.LiteLLMRequester(ap=Mock(), config={'custom_llm_provider': 'openai'})
|
||||||
|
result = requester._build_litellm_model_name('gpt-4o')
|
||||||
|
assert result == 'openai/gpt-4o'
|
||||||
|
|
||||||
|
def test_avoid_duplicate_provider_prefix(self):
|
||||||
|
"""Test model name with an existing matching provider prefix."""
|
||||||
|
requester = litellmchat.LiteLLMRequester(ap=Mock(), config={'custom_llm_provider': 'openai'})
|
||||||
|
result = requester._build_litellm_model_name('openai/gpt-4o')
|
||||||
|
assert result == 'openai/gpt-4o'
|
||||||
|
|
||||||
|
def test_override_provider(self):
|
||||||
|
"""Test override provider via parameter"""
|
||||||
|
requester = litellmchat.LiteLLMRequester(ap=Mock(), config={'custom_llm_provider': 'openai'})
|
||||||
|
result = requester._build_litellm_model_name('claude-3', custom_llm_provider='anthropic')
|
||||||
|
assert result == 'anthropic/claude-3'
|
||||||
|
|
||||||
|
|
||||||
|
class TestExtractUsage:
|
||||||
|
"""Test _extract_usage method"""
|
||||||
|
|
||||||
|
def test_extract_usage_with_data(self):
|
||||||
|
"""Test extraction with valid usage data"""
|
||||||
|
requester = litellmchat.LiteLLMRequester(ap=Mock(), config={})
|
||||||
|
|
||||||
|
response = Mock()
|
||||||
|
response.usage = Mock()
|
||||||
|
response.usage.prompt_tokens = 100
|
||||||
|
response.usage.completion_tokens = 50
|
||||||
|
response.usage.total_tokens = 150
|
||||||
|
|
||||||
|
result = requester._extract_usage(response)
|
||||||
|
|
||||||
|
assert result['prompt_tokens'] == 100
|
||||||
|
assert result['completion_tokens'] == 50
|
||||||
|
assert result['total_tokens'] == 150
|
||||||
|
|
||||||
|
def test_extract_usage_with_zero_values(self):
|
||||||
|
"""Test extraction when values are 0"""
|
||||||
|
requester = litellmchat.LiteLLMRequester(ap=Mock(), config={})
|
||||||
|
|
||||||
|
response = Mock()
|
||||||
|
response.usage = Mock()
|
||||||
|
response.usage.prompt_tokens = 0
|
||||||
|
response.usage.completion_tokens = 0
|
||||||
|
response.usage.total_tokens = 0
|
||||||
|
|
||||||
|
result = requester._extract_usage(response)
|
||||||
|
|
||||||
|
assert result['prompt_tokens'] == 0
|
||||||
|
assert result['completion_tokens'] == 0
|
||||||
|
|
||||||
|
|
||||||
|
class TestNormalizeUsage:
|
||||||
|
"""Test _normalize_usage helper covering real-world usage shapes"""
|
||||||
|
|
||||||
|
def test_none_usage(self):
|
||||||
|
"""None usage -> all zeros (no crash)"""
|
||||||
|
result = litellmchat.LiteLLMRequester._normalize_usage(None)
|
||||||
|
assert result == {'prompt_tokens': 0, 'completion_tokens': 0, 'total_tokens': 0}
|
||||||
|
|
||||||
|
def test_dict_usage(self):
|
||||||
|
"""Usage given as a plain dict"""
|
||||||
|
result = litellmchat.LiteLLMRequester._normalize_usage(
|
||||||
|
{'prompt_tokens': 12, 'completion_tokens': 8, 'total_tokens': 20}
|
||||||
|
)
|
||||||
|
assert result == {'prompt_tokens': 12, 'completion_tokens': 8, 'total_tokens': 20}
|
||||||
|
|
||||||
|
def test_missing_total_is_derived(self):
|
||||||
|
"""When total_tokens is absent/zero it is derived from prompt + completion"""
|
||||||
|
usage = Mock()
|
||||||
|
usage.prompt_tokens = 42
|
||||||
|
usage.completion_tokens = 10
|
||||||
|
usage.total_tokens = 0
|
||||||
|
result = litellmchat.LiteLLMRequester._normalize_usage(usage)
|
||||||
|
assert result['total_tokens'] == 52
|
||||||
|
|
||||||
|
def test_partial_attrs_default_to_zero(self):
|
||||||
|
"""Missing attributes default to 0 instead of raising"""
|
||||||
|
usage = Mock(spec=['prompt_tokens'])
|
||||||
|
usage.prompt_tokens = 5
|
||||||
|
result = litellmchat.LiteLLMRequester._normalize_usage(usage)
|
||||||
|
assert result == {'prompt_tokens': 5, 'completion_tokens': 0, 'total_tokens': 5}
|
||||||
|
|
||||||
|
|
||||||
|
class TestInvokeLLMStreamUsage:
|
||||||
|
"""Regression tests for streaming token usage capture.
|
||||||
|
|
||||||
|
Real OpenAI-compatible gateways (e.g. new-api) send the final usage payload
|
||||||
|
in a chunk that still carries a (empty-delta) choice rather than an empty
|
||||||
|
`choices` list. The usage must be captured regardless, otherwise streamed
|
||||||
|
calls record 0 tokens.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def _make_chunk(self, *, content=None, tool_calls=None, finish_reason=None, usage=None, has_choice=True):
|
||||||
|
chunk = Mock()
|
||||||
|
if usage is not None:
|
||||||
|
chunk.usage = usage
|
||||||
|
else:
|
||||||
|
chunk.usage = None
|
||||||
|
if has_choice:
|
||||||
|
choice = Mock()
|
||||||
|
delta = Mock()
|
||||||
|
delta.model_dump = Mock(
|
||||||
|
return_value={'role': 'assistant', 'content': content, 'tool_calls': tool_calls}
|
||||||
|
)
|
||||||
|
choice.delta = delta
|
||||||
|
choice.finish_reason = finish_reason
|
||||||
|
chunk.choices = [choice]
|
||||||
|
else:
|
||||||
|
chunk.choices = []
|
||||||
|
return chunk
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_stream_usage_with_nonempty_choices(self):
|
||||||
|
"""Usage chunk that still has a choice must populate _stream_usage."""
|
||||||
|
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
|
||||||
|
import langbot_plugin.api.entities.builtin.provider.message as provider_message
|
||||||
|
|
||||||
|
mock_ap = Mock()
|
||||||
|
mock_ap.tool_mgr = Mock()
|
||||||
|
mock_ap.tool_mgr.generate_tools_for_openai = AsyncMock(return_value=None)
|
||||||
|
requester = litellmchat.LiteLLMRequester(ap=mock_ap, config={})
|
||||||
|
model = MockRuntimeModel('gpt-4o', 'test-api-key')
|
||||||
|
|
||||||
|
usage = Mock()
|
||||||
|
usage.prompt_tokens = 24
|
||||||
|
usage.completion_tokens = 48
|
||||||
|
usage.total_tokens = 72
|
||||||
|
|
||||||
|
chunks = [
|
||||||
|
self._make_chunk(content='Hello'),
|
||||||
|
self._make_chunk(content=None, finish_reason='stop'),
|
||||||
|
# Final usage chunk WITH a non-empty (empty-delta) choice — the bug case.
|
||||||
|
self._make_chunk(content=None, usage=usage, has_choice=True),
|
||||||
|
]
|
||||||
|
|
||||||
|
async def _aiter(*args, **kwargs):
|
||||||
|
for c in chunks:
|
||||||
|
yield c
|
||||||
|
|
||||||
|
query = Mock(spec=pipeline_query.Query)
|
||||||
|
query.variables = {}
|
||||||
|
|
||||||
|
messages = [provider_message.Message(role='user', content='Hi')]
|
||||||
|
|
||||||
|
with patch.object(litellmchat, 'acompletion', new=AsyncMock(side_effect=lambda **kw: _aiter())):
|
||||||
|
collected = []
|
||||||
|
async for ch in requester.invoke_llm_stream(query=query, model=model, messages=messages):
|
||||||
|
collected.append(ch)
|
||||||
|
|
||||||
|
assert '_stream_usage' in query.variables
|
||||||
|
assert query.variables['_stream_usage']['prompt_tokens'] == 24
|
||||||
|
assert query.variables['_stream_usage']['completion_tokens'] == 48
|
||||||
|
assert query.variables['_stream_usage']['total_tokens'] == 72
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_stream_usage_with_empty_choices(self):
|
||||||
|
"""Usage chunk with empty choices list must also populate _stream_usage."""
|
||||||
|
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
|
||||||
|
import langbot_plugin.api.entities.builtin.provider.message as provider_message
|
||||||
|
|
||||||
|
mock_ap = Mock()
|
||||||
|
mock_ap.tool_mgr = Mock()
|
||||||
|
mock_ap.tool_mgr.generate_tools_for_openai = AsyncMock(return_value=None)
|
||||||
|
requester = litellmchat.LiteLLMRequester(ap=mock_ap, config={})
|
||||||
|
model = MockRuntimeModel('gpt-4o', 'test-api-key')
|
||||||
|
|
||||||
|
usage = Mock()
|
||||||
|
usage.prompt_tokens = 5
|
||||||
|
usage.completion_tokens = 7
|
||||||
|
usage.total_tokens = 12
|
||||||
|
|
||||||
|
chunks = [
|
||||||
|
self._make_chunk(content='Hi there'),
|
||||||
|
self._make_chunk(content=None, finish_reason='stop'),
|
||||||
|
self._make_chunk(usage=usage, has_choice=False),
|
||||||
|
]
|
||||||
|
|
||||||
|
async def _aiter(*args, **kwargs):
|
||||||
|
for c in chunks:
|
||||||
|
yield c
|
||||||
|
|
||||||
|
query = Mock(spec=pipeline_query.Query)
|
||||||
|
query.variables = {}
|
||||||
|
messages = [provider_message.Message(role='user', content='Hi')]
|
||||||
|
|
||||||
|
with patch.object(litellmchat, 'acompletion', new=AsyncMock(side_effect=lambda **kw: _aiter())):
|
||||||
|
async for _ in requester.invoke_llm_stream(query=query, model=model, messages=messages):
|
||||||
|
pass
|
||||||
|
|
||||||
|
assert query.variables['_stream_usage']['total_tokens'] == 12
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_stream_tool_call_delta_missing_id_and_name(self):
|
||||||
|
"""LiteLLM may stream tool-call argument deltas with id/name set to None."""
|
||||||
|
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
|
||||||
|
import langbot_plugin.api.entities.builtin.provider.message as provider_message
|
||||||
|
|
||||||
|
mock_ap = Mock()
|
||||||
|
mock_ap.tool_mgr = Mock()
|
||||||
|
mock_ap.tool_mgr.generate_tools_for_openai = AsyncMock(
|
||||||
|
return_value=[{'type': 'function', 'function': {'name': 'qa_plugin_echo'}}]
|
||||||
|
)
|
||||||
|
requester = litellmchat.LiteLLMRequester(ap=mock_ap, config={})
|
||||||
|
model = MockRuntimeModel('gpt-4o', 'test-api-key')
|
||||||
|
|
||||||
|
chunks = [
|
||||||
|
self._make_chunk(
|
||||||
|
tool_calls=[
|
||||||
|
{
|
||||||
|
'index': 0,
|
||||||
|
'id': 'call_123',
|
||||||
|
'type': 'function',
|
||||||
|
'function': {'name': 'qa_plugin_echo', 'arguments': ''},
|
||||||
|
}
|
||||||
|
]
|
||||||
|
),
|
||||||
|
self._make_chunk(
|
||||||
|
tool_calls=[
|
||||||
|
{
|
||||||
|
'index': 0,
|
||||||
|
'id': None,
|
||||||
|
'type': None,
|
||||||
|
'function': {'name': None, 'arguments': '{"text":'},
|
||||||
|
}
|
||||||
|
]
|
||||||
|
),
|
||||||
|
self._make_chunk(
|
||||||
|
tool_calls=[
|
||||||
|
{
|
||||||
|
'index': 0,
|
||||||
|
'function': {'arguments': '"plugin-tool-ok"}'},
|
||||||
|
}
|
||||||
|
]
|
||||||
|
),
|
||||||
|
self._make_chunk(finish_reason='tool_calls'),
|
||||||
|
]
|
||||||
|
|
||||||
|
async def _aiter(*args, **kwargs):
|
||||||
|
for c in chunks:
|
||||||
|
yield c
|
||||||
|
|
||||||
|
query = Mock(spec=pipeline_query.Query)
|
||||||
|
query.variables = {}
|
||||||
|
messages = [provider_message.Message(role='user', content='Call the tool')]
|
||||||
|
funcs = [Mock()]
|
||||||
|
|
||||||
|
with patch.object(litellmchat, 'acompletion', new=AsyncMock(side_effect=lambda **kw: _aiter())):
|
||||||
|
collected = [
|
||||||
|
chunk async for chunk in requester.invoke_llm_stream(
|
||||||
|
query=query,
|
||||||
|
model=model,
|
||||||
|
messages=messages,
|
||||||
|
funcs=funcs,
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
|
tool_chunks = [chunk for chunk in collected if chunk.tool_calls]
|
||||||
|
assert len(tool_chunks) == 3
|
||||||
|
assert tool_chunks[1].tool_calls[0].id == 'call_123'
|
||||||
|
assert tool_chunks[1].tool_calls[0].function.name == 'qa_plugin_echo'
|
||||||
|
assert tool_chunks[1].tool_calls[0].function.arguments == '{"text":'
|
||||||
|
assert tool_chunks[2].tool_calls[0].function.arguments == '"plugin-tool-ok"}'
|
||||||
|
|
||||||
|
|
||||||
|
class TestProcessThinkingContent:
|
||||||
|
"""Test _process_thinking_content method"""
|
||||||
|
|
||||||
|
def test_no_thinking_markers(self):
|
||||||
|
"""Test content without thinking markers"""
|
||||||
|
requester = litellmchat.LiteLLMRequester(ap=Mock(), config={})
|
||||||
|
|
||||||
|
result = requester._process_thinking_content('Hello world', None, remove_think=True)
|
||||||
|
assert result == 'Hello world'
|
||||||
|
|
||||||
|
def test_remove_thinking_markers(self):
|
||||||
|
"""Test removing thinking markers when remove_think=True"""
|
||||||
|
requester = litellmchat.LiteLLMRequester(ap=Mock(), config={})
|
||||||
|
|
||||||
|
content = 'CRETIRE_REASONING_BEGINkLet me think...CRETIRE_REASONING_ENDk The answer is 42.'
|
||||||
|
result = requester._process_thinking_content(content, None, remove_think=True)
|
||||||
|
assert result == 'The answer is 42.'
|
||||||
|
|
||||||
|
def test_preserve_thinking_markers(self):
|
||||||
|
"""Test preserving thinking markers when remove_think=False"""
|
||||||
|
requester = litellmchat.LiteLLMRequester(ap=Mock(), config={})
|
||||||
|
|
||||||
|
content = 'CRETIRE_REASONING_BEGINkLet me think...CRETIRE_REASONING_ENDk The answer is 42.'
|
||||||
|
result = requester._process_thinking_content(content, None, remove_think=False)
|
||||||
|
assert 'CRETIRE_REASONING_BEGINk' in result
|
||||||
|
assert 'The answer is 42.' in result
|
||||||
|
|
||||||
|
def test_empty_content(self):
|
||||||
|
"""Test empty content"""
|
||||||
|
requester = litellmchat.LiteLLMRequester(ap=Mock(), config={})
|
||||||
|
|
||||||
|
result = requester._process_thinking_content('', None, remove_think=True)
|
||||||
|
assert result == ''
|
||||||
|
|
||||||
|
|
||||||
|
class TestBuildCommonArgs:
|
||||||
|
"""Test _build_common_args method"""
|
||||||
|
|
||||||
|
def test_build_args_with_all_params(self):
|
||||||
|
"""Test building args with all config params"""
|
||||||
|
requester = litellmchat.LiteLLMRequester(
|
||||||
|
ap=Mock(),
|
||||||
|
config={
|
||||||
|
'base_url': 'https://api.openai.com/v1',
|
||||||
|
'timeout': 60,
|
||||||
|
'drop_params': True,
|
||||||
|
'num_retries': 3,
|
||||||
|
'api_version': '2024-01-01',
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
args = {}
|
||||||
|
requester._build_common_args(args)
|
||||||
|
|
||||||
|
assert args['api_base'] == 'https://api.openai.com/v1'
|
||||||
|
assert args['timeout'] == 60
|
||||||
|
assert args['drop_params'] == True
|
||||||
|
assert args['num_retries'] == 3
|
||||||
|
assert args['api_version'] == '2024-01-01'
|
||||||
|
|
||||||
|
def test_build_args_without_retry_params(self):
|
||||||
|
"""Test building args without retry params for embedding/rerank"""
|
||||||
|
requester = litellmchat.LiteLLMRequester(
|
||||||
|
ap=Mock(),
|
||||||
|
config={
|
||||||
|
'base_url': 'https://api.openai.com/v1',
|
||||||
|
'timeout': 60,
|
||||||
|
'num_retries': 3,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
args = {}
|
||||||
|
requester._build_common_args(args, include_retry_params=False)
|
||||||
|
|
||||||
|
assert args['api_base'] == 'https://api.openai.com/v1'
|
||||||
|
assert args['timeout'] == 60
|
||||||
|
assert 'num_retries' not in args
|
||||||
|
|
||||||
|
|
||||||
|
class TestHandleLiteLLMError:
|
||||||
|
"""Test _handle_litellm_error method"""
|
||||||
|
|
||||||
|
def test_bad_request_error(self):
|
||||||
|
"""Test BadRequestError translation"""
|
||||||
|
requester = litellmchat.LiteLLMRequester(ap=Mock(), config={})
|
||||||
|
|
||||||
|
# Create proper LiteLLM exception with required args
|
||||||
|
error = litellm.BadRequestError(message='test error', model='gpt-4o', llm_provider='openai')
|
||||||
|
|
||||||
|
with pytest.raises(errors.RequesterError) as exc_info:
|
||||||
|
requester._handle_litellm_error(error)
|
||||||
|
|
||||||
|
assert '请求参数错误' in str(exc_info.value)
|
||||||
|
|
||||||
|
def test_authentication_error(self):
|
||||||
|
"""Test AuthenticationError translation"""
|
||||||
|
requester = litellmchat.LiteLLMRequester(ap=Mock(), config={})
|
||||||
|
|
||||||
|
error = litellm.AuthenticationError(message='invalid key', model='gpt-4o', llm_provider='openai')
|
||||||
|
|
||||||
|
with pytest.raises(errors.RequesterError) as exc_info:
|
||||||
|
requester._handle_litellm_error(error)
|
||||||
|
|
||||||
|
assert 'API key 无效' in str(exc_info.value)
|
||||||
|
|
||||||
|
def test_rate_limit_error(self):
|
||||||
|
"""Test RateLimitError translation"""
|
||||||
|
requester = litellmchat.LiteLLMRequester(ap=Mock(), config={})
|
||||||
|
|
||||||
|
error = litellm.RateLimitError(message='rate limited', model='gpt-4o', llm_provider='openai')
|
||||||
|
|
||||||
|
with pytest.raises(errors.RequesterError) as exc_info:
|
||||||
|
requester._handle_litellm_error(error)
|
||||||
|
|
||||||
|
assert '请求过于频繁' in str(exc_info.value)
|
||||||
|
|
||||||
|
def test_timeout_error(self):
|
||||||
|
"""Test Timeout translation"""
|
||||||
|
requester = litellmchat.LiteLLMRequester(ap=Mock(), config={})
|
||||||
|
|
||||||
|
error = litellm.Timeout(message='timeout', model='gpt-4o', llm_provider='openai')
|
||||||
|
|
||||||
|
with pytest.raises(errors.RequesterError) as exc_info:
|
||||||
|
requester._handle_litellm_error(error)
|
||||||
|
|
||||||
|
assert '请求超时' in str(exc_info.value)
|
||||||
|
|
||||||
|
def test_context_window_error(self):
|
||||||
|
"""Test ContextWindowExceededError translation"""
|
||||||
|
requester = litellmchat.LiteLLMRequester(ap=Mock(), config={})
|
||||||
|
|
||||||
|
error = litellm.ContextWindowExceededError(message='context too long', model='gpt-4o', llm_provider='openai')
|
||||||
|
|
||||||
|
with pytest.raises(errors.RequesterError) as exc_info:
|
||||||
|
requester._handle_litellm_error(error)
|
||||||
|
|
||||||
|
assert '上下文长度超限' in str(exc_info.value)
|
||||||
|
|
||||||
|
def test_unknown_error(self):
|
||||||
|
"""Test unknown error translation"""
|
||||||
|
requester = litellmchat.LiteLLMRequester(ap=Mock(), config={})
|
||||||
|
|
||||||
|
with pytest.raises(errors.RequesterError) as exc_info:
|
||||||
|
requester._handle_litellm_error(Exception('unknown'))
|
||||||
|
|
||||||
|
assert '未知错误' in str(exc_info.value)
|
||||||
|
|
||||||
|
|
||||||
|
class TestInvokeLLM:
|
||||||
|
"""Test invoke_llm method"""
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_invoke_llm_basic(self):
|
||||||
|
"""Test basic LLM invocation"""
|
||||||
|
mock_ap = Mock()
|
||||||
|
mock_ap.tool_mgr = Mock()
|
||||||
|
mock_ap.tool_mgr.generate_tools_for_openai = AsyncMock(return_value=None)
|
||||||
|
|
||||||
|
requester = litellmchat.LiteLLMRequester(
|
||||||
|
ap=mock_ap,
|
||||||
|
config={
|
||||||
|
'base_url': 'https://api.openai.com/v1',
|
||||||
|
'timeout': 60,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
model = MockRuntimeModel('gpt-4o', 'test-api-key')
|
||||||
|
|
||||||
|
# Mock LiteLLM response
|
||||||
|
mock_response = Mock()
|
||||||
|
mock_response.choices = [Mock()]
|
||||||
|
mock_response.choices[0].message = Mock()
|
||||||
|
mock_response.choices[0].message.model_dump = Mock(
|
||||||
|
return_value={
|
||||||
|
'role': 'assistant',
|
||||||
|
'content': 'Hello! How can I help you?',
|
||||||
|
}
|
||||||
|
)
|
||||||
|
mock_response.usage = Mock()
|
||||||
|
mock_response.usage.prompt_tokens = 10
|
||||||
|
mock_response.usage.completion_tokens = 20
|
||||||
|
mock_response.usage.total_tokens = 30
|
||||||
|
|
||||||
|
import langbot_plugin.api.entities.builtin.provider.message as provider_message
|
||||||
|
|
||||||
|
messages = [provider_message.Message(role='user', content='Hello')]
|
||||||
|
|
||||||
|
# Patch acompletion at the import location
|
||||||
|
with patch.object(litellmchat, 'acompletion', new_callable=AsyncMock, return_value=mock_response):
|
||||||
|
result_msg, usage = await requester.invoke_llm(
|
||||||
|
query=None,
|
||||||
|
model=model,
|
||||||
|
messages=messages,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result_msg.role == 'assistant'
|
||||||
|
assert result_msg.content == 'Hello! How can I help you?'
|
||||||
|
assert usage['prompt_tokens'] == 10
|
||||||
|
assert usage['completion_tokens'] == 20
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_invoke_llm_with_tools(self):
|
||||||
|
"""Test LLM invocation with function calling"""
|
||||||
|
mock_ap = Mock()
|
||||||
|
mock_ap.tool_mgr = Mock()
|
||||||
|
mock_ap.tool_mgr.generate_tools_for_openai = AsyncMock(
|
||||||
|
return_value=[{'type': 'function', 'function': {'name': 'get_weather'}}]
|
||||||
|
)
|
||||||
|
|
||||||
|
requester = litellmchat.LiteLLMRequester(ap=mock_ap, config={})
|
||||||
|
|
||||||
|
model = MockRuntimeModel('gpt-4o', 'test-api-key')
|
||||||
|
|
||||||
|
mock_response = Mock()
|
||||||
|
mock_response.choices = [Mock()]
|
||||||
|
mock_response.choices[0].message = Mock()
|
||||||
|
mock_response.choices[0].message.model_dump = Mock(
|
||||||
|
return_value={
|
||||||
|
'role': 'assistant',
|
||||||
|
'content': None,
|
||||||
|
'tool_calls': [
|
||||||
|
{'id': 'call_123', 'type': 'function', 'function': {'name': 'get_weather', 'arguments': '{}'}}
|
||||||
|
],
|
||||||
|
}
|
||||||
|
)
|
||||||
|
mock_response.usage = Mock()
|
||||||
|
mock_response.usage.prompt_tokens = 15
|
||||||
|
mock_response.usage.completion_tokens = 10
|
||||||
|
mock_response.usage.total_tokens = 25
|
||||||
|
|
||||||
|
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
|
||||||
|
import langbot_plugin.api.entities.builtin.provider.message as provider_message
|
||||||
|
|
||||||
|
messages = [provider_message.Message(role='user', content='What is the weather?')]
|
||||||
|
# Create proper LLMTool with all required fields
|
||||||
|
funcs = [Mock(spec=resource_tool.LLMTool)]
|
||||||
|
funcs[0].name = 'get_weather'
|
||||||
|
funcs[0].description = 'Get weather'
|
||||||
|
|
||||||
|
with patch.object(litellmchat, 'acompletion', new_callable=AsyncMock, return_value=mock_response):
|
||||||
|
result_msg, usage = await requester.invoke_llm(
|
||||||
|
query=None,
|
||||||
|
model=model,
|
||||||
|
messages=messages,
|
||||||
|
funcs=funcs,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result_msg.tool_calls is not None
|
||||||
|
called_kwargs = litellmchat.acompletion.await_args.kwargs
|
||||||
|
assert called_kwargs['tools'] == [{'type': 'function', 'function': {'name': 'get_weather'}}]
|
||||||
|
assert called_kwargs['tool_choice'] == 'auto'
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_build_completion_args_preserves_explicit_tool_choice(self):
|
||||||
|
"""Model extra args can override the default auto tool choice."""
|
||||||
|
mock_ap = Mock()
|
||||||
|
mock_ap.tool_mgr = Mock()
|
||||||
|
mock_ap.tool_mgr.generate_tools_for_openai = AsyncMock(
|
||||||
|
return_value=[{'type': 'function', 'function': {'name': 'get_weather'}}]
|
||||||
|
)
|
||||||
|
|
||||||
|
requester = litellmchat.LiteLLMRequester(ap=mock_ap, config={})
|
||||||
|
model = MockRuntimeModel('gpt-4o', 'test-api-key')
|
||||||
|
model.model_entity.extra_args = {'tool_choice': 'required'}
|
||||||
|
|
||||||
|
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
|
||||||
|
import langbot_plugin.api.entities.builtin.provider.message as provider_message
|
||||||
|
|
||||||
|
funcs = [Mock(spec=resource_tool.LLMTool)]
|
||||||
|
messages = [provider_message.Message(role='user', content='What is the weather?')]
|
||||||
|
|
||||||
|
args = await requester._build_completion_args(model, messages, funcs)
|
||||||
|
|
||||||
|
assert args['tool_choice'] == 'required'
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_invoke_llm_error_handling(self):
|
||||||
|
"""Test LLM invocation error handling"""
|
||||||
|
mock_ap = Mock()
|
||||||
|
mock_ap.tool_mgr = Mock()
|
||||||
|
mock_ap.tool_mgr.generate_tools_for_openai = AsyncMock(return_value=None)
|
||||||
|
|
||||||
|
requester = litellmchat.LiteLLMRequester(ap=mock_ap, config={})
|
||||||
|
|
||||||
|
model = MockRuntimeModel('gpt-4o', 'test-api-key')
|
||||||
|
|
||||||
|
import langbot_plugin.api.entities.builtin.provider.message as provider_message
|
||||||
|
|
||||||
|
messages = [provider_message.Message(role='user', content='Hello')]
|
||||||
|
|
||||||
|
error = litellm.AuthenticationError(message='invalid key', model='gpt-4o', llm_provider='openai')
|
||||||
|
|
||||||
|
with patch.object(litellmchat, 'acompletion', new_callable=AsyncMock, side_effect=error):
|
||||||
|
with pytest.raises(errors.RequesterError) as exc_info:
|
||||||
|
await requester.invoke_llm(
|
||||||
|
query=None,
|
||||||
|
model=model,
|
||||||
|
messages=messages,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert 'API key 无效' in str(exc_info.value)
|
||||||
|
|
||||||
|
|
||||||
|
class TestInvokeEmbedding:
|
||||||
|
"""Test invoke_embedding method"""
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_invoke_embedding_basic(self):
|
||||||
|
"""Test basic embedding invocation"""
|
||||||
|
requester = litellmchat.LiteLLMRequester(
|
||||||
|
ap=Mock(),
|
||||||
|
config={
|
||||||
|
'base_url': 'https://api.openai.com/v1',
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
model = MockRuntimeEmbeddingModel('text-embedding-3-small', 'test-api-key')
|
||||||
|
|
||||||
|
# Mock LiteLLM embedding response
|
||||||
|
mock_response = Mock()
|
||||||
|
mock_response.data = [
|
||||||
|
Mock(embedding=[0.1, 0.2, 0.3]),
|
||||||
|
Mock(embedding=[0.4, 0.5, 0.6]),
|
||||||
|
]
|
||||||
|
mock_response.usage = Mock()
|
||||||
|
mock_response.usage.prompt_tokens = 20
|
||||||
|
mock_response.usage.completion_tokens = 0
|
||||||
|
mock_response.usage.total_tokens = 20
|
||||||
|
|
||||||
|
with patch.object(litellmchat, 'aembedding', new_callable=AsyncMock, return_value=mock_response):
|
||||||
|
embeddings, usage = await requester.invoke_embedding(
|
||||||
|
model=model,
|
||||||
|
input_text=['Hello', 'World'],
|
||||||
|
)
|
||||||
|
|
||||||
|
assert len(embeddings) == 2
|
||||||
|
assert embeddings[0] == [0.1, 0.2, 0.3]
|
||||||
|
assert embeddings[1] == [0.4, 0.5, 0.6]
|
||||||
|
assert usage['prompt_tokens'] == 20
|
||||||
|
|
||||||
|
|
||||||
|
class TestInvokeRerank:
|
||||||
|
"""Test invoke_rerank method"""
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_invoke_rerank_basic(self):
|
||||||
|
"""Test basic rerank invocation"""
|
||||||
|
requester = litellmchat.LiteLLMRequester(
|
||||||
|
ap=Mock(),
|
||||||
|
config={
|
||||||
|
'base_url': 'https://api.cohere.ai',
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
model = MockRuntimeRerankModel('rerank-english-v3.0', 'test-api-key')
|
||||||
|
|
||||||
|
# Mock LiteLLM rerank response
|
||||||
|
mock_response = Mock()
|
||||||
|
mock_response.results = [
|
||||||
|
{'index': 0, 'relevance_score': 0.95},
|
||||||
|
{'index': 1, 'relevance_score': 0.3},
|
||||||
|
{'index': 2, 'relevance_score': 0.8},
|
||||||
|
]
|
||||||
|
|
||||||
|
with patch.object(litellmchat, 'arerank', new_callable=AsyncMock, return_value=mock_response):
|
||||||
|
results = await requester.invoke_rerank(
|
||||||
|
model=model,
|
||||||
|
query='What is the capital of France?',
|
||||||
|
documents=['Paris is the capital.', 'London is a city.', 'France is in Europe.'],
|
||||||
|
)
|
||||||
|
|
||||||
|
assert len(results) == 3
|
||||||
|
# Scores should be normalized
|
||||||
|
assert results[0]['index'] == 0
|
||||||
|
assert results[0]['relevance_score'] >= 0 and results[0]['relevance_score'] <= 1
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_invoke_rerank_normalization(self):
|
||||||
|
"""Test rerank score normalization"""
|
||||||
|
requester = litellmchat.LiteLLMRequester(ap=Mock(), config={})
|
||||||
|
|
||||||
|
model = MockRuntimeRerankModel('rerank-english-v3.0', 'test-api-key')
|
||||||
|
|
||||||
|
# Mock response with varying scores
|
||||||
|
mock_response = Mock()
|
||||||
|
mock_response.results = [
|
||||||
|
{'index': 0, 'relevance_score': 0.9},
|
||||||
|
{'index': 1, 'relevance_score': 0.1},
|
||||||
|
]
|
||||||
|
|
||||||
|
with patch.object(litellmchat, 'arerank', new_callable=AsyncMock, return_value=mock_response):
|
||||||
|
results = await requester.invoke_rerank(
|
||||||
|
model=model,
|
||||||
|
query='test query',
|
||||||
|
documents=['doc1', 'doc2'],
|
||||||
|
)
|
||||||
|
|
||||||
|
# After normalization: 0.9 -> 1.0, 0.1 -> 0.0
|
||||||
|
assert results[0]['relevance_score'] == 1.0
|
||||||
|
assert results[1]['relevance_score'] == 0.0
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_invoke_rerank_single_document(self):
|
||||||
|
"""Test rerank with single document (no normalization needed)"""
|
||||||
|
requester = litellmchat.LiteLLMRequester(ap=Mock(), config={})
|
||||||
|
|
||||||
|
model = MockRuntimeRerankModel('rerank-english-v3.0', 'test-api-key')
|
||||||
|
|
||||||
|
mock_response = Mock()
|
||||||
|
mock_response.results = [
|
||||||
|
{'index': 0, 'relevance_score': 0.5},
|
||||||
|
]
|
||||||
|
|
||||||
|
with patch.object(litellmchat, 'arerank', new_callable=AsyncMock, return_value=mock_response):
|
||||||
|
results = await requester.invoke_rerank(
|
||||||
|
model=model,
|
||||||
|
query='test query',
|
||||||
|
documents=['doc1'],
|
||||||
|
)
|
||||||
|
|
||||||
|
assert len(results) == 1
|
||||||
|
# Single score stays as is (min==max, no normalization)
|
||||||
|
assert results[0]['relevance_score'] == 0.5
|
||||||
|
|
||||||
|
|
||||||
|
class TestConvertMessages:
|
||||||
|
"""Test _convert_messages method"""
|
||||||
|
|
||||||
|
def test_convert_simple_message(self):
|
||||||
|
"""Test converting simple text message"""
|
||||||
|
requester = litellmchat.LiteLLMRequester(ap=Mock(), config={})
|
||||||
|
|
||||||
|
import langbot_plugin.api.entities.builtin.provider.message as provider_message
|
||||||
|
|
||||||
|
messages = [provider_message.Message(role='user', content='Hello')]
|
||||||
|
result = requester._convert_messages(messages)
|
||||||
|
|
||||||
|
assert len(result) == 1
|
||||||
|
assert result[0]['role'] == 'user'
|
||||||
|
assert result[0]['content'] == 'Hello'
|
||||||
|
|
||||||
|
def test_convert_message_with_image_base64(self):
|
||||||
|
"""Test converting message with image_base64 content"""
|
||||||
|
requester = litellmchat.LiteLLMRequester(ap=Mock(), config={})
|
||||||
|
|
||||||
|
import langbot_plugin.api.entities.builtin.provider.message as provider_message
|
||||||
|
|
||||||
|
messages = [
|
||||||
|
provider_message.Message(
|
||||||
|
role='user',
|
||||||
|
content=[
|
||||||
|
{'type': 'text', 'text': 'What is in this image?'},
|
||||||
|
{'type': 'image_base64', 'image_base64': 'data:image/png;base64,abc123'},
|
||||||
|
],
|
||||||
|
)
|
||||||
|
]
|
||||||
|
result = requester._convert_messages(messages)
|
||||||
|
|
||||||
|
assert len(result) == 1
|
||||||
|
content = result[0]['content']
|
||||||
|
assert isinstance(content, list)
|
||||||
|
# Check image_base64 converted to image_url
|
||||||
|
image_part = [p for p in content if p.get('type') == 'image_url'][0]
|
||||||
|
assert 'image_url' in image_part
|
||||||
|
assert image_part['image_url']['url'] == 'data:image/png;base64,abc123'
|
||||||
|
|
||||||
|
def test_convert_message_with_multiple_text_parts(self):
|
||||||
|
"""Test converting message with multiple text parts (LiteLLM handles this)"""
|
||||||
|
requester = litellmchat.LiteLLMRequester(ap=Mock(), config={})
|
||||||
|
|
||||||
|
import langbot_plugin.api.entities.builtin.provider.message as provider_message
|
||||||
|
|
||||||
|
messages = [
|
||||||
|
provider_message.Message(
|
||||||
|
role='user',
|
||||||
|
content=[
|
||||||
|
{'type': 'text', 'text': 'Hello'},
|
||||||
|
{'type': 'text', 'text': 'World'},
|
||||||
|
],
|
||||||
|
)
|
||||||
|
]
|
||||||
|
result = requester._convert_messages(messages)
|
||||||
|
|
||||||
|
assert len(result) == 1
|
||||||
|
# LiteLLM handles multiple text parts, we pass them through
|
||||||
|
assert isinstance(result[0]['content'], list)
|
||||||
|
|
||||||
|
|
||||||
|
class TestScanModels:
|
||||||
|
"""Test scan_models method"""
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_scan_models_basic(self):
|
||||||
|
"""Test basic model scanning"""
|
||||||
|
requester = litellmchat.LiteLLMRequester(
|
||||||
|
ap=Mock(),
|
||||||
|
config={
|
||||||
|
'base_url': 'https://api.openai.com/v1',
|
||||||
|
'timeout': 60,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
# Mock httpx response
|
||||||
|
mock_response = Mock()
|
||||||
|
mock_response.json = Mock(
|
||||||
|
return_value={
|
||||||
|
'data': [
|
||||||
|
{'id': 'gpt-4o'},
|
||||||
|
{'id': 'text-embedding-3-small'},
|
||||||
|
{'id': 'gpt-3.5-turbo'},
|
||||||
|
]
|
||||||
|
}
|
||||||
|
)
|
||||||
|
mock_response.raise_for_status = Mock()
|
||||||
|
|
||||||
|
with patch('httpx.AsyncClient') as mock_client:
|
||||||
|
mock_client.return_value.__aenter__ = AsyncMock(return_value=Mock())
|
||||||
|
mock_client.return_value.__aenter__.return_value.get = AsyncMock(return_value=mock_response)
|
||||||
|
|
||||||
|
result = await requester.scan_models(api_key='test-key')
|
||||||
|
|
||||||
|
assert 'models' in result
|
||||||
|
assert len(result['models']) == 3
|
||||||
|
# Check LLM models are first
|
||||||
|
assert result['models'][0]['type'] == 'llm'
|
||||||
|
# Check embedding model is detected
|
||||||
|
embedding_models = [m for m in result['models'] if m['type'] == 'embedding']
|
||||||
|
assert len(embedding_models) == 1
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_scan_models_enriches_llm_abilities_and_context_length(self):
|
||||||
|
"""Scanned LLM models get LiteLLM-derived abilities and context length."""
|
||||||
|
requester = litellmchat.LiteLLMRequester(
|
||||||
|
ap=Mock(),
|
||||||
|
config={
|
||||||
|
'base_url': 'https://api.openai.com/v1',
|
||||||
|
'timeout': 60,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
requester._supports_function_calling = Mock(side_effect=lambda model_id: model_id == 'gpt-4o')
|
||||||
|
requester._supports_vision = Mock(side_effect=lambda model_id: model_id == 'gpt-4o')
|
||||||
|
requester._safe_context_length = Mock(side_effect=lambda model_id: 128000 if model_id == 'gpt-4o' else None)
|
||||||
|
|
||||||
|
mock_response = Mock()
|
||||||
|
mock_response.json = Mock(
|
||||||
|
return_value={
|
||||||
|
'data': [
|
||||||
|
{'id': 'gpt-4o'},
|
||||||
|
{'id': 'text-embedding-3-small'},
|
||||||
|
{'id': 'bge-reranker-v2'},
|
||||||
|
]
|
||||||
|
}
|
||||||
|
)
|
||||||
|
mock_response.raise_for_status = Mock()
|
||||||
|
|
||||||
|
with patch('httpx.AsyncClient') as mock_client:
|
||||||
|
mock_client.return_value.__aenter__ = AsyncMock(return_value=Mock())
|
||||||
|
mock_client.return_value.__aenter__.return_value.get = AsyncMock(return_value=mock_response)
|
||||||
|
|
||||||
|
result = await requester.scan_models(api_key='test-key')
|
||||||
|
|
||||||
|
by_id = {model['id']: model for model in result['models']}
|
||||||
|
assert by_id['gpt-4o']['abilities'] == ['func_call', 'vision']
|
||||||
|
assert by_id['gpt-4o']['context_length'] == 128000
|
||||||
|
assert by_id['text-embedding-3-small']['type'] == 'embedding'
|
||||||
|
assert by_id['bge-reranker-v2']['type'] == 'rerank'
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_scan_models_no_base_url(self):
|
||||||
|
"""Test scan_models without base_url raises error"""
|
||||||
|
requester = litellmchat.LiteLLMRequester(
|
||||||
|
ap=Mock(),
|
||||||
|
config={
|
||||||
|
'base_url': '',
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
with pytest.raises(errors.RequesterError) as exc_info:
|
||||||
|
await requester.scan_models()
|
||||||
|
|
||||||
|
assert 'Base URL required' in str(exc_info.value)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
pytest.main([__file__, '-v'])
|
||||||
@@ -10,7 +10,7 @@ import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
|
|||||||
import langbot_plugin.api.entities.builtin.provider.message as provider_message
|
import langbot_plugin.api.entities.builtin.provider.message as provider_message
|
||||||
import langbot_plugin.api.entities.builtin.provider.session as provider_session
|
import langbot_plugin.api.entities.builtin.provider.session as provider_session
|
||||||
|
|
||||||
from langbot.pkg.provider.runners.localagent import LocalAgentRunner
|
from langbot.pkg.provider.runners.localagent import LocalAgentRunner, _StreamAccumulator
|
||||||
|
|
||||||
|
|
||||||
class RecordingProvider:
|
class RecordingProvider:
|
||||||
@@ -124,6 +124,45 @@ def make_query() -> pipeline_query.Query:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_stream_accumulator_merges_fragmented_tool_call_arguments():
|
||||||
|
accumulator = _StreamAccumulator(msg_sequence=1)
|
||||||
|
|
||||||
|
assert (
|
||||||
|
accumulator.add(
|
||||||
|
provider_message.MessageChunk(
|
||||||
|
role='assistant',
|
||||||
|
tool_calls=[
|
||||||
|
provider_message.ToolCall(
|
||||||
|
id='call-1',
|
||||||
|
type='function',
|
||||||
|
function=provider_message.FunctionCall(name='exec', arguments='{"command":'),
|
||||||
|
)
|
||||||
|
],
|
||||||
|
)
|
||||||
|
)
|
||||||
|
is None
|
||||||
|
)
|
||||||
|
|
||||||
|
emitted = accumulator.add(
|
||||||
|
provider_message.MessageChunk(
|
||||||
|
role='assistant',
|
||||||
|
tool_calls=[
|
||||||
|
provider_message.ToolCall(
|
||||||
|
id='call-1',
|
||||||
|
type='function',
|
||||||
|
function=provider_message.FunctionCall(name='exec', arguments='"pwd"}'),
|
||||||
|
)
|
||||||
|
],
|
||||||
|
is_final=True,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
assert emitted is not None
|
||||||
|
final_msg = accumulator.final_message()
|
||||||
|
assert final_msg.tool_calls[0].function.name == 'exec'
|
||||||
|
assert final_msg.tool_calls[0].function.arguments == '{"command":"pwd"}'
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_localagent_uses_exec_for_exact_calculation():
|
async def test_localagent_uses_exec_for_exact_calculation():
|
||||||
provider = RecordingProvider()
|
provider = RecordingProvider()
|
||||||
|
|||||||
@@ -16,8 +16,6 @@ from langbot.pkg.entity.persistence import model as persistence_model
|
|||||||
from langbot.pkg.pipeline.preproc.preproc import PreProcessor
|
from langbot.pkg.pipeline.preproc.preproc import PreProcessor
|
||||||
from langbot.pkg.provider.modelmgr import requester
|
from langbot.pkg.provider.modelmgr import requester
|
||||||
from langbot.pkg.provider.modelmgr.modelmgr import ModelManager
|
from langbot.pkg.provider.modelmgr.modelmgr import ModelManager
|
||||||
from langbot.pkg.provider.modelmgr.requesters.chatcmpl import OpenAIChatCompletions
|
|
||||||
from langbot.pkg.provider.modelmgr.requesters.modelscopechatcmpl import ModelScopeChatCompletions
|
|
||||||
from langbot.pkg.provider.modelmgr.token import TokenManager
|
from langbot.pkg.provider.modelmgr.token import TokenManager
|
||||||
from langbot.pkg.provider.runners.localagent import LocalAgentRunner
|
from langbot.pkg.provider.runners.localagent import LocalAgentRunner
|
||||||
|
|
||||||
@@ -90,74 +88,6 @@ def test_token_manager_next_token_ignores_empty_token_list():
|
|||||||
assert token_mgr.using_token_index == 0
|
assert token_mgr.using_token_index == 0
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_openai_requester_initialize_uses_placeholder_api_key(monkeypatch):
|
|
||||||
captured_kwargs = {}
|
|
||||||
|
|
||||||
def fake_client(**kwargs):
|
|
||||||
captured_kwargs.update(kwargs)
|
|
||||||
return SimpleNamespace(**kwargs)
|
|
||||||
|
|
||||||
monkeypatch.setattr('langbot.pkg.provider.modelmgr.requesters.chatcmpl.openai.AsyncClient', fake_client)
|
|
||||||
monkeypatch.setattr('langbot.pkg.provider.modelmgr.requesters.chatcmpl.httpx.AsyncClient', fake_client)
|
|
||||||
|
|
||||||
requester_inst = OpenAIChatCompletions(ap=SimpleNamespace(), config={})
|
|
||||||
await requester_inst.initialize()
|
|
||||||
|
|
||||||
assert captured_kwargs['api_key'] == OpenAIChatCompletions.init_api_key
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_modelscope_requester_initialize_uses_placeholder_api_key(monkeypatch):
|
|
||||||
captured_kwargs = {}
|
|
||||||
|
|
||||||
def fake_client(**kwargs):
|
|
||||||
captured_kwargs.update(kwargs)
|
|
||||||
return SimpleNamespace(**kwargs)
|
|
||||||
|
|
||||||
monkeypatch.setattr('langbot.pkg.provider.modelmgr.requesters.modelscopechatcmpl.openai.AsyncClient', fake_client)
|
|
||||||
monkeypatch.setattr('langbot.pkg.provider.modelmgr.requesters.modelscopechatcmpl.httpx.AsyncClient', fake_client)
|
|
||||||
|
|
||||||
requester_inst = ModelScopeChatCompletions(ap=SimpleNamespace(), config={})
|
|
||||||
await requester_inst.initialize()
|
|
||||||
|
|
||||||
assert captured_kwargs['api_key'] == ModelScopeChatCompletions.init_api_key
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_openai_embedding_call_overrides_placeholder_api_key():
|
|
||||||
captured_request = {}
|
|
||||||
|
|
||||||
async def fake_create(**kwargs):
|
|
||||||
captured_request['api_key'] = fake_client.api_key
|
|
||||||
captured_request['kwargs'] = kwargs
|
|
||||||
return SimpleNamespace(
|
|
||||||
data=[SimpleNamespace(embedding=[0.1, 0.2])],
|
|
||||||
usage=SimpleNamespace(prompt_tokens=3, total_tokens=3),
|
|
||||||
)
|
|
||||||
|
|
||||||
fake_client = SimpleNamespace(
|
|
||||||
api_key=OpenAIChatCompletions.init_api_key,
|
|
||||||
embeddings=SimpleNamespace(create=fake_create),
|
|
||||||
)
|
|
||||||
|
|
||||||
requester_inst = OpenAIChatCompletions(ap=SimpleNamespace(), config={})
|
|
||||||
requester_inst.client = fake_client
|
|
||||||
|
|
||||||
embeddings, usage_info = await requester_inst.invoke_embedding(
|
|
||||||
model=requester.RuntimeEmbeddingModel(
|
|
||||||
model_entity=SimpleNamespace(name='text-embedding-3-small', extra_args={}),
|
|
||||||
provider=SimpleNamespace(token_mgr=TokenManager('provider-uuid', [' runtime-key ', '', 'runtime-key'])),
|
|
||||||
),
|
|
||||||
input_text=['hello'],
|
|
||||||
)
|
|
||||||
|
|
||||||
assert captured_request['api_key'] == 'runtime-key'
|
|
||||||
assert captured_request['kwargs']['model'] == 'text-embedding-3-small'
|
|
||||||
assert embeddings == [[0.1, 0.2]]
|
|
||||||
assert usage_info == {'prompt_tokens': 3, 'total_tokens': 3}
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_updated_llm_model_is_immediately_usable_by_local_agent_pipeline():
|
async def test_updated_llm_model_is_immediately_usable_by_local_agent_pipeline():
|
||||||
from langbot.pkg.api.http.service.model import LLMModelsService
|
from langbot.pkg.api.http.service.model import LLMModelsService
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
"""Unit tests for ToolManager.
|
"""Unit tests for ToolManager.
|
||||||
|
|
||||||
Tests cover:
|
Tests cover:
|
||||||
- Tool schema generation for OpenAI and Anthropic
|
- Tool schema generation for OpenAI/LiteLLM
|
||||||
- Tool execution dispatch
|
- Tool execution dispatch
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@@ -109,28 +109,6 @@ class TestToolManagerSchemaGeneration:
|
|||||||
assert tool2['type'] == 'function'
|
assert tool2['type'] == 'function'
|
||||||
assert tool2['function']['name'] == 'calculate'
|
assert tool2['function']['name'] == 'calculate'
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_generate_tools_for_anthropic(self, mock_app, sample_tools):
|
|
||||||
"""Test that generate_tools_for_anthropic produces correct schema."""
|
|
||||||
toolmgr = get_toolmgr_module()
|
|
||||||
|
|
||||||
manager = toolmgr.ToolManager(mock_app)
|
|
||||||
result = await manager.generate_tools_for_anthropic(sample_tools)
|
|
||||||
|
|
||||||
assert len(result) == 2
|
|
||||||
|
|
||||||
# Verify first tool schema (Anthropic format)
|
|
||||||
tool1 = result[0]
|
|
||||||
assert tool1['name'] == 'get_weather'
|
|
||||||
assert tool1['description'] == 'Get current weather for a location'
|
|
||||||
assert 'input_schema' in tool1
|
|
||||||
assert tool1['input_schema']['type'] == 'object'
|
|
||||||
|
|
||||||
# Verify second tool schema
|
|
||||||
tool2 = result[1]
|
|
||||||
assert tool2['name'] == 'calculate'
|
|
||||||
assert 'input_schema' in tool2
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_generate_tools_empty_list(self, mock_app):
|
async def test_generate_tools_empty_list(self, mock_app):
|
||||||
"""Test that generating tools from empty list returns empty list."""
|
"""Test that generating tools from empty list returns empty list."""
|
||||||
@@ -141,9 +119,6 @@ class TestToolManagerSchemaGeneration:
|
|||||||
openai_result = await manager.generate_tools_for_openai([])
|
openai_result = await manager.generate_tools_for_openai([])
|
||||||
assert openai_result == []
|
assert openai_result == []
|
||||||
|
|
||||||
anthropic_result = await manager.generate_tools_for_anthropic([])
|
|
||||||
assert anthropic_result == []
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_openai_schema_fields_complete(self, mock_app, sample_tools):
|
async def test_openai_schema_fields_complete(self, mock_app, sample_tools):
|
||||||
"""Test that OpenAI schema includes all required fields."""
|
"""Test that OpenAI schema includes all required fields."""
|
||||||
@@ -161,20 +136,6 @@ class TestToolManagerSchemaGeneration:
|
|||||||
assert 'description' in func
|
assert 'description' in func
|
||||||
assert 'parameters' in func
|
assert 'parameters' in func
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_anthropic_schema_fields_complete(self, mock_app, sample_tools):
|
|
||||||
"""Test that Anthropic schema includes all required fields."""
|
|
||||||
toolmgr = get_toolmgr_module()
|
|
||||||
|
|
||||||
manager = toolmgr.ToolManager(mock_app)
|
|
||||||
result = await manager.generate_tools_for_anthropic(sample_tools)
|
|
||||||
|
|
||||||
for tool_schema in result:
|
|
||||||
assert 'name' in tool_schema
|
|
||||||
assert 'description' in tool_schema
|
|
||||||
assert 'input_schema' in tool_schema
|
|
||||||
|
|
||||||
|
|
||||||
class TestToolManagerExecuteFuncCall:
|
class TestToolManagerExecuteFuncCall:
|
||||||
"""Tests for execute_func_call method."""
|
"""Tests for execute_func_call method."""
|
||||||
|
|
||||||
|
|||||||
83
uv.lock
generated
@@ -1168,6 +1168,58 @@ wheels = [
|
|||||||
{ url = "https://files.pythonhosted.org/packages/bf/ee/aa015c5de8b0dc42a8e507eae8c2de5d1c0e068c896858fec6d502402ed6/ebooklib-0.20-py3-none-any.whl", hash = "sha256:fff5322517a37e31c972d27be7d982cc3928c16b3dcc5fd7e8f7c0f5d7bcf42b", size = 40995, upload-time = "2025-10-26T20:56:19.104Z" },
|
{ url = "https://files.pythonhosted.org/packages/bf/ee/aa015c5de8b0dc42a8e507eae8c2de5d1c0e068c896858fec6d502402ed6/ebooklib-0.20-py3-none-any.whl", hash = "sha256:fff5322517a37e31c972d27be7d982cc3928c16b3dcc5fd7e8f7c0f5d7bcf42b", size = 40995, upload-time = "2025-10-26T20:56:19.104Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "fastuuid"
|
||||||
|
version = "0.14.0"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/c3/7d/d9daedf0f2ebcacd20d599928f8913e9d2aea1d56d2d355a93bfa2b611d7/fastuuid-0.14.0.tar.gz", hash = "sha256:178947fc2f995b38497a74172adee64fdeb8b7ec18f2a5934d037641ba265d26", size = 18232, upload-time = "2025-10-19T22:19:22.402Z" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/98/f3/12481bda4e5b6d3e698fbf525df4443cc7dce746f246b86b6fcb2fba1844/fastuuid-0.14.0-cp311-cp311-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:73946cb950c8caf65127d4e9a325e2b6be0442a224fd51ba3b6ac44e1912ce34", size = 516386, upload-time = "2025-10-19T22:42:40.176Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/59/19/2fc58a1446e4d72b655648eb0879b04e88ed6fa70d474efcf550f640f6ec/fastuuid-0.14.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:12ac85024637586a5b69645e7ed986f7535106ed3013640a393a03e461740cb7", size = 264569, upload-time = "2025-10-19T22:25:50.977Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/78/29/3c74756e5b02c40cfcc8b1d8b5bac4edbd532b55917a6bcc9113550e99d1/fastuuid-0.14.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:05a8dde1f395e0c9b4be515b7a521403d1e8349443e7641761af07c7ad1624b1", size = 254366, upload-time = "2025-10-19T22:29:49.166Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/52/96/d761da3fccfa84f0f353ce6e3eb8b7f76b3aa21fd25e1b00a19f9c80a063/fastuuid-0.14.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:09378a05020e3e4883dfdab438926f31fea15fd17604908f3d39cbeb22a0b4dc", size = 278978, upload-time = "2025-10-19T22:35:41.306Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/fc/c2/f84c90167cc7765cb82b3ff7808057608b21c14a38531845d933a4637307/fastuuid-0.14.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bbb0c4b15d66b435d2538f3827f05e44e2baafcc003dd7d8472dc67807ab8fd8", size = 279692, upload-time = "2025-10-19T22:25:36.997Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/af/7b/4bacd03897b88c12348e7bd77943bac32ccf80ff98100598fcff74f75f2e/fastuuid-0.14.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:cd5a7f648d4365b41dbf0e38fe8da4884e57bed4e77c83598e076ac0c93995e7", size = 303384, upload-time = "2025-10-19T22:29:46.578Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/c0/a2/584f2c29641df8bd810d00c1f21d408c12e9ad0c0dafdb8b7b29e5ddf787/fastuuid-0.14.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c0a94245afae4d7af8c43b3159d5e3934c53f47140be0be624b96acd672ceb73", size = 460921, upload-time = "2025-10-19T22:36:42.006Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/24/68/c6b77443bb7764c760e211002c8638c0c7cce11cb584927e723215ba1398/fastuuid-0.14.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:2b29e23c97e77c3a9514d70ce343571e469098ac7f5a269320a0f0b3e193ab36", size = 480575, upload-time = "2025-10-19T22:28:18.975Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/5a/87/93f553111b33f9bb83145be12868c3c475bf8ea87c107063d01377cc0e8e/fastuuid-0.14.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1e690d48f923c253f28151b3a6b4e335f2b06bf669c68a02665bc150b7839e94", size = 452317, upload-time = "2025-10-19T22:25:32.75Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/9e/8c/a04d486ca55b5abb7eaa65b39df8d891b7b1635b22db2163734dc273579a/fastuuid-0.14.0-cp311-cp311-win32.whl", hash = "sha256:a6f46790d59ab38c6aa0e35c681c0484b50dc0acf9e2679c005d61e019313c24", size = 154804, upload-time = "2025-10-19T22:24:15.615Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/9c/b2/2d40bf00820de94b9280366a122cbaa60090c8cf59e89ac3938cf5d75895/fastuuid-0.14.0-cp311-cp311-win_amd64.whl", hash = "sha256:e150eab56c95dc9e3fefc234a0eedb342fac433dacc273cd4d150a5b0871e1fa", size = 156099, upload-time = "2025-10-19T22:24:31.646Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/02/a2/e78fcc5df65467f0d207661b7ef86c5b7ac62eea337c0c0fcedbeee6fb13/fastuuid-0.14.0-cp312-cp312-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:77e94728324b63660ebf8adb27055e92d2e4611645bf12ed9d88d30486471d0a", size = 510164, upload-time = "2025-10-19T22:31:45.635Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/2b/b3/c846f933f22f581f558ee63f81f29fa924acd971ce903dab1a9b6701816e/fastuuid-0.14.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:caa1f14d2102cb8d353096bc6ef6c13b2c81f347e6ab9d6fbd48b9dea41c153d", size = 261837, upload-time = "2025-10-19T22:38:38.53Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/54/ea/682551030f8c4fa9a769d9825570ad28c0c71e30cf34020b85c1f7ee7382/fastuuid-0.14.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d23ef06f9e67163be38cece704170486715b177f6baae338110983f99a72c070", size = 251370, upload-time = "2025-10-19T22:40:26.07Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/14/dd/5927f0a523d8e6a76b70968e6004966ee7df30322f5fc9b6cdfb0276646a/fastuuid-0.14.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0c9ec605ace243b6dbe3bd27ebdd5d33b00d8d1d3f580b39fdd15cd96fd71796", size = 277766, upload-time = "2025-10-19T22:37:23.779Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/16/6e/c0fb547eef61293153348f12e0f75a06abb322664b34a1573a7760501336/fastuuid-0.14.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:808527f2407f58a76c916d6aa15d58692a4a019fdf8d4c32ac7ff303b7d7af09", size = 278105, upload-time = "2025-10-19T22:26:56.821Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/2d/b1/b9c75e03b768f61cf2e84ee193dc18601aeaf89a4684b20f2f0e9f52b62c/fastuuid-0.14.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2fb3c0d7fef6674bbeacdd6dbd386924a7b60b26de849266d1ff6602937675c8", size = 301564, upload-time = "2025-10-19T22:30:31.604Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/fc/fa/f7395fdac07c7a54f18f801744573707321ca0cee082e638e36452355a9d/fastuuid-0.14.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ab3f5d36e4393e628a4df337c2c039069344db5f4b9d2a3c9cea48284f1dd741", size = 459659, upload-time = "2025-10-19T22:31:32.341Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/66/49/c9fd06a4a0b1f0f048aacb6599e7d96e5d6bc6fa680ed0d46bf111929d1b/fastuuid-0.14.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:b9a0ca4f03b7e0b01425281ffd44e99d360e15c895f1907ca105854ed85e2057", size = 478430, upload-time = "2025-10-19T22:26:22.962Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/be/9c/909e8c95b494e8e140e8be6165d5fc3f61fdc46198c1554df7b3e1764471/fastuuid-0.14.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:3acdf655684cc09e60fb7e4cf524e8f42ea760031945aa8086c7eae2eeeabeb8", size = 450894, upload-time = "2025-10-19T22:27:01.647Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/90/eb/d29d17521976e673c55ef7f210d4cdd72091a9ec6755d0fd4710d9b3c871/fastuuid-0.14.0-cp312-cp312-win32.whl", hash = "sha256:9579618be6280700ae36ac42c3efd157049fe4dd40ca49b021280481c78c3176", size = 154374, upload-time = "2025-10-19T22:29:19.879Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/cc/fc/f5c799a6ea6d877faec0472d0b27c079b47c86b1cdc577720a5386483b36/fastuuid-0.14.0-cp312-cp312-win_amd64.whl", hash = "sha256:d9e4332dc4ba054434a9594cbfaf7823b57993d7d8e7267831c3e059857cf397", size = 156550, upload-time = "2025-10-19T22:27:49.658Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/a5/83/ae12dd39b9a39b55d7f90abb8971f1a5f3c321fd72d5aa83f90dc67fe9ed/fastuuid-0.14.0-cp313-cp313-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:77a09cb7427e7af74c594e409f7731a0cf887221de2f698e1ca0ebf0f3139021", size = 510720, upload-time = "2025-10-19T22:42:34.633Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/53/b0/a4b03ff5d00f563cc7546b933c28cb3f2a07344b2aec5834e874f7d44143/fastuuid-0.14.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:9bd57289daf7b153bfa3e8013446aa144ce5e8c825e9e366d455155ede5ea2dc", size = 262024, upload-time = "2025-10-19T22:30:25.482Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/9c/6d/64aee0a0f6a58eeabadd582e55d0d7d70258ffdd01d093b30c53d668303b/fastuuid-0.14.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ac60fc860cdf3c3f327374db87ab8e064c86566ca8c49d2e30df15eda1b0c2d5", size = 251679, upload-time = "2025-10-19T22:36:14.096Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/60/f5/a7e9cda8369e4f7919d36552db9b2ae21db7915083bc6336f1b0082c8b2e/fastuuid-0.14.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ab32f74bd56565b186f036e33129da77db8be09178cd2f5206a5d4035fb2a23f", size = 277862, upload-time = "2025-10-19T22:36:23.302Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/f0/d3/8ce11827c783affffd5bd4d6378b28eb6cc6d2ddf41474006b8d62e7448e/fastuuid-0.14.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:33e678459cf4addaedd9936bbb038e35b3f6b2061330fd8f2f6a1d80414c0f87", size = 278278, upload-time = "2025-10-19T22:29:43.809Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/a2/51/680fb6352d0bbade04036da46264a8001f74b7484e2fd1f4da9e3db1c666/fastuuid-0.14.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1e3cc56742f76cd25ecb98e4b82a25f978ccffba02e4bdce8aba857b6d85d87b", size = 301788, upload-time = "2025-10-19T22:36:06.825Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/fa/7c/2014b5785bd8ebdab04ec857635ebd84d5ee4950186a577db9eff0fb8ff6/fastuuid-0.14.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:cb9a030f609194b679e1660f7e32733b7a0f332d519c5d5a6a0a580991290022", size = 459819, upload-time = "2025-10-19T22:35:31.623Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/01/d2/524d4ceeba9160e7a9bc2ea3e8f4ccf1ad78f3bde34090ca0c51f09a5e91/fastuuid-0.14.0-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:09098762aad4f8da3a888eb9ae01c84430c907a297b97166b8abc07b640f2995", size = 478546, upload-time = "2025-10-19T22:26:03.023Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/bc/17/354d04951ce114bf4afc78e27a18cfbd6ee319ab1829c2d5fb5e94063ac6/fastuuid-0.14.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:1383fff584fa249b16329a059c68ad45d030d5a4b70fb7c73a08d98fd53bcdab", size = 450921, upload-time = "2025-10-19T22:31:02.151Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/fb/be/d7be8670151d16d88f15bb121c5b66cdb5ea6a0c2a362d0dcf30276ade53/fastuuid-0.14.0-cp313-cp313-win32.whl", hash = "sha256:a0809f8cc5731c066c909047f9a314d5f536c871a7a22e815cc4967c110ac9ad", size = 154559, upload-time = "2025-10-19T22:36:36.011Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/22/1d/5573ef3624ceb7abf4a46073d3554e37191c868abc3aecd5289a72f9810a/fastuuid-0.14.0-cp313-cp313-win_amd64.whl", hash = "sha256:0df14e92e7ad3276327631c9e7cec09e32572ce82089c55cb1bb8df71cf394ed", size = 156539, upload-time = "2025-10-19T22:33:35.898Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/16/c9/8c7660d1fe3862e3f8acabd9be7fc9ad71eb270f1c65cce9a2b7a31329ab/fastuuid-0.14.0-cp314-cp314-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:b852a870a61cfc26c884af205d502881a2e59cc07076b60ab4a951cc0c94d1ad", size = 510600, upload-time = "2025-10-19T22:43:44.17Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/4c/f4/a989c82f9a90d0ad995aa957b3e572ebef163c5299823b4027986f133dfb/fastuuid-0.14.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:c7502d6f54cd08024c3ea9b3514e2d6f190feb2f46e6dbcd3747882264bb5f7b", size = 262069, upload-time = "2025-10-19T22:43:38.38Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/da/6c/a1a24f73574ac995482b1326cf7ab41301af0fabaa3e37eeb6b3df00e6e2/fastuuid-0.14.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1ca61b592120cf314cfd66e662a5b54a578c5a15b26305e1b8b618a6f22df714", size = 251543, upload-time = "2025-10-19T22:32:22.537Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/1a/20/2a9b59185ba7a6c7b37808431477c2d739fcbdabbf63e00243e37bd6bf49/fastuuid-0.14.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa75b6657ec129d0abded3bec745e6f7ab642e6dba3a5272a68247e85f5f316f", size = 277798, upload-time = "2025-10-19T22:33:53.821Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/ef/33/4105ca574f6ded0af6a797d39add041bcfb468a1255fbbe82fcb6f592da2/fastuuid-0.14.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a8a0dfea3972200f72d4c7df02c8ac70bad1bb4c58d7e0ec1e6f341679073a7f", size = 278283, upload-time = "2025-10-19T22:29:02.812Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/fe/8c/fca59f8e21c4deb013f574eae05723737ddb1d2937ce87cb2a5d20992dc3/fastuuid-0.14.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1bf539a7a95f35b419f9ad105d5a8a35036df35fdafae48fb2fd2e5f318f0d75", size = 301627, upload-time = "2025-10-19T22:35:54.985Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/cb/e2/f78c271b909c034d429218f2798ca4e89eeda7983f4257d7865976ddbb6c/fastuuid-0.14.0-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:9a133bf9cc78fdbd1179cb58a59ad0100aa32d8675508150f3658814aeefeaa4", size = 459778, upload-time = "2025-10-19T22:28:00.999Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/1e/f0/5ff209d865897667a2ff3e7a572267a9ced8f7313919f6d6043aed8b1caa/fastuuid-0.14.0-cp314-cp314-musllinux_1_1_i686.whl", hash = "sha256:f54d5b36c56a2d5e1a31e73b950b28a0d83eb0c37b91d10408875a5a29494bad", size = 478605, upload-time = "2025-10-19T22:36:21.764Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/e0/c8/2ce1c78f983a2c4987ea865d9516dbdfb141a120fd3abb977ae6f02ba7ca/fastuuid-0.14.0-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:ec27778c6ca3393ef662e2762dba8af13f4ec1aaa32d08d77f71f2a70ae9feb8", size = 450837, upload-time = "2025-10-19T22:34:37.178Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/df/60/dad662ec9a33b4a5fe44f60699258da64172c39bd041da2994422cdc40fe/fastuuid-0.14.0-cp314-cp314-win32.whl", hash = "sha256:e23fc6a83f112de4be0cc1990e5b127c27663ae43f866353166f87df58e73d06", size = 154532, upload-time = "2025-10-19T22:35:18.217Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/1f/f6/da4db31001e854025ffd26bc9ba0740a9cbba2c3259695f7c5834908b336/fastuuid-0.14.0-cp314-cp314-win_amd64.whl", hash = "sha256:df61342889d0f5e7a32f7284e55ef95103f2110fee433c2ae7c2c0956d76ac8a", size = 156457, upload-time = "2025-10-19T22:33:44.579Z" },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "filelock"
|
name = "filelock"
|
||||||
version = "3.20.3"
|
version = "3.20.3"
|
||||||
@@ -1933,6 +1985,7 @@ dependencies = [
|
|||||||
{ name = "langsmith" },
|
{ name = "langsmith" },
|
||||||
{ name = "lark-oapi" },
|
{ name = "lark-oapi" },
|
||||||
{ name = "line-bot-sdk" },
|
{ name = "line-bot-sdk" },
|
||||||
|
{ name = "litellm" },
|
||||||
{ name = "mako" },
|
{ name = "mako" },
|
||||||
{ name = "markdown" },
|
{ name = "markdown" },
|
||||||
{ name = "matrix-nio" },
|
{ name = "matrix-nio" },
|
||||||
@@ -2020,6 +2073,7 @@ requires-dist = [
|
|||||||
{ name = "langsmith", specifier = ">=0.7.31" },
|
{ name = "langsmith", specifier = ">=0.7.31" },
|
||||||
{ name = "lark-oapi", specifier = ">=1.5.5" },
|
{ name = "lark-oapi", specifier = ">=1.5.5" },
|
||||||
{ name = "line-bot-sdk", specifier = ">=3.19.0" },
|
{ name = "line-bot-sdk", specifier = ">=3.19.0" },
|
||||||
|
{ name = "litellm", specifier = ">=1.0.0" },
|
||||||
{ name = "mako", specifier = ">=1.3.11" },
|
{ name = "mako", specifier = ">=1.3.11" },
|
||||||
{ name = "markdown", specifier = ">=3.6" },
|
{ name = "markdown", specifier = ">=3.6" },
|
||||||
{ name = "matrix-nio", specifier = ">=0.25.2" },
|
{ name = "matrix-nio", specifier = ">=0.25.2" },
|
||||||
@@ -2345,6 +2399,29 @@ wheels = [
|
|||||||
{ url = "https://files.pythonhosted.org/packages/04/1e/b832de447dee8b582cac175871d2f6c3d5077cc56d5575cadba1fd1cccfa/linkify_it_py-2.0.3-py3-none-any.whl", hash = "sha256:6bcbc417b0ac14323382aef5c5192c0075bf8a9d6b41820a2b66371eac6b6d79", size = 19820, upload-time = "2024-02-04T14:48:02.496Z" },
|
{ url = "https://files.pythonhosted.org/packages/04/1e/b832de447dee8b582cac175871d2f6c3d5077cc56d5575cadba1fd1cccfa/linkify_it_py-2.0.3-py3-none-any.whl", hash = "sha256:6bcbc417b0ac14323382aef5c5192c0075bf8a9d6b41820a2b66371eac6b6d79", size = 19820, upload-time = "2024-02-04T14:48:02.496Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "litellm"
|
||||||
|
version = "1.87.1"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
dependencies = [
|
||||||
|
{ name = "aiohttp" },
|
||||||
|
{ name = "click" },
|
||||||
|
{ name = "fastuuid" },
|
||||||
|
{ name = "httpx" },
|
||||||
|
{ name = "importlib-metadata" },
|
||||||
|
{ name = "jinja2" },
|
||||||
|
{ name = "jsonschema" },
|
||||||
|
{ name = "openai" },
|
||||||
|
{ name = "pydantic" },
|
||||||
|
{ name = "python-dotenv" },
|
||||||
|
{ name = "tiktoken" },
|
||||||
|
{ name = "tokenizers" },
|
||||||
|
]
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/5d/e5/d0ac1c8f55e2c8d8799589e831bef0d450e69e02ecb511901ffc8de054d9/litellm-1.87.1.tar.gz", hash = "sha256:70ac9d6b25f56ad30de6ff95d26fac3b3fc697a95da582b6072d25d8dc73d493", size = 15455709, upload-time = "2026-06-04T16:23:23.339Z" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/ff/18/8275c95ef09e81ab0c01a162c7b780ce3fbc49066b5d532c6b6ab3dc0118/litellm-1.87.1-py3-none-any.whl", hash = "sha256:dd4e00278cdb846d52e99a09d732575a897273540b54eb044247ecbc0d98f67c", size = 17105482, upload-time = "2026-06-04T16:23:20.769Z" },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "logbook"
|
name = "logbook"
|
||||||
version = "1.9.2"
|
version = "1.9.2"
|
||||||
@@ -3284,7 +3361,7 @@ wheels = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "openai"
|
name = "openai"
|
||||||
version = "2.16.0"
|
version = "2.41.0"
|
||||||
source = { registry = "https://pypi.org/simple" }
|
source = { registry = "https://pypi.org/simple" }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
{ name = "anyio" },
|
{ name = "anyio" },
|
||||||
@@ -3296,9 +3373,9 @@ dependencies = [
|
|||||||
{ name = "tqdm" },
|
{ name = "tqdm" },
|
||||||
{ name = "typing-extensions" },
|
{ name = "typing-extensions" },
|
||||||
]
|
]
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/b1/6c/e4c964fcf1d527fdf4739e7cc940c60075a4114d50d03871d5d5b1e13a88/openai-2.16.0.tar.gz", hash = "sha256:42eaa22ca0d8ded4367a77374104d7a2feafee5bd60a107c3c11b5243a11cd12", size = 629649, upload-time = "2026-01-27T23:28:02.579Z" }
|
sdist = { url = "https://files.pythonhosted.org/packages/3c/a6/5815fe2e2aca74b36c650d1bd43b69827cee568073d0d2d9b6fc5aaac80c/openai-2.41.0.tar.gz", hash = "sha256:db5c362acd6604b84f076abbefa66826ea4b46ecba2954ed866e6a149a1352c0", size = 783525, upload-time = "2026-06-03T22:39:40.719Z" }
|
||||||
wheels = [
|
wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/16/83/0315bf2cfd75a2ce8a7e54188e9456c60cec6c0cf66728ed07bd9859ff26/openai-2.16.0-py3-none-any.whl", hash = "sha256:5f46643a8f42899a84e80c38838135d7038e7718333ce61396994f887b09a59b", size = 1068612, upload-time = "2026-01-27T23:28:00.356Z" },
|
{ url = "https://files.pythonhosted.org/packages/be/51/d82bb424e8aa372190c5233253a2ceb399a778747d18b42cff487411e663/openai-2.41.0-py3-none-any.whl", hash = "sha256:20cc7952e8501c7e5773dd2ef7be437bae9cb549044902e1041a83a54516e375", size = 1353378, upload-time = "2026-06-03T22:39:38.964Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import { useEffect, useState } from 'react';
|
import { useEffect, useState, useRef, useCallback } from 'react';
|
||||||
import { httpClient } from '@/app/infra/http/HttpClient';
|
import { httpClient } from '@/app/infra/http/HttpClient';
|
||||||
|
|
||||||
import { zodResolver } from '@hookform/resolvers/zod';
|
import { zodResolver } from '@hookform/resolvers/zod';
|
||||||
@@ -16,19 +16,12 @@ import {
|
|||||||
FormMessage,
|
FormMessage,
|
||||||
} from '@/components/ui/form';
|
} from '@/components/ui/form';
|
||||||
import { Input } from '@/components/ui/input';
|
import { Input } from '@/components/ui/input';
|
||||||
import {
|
|
||||||
Select,
|
|
||||||
SelectContent,
|
|
||||||
SelectGroup,
|
|
||||||
SelectItem,
|
|
||||||
SelectLabel,
|
|
||||||
SelectTrigger,
|
|
||||||
SelectValue,
|
|
||||||
} from '@/components/ui/select';
|
|
||||||
import { DialogFooter } from '@/components/ui/dialog';
|
import { DialogFooter } from '@/components/ui/dialog';
|
||||||
import { toast } from 'sonner';
|
import { toast } from 'sonner';
|
||||||
import { extractI18nObject } from '@/i18n/I18nProvider';
|
import { extractI18nObject } from '@/i18n/I18nProvider';
|
||||||
import { CustomApiError } from '@/app/infra/entities/common';
|
import { CustomApiError } from '@/app/infra/entities/common';
|
||||||
|
import { cn } from '@/lib/utils';
|
||||||
|
import { Check, ChevronDown, Search } from 'lucide-react';
|
||||||
|
|
||||||
const getFormSchema = (t: (key: string) => string) =>
|
const getFormSchema = (t: (key: string) => string) =>
|
||||||
z.object({
|
z.object({
|
||||||
@@ -61,6 +54,7 @@ export default function ProviderForm({
|
|||||||
api_key: '',
|
api_key: '',
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
const { setValue } = form;
|
||||||
|
|
||||||
const [requesterList, setRequesterList] = useState<
|
const [requesterList, setRequesterList] = useState<
|
||||||
{
|
{
|
||||||
@@ -71,18 +65,12 @@ export default function ProviderForm({
|
|||||||
description: string;
|
description: string;
|
||||||
}[]
|
}[]
|
||||||
>([]);
|
>([]);
|
||||||
|
const [searchQuery, setSearchQuery] = useState('');
|
||||||
|
const [isOpen, setIsOpen] = useState(false);
|
||||||
|
const dropdownRef = useRef<HTMLDivElement>(null);
|
||||||
|
const searchInputRef = useRef<HTMLInputElement>(null);
|
||||||
|
|
||||||
useEffect(() => {
|
const loadRequesters = useCallback(async () => {
|
||||||
async function init() {
|
|
||||||
await loadRequesters();
|
|
||||||
if (providerId) {
|
|
||||||
await loadProvider(providerId);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
init();
|
|
||||||
}, [providerId]);
|
|
||||||
|
|
||||||
async function loadRequesters() {
|
|
||||||
const resp = await httpClient.getProviderRequesters();
|
const resp = await httpClient.getProviderRequesters();
|
||||||
setRequesterList(
|
setRequesterList(
|
||||||
resp.requesters
|
resp.requesters
|
||||||
@@ -98,17 +86,78 @@ export default function ProviderForm({
|
|||||||
description: extractI18nObject(item.description),
|
description: extractI18nObject(item.description),
|
||||||
})),
|
})),
|
||||||
);
|
);
|
||||||
}
|
}, []);
|
||||||
|
|
||||||
async function loadProvider(id: string) {
|
const loadProvider = useCallback(
|
||||||
|
async (id: string) => {
|
||||||
const resp = await httpClient.getModelProvider(id);
|
const resp = await httpClient.getModelProvider(id);
|
||||||
const provider = resp.provider;
|
const provider = resp.provider;
|
||||||
|
|
||||||
form.setValue('name', provider.name);
|
setValue('name', provider.name);
|
||||||
form.setValue('requester', provider.requester);
|
setValue('requester', provider.requester);
|
||||||
form.setValue('base_url', provider.base_url);
|
setValue('base_url', provider.base_url);
|
||||||
form.setValue('api_key', provider.api_keys?.[0] || '');
|
setValue('api_key', provider.api_keys?.[0] || '');
|
||||||
|
},
|
||||||
|
[setValue],
|
||||||
|
);
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
async function init() {
|
||||||
|
await loadRequesters();
|
||||||
|
if (providerId) {
|
||||||
|
await loadProvider(providerId);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
init();
|
||||||
|
}, [providerId, loadProvider, loadRequesters]);
|
||||||
|
|
||||||
|
// Close dropdown when clicking outside
|
||||||
|
useEffect(() => {
|
||||||
|
function handleClickOutside(event: MouseEvent) {
|
||||||
|
if (
|
||||||
|
dropdownRef.current &&
|
||||||
|
!dropdownRef.current.contains(event.target as Node)
|
||||||
|
) {
|
||||||
|
setIsOpen(false);
|
||||||
|
setSearchQuery('');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
document.addEventListener('mousedown', handleClickOutside);
|
||||||
|
return () => document.removeEventListener('mousedown', handleClickOutside);
|
||||||
|
}, []);
|
||||||
|
|
||||||
|
// Focus search input when dropdown opens
|
||||||
|
useEffect(() => {
|
||||||
|
if (isOpen && searchInputRef.current) {
|
||||||
|
searchInputRef.current.focus();
|
||||||
|
}
|
||||||
|
}, [isOpen]);
|
||||||
|
|
||||||
|
// Filter requesters based on search query
|
||||||
|
const filteredRequesters = requesterList.filter(
|
||||||
|
(r) =>
|
||||||
|
r.label.toLowerCase().includes(searchQuery.toLowerCase()) ||
|
||||||
|
r.value.toLowerCase().includes(searchQuery.toLowerCase()),
|
||||||
|
);
|
||||||
|
|
||||||
|
// Group filtered requesters by category
|
||||||
|
const groupedRequesters = {
|
||||||
|
builtin: filteredRequesters.filter((r) => r.category === 'builtin'),
|
||||||
|
manufacturer: filteredRequesters.filter(
|
||||||
|
(r) => r.category === 'manufacturer',
|
||||||
|
),
|
||||||
|
maas: filteredRequesters.filter((r) => r.category === 'maas'),
|
||||||
|
'self-hosted': filteredRequesters.filter(
|
||||||
|
(r) => r.category === 'self-hosted',
|
||||||
|
),
|
||||||
|
};
|
||||||
|
|
||||||
|
const categoryLabels: Record<string, string> = {
|
||||||
|
builtin: t('models.builtin'),
|
||||||
|
manufacturer: t('models.modelManufacturer'),
|
||||||
|
maas: t('models.aggregationPlatform'),
|
||||||
|
'self-hosted': t('models.selfDeployed'),
|
||||||
|
};
|
||||||
|
|
||||||
async function handleFormSubmit(values: z.infer<typeof formSchema>) {
|
async function handleFormSubmit(values: z.infer<typeof formSchema>) {
|
||||||
const data = {
|
const data = {
|
||||||
@@ -168,17 +217,16 @@ export default function ProviderForm({
|
|||||||
{t('models.requester')}
|
{t('models.requester')}
|
||||||
<span className="text-red-500">*</span>
|
<span className="text-red-500">*</span>
|
||||||
</FormLabel>
|
</FormLabel>
|
||||||
<Select
|
<div ref={dropdownRef} className="relative">
|
||||||
onValueChange={(v) => {
|
{/* Trigger button */}
|
||||||
field.onChange(v);
|
<button
|
||||||
const req = requesterList.find((r) => r.value === v);
|
type="button"
|
||||||
if (req && (!providerId || !form.getValues('base_url'))) {
|
onClick={() => setIsOpen(!isOpen)}
|
||||||
form.setValue('base_url', req.defaultUrl);
|
className={cn(
|
||||||
}
|
'flex h-10 w-full items-center justify-between rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background placeholder:text-muted-foreground focus:outline-none focus:ring-2 focus:ring-ring focus:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50',
|
||||||
}}
|
isOpen && 'ring-2 ring-ring ring-offset-2',
|
||||||
value={field.value}
|
)}
|
||||||
>
|
>
|
||||||
<SelectTrigger className="bg-background">
|
|
||||||
{selectedRequester ? (
|
{selectedRequester ? (
|
||||||
<div className="flex items-center gap-2">
|
<div className="flex items-center gap-2">
|
||||||
<img
|
<img
|
||||||
@@ -191,17 +239,74 @@ export default function ProviderForm({
|
|||||||
<span>{selectedRequester.label}</span>
|
<span>{selectedRequester.label}</span>
|
||||||
</div>
|
</div>
|
||||||
) : (
|
) : (
|
||||||
<SelectValue placeholder={t('models.selectRequester')} />
|
<span className="text-muted-foreground">
|
||||||
|
{t('models.selectRequester')}
|
||||||
|
</span>
|
||||||
)}
|
)}
|
||||||
</SelectTrigger>
|
<ChevronDown
|
||||||
<SelectContent>
|
className={cn(
|
||||||
<SelectGroup>
|
'h-4 w-4 opacity-50 transition-transform',
|
||||||
<SelectLabel>{t('models.builtin')}</SelectLabel>
|
isOpen && 'rotate-180',
|
||||||
{requesterList
|
)}
|
||||||
.filter((r) => r.category === 'builtin')
|
/>
|
||||||
.map((r) => (
|
</button>
|
||||||
<SelectItem key={r.value} value={r.value}>
|
|
||||||
<div className="flex items-center gap-2">
|
{/* Dropdown */}
|
||||||
|
{isOpen && (
|
||||||
|
<div className="absolute z-50 mt-1 w-full rounded-md border bg-popover text-popover-foreground shadow-md animate-in fade-in-0 zoom-in-95">
|
||||||
|
{/* Search input */}
|
||||||
|
<div className="flex items-center border-b px-3">
|
||||||
|
<Search className="mr-2 h-4 w-4 shrink-0 opacity-50" />
|
||||||
|
<input
|
||||||
|
ref={searchInputRef}
|
||||||
|
type="text"
|
||||||
|
placeholder={
|
||||||
|
t('models.searchProviders') || 'Search providers...'
|
||||||
|
}
|
||||||
|
value={searchQuery}
|
||||||
|
onChange={(e) => setSearchQuery(e.target.value)}
|
||||||
|
className="flex h-10 w-full rounded-md bg-transparent py-3 text-sm outline-none placeholder:text-muted-foreground"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Options list */}
|
||||||
|
<div className="max-h-[300px] overflow-y-auto p-1">
|
||||||
|
{Object.entries(groupedRequesters).map(
|
||||||
|
([category, items]) => {
|
||||||
|
if (items.length === 0) return null;
|
||||||
|
return (
|
||||||
|
<div key={category}>
|
||||||
|
<div className="py-1.5 px-2 text-xs font-semibold text-muted-foreground">
|
||||||
|
{categoryLabels[category]}
|
||||||
|
</div>
|
||||||
|
{items.map((r) => (
|
||||||
|
<button
|
||||||
|
key={r.value}
|
||||||
|
type="button"
|
||||||
|
onClick={() => {
|
||||||
|
field.onChange(r.value);
|
||||||
|
const req = requesterList.find(
|
||||||
|
(req) => req.value === r.value,
|
||||||
|
);
|
||||||
|
if (
|
||||||
|
req &&
|
||||||
|
(!providerId ||
|
||||||
|
!form.getValues('base_url'))
|
||||||
|
) {
|
||||||
|
form.setValue(
|
||||||
|
'base_url',
|
||||||
|
req.defaultUrl,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
setIsOpen(false);
|
||||||
|
setSearchQuery('');
|
||||||
|
}}
|
||||||
|
className={cn(
|
||||||
|
'flex w-full items-center gap-2 rounded-sm px-2 py-1.5 text-sm outline-none hover:bg-accent hover:text-accent-foreground cursor-pointer',
|
||||||
|
field.value === r.value &&
|
||||||
|
'bg-accent text-accent-foreground',
|
||||||
|
)}
|
||||||
|
>
|
||||||
<img
|
<img
|
||||||
src={httpClient.getProviderRequesterIconURL(
|
src={httpClient.getProviderRequesterIconURL(
|
||||||
r.value,
|
r.value,
|
||||||
@@ -209,72 +314,27 @@ export default function ProviderForm({
|
|||||||
alt={r.label}
|
alt={r.label}
|
||||||
className="h-5 w-5 rounded"
|
className="h-5 w-5 rounded"
|
||||||
/>
|
/>
|
||||||
<span>{r.label}</span>
|
<span className="flex-1 text-left">
|
||||||
</div>
|
{r.label}
|
||||||
</SelectItem>
|
</span>
|
||||||
))}
|
{field.value === r.value && (
|
||||||
</SelectGroup>
|
<Check className="h-4 w-4" />
|
||||||
<SelectGroup>
|
|
||||||
<SelectLabel>{t('models.modelManufacturer')}</SelectLabel>
|
|
||||||
{requesterList
|
|
||||||
.filter((r) => r.category === 'manufacturer')
|
|
||||||
.map((r) => (
|
|
||||||
<SelectItem key={r.value} value={r.value}>
|
|
||||||
<div className="flex items-center gap-2">
|
|
||||||
<img
|
|
||||||
src={httpClient.getProviderRequesterIconURL(
|
|
||||||
r.value,
|
|
||||||
)}
|
)}
|
||||||
alt={r.label}
|
</button>
|
||||||
className="h-5 w-5 rounded"
|
|
||||||
/>
|
|
||||||
<span>{r.label}</span>
|
|
||||||
</div>
|
|
||||||
</SelectItem>
|
|
||||||
))}
|
))}
|
||||||
</SelectGroup>
|
</div>
|
||||||
<SelectGroup>
|
);
|
||||||
<SelectLabel>
|
},
|
||||||
{t('models.aggregationPlatform')}
|
|
||||||
</SelectLabel>
|
|
||||||
{requesterList
|
|
||||||
.filter((r) => r.category === 'maas')
|
|
||||||
.map((r) => (
|
|
||||||
<SelectItem key={r.value} value={r.value}>
|
|
||||||
<div className="flex items-center gap-2">
|
|
||||||
<img
|
|
||||||
src={httpClient.getProviderRequesterIconURL(
|
|
||||||
r.value,
|
|
||||||
)}
|
)}
|
||||||
alt={r.label}
|
{filteredRequesters.length === 0 && (
|
||||||
className="h-5 w-5 rounded"
|
<div className="py-6 text-center text-sm text-muted-foreground">
|
||||||
/>
|
No results found.
|
||||||
<span>{r.label}</span>
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</SelectItem>
|
|
||||||
))}
|
|
||||||
</SelectGroup>
|
|
||||||
<SelectGroup>
|
|
||||||
<SelectLabel>{t('models.selfDeployed')}</SelectLabel>
|
|
||||||
{requesterList
|
|
||||||
.filter((r) => r.category === 'self-hosted')
|
|
||||||
.map((r) => (
|
|
||||||
<SelectItem key={r.value} value={r.value}>
|
|
||||||
<div className="flex items-center gap-2">
|
|
||||||
<img
|
|
||||||
src={httpClient.getProviderRequesterIconURL(
|
|
||||||
r.value,
|
|
||||||
)}
|
)}
|
||||||
alt={r.label}
|
|
||||||
className="h-5 w-5 rounded"
|
|
||||||
/>
|
|
||||||
<span>{r.label}</span>
|
|
||||||
</div>
|
</div>
|
||||||
</SelectItem>
|
|
||||||
))}
|
|
||||||
</SelectGroup>
|
|
||||||
</SelectContent>
|
|
||||||
</Select>
|
|
||||||
<FormMessage />
|
<FormMessage />
|
||||||
{selectedRequester?.description && (
|
{selectedRequester?.description && (
|
||||||
<p className="text-sm text-muted-foreground">
|
<p className="text-sm text-muted-foreground">
|
||||||
|
|||||||
@@ -130,32 +130,6 @@ export default function AddModelPopover({
|
|||||||
setScanLoading(true);
|
setScanLoading(true);
|
||||||
try {
|
try {
|
||||||
const result = await onScanModels(trigger ? undefined : tab);
|
const result = await onScanModels(trigger ? undefined : tab);
|
||||||
|
|
||||||
const debugData = (
|
|
||||||
result.debug?.response as { data?: Record<string, unknown>[] }
|
|
||||||
)?.data;
|
|
||||||
if (Array.isArray(debugData)) {
|
|
||||||
const debugMap = new Map<string, Record<string, unknown>>();
|
|
||||||
for (const item of debugData) {
|
|
||||||
if (typeof item?.id === 'string') {
|
|
||||||
debugMap.set(item.id, item);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (const model of result.models) {
|
|
||||||
const debugItem = debugMap.get(model.id);
|
|
||||||
if (!debugItem) continue;
|
|
||||||
const features = debugItem.features as
|
|
||||||
| Record<string, unknown>
|
|
||||||
| undefined;
|
|
||||||
const tools = features?.tools as Record<string, unknown> | undefined;
|
|
||||||
if (tools?.function_calling === true) {
|
|
||||||
const nextAbilities = new Set(model.abilities || []);
|
|
||||||
nextAbilities.add('func_call');
|
|
||||||
model.abilities = [...nextAbilities];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
setScannedModels(result.models);
|
setScannedModels(result.models);
|
||||||
setSelectedScannedModels({});
|
setSelectedScannedModels({});
|
||||||
} finally {
|
} finally {
|
||||||
|
|||||||
462
web/src/app/home/monitoring/components/TokenMonitoring.tsx
Normal file
@@ -0,0 +1,462 @@
|
|||||||
|
import React, { useEffect, useMemo, useState, useCallback } from 'react';
|
||||||
|
import { useTranslation } from 'react-i18next';
|
||||||
|
import {
|
||||||
|
ComposedChart,
|
||||||
|
Area,
|
||||||
|
Bar,
|
||||||
|
XAxis,
|
||||||
|
YAxis,
|
||||||
|
CartesianGrid,
|
||||||
|
Tooltip,
|
||||||
|
ResponsiveContainer,
|
||||||
|
Legend,
|
||||||
|
} from 'recharts';
|
||||||
|
import {
|
||||||
|
Coins,
|
||||||
|
ArrowDownToLine,
|
||||||
|
ArrowUpFromLine,
|
||||||
|
Gauge,
|
||||||
|
AlertTriangle,
|
||||||
|
TrendingUp,
|
||||||
|
} from 'lucide-react';
|
||||||
|
import { httpClient } from '@/app/infra/http/HttpClient';
|
||||||
|
|
||||||
|
interface TokenSummary {
|
||||||
|
total_calls: number;
|
||||||
|
success_calls: number;
|
||||||
|
error_calls: number;
|
||||||
|
total_input_tokens: number;
|
||||||
|
total_output_tokens: number;
|
||||||
|
total_tokens: number;
|
||||||
|
total_cost: number;
|
||||||
|
avg_tokens_per_call: number;
|
||||||
|
avg_duration_ms: number;
|
||||||
|
avg_tokens_per_second: number;
|
||||||
|
zero_token_success_calls: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface TokenByModel {
|
||||||
|
model_name: string;
|
||||||
|
calls: number;
|
||||||
|
error_calls: number;
|
||||||
|
input_tokens: number;
|
||||||
|
output_tokens: number;
|
||||||
|
total_tokens: number;
|
||||||
|
cost: number;
|
||||||
|
avg_tokens_per_call: number;
|
||||||
|
avg_duration_ms: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface TokenTimeseriesPoint {
|
||||||
|
bucket: string;
|
||||||
|
input_tokens: number;
|
||||||
|
output_tokens: number;
|
||||||
|
total_tokens: number;
|
||||||
|
calls: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface TokenStatistics {
|
||||||
|
summary: TokenSummary;
|
||||||
|
by_model: TokenByModel[];
|
||||||
|
timeseries: TokenTimeseriesPoint[];
|
||||||
|
bucket: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface TokenMonitoringProps {
|
||||||
|
botIds?: string[];
|
||||||
|
pipelineIds?: string[];
|
||||||
|
startTime?: string;
|
||||||
|
endTime?: string;
|
||||||
|
/** Bumped by the parent to trigger a refetch on manual refresh. */
|
||||||
|
refreshKey?: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
function formatNumber(n: number): string {
|
||||||
|
if (n >= 1_000_000) return `${(n / 1_000_000).toFixed(2)}M`;
|
||||||
|
if (n >= 1_000) return `${(n / 1_000).toFixed(1)}K`;
|
||||||
|
return n.toLocaleString();
|
||||||
|
}
|
||||||
|
|
||||||
|
const TOOLTIP_STYLE: React.CSSProperties = {
|
||||||
|
backgroundColor: 'var(--card)',
|
||||||
|
border: '1px solid var(--border)',
|
||||||
|
borderRadius: '12px',
|
||||||
|
boxShadow:
|
||||||
|
'0 10px 15px -3px rgb(0 0 0 / 0.1), 0 4px 6px -4px rgb(0 0 0 / 0.1)',
|
||||||
|
fontSize: '13px',
|
||||||
|
padding: '12px',
|
||||||
|
color: 'var(--foreground)',
|
||||||
|
};
|
||||||
|
|
||||||
|
function MetricTile({
|
||||||
|
icon,
|
||||||
|
label,
|
||||||
|
value,
|
||||||
|
sub,
|
||||||
|
accent,
|
||||||
|
}: {
|
||||||
|
icon: React.ReactNode;
|
||||||
|
label: string;
|
||||||
|
value: string;
|
||||||
|
sub?: string;
|
||||||
|
accent?: string;
|
||||||
|
}) {
|
||||||
|
return (
|
||||||
|
<div className="bg-card rounded-xl border p-4 flex flex-col gap-2">
|
||||||
|
<div className="flex items-center gap-2 text-muted-foreground text-sm">
|
||||||
|
<span
|
||||||
|
className="flex items-center justify-center h-7 w-7 rounded-lg"
|
||||||
|
style={{
|
||||||
|
backgroundColor: accent ? `${accent}1a` : 'var(--muted)',
|
||||||
|
color: accent || 'var(--foreground)',
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
{icon}
|
||||||
|
</span>
|
||||||
|
{label}
|
||||||
|
</div>
|
||||||
|
<div className="text-2xl font-semibold text-foreground tabular-nums">
|
||||||
|
{value}
|
||||||
|
</div>
|
||||||
|
{sub && <div className="text-xs text-muted-foreground">{sub}</div>}
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
export default function TokenMonitoring({
|
||||||
|
botIds,
|
||||||
|
pipelineIds,
|
||||||
|
startTime,
|
||||||
|
endTime,
|
||||||
|
refreshKey,
|
||||||
|
}: TokenMonitoringProps) {
|
||||||
|
const { t } = useTranslation();
|
||||||
|
const [bucket, setBucket] = useState<'hour' | 'day'>('hour');
|
||||||
|
const [stats, setStats] = useState<TokenStatistics | null>(null);
|
||||||
|
const [loading, setLoading] = useState(true);
|
||||||
|
const [error, setError] = useState<string | null>(null);
|
||||||
|
|
||||||
|
const botIdsKey = JSON.stringify(botIds);
|
||||||
|
const pipelineIdsKey = JSON.stringify(pipelineIds);
|
||||||
|
|
||||||
|
const fetchStats = useCallback(async () => {
|
||||||
|
setLoading(true);
|
||||||
|
setError(null);
|
||||||
|
try {
|
||||||
|
const result = await httpClient.getTokenStatistics({
|
||||||
|
botId: botIds,
|
||||||
|
pipelineId: pipelineIds,
|
||||||
|
startTime,
|
||||||
|
endTime,
|
||||||
|
bucket,
|
||||||
|
});
|
||||||
|
setStats(result);
|
||||||
|
} catch (e) {
|
||||||
|
setError(e instanceof Error ? e.message : String(e));
|
||||||
|
} finally {
|
||||||
|
setLoading(false);
|
||||||
|
}
|
||||||
|
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||||
|
}, [botIdsKey, pipelineIdsKey, startTime, endTime, bucket, refreshKey]);
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
fetchStats();
|
||||||
|
}, [fetchStats]);
|
||||||
|
|
||||||
|
const chartData = useMemo(() => {
|
||||||
|
if (!stats) return [];
|
||||||
|
return stats.timeseries.map((p) => ({
|
||||||
|
bucket: p.bucket,
|
||||||
|
input: p.input_tokens,
|
||||||
|
output: p.output_tokens,
|
||||||
|
total: p.total_tokens,
|
||||||
|
}));
|
||||||
|
}, [stats]);
|
||||||
|
|
||||||
|
if (loading) {
|
||||||
|
return (
|
||||||
|
<div className="space-y-4">
|
||||||
|
<div className="grid grid-cols-2 md:grid-cols-3 lg:grid-cols-6 gap-4">
|
||||||
|
{Array.from({ length: 6 }).map((_, i) => (
|
||||||
|
<div
|
||||||
|
key={i}
|
||||||
|
className="bg-card rounded-xl border p-4 h-24 animate-pulse"
|
||||||
|
/>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
<div className="bg-card rounded-xl border p-6 h-[320px] animate-pulse" />
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (error) {
|
||||||
|
return (
|
||||||
|
<div className="bg-card rounded-xl border p-6 text-sm text-destructive flex items-center gap-2">
|
||||||
|
<AlertTriangle className="h-4 w-4" />
|
||||||
|
{t('monitoring.tokens.loadError', { error })}
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!stats || stats.summary.total_calls === 0) {
|
||||||
|
return (
|
||||||
|
<div className="bg-card rounded-xl border p-6">
|
||||||
|
<div className="h-[260px] flex flex-col items-center justify-center text-muted-foreground gap-2">
|
||||||
|
<Coins className="h-[3rem] w-[3rem]" />
|
||||||
|
<div className="text-sm">{t('monitoring.tokens.noData')}</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
const { summary, by_model } = stats;
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div className="space-y-6">
|
||||||
|
{/* Data-quality warning: streamed calls that recorded 0 tokens */}
|
||||||
|
{summary.zero_token_success_calls > 0 && (
|
||||||
|
<div className="bg-amber-500/10 border border-amber-500/30 text-amber-700 dark:text-amber-400 rounded-xl p-4 text-sm flex items-start gap-2">
|
||||||
|
<AlertTriangle className="h-4 w-4 mt-0.5 shrink-0" />
|
||||||
|
<span>
|
||||||
|
{t('monitoring.tokens.zeroTokenWarning', {
|
||||||
|
count: summary.zero_token_success_calls,
|
||||||
|
})}
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* Summary tiles */}
|
||||||
|
<div className="grid grid-cols-2 md:grid-cols-3 lg:grid-cols-6 gap-4">
|
||||||
|
<MetricTile
|
||||||
|
icon={<Coins className="h-4 w-4" />}
|
||||||
|
label={t('monitoring.tokens.totalTokens')}
|
||||||
|
value={formatNumber(summary.total_tokens)}
|
||||||
|
sub={t('monitoring.tokens.acrossCalls', {
|
||||||
|
count: summary.total_calls,
|
||||||
|
})}
|
||||||
|
accent="#8b5cf6"
|
||||||
|
/>
|
||||||
|
<MetricTile
|
||||||
|
icon={<ArrowDownToLine className="h-4 w-4" />}
|
||||||
|
label={t('monitoring.tokens.inputTokens')}
|
||||||
|
value={formatNumber(summary.total_input_tokens)}
|
||||||
|
accent="#3b82f6"
|
||||||
|
/>
|
||||||
|
<MetricTile
|
||||||
|
icon={<ArrowUpFromLine className="h-4 w-4" />}
|
||||||
|
label={t('monitoring.tokens.outputTokens')}
|
||||||
|
value={formatNumber(summary.total_output_tokens)}
|
||||||
|
accent="#10b981"
|
||||||
|
/>
|
||||||
|
<MetricTile
|
||||||
|
icon={<TrendingUp className="h-4 w-4" />}
|
||||||
|
label={t('monitoring.tokens.avgPerCall')}
|
||||||
|
value={formatNumber(summary.avg_tokens_per_call)}
|
||||||
|
accent="#f59e0b"
|
||||||
|
/>
|
||||||
|
<MetricTile
|
||||||
|
icon={<Gauge className="h-4 w-4" />}
|
||||||
|
label={t('monitoring.tokens.throughput')}
|
||||||
|
value={`${summary.avg_tokens_per_second}`}
|
||||||
|
sub={t('monitoring.tokens.tokensPerSec')}
|
||||||
|
accent="#06b6d4"
|
||||||
|
/>
|
||||||
|
<MetricTile
|
||||||
|
icon={<AlertTriangle className="h-4 w-4" />}
|
||||||
|
label={t('monitoring.tokens.errorCalls')}
|
||||||
|
value={`${summary.error_calls}`}
|
||||||
|
sub={t('monitoring.tokens.ofTotal', { count: summary.total_calls })}
|
||||||
|
accent="#ef4444"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Token usage over time */}
|
||||||
|
<div className="bg-card rounded-xl border p-6">
|
||||||
|
<div className="flex items-center justify-between mb-6">
|
||||||
|
<h3 className="text-base font-semibold text-foreground">
|
||||||
|
{t('monitoring.tokens.usageOverTime')}
|
||||||
|
</h3>
|
||||||
|
<div className="inline-flex rounded-lg border p-0.5 text-sm">
|
||||||
|
{(['hour', 'day'] as const).map((b) => (
|
||||||
|
<button
|
||||||
|
key={b}
|
||||||
|
onClick={() => setBucket(b)}
|
||||||
|
className={`px-3 py-1 rounded-md transition-colors ${
|
||||||
|
bucket === b
|
||||||
|
? 'bg-primary text-primary-foreground'
|
||||||
|
: 'text-muted-foreground hover:text-foreground'
|
||||||
|
}`}
|
||||||
|
>
|
||||||
|
{t(`monitoring.tokens.bucket.${b}`)}
|
||||||
|
</button>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div className="h-[320px]">
|
||||||
|
<ResponsiveContainer width="100%" height="100%">
|
||||||
|
<ComposedChart
|
||||||
|
data={chartData}
|
||||||
|
margin={{ top: 10, right: 20, left: 0, bottom: 0 }}
|
||||||
|
>
|
||||||
|
<defs>
|
||||||
|
<linearGradient id="tokTotal" x1="0" y1="0" x2="0" y2="1">
|
||||||
|
<stop offset="5%" stopColor="#8b5cf6" stopOpacity={0.35} />
|
||||||
|
<stop offset="95%" stopColor="#8b5cf6" stopOpacity={0.03} />
|
||||||
|
</linearGradient>
|
||||||
|
</defs>
|
||||||
|
<CartesianGrid
|
||||||
|
strokeDasharray="3 3"
|
||||||
|
stroke="var(--border)"
|
||||||
|
vertical={false}
|
||||||
|
/>
|
||||||
|
<XAxis
|
||||||
|
dataKey="bucket"
|
||||||
|
tick={{ fontSize: 12, fill: 'var(--muted-foreground)' }}
|
||||||
|
tickLine={false}
|
||||||
|
axisLine={{ stroke: 'var(--border)' }}
|
||||||
|
dy={10}
|
||||||
|
/>
|
||||||
|
<YAxis
|
||||||
|
tick={{ fontSize: 12, fill: 'var(--muted-foreground)' }}
|
||||||
|
tickLine={false}
|
||||||
|
axisLine={{ stroke: 'var(--border)' }}
|
||||||
|
width={48}
|
||||||
|
tickFormatter={(v) => formatNumber(Number(v))}
|
||||||
|
/>
|
||||||
|
<Tooltip
|
||||||
|
contentStyle={TOOLTIP_STYLE}
|
||||||
|
labelStyle={{
|
||||||
|
fontWeight: 600,
|
||||||
|
marginBottom: '8px',
|
||||||
|
color: 'var(--foreground)',
|
||||||
|
}}
|
||||||
|
formatter={(value: number) => formatNumber(Number(value))}
|
||||||
|
/>
|
||||||
|
<Legend
|
||||||
|
wrapperStyle={{
|
||||||
|
fontSize: '13px',
|
||||||
|
paddingTop: '16px',
|
||||||
|
fontWeight: 500,
|
||||||
|
}}
|
||||||
|
iconType="circle"
|
||||||
|
iconSize={10}
|
||||||
|
/>
|
||||||
|
<Bar
|
||||||
|
dataKey="input"
|
||||||
|
name={t('monitoring.tokens.inputTokens')}
|
||||||
|
stackId="io"
|
||||||
|
fill="#3b82f6"
|
||||||
|
radius={[0, 0, 0, 0]}
|
||||||
|
barSize={18}
|
||||||
|
/>
|
||||||
|
<Bar
|
||||||
|
dataKey="output"
|
||||||
|
name={t('monitoring.tokens.outputTokens')}
|
||||||
|
stackId="io"
|
||||||
|
fill="#10b981"
|
||||||
|
radius={[4, 4, 0, 0]}
|
||||||
|
barSize={18}
|
||||||
|
/>
|
||||||
|
<Area
|
||||||
|
type="monotone"
|
||||||
|
dataKey="total"
|
||||||
|
name={t('monitoring.tokens.totalTokens')}
|
||||||
|
stroke="#8b5cf6"
|
||||||
|
strokeWidth={2.5}
|
||||||
|
fill="url(#tokTotal)"
|
||||||
|
dot={false}
|
||||||
|
activeDot={{ r: 5, strokeWidth: 2 }}
|
||||||
|
/>
|
||||||
|
</ComposedChart>
|
||||||
|
</ResponsiveContainer>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Per-model breakdown */}
|
||||||
|
<div className="bg-card rounded-xl border p-6">
|
||||||
|
<h3 className="text-base font-semibold text-foreground mb-4">
|
||||||
|
{t('monitoring.tokens.byModel')}
|
||||||
|
</h3>
|
||||||
|
<div className="overflow-x-auto">
|
||||||
|
<table className="w-full text-sm">
|
||||||
|
<thead>
|
||||||
|
<tr className="text-left text-muted-foreground border-b">
|
||||||
|
<th className="py-2 pr-4 font-medium">
|
||||||
|
{t('monitoring.tokens.model')}
|
||||||
|
</th>
|
||||||
|
<th className="py-2 px-4 font-medium text-right">
|
||||||
|
{t('monitoring.tokens.calls')}
|
||||||
|
</th>
|
||||||
|
<th className="py-2 px-4 font-medium text-right">
|
||||||
|
{t('monitoring.tokens.inputTokens')}
|
||||||
|
</th>
|
||||||
|
<th className="py-2 px-4 font-medium text-right">
|
||||||
|
{t('monitoring.tokens.outputTokens')}
|
||||||
|
</th>
|
||||||
|
<th className="py-2 px-4 font-medium text-right">
|
||||||
|
{t('monitoring.tokens.totalTokens')}
|
||||||
|
</th>
|
||||||
|
<th className="py-2 px-4 font-medium text-right">
|
||||||
|
{t('monitoring.tokens.avgPerCall')}
|
||||||
|
</th>
|
||||||
|
<th className="py-2 pl-4 font-medium text-right">
|
||||||
|
{t('monitoring.tokens.avgLatency')}
|
||||||
|
</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{by_model.map((m) => {
|
||||||
|
const share =
|
||||||
|
summary.total_tokens > 0
|
||||||
|
? (m.total_tokens / summary.total_tokens) * 100
|
||||||
|
: 0;
|
||||||
|
return (
|
||||||
|
<tr
|
||||||
|
key={m.model_name}
|
||||||
|
className="border-b last:border-0 hover:bg-muted/40 transition-colors"
|
||||||
|
>
|
||||||
|
<td className="py-2.5 pr-4">
|
||||||
|
<div className="font-medium text-foreground">
|
||||||
|
{m.model_name}
|
||||||
|
</div>
|
||||||
|
<div className="mt-1 h-1.5 w-32 rounded-full bg-muted overflow-hidden">
|
||||||
|
<div
|
||||||
|
className="h-full rounded-full bg-violet-500"
|
||||||
|
style={{ width: `${share}%` }}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
</td>
|
||||||
|
<td className="py-2.5 px-4 text-right tabular-nums">
|
||||||
|
{m.calls}
|
||||||
|
{m.error_calls > 0 && (
|
||||||
|
<span className="text-destructive">
|
||||||
|
{' '}
|
||||||
|
({m.error_calls}✕)
|
||||||
|
</span>
|
||||||
|
)}
|
||||||
|
</td>
|
||||||
|
<td className="py-2.5 px-4 text-right tabular-nums">
|
||||||
|
{formatNumber(m.input_tokens)}
|
||||||
|
</td>
|
||||||
|
<td className="py-2.5 px-4 text-right tabular-nums">
|
||||||
|
{formatNumber(m.output_tokens)}
|
||||||
|
</td>
|
||||||
|
<td className="py-2.5 px-4 text-right tabular-nums font-medium">
|
||||||
|
{formatNumber(m.total_tokens)}
|
||||||
|
</td>
|
||||||
|
<td className="py-2.5 px-4 text-right tabular-nums">
|
||||||
|
{formatNumber(m.avg_tokens_per_call)}
|
||||||
|
</td>
|
||||||
|
<td className="py-2.5 pl-4 text-right tabular-nums">
|
||||||
|
{m.avg_duration_ms}ms
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
);
|
||||||
|
})}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
}
|
||||||
@@ -13,6 +13,7 @@ import {
|
|||||||
} from 'lucide-react';
|
} from 'lucide-react';
|
||||||
import OverviewCards from './components/overview-cards/OverviewCards';
|
import OverviewCards from './components/overview-cards/OverviewCards';
|
||||||
import MonitoringFilters from './components/filters/MonitoringFilters';
|
import MonitoringFilters from './components/filters/MonitoringFilters';
|
||||||
|
import TokenMonitoring from './components/TokenMonitoring';
|
||||||
import { ExportDropdown } from './components/ExportDropdown';
|
import { ExportDropdown } from './components/ExportDropdown';
|
||||||
import { useMonitoringFilters } from './hooks/useMonitoringFilters';
|
import { useMonitoringFilters } from './hooks/useMonitoringFilters';
|
||||||
import { useMonitoringData } from './hooks/useMonitoringData';
|
import { useMonitoringData } from './hooks/useMonitoringData';
|
||||||
@@ -319,6 +320,9 @@ function MonitoringPageContent() {
|
|||||||
<TabsTrigger value="modelCalls" className="px-6 py-2">
|
<TabsTrigger value="modelCalls" className="px-6 py-2">
|
||||||
{t('monitoring.tabs.modelCalls')}
|
{t('monitoring.tabs.modelCalls')}
|
||||||
</TabsTrigger>
|
</TabsTrigger>
|
||||||
|
<TabsTrigger value="tokens" className="px-6 py-2">
|
||||||
|
{t('monitoring.tabs.tokens')}
|
||||||
|
</TabsTrigger>
|
||||||
<TabsTrigger value="feedback" className="px-6 py-2">
|
<TabsTrigger value="feedback" className="px-6 py-2">
|
||||||
{t('monitoring.tabs.feedback')}
|
{t('monitoring.tabs.feedback')}
|
||||||
</TabsTrigger>
|
</TabsTrigger>
|
||||||
@@ -668,6 +672,24 @@ function MonitoringPageContent() {
|
|||||||
</div>
|
</div>
|
||||||
</TabsContent>
|
</TabsContent>
|
||||||
|
|
||||||
|
<TabsContent value="tokens" className="p-6 m-0">
|
||||||
|
<TokenMonitoring
|
||||||
|
botIds={
|
||||||
|
filterState.selectedBots.length > 0
|
||||||
|
? filterState.selectedBots
|
||||||
|
: undefined
|
||||||
|
}
|
||||||
|
pipelineIds={
|
||||||
|
filterState.selectedPipelines.length > 0
|
||||||
|
? filterState.selectedPipelines
|
||||||
|
: undefined
|
||||||
|
}
|
||||||
|
startTime={feedbackTimeRange.startTime}
|
||||||
|
endTime={feedbackTimeRange.endTime}
|
||||||
|
refreshKey={feedbackRefreshKey}
|
||||||
|
/>
|
||||||
|
</TabsContent>
|
||||||
|
|
||||||
<TabsContent value="feedback" className="p-6 m-0">
|
<TabsContent value="feedback" className="p-6 m-0">
|
||||||
<div>
|
<div>
|
||||||
{loading && (
|
{loading && (
|
||||||
|
|||||||
@@ -1224,6 +1224,68 @@ export class BackendClient extends BaseHttpClient {
|
|||||||
return this.get(`/api/v1/monitoring/overview?${queryParams.toString()}`);
|
return this.get(`/api/v1/monitoring/overview?${queryParams.toString()}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public getTokenStatistics(params: {
|
||||||
|
botId?: string[];
|
||||||
|
pipelineId?: string[];
|
||||||
|
startTime?: string;
|
||||||
|
endTime?: string;
|
||||||
|
bucket?: 'hour' | 'day';
|
||||||
|
}): Promise<{
|
||||||
|
summary: {
|
||||||
|
total_calls: number;
|
||||||
|
success_calls: number;
|
||||||
|
error_calls: number;
|
||||||
|
total_input_tokens: number;
|
||||||
|
total_output_tokens: number;
|
||||||
|
total_tokens: number;
|
||||||
|
total_cost: number;
|
||||||
|
avg_tokens_per_call: number;
|
||||||
|
avg_duration_ms: number;
|
||||||
|
avg_tokens_per_second: number;
|
||||||
|
zero_token_success_calls: number;
|
||||||
|
};
|
||||||
|
by_model: Array<{
|
||||||
|
model_name: string;
|
||||||
|
calls: number;
|
||||||
|
error_calls: number;
|
||||||
|
input_tokens: number;
|
||||||
|
output_tokens: number;
|
||||||
|
total_tokens: number;
|
||||||
|
cost: number;
|
||||||
|
avg_tokens_per_call: number;
|
||||||
|
avg_duration_ms: number;
|
||||||
|
}>;
|
||||||
|
timeseries: Array<{
|
||||||
|
bucket: string;
|
||||||
|
input_tokens: number;
|
||||||
|
output_tokens: number;
|
||||||
|
total_tokens: number;
|
||||||
|
calls: number;
|
||||||
|
}>;
|
||||||
|
bucket: string;
|
||||||
|
}> {
|
||||||
|
const queryParams = new URLSearchParams();
|
||||||
|
if (params.botId) {
|
||||||
|
params.botId.forEach((id) => queryParams.append('botId', id));
|
||||||
|
}
|
||||||
|
if (params.pipelineId) {
|
||||||
|
params.pipelineId.forEach((id) => queryParams.append('pipelineId', id));
|
||||||
|
}
|
||||||
|
if (params.startTime) {
|
||||||
|
queryParams.append('startTime', params.startTime);
|
||||||
|
}
|
||||||
|
if (params.endTime) {
|
||||||
|
queryParams.append('endTime', params.endTime);
|
||||||
|
}
|
||||||
|
if (params.bucket) {
|
||||||
|
queryParams.append('bucket', params.bucket);
|
||||||
|
}
|
||||||
|
|
||||||
|
return this.get(
|
||||||
|
`/api/v1/monitoring/token-statistics?${queryParams.toString()}`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
// ============ Survey API ============
|
// ============ Survey API ============
|
||||||
public getSurveyPending(): Promise<{
|
public getSurveyPending(): Promise<{
|
||||||
survey: {
|
survey: {
|
||||||
|
|||||||
@@ -258,6 +258,7 @@ const enUS = {
|
|||||||
selectProvider: 'Select Provider',
|
selectProvider: 'Select Provider',
|
||||||
requester: 'Provider Type',
|
requester: 'Provider Type',
|
||||||
selectRequester: 'Select Provider Type',
|
selectRequester: 'Select Provider Type',
|
||||||
|
searchProviders: 'Search providers...',
|
||||||
langbotModelsDescription: 'Cloud models powered by LangBot Space',
|
langbotModelsDescription: 'Cloud models powered by LangBot Space',
|
||||||
credits: 'Credits',
|
credits: 'Credits',
|
||||||
loginWithSpace: 'Login with Space',
|
loginWithSpace: 'Login with Space',
|
||||||
@@ -1195,6 +1196,7 @@ const enUS = {
|
|||||||
llmCalls: 'LLM Calls',
|
llmCalls: 'LLM Calls',
|
||||||
embeddingCalls: 'Embedding Calls',
|
embeddingCalls: 'Embedding Calls',
|
||||||
modelCalls: 'Model Calls',
|
modelCalls: 'Model Calls',
|
||||||
|
tokens: 'Token Monitoring',
|
||||||
feedback: 'User Feedback',
|
feedback: 'User Feedback',
|
||||||
sessions: 'Session Analysis',
|
sessions: 'Session Analysis',
|
||||||
errors: 'Error Logs',
|
errors: 'Error Logs',
|
||||||
@@ -1233,6 +1235,30 @@ const enUS = {
|
|||||||
avgDuration: 'Avg Duration',
|
avgDuration: 'Avg Duration',
|
||||||
calls: 'Calls',
|
calls: 'Calls',
|
||||||
},
|
},
|
||||||
|
tokens: {
|
||||||
|
totalTokens: 'Total Tokens',
|
||||||
|
inputTokens: 'Input Tokens',
|
||||||
|
outputTokens: 'Output Tokens',
|
||||||
|
avgPerCall: 'Avg / Call',
|
||||||
|
throughput: 'Throughput',
|
||||||
|
tokensPerSec: 'tokens/sec',
|
||||||
|
errorCalls: 'Failed Calls',
|
||||||
|
acrossCalls: 'across {{count}} calls',
|
||||||
|
ofTotal: 'of {{count}} total',
|
||||||
|
usageOverTime: 'Token Usage Over Time',
|
||||||
|
byModel: 'By Model',
|
||||||
|
model: 'Model',
|
||||||
|
calls: 'Calls',
|
||||||
|
avgLatency: 'Avg Latency',
|
||||||
|
noData: 'No token usage in the selected time range',
|
||||||
|
loadError: 'Failed to load token statistics: {{error}}',
|
||||||
|
zeroTokenWarning:
|
||||||
|
'{{count}} successful call(s) reported zero token usage. This usually means the upstream provider did not return usage info — check the model provider configuration.',
|
||||||
|
bucket: {
|
||||||
|
hour: 'Hourly',
|
||||||
|
day: 'Daily',
|
||||||
|
},
|
||||||
|
},
|
||||||
embeddingCalls: {
|
embeddingCalls: {
|
||||||
title: 'Embedding Calls',
|
title: 'Embedding Calls',
|
||||||
model: 'Model',
|
model: 'Model',
|
||||||
|
|||||||
@@ -248,6 +248,7 @@ const zhHans = {
|
|||||||
selectProvider: '选择供应商',
|
selectProvider: '选择供应商',
|
||||||
requester: '供应商类型',
|
requester: '供应商类型',
|
||||||
selectRequester: '选择供应商类型',
|
selectRequester: '选择供应商类型',
|
||||||
|
searchProviders: '搜索供应商...',
|
||||||
langbotModelsDescription: 'LangBot Space 提供的云端模型',
|
langbotModelsDescription: 'LangBot Space 提供的云端模型',
|
||||||
credits: '积分',
|
credits: '积分',
|
||||||
loginWithSpace: '通过 Space 登录',
|
loginWithSpace: '通过 Space 登录',
|
||||||
@@ -1139,6 +1140,7 @@ const zhHans = {
|
|||||||
llmCalls: 'LLM调用',
|
llmCalls: 'LLM调用',
|
||||||
embeddingCalls: 'Embedding调用',
|
embeddingCalls: 'Embedding调用',
|
||||||
modelCalls: '模型调用',
|
modelCalls: '模型调用',
|
||||||
|
tokens: 'Token 监控',
|
||||||
feedback: '用户反馈',
|
feedback: '用户反馈',
|
||||||
sessions: '会话分析',
|
sessions: '会话分析',
|
||||||
errors: '错误日志',
|
errors: '错误日志',
|
||||||
@@ -1177,6 +1179,30 @@ const zhHans = {
|
|||||||
avgDuration: '平均耗时',
|
avgDuration: '平均耗时',
|
||||||
calls: '调用次数',
|
calls: '调用次数',
|
||||||
},
|
},
|
||||||
|
tokens: {
|
||||||
|
totalTokens: '总 Token 数',
|
||||||
|
inputTokens: '输入 Token',
|
||||||
|
outputTokens: '输出 Token',
|
||||||
|
avgPerCall: '平均每次调用',
|
||||||
|
throughput: '吞吐量',
|
||||||
|
tokensPerSec: 'Token/秒',
|
||||||
|
errorCalls: '失败调用',
|
||||||
|
acrossCalls: '共 {{count}} 次调用',
|
||||||
|
ofTotal: '共 {{count}} 次',
|
||||||
|
usageOverTime: 'Token 用量趋势',
|
||||||
|
byModel: '按模型统计',
|
||||||
|
model: '模型',
|
||||||
|
calls: '调用次数',
|
||||||
|
avgLatency: '平均延迟',
|
||||||
|
noData: '所选时间范围内暂无 Token 用量数据',
|
||||||
|
loadError: '加载 Token 统计失败:{{error}}',
|
||||||
|
zeroTokenWarning:
|
||||||
|
'检测到 {{count}} 次成功调用未上报 Token 用量(记为 0)。这通常表示上游未返回 usage 信息,请检查模型供应商配置。',
|
||||||
|
bucket: {
|
||||||
|
hour: '按小时',
|
||||||
|
day: '按天',
|
||||||
|
},
|
||||||
|
},
|
||||||
embeddingCalls: {
|
embeddingCalls: {
|
||||||
title: 'Embedding调用',
|
title: 'Embedding调用',
|
||||||
model: '模型',
|
model: '模型',
|
||||||
|
|||||||