mirror of
https://github.com/langbot-app/LangBot.git
synced 2026-06-18 19:44:21 +00:00
fix(provider): capture streaming token usage; add token observability
The LiteLLM streaming requester only captured usage when a chunk had an empty `choices` list. Many OpenAI-compatible gateways (e.g. new-api) and providers send the final usage payload in a chunk that still carries an empty-delta choice, so streamed calls always recorded 0 tokens in the monitoring logs/dashboard (non-streaming calls worked).

- Capture stream usage whenever a chunk carries it, regardless of `choices`
- Add a robust `_normalize_usage` (dict/object shapes, derives a missing `total_tokens`)
- Register litellm in bootutils/deps.py (it was declared in pyproject only)
- Add `MonitoringService.get_token_statistics` and the `/monitoring/token-statistics` endpoint: summary, per-model breakdown, token timeseries, and a zero-token-success data-quality signal
- Add the TokenMonitoring dashboard tab (summary tiles, stacked token chart, per-model table) plus i18n (en/zh)
- Add regression tests for stream usage capture and usage normalization

Verified end-to-end against a real OpenAI-compatible endpoint with gpt-5.5 and claude-opus-4-8: tokens are now recorded as non-zero for both the streaming and non-streaming paths.
This commit is contained in:
@@ -46,6 +46,30 @@ class MonitoringRouterGroup(group.RouterGroup):
|
|||||||
|
|
||||||
return self.success(data=metrics)
|
return self.success(data=metrics)
|
||||||
|
|
||||||
|
@self.route('/token-statistics', methods=['GET'], auth_type=group.AuthType.USER_TOKEN)
async def get_token_statistics() -> str:
    """Return token usage statistics (summary, per-model, timeseries).

    Query parameters:
        botId / pipelineId: repeatable filters; when none are supplied the
            corresponding filter is forwarded as ``None``.
        startTime / endTime: optional strings handed to ``parse_iso_datetime``.
        bucket: ``'hour'`` or ``'day'``; any other value falls back to ``'hour'``.
    """
    params = quart.request.args

    # An empty list means "no filter", which the service expects as None.
    selected_bots = params.getlist('botId') or None
    selected_pipelines = params.getlist('pipelineId') or None

    granularity = params.get('bucket', 'hour')
    if granularity not in ('hour', 'day'):
        granularity = 'hour'

    window_start = parse_iso_datetime(params.get('startTime'))
    window_end = parse_iso_datetime(params.get('endTime'))

    stats = await self.ap.monitoring_service.get_token_statistics(
        bot_ids=selected_bots,
        pipeline_ids=selected_pipelines,
        start_time=window_start,
        end_time=window_end,
        bucket=granularity,
    )

    return self.success(data=stats)
|
||||||
|
|
||||||
@self.route('/messages', methods=['GET'], auth_type=group.AuthType.USER_TOKEN)
|
@self.route('/messages', methods=['GET'], auth_type=group.AuthType.USER_TOKEN)
|
||||||
async def get_messages() -> str:
|
async def get_messages() -> str:
|
||||||
"""Get message logs"""
|
"""Get message logs"""
|
||||||
|
|||||||
@@ -472,6 +472,185 @@ class MonitoringService:
|
|||||||
'active_sessions': active_sessions,
|
'active_sessions': active_sessions,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async def get_token_statistics(
    self,
    bot_ids: list[str] | None = None,
    pipeline_ids: list[str] | None = None,
    start_time: datetime.datetime | None = None,
    end_time: datetime.datetime | None = None,
    bucket: str = 'hour',
) -> dict:
    """Get detailed token usage statistics for production observability.

    Args:
        bot_ids: Restrict to calls from these bots; ``None``/empty = no filter.
        pipeline_ids: Restrict to calls from these pipelines; ``None``/empty = no filter.
        start_time: Inclusive lower bound on the call timestamp, if given.
        end_time: Inclusive upper bound on the call timestamp, if given.
        bucket: ``'hour'`` buckets the timeseries per hour; any other value
            buckets it per day.

    Returns:
        A dict with:
        - summary: aggregate token counters and call/latency stats over the window
        - by_model: per-model token + call breakdown (sorted by total tokens desc)
        - timeseries: token usage bucketed by `bucket`, in chronological order
        - bucket: the `bucket` argument, echoed back

    Only successful LLM calls are counted toward token totals (in the summary,
    the per-model breakdown and the timeseries); error calls are reported
    separately so a spike in failures is visible without polluting token
    accounting. Call counts, durations and cost cover every matching call, and
    the per-call averages divide by the number of all matching calls.
    """
    LLMCall = persistence_monitoring.MonitoringLLMCall

    conditions = []
    if bot_ids:
        conditions.append(LLMCall.bot_id.in_(bot_ids))
    if pipeline_ids:
        conditions.append(LLMCall.pipeline_id.in_(pipeline_ids))
    if start_time:
        conditions.append(LLMCall.timestamp >= start_time)
    if end_time:
        conditions.append(LLMCall.timestamp <= end_time)

    def _apply(query):
        if conditions:
            query = query.where(sqlalchemy.and_(*conditions))
        return query

    is_success = LLMCall.status == 'success'
    is_error = LLMCall.status == 'error'

    def _success_sum(column):
        # SUM restricted to successful rows. COALESCE turns the NULL that SUM
        # yields for an empty window into 0.
        return sqlalchemy.func.coalesce(
            sqlalchemy.func.sum(sqlalchemy.case((is_success, column), else_=0)),
            0,
        )

    def _count_where(condition):
        # Conditional row count; NULL (not 0) when the window is empty.
        return sqlalchemy.func.sum(sqlalchemy.case((condition, 1), else_=0))

    # ---- Summary aggregates ----
    summary_query = _apply(
        sqlalchemy.select(
            sqlalchemy.func.count(LLMCall.id),
            _success_sum(LLMCall.input_tokens),
            _success_sum(LLMCall.output_tokens),
            _success_sum(LLMCall.total_tokens),
            sqlalchemy.func.coalesce(sqlalchemy.func.sum(LLMCall.duration), 0),
            sqlalchemy.func.coalesce(sqlalchemy.func.sum(LLMCall.cost), 0.0),
            _count_where(is_success),
            _count_where(is_error),
            # Count of successful calls that nonetheless recorded zero tokens —
            # a data-quality signal that usage reporting may be broken upstream.
            _count_where(sqlalchemy.and_(is_success, LLMCall.total_tokens == 0)),
        )
    )
    summary_result = await self.ap.persistence_mgr.execute_async(summary_query)
    row = summary_result.first()
    (
        total_calls,
        total_input_tokens,
        total_output_tokens,
        total_tokens,
        total_duration,
        total_cost,
        success_calls,
        error_calls,
        zero_token_success_calls,
    ) = row if row else (0, 0, 0, 0, 0, 0.0, 0, 0, 0)

    total_calls = total_calls or 0
    success_calls = success_calls or 0
    error_calls = error_calls or 0
    zero_token_success_calls = zero_token_success_calls or 0

    # Aggregates may come back as int, float or Decimal depending on the
    # database driver; coerce before mixing them in arithmetic.
    output_total = int(total_output_tokens or 0)
    duration_total = float(total_duration or 0)

    summary = {
        'total_calls': total_calls,
        'success_calls': success_calls,
        'error_calls': error_calls,
        'total_input_tokens': int(total_input_tokens or 0),
        'total_output_tokens': output_total,
        'total_tokens': int(total_tokens or 0),
        'total_cost': round(float(total_cost or 0.0), 6),
        'avg_tokens_per_call': int((total_tokens or 0) / total_calls) if total_calls > 0 else 0,
        'avg_duration_ms': int((total_duration or 0) / total_calls) if total_calls > 0 else 0,
        # Duration is divided by 1000 to express throughput per second.
        'avg_tokens_per_second': round(output_total / (duration_total / 1000), 2)
        if duration_total > 0
        else 0,
        'zero_token_success_calls': zero_token_success_calls,
    }

    # ---- Per-model breakdown ----
    by_model_query = _apply(
        sqlalchemy.select(
            LLMCall.model_name,
            sqlalchemy.func.count(LLMCall.id),
            _success_sum(LLMCall.input_tokens),
            _success_sum(LLMCall.output_tokens),
            _success_sum(LLMCall.total_tokens),
            sqlalchemy.func.coalesce(sqlalchemy.func.sum(LLMCall.duration), 0),
            sqlalchemy.func.coalesce(sqlalchemy.func.sum(LLMCall.cost), 0.0),
            _count_where(is_error),
        ).group_by(LLMCall.model_name)
    )
    by_model_result = await self.ap.persistence_mgr.execute_async(by_model_query)
    by_model = []
    for mrow in by_model_result.all():
        (
            model_name,
            m_calls,
            m_in,
            m_out,
            m_total,
            m_duration,
            m_cost,
            m_errors,
        ) = mrow
        m_calls = m_calls or 0
        by_model.append(
            {
                'model_name': model_name,
                'calls': m_calls,
                'error_calls': m_errors or 0,
                'input_tokens': int(m_in or 0),
                'output_tokens': int(m_out or 0),
                'total_tokens': int(m_total or 0),
                'cost': round(float(m_cost or 0.0), 6),
                'avg_tokens_per_call': int((m_total or 0) / m_calls) if m_calls > 0 else 0,
                'avg_duration_ms': int((m_duration or 0) / m_calls) if m_calls > 0 else 0,
            }
        )
    by_model.sort(key=lambda x: x['total_tokens'], reverse=True)

    # ---- Time-bucketed series ----
    # Use a DB-agnostic bucketing approach: fetch one narrow row per call and
    # aggregate in Python. NOTE: the row count is only bounded when the caller
    # supplies start_time/end_time; without them every matching call is read.
    series_query = _apply(
        sqlalchemy.select(
            LLMCall.timestamp,
            LLMCall.status,
            LLMCall.input_tokens,
            LLMCall.output_tokens,
            LLMCall.total_tokens,
        ).order_by(LLMCall.timestamp.asc())
    )
    series_result = await self.ap.persistence_mgr.execute_async(series_query)

    bucket_fmt = '%Y-%m-%d %H:00' if bucket == 'hour' else '%Y-%m-%d'
    buckets: dict[str, dict] = {}
    for ts, status, s_in, s_out, s_total in series_result.all():
        if ts is None:
            continue
        key = ts.strftime(bucket_fmt)
        b = buckets.setdefault(
            key,
            {'bucket': key, 'input_tokens': 0, 'output_tokens': 0, 'total_tokens': 0, 'calls': 0},
        )
        # Every call counts toward 'calls'; only successes contribute tokens.
        b['calls'] += 1
        if status == 'success':
            b['input_tokens'] += int(s_in or 0)
            b['output_tokens'] += int(s_out or 0)
            b['total_tokens'] += int(s_total or 0)

    # The zero-padded bucket keys sort lexicographically in time order.
    timeseries = [buckets[k] for k in sorted(buckets)]

    return {
        'summary': summary,
        'by_model': by_model,
        'timeseries': timeseries,
        'bucket': bucket,
    }
|
||||||
|
|
||||||
async def get_messages(
|
async def get_messages(
|
||||||
self,
|
self,
|
||||||
bot_ids: list[str] | None = None,
|
bot_ids: list[str] | None = None,
|
||||||
|
|||||||
@@ -42,6 +42,7 @@ required_deps = {
|
|||||||
'telegramify_markdown': 'telegramify-markdown',
|
'telegramify_markdown': 'telegramify-markdown',
|
||||||
'slack_sdk': 'slack_sdk',
|
'slack_sdk': 'slack_sdk',
|
||||||
'asyncpg': 'asyncpg',
|
'asyncpg': 'asyncpg',
|
||||||
|
'litellm': 'litellm',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -85,15 +85,42 @@ class LiteLLMRequester(requester.ProviderAPIRequester):
|
|||||||
# because it's typically internal model reasoning, not user-visible thinking
|
# because it's typically internal model reasoning, not user-visible thinking
|
||||||
return content or ''
|
return content or ''
|
||||||
|
|
||||||
def _extract_usage(self, response) -> dict:
|
@staticmethod
|
||||||
"""Extract usage info from LiteLLM response."""
|
def _normalize_usage(usage: typing.Any) -> dict:
|
||||||
usage = response.usage
|
"""Normalize a LiteLLM/OpenAI usage object into a plain token dict.
|
||||||
|
|
||||||
|
Handles several real-world shapes returned by different upstreams:
|
||||||
|
- object with ``prompt_tokens`` / ``completion_tokens`` / ``total_tokens`` attrs
|
||||||
|
- dict with the same keys
|
||||||
|
- missing ``total_tokens`` (derived from prompt + completion)
|
||||||
|
- ``None`` / partially-populated usage (defaults to 0)
|
||||||
|
"""
|
||||||
|
if usage is None:
|
||||||
|
return {'prompt_tokens': 0, 'completion_tokens': 0, 'total_tokens': 0}
|
||||||
|
|
||||||
|
def _get(key: str) -> typing.Any:
|
||||||
|
if isinstance(usage, dict):
|
||||||
|
return usage.get(key)
|
||||||
|
return getattr(usage, key, None)
|
||||||
|
|
||||||
|
prompt_tokens = _get('prompt_tokens') or 0
|
||||||
|
completion_tokens = _get('completion_tokens') or 0
|
||||||
|
total_tokens = _get('total_tokens') or 0
|
||||||
|
|
||||||
|
# Some providers omit total_tokens in streaming usage; derive it.
|
||||||
|
if not total_tokens:
|
||||||
|
total_tokens = prompt_tokens + completion_tokens
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'prompt_tokens': usage.prompt_tokens or 0,
|
'prompt_tokens': int(prompt_tokens),
|
||||||
'completion_tokens': usage.completion_tokens or 0,
|
'completion_tokens': int(completion_tokens),
|
||||||
'total_tokens': usage.total_tokens or 0,
|
'total_tokens': int(total_tokens),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _extract_usage(self, response) -> dict:
|
||||||
|
"""Extract usage info from a non-streaming LiteLLM response."""
|
||||||
|
return self._normalize_usage(getattr(response, 'usage', None))
|
||||||
|
|
||||||
def _build_common_args(self, args: dict, include_retry_params: bool = True) -> dict:
|
def _build_common_args(self, args: dict, include_retry_params: bool = True) -> dict:
|
||||||
"""Apply common requester config to args dict."""
|
"""Apply common requester config to args dict."""
|
||||||
if self.requester_cfg.get('base_url'):
|
if self.requester_cfg.get('base_url'):
|
||||||
@@ -217,18 +244,21 @@ class LiteLLMRequester(requester.ProviderAPIRequester):
|
|||||||
try:
|
try:
|
||||||
response = await acompletion(**args)
|
response = await acompletion(**args)
|
||||||
async for chunk in response:
|
async for chunk in response:
|
||||||
# Check for usage chunk (final chunk with stream_options include_usage)
|
# Capture usage whenever a chunk carries it.
|
||||||
if hasattr(chunk, 'usage') and chunk.usage and (not hasattr(chunk, 'choices') or not chunk.choices):
|
#
|
||||||
usage_info = {
|
# Important: many OpenAI-compatible gateways (e.g. new-api) and
|
||||||
'prompt_tokens': chunk.usage.prompt_tokens or 0,
|
# providers send the final usage payload in a chunk that STILL
|
||||||
'completion_tokens': chunk.usage.completion_tokens or 0,
|
# contains a (empty-delta) choice, not an empty `choices` list.
|
||||||
'total_tokens': chunk.usage.total_tokens or 0,
|
# The previous implementation only captured usage when `choices`
|
||||||
}
|
# was empty, so streamed calls always recorded 0 tokens.
|
||||||
if query:
|
# We therefore capture usage independently of `choices`, and then
|
||||||
|
# fall through to also process any content this chunk may carry.
|
||||||
|
if getattr(chunk, 'usage', None):
|
||||||
|
usage_info = self._normalize_usage(chunk.usage)
|
||||||
|
if query is not None:
|
||||||
if query.variables is None:
|
if query.variables is None:
|
||||||
query.variables = {}
|
query.variables = {}
|
||||||
query.variables['_stream_usage'] = usage_info
|
query.variables['_stream_usage'] = usage_info
|
||||||
continue
|
|
||||||
|
|
||||||
if not hasattr(chunk, 'choices') or not chunk.choices:
|
if not hasattr(chunk, 'choices') or not chunk.choices:
|
||||||
continue
|
continue
|
||||||
|
|||||||
@@ -110,6 +110,147 @@ class TestExtractUsage:
|
|||||||
assert result['completion_tokens'] == 0
|
assert result['completion_tokens'] == 0
|
||||||
|
|
||||||
|
|
||||||
|
class TestNormalizeUsage:
    """Exercise the _normalize_usage helper against real-world usage shapes."""

    def test_none_usage(self):
        """A ``None`` usage normalizes to all-zero counters without raising."""
        expected = {'prompt_tokens': 0, 'completion_tokens': 0, 'total_tokens': 0}
        assert litellmchat.LiteLLMRequester._normalize_usage(None) == expected

    def test_dict_usage(self):
        """A plain dict payload is passed through key by key."""
        payload = {'prompt_tokens': 12, 'completion_tokens': 8, 'total_tokens': 20}
        normalized = litellmchat.LiteLLMRequester._normalize_usage(payload)
        assert normalized == {'prompt_tokens': 12, 'completion_tokens': 8, 'total_tokens': 20}

    def test_missing_total_is_derived(self):
        """An absent/zero total_tokens is derived from prompt + completion."""
        usage = Mock(prompt_tokens=42, completion_tokens=10, total_tokens=0)
        normalized = litellmchat.LiteLLMRequester._normalize_usage(usage)
        assert normalized['total_tokens'] == 52

    def test_partial_attrs_default_to_zero(self):
        """Attributes the usage object lacks default to 0 instead of raising."""
        # spec limits the mock to a single attribute, so the others are missing.
        usage = Mock(spec=['prompt_tokens'])
        usage.prompt_tokens = 5
        normalized = litellmchat.LiteLLMRequester._normalize_usage(usage)
        assert normalized == {'prompt_tokens': 5, 'completion_tokens': 0, 'total_tokens': 5}
|
||||||
|
|
||||||
|
|
||||||
|
class TestInvokeLLMStreamUsage:
    """Regression tests for streaming token usage capture.

    Real OpenAI-compatible gateways (e.g. new-api) send the final usage payload
    in a chunk that still carries a (empty-delta) choice rather than an empty
    `choices` list. The usage must be captured regardless, otherwise streamed
    calls record 0 tokens.
    """

    def _make_chunk(self, *, content=None, finish_reason=None, usage=None, has_choice=True):
        """Build a mock stream chunk carrying one choice or an empty choices list."""
        chunk = Mock()
        chunk.usage = usage
        if not has_choice:
            chunk.choices = []
            return chunk

        delta = Mock()
        delta.model_dump = Mock(
            return_value={'role': 'assistant', 'content': content, 'tool_calls': None}
        )
        choice = Mock()
        choice.delta = delta
        choice.finish_reason = finish_reason
        chunk.choices = [choice]
        return chunk

    @staticmethod
    def _make_usage(prompt_tokens, completion_tokens, total_tokens):
        """Build a mock usage object exposing the three token counters."""
        usage = Mock()
        usage.prompt_tokens = prompt_tokens
        usage.completion_tokens = completion_tokens
        usage.total_tokens = total_tokens
        return usage

    async def _stream_variables(self, chunks):
        """Drain invoke_llm_stream over ``chunks`` and return the query variables.

        ``acompletion`` is patched so no network call is made; the patched
        coroutine resolves to an async generator yielding ``chunks`` in order.
        """
        import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
        import langbot_plugin.api.entities.builtin.provider.message as provider_message

        mock_ap = Mock()
        mock_ap.tool_mgr = Mock()
        mock_ap.tool_mgr.generate_tools_for_openai = AsyncMock(return_value=None)
        requester = litellmchat.LiteLLMRequester(ap=mock_ap, config={})
        model = MockRuntimeModel('gpt-4o', 'test-api-key')

        async def _aiter(*args, **kwargs):
            for c in chunks:
                yield c

        query = Mock(spec=pipeline_query.Query)
        query.variables = {}

        messages = [provider_message.Message(role='user', content='Hi')]

        with patch.object(litellmchat, 'acompletion', new=AsyncMock(side_effect=lambda **kw: _aiter())):
            async for _ in requester.invoke_llm_stream(query=query, model=model, messages=messages):
                pass

        return query.variables

    @pytest.mark.asyncio
    async def test_stream_usage_with_nonempty_choices(self):
        """Usage chunk that still has a choice must populate _stream_usage."""
        chunks = [
            self._make_chunk(content='Hello'),
            self._make_chunk(content=None, finish_reason='stop'),
            # Final usage chunk WITH a non-empty (empty-delta) choice — the bug case.
            self._make_chunk(content=None, usage=self._make_usage(24, 48, 72), has_choice=True),
        ]

        variables = await self._stream_variables(chunks)

        assert '_stream_usage' in variables
        assert variables['_stream_usage']['prompt_tokens'] == 24
        assert variables['_stream_usage']['completion_tokens'] == 48
        assert variables['_stream_usage']['total_tokens'] == 72

    @pytest.mark.asyncio
    async def test_stream_usage_with_empty_choices(self):
        """Usage chunk with empty choices list must also populate _stream_usage."""
        chunks = [
            self._make_chunk(content='Hi there'),
            self._make_chunk(content=None, finish_reason='stop'),
            self._make_chunk(usage=self._make_usage(5, 7, 12), has_choice=False),
        ]

        variables = await self._stream_variables(chunks)

        assert variables['_stream_usage']['total_tokens'] == 12
|
||||||
|
|
||||||
|
|
||||||
class TestProcessThinkingContent:
|
class TestProcessThinkingContent:
|
||||||
"""Test _process_thinking_content method"""
|
"""Test _process_thinking_content method"""
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,462 @@
|
|||||||
|
import React, { useEffect, useMemo, useState, useCallback } from 'react';
|
||||||
|
import { useTranslation } from 'react-i18next';
|
||||||
|
import {
|
||||||
|
ComposedChart,
|
||||||
|
Area,
|
||||||
|
Bar,
|
||||||
|
XAxis,
|
||||||
|
YAxis,
|
||||||
|
CartesianGrid,
|
||||||
|
Tooltip,
|
||||||
|
ResponsiveContainer,
|
||||||
|
Legend,
|
||||||
|
} from 'recharts';
|
||||||
|
import {
|
||||||
|
Coins,
|
||||||
|
ArrowDownToLine,
|
||||||
|
ArrowUpFromLine,
|
||||||
|
Gauge,
|
||||||
|
AlertTriangle,
|
||||||
|
TrendingUp,
|
||||||
|
} from 'lucide-react';
|
||||||
|
import { httpClient } from '@/app/infra/http/HttpClient';
|
||||||
|
|
||||||
|
/** Aggregate counters from the `summary` field of the token-statistics response. */
interface TokenSummary {
  /** Number of LLM calls matching the filters. */
  total_calls: number;
  /** Calls whose status is 'success'. */
  success_calls: number;
  /** Calls whose status is 'error'. */
  error_calls: number;
  total_input_tokens: number;
  total_output_tokens: number;
  total_tokens: number;
  /** Summed cost, rounded by the backend to 6 decimal places. */
  total_cost: number;
  /** total_tokens / total_calls, truncated to an integer (0 when there are no calls). */
  avg_tokens_per_call: number;
  /** Summed duration / total_calls, truncated to an integer (0 when there are no calls). */
  avg_duration_ms: number;
  /** Output tokens per second of summed call duration, 2 decimals (0 when duration is 0). */
  avg_tokens_per_second: number;
  /** Successful calls that recorded total_tokens === 0 — a data-quality signal. */
  zero_token_success_calls: number;
}
|
||||||
|
|
||||||
|
/** One row of the per-model breakdown (`by_model`) in the token-statistics response. */
interface TokenByModel {
  model_name: string;
  /** Number of calls made with this model. */
  calls: number;
  /** Calls with this model whose status is 'error'. */
  error_calls: number;
  input_tokens: number;
  output_tokens: number;
  total_tokens: number;
  /** Summed cost for this model, rounded by the backend to 6 decimal places. */
  cost: number;
  /** total_tokens / calls, truncated to an integer (0 when there are no calls). */
  avg_tokens_per_call: number;
  /** Summed duration / calls, truncated to an integer (0 when there are no calls). */
  avg_duration_ms: number;
}
|
||||||
|
|
||||||
|
/** Token usage for one time bucket of the `timeseries` array. */
interface TokenTimeseriesPoint {
  /** Bucket label: 'YYYY-MM-DD HH:00' for hourly buckets, 'YYYY-MM-DD' for daily ones. */
  bucket: string;
  input_tokens: number;
  output_tokens: number;
  total_tokens: number;
  /** Number of calls that fell into this bucket. */
  calls: number;
}
|
||||||
|
|
||||||
|
/** Full payload of the token-statistics endpoint. */
interface TokenStatistics {
  summary: TokenSummary;
  /** Per-model rows, sorted by the backend by total_tokens descending. */
  by_model: TokenByModel[];
  /** Buckets in ascending bucket-label order. */
  timeseries: TokenTimeseriesPoint[];
  /** Bucket granularity the backend applied, echoed back in the response. */
  bucket: string;
}
|
||||||
|
|
||||||
|
/** Props for the TokenMonitoring component; the filters are forwarded to the statistics request. */
interface TokenMonitoringProps {
  /** Sent as the `botId` filter. */
  botIds?: string[];
  /** Sent as the `pipelineId` filter. */
  pipelineIds?: string[];
  /** Sent as `startTime` — presumably an ISO-8601 string; confirm against the caller. */
  startTime?: string;
  /** Sent as `endTime` — presumably an ISO-8601 string; confirm against the caller. */
  endTime?: string;
  /** Bumped by the parent to trigger a refetch on manual refresh. */
  refreshKey?: number;
}
|
||||||
|
|
||||||
|
/**
 * Format a count compactly: two-decimal millions ("2.50M"), one-decimal
 * thousands ("1.5K"), or the locale-formatted number below one thousand.
 *
 * The sign is preserved; the unit is chosen from the magnitude.
 */
function formatNumber(n: number): string {
  const magnitude = Math.abs(n);
  // From 999,950 upward the one-decimal K form would round to "1000.0K",
  // so switch to the M form slightly before the nominal one-million mark.
  if (magnitude >= 999_950) return `${(n / 1_000_000).toFixed(2)}M`;
  if (magnitude >= 1_000) return `${(n / 1_000).toFixed(1)}K`;
  return n.toLocaleString();
}
|
||||||
|
|
||||||
|
// Shared inline style whose colours come from the theme CSS variables
// (--card, --border, --foreground). Presumably passed to the recharts
// Tooltip — its use site is outside this view.
const TOOLTIP_STYLE: React.CSSProperties = {
  backgroundColor: 'var(--card)',
  border: '1px solid var(--border)',
  borderRadius: '12px',
  boxShadow:
    '0 10px 15px -3px rgb(0 0 0 / 0.1), 0 4px 6px -4px rgb(0 0 0 / 0.1)',
  fontSize: '13px',
  padding: '12px',
  color: 'var(--foreground)',
};
|
||||||
|
|
||||||
|
/**
 * Summary tile: an icon badge with a label, a large value and an optional
 * sub-line. When `accent` is given, the badge uses it as the icon colour and,
 * with "1a" appended, as the background colour; otherwise the badge falls
 * back to the theme's muted/foreground variables.
 */
function MetricTile(props: {
  icon: React.ReactNode;
  label: string;
  value: string;
  sub?: string;
  accent?: string;
}) {
  const { icon, label, value, sub, accent } = props;

  // Appending "1a" only forms a valid colour when accent is a 6-digit hex.
  const badgeStyle: React.CSSProperties = {
    backgroundColor: accent ? `${accent}1a` : 'var(--muted)',
    color: accent || 'var(--foreground)',
  };

  return (
    <div className="bg-card rounded-xl border p-4 flex flex-col gap-2">
      <div className="flex items-center gap-2 text-muted-foreground text-sm">
        <span
          className="flex items-center justify-center h-7 w-7 rounded-lg"
          style={badgeStyle}
        >
          {icon}
        </span>
        {label}
      </div>
      <div className="text-2xl font-semibold text-foreground tabular-nums">
        {value}
      </div>
      {sub && <div className="text-xs text-muted-foreground">{sub}</div>}
    </div>
  );
}
|
||||||
|
|
||||||
|
export default function TokenMonitoring({
|
||||||
|
botIds,
|
||||||
|
pipelineIds,
|
||||||
|
startTime,
|
||||||
|
endTime,
|
||||||
|
refreshKey,
|
||||||
|
}: TokenMonitoringProps) {
|
||||||
|
const { t } = useTranslation();
|
||||||
|
const [bucket, setBucket] = useState<'hour' | 'day'>('hour');
|
||||||
|
const [stats, setStats] = useState<TokenStatistics | null>(null);
|
||||||
|
const [loading, setLoading] = useState(true);
|
||||||
|
const [error, setError] = useState<string | null>(null);
|
||||||
|
|
||||||
|
const botIdsKey = JSON.stringify(botIds);
|
||||||
|
const pipelineIdsKey = JSON.stringify(pipelineIds);
|
||||||
|
|
||||||
|
const fetchStats = useCallback(async () => {
|
||||||
|
setLoading(true);
|
||||||
|
setError(null);
|
||||||
|
try {
|
||||||
|
const result = await httpClient.getTokenStatistics({
|
||||||
|
botId: botIds,
|
||||||
|
pipelineId: pipelineIds,
|
||||||
|
startTime,
|
||||||
|
endTime,
|
||||||
|
bucket,
|
||||||
|
});
|
||||||
|
setStats(result);
|
||||||
|
} catch (e) {
|
||||||
|
setError(e instanceof Error ? e.message : String(e));
|
||||||
|
} finally {
|
||||||
|
setLoading(false);
|
||||||
|
}
|
||||||
|
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||||
|
}, [botIdsKey, pipelineIdsKey, startTime, endTime, bucket, refreshKey]);
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
fetchStats();
|
||||||
|
}, [fetchStats]);
|
||||||
|
|
||||||
|
const chartData = useMemo(() => {
|
||||||
|
if (!stats) return [];
|
||||||
|
return stats.timeseries.map((p) => ({
|
||||||
|
bucket: p.bucket,
|
||||||
|
input: p.input_tokens,
|
||||||
|
output: p.output_tokens,
|
||||||
|
total: p.total_tokens,
|
||||||
|
}));
|
||||||
|
}, [stats]);
|
||||||
|
|
||||||
|
if (loading) {
|
||||||
|
return (
|
||||||
|
<div className="space-y-4">
|
||||||
|
<div className="grid grid-cols-2 md:grid-cols-3 lg:grid-cols-6 gap-4">
|
||||||
|
{Array.from({ length: 6 }).map((_, i) => (
|
||||||
|
<div
|
||||||
|
key={i}
|
||||||
|
className="bg-card rounded-xl border p-4 h-24 animate-pulse"
|
||||||
|
/>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
<div className="bg-card rounded-xl border p-6 h-[320px] animate-pulse" />
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (error) {
|
||||||
|
return (
|
||||||
|
<div className="bg-card rounded-xl border p-6 text-sm text-destructive flex items-center gap-2">
|
||||||
|
<AlertTriangle className="h-4 w-4" />
|
||||||
|
{t('monitoring.tokens.loadError', { error })}
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!stats || stats.summary.total_calls === 0) {
|
||||||
|
return (
|
||||||
|
<div className="bg-card rounded-xl border p-6">
|
||||||
|
<div className="h-[260px] flex flex-col items-center justify-center text-muted-foreground gap-2">
|
||||||
|
<Coins className="h-[3rem] w-[3rem]" />
|
||||||
|
<div className="text-sm">{t('monitoring.tokens.noData')}</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
const { summary, by_model } = stats;
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div className="space-y-6">
|
||||||
|
{/* Data-quality warning: streamed calls that recorded 0 tokens */}
|
||||||
|
{summary.zero_token_success_calls > 0 && (
|
||||||
|
<div className="bg-amber-500/10 border border-amber-500/30 text-amber-700 dark:text-amber-400 rounded-xl p-4 text-sm flex items-start gap-2">
|
||||||
|
<AlertTriangle className="h-4 w-4 mt-0.5 shrink-0" />
|
||||||
|
<span>
|
||||||
|
{t('monitoring.tokens.zeroTokenWarning', {
|
||||||
|
count: summary.zero_token_success_calls,
|
||||||
|
})}
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* Summary tiles */}
|
||||||
|
<div className="grid grid-cols-2 md:grid-cols-3 lg:grid-cols-6 gap-4">
|
||||||
|
<MetricTile
|
||||||
|
icon={<Coins className="h-4 w-4" />}
|
||||||
|
label={t('monitoring.tokens.totalTokens')}
|
||||||
|
value={formatNumber(summary.total_tokens)}
|
||||||
|
sub={t('monitoring.tokens.acrossCalls', {
|
||||||
|
count: summary.total_calls,
|
||||||
|
})}
|
||||||
|
accent="#8b5cf6"
|
||||||
|
/>
|
||||||
|
<MetricTile
|
||||||
|
icon={<ArrowDownToLine className="h-4 w-4" />}
|
||||||
|
label={t('monitoring.tokens.inputTokens')}
|
||||||
|
value={formatNumber(summary.total_input_tokens)}
|
||||||
|
accent="#3b82f6"
|
||||||
|
/>
|
||||||
|
<MetricTile
|
||||||
|
icon={<ArrowUpFromLine className="h-4 w-4" />}
|
||||||
|
label={t('monitoring.tokens.outputTokens')}
|
||||||
|
value={formatNumber(summary.total_output_tokens)}
|
||||||
|
accent="#10b981"
|
||||||
|
/>
|
||||||
|
<MetricTile
|
||||||
|
icon={<TrendingUp className="h-4 w-4" />}
|
||||||
|
label={t('monitoring.tokens.avgPerCall')}
|
||||||
|
value={formatNumber(summary.avg_tokens_per_call)}
|
||||||
|
accent="#f59e0b"
|
||||||
|
/>
|
||||||
|
<MetricTile
|
||||||
|
icon={<Gauge className="h-4 w-4" />}
|
||||||
|
label={t('monitoring.tokens.throughput')}
|
||||||
|
value={`${summary.avg_tokens_per_second}`}
|
||||||
|
sub={t('monitoring.tokens.tokensPerSec')}
|
||||||
|
accent="#06b6d4"
|
||||||
|
/>
|
||||||
|
<MetricTile
|
||||||
|
icon={<AlertTriangle className="h-4 w-4" />}
|
||||||
|
label={t('monitoring.tokens.errorCalls')}
|
||||||
|
value={`${summary.error_calls}`}
|
||||||
|
sub={t('monitoring.tokens.ofTotal', { count: summary.total_calls })}
|
||||||
|
accent="#ef4444"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Token usage over time */}
|
||||||
|
<div className="bg-card rounded-xl border p-6">
|
||||||
|
<div className="flex items-center justify-between mb-6">
|
||||||
|
<h3 className="text-base font-semibold text-foreground">
|
||||||
|
{t('monitoring.tokens.usageOverTime')}
|
||||||
|
</h3>
|
||||||
|
<div className="inline-flex rounded-lg border p-0.5 text-sm">
|
||||||
|
{(['hour', 'day'] as const).map((b) => (
|
||||||
|
<button
|
||||||
|
key={b}
|
||||||
|
onClick={() => setBucket(b)}
|
||||||
|
className={`px-3 py-1 rounded-md transition-colors ${
|
||||||
|
bucket === b
|
||||||
|
? 'bg-primary text-primary-foreground'
|
||||||
|
: 'text-muted-foreground hover:text-foreground'
|
||||||
|
}`}
|
||||||
|
>
|
||||||
|
{t(`monitoring.tokens.bucket.${b}`)}
|
||||||
|
</button>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div className="h-[320px]">
|
||||||
|
<ResponsiveContainer width="100%" height="100%">
|
||||||
|
<ComposedChart
|
||||||
|
data={chartData}
|
||||||
|
margin={{ top: 10, right: 20, left: 0, bottom: 0 }}
|
||||||
|
>
|
||||||
|
<defs>
|
||||||
|
<linearGradient id="tokTotal" x1="0" y1="0" x2="0" y2="1">
|
||||||
|
<stop offset="5%" stopColor="#8b5cf6" stopOpacity={0.35} />
|
||||||
|
<stop offset="95%" stopColor="#8b5cf6" stopOpacity={0.03} />
|
||||||
|
</linearGradient>
|
||||||
|
</defs>
|
||||||
|
<CartesianGrid
|
||||||
|
strokeDasharray="3 3"
|
||||||
|
stroke="var(--border)"
|
||||||
|
vertical={false}
|
||||||
|
/>
|
||||||
|
<XAxis
|
||||||
|
dataKey="bucket"
|
||||||
|
tick={{ fontSize: 12, fill: 'var(--muted-foreground)' }}
|
||||||
|
tickLine={false}
|
||||||
|
axisLine={{ stroke: 'var(--border)' }}
|
||||||
|
dy={10}
|
||||||
|
/>
|
||||||
|
<YAxis
|
||||||
|
tick={{ fontSize: 12, fill: 'var(--muted-foreground)' }}
|
||||||
|
tickLine={false}
|
||||||
|
axisLine={{ stroke: 'var(--border)' }}
|
||||||
|
width={48}
|
||||||
|
tickFormatter={(v) => formatNumber(Number(v))}
|
||||||
|
/>
|
||||||
|
<Tooltip
|
||||||
|
contentStyle={TOOLTIP_STYLE}
|
||||||
|
labelStyle={{
|
||||||
|
fontWeight: 600,
|
||||||
|
marginBottom: '8px',
|
||||||
|
color: 'var(--foreground)',
|
||||||
|
}}
|
||||||
|
formatter={(value: number) => formatNumber(Number(value))}
|
||||||
|
/>
|
||||||
|
<Legend
|
||||||
|
wrapperStyle={{
|
||||||
|
fontSize: '13px',
|
||||||
|
paddingTop: '16px',
|
||||||
|
fontWeight: 500,
|
||||||
|
}}
|
||||||
|
iconType="circle"
|
||||||
|
iconSize={10}
|
||||||
|
/>
|
||||||
|
<Bar
|
||||||
|
dataKey="input"
|
||||||
|
name={t('monitoring.tokens.inputTokens')}
|
||||||
|
stackId="io"
|
||||||
|
fill="#3b82f6"
|
||||||
|
radius={[0, 0, 0, 0]}
|
||||||
|
barSize={18}
|
||||||
|
/>
|
||||||
|
<Bar
|
||||||
|
dataKey="output"
|
||||||
|
name={t('monitoring.tokens.outputTokens')}
|
||||||
|
stackId="io"
|
||||||
|
fill="#10b981"
|
||||||
|
radius={[4, 4, 0, 0]}
|
||||||
|
barSize={18}
|
||||||
|
/>
|
||||||
|
<Area
|
||||||
|
type="monotone"
|
||||||
|
dataKey="total"
|
||||||
|
name={t('monitoring.tokens.totalTokens')}
|
||||||
|
stroke="#8b5cf6"
|
||||||
|
strokeWidth={2.5}
|
||||||
|
fill="url(#tokTotal)"
|
||||||
|
dot={false}
|
||||||
|
activeDot={{ r: 5, strokeWidth: 2 }}
|
||||||
|
/>
|
||||||
|
</ComposedChart>
|
||||||
|
</ResponsiveContainer>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Per-model breakdown */}
|
||||||
|
<div className="bg-card rounded-xl border p-6">
|
||||||
|
<h3 className="text-base font-semibold text-foreground mb-4">
|
||||||
|
{t('monitoring.tokens.byModel')}
|
||||||
|
</h3>
|
||||||
|
<div className="overflow-x-auto">
|
||||||
|
<table className="w-full text-sm">
|
||||||
|
<thead>
|
||||||
|
<tr className="text-left text-muted-foreground border-b">
|
||||||
|
<th className="py-2 pr-4 font-medium">
|
||||||
|
{t('monitoring.tokens.model')}
|
||||||
|
</th>
|
||||||
|
<th className="py-2 px-4 font-medium text-right">
|
||||||
|
{t('monitoring.tokens.calls')}
|
||||||
|
</th>
|
||||||
|
<th className="py-2 px-4 font-medium text-right">
|
||||||
|
{t('monitoring.tokens.inputTokens')}
|
||||||
|
</th>
|
||||||
|
<th className="py-2 px-4 font-medium text-right">
|
||||||
|
{t('monitoring.tokens.outputTokens')}
|
||||||
|
</th>
|
||||||
|
<th className="py-2 px-4 font-medium text-right">
|
||||||
|
{t('monitoring.tokens.totalTokens')}
|
||||||
|
</th>
|
||||||
|
<th className="py-2 px-4 font-medium text-right">
|
||||||
|
{t('monitoring.tokens.avgPerCall')}
|
||||||
|
</th>
|
||||||
|
<th className="py-2 pl-4 font-medium text-right">
|
||||||
|
{t('monitoring.tokens.avgLatency')}
|
||||||
|
</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{by_model.map((m) => {
|
||||||
|
const share =
|
||||||
|
summary.total_tokens > 0
|
||||||
|
? (m.total_tokens / summary.total_tokens) * 100
|
||||||
|
: 0;
|
||||||
|
return (
|
||||||
|
<tr
|
||||||
|
key={m.model_name}
|
||||||
|
className="border-b last:border-0 hover:bg-muted/40 transition-colors"
|
||||||
|
>
|
||||||
|
<td className="py-2.5 pr-4">
|
||||||
|
<div className="font-medium text-foreground">
|
||||||
|
{m.model_name}
|
||||||
|
</div>
|
||||||
|
<div className="mt-1 h-1.5 w-32 rounded-full bg-muted overflow-hidden">
|
||||||
|
<div
|
||||||
|
className="h-full rounded-full bg-violet-500"
|
||||||
|
style={{ width: `${share}%` }}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
</td>
|
||||||
|
<td className="py-2.5 px-4 text-right tabular-nums">
|
||||||
|
{m.calls}
|
||||||
|
{m.error_calls > 0 && (
|
||||||
|
<span className="text-destructive">
|
||||||
|
{' '}
|
||||||
|
({m.error_calls}✕)
|
||||||
|
</span>
|
||||||
|
)}
|
||||||
|
</td>
|
||||||
|
<td className="py-2.5 px-4 text-right tabular-nums">
|
||||||
|
{formatNumber(m.input_tokens)}
|
||||||
|
</td>
|
||||||
|
<td className="py-2.5 px-4 text-right tabular-nums">
|
||||||
|
{formatNumber(m.output_tokens)}
|
||||||
|
</td>
|
||||||
|
<td className="py-2.5 px-4 text-right tabular-nums font-medium">
|
||||||
|
{formatNumber(m.total_tokens)}
|
||||||
|
</td>
|
||||||
|
<td className="py-2.5 px-4 text-right tabular-nums">
|
||||||
|
{formatNumber(m.avg_tokens_per_call)}
|
||||||
|
</td>
|
||||||
|
<td className="py-2.5 pl-4 text-right tabular-nums">
|
||||||
|
{m.avg_duration_ms}ms
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
);
|
||||||
|
})}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
}
|
||||||
@@ -13,6 +13,7 @@ import {
|
|||||||
} from 'lucide-react';
|
} from 'lucide-react';
|
||||||
import OverviewCards from './components/overview-cards/OverviewCards';
|
import OverviewCards from './components/overview-cards/OverviewCards';
|
||||||
import MonitoringFilters from './components/filters/MonitoringFilters';
|
import MonitoringFilters from './components/filters/MonitoringFilters';
|
||||||
|
import TokenMonitoring from './components/TokenMonitoring';
|
||||||
import { ExportDropdown } from './components/ExportDropdown';
|
import { ExportDropdown } from './components/ExportDropdown';
|
||||||
import { useMonitoringFilters } from './hooks/useMonitoringFilters';
|
import { useMonitoringFilters } from './hooks/useMonitoringFilters';
|
||||||
import { useMonitoringData } from './hooks/useMonitoringData';
|
import { useMonitoringData } from './hooks/useMonitoringData';
|
||||||
@@ -319,6 +320,9 @@ function MonitoringPageContent() {
|
|||||||
<TabsTrigger value="modelCalls" className="px-6 py-2">
|
<TabsTrigger value="modelCalls" className="px-6 py-2">
|
||||||
{t('monitoring.tabs.modelCalls')}
|
{t('monitoring.tabs.modelCalls')}
|
||||||
</TabsTrigger>
|
</TabsTrigger>
|
||||||
|
<TabsTrigger value="tokens" className="px-6 py-2">
|
||||||
|
{t('monitoring.tabs.tokens')}
|
||||||
|
</TabsTrigger>
|
||||||
<TabsTrigger value="feedback" className="px-6 py-2">
|
<TabsTrigger value="feedback" className="px-6 py-2">
|
||||||
{t('monitoring.tabs.feedback')}
|
{t('monitoring.tabs.feedback')}
|
||||||
</TabsTrigger>
|
</TabsTrigger>
|
||||||
@@ -668,6 +672,24 @@ function MonitoringPageContent() {
|
|||||||
</div>
|
</div>
|
||||||
</TabsContent>
|
</TabsContent>
|
||||||
|
|
||||||
|
<TabsContent value="tokens" className="p-6 m-0">
|
||||||
|
<TokenMonitoring
|
||||||
|
botIds={
|
||||||
|
filterState.selectedBots.length > 0
|
||||||
|
? filterState.selectedBots
|
||||||
|
: undefined
|
||||||
|
}
|
||||||
|
pipelineIds={
|
||||||
|
filterState.selectedPipelines.length > 0
|
||||||
|
? filterState.selectedPipelines
|
||||||
|
: undefined
|
||||||
|
}
|
||||||
|
startTime={feedbackTimeRange.startTime}
|
||||||
|
endTime={feedbackTimeRange.endTime}
|
||||||
|
refreshKey={feedbackRefreshKey}
|
||||||
|
/>
|
||||||
|
</TabsContent>
|
||||||
|
|
||||||
<TabsContent value="feedback" className="p-6 m-0">
|
<TabsContent value="feedback" className="p-6 m-0">
|
||||||
<div>
|
<div>
|
||||||
{loading && (
|
{loading && (
|
||||||
|
|||||||
@@ -1224,6 +1224,68 @@ export class BackendClient extends BaseHttpClient {
|
|||||||
return this.get(`/api/v1/monitoring/overview?${queryParams.toString()}`);
|
return this.get(`/api/v1/monitoring/overview?${queryParams.toString()}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Fetch token usage statistics from the monitoring API.
 *
 * Sends a GET request to `/api/v1/monitoring/token-statistics` with a query
 * string assembled from `params`. Every entry of `botId` / `pipelineId` is
 * appended as its own repeated query key; `startTime`, `endTime` and
 * `bucket` are appended only when they are truthy.
 *
 * @param params Optional filters: bot ids, pipeline ids, time range bounds
 *   and the timeseries bucket granularity (`'hour'` or `'day'`).
 * @returns A promise typed as the statistics payload: an aggregate
 *   `summary`, a `by_model` breakdown, a `timeseries` array and the `bucket`
 *   name.
 */
public getTokenStatistics(params: {
  botId?: string[];
  pipelineId?: string[];
  startTime?: string;
  endTime?: string;
  bucket?: 'hour' | 'day';
}): Promise<{
  summary: {
    total_calls: number;
    success_calls: number;
    error_calls: number;
    total_input_tokens: number;
    total_output_tokens: number;
    total_tokens: number;
    total_cost: number;
    avg_tokens_per_call: number;
    avg_duration_ms: number;
    avg_tokens_per_second: number;
    zero_token_success_calls: number;
  };
  by_model: Array<{
    model_name: string;
    calls: number;
    error_calls: number;
    input_tokens: number;
    output_tokens: number;
    total_tokens: number;
    cost: number;
    avg_tokens_per_call: number;
    avg_duration_ms: number;
  }>;
  timeseries: Array<{
    bucket: string;
    input_tokens: number;
    output_tokens: number;
    total_tokens: number;
    calls: number;
  }>;
  bucket: string;
}> {
  const { botId, pipelineId, startTime, endTime, bucket } = params;
  const query = new URLSearchParams();

  // List filters: one repeated query key per selected id.
  (botId || []).forEach((id) => query.append('botId', id));
  (pipelineId || []).forEach((id) => query.append('pipelineId', id));

  // Scalar filters, in the order they are sent; unset/empty values are omitted.
  const scalarFilters: Array<[string, string | undefined]> = [
    ['startTime', startTime],
    ['endTime', endTime],
    ['bucket', bucket],
  ];
  scalarFilters.forEach(([key, value]) => {
    if (value) {
      query.append(key, value);
    }
  });

  return this.get(
    `/api/v1/monitoring/token-statistics?${query.toString()}`,
  );
}
|
||||||
|
|
||||||
// ============ Survey API ============
|
// ============ Survey API ============
|
||||||
public getSurveyPending(): Promise<{
|
public getSurveyPending(): Promise<{
|
||||||
survey: {
|
survey: {
|
||||||
|
|||||||
@@ -1196,6 +1196,7 @@ const enUS = {
|
|||||||
llmCalls: 'LLM Calls',
|
llmCalls: 'LLM Calls',
|
||||||
embeddingCalls: 'Embedding Calls',
|
embeddingCalls: 'Embedding Calls',
|
||||||
modelCalls: 'Model Calls',
|
modelCalls: 'Model Calls',
|
||||||
|
tokens: 'Token Monitoring',
|
||||||
feedback: 'User Feedback',
|
feedback: 'User Feedback',
|
||||||
sessions: 'Session Analysis',
|
sessions: 'Session Analysis',
|
||||||
errors: 'Error Logs',
|
errors: 'Error Logs',
|
||||||
@@ -1234,6 +1235,30 @@ const enUS = {
|
|||||||
avgDuration: 'Avg Duration',
|
avgDuration: 'Avg Duration',
|
||||||
calls: 'Calls',
|
calls: 'Calls',
|
||||||
},
|
},
|
||||||
|
tokens: {
|
||||||
|
totalTokens: 'Total Tokens',
|
||||||
|
inputTokens: 'Input Tokens',
|
||||||
|
outputTokens: 'Output Tokens',
|
||||||
|
avgPerCall: 'Avg / Call',
|
||||||
|
throughput: 'Throughput',
|
||||||
|
tokensPerSec: 'tokens/sec',
|
||||||
|
errorCalls: 'Failed Calls',
|
||||||
|
acrossCalls: 'across {{count}} calls',
|
||||||
|
ofTotal: 'of {{count}} total',
|
||||||
|
usageOverTime: 'Token Usage Over Time',
|
||||||
|
byModel: 'By Model',
|
||||||
|
model: 'Model',
|
||||||
|
calls: 'Calls',
|
||||||
|
avgLatency: 'Avg Latency',
|
||||||
|
noData: 'No token usage in the selected time range',
|
||||||
|
loadError: 'Failed to load token statistics: {{error}}',
|
||||||
|
zeroTokenWarning:
|
||||||
|
'{{count}} successful call(s) reported zero token usage. This usually means the upstream provider did not return usage info — check the model provider configuration.',
|
||||||
|
bucket: {
|
||||||
|
hour: 'Hourly',
|
||||||
|
day: 'Daily',
|
||||||
|
},
|
||||||
|
},
|
||||||
embeddingCalls: {
|
embeddingCalls: {
|
||||||
title: 'Embedding Calls',
|
title: 'Embedding Calls',
|
||||||
model: 'Model',
|
model: 'Model',
|
||||||
|
|||||||
@@ -1140,6 +1140,7 @@ const zhHans = {
|
|||||||
llmCalls: 'LLM调用',
|
llmCalls: 'LLM调用',
|
||||||
embeddingCalls: 'Embedding调用',
|
embeddingCalls: 'Embedding调用',
|
||||||
modelCalls: '模型调用',
|
modelCalls: '模型调用',
|
||||||
|
tokens: 'Token 监控',
|
||||||
feedback: '用户反馈',
|
feedback: '用户反馈',
|
||||||
sessions: '会话分析',
|
sessions: '会话分析',
|
||||||
errors: '错误日志',
|
errors: '错误日志',
|
||||||
@@ -1178,6 +1179,30 @@ const zhHans = {
|
|||||||
avgDuration: '平均耗时',
|
avgDuration: '平均耗时',
|
||||||
calls: '调用次数',
|
calls: '调用次数',
|
||||||
},
|
},
|
||||||
|
tokens: {
|
||||||
|
totalTokens: '总 Token 数',
|
||||||
|
inputTokens: '输入 Token',
|
||||||
|
outputTokens: '输出 Token',
|
||||||
|
avgPerCall: '平均每次调用',
|
||||||
|
throughput: '吞吐量',
|
||||||
|
tokensPerSec: 'Token/秒',
|
||||||
|
errorCalls: '失败调用',
|
||||||
|
acrossCalls: '共 {{count}} 次调用',
|
||||||
|
ofTotal: '共 {{count}} 次',
|
||||||
|
usageOverTime: 'Token 用量趋势',
|
||||||
|
byModel: '按模型统计',
|
||||||
|
model: '模型',
|
||||||
|
calls: '调用次数',
|
||||||
|
avgLatency: '平均延迟',
|
||||||
|
noData: '所选时间范围内暂无 Token 用量数据',
|
||||||
|
loadError: '加载 Token 统计失败:{{error}}',
|
||||||
|
zeroTokenWarning:
|
||||||
|
'检测到 {{count}} 次成功调用未上报 Token 用量(记为 0)。这通常表示上游未返回 usage 信息,请检查模型供应商配置。',
|
||||||
|
bucket: {
|
||||||
|
hour: '按小时',
|
||||||
|
day: '按天',
|
||||||
|
},
|
||||||
|
},
|
||||||
embeddingCalls: {
|
embeddingCalls: {
|
||||||
title: 'Embedding调用',
|
title: 'Embedding调用',
|
||||||
model: '模型',
|
model: '模型',
|
||||||
|
|||||||
Reference in New Issue
Block a user