Feat/monitor (#1928)

* feat: add monitor

* feat: fix tab

* feat: work

* feat: not reliable monitor

* feat: enhance monitoring page layout with integrated filters and refresh button

* feat: add support for runner recording

* feat: add jump button & alignment

* feat: new

* fix: not show query variables in local agent

* fix: pnpm lint and python ruff check

* fix: ruff fromat

* chore: remove unnecessary migration

* style: optimize monitoring page layout and fix sticky filter issues

- Enhanced metric cards with gradient backgrounds and hover effects
- Increased traffic chart height from 200px to 300px
- Adjusted grid layout and spacing for better visual appeal
- Fixed sticky filter area to properly cover parent padding without transparent gaps
- Used negative margins and positioning to eliminate scrolling artifacts
- Matched padding/margins with other pages (pipelines, bots) for consistency
- Removed duplicate title/subtitle from page content
- Added cursor-pointer styling to tab triggers
- Removed border between tab list and tab content

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>

* fix: apply prettier formatting to monitoring components

- Fixed indentation and spacing in MetricCard.tsx
- Fixed formatting in TrafficChart.tsx
- Applied prettier formatting to page.tsx

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>

* feat: update HomeSidebar to trigger action on child selection and localize monitoring titles

* refactor: streamline LLM and embedding invocation methods

* feat: add embedding model monitor

* fix: database version

* chore: simplify pnpm-lock.yaml formatting

---------

Co-authored-by: Junyan Qin <rockchinq@gmail.com>
Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
Guanchao Wang
2026-01-26 21:08:23 +08:00
committed by GitHub
parent b73847f1a6
commit 5d9f6ec763
37 changed files with 6706 additions and 3182 deletions

View File

@@ -2,6 +2,7 @@ from __future__ import annotations
import abc
import typing
import time
from ...core import app
from ...entity.persistence import model as persistence_model
@@ -33,6 +34,219 @@ class RuntimeProvider:
self.token_mgr = token_mgr
self.requester = requester
async def invoke_llm(
self,
query: pipeline_query.Query,
model: RuntimeLLMModel,
messages: typing.List[provider_message.Message],
funcs: typing.List[resource_tool.LLMTool] = None,
extra_args: dict[str, typing.Any] = {},
remove_think: bool = False,
) -> provider_message.Message:
"""Bridge method for invoking LLM with monitoring"""
# Start timing for monitoring
start_time = time.time()
input_tokens = 0
output_tokens = 0
status = 'success'
error_message = None
try:
# Call the underlying requester
result = await self.requester.invoke_llm(
query=query,
model=model,
messages=messages,
funcs=funcs,
extra_args=extra_args,
remove_think=remove_think,
)
# Try to extract token usage if the requester returns it
# For requesters that return tuple (message, usage_info)
if isinstance(result, tuple):
msg, usage_info = result
if usage_info:
input_tokens = usage_info.get('input_tokens', 0)
output_tokens = usage_info.get('output_tokens', 0)
return msg
else:
return result
except Exception as e:
status = 'error'
error_message = str(e)
raise
finally:
# Record LLM call monitoring data (only if query is provided)
if query is not None:
duration_ms = int((time.time() - start_time) * 1000)
# Import monitoring helper
try:
from ...pipeline import monitoring_helper
# Get monitoring metadata from query variables
if query.variables:
bot_name = query.variables.get('_monitoring_bot_name', 'Unknown')
pipeline_name = query.variables.get('_monitoring_pipeline_name', 'Unknown')
message_id = query.variables.get('_monitoring_message_id')
else:
bot_name = 'Unknown'
pipeline_name = 'Unknown'
message_id = None
await monitoring_helper.MonitoringHelper.record_llm_call(
ap=self.requester.ap,
query=query,
bot_id=query.bot_uuid or 'unknown',
bot_name=bot_name,
pipeline_id=query.pipeline_uuid or 'unknown',
pipeline_name=pipeline_name,
model_name=model.model_entity.name,
input_tokens=input_tokens,
output_tokens=output_tokens,
duration_ms=duration_ms,
status=status,
error_message=error_message,
message_id=message_id,
)
except Exception as monitor_err:
self.requester.ap.logger.error(f'[Monitoring] Failed to record LLM call: {monitor_err}')
async def invoke_llm_stream(
self,
query: pipeline_query.Query,
model: RuntimeLLMModel,
messages: typing.List[provider_message.Message],
funcs: typing.List[resource_tool.LLMTool] = None,
extra_args: dict[str, typing.Any] = {},
remove_think: bool = False,
) -> provider_message.MessageChunk:
"""Bridge method for invoking LLM stream with monitoring"""
# Start timing for monitoring
start_time = time.time()
status = 'success'
error_message = None
# Note: Stream doesn't easily provide token counts, set to 0
input_tokens = 0
output_tokens = 0
try:
# Stream the response
async for chunk in self.requester.invoke_llm_stream(
query=query,
model=model,
messages=messages,
funcs=funcs,
extra_args=extra_args,
remove_think=remove_think,
):
yield chunk
except Exception as e:
status = 'error'
error_message = str(e)
raise
finally:
# Record LLM call monitoring data (only if query is provided)
if query is not None:
duration_ms = int((time.time() - start_time) * 1000)
# Import monitoring helper
try:
from ...pipeline import monitoring_helper
# Get monitoring metadata from query variables
if query.variables:
bot_name = query.variables.get('_monitoring_bot_name', 'Unknown')
pipeline_name = query.variables.get('_monitoring_pipeline_name', 'Unknown')
message_id = query.variables.get('_monitoring_message_id')
else:
bot_name = 'Unknown'
pipeline_name = 'Unknown'
message_id = None
await monitoring_helper.MonitoringHelper.record_llm_call(
ap=self.requester.ap,
query=query,
bot_id=query.bot_uuid or 'unknown',
bot_name=bot_name,
pipeline_id=query.pipeline_uuid or 'unknown',
pipeline_name=pipeline_name,
model_name=model.model_entity.name,
input_tokens=input_tokens,
output_tokens=output_tokens,
duration_ms=duration_ms,
status=status,
error_message=error_message,
message_id=message_id,
)
except Exception as monitor_err:
self.requester.ap.logger.error(f'[Monitoring] Failed to record LLM stream call: {monitor_err}')
async def invoke_embedding(
self,
model: RuntimeEmbeddingModel,
input_text: typing.List[str],
extra_args: dict[str, typing.Any] = {},
knowledge_base_id: str | None = None,
query_text: str | None = None,
session_id: str | None = None,
message_id: str | None = None,
call_type: str | None = None,
) -> typing.List[typing.List[float]]:
"""Bridge method for invoking embedding with monitoring"""
# Start timing for monitoring
start_time = time.time()
prompt_tokens = 0
total_tokens = 0
status = 'success'
error_message = None
try:
# Call the underlying requester
result = await self.requester.invoke_embedding(
model=model,
input_text=input_text,
extra_args=extra_args,
)
# Handle both old format (list only) and new format (tuple with usage)
if isinstance(result, tuple):
embeddings, usage_info = result
if usage_info:
prompt_tokens = usage_info.get('prompt_tokens', 0)
total_tokens = usage_info.get('total_tokens', 0)
return embeddings
else:
return result
except Exception as e:
status = 'error'
error_message = str(e)
raise
finally:
# Record embedding call monitoring data
duration_ms = int((time.time() - start_time) * 1000)
try:
await self.requester.ap.monitoring_service.record_embedding_call(
model_name=model.model_entity.name,
prompt_tokens=prompt_tokens,
total_tokens=total_tokens,
duration=duration_ms,
input_count=len(input_text),
status=status,
error_message=error_message,
knowledge_base_id=knowledge_base_id,
query_text=query_text,
session_id=session_id,
message_id=message_id,
call_type=call_type,
)
except Exception as monitor_err:
self.requester.ap.logger.error(f'[Monitoring] Failed to record embedding call: {monitor_err}')
class RuntimeLLMModel:
"""运行时模型"""
@@ -141,7 +355,7 @@ class ProviderAPIRequester(metaclass=abc.ABCMeta):
model: RuntimeEmbeddingModel,
input_text: typing.List[str],
extra_args: dict[str, typing.Any] = {},
) -> typing.List[typing.List[float]]:
) -> typing.Union[typing.List[typing.List[float]], tuple[typing.List[typing.List[float]], dict]]:
"""调用 Embedding API
Args:
@@ -151,5 +365,6 @@ class ProviderAPIRequester(metaclass=abc.ABCMeta):
Returns:
typing.List[typing.List[float]]: 返回的 embedding 向量
或者 tuple[typing.List[typing.List[float]], dict]: 返回 (embedding 向量, usage_info)
"""
pass

View File

@@ -253,7 +253,7 @@ class OpenAIChatCompletions(requester.ProviderAPIRequester):
use_funcs: list[resource_tool.LLMTool] = None,
extra_args: dict[str, typing.Any] = {},
remove_think: bool = False,
) -> provider_message.Message:
) -> tuple[provider_message.Message, dict]:
self.client.api_key = use_model.provider.token_mgr.get_token()
args = {}
@@ -285,7 +285,14 @@ class OpenAIChatCompletions(requester.ProviderAPIRequester):
# 处理请求结果
message = await self._make_msg(resp, remove_think)
return message
# Extract token usage from response
usage_info = {}
if hasattr(resp, 'usage') and resp.usage:
usage_info['input_tokens'] = resp.usage.prompt_tokens or 0
usage_info['output_tokens'] = resp.usage.completion_tokens or 0
usage_info['total_tokens'] = resp.usage.total_tokens or 0
return message, usage_info
async def invoke_llm(
self,
@@ -295,7 +302,8 @@ class OpenAIChatCompletions(requester.ProviderAPIRequester):
funcs: typing.List[resource_tool.LLMTool] = None,
extra_args: dict[str, typing.Any] = {},
remove_think: bool = False,
) -> provider_message.Message:
) -> tuple[provider_message.Message, dict]:
"""Invoke LLM and return message with usage info"""
req_messages = [] # req_messages 仅用于类内,外部同步由 query.messages 进行
for m in messages:
msg_dict = m.dict(exclude_none=True)
@@ -308,7 +316,7 @@ class OpenAIChatCompletions(requester.ProviderAPIRequester):
req_messages.append(msg_dict)
try:
msg = await self._closure(
msg, usage_info = await self._closure(
query=query,
req_messages=req_messages,
use_model=model,
@@ -316,30 +324,38 @@ class OpenAIChatCompletions(requester.ProviderAPIRequester):
extra_args=extra_args,
remove_think=remove_think,
)
return msg
return msg, usage_info
except asyncio.TimeoutError:
raise errors.RequesterError('请求超时')
except openai.BadRequestError as e:
if 'context_length_exceeded' in e.message:
raise errors.RequesterError(f'上文过长,请重置会话: {e.message}')
error_message = str(e.message) if hasattr(e, 'message') else str(e)
if 'context_length_exceeded' in str(e):
raise errors.RequesterError(f'上文过长,请重置会话: {error_message}')
else:
raise errors.RequesterError(f'请求参数错误: {e.message}')
raise errors.RequesterError(f'请求参数错误: {error_message}')
except openai.AuthenticationError as e:
raise errors.RequesterError(f'无效的 api-key: {e.message}')
error_message = str(e.message) if hasattr(e, 'message') else str(e)
raise errors.RequesterError(f'无效的 api-key: {error_message}')
except openai.NotFoundError as e:
raise errors.RequesterError(f'请求路径错误: {e.message}')
error_message = str(e.message) if hasattr(e, 'message') else str(e)
raise errors.RequesterError(f'请求路径错误: {error_message}')
except openai.RateLimitError as e:
raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}')
error_message = str(e.message) if hasattr(e, 'message') else str(e)
raise errors.RequesterError(f'请求过于频繁或余额不足: {error_message}')
except openai.APIConnectionError as e:
error_message = f'连接错误: {str(e)}'
raise errors.RequesterError(error_message)
except openai.APIError as e:
raise errors.RequesterError(f'请求错误: {e.message}')
error_message = str(e.message) if hasattr(e, 'message') else str(e)
raise errors.RequesterError(f'请求错误: {error_message}')
async def invoke_embedding(
self,
model: requester.RuntimeEmbeddingModel,
input_text: list[str],
extra_args: dict[str, typing.Any] = {},
) -> list[list[float]]:
"""调用 Embedding API"""
) -> tuple[list[list[float]], dict]:
"""调用 Embedding API, returns (embeddings, usage_info)"""
self.client.api_key = model.provider.token_mgr.get_token()
args = {
@@ -355,7 +371,13 @@ class OpenAIChatCompletions(requester.ProviderAPIRequester):
try:
resp = await self.client.embeddings.create(**args)
return [d.embedding for d in resp.data]
# Extract usage info
usage_info = {}
if hasattr(resp, 'usage') and resp.usage:
usage_info['prompt_tokens'] = resp.usage.prompt_tokens or 0
usage_info['total_tokens'] = resp.usage.total_tokens or 0
return [d.embedding for d in resp.data], usage_info
except asyncio.TimeoutError:
raise errors.RequesterError('请求超时')
except openai.BadRequestError as e: