mirror of
https://github.com/langbot-app/LangBot.git
synced 2026-06-12 16:56:02 +00:00
Feat/monitor (#1928)
* feat: add monitor * feat: fix tab * feat: work * feat: not reliable monitor * feat: enhance monitoring page layout with integrated filters and refresh button * feat: add support for runner recording * feat: add jump button & alignment * feat: new * fix: not show query variables in local agent * fix: pnpm lint and python ruff check * fix: ruff format * chore: remove unnecessary migration * style: optimize monitoring page layout and fix sticky filter issues - Enhanced metric cards with gradient backgrounds and hover effects - Increased traffic chart height from 200px to 300px - Adjusted grid layout and spacing for better visual appeal - Fixed sticky filter area to properly cover parent padding without transparent gaps - Used negative margins and positioning to eliminate scrolling artifacts - Matched padding/margins with other pages (pipelines, bots) for consistency - Removed duplicate title/subtitle from page content - Added cursor-pointer styling to tab triggers - Removed border between tab list and tab content Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com> * fix: apply prettier formatting to monitoring components - Fixed indentation and spacing in MetricCard.tsx - Fixed formatting in TrafficChart.tsx - Applied prettier formatting to page.tsx Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com> * feat: update HomeSidebar to trigger action on child selection and localize monitoring titles * refactor: streamline LLM and embedding invocation methods * feat: add embedding model monitor * fix: database version * chore: simplify pnpm-lock.yaml formatting --------- Co-authored-by: Junyan Qin <rockchinq@gmail.com> Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -2,6 +2,7 @@ from __future__ import annotations
|
||||
|
||||
import abc
|
||||
import typing
|
||||
import time
|
||||
|
||||
from ...core import app
|
||||
from ...entity.persistence import model as persistence_model
|
||||
@@ -33,6 +34,219 @@ class RuntimeProvider:
|
||||
self.token_mgr = token_mgr
|
||||
self.requester = requester
|
||||
|
||||
async def invoke_llm(
    self,
    query: pipeline_query.Query,
    model: RuntimeLLMModel,
    messages: typing.List[provider_message.Message],
    funcs: typing.List[resource_tool.LLMTool] | None = None,
    extra_args: dict[str, typing.Any] | None = None,
    remove_think: bool = False,
) -> provider_message.Message:
    """Invoke the LLM through the underlying requester and record monitoring data.

    Args:
        query: The pipeline query this call belongs to. When it is ``None``
            the call is forwarded but no monitoring record is written.
        model: The runtime model; ``model.model_entity.name`` is reported
            to monitoring.
        messages: Conversation messages forwarded to the requester.
        funcs: Optional tools forwarded to the requester.
        extra_args: Extra request arguments forwarded to the requester.
            A fresh empty dict is used when omitted.
        remove_think: Forwarded to the requester unchanged.

    Returns:
        The message produced by the requester. If the requester returns a
        ``(message, usage_info)`` tuple, only the message is returned and
        the usage info feeds the monitoring record.

    Raises:
        Exception: Whatever the requester raises is re-raised after the
            failure has been recorded.
    """
    # Build a new dict per call: a `{}` default is a single object shared by
    # every call, so a requester mutating it would leak state between calls.
    extra_args = {} if extra_args is None else extra_args

    # Monotonic clock: elapsed time must not be affected by wall-clock adjustments.
    start_time = time.monotonic()
    input_tokens = 0
    output_tokens = 0
    status = 'success'
    error_message = None

    try:
        result = await self.requester.invoke_llm(
            query=query,
            model=model,
            messages=messages,
            funcs=funcs,
            extra_args=extra_args,
            remove_think=remove_think,
        )

        # Requesters may return either a bare message or (message, usage_info).
        if isinstance(result, tuple):
            msg, usage_info = result
            if usage_info:
                # `or 0` guards against a key that is present but set to None.
                input_tokens = usage_info.get('input_tokens', 0) or 0
                output_tokens = usage_info.get('output_tokens', 0) or 0
            return msg
        return result

    except Exception as e:
        status = 'error'
        error_message = str(e)
        raise
    finally:
        # Runs on both the success and the error path (only if query is provided).
        if query is not None:
            duration_ms = int((time.monotonic() - start_time) * 1000)

            # Monitoring is best-effort: any failure here is logged and
            # swallowed so it cannot replace the real result or exception.
            try:
                from ...pipeline import monitoring_helper

                # Monitoring metadata is carried in the query variables.
                variables = query.variables or {}
                bot_name = variables.get('_monitoring_bot_name', 'Unknown')
                pipeline_name = variables.get('_monitoring_pipeline_name', 'Unknown')
                message_id = variables.get('_monitoring_message_id')

                await monitoring_helper.MonitoringHelper.record_llm_call(
                    ap=self.requester.ap,
                    query=query,
                    bot_id=query.bot_uuid or 'unknown',
                    bot_name=bot_name,
                    pipeline_id=query.pipeline_uuid or 'unknown',
                    pipeline_name=pipeline_name,
                    model_name=model.model_entity.name,
                    input_tokens=input_tokens,
                    output_tokens=output_tokens,
                    duration_ms=duration_ms,
                    status=status,
                    error_message=error_message,
                    message_id=message_id,
                )
            except Exception as monitor_err:
                self.requester.ap.logger.error(f'[Monitoring] Failed to record LLM call: {monitor_err}')
|
||||
|
||||
async def invoke_llm_stream(
    self,
    query: pipeline_query.Query,
    model: RuntimeLLMModel,
    messages: typing.List[provider_message.Message],
    funcs: typing.List[resource_tool.LLMTool] | None = None,
    extra_args: dict[str, typing.Any] | None = None,
    remove_think: bool = False,
) -> typing.AsyncGenerator[provider_message.MessageChunk, None]:
    """Stream an LLM response through the underlying requester and record monitoring data.

    This is an async generator: every chunk produced by the requester's
    ``invoke_llm_stream`` is yielded unchanged.

    Args:
        query: The pipeline query this call belongs to. When it is ``None``
            the stream is forwarded but no monitoring record is written.
        model: The runtime model; ``model.model_entity.name`` is reported
            to monitoring.
        messages: Conversation messages forwarded to the requester.
        funcs: Optional tools forwarded to the requester.
        extra_args: Extra request arguments forwarded to the requester.
            A fresh empty dict is used when omitted.
        remove_think: Forwarded to the requester unchanged.

    Yields:
        Each chunk produced by the requester, in order.

    Raises:
        Exception: Whatever the requester raises is re-raised after the
            failure has been recorded.
    """
    # Build a new dict per call: a `{}` default is a single object shared by
    # every call, so a requester mutating it would leak state between calls.
    extra_args = {} if extra_args is None else extra_args

    # Monotonic clock: elapsed time must not be affected by wall-clock adjustments.
    start_time = time.monotonic()
    status = 'success'
    error_message = None
    # Note: Stream doesn't easily provide token counts, set to 0
    input_tokens = 0
    output_tokens = 0

    try:
        async for chunk in self.requester.invoke_llm_stream(
            query=query,
            model=model,
            messages=messages,
            funcs=funcs,
            extra_args=extra_args,
            remove_think=remove_think,
        ):
            yield chunk
    except Exception as e:
        # NOTE: a consumer that stops iterating early closes the generator via
        # GeneratorExit, which is not an `Exception`; such calls keep
        # status 'success'.
        status = 'error'
        error_message = str(e)
        raise
    finally:
        # Runs when the stream finishes, fails, or is closed (only if query is provided).
        if query is not None:
            duration_ms = int((time.monotonic() - start_time) * 1000)

            # Monitoring is best-effort: any failure here is logged and
            # swallowed so it cannot replace the real outcome of the stream.
            try:
                from ...pipeline import monitoring_helper

                # Monitoring metadata is carried in the query variables.
                variables = query.variables or {}
                bot_name = variables.get('_monitoring_bot_name', 'Unknown')
                pipeline_name = variables.get('_monitoring_pipeline_name', 'Unknown')
                message_id = variables.get('_monitoring_message_id')

                await monitoring_helper.MonitoringHelper.record_llm_call(
                    ap=self.requester.ap,
                    query=query,
                    bot_id=query.bot_uuid or 'unknown',
                    bot_name=bot_name,
                    pipeline_id=query.pipeline_uuid or 'unknown',
                    pipeline_name=pipeline_name,
                    model_name=model.model_entity.name,
                    input_tokens=input_tokens,
                    output_tokens=output_tokens,
                    duration_ms=duration_ms,
                    status=status,
                    error_message=error_message,
                    message_id=message_id,
                )
            except Exception as monitor_err:
                self.requester.ap.logger.error(f'[Monitoring] Failed to record LLM stream call: {monitor_err}')
|
||||
|
||||
async def invoke_embedding(
    self,
    model: RuntimeEmbeddingModel,
    input_text: typing.List[str],
    extra_args: dict[str, typing.Any] | None = None,
    knowledge_base_id: str | None = None,
    query_text: str | None = None,
    session_id: str | None = None,
    message_id: str | None = None,
    call_type: str | None = None,
) -> typing.List[typing.List[float]]:
    """Invoke the embedding API through the underlying requester and record monitoring data.

    Args:
        model: The runtime embedding model; ``model.model_entity.name`` is
            reported to monitoring.
        input_text: Texts to embed; its length is reported as ``input_count``.
        extra_args: Extra request arguments forwarded to the requester.
            A fresh empty dict is used when omitted.
        knowledge_base_id: Passed through to the monitoring record.
        query_text: Passed through to the monitoring record.
        session_id: Passed through to the monitoring record.
        message_id: Passed through to the monitoring record.
        call_type: Passed through to the monitoring record.

    Returns:
        The embeddings produced by the requester. If the requester returns
        an ``(embeddings, usage_info)`` tuple, only the embeddings are
        returned and the usage info feeds the monitoring record.

    Raises:
        Exception: Whatever the requester raises is re-raised after the
            failure has been recorded.
    """
    # Build a new dict per call: a `{}` default is a single object shared by
    # every call, so a requester mutating it would leak state between calls.
    extra_args = {} if extra_args is None else extra_args

    # Monotonic clock: elapsed time must not be affected by wall-clock adjustments.
    start_time = time.monotonic()
    prompt_tokens = 0
    total_tokens = 0
    status = 'success'
    error_message = None

    try:
        result = await self.requester.invoke_embedding(
            model=model,
            input_text=input_text,
            extra_args=extra_args,
        )

        # Handle both old format (list only) and new format (tuple with usage).
        if isinstance(result, tuple):
            embeddings, usage_info = result
            if usage_info:
                # `or 0` guards against a key that is present but set to None.
                prompt_tokens = usage_info.get('prompt_tokens', 0) or 0
                total_tokens = usage_info.get('total_tokens', 0) or 0
            return embeddings
        return result

    except Exception as e:
        status = 'error'
        error_message = str(e)
        raise
    finally:
        # Runs on both the success and the error path.
        duration_ms = int((time.monotonic() - start_time) * 1000)

        # Monitoring is best-effort: any failure here is logged and swallowed
        # so it cannot replace the real result or exception.
        try:
            await self.requester.ap.monitoring_service.record_embedding_call(
                model_name=model.model_entity.name,
                prompt_tokens=prompt_tokens,
                total_tokens=total_tokens,
                duration=duration_ms,
                input_count=len(input_text),
                status=status,
                error_message=error_message,
                knowledge_base_id=knowledge_base_id,
                query_text=query_text,
                session_id=session_id,
                message_id=message_id,
                call_type=call_type,
            )
        except Exception as monitor_err:
            self.requester.ap.logger.error(f'[Monitoring] Failed to record embedding call: {monitor_err}')
|
||||
|
||||
|
||||
class RuntimeLLMModel:
|
||||
"""运行时模型"""
|
||||
@@ -141,7 +355,7 @@ class ProviderAPIRequester(metaclass=abc.ABCMeta):
|
||||
model: RuntimeEmbeddingModel,
|
||||
input_text: typing.List[str],
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
) -> typing.List[typing.List[float]]:
|
||||
) -> typing.Union[typing.List[typing.List[float]], tuple[typing.List[typing.List[float]], dict]]:
|
||||
"""调用 Embedding API
|
||||
|
||||
Args:
|
||||
@@ -151,5 +365,6 @@ class ProviderAPIRequester(metaclass=abc.ABCMeta):
|
||||
|
||||
Returns:
|
||||
typing.List[typing.List[float]]: 返回的 embedding 向量
|
||||
或者 tuple[typing.List[typing.List[float]], dict]: 返回 (embedding 向量, usage_info)
|
||||
"""
|
||||
pass
|
||||
|
||||
@@ -253,7 +253,7 @@ class OpenAIChatCompletions(requester.ProviderAPIRequester):
|
||||
use_funcs: list[resource_tool.LLMTool] = None,
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
remove_think: bool = False,
|
||||
) -> provider_message.Message:
|
||||
) -> tuple[provider_message.Message, dict]:
|
||||
self.client.api_key = use_model.provider.token_mgr.get_token()
|
||||
|
||||
args = {}
|
||||
@@ -285,7 +285,14 @@ class OpenAIChatCompletions(requester.ProviderAPIRequester):
|
||||
# 处理请求结果
|
||||
message = await self._make_msg(resp, remove_think)
|
||||
|
||||
return message
|
||||
# Extract token usage from response
|
||||
usage_info = {}
|
||||
if hasattr(resp, 'usage') and resp.usage:
|
||||
usage_info['input_tokens'] = resp.usage.prompt_tokens or 0
|
||||
usage_info['output_tokens'] = resp.usage.completion_tokens or 0
|
||||
usage_info['total_tokens'] = resp.usage.total_tokens or 0
|
||||
|
||||
return message, usage_info
|
||||
|
||||
async def invoke_llm(
|
||||
self,
|
||||
@@ -295,7 +302,8 @@ class OpenAIChatCompletions(requester.ProviderAPIRequester):
|
||||
funcs: typing.List[resource_tool.LLMTool] = None,
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
remove_think: bool = False,
|
||||
) -> provider_message.Message:
|
||||
) -> tuple[provider_message.Message, dict]:
|
||||
"""Invoke LLM and return message with usage info"""
|
||||
req_messages = [] # req_messages 仅用于类内,外部同步由 query.messages 进行
|
||||
for m in messages:
|
||||
msg_dict = m.dict(exclude_none=True)
|
||||
@@ -308,7 +316,7 @@ class OpenAIChatCompletions(requester.ProviderAPIRequester):
|
||||
req_messages.append(msg_dict)
|
||||
|
||||
try:
|
||||
msg = await self._closure(
|
||||
msg, usage_info = await self._closure(
|
||||
query=query,
|
||||
req_messages=req_messages,
|
||||
use_model=model,
|
||||
@@ -316,30 +324,38 @@ class OpenAIChatCompletions(requester.ProviderAPIRequester):
|
||||
extra_args=extra_args,
|
||||
remove_think=remove_think,
|
||||
)
|
||||
return msg
|
||||
return msg, usage_info
|
||||
except asyncio.TimeoutError:
|
||||
raise errors.RequesterError('请求超时')
|
||||
except openai.BadRequestError as e:
|
||||
if 'context_length_exceeded' in e.message:
|
||||
raise errors.RequesterError(f'上文过长,请重置会话: {e.message}')
|
||||
error_message = str(e.message) if hasattr(e, 'message') else str(e)
|
||||
if 'context_length_exceeded' in str(e):
|
||||
raise errors.RequesterError(f'上文过长,请重置会话: {error_message}')
|
||||
else:
|
||||
raise errors.RequesterError(f'请求参数错误: {e.message}')
|
||||
raise errors.RequesterError(f'请求参数错误: {error_message}')
|
||||
except openai.AuthenticationError as e:
|
||||
raise errors.RequesterError(f'无效的 api-key: {e.message}')
|
||||
error_message = str(e.message) if hasattr(e, 'message') else str(e)
|
||||
raise errors.RequesterError(f'无效的 api-key: {error_message}')
|
||||
except openai.NotFoundError as e:
|
||||
raise errors.RequesterError(f'请求路径错误: {e.message}')
|
||||
error_message = str(e.message) if hasattr(e, 'message') else str(e)
|
||||
raise errors.RequesterError(f'请求路径错误: {error_message}')
|
||||
except openai.RateLimitError as e:
|
||||
raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}')
|
||||
error_message = str(e.message) if hasattr(e, 'message') else str(e)
|
||||
raise errors.RequesterError(f'请求过于频繁或余额不足: {error_message}')
|
||||
except openai.APIConnectionError as e:
|
||||
error_message = f'连接错误: {str(e)}'
|
||||
raise errors.RequesterError(error_message)
|
||||
except openai.APIError as e:
|
||||
raise errors.RequesterError(f'请求错误: {e.message}')
|
||||
error_message = str(e.message) if hasattr(e, 'message') else str(e)
|
||||
raise errors.RequesterError(f'请求错误: {error_message}')
|
||||
|
||||
async def invoke_embedding(
|
||||
self,
|
||||
model: requester.RuntimeEmbeddingModel,
|
||||
input_text: list[str],
|
||||
extra_args: dict[str, typing.Any] = {},
|
||||
) -> list[list[float]]:
|
||||
"""调用 Embedding API"""
|
||||
) -> tuple[list[list[float]], dict]:
|
||||
"""调用 Embedding API, returns (embeddings, usage_info)"""
|
||||
self.client.api_key = model.provider.token_mgr.get_token()
|
||||
|
||||
args = {
|
||||
@@ -355,7 +371,13 @@ class OpenAIChatCompletions(requester.ProviderAPIRequester):
|
||||
try:
|
||||
resp = await self.client.embeddings.create(**args)
|
||||
|
||||
return [d.embedding for d in resp.data]
|
||||
# Extract usage info
|
||||
usage_info = {}
|
||||
if hasattr(resp, 'usage') and resp.usage:
|
||||
usage_info['prompt_tokens'] = resp.usage.prompt_tokens or 0
|
||||
usage_info['total_tokens'] = resp.usage.total_tokens or 0
|
||||
|
||||
return [d.embedding for d in resp.data], usage_info
|
||||
except asyncio.TimeoutError:
|
||||
raise errors.RequesterError('请求超时')
|
||||
except openai.BadRequestError as e:
|
||||
|
||||
Reference in New Issue
Block a user