Feat/monitor (#1928)

* feat: add monitor * feat: fix tab * feat: work * feat: not reliable monitor * feat: enhance monitoring page layout with integrated filters and refresh button * feat: add support for runner recording * feat: add jump button & alignment * feat: new * fix: not show query variables in local agent * fix: pnpm lint and python ruff check * fix: ruff fromat * chore: remove unnecessary migration * style: optimize monitoring page layout and fix sticky filter issues - Enhanced metric cards with gradient backgrounds and hover effects - Increased traffic chart height from 200px to 300px - Adjusted grid layout and spacing for better visual appeal - Fixed sticky filter area to properly cover parent padding without transparent gaps - Used negative margins and positioning to eliminate scrolling artifacts - Matched padding/margins with other pages (pipelines, bots) for consistency - Removed duplicate title/subtitle from page content - Added cursor-pointer styling to tab triggers - Removed border between tab list and tab content Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com> * fix: apply prettier formatting to monitoring components - Fixed indentation and spacing in MetricCard.tsx - Fixed formatting in TrafficChart.tsx - Applied prettier formatting to page.tsx Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com> * feat: update HomeSidebar to trigger action on child selection and localize monitoring titles * refactor: streamline LLM and embedding invocation methods * feat: add embedding model monitor * fix: database version * chore: simplify pnpm-lock.yaml formatting --------- Co-authored-by: Junyan Qin <rockchinq@gmail.com> Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-06-12 16:56:02 +00:00 · 2026-01-26 21:08:23 +08:00
parent b73847f1a6
commit 5d9f6ec763
37 changed files with 6706 additions and 3182 deletions
--- a/src/langbot/pkg/provider/modelmgr/requester.py
+++ b/src/langbot/pkg/provider/modelmgr/requester.py
@@ -2,6 +2,7 @@ from __future__ import annotations

 import abc
 import typing
+import time

 from ...core import app
 from ...entity.persistence import model as persistence_model
@@ -33,6 +34,219 @@ class RuntimeProvider:
        self.token_mgr = token_mgr
        self.requester = requester

+    async def invoke_llm(
+        self,
+        query: pipeline_query.Query,
+        model: RuntimeLLMModel,
+        messages: typing.List[provider_message.Message],
+        funcs: typing.List[resource_tool.LLMTool] = None,
+        extra_args: dict[str, typing.Any] = {},
+        remove_think: bool = False,
+    ) -> provider_message.Message:
+        """Bridge method for invoking LLM with monitoring"""
+        # Start timing for monitoring
+        start_time = time.time()
+        input_tokens = 0
+        output_tokens = 0
+        status = 'success'
+        error_message = None
+
+        try:
+            # Call the underlying requester
+            result = await self.requester.invoke_llm(
+                query=query,
+                model=model,
+                messages=messages,
+                funcs=funcs,
+                extra_args=extra_args,
+                remove_think=remove_think,
+            )
+
+            # Try to extract token usage if the requester returns it
+            # For requesters that return tuple (message, usage_info)
+            if isinstance(result, tuple):
+                msg, usage_info = result
+                if usage_info:
+                    input_tokens = usage_info.get('input_tokens', 0)
+                    output_tokens = usage_info.get('output_tokens', 0)
+                return msg
+            else:
+                return result
+
+        except Exception as e:
+            status = 'error'
+            error_message = str(e)
+            raise
+        finally:
+            # Record LLM call monitoring data (only if query is provided)
+            if query is not None:
+                duration_ms = int((time.time() - start_time) * 1000)
+
+                # Import monitoring helper
+                try:
+                    from ...pipeline import monitoring_helper
+
+                    # Get monitoring metadata from query variables
+                    if query.variables:
+                        bot_name = query.variables.get('_monitoring_bot_name', 'Unknown')
+                        pipeline_name = query.variables.get('_monitoring_pipeline_name', 'Unknown')
+                        message_id = query.variables.get('_monitoring_message_id')
+                    else:
+                        bot_name = 'Unknown'
+                        pipeline_name = 'Unknown'
+                        message_id = None
+
+                    await monitoring_helper.MonitoringHelper.record_llm_call(
+                        ap=self.requester.ap,
+                        query=query,
+                        bot_id=query.bot_uuid or 'unknown',
+                        bot_name=bot_name,
+                        pipeline_id=query.pipeline_uuid or 'unknown',
+                        pipeline_name=pipeline_name,
+                        model_name=model.model_entity.name,
+                        input_tokens=input_tokens,
+                        output_tokens=output_tokens,
+                        duration_ms=duration_ms,
+                        status=status,
+                        error_message=error_message,
+                        message_id=message_id,
+                    )
+                except Exception as monitor_err:
+                    self.requester.ap.logger.error(f'[Monitoring] Failed to record LLM call: {monitor_err}')
+
+    async def invoke_llm_stream(
+        self,
+        query: pipeline_query.Query,
+        model: RuntimeLLMModel,
+        messages: typing.List[provider_message.Message],
+        funcs: typing.List[resource_tool.LLMTool] = None,
+        extra_args: dict[str, typing.Any] = {},
+        remove_think: bool = False,
+    ) -> provider_message.MessageChunk:
+        """Bridge method for invoking LLM stream with monitoring"""
+        # Start timing for monitoring
+        start_time = time.time()
+        status = 'success'
+        error_message = None
+        # Note: Stream doesn't easily provide token counts, set to 0
+        input_tokens = 0
+        output_tokens = 0
+
+        try:
+            # Stream the response
+            async for chunk in self.requester.invoke_llm_stream(
+                query=query,
+                model=model,
+                messages=messages,
+                funcs=funcs,
+                extra_args=extra_args,
+                remove_think=remove_think,
+            ):
+                yield chunk
+        except Exception as e:
+            status = 'error'
+            error_message = str(e)
+            raise
+        finally:
+            # Record LLM call monitoring data (only if query is provided)
+            if query is not None:
+                duration_ms = int((time.time() - start_time) * 1000)
+
+                # Import monitoring helper
+                try:
+                    from ...pipeline import monitoring_helper
+
+                    # Get monitoring metadata from query variables
+                    if query.variables:
+                        bot_name = query.variables.get('_monitoring_bot_name', 'Unknown')
+                        pipeline_name = query.variables.get('_monitoring_pipeline_name', 'Unknown')
+                        message_id = query.variables.get('_monitoring_message_id')
+                    else:
+                        bot_name = 'Unknown'
+                        pipeline_name = 'Unknown'
+                        message_id = None
+
+                    await monitoring_helper.MonitoringHelper.record_llm_call(
+                        ap=self.requester.ap,
+                        query=query,
+                        bot_id=query.bot_uuid or 'unknown',
+                        bot_name=bot_name,
+                        pipeline_id=query.pipeline_uuid or 'unknown',
+                        pipeline_name=pipeline_name,
+                        model_name=model.model_entity.name,
+                        input_tokens=input_tokens,
+                        output_tokens=output_tokens,
+                        duration_ms=duration_ms,
+                        status=status,
+                        error_message=error_message,
+                        message_id=message_id,
+                    )
+                except Exception as monitor_err:
+                    self.requester.ap.logger.error(f'[Monitoring] Failed to record LLM stream call: {monitor_err}')
+
+    async def invoke_embedding(
+        self,
+        model: RuntimeEmbeddingModel,
+        input_text: typing.List[str],
+        extra_args: dict[str, typing.Any] = {},
+        knowledge_base_id: str | None = None,
+        query_text: str | None = None,
+        session_id: str | None = None,
+        message_id: str | None = None,
+        call_type: str | None = None,
+    ) -> typing.List[typing.List[float]]:
+        """Bridge method for invoking embedding with monitoring"""
+        # Start timing for monitoring
+        start_time = time.time()
+        prompt_tokens = 0
+        total_tokens = 0
+        status = 'success'
+        error_message = None
+
+        try:
+            # Call the underlying requester
+            result = await self.requester.invoke_embedding(
+                model=model,
+                input_text=input_text,
+                extra_args=extra_args,
+            )
+
+            # Handle both old format (list only) and new format (tuple with usage)
+            if isinstance(result, tuple):
+                embeddings, usage_info = result
+                if usage_info:
+                    prompt_tokens = usage_info.get('prompt_tokens', 0)
+                    total_tokens = usage_info.get('total_tokens', 0)
+                return embeddings
+            else:
+                return result
+
+        except Exception as e:
+            status = 'error'
+            error_message = str(e)
+            raise
+        finally:
+            # Record embedding call monitoring data
+            duration_ms = int((time.time() - start_time) * 1000)
+
+            try:
+                await self.requester.ap.monitoring_service.record_embedding_call(
+                    model_name=model.model_entity.name,
+                    prompt_tokens=prompt_tokens,
+                    total_tokens=total_tokens,
+                    duration=duration_ms,
+                    input_count=len(input_text),
+                    status=status,
+                    error_message=error_message,
+                    knowledge_base_id=knowledge_base_id,
+                    query_text=query_text,
+                    session_id=session_id,
+                    message_id=message_id,
+                    call_type=call_type,
+                )
+            except Exception as monitor_err:
+                self.requester.ap.logger.error(f'[Monitoring] Failed to record embedding call: {monitor_err}')
+

 class RuntimeLLMModel:
    """运行时模型"""
@@ -141,7 +355,7 @@ class ProviderAPIRequester(metaclass=abc.ABCMeta):
        model: RuntimeEmbeddingModel,
        input_text: typing.List[str],
        extra_args: dict[str, typing.Any] = {},
-    ) -> typing.List[typing.List[float]]:
+    ) -> typing.Union[typing.List[typing.List[float]], tuple[typing.List[typing.List[float]], dict]]:
        """调用 Embedding API

        Args:
@@ -151,5 +365,6 @@ class ProviderAPIRequester(metaclass=abc.ABCMeta):

        Returns:
            typing.List[typing.List[float]]: 返回的 embedding 向量
+            或者 tuple[typing.List[typing.List[float]], dict]: 返回 (embedding 向量, usage_info)
        """
        pass
--- a/src/langbot/pkg/provider/modelmgr/requesters/chatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/chatcmpl.py
@@ -253,7 +253,7 @@ class OpenAIChatCompletions(requester.ProviderAPIRequester):
        use_funcs: list[resource_tool.LLMTool] = None,
        extra_args: dict[str, typing.Any] = {},
        remove_think: bool = False,
-    ) -> provider_message.Message:
+    ) -> tuple[provider_message.Message, dict]:
        self.client.api_key = use_model.provider.token_mgr.get_token()

        args = {}
@@ -285,7 +285,14 @@ class OpenAIChatCompletions(requester.ProviderAPIRequester):
        # 处理请求结果
        message = await self._make_msg(resp, remove_think)

-        return message
+        # Extract token usage from response
+        usage_info = {}
+        if hasattr(resp, 'usage') and resp.usage:
+            usage_info['input_tokens'] = resp.usage.prompt_tokens or 0
+            usage_info['output_tokens'] = resp.usage.completion_tokens or 0
+            usage_info['total_tokens'] = resp.usage.total_tokens or 0
+
+        return message, usage_info

    async def invoke_llm(
        self,
@@ -295,7 +302,8 @@ class OpenAIChatCompletions(requester.ProviderAPIRequester):
        funcs: typing.List[resource_tool.LLMTool] = None,
        extra_args: dict[str, typing.Any] = {},
        remove_think: bool = False,
-    ) -> provider_message.Message:
+    ) -> tuple[provider_message.Message, dict]:
+        """Invoke LLM and return message with usage info"""
        req_messages = []  # req_messages 仅用于类内，外部同步由 query.messages 进行
        for m in messages:
            msg_dict = m.dict(exclude_none=True)
@@ -308,7 +316,7 @@ class OpenAIChatCompletions(requester.ProviderAPIRequester):
            req_messages.append(msg_dict)

        try:
-            msg = await self._closure(
+            msg, usage_info = await self._closure(
                query=query,
                req_messages=req_messages,
                use_model=model,
@@ -316,30 +324,38 @@ class OpenAIChatCompletions(requester.ProviderAPIRequester):
                extra_args=extra_args,
                remove_think=remove_think,
            )
-            return msg
+            return msg, usage_info
        except asyncio.TimeoutError:
            raise errors.RequesterError('请求超时')
        except openai.BadRequestError as e:
-            if 'context_length_exceeded' in e.message:
-                raise errors.RequesterError(f'上文过长，请重置会话: {e.message}')
+            error_message = str(e.message) if hasattr(e, 'message') else str(e)
+            if 'context_length_exceeded' in str(e):
+                raise errors.RequesterError(f'上文过长，请重置会话: {error_message}')
            else:
-                raise errors.RequesterError(f'请求参数错误: {e.message}')
+                raise errors.RequesterError(f'请求参数错误: {error_message}')
        except openai.AuthenticationError as e:
-            raise errors.RequesterError(f'无效的 api-key: {e.message}')
+            error_message = str(e.message) if hasattr(e, 'message') else str(e)
+            raise errors.RequesterError(f'无效的 api-key: {error_message}')
        except openai.NotFoundError as e:
-            raise errors.RequesterError(f'请求路径错误: {e.message}')
+            error_message = str(e.message) if hasattr(e, 'message') else str(e)
+            raise errors.RequesterError(f'请求路径错误: {error_message}')
        except openai.RateLimitError as e:
-            raise errors.RequesterError(f'请求过于频繁或余额不足: {e.message}')
+            error_message = str(e.message) if hasattr(e, 'message') else str(e)
+            raise errors.RequesterError(f'请求过于频繁或余额不足: {error_message}')
+        except openai.APIConnectionError as e:
+            error_message = f'连接错误: {str(e)}'
+            raise errors.RequesterError(error_message)
        except openai.APIError as e:
-            raise errors.RequesterError(f'请求错误: {e.message}')
+            error_message = str(e.message) if hasattr(e, 'message') else str(e)
+            raise errors.RequesterError(f'请求错误: {error_message}')

    async def invoke_embedding(
        self,
        model: requester.RuntimeEmbeddingModel,
        input_text: list[str],
        extra_args: dict[str, typing.Any] = {},
-    ) -> list[list[float]]:
-        """调用 Embedding API"""
+    ) -> tuple[list[list[float]], dict]:
+        """调用 Embedding API, returns (embeddings, usage_info)"""
        self.client.api_key = model.provider.token_mgr.get_token()

        args = {
@@ -355,7 +371,13 @@ class OpenAIChatCompletions(requester.ProviderAPIRequester):
        try:
            resp = await self.client.embeddings.create(**args)

-            return [d.embedding for d in resp.data]
+            # Extract usage info
+            usage_info = {}
+            if hasattr(resp, 'usage') and resp.usage:
+                usage_info['prompt_tokens'] = resp.usage.prompt_tokens or 0
+                usage_info['total_tokens'] = resp.usage.total_tokens or 0
+
+            return [d.embedding for d in resp.data], usage_info
        except asyncio.TimeoutError:
            raise errors.RequesterError('请求超时')
        except openai.BadRequestError as e: