feat(telemetry): payload v2 with feature usage counters and instance heartbeat

Per-query events now carry event_type='query' and a features JSON object:
- tool_calls by source (native/plugin/mcp/skill) via ToolManager
- tool_call_rounds, kb usage (count/engine plugins/retrieved entries) via local-agent
- sandbox execs/errors via BoxService
- activated_skills and bound mcp_servers snapshots

New instance_heartbeat event (sent at startup and then daily) reports an
anonymous instance profile: deploy platform, database/vdb kind, box
backend/availability, adapter type names, and resource counts. The heartbeat
respects space.disable_telemetry.

All collection helpers are defensive and never break the pipeline.
Verified: ruff, 37 telemetry unit tests (13 new), 504 box/provider/pipeline tests.
This commit is contained in:
RockChinQ
2026-06-12 08:11:43 -04:00
parent bca710dbd4
commit dd96da895c
10 changed files with 488 additions and 0 deletions
@@ -4,6 +4,7 @@ import json
import copy
import typing
from .. import runner
from ...telemetry import features as telemetry_features
from ..modelmgr import requester as modelmgr_requester
from ..tools.loaders.native import EXEC_TOOL_NAME
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
@@ -187,6 +188,8 @@ class LocalAgentRunner(runner.RequestRunner):
# only support text for now
all_results: list[rag_context.RetrievalResultEntry] = []
kb_engine_plugins: set[str] = set()
# Retrieve from each knowledge base
for kb_uuid in kb_uuids:
kb = await self.ap.rag_mgr.get_knowledge_base_by_uuid(kb_uuid)
@@ -195,6 +198,12 @@ class LocalAgentRunner(runner.RequestRunner):
self.ap.logger.warning(f'Knowledge base {kb_uuid} not found, skipping')
continue
try:
engine_plugin_id = kb.get_knowledge_engine_plugin_id() or 'builtin'
except Exception:
engine_plugin_id = 'builtin'
kb_engine_plugins.add(engine_plugin_id)
result = await kb.retrieve(
user_message_text,
settings={
@@ -207,6 +216,17 @@ class LocalAgentRunner(runner.RequestRunner):
if result:
all_results.extend(result)
# Telemetry: knowledge base usage (counts and engine categories only)
telemetry_features.set_value(
query,
'kb',
{
'kb_count': len(kb_uuids),
'engine_plugins': sorted(kb_engine_plugins),
'retrieved_entries': len(all_results),
},
)
# Rerank step: re-score results using a rerank model if configured
local_agent_config = query.pipeline_config.get('ai', {}).get('local-agent', {})
rerank_model_uuid = local_agent_config.get('rerank-model', '')
@@ -373,6 +393,7 @@ class LocalAgentRunner(runner.RequestRunner):
tool_call_round = 0
while pending_tool_calls:
tool_call_round += 1
telemetry_features.set_value(query, 'tool_call_rounds', tool_call_round)
if tool_call_round > MAX_TOOL_CALL_ROUNDS:
self.ap.logger.warning(
f'Tool-call loop reached the {MAX_TOOL_CALL_ROUNDS}-round cap '
@@ -97,13 +97,19 @@ class ToolManager:
return tools
async def execute_func_call(self, name: str, parameters: dict, query: pipeline_query.Query) -> typing.Any:
    """Run the tool called ``name`` through the first loader that provides it.

    Loaders are probed in a fixed order: native, plugin, mcp, skill. Just
    before the matching loader is invoked, ``telemetry_features.increment``
    is called for the query's ``tool_calls`` counter with that source label.

    Args:
        name: Name of the tool to execute.
        parameters: Arguments forwarded unchanged to the loader's ``invoke_tool``.
        query: Query on whose behalf the tool runs; also passed to telemetry.

    Returns:
        Whatever the matching loader's ``invoke_tool`` returns.

    Raises:
        ValueError: If none of the loaders reports having the tool.
    """
    # Function-scope import kept as in the original; presumably avoids an
    # import cycle with the telemetry package -- TODO confirm.
    from langbot.pkg.telemetry import features as telemetry_features

    # (attribute holding the loader, telemetry source label), in probe order.
    probe_order = (
        ('native_tool_loader', 'native'),
        ('plugin_tool_loader', 'plugin'),
        ('mcp_tool_loader', 'mcp'),
        ('skill_tool_loader', 'skill'),
    )

    for loader_attr, source_label in probe_order:
        # Resolve each loader only when it is actually reached, so later
        # loaders are untouched once an earlier one claims the tool.
        loader = getattr(self, loader_attr)
        if not await loader.has_tool(name):
            continue
        telemetry_features.increment(query, 'tool_calls', source_label)
        return await loader.invoke_tool(name, parameters, query)

    raise ValueError(f'未找到工具: {name}')