Files
LangBot/src/langbot/pkg/core/app.py
T
RockChinQ dd96da895c feat(telemetry): payload v2 with feature usage counters and instance heartbeat
Per-query events now carry event_type='query' and a features JSON object:
- tool_calls by source (native/plugin/mcp/skill) via ToolManager
- tool_call_rounds, kb usage (count/engine plugins/retrieved entries) via local-agent
- sandbox execs/errors via BoxService
- activated_skills and bound mcp_servers snapshots

New instance_heartbeat event (startup + daily) reports anonymous instance
profile: deploy platform, database/vdb kind, box backend/availability,
adapter type names, and resource counts. Respects space.disable_telemetry.

All collection helpers are defensive and never break the pipeline.
Verified: ruff, 37 telemetry unit tests (13 new), 504 box/provider/pipeline tests.
2026-06-12 08:11:43 -04:00

357 lines
13 KiB
Python

from __future__ import annotations
import logging
import asyncio
import traceback
import os
from ..platform import botmgr as im_mgr
from ..platform.webhook_pusher import WebhookPusher
from ..provider.session import sessionmgr as llm_session_mgr
from ..provider.modelmgr import modelmgr as llm_model_mgr
from ..box import service as box_service_module
from langbot.pkg.provider.tools import toolmgr as llm_tool_mgr
from ..config import manager as config_mgr
from ..command import cmdmgr
from ..plugin import connector as plugin_connector
from ..pipeline import pool
from ..pipeline import controller, pipelinemgr
from ..pipeline import aggregator as message_aggregator
from ..utils import version as version_mgr, proxy as proxy_mgr
from ..persistence import mgr as persistencemgr
from ..api.http.controller import main as http_controller
from ..api.http.service import user as user_service
from ..api.http.service import space as space_service
from ..api.http.service import model as model_service
from ..api.http.service import provider as provider_service
from ..api.http.service import pipeline as pipeline_service
from ..api.http.service import bot as bot_service
from ..api.http.service import knowledge as knowledge_service
from ..api.http.service import mcp as mcp_service
from ..api.http.service import apikey as apikey_service
from ..api.http.service import webhook as webhook_service
from ..api.http.service import monitoring as monitoring_service
from ..api.http.service import skill as skill_service
from ..api.http.service import maintenance as maintenance_service
from ..discover import engine as discover_engine
from ..storage import mgr as storagemgr
from ..utils import logcache
from . import taskmgr
from . import entities as core_entities
from ..rag.knowledge import kbmgr as rag_mgr
from ..rag.service import RAGRuntimeService
from ..vector import mgr as vectordb_mgr
from ..telemetry import telemetry as telemetry_module
from ..survey import manager as survey_module
from ..skill import manager as skill_mgr
class Application:
"""Runtime application object and context"""
event_loop: asyncio.AbstractEventLoop = None
# asyncio_tasks: list[asyncio.Task] = []
task_mgr: taskmgr.AsyncTaskManager = None
discover: discover_engine.ComponentDiscoveryEngine = None
platform_mgr: im_mgr.PlatformManager = None
webhook_pusher: WebhookPusher = None
cmd_mgr: cmdmgr.CommandManager = None
sess_mgr: llm_session_mgr.SessionManager = None
model_mgr: llm_model_mgr.ModelManager = None
rag_mgr: rag_mgr.RAGManager = None
rag_runtime_service: RAGRuntimeService = None
# TODO move to pipeline
tool_mgr: llm_tool_mgr.ToolManager = None
box_service: box_service_module.BoxService = None
# ======= Config manager =======
command_cfg: config_mgr.ConfigManager = None # deprecated
pipeline_cfg: config_mgr.ConfigManager = None # deprecated
platform_cfg: config_mgr.ConfigManager = None # deprecated
provider_cfg: config_mgr.ConfigManager = None # deprecated
system_cfg: config_mgr.ConfigManager = None # deprecated
instance_config: config_mgr.ConfigManager = None
instance_id: config_mgr.ConfigManager = None # used to identify the instance
# ======= Metadata config manager =======
sensitive_meta: config_mgr.ConfigManager = None
pipeline_config_meta_trigger: config_mgr.ConfigManager = None
pipeline_config_meta_safety: config_mgr.ConfigManager = None
pipeline_config_meta_ai: config_mgr.ConfigManager = None
pipeline_config_meta_output: config_mgr.ConfigManager = None
# =========================
plugin_connector: plugin_connector.PluginRuntimeConnector = None
query_pool: pool.QueryPool = None
msg_aggregator: message_aggregator.MessageAggregator = None
ctrl: controller.Controller = None
pipeline_mgr: pipelinemgr.PipelineManager = None
ver_mgr: version_mgr.VersionManager = None
proxy_mgr: proxy_mgr.ProxyManager = None
logger: logging.Logger = None
persistence_mgr: persistencemgr.PersistenceManager = None
vector_db_mgr: vectordb_mgr.VectorDBManager = None
http_ctrl: http_controller.HTTPController = None
log_cache: logcache.LogCache = None
storage_mgr: storagemgr.StorageMgr = None
# ========= HTTP Services =========
user_service: user_service.UserService = None
space_service: space_service.SpaceService = None
llm_model_service: model_service.LLMModelsService = None
embedding_models_service: model_service.EmbeddingModelsService = None
rerank_models_service: model_service.RerankModelsService = None
provider_service: provider_service.ModelProviderService = None
pipeline_service: pipeline_service.PipelineService = None
bot_service: bot_service.BotService = None
knowledge_service: knowledge_service.KnowledgeService = None
mcp_service: mcp_service.MCPService = None
apikey_service: apikey_service.ApiKeyService = None
webhook_service: webhook_service.WebhookService = None
telemetry: telemetry_module.TelemetryManager = None
survey: survey_module.SurveyManager = None
monitoring_service: monitoring_service.MonitoringService = None
skill_service: skill_service.SkillService = None
skill_mgr: skill_mgr.SkillManager = None
maintenance_service: maintenance_service.MaintenanceService = None
def __init__(self):
pass
async def initialize(self):
pass
async def run(self):
try:
await self.plugin_connector.initialize_plugins()
# 后续可能会允许动态重启其他任务
# 故为了防止程序在非 Ctrl-C 情况下退出,这里创建一个不会结束的协程
async def never_ending():
while True:
await asyncio.sleep(1)
self.task_mgr.create_task(
self.platform_mgr.run(),
name='platform-manager',
scopes=[
core_entities.LifecycleControlScope.APPLICATION,
core_entities.LifecycleControlScope.PLATFORM,
],
)
self.task_mgr.create_task(
self.ctrl.run(),
name='query-controller',
scopes=[core_entities.LifecycleControlScope.APPLICATION],
)
self.task_mgr.create_task(
self.http_ctrl.run(),
name='http-api-controller',
scopes=[core_entities.LifecycleControlScope.APPLICATION],
)
# Telemetry instance heartbeat (startup + daily); respects
# space.disable_telemetry via TelemetryManager.send().
if self.telemetry is not None:
from ..telemetry import heartbeat as telemetry_heartbeat
self.task_mgr.create_task(
telemetry_heartbeat.heartbeat_loop(self),
name='telemetry-heartbeat',
scopes=[core_entities.LifecycleControlScope.APPLICATION],
)
# Start monitoring data cleanup task if enabled
monitoring_cfg = self.instance_config.data.get('monitoring', {})
auto_cleanup_cfg = monitoring_cfg.get('auto_cleanup', {})
if auto_cleanup_cfg.get('enabled', True):
retention_days = self._get_positive_int_config(
auto_cleanup_cfg.get('retention_days', 30),
default=30,
name='monitoring.auto_cleanup.retention_days',
)
delete_batch_size = self._get_positive_int_config(
auto_cleanup_cfg.get('delete_batch_size', 1000),
default=1000,
name='monitoring.auto_cleanup.delete_batch_size',
)
check_interval_hours = self._get_positive_float_config(
auto_cleanup_cfg.get('check_interval_hours', 1),
default=1,
name='monitoring.auto_cleanup.check_interval_hours',
)
async def monitoring_cleanup_loop():
check_interval_seconds = check_interval_hours * 3600
while True:
try:
deleted = await self.monitoring_service.cleanup_expired_records(
retention_days,
batch_size=delete_batch_size,
)
total_deleted = sum(deleted.values())
if total_deleted > 0:
self.logger.info(
f'Monitoring auto-cleanup: deleted {total_deleted} expired records '
f'(retention={retention_days}d): {deleted}'
)
except Exception as e:
self.logger.warning(f'Monitoring auto-cleanup error: {e}')
await asyncio.sleep(check_interval_seconds)
self.task_mgr.create_task(
monitoring_cleanup_loop(),
name='monitoring-cleanup',
scopes=[core_entities.LifecycleControlScope.APPLICATION],
)
# Start storage/log maintenance task if enabled
storage_cleanup_cfg = self.instance_config.data.get('storage', {}).get('cleanup', {})
if storage_cleanup_cfg.get('enabled', True) and self.maintenance_service is not None:
check_interval_hours = self._get_positive_float_config(
storage_cleanup_cfg.get('check_interval_hours', 1),
default=1,
name='storage.cleanup.check_interval_hours',
)
async def storage_cleanup_loop():
check_interval_seconds = check_interval_hours * 3600
while True:
try:
deleted = await self.maintenance_service.cleanup_expired_files()
total_deleted = sum(deleted.values())
if total_deleted > 0:
self.logger.info(f'Storage maintenance: deleted expired files: {deleted}')
except Exception as e:
self.logger.warning(f'Storage maintenance error: {e}')
await asyncio.sleep(check_interval_seconds)
self.task_mgr.create_task(
storage_cleanup_loop(),
name='storage-maintenance',
scopes=[core_entities.LifecycleControlScope.APPLICATION],
)
self.task_mgr.create_task(
never_ending(),
name='never-ending-task',
scopes=[core_entities.LifecycleControlScope.APPLICATION],
)
await self.print_web_access_info()
await self.task_mgr.wait_all()
except asyncio.CancelledError:
pass
except Exception as e:
self.logger.error(f'Application runtime fatal exception: {e}')
self.logger.debug(f'Traceback: {traceback.format_exc()}')
def _get_positive_int_config(self, value, default: int, name: str) -> int:
try:
parsed = int(value)
except (TypeError, ValueError):
self.logger.warning(f'Invalid {name}: {value!r}, using {default}')
return default
if parsed < 1:
self.logger.warning(f'Invalid {name}: {value!r}, using {default}')
return default
return parsed
def _get_positive_float_config(self, value, default: float, name: str) -> float:
try:
parsed = float(value)
except (TypeError, ValueError):
self.logger.warning(f'Invalid {name}: {value!r}, using {default}')
return default
if parsed <= 0:
self.logger.warning(f'Invalid {name}: {value!r}, using {default}')
return default
return parsed
def dispose(self):
if self.plugin_connector is not None:
self.plugin_connector.dispose()
if self.box_service is not None:
self.box_service.dispose()
async def print_web_access_info(self):
"""Print access webui tips"""
from ..utils import paths
frontend_path = paths.get_frontend_path()
if not os.path.exists(frontend_path):
self.logger.warning('WebUI 文件缺失,请根据文档部署:https://docs.langbot.app/zh')
self.logger.warning(
'WebUI files are missing, please deploy according to the documentation: https://docs.langbot.app/en'
)
return
host_ip = '127.0.0.1'
port = self.instance_config.data['api']['port']
tips = f"""
=======================================
✨ Access WebUI / 访问管理面板
🏠 Local Address: http://{host_ip}:{port}/
🌐 Public Address: http://<Your Public IP>:{port}/
📌 Running this program in a container? Please ensure that the {port} port is exposed
=======================================
""".strip()
for line in tips.split('\n'):
self.logger.info(line)