Fix/storage retention cleanup (#2159)

* fix: add storage retention cleanup

* fix: prune completed tasks on completion

* fix: complete storage analysis i18n
This commit is contained in:
Junyan Chin
2026-05-02 17:09:31 +08:00
committed by GitHub
parent 8db55267d8
commit 0154ea6cd3
19 changed files with 1084 additions and 45 deletions

View File

@@ -31,6 +31,7 @@ from ..api.http.service import mcp as mcp_service
from ..api.http.service import apikey as apikey_service
from ..api.http.service import webhook as webhook_service
from ..api.http.service import monitoring as monitoring_service
from ..api.http.service import maintenance as maintenance_service
from ..discover import engine as discover_engine
from ..storage import mgr as storagemgr
@@ -155,6 +156,8 @@ class Application:
monitoring_service: monitoring_service.MonitoringService = None
maintenance_service: maintenance_service.MaintenanceService = None
def __init__(self):
pass
@@ -194,14 +197,30 @@ class Application:
monitoring_cfg = self.instance_config.data.get('monitoring', {})
auto_cleanup_cfg = monitoring_cfg.get('auto_cleanup', {})
if auto_cleanup_cfg.get('enabled', True):
retention_days = auto_cleanup_cfg.get('retention_days', 30)
check_interval_hours = auto_cleanup_cfg.get('check_interval_hours', 1)
retention_days = self._get_positive_int_config(
auto_cleanup_cfg.get('retention_days', 30),
default=30,
name='monitoring.auto_cleanup.retention_days',
)
delete_batch_size = self._get_positive_int_config(
auto_cleanup_cfg.get('delete_batch_size', 1000),
default=1000,
name='monitoring.auto_cleanup.delete_batch_size',
)
check_interval_hours = self._get_positive_float_config(
auto_cleanup_cfg.get('check_interval_hours', 1),
default=1,
name='monitoring.auto_cleanup.check_interval_hours',
)
async def monitoring_cleanup_loop():
check_interval_seconds = check_interval_hours * 3600
while True:
try:
deleted = await self.monitoring_service.cleanup_expired_records(retention_days)
deleted = await self.monitoring_service.cleanup_expired_records(
retention_days,
batch_size=delete_batch_size,
)
total_deleted = sum(deleted.values())
if total_deleted > 0:
self.logger.info(
@@ -218,6 +237,33 @@ class Application:
scopes=[core_entities.LifecycleControlScope.APPLICATION],
)
# Start storage/log maintenance task if enabled
storage_cleanup_cfg = self.instance_config.data.get('storage', {}).get('cleanup', {})
if storage_cleanup_cfg.get('enabled', True) and self.maintenance_service is not None:
check_interval_hours = self._get_positive_float_config(
storage_cleanup_cfg.get('check_interval_hours', 1),
default=1,
name='storage.cleanup.check_interval_hours',
)
async def storage_cleanup_loop():
check_interval_seconds = check_interval_hours * 3600
while True:
try:
deleted = await self.maintenance_service.cleanup_expired_files()
total_deleted = sum(deleted.values())
if total_deleted > 0:
self.logger.info(f'Storage maintenance: deleted expired files: {deleted}')
except Exception as e:
self.logger.warning(f'Storage maintenance error: {e}')
await asyncio.sleep(check_interval_seconds)
self.task_mgr.create_task(
storage_cleanup_loop(),
name='storage-maintenance',
scopes=[core_entities.LifecycleControlScope.APPLICATION],
)
self.task_mgr.create_task(
never_ending(),
name='never-ending-task',
@@ -232,6 +278,28 @@ class Application:
self.logger.error(f'Application runtime fatal exception: {e}')
self.logger.debug(f'Traceback: {traceback.format_exc()}')
def _get_positive_int_config(self, value, default: int, name: str) -> int:
try:
parsed = int(value)
except (TypeError, ValueError):
self.logger.warning(f'Invalid {name}: {value!r}, using {default}')
return default
if parsed < 1:
self.logger.warning(f'Invalid {name}: {value!r}, using {default}')
return default
return parsed
def _get_positive_float_config(self, value, default: float, name: str) -> float:
try:
parsed = float(value)
except (TypeError, ValueError):
self.logger.warning(f'Invalid {name}: {value!r}, using {default}')
return default
if parsed <= 0:
self.logger.warning(f'Invalid {name}: {value!r}, using {default}')
return default
return parsed
def dispose(self):
self.plugin_connector.dispose()

View File

@@ -28,6 +28,7 @@ from ...api.http.service import mcp as mcp_service
from ...api.http.service import apikey as apikey_service
from ...api.http.service import webhook as webhook_service
from ...api.http.service import monitoring as monitoring_service
from ...api.http.service import maintenance as maintenance_service
from ...discover import engine as discover_engine
from ...storage import mgr as storagemgr
from ...utils import logcache
@@ -167,6 +168,9 @@ class BuildAppStage(stage.BootingStage):
monitoring_service_inst = monitoring_service.MonitoringService(ap)
ap.monitoring_service = monitoring_service_inst
maintenance_service_inst = maintenance_service.MaintenanceService(ap)
ap.maintenance_service = maintenance_service_inst
async def runtime_disconnect_callback(connector: plugin_connector.PluginRuntimeConnector) -> None:
await asyncio.sleep(3)
await plugin_connector_inst.initialize()

View File

@@ -3,6 +3,7 @@ from __future__ import annotations
import asyncio
import typing
import datetime
import time
from . import app
from . import entities as core_entities
@@ -119,6 +120,7 @@ class TaskWrapper:
self.label = label if label != '' else name
self.task.set_name(name)
self.scopes = scopes
self.created_at = time.time()
def assume_exception(self):
try:
@@ -154,6 +156,7 @@ class TaskWrapper:
'name': self.name,
'label': self.label,
'scopes': [scope.value for scope in self.scopes],
'created_at': self.created_at,
'task_context': self.task_context.to_dict(),
'runtime': {
'done': self.task.done(),
@@ -193,6 +196,8 @@ class AsyncTaskManager:
) -> TaskWrapper:
wrapper = TaskWrapper(self.ap, coro, task_type, kind, name, label, context, scopes)
self.tasks.append(wrapper)
wrapper.task.add_done_callback(lambda _: self._prune_completed_tasks())
self._prune_completed_tasks()
return wrapper
def create_user_task(
@@ -226,6 +231,15 @@ class AsyncTaskManager:
'id_index': TaskWrapper._id_index,
}
def get_stats(self) -> dict:
completed = sum(1 for t in self.tasks if t.task.done())
return {
'total': len(self.tasks),
'running': len(self.tasks) - completed,
'completed': completed,
'id_index': TaskWrapper._id_index,
}
def get_task_by_id(self, id: int) -> TaskWrapper | None:
for t in self.tasks:
if t.id == id:
@@ -243,3 +257,27 @@ class AsyncTaskManager:
if not wrapper.task.done():
wrapper.task.cancel()
return
def _prune_completed_tasks(self):
completed_limit = (
self.ap.instance_config.data.get('system', {})
.get('task_retention', {})
.get(
'completed_limit',
200,
)
)
try:
completed_limit = int(completed_limit)
except (TypeError, ValueError):
completed_limit = 200
if completed_limit < 1:
completed_limit = 1
completed_tasks = [wrapper for wrapper in self.tasks if wrapper.task.done()]
overflow = len(completed_tasks) - completed_limit
if overflow <= 0:
return
remove_ids = {wrapper.id for wrapper in completed_tasks[:overflow]}
self.tasks = [wrapper for wrapper in self.tasks if wrapper.id not in remove_ids]