diff --git a/src/langbot/pkg/core/bootutils/log.py b/src/langbot/pkg/core/bootutils/log.py index 75ed5a92..7b78c93a 100644 --- a/src/langbot/pkg/core/bootutils/log.py +++ b/src/langbot/pkg/core/bootutils/log.py @@ -1,5 +1,6 @@ import logging import logging.handlers +import os import sys import time @@ -20,6 +21,66 @@ log_colors_config = { LOG_FILE_MAX_BYTES = 10 * 1024 * 1024 # 10MB per file LOG_FILE_BACKUP_COUNT = 5 # Keep 5 backup files (total ~50MB max) +LOG_DIR = 'data/logs' + + +class DailyGroupedRotatingFileHandler(logging.handlers.RotatingFileHandler): + """File handler that writes to ``data/logs/langbot-YYYY-MM-DD.log``. + + It combines two rotation triggers: + + * **Size** — within a single day the file is rotated once it exceeds + ``maxBytes``, producing numbered backups (``langbot-DATE.log.1`` etc.), + exactly like :class:`~logging.handlers.RotatingFileHandler`. + * **Date** — when the local date changes, logging switches to a fresh + ``langbot-.log`` file. This happens even within a single + long-running process, so a bot started on day N keeps writing to that + day's file and rolls over to day N+1's file at midnight, instead of + appending every subsequent day's logs to the start-day file. + + The on-disk naming stays compatible with the log-retention cleanup in + ``api/http/service/maintenance.py`` (``LOG_FILE_PATTERN``). + """ + + def __init__(self, log_dir: str, max_bytes: int, backup_count: int, encoding: str = 'utf-8'): + self.log_dir = log_dir + self._current_date = self._today() + super().__init__( + self._build_path(self._current_date), + maxBytes=max_bytes, + backupCount=backup_count, + encoding=encoding, + ) + + @staticmethod + def _today() -> str: + return time.strftime('%Y-%m-%d', time.localtime()) + + def _build_path(self, date_str: str) -> str: + return os.path.join(self.log_dir, 'langbot-%s.log' % date_str) + + def shouldRollover(self, record): + # Roll over when the day changes, regardless of file size. + if self._today() != self._current_date: + return True + return super().shouldRollover(record) + + def doRollover(self): + today = self._today() + if today != self._current_date: + # Date changed: point the handler at the new day's file. + # This is a date switch, not a size-based numbered rotation. + if self.stream: + self.stream.close() + self.stream = None + self._current_date = today + self.baseFilename = os.path.abspath(self._build_path(today)) + if not self.delay: + self.stream = self._open() + else: + # Same day, file exceeded maxBytes: numbered rotation. + super().doRollover() + async def init_logging(extra_handlers: list[logging.Handler] = None) -> logging.Logger: # Remove all existing loggers @@ -31,8 +92,6 @@ async def init_logging(extra_handlers: list[logging.Handler] = None) -> logging. if constants.debug_mode: level = logging.DEBUG - log_file_name = 'data/logs/langbot-%s.log' % time.strftime('%Y-%m-%d', time.localtime()) - qcg_logger = logging.getLogger('langbot') qcg_logger.setLevel(level) @@ -48,12 +107,13 @@ async def init_logging(extra_handlers: list[logging.Handler] = None) -> logging. # stream_handler.setFormatter(color_formatter) stream_handler.stream = open(sys.stdout.fileno(), mode='w', encoding='utf-8', buffering=1) - # Use RotatingFileHandler to prevent unbounded log file growth - rotating_file_handler = logging.handlers.RotatingFileHandler( - log_file_name, + # Rotate by size within a day and switch files when the date changes, + # so long-running processes still produce a log file for the current day. + rotating_file_handler = DailyGroupedRotatingFileHandler( + LOG_DIR, + max_bytes=LOG_FILE_MAX_BYTES, + backup_count=LOG_FILE_BACKUP_COUNT, encoding='utf-8', - maxBytes=LOG_FILE_MAX_BYTES, - backupCount=LOG_FILE_BACKUP_COUNT, ) log_handlers: list[logging.Handler] = [ diff --git a/tests/unit_tests/core/test_bootutils_log.py b/tests/unit_tests/core/test_bootutils_log.py new file mode 100644 index 00000000..fd01eb9c --- /dev/null +++ b/tests/unit_tests/core/test_bootutils_log.py @@ -0,0 +1,120 @@ +"""Tests for the daily-grouped rotating log file handler. + +Regression coverage for the bug where a long-running process names its log +file after the *start* day and keeps appending to it across midnight, so no +file ever appears for the current day. See +``langbot.pkg.core.bootutils.log.DailyGroupedRotatingFileHandler``. +""" + +from __future__ import annotations + +import logging +import os +import re + +import langbot.pkg.core.bootutils.log as logmod +from langbot.pkg.core.bootutils.log import DailyGroupedRotatingFileHandler + +# Mirror of the cleanup pattern in api/http/service/maintenance.py. +MAINTENANCE_LOG_FILE_PATTERN = re.compile(r'^langbot-(\d{4}-\d{2}-\d{2})\.log(?:\.\d+)?$') + + +def _listing(directory): + return sorted(os.listdir(directory)) + + +def _make_logger(handler, name): + logger = logging.getLogger(name) + logger.setLevel(logging.INFO) + logger.handlers.clear() + logger.addHandler(handler) + logger.propagate = False + return logger + + +class TestDailyGroupedRotatingFileHandler: + def _patch_date(self, monkeypatch, box): + """Make the handler read its current date from ``box['date']``.""" + + def fake_strftime(fmt, t=None): + if fmt == '%Y-%m-%d': + return box['date'] + return '00:00:00' + + monkeypatch.setattr(logmod.time, 'strftime', fake_strftime) + + def test_initial_file_named_for_current_day(self, tmp_path, monkeypatch): + box = {'date': '2026-06-08'} + self._patch_date(monkeypatch, box) + + handler = DailyGroupedRotatingFileHandler(str(tmp_path), max_bytes=10_000, backup_count=3) + logger = _make_logger(handler, 'lb_logtest_initial') + logger.info('hello') + handler.close() + + assert _listing(tmp_path) == ['langbot-2026-06-08.log'] + + def test_same_day_size_rotation_creates_numbered_backups(self, tmp_path, monkeypatch): + box = {'date': '2026-06-08'} + self._patch_date(monkeypatch, box) + + handler = DailyGroupedRotatingFileHandler(str(tmp_path), max_bytes=200, backup_count=3) + logger = _make_logger(handler, 'lb_logtest_size') + for i in range(40): + logger.info('padding line to exceed maxBytes %d', i) + handler.close() + + files = _listing(tmp_path) + assert 'langbot-2026-06-08.log' in files + assert any(f.startswith('langbot-2026-06-08.log.') for f in files) + + def test_rolls_to_new_file_when_day_changes(self, tmp_path, monkeypatch): + box = {'date': '2026-06-08'} + self._patch_date(monkeypatch, box) + + handler = DailyGroupedRotatingFileHandler(str(tmp_path), max_bytes=10_000, backup_count=3) + logger = _make_logger(handler, 'lb_logtest_midnight') + logger.info('day1 line') + + # Simulate crossing midnight within the same running process. + box['date'] = '2026-06-09' + logger.info('day2 line after midnight') + handler.close() + + files = _listing(tmp_path) + assert 'langbot-2026-06-08.log' in files + assert 'langbot-2026-06-09.log' in files + + day2 = (tmp_path / 'langbot-2026-06-09.log').read_text(encoding='utf-8') + assert 'day2 line after midnight' in day2 + assert 'day1 line' not in day2 + + def test_rollover_repeats_across_multiple_days(self, tmp_path, monkeypatch): + box = {'date': '2026-06-08'} + self._patch_date(monkeypatch, box) + + handler = DailyGroupedRotatingFileHandler(str(tmp_path), max_bytes=10_000, backup_count=3) + logger = _make_logger(handler, 'lb_logtest_multiday') + for day in ('2026-06-08', '2026-06-09', '2026-06-10'): + box['date'] = day + logger.info('line for %s', day) + handler.close() + + files = _listing(tmp_path) + for day in ('2026-06-08', '2026-06-09', '2026-06-10'): + assert f'langbot-{day}.log' in files + + def test_all_filenames_match_maintenance_cleanup_pattern(self, tmp_path, monkeypatch): + box = {'date': '2026-06-08'} + self._patch_date(monkeypatch, box) + + handler = DailyGroupedRotatingFileHandler(str(tmp_path), max_bytes=200, backup_count=3) + logger = _make_logger(handler, 'lb_logtest_pattern') + for i in range(40): + logger.info('padding line %d', i) + box['date'] = '2026-06-09' + logger.info('next day line') + handler.close() + + for name in _listing(tmp_path): + assert MAINTENANCE_LOG_FILE_PATTERN.match(name), name