fix: prevent memory overflow from excessive logging in streaming and query processing (#1879)

* Initial plan

* fix: reduce excessive logging to prevent memory overflow

- Add log file rotation (10MB max per file, 5 backups)
- Reduce streaming response logging (every 10th chunk instead of every chunk)
- Remove debug logging from controller tight loop
- Add summary logging after streaming completes

Co-authored-by: RockChinQ <45992437+RockChinQ@users.noreply.github.com>

* refactor: address code review feedback

- Extract log rotation config to module-level constants
- Keep first streaming chunk at INFO level for connection debugging
- Use DEBUG level for subsequent chunks

Co-authored-by: RockChinQ <45992437+RockChinQ@users.noreply.github.com>

* style: fix code formatting whitespace

Co-authored-by: RockChinQ <45992437+RockChinQ@users.noreply.github.com>

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: RockChinQ <45992437+RockChinQ@users.noreply.github.com>
2026-06-12 08:46:02 +00:00 · 2025-12-22 18:25:24 +08:00
parent 88ef9cd6ae
commit 90a22d894d
3 changed files with 37 additions and 5 deletions
--- a/src/langbot/pkg/pipeline/process/handlers/chat.py
+++ b/src/langbot/pkg/pipeline/process/handlers/chat.py
@@ -79,6 +79,7 @@ class ChatMessageHandler(handler.MessageHandler):
                    raise ValueError(f'Request Runner not found: {query.pipeline_config["ai"]["runner"]["runner"]}')
                if is_stream:
                    resp_message_id = uuid.uuid4()
+                    chunk_count = 0  # Track streaming chunks to reduce excessive logging

                    async for result in runner.run(query):
                        result.resp_message_id = str(resp_message_id)
@@ -91,15 +92,30 @@ class ChatMessageHandler(handler.MessageHandler):
                            await query.adapter.create_message_card(str(resp_message_id), query.message_event)
                            is_create_card = True
                        query.resp_messages.append(result)
-                        self.ap.logger.info(
-                            f'Conversation({query.query_id}) Streaming Response: {self.cut_str(result.readable_str())}'
-                        )
+
+                        chunk_count += 1
+                        # Only log every 10th chunk to reduce excessive logging during streaming
+                        # This prevents memory overflow from thousands of log entries per conversation
+                        # First chunk uses INFO level to confirm connection establishment
+                        if chunk_count == 1:
+                            self.ap.logger.info(
+                                f'Conversation({query.query_id}) Streaming started: {self.cut_str(result.readable_str())}'
+                            )
+                        elif chunk_count % 10 == 0:
+                            self.ap.logger.debug(
+                                f'Conversation({query.query_id}) Streaming chunk {chunk_count}: {self.cut_str(result.readable_str())}'
+                            )

                        if result.content is not None:
                            text_length += len(result.content)

                        yield entities.StageProcessResult(result_type=entities.ResultType.CONTINUE, new_query=query)

+                    # Log final summary after streaming completes
+                    self.ap.logger.info(
+                        f'Conversation({query.query_id}) Streaming completed: {chunk_count} chunks, {text_length} chars'
+                    )
+
                else:
                    async for result in runner.run(query):
                        query.resp_messages.append(result)