From 254feb6a3a7238791643f5e67eefed2cdb923528 Mon Sep 17 00:00:00 2001 From: WangCham <651122857@qq.com> Date: Thu, 30 Oct 2025 12:37:09 +0800 Subject: [PATCH 1/2] fix: langchain error --- pkg/rag/knowledge/services/chunker.py | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/pkg/rag/knowledge/services/chunker.py b/pkg/rag/knowledge/services/chunker.py index f169d5f1..19b1f296 100644 --- a/pkg/rag/knowledge/services/chunker.py +++ b/pkg/rag/knowledge/services/chunker.py @@ -4,6 +4,7 @@ import json from typing import List from pkg.rag.knowledge.services import base_service from pkg.core import app +from langchain_text_splitters import RecursiveCharacterTextSplitter class Chunker(base_service.BaseService): @@ -27,21 +28,6 @@ class Chunker(base_service.BaseService): """ if not text: return [] - # words = text.split() - # chunks = [] - # current_chunk = [] - - # for word in words: - # current_chunk.append(word) - # if len(current_chunk) > self.chunk_size: - # chunks.append(" ".join(current_chunk[:self.chunk_size])) - # current_chunk = current_chunk[self.chunk_size - self.chunk_overlap:] - - # if current_chunk: - # chunks.append(" ".join(current_chunk)) - - # A more robust chunking strategy (e.g., using recursive character text splitter) - from langchain.text_splitter import RecursiveCharacterTextSplitter text_splitter = RecursiveCharacterTextSplitter( chunk_size=self.chunk_size, From f576f990de3ed4636118b4c5c6e58a76de7ef7df Mon Sep 17 00:00:00 2001 From: WangCham <651122857@qq.com> Date: Thu, 30 Oct 2025 12:52:11 +0800 Subject: [PATCH 2/2] fix: add langchain text splitter module --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index c0200bd0..1384b22c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,6 +60,7 @@ dependencies = [ "ebooklib>=0.18", "html2text>=2024.2.26", "langchain>=0.2.0", + "langchain-text-splitters>=0.0.1", "chromadb>=0.4.24", "qdrant-client (>=1.15.1,<2.0.0)", 
"langbot-plugin==0.1.4",