diff --git a/pkg/rag/knowledge/mgr.py b/pkg/rag/knowledge/mgr.py index 6ebc85a7..6c94d643 100644 --- a/pkg/rag/knowledge/mgr.py +++ b/pkg/rag/knowledge/mgr.py @@ -21,8 +21,12 @@ class RAGManager: self.parser = FileParser() self.chunker = Chunker() # Initialize Embedder with targeted model type and name - self.embedder = Embedder(model_type='third_party_api', model_name_key='bge-m3', chroma_manager=self.chroma_manager) - self.retriever = Retriever(model_type='third_party_api', model_name_key='bge-m3', chroma_manager=self.chroma_manager) + self.embedder = Embedder( + model_type='third_party_api', model_name_key='bge-m3', chroma_manager=self.chroma_manager + ) + self.retriever = Retriever( + model_type='third_party_api', model_name_key='bge-m3', chroma_manager=self.chroma_manager + ) async def initialize_rag_system(self): """Initializes the RAG system by creating database tables.""" @@ -143,9 +147,7 @@ class RAGManager: self.ap.logger.error(f'Error retrieving all files: {str(e)}', exc_info=True) return [] - async def store_data( - self, file_path: str, kb_id: str, file_type: str, file_id: str = None - ): + async def store_data(self, file_path: str, kb_id: str, file_type: str, file_id: str = None): """ Parses, chunks, embeds, and stores data from a given file into the RAG system. Associates the file with a knowledge base using kb_id in the File table. @@ -165,9 +167,7 @@ class RAGManager: file_name = os.path.basename(file_path) text = await self.parser.parse(file_path) if not text: - self.ap.logger.warning( - f'No text extracted from file {file_path}. ' - ) + self.ap.logger.warning(f'No text extracted from file {file_path}. ') return chunks_texts = await self.chunker.chunk(text) @@ -222,8 +222,9 @@ class RAGManager: try: await self.ap.storage_mgr.storage_provider.delete(file_id) except Exception as e: - self.ap.logger.error(f'Error deleting file from storage for file_id {file_id}: {str(e)}', exc_info=True) - await self.ap.storage_mgr.storage_provider.delete(file_id) + self.ap.logger.error( + f'Error deleting file from storage for file_id {file_id}: {str(e)}', exc_info=True + ) self.ap.logger.info(f'Deleted file record for file_id: {file_id}') else: self.ap.logger.warning( @@ -326,7 +327,14 @@ class RAGManager: return self.ap.logger.info(f'File with ID {file_id} exists, proceeding with association.') # add new file record - file_to_update = File(id=file_id, kb_id=kb.id, file_name=file_id, path=os.path.join('data', 'storage', file_id), file_type=os.path.splitext(file_id)[1].lstrip('.'), status=0) + file_to_update = File( + id=file_id, + kb_id=kb.id, + file_name=file_id, + path=os.path.join('data', 'storage', file_id), + file_type=os.path.splitext(file_id)[1].lstrip('.'), + status=0, + ) session.add(file_to_update) session.commit() self.ap.logger.info( @@ -342,12 +350,12 @@ class RAGManager: # 进行文件解析 try: await self.store_data( - file_path = os.path.join('data', 'storage', file_id), + file_path=os.path.join('data', 'storage', file_id), kb_id=knowledge_base_uuid, file_type=os.path.splitext(file_id)[1].lstrip('.'), - file_id=file_id + file_id=file_id, ) - except Exception as store_e: + except Exception: # 如果存储数据时出错,更新文件状态为失败 file_obj = session.query(File).filter_by(id=file_id).first() if file_obj: