import os
import shutil

from src.loader.load import loads_path
from src.pgdb.knowledge.similarity import VectorStore_FAISS

# Chunk size passed to the loader; the merge threshold is derived from it.
_SENTENCE_SIZE = 512
# Two fragments are merged only while their combined length stays below
# three quarters of the chunk size.
_MERGE_LIMIT = _SENTENCE_SIZE / 4 * 3
# Number of documents handed to the vector store per call.
_BATCH_SIZE = 300
# Metadata keys that must match for two consecutive fragments to be merged.
_MERGE_KEYS = ("font-size", "page_number")


def _merge_adjacent_docs(docs):
    """Merge consecutive documents that share font size and page number.

    The first document is always kept as the initial pending fragment.
    Every later document that lacks one of the ``_MERGE_KEYS`` metadata
    entries is skipped. A document is appended to the pending fragment when
    both carry equal values for all ``_MERGE_KEYS`` and the combined
    ``page_content`` length is below ``_MERGE_LIMIT``; otherwise the pending
    fragment is emitted and the document becomes the new pending fragment.

    The ``page_content`` of the input documents is modified in place.

    Returns:
        The list of merged documents, in their original order.
    """
    merged = []
    pending = None
    for doc in docs:
        if pending is None:
            pending = doc
            continue
        # NOTE(review): documents without the merge metadata are dropped
        # here, not indexed on their own -- confirm this is intended.
        if any(key not in doc.metadata for key in _MERGE_KEYS):
            continue
        # The pending fragment may be the unchecked first document, so make
        # sure it has the keys before comparing instead of raising KeyError.
        same_run = all(
            key in pending.metadata
            and pending.metadata[key] == doc.metadata[key]
            for key in _MERGE_KEYS
        )
        combined_len = len(pending.page_content) + len(doc.page_content)
        if same_run and combined_len < _MERGE_LIMIT:
            pending.page_content += doc.page_content
        else:
            merged.append(pending)
            pending = doc
    if pending is not None:
        merged.append(pending)
    return merged


def Auto_Task(src_path: str, dest_path: str, faiss_db: VectorStore_FAISS) -> None:
    """Index the contents of ``src_path`` and move its files to ``dest_path``.

    Steps:
      1. Create ``src_path`` and ``dest_path`` if they do not exist.
      2. Return immediately when ``src_path`` has no entries.
      3. Load documents with ``loads_path`` (mode ``"paged"``), merge
         adjacent fragments, and add them to ``faiss_db`` in batches of
         ``_BATCH_SIZE`` before saving the store.
      4. Move every regular file directly inside ``src_path`` to
         ``dest_path``; a failed move is reported with ``print`` and does
         not stop the remaining moves.

    Args:
        src_path: Directory scanned for files to index.
        dest_path: Directory that receives the files after indexing.
        faiss_db: Vector store exposing ``_add_documents`` and
            ``_save_local``.
    """
    # A missing source directory is created (the original created the
    # destination here by mistake, then crashed in os.listdir below).
    if not os.path.exists(src_path):
        os.makedirs(src_path, exist_ok=True)
    # Make sure the destination directory exists, creating it if necessary.
    if not os.path.exists(dest_path):
        os.makedirs(dest_path, exist_ok=True)
        print("目标目录不存在,已创建。")

    files = os.listdir(src_path)
    if not files:
        return

    docs = loads_path(
        src_path, mode="paged", sentence_size=_SENTENCE_SIZE, callbacks=[]
    )
    docs = _merge_adjacent_docs(docs)

    # Slicing clamps at the end of the list, so no explicit bound is needed.
    for start in range(0, len(docs), _BATCH_SIZE):
        faiss_db._add_documents(docs[start:start + _BATCH_SIZE], need_split=True)
    # NOTE(review): the store is persisted once, after all batches; confirm
    # that per-batch saving is not required.
    faiss_db._save_local()

    # Walk the directory listing captured before indexing.
    for file_name in files:
        source_file = os.path.join(src_path, file_name)
        # Only regular files are moved; sub-directories are left in place.
        if not os.path.isfile(source_file):
            continue
        destination_file = os.path.join(dest_path, file_name)
        try:
            shutil.move(source_file, destination_file)
        except Exception as e:
            # Best effort: report the failure and continue with the rest.
            print(f"移动文件时出错: {source_file} -> {destination_file},错误信息: {e}")