import os

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders.pdf import PyPDFLoader


def load_split_docs(
    file_name: str,
    *,
    chunk_size: int = 2000,
    chunk_overlap: int = 300,
) -> list:
    """Load a PDF from ``documents/pdfs`` and split it into chunks.

    The PDF folder is resolved from this module's location (the parent of the
    directory containing this file), not from the current working directory.

    Args:
        file_name: Name of the PDF file inside ``documents/pdfs``.
        chunk_size: ``chunk_size`` forwarded to
            ``RecursiveCharacterTextSplitter``.
        chunk_overlap: ``chunk_overlap`` forwarded to
            ``RecursiveCharacterTextSplitter``.

    Returns:
        The list returned by ``RecursiveCharacterTextSplitter.split_documents``
        for the documents that ``PyPDFLoader`` loaded from the file.

    Raises:
        FileNotFoundError: If the resolved path is not an existing file.
    """
    # Project base directory: two dirname() calls climb from this file to its
    # directory and then to that directory's parent.
    base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

    # Absolute path to the requested PDF.
    file_path = os.path.join(base_dir, "documents", "pdfs", file_name)

    # isfile() rather than exists(): an empty file_name resolves to the pdfs
    # directory itself, which exists but cannot be loaded as a PDF.
    if not os.path.isfile(file_path):
        raise FileNotFoundError(f"No se encontró el archivo en: {file_path}")

    docs = PyPDFLoader(file_path).load()

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    return splitter.split_documents(docs)