feat: add agents/app

2025-03-24 20:33:04 -05:00
parent cb63404b50
commit b6b94ac2b5
37 changed files with 3396 additions and 1 deletion


@@ -0,0 +1,10 @@
from dotenv import load_dotenv
from langchain_openai import OpenAIEmbeddings


def load_embeddins():
    load_dotenv()
    # model = "text-embedding-ada-002"
    model = "text-embedding-3-small"
    return OpenAIEmbeddings(model=model)
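
A minimal usage sketch (not part of the commit), assuming OPENAI_API_KEY is present in the project's .env file:

embeddings = load_embeddins()
vector = embeddings.embed_query("hello world")  # returns a list of floats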

agents/app/rag/llm.py

@@ -0,0 +1,17 @@
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI


def load_llm_openai():
    load_dotenv()
    # model = "gpt-3.5-turbo-0125"
    # model = "gpt-4o"
    model = "gpt-4o-mini"
    llm = ChatOpenAI(
        model=model,
        temperature=0.1,
        max_tokens=2000,
    )
    return llm
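
A quick smoke test (illustrative only, not part of the commit):

llm = load_llm_openai()
response = llm.invoke("Say hello in one short sentence.")
print(response.content)  # ChatOpenAI returns an AIMessage; .content holds the text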


@@ -0,0 +1,46 @@
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder


def create_rag_chain(llm, retriever):
    contextualize_q_system_prompt = """
    Given a chat history and the latest user question, \
    which might reference context in the chat history, \
    formulate a standalone question \
    which can be understood without the chat history.
    Do NOT answer the question, \
    just reformulate it if needed and otherwise return it as is.
    """
    contextualize_q_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", contextualize_q_system_prompt),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ]
    )
    history_aware_retriever = create_history_aware_retriever(
        llm, retriever, contextualize_q_prompt
    )

    # --------------------- Chain with chat history ---------------------
    qa_system_prompt = """
    You are an assistant for question-answering tasks. \
    Use the following pieces of retrieved context to answer the question. \
    If you don't know the answer, just say that you don't know. \
    Make the answer long enough to fully address what is being asked; \
    do not artificially limit its length.
    {context}"""
    qa_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", qa_system_prompt),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ]
    )
    question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
    return create_retrieval_chain(
        history_aware_retriever, question_answer_chain)
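
An illustrative wiring sketch (the llm and retriever objects are assumed to come from load_llm_openai and create_retriever in this commit):

from langchain_core.messages import AIMessage, HumanMessage

rag_chain = create_rag_chain(llm, retriever)
result = rag_chain.invoke({
    "input": "What does the document say about embeddings?",
    "chat_history": [
        HumanMessage(content="What is this document about?"),
        AIMessage(content="It describes a RAG pipeline over PDFs."),
    ],
})
print(result["answer"])  # the retrieval chain also returns "context" with the retrieved docs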


@@ -0,0 +1,18 @@
from langchain_chroma import Chroma


def create_retriever(embeddings, persist_directory: str):
    # Load the vectorstore
    # vectordb = Chroma.from_documents(
    #     persist_directory=st.session_state.persist_directory,
    #     # This is the directory of the vectorstore for the user's
    #     # document, which is stored in session_state.
    #     embedding_function=embeddings,
    # )
    vectordb = Chroma(
        persist_directory=persist_directory,
        embedding_function=embeddings,
    )
    # Create the retriever so it returns the most relevant chunks.
    return vectordb.as_retriever(search_kwargs={"k": 6})
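
A hypothetical call, assuming a vectorstore was previously persisted to the given directory (the path is a placeholder):

retriever = create_retriever(embeddings, "embeddings/my_doc")
docs = retriever.invoke("main topic of the document")  # up to 6 Document objects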


@@ -0,0 +1,42 @@
from langchain_community.document_loaders.pdf import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os

# def load_split_docs(file_name: str) -> list:
#     file_path: str = os.path.join("documents", "pdfs", file_name)
#     loader = PyPDFLoader(file_path)
#     docs: list = loader.load()
#     chunk_size: int = 2000
#     chunk_overlap: int = 300
#
#     splitter = RecursiveCharacterTextSplitter(
#         chunk_size=chunk_size, chunk_overlap=chunk_overlap
#     )
#     docs_split: list = splitter.split_documents(docs)
#
#     return docs_split


def load_split_docs(file_name: str) -> list:
    # Get the project's base directory
    base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    # Build the absolute path to the PDF
    file_path = os.path.join(base_dir, "documents", "pdfs", file_name)
    # Check that the file exists
    if not os.path.exists(file_path):
        print(f"File not found at: {file_path}")
        raise FileNotFoundError(f"File not found at: {file_path}")

    loader = PyPDFLoader(file_path)
    docs: list = loader.load()
    chunk_size: int = 2000
    chunk_overlap: int = 300
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    docs_split: list = splitter.split_documents(docs)
    return docs_split
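
A hedged usage example; "sample.pdf" is a placeholder and must exist under documents/pdfs/:

chunks = load_split_docs("sample.pdf")
print(f"{len(chunks)} chunks, first has {len(chunks[0].page_content)} characters")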


@@ -0,0 +1,48 @@
from langchain_chroma import Chroma
import os

# def create_vectorstore(docs_split: list, embeddings, file_name: str):
#     db_name: str = file_name.replace(".pdf", "").replace(" ", "_").lower()
#     persist_directory: str = f"embeddings/{db_name}"
#
#     # Create the directory if it does not exist
#     os.makedirs(persist_directory, exist_ok=True)
#
#     # Always create/update the vectorstore
#     vectordb = Chroma.from_documents(
#         persist_directory=persist_directory,
#         documents=docs_split,
#         embedding=embeddings,
#     )
#
#     return vectordb


def create_vectorstore(docs_split: list, embeddings, file_name: str):
    # Get the project's base directory
    base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    # Derive the database name from the file name
    db_name: str = file_name.replace(".pdf", "").replace(" ", "_").lower()
    # Build the absolute path for the embeddings
    persist_directory: str = os.path.join(base_dir, "embeddings", db_name)
    # Create the directory if it does not exist
    os.makedirs(persist_directory, exist_ok=True)
    # Debug log
    print(f"Creating vectorstore at: {persist_directory}")
    try:
        # Create/update the vectorstore
        vectordb = Chroma.from_documents(
            persist_directory=persist_directory,
            documents=docs_split,
            embedding=embeddings,
        )
        return vectordb
    except Exception as e:
        print(f"Error creating vectorstore: {e}")
        raise
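
Putting the pieces together, an end-to-end ingestion-plus-query sketch using only the functions added in this commit (the file name is a placeholder):

docs_split = load_split_docs("sample.pdf")                  # load and chunk the PDF
embeddings = load_embeddins()                               # OpenAI embedding client
vectordb = create_vectorstore(docs_split, embeddings, "sample.pdf")
retriever = vectordb.as_retriever(search_kwargs={"k": 6})   # same settings as create_retriever
llm = load_llm_openai()
rag_chain = create_rag_chain(llm, retriever)
answer = rag_chain.invoke({"input": "Summarize the document.", "chat_history": []})["answer"]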