Fixed the issue where the file was kept across sessions by implementing Streamlit's session_state and better cache handling
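The essence of the fix is the session_state guard pattern sketched below (a minimal, hypothetical example, not the committed code): values derived from the uploaded file are initialized once per session in st.session_state rather than recomputed from module-level state, so a rerun cannot silently carry the previous file over.

import streamlit as st

# Minimal sketch of the session_state guard pattern (hypothetical names).
uploaded = st.file_uploader("Upload a PDF", type="pdf")

if uploaded is not None:
    # Derive a stable database name from the file name.
    db_name = uploaded.name.replace(".pdf", "").replace(" ", "_").lower()

    # Initialize per-session values only once; later reruns reuse them.
    if "db_name" not in st.session_state:
        st.session_state.db_name = db_name
        st.session_state.persist_directory = f"embeddings/{db_name}"

    st.write(f"Vector store directory: {st.session_state.persist_directory}")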
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -2,7 +2,6 @@ from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.llms import OpenAI
from langchain_community.chat_models import ChatOpenAI
from langchain_core.prompts import PromptTemplate
from langchain.memory.buffer import ConversationBufferMemory
import os
import streamlit as st
@@ -10,6 +9,7 @@ from dotenv import load_dotenv
from langchain.chains import RetrievalQAWithSourcesChain, ConversationalRetrievalChain
from langchain_community.llms import HuggingFaceEndpoint
from langchain_community.embeddings import HuggingFaceEmbeddings
from streamlit.runtime.state import session_state


class LangChainTools:
@@ -55,7 +55,7 @@ class LangChainTools:

        return self.embedding_model

    @st.cache_resource
    # @st.cache_resource
    def create_vector_strore(
        _self, _docs_split: list, _file_name: str, _embedding_model
    ):
@@ -66,21 +66,33 @@ class LangChainTools:
            _file_name (str): Name of the document
        """

        db_name = _file_name.replace(".pdf", "").replace(" ", "_").lower()
        # db_name = _file_name.replace(".pdf", "").replace(" ", "_").lower()
        st.success(_file_name)

        if "db_name" not in st.session_state.keys():
            st.session_state.db_name = (
                _file_name.replace(".pdf", "").replace(" ", "_").lower()
            )

        # Load the embeddings model
        # _embedding_model = self._embedding_model

        # Check whether the vector store already exists
        persist_directory = f"embeddings/{db_name}"
        # persist_directory = f"embeddings/{db_name}"

        if os.path.exists(persist_directory):
        if "persist_directory" not in st.session_state.keys():
            st.session_state.persist_directory = (
                f"embeddings/{st.session_state.db_name}"
            )

        if os.path.exists(st.session_state.persist_directory):
            vectordb = Chroma(
                persist_directory=persist_directory, embedding_function=_embedding_model
                persist_directory=st.session_state.persist_directory,
                embedding_function=_embedding_model,
            )
        else:
            vectordb = Chroma.from_documents(
                persist_directory=persist_directory,
                persist_directory=st.session_state.persist_directory,
                documents=_docs_split,
                embedding=_embedding_model,
            )
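A note on the underscore-prefixed parameters (_self, _docs_split, _file_name, _embedding_model): Streamlit's caching decorators build the cache key by hashing a function's arguments, and a leading underscore in a parameter name tells Streamlit to skip hashing that argument, which is what makes unhashable objects such as models and class instances usable here. A minimal sketch with hypothetical names:

import streamlit as st

@st.cache_resource
def build_index(_embedding_model, file_name: str):
    # _embedding_model is excluded from the cache key because of the
    # leading underscore; only file_name determines cache hits.
    return {"file": file_name}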
@@ -110,7 +122,8 @@ class LangChainTools:

        return llm_openai

    def load_llm_open_source(self):
    @st.cache_resource
    def load_llm_open_source(_self):
        """This function loads an open-source LLM from HuggingFace

        Returns:
@@ -137,24 +150,6 @@ class LangChainTools:

        return llm

    def load_prompt_template(self):
        """This function builds a LangChain prompt template.

        Returns:
            _type_: Returns a LangChain prompt template.
        """
        template = """Responde en español la siguiente pregunta utilizando los documentos proporcionados y citando las fuentes relevantes entre corchetes []:

        Pregunta: {question}

        Respuesta:"""

        prompt_template = PromptTemplate(
            template=template, input_variables=["question"]
        )

        return prompt_template
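For reference, a PromptTemplate like the one above can be rendered on its own before being handed to a chain; a minimal sketch (with the template abbreviated):

from langchain_core.prompts import PromptTemplate

prompt_template = PromptTemplate(
    template="Responde en español la siguiente pregunta: {question}\n\nRespuesta:",
    input_variables=["question"],
)
print(prompt_template.format(question="¿De qué trata el documento?"))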
    def define_retrieval_qa(self, _llm, _vectordb, _file_name, _embedding_model):
        """This function wires an LLM and a vector database into a
        LangChain chain for making queries. This chain has no memory.
@@ -193,10 +188,8 @@ class LangChainTools:

        return qa

    @st.cache_resource
    def define_retrieval_qa_memory(
        _self, _llm, _vectordb, _file_name, _embedding_model
    ):
    # @st.cache_resource
    def define_retrieval_qa_memory(_self, _llm, _file_name, _embedding_model):
        """This function wires an LLM and a vector database into a
        LangChain chain for making queries. This chain includes memory.

@@ -213,13 +206,14 @@ class LangChainTools:
            and the vector database.
        """

        db_name = _file_name.replace(".pdf", "").replace(" ", "_").lower()
        # db_name = _file_name.replace(".pdf", "").replace(" ", "_").lower()

        # Check whether the vector store already exists
        persist_directory = f"embeddings/{db_name}"
        # persist_directory = f"embeddings/{db_name}"

        _vectordb = Chroma(
            persist_directory=persist_directory, embedding_function=_embedding_model
            persist_directory=st.session_state.persist_directory,
            embedding_function=_embedding_model,
        )

        # Configure the memory
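The hunk is truncated right after the memory comment, so the actual configuration is not shown; a typical wiring of ConversationBufferMemory into ConversationalRetrievalChain (a sketch under that assumption, not the committed code) looks like:

from langchain.memory.buffer import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

# _llm and _vectordb are assumed to be the objects built above.
# The chain reads prior turns from the memory under the "chat_history" key.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

qa = ConversationalRetrievalChain.from_llm(
    llm=_llm,
    retriever=_vectordb.as_retriever(),
    memory=memory,
)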
@@ -6,11 +6,11 @@ import streamlit as st

class PdfLangChain:
    """Class for handling PDF documents with LangChain.
    .
    .

    Attributes:
        file_name (str): Name of the PDF file.
        file_path (str): Path of the PDF file.
    Attributes:
        file_name (str): Name of the PDF file.
        file_path (str): Path of the PDF file.
    """

    def __init__(self, file_name: str) -> None:
@@ -22,7 +22,7 @@ class PdfLangChain:
        """

        self.file_name = file_name
        self.file_path = os.path.join('documents', 'pdfs', self.file_name)
        self.file_path = os.path.join("documents", "pdfs", self.file_name)

        # Check whether the directory exists; if not, create it
        if not os.path.exists(self.file_path):
@@ -38,6 +38,8 @@ class PdfLangChain:

        loader = PyPDFLoader(_self.file_path)
        _self.docs = loader.load()

        st.success(f"Loaded PDF: {_self.file_path}")
        return _self.docs

    def split_docs(self, data: list) -> list:
@@ -54,8 +56,10 @@ class PdfLangChain:
        chunk_overlap = 300

        splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap)
            chunk_size=chunk_size, chunk_overlap=chunk_overlap
        )
        self.docs_split = splitter.split_documents(data)

st.success(f"{self.file_path[3][:200]}")
|
||||

        return self.docs_split
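Taken together, the class is used roughly as follows (a sketch; the name of the PDF-loading method is assumed to be load_docs, since its def line falls outside the hunk, and the module name is hypothetical):

from pdf_langchain import PdfLangChain  # hypothetical module name

pdf = PdfLangChain("example.pdf")  # resolves documents/pdfs/example.pdf
docs = pdf.load_docs()             # PyPDFLoader loads the pages
chunks = pdf.split_docs(docs)      # RecursiveCharacterTextSplitter, 300-char overlap
print(len(chunks))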