Compare commits: 0b593e29d4 ... main (1 commit)

| Author | SHA1 | Date |
|---|---|---|
|  | c27733aa3f |  |
```diff
@@ -1,5 +1,5 @@
 [theme]
-base="dark"
-backgroundColor="#3f51b5"
-secondaryBackgroundColor="#2b2b4e"
-font="monospace"
+base = "dark"
+backgroundColor = "#0d2669"
+secondaryBackgroundColor = "#050550"
+font = "monospace"
```
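These keys match Streamlit's `[theme]` options; in this repository they presumably live in `.streamlit/config.toml` (the file path is not visible in the extracted diff, so that location is an assumption). A minimal sketch of how the running app could confirm it is resolving the new palette, using `st.get_option`:

```python
# Hedged sketch: assumes the [theme] block above is picked up from
# .streamlit/config.toml when the app is started with `streamlit run app.py`.
import streamlit as st

resolved_theme = {
    "base": st.get_option("theme.base"),                    # expect "dark"
    "backgroundColor": st.get_option("theme.backgroundColor"),          # expect "#0d2669"
    "secondaryBackgroundColor": st.get_option("theme.secondaryBackgroundColor"),  # expect "#050550"
    "font": st.get_option("theme.font"),                    # expect "monospace"
}
st.sidebar.json(resolved_theme)  # quick visual check of the active theme values
```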
```diff
@@ -1,2 +1,2 @@
 # chat_pdf
-El siguiente proyecto construye un chatbot que usa un LLM, Langchain y streamlit para crear una aplicacion que permite chatear con uun pdf.
+El siguiente proyecto construye un chatbot que usa un LLM, Langchain y streamlit para crear una aplicacion que permite chatear con un pdf.
```
app.py (83 changed lines)
```diff
@@ -1,40 +1,43 @@
-# import os
-# from dotenv import load_dotenv
-# from langchain_community.chat_models import ChatOpenAI
 import streamlit as st
-from chats.streamlit_tools import import_file  # ,clear_cache
+import os
+from dotenv import load_dotenv
+from langchain_community.chat_models import ChatOpenAI
+from chats.streamlit_tools import import_file, clear_cache
 from streamlit_extras.add_vertical_space import add_vertical_space
 from langchain_tools.pdf_tools import PdfLangChain
 from langchain_tools.lc_tools import LangChainTools
 from chats.chat_tools import MessageManager


 # App title
-st.set_page_config(page_title="LLMOneClusterTeam")
+st.set_page_config(page_title="Snowflake Arctic")

 # sidebar
 with st.sidebar:

     # Cargar el logo (asegúrate de que el archivo de imagen esté en la misma carpeta que tu script)
     logo_path = "documents/Logo azulblanco.png"
+    # Ajusta el ancho según sea necesario
     logo = st.sidebar.image(logo_path, width=200)

-    # Ajusta el ancho según sea necesario
-    add_vertical_space(28)
-    # pdf_name = import_file()
-    st.markdown("Built by [OneCluster](https://www.onecluster.org/).")
+    add_vertical_space(18)
+    pdf_name = import_file()


 col1, col2 = st.columns([1.1, 1])
 with col1:
     st.title(
         "DocumentAssist",
+    )
+    # Crea un botón en Streamlit que llama a la función clear_cache() cuando se presiona
+    if st.button('Eliminar caché'):
+        clear_cache()
+    if st.button('Reiniciar'):
+        st.experimental_rerun()
+    st.markdown(
+        "Built by [OneCluster](https://www.onecluster.org/)."
     )
 with col2:
     logo_2 = st.image("documents/pdfs/logo_1-removebg-preview.png", width=110)

-pdf_name = import_file()
-st.title('💬📄 LLM CHat APP')


 if pdf_name:

     with st.spinner("Processing the document..."):

         # Inicializamos la clase PdfLangChain
         pdfLangChain = PdfLangChain(pdf_name)
         pdf_name = pdfLangChain.file_name
```
```diff
@@ -51,22 +54,21 @@ if pdf_name:
         # Cargamos el modelo de embeddings
         embedding_model = langChainTools.load_embedding_opnai()

         # Cargamos el modelo de embeddings
         # embedding_model = langChainTools.load_embedding_hf()

         # Creamos el vector store
         docstorage = langChainTools.create_vector_strore(
-            docs_split, pdf_name, embedding_model
-        )
+            docs_split,
+            pdf_name,
+            embedding_model)

         # Cargamos el modelo LLM desde LangChain
-        llm = langChainTools.load_llm_open_source()
+        llm = langChainTools.load_llm_openai()

         # Creamos la cadena que integra Vectorstroe, el LLM para hacer consultas.
         # Para este caso la cadena tene el parametro de memoria.
         qa = langChainTools.define_retrieval_qa_memory(
-            llm, docstorage, pdf_name, embedding_model
-        )
+            llm, docstorage,
+            pdf_name,
+            embedding_model)

     # Store conversation history
     if "messages" not in st.session_state.keys():
```
```diff
@@ -92,7 +94,7 @@ if pdf_name:

     st.sidebar.button("Clear chat history", on_click=clear_chat_history)

-    @st.cache_resource
+    @ st.cache_resource
     def get_num_tokens(prompt):
         """Get the number of tokens in a given prompt"""
         return len(prompt.split())
```
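The only difference in this hunk is the space after `@` on the `st.cache_resource` decorator, most likely an autoformatter artifact. Python's grammar allows whitespace between `@` and the decorator expression, so behavior should be identical; a quick self-contained check:

```python
import functools


@functools.lru_cache            # usual spelling
def double_a(x: int) -> int:
    return x * 2


@ functools.lru_cache           # space after '@' parses the same way
def double_b(x: int) -> int:
    return x * 2


assert double_a(21) == double_b(21) == 42
```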
```diff
@@ -108,28 +110,23 @@ if pdf_name:
     # Generate a new response if last message is not from assistant
     if st.session_state.messages[-1]["role"] != "assistant":
         with st.spinner("Thinking..."):

             # Creamos la cadena que integra Vectorstroe, el LLM para hacer consultas.
             # Para este caso la cadena tene el parametro de memoria.
             qa = langChainTools.define_retrieval_qa_memory(
-                llm, docstorage, pdf_name, embedding_model
-            )
+                llm, docstorage,
+                pdf_name,
+                embedding_model)

-            input = "\n".join([msg["content"] for msg in st.session_state.messages])
+            input = "\n".join([msg["content"]
+                               for msg in st.session_state.messages])

-            query = qa.invoke({"question": f"{prompt}"}, return_only_outputs=True)
+            query = qa.invoke({"question": f"{input}"},
+                              return_only_outputs=True)

-            response_text = query["answer"]
-            documents_source = query["source_documents"]
-
-            messageManager = MessageManager()
-
-            citation: str = messageManager.generate_citations(documents_source)
-            # st.markdown(citation)
+            response = query["answer"]

             with st.chat_message("assistant"):
-                st.write(response_text)
+                st.write(response)
                 st.session_state.messages.append(
-                    {"role": "assistant", "content": response_text}
-                )
-                expander = st.expander("Fuentes")
-                expander.markdown(citation)
+                    {"role": "assistant", "content": response})
```
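The net effect of the new branch above is that the whole chat history is concatenated into `input` and sent to the chain, instead of only the latest `prompt`. A hedged sketch of that flow, with `ask_with_history` as a made-up helper name and `qa` assumed to be the chain returned by `langChainTools.define_retrieval_qa_memory(...)`:

```python
from typing import Any


def ask_with_history(qa: Any, messages: list) -> str:
    """Illustrative helper only: send the concatenated chat history to the chain.

    `messages` is assumed to look like st.session_state.messages, i.e. a list of
    {"role": ..., "content": ...} dicts, and `qa` like the ConversationalRetrievalChain
    built in langchain_tools/lc_tools.py.
    """
    # Join every message body into a single question string, as the new app.py does.
    question = "\n".join(msg["content"] for msg in messages)

    result = qa.invoke({"question": question}, return_only_outputs=True)
    answer = result["answer"]

    # Append the assistant reply in the same shape the app stores it.
    messages.append({"role": "assistant", "content": answer})
    return answer
```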
app_2.py (deleted; 134 lines)
```diff
@@ -1,134 +0,0 @@
-# import os
-# from dotenv import load_dotenv
-# from langchain_community.chat_models import ChatOpenAI
-import streamlit as st
-from chats.streamlit_tools import import_file  # ,clear_cache
-from streamlit_extras.add_vertical_space import add_vertical_space
-from langchain_tools.pdf_tools import PdfLangChain
-from langchain_tools.lc_tools import LangChainTools
-from chats.chat_tools import MessageManager
-from langchain_community.llms import HuggingFaceEndpoint
-
-
-# App title
-st.set_page_config(page_title="LLMOneClusterTeam")
-
-# sidebar
-with st.sidebar:
-    # Cargar el logo (asegúrate de que el archivo de imagen esté en la misma carpeta que tu script)
-    logo_path = "documents/Logo azulblanco.png"
-    logo = st.sidebar.image(logo_path, width=200)
-
-    # Ajusta el ancho según sea necesario
-    add_vertical_space(28)
-    # pdf_name = import_file()
-    st.markdown("Built by [OneCluster](https://www.onecluster.org/).")
-
-
-col1, col2 = st.columns([1.1, 1])
-with col1:
-    st.title(
-        "DocumentAssist",
-    )
-with col2:
-    logo_2 = st.image("documents/pdfs/logo_1-removebg-preview.png", width=110)
-
-pdf_name = import_file()
-if pdf_name:
-    with st.spinner("Processing the document..."):
-        # Inicializamos la clase PdfLangChain
-        pdfLangChain = PdfLangChain(pdf_name)
-        pdf_name = pdfLangChain.file_name
-
-        # Cargamos el documento PDF
-        docs: list = pdfLangChain.load_pdf()
-
-        # Dividimos los documentos en partes mas pequenas
-        docs_split: list = pdfLangChain.split_docs(docs)
-
-        # Instanciamos la clase LangChainTools que contiene herramientras LangChain
-        langChainTools = LangChainTools()
-
-        # Cargamos el modelo de embeddings
-        embedding_model = langChainTools.load_embedding_opnai()
-
-        # Creamos el vector store
-        docstorage = langChainTools.create_vector_strore(
-            docs_split, pdf_name, embedding_model
-        )
-
-        # Cargamos el modelo LLM desde LangChain
-        llm = langChainTools.load_llm_open_source()
-
-        # Creamos la cadena que integra Vectorstroe, el LLM para hacer consultas.
-        # Para este caso la cadena tene el parametro de memoria.
-        qa = langChainTools.define_retrieval_qa_memory(
-            llm, docstorage, pdf_name, embedding_model
-        )
-
-    # Store conversation history
-    if "messages" not in st.session_state.keys():
-        st.session_state.messages = [
-            {
-                "role": "assistant",
-                "content": "Hola, soy una IA con el que puedes chatear con tu PDF. Haz un pregunta al documento.",
-            }
-        ]
-
-    # Display or clear chat messages
-    for message in st.session_state.messages:
-        with st.chat_message(message["role"]):
-            st.write(message["content"])
-
-    def clear_chat_history():
-        st.session_state.messages = [
-            {
-                "role": "assistant",
-                "content": "Hola, soy una IA con el que puedes chatear con tu PDF. Haz un pregunta al documento.",
-            }
-        ]
-
-    st.sidebar.button("Clear chat history", on_click=clear_chat_history)
-
-    @st.cache_resource
-    def get_num_tokens(prompt):
-        """Get the number of tokens in a given prompt"""
-        return len(prompt.split())
-
-    # Function for generating Snowflake Arctic response
-
-    # User-provided prompt
-    if prompt := st.chat_input():
-        st.session_state.messages.append({"role": "user", "content": prompt})
-        with st.chat_message("user"):
-            st.write(prompt)
-
-    # Generate a new response if last message is not from assistant
-    if st.session_state.messages[-1]["role"] != "assistant":
-        with st.spinner("Thinking..."):
-            # Creamos la cadena que integra Vectorstroe, el LLM para hacer consultas.
-            # Para este caso la cadena tene el parametro de memoria.
-            qa = langChainTools.define_retrieval_qa_memory(
-                llm, docstorage, pdf_name, embedding_model
-            )
-
-            input = "\n".join([msg["content"] for msg in st.session_state.messages])
-
-            query = qa.invoke({"question": f"{prompt}"}, return_only_outputs=True)
-
-            response_text_en = query["answer"]
-            documents_source = query["source_documents"]
-
-            messageManager = MessageManager()
-
-            citation: str = messageManager.generate_citations(documents_source)
-            # st.markdown(citation)
-
-            with st.chat_message("assistant"):
-                st.write(response_text_en)
-                # st.write(translation)
-                st.session_state.messages.append(
-                    {"role": "assistant", "content": response_text_en}
-                )
-                expander = st.expander("Fuentes")
-                expander.markdown(citation)
```
Binary file not shown.
```diff
@@ -24,17 +24,5 @@ class MessageManager:
         print(f'{ia_emoticon} ' + Style.BRIGHT + Fore.YELLOW +
               'IA:' + Style.RESET_ALL + f'{bot_response["answer"]}')

-    def generate_citations(self, documents_source: list) -> str:
-
-        text_source: str = ""
-
-        for index, document in enumerate(documents_source):
-            quote: str = document.page_content
-            source: str = document.metadata['source'].replace(
-                'documents/pdfs/', '')
-            page: str = document.metadata['page'] + 1
-            fuente: str = f"**Fuente #{index + 1}:** \n '{quote}'\n(*{source}, P.{page})*"
-
-            text_source += fuente + "\n\n\n"
-
-        return text_source
+    def generate_citations(self):
+        pass
```
File diff suppressed because it is too large
Binary files not shown.
```diff
@@ -7,12 +7,12 @@ from langchain.memory.buffer import ConversationBufferMemory
 import os
 import streamlit as st
 from dotenv import load_dotenv
-from langchain.chains import RetrievalQAWithSourcesChain, ConversationalRetrievalChain
-from langchain_community.llms import HuggingFaceEndpoint
-from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain.chains import (
+    RetrievalQAWithSourcesChain,
+    ConversationalRetrievalChain)


-class LangChainTools:
+class LangChainTools():
     """
     Esta clase maneja algunas herramientas integraciones con las que
     cuenta LangChain.
```
```diff
@@ -27,38 +27,15 @@ class LangChainTools:

         # Cargamos la variable que contiene la api_key de OpenAI
         load_dotenv()
-        openai_api_key = os.getenv("api_key")
+        openai_api_key = os.getenv('api_key')
         # Define an OpenAI embeddings model
         self.embedding_model = OpenAIEmbeddings(openai_api_key=openai_api_key)
         # st.success('El modelo de embeddins de OpneAI se ha cargado')

         return self.embedding_model

-    def load_embedding_hf(self):
-        """Esta funcion carga un modelo de embedding de OpenAI
-
-        Returns:
-            _type_: Retorno a un objetito de tipo embedding de OpenAI
-        """
-
-        huggingfacehub_api_token = "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"
-
-        model_name = "sentence-transformers/all-mpnet-base-v2"
-        model_kwargs = {"device": "cpu"}
-        encode_kwargs = {"normalize_embeddings": False}
-
-        self.embedding_model = HuggingFaceEmbeddings(
-            model_name=model_name,
-            model_kwargs=model_kwargs,
-            encode_kwargs=encode_kwargs,
-        )
-
-        return self.embedding_model
-
     @st.cache_resource
-    def create_vector_strore(
-        _self, _docs_split: list, _file_name: str, _embedding_model
-    ):
+    def create_vector_strore(_self, _docs_split: list, _file_name: str, _embedding_model):
         """Esta funcion construye un vector store a partir de un documento

         Args:
```
```diff
@@ -66,7 +43,7 @@ class LangChainTools:
             _file_name (str): Nombre del documento
         """

-        db_name = _file_name.replace(".pdf", "").replace(" ", "_").lower()
+        db_name = _file_name.replace('.pdf', '').replace(' ', '_').lower()

         # Cargamos el modelo de embeddings
         # _embedding_model = self._embedding_model
```
```diff
@@ -76,14 +53,13 @@ class LangChainTools:

         if os.path.exists(persist_directory):
             vectordb = Chroma(
-                persist_directory=persist_directory, embedding_function=_embedding_model
-            )
+                persist_directory=persist_directory,
+                embedding_function=_embedding_model)
         else:
             vectordb = Chroma.from_documents(
                 persist_directory=persist_directory,
                 documents=_docs_split,
-                embedding=_embedding_model,
-            )
+                embedding=_embedding_model)

         vectordb.persist()

```
```diff
@@ -98,64 +74,36 @@ class LangChainTools:

         # Cargamos la variable que contiene la api_key de OpenAI
         load_dotenv()
-        openai_api_key = os.getenv("api_key")
+        openai_api_key = os.getenv('api_key')

         temperature = 0.5
-        llm_openai = ChatOpenAI(
-            model_name="gpt-3.5-turbo",
+        llm_openai = ChatOpenAI(model_name="gpt-3.5-turbo",
                                 temperature=temperature,
                                 openai_api_key=openai_api_key,
-            max_tokens=1000,
-        )
+                                max_tokens=1000)

         return llm_openai

-    def load_llm_open_source(self):
-        """Esta funcion carga un modelo de LLM OpenSource desde HuggingFace
-
-        Returns:
-            _type_: Retorno a un objetito de tipo LLM de OpenAI
-        """
-        # model_huggingface = "google/gemma-1.1-7b-it" # Es buena y funciona en espanol
-        # model_huggingface = (
-        #     "google/gemma-1.1-2b-it" # Es buena y funciona en espanol funciona rapido
-        # )
-        # model_huggingface = "tiiuae/falcon-7b-instruct"
-        # model_huggingface = "mistralai/Mistral-7B-Instruct-v0.2"
-        # model_huggingface = 'mistralai/Mixtral-8x7B-Instruct-v0.1'
-        huggingfacehub_api_token = "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"
-
-        model_huggingface = "mistralai/Mixtral-8x7B-Instruct-v0.1"  # Es buena y funciona en espanol funciona rapido
-
-        # Define the LLM
-        llm = HuggingFaceEndpoint(
-            repo_id=model_huggingface,
-            huggingfacehub_api_token=huggingfacehub_api_token,
-            temperature=0.1,
-            max_new_tokens=1000,
-        )
-
-        return llm
-
     def load_prompt_template(self):
         """Esta funcion construye un prompt template de lanfchain.

         Returns:
             _type_: Retorno a un prompt template de LangChain.
         """
-        template = """Responde en español la siguiente pregunta utilizando los documentos proporcionados y citando las fuentes relevantes entre corchetes []:
+        template = """Responde a la siguiente pregunta utilizando los documentos proporcionados y citando las fuentes relevantes entre corchetes []:

 Pregunta: {question}

 Respuesta:"""

         prompt_template = PromptTemplate(
-            template=template, input_variables=["question"]
-        )
+            template=template, input_variables=["question"])

         return prompt_template

-    def define_retrieval_qa(self, _llm, _vectordb, _file_name, _embedding_model):
+    def define_retrieval_qa(
+            self, _llm, _vectordb, _file_name, _embedding_model
+    ):
         """Esta función integra un LLM y una base de datos vectorial en una
         chain de LangChain para hacer requerimientos. Este modelo no integra memoria.
```
```diff
@@ -172,14 +120,14 @@ class LangChainTools:
         y la BDV.
         """

-        db_name = _file_name.replace(".pdf", "").replace(" ", "_").lower()
+        db_name = _file_name.replace('.pdf', '').replace(' ', '_').lower()

         # Verificamos si existe la vector strore
         persist_directory = f"embeddings/{db_name}"

         _vectordb = Chroma(
-            persist_directory=persist_directory, embedding_function=_embedding_model
-        )
+            persist_directory=persist_directory,
+            embedding_function=_embedding_model)

         # Define the Retrieval QA Chain to integrate the database and LLM
         qa = RetrievalQAWithSourcesChain.from_chain_type(
```
```diff
@@ -213,44 +161,25 @@ class LangChainTools:
         y la BDV.
         """

-        db_name = _file_name.replace(".pdf", "").replace(" ", "_").lower()
+        db_name = _file_name.replace('.pdf', '').replace(' ', '_').lower()

         # Verificamos si existe la vector strore
         persist_directory = f"embeddings/{db_name}"

         _vectordb = Chroma(
-            persist_directory=persist_directory, embedding_function=_embedding_model
-        )
+            persist_directory=persist_directory,
+            embedding_function=_embedding_model)

         # Configura la memoria
         memory = ConversationBufferMemory(
-            memory_key="chat_history", return_messages=True, output_key="answer"
-        )
+            memory_key="chat_history", return_messages=True)

         # Define the Retrieval QA Chain to integrate the database and LLM
         conversation = ConversationalRetrievalChain.from_llm(
             _llm,
             retriever=_vectordb.as_retriever(),
             memory=memory,
-            verbose=True,  # Modo verboso
-            return_source_documents=True,  # Devuelve los documentos fuente
+            verbose=False  # Modo verboso
         )

-        template = """Utiliza los siguientes fragmentos de contexto para responder en español la pregunta al final. Si no sabes la respuesta, simplemente di que no sabes, no intentes inventar una respuesta.
-
-{context}
-
-Pregunta: {question}
-Respuesta:"""
-
-        # template = """Utiliza los siguientes fragmentos de contexto como ejemplo para responder la pregunta al final. Organiza tu respuesta de manera clara y concisa, proporcionando información relevante y evitando divagaciones innecesarias.
-
-        # {context}
-
-        # Pregunta: {question}
-        # Respuesta en español:"""
-
-        conversation.combine_docs_chain.llm_chain.prompt.template = template
-        conversation.question_generator.prompt.template = "Dado el siguiente diálogo y una pregunta de seguimiento, reformula la pregunta de seguimiento para que sea una pregunta independiente, en su idioma original.\n\nHistorial del chat:\n{chat_history}\nPregunta de seguimiento: {question}\nPregunta independiente:"

         return conversation
```
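For orientation, the two pieces this file wires together are a persisted Chroma store under `embeddings/<name>` and a `ConversationalRetrievalChain` backed by `ConversationBufferMemory`. A hedged, self-contained sketch of that pattern follows; the helper name `build_conversational_qa` is invented for the example, and the import paths assume the langchain 0.1.x / langchain-community 0.0.x pins from requirements.txt:

```python
import os

from langchain_community.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain.memory.buffer import ConversationBufferMemory


def build_conversational_qa(llm, docs_split, file_name, embedding_model):
    """Hypothetical helper mirroring LangChainTools: reuse a persisted Chroma
    store when it exists, otherwise build and persist one, then wrap it in a
    memory-backed conversational retrieval chain."""
    db_name = file_name.replace(".pdf", "").replace(" ", "_").lower()
    persist_directory = f"embeddings/{db_name}"

    if os.path.exists(persist_directory):
        # Reload the previously persisted vector store for this document.
        vectordb = Chroma(persist_directory=persist_directory,
                          embedding_function=embedding_model)
    else:
        # Build the store from the split documents and persist it to disk.
        vectordb = Chroma.from_documents(persist_directory=persist_directory,
                                         documents=docs_split,
                                         embedding=embedding_model)
        vectordb.persist()

    memory = ConversationBufferMemory(memory_key="chat_history",
                                      return_messages=True)
    return ConversationalRetrievalChain.from_llm(
        llm,
        retriever=vectordb.as_retriever(),
        memory=memory,
        verbose=False,
    )
```

A caller would pass the LLM, the split documents, the PDF file name, and the embedding model, mirroring the arguments `define_retrieval_qa_memory` receives above.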
```diff
@@ -1,67 +0,0 @@
-# from langchain_tools.lc_tools import LangChainTools
-from langchain_community.llms import HuggingFaceEndpoint
-
-
-# Instanciamos la clase LangChainTools que contiene herramientras LangChain
-# langChainTools = LangChainTools()
-
-# model_huggingface = "google/gemma-1.1-7b-it" # Es buena y funciona en espanol
-# model_huggingface = (
-#     "google/gemma-1.1-2b-it" # Es buena y funciona en espanol funciona rapido
-# )
-# model_huggingface = 'tiiuae/falcon-7b-instruct'
-model_huggingface = "mistralai/Mistral-7B-Instruct-v0.2"
-huggingfacehub_api_token = "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"
-
-# model_huggingface = "mistralai/Mixtral-8x22B-Instruct-v0.1" # Es buena y funciona en espanol funciona rapido
-
-# Define the LLM
-llm = HuggingFaceEndpoint(
-    repo_id=model_huggingface,
-    huggingfacehub_api_token=huggingfacehub_api_token,
-    temperature=0.5,
-    max_new_tokens=500,
-)  # Cargamos el modelo LLM desde LangChainllm llm = langChainTools.load_llm_open_source()
-# respuesta = llm.invoke("Cual es el sentido de la vida?")
-
-# print(respuesta)
-
-import streamlit as st
-from chats.streamlit_tools import import_file  # ,clear_cache
-from streamlit_extras.add_vertical_space import add_vertical_space
-from langchain_tools.pdf_tools import PdfLangChain
-from langchain_tools.lc_tools import LangChainTools
-from chats.chat_tools import MessageManager
-
-
-pdf_name = "1.TC_Malamud, Se está muriendo la democracia.pdf"
-pdfLangChain = PdfLangChain(pdf_name)
-
-# Cargamos el documento PDF
-docs: list = pdfLangChain.load_pdf()
-
-# Dividimos los documentos en partes mas pequenas
-docs_split: list = pdfLangChain.split_docs(docs)
-
-# Instanciamos la clase LangChainTools que contiene herramientras LangChain
-langChainTools = LangChainTools()
-
-# Cargamos el modelo de embeddings
-# embedding_model = langChainTools.load_embedding_opnai()
-
-# Cargamos el modelo de embeddings
-embedding_model = langChainTools.load_embedding_hf()
-
-# Creamos el vector store
-docstorage = langChainTools.create_vector_strore(docs_split, pdf_name, embedding_model)
-
-# Cargamos el modelo LLM desde LangChain
-llm = langChainTools.load_llm_open_source()
-
-# Creamos la cadena que integra Vectorstroe, el LLM para hacer consultas.Para este caso la cadena tene el parametro de memoria.
-qa = langChainTools.define_retrieval_qa_memory(
-    llm, docstorage, pdf_name, embedding_model
-)
-# qa.question_generator.prompt.template = "Dado el siguiente diálogo y una pregunta de seguimiento, reformula la pregunta de seguimiento para que sea una pregunta independiente, en su idioma original.\n\nHistorial del chat:\n{chat_history}\nPregunta de seguimiento: {question}\nPregunta independiente:"
-
-print(qa)
```
requirements.txt (152 changed lines)
```
@@ -1,163 +1,13 @@
aiohttp==3.9.5
aiosignal==1.3.1
altair==5.3.0
annotated-types==0.6.0
anyio==4.3.0
asgiref==3.8.1
attrs==23.2.0
backoff==2.2.1
bcrypt==4.1.2
beautifulsoup4==4.12.3
blinker==1.7.0
build==1.2.1
cachetools==5.3.3
certifi==2024.2.2
charset-normalizer==3.3.2
chroma-hnswlib==0.7.3
chromadb==0.5.0
click==8.1.7
colorama==0.4.6
coloredlogs==15.0.1
contourpy==1.2.1
cycler==0.12.1
dataclasses-json==0.6.4
Deprecated==1.2.14
distro==1.9.0
entrypoints==0.4
Faker==24.14.0
fastapi==0.110.2
favicon==0.7.0
filelock==3.13.4
flatbuffers==24.3.25
fonttools==4.51.0
frozenlist==1.4.1
fsspec==2024.3.1
gitdb==4.0.11
GitPython==3.1.43
google-auth==2.29.0
googleapis-common-protos==1.63.0
greenlet==3.0.3
grpcio==1.62.2
h11==0.14.0
htbuilder==0.6.2
httpcore==1.0.5
httptools==0.6.1
httpx==0.27.0
huggingface-hub==0.22.2
humanfriendly==10.0
idna==3.7
importlib-metadata==7.0.0
importlib_resources==6.4.0
Jinja2==3.1.3
jsonpatch==1.33
jsonpointer==2.4
jsonschema==4.21.1
jsonschema-specifications==2023.12.1
kiwisolver==1.4.5
kubernetes==29.0.0
langchain==0.1.16
langchain-community==0.0.34
langchain-core==0.1.45
langchain-openai==0.1.3
langchain-text-splitters==0.0.1
langsmith==0.1.50
lxml==5.2.1
Markdown==3.6
markdown-it-py==3.0.0
markdownlit==0.0.7
MarkupSafe==2.1.5
marshmallow==3.21.1
matplotlib==3.8.4
mdurl==0.1.2
mmh3==4.1.0
monotonic==1.6
more-itertools==10.2.0
mpmath==1.3.0
multidict==6.0.5
mypy-extensions==1.0.0
numpy==1.26.4
oauthlib==3.2.2
onnxruntime==1.17.3
openai==1.23.5
opentelemetry-api==1.24.0
opentelemetry-exporter-otlp-proto-common==1.24.0
opentelemetry-exporter-otlp-proto-grpc==1.24.0
opentelemetry-instrumentation==0.45b0
opentelemetry-instrumentation-asgi==0.45b0
opentelemetry-instrumentation-fastapi==0.45b0
opentelemetry-proto==1.24.0
opentelemetry-sdk==1.24.0
opentelemetry-semantic-conventions==0.45b0
opentelemetry-util-http==0.45b0
orjson==3.10.1
overrides==7.7.0
packaging==23.2
pandas==2.2.2
pillow==10.3.0
posthog==3.5.0
prometheus_client==0.20.0
protobuf==4.25.3
pyarrow==16.0.0
pyasn1==0.6.0
pyasn1_modules==0.4.0
pydantic==2.7.1
pydantic_core==2.18.2
pydeck==0.9.0b1
Pygments==2.17.2
pymdown-extensions==10.8
pyparsing==3.1.2
pypdf==4.2.0
PyPika==0.48.9
pyproject_hooks==1.0.0
python-dateutil==2.9.0.post0
python-dotenv==1.0.1
pytz==2024.1
PyYAML==6.0.1
referencing==0.35.0
regex==2024.4.16
requests==2.31.0
requests-oauthlib==2.0.0
rich==13.7.1
rpds-py==0.18.0
rsa==4.9
shellingham==1.5.4
six==1.16.0
smmap==5.0.1
sniffio==1.3.1
soupsieve==2.5
SQLAlchemy==2.0.29
st-annotated-text==4.0.1
starlette==0.37.2
streamlit==1.33.0
streamlit-camera-input-live==0.2.0
streamlit-card==1.0.0
streamlit-embedcode==0.1.2
streamlit-extras==0.4.2
streamlit-faker==0.0.3
streamlit-image-coordinates==0.1.6
streamlit-keyup==0.2.4
streamlit-toggle-switch==1.0.2
streamlit-vertical-slider==2.5.5
sympy==1.12
tenacity==8.2.3
tiktoken==0.6.0
tokenizers==0.19.1
toml==0.10.2
toolz==0.12.1
tornado==6.4
tqdm==4.66.2
typer==0.12.3
typing-inspect==0.9.0
typing_extensions==4.11.0
tzdata==2024.1
urllib3==2.2.1
uvicorn==0.29.0
uvloop==0.19.0
validators==0.28.1
watchdog==4.0.0
watchfiles==0.21.0
websocket-client==1.8.0
websockets==12.0
wrapt==1.16.0
yarl==1.9.4
zipp==3.18.1
```