Compare commits: 0b593e29d4 ... main (1 commit)

| Author | SHA1 | Date |
|---|---|---|
|  | c27733aa3f |  |
```diff
@@ -1,5 +1,5 @@
 [theme]
-base="dark"
-backgroundColor="#3f51b5"
-secondaryBackgroundColor="#2b2b4e"
-font="monospace"
+base = "dark"
+backgroundColor = "#0d2669"
+secondaryBackgroundColor = "#050550"
+font = "monospace"
```
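These keys match Streamlit's `[theme]` options; in this repository they presumably live in `.streamlit/config.toml` (the file path is not visible in the extracted diff, so that location is an assumption). A minimal sketch of how the running app could confirm it is resolving the new palette, using `st.get_option`:

```python
# Hedged sketch: assumes the [theme] block above is picked up from
# .streamlit/config.toml when the app is started with `streamlit run app.py`.
import streamlit as st

resolved_theme = {
    "base": st.get_option("theme.base"),                    # expect "dark"
    "backgroundColor": st.get_option("theme.backgroundColor"),          # expect "#0d2669"
    "secondaryBackgroundColor": st.get_option("theme.secondaryBackgroundColor"),  # expect "#050550"
    "font": st.get_option("theme.font"),                    # expect "monospace"
}
st.sidebar.json(resolved_theme)  # quick visual check of the active theme values
```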
```diff
@@ -1,2 +1,2 @@
 # chat_pdf
-El siguiente proyecto construye un chatbot que usa un LLM, Langchain y streamlit para crear una aplicacion que permite chatear con uun pdf.
+El siguiente proyecto construye un chatbot que usa un LLM, Langchain y streamlit para crear una aplicacion que permite chatear con un pdf.
```
app.py (83 changed lines)
```diff
@@ -1,40 +1,43 @@
-# import os
-# from dotenv import load_dotenv
-# from langchain_community.chat_models import ChatOpenAI
 import streamlit as st
-from chats.streamlit_tools import import_file  # ,clear_cache
+import os
+from dotenv import load_dotenv
+from langchain_community.chat_models import ChatOpenAI
+from chats.streamlit_tools import import_file, clear_cache
 from streamlit_extras.add_vertical_space import add_vertical_space
 from langchain_tools.pdf_tools import PdfLangChain
 from langchain_tools.lc_tools import LangChainTools
 from chats.chat_tools import MessageManager


 # App title
-st.set_page_config(page_title="LLMOneClusterTeam")
+st.set_page_config(page_title="Snowflake Arctic")

 # sidebar
 with st.sidebar:

     # Cargar el logo (asegúrate de que el archivo de imagen esté en la misma carpeta que tu script)
     logo_path = "documents/Logo azulblanco.png"
+    # Ajusta el ancho según sea necesario
     logo = st.sidebar.image(logo_path, width=200)

-    # Ajusta el ancho según sea necesario
-    add_vertical_space(28)
-    # pdf_name = import_file()
-    st.markdown("Built by [OneCluster](https://www.onecluster.org/).")
+    add_vertical_space(18)
+    pdf_name = import_file()


 col1, col2 = st.columns([1.1, 1])
 with col1:
     st.title(
         "DocumentAssist",
+    )
+    # Crea un botón en Streamlit que llama a la función clear_cache() cuando se presiona
+    if st.button('Eliminar caché'):
+        clear_cache()
+    if st.button('Reiniciar'):
+        st.experimental_rerun()
+    st.markdown(
+        "Built by [OneCluster](https://www.onecluster.org/)."
     )
 with col2:
     logo_2 = st.image("documents/pdfs/logo_1-removebg-preview.png", width=110)

-pdf_name = import_file()
-st.title('💬📄 LLM CHat APP')


 if pdf_name:

     with st.spinner("Processing the document..."):

         # Inicializamos la clase PdfLangChain
         pdfLangChain = PdfLangChain(pdf_name)
         pdf_name = pdfLangChain.file_name
```
```diff
@@ -51,22 +54,21 @@ if pdf_name:
         # Cargamos el modelo de embeddings
         embedding_model = langChainTools.load_embedding_opnai()

         # Cargamos el modelo de embeddings
         # embedding_model = langChainTools.load_embedding_hf()

         # Creamos el vector store
         docstorage = langChainTools.create_vector_strore(
-            docs_split, pdf_name, embedding_model
-        )
+            docs_split,
+            pdf_name,
+            embedding_model)

         # Cargamos el modelo LLM desde LangChain
-        llm = langChainTools.load_llm_open_source()
+        llm = langChainTools.load_llm_openai()

         # Creamos la cadena que integra Vectorstroe, el LLM para hacer consultas.
         # Para este caso la cadena tene el parametro de memoria.
         qa = langChainTools.define_retrieval_qa_memory(
-            llm, docstorage, pdf_name, embedding_model
-        )
+            llm, docstorage,
+            pdf_name,
+            embedding_model)

     # Store conversation history
     if "messages" not in st.session_state.keys():
```
```diff
@@ -92,7 +94,7 @@ if pdf_name:

     st.sidebar.button("Clear chat history", on_click=clear_chat_history)

-    @st.cache_resource
+    @ st.cache_resource
     def get_num_tokens(prompt):
         """Get the number of tokens in a given prompt"""
         return len(prompt.split())
```
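The only difference in this hunk is the space after `@` on the `st.cache_resource` decorator, most likely an autoformatter artifact. Python's grammar allows whitespace between `@` and the decorator expression, so behavior should be identical; a quick self-contained check:

```python
import functools


@functools.lru_cache            # usual spelling
def double_a(x: int) -> int:
    return x * 2


@ functools.lru_cache           # space after '@' parses the same way
def double_b(x: int) -> int:
    return x * 2


assert double_a(21) == double_b(21) == 42
```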
```diff
@@ -108,28 +110,23 @@ if pdf_name:
     # Generate a new response if last message is not from assistant
     if st.session_state.messages[-1]["role"] != "assistant":
         with st.spinner("Thinking..."):

             # Creamos la cadena que integra Vectorstroe, el LLM para hacer consultas.
             # Para este caso la cadena tene el parametro de memoria.
             qa = langChainTools.define_retrieval_qa_memory(
-                llm, docstorage, pdf_name, embedding_model
-            )
+                llm, docstorage,
+                pdf_name,
+                embedding_model)

-            input = "\n".join([msg["content"] for msg in st.session_state.messages])
+            input = "\n".join([msg["content"]
+                               for msg in st.session_state.messages])

-            query = qa.invoke({"question": f"{prompt}"}, return_only_outputs=True)
+            query = qa.invoke({"question": f"{input}"},
+                              return_only_outputs=True)

-            response_text = query["answer"]
-            documents_source = query["source_documents"]
-
-            messageManager = MessageManager()
-
-            citation: str = messageManager.generate_citations(documents_source)
-            # st.markdown(citation)
+            response = query["answer"]

             with st.chat_message("assistant"):
-                st.write(response_text)
+                st.write(response)
                 st.session_state.messages.append(
-                    {"role": "assistant", "content": response_text}
-                )
-                expander = st.expander("Fuentes")
-                expander.markdown(citation)
+                    {"role": "assistant", "content": response})
```
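The net effect of the new branch above is that the whole chat history is concatenated into `input` and sent to the chain, instead of only the latest `prompt`. A hedged sketch of that flow, with `ask_with_history` as a made-up helper name and `qa` assumed to be the chain returned by `langChainTools.define_retrieval_qa_memory(...)`:

```python
from typing import Any


def ask_with_history(qa: Any, messages: list) -> str:
    """Illustrative helper only: send the concatenated chat history to the chain.

    `messages` is assumed to look like st.session_state.messages, i.e. a list of
    {"role": ..., "content": ...} dicts, and `qa` like the ConversationalRetrievalChain
    built in langchain_tools/lc_tools.py.
    """
    # Join every message body into a single question string, as the new app.py does.
    question = "\n".join(msg["content"] for msg in messages)

    result = qa.invoke({"question": question}, return_only_outputs=True)
    answer = result["answer"]

    # Append the assistant reply in the same shape the app stores it.
    messages.append({"role": "assistant", "content": answer})
    return answer
```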
app_2.py (deleted; 134 lines)
```diff
@@ -1,134 +0,0 @@
-# import os
-# from dotenv import load_dotenv
-# from langchain_community.chat_models import ChatOpenAI
-import streamlit as st
-from chats.streamlit_tools import import_file  # ,clear_cache
-from streamlit_extras.add_vertical_space import add_vertical_space
-from langchain_tools.pdf_tools import PdfLangChain
-from langchain_tools.lc_tools import LangChainTools
-from chats.chat_tools import MessageManager
-from langchain_community.llms import HuggingFaceEndpoint
-
-
-# App title
-st.set_page_config(page_title="LLMOneClusterTeam")
-
-# sidebar
-with st.sidebar:
-    # Cargar el logo (asegúrate de que el archivo de imagen esté en la misma carpeta que tu script)
-    logo_path = "documents/Logo azulblanco.png"
-    logo = st.sidebar.image(logo_path, width=200)
-
-    # Ajusta el ancho según sea necesario
-    add_vertical_space(28)
-    # pdf_name = import_file()
-    st.markdown("Built by [OneCluster](https://www.onecluster.org/).")
-
-
-col1, col2 = st.columns([1.1, 1])
-with col1:
-    st.title(
-        "DocumentAssist",
-    )
-with col2:
-    logo_2 = st.image("documents/pdfs/logo_1-removebg-preview.png", width=110)
-
-pdf_name = import_file()
-if pdf_name:
-    with st.spinner("Processing the document..."):
-        # Inicializamos la clase PdfLangChain
-        pdfLangChain = PdfLangChain(pdf_name)
-        pdf_name = pdfLangChain.file_name
-
-        # Cargamos el documento PDF
-        docs: list = pdfLangChain.load_pdf()
-
-        # Dividimos los documentos en partes mas pequenas
-        docs_split: list = pdfLangChain.split_docs(docs)
-
-        # Instanciamos la clase LangChainTools que contiene herramientras LangChain
-        langChainTools = LangChainTools()
-
-        # Cargamos el modelo de embeddings
-        embedding_model = langChainTools.load_embedding_opnai()
-
-        # Creamos el vector store
-        docstorage = langChainTools.create_vector_strore(
-            docs_split, pdf_name, embedding_model
-        )
-
-        # Cargamos el modelo LLM desde LangChain
-        llm = langChainTools.load_llm_open_source()
-
-        # Creamos la cadena que integra Vectorstroe, el LLM para hacer consultas.
-        # Para este caso la cadena tene el parametro de memoria.
-        qa = langChainTools.define_retrieval_qa_memory(
-            llm, docstorage, pdf_name, embedding_model
-        )
-
-    # Store conversation history
-    if "messages" not in st.session_state.keys():
-        st.session_state.messages = [
-            {
-                "role": "assistant",
-                "content": "Hola, soy una IA con el que puedes chatear con tu PDF. Haz un pregunta al documento.",
-            }
-        ]
-
-    # Display or clear chat messages
-    for message in st.session_state.messages:
-        with st.chat_message(message["role"]):
-            st.write(message["content"])
-
-    def clear_chat_history():
-        st.session_state.messages = [
-            {
-                "role": "assistant",
-                "content": "Hola, soy una IA con el que puedes chatear con tu PDF. Haz un pregunta al documento.",
-            }
-        ]
-
-    st.sidebar.button("Clear chat history", on_click=clear_chat_history)
-
-    @st.cache_resource
-    def get_num_tokens(prompt):
-        """Get the number of tokens in a given prompt"""
-        return len(prompt.split())
-
-    # Function for generating Snowflake Arctic response
-
-    # User-provided prompt
-    if prompt := st.chat_input():
-        st.session_state.messages.append({"role": "user", "content": prompt})
-        with st.chat_message("user"):
-            st.write(prompt)
-
-    # Generate a new response if last message is not from assistant
-    if st.session_state.messages[-1]["role"] != "assistant":
-        with st.spinner("Thinking..."):
-            # Creamos la cadena que integra Vectorstroe, el LLM para hacer consultas.
-            # Para este caso la cadena tene el parametro de memoria.
-            qa = langChainTools.define_retrieval_qa_memory(
-                llm, docstorage, pdf_name, embedding_model
-            )
-
-            input = "\n".join([msg["content"] for msg in st.session_state.messages])
-
-            query = qa.invoke({"question": f"{prompt}"}, return_only_outputs=True)
-
-            response_text_en = query["answer"]
-            documents_source = query["source_documents"]
-
-            messageManager = MessageManager()
-
-            citation: str = messageManager.generate_citations(documents_source)
-            # st.markdown(citation)
-
-            with st.chat_message("assistant"):
-                st.write(response_text_en)
-                # st.write(translation)
-                st.session_state.messages.append(
-                    {"role": "assistant", "content": response_text_en}
-                )
-                expander = st.expander("Fuentes")
-                expander.markdown(citation)
```
Binary file not shown.
```diff
@@ -24,17 +24,5 @@ class MessageManager:
         print(f'{ia_emoticon} ' + Style.BRIGHT + Fore.YELLOW +
               'IA:' + Style.RESET_ALL + f'{bot_response["answer"]}')

-    def generate_citations(self, documents_source: list) -> str:
-
-        text_source: str = ""
-
-        for index, document in enumerate(documents_source):
-            quote: str = document.page_content
-            source: str = document.metadata['source'].replace(
-                'documents/pdfs/', '')
-            page: str = document.metadata['page'] + 1
-            fuente: str = f"**Fuente #{index + 1}:** \n '{quote}'\n(*{source}, P.{page})*"
-
-            text_source += fuente + "\n\n\n"
-
-        return text_source
+    def generate_citations(self):
+        pass
```
File diff suppressed because it is too large
Binary files not shown.
```diff
@@ -7,12 +7,12 @@ from langchain.memory.buffer import ConversationBufferMemory
 import os
 import streamlit as st
 from dotenv import load_dotenv
-from langchain.chains import RetrievalQAWithSourcesChain, ConversationalRetrievalChain
-from langchain_community.llms import HuggingFaceEndpoint
-from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain.chains import (
+    RetrievalQAWithSourcesChain,
+    ConversationalRetrievalChain)


-class LangChainTools:
+class LangChainTools():
     """
     Esta clase maneja algunas herramientas integraciones con las que
     cuenta LangChain.
```
```diff
@@ -27,38 +27,15 @@ class LangChainTools:

         # Cargamos la variable que contiene la api_key de OpenAI
         load_dotenv()
-        openai_api_key = os.getenv("api_key")
+        openai_api_key = os.getenv('api_key')
         # Define an OpenAI embeddings model
         self.embedding_model = OpenAIEmbeddings(openai_api_key=openai_api_key)
         # st.success('El modelo de embeddins de OpneAI se ha cargado')

         return self.embedding_model

-    def load_embedding_hf(self):
-        """Esta funcion carga un modelo de embedding de OpenAI
-
-        Returns:
-            _type_: Retorno a un objetito de tipo embedding de OpenAI
-        """
-
-        huggingfacehub_api_token = "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"
-
-        model_name = "sentence-transformers/all-mpnet-base-v2"
-        model_kwargs = {"device": "cpu"}
-        encode_kwargs = {"normalize_embeddings": False}
-
-        self.embedding_model = HuggingFaceEmbeddings(
-            model_name=model_name,
-            model_kwargs=model_kwargs,
-            encode_kwargs=encode_kwargs,
-        )
-
-        return self.embedding_model
-
     @st.cache_resource
-    def create_vector_strore(
-        _self, _docs_split: list, _file_name: str, _embedding_model
-    ):
+    def create_vector_strore(_self, _docs_split: list, _file_name: str, _embedding_model):
         """Esta funcion construye un vector store a partir de un documento

         Args:
```
```diff
@@ -66,7 +43,7 @@ class LangChainTools:
             _file_name (str): Nombre del documento
         """

-        db_name = _file_name.replace(".pdf", "").replace(" ", "_").lower()
+        db_name = _file_name.replace('.pdf', '').replace(' ', '_').lower()

         # Cargamos el modelo de embeddings
         # _embedding_model = self._embedding_model
```
```diff
@@ -76,14 +53,13 @@ class LangChainTools:

         if os.path.exists(persist_directory):
             vectordb = Chroma(
-                persist_directory=persist_directory, embedding_function=_embedding_model
-            )
+                persist_directory=persist_directory,
+                embedding_function=_embedding_model)
         else:
             vectordb = Chroma.from_documents(
                 persist_directory=persist_directory,
                 documents=_docs_split,
-                embedding=_embedding_model,
-            )
+                embedding=_embedding_model)

         vectordb.persist()

```
```diff
@@ -98,64 +74,36 @@ class LangChainTools:

         # Cargamos la variable que contiene la api_key de OpenAI
         load_dotenv()
-        openai_api_key = os.getenv("api_key")
+        openai_api_key = os.getenv('api_key')

         temperature = 0.5
-        llm_openai = ChatOpenAI(
-            model_name="gpt-3.5-turbo",
+        llm_openai = ChatOpenAI(model_name="gpt-3.5-turbo",
                                 temperature=temperature,
                                 openai_api_key=openai_api_key,
-            max_tokens=1000,
-        )
+                                max_tokens=1000)

         return llm_openai

-    def load_llm_open_source(self):
-        """Esta funcion carga un modelo de LLM OpenSource desde HuggingFace
-
-        Returns:
-            _type_: Retorno a un objetito de tipo LLM de OpenAI
-        """
-        # model_huggingface = "google/gemma-1.1-7b-it" # Es buena y funciona en espanol
-        # model_huggingface = (
-        #     "google/gemma-1.1-2b-it" # Es buena y funciona en espanol funciona rapido
-        # )
-        # model_huggingface = "tiiuae/falcon-7b-instruct"
-        # model_huggingface = "mistralai/Mistral-7B-Instruct-v0.2"
-        # model_huggingface = 'mistralai/Mixtral-8x7B-Instruct-v0.1'
-        huggingfacehub_api_token = "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"
-
-        model_huggingface = "mistralai/Mixtral-8x7B-Instruct-v0.1"  # Es buena y funciona en espanol funciona rapido
-
-        # Define the LLM
-        llm = HuggingFaceEndpoint(
-            repo_id=model_huggingface,
-            huggingfacehub_api_token=huggingfacehub_api_token,
-            temperature=0.1,
-            max_new_tokens=1000,
-        )
-
-        return llm
-
     def load_prompt_template(self):
         """Esta funcion construye un prompt template de lanfchain.

         Returns:
             _type_: Retorno a un prompt template de LangChain.
         """
-        template = """Responde en español la siguiente pregunta utilizando los documentos proporcionados y citando las fuentes relevantes entre corchetes []:
+        template = """Responde a la siguiente pregunta utilizando los documentos proporcionados y citando las fuentes relevantes entre corchetes []:

 Pregunta: {question}

 Respuesta:"""

         prompt_template = PromptTemplate(
-            template=template, input_variables=["question"]
-        )
+            template=template, input_variables=["question"])

         return prompt_template

-    def define_retrieval_qa(self, _llm, _vectordb, _file_name, _embedding_model):
+    def define_retrieval_qa(
+            self, _llm, _vectordb, _file_name, _embedding_model
+    ):
         """Esta función integra un LLM y una base de datos vectorial en una
         chain de LangChain para hacer requerimientos. Este modelo no integra memoria.
```
```diff
@@ -172,14 +120,14 @@ class LangChainTools:
         y la BDV.
         """

-        db_name = _file_name.replace(".pdf", "").replace(" ", "_").lower()
+        db_name = _file_name.replace('.pdf', '').replace(' ', '_').lower()

         # Verificamos si existe la vector strore
         persist_directory = f"embeddings/{db_name}"

         _vectordb = Chroma(
-            persist_directory=persist_directory, embedding_function=_embedding_model
-        )
+            persist_directory=persist_directory,
+            embedding_function=_embedding_model)

         # Define the Retrieval QA Chain to integrate the database and LLM
         qa = RetrievalQAWithSourcesChain.from_chain_type(
```
```diff
@@ -213,44 +161,25 @@ class LangChainTools:
         y la BDV.
         """

-        db_name = _file_name.replace(".pdf", "").replace(" ", "_").lower()
+        db_name = _file_name.replace('.pdf', '').replace(' ', '_').lower()

         # Verificamos si existe la vector strore
         persist_directory = f"embeddings/{db_name}"

         _vectordb = Chroma(
-            persist_directory=persist_directory, embedding_function=_embedding_model
-        )
+            persist_directory=persist_directory,
+            embedding_function=_embedding_model)

         # Configura la memoria
         memory = ConversationBufferMemory(
-            memory_key="chat_history", return_messages=True, output_key="answer"
-        )
+            memory_key="chat_history", return_messages=True)

         # Define the Retrieval QA Chain to integrate the database and LLM
         conversation = ConversationalRetrievalChain.from_llm(
             _llm,
             retriever=_vectordb.as_retriever(),
             memory=memory,
-            verbose=True,  # Modo verboso
-            return_source_documents=True,  # Devuelve los documentos fuente
+            verbose=False  # Modo verboso
         )

-        template = """Utiliza los siguientes fragmentos de contexto para responder en español la pregunta al final. Si no sabes la respuesta, simplemente di que no sabes, no intentes inventar una respuesta.
-
-{context}
-
-Pregunta: {question}
-Respuesta:"""
-
-        # template = """Utiliza los siguientes fragmentos de contexto como ejemplo para responder la pregunta al final. Organiza tu respuesta de manera clara y concisa, proporcionando información relevante y evitando divagaciones innecesarias.
-
-        # {context}
-
-        # Pregunta: {question}
-        # Respuesta en español:"""
-
-        conversation.combine_docs_chain.llm_chain.prompt.template = template
-        conversation.question_generator.prompt.template = "Dado el siguiente diálogo y una pregunta de seguimiento, reformula la pregunta de seguimiento para que sea una pregunta independiente, en su idioma original.\n\nHistorial del chat:\n{chat_history}\nPregunta de seguimiento: {question}\nPregunta independiente:"

         return conversation
```
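For orientation, the two pieces this file wires together are a persisted Chroma store under `embeddings/<name>` and a `ConversationalRetrievalChain` backed by `ConversationBufferMemory`. A hedged, self-contained sketch of that pattern follows; the helper name `build_conversational_qa` is invented for the example, and the import paths assume the langchain 0.1.x / langchain-community 0.0.x pins from requirements.txt:

```python
import os

from langchain_community.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain.memory.buffer import ConversationBufferMemory


def build_conversational_qa(llm, docs_split, file_name, embedding_model):
    """Hypothetical helper mirroring LangChainTools: reuse a persisted Chroma
    store when it exists, otherwise build and persist one, then wrap it in a
    memory-backed conversational retrieval chain."""
    db_name = file_name.replace(".pdf", "").replace(" ", "_").lower()
    persist_directory = f"embeddings/{db_name}"

    if os.path.exists(persist_directory):
        # Reload the previously persisted vector store for this document.
        vectordb = Chroma(persist_directory=persist_directory,
                          embedding_function=embedding_model)
    else:
        # Build the store from the split documents and persist it to disk.
        vectordb = Chroma.from_documents(persist_directory=persist_directory,
                                         documents=docs_split,
                                         embedding=embedding_model)
        vectordb.persist()

    memory = ConversationBufferMemory(memory_key="chat_history",
                                      return_messages=True)
    return ConversationalRetrievalChain.from_llm(
        llm,
        retriever=vectordb.as_retriever(),
        memory=memory,
        verbose=False,
    )
```

A caller would pass the LLM, the split documents, the PDF file name, and the embedding model, mirroring the arguments `define_retrieval_qa_memory` receives above.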
```diff
@@ -1,67 +0,0 @@
-# from langchain_tools.lc_tools import LangChainTools
-from langchain_community.llms import HuggingFaceEndpoint
-
-
-# Instanciamos la clase LangChainTools que contiene herramientras LangChain
-# langChainTools = LangChainTools()
-
-# model_huggingface = "google/gemma-1.1-7b-it" # Es buena y funciona en espanol
-# model_huggingface = (
-#     "google/gemma-1.1-2b-it" # Es buena y funciona en espanol funciona rapido
-# )
-# model_huggingface = 'tiiuae/falcon-7b-instruct'
-model_huggingface = "mistralai/Mistral-7B-Instruct-v0.2"
-huggingfacehub_api_token = "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"
-
-# model_huggingface = "mistralai/Mixtral-8x22B-Instruct-v0.1" # Es buena y funciona en espanol funciona rapido
-
-# Define the LLM
-llm = HuggingFaceEndpoint(
-    repo_id=model_huggingface,
-    huggingfacehub_api_token=huggingfacehub_api_token,
-    temperature=0.5,
-    max_new_tokens=500,
-)  # Cargamos el modelo LLM desde LangChainllm llm = langChainTools.load_llm_open_source()
-# respuesta = llm.invoke("Cual es el sentido de la vida?")
-
-# print(respuesta)
-
-import streamlit as st
-from chats.streamlit_tools import import_file  # ,clear_cache
-from streamlit_extras.add_vertical_space import add_vertical_space
-from langchain_tools.pdf_tools import PdfLangChain
-from langchain_tools.lc_tools import LangChainTools
-from chats.chat_tools import MessageManager
-
-
-pdf_name = "1.TC_Malamud, Se está muriendo la democracia.pdf"
-pdfLangChain = PdfLangChain(pdf_name)
-
-# Cargamos el documento PDF
-docs: list = pdfLangChain.load_pdf()
-
-# Dividimos los documentos en partes mas pequenas
-docs_split: list = pdfLangChain.split_docs(docs)
-
-# Instanciamos la clase LangChainTools que contiene herramientras LangChain
-langChainTools = LangChainTools()
-
-# Cargamos el modelo de embeddings
-# embedding_model = langChainTools.load_embedding_opnai()
-
-# Cargamos el modelo de embeddings
-embedding_model = langChainTools.load_embedding_hf()
-
-# Creamos el vector store
-docstorage = langChainTools.create_vector_strore(docs_split, pdf_name, embedding_model)
-
-# Cargamos el modelo LLM desde LangChain
-llm = langChainTools.load_llm_open_source()
-
-# Creamos la cadena que integra Vectorstroe, el LLM para hacer consultas.Para este caso la cadena tene el parametro de memoria.
-qa = langChainTools.define_retrieval_qa_memory(
-    llm, docstorage, pdf_name, embedding_model
-)
-# qa.question_generator.prompt.template = "Dado el siguiente diálogo y una pregunta de seguimiento, reformula la pregunta de seguimiento para que sea una pregunta independiente, en su idioma original.\n\nHistorial del chat:\n{chat_history}\nPregunta de seguimiento: {question}\nPregunta independiente:"
-
-print(qa)
```
requirements.txt (152 changed lines)
```
@@ -1,163 +1,13 @@
aiohttp==3.9.5
aiosignal==1.3.1
altair==5.3.0
annotated-types==0.6.0
anyio==4.3.0
asgiref==3.8.1
attrs==23.2.0
backoff==2.2.1
bcrypt==4.1.2
beautifulsoup4==4.12.3
blinker==1.7.0
build==1.2.1
cachetools==5.3.3
certifi==2024.2.2
charset-normalizer==3.3.2
chroma-hnswlib==0.7.3
chromadb==0.5.0
click==8.1.7
colorama==0.4.6
coloredlogs==15.0.1
contourpy==1.2.1
cycler==0.12.1
dataclasses-json==0.6.4
Deprecated==1.2.14
distro==1.9.0
entrypoints==0.4
Faker==24.14.0
fastapi==0.110.2
favicon==0.7.0
filelock==3.13.4
flatbuffers==24.3.25
fonttools==4.51.0
frozenlist==1.4.1
fsspec==2024.3.1
gitdb==4.0.11
GitPython==3.1.43
google-auth==2.29.0
googleapis-common-protos==1.63.0
greenlet==3.0.3
grpcio==1.62.2
h11==0.14.0
htbuilder==0.6.2
httpcore==1.0.5
httptools==0.6.1
httpx==0.27.0
huggingface-hub==0.22.2
humanfriendly==10.0
idna==3.7
importlib-metadata==7.0.0
importlib_resources==6.4.0
Jinja2==3.1.3
jsonpatch==1.33
jsonpointer==2.4
jsonschema==4.21.1
jsonschema-specifications==2023.12.1
kiwisolver==1.4.5
kubernetes==29.0.0
langchain==0.1.16
langchain-community==0.0.34
langchain-core==0.1.45
langchain-openai==0.1.3
langchain-text-splitters==0.0.1
langsmith==0.1.50
lxml==5.2.1
Markdown==3.6
markdown-it-py==3.0.0
markdownlit==0.0.7
MarkupSafe==2.1.5
marshmallow==3.21.1
matplotlib==3.8.4
mdurl==0.1.2
mmh3==4.1.0
monotonic==1.6
more-itertools==10.2.0
mpmath==1.3.0
multidict==6.0.5
mypy-extensions==1.0.0
numpy==1.26.4
oauthlib==3.2.2
onnxruntime==1.17.3
openai==1.23.5
opentelemetry-api==1.24.0
opentelemetry-exporter-otlp-proto-common==1.24.0
opentelemetry-exporter-otlp-proto-grpc==1.24.0
opentelemetry-instrumentation==0.45b0
opentelemetry-instrumentation-asgi==0.45b0
opentelemetry-instrumentation-fastapi==0.45b0
opentelemetry-proto==1.24.0
opentelemetry-sdk==1.24.0
opentelemetry-semantic-conventions==0.45b0
opentelemetry-util-http==0.45b0
orjson==3.10.1
overrides==7.7.0
packaging==23.2
pandas==2.2.2
pillow==10.3.0
posthog==3.5.0
prometheus_client==0.20.0
protobuf==4.25.3
pyarrow==16.0.0
pyasn1==0.6.0
pyasn1_modules==0.4.0
pydantic==2.7.1
pydantic_core==2.18.2
pydeck==0.9.0b1
Pygments==2.17.2
pymdown-extensions==10.8
pyparsing==3.1.2
pypdf==4.2.0
PyPika==0.48.9
pyproject_hooks==1.0.0
python-dateutil==2.9.0.post0
python-dotenv==1.0.1
pytz==2024.1
PyYAML==6.0.1
referencing==0.35.0
regex==2024.4.16
requests==2.31.0
requests-oauthlib==2.0.0
rich==13.7.1
rpds-py==0.18.0
rsa==4.9
shellingham==1.5.4
six==1.16.0
smmap==5.0.1
sniffio==1.3.1
soupsieve==2.5
SQLAlchemy==2.0.29
st-annotated-text==4.0.1
starlette==0.37.2
streamlit==1.33.0
streamlit-camera-input-live==0.2.0
streamlit-card==1.0.0
streamlit-embedcode==0.1.2
streamlit-extras==0.4.2
streamlit-faker==0.0.3
streamlit-image-coordinates==0.1.6
streamlit-keyup==0.2.4
streamlit-toggle-switch==1.0.2
streamlit-vertical-slider==2.5.5
sympy==1.12
tenacity==8.2.3
tiktoken==0.6.0
tokenizers==0.19.1
toml==0.10.2
toolz==0.12.1
tornado==6.4
tqdm==4.66.2
typer==0.12.3
typing-inspect==0.9.0
typing_extensions==4.11.0
tzdata==2024.1
urllib3==2.2.1
uvicorn==0.29.0
uvloop==0.19.0
validators==0.28.1
watchdog==4.0.0
watchfiles==0.21.0
websocket-client==1.8.0
websockets==12.0
wrapt==1.16.0
yarl==1.9.4
zipp==3.18.1
```