import streamlit as st
import os
from dotenv import load_dotenv
from langchain_community.chat_models import ChatOpenAI
from chats.streamlit_tools import import_file, clear_cache
from streamlit_extras.add_vertical_space import add_vertical_space
from langchain_tools.pdf_tools import PdfLangChain
from langchain_tools.lc_tools import LangChainTools
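
# Load environment variables; an OPENAI_API_KEY in a local .env file is assumed,
# since load_dotenv is imported above but was never called.
load_dotenv()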

# App title
st.set_page_config(page_title="Snowflake Arctic")

# Sidebar
with st.sidebar:

    # Load the logo (make sure the image file exists at the path below)
    logo_path = "documents/Logo azulblanco.png"
    # Adjust the width as needed
    logo = st.image(logo_path, width=200)

    add_vertical_space(18)
    pdf_name = import_file()

    # Button that calls clear_cache() when pressed
    if st.button('Clear cache'):
        clear_cache()
    if st.button('Restart'):
        st.experimental_rerun()

    st.markdown(
        "Built by [OneCluster](https://www.onecluster.org/)."
    )

st.title('💬📄 LLM Chat App')

if pdf_name:
    with st.spinner("Processing the document..."):

        # Initialize the PdfLangChain class
        pdfLangChain = PdfLangChain(pdf_name)
        pdf_name = pdfLangChain.file_name

        # Load the PDF document
        docs: list = pdfLangChain.load_pdf()

        # Split the documents into smaller chunks
        docs_split: list = pdfLangChain.split_docs(docs)

        # Instantiate the LangChainTools class, which wraps the LangChain helpers
        langChainTools = LangChainTools()

        # Load the embeddings model
        embedding_model = langChainTools.load_embedding_opnai()

        # Create the vector store
        docstorage = langChainTools.create_vector_strore(
            docs_split,
            pdf_name,
            embedding_model)

        # Load the LLM through LangChain
        llm = langChainTools.load_llm_openai()

        # Build the chain that combines the vector store and the LLM for queries.
        # In this case the chain keeps conversational memory.
        qa = langChainTools.define_retrieval_qa_memory(
            llm, docstorage,
            pdf_name,
            embedding_model)
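
        # For reference, a retrieval chain with memory like the one returned above
        # is typically assembled along these lines (a sketch only; the actual
        # implementation lives in langchain_tools.lc_tools and may differ):
        #
        #   from langchain.memory import ConversationBufferMemory
        #   from langchain.chains import ConversationalRetrievalChain
        #
        #   memory = ConversationBufferMemory(
        #       memory_key="chat_history", return_messages=True)
        #   qa = ConversationalRetrievalChain.from_llm(
        #       llm=llm,
        #       retriever=docstorage.as_retriever(),
        #       memory=memory,
        #   )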

    # Store conversation history
    if "messages" not in st.session_state.keys():
        st.session_state.messages = [
            {
                "role": "assistant",
                "content": "Hi, I'm an AI you can use to chat with your PDF. Ask the document a question.",
            }
        ]

    # Display or clear chat messages
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.write(message["content"])

    def clear_chat_history():
        st.session_state.messages = [
            {
                "role": "assistant",
                "content": "Hi, I'm an AI you can use to chat with your PDF. Ask the document a question.",
            }
        ]

    st.sidebar.button("Clear chat history", on_click=clear_chat_history)

    @st.cache_resource
    def get_num_tokens(prompt):
        """Approximate the number of tokens in a prompt by splitting on whitespace."""
        return len(prompt.split())
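
    # Note: the count above is a whitespace approximation rather than a true
    # token count. An exact count for OpenAI models would typically use
    # tiktoken, e.g. (sketch only; the model name here is an assumption):
    #   import tiktoken
    #   encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
    #   num_tokens = len(encoding.encode(prompt))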

    # User-provided prompt
    if prompt := st.chat_input():
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.write(prompt)

    # Generate a new response if the last message is not from the assistant
    if st.session_state.messages[-1]["role"] != "assistant":
        with st.spinner("Thinking..."):

            # Rebuild the chain that combines the vector store and the LLM for queries.
            # In this case the chain keeps conversational memory.
            qa = langChainTools.define_retrieval_qa_memory(
                llm, docstorage,
                pdf_name,
                embedding_model)

            # Concatenate the whole chat history into a single question string
            chat_history_text = "\n".join(
                [msg["content"] for msg in st.session_state.messages])

            query = qa.invoke({"question": chat_history_text},
                              return_only_outputs=True)

            response = query["answer"]

        with st.chat_message("assistant"):
            st.write(response)
        st.session_state.messages.append(
            {"role": "assistant", "content": response})