From 9227a4201feb9b36cb7f26fb110b8199c4073125 Mon Sep 17 00:00:00 2001
From: mongar
Date: Wed, 1 May 2024 12:44:57 -0500
Subject: [PATCH] Integrate an open-source LLM from HuggingFace into the
 application

---
 app.py                                   |   2 +-
 app_2.py                                 | 134 ++++++++++++++++++
 .../__pycache__/lc_tools.cpython-311.pyc | Bin 7235 -> 8341 bytes
 langchain_tools/lc_tools.py              |  96 +++++++++----
 pruebas_open_source.py                   | 128 +++++++++++++++++
 5 files changed, 329 insertions(+), 31 deletions(-)
 create mode 100644 app_2.py
 create mode 100644 pruebas_open_source.py

diff --git a/app.py b/app.py
index d1038f7..2bcfdf6 100644
--- a/app.py
+++ b/app.py
@@ -57,7 +57,7 @@ if pdf_name:
         )
 
         # Load the LLM through LangChain
-        llm = langChainTools.load_llm_openai()
+        llm = langChainTools.load_llm_open_source()
 
         # Create the chain that ties the vector store to the LLM for queries.
         # In this case the chain takes the memory parameter.
diff --git a/app_2.py b/app_2.py
new file mode 100644
index 0000000..efd42cf
--- /dev/null
+++ b/app_2.py
@@ -0,0 +1,134 @@
+# import os
+# from dotenv import load_dotenv
+# from langchain_community.chat_models import ChatOpenAI
+import streamlit as st
+from chats.streamlit_tools import import_file  # , clear_cache
+from streamlit_extras.add_vertical_space import add_vertical_space
+from langchain_tools.pdf_tools import PdfLangChain
+from langchain_tools.lc_tools import LangChainTools
+from chats.chat_tools import MessageManager
+from langchain_community.llms import HuggingFaceEndpoint
+
+
+# App title
+st.set_page_config(page_title="LLMOneClusterTeam")
+
+# Sidebar
+with st.sidebar:
+    # Load the logo (make sure the image file sits in the same folder as the script)
+    logo_path = "documents/Logo azulblanco.png"
+    logo = st.sidebar.image(logo_path, width=200)
+
+    # Adjust the width as needed
+    add_vertical_space(28)
+    # pdf_name = import_file()
+    st.markdown("Built by [OneCluster](https://www.onecluster.org/).")
+
+
+col1, col2 = st.columns([1.1, 1])
+with col1:
+    st.title(
+        "DocumentAssist",
+    )
+with col2:
+    logo_2 = st.image("documents/pdfs/logo_1-removebg-preview.png", width=110)
+
+pdf_name = import_file()
+if pdf_name:
+    with st.spinner("Processing the document..."):
+        # Initialize the PdfLangChain class
+        pdfLangChain = PdfLangChain(pdf_name)
+        pdf_name = pdfLangChain.file_name
+
+        # Load the PDF document
+        docs: list = pdfLangChain.load_pdf()
+
+        # Split the documents into smaller chunks
+        docs_split: list = pdfLangChain.split_docs(docs)
+
+        # Instantiate the LangChainTools class, which wraps the LangChain helpers
+        langChainTools = LangChainTools()
+
+        # Load the embeddings model
+        embedding_model = langChainTools.load_embedding_opnai()
+
+        # Create the vector store
+        docstorage = langChainTools.create_vector_strore(
+            docs_split, pdf_name, embedding_model
+        )
+
+        # Load the LLM through LangChain
+        llm = langChainTools.load_llm_open_source()
+
+        # Create the chain that ties the vector store to the LLM for queries.
+        # In this case the chain takes the memory parameter.
+        qa = langChainTools.define_retrieval_qa_memory(
+            llm, docstorage, pdf_name, embedding_model
+        )
+
+    # Store conversation history
+    if "messages" not in st.session_state.keys():
+        st.session_state.messages = [
+            {
+                "role": "assistant",
+                "content": "Hi, I'm an AI you can use to chat with your PDF. Ask the document a question.",
+            }
+        ]
+
+    # Display or clear chat messages
+    for message in st.session_state.messages:
+        with st.chat_message(message["role"]):
+            st.write(message["content"])
+
+    def clear_chat_history():
+        st.session_state.messages = [
+            {
+                "role": "assistant",
+                "content": "Hi, I'm an AI you can use to chat with your PDF. Ask the document a question.",
+            }
+        ]
+
+    st.sidebar.button("Clear chat history", on_click=clear_chat_history)
+
+    @st.cache_resource
+    def get_num_tokens(prompt):
+        """Approximate the number of tokens in a given prompt."""
+        return len(prompt.split())
+
+    # User-provided prompt
+    if prompt := st.chat_input():
+        st.session_state.messages.append({"role": "user", "content": prompt})
+        with st.chat_message("user"):
+            st.write(prompt)
+
+    # Generate a new response if the last message is not from the assistant
+    if st.session_state.messages[-1]["role"] != "assistant":
+        with st.spinner("Thinking..."):
+            # Recreate the chain that ties the vector store to the LLM.
+            # In this case the chain takes the memory parameter.
+            qa = langChainTools.define_retrieval_qa_memory(
+                llm, docstorage, pdf_name, embedding_model
+            )
+
+            # Joined conversation history (currently unused; the chain's
+            # memory already tracks the conversation)
+            chat_history = "\n".join([msg["content"] for msg in st.session_state.messages])
+
+            query = qa.invoke({"question": f"{prompt}"}, return_only_outputs=True)
+
+            response_text_en = query["answer"]
+            documents_source = query["source_documents"]
+
+            messageManager = MessageManager()
+
+            citation: str = messageManager.generate_citations(documents_source)
+            # st.markdown(citation)
+
+        with st.chat_message("assistant"):
+            st.write(response_text_en)
+            # st.write(translation)
+            st.session_state.messages.append(
+                {"role": "assistant", "content": response_text_en}
+            )
+            expander = st.expander("Sources")
+            expander.markdown(citation)
diff --git a/langchain_tools/__pycache__/lc_tools.cpython-311.pyc b/langchain_tools/__pycache__/lc_tools.cpython-311.pyc
index 0708cbddeb19856d228720672a778729b145c2a6..be078b147d4e1fa99de9251244c11456f454864c 100644
Binary files a/langchain_tools/__pycache__/lc_tools.cpython-311.pyc and b/langchain_tools/__pycache__/lc_tools.cpython-311.pyc differ
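The hunks for langchain_tools/lc_tools.py and pruebas_open_source.py listed in the diffstat are cut off above, so the new load_llm_open_source() is only visible through its call sites. Given the HuggingFaceEndpoint import added in app_2.py, here is a minimal sketch of what the method plausibly looks like (the repo_id and generation parameters are illustrative assumptions, not values taken from this commit):

# Hypothetical sketch of LangChainTools.load_llm_open_source() in
# langchain_tools/lc_tools.py. Only the HuggingFaceEndpoint import is
# confirmed by this patch; the model and parameters below are assumptions.
from langchain_community.llms import HuggingFaceEndpoint


class LangChainTools:
    def load_llm_open_source(self):
        """Load an open-source LLM served through the HuggingFace Inference API."""
        return HuggingFaceEndpoint(
            repo_id="mistralai/Mistral-7B-Instruct-v0.2",  # illustrative model choice
            max_new_tokens=512,
            temperature=0.1,
            # Authenticates via the HUGGINGFACEHUB_API_TOKEN environment
            # variable when huggingfacehub_api_token is not passed explicitly.
        )

Under that reading, the one-line change in the app.py hunk (load_llm_openai() to load_llm_open_source()) is the whole switch from OpenAI to the HuggingFace-hosted model.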
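Similarly, define_retrieval_qa_memory can be read off its call sites in app_2.py: it receives the LLM and the vector store, and the resulting chain is invoked with a "question" key and returns "answer" plus "source_documents", with conversation memory attached. LangChain's ConversationalRetrievalChain matches that contract exactly; a minimal sketch under that assumption:

# Hypothetical sketch consistent with the call sites in app_2.py; the real
# implementation is in the truncated langchain_tools/lc_tools.py hunk.
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory


class LangChainTools:
    def define_retrieval_qa_memory(self, llm, docstorage, pdf_name, embedding_model):
        """Build a retrieval chain with conversation memory over the vector store.

        pdf_name and embedding_model mirror the call site in app_2.py but are
        unused in this sketch.
        """
        memory = ConversationBufferMemory(
            memory_key="chat_history",
            return_messages=True,
            output_key="answer",  # required when return_source_documents=True
        )
        return ConversationalRetrievalChain.from_llm(
            llm=llm,
            retriever=docstorage.as_retriever(),
            memory=memory,
            return_source_documents=True,  # exposes query["source_documents"]
        )

Note that return_source_documents=True is what makes query["source_documents"] available to the citation expander in app_2.py, and with memory on the chain the manually joined chat history there is redundant.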