Feat: Move files to APP
app/rag/__init__.py (new file, 0 lines)

app/rag/embeddings.py (new file, 10 lines)
@@ -0,0 +1,10 @@
from dotenv import load_dotenv
from langchain_openai import OpenAIEmbeddings


def load_embeddings():
    load_dotenv()  # reads OPENAI_API_KEY from a local .env file
    # model = "text-embedding-ada-002"
    model = "text-embedding-3-small"

    return OpenAIEmbeddings(model=model)
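A quick smoke test for this loader (a sketch; it assumes a .env file with OPENAI_API_KEY at the project root):

from app.rag.embeddings import load_embeddings

embeddings = load_embeddings()
vector = embeddings.embed_query("hello world")
print(len(vector))  # 1536 dimensions by default for text-embedding-3-small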
							
								
								
									
app/rag/llm.py (new file, 17 lines)
@@ -0,0 +1,17 @@
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI


def load_llm_openai():
    load_dotenv()  # reads OPENAI_API_KEY from a local .env file
    # model = "gpt-3.5-turbo-0125"
    # model = "gpt-4o"
    model = "gpt-4o-mini"

    llm = ChatOpenAI(
        model=model,
        temperature=0.1,  # low temperature keeps answers focused and reproducible
        max_tokens=2000,
    )

    return llm
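A one-line smoke test; ChatOpenAI.invoke returns an AIMessage whose text lives in .content (same .env assumption as above):

from app.rag.llm import load_llm_openai

llm = load_llm_openai()
print(llm.invoke("Say hello in one word.").content)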
							
								
								
									
app/rag/rag_chain.py (new file, 40 lines)
@@ -0,0 +1,40 @@
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain


def create_rag_chain(llm, retriever):
    contextualize_q_system_prompt = """Given a chat history and the latest user question \
    which might reference context in the chat history, formulate a standalone question \
    which can be understood without the chat history. Do NOT answer the question, \
    just reformulate it if needed and otherwise return it as is."""
    contextualize_q_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", contextualize_q_system_prompt),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ]
    )
    history_aware_retriever = create_history_aware_retriever(
        llm, retriever, contextualize_q_prompt
    )

    # ------------------- Chain with chat history -------------------
    qa_system_prompt = """You are an assistant for question-answering tasks. \
    Use the following pieces of retrieved context to answer the question. \
    If you don't know the answer, just say that you don't know. \
    The length of the answer should be sufficient to address what is being asked, \
    meaning don't limit yourself in length.

    {context}"""
    qa_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", qa_system_prompt),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ]
    )
    question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)

    return create_retrieval_chain(history_aware_retriever, question_answer_chain)
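The chain returned here is invoked with a dict carrying "input" and "chat_history"; the result dict exposes the generated text under "answer" and the retrieved documents under "context". A minimal invocation sketch (llm and retriever are assumed to come from the surrounding modules; the messages are placeholders):

from langchain_core.messages import AIMessage, HumanMessage

rag_chain = create_rag_chain(llm, retriever)
chat_history = [
    HumanMessage(content="What does chapter 2 cover?"),
    AIMessage(content="Chapter 2 introduces vector stores."),
]
result = rag_chain.invoke({"input": "Summarize it briefly.", "chat_history": chat_history})
print(result["answer"])   # generated answer
print(result["context"])  # Documents retrieved for the reformulated question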
							
								
								
									
app/rag/retriever.py (new file, 16 lines)
@@ -0,0 +1,16 @@
from langchain_chroma import Chroma


def create_retriever(embeddings, persist_directory: str):
    # Load the persisted vector store.
    # vectordb = Chroma.from_documents(
    #     persist_directory=st.session_state.persist_directory,  # directory of the vector store for the user's document, kept in session_state
    #     embedding_function=embeddings,
    # )
    vectordb = Chroma(
        persist_directory=persist_directory,
        embedding_function=embeddings,
    )

    # Build the retriever so it returns the most relevant chunks.
    return vectordb.as_retriever(search_kwargs={"k": 6})
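The retriever can be exercised on its own; invoke returns the six most similar chunks as Document objects. A sketch, where "embeddings/my_doc" is a hypothetical directory created beforehand by vectorstore.py:

retriever = create_retriever(load_embeddings(), "embeddings/my_doc")  # hypothetical persisted store
docs = retriever.invoke("What is the main topic?")
for doc in docs:
    print(doc.metadata.get("page"), doc.page_content[:80])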
							
								
								
									
app/rag/split_docs.py (new file, 19 lines)
@@ -0,0 +1,19 @@
import os
from langchain_community.document_loaders.pdf import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter


def load_split_docs(file_name: str) -> list:
    # PDFs are expected under documents/pdfs/.
    file_path: str = os.path.join("documents", "pdfs", file_name)
    loader = PyPDFLoader(file_path)
    docs: list = loader.load()
    chunk_size: int = 2000
    chunk_overlap: int = 300

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    docs_split: list = splitter.split_documents(docs)

    return docs_split
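Each chunk is a Document of at most 2,000 characters, with 300 characters of overlap between consecutive chunks so context survives the split boundaries. A quick sketch ("example.pdf" is a hypothetical file under documents/pdfs/):

chunks = load_split_docs("example.pdf")  # hypothetical file name
print(len(chunks), "chunks")
print(chunks[0].page_content[:100])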
							
								
								
									
app/rag/vectorstore.py (new file, 15 lines)
@@ -0,0 +1,15 @@
import os
from langchain_chroma import Chroma


def create_vectorstore(docs_split: list, embeddings, file_name: str):
    db_name: str = file_name.replace(".pdf", "").replace(" ", "_").lower()
    persist_directory: str = f"embeddings/{db_name}"

    if not os.path.exists(persist_directory):  # build the store only once per document
        Chroma.from_documents(
            persist_directory=persist_directory,
            documents=docs_split,
            embedding=embeddings,
        )
    return persist_directory
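Tying the modules together, a minimal end-to-end sketch of the pipeline these files implement (the file name and question are placeholders; create_vectorstore returns the persistence directory consumed by create_retriever):

from app.rag.embeddings import load_embeddings
from app.rag.llm import load_llm_openai
from app.rag.rag_chain import create_rag_chain
from app.rag.retriever import create_retriever
from app.rag.split_docs import load_split_docs
from app.rag.vectorstore import create_vectorstore

file_name = "example.pdf"  # placeholder; must exist under documents/pdfs/
embeddings = load_embeddings()
docs_split = load_split_docs(file_name)
persist_directory = create_vectorstore(docs_split, embeddings, file_name)
retriever = create_retriever(embeddings, persist_directory)
rag_chain = create_rag_chain(load_llm_openai(), retriever)

result = rag_chain.invoke({"input": "What is this document about?", "chat_history": []})
print(result["answer"])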