Se agrega el proyecto al repositorio

This commit is contained in:
Mongar28 2024-10-29 21:57:59 -05:00
parent 5cd142fabc
commit 70424a5c50
40 changed files with 1068 additions and 0 deletions

106
api.py Normal file
View File

@ -0,0 +1,106 @@
from flask import Flask, request, jsonify
from langchain_community.tools.tavily_search import TavilySearchResults
from typing import Annotated
from typing_extensions import TypedDict
from langgraph.graph.message import add_messages
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
from langgraph.prebuilt import create_react_agent
from langchain_core.prompts import ChatPromptTemplate
from langgraph.checkpoint.memory import MemorySaver
from langchain_tools.agent_tools import (
redact_email, list_calendar_events,
create_calendar_event, get_company_info,
get_current_date_and_time
)
from langchain_community.tools.gmail.utils import (
build_resource_service, get_gmail_credentials
)
from langchain_community.agent_toolkits import GmailToolkit
# Load environment variables from the .env file
load_dotenv()

# Initialize the Flask app
app = Flask(__name__)

# Initialize the OpenAI chat model
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0
)

# Gmail setup. The toolkit is created once, from explicit credentials; the
# previous extra bare GmailToolkit() instance was discarded immediately.
credentials = get_gmail_credentials(
    token_file="token.json",
    scopes=["https://mail.google.com/"],
    client_secrets_file="credentials.json",
)
api_resource = build_resource_service(credentials=credentials)
toolkit = GmailToolkit(api_resource=api_resource)

# Build the tool list: Gmail tools plus web search and the project tools
tools = toolkit.get_tools()
search = TavilySearchResults(max_results=2)
tools.extend([search, redact_email, list_calendar_events,
              create_calendar_event, get_company_info,
              get_current_date_and_time])
# Define the system prompt: a list of system instructions followed by the
# placeholder that receives the conversation messages
system_prompt = ChatPromptTemplate.from_messages(
[
("system", "Eres Mariana, el asistente virtual de OneCluster, una empresa de software que ofrece soluciones personalizadas. Asume el tono de J.A.R.V.I.S.: cordial, atento y con tacto en todo momento."),
("system", "Preséntate como Mariana en el primer mensaje y pregunta el nombre del usuario si no lo tienes registrado."),
("system", "Si el usuario ya ha interactuado antes, usa su nombre sin necesidad de volver a preguntar."),
("system", "OneCluster es una empresa de software especializada en desarrollo a medida. Solo responde a preguntas y solicitudes relacionadas con la empresa y sus servicios."),
("system", "Si necesitas información adicional sobre la empresa, usa la función get_company_info."),
("system", "Antes de enviar correos o crear eventos, muestra los detalles al usuario para que los confirme antes de ejecutar la tarea."),
("system", "Si te preguntan algo no relacionado con los servicios de OneCluster, responde que solo puedes ayudar con temas relacionados con la empresa y sus soluciones."),
("system", "Evita mencionar o hacer alusión a las herramientas que utilizas internamente. Esa información es confidencial."),
("placeholder", "{messages}"),
]
)
# Define the assistant's state schema
class State(TypedDict):
    # Conversation messages, annotated with langgraph's add_messages
    messages: Annotated[list, add_messages]
    # Callers in this file pass False for this flag on every request
    is_last_step: bool
# Create the agent graph with the state schema, tools and system prompt
# defined above; MemorySaver is passed as the checkpointer
graph = create_react_agent(
model=llm,
tools=tools,
state_schema=State,
state_modifier=system_prompt,
checkpointer=MemorySaver()
)
# API route that runs a text message through the agent
@app.route('/process_text', methods=['POST'])
def process_text():
    """Run the user's text through the agent graph and return its replies.

    Expects a JSON object with a ``text`` field and, optionally, a
    ``thread_id`` field selecting which conversation thread to use
    (defaults to ``"thread-1"``, the previously hard-coded value).

    Returns:
        ``{"response": [...]}`` with the content of the last message of each
        ``agent`` update, or a 400 JSON error when ``text`` is missing.
    """
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}
    user_input = payload.get('text')
    if not user_input:
        return jsonify({'error': "Missing 'text' field in JSON body."}), 400

    # recursion_limit must be a top-level config key; nested under
    # "configurable" it is not applied to the run.
    config = {
        "recursion_limit": 50,
        "configurable": {"thread_id": payload.get('thread_id', "thread-1")},
    }
    events = graph.stream(
        {"messages": [("user", user_input)], "is_last_step": False},
        config=config,
        stream_mode="updates"
    )
    # Keep only the updates emitted by the "agent" node
    response = [
        event["agent"]["messages"][-1].content
        for event in events
        if "agent" in event
    ]
    return jsonify({'response': response})
# Run the Flask app when this file is executed as a script
if __name__ == '__main__':
    app.run(port=5000)

0
api_openai/__init__.py Normal file
View File

Binary file not shown.

Binary file not shown.

45
api_openai/whisper.py Normal file
View File

@ -0,0 +1,45 @@
from dotenv import load_dotenv
import os
from openai import OpenAI
def whisper_api(file_path: str) -> str:
    """Transcribe an audio file with OpenAI's ``whisper-1`` model.

    Args:
        file_path: Path of the audio file to transcribe.

    Returns:
        The transcription, requested with ``response_format="text"``.
    """
    # Load environment variables from the .env file
    load_dotenv()
    api_key: str = os.getenv("OPENAI_API_KEY")

    client = OpenAI(api_key=api_key)
    # The context manager closes the file handle even if the request fails
    with open(file_path, "rb") as audio_file:
        transcription = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file,
            response_format="text"
        )
    return transcription
def tts_api(text: str):
    """Synthesize ``text`` to speech with OpenAI's ``tts-1`` model.

    Args:
        text: The text to convert to audio.

    Returns:
        The path of the written audio file (``audios/voice.ogg``).
    """
    # Load environment variables from the .env file
    load_dotenv()
    api_key: str = os.getenv("OPENAI_API_KEY")

    client = OpenAI(api_key=api_key)
    speech_file_path = os.path.join('audios', 'voice.ogg')
    # Make sure the output directory exists before writing into it
    os.makedirs(os.path.dirname(speech_file_path), exist_ok=True)

    # NOTE(review): no response_format is passed, so the API default audio
    # format is used while the file carries an .ogg extension - confirm.
    response = client.audio.speech.create(
        model="tts-1",
        voice="alloy",
        input=text
    )
    response.stream_to_file(speech_file_path)
    return speech_file_path

1
credentials.json Normal file
View File

@ -0,0 +1 @@
{"installed":{"client_id":"19011937557-bi5nh4afvg4tuqr87v6dp55qj9a9o1h2.apps.googleusercontent.com","project_id":"oc-aassistent","auth_uri":"https://accounts.google.com/o/oauth2/auth","token_uri":"https://oauth2.googleapis.com/token","auth_provider_x509_cert_url":"https://www.googleapis.com/oauth2/v1/certs","client_secret":"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","redirect_uris":["http://localhost"]}}

1
credentials_2.json Normal file
View File

@ -0,0 +1 @@
{"installed":{"client_id":"629922809906-pl9l1ipout6d5hh19ku50sfvnqgu8ir2.apps.googleusercontent.com","project_id":"calendar-424503","auth_uri":"https://accounts.google.com/o/oauth2/auth","token_uri":"https://oauth2.googleapis.com/token","auth_provider_x509_cert_url":"https://www.googleapis.com/oauth2/v1/certs","client_secret":"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","redirect_uris":["http://localhost"]}}

Binary file not shown.

Binary file not shown.

View File

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,323 @@
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from langchain_core.tools import tool
from datetime import datetime, timezone
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from googleapiclient.discovery import build
from rag.split_docs import load_split_docs
from rag.llm import load_llm_openai
from rag.embeddings import load_embeddins
from rag.retriever import create_retriever
from rag.vectorstore import create_verctorstore
from rag.rag_chain import create_rag_chain
from datetime import datetime
import pytz
import telebot
import os
class LangChainTools:
    """Small factory for the chat model used by the tools in this module."""

    def load_llm_openai(self):
        """Load the .env variables and return a configured ``ChatOpenAI``."""
        load_dotenv()
        # Alternatives tried previously: "gpt-3.5-turbo-0125", "gpt-4o"
        return ChatOpenAI(
            model="gpt-4o-mini",
            temperature=0.1,
            max_tokens=2000,
        )
@tool
def multiply(first_int: int, second_int: int) -> int:
    """Multiply two integers together."""
    # The docstring above is the tool description, so it is kept verbatim.
    product = first_int * second_int
    return product
@tool
def redact_email(topic: str) -> str:
    """Use this tool to draft the content of an email based on a topic."""
    # Load the LLM
    llm = LangChainTools().load_llm_openai()

    # Build the prompt for the LLM
    prompt = (
        "Please redact a email based on the topic:\n\n"
        "Topic: {}\n\n"
        "Email Content: [Your email content here]"
    ).format(topic)

    response = llm.invoke(prompt)
    # invoke() on a chat model yields a message object; return only its text
    # so the result matches the declared ``-> str``.
    return response.content
@tool
def list_calendar_events(max_results: int = 50) -> list:
    """Use this tool to list upcoming calendar events."""
    # Scopes needed to access the Google Calendar API
    SCOPES = ['https://www.googleapis.com/auth/calendar']
    creds = None
    # File holding the access and refresh tokens
    token_path = 'token_2.json'
    # OAuth client secrets file
    creds_path = 'credentials_2.json'

    # Load stored credentials when the token file exists
    if os.path.exists(token_path):
        creds = Credentials.from_authorized_user_file(token_path, SCOPES)

    # Without valid credentials: refresh them, or run the OAuth flow
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                creds_path, SCOPES)
            creds = flow.run_local_server(port=0)
        # Persist the credentials for the next run
        with open(token_path, 'w') as token_file:
            token_file.write(creds.to_json())

    service = build('calendar', 'v3', credentials=creds)

    # 'primary' refers to the user's main calendar
    calendar_id = 'primary'

    # Timezone-aware UTC timestamp in ISO format, used as the lower bound
    now = datetime.now(timezone.utc).isoformat()
    events_result = service.events().list(
        calendarId=calendar_id, timeMin=now, maxResults=max_results,
        singleEvents=True, orderBy='startTime').execute()
    events = events_result.get('items', [])

    if not events:
        print('No upcoming events found.')
        # Return an empty list (not None) to honour the declared return type
        return []

    for event in events:
        # The start may be given as 'dateTime' or as an all-day 'date'
        start = event['start'].get('dateTime', event['start'].get('date'))
        # .get avoids a KeyError for events created without a title
        print(start, event.get('summary', ''))
    return events
@tool
def create_calendar_event(
        title: str, start_time: datetime,
        end_time: datetime, attendees: list) -> dict:
    """Use this tool to create an event in the calendar.
    Parameters:
    - title: str - The title of the event.
    - start_time: datetime - The start time of the event.
    - end_time: datetime - The end time of the event.
    - attendees: list - A list of attendee emails (required).
    Returns:
    - dict - The created event details.
    """
    if not attendees:
        raise ValueError(
            "El campo 'attendees' es obligatorio y no puede estar vacío.")

    # Validate every attendee up front, before any credential or network
    # work, so bad input fails fast.
    valid_attendees = []
    for email in attendees:
        if isinstance(email, str) and '@' in email:
            valid_attendees.append({'email': email})
        else:
            raise ValueError(f"'{email}' no es un correo electrónico válido.")

    SCOPES = ['https://www.googleapis.com/auth/calendar']
    creds = None
    # File holding the access and refresh tokens
    token_path = 'token_2.json'
    # OAuth client secrets file
    creds_path = 'credentials_2.json'

    # Load stored credentials when the token file exists
    if os.path.exists(token_path):
        creds = Credentials.from_authorized_user_file(token_path, SCOPES)

    # Without valid credentials: refresh them, or run the OAuth flow
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                creds_path, SCOPES)
            creds = flow.run_local_server(port=0)
        # Persist the credentials for the next run
        with open(token_path, 'w') as token_file:
            token_file.write(creds.to_json())

    service = build('calendar', 'v3', credentials=creds)

    # 'primary' refers to the user's main calendar
    calendar_id = 'primary'

    # Event body: title, start/end in the Bogotá time zone, and attendees
    event = {
        'summary': title,
        'start': {
            'dateTime': start_time.strftime('%Y-%m-%dT%H:%M:%S'),
            'timeZone': 'America/Bogota',
        },
        'end': {
            'dateTime': end_time.strftime('%Y-%m-%dT%H:%M:%S'),
            'timeZone': 'America/Bogota',
        },
        'attendees': valid_attendees
    }

    try:
        event = service.events().insert(
            calendarId=calendar_id, body=event).execute()
        print('Event created: %s' % (event.get('htmlLink')))
    except Exception as e:
        # Best-effort: report the failure and return an empty dict
        print(f"Error al crear el evento: {e}")
        return {}
    return event
@tool
def create_quick_add_event(quick_add_text: str):
    """Use this tool to create events in the calendar from natural language,
    using the Quick Add feature of Google Calendar.
    """
    # The text comes from the tool argument. The earlier input() call that
    # overwrote it would block on stdin and discard the caller's value.
    SCOPES = ['https://www.googleapis.com/auth/calendar']
    creds = None
    # File holding the access and refresh tokens
    token_path = 'token_2.json'
    # OAuth client secrets file
    creds_path = 'credentials_2.json'

    # Load stored credentials when the token file exists
    if os.path.exists(token_path):
        creds = Credentials.from_authorized_user_file(token_path, SCOPES)

    # Without valid credentials: refresh them, or run the OAuth flow
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                creds_path, SCOPES)
            creds = flow.run_local_server(port=0)
        # Persist the credentials for the next run
        with open(token_path, 'w') as token_file:
            token_file.write(creds.to_json())

    service = build('calendar', 'v3', credentials=creds)

    # 'primary' refers to the user's main calendar
    calendar_id = 'primary'

    # Create the event through the Quick Add endpoint
    event = service.events().quickAdd(
        calendarId=calendar_id, text=quick_add_text).execute()
    print('Event created: %s' % (event.get('htmlLink')))
    return event
# @tool
# def send_message(message: str):
# """Use this function when you need to communicate with the user."""
# # Configuración del bot
# load_dotenv()
# API_TOKEN_BOT = os.getenv("API_TOKEN_BOT")
# bot = telebot.TeleBot(API_TOKEN_BOT)
#
# bot.send_message(chat_id="5076346205", text=message)
#
@tool
def send_message(message: str):
    """Use this function when you need to communicate with Cristian."""
    # Bot configuration
    load_dotenv()
    API_TOKEN_BOT = os.getenv("API_TOKEN_BOT")
    bot = telebot.TeleBot(API_TOKEN_BOT)

    # escape_markdown is provided by telebot.formatting
    from telebot.formatting import escape_markdown
    safe_message = escape_markdown(message)

    # NOTE(review): escape_markdown escapes the MarkdownV2 character set, so
    # the message is sent with the matching parse mode - confirm rendering.
    bot.send_message(chat_id="5076346205", text=safe_message,
                     parse_mode="MarkdownV2")
@tool
def get_company_info(prompt: str) -> str:
    """Use this function when you need more information about the services offered by OneCluster."""
    pdf_name: str = 'onecluster_info.pdf'

    # Split the PDF and load the embedding and chat models
    chunks: list = load_split_docs(pdf_name)
    embedder = load_embeddins()
    chat_model = load_llm_openai()

    # Build the vector store for this document
    create_verctorstore(chunks, embedder, pdf_name)

    # Retrieval chain over the persisted store
    chain = create_rag_chain(
        chat_model,
        create_retriever(
            embedder,
            persist_directory="embeddings/onecluster_info",
        ),
    )
    result = chain.invoke({"input": prompt, "chat_history": []})
    return result["answer"]
@tool
def get_current_date_and_time():
    """
    Use this function when you need to know the current date and time.
    Returns:
        str: Current date and time in Bogotá, Colombia.
    """
    # Timezone-aware "now" for Bogotá, rendered as YYYY-MM-DD HH:MM:SS
    now_in_bogota = datetime.now(pytz.timezone('America/Bogota'))
    return f"{now_in_bogota:%Y-%m-%d %H:%M:%S}"

83
langchain_tools/agents.py Normal file
View File

@ -0,0 +1,83 @@
from langchain_core.tools import tool
from langchain_community.tools.gmail.utils import (
build_resource_service,
get_gmail_credentials,
)
from langchain_community.agent_toolkits import GmailToolkit
from langchain import hub
from langchain_community.tools.tavily_search import TavilySearchResults
from dotenv import load_dotenv
from langchain.agents import AgentExecutor, create_openai_functions_agent
from langchain_tools.agent_tools import (
multiply, redact_email, list_calendar_events,
create_calendar_event, create_quick_add_event,
send_message, get_company_info,
get_current_date_and_time
)
class AgentTools:
    """Builds the tool list and the OpenAI-functions agent for the bot."""

    def load_tools(self) -> list:
        """Return the Gmail toolkit tools plus search and the project tools."""
        # Can review scopes here https://developers.google.com/gmail/api/auth/scopes
        # For instance, readonly scope is 'https://www.googleapis.com/auth/gmail.readonly'
        credentials = get_gmail_credentials(
            token_file="token.json",
            scopes=["https://mail.google.com/"],
            client_secrets_file="credentials.json",)
        api_resource = build_resource_service(credentials=credentials)
        # Single toolkit instance, built from the explicit credentials; the
        # previous extra bare GmailToolkit() was discarded immediately.
        toolkit = GmailToolkit(api_resource=api_resource)

        # Start from the Gmail tools
        tools = toolkit.get_tools()

        load_dotenv()

        # Add the remaining tools
        search = TavilySearchResults(max_results=1)
        tools.append(search)
        tools.append(multiply)
        tools.append(redact_email)
        tools.append(list_calendar_events)
        tools.append(create_calendar_event)
        tools.append(send_message)
        tools.append(get_company_info)
        tools.append(get_current_date_and_time)
        # tools.append(create_quick_add_event)
        return tools

    def load_agent(self, llm, tools):
        """Create an ``AgentExecutor`` for ``llm`` and ``tools``.

        The base prompt is pulled from the hub
        (``langchain-ai/openai-functions-template``) and filled with the
        OneCluster assistant instructions below.
        """
        instructions = """
You are the virtual assistant of OneCluster, a company specialized in providing custom development services focused on creating personalized technological solutions for businesses and companies. Your mission is to offer a warm, friendly, and collaborative service that always reflects OneCluster's core values.
**User Interactions:**
1. **Initial Greeting:** When starting an interaction with a user, greet them courteously and identify who you have the pleasure of speaking with. Once you know the user's name, address them respectfully throughout the conversation.
2. **Providing Information:** You have the ability to offer clear and detailed information about the services provided by OneCluster. Make sure to be concise yet informative, adapting the information to the user's needs.
3. **Appointment Scheduling:** You are responsible for scheduling appointments for clients. Before confirming an appointment, always check the availability on OneCluster's calendar to ensure there is space, and check the current date and time so that you have a clear sense of time. Request an email address from the user to schedule the appointment.
4. **Handling Unanswered Questions:** If you do not know how to answer a question, politely ask for the client's contact information and clearly identify the problem to be resolved. Then, send this information to oneclustererp@gmail.com with the subject "Unresolved customer query by the agent." Inform the client that you do not have the information at your disposal but that you can escalate the request to the support team, who will respond promptly.
**Style and Tone:**
Maintain a tone that is always friendly, approachable, and professional. Each interaction should reflect OneCluster's commitment to innovation, adaptability, and ongoing collaboration.
"""
        base_prompt = hub.pull("langchain-ai/openai-functions-template")
        prompt = base_prompt.partial(instructions=instructions)
        agent = create_openai_functions_agent(llm, tools, prompt)
        agent_executor = AgentExecutor(
            agent=agent,
            tools=tools,
            verbose=True,
        )
        return agent_executor

View File

109
oc_assistant.py Normal file
View File

@ -0,0 +1,109 @@
from langchain_community.tools.tavily_search import TavilySearchResults
from typing import Annotated
from typing_extensions import TypedDict
from langgraph.graph.message import add_messages
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
from langgraph.prebuilt import create_react_agent
from langchain_core.prompts import ChatPromptTemplate
from langgraph.checkpoint.memory import MemorySaver
from langchain_tools.agent_tools import (
redact_email, list_calendar_events,
create_calendar_event,
get_company_info,
get_current_date_and_time
)
from langchain_community.tools.gmail.utils import (
build_resource_service,
get_gmail_credentials,
)
from langchain_community.agent_toolkits import GmailToolkit
load_dotenv()

# Initialize the OpenAI chat model
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0.9
)

# Can review scopes here https://developers.google.com/gmail/api/auth/scopes
# For instance, readonly scope is 'https://www.googleapis.com/auth/gmail.readonly'
credentials = get_gmail_credentials(
    token_file="token.json",
    scopes=["https://mail.google.com/"],
    client_secrets_file="credentials.json",
)
api_resource = build_resource_service(credentials=credentials)
# Single toolkit instance, built from the explicit credentials; the previous
# extra bare GmailToolkit() was discarded immediately.
toolkit = GmailToolkit(api_resource=api_resource)

# Start from the Gmail tools, then add search and the project tools
tools = toolkit.get_tools()
search = TavilySearchResults(max_results=2)
tools.append(search)
tools.append(redact_email)
tools.append(list_calendar_events)
tools.append(create_calendar_event)
tools.append(get_company_info)
tools.append(get_current_date_and_time)
# System prompt: a list of system instructions followed by the placeholder
# that receives the conversation messages
system_prompt = ChatPromptTemplate.from_messages(
[
("system", "Eres Mariana, el asistente virtual de OneCluster, una empresa de software que ofrece soluciones personalizadas. Asume el tono de J.A.R.V.I.S.: cordial, atento y con tacto en todo momento."),
# Instructions on introduction and tone
("system", "Preséntate como Mariana en el primer mensaje y pregunta el nombre del usuario si no lo tienes registrado."),
("system", "Si el usuario ya ha interactuado antes, usa su nombre sin necesidad de volver a preguntar."),
("system", "Si el primer mensaje del usuario es una solicitud, pregúntale su nombre antes de responder si aún no lo conoces."),
# Instructions on handling requests and tasks
("system", "OneCluster es una empresa de software especializada en desarrollo a medida. Solo responde a preguntas y solicitudes relacionadas con la empresa y sus servicios."),
("system", "Si necesitas información adicional sobre la empresa, usa la función get_company_info."),
("system", "Antes de enviar correos o crear eventos, muestra los detalles al usuario para que los confirme antes de ejecutar la tarea."),
# How to handle out-of-scope questions
("system", "Si te preguntan algo no relacionado con los servicios de OneCluster, responde que solo puedes ayudar con temas relacionados con la empresa y sus soluciones."),
# Never reveal the internal tools
("system", "Evita mencionar o hacer alusión a las herramientas que utilizas internamente. Esa información es confidencial."),
# Placeholder for the dynamic conversation content
("placeholder", "{messages}"),
]
)
# State schema passed to the agent graph
class State(TypedDict):
    # Conversation messages, annotated with langgraph's add_messages
    messages: Annotated[list, add_messages]
    is_last_step: bool  # Change to boolean if necessary
# Create the agent graph with the state schema, tools and system prompt
# defined above; MemorySaver is passed as the checkpointer
graph= create_react_agent(
model = llm, tools = tools, state_schema = State,
state_modifier = system_prompt,
checkpointer = MemorySaver()
)
# recursion_limit must be a top-level config key; nested under "configurable"
# it is not applied to the run. thread_id selects the conversation thread.
config = {"recursion_limit": 50, "configurable": {"thread_id": "thread-1"}}

# Console chat loop: read a line, stream it through the graph, print replies
while True:
    user_input = input("User: ")
    if user_input.lower() in ["quit", "exit", "q"]:
        print("Goodbye!")
        break

    events = graph.stream(
        {"messages": [("user", user_input)],
         "is_last_step": False},
        config, stream_mode="updates")

    for event in events:
        if "agent" in event:
            # Single quotes inside the f-string: reusing the outer double
            # quotes is a SyntaxError before Python 3.12.
            print(f"\nAsistente: {event['agent']['messages'][-1].content}\n")

109
onecluster_bot.py Normal file
View File

@ -0,0 +1,109 @@
import telebot
from dotenv import load_dotenv
import os
from api_openai.whisper import whisper_api, tts_api
from langchain_tools.agent_tools import LangChainTools
from langchain_tools.agents import AgentTools
from langchain_core.messages import AIMessage, HumanMessage
# from tools.scaped import scaped
# Bot configuration: read the token from the environment (.env) and create
# the Telegram bot client
load_dotenv()
API_TOKEN_BOT = os.getenv("API_TOKEN_BOT")
bot = telebot.TeleBot(API_TOKEN_BOT)
# Handle '/start' and '/help'
wellcome = "¡Bienvenido! ¿Cómo puedo ayudarte?"


@bot.message_handler(commands=['help', 'start'])
def send_welcome(message):
    """Reply to the /start and /help commands with the fixed welcome text."""
    bot.reply_to(message, text=wellcome, parse_mode="Markdown")
# Conversation history, kept at module level outside the handlers
history = []


def _ask_agent(pregunta_usuario):
    """Build the agent, run one question with the history, return its text."""
    langChainTools = LangChainTools()
    llm = langChainTools.load_llm_openai()
    agentTools = AgentTools()
    tools = agentTools.load_tools()
    agent_executor = agentTools.load_agent(llm, tools)
    respuesta_agente = agent_executor.invoke(
        {
            "input": pregunta_usuario,
            "chat_history": history,
        }
    )
    return respuesta_agente["output"]


def _remember(pregunta_usuario, texto_respuesta):
    """Append one question/answer exchange to the shared history."""
    history.append(HumanMessage(content=pregunta_usuario))
    history.append(AIMessage(content=texto_respuesta))


@bot.message_handler(content_types=["text", "voice"])
def bot_mensajes(message):
    """Answer a text or voice message through the agent.

    Voice notes are downloaded, transcribed, answered in text and then also
    answered with a synthesized voice message. Text messages are answered in
    text only. Each exchange is appended to ``history`` after it is sent.
    """
    # Voice note
    if message.voice:
        file_info = bot.get_file(message.voice.file_id)
        downloaded_file = bot.download_file(file_info.file_path)
        file_path = "audios/nota_de_voz.ogg"
        # Make sure the target directory exists before writing the download
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        with open(file_path, 'wb') as new_file:
            new_file.write(downloaded_file)

        pregunta_usuario = whisper_api(file_path)
        print(f"Pregunta del usuario: {pregunta_usuario}")

        texto_respuesta = _ask_agent(pregunta_usuario)
        bot.send_message(message.chat.id, texto_respuesta,
                         parse_mode="Markdown")

        path_voice: str = tts_api(texto_respuesta)
        with open(path_voice, 'rb') as voice:
            bot.send_voice(message.chat.id, voice=voice)

        _remember(pregunta_usuario, texto_respuesta)

    # Text message
    if message.text:
        pregunta_usuario = message.text
        texto_respuesta: str = _ask_agent(pregunta_usuario)
        bot.send_message(
            message.chat.id, texto_respuesta,
            parse_mode="Markdown")
        _remember(pregunta_usuario, texto_respuesta)
# print(history)
# Enviar el historial después de cada interacción
# bot.send_message(message.chat.id, history)
# while True:
# time.sleep(60)
# mensaje = 'Que mas pues!!'
# bot.send_message('5076346205', mensaje)
# Start polling for updates; the handlers registered above process them
bot.infinity_polling()

32
prueba_rag.py Normal file
View File

@ -0,0 +1,32 @@
from rag.split_docs import load_split_docs
from rag.llm import load_llm_openai
from rag.embeddings import load_embeddins
from rag.retriever import create_retriever
from rag.vectorstore import create_verctorstore
from rag.rag_chain import create_rag_chain
# Manual check of the RAG pipeline over the OneCluster PDF.
dir_pdfs: str = 'documents/pdfs/'
file_name: str = 'onecluster_info.pdf'
file_path: str = 'onecluster_info.pdf'

# Split the document and load the embedding and chat models
docs_split: list = load_split_docs(file_path)
embeddings_model = load_embeddins()
llm = load_llm_openai()

# Build the vector store for the document
create_verctorstore(docs_split, embeddings_model, file_path)

# Retriever over the persisted store, wrapped in the RAG chain
retriever = create_retriever(
    embeddings_model, persist_directory="embeddings/onecluster_info")
qa = create_rag_chain(llm, retriever)

prompt: str = "Dame información detallada sobre los sercivios que ofrese OneCluster."
respuesta = qa.invoke({"input": prompt, "chat_history": []})
print(respuesta["answer"])

0
rag/__init__.py Normal file
View File

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

10
rag/embeddings.py Normal file
View File

@ -0,0 +1,10 @@
from dotenv import load_dotenv
from langchain_openai import OpenAIEmbeddings
def load_embeddins():
    """Load the .env variables and return the OpenAI embeddings model."""
    load_dotenv()
    # Alternative tried previously: "text-embedding-ada-002"
    return OpenAIEmbeddings(model="text-embedding-3-small")

17
rag/llm.py Normal file
View File

@ -0,0 +1,17 @@
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
def load_llm_openai():
    """Load the .env variables and return a configured ``ChatOpenAI``."""
    load_dotenv()
    # Alternatives tried previously: "gpt-3.5-turbo-0125", "gpt-4o"
    return ChatOpenAI(
        model="gpt-4o-mini",
        temperature=0.1,
        max_tokens=2000,
    )

40
rag/rag_chain.py Normal file
View File

@ -0,0 +1,40 @@
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
def create_rag_chain(llm, retriever):
    """Build a history-aware retrieval chain.

    The chain first rewrites the latest question into a standalone one using
    the chat history, retrieves documents for it, and then answers with the
    retrieved context.

    Args:
        llm: Chat model used for both the rewrite and the answer.
        retriever: Retriever that supplies the context documents.

    Returns:
        The chain produced by ``create_retrieval_chain``; it is invoked with
        ``input`` and ``chat_history`` keys.
    """
    contextualize_q_system_prompt = """Given a chat history and the latest user question \
which might reference context in the chat history, formulate a standalone question \
which can be understood without the chat history. Do NOT answer the question, \
just reformulate it if needed and otherwise return it as is."""
    contextualize_q_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", contextualize_q_system_prompt),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ]
    )
    history_aware_retriever = create_history_aware_retriever(
        llm, retriever, contextualize_q_prompt
    )

    # Question-answering chain that also receives the chat history.
    # A blank line now separates the instructions from {context}; the former
    # trailing backslash glued the context onto the last sentence.
    qa_system_prompt = """You are an assistant for question-answering tasks. \
Use the following pieces of retrieved context to answer the question. \
If you don't know the answer, just say that you don't know. \
The length of the answer should be sufficient to address what is being asked, \
meaning don't limit yourself in length.

{context}"""
    qa_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", qa_system_prompt),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ]
    )
    question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
    return create_retrieval_chain(history_aware_retriever, question_answer_chain)

16
rag/retriever.py Normal file
View File

@ -0,0 +1,16 @@
from langchain_chroma import Chroma
def create_retriever(embeddings, persist_directory: str):
    """Open the Chroma store in ``persist_directory`` and return a retriever.

    The retriever is configured with ``k=6``.
    """
    store = Chroma(
        embedding_function=embeddings,
        persist_directory=persist_directory,
    )
    return store.as_retriever(search_kwargs={"k": 6})

19
rag/split_docs.py Normal file
View File

@ -0,0 +1,19 @@
from langchain_community.document_loaders.pdf import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os
def load_split_docs(file_name: str) -> list:
    """Load ``documents/pdfs/<file_name>`` and split it into chunks.

    Chunks are produced with a size of 2000 and an overlap of 300.
    """
    pdf_path = os.path.join("documents", "pdfs", file_name)
    pages = PyPDFLoader(pdf_path).load()
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=2000, chunk_overlap=300
    )
    return splitter.split_documents(pages)

15
rag/vectorstore.py Normal file
View File

@ -0,0 +1,15 @@
from langchain_chroma import Chroma
import os
def create_verctorstore(docs_split: list, embeddings, file_name: str):
    """Create the Chroma store for ``file_name`` unless it already exists.

    The store directory is ``embeddings/<name>``, where ``<name>`` is the
    file name without ``.pdf``, with spaces replaced by underscores and
    lower-cased.
    """
    store_name = file_name.replace(".pdf", "").replace(" ", "_").lower()
    target_dir = f"embeddings/{store_name}"
    # Nothing to do when the store directory is already on disk
    if os.path.exists(target_dir):
        return
    Chroma.from_documents(
        documents=docs_split,
        embedding=embeddings,
        persist_directory=target_dir,
    )

140
requirements.txt Normal file
View File

@ -0,0 +1,140 @@
aiohappyeyeballs==2.4.0
aiohttp==3.10.6
aiosignal==1.3.1
annotated-types==0.7.0
anyio==4.6.0
asgiref==3.8.1
attrs==24.2.0
backoff==2.2.1
bcrypt==4.2.0
beautifulsoup4==4.12.3
blinker==1.8.2
build==1.2.2.post1
cachetools==5.5.0
certifi==2024.8.30
charset-normalizer==3.3.2
chroma-hnswlib==0.7.3
chromadb==0.5.3
click==8.1.7
coloredlogs==15.0.1
dataclasses-json==0.6.7
Deprecated==1.2.14
distro==1.9.0
durationpy==0.9
fastapi==0.115.4
filelock==3.16.1
Flask==3.0.3
flatbuffers==24.3.25
frozenlist==1.4.1
fsspec==2024.10.0
google==3.0.0
google-api-core==2.19.0
google-api-python-client==2.131.0
google-auth==2.29.0
google-auth-httplib2==0.2.0
google-auth-oauthlib==1.2.0
googleapis-common-protos==1.63.0
greenlet==3.1.1
grpcio==1.67.0
h11==0.14.0
httpcore==1.0.5
httplib2==0.22.0
httptools==0.6.4
httpx==0.27.2
huggingface-hub==0.26.2
humanfriendly==10.0
idna==3.10
importlib_metadata==8.4.0
importlib_resources==6.4.5
itsdangerous==2.2.0
Jinja2==3.1.4
jiter==0.5.0
jsonpatch==1.33
jsonpointer==3.0.0
kubernetes==31.0.0
langchain==0.3.1
langchain-chroma==0.1.4
langchain-community==0.3.1
langchain-core==0.3.6
langchain-openai==0.2.0
langchain-text-splitters==0.3.0
langgraph==0.2.28
langgraph-checkpoint==1.0.11
langsmith==0.1.128
markdown-it-py==3.0.0
MarkupSafe==2.1.5
marshmallow==3.22.0
mdurl==0.1.2
mmh3==5.0.1
monotonic==1.6
mpmath==1.3.0
msgpack==1.1.0
multidict==6.1.0
mypy-extensions==1.0.0
numpy==1.26.4
oauthlib==3.2.2
onnxruntime==1.19.2
openai==1.48.0
opentelemetry-api==1.27.0
opentelemetry-exporter-otlp-proto-common==1.27.0
opentelemetry-exporter-otlp-proto-grpc==1.27.0
opentelemetry-instrumentation==0.48b0
opentelemetry-instrumentation-asgi==0.48b0
opentelemetry-instrumentation-fastapi==0.48b0
opentelemetry-proto==1.27.0
opentelemetry-sdk==1.27.0
opentelemetry-semantic-conventions==0.48b0
opentelemetry-util-http==0.48b0
orjson==3.10.7
overrides==7.7.0
packaging==24.1
posthog==3.7.0
proto-plus==1.24.0
protobuf==4.25.5
pyasn1==0.6.1
pyasn1_modules==0.4.1
pydantic==2.9.2
pydantic-settings==2.5.2
pydantic_core==2.23.4
Pygments==2.18.0
pyparsing==3.1.4
pypdf==5.1.0
PyPika==0.48.9
pyproject_hooks==1.2.0
pyTelegramBotAPI==4.23.0
python-dateutil==2.9.0.post0
python-dotenv==1.0.1
pytz==2024.2
PyYAML==6.0.2
regex==2024.9.11
requests==2.32.3
requests-oauthlib==2.0.0
rich==13.9.3
rsa==4.9
setuptools==75.2.0
shellingham==1.5.4
six==1.16.0
sniffio==1.3.1
soupsieve==2.6
SQLAlchemy==2.0.35
starlette==0.41.2
sympy==1.13.3
telebot==0.0.5
tenacity==8.5.0
tiktoken==0.7.0
tokenizers==0.20.1
tqdm==4.66.5
typer==0.12.5
typing-inspect==0.9.0
typing_extensions==4.12.2
uritemplate==4.1.1
urllib3==2.2.3
uvicorn==0.32.0
uvloop==0.21.0
watchfiles==0.24.0
websocket-client==1.8.0
websockets==13.1
Werkzeug==3.0.4
wrapt==1.16.0
yarl==1.12.1
zipp==3.20.2

1
token.json Normal file
View File

@ -0,0 +1 @@
{"token": "ya29.a0AeDClZBjncDp4ZwNKNtQ5ghKHPr1IT4XkgDc9QtvhPLrFGAR84f5r5iZPCd91VB7_WoJCG3iGQS0MU1n01xdRlEjDl7wVlKjKF0H680Bdim_bzykCXn3Jj0nVVkkHDOZP7RWeP1oAfY7Vjd4qbw_VxOdOzVzG_Bc6Auy4EJINAaCgYKAcYSARASFQHGX2MipaJllxIRMLCcZb2csCZECA0177", "refresh_token": "1//05nbircha66xlCgYIARAAGAUSNwF-L9IrxbE2v7kfLwXb4u0pD6Rin7xEBOTT83DeH7t2ttfD5CDmUCyhDsOaVRMRK_r8UtdoMq8", "token_uri": "https://oauth2.googleapis.com/token", "client_id": "19011937557-bi5nh4afvg4tuqr87v6dp55qj9a9o1h2.apps.googleusercontent.com", "client_secret": "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", "scopes": ["https://mail.google.com/"], "universe_domain": "googleapis.com", "account": "", "expiry": "2024-10-30T01:16:56.882894Z"}

1
token_2.json Normal file
View File

@ -0,0 +1 @@
{"token": "ya29.a0AeDClZChMN7SEvjp3dFVZtee2pDoqAoPFC7AWiEeIG7H6qN2HDnf7c6DcFuc--aG60e1cAnOpoKf80H8aqrFFYbF4-F4LE_vz9MY8oc21Ra9PwM16FYxGGKcM2wcrrOGaFncs9Um9_yNxzAa6MUVNq88Y_Bhpr2F2mO3o53NjQaCgYKAZESARASFQHGX2Mi7EodrKchyiyPIZ4y5Lwh0Q0177", "refresh_token": "1//05CtNC-Z3ii8qCgYIARAAGAUSNwF-L9IrbOfrB0kNACEJ5HX4T-fmdNUqsGFqn1QFlvK_1L9h0emULUS1yU85IbaNyESXZSQzHU8", "token_uri": "https://oauth2.googleapis.com/token", "client_id": "629922809906-pl9l1ipout6d5hh19ku50sfvnqgu8ir2.apps.googleusercontent.com", "client_secret": "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", "scopes": ["https://www.googleapis.com/auth/calendar"], "universe_domain": "googleapis.com", "account": "", "expiry": "2024-10-30T01:22:34.287442Z"}