Compare commits
1 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
e508ce6fe6 |
@ -1,3 +0,0 @@
|
|||||||
API_TOKEN_BOT="7060..."
|
|
||||||
OPENAI_API_KEY="sk-..."
|
|
||||||
TAVILY_API_KEY="tvly-..."
|
|
24
Dockerfile
24
Dockerfile
@ -1,24 +0,0 @@
|
|||||||
FROM python:3.11-slim
|
|
||||||
|
|
||||||
RUN pip install poetry==1.8.4
|
|
||||||
|
|
||||||
RUN poetry config virtualenvs.create false
|
|
||||||
|
|
||||||
WORKDIR /code
|
|
||||||
|
|
||||||
COPY ./pyproject.toml ./README.md ./
|
|
||||||
|
|
||||||
# Generar el archivo lock desde cero
|
|
||||||
RUN poetry lock --no-update
|
|
||||||
|
|
||||||
COPY ./package[s] ./packages
|
|
||||||
|
|
||||||
RUN poetry install --no-interaction --no-ansi --no-root
|
|
||||||
|
|
||||||
COPY ./app ./app
|
|
||||||
|
|
||||||
RUN poetry install --no-interaction --no-ansi
|
|
||||||
|
|
||||||
EXPOSE 8080
|
|
||||||
|
|
||||||
CMD exec uvicorn app.server:app --host 0.0.0.0 --port 8080
|
|
79
README.md
79
README.md
@ -1,79 +1,2 @@
|
|||||||
# assistant
|
# oc-assistant
|
||||||
|
|
||||||
## Installation
|
|
||||||
|
|
||||||
Install the LangChain CLI if you haven't yet
|
|
||||||
|
|
||||||
```bash
|
|
||||||
pip install -U langchain-cli
|
|
||||||
```
|
|
||||||
|
|
||||||
## Adding packages
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# adding packages from
|
|
||||||
# https://github.com/langchain-ai/langchain/tree/master/templates
|
|
||||||
langchain app add $PROJECT_NAME
|
|
||||||
|
|
||||||
# adding custom GitHub repo packages
|
|
||||||
langchain app add --repo $OWNER/$REPO
|
|
||||||
# or with whole git string (supports other git providers):
|
|
||||||
# langchain app add git+https://github.com/hwchase17/chain-of-verification
|
|
||||||
|
|
||||||
# with a custom api mount point (defaults to `/{package_name}`)
|
|
||||||
langchain app add $PROJECT_NAME --api_path=/my/custom/path/rag
|
|
||||||
```
|
|
||||||
|
|
||||||
Note: you remove packages by their api path
|
|
||||||
|
|
||||||
```bash
|
|
||||||
langchain app remove my/custom/path/rag
|
|
||||||
```
|
|
||||||
|
|
||||||
## Setup LangSmith (Optional)
|
|
||||||
LangSmith will help us trace, monitor and debug LangChain applications.
|
|
||||||
You can sign up for LangSmith [here](https://smith.langchain.com/).
|
|
||||||
If you don't have access, you can skip this section
|
|
||||||
|
|
||||||
|
|
||||||
```shell
|
|
||||||
export LANGCHAIN_TRACING_V2=true
|
|
||||||
export LANGCHAIN_API_KEY=<your-api-key>
|
|
||||||
export LANGCHAIN_PROJECT=<your-project> # if not specified, defaults to "default"
|
|
||||||
```
|
|
||||||
|
|
||||||
## Launch LangServe
|
|
||||||
|
|
||||||
```bash
|
|
||||||
langchain serve
|
|
||||||
```
|
|
||||||
|
|
||||||
## Running in Docker
|
|
||||||
|
|
||||||
This project folder includes a Dockerfile that allows you to easily build and host your LangServe app.
|
|
||||||
|
|
||||||
### Building the Image
|
|
||||||
|
|
||||||
To build the image, you simply:
|
|
||||||
|
|
||||||
```shell
|
|
||||||
docker build . -t my-langserve-app
|
|
||||||
```
|
|
||||||
|
|
||||||
If you tag your image with something other than `my-langserve-app`,
|
|
||||||
note it for use in the next step.
|
|
||||||
|
|
||||||
### Running the Image Locally
|
|
||||||
|
|
||||||
To run the image, you'll need to include any environment variables
|
|
||||||
necessary for your application.
|
|
||||||
|
|
||||||
In the below example, we inject the `OPENAI_API_KEY` environment
|
|
||||||
variable with the value set in my local environment
|
|
||||||
(`$OPENAI_API_KEY`)
|
|
||||||
|
|
||||||
We also expose port 8080 with the `-p 8080:8080` option.
|
|
||||||
|
|
||||||
```shell
|
|
||||||
docker run -e OPENAI_API_KEY=$OPENAI_API_KEY -p 8080:8080 my-langserve-app
|
|
||||||
```
|
|
||||||
|
64
Rakefile
64
Rakefile
@ -1,64 +0,0 @@
|
|||||||
require 'bundler/setup'
|
|
||||||
require 'yaml'
|
|
||||||
require 'digest'
|
|
||||||
|
|
||||||
DOCKER_COMPOSE='docker-compose.yml'
|
|
||||||
|
|
||||||
|
|
||||||
desc 'entorno vivo'
|
|
||||||
namespace :live do
|
|
||||||
task :up do
|
|
||||||
compose('up', '--build', '-d', compose: DOCKER_COMPOSE)
|
|
||||||
end
|
|
||||||
|
|
||||||
desc 'monitorear salida'
|
|
||||||
task :tail do
|
|
||||||
compose('logs', '-f', 'app', compose: DOCKER_COMPOSE)
|
|
||||||
end
|
|
||||||
|
|
||||||
desc 'monitorear salida'
|
|
||||||
task :tail_end do
|
|
||||||
compose('logs', '-f', '-n 50', 'app', compose: DOCKER_COMPOSE)
|
|
||||||
end
|
|
||||||
|
|
||||||
desc 'detener entorno'
|
|
||||||
task :down do
|
|
||||||
compose('down', compose: DOCKER_COMPOSE)
|
|
||||||
end
|
|
||||||
|
|
||||||
desc 'detener entorno'
|
|
||||||
task :stop do
|
|
||||||
compose('stop', compose: DOCKER_COMPOSE)
|
|
||||||
end
|
|
||||||
|
|
||||||
desc 'eliminar entorno'
|
|
||||||
task :del do
|
|
||||||
compose('down', '-v', '--rmi', 'all', compose: DOCKER_COMPOSE)
|
|
||||||
end
|
|
||||||
|
|
||||||
desc 'reiniciar entorno'
|
|
||||||
task :restart do
|
|
||||||
compose('restart', compose: DOCKER_COMPOSE)
|
|
||||||
end
|
|
||||||
|
|
||||||
desc 'detener entorno'
|
|
||||||
task :stop do
|
|
||||||
compose('stop', compose: DOCKER_COMPOSE)
|
|
||||||
end
|
|
||||||
|
|
||||||
desc 'terminal'
|
|
||||||
task :sh do
|
|
||||||
compose('exec', 'app', 'bash')
|
|
||||||
end
|
|
||||||
|
|
||||||
end
|
|
||||||
|
|
||||||
desc 'iterar'
|
|
||||||
task :tdd do
|
|
||||||
compose('exec', 'app', "bash -c 'cd app && flake8 *'")
|
|
||||||
compose('exec', 'app', "bash -c 'cd app && pytest -vvv'")
|
|
||||||
end
|
|
||||||
|
|
||||||
def compose(*arg, compose: DOCKER_COMPOSE)
|
|
||||||
sh "docker compose -f #{compose} #{arg.join(' ')}"
|
|
||||||
end
|
|
106
api.py
Normal file
106
api.py
Normal file
@ -0,0 +1,106 @@
|
|||||||
|
from flask import Flask, request, jsonify
|
||||||
|
from langchain_community.tools.tavily_search import TavilySearchResults
|
||||||
|
from typing import Annotated
|
||||||
|
from typing_extensions import TypedDict
|
||||||
|
from langgraph.graph.message import add_messages
|
||||||
|
from langchain_openai import ChatOpenAI
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from langgraph.prebuilt import create_react_agent
|
||||||
|
from langchain_core.prompts import ChatPromptTemplate
|
||||||
|
from langgraph.checkpoint.memory import MemorySaver
|
||||||
|
from langchain_tools.agent_tools import (
|
||||||
|
redact_email, list_calendar_events,
|
||||||
|
create_calendar_event, get_company_info,
|
||||||
|
get_current_date_and_time
|
||||||
|
)
|
||||||
|
from langchain_community.tools.gmail.utils import (
|
||||||
|
build_resource_service, get_gmail_credentials
|
||||||
|
)
|
||||||
|
from langchain_community.agent_toolkits import GmailToolkit
|
||||||
|
|
||||||
|
# Cargar las variables de entorno
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
# Inicializar la app Flask
|
||||||
|
app = Flask(__name__)
|
||||||
|
|
||||||
|
# Inicializar el modelo LLM de OpenAI
|
||||||
|
llm = ChatOpenAI(
|
||||||
|
model="gpt-4o-mini",
|
||||||
|
temperature=0
|
||||||
|
)
|
||||||
|
|
||||||
|
# Configuración de Gmail
|
||||||
|
toolkit = GmailToolkit()
|
||||||
|
credentials = get_gmail_credentials(
|
||||||
|
token_file="token.json",
|
||||||
|
scopes=["https://mail.google.com/"],
|
||||||
|
client_secrets_file="credentials.json",
|
||||||
|
)
|
||||||
|
api_resource = build_resource_service(credentials=credentials)
|
||||||
|
toolkit = GmailToolkit(api_resource=api_resource)
|
||||||
|
|
||||||
|
# Crear herramientas
|
||||||
|
tools = toolkit.get_tools()
|
||||||
|
search = TavilySearchResults(max_results=2)
|
||||||
|
tools.extend([search, redact_email, list_calendar_events,
|
||||||
|
create_calendar_event, get_company_info, get_current_date_and_time])
|
||||||
|
|
||||||
|
# Definir el sistema prompt
|
||||||
|
system_prompt = ChatPromptTemplate.from_messages(
|
||||||
|
[
|
||||||
|
("system", "Eres Mariana, el asistente virtual de OneCluster, una empresa de software que ofrece soluciones personalizadas. Asume el tono de J.A.R.V.I.S.: cordial, atento y con tacto en todo momento."),
|
||||||
|
("system", "Preséntate como Mariana en el primer mensaje y pregunta el nombre del usuario si no lo tienes registrado."),
|
||||||
|
("system", "Si el usuario ya ha interactuado antes, usa su nombre sin necesidad de volver a preguntar."),
|
||||||
|
("system", "OneCluster es una empresa de software especializada en desarrollo a medida. Solo responde a preguntas y solicitudes relacionadas con la empresa y sus servicios."),
|
||||||
|
("system", "Si necesitas información adicional sobre la empresa, usa la función get_company_info."),
|
||||||
|
("system", "Antes de enviar correos o crear eventos, muestra los detalles al usuario para que los confirme antes de ejecutar la tarea."),
|
||||||
|
("system", "Si te preguntan algo no relacionado con los servicios de OneCluster, responde que solo puedes ayudar con temas relacionados con la empresa y sus soluciones."),
|
||||||
|
("system", "Evita mencionar o hacer alusión a las herramientas que utilizas internamente. Esa información es confidencial."),
|
||||||
|
("placeholder", "{messages}"),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
# Definir el estado del asistente
|
||||||
|
|
||||||
|
|
||||||
|
class State(TypedDict):
|
||||||
|
messages: Annotated[list, add_messages]
|
||||||
|
is_last_step: bool
|
||||||
|
|
||||||
|
|
||||||
|
# Crear el graph con el estado definido
|
||||||
|
graph = create_react_agent(
|
||||||
|
model=llm,
|
||||||
|
tools=tools,
|
||||||
|
state_schema=State,
|
||||||
|
state_modifier=system_prompt,
|
||||||
|
checkpointer=MemorySaver()
|
||||||
|
)
|
||||||
|
|
||||||
|
# Ruta de la API para procesar texto
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/process_text', methods=['POST'])
|
||||||
|
def process_text():
|
||||||
|
user_input = request.json.get('text')
|
||||||
|
|
||||||
|
# Procesar el texto con LangChain
|
||||||
|
events = graph.stream(
|
||||||
|
{"messages": [("user", user_input)], "is_last_step": False},
|
||||||
|
config={"configurable": {"thread_id": "thread-1", "recursion_limit": 50}},
|
||||||
|
stream_mode="updates"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Preparar la respuesta
|
||||||
|
response = []
|
||||||
|
for event in events:
|
||||||
|
if "agent" in event:
|
||||||
|
response.append(event["agent"]["messages"][-1].content)
|
||||||
|
|
||||||
|
return jsonify({'response': response})
|
||||||
|
|
||||||
|
|
||||||
|
# Ejecutar la app Flask
|
||||||
|
if __name__ == '__main__':
|
||||||
|
app.run(port=5000)
|
BIN
api_openai/__pycache__/__init__.cpython-312.pyc
Normal file
BIN
api_openai/__pycache__/__init__.cpython-312.pyc
Normal file
Binary file not shown.
BIN
api_openai/__pycache__/whisper.cpython-312.pyc
Normal file
BIN
api_openai/__pycache__/whisper.cpython-312.pyc
Normal file
Binary file not shown.
@ -1,108 +0,0 @@
|
|||||||
from dotenv import load_dotenv
|
|
||||||
from langchain_openai import ChatOpenAI
|
|
||||||
from langchain_core.tools import tool
|
|
||||||
from datetime import datetime, timezone
|
|
||||||
from googleapiclient.discovery import build
|
|
||||||
from app.rag.split_docs import load_split_docs
|
|
||||||
from app.rag.llm import load_llm_openai
|
|
||||||
from app.rag.embeddings import load_embeddins
|
|
||||||
from app.rag.retriever import create_retriever
|
|
||||||
from app.rag.vectorstore import create_vectorstore
|
|
||||||
from app.rag.rag_chain import create_rag_chain
|
|
||||||
import pytz
|
|
||||||
import telebot
|
|
||||||
import os
|
|
||||||
|
|
||||||
|
|
||||||
class LangChainTools:
|
|
||||||
def load_llm_openai(self):
|
|
||||||
load_dotenv()
|
|
||||||
# model = "gpt-3.5-turbo-0125"
|
|
||||||
# model = "gpt-4o"
|
|
||||||
model = "gpt-4o-mini"
|
|
||||||
|
|
||||||
llm = ChatOpenAI(
|
|
||||||
model=model,
|
|
||||||
temperature=0.1,
|
|
||||||
max_tokens=2000,
|
|
||||||
)
|
|
||||||
return llm
|
|
||||||
|
|
||||||
|
|
||||||
@tool
|
|
||||||
def redact_email(topic: str) -> str:
|
|
||||||
"""Use this tool to draft the content of an email based on a topic."""
|
|
||||||
|
|
||||||
# Load LLM model
|
|
||||||
langChainTools = LangChainTools()
|
|
||||||
|
|
||||||
llm = langChainTools.load_llm_openai()
|
|
||||||
# Create prompt for the LLM
|
|
||||||
prompt = (
|
|
||||||
"Please redact a email based on the topic:\n\n"
|
|
||||||
"Topic: {}\n\n"
|
|
||||||
"Email Content: [Your email content here]"
|
|
||||||
).format(topic)
|
|
||||||
|
|
||||||
response = llm.invoke(prompt)
|
|
||||||
return response
|
|
||||||
|
|
||||||
|
|
||||||
@tool
|
|
||||||
def send_message(message: str):
|
|
||||||
"""Use this function when you need to communicate with Cristian."""
|
|
||||||
# Configuración del bot
|
|
||||||
load_dotenv()
|
|
||||||
API_TOKEN_BOT = os.getenv("API_TOKEN_BOT")
|
|
||||||
bot = telebot.TeleBot(API_TOKEN_BOT)
|
|
||||||
|
|
||||||
# Escapar caracteres especiales en Markdown
|
|
||||||
from telebot.util import escape_markdown
|
|
||||||
|
|
||||||
safe_message = escape_markdown(message)
|
|
||||||
|
|
||||||
# Enviar mensaje usando MarkdownV2
|
|
||||||
bot.send_message(chat_id="5076346205", text=safe_message,
|
|
||||||
parse_mode="Markdown")
|
|
||||||
|
|
||||||
|
|
||||||
@tool
|
|
||||||
def get_company_info(prompt: str) -> str:
|
|
||||||
"""
|
|
||||||
Use this function when you need more information
|
|
||||||
about the services offered by OneCluster.
|
|
||||||
"""
|
|
||||||
file_path: str = "onecluster_info.pdf"
|
|
||||||
|
|
||||||
try:
|
|
||||||
docs_split: list = load_split_docs(file_path)
|
|
||||||
embeddings_model = load_embeddins()
|
|
||||||
llm = load_llm_openai()
|
|
||||||
|
|
||||||
# Usar el nombre corregido de la función
|
|
||||||
create_vectorstore(docs_split, embeddings_model, file_path)
|
|
||||||
|
|
||||||
retriever = create_retriever(
|
|
||||||
embeddings_model, persist_directory="embeddings/onecluster_info"
|
|
||||||
)
|
|
||||||
qa = create_rag_chain(llm, retriever)
|
|
||||||
|
|
||||||
response = qa.invoke({"input": prompt, "chat_history": []})
|
|
||||||
|
|
||||||
return response["answer"]
|
|
||||||
except Exception as e:
|
|
||||||
print(f"Error en get_company_info: {e}")
|
|
||||||
return f"Lo siento, hubo un error al procesar la información: {str(e)}"
|
|
||||||
|
|
||||||
|
|
||||||
@tool
|
|
||||||
def get_current_date_and_time():
|
|
||||||
"""
|
|
||||||
Use this function when you need to know the current date and time.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
str: Current date and time in Bogotá, Colombia.
|
|
||||||
"""
|
|
||||||
bogota_tz = pytz.timezone("America/Bogota")
|
|
||||||
current_date_and_time = datetime.now(bogota_tz)
|
|
||||||
return current_date_and_time.strftime("%Y-%m-%d %H:%M:%S")
|
|
@ -1,112 +0,0 @@
|
|||||||
# from langchain_core.tools import tool
|
|
||||||
from langchain_community.tools.gmail.utils import (
|
|
||||||
build_resource_service,
|
|
||||||
get_gmail_credentials,
|
|
||||||
)
|
|
||||||
from langchain_community.agent_toolkits import GmailToolkit
|
|
||||||
from langchain import hub
|
|
||||||
from langchain_community.tools.tavily_search import TavilySearchResults
|
|
||||||
from dotenv import load_dotenv
|
|
||||||
from langchain.agents import AgentExecutor, create_openai_functions_agent
|
|
||||||
from langchain_tools.agent_tools import (
|
|
||||||
multiply, redact_email, list_calendar_events,
|
|
||||||
create_calendar_event,
|
|
||||||
# create_quick_add_event,
|
|
||||||
send_message, get_company_info,
|
|
||||||
get_current_date_and_time
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class AgentTools:
|
|
||||||
|
|
||||||
def load_tools(self) -> list:
|
|
||||||
|
|
||||||
toolkit = GmailToolkit()
|
|
||||||
|
|
||||||
# Can review scopes here
|
|
||||||
# https://developers.google.com/gmail/api/auth/scopes
|
|
||||||
# For instance, readonly scope is
|
|
||||||
# 'https://www.googleapis.com/auth/gmail.readonly'
|
|
||||||
credentials = get_gmail_credentials(
|
|
||||||
token_file="token.json",
|
|
||||||
scopes=["https://mail.google.com/"],
|
|
||||||
client_secrets_file="credentials.json",)
|
|
||||||
api_resource = build_resource_service(credentials=credentials)
|
|
||||||
toolkit = GmailToolkit(api_resource=api_resource)
|
|
||||||
|
|
||||||
# creamos la lista de herramientas de gmail
|
|
||||||
tools = toolkit.get_tools()
|
|
||||||
|
|
||||||
load_dotenv()
|
|
||||||
|
|
||||||
# Agregamos otras tools
|
|
||||||
search = TavilySearchResults(max_results=1)
|
|
||||||
|
|
||||||
tools.append(search)
|
|
||||||
tools.append(multiply)
|
|
||||||
tools.append(redact_email)
|
|
||||||
tools.append(list_calendar_events)
|
|
||||||
tools.append(create_calendar_event)
|
|
||||||
tools.append(send_message)
|
|
||||||
tools.append(get_company_info),
|
|
||||||
tools.append(get_current_date_and_time)
|
|
||||||
# tools.append(create_quick_add_event)
|
|
||||||
|
|
||||||
return tools
|
|
||||||
|
|
||||||
def load_agent(self, llm, tools):
|
|
||||||
instructions = """
|
|
||||||
You are the virtual assistant of OneCluster, a company specialized in
|
|
||||||
providing custom development services focused on creating personalized
|
|
||||||
technological solutions for businesses and companies.
|
|
||||||
Your mission is to offer a warm, friendly,
|
|
||||||
and collaborative service that always
|
|
||||||
reflects OneCluster's core values.
|
|
||||||
|
|
||||||
**User Interactions:**
|
|
||||||
1. **Initial Greeting:** When starting an interaction with a user,
|
|
||||||
greet them courteously and identify who you have the pleasure of
|
|
||||||
speaking with. Once you know the user's name, address them respectfully
|
|
||||||
throughout the conversation.
|
|
||||||
|
|
||||||
2. **Providing Information:** You have the ability to offer clear and
|
|
||||||
detailed information about the services provided by OneCluster.
|
|
||||||
Make sure to be concise yet informative,
|
|
||||||
adapting the information to the user's needs.
|
|
||||||
|
|
||||||
3. **Appointment Scheduling:** You are responsible for scheduling
|
|
||||||
appointments for clients. Before confirming an appointment,
|
|
||||||
always check the availability on OneCluster's
|
|
||||||
calendar to ensure there is space,
|
|
||||||
and check the current date and time so that
|
|
||||||
you have a clear sense of time.
|
|
||||||
Request an email address from the user to schedule the appointment.
|
|
||||||
|
|
||||||
4. **Handling Unanswered Questions:** If you do not know how to
|
|
||||||
answer a question, politely ask for the client's contact information
|
|
||||||
and clearly identify the problem to be resolved.
|
|
||||||
Then, send this information to oneclustererp@gmail.com with the subject
|
|
||||||
"Unresolved customer query by the agent."
|
|
||||||
Inform the client that you do not have the information at your
|
|
||||||
disposal but that you can escalate the request to the support team,
|
|
||||||
who will respond promptly.
|
|
||||||
|
|
||||||
**Style and Tone:**
|
|
||||||
Maintain a tone that is always friendly, approachable, and
|
|
||||||
professional. Each interaction should reflect OneCluster's
|
|
||||||
commitment to innovation, adaptability, and ongoing collaboration.
|
|
||||||
"""
|
|
||||||
|
|
||||||
base_prompt = hub.pull("langchain-ai/openai-functions-template")
|
|
||||||
|
|
||||||
prompt = base_prompt.partial(instructions=instructions)
|
|
||||||
|
|
||||||
agent = create_openai_functions_agent(llm, tools, prompt)
|
|
||||||
|
|
||||||
agent_executor = AgentExecutor(
|
|
||||||
agent=agent,
|
|
||||||
tools=tools,
|
|
||||||
verbose=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
return agent_executor
|
|
@ -1,19 +0,0 @@
|
|||||||
from langchain_chroma import Chroma
|
|
||||||
import os
|
|
||||||
|
|
||||||
|
|
||||||
def create_vectorstore(docs_split: list, embeddings, file_name: str):
|
|
||||||
db_name: str = file_name.replace(".pdf", "").replace(" ", "_").lower()
|
|
||||||
persist_directory: str = f"embeddings/{db_name}"
|
|
||||||
|
|
||||||
# Crear el directorio si no existe
|
|
||||||
os.makedirs(persist_directory, exist_ok=True)
|
|
||||||
|
|
||||||
# Siempre crear/actualizar el vectorstore
|
|
||||||
vectordb = Chroma.from_documents(
|
|
||||||
persist_directory=persist_directory,
|
|
||||||
documents=docs_split,
|
|
||||||
embedding=embeddings,
|
|
||||||
)
|
|
||||||
|
|
||||||
return vectordb
|
|
172
app/server.py
172
app/server.py
@ -1,172 +0,0 @@
|
|||||||
from fastapi import FastAPI, Request
|
|
||||||
from fastapi.responses import JSONResponse, RedirectResponse
|
|
||||||
from langchain_openai import ChatOpenAI
|
|
||||||
from langserve import add_routes
|
|
||||||
|
|
||||||
from langchain_core.prompts import ChatPromptTemplate
|
|
||||||
|
|
||||||
from langchain_community.tools.tavily_search import TavilySearchResults
|
|
||||||
from langchain_community.tools.gmail.utils import build_resource_service
|
|
||||||
from langchain_community.agent_toolkits import GmailToolkit
|
|
||||||
|
|
||||||
from app.langchain_tools.agent_tools import (
|
|
||||||
redact_email,
|
|
||||||
get_company_info,
|
|
||||||
get_current_date_and_time,
|
|
||||||
)
|
|
||||||
|
|
||||||
from langgraph.graph.message import add_messages
|
|
||||||
from langgraph.prebuilt import create_react_agent
|
|
||||||
from langgraph.checkpoint.memory import MemorySaver
|
|
||||||
|
|
||||||
from typing import Annotated
|
|
||||||
from typing_extensions import TypedDict
|
|
||||||
from dotenv import load_dotenv
|
|
||||||
import os
|
|
||||||
|
|
||||||
load_dotenv()
|
|
||||||
|
|
||||||
app = FastAPI()
|
|
||||||
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.9)
|
|
||||||
|
|
||||||
|
|
||||||
# # Crear herramientas
|
|
||||||
tools = []
|
|
||||||
search = TavilySearchResults(max_results=2)
|
|
||||||
tools.extend(
|
|
||||||
[
|
|
||||||
search,
|
|
||||||
get_company_info,
|
|
||||||
get_current_date_and_time,
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
# # Definir el sistema prompt
|
|
||||||
system_prompt = ChatPromptTemplate.from_messages(
|
|
||||||
[
|
|
||||||
(
|
|
||||||
"system",
|
|
||||||
"Eres Mariana, el asistente virtual de OneCluster, una empresa de "
|
|
||||||
"software que ofrece soluciones personalizadas. Asume el tono de "
|
|
||||||
"J.A.R.V.I.S.: cordial, atento y con tacto en todo momento.",
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"system",
|
|
||||||
"Preséntate como Mariana en el primer mensaje y pregunta el nombre "
|
|
||||||
"del usuario si no lo tienes registrado.",
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"system",
|
|
||||||
"Si el usuario ya ha interactuado antes, usa su nombre sin necesidad "
|
|
||||||
"de volver a preguntar.",
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"system",
|
|
||||||
"Si el primer mensaje del usuario es una solicitud, pregúntale su "
|
|
||||||
"nombre antes de responder si aún no lo conoces.",
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"system",
|
|
||||||
"OneCluster es una empresa de software especializada en desarrollo a "
|
|
||||||
"medida. Solo responde a preguntas y solicitudes relacionadas con la "
|
|
||||||
"empresa y sus servicios.",
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"system",
|
|
||||||
"Si necesitas información adicional sobre la empresa, usa la función "
|
|
||||||
"get_company_info.",
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"system",
|
|
||||||
"Antes de enviar correos o crear eventos, muestra los detalles al "
|
|
||||||
"usuario para que los confirme antes de ejecutar la tarea.",
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"system",
|
|
||||||
"Si te preguntan algo no relacionado con los servicios de OneCluster,"
|
|
||||||
" responde que solo puedes ayudar con temas relacionados con la "
|
|
||||||
"empresa y sus soluciones.",
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"system",
|
|
||||||
"Evita mencionar o hacer alusión a las herramientas que utilizas "
|
|
||||||
"internamente. Esa información es confidencial.",
|
|
||||||
),
|
|
||||||
("placeholder", "{messages}"),
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
# # Definir el estado del asistente
|
|
||||||
class State(TypedDict):
|
|
||||||
messages: Annotated[list, add_messages]
|
|
||||||
is_last_step: bool
|
|
||||||
|
|
||||||
|
|
||||||
# # Crear el graph con el estado definido
|
|
||||||
graph = create_react_agent(
|
|
||||||
model=llm,
|
|
||||||
tools=tools,
|
|
||||||
state_schema=State,
|
|
||||||
state_modifier=system_prompt,
|
|
||||||
checkpointer=MemorySaver(),
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@app.get("/")
|
|
||||||
async def redirect_root_to_docs():
|
|
||||||
return RedirectResponse("/docs")
|
|
||||||
|
|
||||||
|
|
||||||
# # Edit this to add the chain you want to add
|
|
||||||
add_routes(app, llm, path="/openai")
|
|
||||||
|
|
||||||
|
|
||||||
@app.post("/process_text")
|
|
||||||
async def process_text(request: Request):
|
|
||||||
data = await request.json()
|
|
||||||
user_input = data.get("text")
|
|
||||||
|
|
||||||
# Procesar el texto con LangChain
|
|
||||||
events = graph.stream(
|
|
||||||
{"messages": [("user", user_input)], "is_last_step": False},
|
|
||||||
config={"configurable": {"thread_id": "thread-1", "recursion_limit": 50}},
|
|
||||||
stream_mode="updates",
|
|
||||||
)
|
|
||||||
|
|
||||||
# Preparar la respuesta
|
|
||||||
response = []
|
|
||||||
for event in events:
|
|
||||||
if "agent" in event:
|
|
||||||
response.append(event["agent"]["messages"][-1].content)
|
|
||||||
|
|
||||||
return JSONResponse(content={"response": response})
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
config = {"configurable": {"thread_id": "thread-1", "recursion_limit": 50}}
|
|
||||||
|
|
||||||
# Modo interactivo por defecto
|
|
||||||
import sys
|
|
||||||
|
|
||||||
if "--server" not in sys.argv:
|
|
||||||
while True:
|
|
||||||
user_input = input("User: ")
|
|
||||||
if user_input.lower() in ["quit", "exit", "q"]:
|
|
||||||
print("Goodbye!")
|
|
||||||
break
|
|
||||||
|
|
||||||
events = graph.stream(
|
|
||||||
{"messages": [("user", user_input)], "is_last_step": False},
|
|
||||||
config,
|
|
||||||
stream_mode="updates",
|
|
||||||
)
|
|
||||||
|
|
||||||
for event in events:
|
|
||||||
if "agent" in event:
|
|
||||||
print(f"\nAsistente: {event['agent']['messages'][-1].content}\n")
|
|
||||||
else:
|
|
||||||
# Modo servidor con uvicorn
|
|
||||||
import uvicorn
|
|
||||||
|
|
||||||
uvicorn.run(app, host="0.0.0.0", port=8080)
|
|
1
credentials.json
Normal file
1
credentials.json
Normal file
@ -0,0 +1 @@
|
|||||||
|
{"installed":{"client_id":"19011937557-bi5nh4afvg4tuqr87v6dp55qj9a9o1h2.apps.googleusercontent.com","project_id":"oc-aassistent","auth_uri":"https://accounts.google.com/o/oauth2/auth","token_uri":"https://oauth2.googleapis.com/token","auth_provider_x509_cert_url":"https://www.googleapis.com/oauth2/v1/certs","client_secret":"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","redirect_uris":["http://localhost"]}}
|
1
credentials_2.json
Normal file
1
credentials_2.json
Normal file
@ -0,0 +1 @@
|
|||||||
|
{"installed":{"client_id":"629922809906-pl9l1ipout6d5hh19ku50sfvnqgu8ir2.apps.googleusercontent.com","project_id":"calendar-424503","auth_uri":"https://accounts.google.com/o/oauth2/auth","token_uri":"https://oauth2.googleapis.com/token","auth_provider_x509_cert_url":"https://www.googleapis.com/oauth2/v1/certs","client_secret":"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","redirect_uris":["http://localhost"]}}
|
@ -1,24 +0,0 @@
|
|||||||
version: '3.8'
|
|
||||||
|
|
||||||
services:
|
|
||||||
app:
|
|
||||||
build:
|
|
||||||
context: .
|
|
||||||
dockerfile: Dockerfile
|
|
||||||
args:
|
|
||||||
API_TOKEN_BOT: ${API_TOKEN_BOT}
|
|
||||||
OPENAI_API_KEY: ${OPENAI_API_KEY}
|
|
||||||
TAVILY_API_KEY: ${TAVILY_API_KEY}
|
|
||||||
ports:
|
|
||||||
- "8080:8080"
|
|
||||||
volumes:
|
|
||||||
- .:/code
|
|
||||||
- ./google_credentials.json:/code/google_credentials.json
|
|
||||||
- ./tokens:/code/tokens
|
|
||||||
environment:
|
|
||||||
- PYTHONUNBUFFERED=1
|
|
||||||
- GOOGLE_APPLICATION_CREDENTIALS=/code/google_credentials.json
|
|
||||||
command: >
|
|
||||||
uvicorn app.server:app --host 0.0.0.0 --port 8080
|
|
||||||
env_file:
|
|
||||||
- .env
|
|
BIN
langchain_tools/__pycache__/__init__.cpython-312.pyc
Normal file
BIN
langchain_tools/__pycache__/__init__.cpython-312.pyc
Normal file
Binary file not shown.
BIN
langchain_tools/__pycache__/agent_tools.cpython-312.pyc
Normal file
BIN
langchain_tools/__pycache__/agent_tools.cpython-312.pyc
Normal file
Binary file not shown.
BIN
langchain_tools/__pycache__/agents.cpython-312.pyc
Normal file
BIN
langchain_tools/__pycache__/agents.cpython-312.pyc
Normal file
Binary file not shown.
323
langchain_tools/agent_tools.py
Normal file
323
langchain_tools/agent_tools.py
Normal file
@ -0,0 +1,323 @@
|
|||||||
|
from dotenv import load_dotenv
|
||||||
|
from langchain_openai import ChatOpenAI
|
||||||
|
from langchain_core.tools import tool
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from google.oauth2.credentials import Credentials
|
||||||
|
from google_auth_oauthlib.flow import InstalledAppFlow
|
||||||
|
from google.auth.transport.requests import Request
|
||||||
|
from googleapiclient.discovery import build
|
||||||
|
from rag.split_docs import load_split_docs
|
||||||
|
from rag.llm import load_llm_openai
|
||||||
|
from rag.embeddings import load_embeddins
|
||||||
|
from rag.retriever import create_retriever
|
||||||
|
from rag.vectorstore import create_verctorstore
|
||||||
|
from rag.rag_chain import create_rag_chain
|
||||||
|
from datetime import datetime
|
||||||
|
import pytz
|
||||||
|
import telebot
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
class LangChainTools:
|
||||||
|
def load_llm_openai(self):
|
||||||
|
load_dotenv()
|
||||||
|
# model = "gpt-3.5-turbo-0125"
|
||||||
|
# model = "gpt-4o"
|
||||||
|
model = "gpt-4o-mini"
|
||||||
|
|
||||||
|
llm = ChatOpenAI(
|
||||||
|
model=model,
|
||||||
|
temperature=0.1,
|
||||||
|
max_tokens=2000,
|
||||||
|
)
|
||||||
|
return llm
|
||||||
|
|
||||||
|
|
||||||
|
@tool
|
||||||
|
def multiply(first_int: int, second_int: int) -> int:
|
||||||
|
"""Multiply two integers together."""
|
||||||
|
return first_int * second_int
|
||||||
|
|
||||||
|
|
||||||
|
@tool
|
||||||
|
def redact_email(topic: str) -> str:
|
||||||
|
"""Use this tool to draft the content of an email based on a topic."""
|
||||||
|
|
||||||
|
# Load LLM model
|
||||||
|
langChainTools = LangChainTools()
|
||||||
|
|
||||||
|
llm = langChainTools.load_llm_openai()
|
||||||
|
# Create prompt for the LLM
|
||||||
|
prompt = (
|
||||||
|
"Please redact a email based on the topic:\n\n"
|
||||||
|
"Topic: {}\n\n"
|
||||||
|
"Email Content: [Your email content here]"
|
||||||
|
).format(topic)
|
||||||
|
|
||||||
|
response = llm.invoke(prompt)
|
||||||
|
return response
|
||||||
|
|
||||||
|
|
||||||
|
@tool
def list_calendar_events(max_results: int = 50) -> list:
    """Use this tool to list upcoming calendar events.

    Parameters:
    - max_results: int - Maximum number of events to fetch (default 50).

    Returns:
    - list - Upcoming events; empty list when there are none.
    """
    # OAuth scopes required for the Google Calendar API.
    SCOPES = ['https://www.googleapis.com/auth/calendar']

    creds = None
    # token file holding access/refresh tokens from a previous run.
    token_path = 'token_2.json'
    # OAuth 2.0 client secrets file.
    creds_path = 'credentials_2.json'

    # Load stored credentials when available.
    if os.path.exists(token_path):
        creds = Credentials.from_authorized_user_file(token_path, SCOPES)

    # Without valid credentials, refresh them or run the OAuth flow.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                creds_path, SCOPES)
            creds = flow.run_local_server(port=0)

        # Persist the credentials for the next run.
        with open(token_path, 'w') as token_file:
            token_file.write(creds.to_json())

    # Service object for the Google Calendar API.
    service = build('calendar', 'v3', credentials=creds)

    # 'primary' refers to the user's main calendar.
    calendar_id = 'primary'

    # Only list events starting from now (UTC, ISO-8601).
    now = datetime.now(timezone.utc).isoformat()
    events_result = service.events().list(
        calendarId=calendar_id, timeMin=now, maxResults=max_results,
        singleEvents=True, orderBy='startTime').execute()

    events = events_result.get('items', [])

    if not events:
        print('No upcoming events found.')
        # Bug fix: the function is annotated "-> list"; return an empty
        # list instead of None so callers can iterate safely.
        return []

    for event in events:
        # Start can be a datetime ('dateTime') or an all-day date ('date').
        start = event['start'].get('dateTime', event['start'].get('date'))
        print(start, event['summary'])

    return events
|
@tool
def create_calendar_event(
        title: str, start_time: datetime,
        end_time: datetime, attendees: list) -> dict:
    """Use this tool to create an event in the calendar.

    Parameters:
    - title: str - The title of the event.
    - start_time: datetime - The start time of the event.
    - end_time: datetime - The end time of the event.
    - attendees: list - A list of attendee emails (required).

    Returns:
    - dict - The created event details (empty dict on API failure).
    """
    if not attendees:
        raise ValueError(
            "El campo 'attendees' es obligatorio y no puede estar vacío.")

    # Improvement: validate attendees BEFORE the OAuth flow so bad input
    # fails fast without touching credentials or the network.
    valid_attendees = []
    for email in attendees:
        if isinstance(email, str) and '@' in email:
            valid_attendees.append({'email': email})
        else:
            raise ValueError(f"'{email}' no es un correo electrónico válido.")

    SCOPES = ['https://www.googleapis.com/auth/calendar']
    creds = None

    # token file holding access/refresh tokens from a previous run.
    token_path = 'token_2.json'
    # OAuth 2.0 client secrets file.
    creds_path = 'credentials_2.json'

    # Load stored credentials when available.
    if os.path.exists(token_path):
        creds = Credentials.from_authorized_user_file(token_path, SCOPES)

    # Without valid credentials, refresh them or run the OAuth flow.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                creds_path, SCOPES)
            creds = flow.run_local_server(port=0)

        # Persist the credentials for the next run.
        with open(token_path, 'w') as token_file:
            token_file.write(creds.to_json())

    # Service object for the Google Calendar API.
    service = build('calendar', 'v3', credentials=creds)

    # 'primary' refers to the user's main calendar.
    calendar_id = 'primary'

    # Event payload: local wall-clock times with an explicit time zone.
    event = {
        'summary': title,
        'start': {
            'dateTime': start_time.strftime('%Y-%m-%dT%H:%M:%S'),
            'timeZone': 'America/Bogota',
        },
        'end': {
            'dateTime': end_time.strftime('%Y-%m-%dT%H:%M:%S'),
            'timeZone': 'America/Bogota',
        },
        'attendees': valid_attendees
    }

    try:
        # Create the event in the calendar.
        event = service.events().insert(calendarId=calendar_id, body=event).execute()
        print('Event created: %s' % (event.get('htmlLink')))
    except Exception as e:
        # Best-effort: report the failure and return an empty dict rather
        # than crashing the agent loop.
        print(f"Error al crear el evento: {e}")
        return {}

    return event
|
||||||
|
@tool
def create_quick_add_event(quick_add_text: str):
    """Use this tool to create events in the calendar from natural language,
    using the Quick Add feature of Google Calendar.

    Parameters:
    - quick_add_text: str - Natural-language event description.

    Returns:
    - dict - The created event as returned by the API.
    """
    # Bug fix: the original overwrote the quick_add_text argument with a
    # blocking input() prompt, which breaks non-interactive (agent) use.
    SCOPES = ['https://www.googleapis.com/auth/calendar']

    creds = None
    # token file holding access/refresh tokens from a previous run.
    token_path = 'token_2.json'
    # OAuth 2.0 client secrets file.
    creds_path = 'credentials_2.json'

    # Load stored credentials when available.
    if os.path.exists(token_path):
        creds = Credentials.from_authorized_user_file(token_path, SCOPES)

    # Without valid credentials, refresh them or run the OAuth flow.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                creds_path, SCOPES)
            creds = flow.run_local_server(port=0)

        # Persist the credentials for the next run.
        with open(token_path, 'w') as token_file:
            token_file.write(creds.to_json())

    # Service object for the Google Calendar API.
    service = build('calendar', 'v3', credentials=creds)

    # 'primary' refers to the user's main calendar.
    calendar_id = 'primary'

    # Quick Add parses the natural-language text server-side.
    event = service.events().quickAdd(
        calendarId=calendar_id, text=quick_add_text).execute()

    print('Event created: %s' % (event.get('htmlLink')))

    return event
|
||||||
|
# @tool
|
||||||
|
# def send_message(message: str):
|
||||||
|
# """Use this function when you need to communicate with the user."""
|
||||||
|
# # Configuración del bot
|
||||||
|
# load_dotenv()
|
||||||
|
# API_TOKEN_BOT = os.getenv("API_TOKEN_BOT")
|
||||||
|
# bot = telebot.TeleBot(API_TOKEN_BOT)
|
||||||
|
#
|
||||||
|
# bot.send_message(chat_id="5076346205", text=message)
|
||||||
|
#
|
||||||
|
|
||||||
|
@tool
def send_message(message: str):
    """Use this function when you need to communicate with Cristian."""
    # Bot configuration.
    load_dotenv()
    token = os.getenv("API_TOKEN_BOT")
    bot = telebot.TeleBot(token)

    # Escape Markdown special characters so the message renders safely.
    from telebot.util import escape_markdown
    escaped_text = escape_markdown(message)

    # Deliver the message using Markdown parse mode.
    bot.send_message(chat_id="5076346205", text=escaped_text,
                     parse_mode="Markdown")
|
||||||
|
@tool
def get_company_info(prompt: str) -> str:
    """Use this function when you need more information about the services offered by OneCluster."""
    file_path: str = 'onecluster_info.pdf'

    # Build the RAG pipeline: split docs -> embeddings -> vector store
    # -> retriever -> retrieval chain.
    docs_split: list = load_split_docs(file_path)
    embeddings_model = load_embeddins()
    llm = load_llm_openai()
    create_verctorstore(
        docs_split,
        embeddings_model,
        file_path,
    )
    retriever = create_retriever(
        embeddings_model,
        persist_directory="embeddings/onecluster_info",
    )
    qa = create_rag_chain(llm, retriever)

    # Run the retrieval-augmented query with an empty chat history.
    result = qa.invoke({"input": prompt, "chat_history": []})
    return result["answer"]
|
||||||
|
@tool
def get_current_date_and_time():
    """
    Use this function when you need to know the current date and time.

    Returns:
        str: Current date and time in Bogotá, Colombia.
    """
    tz_bogota = pytz.timezone('America/Bogota')
    now_in_bogota = datetime.now(tz_bogota)
    return now_in_bogota.strftime('%Y-%m-%d %H:%M:%S')
83
langchain_tools/agents.py
Normal file
83
langchain_tools/agents.py
Normal file
@ -0,0 +1,83 @@
|
|||||||
|
from langchain_core.tools import tool
|
||||||
|
from langchain_community.tools.gmail.utils import (
|
||||||
|
build_resource_service,
|
||||||
|
get_gmail_credentials,
|
||||||
|
)
|
||||||
|
from langchain_community.agent_toolkits import GmailToolkit
|
||||||
|
from langchain import hub
|
||||||
|
from langchain_community.tools.tavily_search import TavilySearchResults
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from langchain.agents import AgentExecutor, create_openai_functions_agent
|
||||||
|
from langchain_tools.agent_tools import (
|
||||||
|
multiply, redact_email, list_calendar_events,
|
||||||
|
create_calendar_event, create_quick_add_event,
|
||||||
|
send_message, get_company_info,
|
||||||
|
get_current_date_and_time
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AgentTools:
    """Builds the tool list and the OpenAI-functions agent executor."""

    def load_tools(self) -> list:
        """Return the full list of tools available to the agent."""
        # Can review scopes here https://developers.google.com/gmail/api/auth/scopes
        # For instance, readonly scope is 'https://www.googleapis.com/auth/gmail.readonly'
        credentials = get_gmail_credentials(
            token_file="token.json",
            scopes=["https://mail.google.com/"],
            client_secrets_file="credentials.json",
        )
        api_resource = build_resource_service(credentials=credentials)
        # Bug fix: removed a redundant first GmailToolkit() instantiation
        # that was immediately replaced by this configured one.
        toolkit = GmailToolkit(api_resource=api_resource)

        # Gmail tools form the base of the tool list.
        tools = toolkit.get_tools()

        load_dotenv()

        # Additional custom tools.
        search = TavilySearchResults(max_results=1)

        tools.append(search)
        tools.append(multiply)
        tools.append(redact_email)
        tools.append(list_calendar_events)
        tools.append(create_calendar_event)
        tools.append(send_message)
        # Bug fix: a stray trailing comma made this statement a tuple
        # expression; removed for clarity.
        tools.append(get_company_info)
        tools.append(get_current_date_and_time)
        # tools.append(create_quick_add_event)

        return tools

    def load_agent(self, llm, tools):
        """Create an AgentExecutor with the OneCluster assistant persona.

        Parameters:
        - llm: the chat model to drive the agent.
        - tools: the tool list returned by load_tools().
        """
        instructions = """
You are the virtual assistant of OneCluster, a company specialized in providing custom development services focused on creating personalized technological solutions for businesses and companies. Your mission is to offer a warm, friendly, and collaborative service that always reflects OneCluster's core values.

**User Interactions:**
1. **Initial Greeting:** When starting an interaction with a user, greet them courteously and identify who you have the pleasure of speaking with. Once you know the user's name, address them respectfully throughout the conversation.

2. **Providing Information:** You have the ability to offer clear and detailed information about the services provided by OneCluster. Make sure to be concise yet informative, adapting the information to the user's needs.

3. **Appointment Scheduling:** You are responsible for scheduling appointments for clients. Before confirming an appointment, always check the availability on OneCluster's calendar to ensure there is space, and check the current date and time so that you have a clear sense of time. Request an email address from the user to schedule the appointment.

4. **Handling Unanswered Questions:** If you do not know how to answer a question, politely ask for the client's contact information and clearly identify the problem to be resolved. Then, send this information to oneclustererp@gmail.com with the subject "Unresolved customer query by the agent." Inform the client that you do not have the information at your disposal but that you can escalate the request to the support team, who will respond promptly.

**Style and Tone:**
Maintain a tone that is always friendly, approachable, and professional. Each interaction should reflect OneCluster's commitment to innovation, adaptability, and ongoing collaboration.
"""

        # Standard OpenAI-functions prompt template with our instructions.
        base_prompt = hub.pull("langchain-ai/openai-functions-template")
        prompt = base_prompt.partial(instructions=instructions)

        agent = create_openai_functions_agent(llm, tools, prompt)

        agent_executor = AgentExecutor(
            agent=agent,
            tools=tools,
            verbose=True,
        )

        return agent_executor
109
oc_assistant.py
Normal file
109
oc_assistant.py
Normal file
@ -0,0 +1,109 @@
|
|||||||
|
from langchain_community.tools.tavily_search import TavilySearchResults
|
||||||
|
from typing import Annotated
|
||||||
|
from typing_extensions import TypedDict
|
||||||
|
from langgraph.graph.message import add_messages
|
||||||
|
from langchain_openai import ChatOpenAI
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from langgraph.prebuilt import create_react_agent
|
||||||
|
from langchain_core.prompts import ChatPromptTemplate
|
||||||
|
from langgraph.checkpoint.memory import MemorySaver
|
||||||
|
from langchain_tools.agent_tools import (
|
||||||
|
redact_email, list_calendar_events,
|
||||||
|
create_calendar_event,
|
||||||
|
get_company_info,
|
||||||
|
get_current_date_and_time
|
||||||
|
)
|
||||||
|
from langchain_community.tools.gmail.utils import (
|
||||||
|
build_resource_service,
|
||||||
|
get_gmail_credentials,
|
||||||
|
)
|
||||||
|
from langchain_community.agent_toolkits import GmailToolkit
|
||||||
|
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
|
||||||
|
# Inicialiamos un LLM de OpenaIA
|
||||||
|
llm = ChatOpenAI(
|
||||||
|
model="gpt-4o-mini",
|
||||||
|
temperature=0.9
|
||||||
|
)
|
||||||
|
|
||||||
|
toolkit = GmailToolkit()
|
||||||
|
|
||||||
|
# Can review scopes here https://developers.google.com/gmail/api/auth/scopes
|
||||||
|
# For instance, readonly scope is 'https://www.googleapis.com/auth/gmail.readonly'
|
||||||
|
credentials = get_gmail_credentials(
|
||||||
|
token_file="token.json",
|
||||||
|
scopes=["https://mail.google.com/"],
|
||||||
|
client_secrets_file="credentials.json",
|
||||||
|
)
|
||||||
|
api_resource = build_resource_service(credentials=credentials)
|
||||||
|
toolkit = GmailToolkit(api_resource=api_resource)
|
||||||
|
|
||||||
|
# creamos la lista de herramientas de gmail
|
||||||
|
tools = toolkit.get_tools()
|
||||||
|
|
||||||
|
search = TavilySearchResults(max_results=2)
|
||||||
|
tools.append(search)
|
||||||
|
tools.append(redact_email)
|
||||||
|
tools.append(list_calendar_events)
|
||||||
|
tools.append(create_calendar_event)
|
||||||
|
tools.append(get_company_info)
|
||||||
|
tools.append(get_current_date_and_time)
|
||||||
|
|
||||||
|
system_prompt = ChatPromptTemplate.from_messages(
|
||||||
|
[
|
||||||
|
("system", "Eres Mariana, el asistente virtual de OneCluster, una empresa de software que ofrece soluciones personalizadas. Asume el tono de J.A.R.V.I.S.: cordial, atento y con tacto en todo momento."),
|
||||||
|
|
||||||
|
# Instrucciones sobre presentación y tono
|
||||||
|
("system", "Preséntate como Mariana en el primer mensaje y pregunta el nombre del usuario si no lo tienes registrado."),
|
||||||
|
("system", "Si el usuario ya ha interactuado antes, usa su nombre sin necesidad de volver a preguntar."),
|
||||||
|
("system", "Si el primer mensaje del usuario es una solicitud, pregúntale su nombre antes de responder si aún no lo conoces."),
|
||||||
|
|
||||||
|
# Instrucciones sobre el manejo de solicitudes y tareas
|
||||||
|
("system", "OneCluster es una empresa de software especializada en desarrollo a medida. Solo responde a preguntas y solicitudes relacionadas con la empresa y sus servicios."),
|
||||||
|
("system", "Si necesitas información adicional sobre la empresa, usa la función get_company_info."),
|
||||||
|
("system", "Antes de enviar correos o crear eventos, muestra los detalles al usuario para que los confirme antes de ejecutar la tarea."),
|
||||||
|
|
||||||
|
# Cómo manejar preguntas fuera del alcance
|
||||||
|
("system", "Si te preguntan algo no relacionado con los servicios de OneCluster, responde que solo puedes ayudar con temas relacionados con la empresa y sus soluciones."),
|
||||||
|
|
||||||
|
# Prohibición de revelar herramientas internas
|
||||||
|
("system", "Evita mencionar o hacer alusión a las herramientas que utilizas internamente. Esa información es confidencial."),
|
||||||
|
|
||||||
|
# Placeholder para el contenido dinámico de la conversación
|
||||||
|
("placeholder", "{messages}"),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class State(TypedDict):
|
||||||
|
messages: Annotated[list, add_messages]
|
||||||
|
is_last_step: bool # Cambiar a booleano si es necesario
|
||||||
|
|
||||||
|
|
||||||
|
# Creamos el graph con el estado definido
|
||||||
|
graph = create_react_agent(
|
||||||
|
model=llm, tools=tools, state_schema=State,
|
||||||
|
state_modifier=system_prompt,
|
||||||
|
checkpointer=MemorySaver()
|
||||||
|
)
|
||||||
|
|
||||||
|
config = {"configurable": {"thread_id": "thread-1", "recursion_limit": 50}}
|
||||||
|
|
||||||
|
|
||||||
|
# Simple console REPL against the LangGraph agent.
while True:
    user_input = input("User: ")
    if user_input.lower() in ["quit", "exit", "q"]:
        print("Goodbye!")
        break

    events = graph.stream(
        {"messages": [("user", user_input)],
         "is_last_step": False},
        config, stream_mode="updates")

    for event in events:
        if "agent" in event:
            # Bug fix: use single quotes for the keys inside the f-string —
            # nested double quotes are a SyntaxError on Python < 3.12.
            print(f"\nAsistente: {event['agent']['messages'][-1].content}\n")
109
onecluster_bot.py
Normal file
109
onecluster_bot.py
Normal file
@ -0,0 +1,109 @@
|
|||||||
|
import telebot
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
import os
|
||||||
|
from api_openai.whisper import whisper_api, tts_api
|
||||||
|
from langchain_tools.agent_tools import LangChainTools
|
||||||
|
from langchain_tools.agents import AgentTools
|
||||||
|
from langchain_core.messages import AIMessage, HumanMessage
|
||||||
|
# from tools.scaped import scaped
|
||||||
|
|
||||||
|
# Configuración del bot
|
||||||
|
load_dotenv()
|
||||||
|
API_TOKEN_BOT = os.getenv("API_TOKEN_BOT")
|
||||||
|
bot = telebot.TeleBot(API_TOKEN_BOT)
|
||||||
|
|
||||||
|
# Handle '/start' and '/help'
|
||||||
|
wellcome = "¡Bienvenido! ¿Cómo puedo ayudarte?"
|
||||||
|
|
||||||
|
|
||||||
|
@bot.message_handler(commands=['help', 'start'])
|
||||||
|
def send_welcome(message):
|
||||||
|
bot.reply_to(message, wellcome, parse_mode="Markdown")
|
||||||
|
|
||||||
|
|
||||||
|
# Creamos una lista para el historial fuera de las funciones
|
||||||
|
history = []
|
||||||
|
|
||||||
|
|
||||||
|
@bot.message_handler(content_types=["text", "voice"])
def bot_mensajes(message):
    """Handle incoming Telegram text and voice messages via the agent.

    Voice notes are downloaded, transcribed with Whisper, answered as text
    plus a TTS voice reply; plain text gets a text reply only. Both paths
    append the exchange to the shared conversation history.
    """
    global history  # Shared conversation history across messages.

    def _run_agent(pregunta_usuario: str) -> str:
        # One-line purpose: build the agent and invoke it with the question
        # and the shared chat history, returning the text answer.
        langChainTools = LangChainTools()
        llm = langChainTools.load_llm_openai()

        agentTools = AgentTools()
        tools = agentTools.load_tools()
        agent_executor = agentTools.load_agent(llm, tools)

        respuesta_agente = agent_executor.invoke(
            {
                "input": pregunta_usuario,
                "chat_history": history,
            }
        )
        return respuesta_agente["output"]

    # --- Voice note branch ---
    if message.voice:
        user_name = message.from_user.first_name  # NOTE(review): unused.
        file_info = bot.get_file(message.voice.file_id)
        downloaded_file = bot.download_file(file_info.file_path)
        file_path = "audios/nota_de_voz.ogg"

        with open(file_path, 'wb') as new_file:
            new_file.write(downloaded_file)

        # Transcribe the voice note to text.
        pregunta_usuario = whisper_api(file_path)
        print(f"Pregunta del usuario: {pregunta_usuario}")

        output = _run_agent(pregunta_usuario)

        bot.send_message(message.chat.id, output,
                         parse_mode="Markdown")

        # Also answer with a synthesized voice note.
        path_voice: str = tts_api(output)
        with open(path_voice, 'rb') as voice:
            bot.send_voice(message.chat.id, voice=voice)

        history.append(HumanMessage(content=pregunta_usuario))
        history.append(AIMessage(content=output))

    # --- Plain text branch ---
    if message.text:
        pregunta_usuario = message.text

        output = _run_agent(pregunta_usuario)

        # texto_respuesta: str = scaped(respuesta_agente["output"])
        bot.send_message(
            message.chat.id, output,
            parse_mode="Markdown")

        # Mandar mensaje de voz
        # path_voice: str = tts_api(respuesta_agente["output"])
        # with open(path_voice, 'rb') as voice:
        #     bot.send_voice(message.chat.id, voice=voice)

        history.append(HumanMessage(content=pregunta_usuario))
        history.append(AIMessage(content=output))
|
||||||
|
|
||||||
|
# Enviar el historial después de cada interacción
|
||||||
|
# bot.send_message(message.chat.id, history)
|
||||||
|
|
||||||
|
|
||||||
|
# while True:
|
||||||
|
# time.sleep(60)
|
||||||
|
# mensaje = 'Que mas pues!!'
|
||||||
|
# bot.send_message('5076346205', mensaje)
|
||||||
|
|
||||||
|
bot.infinity_polling()
|
32
prueba_rag.py
Normal file
32
prueba_rag.py
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
from rag.split_docs import load_split_docs
|
||||||
|
from rag.llm import load_llm_openai
|
||||||
|
from rag.embeddings import load_embeddins
|
||||||
|
from rag.retriever import create_retriever
|
||||||
|
from rag.vectorstore import create_verctorstore
|
||||||
|
from rag.rag_chain import create_rag_chain
|
||||||
|
|
||||||
|
# Manual smoke test of the RAG pipeline against the company PDF.
dir_pdfs: str = 'documents/pdfs/'
file_name: str = 'onecluster_info.pdf'
file_path: str = 'onecluster_info.pdf'

docs_split: list = load_split_docs(file_path)
embeddings_model = load_embeddins()
llm = load_llm_openai()
create_verctorstore(
    docs_split,
    embeddings_model,
    file_path
)
retriever = create_retriever(
    embeddings_model,
    persist_directory="embeddings/onecluster_info"
)
qa = create_rag_chain(
    llm, retriever)

# Bug fix: corrected Spanish spelling in the prompt
# ("sercivios" -> "servicios", "ofrese" -> "ofrece").
prompt: str = "Dame información detallada sobre los servicios que ofrece OneCluster."
respuesta = qa.invoke(
    {"input": prompt, "chat_history": []}
)

print(respuesta["answer"])
@ -1,41 +0,0 @@
|
|||||||
[tool.poetry]
|
|
||||||
name = "assistant"
|
|
||||||
version = "0.1.0"
|
|
||||||
description = ""
|
|
||||||
authors = ["Your Name <you@example.com>"]
|
|
||||||
readme = "README.md"
|
|
||||||
packages = [
|
|
||||||
{ include = "app" },
|
|
||||||
]
|
|
||||||
|
|
||||||
[tool.poetry.dependencies]
|
|
||||||
python = "^3.9"
|
|
||||||
uvicorn = "^0.23.2"
|
|
||||||
langserve = {extras = ["server"], version = ">=0.0.30"}
|
|
||||||
pydantic = "<3"
|
|
||||||
langgraph = "^0.2.28"
|
|
||||||
langchain = "^0.3.8"
|
|
||||||
langchain-community = "^0.3.8"
|
|
||||||
langchain-openai = "^0.2.5"
|
|
||||||
langchain-chroma = "^0.1.4"
|
|
||||||
google = "^3.0.0"
|
|
||||||
google-auth = "^2.35.0"
|
|
||||||
google-auth-oauthlib = "^1.1.0"
|
|
||||||
google-auth-httplib2 = "^0.1.0"
|
|
||||||
google-api-python-client = "^2.108.0"
|
|
||||||
flake8 = "^7.1.1"
|
|
||||||
httpx = "^0.27.2"
|
|
||||||
pytest = "^8.3.3"
|
|
||||||
requests = "^2.32.3"
|
|
||||||
jsonify = "^0.5"
|
|
||||||
protobuf = "^3.20.3"
|
|
||||||
pytz = "^2024.2"
|
|
||||||
telebot = "^0.0.5"
|
|
||||||
pypdf = "^5.1.0"
|
|
||||||
|
|
||||||
[tool.poetry.group.dev.dependencies]
|
|
||||||
langchain-cli = ">=0.0.15"
|
|
||||||
|
|
||||||
[build-system]
|
|
||||||
requires = ["poetry-core"]
|
|
||||||
build-backend = "poetry.core.masonry.api"
|
|
BIN
rag/__pycache__/__init__.cpython-312.pyc
Normal file
BIN
rag/__pycache__/__init__.cpython-312.pyc
Normal file
Binary file not shown.
BIN
rag/__pycache__/embeddings.cpython-312.pyc
Normal file
BIN
rag/__pycache__/embeddings.cpython-312.pyc
Normal file
Binary file not shown.
BIN
rag/__pycache__/llm.cpython-312.pyc
Normal file
BIN
rag/__pycache__/llm.cpython-312.pyc
Normal file
Binary file not shown.
BIN
rag/__pycache__/rag_chain.cpython-312.pyc
Normal file
BIN
rag/__pycache__/rag_chain.cpython-312.pyc
Normal file
Binary file not shown.
BIN
rag/__pycache__/retriever.cpython-312.pyc
Normal file
BIN
rag/__pycache__/retriever.cpython-312.pyc
Normal file
Binary file not shown.
BIN
rag/__pycache__/split_docs.cpython-312.pyc
Normal file
BIN
rag/__pycache__/split_docs.cpython-312.pyc
Normal file
Binary file not shown.
BIN
rag/__pycache__/vectorstore.cpython-312.pyc
Normal file
BIN
rag/__pycache__/vectorstore.cpython-312.pyc
Normal file
Binary file not shown.
@ -5,14 +5,10 @@ from langchain.chains.combine_documents import create_stuff_documents_chain
|
|||||||
|
|
||||||
|
|
||||||
def create_rag_chain(llm, retriever):
|
def create_rag_chain(llm, retriever):
|
||||||
contextualize_q_system_prompt = """
|
contextualize_q_system_prompt = """Given a chat history and the latest user question \
|
||||||
Given a chat history and the latest user question \
|
which might reference context in the chat history, formulate a standalone question \
|
||||||
which might reference context in the chat history,
|
which can be understood without the chat history. Do NOT answer the question, \
|
||||||
formulate a standalone question \
|
just reformulate it if needed and otherwise return it as is."""
|
||||||
which can be understood without the chat history.
|
|
||||||
Do NOT answer the question, \
|
|
||||||
just reformulate it if needed and otherwise return it as is.
|
|
||||||
"""
|
|
||||||
contextualize_q_prompt = ChatPromptTemplate.from_messages(
|
contextualize_q_prompt = ChatPromptTemplate.from_messages(
|
||||||
[
|
[
|
||||||
("system", contextualize_q_system_prompt),
|
("system", contextualize_q_system_prompt),
|
||||||
@ -25,13 +21,12 @@ def create_rag_chain(llm, retriever):
|
|||||||
)
|
)
|
||||||
|
|
||||||
# ___________________Chain con el chat history_______________________-
|
# ___________________Chain con el chat history_______________________-
|
||||||
qa_system_prompt = """
|
qa_system_prompt = """You are an assistant for question-answering tasks. \
|
||||||
You are an assistant for question-answering tasks. \
|
|
||||||
Use the following pieces of retrieved context to answer the question. \
|
Use the following pieces of retrieved context to answer the question. \
|
||||||
If you don't know the answer, just say that you don't know. \
|
If you don't know the answer, just say that you don't know. \
|
||||||
The length of the answer should be sufficient to address
|
The length of the answer should be sufficient to address what is being asked, \
|
||||||
what is being asked, \
|
|
||||||
meaning don't limit yourself in length.\
|
meaning don't limit yourself in length.\
|
||||||
|
|
||||||
{context}"""
|
{context}"""
|
||||||
qa_prompt = ChatPromptTemplate.from_messages(
|
qa_prompt = ChatPromptTemplate.from_messages(
|
||||||
[
|
[
|
||||||
@ -42,5 +37,4 @@ def create_rag_chain(llm, retriever):
|
|||||||
)
|
)
|
||||||
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
|
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
|
||||||
|
|
||||||
return create_retrieval_chain(
|
return create_retrieval_chain(history_aware_retriever, question_answer_chain)
|
||||||
history_aware_retriever, question_answer_chain)
|
|
@ -4,9 +4,7 @@ from langchain_chroma import Chroma
|
|||||||
def create_retriever(embeddings, persist_directory: str):
|
def create_retriever(embeddings, persist_directory: str):
|
||||||
# Cargamos la vectorstore
|
# Cargamos la vectorstore
|
||||||
# vectordb = Chroma.from_documents(
|
# vectordb = Chroma.from_documents(
|
||||||
# persist_directory=st.session_state.persist_directory,
|
# persist_directory=st.session_state.persist_directory, # Este es el directorio del la vs del docuemnto del usuario que se encuentra cargado en la session_state.
|
||||||
# Este es el directorio del la vs del docuemnto del usuario
|
|
||||||
# que se encuentra cargado en la session_state.
|
|
||||||
# embedding_function=embeddings,
|
# embedding_function=embeddings,
|
||||||
# )
|
# )
|
||||||
vectordb = Chroma(
|
vectordb = Chroma(
|
15
rag/vectorstore.py
Normal file
15
rag/vectorstore.py
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
from langchain_chroma import Chroma
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
def create_verctorstore(docs_split: list, embeddings, file_name: str):
|
||||||
|
db_name: str = file_name.replace(".pdf", "").replace(" ", "_").lower()
|
||||||
|
|
||||||
|
persist_directory: str = f"embeddings/{db_name}"
|
||||||
|
|
||||||
|
if not os.path.exists(persist_directory):
|
||||||
|
vectordb = Chroma.from_documents(
|
||||||
|
persist_directory=persist_directory,
|
||||||
|
documents=docs_split,
|
||||||
|
embedding=embeddings,
|
||||||
|
)
|
12
requirements.txt
Normal file
12
requirements.txt
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
langchain==0.3.1
|
||||||
|
langchain-community==0.3.1
|
||||||
|
langgraph==0.2.28
|
||||||
|
langchain-openai==0.2.0
|
||||||
|
google==3.0.0
|
||||||
|
google-auth==2.29.0
|
||||||
|
google-auth-oauthlib==1.2.0
|
||||||
|
google-api-python-client==2.131.0
|
||||||
|
langchain-chroma==0.1.4
|
||||||
|
pytz==2024.2
|
||||||
|
telebot==0.0.5
|
||||||
|
|
141
requirements_1.txt
Normal file
141
requirements_1.txt
Normal file
@ -0,0 +1,141 @@
|
|||||||
|
aiohappyeyeballs==2.4.0
|
||||||
|
aiohttp==3.10.6
|
||||||
|
aiosignal==1.3.1
|
||||||
|
annotated-types==0.7.0
|
||||||
|
anyio==4.6.0
|
||||||
|
asgiref==3.8.1
|
||||||
|
attrs==24.2.0
|
||||||
|
backoff==2.2.1
|
||||||
|
bcrypt==4.2.0
|
||||||
|
beautifulsoup4==4.12.3
|
||||||
|
blinker==1.8.2
|
||||||
|
build==1.2.2.post1
|
||||||
|
cachetools==5.5.0
|
||||||
|
certifi==2024.8.30
|
||||||
|
charset-normalizer==3.3.2
|
||||||
|
chroma-hnswlib==0.7.3
|
||||||
|
chromadb==0.5.3
|
||||||
|
click==8.1.7
|
||||||
|
coloredlogs==15.0.1
|
||||||
|
dataclasses-json==0.6.7
|
||||||
|
Deprecated==1.2.14
|
||||||
|
distro==1.9.0
|
||||||
|
durationpy==0.9
|
||||||
|
fastapi==0.115.4
|
||||||
|
filelock==3.16.1
|
||||||
|
Flask==3.0.3
|
||||||
|
flatbuffers==24.3.25
|
||||||
|
frozenlist==1.4.1
|
||||||
|
fsspec==2024.10.0
|
||||||
|
google==3.0.0
|
||||||
|
google-api-core==2.19.0
|
||||||
|
google-api-python-client==2.131.0
|
||||||
|
google-auth==2.29.0
|
||||||
|
google-auth-httplib2==0.2.0
|
||||||
|
google-auth-oauthlib==1.2.0
|
||||||
|
googleapis-common-protos==1.63.0
|
||||||
|
greenlet==3.1.1
|
||||||
|
grpcio==1.67.0
|
||||||
|
h11==0.14.0
|
||||||
|
httpcore==1.0.5
|
||||||
|
httplib2==0.22.0
|
||||||
|
httptools==0.6.4
|
||||||
|
httpx==0.27.2
|
||||||
|
huggingface-hub==0.26.2
|
||||||
|
humanfriendly==10.0
|
||||||
|
idna==3.10
|
||||||
|
importlib_metadata==8.4.0
|
||||||
|
importlib_resources==6.4.5
|
||||||
|
itsdangerous==2.2.0
|
||||||
|
Jinja2==3.1.4
|
||||||
|
jiter==0.5.0
|
||||||
|
jsonpatch==1.33
|
||||||
|
jsonpointer==3.0.0
|
||||||
|
kubernetes==31.0.0
|
||||||
|
langchain==0.3.1
|
||||||
|
langchain-chroma==0.1.4
|
||||||
|
langchain-chroma==0.1.4
|
||||||
|
langchain-community==0.3.1
|
||||||
|
langchain-core==0.3.6
|
||||||
|
langchain-openai==0.2.0
|
||||||
|
langchain-text-splitters==0.3.0
|
||||||
|
langgraph==0.2.28
|
||||||
|
langgraph-checkpoint==1.0.11
|
||||||
|
langsmith==0.1.128
|
||||||
|
markdown-it-py==3.0.0
|
||||||
|
MarkupSafe==2.1.5
|
||||||
|
marshmallow==3.22.0
|
||||||
|
mdurl==0.1.2
|
||||||
|
mmh3==5.0.1
|
||||||
|
monotonic==1.6
|
||||||
|
mpmath==1.3.0
|
||||||
|
msgpack==1.1.0
|
||||||
|
multidict==6.1.0
|
||||||
|
mypy-extensions==1.0.0
|
||||||
|
numpy==1.26.4
|
||||||
|
oauthlib==3.2.2
|
||||||
|
onnxruntime==1.19.2
|
||||||
|
openai==1.48.0
|
||||||
|
opentelemetry-api==1.27.0
|
||||||
|
opentelemetry-exporter-otlp-proto-common==1.27.0
|
||||||
|
opentelemetry-exporter-otlp-proto-grpc==1.27.0
|
||||||
|
opentelemetry-instrumentation==0.48b0
|
||||||
|
opentelemetry-instrumentation-asgi==0.48b0
|
||||||
|
opentelemetry-instrumentation-fastapi==0.48b0
|
||||||
|
opentelemetry-proto==1.27.0
|
||||||
|
opentelemetry-sdk==1.27.0
|
||||||
|
opentelemetry-semantic-conventions==0.48b0
|
||||||
|
opentelemetry-util-http==0.48b0
|
||||||
|
orjson==3.10.7
|
||||||
|
overrides==7.7.0
|
||||||
|
packaging==24.1
|
||||||
|
posthog==3.7.0
|
||||||
|
proto-plus==1.24.0
|
||||||
|
protobuf==4.25.5
|
||||||
|
pyasn1==0.6.1
|
||||||
|
pyasn1_modules==0.4.1
|
||||||
|
pydantic==2.9.2
|
||||||
|
pydantic-settings==2.5.2
|
||||||
|
pydantic_core==2.23.4
|
||||||
|
Pygments==2.18.0
|
||||||
|
pyparsing==3.1.4
|
||||||
|
pypdf==5.1.0
|
||||||
|
PyPika==0.48.9
|
||||||
|
pyproject_hooks==1.2.0
|
||||||
|
pyTelegramBotAPI==4.23.0
|
||||||
|
python-dateutil==2.9.0.post0
|
||||||
|
python-dotenv==1.0.1
|
||||||
|
pytz==2024.2
|
||||||
|
PyYAML==6.0.2
|
||||||
|
regex==2024.9.11
|
||||||
|
requests==2.32.3
|
||||||
|
requests-oauthlib==2.0.0
|
||||||
|
rich==13.9.3
|
||||||
|
rsa==4.9
|
||||||
|
setuptools==75.2.0
|
||||||
|
shellingham==1.5.4
|
||||||
|
six==1.16.0
|
||||||
|
sniffio==1.3.1
|
||||||
|
soupsieve==2.6
|
||||||
|
SQLAlchemy==2.0.35
|
||||||
|
starlette==0.41.2
|
||||||
|
sympy==1.13.3
|
||||||
|
telebot==0.0.5
|
||||||
|
tenacity==8.5.0
|
||||||
|
tiktoken==0.7.0
|
||||||
|
tokenizers==0.20.1
|
||||||
|
tqdm==4.66.5
|
||||||
|
typer==0.12.5
|
||||||
|
typing-inspect==0.9.0
|
||||||
|
typing_extensions==4.12.2
|
||||||
|
uritemplate==4.1.1
|
||||||
|
urllib3==2.2.3
|
||||||
|
uvicorn==0.32.0
|
||||||
|
uvloop==0.21.0
|
||||||
|
watchfiles==0.24.0
|
||||||
|
websocket-client==1.8.0
|
||||||
|
websockets==13.1
|
||||||
|
Werkzeug==3.0.4
|
||||||
|
wrapt==1.16.0
|
||||||
|
yarl==1.12.1
|
||||||
|
zipp==3.20.2
|
1
token.json
Normal file
1
token.json
Normal file
@ -0,0 +1 @@
|
|||||||
|
{"token": "ya29.a0AeDClZDZCMXgkdcUk5wxXI-UzONQq-h8gcmZ4e5NF5mcM8jHTrxixIktYvCMn0FZ5tR5FmLxHm_X4N-nPIwvrThg4CD40y1FaGFXFn4te9EwXNrzP7vm877wFq_qb97MMBbwO9NGCXcaGlRwSknTuF5RrFyIOeuMIMT0b0ZdaCgYKAR0SARASFQHGX2MihEN-nqX_Wr_YfPdjsM1KvA0175", "refresh_token": "1//05lbU7vbdj_XJCgYIARAAGAUSNwF-L9IrU0WFclkxDMXT1dAa8YNIxYSOgdJI-cz4tp1NLqdnReblHExSWLuCGJwiGy3eRpo3kjI", "token_uri": "https://oauth2.googleapis.com/token", "client_id": "19011937557-bi5nh4afvg4tuqr87v6dp55qj9a9o1h2.apps.googleusercontent.com", "client_secret": "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", "scopes": ["https://mail.google.com/"], "universe_domain": "googleapis.com", "account": "", "expiry": "2024-11-01T21:06:41.341370Z"}
|
Loading…
Reference in New Issue
Block a user