49 lines
1.5 KiB
Python
49 lines
1.5 KiB
Python
from langchain_chroma import Chroma
|
|
import os
|
|
|
|
#
|
|
# def create_vectorstore(docs_split: list, embeddings, file_name: str):
|
|
# db_name: str = file_name.replace(".pdf", "").replace(" ", "_").lower()
|
|
# persist_directory: str = f"embeddings/{db_name}"
|
|
#
|
|
# # Crear el directorio si no existe
|
|
# os.makedirs(persist_directory, exist_ok=True)
|
|
#
|
|
# # Siempre crear/actualizar el vectorstore
|
|
# vectordb = Chroma.from_documents(
|
|
# persist_directory=persist_directory,
|
|
# documents=docs_split,
|
|
# embedding=embeddings,
|
|
# )
|
|
#
|
|
# return vectordb
|
|
|
|
|
|
def create_vectorstore(docs_split: list, embeddings, file_name: str):
    """Create a Chroma vector store for a document and persist it on disk.

    The store is written to ``<base>/embeddings/<db_name>`` where ``<base>``
    is the parent of the directory containing this module (treated as the
    project's base directory) and ``<db_name>`` is ``file_name`` with every
    ``".pdf"`` occurrence removed, spaces replaced by underscores, and the
    result lower-cased. The directory is created when it does not exist.

    Args:
        docs_split: Documents forwarded to ``Chroma.from_documents`` as
            ``documents``.
        embeddings: Embedding object forwarded to ``Chroma.from_documents``
            as ``embedding``.
        file_name: Name of the source file; used only to derive the
            directory name of the store.

    Returns:
        The object returned by ``Chroma.from_documents``.

    Raises:
        Exception: Any error raised by ``Chroma.from_documents`` is printed
            and then re-raised unchanged.
    """
    # Walk two levels up from this file to reach the project base directory.
    module_path = os.path.abspath(__file__)
    package_dir = os.path.dirname(module_path)
    base_dir = os.path.dirname(package_dir)

    # Derive the database name from the file name.
    without_extension = file_name.replace(".pdf", "")
    db_name: str = without_extension.replace(" ", "_").lower()

    # Absolute location of the persisted embeddings; created on demand.
    persist_directory: str = os.path.join(base_dir, "embeddings", db_name)
    os.makedirs(persist_directory, exist_ok=True)

    # Debug log
    print(f"Creando vectorstore en: {persist_directory}")

    try:
        # Create/update the vector store and hand it straight back.
        return Chroma.from_documents(
            persist_directory=persist_directory,
            documents=docs_split,
            embedding=embeddings,
        )
    except Exception as e:
        # Report the failure, then let the original exception propagate.
        print(f"Error al crear vectorstore: {e}")
        raise
|