|
|
from langchain.chains import RetrievalQA |
|
|
from langchain.vectorstores import Chroma |
|
|
from langchain.embeddings import HuggingFaceEmbeddings |
|
|
from langchain_mistralai import ChatMistralAI |
|
|
import chromadb |
|
|
from huggingface_hub import hf_hub_download |
|
|
import os |
|
|
import shutil |
|
|
|
|
|
|
|
|
# Hugging Face access token, read from the process environment.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Fail fast at import time when the token has not been provided.
if HF_TOKEN is None:
    raise ValueError("No se encontró la variable de entorno HF_TOKEN.")
|
|
|
|
|
|
|
|
# Arguments shared by both downloads from the Hugging Face dataset repo.
_download_options = {
    "repo_id": "VictorCarr02/Conversational-Agent-LawsEC",
    "repo_type": "dataset",
    "token": HF_TOKEN,
    # Download even if a copy already exists in the local cache.
    "force_download": True,
}

# Local path of the downloaded "data_level0.bin" file.
embedding_path = hf_hub_download(filename="data_level0.bin", **_download_options)

# Local path of the downloaded "chroma.sqlite3" file.
chroma_path = hf_hub_download(filename="chroma.sqlite3", **_download_options)

# Report where both files ended up, one message per line.
print(
    "Archivos descargados en:",
    f"Embeddings: {embedding_path}",
    f"ChromaDB: {chroma_path}",
    sep="\n",
)
|
|
|
|
|
|
|
|
# Chroma's persistent client takes the *directory* that holds
# "chroma.sqlite3", not the path of the database file itself.
# NOTE(review): Chroma presumably also looks for its HNSW index files (such
# as "data_level0.bin") inside a per-segment subdirectory of this folder.
# Confirm the downloaded layout matches what the collection was saved with.
chroma_dir = os.path.dirname(chroma_path)
chromadb_client = chromadb.PersistentClient(path=chroma_dir)

# Name of the collection that stores the document embeddings.
COLLECTION_NAME = "mis_embeddings"
collection = chromadb_client.get_or_create_collection(name=COLLECTION_NAME)

# HuggingFaceEmbeddings accepts no `path` argument, and "data_level0.bin" is
# vector-index data rather than a model, so only the model name is passed.
# NOTE(review): "mistralai/MistralAIEmbeddings" does not look like a
# Hugging Face model id. Confirm it is the model used to build the collection.
embeddings = HuggingFaceEmbeddings(model_name="mistralai/MistralAIEmbeddings")

# The LangChain wrapper is configured with a client plus a collection name;
# it does not accept an already-created collection object.
vector_store = Chroma(
    client=chromadb_client,
    collection_name=COLLECTION_NAME,
    embedding_function=embeddings,
)
|
|
|
|
|
|
|
|
# Mistral API key, read from the process environment.
api_key = os.environ.get("MISTRAL_API_KEY")

# Stop right away when the key is missing instead of failing later on.
if api_key is None:
    raise ValueError(
        "La clave API MISTRAL_API_KEY no está configurada como variable de entorno."
    )

# Chat model client used to generate answers.
llm = ChatMistralAI(api_key=api_key)
|
|
|
|
|
|
|
|
# Retriever view over the vector store, using its default search settings.
_retriever = vector_store.as_retriever()

# Question-answering chain that combines the retriever with the chat model.
rag_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=_retriever,
)
|
|
|