Spaces:

VictorCarr02
/

Conversational-Agent-LawsEC

Sleeping

App Files Files Community

Syluh27 committed on Feb 11, 2025

Commit

b206c20

1 Parent(s): 122e667

s

Browse files

Files changed (1) hide show

model.py +38 -66

model.py CHANGED Viewed

@@ -1,4 +1,3 @@
-# model.py actualizado
 from langchain.chains import RetrievalQA
 from langchain.vectorstores import Chroma
 from langchain.embeddings import HuggingFaceEmbeddings
@@ -8,78 +7,51 @@ from huggingface_hub import hf_hub_download
 import os
 import shutil
-# 1. Configuración esencial
 HF_TOKEN = os.getenv("HF_TOKEN")
-MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY")
-CHROMA_DIR = "/home/user/app/chroma_db"
-# 2. Limpieza inicial radical
-def full_clean():
-    # Eliminar todo rastro previo
-    shutil.rmtree(CHROMA_DIR, ignore_errors=True)
-    shutil.rmtree("/home/user/.cache/huggingface/hub", ignore_errors=True)
-    os.makedirs(CHROMA_DIR, exist_ok=True)
-full_clean()
-# 3. Descargar y configurar ChromaDB
-def setup_chroma():
-    # Descargar archivo original
-    chroma_src = hf_hub_download(
-        repo_id="VictorCarr02/Conversational-Agent-LawsEC",
-        repo_type="dataset",
-        filename="chroma.sqlite3",
-        token=HF_TOKEN
-    )
-    # Configurar estructura requerida por Chroma
-    tenant_dir = os.path.join(CHROMA_DIR, "chroma.sqlite3")
-    os.makedirs(os.path.dirname(tenant_dir), exist_ok=True)
-    shutil.copy(chroma_src, tenant_dir)
-setup_chroma()
-# 4. Conexión explícita a ChromaDB
-chroma_client = chromadb.PersistentClient(
-    path=CHROMA_DIR,
-    tenant="default_tenant",
-    database="default_database"
 )
-# 5. Verificar/crear collection
-try:
-    collection = chroma_client.get_collection("legal_docs")
-except ValueError:
-    collection = chroma_client.create_collection("legal_docs")
-# 6. Configurar embeddings
-embeddings = HuggingFaceEmbeddings(
-    model_name="sentence-transformers/all-mpnet-base-v2",
-    model_kwargs={"device": "cpu"}
-)
-# 7. Inicializar Chroma LangChain
-vector_store = Chroma(
-    client=chroma_client,
-    collection_name="legal_docs",
-    embedding_function=embeddings
-)
-# 8. Configurar Mistral
-llm = ChatMistralAI(
-    api_key=MISTRAL_API_KEY,
-    model="mistral-large-latest",
-    temperature=0.1
-)
-# 9. Cadena RAG final
 rag_chain = RetrievalQA.from_chain_type(
     llm=llm,
-    retriever=vector_store.as_retriever(search_kwargs={"k": 3}),
-    chain_type="stuff",
-    return_source_documents=True
-)

 from langchain.chains import RetrievalQA
 from langchain.vectorstores import Chroma
 from langchain.embeddings import HuggingFaceEmbeddings
 import os
 import shutil
+# Obtener el token desde las variables de entorno de Hugging Face Space
 HF_TOKEN = os.getenv("HF_TOKEN")
+if HF_TOKEN is None:
+    raise ValueError("No se encontró la variable de entorno HF_TOKEN.")
+# Descargar los archivos
+embedding_path = hf_hub_download(
+    repo_id="VictorCarr02/Conversational-Agent-LawsEC",
+    repo_type="dataset",
+    filename="data_level0.bin",
+    token=HF_TOKEN,
+    force_download=True  # Fuerza la descarga
+)
+chroma_path = hf_hub_download(
+    repo_id="VictorCarr02/Conversational-Agent-LawsEC",
+    repo_type="dataset",
+    filename="chroma.sqlite3",
+    token=HF_TOKEN,
+    force_download=True  # Fuerza la descarga
 )
+print("Archivos descargados en:")
+print(f"Embeddings: {embedding_path}")
+print(f"ChromaDB: {chroma_path}")
+# Cargar ChromaDB y los embeddings
+chromadb_client = chromadb.PersistentClient(path=chroma_path)
+collection = chromadb_client.get_or_create_collection(name="mis_embeddings")
+embeddings = HuggingFaceEmbeddings(model_name="mistralai/MistralAIEmbeddings", path=embedding_path)
+vector_store = Chroma(collection=collection, embedding_function=embeddings)
+# Acceder a la clave API desde la variable de entorno
+api_key = os.getenv("MISTRAL_API_KEY")
+# Verifica si la clave fue obtenida correctamente
+if api_key is None:
+    raise ValueError("La clave API MISTRAL_API_KEY no está configurada como variable de entorno.")
+# Crear el modelo LLM con la clave API
+llm = ChatMistralAI(api_key=api_key)
+# Crear el agente RAG
 rag_chain = RetrievalQA.from_chain_type(
     llm=llm,
+    retriever=vector_store.as_retriever(),
+    chain_type="stuff"
+)