Syluh27 committed on
Commit ·
2c71c21
1
Parent(s): e315cee
model.py
CHANGED
|
@@ -5,36 +5,50 @@ from langchain_mistralai import ChatMistralAI
|
|
| 5 |
import chromadb
|
| 6 |
from huggingface_hub import hf_hub_download
|
| 7 |
import os
|
|
|
|
|
|
|
| 8 |
# Obtener el token desde las variables de entorno de Hugging Face Space
|
| 9 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 10 |
if HF_TOKEN is None:
|
| 11 |
raise ValueError("No se encontró la variable de entorno HF_TOKEN.")
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
embedding_path = hf_hub_download(
|
| 14 |
repo_id="VictorCarr02/Conversational-Agent-LawsEC",
|
| 15 |
repo_type="dataset",
|
| 16 |
filename="data_level0.bin",
|
| 17 |
-
token=HF_TOKEN
|
|
|
|
| 18 |
)
|
| 19 |
|
| 20 |
chroma_path = hf_hub_download(
|
| 21 |
repo_id="VictorCarr02/Conversational-Agent-LawsEC",
|
| 22 |
repo_type="dataset",
|
| 23 |
filename="chroma.sqlite3",
|
| 24 |
-
token=HF_TOKEN
|
|
|
|
| 25 |
)
|
| 26 |
|
| 27 |
print("Archivos descargados en:")
|
| 28 |
print(f"Embeddings: {embedding_path}")
|
| 29 |
print(f"ChromaDB: {chroma_path}")
|
|
|
|
| 30 |
# Cargar ChromaDB y los embeddings
|
| 31 |
chromadb_client = chromadb.PersistentClient(path=chroma_path)
|
| 32 |
collection = chromadb_client.get_or_create_collection(name="mis_embeddings")
|
| 33 |
embeddings = HuggingFaceEmbeddings(model_name="mistralai/MistralAIEmbeddings", path=embedding_path)
|
| 34 |
vector_store = Chroma(collection=collection, embedding_function=embeddings)
|
| 35 |
|
| 36 |
-
|
| 37 |
-
|
| 38 |
# Acceder a la clave API desde la variable de entorno
|
| 39 |
api_key = os.getenv("MISTRAL_API_KEY")
|
| 40 |
|
|
|
|
| 5 |
import chromadb
|
| 6 |
from huggingface_hub import hf_hub_download
|
| 7 |
import os
|
| 8 |
+
import shutil
|
| 9 |
+
|
| 10 |
# Obtener el token desde las variables de entorno de Hugging Face Space
|
| 11 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 12 |
if HF_TOKEN is None:
|
| 13 |
raise ValueError("No se encontró la variable de entorno HF_TOKEN.")
|
| 14 |
|
| 15 |
+
# Eliminar caché previa para forzar la descarga
|
| 16 |
+
def eliminar_cache_huggingface(cache_path=None):
    """Delete a cached copy of the dataset repo so the next download is fresh.

    Args:
        cache_path: Directory tree to remove. When omitted, the hub cache
            folder of the ``VictorCarr02/Conversational-Agent-LawsEC``
            dataset under ``/home/user/.cache`` is used.

    Returns:
        None. A notice is printed only when a directory was actually removed.
    """
    if cache_path is None:
        cache_path = "/home/user/.cache/huggingface/hub/datasets--VictorCarr02--Conversational-Agent-LawsEC"
    try:
        # Attempt the removal directly instead of checking existence first,
        # so a directory that vanishes in between does not raise.
        shutil.rmtree(cache_path)
    except FileNotFoundError:
        # Nothing is cached at this location; there is nothing to clear.
        return
    print("Caché anterior eliminada. Forzando nueva descarga.")
|
| 21 |
+
|
| 22 |
+
# Eliminar la caché antes de la descarga
|
| 23 |
+
eliminar_cache_huggingface()
|
| 24 |
+
|
| 25 |
+
# Descargar los archivos
|
| 26 |
embedding_path = hf_hub_download(
|
| 27 |
repo_id="VictorCarr02/Conversational-Agent-LawsEC",
|
| 28 |
repo_type="dataset",
|
| 29 |
filename="data_level0.bin",
|
| 30 |
+
token=HF_TOKEN,
|
| 31 |
+
force_download=True # Fuerza la descarga
|
| 32 |
)
|
| 33 |
|
| 34 |
chroma_path = hf_hub_download(
|
| 35 |
repo_id="VictorCarr02/Conversational-Agent-LawsEC",
|
| 36 |
repo_type="dataset",
|
| 37 |
filename="chroma.sqlite3",
|
| 38 |
+
token=HF_TOKEN,
|
| 39 |
+
force_download=True # Fuerza la descarga
|
| 40 |
)
|
| 41 |
|
| 42 |
print("Archivos descargados en:")
|
| 43 |
print(f"Embeddings: {embedding_path}")
|
| 44 |
print(f"ChromaDB: {chroma_path}")
|
| 45 |
+
|
| 46 |
# Cargar ChromaDB y los embeddings
|
| 47 |
chromadb_client = chromadb.PersistentClient(path=chroma_path)
|
| 48 |
collection = chromadb_client.get_or_create_collection(name="mis_embeddings")
|
| 49 |
embeddings = HuggingFaceEmbeddings(model_name="mistralai/MistralAIEmbeddings", path=embedding_path)
|
| 50 |
vector_store = Chroma(collection=collection, embedding_function=embeddings)
|
| 51 |
|
|
|
|
|
|
|
| 52 |
# Acceder a la clave API desde la variable de entorno
|
| 53 |
api_key = os.getenv("MISTRAL_API_KEY")
|
| 54 |
|