Spaces:

FabIndy
/

code-education-rag

Running

FabIndy committed on Jan 14

Commit

47b8a7e

1 Parent(s): 1533d23

Use models/model.gguf and configure GGUF model via env vars

Files changed (2) hide show

app.py CHANGED Viewed

@@ -9,7 +9,7 @@ import sys
 import traceback
 import gradio as gr
 from huggingface_hub import hf_hub_download
-import os, shutil
 from pathlib import Path
 def ensure_faiss_index_present():
@@ -44,22 +44,31 @@ ensure_faiss_index_present()
 def ensure_model_present():
     os.makedirs("models", exist_ok=True)
-    local_path = os.path.join("models", "mistral.gguf")
     if os.path.exists(local_path):
         return
     repo_id = os.environ.get("MODEL_REPO_ID")
-    filename = os.environ.get("MODEL_FILENAME", "mistral.gguf")
     if not repo_id:
         raise RuntimeError(
-            "Modèle GGUF absent (models/mistral.gguf) et variable MODEL_REPO_ID non définie."
         )
-    downloaded = hf_hub_download(repo_id=repo_id, filename=filename)
-    import shutil
     shutil.copyfile(downloaded, local_path)
 ensure_model_present()
@@ -201,9 +210,9 @@ Conseil : pour une citation exacte, demande “Donne l’intégralité de l’ar
     gr.Markdown(
     """
 > **Information importante**
-> Lors du premier lancement, l’application peut nécessiter **1 à 2 minutes** d’initialisation.
 > Ensuite, l’utilisation est immédiate.
-> En cas d’utilisation simultanée, les demandes sont traitées **successivement** afin de garantir la fiabilité des réponses.
     """.strip()
 )

 import traceback
 import gradio as gr
 from huggingface_hub import hf_hub_download
+import shutil
 from pathlib import Path
 def ensure_faiss_index_present():
 def ensure_model_present():
     os.makedirs("models", exist_ok=True)
+    # Nouveau nom stable, cohérent avec rag_core.py
+    local_path = os.path.join("models", "model.gguf")
     if os.path.exists(local_path):
         return
+    # Repo HF contenant le GGUF
     repo_id = os.environ.get("MODEL_REPO_ID")
+    # IMPORTANT: mets ici le VRAI filename du GGUF dans le repo HF
+    filename = os.environ.get("MODEL_FILENAME")
     if not repo_id:
         raise RuntimeError(
+            "Modèle GGUF absent (models/model.gguf) et variable MODEL_REPO_ID non définie."
+        )
+    if not filename:
+        raise RuntimeError(
+            "Variable MODEL_FILENAME non définie (ex: Qwen2.5-1.5B-Instruct-Q4_K_M.gguf)."
         )
+    downloaded = hf_hub_download(repo_id=repo_id, filename=filename, repo_type="model")
     shutil.copyfile(downloaded, local_path)
 ensure_model_present()
     gr.Markdown(
     """
 > **Information importante**
+> Lors du premier lancement, l’application peut nécessiter 1 à 2 minutes d’initialisation.
 > Ensuite, l’utilisation est immédiate.
+> En cas d’utilisation simultanée, les demandes sont traitées successivement afin de garantir la fiabilité des réponses.
     """.strip()
 )

src/rag_core.py CHANGED Viewed

@@ -16,7 +16,7 @@ ROUTAGE AUTO :
 Prérequis :
 - data/chunks_articles.jsonl (article-level)
 - db/faiss_code_edu_by_article (FAISS)
-- models/mistral.gguf (GGUF)
 """
 import json
@@ -34,7 +34,6 @@ CHUNKS_PATH = Path("data/chunks_articles.jsonl")
 DB_DIR = Path("db/faiss_code_edu_by_article")
 EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
-MODEL_NAME = "mistral:latest"
 TOP_K_FETCH = 30            # nb de docs candidats récupérés
 TOP_K_FINAL = 4            # nb max envoyés au LLM
@@ -70,7 +69,7 @@ ARTICLES_CITES_RE = re.compile(r"Articles cités\s*:\s*(.*)$", flags=re.IGNORECA
 # -------------------- LLM INIT (FIDÈLE) --------------------
 llm = Llama(
-    model_path="models/mistral.gguf",  # Mistral GGUF
     n_ctx=2048,
     n_threads=10,
     n_batch=128,

 Prérequis :
 - data/chunks_articles.jsonl (article-level)
 - db/faiss_code_edu_by_article (FAISS)
+- models/model.gguf (GGUF)
 """
 import json
 DB_DIR = Path("db/faiss_code_edu_by_article")
 EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
 TOP_K_FETCH = 30            # nb de docs candidats récupérés
 TOP_K_FINAL = 4            # nb max envoyés au LLM
 # -------------------- LLM INIT (FIDÈLE) --------------------
 llm = Llama(
+    model_path="models/model.gguf",
     n_ctx=2048,
     n_threads=10,
     n_batch=128,