Spaces:

SafaaAI
/

LLM-Darija-FR-Tech

Sleeping

App Files Files Community

SafaaAI committed on Sep 4, 2025

Commit

6e91be0

verified ·

1 Parent(s): 7600281

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -41

app.py CHANGED Viewed

@@ -3,16 +3,16 @@ import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import os
-# 🔹 Récupérez le token depuis les secrets du Space
 hf_token = os.environ.get("HF_TOKEN")
 if hf_token is None:
-    raise ValueError("⚠️ Le token Hugging Face (HF_TOKEN) n'est pas trouvé. "
-                     "Vérifie que tu l’as bien ajouté dans les secrets du Space.")
-# 🔹 Charger le tokenizer et le modèle
 model_id = "SafaaAI/final_llm_darija_fr_tech"
 tokenizer = AutoTokenizer.from_pretrained(
     model_id,
     token=hf_token,
@@ -23,56 +23,51 @@ model = AutoModelForCausalLM.from_pretrained(
     model_id,
     token=hf_token,
     trust_remote_code=True,
-    device_map="auto"
-)
-# 🔹 Fonction d'inférence
 def chat_with_model(message, history):
     history = history or []
-    full_prompt = "A chat between a curious user and an AI assistant."
-    # Construire le prompt manuellement
     for user_message, bot_message in history:
-        full_prompt += f" USER: {user_message} ASSISTANT: {bot_message}"
-    # Ajouter le message actuel de l'utilisateur
-    full_prompt += f" USER: {message} ASSISTANT:"
-    # Encoder le prompt avec le tokenizer
-    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
-    # 📝 Extraire explicitement input_ids et attention_mask
-    input_ids = inputs["input_ids"]
-    attention_mask = inputs["attention_mask"]
-    # Générer la réponse
     with torch.no_grad():
         output_ids = model.generate(
-            input_ids,
-            attention_mask=attention_mask,
-            max_new_tokens=200,
             do_sample=True,
             top_p=0.9,
-            temperature=0.7
         )
-    # Décoder la sortie
     response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
-    # Nettoyer la réponse pour ne pas inclure le prompt
-    response_start_index = response.rfind("ASSISTANT:")
-    if response_start_index != -1:
-        response = response[response_start_index + len("ASSISTANT:"):].strip()
     history.append((message, response))
     return history, history
 # 🔹 Interface Gradio
 with gr.Blocks() as demo:
-    gr.Markdown("## 💬 Chatbot SafaaAI - LLM (Darija + Français + Technique)")
-    chatbot = gr.Chatbot()
-    msg = gr.Textbox(label="Écris ton message ici")
     clear = gr.Button("🧹 Effacer la conversation")
     state = gr.State([])
@@ -80,6 +75,5 @@ with gr.Blocks() as demo:
     msg.submit(chat_with_model, [msg, state], [chatbot, state])
     clear.click(lambda: ([], []), None, [chatbot, state])
-# 🔹 Lancer l'application
 if __name__ == "__main__":
-    demo.launch()

 from transformers import AutoTokenizer, AutoModelForCausalLM
 import os
+# 🔹 Récupération du token Hugging Face
 hf_token = os.environ.get("HF_TOKEN")
 if hf_token is None:
+    raise ValueError("⚠️ Le token Hugging Face (HF_TOKEN) est manquant. "
+                     "Ajoute-le dans les secrets de ton Space.")
+# 🔹 Identifiant du modèle
 model_id = "SafaaAI/final_llm_darija_fr_tech"
+# 🔹 Charger tokenizer et modèle en CPU
 tokenizer = AutoTokenizer.from_pretrained(
     model_id,
     token=hf_token,
     model_id,
     token=hf_token,
     trust_remote_code=True,
+    device_map=None  # pas d’auto GPU
+).to("cpu")  # forcer CPU
+print("✅ Modèle chargé sur CPU")
+# 🔹 Fonction d’inférence
 def chat_with_model(message, history):
     history = history or []
+    full_prompt = (
+        "A chat between a curious user and an AI assistant capable of "
+        "understanding Darija, French, and technical language.\n"
+    )
     for user_message, bot_message in history:
+        full_prompt += f"USER: {user_message}\nASSISTANT: {bot_message}\n"
+    full_prompt += f"USER: {message}\nASSISTANT:"
+    inputs = tokenizer(full_prompt, return_tensors="pt")
     with torch.no_grad():
         output_ids = model.generate(
+            inputs["input_ids"],
+            attention_mask=inputs["attention_mask"],
+            max_new_tokens=100,
             do_sample=True,
             top_p=0.9,
+            temperature=0.7,
+            pad_token_id=tokenizer.eos_token_id
         )
     response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+    if "ASSISTANT:" in response:
+        response = response.split("ASSISTANT:")[-1].strip()
     history.append((message, response))
     return history, history
 # 🔹 Interface Gradio
 with gr.Blocks() as demo:
+    gr.Markdown("## 🤖 Chatbot SafaaAI - LLM (Darija + Français + Technique)")
+    chatbot = gr.Chatbot(height=400)
+    msg = gr.Textbox(label="💬 Écris ton message ici", placeholder="Pose ta question...")
     clear = gr.Button("🧹 Effacer la conversation")
     state = gr.State([])
     msg.submit(chat_with_model, [msg, state], [chatbot, state])
     clear.click(lambda: ([], []), None, [chatbot, state])
 if __name__ == "__main__":
+    demo.launch()