SafaaAI commited on
Commit
dcc9a1f
·
verified ·
1 Parent(s): 6e91be0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -41
app.py CHANGED
@@ -1,79 +1,68 @@
1
  import gradio as gr
2
  import torch
3
- from transformers import AutoTokenizer, AutoModelForCausalLM
4
- import os
5
 
6
- # 🔹 Récupération du token Hugging Face
7
- hf_token = os.environ.get("HF_TOKEN")
8
- if hf_token is None:
9
- raise ValueError("⚠️ Le token Hugging Face (HF_TOKEN) est manquant. "
10
- "Ajoute-le dans les secrets de ton Space.")
11
 
12
- # 🔹 Identifiant du modèle
13
- model_id = "SafaaAI/final_llm_darija_fr_tech"
14
-
15
- # 🔹 Charger tokenizer et modèle en CPU
16
- tokenizer = AutoTokenizer.from_pretrained(
17
- model_id,
18
- token=hf_token,
19
- trust_remote_code=True
20
  )
21
 
 
 
22
  model = AutoModelForCausalLM.from_pretrained(
23
  model_id,
24
- token=hf_token,
25
- trust_remote_code=True,
26
- device_map=None # pas d’auto GPU
27
- ).to("cpu") # forcer CPU
28
 
29
- print("✅ Modèle chargé sur CPU")
30
 
31
- # 🔹 Fonction d’inférence
32
- def chat_with_model(message, history):
33
  history = history or []
34
- full_prompt = (
35
- "A chat between a curious user and an AI assistant capable of "
36
- "understanding Darija, French, and technical language.\n"
37
- )
38
-
39
- for user_message, bot_message in history:
40
- full_prompt += f"USER: {user_message}\nASSISTANT: {bot_message}\n"
41
 
42
- full_prompt += f"USER: {message}\nASSISTANT:"
 
43
 
44
- inputs = tokenizer(full_prompt, return_tensors="pt")
45
 
46
  with torch.no_grad():
47
  output_ids = model.generate(
48
  inputs["input_ids"],
49
- attention_mask=inputs["attention_mask"],
50
- max_new_tokens=100,
51
  do_sample=True,
52
  top_p=0.9,
53
- temperature=0.7,
54
- pad_token_id=tokenizer.eos_token_id
55
  )
56
 
57
  response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
58
-
59
  if "ASSISTANT:" in response:
60
  response = response.split("ASSISTANT:")[-1].strip()
61
 
62
- history.append((message, response))
63
  return history, history
64
 
65
  # 🔹 Interface Gradio
66
  with gr.Blocks() as demo:
67
- gr.Markdown("## 🤖 Chatbot SafaaAI - LLM (Darija + Français + Technique)")
68
 
69
  chatbot = gr.Chatbot(height=400)
70
- msg = gr.Textbox(label="💬 Écris ton message ici", placeholder="Pose ta question...")
 
 
71
  clear = gr.Button("🧹 Effacer la conversation")
72
 
73
  state = gr.State([])
74
 
75
- msg.submit(chat_with_model, [msg, state], [chatbot, state])
76
  clear.click(lambda: ([], []), None, [chatbot, state])
77
 
78
  if __name__ == "__main__":
79
- demo.launch()
 
1
  import gradio as gr
2
  import torch
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 
# Identifier of the model to serve.
# NOTE(review): LLaVA checkpoints normally require an image processor and a
# LLaVA-specific model class; loading through AutoModelForCausalLM + a plain
# tokenizer drives it as a text-only LM — confirm this is intended.
model_id = "liuhaotian/llava-v1.5-7b"  # TinyLLaVA is another option to try

# 4-bit NF4 quantization config so the 7B model fits in modest GPU memory.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

# Load tokenizer and quantized model; device_map="auto" lets accelerate place
# the weights on whatever devices are available.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)

print("✅ Modèle multimodal chargé en 4 bits")
26
 
27
# 🔹 Chat function
def chat(image, message, history=None):
    """Generate one assistant reply and append it to the conversation history.

    Args:
        image: Filepath of the uploaded image, or None.
            NOTE(review): the image is only echoed back into the chat display —
            it is never given to the model, which is called through a text-only
            tokenizer. True multimodal inference needs the model's image
            processor; confirm whether text-only is acceptable here.
        message: The user's text message.
        history: List of ((message, image), response) pairs. Defaults to a new
            list (avoids the mutable-default-argument pitfall of ``history=[]``).

    Returns:
        (history, history): the same updated list twice — one for the Chatbot
        display, one for the gr.State that persists the conversation.
    """
    history = history or []

    # Single-turn prompt; earlier turns are intentionally not replayed.
    full_prompt = "USER: " + message + "\nASSISTANT:"

    device = "cuda" if torch.cuda.is_available() else "cpu"
    inputs = tokenizer(full_prompt, return_tensors="pt").to(device)

    with torch.no_grad():
        output_ids = model.generate(
            inputs["input_ids"],
            # Pass the mask explicitly: silences the HF warning and avoids
            # wrong masking when padding is involved.
            attention_mask=inputs["attention_mask"],
            max_new_tokens=50,
            do_sample=True,
            top_p=0.9,
            temperature=0.7,
            # Many causal-LM tokenizers define no pad token; fall back to EOS
            # so generate() does not error or warn.
            pad_token_id=tokenizer.eos_token_id,
        )

    response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    # The decoded text includes the prompt; keep only the assistant's part.
    if "ASSISTANT:" in response:
        response = response.split("ASSISTANT:")[-1].strip()

    history.append(((message, image), response))
    return history, history
51
 
52
  # 🔹 Interface Gradio
53
  with gr.Blocks() as demo:
54
+ gr.Markdown("## 🤖 Chatbot Multimodal (Texte + Image) - Optimisé en 4 bits")
55
 
56
  chatbot = gr.Chatbot(height=400)
57
+ with gr.Row():
58
+ msg = gr.Textbox(label="💬 Écris ton message")
59
+ img = gr.Image(type="filepath", label="🖼️ Upload une image")
60
  clear = gr.Button("🧹 Effacer la conversation")
61
 
62
  state = gr.State([])
63
 
64
+ msg.submit(chat, [img, msg, state], [chatbot, state])
65
  clear.click(lambda: ([], []), None, [chatbot, state])
66
 
67
  if __name__ == "__main__":
68
+ demo.launch()