SafaaAI commited on
Commit
dcc9a1f
·
verified ·
1 Parent(s): 6e91be0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -41
app.py CHANGED
@@ -1,79 +1,68 @@
1
  import gradio as gr
2
  import torch
3
- from transformers import AutoTokenizer, AutoModelForCausalLM
4
- import os
5
 
6
- # 🔹 Récupération du token Hugging Face
7
- hf_token = os.environ.get("HF_TOKEN")
8
- if hf_token is None:
9
- raise ValueError("⚠️ Le token Hugging Face (HF_TOKEN) est manquant. "
10
- "Ajoute-le dans les secrets de ton Space.")
11
 
12
- # 🔹 Identifiant du modèle
13
- model_id = "SafaaAI/final_llm_darija_fr_tech"
14
-
15
- # 🔹 Charger tokenizer et modèle en CPU
16
- tokenizer = AutoTokenizer.from_pretrained(
17
- model_id,
18
- token=hf_token,
19
- trust_remote_code=True
20
  )
21
 
 
 
22
  model = AutoModelForCausalLM.from_pretrained(
23
  model_id,
24
- token=hf_token,
25
- trust_remote_code=True,
26
- device_map=None # pas d’auto GPU
27
- ).to("cpu") # forcer CPU
28
 
29
- print("✅ Modèle chargé sur CPU")
30
 
31
- # 🔹 Fonction d’inférence
32
- def chat_with_model(message, history):
33
  history = history or []
34
- full_prompt = (
35
- "A chat between a curious user and an AI assistant capable of "
36
- "understanding Darija, French, and technical language.\n"
37
- )
38
-
39
- for user_message, bot_message in history:
40
- full_prompt += f"USER: {user_message}\nASSISTANT: {bot_message}\n"
41
 
42
- full_prompt += f"USER: {message}\nASSISTANT:"
 
43
 
44
- inputs = tokenizer(full_prompt, return_tensors="pt")
45
 
46
  with torch.no_grad():
47
  output_ids = model.generate(
48
  inputs["input_ids"],
49
- attention_mask=inputs["attention_mask"],
50
- max_new_tokens=100,
51
  do_sample=True,
52
  top_p=0.9,
53
- temperature=0.7,
54
- pad_token_id=tokenizer.eos_token_id
55
  )
56
 
57
  response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
58
-
59
  if "ASSISTANT:" in response:
60
  response = response.split("ASSISTANT:")[-1].strip()
61
 
62
- history.append((message, response))
63
  return history, history
64
 
65
  # 🔹 Interface Gradio
66
  with gr.Blocks() as demo:
67
- gr.Markdown("## 🤖 Chatbot SafaaAI - LLM (Darija + Français + Technique)")
68
 
69
  chatbot = gr.Chatbot(height=400)
70
- msg = gr.Textbox(label="💬 Écris ton message ici", placeholder="Pose ta question...")
 
 
71
  clear = gr.Button("🧹 Effacer la conversation")
72
 
73
  state = gr.State([])
74
 
75
- msg.submit(chat_with_model, [msg, state], [chatbot, state])
76
  clear.click(lambda: ([], []), None, [chatbot, state])
77
 
78
  if __name__ == "__main__":
79
- demo.launch()
 
1
  import gradio as gr
2
  import torch
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 
# Identifier of the model to serve.
# NOTE(review): LLaVA checkpoints normally require an image processor and a
# LLaVA-specific model class; loading through AutoModelForCausalLM + a plain
# tokenizer drives it as a text-only LM — confirm this is intended.
model_id = "liuhaotian/llava-v1.5-7b"  # TinyLLaVA is another option to try

# 4-bit NF4 quantization config so the 7B model fits in modest GPU memory.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

# Load tokenizer and quantized model; device_map="auto" lets accelerate place
# the weights on whatever devices are available.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)

print("✅ Modèle multimodal chargé en 4 bits")
26
 
27
# 🔹 Chat function
def chat(image, message, history=None):
    """Generate one assistant reply and append it to the conversation history.

    Args:
        image: Filepath of the uploaded image, or None.
            NOTE(review): the image is only echoed back into the chat display —
            it is never given to the model, which is called through a text-only
            tokenizer. True multimodal inference needs the model's image
            processor; confirm whether text-only is acceptable here.
        message: The user's text message.
        history: List of ((message, image), response) pairs. Defaults to a new
            list (avoids the mutable-default-argument pitfall of ``history=[]``).

    Returns:
        (history, history): the same updated list twice — one for the Chatbot
        display, one for the gr.State that persists the conversation.
    """
    history = history or []

    # Single-turn prompt; earlier turns are intentionally not replayed.
    full_prompt = "USER: " + message + "\nASSISTANT:"

    device = "cuda" if torch.cuda.is_available() else "cpu"
    inputs = tokenizer(full_prompt, return_tensors="pt").to(device)

    with torch.no_grad():
        output_ids = model.generate(
            inputs["input_ids"],
            # Pass the mask explicitly: silences the HF warning and avoids
            # wrong masking when padding is involved.
            attention_mask=inputs["attention_mask"],
            max_new_tokens=50,
            do_sample=True,
            top_p=0.9,
            temperature=0.7,
            # Many causal-LM tokenizers define no pad token; fall back to EOS
            # so generate() does not error or warn.
            pad_token_id=tokenizer.eos_token_id,
        )

    response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    # The decoded text includes the prompt; keep only the assistant's part.
    if "ASSISTANT:" in response:
        response = response.split("ASSISTANT:")[-1].strip()

    history.append(((message, image), response))
    return history, history
51
 
52
  # 🔹 Interface Gradio
53
  with gr.Blocks() as demo:
54
+ gr.Markdown("## 🤖 Chatbot Multimodal (Texte + Image) - Optimisé en 4 bits")
55
 
56
  chatbot = gr.Chatbot(height=400)
57
+ with gr.Row():
58
+ msg = gr.Textbox(label="💬 Écris ton message")
59
+ img = gr.Image(type="filepath", label="🖼️ Upload une image")
60
  clear = gr.Button("🧹 Effacer la conversation")
61
 
62
  state = gr.State([])
63
 
64
+ msg.submit(chat, [img, msg, state], [chatbot, state])
65
  clear.click(lambda: ([], []), None, [chatbot, state])
66
 
67
  if __name__ == "__main__":
68
+ demo.launch()