Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,28 +1,26 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
-
import
|
| 3 |
from transformers import pipeline
|
| 4 |
|
| 5 |
-
#
|
| 6 |
-
|
| 7 |
model_id = "AllanF-SSU/Qwen2.5-G3V-Sovereign"
|
| 8 |
|
| 9 |
try:
|
| 10 |
-
# On
|
| 11 |
pipe = pipeline(
|
| 12 |
"text-generation",
|
| 13 |
model=model_id,
|
| 14 |
-
|
| 15 |
device_map="auto"
|
| 16 |
)
|
| 17 |
|
| 18 |
def predict(message, history):
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
return out['generated_text'][-1]['content']
|
| 23 |
|
| 24 |
gr.ChatInterface(predict).launch()
|
| 25 |
|
| 26 |
except Exception as e:
|
| 27 |
-
#
|
| 28 |
-
gr.Interface(lambda x: f"Erreur de mémoire : {e}", "text", "text").launch()
|
|
|
|
import gradio as gr
import os
from transformers import pipeline

# Read the Hugging Face access token from the Space's secrets.
token = os.getenv("HF_TOKEN")
model_id = "AllanF-SSU/Qwen2.5-G3V-Sovereign"

try:
    # Load the model as lightly as possible (8-bit / 4-bit quantization is not
    # supported on the free tier, so we keep the default dtype and let
    # `device_map="auto"` place the weights).
    pipe = pipeline(
        "text-generation",
        model=model_id,
        token=token,
        device_map="auto",
    )

    def predict(message, history):
        """Generate a chat reply for gr.ChatInterface.

        Parameters
        ----------
        message : str
            The user's latest message.
        history : list
            Prior turns supplied by gr.ChatInterface (unused — minimal formatting).

        Returns
        -------
        str
            The model's completion only.
        """
        # BUG FIX: without return_full_text=False the pipeline prepends the
        # prompt to its output, so the chat window would echo the user's own
        # message back at them.
        out = pipe(message, max_new_tokens=256, return_full_text=False)
        return out[0]['generated_text']

    gr.ChatInterface(predict).launch()

except Exception as e:
    # Top-level boundary: surface the failure in the UI instead of crashing.
    # BUG FIX: a bare gr.Markdown(...) created outside a Blocks context renders
    # nothing and never starts the server, leaving the Space blank on error.
    # Build a minimal Blocks app and launch it so the status is actually shown.
    with gr.Blocks() as demo:
        gr.Markdown(f"### ⚠️ Statut du Système\nLe modèle est en cours de chargement ou rencontre une limite de ressources.\nDétail : {e}")
    demo.launch()
|
|