Gems234 committed on
Commit
25e486f
·
verified ·
1 Parent(s): 522870a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -98
app.py CHANGED
@@ -1,13 +1,12 @@
1
  import os
2
  import re
3
  import threading
4
- import time
5
  import warnings
6
  import gradio as gr
7
  from llama_cpp import Llama
8
 
9
  # -------------------------
10
- # TÉLÉCHARGEMENT DU MODÈLE
11
  # -------------------------
12
  MODEL_REPO = "mradermacher/Alisia-7B-it-GGUF"
13
  MODEL_NAME = "Alisia-7B-it.Q4_K_M.gguf"
@@ -15,7 +14,7 @@ MODEL_PATH = f"/tmp/{MODEL_NAME}"
15
 
16
  # Télécharger le modèle s'il n'existe pas
17
  if not os.path.exists(MODEL_PATH):
18
- print("📥 Téléchargement du modèle...")
19
  from huggingface_hub import hf_hub_download
20
  try:
21
  hf_hub_download(
@@ -27,40 +26,37 @@ if not os.path.exists(MODEL_PATH):
27
  print("✅ Modèle téléchargé avec succès!")
28
  except Exception as e:
29
  print(f"❌ Erreur téléchargement: {e}")
 
 
 
30
 
31
  # -------------------------
32
- # CONFIGURATION LLAMA.CPP OPTIMISÉE
33
  # -------------------------
34
  os.environ["LLAMA_CPP_LOG_LEVEL"] = "OFF"
35
  warnings.filterwarnings("ignore")
36
 
37
- print("⚡ Chargement du modèle avec llama.cpp...")
38
-
39
- # Détection automatique du nombre de threads
40
- import multiprocessing
41
- cpu_count = multiprocessing.cpu_count()
42
- n_threads = max(2, cpu_count - 1)
43
 
44
  llm = Llama(
45
  model_path=MODEL_PATH,
46
  n_ctx=2048,
47
- n_gpu_layers=0, # CPU uniquement
48
- n_threads=n_threads,
49
  verbose=False
50
  )
51
 
52
- print(f"✅ Modèle chargé! Threads: {n_threads}")
53
 
54
  # -------------------------
55
- # ÉTAT & SYNCHRONISATION
56
  # -------------------------
57
  lock = threading.Lock()
58
  conversations = {"Conversation 1": []}
59
  stop_generation = threading.Event()
60
- system_prompt_used = False
61
 
62
  # -------------------------
63
- # FONCTIONS UTILITAIRES OPTIMISÉES
64
  # -------------------------
65
  def clean_output(text: str) -> str:
66
  return re.sub(r"<\|im_.*?\|>", "", text).strip()
@@ -70,39 +66,38 @@ def get_conv_names():
70
  return list(conversations.keys())
71
 
72
  def build_conversation_prompt(history, new_message):
73
- """Format de prompt Alpaca avec system prompt UNIQUEMENT au début"""
74
- global system_prompt_used
75
-
 
76
  prompt = ""
77
-
78
- # System prompt UNIQUEMENT si jamais utilisé auparavant
79
- if not system_prompt_used:
80
  prompt += """Your name is Alisia, you are created by the Alisia research team.
81
  Below is an instruction that describes a task, paired with an input that provides further context.
82
  Write a response that appropriately completes the request.
83
 
84
  """
85
- system_prompt_used = True
86
-
87
- # Ajouter tout l'historique de conversation
88
  for user_msg, assistant_msg in history:
89
  prompt += f"### Instruction:\n{user_msg}\n\n### Response:\n{assistant_msg}\n\n"
90
-
91
  # Ajouter le nouveau message
92
  prompt += f"### Instruction:\n{new_message}\n\n### Response:\n"
93
-
94
  return prompt
95
 
96
  def send_message_stream(user_message, displayed_history, current_chat_name):
97
  global stop_generation
 
98
  stop_generation.clear()
99
 
100
- if not user_message or not str(user_message).strip():
101
  yield displayed_history or [], ""
102
  return
103
 
104
- start_time = time.time()
105
-
106
  with lock:
107
  if current_chat_name not in conversations:
108
  conversations[current_chat_name] = []
@@ -111,23 +106,20 @@ def send_message_stream(user_message, displayed_history, current_chat_name):
111
  local_hist.append((str(user_message), ""))
112
  yield local_hist, ""
113
 
 
114
  formatted_prompt = build_conversation_prompt(local_hist[:-1], str(user_message))
 
115
  partial = ""
116
- token_count = 0
117
-
118
  try:
119
- # STREAMING RÉEL - Token par token
120
- stream = llm.create_completion(
121
  prompt=formatted_prompt,
122
  stream=True,
123
  max_tokens=1024,
124
  temperature=0.7,
 
125
  top_p=0.8,
126
- repeat_penalty=1.05,
127
- stop=["### Instruction:", "### Response:", "<|endoftext|>", "\n\n\n"]
128
- )
129
-
130
- for chunk in stream:
131
  if stop_generation.is_set():
132
  break
133
 
@@ -135,9 +127,7 @@ def send_message_stream(user_message, displayed_history, current_chat_name):
135
  token = chunk["choices"][0].get("text", "")
136
  if token:
137
  partial += token
138
- token_count += 1
139
-
140
- # MISE À JOUR IMMÉDIATE - VRAI STREAMING
141
  cleaned = clean_output(partial)
142
  local_hist[-1] = (str(user_message), cleaned)
143
  yield local_hist, ""
@@ -148,14 +138,13 @@ def send_message_stream(user_message, displayed_history, current_chat_name):
148
  yield local_hist, ""
149
 
150
  finally:
151
- end_time = time.time()
152
- print(f"⏱️ Génération: {end_time - start_time:.2f}s - {token_count} tokens")
153
  with lock:
154
  conversations[current_chat_name] = local_hist.copy()
155
  yield local_hist, ""
156
 
157
  # -------------------------
158
- # FONCTIONS POUR L'INTERFACE
159
  # -------------------------
160
  def toggle_history(visible_state):
161
  new_state = not bool(visible_state)
@@ -178,14 +167,12 @@ def request_stop():
178
  return "🛑 Arrêt demandé..."
179
 
180
  def clear_chat():
181
- global system_prompt_used
182
  with lock:
183
  conversations["Conversation 1"] = []
184
- system_prompt_used = False
185
  return [], "Conversation 1"
186
 
187
  # -------------------------
188
- # INTERFACE GRADIO OPTIMISÉE
189
  # -------------------------
190
  css = """
191
  :root {
@@ -245,12 +232,14 @@ css = """
245
 
246
  #chatbot {
247
  flex-grow: 1;
248
- height: 600px;
 
249
  background: var(--chat-bg);
250
  border-radius: 16px;
251
  padding: 20px;
252
  overflow-y: auto;
253
  border: 1px solid #334155;
 
254
  }
255
 
256
  #input-container {
@@ -269,6 +258,7 @@ css = """
269
  padding: 16px 20px;
270
  font-size: 16px;
271
  box-shadow: inset 0 2px 4px rgba(0, 0, 0, 0.1);
 
272
  }
273
 
274
  #msg_input:focus {
@@ -288,6 +278,9 @@ css = """
288
  font-weight: 600;
289
  cursor: pointer;
290
  transition: background 0.2s;
 
 
 
291
  }
292
 
293
  #send_btn:hover {
@@ -305,6 +298,9 @@ css = """
305
  font-weight: 600;
306
  cursor: pointer;
307
  transition: background 0.2s;
 
 
 
308
  }
309
 
310
  #stop_btn:hover {
@@ -313,7 +309,7 @@ css = """
313
 
314
  .conversation-list {
315
  margin-top: 20px;
316
- max-height: 400px;
317
  overflow-y: auto;
318
  }
319
 
@@ -345,8 +341,8 @@ css = """
345
  }
346
 
347
  .clear-btn {
348
- background: #94a3b8;
349
- color: white;
350
  border: none;
351
  border-radius: 12px;
352
  padding: 10px 16px;
@@ -355,41 +351,19 @@ css = """
355
  }
356
 
357
  .clear-btn:hover {
358
- background: #64748b;
359
- }
360
-
361
- .perf-info {
362
- color: #94a3b8;
363
- font-size: 12px;
364
- margin-top: 10px;
365
- padding: 8px;
366
- background: #1e293b;
367
- border-radius: 8px;
368
- }
369
-
370
- .streaming-indicator {
371
- color: #10b981;
372
- font-size: 12px;
373
- margin-left: 10px;
374
- animation: pulse 1.5s infinite;
375
- }
376
-
377
- @keyframes pulse {
378
- 0% { opacity: 1; }
379
- 50% { opacity: 0.5; }
380
- 100% { opacity: 1; }
381
  }
382
  """
383
 
384
- with gr.Blocks(css=css, title="Alisia Chat - Streaming Réel", theme=gr.themes.Soft()) as demo:
385
  history_visible = gr.State(True)
386
  current_chat = gr.State("Conversation 1")
387
 
388
  with gr.Row(elem_id="topbar"):
389
  menu_btn = gr.Button("☰", elem_classes="hamburger")
390
- gr.Markdown("### 💬 Alisia <span class='alisia-badge'>Real Streaming</span><span class='streaming-indicator'>● LIVE</span>", elem_id="title")
391
  gr.HTML("<div style='flex:1'></div>")
392
- gr.Markdown(f"<small style='color:#94a3b8'>Token-par-token{n_threads} threads</small>")
393
 
394
  with gr.Row():
395
  with gr.Column(scale=1, visible=True, elem_id="leftcol") as left_column:
@@ -410,29 +384,26 @@ with gr.Blocks(css=css, title="Alisia Chat - Streaming Réel", theme=gr.themes.S
410
  "🗑️ Effacer chat",
411
  elem_classes="clear-btn"
412
  )
413
-
414
  gr.Markdown("""
415
- <div class="perf-info">
416
- <strong>🚀 STREAMING RÉEL</strong><br>
417
- Token-par-token<br>
418
- Latence minimale<br>
419
- • Format Alpaca pur<br>
420
- • System prompt unique
421
  </div>
422
- """)
423
 
424
  with gr.Column(scale=3, elem_id="chatcol"):
425
  with gr.Column(elem_id="chat-container"):
426
  chatbot = gr.Chatbot(
427
  label="Alisia",
428
  elem_id="chatbot",
429
- show_label=False,
430
- height=500
431
  )
432
  with gr.Row(elem_id="input-container"):
433
  msg_input = gr.Textbox(
434
  placeholder="Posez votre question à Alisia…",
435
- lines=2,
436
  show_label=False,
437
  elem_id="msg_input"
438
  )
@@ -517,14 +488,14 @@ with gr.Blocks(css=css, title="Alisia Chat - Streaming Réel", theme=gr.themes.S
517
  # LANCEMENT
518
  # -------------------------
519
  if __name__ == "__main__":
520
- print("🚀 Lancement avec STREAMING RÉEL...")
521
- print(" Token-par-token - Latence minimale")
522
- print(f"💻 Threads CPU: {n_threads}")
523
- print("📊 Monitoring temps réel activé")
524
-
525
  demo.launch(
526
- share=True,
527
- server_name="0.0.0.0",
528
- server_port=7860,
529
- debug=False
 
530
  )
 
1
  import os
2
  import re
3
  import threading
 
4
  import warnings
5
  import gradio as gr
6
  from llama_cpp import Llama
7
 
8
  # -------------------------
9
+ # TÉLÉCHARGEMENT DU MODÈLE HUGGING FACE
10
  # -------------------------
11
  MODEL_REPO = "mradermacher/Alisia-7B-it-GGUF"
12
  MODEL_NAME = "Alisia-7B-it.Q4_K_M.gguf"
 
14
 
15
  # Télécharger le modèle s'il n'existe pas
16
  if not os.path.exists(MODEL_PATH):
17
+ print("📥 Téléchargement du modèle depuis Hugging Face...")
18
  from huggingface_hub import hf_hub_download
19
  try:
20
  hf_hub_download(
 
26
  print("✅ Modèle téléchargé avec succès!")
27
  except Exception as e:
28
  print(f"❌ Erreur téléchargement: {e}")
29
+ # Fallback: utiliser le chemin local si le téléchargement échoue
30
+ MODEL_PATH = "./Alisia-7B-it.Q4_K_M.gguf"
31
+ print(f"🔄 Utilisation du chemin local: {MODEL_PATH}")
32
 
33
  # -------------------------
34
+ # Configuration
35
  # -------------------------
36
  os.environ["LLAMA_CPP_LOG_LEVEL"] = "OFF"
37
  warnings.filterwarnings("ignore")
38
 
39
+ print("⚡ Chargement du modèle Alisia-7B-it depuis Hugging Face...")
 
 
 
 
 
40
 
41
  llm = Llama(
42
  model_path=MODEL_PATH,
43
  n_ctx=2048,
44
+ n_gpu_layers=0, # Désactivé pour CPU
45
+ n_threads=8,
46
  verbose=False
47
  )
48
 
49
+ print("✅ Modèle chargé et prêt!")
50
 
51
  # -------------------------
52
+ # État & synchronisation
53
  # -------------------------
54
  lock = threading.Lock()
55
  conversations = {"Conversation 1": []}
56
  stop_generation = threading.Event()
 
57
 
58
  # -------------------------
59
+ # Fonctions utilitaires
60
  # -------------------------
61
  def clean_output(text: str) -> str:
62
  return re.sub(r"<\|im_.*?\|>", "", text).strip()
 
66
  return list(conversations.keys())
67
 
68
  def build_conversation_prompt(history, new_message):
69
+ """
70
+ Construit le prompt complet avec l'historique de conversation
71
+ System prompt UNIQUEMENT au début, ensuite seulement les Q/R
72
+ """
73
  prompt = ""
74
+
75
+ # System prompt UNIQUEMENT si c'est le tout premier message de toute conversation
76
+ if not any(any(conv) for conv in conversations.values()):
77
  prompt += """Your name is Alisia, you are created by the Alisia research team.
78
  Below is an instruction that describes a task, paired with an input that provides further context.
79
  Write a response that appropriately completes the request.
80
 
81
  """
82
+
83
+ # Ajouter tout l'historique de conversation (seulement les Q/R)
 
84
  for user_msg, assistant_msg in history:
85
  prompt += f"### Instruction:\n{user_msg}\n\n### Response:\n{assistant_msg}\n\n"
86
+
87
  # Ajouter le nouveau message
88
  prompt += f"### Instruction:\n{new_message}\n\n### Response:\n"
89
+
90
  return prompt
91
 
92
  def send_message_stream(user_message, displayed_history, current_chat_name):
93
  global stop_generation
94
+
95
  stop_generation.clear()
96
 
97
+ if user_message is None or not str(user_message).strip():
98
  yield displayed_history or [], ""
99
  return
100
 
 
 
101
  with lock:
102
  if current_chat_name not in conversations:
103
  conversations[current_chat_name] = []
 
106
  local_hist.append((str(user_message), ""))
107
  yield local_hist, ""
108
 
109
+ # Utiliser le format CORRECT Alpaca
110
  formatted_prompt = build_conversation_prompt(local_hist[:-1], str(user_message))
111
+
112
  partial = ""
 
 
113
  try:
114
+ for chunk in llm.create_completion(
 
115
  prompt=formatted_prompt,
116
  stream=True,
117
  max_tokens=1024,
118
  temperature=0.7,
119
+ stop=["### Instruction:", "### Input:", "### Response:", "<|endoftext|>", "\n\n\n"],
120
  top_p=0.8,
121
+ repeat_penalty=1.05
122
+ ):
 
 
 
123
  if stop_generation.is_set():
124
  break
125
 
 
127
  token = chunk["choices"][0].get("text", "")
128
  if token:
129
  partial += token
130
+ # Nettoyer et mettre à jour l'affichage
 
 
131
  cleaned = clean_output(partial)
132
  local_hist[-1] = (str(user_message), cleaned)
133
  yield local_hist, ""
 
138
  yield local_hist, ""
139
 
140
  finally:
141
+ # Sauvegarder l'historique après la génération
 
142
  with lock:
143
  conversations[current_chat_name] = local_hist.copy()
144
  yield local_hist, ""
145
 
146
  # -------------------------
147
+ # Fonctions pour l'interface
148
  # -------------------------
149
  def toggle_history(visible_state):
150
  new_state = not bool(visible_state)
 
167
  return "🛑 Arrêt demandé..."
168
 
169
  def clear_chat():
 
170
  with lock:
171
  conversations["Conversation 1"] = []
 
172
  return [], "Conversation 1"
173
 
174
  # -------------------------
175
+ # Interface Gradio
176
  # -------------------------
177
  css = """
178
  :root {
 
232
 
233
  #chatbot {
234
  flex-grow: 1;
235
+ height: calc(100vh - 200px) !important;
236
+ min-height: 500px;
237
  background: var(--chat-bg);
238
  border-radius: 16px;
239
  padding: 20px;
240
  overflow-y: auto;
241
  border: 1px solid #334155;
242
+ box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
243
  }
244
 
245
  #input-container {
 
258
  padding: 16px 20px;
259
  font-size: 16px;
260
  box-shadow: inset 0 2px 4px rgba(0, 0, 0, 0.1);
261
+ transition: all 0.3s ease;
262
  }
263
 
264
  #msg_input:focus {
 
278
  font-weight: 600;
279
  cursor: pointer;
280
  transition: background 0.2s;
281
+ display: flex;
282
+ align-items: center;
283
+ justify-content: center;
284
  }
285
 
286
  #send_btn:hover {
 
298
  font-weight: 600;
299
  cursor: pointer;
300
  transition: background 0.2s;
301
+ display: flex;
302
+ align-items: center;
303
+ justify-content: center;
304
  }
305
 
306
  #stop_btn:hover {
 
309
 
310
  .conversation-list {
311
  margin-top: 20px;
312
+ max-height: calc(100vh - 200px);
313
  overflow-y: auto;
314
  }
315
 
 
341
  }
342
 
343
  .clear-btn {
344
+ background: #94a3b8 !important;
345
+ color: white !important;
346
  border: none;
347
  border-radius: 12px;
348
  padding: 10px 16px;
 
351
  }
352
 
353
  .clear-btn:hover {
354
+ background: #64748b !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
355
  }
356
  """
357
 
358
+ with gr.Blocks(css=css, title="Alisia Chat", theme=gr.themes.Soft()) as demo:
359
  history_visible = gr.State(True)
360
  current_chat = gr.State("Conversation 1")
361
 
362
  with gr.Row(elem_id="topbar"):
363
  menu_btn = gr.Button("☰", elem_classes="hamburger")
364
+ gr.Markdown("### 💬 Alisia <span class='alisia-badge'>AI Assistant</span>", elem_id="title")
365
  gr.HTML("<div style='flex:1'></div>")
366
+ gr.Markdown("<small style='color:#94a3b8'>Hugging Face Alisia-7B-it</small>")
367
 
368
  with gr.Row():
369
  with gr.Column(scale=1, visible=True, elem_id="leftcol") as left_column:
 
384
  "🗑️ Effacer chat",
385
  elem_classes="clear-btn"
386
  )
387
+ gr.Markdown("## Format Alpaca", elem_classes="conversation-header")
388
  gr.Markdown("""
389
+ <div style="color: #94a3b8; font-size: 14px;">
390
+ Historique Q/R pur<br>
391
+ System prompt unique<br>
392
+ Multiples conversations
 
 
393
  </div>
394
+ """, elem_classes="conversation-subheader")
395
 
396
  with gr.Column(scale=3, elem_id="chatcol"):
397
  with gr.Column(elem_id="chat-container"):
398
  chatbot = gr.Chatbot(
399
  label="Alisia",
400
  elem_id="chatbot",
401
+ show_label=False
 
402
  )
403
  with gr.Row(elem_id="input-container"):
404
  msg_input = gr.Textbox(
405
  placeholder="Posez votre question à Alisia…",
406
+ lines=3,
407
  show_label=False,
408
  elem_id="msg_input"
409
  )
 
488
  # LANCEMENT
489
  # -------------------------
490
  if __name__ == "__main__":
491
+ print("🚀 Démarrage du serveur Alisia...")
492
+ print("📱 Préparation du lien de partage...")
493
+
494
+ # Lancement avec partage activé
 
495
  demo.launch(
496
+ share=True, # Crée un lien public
497
+ server_name="0.0.0.0", # Accepte les connexions externes
498
+ server_port=7860, # Port standard
499
+ debug=False, # Mode non-verbose pour performance
500
+ show_error=True # Affiche les erreurs
501
  )