Spaces:

Gems234
/

Quantization_Alisia

Sleeping

App Files Community

Gems234 committed on Sep 21, 2025

Commit

522870a

verified ·

1 Parent(s): 13dd02d

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -15

app.py CHANGED Viewed

@@ -57,7 +57,7 @@ print(f"✅ Modèle chargé! Threads: {n_threads}")
 lock = threading.Lock()
 conversations = {"Conversation 1": []}
 stop_generation = threading.Event()
-system_prompt_used = False  # Pour suivre si le system prompt a été utilisé
 # -------------------------
 # FONCTIONS UTILITAIRES OPTIMISÉES
@@ -113,9 +113,10 @@ def send_message_stream(user_message, displayed_history, current_chat_name):
     formatted_prompt = build_conversation_prompt(local_hist[:-1], str(user_message))
     partial = ""
     try:
-        # Utilisation directe du streaming sans buffering complexe
         stream = llm.create_completion(
             prompt=formatted_prompt,
             stream=True,
@@ -134,7 +135,9 @@ def send_message_stream(user_message, displayed_history, current_chat_name):
                 token = chunk["choices"][0].get("text", "")
                 if token:
                     partial += token
-                    # Mise à jour immédiate pour une meilleure réactivité
                     cleaned = clean_output(partial)
                     local_hist[-1] = (str(user_message), cleaned)
                     yield local_hist, ""
@@ -146,7 +149,7 @@ def send_message_stream(user_message, displayed_history, current_chat_name):
     finally:
         end_time = time.time()
-        print(f"⏱️  Génération: {end_time - start_time:.2f}s - {len(partial)} chars")
         with lock:
             conversations[current_chat_name] = local_hist.copy()
         yield local_hist, ""
@@ -178,7 +181,7 @@ def clear_chat():
     global system_prompt_used
     with lock:
         conversations["Conversation 1"] = []
-    system_prompt_used = False  # Réinitialiser pour le prochain chat
     return [], "Conversation 1"
 # -------------------------
@@ -363,17 +366,30 @@ css = """
     background: #1e293b;
     border-radius: 8px;
 }
 """
-with gr.Blocks(css=css, title="Alisia Chat - Ultra Rapide", theme=gr.themes.Soft()) as demo:
     history_visible = gr.State(True)
     current_chat = gr.State("Conversation 1")
     with gr.Row(elem_id="topbar"):
         menu_btn = gr.Button("☰", elem_classes="hamburger")
-        gr.Markdown("### 💬 Alisia <span class='alisia-badge'>AI Assistant</span>", elem_id="title")
         gr.HTML("<div style='flex:1'></div>")
-        gr.Markdown(f"<small style='color:#94a3b8'>CPU: {n_threads} threads • Mode Rapide</small>")
     with gr.Row():
         with gr.Column(scale=1, visible=True, elem_id="leftcol") as left_column:
@@ -397,10 +413,11 @@ with gr.Blocks(css=css, title="Alisia Chat - Ultra Rapide", theme=gr.themes.Soft
                 gr.Markdown("""
                 <div class="perf-info">
-                <strong>🚀 Mode Alpaca Optimisé</strong><br>
-                • System prompt unique<br>
-                • Streaming direct<br>
-                • Format Alpaca pur
                 </div>
                 """)
@@ -500,9 +517,10 @@ with gr.Blocks(css=css, title="Alisia Chat - Ultra Rapide", theme=gr.themes.Soft
 # LANCEMENT
 # -------------------------
 if __name__ == "__main__":
-    print("🚀 Lancement de l'interface optimisée...")
-    print("📋 Format Alpaca avec system prompt unique")
-    print(f"⚡ Threads CPU: {n_threads}")
     demo.launch(
         share=True,

 lock = threading.Lock()
 conversations = {"Conversation 1": []}
 stop_generation = threading.Event()
+system_prompt_used = False
 # -------------------------
 # FONCTIONS UTILITAIRES OPTIMISÉES
     formatted_prompt = build_conversation_prompt(local_hist[:-1], str(user_message))
     partial = ""
+    token_count = 0
     try:
+        # STREAMING RÉEL - Token par token
         stream = llm.create_completion(
             prompt=formatted_prompt,
             stream=True,
                 token = chunk["choices"][0].get("text", "")
                 if token:
                     partial += token
+                    token_count += 1
+                    # MISE À JOUR IMMÉDIATE - VRAI STREAMING
                     cleaned = clean_output(partial)
                     local_hist[-1] = (str(user_message), cleaned)
                     yield local_hist, ""
     finally:
         end_time = time.time()
+        print(f"⏱️  Génération: {end_time - start_time:.2f}s - {token_count} tokens")
         with lock:
             conversations[current_chat_name] = local_hist.copy()
         yield local_hist, ""
     global system_prompt_used
     with lock:
         conversations["Conversation 1"] = []
+    system_prompt_used = False
     return [], "Conversation 1"
 # -------------------------
     background: #1e293b;
     border-radius: 8px;
 }
+.streaming-indicator {
+    color: #10b981;
+    font-size: 12px;
+    margin-left: 10px;
+    animation: pulse 1.5s infinite;
+}
+@keyframes pulse {
+    0% { opacity: 1; }
+    50% { opacity: 0.5; }
+    100% { opacity: 1; }
+}
 """
+with gr.Blocks(css=css, title="Alisia Chat - Streaming Réel", theme=gr.themes.Soft()) as demo:
     history_visible = gr.State(True)
     current_chat = gr.State("Conversation 1")
     with gr.Row(elem_id="topbar"):
         menu_btn = gr.Button("☰", elem_classes="hamburger")
+        gr.Markdown("### 💬 Alisia <span class='alisia-badge'>Real Streaming</span><span class='streaming-indicator'>● LIVE</span>", elem_id="title")
         gr.HTML("<div style='flex:1'></div>")
+        gr.Markdown(f"<small style='color:#94a3b8'>Token-par-token • {n_threads} threads</small>")
     with gr.Row():
         with gr.Column(scale=1, visible=True, elem_id="leftcol") as left_column:
                 gr.Markdown("""
                 <div class="perf-info">
+                <strong>🚀 STREAMING RÉEL</strong><br>
+                • Token-par-token<br>
+                • Latence minimale<br>
+                • Format Alpaca pur<br>
+                • System prompt unique
                 </div>
                 """)
 # LANCEMENT
 # -------------------------
 if __name__ == "__main__":
+    print("🚀 Lancement avec STREAMING RÉEL...")
+    print("⚡ Token-par-token - Latence minimale")
+    print(f"💻 Threads CPU: {n_threads}")
+    print("📊 Monitoring temps réel activé")
     demo.launch(
         share=True,