Spaces:

Gems234
/

Quantization_Alisia

Sleeping

App Files Community

Gems234 committed on Sep 21, 2025

Commit

2d04301

verified ·

1 Parent(s): edf00a5

Update app.py

Browse files

Files changed (1) hide show

app.py +297 -41

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import os
 import re
 import threading
 import warnings
 import gradio as gr
 from llama_cpp import Llama
@@ -26,8 +27,6 @@ if not os.path.exists(MODEL_PATH):
         print("✅ Modèle téléchargé avec succès!")
     except Exception as e:
         print(f"❌ Erreur téléchargement: {e}")
-        # Fallback: utiliser un modèle plus petit
-        MODEL_NAME = "Alisia-7B-it.Q4_K_M.gguf"  # ou un plus petit si disponible
 # -------------------------
 # CONFIGURATION LLAMA.CPP
@@ -38,12 +37,12 @@ warnings.filterwarnings("ignore")
 print("⚡ Chargement du modèle avec llama.cpp...")
 llm = Llama(
     model_path=MODEL_PATH,
-    n_ctx=4096,           # Contexte plus long
-    n_gpu_layers=0,       # 0 = CPU only (plus stable)
-    n_threads=8,          # Utilise plus de threads
-    n_batch=512,          # Batch size optimisé
     verbose=False,
-    use_mlock=True        # Meilleure performance
 )
 print("✅ Modèle chargé et prêt!")
@@ -85,7 +84,6 @@ def build_conversation_prompt(history, new_message):
 def send_message_stream(user_message, displayed_history, current_chat_name):
     global stop_generation
     stop_generation.clear()
     if not user_message or not str(user_message).strip():
@@ -100,20 +98,24 @@ def send_message_stream(user_message, displayed_history, current_chat_name):
     local_hist.append((str(user_message), ""))
     yield local_hist, ""
-    # Prompt optimisé
     formatted_prompt = build_conversation_prompt(local_hist[:-1], str(user_message))
     partial = ""
     try:
-        # Génération avec paramètres optimisés
         stream = llm.create_completion(
             prompt=formatted_prompt,
-            max_tokens=1024,           # Réduit pour plus de vitesse
             temperature=0.7,
             top_p=0.9,
             repeat_penalty=1.1,
-            stop=["### Instruction:", "### Response:", "\n\n"],
-            stream=True
         )
         for chunk in stream:
@@ -124,9 +126,27 @@ def send_message_stream(user_message, displayed_history, current_chat_name):
                 token = chunk["choices"][0].get("text", "")
                 if token:
                     partial += token
-                    cleaned = clean_output(partial)
-                    local_hist[-1] = (str(user_message), cleaned)
-                    yield local_hist, ""
     except Exception as e:
         err_text = f"[Erreur: {e}]"
@@ -138,6 +158,34 @@ def send_message_stream(user_message, displayed_history, current_chat_name):
             conversations[current_chat_name] = local_hist.copy()
         yield local_hist, ""
 # -------------------------
 # INTERFACE GRADIO OPTIMISÉE
 # -------------------------
@@ -149,13 +197,62 @@ css = """
     --input-bg: #1e293b;
 }
 #chatbot {
     flex-grow: 1;
-    height: 600px !important;
     background: var(--chat-bg);
     border-radius: 16px;
     padding: 20px;
     overflow-y: auto;
 }
 #input-container {
@@ -173,37 +270,195 @@ css = """
     border-radius: 24px;
     padding: 16px 20px;
     font-size: 16px;
 }
 """
 with gr.Blocks(css=css, title="Alisia Chat - Ultra Rapide", theme=gr.themes.Soft()) as demo:
-    gr.Markdown("## 🚀 Alisia Chat - Version Optimisée")
-    gr.Markdown("Interface ultra-rapide avec llama.cpp")
-    with gr.Row():
-        chatbot = gr.Chatbot(height=500, show_label=False)
     with gr.Row():
-        msg_input = gr.Textbox(
-            placeholder="Posez votre question à Alisia...",
-            lines=2,
-            show_label=False
-        )
-        send_btn = gr.Button("Envoyer", variant="primary")
-        stop_btn = gr.Button("Arrêter", variant="stop", visible=False)
-    # Événements simplifiés
-    def toggle_buttons():
-        return gr.update(visible=False), gr.update(visible=True)
     send_btn.click(
-        fn=toggle_buttons,
         inputs=None,
         outputs=[send_btn, stop_btn],
         queue=False
     ).then(
         fn=send_message_stream,
-        inputs=[msg_input, chatbot, gr.State("Conversation 1")],
         outputs=[chatbot, msg_input],
         queue=True
     ).then(
@@ -214,13 +469,13 @@ with gr.Blocks(css=css, title="Alisia Chat - Ultra Rapide", theme=gr.themes.Soft
     )
     msg_input.submit(
-        fn=toggle_buttons,
         inputs=None,
         outputs=[send_btn, stop_btn],
         queue=False
     ).then(
         fn=send_message_stream,
-        inputs=[msg_input, chatbot, gr.State("Conversation 1")],
         outputs=[chatbot, msg_input],
         queue=True
     ).then(
@@ -231,7 +486,7 @@ with gr.Blocks(css=css, title="Alisia Chat - Ultra Rapide", theme=gr.themes.Soft
     )
     stop_btn.click(
-        fn=lambda: stop_generation.set(),
         inputs=None,
         outputs=None
     )
@@ -240,10 +495,11 @@ with gr.Blocks(css=css, title="Alisia Chat - Ultra Rapide", theme=gr.themes.Soft
 # LANCEMENT
 # -------------------------
 if __name__ == "__main__":
-    print("🚀 Lancement de l'interface optimisée...")
     demo.launch(
         share=True,
         server_name="0.0.0.0",
         server_port=7860,
-        debug=False  # Désactivé pour plus de performance
     )

 import os
 import re
 import threading
+import time
 import warnings
 import gradio as gr
 from llama_cpp import Llama
         print("✅ Modèle téléchargé avec succès!")
     except Exception as e:
         print(f"❌ Erreur téléchargement: {e}")
 # -------------------------
 # CONFIGURATION LLAMA.CPP
 print("⚡ Chargement du modèle avec llama.cpp...")
 llm = Llama(
     model_path=MODEL_PATH,
+    n_ctx=4096,
+    n_gpu_layers=0,
+    n_threads=8,
+    n_batch=512,
     verbose=False,
+    use_mlock=True
 )
 print("✅ Modèle chargé et prêt!")
 def send_message_stream(user_message, displayed_history, current_chat_name):
     global stop_generation
     stop_generation.clear()
     if not user_message or not str(user_message).strip():
     local_hist.append((str(user_message), ""))
     yield local_hist, ""
     formatted_prompt = build_conversation_prompt(local_hist[:-1], str(user_message))
     partial = ""
+    # PARAMÈTRES DE RÉACTIVITÉ HYBRIDE
+    last_update = time.time()
+    token_count = 0
+    min_tokens = 2      # Minimum de tokens avant update
+    max_delay = 0.12    # Maximum 120ms entre updates
     try:
         stream = llm.create_completion(
             prompt=formatted_prompt,
+            stream=True,
+            max_tokens=1024,
             temperature=0.7,
             top_p=0.9,
             repeat_penalty=1.1,
+            stop=["### Instruction:", "### Response:", "\n\n", "<|endoftext|>"]
         )
         for chunk in stream:
                 token = chunk["choices"][0].get("text", "")
                 if token:
                     partial += token
+                    token_count += 1
+                    # STRATÉGIE DE RÉACTIVITÉ HYBRIDE
+                    should_update = (
+                        token_count >= min_tokens or
+                        time.time() - last_update > max_delay or
+                        token in [".", "!", "?", "\n", ",", ";", ":"]
+                    )
+                    if should_update:
+                        cleaned = clean_output(partial)
+                        local_hist[-1] = (str(user_message), cleaned)
+                        yield local_hist, ""
+                        last_update = time.time()
+                        token_count = 0
+        # DERNIER FLUSH - Garantit que tout est affiché
+        if partial:
+            cleaned = clean_output(partial)
+            local_hist[-1] = (str(user_message), cleaned)
+            yield local_hist, ""
     except Exception as e:
         err_text = f"[Erreur: {e}]"
             conversations[current_chat_name] = local_hist.copy()
         yield local_hist, ""
+# -------------------------
+# FONCTIONS POUR L'INTERFACE
+# -------------------------
+def toggle_history(visible_state):
+    """Flip the sidebar visibility flag.
+
+    Args:
+        visible_state: Current visibility flag; it is coerced with ``bool()``
+            before being negated, so any truthy/falsy value is accepted.
+
+    Returns:
+        A 2-tuple ``(new_state, update)``: the negated flag, and a
+        ``gr.update(visible=new_state)`` payload carrying the same value.
+    """
+    new_state = not bool(visible_state)
+    # The same value is returned twice: once as the raw flag and once wrapped
+    # in a Gradio update for the component whose visibility should change.
+    return new_state, gr.update(visible=new_state)
+def new_conversation():
+    """Register a new, empty conversation and select it.
+
+    Returns:
+        A 3-tuple ``(dropdown_update, history, name)``: a ``gr.update`` that
+        sets the choices to every known conversation name and selects the new
+        one, an empty history list, and the new conversation's name.
+    """
+    # `conversations` is only touched while holding `lock` (both are defined
+    # outside this block).
+    with lock:
+        # The name is derived from the current number of entries.
+        # NOTE(review): this would reuse an existing name if entries are ever
+        # removed from `conversations` - confirm nothing deletes them.
+        name = f"Conversation {len(conversations) + 1}"
+        conversations[name] = []
+        # Snapshot the names inside the lock so the list handed to the UI
+        # includes the entry that was just added.
+        names = list(conversations.keys())
+    return gr.update(choices=names, value=name), [], name
+def load_conversation(conv_name):
+    """Fetch the stored history of a conversation.
+
+    Args:
+        conv_name: Key to look up in `conversations`.
+
+    Returns:
+        A 2-tuple ``(history, conv_name)``. ``history`` is a shallow copy of
+        the stored list, or an empty list when `conv_name` is not a known key.
+    """
+    with lock:
+        # Copy while holding the lock so the caller gets its own list object
+        # rather than the one stored in `conversations`.
+        hist = conversations.get(conv_name, []).copy()
+    return hist, conv_name
+def request_stop():
+    """Signal that the current generation should stop.
+
+    Sets the module-level `stop_generation` flag (defined outside this block)
+    and returns a short status message.
+
+    Returns:
+        The status string "🛑 Arrêt demandé...".
+    """
+    stop_generation.set()
+    # NOTE(review): the stop button handler shown in this diff is registered
+    # with outputs=None, so this message does not appear to be routed to any
+    # component - confirm whether it is meant to be displayed.
+    return "🛑 Arrêt demandé..."
+def clear_chat():
+    """Reset the "Conversation 1" history and switch back to it.
+
+    Returns:
+        A 2-tuple ``(history, name)``: an empty history list and the literal
+        name "Conversation 1".
+    """
+    with lock:
+        # Always targets "Conversation 1": the function takes no argument, so
+        # it cannot know which conversation is currently selected.
+        # NOTE(review): if another conversation is active, its stored history
+        # is left untouched - confirm this is the intended behavior.
+        conversations["Conversation 1"] = []
+    return [], "Conversation 1"
 # -------------------------
 # INTERFACE GRADIO OPTIMISÉE
 # -------------------------
     --input-bg: #1e293b;
 }
+#topbar {
+    display: flex;
+    align-items: center;
+    gap: 12px;
+    padding: 10px;
+    background: var(--chat-bg);
+    color: #fff;
+    border-bottom: 1px solid #334155;
+}
+#leftcol {
+    background: #111218;
+    color: #fff;
+    padding: 12px;
+    min-height: 520px;
+    border-right: 1px solid #334155;
+    transition: all 0.3s ease;
+}
+#chatcol {
+    padding: 12px;
+    height: 100%;
+    display: flex;
+    flex-direction: column;
+}
+.hamburger {
+    font-size: 20px;
+    background: transparent;
+    color: #fff;
+    border: none;
+    cursor: pointer;
+    padding: 8px;
+    border-radius: 50%;
+    transition: background 0.2s;
+}
+.hamburger:hover {
+    background: #334155;
+}
+#chat-container {
+    flex-grow: 1;
+    display: flex;
+    flex-direction: column;
+    height: 100%;
+}
 #chatbot {
     flex-grow: 1;
+    height: 600px;
     background: var(--chat-bg);
     border-radius: 16px;
     padding: 20px;
     overflow-y: auto;
+    border: 1px solid #334155;
 }
 #input-container {
     border-radius: 24px;
     padding: 16px 20px;
     font-size: 16px;
+    box-shadow: inset 0 2px 4px rgba(0, 0, 0, 0.1);
+}
+#msg_input:focus {
+    outline: none;
+    border-color: var(--primary-color);
+    box-shadow: 0 0 0 3px rgba(79, 70, 229, 0.2);
+}
+#send_btn {
+    background: var(--primary-color);
+    color: white;
+    border: none;
+    border-radius: 24px;
+    padding: 14px 20px;
+    height: 50px;
+    min-width: 80px;
+    font-weight: 600;
+    cursor: pointer;
+    transition: background 0.2s;
+}
+#send_btn:hover {
+    background: var(--primary-hover);
+}
+#stop_btn {
+    background: #ef4444;
+    color: white;
+    border: none;
+    border-radius: 24px;
+    padding: 14px 20px;
+    height: 50px;
+    min-width: 80px;
+    font-weight: 600;
+    cursor: pointer;
+    transition: background 0.2s;
+}
+#stop_btn:hover {
+    background: #dc2626;
+}
+.conversation-list {
+    margin-top: 20px;
+    max-height: 400px;
+    overflow-y: auto;
+}
+.conversation-item {
+    padding: 12px 16px;
+    border-radius: 12px;
+    margin-bottom: 8px;
+    cursor: pointer;
+    transition: background 0.2s;
+}
+.conversation-item:hover {
+    background: #1e293b;
+}
+.conversation-item.active {
+    background: var(--primary-color);
+    color: white;
+}
+.alisia-badge {
+    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+    color: white;
+    padding: 4px 8px;
+    border-radius: 12px;
+    font-size: 12px;
+    font-weight: bold;
+    margin-left: 8px;
+}
+.clear-btn {
+    background: #94a3b8;
+    color: white;
+    border: none;
+    border-radius: 12px;
+    padding: 10px 16px;
+    margin-top: 10px;
+    cursor: pointer;
+}
+.clear-btn:hover {
+    background: #64748b;
 }
 """
 with gr.Blocks(css=css, title="Alisia Chat - Ultra Rapide", theme=gr.themes.Soft()) as demo:
+    history_visible = gr.State(True)
+    current_chat = gr.State("Conversation 1")
+    with gr.Row(elem_id="topbar"):
+        menu_btn = gr.Button("☰", elem_classes="hamburger")
+        gr.Markdown("### 💬 Alisia <span class='alisia-badge'>AI Assistant</span>", elem_id="title")
+        gr.HTML("<div style='flex:1'></div>")
+        gr.Markdown("<small style='color:#94a3b8'>llama.cpp optimisé</small>")
     with gr.Row():
+        with gr.Column(scale=1, visible=True, elem_id="leftcol") as left_column:
+            with gr.Column(elem_classes="conversation-list"):
+                conv_dropdown = gr.Dropdown(
+                    choices=get_conv_names(),
+                    value="Conversation 1",
+                    label="Conversations",
+                    interactive=True,
+                    elem_classes="conversation-item"
+                )
+                new_conv_btn = gr.Button(
+                    "➕ Nouvelle conversation",
+                    variant="primary",
+                    elem_classes="conversation-item"
+                )
+                clear_btn = gr.Button(
+                    "🗑️ Effacer chat",
+                    elem_classes="clear-btn"
+                )
+                gr.Markdown("## 🚀 Mode Ultra-Rapide", elem_classes="conversation-header")
+                gr.Markdown("""
+                <div style="color: #94a3b8; font-size: 14px;">
+                ✅ Streaming hybride<br>
+                ✅ Réactivité 120ms<br>
+                ✅ Optimisé llama.cpp
+                </div>
+                """, elem_classes="conversation-subheader")
+        with gr.Column(scale=3, elem_id="chatcol"):
+            with gr.Column(elem_id="chat-container"):
+                chatbot = gr.Chatbot(
+                    label="Alisia",
+                    elem_id="chatbot",
+                    show_label=False,
+                    height=500
+                )
+                with gr.Row(elem_id="input-container"):
+                    msg_input = gr.Textbox(
+                        placeholder="Posez votre question à Alisia…",
+                        lines=2,
+                        show_label=False,
+                        elem_id="msg_input"
+                    )
+                    send_btn = gr.Button(
+                        "Envoyer",
+                        variant="primary",
+                        elem_id="send_btn"
+                    )
+                    stop_btn = gr.Button(
+                        "Arrêter",
+                        variant="stop",
+                        elem_id="stop_btn",
+                        visible=False
+                    )
+    # Événements
+    menu_btn.click(
+        fn=toggle_history,
+        inputs=[history_visible],
+        outputs=[history_visible, left_column]
+    )
+    new_conv_btn.click(
+        fn=new_conversation,
+        inputs=None,
+        outputs=[conv_dropdown, chatbot, current_chat]
+    )
+    clear_btn.click(
+        fn=clear_chat,
+        inputs=None,
+        outputs=[chatbot, current_chat]
+    )
+    conv_dropdown.change(
+        fn=load_conversation,
+        inputs=[conv_dropdown],
+        outputs=[chatbot, current_chat]
+    )
     send_btn.click(
+        fn=lambda: (gr.update(visible=False), gr.update(visible=True)),
         inputs=None,
         outputs=[send_btn, stop_btn],
         queue=False
     ).then(
         fn=send_message_stream,
+        inputs=[msg_input, chatbot, current_chat],
         outputs=[chatbot, msg_input],
         queue=True
     ).then(
     )
     msg_input.submit(
+        fn=lambda: (gr.update(visible=False), gr.update(visible=True)),
         inputs=None,
         outputs=[send_btn, stop_btn],
         queue=False
     ).then(
         fn=send_message_stream,
+        inputs=[msg_input, chatbot, current_chat],
         outputs=[chatbot, msg_input],
         queue=True
     ).then(
     )
     stop_btn.click(
+        fn=request_stop,
         inputs=None,
         outputs=None
     )
 # LANCEMENT
 # -------------------------
 if __name__ == "__main__":
+    print("🚀 Lancement de l'interface ultra-réactive...")
+    print("⏱️  Mode streaming hybride activé (120ms)")
     demo.launch(
         share=True,
         server_name="0.0.0.0",
         server_port=7860,
+        debug=False
     )