Gems234 committed on
Commit
2d04301
·
verified ·
1 Parent(s): edf00a5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +297 -41
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import os
2
  import re
3
  import threading
 
4
  import warnings
5
  import gradio as gr
6
  from llama_cpp import Llama
@@ -26,8 +27,6 @@ if not os.path.exists(MODEL_PATH):
26
  print("✅ Modèle téléchargé avec succès!")
27
  except Exception as e:
28
  print(f"❌ Erreur téléchargement: {e}")
29
- # Fallback: utiliser un modèle plus petit
30
- MODEL_NAME = "Alisia-7B-it.Q4_K_M.gguf" # ou un plus petit si disponible
31
 
32
  # -------------------------
33
  # CONFIGURATION LLAMA.CPP
@@ -38,12 +37,12 @@ warnings.filterwarnings("ignore")
38
  print("⚡ Chargement du modèle avec llama.cpp...")
39
  llm = Llama(
40
  model_path=MODEL_PATH,
41
- n_ctx=4096, # Contexte plus long
42
- n_gpu_layers=0, # 0 = CPU only (plus stable)
43
- n_threads=8, # Utilise plus de threads
44
- n_batch=512, # Batch size optimisé
45
  verbose=False,
46
- use_mlock=True # Meilleure performance
47
  )
48
 
49
  print("✅ Modèle chargé et prêt!")
@@ -85,7 +84,6 @@ def build_conversation_prompt(history, new_message):
85
 
86
  def send_message_stream(user_message, displayed_history, current_chat_name):
87
  global stop_generation
88
-
89
  stop_generation.clear()
90
 
91
  if not user_message or not str(user_message).strip():
@@ -100,20 +98,24 @@ def send_message_stream(user_message, displayed_history, current_chat_name):
100
  local_hist.append((str(user_message), ""))
101
  yield local_hist, ""
102
 
103
- # Prompt optimisé
104
  formatted_prompt = build_conversation_prompt(local_hist[:-1], str(user_message))
105
-
106
  partial = ""
 
 
 
 
 
 
 
107
  try:
108
- # Génération avec paramètres optimisés
109
  stream = llm.create_completion(
110
  prompt=formatted_prompt,
111
- max_tokens=1024, # Réduit pour plus de vitesse
 
112
  temperature=0.7,
113
  top_p=0.9,
114
  repeat_penalty=1.1,
115
- stop=["### Instruction:", "### Response:", "\n\n"],
116
- stream=True
117
  )
118
 
119
  for chunk in stream:
@@ -124,9 +126,27 @@ def send_message_stream(user_message, displayed_history, current_chat_name):
124
  token = chunk["choices"][0].get("text", "")
125
  if token:
126
  partial += token
127
- cleaned = clean_output(partial)
128
- local_hist[-1] = (str(user_message), cleaned)
129
- yield local_hist, ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
 
131
  except Exception as e:
132
  err_text = f"[Erreur: {e}]"
@@ -138,6 +158,34 @@ def send_message_stream(user_message, displayed_history, current_chat_name):
138
  conversations[current_chat_name] = local_hist.copy()
139
  yield local_hist, ""
140
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  # -------------------------
142
  # INTERFACE GRADIO OPTIMISÉE
143
  # -------------------------
@@ -149,13 +197,62 @@ css = """
149
  --input-bg: #1e293b;
150
  }
151
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  #chatbot {
153
  flex-grow: 1;
154
- height: 600px !important;
155
  background: var(--chat-bg);
156
  border-radius: 16px;
157
  padding: 20px;
158
  overflow-y: auto;
 
159
  }
160
 
161
  #input-container {
@@ -173,37 +270,195 @@ css = """
173
  border-radius: 24px;
174
  padding: 16px 20px;
175
  font-size: 16px;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
  }
177
  """
178
 
179
  with gr.Blocks(css=css, title="Alisia Chat - Ultra Rapide", theme=gr.themes.Soft()) as demo:
180
- gr.Markdown("## 🚀 Alisia Chat - Version Optimisée")
181
- gr.Markdown("Interface ultra-rapide avec llama.cpp")
182
-
183
- with gr.Row():
184
- chatbot = gr.Chatbot(height=500, show_label=False)
185
-
 
 
 
186
  with gr.Row():
187
- msg_input = gr.Textbox(
188
- placeholder="Posez votre question à Alisia...",
189
- lines=2,
190
- show_label=False
191
- )
192
- send_btn = gr.Button("Envoyer", variant="primary")
193
- stop_btn = gr.Button("Arrêter", variant="stop", visible=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
 
195
- # Événements simplifiés
196
- def toggle_buttons():
197
- return gr.update(visible=False), gr.update(visible=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
 
199
  send_btn.click(
200
- fn=toggle_buttons,
201
  inputs=None,
202
  outputs=[send_btn, stop_btn],
203
  queue=False
204
  ).then(
205
  fn=send_message_stream,
206
- inputs=[msg_input, chatbot, gr.State("Conversation 1")],
207
  outputs=[chatbot, msg_input],
208
  queue=True
209
  ).then(
@@ -214,13 +469,13 @@ with gr.Blocks(css=css, title="Alisia Chat - Ultra Rapide", theme=gr.themes.Soft
214
  )
215
 
216
  msg_input.submit(
217
- fn=toggle_buttons,
218
  inputs=None,
219
  outputs=[send_btn, stop_btn],
220
  queue=False
221
  ).then(
222
  fn=send_message_stream,
223
- inputs=[msg_input, chatbot, gr.State("Conversation 1")],
224
  outputs=[chatbot, msg_input],
225
  queue=True
226
  ).then(
@@ -231,7 +486,7 @@ with gr.Blocks(css=css, title="Alisia Chat - Ultra Rapide", theme=gr.themes.Soft
231
  )
232
 
233
  stop_btn.click(
234
- fn=lambda: stop_generation.set(),
235
  inputs=None,
236
  outputs=None
237
  )
@@ -240,10 +495,11 @@ with gr.Blocks(css=css, title="Alisia Chat - Ultra Rapide", theme=gr.themes.Soft
240
  # LANCEMENT
241
  # -------------------------
242
  if __name__ == "__main__":
243
- print("🚀 Lancement de l'interface optimisée...")
 
244
  demo.launch(
245
  share=True,
246
  server_name="0.0.0.0",
247
  server_port=7860,
248
- debug=False # Désactivé pour plus de performance
249
  )
 
1
  import os
2
  import re
3
  import threading
4
+ import time
5
  import warnings
6
  import gradio as gr
7
  from llama_cpp import Llama
 
27
  print("✅ Modèle téléchargé avec succès!")
28
  except Exception as e:
29
  print(f"❌ Erreur téléchargement: {e}")
 
 
30
 
31
  # -------------------------
32
  # CONFIGURATION LLAMA.CPP
 
37
  print("⚡ Chargement du modèle avec llama.cpp...")
38
  llm = Llama(
39
  model_path=MODEL_PATH,
40
+ n_ctx=4096,
41
+ n_gpu_layers=0,
42
+ n_threads=8,
43
+ n_batch=512,
44
  verbose=False,
45
+ use_mlock=True
46
  )
47
 
48
  print("✅ Modèle chargé et prêt!")
 
84
 
85
  def send_message_stream(user_message, displayed_history, current_chat_name):
86
  global stop_generation
 
87
  stop_generation.clear()
88
 
89
  if not user_message or not str(user_message).strip():
 
98
  local_hist.append((str(user_message), ""))
99
  yield local_hist, ""
100
 
 
101
  formatted_prompt = build_conversation_prompt(local_hist[:-1], str(user_message))
 
102
  partial = ""
103
+
104
+ # PARAMÈTRES DE RÉACTIVITÉ HYBRIDE
105
+ last_update = time.time()
106
+ token_count = 0
107
+ min_tokens = 2 # Minimum de tokens avant update
108
+ max_delay = 0.12 # Maximum 120ms entre updates
109
+
110
  try:
 
111
  stream = llm.create_completion(
112
  prompt=formatted_prompt,
113
+ stream=True,
114
+ max_tokens=1024,
115
  temperature=0.7,
116
  top_p=0.9,
117
  repeat_penalty=1.1,
118
+ stop=["### Instruction:", "### Response:", "\n\n", "<|endoftext|>"]
 
119
  )
120
 
121
  for chunk in stream:
 
126
  token = chunk["choices"][0].get("text", "")
127
  if token:
128
  partial += token
129
+ token_count += 1
130
+
131
+ # STRATÉGIE DE RÉACTIVITÉ HYBRIDE
132
+ should_update = (
133
+ token_count >= min_tokens or
134
+ time.time() - last_update > max_delay or
135
+ token in [".", "!", "?", "\n", ",", ";", ":"]
136
+ )
137
+
138
+ if should_update:
139
+ cleaned = clean_output(partial)
140
+ local_hist[-1] = (str(user_message), cleaned)
141
+ yield local_hist, ""
142
+ last_update = time.time()
143
+ token_count = 0
144
+
145
+ # DERNIER FLUSH - Garantit que tout est affiché
146
+ if partial:
147
+ cleaned = clean_output(partial)
148
+ local_hist[-1] = (str(user_message), cleaned)
149
+ yield local_hist, ""
150
 
151
  except Exception as e:
152
  err_text = f"[Erreur: {e}]"
 
158
  conversations[current_chat_name] = local_hist.copy()
159
  yield local_hist, ""
160
 
161
+ # -------------------------
162
+ # FONCTIONS POUR L'INTERFACE
163
+ # -------------------------
164
def toggle_history(visible_state):
    """Flip the visibility flag of the conversation side panel.

    Args:
        visible_state: Current visibility flag; any truthy/falsy value.

    Returns:
        A ``(flag, update)`` pair: the negated flag as a bool, and a
        ``gr.update`` carrying that same visibility.
    """
    now_visible = not visible_state
    return now_visible, gr.update(visible=now_visible)
167
+
168
def new_conversation():
    """Register a fresh, empty conversation and make it the selected one.

    The new entry is named ``"Conversation <k>"``, where ``k`` is the number
    of conversations currently stored plus one.

    Returns:
        A 3-tuple: a ``gr.update`` carrying the refreshed list of names with
        the new name as value, an empty history list, and the new name.
    """
    # Name derivation, insertion and the key snapshot all happen under the
    # same lock acquisition, so they observe one consistent state.
    with lock:
        label = f"Conversation {len(conversations) + 1}"
        conversations[label] = []
        all_labels = [*conversations.keys()]
    dropdown_update = gr.update(choices=all_labels, value=label)
    return dropdown_update, [], label
174
+
175
def load_conversation(conv_name):
    """Fetch a copy of the stored history for ``conv_name``.

    Args:
        conv_name: Key of the conversation to load.

    Returns:
        ``(history, conv_name)`` where ``history`` is a shallow copy of the
        stored history, or an empty list when the name is unknown.
    """
    with lock:
        stored = conversations.get(conv_name, [])
        # Copy while the lock is held so the caller never shares the stored
        # list object.
        snapshot = stored.copy()
    return snapshot, conv_name
179
+
180
def request_stop():
    """Set the module-level ``stop_generation`` flag and report it.

    Returns:
        A short status message confirming that a stop was requested.
    """
    notice = "🛑 Arrêt demandé..."
    stop_generation.set()
    return notice
183
+
184
def clear_chat(chat_name="Conversation 1"):
    """Reset one conversation's stored history to an empty list.

    The conversation key used to be hard-coded, so the function could only
    ever clear "Conversation 1". It is now a parameter; a call with no
    arguments behaves exactly as before.

    Args:
        chat_name: Key of the conversation to reset. A falsy value (``None``
            or an empty string) falls back to ``"Conversation 1"`` so that no
            nameless entry is ever created.

    Returns:
        ``([], name)``: the emptied history and the name of the conversation
        that was cleared.
    """
    target = chat_name or "Conversation 1"
    with lock:
        conversations[target] = []
    return [], target
188
+
189
  # -------------------------
190
  # INTERFACE GRADIO OPTIMISÉE
191
  # -------------------------
 
197
  --input-bg: #1e293b;
198
  }
199
 
200
+ #topbar {
201
+ display: flex;
202
+ align-items: center;
203
+ gap: 12px;
204
+ padding: 10px;
205
+ background: var(--chat-bg);
206
+ color: #fff;
207
+ border-bottom: 1px solid #334155;
208
+ }
209
+
210
+ #leftcol {
211
+ background: #111218;
212
+ color: #fff;
213
+ padding: 12px;
214
+ min-height: 520px;
215
+ border-right: 1px solid #334155;
216
+ transition: all 0.3s ease;
217
+ }
218
+
219
+ #chatcol {
220
+ padding: 12px;
221
+ height: 100%;
222
+ display: flex;
223
+ flex-direction: column;
224
+ }
225
+
226
+ .hamburger {
227
+ font-size: 20px;
228
+ background: transparent;
229
+ color: #fff;
230
+ border: none;
231
+ cursor: pointer;
232
+ padding: 8px;
233
+ border-radius: 50%;
234
+ transition: background 0.2s;
235
+ }
236
+
237
+ .hamburger:hover {
238
+ background: #334155;
239
+ }
240
+
241
+ #chat-container {
242
+ flex-grow: 1;
243
+ display: flex;
244
+ flex-direction: column;
245
+ height: 100%;
246
+ }
247
+
248
  #chatbot {
249
  flex-grow: 1;
250
+ height: 600px;
251
  background: var(--chat-bg);
252
  border-radius: 16px;
253
  padding: 20px;
254
  overflow-y: auto;
255
+ border: 1px solid #334155;
256
  }
257
 
258
  #input-container {
 
270
  border-radius: 24px;
271
  padding: 16px 20px;
272
  font-size: 16px;
273
+ box-shadow: inset 0 2px 4px rgba(0, 0, 0, 0.1);
274
+ }
275
+
276
+ #msg_input:focus {
277
+ outline: none;
278
+ border-color: var(--primary-color);
279
+ box-shadow: 0 0 0 3px rgba(79, 70, 229, 0.2);
280
+ }
281
+
282
+ #send_btn {
283
+ background: var(--primary-color);
284
+ color: white;
285
+ border: none;
286
+ border-radius: 24px;
287
+ padding: 14px 20px;
288
+ height: 50px;
289
+ min-width: 80px;
290
+ font-weight: 600;
291
+ cursor: pointer;
292
+ transition: background 0.2s;
293
+ }
294
+
295
+ #send_btn:hover {
296
+ background: var(--primary-hover);
297
+ }
298
+
299
+ #stop_btn {
300
+ background: #ef4444;
301
+ color: white;
302
+ border: none;
303
+ border-radius: 24px;
304
+ padding: 14px 20px;
305
+ height: 50px;
306
+ min-width: 80px;
307
+ font-weight: 600;
308
+ cursor: pointer;
309
+ transition: background 0.2s;
310
+ }
311
+
312
+ #stop_btn:hover {
313
+ background: #dc2626;
314
+ }
315
+
316
+ .conversation-list {
317
+ margin-top: 20px;
318
+ max-height: 400px;
319
+ overflow-y: auto;
320
+ }
321
+
322
+ .conversation-item {
323
+ padding: 12px 16px;
324
+ border-radius: 12px;
325
+ margin-bottom: 8px;
326
+ cursor: pointer;
327
+ transition: background 0.2s;
328
+ }
329
+
330
+ .conversation-item:hover {
331
+ background: #1e293b;
332
+ }
333
+
334
+ .conversation-item.active {
335
+ background: var(--primary-color);
336
+ color: white;
337
+ }
338
+
339
+ .alisia-badge {
340
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
341
+ color: white;
342
+ padding: 4px 8px;
343
+ border-radius: 12px;
344
+ font-size: 12px;
345
+ font-weight: bold;
346
+ margin-left: 8px;
347
+ }
348
+
349
+ .clear-btn {
350
+ background: #94a3b8;
351
+ color: white;
352
+ border: none;
353
+ border-radius: 12px;
354
+ padding: 10px 16px;
355
+ margin-top: 10px;
356
+ cursor: pointer;
357
+ }
358
+
359
+ .clear-btn:hover {
360
+ background: #64748b;
361
  }
362
  """
363
 
364
  with gr.Blocks(css=css, title="Alisia Chat - Ultra Rapide", theme=gr.themes.Soft()) as demo:
365
+ history_visible = gr.State(True)
366
+ current_chat = gr.State("Conversation 1")
367
+
368
+ with gr.Row(elem_id="topbar"):
369
+ menu_btn = gr.Button("☰", elem_classes="hamburger")
370
+ gr.Markdown("### 💬 Alisia <span class='alisia-badge'>AI Assistant</span>", elem_id="title")
371
+ gr.HTML("<div style='flex:1'></div>")
372
+ gr.Markdown("<small style='color:#94a3b8'>llama.cpp optimisé</small>")
373
+
374
  with gr.Row():
375
+ with gr.Column(scale=1, visible=True, elem_id="leftcol") as left_column:
376
+ with gr.Column(elem_classes="conversation-list"):
377
+ conv_dropdown = gr.Dropdown(
378
+ choices=get_conv_names(),
379
+ value="Conversation 1",
380
+ label="Conversations",
381
+ interactive=True,
382
+ elem_classes="conversation-item"
383
+ )
384
+ new_conv_btn = gr.Button(
385
+ "➕ Nouvelle conversation",
386
+ variant="primary",
387
+ elem_classes="conversation-item"
388
+ )
389
+ clear_btn = gr.Button(
390
+ "🗑️ Effacer chat",
391
+ elem_classes="clear-btn"
392
+ )
393
+ gr.Markdown("## 🚀 Mode Ultra-Rapide", elem_classes="conversation-header")
394
+ gr.Markdown("""
395
+ <div style="color: #94a3b8; font-size: 14px;">
396
+ ✅ Streaming hybride<br>
397
+ ✅ Réactivité 120ms<br>
398
+ ✅ Optimisé llama.cpp
399
+ </div>
400
+ """, elem_classes="conversation-subheader")
401
 
402
+ with gr.Column(scale=3, elem_id="chatcol"):
403
+ with gr.Column(elem_id="chat-container"):
404
+ chatbot = gr.Chatbot(
405
+ label="Alisia",
406
+ elem_id="chatbot",
407
+ show_label=False,
408
+ height=500
409
+ )
410
+ with gr.Row(elem_id="input-container"):
411
+ msg_input = gr.Textbox(
412
+ placeholder="Posez votre question à Alisia…",
413
+ lines=2,
414
+ show_label=False,
415
+ elem_id="msg_input"
416
+ )
417
+ send_btn = gr.Button(
418
+ "Envoyer",
419
+ variant="primary",
420
+ elem_id="send_btn"
421
+ )
422
+ stop_btn = gr.Button(
423
+ "Arrêter",
424
+ variant="stop",
425
+ elem_id="stop_btn",
426
+ visible=False
427
+ )
428
+
429
+ # Événements
430
+ menu_btn.click(
431
+ fn=toggle_history,
432
+ inputs=[history_visible],
433
+ outputs=[history_visible, left_column]
434
+ )
435
+
436
+ new_conv_btn.click(
437
+ fn=new_conversation,
438
+ inputs=None,
439
+ outputs=[conv_dropdown, chatbot, current_chat]
440
+ )
441
+
442
+ clear_btn.click(
443
+ fn=clear_chat,
444
+ inputs=None,
445
+ outputs=[chatbot, current_chat]
446
+ )
447
+
448
+ conv_dropdown.change(
449
+ fn=load_conversation,
450
+ inputs=[conv_dropdown],
451
+ outputs=[chatbot, current_chat]
452
+ )
453
 
454
  send_btn.click(
455
+ fn=lambda: (gr.update(visible=False), gr.update(visible=True)),
456
  inputs=None,
457
  outputs=[send_btn, stop_btn],
458
  queue=False
459
  ).then(
460
  fn=send_message_stream,
461
+ inputs=[msg_input, chatbot, current_chat],
462
  outputs=[chatbot, msg_input],
463
  queue=True
464
  ).then(
 
469
  )
470
 
471
  msg_input.submit(
472
+ fn=lambda: (gr.update(visible=False), gr.update(visible=True)),
473
  inputs=None,
474
  outputs=[send_btn, stop_btn],
475
  queue=False
476
  ).then(
477
  fn=send_message_stream,
478
+ inputs=[msg_input, chatbot, current_chat],
479
  outputs=[chatbot, msg_input],
480
  queue=True
481
  ).then(
 
486
  )
487
 
488
  stop_btn.click(
489
+ fn=request_stop,
490
  inputs=None,
491
  outputs=None
492
  )
 
495
  # LANCEMENT
496
  # -------------------------
497
  if __name__ == "__main__":
498
+ print("🚀 Lancement de l'interface ultra-réactive...")
499
+ print("⏱️ Mode streaming hybride activé (120ms)")
500
  demo.launch(
501
  share=True,
502
  server_name="0.0.0.0",
503
  server_port=7860,
504
+ debug=False
505
  )