Gems234 committed on
Commit
25e486f
·
verified ·
1 Parent(s): 522870a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -98
app.py CHANGED
@@ -1,13 +1,12 @@
1
  import os
2
  import re
3
  import threading
4
- import time
5
  import warnings
6
  import gradio as gr
7
  from llama_cpp import Llama
8
 
9
  # -------------------------
10
- # TÉLÉCHARGEMENT DU MODÈLE
11
  # -------------------------
12
  MODEL_REPO = "mradermacher/Alisia-7B-it-GGUF"
13
  MODEL_NAME = "Alisia-7B-it.Q4_K_M.gguf"
@@ -15,7 +14,7 @@ MODEL_PATH = f"/tmp/{MODEL_NAME}"
15
 
16
  # Télécharger le modèle s'il n'existe pas
17
  if not os.path.exists(MODEL_PATH):
18
- print("📥 Téléchargement du modèle...")
19
  from huggingface_hub import hf_hub_download
20
  try:
21
  hf_hub_download(
@@ -27,40 +26,37 @@ if not os.path.exists(MODEL_PATH):
27
  print("✅ Modèle téléchargé avec succès!")
28
  except Exception as e:
29
  print(f"❌ Erreur téléchargement: {e}")
 
 
 
30
 
31
  # -------------------------
32
- # CONFIGURATION LLAMA.CPP OPTIMISÉE
33
  # -------------------------
34
  os.environ["LLAMA_CPP_LOG_LEVEL"] = "OFF"
35
  warnings.filterwarnings("ignore")
36
 
37
- print("⚡ Chargement du modèle avec llama.cpp...")
38
-
39
- # Détection automatique du nombre de threads
40
- import multiprocessing
41
- cpu_count = multiprocessing.cpu_count()
42
- n_threads = max(2, cpu_count - 1)
43
 
44
  llm = Llama(
45
  model_path=MODEL_PATH,
46
  n_ctx=2048,
47
- n_gpu_layers=0, # CPU uniquement
48
- n_threads=n_threads,
49
  verbose=False
50
  )
51
 
52
- print(f"✅ Modèle chargé! Threads: {n_threads}")
53
 
54
  # -------------------------
55
- # ÉTAT & SYNCHRONISATION
56
  # -------------------------
57
  lock = threading.Lock()
58
  conversations = {"Conversation 1": []}
59
  stop_generation = threading.Event()
60
- system_prompt_used = False
61
 
62
  # -------------------------
63
- # FONCTIONS UTILITAIRES OPTIMISÉES
64
  # -------------------------
65
  def clean_output(text: str) -> str:
66
  return re.sub(r"<\|im_.*?\|>", "", text).strip()
@@ -70,39 +66,38 @@ def get_conv_names():
70
  return list(conversations.keys())
71
 
72
  def build_conversation_prompt(history, new_message):
73
- """Format de prompt Alpaca avec system prompt UNIQUEMENT au début"""
74
- global system_prompt_used
75
-
 
76
  prompt = ""
77
-
78
- # System prompt UNIQUEMENT si jamais utilisé auparavant
79
- if not system_prompt_used:
80
  prompt += """Your name is Alisia, you are created by the Alisia research team.
81
  Below is an instruction that describes a task, paired with an input that provides further context.
82
  Write a response that appropriately completes the request.
83
 
84
  """
85
- system_prompt_used = True
86
-
87
- # Ajouter tout l'historique de conversation
88
  for user_msg, assistant_msg in history:
89
  prompt += f"### Instruction:\n{user_msg}\n\n### Response:\n{assistant_msg}\n\n"
90
-
91
  # Ajouter le nouveau message
92
  prompt += f"### Instruction:\n{new_message}\n\n### Response:\n"
93
-
94
  return prompt
95
 
96
  def send_message_stream(user_message, displayed_history, current_chat_name):
97
  global stop_generation
 
98
  stop_generation.clear()
99
 
100
- if not user_message or not str(user_message).strip():
101
  yield displayed_history or [], ""
102
  return
103
 
104
- start_time = time.time()
105
-
106
  with lock:
107
  if current_chat_name not in conversations:
108
  conversations[current_chat_name] = []
@@ -111,23 +106,20 @@ def send_message_stream(user_message, displayed_history, current_chat_name):
111
  local_hist.append((str(user_message), ""))
112
  yield local_hist, ""
113
 
 
114
  formatted_prompt = build_conversation_prompt(local_hist[:-1], str(user_message))
 
115
  partial = ""
116
- token_count = 0
117
-
118
  try:
119
- # STREAMING RÉEL - Token par token
120
- stream = llm.create_completion(
121
  prompt=formatted_prompt,
122
  stream=True,
123
  max_tokens=1024,
124
  temperature=0.7,
 
125
  top_p=0.8,
126
- repeat_penalty=1.05,
127
- stop=["### Instruction:", "### Response:", "<|endoftext|>", "\n\n\n"]
128
- )
129
-
130
- for chunk in stream:
131
  if stop_generation.is_set():
132
  break
133
 
@@ -135,9 +127,7 @@ def send_message_stream(user_message, displayed_history, current_chat_name):
135
  token = chunk["choices"][0].get("text", "")
136
  if token:
137
  partial += token
138
- token_count += 1
139
-
140
- # MISE À JOUR IMMÉDIATE - VRAI STREAMING
141
  cleaned = clean_output(partial)
142
  local_hist[-1] = (str(user_message), cleaned)
143
  yield local_hist, ""
@@ -148,14 +138,13 @@ def send_message_stream(user_message, displayed_history, current_chat_name):
148
  yield local_hist, ""
149
 
150
  finally:
151
- end_time = time.time()
152
- print(f"⏱️ Génération: {end_time - start_time:.2f}s - {token_count} tokens")
153
  with lock:
154
  conversations[current_chat_name] = local_hist.copy()
155
  yield local_hist, ""
156
 
157
  # -------------------------
158
- # FONCTIONS POUR L'INTERFACE
159
  # -------------------------
160
  def toggle_history(visible_state):
161
  new_state = not bool(visible_state)
@@ -178,14 +167,12 @@ def request_stop():
178
  return "🛑 Arrêt demandé..."
179
 
180
  def clear_chat():
181
- global system_prompt_used
182
  with lock:
183
  conversations["Conversation 1"] = []
184
- system_prompt_used = False
185
  return [], "Conversation 1"
186
 
187
  # -------------------------
188
- # INTERFACE GRADIO OPTIMISÉE
189
  # -------------------------
190
  css = """
191
  :root {
@@ -245,12 +232,14 @@ css = """
245
 
246
  #chatbot {
247
  flex-grow: 1;
248
- height: 600px;
 
249
  background: var(--chat-bg);
250
  border-radius: 16px;
251
  padding: 20px;
252
  overflow-y: auto;
253
  border: 1px solid #334155;
 
254
  }
255
 
256
  #input-container {
@@ -269,6 +258,7 @@ css = """
269
  padding: 16px 20px;
270
  font-size: 16px;
271
  box-shadow: inset 0 2px 4px rgba(0, 0, 0, 0.1);
 
272
  }
273
 
274
  #msg_input:focus {
@@ -288,6 +278,9 @@ css = """
288
  font-weight: 600;
289
  cursor: pointer;
290
  transition: background 0.2s;
 
 
 
291
  }
292
 
293
  #send_btn:hover {
@@ -305,6 +298,9 @@ css = """
305
  font-weight: 600;
306
  cursor: pointer;
307
  transition: background 0.2s;
 
 
 
308
  }
309
 
310
  #stop_btn:hover {
@@ -313,7 +309,7 @@ css = """
313
 
314
  .conversation-list {
315
  margin-top: 20px;
316
- max-height: 400px;
317
  overflow-y: auto;
318
  }
319
 
@@ -345,8 +341,8 @@ css = """
345
  }
346
 
347
  .clear-btn {
348
- background: #94a3b8;
349
- color: white;
350
  border: none;
351
  border-radius: 12px;
352
  padding: 10px 16px;
@@ -355,41 +351,19 @@ css = """
355
  }
356
 
357
  .clear-btn:hover {
358
- background: #64748b;
359
- }
360
-
361
- .perf-info {
362
- color: #94a3b8;
363
- font-size: 12px;
364
- margin-top: 10px;
365
- padding: 8px;
366
- background: #1e293b;
367
- border-radius: 8px;
368
- }
369
-
370
- .streaming-indicator {
371
- color: #10b981;
372
- font-size: 12px;
373
- margin-left: 10px;
374
- animation: pulse 1.5s infinite;
375
- }
376
-
377
- @keyframes pulse {
378
- 0% { opacity: 1; }
379
- 50% { opacity: 0.5; }
380
- 100% { opacity: 1; }
381
  }
382
  """
383
 
384
- with gr.Blocks(css=css, title="Alisia Chat - Streaming Réel", theme=gr.themes.Soft()) as demo:
385
  history_visible = gr.State(True)
386
  current_chat = gr.State("Conversation 1")
387
 
388
  with gr.Row(elem_id="topbar"):
389
  menu_btn = gr.Button("☰", elem_classes="hamburger")
390
- gr.Markdown("### 💬 Alisia <span class='alisia-badge'>Real Streaming</span><span class='streaming-indicator'>● LIVE</span>", elem_id="title")
391
  gr.HTML("<div style='flex:1'></div>")
392
- gr.Markdown(f"<small style='color:#94a3b8'>Token-par-token{n_threads} threads</small>")
393
 
394
  with gr.Row():
395
  with gr.Column(scale=1, visible=True, elem_id="leftcol") as left_column:
@@ -410,29 +384,26 @@ with gr.Blocks(css=css, title="Alisia Chat - Streaming Réel", theme=gr.themes.S
410
  "🗑️ Effacer chat",
411
  elem_classes="clear-btn"
412
  )
413
-
414
  gr.Markdown("""
415
- <div class="perf-info">
416
- <strong>🚀 STREAMING RÉEL</strong><br>
417
- Token-par-token<br>
418
- Latence minimale<br>
419
- • Format Alpaca pur<br>
420
- • System prompt unique
421
  </div>
422
- """)
423
 
424
  with gr.Column(scale=3, elem_id="chatcol"):
425
  with gr.Column(elem_id="chat-container"):
426
  chatbot = gr.Chatbot(
427
  label="Alisia",
428
  elem_id="chatbot",
429
- show_label=False,
430
- height=500
431
  )
432
  with gr.Row(elem_id="input-container"):
433
  msg_input = gr.Textbox(
434
  placeholder="Posez votre question à Alisia…",
435
- lines=2,
436
  show_label=False,
437
  elem_id="msg_input"
438
  )
@@ -517,14 +488,14 @@ with gr.Blocks(css=css, title="Alisia Chat - Streaming Réel", theme=gr.themes.S
517
  # LANCEMENT
518
  # -------------------------
519
  if __name__ == "__main__":
520
- print("🚀 Lancement avec STREAMING RÉEL...")
521
- print(" Token-par-token - Latence minimale")
522
- print(f"💻 Threads CPU: {n_threads}")
523
- print("📊 Monitoring temps réel activé")
524
-
525
  demo.launch(
526
- share=True,
527
- server_name="0.0.0.0",
528
- server_port=7860,
529
- debug=False
 
530
  )
 
1
  import os
2
  import re
3
  import threading
 
4
  import warnings
5
  import gradio as gr
6
  from llama_cpp import Llama
7
 
8
  # -------------------------
9
+ # TÉLÉCHARGEMENT DU MODÈLE HUGGING FACE
10
  # -------------------------
11
  MODEL_REPO = "mradermacher/Alisia-7B-it-GGUF"
12
  MODEL_NAME = "Alisia-7B-it.Q4_K_M.gguf"
 
14
 
15
  # Télécharger le modèle s'il n'existe pas
16
  if not os.path.exists(MODEL_PATH):
17
+ print("📥 Téléchargement du modèle depuis Hugging Face...")
18
  from huggingface_hub import hf_hub_download
19
  try:
20
  hf_hub_download(
 
26
  print("✅ Modèle téléchargé avec succès!")
27
  except Exception as e:
28
  print(f"❌ Erreur téléchargement: {e}")
29
+ # Fallback: utiliser le chemin local si le téléchargement échoue
30
+ MODEL_PATH = "./Alisia-7B-it.Q4_K_M.gguf"
31
+ print(f"🔄 Utilisation du chemin local: {MODEL_PATH}")
32
 
33
  # -------------------------
34
+ # Configuration
35
  # -------------------------
36
  os.environ["LLAMA_CPP_LOG_LEVEL"] = "OFF"
37
  warnings.filterwarnings("ignore")
38
 
39
+ print("⚡ Chargement du modèle Alisia-7B-it depuis Hugging Face...")
 
 
 
 
 
40
 
41
  llm = Llama(
42
  model_path=MODEL_PATH,
43
  n_ctx=2048,
44
+ n_gpu_layers=0, # Désactivé pour CPU
45
+ n_threads=8,
46
  verbose=False
47
  )
48
 
49
+ print("✅ Modèle chargé et prêt!")
50
 
51
  # -------------------------
52
+ # État & synchronisation
53
  # -------------------------
54
  lock = threading.Lock()
55
  conversations = {"Conversation 1": []}
56
  stop_generation = threading.Event()
 
57
 
58
  # -------------------------
59
+ # Fonctions utilitaires
60
  # -------------------------
61
  def clean_output(text: str) -> str:
62
  return re.sub(r"<\|im_.*?\|>", "", text).strip()
 
66
  return list(conversations.keys())
67
 
68
  def build_conversation_prompt(history, new_message):
69
+ """
70
+ Construit le prompt complet avec l'historique de conversation
71
+ System prompt UNIQUEMENT au début, ensuite seulement les Q/R
72
+ """
73
  prompt = ""
74
+
75
+ # System prompt UNIQUEMENT si c'est le tout premier message de toute conversation
76
+ if not any(any(conv) for conv in conversations.values()):
77
  prompt += """Your name is Alisia, you are created by the Alisia research team.
78
  Below is an instruction that describes a task, paired with an input that provides further context.
79
  Write a response that appropriately completes the request.
80
 
81
  """
82
+
83
+ # Ajouter tout l'historique de conversation (seulement les Q/R)
 
84
  for user_msg, assistant_msg in history:
85
  prompt += f"### Instruction:\n{user_msg}\n\n### Response:\n{assistant_msg}\n\n"
86
+
87
  # Ajouter le nouveau message
88
  prompt += f"### Instruction:\n{new_message}\n\n### Response:\n"
89
+
90
  return prompt
91
 
92
  def send_message_stream(user_message, displayed_history, current_chat_name):
93
  global stop_generation
94
+
95
  stop_generation.clear()
96
 
97
+ if user_message is None or not str(user_message).strip():
98
  yield displayed_history or [], ""
99
  return
100
 
 
 
101
  with lock:
102
  if current_chat_name not in conversations:
103
  conversations[current_chat_name] = []
 
106
  local_hist.append((str(user_message), ""))
107
  yield local_hist, ""
108
 
109
+ # Utiliser le format CORRECT Alpaca
110
  formatted_prompt = build_conversation_prompt(local_hist[:-1], str(user_message))
111
+
112
  partial = ""
 
 
113
  try:
114
+ for chunk in llm.create_completion(
 
115
  prompt=formatted_prompt,
116
  stream=True,
117
  max_tokens=1024,
118
  temperature=0.7,
119
+ stop=["### Instruction:", "### Input:", "### Response:", "<|endoftext|>", "\n\n\n"],
120
  top_p=0.8,
121
+ repeat_penalty=1.05
122
+ ):
 
 
 
123
  if stop_generation.is_set():
124
  break
125
 
 
127
  token = chunk["choices"][0].get("text", "")
128
  if token:
129
  partial += token
130
+ # Nettoyer et mettre à jour l'affichage
 
 
131
  cleaned = clean_output(partial)
132
  local_hist[-1] = (str(user_message), cleaned)
133
  yield local_hist, ""
 
138
  yield local_hist, ""
139
 
140
  finally:
141
+ # Sauvegarder l'historique après la génération
 
142
  with lock:
143
  conversations[current_chat_name] = local_hist.copy()
144
  yield local_hist, ""
145
 
146
  # -------------------------
147
+ # Fonctions pour l'interface
148
  # -------------------------
149
  def toggle_history(visible_state):
150
  new_state = not bool(visible_state)
 
167
  return "🛑 Arrêt demandé..."
168
 
169
  def clear_chat():
 
170
  with lock:
171
  conversations["Conversation 1"] = []
 
172
  return [], "Conversation 1"
173
 
174
  # -------------------------
175
+ # Interface Gradio
176
  # -------------------------
177
  css = """
178
  :root {
 
232
 
233
  #chatbot {
234
  flex-grow: 1;
235
+ height: calc(100vh - 200px) !important;
236
+ min-height: 500px;
237
  background: var(--chat-bg);
238
  border-radius: 16px;
239
  padding: 20px;
240
  overflow-y: auto;
241
  border: 1px solid #334155;
242
+ box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
243
  }
244
 
245
  #input-container {
 
258
  padding: 16px 20px;
259
  font-size: 16px;
260
  box-shadow: inset 0 2px 4px rgba(0, 0, 0, 0.1);
261
+ transition: all 0.3s ease;
262
  }
263
 
264
  #msg_input:focus {
 
278
  font-weight: 600;
279
  cursor: pointer;
280
  transition: background 0.2s;
281
+ display: flex;
282
+ align-items: center;
283
+ justify-content: center;
284
  }
285
 
286
  #send_btn:hover {
 
298
  font-weight: 600;
299
  cursor: pointer;
300
  transition: background 0.2s;
301
+ display: flex;
302
+ align-items: center;
303
+ justify-content: center;
304
  }
305
 
306
  #stop_btn:hover {
 
309
 
310
  .conversation-list {
311
  margin-top: 20px;
312
+ max-height: calc(100vh - 200px);
313
  overflow-y: auto;
314
  }
315
 
 
341
  }
342
 
343
  .clear-btn {
344
+ background: #94a3b8 !important;
345
+ color: white !important;
346
  border: none;
347
  border-radius: 12px;
348
  padding: 10px 16px;
 
351
  }
352
 
353
  .clear-btn:hover {
354
+ background: #64748b !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
355
  }
356
  """
357
 
358
+ with gr.Blocks(css=css, title="Alisia Chat", theme=gr.themes.Soft()) as demo:
359
  history_visible = gr.State(True)
360
  current_chat = gr.State("Conversation 1")
361
 
362
  with gr.Row(elem_id="topbar"):
363
  menu_btn = gr.Button("☰", elem_classes="hamburger")
364
+ gr.Markdown("### 💬 Alisia <span class='alisia-badge'>AI Assistant</span>", elem_id="title")
365
  gr.HTML("<div style='flex:1'></div>")
366
+ gr.Markdown("<small style='color:#94a3b8'>Hugging Face Alisia-7B-it</small>")
367
 
368
  with gr.Row():
369
  with gr.Column(scale=1, visible=True, elem_id="leftcol") as left_column:
 
384
  "🗑️ Effacer chat",
385
  elem_classes="clear-btn"
386
  )
387
+ gr.Markdown("## Format Alpaca", elem_classes="conversation-header")
388
  gr.Markdown("""
389
+ <div style="color: #94a3b8; font-size: 14px;">
390
+ Historique Q/R pur<br>
391
+ System prompt unique<br>
392
+ Multiples conversations
 
 
393
  </div>
394
+ """, elem_classes="conversation-subheader")
395
 
396
  with gr.Column(scale=3, elem_id="chatcol"):
397
  with gr.Column(elem_id="chat-container"):
398
  chatbot = gr.Chatbot(
399
  label="Alisia",
400
  elem_id="chatbot",
401
+ show_label=False
 
402
  )
403
  with gr.Row(elem_id="input-container"):
404
  msg_input = gr.Textbox(
405
  placeholder="Posez votre question à Alisia…",
406
+ lines=3,
407
  show_label=False,
408
  elem_id="msg_input"
409
  )
 
488
  # LANCEMENT
489
  # -------------------------
490
  if __name__ == "__main__":
491
+ print("🚀 Démarrage du serveur Alisia...")
492
+ print("📱 Préparation du lien de partage...")
493
+
494
+ # Lancement avec partage activé
 
495
  demo.launch(
496
+ share=True, # Crée un lien public
497
+ server_name="0.0.0.0", # Accepte les connexions externes
498
+ server_port=7860, # Port standard
499
+ debug=False, # Mode non-verbose pour performance
500
+ show_error=True # Affiche les erreurs
501
  )