Gems234 committed on
Commit
e9bde8d
·
verified ·
1 Parent(s): 846ecc9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -50
app.py CHANGED
@@ -64,23 +64,22 @@ def get_conv_names():
64
  with lock:
65
  return list(conversations.keys())
66
 
67
- # FORMAT ULTRA-MINIMALISTE POUR MAXIMUM DE VITESSE
68
  def build_conversation_prompt(history, new_message):
69
- """Format ultra-optimisé pour la vitesse maximale"""
 
70
 
71
- # System prompt très court, seulement au début
72
- if not history:
73
- prompt = "Tu es Alisia, assistante IA. Réponds en français.\n\n"
74
- else:
75
- prompt = ""
76
 
77
- # Historique formaté de façon minimaliste
78
- for i, (user_msg, assistant_msg) in enumerate(history):
79
- prompt += f"Q: {user_msg}\nA: {assistant_msg}\n\n"
80
 
81
  # Nouveau message
82
- prompt += f"Q: {new_message}\nA:"
83
-
84
  return prompt
85
 
86
  def send_message_stream(user_message, displayed_history, current_chat_name):
@@ -99,31 +98,26 @@ def send_message_stream(user_message, displayed_history, current_chat_name):
99
  local_hist.append((str(user_message), ""))
100
  yield local_hist, ""
101
 
102
- # Construction ULTRA-RAPIDE du prompt
103
  formatted_prompt = build_conversation_prompt(local_hist[:-1], str(user_message))
104
  partial = ""
105
 
106
- # PARAMÈTRES DE RÉACTIVITÉ MAXIMALE
107
  last_update = time.time()
108
  token_count = 0
109
- min_tokens = 1 # Mise à jour après chaque token
110
- max_delay = 0.05 # Seulement 50ms entre les updates!
111
 
112
  try:
113
- # Paramètres de génération ULTRA-OPTIMISÉS
114
  stream = llm.create_completion(
115
  prompt=formatted_prompt,
116
  stream=True,
117
- max_tokens=512, # Réduit au maximum pour la vitesse
118
  temperature=0.7,
119
  top_p=0.9,
120
  repeat_penalty=1.1,
121
- stop=["Q:", "A:", "\n\n", "<|endoftext|>", "###", "Instruction:", "Response:"],
122
- top_k=40,
123
- min_p=0.05 # Ajouté pour la vitesse
124
  )
125
 
126
- buffer = ""
127
  for chunk in stream:
128
  if stop_generation.is_set():
129
  break
@@ -131,34 +125,27 @@ def send_message_stream(user_message, displayed_history, current_chat_name):
131
  if "choices" in chunk and chunk["choices"]:
132
  token = chunk["choices"][0].get("text", "")
133
  if token:
134
- buffer += token
135
  token_count += 1
136
 
137
- # STRATÉGIE ULTRA-RAPIDE - mise à jour immédiate
138
- current_time = time.time()
139
- time_since_update = current_time - last_update
140
-
141
  should_update = (
142
  token_count >= min_tokens or
143
- time_since_update >= max_delay or
144
- token in ["\n", ".", "!", "?", ","]
145
  )
146
 
147
- if should_update and buffer.strip():
148
- cleaned = clean_output(buffer)
149
- if cleaned: # Only update if we have meaningful content
150
- current_response = local_hist[-1][1] + cleaned
151
- local_hist[-1] = (str(user_message), current_response)
152
- yield local_hist, ""
153
- buffer = "" # Clear buffer after updating
154
- last_update = current_time
155
  token_count = 0
156
 
157
- # Final update with any remaining content
158
- if buffer.strip():
159
- cleaned = clean_output(buffer)
160
- current_response = local_hist[-1][1] + cleaned
161
- local_hist[-1] = (str(user_message), current_response)
162
  yield local_hist, ""
163
 
164
  except Exception as e:
@@ -382,7 +369,7 @@ with gr.Blocks(css=css, title="Alisia Chat - Ultra Rapide", theme=gr.themes.Soft
382
  menu_btn = gr.Button("☰", elem_classes="hamburger")
383
  gr.Markdown("### 💬 Alisia <span class='alisia-badge'>AI Assistant</span>", elem_id="title")
384
  gr.HTML("<div style='flex:1'></div>")
385
- gr.Markdown("<small style='color:#94a3b8'>Mode Turbo Activé</small>")
386
 
387
  with gr.Row():
388
  with gr.Column(scale=1, visible=True, elem_id="leftcol") as left_column:
@@ -403,12 +390,12 @@ with gr.Blocks(css=css, title="Alisia Chat - Ultra Rapide", theme=gr.themes.Soft
403
  "🗑️ Effacer chat",
404
  elem_classes="clear-btn"
405
  )
406
- gr.Markdown("## 🚀 Mode Turbo", elem_classes="conversation-header")
407
  gr.Markdown("""
408
  <div style="color: #94a3b8; font-size: 14px;">
409
- Format minimaliste<br>
410
- ✅ Réactivité 50ms<br>
411
- Optimisation max
412
  </div>
413
  """, elem_classes="conversation-subheader")
414
 
@@ -508,9 +495,8 @@ with gr.Blocks(css=css, title="Alisia Chat - Ultra Rapide", theme=gr.themes.Soft
508
  # LANCEMENT
509
  # -------------------------
510
  if __name__ == "__main__":
511
- print("🚀 Lancement du mode TURBO...")
512
- print("⏱️ Réactivité maximale (50ms)")
513
- print("🎯 Format ultra-minimaliste")
514
  demo.launch(
515
  share=True,
516
  server_name="0.0.0.0",
 
64
  with lock:
65
  return list(conversations.keys())
66
 
 
67
  def build_conversation_prompt(history, new_message):
68
+ """Format de prompt optimisé pour Alpaca"""
69
+ prompt = ""
70
 
71
+ # System prompt seulement au début
72
+ if not any(any(conv) for conv in conversations.values()):
73
+ prompt += """Tu es Alisia, une assistante IA utile et compétente. Réponds de manière précise et concise.
74
+
75
+ """
76
 
77
+ # Historique de conversation
78
+ for user_msg, assistant_msg in history:
79
+ prompt += f"### Instruction:\n{user_msg}\n\n### Response:\n{assistant_msg}\n\n"
80
 
81
  # Nouveau message
82
+ prompt += f"### Instruction:\n{new_message}\n\n### Response:\n"
 
83
  return prompt
84
 
85
  def send_message_stream(user_message, displayed_history, current_chat_name):
 
98
  local_hist.append((str(user_message), ""))
99
  yield local_hist, ""
100
 
 
101
  formatted_prompt = build_conversation_prompt(local_hist[:-1], str(user_message))
102
  partial = ""
103
 
104
+ # PARAMÈTRES DE RÉACTIVITÉ HYBRIDE
105
  last_update = time.time()
106
  token_count = 0
107
+ min_tokens = 2 # Minimum de tokens avant update
108
+ max_delay = 0.12 # Maximum 120ms entre updates
109
 
110
  try:
 
111
  stream = llm.create_completion(
112
  prompt=formatted_prompt,
113
  stream=True,
114
+ max_tokens=1024,
115
  temperature=0.7,
116
  top_p=0.9,
117
  repeat_penalty=1.1,
118
+ stop=["### Instruction:", "### Response:", "\n\n", "<|endoftext|>"]
 
 
119
  )
120
 
 
121
  for chunk in stream:
122
  if stop_generation.is_set():
123
  break
 
125
  if "choices" in chunk and chunk["choices"]:
126
  token = chunk["choices"][0].get("text", "")
127
  if token:
128
+ partial += token
129
  token_count += 1
130
 
131
+ # STRATÉGIE DE RÉACTIVITÉ HYBRIDE
 
 
 
132
  should_update = (
133
  token_count >= min_tokens or
134
+ time.time() - last_update > max_delay or
135
+ token in [".", "!", "?", "\n", ",", ";", ":"]
136
  )
137
 
138
+ if should_update:
139
+ cleaned = clean_output(partial)
140
+ local_hist[-1] = (str(user_message), cleaned)
141
+ yield local_hist, ""
142
+ last_update = time.time()
 
 
 
143
  token_count = 0
144
 
145
+ # DERNIER FLUSH - Garantit que tout est affiché
146
+ if partial:
147
+ cleaned = clean_output(partial)
148
+ local_hist[-1] = (str(user_message), cleaned)
 
149
  yield local_hist, ""
150
 
151
  except Exception as e:
 
369
  menu_btn = gr.Button("☰", elem_classes="hamburger")
370
  gr.Markdown("### 💬 Alisia <span class='alisia-badge'>AI Assistant</span>", elem_id="title")
371
  gr.HTML("<div style='flex:1'></div>")
372
+ gr.Markdown("<small style='color:#94a3b8'>llama.cpp optimisé</small>")
373
 
374
  with gr.Row():
375
  with gr.Column(scale=1, visible=True, elem_id="leftcol") as left_column:
 
390
  "🗑️ Effacer chat",
391
  elem_classes="clear-btn"
392
  )
393
+ gr.Markdown("## 🚀 Mode Ultra-Rapide", elem_classes="conversation-header")
394
  gr.Markdown("""
395
  <div style="color: #94a3b8; font-size: 14px;">
396
+ Streaming hybride<br>
397
+ ✅ Réactivité 120ms<br>
398
+ Optimisé llama.cpp
399
  </div>
400
  """, elem_classes="conversation-subheader")
401
 
 
495
  # LANCEMENT
496
  # -------------------------
497
  if __name__ == "__main__":
498
+ print("🚀 Lancement de l'interface ultra-réactive...")
499
+ print("⏱️ Mode streaming hybride activé (120ms)")
 
500
  demo.launch(
501
  share=True,
502
  server_name="0.0.0.0",