Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -64,41 +64,39 @@ def get_conv_names():
|
|
| 64 |
with lock:
|
| 65 |
return list(conversations.keys())
|
| 66 |
|
| 67 |
-
# Format de prompt Alpaca
|
| 68 |
-
def
|
| 69 |
-
"""Format
|
| 70 |
if input_text:
|
| 71 |
-
return f"
|
| 72 |
-
|
| 73 |
-
### Instruction:
|
| 74 |
-
{instruction}
|
| 75 |
-
|
| 76 |
-
### Input:
|
| 77 |
-
{input_text}
|
| 78 |
-
|
| 79 |
-
### Response:
|
| 80 |
-
{output_text}"""
|
| 81 |
else:
|
| 82 |
-
return f"
|
| 83 |
-
|
| 84 |
-
### Instruction:
|
| 85 |
-
{instruction}
|
| 86 |
-
|
| 87 |
-
### Response:
|
| 88 |
-
{output_text}"""
|
| 89 |
|
| 90 |
def build_conversation_prompt(history, new_message):
|
| 91 |
-
"""
|
| 92 |
-
# System prompt seulement au début
|
| 93 |
-
system_prompt = "Tu es Alisia, une assistante IA utile et compétente. Réponds de manière précise et concise en français."
|
| 94 |
|
| 95 |
-
#
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
|
| 100 |
# Ajouter le nouveau message
|
| 101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
return full_prompt
|
| 103 |
|
| 104 |
def send_message_stream(user_message, displayed_history, current_chat_name):
|
|
@@ -117,24 +115,27 @@ def send_message_stream(user_message, displayed_history, current_chat_name):
|
|
| 117 |
local_hist.append((str(user_message), ""))
|
| 118 |
yield local_hist, ""
|
| 119 |
|
|
|
|
| 120 |
formatted_prompt = build_conversation_prompt(local_hist[:-1], str(user_message))
|
| 121 |
partial = ""
|
| 122 |
|
| 123 |
-
# PARAMÈTRES DE RÉACTIVITÉ
|
| 124 |
last_update = time.time()
|
| 125 |
token_count = 0
|
| 126 |
-
min_tokens =
|
| 127 |
-
max_delay = 0.
|
| 128 |
|
| 129 |
try:
|
|
|
|
| 130 |
stream = llm.create_completion(
|
| 131 |
prompt=formatted_prompt,
|
| 132 |
stream=True,
|
| 133 |
-
max_tokens=
|
| 134 |
temperature=0.7,
|
| 135 |
-
top_p=0.
|
| 136 |
-
repeat_penalty=1.
|
| 137 |
-
stop=["### Instruction:", "### Response:", "\n\n", "<|endoftext|>"]
|
|
|
|
| 138 |
)
|
| 139 |
|
| 140 |
for chunk in stream:
|
|
@@ -147,11 +148,11 @@ def send_message_stream(user_message, displayed_history, current_chat_name):
|
|
| 147 |
partial += token
|
| 148 |
token_count += 1
|
| 149 |
|
| 150 |
-
# STRATÉGIE
|
| 151 |
should_update = (
|
| 152 |
token_count >= min_tokens or
|
| 153 |
time.time() - last_update > max_delay or
|
| 154 |
-
token in [".", "!", "?", "\n"
|
| 155 |
)
|
| 156 |
|
| 157 |
if should_update:
|
|
@@ -178,7 +179,7 @@ def send_message_stream(user_message, displayed_history, current_chat_name):
|
|
| 178 |
yield local_hist, ""
|
| 179 |
|
| 180 |
# -------------------------
|
| 181 |
-
# FONCTIONS POUR L'INTERFACE
|
| 182 |
# -------------------------
|
| 183 |
def toggle_history(visible_state):
|
| 184 |
new_state = not bool(visible_state)
|
|
@@ -206,7 +207,7 @@ def clear_chat():
|
|
| 206 |
return [], "Conversation 1"
|
| 207 |
|
| 208 |
# -------------------------
|
| 209 |
-
# INTERFACE GRADIO OPTIMISÉE
|
| 210 |
# -------------------------
|
| 211 |
css = """
|
| 212 |
:root {
|
|
@@ -413,8 +414,8 @@ with gr.Blocks(css=css, title="Alisia Chat - Ultra Rapide", theme=gr.themes.Soft
|
|
| 413 |
gr.Markdown("""
|
| 414 |
<div style="color: #94a3b8; font-size: 14px;">
|
| 415 |
✅ Streaming hybride<br>
|
| 416 |
-
✅ Réactivité
|
| 417 |
-
✅
|
| 418 |
</div>
|
| 419 |
""", elem_classes="conversation-subheader")
|
| 420 |
|
|
@@ -514,8 +515,9 @@ with gr.Blocks(css=css, title="Alisia Chat - Ultra Rapide", theme=gr.themes.Soft
|
|
| 514 |
# LANCEMENT
|
| 515 |
# -------------------------
|
| 516 |
if __name__ == "__main__":
|
| 517 |
-
print("🚀 Lancement de l'interface
|
| 518 |
-
print("⏱️ Mode streaming
|
|
|
|
| 519 |
demo.launch(
|
| 520 |
share=True,
|
| 521 |
server_name="0.0.0.0",
|
|
|
|
| 64 |
with lock:
|
| 65 |
return list(conversations.keys())
|
| 66 |
|
| 67 |
+
# Format de prompt Alpaca OPTIMISÉ
|
| 68 |
+
def build_alpaca_prompt(instruction, input_text="", output_text=""):
    """Render one Alpaca-style prompt block.

    Args:
        instruction: Text placed under the ``### Instruction:`` header.
        input_text: Optional text for an ``### Input:`` section; the section
            is left out entirely when this value is falsy.
        output_text: Text placed under the ``### Response:`` header (empty
            by default, leaving the response open).

    Returns:
        The formatted block, with sections separated by a blank line.
    """
    # The Input section is optional; when absent it contributes nothing.
    input_section = f"### Input:\n{input_text}\n\n" if input_text else ""
    return f"### Instruction:\n{instruction}\n\n{input_section}### Response:\n{output_text}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
|
| 75 |
def build_conversation_prompt(history, new_message):
    """Build the full Alpaca-style prompt for the next completion.

    The prompt consists of the system prompt, then one Alpaca block per past
    exchange, then an open block for ``new_message`` whose response section
    is left empty for the model to complete.

    Args:
        history: Iterable of ``(user_msg, assistant_msg)`` pairs for the
            earlier turns; may be empty or ``None``.
        new_message: The user message to answer.

    Returns:
        The prompt string; sections are separated by a blank line.
    """
    system_prompt = "Tu es Alisia, assistante IA compétente. Réponds en français de façon concise."

    # The system prompt is emitted once, at the top of the prompt, on every
    # call. The prompt is rebuilt from scratch for each turn, so leaving it
    # out whenever history exists would drop the instructions from the
    # second turn onwards.
    sections = [system_prompt]
    sections.extend(
        build_alpaca_prompt(user_msg, "", assistant_msg)
        for user_msg, assistant_msg in history or ()
    )

    # Open block for the new message: the response part stays empty.
    sections.append(build_alpaca_prompt(new_message, "", ""))
    return "\n\n".join(sections)
|
| 101 |
|
| 102 |
def send_message_stream(user_message, displayed_history, current_chat_name):
|
|
|
|
| 115 |
local_hist.append((str(user_message), ""))
|
| 116 |
yield local_hist, ""
|
| 117 |
|
| 118 |
+
# Construction OPTIMISÉE du prompt
|
| 119 |
formatted_prompt = build_conversation_prompt(local_hist[:-1], str(user_message))
|
| 120 |
partial = ""
|
| 121 |
|
| 122 |
+
# PARAMÈTRES DE RÉACTIVITÉ ULTRA-RAPIDE
|
| 123 |
last_update = time.time()
|
| 124 |
token_count = 0
|
| 125 |
+
min_tokens = 1 # Minimum réduit pour plus de réactivité
|
| 126 |
+
max_delay = 0.08 # Réduit à 80ms pour plus de vitesse
|
| 127 |
|
| 128 |
try:
|
| 129 |
+
# Paramètres de génération OPTIMISÉS
|
| 130 |
stream = llm.create_completion(
|
| 131 |
prompt=formatted_prompt,
|
| 132 |
stream=True,
|
| 133 |
+
max_tokens=768, # Réduit pour plus de vitesse
|
| 134 |
temperature=0.7,
|
| 135 |
+
top_p=0.85, # Légèrement réduit
|
| 136 |
+
repeat_penalty=1.15, # Augmenté pour éviter la répétition
|
| 137 |
+
stop=["### Instruction:", "### Response:", "\n\n", "<|endoftext|>", "###"],
|
| 138 |
+
top_k=40 # Ajouté pour la vitesse
|
| 139 |
)
|
| 140 |
|
| 141 |
for chunk in stream:
|
|
|
|
| 148 |
partial += token
|
| 149 |
token_count += 1
|
| 150 |
|
| 151 |
+
# STRATÉGIE ULTRA-RAPIDE
|
| 152 |
should_update = (
|
| 153 |
token_count >= min_tokens or
|
| 154 |
time.time() - last_update > max_delay or
|
| 155 |
+
token in [".", "!", "?", "\n"]
|
| 156 |
)
|
| 157 |
|
| 158 |
if should_update:
|
|
|
|
| 179 |
yield local_hist, ""
|
| 180 |
|
| 181 |
# -------------------------
|
| 182 |
+
# FONCTIONS POUR L'INTERFACE
|
| 183 |
# -------------------------
|
| 184 |
def toggle_history(visible_state):
|
| 185 |
new_state = not bool(visible_state)
|
|
|
|
| 207 |
return [], "Conversation 1"
|
| 208 |
|
| 209 |
# -------------------------
|
| 210 |
+
# INTERFACE GRADIO OPTIMISÉE
|
| 211 |
# -------------------------
|
| 212 |
css = """
|
| 213 |
:root {
|
|
|
|
| 414 |
gr.Markdown("""
|
| 415 |
<div style="color: #94a3b8; font-size: 14px;">
|
| 416 |
✅ Streaming hybride<br>
|
| 417 |
+
✅ Réactivité 80ms<br>
|
| 418 |
+
✅ Format Alpaca optimisé
|
| 419 |
</div>
|
| 420 |
""", elem_classes="conversation-subheader")
|
| 421 |
|
|
|
|
| 515 |
# LANCEMENT
|
| 516 |
# -------------------------
|
| 517 |
if __name__ == "__main__":
|
| 518 |
+
print("🚀 Lancement de l'interface ULTRA-RAPIDE...")
|
| 519 |
+
print("⏱️ Mode streaming optimisé (80ms)")
|
| 520 |
+
print("🎯 Format Alpaca accéléré")
|
| 521 |
demo.launch(
|
| 522 |
share=True,
|
| 523 |
server_name="0.0.0.0",
|