# app.py — Alisia-7B chat interface (Gradio + llama-cpp-python)
import os
import re
import threading
import warnings
import gradio as gr
from llama_cpp import Llama
# -------------------------
# DOWNLOAD THE MODEL FROM HUGGING FACE
# -------------------------
MODEL_REPO = "mradermacher/Alisia-7B-it-GGUF"
MODEL_NAME = "Alisia-7B-it.Q4_K_M.gguf"
MODEL_PATH = f"/tmp/{MODEL_NAME}"

# Download the GGUF file only if it is not already cached in /tmp.
if not os.path.exists(MODEL_PATH):
    print("📥 Téléchargement du modèle depuis Hugging Face...")
    from huggingface_hub import hf_hub_download
    try:
        # NOTE: `resume_download` is deprecated (and removed in recent
        # huggingface_hub releases) — downloads always resume now.
        # Use the path returned by hf_hub_download instead of assuming
        # the layout inside `local_dir`.
        MODEL_PATH = hf_hub_download(
            repo_id=MODEL_REPO,
            filename=MODEL_NAME,
            local_dir="/tmp",
        )
        print("✅ Modèle téléchargé avec succès!")
    except Exception as e:
        print(f"❌ Erreur téléchargement: {e}")
        # Fallback: use a path next to the script if the download fails.
        MODEL_PATH = "./Alisia-7B-it.Q4_K_M.gguf"
        print(f"🔄 Utilisation du chemin local: {MODEL_PATH}")
# -------------------------
# Configuration
# -------------------------
os.environ["LLAMA_CPP_LOG_LEVEL"] = "OFF"  # silence llama.cpp native logging
warnings.filterwarnings("ignore")
print("⚡ Chargement du modèle Alisia-7B-it depuis Hugging Face...")
# Load the GGUF model at import time (blocking).
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=2048,        # context window size in tokens
    n_gpu_layers=-1,   # -1 = offload all layers to the GPU when one is available
    n_threads=8,       # CPU threads for inference
    verbose=False
)
print("✅ Modèle chargé et prêt!")
# -------------------------
# State & synchronisation
# -------------------------
lock = threading.Lock()                 # guards every access to `conversations`
conversations = {"Conversation 1": []}  # name -> list of (user, assistant) pairs
stop_generation = threading.Event()     # set by the UI to abort an in-flight stream
# -------------------------
# Fonctions utilitaires
# -------------------------
def clean_output(text: str) -> str:
    """Remove ChatML-style ``<|im_...|>`` marker tokens and trim whitespace."""
    without_markers = re.sub(r"<\|im_.*?\|>", "", text)
    return without_markers.strip()
def get_conv_names():
    """Return a thread-safe snapshot of all conversation names, in insertion order."""
    with lock:
        return [name for name in conversations]
def build_conversation_prompt(history, new_message):
    """Build an Alpaca-style prompt from a conversation history.

    Args:
        history: list of (user_msg, assistant_msg) pairs already exchanged
            in THIS conversation.
        new_message: the new user message to append as the final instruction.

    Returns:
        The full prompt string, ending with an open "### Response:" section
        for the model to complete.

    BUG FIX: the original tested the module-global `conversations` dict to
    decide whether to prepend the system preamble, so a fresh conversation
    started after any other chat had messages never received it.  The
    preamble now depends only on the `history` that was passed in.
    """
    prompt = ""
    # Prepend the system preamble only on the first turn of this conversation.
    if not history:
        prompt += """Your name is Alisia, you are created by the Alisia research team.
Below is an instruction that describes a task, paired with an input that provides further context.
Write a response that appropriately completes the request.
"""
    for user_msg, assistant_msg in history:
        prompt += f"### Instruction:\n{user_msg}\n\n### Response:\n{assistant_msg}\n\n"
    prompt += f"### Instruction:\n{new_message}\n\n### Response:\n"
    return prompt
def send_message_stream(user_message, displayed_history, current_chat_name):
    """Generator: stream the model's reply token by token into the chatbot.

    Yields (history, textbox_value) pairs so Gradio can live-update the
    Chatbot component and clear the input box.  The final history is written
    back into the shared `conversations` dict under `lock`.
    """
    global stop_generation  # NOTE(review): only methods are called on the Event, so `global` is not strictly required
    stop_generation.clear()
    # Ignore empty / whitespace-only submissions.
    if user_message is None or not str(user_message).strip():
        yield displayed_history or [], ""
        return
    # Snapshot this conversation's history under the lock; mutate only the local copy.
    with lock:
        if current_chat_name not in conversations:
            conversations[current_chat_name] = []
        local_hist = conversations[current_chat_name].copy()
    # Show the user's message immediately with an empty assistant slot.
    local_hist.append((str(user_message), ""))
    yield local_hist, ""
    # Prompt is built from the history EXCLUDING the placeholder pair just appended.
    formatted_prompt = build_conversation_prompt(local_hist[:-1], str(user_message))
    partial = ""
    try:
        for chunk in llm.create_completion(
            prompt=formatted_prompt,
            stream=True,
            max_tokens=1024,
            temperature=0.7,
            stop=["### Instruction:", "### Input:", "### Response:", "<|endoftext|>", "\n\n\n"],
            top_p=0.8,
            repeat_penalty=1.05
        ):
            # Cooperative cancellation requested from the UI "stop" button.
            if stop_generation.is_set():
                break
            if "choices" in chunk and chunk["choices"]:
                token = chunk["choices"][0].get("text", "")
                if token:
                    partial += token
                    # Strip ChatML markers before displaying the partial answer.
                    cleaned = clean_output(partial)
                    local_hist[-1] = (str(user_message), cleaned)
                    yield local_hist, ""
    except Exception as e:
        # Surface the failure inside the chat instead of crashing the stream.
        err_text = f"[Erreur: {e}]"
        local_hist[-1] = (str(user_message), err_text)
        yield local_hist, ""
    finally:
        # Persist whatever was produced (complete, partial, or error text).
        with lock:
            conversations[current_chat_name] = local_hist.copy()
        yield local_hist, ""
# -------------------------
# Fonctions pour l'interface
# -------------------------
def new_conversation():
    """Create an empty conversation and select it in the dropdown."""
    with lock:
        title = f"Conversation {len(conversations) + 1}"
        conversations[title] = []
        all_names = list(conversations)
    return gr.update(choices=all_names, value=title), [], title
def load_conversation(conv_name):
    """Return a copy of the stored history for `conv_name` (empty if unknown)."""
    with lock:
        history = list(conversations.get(conv_name, []))
    return history, conv_name
def request_stop():
    """Signal the streaming generator to stop and report it to the UI."""
    stop_generation.set()
    return "🛑 Arrêt demandé..."
def clear_chat():
    """Wipe the default conversation and switch the UI back to it."""
    default_name = "Conversation 1"
    with lock:
        conversations[default_name] = []
    return [], default_name
# -------------------------
# Interface Gradio
# -------------------------
with gr.Blocks(title="Alisia Chat", theme=gr.themes.Soft()) as demo:
    # Name of the conversation currently displayed in the chatbot.
    current_chat = gr.State("Conversation 1")
    with gr.Row():
        # Left column: conversation management.
        with gr.Column(scale=1):
            with gr.Accordion("Conversations", open=True):
                conv_dropdown = gr.Dropdown(
                    choices=get_conv_names(),
                    value="Conversation 1",
                    label="Conversations",
                    interactive=True
                )
                with gr.Row():
                    new_conv_btn = gr.Button("➕ Nouvelle conversation")
                    clear_btn = gr.Button("🗑️ Effacer chat")
        # Right column: chat area.
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(label="Alisia")
            with gr.Row():
                msg_input = gr.Textbox(
                    placeholder="Posez votre question à Alisia…",
                    lines=3,
                    scale=4,
                )
                send_btn = gr.Button("Envoyer", scale=1)
                stop_btn = gr.Button("Arrêter", visible=False)
    # Events
    new_conv_btn.click(
        fn=new_conversation,
        outputs=[conv_dropdown, chatbot, current_chat]
    )
    clear_btn.click(
        fn=clear_chat,
        outputs=[chatbot, current_chat]
    )
    conv_dropdown.change(
        fn=load_conversation,
        inputs=[conv_dropdown],
        outputs=[chatbot, current_chat]
    )
    # Send flow: hide Send / show Stop -> stream the answer -> restore buttons.
    send_btn.click(
        fn=lambda: (gr.update(visible=False), gr.update(visible=True)),
        outputs=[send_btn, stop_btn],
        queue=False
    ).then(
        fn=send_message_stream,
        inputs=[msg_input, chatbot, current_chat],
        outputs=[chatbot, msg_input],
    ).then(
        fn=lambda: (gr.update(visible=True), gr.update(visible=False)),
        outputs=[send_btn, stop_btn],
        queue=False
    )
    # Pressing Enter in the textbox mirrors the Send-button flow above.
    msg_input.submit(
        fn=lambda: (gr.update(visible=False), gr.update(visible=True)),
        outputs=[send_btn, stop_btn],
        queue=False
    ).then(
        fn=send_message_stream,
        inputs=[msg_input, chatbot, current_chat],
        outputs=[chatbot, msg_input],
    ).then(
        fn=lambda: (gr.update(visible=True), gr.update(visible=False)),
        outputs=[send_btn, stop_btn],
        queue=False
    )
    # Cooperative cancellation: sets the shared Event read by send_message_stream.
    stop_btn.click(
        fn=request_stop,
        outputs=None
    )
# -------------------------
# LANCEMENT
# -------------------------
if __name__ == "__main__":
    print("🚀 Démarrage du serveur Alisia...")
    print("📱 Préparation du lien de partage...")
    demo.launch(
        share=True,               # create a public *.gradio.live share link
        server_name="0.0.0.0",    # listen on all interfaces
        server_port=7860,
        debug=False,
        show_error=True           # surface server exceptions in the UI
    )