import os
import re
import threading
import warnings

import gradio as gr
from llama_cpp import Llama

# -------------------------
# HUGGING FACE MODEL DOWNLOAD
# -------------------------
MODEL_REPO = "mradermacher/Alisia-7B-it-GGUF"
MODEL_NAME = "Alisia-7B-it.Q4_K_M.gguf"
MODEL_PATH = f"/tmp/{MODEL_NAME}"

# Download the GGUF weights on first run; fall back to a local copy on failure.
if not os.path.exists(MODEL_PATH):
    print("📥 Téléchargement du modèle depuis Hugging Face...")
    from huggingface_hub import hf_hub_download
    try:
        # NOTE: `resume_download=True` was dropped — it is deprecated in
        # current huggingface_hub releases (downloads always resume).
        hf_hub_download(
            repo_id=MODEL_REPO,
            filename=MODEL_NAME,
            local_dir="/tmp",
        )
        print("✅ Modèle téléchargé avec succès!")
    except Exception as e:
        print(f"❌ Erreur téléchargement: {e}")
        # Fallback: use a local path when the download fails.
        MODEL_PATH = "./Alisia-7B-it.Q4_K_M.gguf"
        print(f"🔄 Utilisation du chemin local: {MODEL_PATH}")

# -------------------------
# Configuration
# -------------------------
os.environ["LLAMA_CPP_LOG_LEVEL"] = "OFF"
warnings.filterwarnings("ignore")

print("⚡ Chargement du modèle Alisia-7B-it depuis Hugging Face...")
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=2048,
    n_gpu_layers=-1,  # offload all layers to GPU when one is available
    n_threads=8,
    verbose=False,
)
print("✅ Modèle chargé et prêt!")

# -------------------------
# State & synchronization
# -------------------------
lock = threading.Lock()                 # guards every access to `conversations`
conversations = {"Conversation 1": []}  # name -> list of (user, assistant) pairs
stop_generation = threading.Event()     # set to abort the current stream


# -------------------------
# Utility functions
# -------------------------
def clean_output(text: str) -> str:
    """Strip ChatML-style ``<|im_...|>`` control tokens from generated text."""
    return re.sub(r"<\|im_.*?\|>", "", text).strip()


def get_conv_names() -> list:
    """Return a thread-safe snapshot of the conversation names."""
    with lock:
        return list(conversations.keys())


def build_conversation_prompt(history, new_message: str) -> str:
    """Build an Alpaca-style prompt from `history` plus the new user turn.

    The system preamble is emitted on the first turn of *this* conversation.
    (Fix: the original tested the global `conversations` dict, so a fresh
    conversation silently lost the preamble once any other conversation
    already had messages.)
    """
    prompt = ""
    if not history:
        prompt += """Your name is Alisia, you are created by the Alisia research team. Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request. 
"""
    for user_msg, assistant_msg in history:
        prompt += f"### Instruction:\n{user_msg}\n\n### Response:\n{assistant_msg}\n\n"
    prompt += f"### Instruction:\n{new_message}\n\n### Response:\n"
    return prompt


def send_message_stream(user_message, displayed_history, current_chat_name):
    """Stream the model's reply token by token.

    Generator for Gradio: yields ``(history, "")`` pairs where `history`
    is a list of (user, assistant) tuples and the empty string clears the
    input textbox.  Persists the final history under `lock` when done.
    """
    # `global` removed: the Event is only mutated, never rebound.
    stop_generation.clear()

    if user_message is None or not str(user_message).strip():
        yield displayed_history or [], ""
        return

    with lock:
        if current_chat_name not in conversations:
            conversations[current_chat_name] = []
        local_hist = conversations[current_chat_name].copy()

    local_hist.append((str(user_message), ""))
    yield local_hist, ""

    formatted_prompt = build_conversation_prompt(local_hist[:-1], str(user_message))

    partial = ""
    try:
        for chunk in llm.create_completion(
            prompt=formatted_prompt,
            stream=True,
            max_tokens=1024,
            temperature=0.7,
            stop=["### Instruction:", "### Input:", "### Response:", "<|endoftext|>", "\n\n\n"],
            top_p=0.8,
            repeat_penalty=1.05,
        ):
            if stop_generation.is_set():
                break
            if "choices" in chunk and chunk["choices"]:
                token = chunk["choices"][0].get("text", "")
                if token:
                    partial += token
                    cleaned = clean_output(partial)
                    local_hist[-1] = (str(user_message), cleaned)
                    yield local_hist, ""
    except Exception as e:
        err_text = f"[Erreur: {e}]"
        local_hist[-1] = (str(user_message), err_text)
        yield local_hist, ""
    finally:
        # Persist the final state.  Do NOT yield here: a `yield` inside
        # `finally` raises RuntimeError when the client disconnects and
        # Gradio closes the generator (GeneratorExit).  The latest history
        # has already been yielded above, so the extra yield was redundant.
        with lock:
            conversations[current_chat_name] = local_hist.copy()


# -------------------------
# Interface callbacks
# -------------------------
def new_conversation():
    """Create a new empty conversation and select it in the dropdown."""
    with lock:
        name = f"Conversation {len(conversations) + 1}"
        conversations[name] = []
        names = list(conversations.keys())
    return gr.update(choices=names, value=name), [], name


def load_conversation(conv_name):
    """Return the stored history for `conv_name` (empty if unknown)."""
    with lock:
        hist = conversations.get(conv_name, []).copy()
    return hist, conv_name


def request_stop():
    """Ask the streaming loop to stop at the next token boundary."""
    stop_generation.set()
    return "🛑 Arrêt demandé..."
def clear_chat(current_chat_name="Conversation 1"):
    """Clear the history of the *currently selected* conversation.

    Fix: the original always wiped "Conversation 1" (and switched the
    state to it) no matter which conversation was selected.  The default
    keeps the old zero-argument call working.
    """
    with lock:
        conversations[current_chat_name] = []
    return [], current_chat_name


# -------------------------
# Gradio interface
# -------------------------
with gr.Blocks(title="Alisia Chat", theme=gr.themes.Soft()) as demo:
    current_chat = gr.State("Conversation 1")

    with gr.Row():
        with gr.Column(scale=1):
            with gr.Accordion("Conversations", open=True):
                conv_dropdown = gr.Dropdown(
                    choices=get_conv_names(),
                    value="Conversation 1",
                    label="Conversations",
                    interactive=True,
                )
                with gr.Row():
                    new_conv_btn = gr.Button("➕ Nouvelle conversation")
                    clear_btn = gr.Button("🗑️ Effacer chat")

        with gr.Column(scale=3):
            chatbot = gr.Chatbot(label="Alisia")
            with gr.Row():
                msg_input = gr.Textbox(
                    placeholder="Posez votre question à Alisia…",
                    lines=3,
                    scale=4,
                )
                send_btn = gr.Button("Envoyer", scale=1)
                stop_btn = gr.Button("Arrêter", visible=False)

    # Shared show/hide helpers (the original duplicated these lambdas four
    # times across the two submit paths).
    def _show_stop():
        # Swap Send -> Stop while a reply is streaming.
        return gr.update(visible=False), gr.update(visible=True)

    def _show_send():
        # Restore Send once streaming is over.
        return gr.update(visible=True), gr.update(visible=False)

    # Events
    new_conv_btn.click(
        fn=new_conversation,
        outputs=[conv_dropdown, chatbot, current_chat],
    )

    # Pass the selected conversation so the right history is cleared.
    clear_btn.click(
        fn=clear_chat,
        inputs=[current_chat],
        outputs=[chatbot, current_chat],
    )

    conv_dropdown.change(
        fn=load_conversation,
        inputs=[conv_dropdown],
        outputs=[chatbot, current_chat],
    )

    send_btn.click(
        fn=_show_stop,
        outputs=[send_btn, stop_btn],
        queue=False,
    ).then(
        fn=send_message_stream,
        inputs=[msg_input, chatbot, current_chat],
        outputs=[chatbot, msg_input],
    ).then(
        fn=_show_send,
        outputs=[send_btn, stop_btn],
        queue=False,
    )

    msg_input.submit(
        fn=_show_stop,
        outputs=[send_btn, stop_btn],
        queue=False,
    ).then(
        fn=send_message_stream,
        inputs=[msg_input, chatbot, current_chat],
        outputs=[chatbot, msg_input],
    ).then(
        fn=_show_send,
        outputs=[send_btn, stop_btn],
        queue=False,
    )

    stop_btn.click(
        fn=request_stop,
        outputs=None,
    )

# -------------------------
# LAUNCH
# -------------------------
if __name__ == "__main__":
    print("🚀 Démarrage du serveur Alisia...")
    print("📱 Préparation du lien de partage...")
    demo.launch(
        share=True,
        server_name="0.0.0.0",
        server_port=7860,
        debug=False,
        show_error=True,
    )