File size: 7,460 Bytes
b905978
 
 
 
 
 
 
 
25e486f
a48aff7
b905978
 
 
 
 
 
25e486f
b905978
 
 
 
 
 
 
 
 
 
 
25e486f
 
 
b905978
 
25e486f
b905978
 
 
 
25e486f
a6acd24
b905978
 
13dd02d
dd7dfc5
25e486f
a6acd24
b905978
 
25e486f
b905978
 
25e486f
b905978
 
 
 
 
 
25e486f
b905978
 
 
 
 
 
 
 
 
e9bde8d
25e486f
 
13dd02d
 
 
e9bde8d
 
25e486f
13dd02d
e9bde8d
25e486f
e9bde8d
25e486f
846ecc9
b905978
 
 
25e486f
b905978
 
25e486f
b905978
 
 
 
 
 
 
 
 
 
 
 
25e486f
b905978
 
25e486f
b905978
2d04301
13dd02d
b905978
25e486f
13dd02d
25e486f
 
b905978
 
 
 
 
 
13dd02d
 
 
 
b905978
 
 
 
 
 
 
 
 
 
 
2d04301
25e486f
2d04301
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b905978
25e486f
b905978
a48aff7
2d04301
 
b905978
a48aff7
 
2d04301
 
 
 
a48aff7
2d04301
a48aff7
 
 
 
 
 
 
 
 
 
 
2d04301
a48aff7
 
2d04301
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b905978
 
2d04301
b905978
 
 
 
2d04301
b905978
 
 
 
 
 
 
 
2d04301
b905978
 
 
 
2d04301
b905978
 
 
 
 
 
 
 
2d04301
b905978
 
 
 
 
 
 
25e486f
 
 
b905978
a48aff7
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
import os
import re
import threading
import warnings
import gradio as gr
from llama_cpp import Llama

# -------------------------
# HUGGING FACE MODEL DOWNLOAD
# -------------------------
MODEL_REPO = "mradermacher/Alisia-7B-it-GGUF"
MODEL_NAME = "Alisia-7B-it.Q4_K_M.gguf"
MODEL_PATH = f"/tmp/{MODEL_NAME}"

# Fetch the GGUF file once; skip when a previous run already cached it in /tmp.
if not os.path.exists(MODEL_PATH):
    print("📥 Téléchargement du modèle depuis Hugging Face...")
    from huggingface_hub import hf_hub_download
    try:
        # Use the path returned by hf_hub_download rather than re-deriving it.
        # NOTE: `resume_download` was dropped — it is deprecated in current
        # huggingface_hub releases (resuming is the default behaviour).
        MODEL_PATH = hf_hub_download(
            repo_id=MODEL_REPO,
            filename=MODEL_NAME,
            local_dir="/tmp",
        )
        print("✅ Modèle téléchargé avec succès!")
    except Exception as e:
        print(f"❌ Erreur téléchargement: {e}")
        # Fallback: use a local file next to the script if the download fails
        MODEL_PATH = "./Alisia-7B-it.Q4_K_M.gguf"
        print(f"🔄 Utilisation du chemin local: {MODEL_PATH}")

# -------------------------
# Configuration
# -------------------------
# Silence llama.cpp's native logging and all Python warnings for a clean console.
os.environ["LLAMA_CPP_LOG_LEVEL"] = "OFF"
warnings.filterwarnings("ignore")

print("⚡ Chargement du modèle Alisia-7B-it depuis Hugging Face...")

# Load the GGUF model.
#   n_ctx=2048        — context window size in tokens
#   n_gpu_layers=-1   — request all layers on GPU (presumably a no-op on
#                       CPU-only llama-cpp builds — confirm for the target host)
#   n_threads=8       — CPU threads for inference
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=2048,
    n_gpu_layers=-1,
    n_threads=8,
    verbose=False
)

print("✅ Modèle chargé et prêt!")

# -------------------------
# State & synchronization
# -------------------------
# Guards every access to `conversations` (Gradio handlers can run concurrently).
lock = threading.Lock()
# Conversation name -> list of (user_message, assistant_reply) tuples.
conversations = {"Conversation 1": []}
# Set by the Stop button; polled between streamed tokens to abort generation.
stop_generation = threading.Event()

# -------------------------
# Utility helpers
# -------------------------
# Chat-template markers such as <|im_start|> / <|im_end|> that may leak
# into the model output.
_IM_TAG_RE = re.compile(r"<\|im_.*?\|>")

def clean_output(text: str) -> str:
    """Remove ``<|im_...|>`` template markers and trim surrounding whitespace."""
    without_markers = _IM_TAG_RE.sub("", text)
    return without_markers.strip()

def get_conv_names():
    """Return a thread-safe snapshot of all conversation names."""
    with lock:
        return [*conversations]

def build_conversation_prompt(history, new_message):
    """Build an Alpaca-style prompt from a conversation's history plus a new message.

    Args:
        history: list of ``(user_msg, assistant_msg)`` tuples already exchanged
            in THIS conversation.
        new_message: the user's latest message; appended as the final
            instruction with an open "### Response:" slot for the model.

    Returns:
        The formatted prompt string.
    """
    prompt = ""

    # Include the identity/system preamble only at the start of a conversation.
    # BUG FIX: the original tested the global `conversations` dict, so the
    # preamble was emitted only while EVERY conversation in the app was empty —
    # new conversations never received it once any chat had content.
    if not history:
        prompt += """Your name is Alisia, you are created by the Alisia research team.
Below is an instruction that describes a task, paired with an input that provides further context.
Write a response that appropriately completes the request.

"""

    for user_msg, assistant_msg in history:
        prompt += f"### Instruction:\n{user_msg}\n\n### Response:\n{assistant_msg}\n\n"

    prompt += f"### Instruction:\n{new_message}\n\n### Response:\n"

    return prompt

def send_message_stream(user_message, displayed_history, current_chat_name):
    """Stream an assistant reply token-by-token for the Gradio chat UI.

    Generator yielding ``(history, textbox_value)`` pairs: *history* is the
    updated list of ``(user, assistant)`` tuples for the Chatbot component,
    and the empty string clears the input textbox.

    Args:
        user_message: text typed by the user; blank/None input is a no-op.
        displayed_history: history currently shown in the Chatbot — only used
            for the early-exit yield; the authoritative history lives in the
            global `conversations` dict.
        current_chat_name: key into `conversations`.
    """
    # NOTE: `global stop_generation` was removed — the Event is only mutated
    # through its methods, never rebound, so the declaration was dead weight.

    # Discard any stop request left over from a previous generation.
    stop_generation.clear()

    if user_message is None or not str(user_message).strip():
        yield displayed_history or [], ""
        return

    # Snapshot the authoritative history under the lock, then work on a copy
    # so the shared dict is only touched at well-defined points.
    with lock:
        if current_chat_name not in conversations:
            conversations[current_chat_name] = []
        local_hist = conversations[current_chat_name].copy()

    # Show the user's message immediately with an empty assistant slot.
    local_hist.append((str(user_message), ""))
    yield local_hist, ""

    formatted_prompt = build_conversation_prompt(local_hist[:-1], str(user_message))

    partial = ""
    try:
        for chunk in llm.create_completion(
            prompt=formatted_prompt,
            stream=True,
            max_tokens=1024,
            temperature=0.7,
            stop=["### Instruction:", "### Input:", "### Response:", "<|endoftext|>", "\n\n\n"],
            top_p=0.8,
            repeat_penalty=1.05
        ):
            # Honour the Stop button between tokens.
            if stop_generation.is_set():
                break

            if "choices" in chunk and chunk["choices"]:
                token = chunk["choices"][0].get("text", "")
                if token:
                    partial += token
                    cleaned = clean_output(partial)
                    local_hist[-1] = (str(user_message), cleaned)
                    yield local_hist, ""

    except Exception as e:
        # Surface generation errors in the chat rather than crashing the UI.
        err_text = f"[Erreur: {e}]"
        local_hist[-1] = (str(user_message), err_text)
        yield local_hist, ""

    finally:
        # Persist whatever was produced — complete, stopped, or errored.
        with lock:
            conversations[current_chat_name] = local_hist.copy()
        yield local_hist, ""

# -------------------------
# UI callback functions
# -------------------------
def new_conversation():
    """Register a fresh, empty conversation and make it the active one."""
    with lock:
        new_name = f"Conversation {len(conversations) + 1}"
        conversations[new_name] = []
        all_names = list(conversations)
    return gr.update(choices=all_names, value=new_name), [], new_name

def load_conversation(conv_name):
    """Fetch a copy of the named conversation's history for display."""
    with lock:
        snapshot = list(conversations.get(conv_name, []))
    return snapshot, conv_name

def request_stop():
    """Signal the streaming loop to abort at the next token boundary."""
    stop_generation.set()
    return "🛑 Arrêt demandé..."

def clear_chat(conv_name="Conversation 1"):
    """Erase a conversation's history and switch the UI back to it.

    Args:
        conv_name: conversation to clear. Defaults to "Conversation 1" so the
            existing no-argument wiring keeps its behaviour; callers can now
            pass the active conversation name instead of always wiping chat 1.

    Returns:
        (empty chatbot history, name of the cleared conversation).
    """
    with lock:
        conversations[conv_name] = []
    return [], conv_name

# -------------------------
# Gradio interface
# -------------------------
with gr.Blocks(title="Alisia Chat", theme=gr.themes.Soft()) as demo:
    # Name of the conversation currently displayed (per-session Gradio state).
    current_chat = gr.State("Conversation 1")

    with gr.Row():
        # Left column: conversation management.
        with gr.Column(scale=1):
            with gr.Accordion("Conversations", open=True):
                conv_dropdown = gr.Dropdown(
                    choices=get_conv_names(),
                    value="Conversation 1",
                    label="Conversations",
                    interactive=True
                )
                with gr.Row():
                    new_conv_btn = gr.Button("➕ Nouvelle conversation")
                    clear_btn = gr.Button("🗑️ Effacer chat")

        # Right column: the chat itself.
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(label="Alisia")
            with gr.Row():
                msg_input = gr.Textbox(
                    placeholder="Posez votre question à Alisia…",
                    lines=3,
                    scale=4,
                )
                send_btn = gr.Button("Envoyer", scale=1)
                # Hidden by default; swapped with the Send button during streaming.
                stop_btn = gr.Button("Arrêter", visible=False)

    # Event wiring
    new_conv_btn.click(
        fn=new_conversation,
        outputs=[conv_dropdown, chatbot, current_chat]
    )

    clear_btn.click(
        fn=clear_chat,
        outputs=[chatbot, current_chat]
    )

    conv_dropdown.change(
        fn=load_conversation,
        inputs=[conv_dropdown],
        outputs=[chatbot, current_chat]
    )

    # Send flow: hide Send / show Stop, stream the reply, then restore buttons.
    # queue=False keeps the button toggles instant (they bypass the queue).
    send_btn.click(
        fn=lambda: (gr.update(visible=False), gr.update(visible=True)),
        outputs=[send_btn, stop_btn],
        queue=False
    ).then(
        fn=send_message_stream,
        inputs=[msg_input, chatbot, current_chat],
        outputs=[chatbot, msg_input],
    ).then(
        fn=lambda: (gr.update(visible=True), gr.update(visible=False)),
        outputs=[send_btn, stop_btn],
        queue=False
    )

    # Same flow when the user presses Enter in the textbox.
    msg_input.submit(
        fn=lambda: (gr.update(visible=False), gr.update(visible=True)),
        outputs=[send_btn, stop_btn],
        queue=False
    ).then(
        fn=send_message_stream,
        inputs=[msg_input, chatbot, current_chat],
        outputs=[chatbot, msg_input],
    ).then(
        fn=lambda: (gr.update(visible=True), gr.update(visible=False)),
        outputs=[send_btn, stop_btn],
        queue=False
    )

    stop_btn.click(
        fn=request_stop,
        outputs=None
    )

# -------------------------
# LAUNCH
# -------------------------
if __name__ == "__main__":
    print("🚀 Démarrage du serveur Alisia...")
    print("📱 Préparation du lien de partage...")

    # share=True requests a public gradio.live tunnel; bind on all interfaces
    # at the default Gradio port so the app is reachable inside containers.
    demo.launch(
        share=True,
        server_name="0.0.0.0",
        server_port=7860,
        debug=False,
        show_error=True
    )