File size: 7,460 Bytes
b905978
 
 
 
 
 
 
 
25e486f
a48aff7
b905978
 
 
 
 
 
25e486f
b905978
 
 
 
 
 
 
 
 
 
 
25e486f
 
 
b905978
 
25e486f
b905978
 
 
 
25e486f
a6acd24
b905978
 
13dd02d
dd7dfc5
25e486f
a6acd24
b905978
 
25e486f
b905978
 
25e486f
b905978
 
 
 
 
 
25e486f
b905978
 
 
 
 
 
 
 
 
e9bde8d
25e486f
 
13dd02d
 
 
e9bde8d
 
25e486f
13dd02d
e9bde8d
25e486f
e9bde8d
25e486f
846ecc9
b905978
 
 
25e486f
b905978
 
25e486f
b905978
 
 
 
 
 
 
 
 
 
 
 
25e486f
b905978
 
25e486f
b905978
2d04301
13dd02d
b905978
25e486f
13dd02d
25e486f
 
b905978
 
 
 
 
 
13dd02d
 
 
 
b905978
 
 
 
 
 
 
 
 
 
 
2d04301
25e486f
2d04301
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b905978
25e486f
b905978
a48aff7
2d04301
 
b905978
a48aff7
 
2d04301
 
 
 
a48aff7
2d04301
a48aff7
 
 
 
 
 
 
 
 
 
 
2d04301
a48aff7
 
2d04301
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b905978
 
2d04301
b905978
 
 
 
2d04301
b905978
 
 
 
 
 
 
 
2d04301
b905978
 
 
 
2d04301
b905978
 
 
 
 
 
 
 
2d04301
b905978
 
 
 
 
 
 
25e486f
 
 
b905978
a48aff7
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
import os
import re
import threading
import warnings
import gradio as gr
from llama_cpp import Llama

# -------------------------
# HUGGING FACE MODEL DOWNLOAD
# -------------------------
MODEL_REPO = "mradermacher/Alisia-7B-it-GGUF"
MODEL_NAME = "Alisia-7B-it.Q4_K_M.gguf"
MODEL_PATH = f"/tmp/{MODEL_NAME}"

# Fetch the GGUF file once; skip when a previous run already cached it in /tmp.
if not os.path.exists(MODEL_PATH):
    print("📥 Téléchargement du modèle depuis Hugging Face...")
    from huggingface_hub import hf_hub_download
    try:
        # Use the path returned by hf_hub_download rather than re-deriving it.
        # NOTE: `resume_download` was dropped — it is deprecated in current
        # huggingface_hub releases (resuming is the default behaviour).
        MODEL_PATH = hf_hub_download(
            repo_id=MODEL_REPO,
            filename=MODEL_NAME,
            local_dir="/tmp",
        )
        print("✅ Modèle téléchargé avec succès!")
    except Exception as e:
        print(f"❌ Erreur téléchargement: {e}")
        # Fallback: use a local file next to the script if the download fails
        MODEL_PATH = "./Alisia-7B-it.Q4_K_M.gguf"
        print(f"🔄 Utilisation du chemin local: {MODEL_PATH}")

# -------------------------
# Configuration
# -------------------------
# Silence llama.cpp's native logging and all Python warnings for a clean console.
os.environ["LLAMA_CPP_LOG_LEVEL"] = "OFF"
warnings.filterwarnings("ignore")

print("⚡ Chargement du modèle Alisia-7B-it depuis Hugging Face...")

# Load the GGUF model.
#   n_ctx=2048        — context window size in tokens
#   n_gpu_layers=-1   — request all layers on GPU (presumably a no-op on
#                       CPU-only llama-cpp builds — confirm for the target host)
#   n_threads=8       — CPU threads for inference
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=2048,
    n_gpu_layers=-1,
    n_threads=8,
    verbose=False
)

print("✅ Modèle chargé et prêt!")

# -------------------------
# State & synchronization
# -------------------------
# Guards every access to `conversations` (Gradio handlers can run concurrently).
lock = threading.Lock()
# Conversation name -> list of (user_message, assistant_reply) tuples.
conversations = {"Conversation 1": []}
# Set by the Stop button; polled between streamed tokens to abort generation.
stop_generation = threading.Event()

# -------------------------
# Utility helpers
# -------------------------
# Chat-template markers such as <|im_start|> / <|im_end|> that may leak
# into the model output.
_IM_TAG_RE = re.compile(r"<\|im_.*?\|>")

def clean_output(text: str) -> str:
    """Remove ``<|im_...|>`` template markers and trim surrounding whitespace."""
    without_markers = _IM_TAG_RE.sub("", text)
    return without_markers.strip()

def get_conv_names():
    """Return a thread-safe snapshot of all conversation names."""
    with lock:
        return [*conversations]

def build_conversation_prompt(history, new_message):
    """Build an Alpaca-style prompt from a conversation's history plus a new message.

    Args:
        history: list of ``(user_msg, assistant_msg)`` tuples already exchanged
            in THIS conversation.
        new_message: the user's latest message; appended as the final
            instruction with an open "### Response:" slot for the model.

    Returns:
        The formatted prompt string.
    """
    prompt = ""

    # Include the identity/system preamble only at the start of a conversation.
    # BUG FIX: the original tested the global `conversations` dict, so the
    # preamble was emitted only while EVERY conversation in the app was empty —
    # new conversations never received it once any chat had content.
    if not history:
        prompt += """Your name is Alisia, you are created by the Alisia research team.
Below is an instruction that describes a task, paired with an input that provides further context.
Write a response that appropriately completes the request.

"""

    for user_msg, assistant_msg in history:
        prompt += f"### Instruction:\n{user_msg}\n\n### Response:\n{assistant_msg}\n\n"

    prompt += f"### Instruction:\n{new_message}\n\n### Response:\n"

    return prompt

def send_message_stream(user_message, displayed_history, current_chat_name):
    """Stream an assistant reply token-by-token for the Gradio chat UI.

    Generator yielding ``(history, textbox_value)`` pairs: *history* is the
    updated list of ``(user, assistant)`` tuples for the Chatbot component,
    and the empty string clears the input textbox.

    Args:
        user_message: text typed by the user; blank/None input is a no-op.
        displayed_history: history currently shown in the Chatbot — only used
            for the early-exit yield; the authoritative history lives in the
            global `conversations` dict.
        current_chat_name: key into `conversations`.
    """
    # NOTE: `global stop_generation` was removed — the Event is only mutated
    # through its methods, never rebound, so the declaration was dead weight.

    # Discard any stop request left over from a previous generation.
    stop_generation.clear()

    if user_message is None or not str(user_message).strip():
        yield displayed_history or [], ""
        return

    # Snapshot the authoritative history under the lock, then work on a copy
    # so the shared dict is only touched at well-defined points.
    with lock:
        if current_chat_name not in conversations:
            conversations[current_chat_name] = []
        local_hist = conversations[current_chat_name].copy()

    # Show the user's message immediately with an empty assistant slot.
    local_hist.append((str(user_message), ""))
    yield local_hist, ""

    formatted_prompt = build_conversation_prompt(local_hist[:-1], str(user_message))

    partial = ""
    try:
        for chunk in llm.create_completion(
            prompt=formatted_prompt,
            stream=True,
            max_tokens=1024,
            temperature=0.7,
            stop=["### Instruction:", "### Input:", "### Response:", "<|endoftext|>", "\n\n\n"],
            top_p=0.8,
            repeat_penalty=1.05
        ):
            # Honour the Stop button between tokens.
            if stop_generation.is_set():
                break

            if "choices" in chunk and chunk["choices"]:
                token = chunk["choices"][0].get("text", "")
                if token:
                    partial += token
                    cleaned = clean_output(partial)
                    local_hist[-1] = (str(user_message), cleaned)
                    yield local_hist, ""

    except Exception as e:
        # Surface generation errors in the chat rather than crashing the UI.
        err_text = f"[Erreur: {e}]"
        local_hist[-1] = (str(user_message), err_text)
        yield local_hist, ""

    finally:
        # Persist whatever was produced — complete, stopped, or errored.
        with lock:
            conversations[current_chat_name] = local_hist.copy()
        yield local_hist, ""

# -------------------------
# UI callback functions
# -------------------------
def new_conversation():
    """Register a fresh, empty conversation and make it the active one."""
    with lock:
        new_name = f"Conversation {len(conversations) + 1}"
        conversations[new_name] = []
        all_names = list(conversations)
    return gr.update(choices=all_names, value=new_name), [], new_name

def load_conversation(conv_name):
    """Fetch a copy of the named conversation's history for display."""
    with lock:
        snapshot = list(conversations.get(conv_name, []))
    return snapshot, conv_name

def request_stop():
    """Signal the streaming loop to abort at the next token boundary."""
    stop_generation.set()
    return "🛑 Arrêt demandé..."

def clear_chat(conv_name="Conversation 1"):
    """Erase a conversation's history and switch the UI back to it.

    Args:
        conv_name: conversation to clear. Defaults to "Conversation 1" so the
            existing no-argument wiring keeps its behaviour; callers can now
            pass the active conversation name instead of always wiping chat 1.

    Returns:
        (empty chatbot history, name of the cleared conversation).
    """
    with lock:
        conversations[conv_name] = []
    return [], conv_name

# -------------------------
# Gradio interface
# -------------------------
with gr.Blocks(title="Alisia Chat", theme=gr.themes.Soft()) as demo:
    # Name of the conversation currently displayed (per-session Gradio state).
    current_chat = gr.State("Conversation 1")

    with gr.Row():
        # Left column: conversation management.
        with gr.Column(scale=1):
            with gr.Accordion("Conversations", open=True):
                conv_dropdown = gr.Dropdown(
                    choices=get_conv_names(),
                    value="Conversation 1",
                    label="Conversations",
                    interactive=True
                )
                with gr.Row():
                    new_conv_btn = gr.Button("➕ Nouvelle conversation")
                    clear_btn = gr.Button("🗑️ Effacer chat")

        # Right column: the chat itself.
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(label="Alisia")
            with gr.Row():
                msg_input = gr.Textbox(
                    placeholder="Posez votre question à Alisia…",
                    lines=3,
                    scale=4,
                )
                send_btn = gr.Button("Envoyer", scale=1)
                # Hidden by default; swapped with the Send button during streaming.
                stop_btn = gr.Button("Arrêter", visible=False)

    # Event wiring
    new_conv_btn.click(
        fn=new_conversation,
        outputs=[conv_dropdown, chatbot, current_chat]
    )

    clear_btn.click(
        fn=clear_chat,
        outputs=[chatbot, current_chat]
    )

    conv_dropdown.change(
        fn=load_conversation,
        inputs=[conv_dropdown],
        outputs=[chatbot, current_chat]
    )

    # Send flow: hide Send / show Stop, stream the reply, then restore buttons.
    # queue=False keeps the button toggles instant (they bypass the queue).
    send_btn.click(
        fn=lambda: (gr.update(visible=False), gr.update(visible=True)),
        outputs=[send_btn, stop_btn],
        queue=False
    ).then(
        fn=send_message_stream,
        inputs=[msg_input, chatbot, current_chat],
        outputs=[chatbot, msg_input],
    ).then(
        fn=lambda: (gr.update(visible=True), gr.update(visible=False)),
        outputs=[send_btn, stop_btn],
        queue=False
    )

    # Same flow when the user presses Enter in the textbox.
    msg_input.submit(
        fn=lambda: (gr.update(visible=False), gr.update(visible=True)),
        outputs=[send_btn, stop_btn],
        queue=False
    ).then(
        fn=send_message_stream,
        inputs=[msg_input, chatbot, current_chat],
        outputs=[chatbot, msg_input],
    ).then(
        fn=lambda: (gr.update(visible=True), gr.update(visible=False)),
        outputs=[send_btn, stop_btn],
        queue=False
    )

    stop_btn.click(
        fn=request_stop,
        outputs=None
    )

# -------------------------
# LAUNCH
# -------------------------
if __name__ == "__main__":
    print("🚀 Démarrage du serveur Alisia...")
    print("📱 Préparation du lien de partage...")

    # share=True requests a public gradio.live tunnel; bind on all interfaces
    # at the default Gradio port so the app is reachable inside containers.
    demo.launch(
        share=True,
        server_name="0.0.0.0",
        server_port=7860,
        debug=False,
        show_error=True
    )