# app.py — Alisia-7B chat interface (Gradio + llama-cpp-python)
import os
import re
import threading
import warnings
import gradio as gr
from llama_cpp import Llama
# -------------------------
# DOWNLOAD THE MODEL FROM HUGGING FACE
# -------------------------
MODEL_REPO = "mradermacher/Alisia-7B-it-GGUF"
MODEL_NAME = "Alisia-7B-it.Q4_K_M.gguf"
MODEL_PATH = f"/tmp/{MODEL_NAME}"

# Download the GGUF file only if it is not already cached in /tmp.
if not os.path.exists(MODEL_PATH):
    print("📥 Téléchargement du modèle depuis Hugging Face...")
    from huggingface_hub import hf_hub_download
    try:
        # NOTE: `resume_download` is deprecated (and removed in recent
        # huggingface_hub releases) — downloads always resume now.
        # Use the path returned by hf_hub_download instead of assuming
        # the layout inside `local_dir`.
        MODEL_PATH = hf_hub_download(
            repo_id=MODEL_REPO,
            filename=MODEL_NAME,
            local_dir="/tmp",
        )
        print("✅ Modèle téléchargé avec succès!")
    except Exception as e:
        print(f"❌ Erreur téléchargement: {e}")
        # Fallback: use a path next to the script if the download fails.
        MODEL_PATH = "./Alisia-7B-it.Q4_K_M.gguf"
        print(f"🔄 Utilisation du chemin local: {MODEL_PATH}")
# -------------------------
# Configuration
# -------------------------
os.environ["LLAMA_CPP_LOG_LEVEL"] = "OFF"  # silence llama.cpp native logging
warnings.filterwarnings("ignore")
print("⚡ Chargement du modèle Alisia-7B-it depuis Hugging Face...")
# Load the GGUF model at import time (blocking).
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=2048,        # context window size in tokens
    n_gpu_layers=-1,   # -1 = offload all layers to the GPU when one is available
    n_threads=8,       # CPU threads for inference
    verbose=False
)
print("✅ Modèle chargé et prêt!")
# -------------------------
# State & synchronisation
# -------------------------
lock = threading.Lock()                 # guards every access to `conversations`
conversations = {"Conversation 1": []}  # name -> list of (user, assistant) pairs
stop_generation = threading.Event()     # set by the UI to abort an in-flight stream
# -------------------------
# Fonctions utilitaires
# -------------------------
def clean_output(text: str) -> str:
    """Remove ChatML-style ``<|im_...|>`` marker tokens and trim whitespace."""
    without_markers = re.sub(r"<\|im_.*?\|>", "", text)
    return without_markers.strip()
def get_conv_names():
    """Return a thread-safe snapshot of all conversation names, in insertion order."""
    with lock:
        return [name for name in conversations]
def build_conversation_prompt(history, new_message):
    """Build an Alpaca-style prompt from a conversation history.

    Args:
        history: list of (user_msg, assistant_msg) pairs already exchanged
            in THIS conversation.
        new_message: the new user message to append as the final instruction.

    Returns:
        The full prompt string, ending with an open "### Response:" section
        for the model to complete.

    BUG FIX: the original tested the module-global `conversations` dict to
    decide whether to prepend the system preamble, so a fresh conversation
    started after any other chat had messages never received it.  The
    preamble now depends only on the `history` that was passed in.
    """
    prompt = ""
    # Prepend the system preamble only on the first turn of this conversation.
    if not history:
        prompt += """Your name is Alisia, you are created by the Alisia research team.
Below is an instruction that describes a task, paired with an input that provides further context.
Write a response that appropriately completes the request.
"""
    for user_msg, assistant_msg in history:
        prompt += f"### Instruction:\n{user_msg}\n\n### Response:\n{assistant_msg}\n\n"
    prompt += f"### Instruction:\n{new_message}\n\n### Response:\n"
    return prompt
def send_message_stream(user_message, displayed_history, current_chat_name):
    """Generator: stream the model's reply token by token into the chatbot.

    Yields (history, textbox_value) pairs so Gradio can live-update the
    Chatbot component and clear the input box.  The final history is written
    back into the shared `conversations` dict under `lock`.
    """
    global stop_generation  # NOTE(review): only methods are called on the Event, so `global` is not strictly required
    stop_generation.clear()
    # Ignore empty / whitespace-only submissions.
    if user_message is None or not str(user_message).strip():
        yield displayed_history or [], ""
        return
    # Snapshot this conversation's history under the lock; mutate only the local copy.
    with lock:
        if current_chat_name not in conversations:
            conversations[current_chat_name] = []
        local_hist = conversations[current_chat_name].copy()
    # Show the user's message immediately with an empty assistant slot.
    local_hist.append((str(user_message), ""))
    yield local_hist, ""
    # Prompt is built from the history EXCLUDING the placeholder pair just appended.
    formatted_prompt = build_conversation_prompt(local_hist[:-1], str(user_message))
    partial = ""
    try:
        for chunk in llm.create_completion(
            prompt=formatted_prompt,
            stream=True,
            max_tokens=1024,
            temperature=0.7,
            stop=["### Instruction:", "### Input:", "### Response:", "<|endoftext|>", "\n\n\n"],
            top_p=0.8,
            repeat_penalty=1.05
        ):
            # Cooperative cancellation requested from the UI "stop" button.
            if stop_generation.is_set():
                break
            if "choices" in chunk and chunk["choices"]:
                token = chunk["choices"][0].get("text", "")
                if token:
                    partial += token
                    # Strip ChatML markers before displaying the partial answer.
                    cleaned = clean_output(partial)
                    local_hist[-1] = (str(user_message), cleaned)
                    yield local_hist, ""
    except Exception as e:
        # Surface the failure inside the chat instead of crashing the stream.
        err_text = f"[Erreur: {e}]"
        local_hist[-1] = (str(user_message), err_text)
        yield local_hist, ""
    finally:
        # Persist whatever was produced (complete, partial, or error text).
        with lock:
            conversations[current_chat_name] = local_hist.copy()
        yield local_hist, ""
# -------------------------
# Fonctions pour l'interface
# -------------------------
def new_conversation():
    """Create an empty conversation and select it in the dropdown."""
    with lock:
        title = f"Conversation {len(conversations) + 1}"
        conversations[title] = []
        all_names = list(conversations)
    return gr.update(choices=all_names, value=title), [], title
def load_conversation(conv_name):
    """Return a copy of the stored history for `conv_name` (empty if unknown)."""
    with lock:
        history = list(conversations.get(conv_name, []))
    return history, conv_name
def request_stop():
    """Signal the streaming generator to stop and report it to the UI."""
    stop_generation.set()
    return "🛑 Arrêt demandé..."
def clear_chat():
    """Wipe the default conversation and switch the UI back to it."""
    default_name = "Conversation 1"
    with lock:
        conversations[default_name] = []
    return [], default_name
# -------------------------
# Interface Gradio
# -------------------------
with gr.Blocks(title="Alisia Chat", theme=gr.themes.Soft()) as demo:
    # Name of the conversation currently displayed in the chatbot.
    current_chat = gr.State("Conversation 1")
    with gr.Row():
        # Left column: conversation management.
        with gr.Column(scale=1):
            with gr.Accordion("Conversations", open=True):
                conv_dropdown = gr.Dropdown(
                    choices=get_conv_names(),
                    value="Conversation 1",
                    label="Conversations",
                    interactive=True
                )
                with gr.Row():
                    new_conv_btn = gr.Button("➕ Nouvelle conversation")
                    clear_btn = gr.Button("🗑️ Effacer chat")
        # Right column: chat area.
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(label="Alisia")
            with gr.Row():
                msg_input = gr.Textbox(
                    placeholder="Posez votre question à Alisia…",
                    lines=3,
                    scale=4,
                )
                send_btn = gr.Button("Envoyer", scale=1)
                stop_btn = gr.Button("Arrêter", visible=False)
    # Events
    new_conv_btn.click(
        fn=new_conversation,
        outputs=[conv_dropdown, chatbot, current_chat]
    )
    clear_btn.click(
        fn=clear_chat,
        outputs=[chatbot, current_chat]
    )
    conv_dropdown.change(
        fn=load_conversation,
        inputs=[conv_dropdown],
        outputs=[chatbot, current_chat]
    )
    # Send flow: hide Send / show Stop -> stream the answer -> restore buttons.
    send_btn.click(
        fn=lambda: (gr.update(visible=False), gr.update(visible=True)),
        outputs=[send_btn, stop_btn],
        queue=False
    ).then(
        fn=send_message_stream,
        inputs=[msg_input, chatbot, current_chat],
        outputs=[chatbot, msg_input],
    ).then(
        fn=lambda: (gr.update(visible=True), gr.update(visible=False)),
        outputs=[send_btn, stop_btn],
        queue=False
    )
    # Pressing Enter in the textbox mirrors the Send-button flow above.
    msg_input.submit(
        fn=lambda: (gr.update(visible=False), gr.update(visible=True)),
        outputs=[send_btn, stop_btn],
        queue=False
    ).then(
        fn=send_message_stream,
        inputs=[msg_input, chatbot, current_chat],
        outputs=[chatbot, msg_input],
    ).then(
        fn=lambda: (gr.update(visible=True), gr.update(visible=False)),
        outputs=[send_btn, stop_btn],
        queue=False
    )
    # Cooperative cancellation: sets the shared Event read by send_message_stream.
    stop_btn.click(
        fn=request_stop,
        outputs=None
    )
# -------------------------
# LANCEMENT
# -------------------------
if __name__ == "__main__":
    print("🚀 Démarrage du serveur Alisia...")
    print("📱 Préparation du lien de partage...")
    demo.launch(
        share=True,               # create a public *.gradio.live share link
        server_name="0.0.0.0",    # listen on all interfaces
        server_port=7860,
        debug=False,
        show_error=True           # surface server exceptions in the UI
    )