import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
# 1. Download the model
print("⬇️ Downloading model...")
model_path = hf_hub_download(
    repo_id="XY26/dual-frame-llama-3",
    filename="meta-llama-3.1-8b-instruct.Q4_K_M.gguf"
)
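# hf_hub_download caches the file in the local Hugging Face cache, so a Space
# restart reuses the existing download instead of fetching the GGUF again.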
# 2. Load the engine
print("⚙️ Loading engine...")
llm = Llama(
    model_path=model_path,
    n_ctx=4096,       # context window in tokens
    n_threads=2,      # CPU threads used for generation
    verbose=False
)
def smart_response(message, history):
    try:
        messages = []
        # 1. System prompt
        system_prompt = """You are a helpful AI assistant.
- If asked for ADVICE/OPINION: Provide **Gain Frame** and **Loss Frame**.
- If asked for FACTS/CHAT: Answer directly.
Note: You are a small Quantized model, admit if you don't know."""
        messages.append({"role": "system", "content": system_prompt})
        # 2. Safe conversation memory
        for turn in history:
            # Make sure this is a list/tuple with at least two elements (user, bot)
            if isinstance(turn, (list, tuple)) and len(turn) >= 2:
                user_msg = turn[0]
                bot_msg = turn[1]
                # Only replay the turn if neither message is empty
                if user_msg is not None and bot_msg is not None:
                    messages.append({"role": "user", "content": str(user_msg)})
                    messages.append({"role": "assistant", "content": str(bot_msg)})
        # 3. Current question
        messages.append({"role": "user", "content": message})
        # 4. Generation (streamed)
        stream = llm.create_chat_completion(
            messages=messages,
            max_tokens=1024,
            stream=True,
            temperature=0.7
        )
        partial_message = ""
        for chunk in stream:
            # Each streamed chunk carries an OpenAI-style delta with the newly generated text
            delta = chunk['choices'][0]['delta']
            if 'content' in delta:
                token = delta['content']
                partial_message += token
                yield partial_message
    except Exception as e:
        # If something goes wrong, report it cleanly instead of breaking the interface
        print(f"❌ Error: {e}")
        yield f"⚠️ Oops, a technical error occurred: {e}. Try clicking 'New Chat'."
# 3. Interface
demo = gr.ChatInterface(
    fn=smart_response,
    title="🤖 Smart Decision Architect (Safe Mode)",
    description="Ask factual questions or ask for advice."
)
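# queue() routes requests through Gradio's queue, which lets the smart_response
# generator stream partial answers to the browser (required for streaming on
# older Gradio versions).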
if __name__ == "__main__":
    demo.queue().launch(server_name="0.0.0.0", server_port=7860)