# NOTE(review): the lines below are web-page extraction residue (Spaces status,
# file size, commit hashes, and a line-number gutter), not program code.
# Commented out so the module parses; original text preserved verbatim:
# Spaces:
# Running
# Running
# File size: 2,568 Bytes
# 45a16be af27b58 7ab81da 45a16be aa06844 ef646ae c11accd af27b58 c11accd 7ab81da ef646ae af27b58 aa06844 069371b d13ada5 7ab81da 45a16be 4ba3252 069371b aa06844 069371b aa06844 069371b 4ba3252 3bbc83b 45a16be 4ba3252 069371b aa06844 45a16be 069371b |
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
# 1. Fetch the quantized GGUF weights from the Hugging Face Hub
#    (cached locally after the first download).
print("⬇️ Downloading model...")
_REPO_ID = "XY26/dual-frame-llama-3"
_MODEL_FILE = "meta-llama-3.1-8b-instruct.Q4_K_M.gguf"
model_path = hf_hub_download(repo_id=_REPO_ID, filename=_MODEL_FILE)
# 2. Spin up the llama.cpp inference engine on the downloaded weights.
print("⚙️ Loading engine...")
_ENGINE_OPTS = {
    "n_ctx": 4096,     # context window size
    "n_threads": 2,    # CPU threads for inference
    "verbose": False,  # keep llama.cpp quiet in the logs
}
llm = Llama(model_path=model_path, **_ENGINE_OPTS)
def smart_response(message, history):
    """Stream a chat reply from the local llama.cpp model.

    Args:
        message: The user's current message (a string from Gradio).
        history: Prior turns supplied by Gradio. Depending on the Gradio
            version/configuration this is either a list of
            ``[user, assistant]`` pairs or a list of openai-style
            ``{"role": ..., "content": ...}`` dicts; both are handled.

    Yields:
        str: The growing partial response (token-by-token streaming), or a
        single user-facing error string if generation fails.
    """
    try:
        messages = []

        # 1. System prompt steering the dual-frame (gain/loss) behavior.
        system_prompt = """You are a helpful AI assistant.
- If asked for ADVICE/OPINION: Provide **Gain Frame** and **Loss Frame**.
- If asked for FACTS/CHAT: Answer directly.
Note: You are a small Quantized model, admit if you don't know."""
        messages.append({"role": "system", "content": system_prompt})

        # 2. Replay conversation memory. The original only accepted
        #    tuple-pair history and silently dropped the openai-style
        #    dict format that newer Gradio versions emit; accept both.
        for turn in history:
            if isinstance(turn, dict):
                role = turn.get("role")
                content = turn.get("content")
                if role in ("user", "assistant") and content is not None:
                    messages.append({"role": role, "content": str(content)})
            elif isinstance(turn, (list, tuple)) and len(turn) >= 2:
                user_msg, bot_msg = turn[0], turn[1]
                # Only replay complete (non-empty) exchanges.
                if user_msg is not None and bot_msg is not None:
                    messages.append({"role": "user", "content": str(user_msg)})
                    messages.append({"role": "assistant", "content": str(bot_msg)})

        # 3. Current question.
        messages.append({"role": "user", "content": message})

        # 4. Generate, yielding the accumulated text after each token so
        #    Gradio can render a live-typing effect.
        stream = llm.create_chat_completion(
            messages=messages,
            max_tokens=1024,
            stream=True,
            temperature=0.7,
        )
        partial_message = ""
        for chunk in stream:
            delta = chunk['choices'][0]['delta']
            if 'content' in delta:
                partial_message += delta['content']
                yield partial_message
    except Exception as e:
        # Surface the failure in the UI instead of crashing the interface.
        print(f"❌ Error: {e}")
        yield f"⚠️ Oups, une erreur technique est survenue : {e}. Essayez de cliquer sur 'New Chat'."
# 3. Chat UI wired to the streaming responder above.
demo = gr.ChatInterface(
    fn=smart_response,
    title="🤖 Smart Decision Architect (Safe Mode)",
    description="Ask factual questions or advice.",
)
# Entry point: bind to all interfaces on port 7860 so the hosting
# container (e.g. Hugging Face Spaces) can reach the app.
# Fix: removed a stray trailing "|" left over from page extraction,
# which made the line a syntax error.
if __name__ == "__main__":
    demo.queue().launch(server_name="0.0.0.0", server_port=7860)