import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# 1. Download the model weights
print("⬇️ Downloading model...")
model_path = hf_hub_download(
    repo_id="XY26/dual-frame-llama-3",
    filename="meta-llama-3.1-8b-instruct.Q4_K_M.gguf"
)

# 2. Load the inference engine
print("⚙️ Loading engine...")
llm = Llama(
    model_path=model_path,
    n_ctx=4096,   # context window size, in tokens
    n_threads=2,  # CPU threads used for inference
    verbose=False
)

def smart_response(message, history):
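    """Generator callback for gr.ChatInterface: yields the partial reply
    as it grows so the UI can stream tokens to the user."""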
    try:
        messages = []
        
        # 1. System Prompt
        system_prompt = """You are a helpful AI assistant.
        - If asked for ADVICE/OPINION: Provide **Gain Frame** and **Loss Frame**.
        - If asked for FACTS/CHAT: Answer directly.
        Note: You are a small Quantized model, admit if you don't know."""
        
        messages.append({"role": "system", "content": system_prompt})

        # 2. Safe conversation memory (assumes Gradio's pairs-style history:
        # a list of [user_message, bot_message] entries)
        for turn in history:
            # Only keep well-formed pairs with at least two elements (user, bot)
            if isinstance(turn, (list, tuple)) and len(turn) >= 2:
                user_msg = turn[0]
                bot_msg = turn[1]
                # Skip turns where either message is missing (None)
                if user_msg is not None and bot_msg is not None:
                    messages.append({"role": "user", "content": str(user_msg)})
                    messages.append({"role": "assistant", "content": str(bot_msg)})
        
        # 3. Current question
        messages.append({"role": "user", "content": message})

        # 4. Generation (streamed)
        stream = llm.create_chat_completion(
            messages=messages,
            max_tokens=1024,
            stream=True,
            temperature=0.7
        )
        
        partial_message = ""
        for chunk in stream:
            delta = chunk['choices'][0]['delta']
            if 'content' in delta:
                token = delta['content']
                partial_message += token
                yield partial_message
                
    except Exception as e:
        # If generation fails, report the error cleanly instead of crashing the UI
        print(f"❌ Error: {e}")
        yield f"⚠️ Oops, a technical error occurred: {e}. Try clicking 'New Chat'."

# 3. Interface
demo = gr.ChatInterface(
    fn=smart_response,
    title="🤖 Smart Decision Architect (Safe Mode)",
    description="Ask factual questions or advice."
)

if __name__ == "__main__":
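    # queue() enables the request queue that streaming responses rely on;
    # 7860 is the port a Hugging Face Space expects the app to serve on.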
    demo.queue().launch(server_name="0.0.0.0", server_port=7860)