XY26 committed on
Commit
ef646ae
·
verified ·
1 Parent(s): 9d10554

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -46
app.py CHANGED
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# 1. Fetch the quantized GGUF weights from the Hugging Face Hub.
#    hf_hub_download caches the file locally and returns its filesystem path.
model_path = hf_hub_download(
    repo_id="XY26/dual-frame-llama-3",
    filename="meta-llama-3.1-8b-instruct.Q4_K_M.gguf",
)

# 2. Spin up the llama.cpp inference engine on the downloaded weights.
llm = Llama(
    model_path=model_path,
    n_ctx=4096,       # context window, in tokens
    n_threads=2,      # CPU threads used for generation
    verbose=False,
)
19
def smart_response(message, history):
    """Chat callback: assemble a Llama-3 chat-template prompt from the
    running history plus the new user message, run the local model, and
    return the completion text.

    The system prompt tells the model to answer decision/advice
    questions with a dual Gain/Loss framing and everything else plainly.
    """
    system_prompt = """You are a helpful AI assistant with a specific behavioral directive.

CORE INSTRUCTION:
1. IF the user asks for a DECISION, OPINION, or ADVICE (e.g., "Should I...", "Is X good...", "What do you think of..."):
You MUST analyze it using TWO distinct frames to avoid bias.
Format your answer exactly like this:
**The Gain Frame (Positive)**
[Write the positive/gain perspective here]

**The Loss Frame (Negative)**
[Write the negative/loss/risk perspective here]
(Do not provide a conclusion or your own opinion).

2. IF the user asks a FACTUAL question (e.g., "1+1", "Capital of France"), a CLARIFICATION (e.g., "What did you mean?"), or general CHAT:
Answer normally, concisely, and directly. Do NOT use the Gain/Loss frames.
"""

    # Llama-3 chat template: system turn first, then alternating
    # user/assistant turns, ending with an open assistant header so the
    # model continues from there.
    pieces = [
        f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_prompt}<|eot_id|>"
    ]
    for user_msg, bot_msg in history:
        pieces.append(f"<|start_header_id|>user<|end_header_id|>\n\n{user_msg}<|eot_id|>")
        pieces.append(f"<|start_header_id|>assistant<|end_header_id|>\n\n{bot_msg}<|eot_id|>")
    pieces.append(
        f"<|start_header_id|>user<|end_header_id|>\n\n{message}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
    )

    # Generate; stop on the end-of-turn token so the model answers one turn.
    output = llm(
        "".join(pieces),
        max_tokens=1024,
        stop=["<|eot_id|>"],
        echo=False,
    )
    return output['choices'][0]['text']
# 5. The Gradio chat UI wired to smart_response.
demo = gr.ChatInterface(
    fn=smart_response,
    title="🤖 Smart Decision Architect",
    description="I am a Smart Assistant. Ask me factual questions (1+1) and I will answer normally. Ask me for ADVICE (Should I...) and I will give you Gain/Loss frames.",
    examples=["Should I buy a lottery ticket?", "What is 10 + 10?", "Is nuclear energy good?", "Summarize the previous answer."]
)

if __name__ == "__main__":
    # Bind to all interfaces on the standard Hugging Face Spaces port.
    demo.launch(server_name="0.0.0.0", server_port=7860)
 
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# 1. Download the model (huggingface_hub caches it between restarts).
print("⬇️ Downloading model...")
model_path = hf_hub_download(
    repo_id="XY26/dual-frame-llama-3",
    filename="meta-llama-3.1-8b-instruct.Q4_K_M.gguf",
)

# 2. Load the engine.
#    n_ctx is kept at 2048 so the model fits in the available RAM
#    without crashing.
print("⚙️ Loading engine...")
llm = Llama(
    model_path=model_path,
    n_ctx=2048,      # reduced context window for stability (memory)
    n_threads=2,     # use the two available CPU cores
    n_batch=512,     # standard batch size
    verbose=False,
)
 
23
def smart_response(message, history):
    """Chat callback for gr.ChatInterface.

    Builds a Llama-3 chat-template prompt from the conversation history
    plus the new user message, runs the local llama.cpp model, and
    returns the completion text. Any runtime failure is reported back
    into the chat instead of crashing the worker.

    Parameters
    ----------
    message : str
        The user's newest message.
    history : list
        Prior turns. Depending on the Gradio version/configuration this
        is either a list of (user, assistant) pairs or a list of
        {"role": ..., "content": ...} dicts; both are accepted.

    Returns
    -------
    str
        The model's reply, or an error notice on failure.
    """
    try:
        # System prompt: dual-frame answers for advice, plain answers otherwise.
        system_prompt = """You are a helpful AI assistant with a specific behavioral directive.
1. IF asked for DECISION/ADVICE: Provide **The Gain Frame** and **The Loss Frame**.
2. IF asked for FACTS/CHAT: Answer normally."""

        # FIX: the previous code did `for user_msg, bot_msg in history`
        # directly, which raises when Gradio delivers messages-format
        # dict entries (the default for type="messages" in Gradio >= 5).
        # Normalize both shapes into (user, assistant) pairs.
        pairs = []
        if history and isinstance(history[0], dict):
            pending_user = None
            for turn in history:
                role = turn.get("role")
                if role == "user":
                    pending_user = turn.get("content")
                elif role == "assistant" and pending_user is not None:
                    pairs.append((pending_user, turn.get("content")))
                    pending_user = None
        else:
            pairs = list(history)

        # Llama-3 chat template: system turn, then alternating turns.
        prompt = f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_prompt}<|eot_id|>"

        # Replay prior turns so the model has conversational memory.
        for user_msg, bot_msg in pairs:
            if user_msg and bot_msg:  # guard against empty messages
                prompt += f"<|start_header_id|>user<|end_header_id|>\n\n{user_msg}<|eot_id|>"
                prompt += f"<|start_header_id|>assistant<|end_header_id|>\n\n{bot_msg}<|eot_id|>"

        # Current message + open assistant header so generation continues it.
        prompt += f"<|start_header_id|>user<|end_header_id|>\n\n{message}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"

        # Generate; stop on the end-of-turn token.
        output = llm(
            prompt,
            max_tokens=1024,
            stop=["<|eot_id|>"],
            echo=False,
        )
        return output['choices'][0]['text']

    except Exception as e:
        # Top-level boundary: surface the error in the chat instead of
        # killing the Gradio worker.
        print(f"❌ Error: {e}")
        return f"⚠️ An error occurred: {str(e)}. Please click 'New Chat' to reset."
55
 
56
# 3. Chat interface with request queueing.
# NOTE(review): the retry_btn/undo_btn/clear_btn keyword arguments were
# removed from gr.ChatInterface in Gradio 5 — confirm the pinned Gradio
# version still accepts them.
demo = gr.ChatInterface(
    fn=smart_response,
    title="🤖 Smart Decision Architect",
    description="Ask factual questions (1+1) or advice (Should I...).",
    theme="soft",
    retry_btn="🔄 Retry",
    undo_btn="↩️ Undo",
    clear_btn="🗑️ New Chat",
)

# 4. Launch with a concurrency limit (vital for llama.cpp).
if __name__ == "__main__":
    # default_concurrency_limit=1 prevents the server from processing
    # two requests at the same time and crashing.
    demo.queue(default_concurrency_limit=1).launch(server_name="0.0.0.0", server_port=7860)