# XY26's picture
# Update app.py
# f340f2e verified
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
# 1. Download Model
# The repository and file identifying the quantized GGUF weights are named
# here so the download call below reads as a single line.
_MODEL_REPO_ID = "XY26/dual-frame-mistral-7"
_MODEL_FILENAME = "mistral-7b-v0.3.Q4_K_M.gguf"

print("⬇️ Downloading model...")
# The value returned by hf_hub_download is handed to Llama as its model_path.
model_path = hf_hub_download(repo_id=_MODEL_REPO_ID, filename=_MODEL_FILENAME)

# 2. Load Engine
# Options forwarded verbatim to the Llama constructor.
_ENGINE_OPTIONS = {
    "n_ctx": 4096,
    "n_threads": 2,
    "verbose": False,
}

print("⚙️ Loading engine...")
# Module-level engine instance; smart_response calls it for every message.
llm = Llama(model_path=model_path, **_ENGINE_OPTIONS)
def smart_response(message: str, history) -> str:
    """Generate a reply, using a two-frame analysis for opinion-style messages.

    The message is scanned for trigger words (English and French). If any
    trigger occurs in it, the model is prompted to produce a POSITIVE /
    NEGATIVE framing analysis and the reply is primed with the
    "**POSITIVE FRAMING:**" heading; otherwise a plain, concise answer is
    requested.

    Args:
        message: The user's latest chat message.
        history: Previous conversation turns as supplied by the chat
            interface. Currently unused; every message is answered on its own.

    Returns:
        The generated text, stripped of surrounding whitespace. In opinion
        mode it is preceded by "**POSITIVE FRAMING:** "; in chat mode it is
        returned without any leading padding.
    """
    # --- 1. EXPANDED TRIGGERS ---
    # Common decision words (English & French), chosen to catch as many
    # opinion-style questions as possible.
    triggers = [
        # English
        "should", "opinion", "think", "good", "bad", "pros", "cons",
        "benefit", "risk", "impact", "why", "better", "safe", "worth",
        "buy", "choose", "prefer", "best",
        # French
        "sain", "dangereux", "mieux", "avis", "pensez", "effet",
        "faut", "acheter", "choisir", "bien", "mal", "pourquoi",
    ]
    # Lower-case once instead of once per trigger.
    # NOTE(review): this is substring matching, so e.g. "mal" also fires on
    # "normal" and "cons" on "consider". Kept as-is to preserve recall
    # ("benefits", "risks", ...); tighten if false positives become a problem.
    lowered = message.lower()
    is_opinion = any(t in lowered for t in triggers)

    # --- 2. RESPONSE PRIMING ---
    if is_opinion:
        print(f"🧠 OPINION MODE: {message}")
        # Start writing the response FOR the model: because the prompt ends
        # with "**POSITIVE FRAMING:**", the model continues from that heading.
        prompt = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
Analyze the following topic. You must provide the answer in two distinct frames:
1. POSITIVE FRAMING (Arguments For / Benefits)
2. NEGATIVE FRAMING (Arguments Against / Risks)
Topic: {message}
### Response:
**POSITIVE FRAMING:**"""
        # The heading lives in the prompt and the call below uses echo=False,
        # so it is absent from the generated text and is re-attached at the end.
        prefix_to_add = "**POSITIVE FRAMING:**"
    else:
        print(f"ℹ️ CHAT MODE: {message}")
        prompt = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
You are a helpful assistant. Answer the following question concisely and directly: {message}
### Response:
"""
        prefix_to_add = ""

    # --- 3. GENERATION ---
    # Stop as soon as the model starts a new prompt section or speaker turn.
    stop_tokens = ["</s>", "###", "User:", "Instruction:"]
    output = llm(
        prompt,
        max_tokens=512,
        temperature=0.7,
        stop=stop_tokens,
        echo=False,
    )
    text = output['choices'][0]['text'].strip()

    # Re-attach the priming heading only when one was used. Joining
    # unconditionally would give every chat-mode reply a stray leading space.
    if prefix_to_add:
        return f"{prefix_to_add} {text}"
    return text
# 3. Launch Interface
# Chat UI whose handler for every incoming message is smart_response.
demo = gr.ChatInterface(
    smart_response,
    title="Dual-Mind Mistral 🧠",
    description="Ask a Fact (Normal Chat) or an Opinion (Dual Perspectives).",
)

if __name__ == "__main__":
    # Enable queuing, then serve on 0.0.0.0:7860 when run as a script.
    queued_demo = demo.queue()
    queued_demo.launch(server_name="0.0.0.0", server_port=7860)