"""Gradio chat front-end for a GGUF Mistral model served through llama.cpp.

Messages that look like a request for an opinion or a decision are answered
in two frames (arguments for / arguments against); every other message gets
a short, direct answer.
"""

import re

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# 1. Download Model
print("⬇️ Downloading model...")
model_path = hf_hub_download(
    repo_id="XY26/dual-frame-mistral-7",
    filename="mistral-7b-v0.3.Q4_K_M.gguf",
)

# 2. Load Engine
print("⚙️ Loading engine...")
llm = Llama(
    model_path=model_path,
    n_ctx=4096,
    n_threads=2,
    verbose=False,
)

# Words (English and French) that mark a message as an opinion/decision
# request. Kept at module level so the list is built once, not per message.
_OPINION_TRIGGERS = (
    # English
    "should", "opinion", "think", "good", "bad", "pros", "cons",
    "benefit", "risk", "impact", "why", "better", "safe", "worth",
    "buy", "choose", "prefer", "best",
    # French
    "sain", "dangereux", "mieux", "avis", "pensez", "effet", "faut",
    "acheter", "choisir", "bien", "mal", "pourquoi",
)

# Triggers must start a word: plain substring matching fired on unrelated
# words ("mal" in "normal", "bien" in "combien"). Inflected forms such as
# "benefits", "risky" or "thinking" still match because only the start of
# the word is anchored.
_OPINION_PATTERN = re.compile(
    r"\b(?:" + "|".join(re.escape(word) for word in _OPINION_TRIGGERS) + r")"
)

# Text the opinion prompt ends with; the model continues writing after it.
_POSITIVE_PREFIX = "**POSITIVE FRAMING:**"

# NOTE(review): the line breaks inside both templates were reconstructed
# following the usual Alpaca layout — confirm they match the format the model
# was fine-tuned on.
_OPINION_PROMPT = (
    "Below is an instruction that describes a task. "
    "Write a response that appropriately completes the request.\n"
    "\n"
    "### Instruction:\n"
    "Analyze the following topic. "
    "You must provide the answer in two distinct frames:\n"
    "1. POSITIVE FRAMING (Arguments For / Benefits)\n"
    "2. NEGATIVE FRAMING (Arguments Against / Risks)\n"
    "\n"
    "Topic: {message}\n"
    "\n"
    "### Response:\n"
) + _POSITIVE_PREFIX

_CHAT_PROMPT = (
    "Below is an instruction that describes a task. "
    "Write a response that appropriately completes the request.\n"
    "\n"
    "### Instruction:\n"
    "You are a helpful assistant. "
    "Answer the following question concisely and directly:\n"
    "{message}\n"
    "\n"
    "### Response:\n"
)

# Sequences that end generation. An empty string must never appear here: it
# is a substring of every output and would cut the completion off at once.
_STOP_SEQUENCES = ["</s>", "###", "User:", "Instruction:"]


def _is_opinion_request(message):
    """Return True when *message* contains a word starting with a trigger."""
    return _OPINION_PATTERN.search(message.lower()) is not None


def smart_response(message, history):
    """Generate the chatbot reply for one user message.

    Args:
        message: The text the user just sent.
        history: Previous turns supplied by ``gr.ChatInterface``. Unused;
            each message is answered on its own.

    Returns:
        The reply text. For opinion requests it starts with the
        "**POSITIVE FRAMING:**" heading followed by the model's continuation.
    """
    # --- 1. ROUTING ---
    if _is_opinion_request(message):
        print(f"🧠 OPINION MODE: {message}")
        # Response priming: the prompt already ends with the first heading,
        # so the model has to continue by filling that section in.
        prompt = _OPINION_PROMPT.format(message=message)
        prefix_to_add = _POSITIVE_PREFIX
    else:
        print(f"ℹ️ CHAT MODE: {message}")
        prompt = _CHAT_PROMPT.format(message=message)
        prefix_to_add = ""

    # --- 2. GENERATION ---
    output = llm(
        prompt,
        max_tokens=512,
        temperature=0.7,
        stop=_STOP_SEQUENCES,
        echo=False,
    )
    text = output["choices"][0]["text"].strip()

    # The primed heading is part of the prompt, not of the completion
    # (echo=False), so it is put back in front of the generated text. Plain
    # chat replies are returned as-is, without a stray leading space.
    if prefix_to_add:
        return f"{prefix_to_add} {text}"
    return text


# 3. Launch Interface
demo = gr.ChatInterface(
    fn=smart_response,
    title="Dual-Mind Mistral 🧠",
    description="Ask a Fact (Normal Chat) or an Opinion (Dual Perspectives).",
)

if __name__ == "__main__":
    demo.queue().launch(server_name="0.0.0.0", server_port=7860)