import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# 1. Download the model
print("⬇️ Downloading model...")
model_path = hf_hub_download(
    repo_id="XY26/dual-frame-llama-3",
    filename="meta-llama-3.1-8b-instruct.Q4_K_M.gguf"
)
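
# Note: hf_hub_download caches the GGUF file locally (in the Hugging Face cache
# directory), so a restart of the Space reuses the multi-GB download instead of
# fetching it again.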

# 2. Load the engine
print("⚙️ Loading engine...")
llm = Llama(
    model_path=model_path,
    n_ctx=4096,
    n_threads=2,
    verbose=False
)
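
# n_ctx=4096 caps prompt + completion at 4096 tokens. n_threads=2 is sized for a
# small CPU host (e.g. the 2-vCPU free Space tier); verbose=False silences
# llama.cpp's load-time logging.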

def smart_response(message, history):
    try:
        messages = []

        # 1. System prompt
        system_prompt = """You are a helpful AI assistant.
- If asked for ADVICE/OPINION: Provide **Gain Frame** and **Loss Frame**.
- If asked for FACTS/CHAT: Answer directly.
Note: You are a small Quantized model, admit if you don't know."""
        messages.append({"role": "system", "content": system_prompt})
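
        # llama-cpp-python builds the actual prompt from these role/content dicts
        # using a chat template (read from the GGUF metadata when available), so no
        # manual Llama-3 prompt formatting is needed here.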

        # 2. Safe conversation memory
        for turn in history:
            # Only keep turns that are a list/tuple with at least 2 elements (user, bot)
            if isinstance(turn, (list, tuple)) and len(turn) >= 2:
                user_msg = turn[0]
                bot_msg = turn[1]
                # Append only when neither message is empty
                if user_msg is not None and bot_msg is not None:
                    messages.append({"role": "user", "content": str(user_msg)})
                    messages.append({"role": "assistant", "content": str(bot_msg)})
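
        # This assumes Gradio's tuple-style history ([user, bot] pairs); with the
        # newer message-style history (dicts with "role"/"content" keys) every turn
        # would fail the isinstance check above and be skipped.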

        # 3. Current question
        messages.append({"role": "user", "content": message})

        # 4. Generation
        stream = llm.create_chat_completion(
            messages=messages,
            max_tokens=1024,
            stream=True,
            temperature=0.7
        )
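
        # With stream=True, create_chat_completion returns a generator of
        # OpenAI-style chunks instead of a single completion dict.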

        partial_message = ""
        for chunk in stream:
            delta = chunk['choices'][0]['delta']
            if 'content' in delta:
                token = delta['content']
                partial_message += token
                yield partial_message
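
        # Yielding the growing string after each token is how Gradio streams:
        # ChatInterface re-renders the bot message with every yield.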

    except Exception as e:
        # If something crashes, show it cleanly instead of breaking the interface
        print(f"❌ Error: {e}")
        yield f"⚠️ Oops, a technical error occurred: {e}. Try clicking 'New Chat'."

# 3. Interface
demo = gr.ChatInterface(
    fn=smart_response,
    title="🤖 Smart Decision Architect (Safe Mode)",
    description="Ask factual questions or advice."
)
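
# server_name="0.0.0.0" exposes the app outside the container, and 7860 is the
# port Hugging Face Spaces expects a Gradio app to listen on. queue() enables
# request queuing, which generator-based (streaming) handlers need on older
# Gradio versions.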
if __name__ == "__main__":
    demo.queue().launch(server_name="0.0.0.0", server_port=7860)