"""Logical-reasoning node: Phi-3.5 Mini Instruct served through llama-cpp and Gradio."""
import os

# FIX: HF_HOME must be exported *before* huggingface_hub is imported — the
# library reads this variable at import time, so the original placement
# (after the import) silently left the default cache location in use.
os.environ["HF_HOME"] = "/tmp/huggingface"

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# MODEL: Phi-3.5 Mini Instruct (strong reasoning in a small footprint).
REPO_ID = "bartowski/Phi-3.5-mini-instruct-GGUF"
FILENAME = "Phi-3.5-mini-instruct-Q6_K.gguf"

print(f"Descargando Phi-3.5 Mini: {FILENAME}...")
model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME, local_dir="/tmp/model")

print("Cargando motor de alta lógica...")
llm = Llama(
    model_path=model_path,
    n_ctx=2048,     # context window, in tokens
    n_threads=4,    # CPU threads for inference
    n_batch=128,    # prompt-evaluation batch size
)
print("¡Nodo Phi-3.5 listo!")


def predict(
    message,
    system_prompt="Eres un asistente experto en razonamiento lógico. Responde en español.",
    *,
    max_tokens=1024,
    temperature=0.5,
):
    """Run one completion through the Phi-3.5 chat template.

    Args:
        message: User question / problem statement.
        system_prompt: System instruction prepended to the conversation.
        max_tokens: Generation budget (keyword-only; default matches the
            original hard-coded value).
        temperature: Sampling temperature (keyword-only; default matches the
            original hard-coded value).

    Returns:
        str: the raw assistant text produced by the model.
    """
    # Phi-3.5 instruct template: <|system|>...<|end|> <|user|>...<|end|> <|assistant|>
    prompt = (
        f"<|system|>\n{system_prompt}<|end|>\n"
        f"<|user|>\n{message}<|end|>\n"
        f"<|assistant|>\n"
    )
    output = llm(
        prompt,
        max_tokens=max_tokens,
        stop=["<|end|>"],   # stop at the template's end-of-turn marker
        echo=False,
        temperature=temperature,
    )
    return output["choices"][0]["text"]


with gr.Blocks() as demo:
    gr.Markdown("# 🧠 Nodo de Razonamiento Lógico (Phi-3.5)")
    msg = gr.Textbox(label="Plantea tu problema")
    out = gr.Textbox(label="Respuesta Analítica", lines=12)
    btn = gr.Button("Analizar")
    # api_name="query" exposes the endpoint for programmatic clients as well.
    btn.click(predict, [msg], out, api_name="query")

if __name__ == "__main__":
    # 0.0.0.0 so the app is reachable from outside the container.
    demo.launch(server_name="0.0.0.0", server_port=7860)