"""AxionChat-v2: minimal Gradio chat UI over Qwen3-1.7B running on CPU.

Loads the model once at import time, builds a plain-text ``User:/Assistant:``
prompt from a short rolling history, and wires everything into a Gradio
Blocks interface. UI strings are intentionally in Portuguese.
"""

import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

# =========================
# MODEL CONFIG
# =========================
MODEL_ID = "Qwen/Qwen3-1.7B"

print("🔄 Carregando tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

print("🧠 Carregando modelo (FP32, CPU)...")
# NOTE: fp16 is a poor fit for CPU inference — many kernels are unimplemented
# or numerically unstable (NaNs) in half precision on CPU, and it is usually
# slower than fp32 there. Load in float32 for correct, stable generation.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float32,
    device_map="cpu",
    low_cpu_mem_usage=True,
)
model.eval()
print("✅ Modelo carregado!")

# =========================
# AXION CORE (HIDDEN SYSTEM)
# =========================
AXION_CORE_PROMPT = """
You are AxionChat, an intelligent, honest and precise AI assistant.
Be clear and structured.
Never hallucinate facts.
If you do not know something, say so.
Stay concise unless depth is requested.
"""


# =========================
# PROMPT BUILDER (FAST)
# =========================
def build_prompt(system_prompt, history, user_input, show_reasoning):
    """Assemble a flat-text prompt from the system prompt and recent history.

    Args:
        system_prompt: Combined hidden + user-supplied system instructions.
        history: List of ``(user, assistant)`` message pairs; only the last
            4 turns are included to keep the prompt (and latency) small.
        user_input: The new user message.
        show_reasoning: When True, prefix the message with a short
            think-then-answer instruction (slower, more verbose output).

    Returns:
        A single prompt string ending in ``"Assistant:"`` so the model's
        continuation is the reply.
    """
    prompt = system_prompt.strip() + "\n\n"
    for u, a in history[-4:]:
        prompt += f"User: {u}\nAssistant: {a}\n"
    if show_reasoning:
        user_input = "Think briefly, then answer clearly:\n" + user_input
    prompt += f"User: {user_input}\nAssistant:"
    return prompt


# =========================
# CHAT FUNCTION
# =========================
def chat(
    user_input,
    chat_history,
    user_system_prompt,
    temperature,
    max_tokens,
    show_reasoning,
):
    """Generate one assistant reply and append the turn to the history.

    Args:
        user_input: Raw text from the message box; blank input is a no-op.
        chat_history: Mutable list of ``(user, assistant)`` pairs
            (the Gradio ``State``); appended to in place.
        user_system_prompt: Optional extra system instructions layered on
            top of the hidden AXION_CORE_PROMPT.
        temperature: Sampling temperature from the UI slider.
        max_tokens: Generation budget from the UI slider (32–256). The
            slider already bounds this value, so it is used as-is — the
            previous hidden cap at 128 silently contradicted the UI.
        show_reasoning: Forwarded to ``build_prompt``.

    Returns:
        ``(chat_history, "")`` — the updated history for the Chatbot
        component and an empty string to clear the input box.
    """
    if not user_input.strip():
        return chat_history, ""

    system_prompt = AXION_CORE_PROMPT
    if user_system_prompt.strip():
        system_prompt += "\n" + user_system_prompt.strip()

    prompt = build_prompt(
        system_prompt,
        chat_history,
        user_input,
        show_reasoning,
    )

    inputs = tokenizer(prompt, return_tensors="pt")

    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=int(max_tokens),
            temperature=float(temperature),
            top_p=0.9,
            do_sample=True,
            # Explicit pad token avoids the "Setting pad_token_id to
            # eos_token_id" warning and makes padding behavior deterministic.
            pad_token_id=tokenizer.eos_token_id,
        )

    decoded = tokenizer.decode(output[0], skip_special_tokens=True)
    # The decoded text echoes the whole prompt; keep only the text after the
    # final "Assistant:" marker (the model's new continuation).
    response = decoded.split("Assistant:")[-1].strip()

    chat_history.append((user_input, response))
    return chat_history, ""


# =========================
# GRADIO UI
# =========================
with gr.Blocks(title="AxionChat-v2") as demo:
    gr.Markdown("# 🧠 AxionChat-v2")
    gr.Markdown("Chat experimental focado em clareza, honestidade e velocidade.")

    chatbot = gr.Chatbot(height=420)

    user_input = gr.Textbox(
        placeholder="Digite sua mensagem...",
        label="Mensagem",
    )

    user_system = gr.Textbox(
        placeholder="System prompt opcional (personalidade, tom, estilo...)",
        label="System Prompt",
    )

    with gr.Row():
        temperature = gr.Slider(0.1, 1.2, value=0.7, label="Temperatura")
        max_tokens = gr.Slider(32, 256, value=96, step=32, label="Máx. tokens")

    show_reasoning = gr.Checkbox(
        label="Mostrar raciocínio (mais lento)",
        value=False,
    )

    send = gr.Button("Enviar 🚀")
    state = gr.State([])

    send.click(
        chat,
        inputs=[
            user_input,
            state,
            user_system,
            temperature,
            max_tokens,
            show_reasoning,
        ],
        outputs=[chatbot, user_input],
    )

demo.launch()