import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# =========================
# CONFIG
# =========================
MODEL_ID = "AxionLab-Co/DogeAI-v2.0-4B-Reasoning"
MAX_NEW_TOKENS = 256  # lower = less risk of timeouts on CPU

tokenizer = None
model = None

# =========================
# LOAD MODEL (LAZY + SAFE)
# =========================
def load_model():
    """Load the tokenizer and model once, on first use (lazy init)."""
    global tokenizer, model
    if model is None:
        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            device_map="cpu",
            torch_dtype=torch.float32,
            low_cpu_mem_usage=True,
        )
        model.eval()
    return tokenizer, model

# =========================
# PROMPT (CPU-FRIENDLY)
# =========================
def build_prompt(user_input: str) -> str:
    return f"""You are DogeAI-v2.0-4B-Reasoning.
Think step by step internally.
Do not reveal your full chain-of-thought.
Provide a clear final answer with a short explanation.

If the user speaks Brazilian Portuguese:
- use Brazilian slang lightly
- keep the Doge vibe 🐕🇧🇷
- stay serious and logical

User: {user_input}
Assistant: """

# =========================
# CHAT FUNCTION (SSE-SAFE)
# =========================
def chat(user_input):
    tokenizer, model = load_model()

    # yield immediately so the SSE connection stays alive while the CPU works
    yield "🤔 DogeAI está pensando... segura aí..."

    prompt = build_prompt(user_input)
    inputs = tokenizer(prompt, return_tensors="pt")

    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=MAX_NEW_TOKENS,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
        )

    text = tokenizer.decode(output[0], skip_special_tokens=True)

    # strip the prompt, keeping only what follows "Assistant:"
    response = text.split("Assistant:", 1)[-1].strip()
    yield response

# =========================
# GRADIO UI
# =========================
with gr.Blocks(title="DogeAI-v2.0-4B-Reasoning") as demo:
    gr.Markdown(
        "# 🐕 DogeAI-v2.0-4B-Reasoning\n"
        "**4B reasoning model rodando em CPU no HF Space**\n\n"
        "Pensamento explícito interno, resposta clara externa."
    )

    input_box = gr.Textbox(
        label="Pergunta",
        placeholder="Pergunta que exige raciocínio de verdade...",
        lines=4,
    )
    output_box = gr.Textbox(
        label="Resposta do DogeAI",
        lines=14,
    )

    run_btn = gr.Button("Pensar 🧠🐕")
    run_btn.click(fn=chat, inputs=input_box, outputs=output_box)

demo.launch()
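
# ----------------------------------------------------------------------
# Optional: token-by-token streaming (a minimal sketch, not wired up).
# transformers provides TextIteratorStreamer, which yields decoded text
# while generate() runs on a background thread, so partial answers would
# appear in the Textbox instead of a single final yield. `chat_streaming`
# is a hypothetical name introduced here for illustration; to try it,
# move this function above the UI block and pass fn=chat_streaming to
# run_btn.click instead of fn=chat.
# ----------------------------------------------------------------------
from threading import Thread
from transformers import TextIteratorStreamer

def chat_streaming(user_input):
    tokenizer, model = load_model()
    prompt = build_prompt(user_input)
    inputs = tokenizer(prompt, return_tensors="pt")

    # skip_prompt=True so only newly generated tokens are emitted
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=MAX_NEW_TOKENS,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
    )

    # generate() blocks, so run it on a thread and consume the streamer here
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial  # Gradio updates the output Textbox on each yield
    thread.join()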