import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# =========================
# CONFIG
# =========================
MODEL_ID = "AxionLab-Co/DogeAI-v2.0-4B-Reasoning"
MAX_NEW_TOKENS = 256  # smaller = fewer timeouts on CPU
tokenizer = None
model = None
# =========================
# LOAD MODEL (LAZY + SAFE)
# =========================
def load_model():
    global tokenizer, model
    if model is None:
        tokenizer = AutoTokenizer.from_pretrained(
            MODEL_ID,
            use_fast=True
        )
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            device_map="cpu",
            torch_dtype=torch.float32,
            low_cpu_mem_usage=True
        )
        model.eval()
    return tokenizer, model
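# Optional tuning, a hedged sketch: on a small CPU Space, capping torch's
# intra-op thread count can reduce contention. The count of 2 below is an
# assumption; match it to the cores actually available.
# torch.set_num_threads(2)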
# =========================
# PROMPT (CPU-FRIENDLY)
# =========================
def build_prompt(user_input: str) -> str:
    return f"""You are DogeAI-v2.0-4B-Reasoning.
Think step by step internally.
Do not reveal your full chain-of-thought.
Provide a clear final answer with a short explanation.
If the user speaks Brazilian Portuguese:
- use Brazilian slang lightly
- keep the Doge vibe 🐕🇧🇷
- stay serious and logical
User:
{user_input}
Assistant:
"""
# =========================
# CHAT FUNCTION (SSE-SAFE)
# =========================
def chat(user_input):
    tokenizer, model = load_model()
    # keep the SSE connection alive right away
    yield "🤔 DogeAI is thinking... hang tight..."
    prompt = build_prompt(user_input)
    inputs = tokenizer(
        prompt,
        return_tensors="pt"
    )
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=MAX_NEW_TOKENS,
            temperature=0.7,
            top_p=0.9,
            do_sample=True
        )
    text = tokenizer.decode(
        output[0],
        skip_special_tokens=True
    )
    # strip the prompt from the final answer
    response = text.split("Assistant:", 1)[-1].strip()
    yield response
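# A hedged streaming variant, not the app's current behavior: transformers'
# TextIteratorStreamer runs generate() in a background thread and yields text
# chunks as they arrive, keeping the SSE connection busy without the
# placeholder message. skip_prompt=True also makes the "Assistant:" string
# split above unnecessary (slicing output[0] past the prompt tokens works too).
#
# from threading import Thread
# from transformers import TextIteratorStreamer
#
# def chat_streaming(user_input):
#     tokenizer, model = load_model()
#     inputs = tokenizer(build_prompt(user_input), return_tensors="pt")
#     streamer = TextIteratorStreamer(
#         tokenizer, skip_prompt=True, skip_special_tokens=True
#     )
#     thread = Thread(
#         target=model.generate,
#         kwargs=dict(
#             **inputs,
#             max_new_tokens=MAX_NEW_TOKENS,
#             temperature=0.7,
#             top_p=0.9,
#             do_sample=True,
#             streamer=streamer,
#         ),
#     )
#     thread.start()
#     partial = ""
#     for chunk in streamer:
#         partial += chunk
#         yield partial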
# =========================
# GRADIO UI
# =========================
with gr.Blocks(title="DogeAI-v2.0-4B-Reasoning") as demo:
    gr.Markdown(
        "# 🐕 DogeAI-v2.0-4B-Reasoning\n"
        "**4B reasoning model running on CPU in an HF Space**\n\n"
        "Explicit reasoning internally, a clear answer externally."
    )
    input_box = gr.Textbox(
        label="Question",
        placeholder="A question that calls for real reasoning...",
        lines=4
    )
    output_box = gr.Textbox(
        label="DogeAI's answer",
        lines=14
    )
    run_btn = gr.Button("Think 🧠🐕")
    run_btn.click(
        fn=chat,
        inputs=input_box,
        outputs=output_box
    )
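# chat() is a generator, so partial results stream through Gradio's queue.
# Recent Gradio versions enable the queue by default; calling it explicitly
# is harmless and covers older versions (a defensive assumption, since no
# Gradio version is pinned here).
demo.queue()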
demo.launch()