File size: 2,753 Bytes
8891bca 6e7ae54 8891bca 7a55e65 34902a4 9a3be1e 8891bca 6e7ae54 8891bca 7a55e65 02f0452 7a55e65 6e7ae54 480ba11 02f0452 6e7ae54 7a55e65 6e7ae54 8891bca 7a55e65 02f0452 7a55e65 02f0452 8891bca 02f0452 8891bca 6e7ae54 7a55e65 6e7ae54 8891bca 7a55e65 02f0452 7a55e65 6e7ae54 7a55e65 02f0452 7a55e65 6e7ae54 7a55e65 6e7ae54 7a55e65 02f0452 7a55e65 02f0452 7a55e65 02f0452 7a55e65 02f0452 6e7ae54 7a55e65 02f0452 7a55e65 6e7ae54 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 |
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# =========================
# CONFIG
# =========================
# Hugging Face model repo to load.
MODEL_ID = "AxionLab-Co/DogeAI-v2.0-4B-Reasoning"
MAX_NEW_TOKENS = 256  # smaller = fewer request timeouts on CPU
# Lazy singletons: populated on first call to load_model(), reused afterwards.
tokenizer = None
model = None
# =========================
# LOAD MODEL (LAZY + SAFE)
# =========================
def load_model():
    """Lazily load the tokenizer and model on first use (CPU, float32).

    Returns:
        tuple: ``(tokenizer, model)`` — module-level singletons, so repeated
        calls are cheap after the first one.
    """
    global tokenizer, model
    # Already loaded: reuse the cached singletons.
    if model is not None:
        return tokenizer, model

    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        device_map="cpu",
        torch_dtype=torch.float32,
        low_cpu_mem_usage=True,
    )
    model.eval()  # inference mode: disables dropout etc.
    return tokenizer, model
# =========================
# PROMPT (CPU-FRIENDLY)
# =========================
def build_prompt(user_input: str) -> str:
    """Wrap the user's question in the DogeAI persona/system prompt.

    Args:
        user_input: The raw question typed by the user.

    Returns:
        The full prompt string ending with the ``Assistant:`` marker that
        the model is expected to continue from.
    """
    prompt = f"""You are DogeAI-v2.0-4B-Reasoning.
Think step by step internally.
Do not reveal your full chain-of-thought.
Provide a clear final answer with a short explanation.
If the user speaks Brazilian Portuguese:
- use Brazilian slang lightly
- keep the Doge vibe 🐕🇧🇷
- stay serious and logical
User:
{user_input}
Assistant:
"""
    return prompt
# =========================
# CHAT FUNCTION (SSE-SAFE)
# =========================
def chat(user_input):
    """Answer *user_input* with the DogeAI model, streaming via a generator.

    Yields:
        str: first a "thinking" placeholder (keeps the Gradio SSE stream
        alive while the slow CPU generation runs), then the final answer.
    """
    # Fix: guard blank input early — the original fed empty prompts through a
    # multi-minute CPU generate call. Also avoids loading the model at all.
    if not user_input or not user_input.strip():
        yield "⚠️ Manda uma pergunta primeiro, por favor. 🐕"
        return

    tokenizer, model = load_model()

    # Keep the SSE connection alive immediately.
    yield "🤔 DogeAI está pensando... segura aí..."

    prompt = build_prompt(user_input)
    inputs = tokenizer(prompt, return_tensors="pt")

    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=MAX_NEW_TOKENS,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
        )

    text = tokenizer.decode(output[0], skip_special_tokens=True)

    # Drop everything up to and including the prompt's own "Assistant:"
    # marker so only the model's continuation is shown.
    response = text.split("Assistant:", 1)[-1].strip()
    yield response
# =========================
# GRADIO UI
# =========================
# =========================
# GRADIO UI
# =========================
# One question box, one answer box, one button wired to the streaming chat().
with gr.Blocks(title="DogeAI-v2.0-4B-Reasoning") as demo:
    gr.Markdown(
        "# 🐕 DogeAI-v2.0-4B-Reasoning\n"
        "**4B reasoning model rodando em CPU no HF Space**\n\n"
        "Pensamento explícito interno, resposta clara externa."
    )

    input_box = gr.Textbox(
        label="Pergunta",
        placeholder="Pergunta que exige raciocínio de verdade...",
        lines=4,
    )
    output_box = gr.Textbox(label="Resposta do DogeAI", lines=14)

    run_btn = gr.Button("Pensar 🧠🐕")
    run_btn.click(fn=chat, inputs=input_box, outputs=output_box)

demo.launch()