|
|
import gradio as gr |
|
|
import torch |
|
|
from transformers import AutoTokenizer, AutoModelForCausalLM |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Hugging Face Hub repo of the model to serve (4B-parameter reasoning model).
MODEL_ID = "AxionLab-Co/DogeAI-v2.0-4B-Reasoning"


# Cap on generated tokens per reply — kept small because inference runs on CPU.
MAX_NEW_TOKENS = 256




# Module-level cache, filled lazily by load_model() on the first chat request
# so the app starts fast and only pays the model-load cost when actually used.
tokenizer = None


model = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def load_model():
    """Load the tokenizer and model once, caching them in module globals.

    The first call downloads/loads everything (CPU-only, float32); every
    later call returns the cached pair immediately.

    Returns:
        tuple: ``(tokenizer, model)`` ready for inference.
    """
    global tokenizer, model

    # Guard clause: already initialized on an earlier call.
    if model is not None:
        return tokenizer, model

    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)

    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        device_map="cpu",
        torch_dtype=torch.float32,
        low_cpu_mem_usage=True,
    )
    # Inference mode: disables dropout / training-only behavior.
    model.eval()

    return tokenizer, model
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def build_prompt(user_input: str) -> str:
    """Wrap *user_input* in the DogeAI system prompt.

    The returned text ends with an "Assistant:" cue; chat() relies on that
    exact marker to strip the echoed prompt from the decoded output, so the
    literal below (including its blank lines) must not be reworded.
    """
    return f"""You are DogeAI-v2.0-4B-Reasoning.




Think step by step internally.


Do not reveal your full chain-of-thought.


Provide a clear final answer with a short explanation.




If the user speaks Brazilian Portuguese:


- use Brazilian slang lightly


- keep the Doge vibe 🐕🇧🇷


- stay serious and logical




User:


{user_input}




Assistant:


"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def chat(user_input):
    """Gradio streaming handler: yield a progress message, then the reply.

    Args:
        user_input: The user's question (plain text from the Textbox).

    Yields:
        str: first a placeholder while the model works, then the final answer.
    """
    # Yield the progress message BEFORE loading the model. The original code
    # called load_model() first, so on a cold start the (potentially
    # minutes-long) model download finished before the user saw any feedback.
    yield "🤔 DogeAI está pensando... segura aí..."

    tokenizer, model = load_model()

    prompt = build_prompt(user_input)

    inputs = tokenizer(prompt, return_tensors="pt")

    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=MAX_NEW_TOKENS,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            # Explicit pad token avoids the "pad_token_id not set" warning on
            # models (like most causal LMs) that only define an EOS token.
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens. This is more robust than
    # splitting the full decode on "Assistant:" — the echoed prompt (or an
    # "Assistant:" the model happens to repeat) can never leak into the reply.
    prompt_len = inputs["input_ids"].shape[1]
    response = tokenizer.decode(
        output[0][prompt_len:],
        skip_special_tokens=True,
    ).strip()

    yield response
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# --- Gradio UI -------------------------------------------------------------
# `demo` stays module-level (HF Spaces convention); widget names are local.
with gr.Blocks(title="DogeAI-v2.0-4B-Reasoning") as demo:
    gr.Markdown(
        "# 🐕 DogeAI-v2.0-4B-Reasoning\n"
        "**4B reasoning model rodando em CPU no HF Space**\n\n"
        "Pensamento explícito interno, resposta clara externa."
    )

    question = gr.Textbox(
        label="Pergunta",
        placeholder="Pergunta que exige raciocínio de verdade...",
        lines=4,
    )

    answer = gr.Textbox(label="Resposta do DogeAI", lines=14)

    think_button = gr.Button("Pensar 🧠🐕")

    # chat() is a generator, so Gradio streams its yields into the output box.
    think_button.click(fn=chat, inputs=question, outputs=answer)


demo.launch()