# app.py — DogeAI demo Space (AxionLab-official, revision 5e2ce1e)
import os
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
# =========================
# CONFIG
# =========================
# Hugging Face model repo to serve. Gemma is a gated model, so downloads
# require an access token with accepted terms.
MODEL_ID = "google/gemma-2-2b-it"
# Token read from the environment; None if unset (works only for cached /
# ungated access in that case).
HF_TOKEN = os.environ.get("HF_TOKEN")
# =========================
# LOAD TOKENIZER
# =========================
print("🔄 Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_ID,
    token=HF_TOKEN,
    use_fast=False  # slow (SentencePiece) tokenizer; NOTE(review): the fast one is usually fine — confirm
)
# =========================
# LOAD MODEL
# =========================
print("🔄 Loading model...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    token=HF_TOKEN,
    torch_dtype=torch.float32,  # can switch to bfloat16 if a GPU is available
    device_map="auto"  # let accelerate place the weights (CPU here, GPU if present)
)
# Inference-only: disables dropout etc.
model.eval()
# =========================
# CHAT FUNCTION
# =========================
def chat(user_input, system_prompt, temperature, top_p, max_tokens):
    """Generate a single-turn reply from the loaded Gemma model.

    Args:
        user_input: The user's message text.
        system_prompt: Behavioral instructions for the model.
        temperature: Sampling temperature (higher = more random).
        top_p: Nucleus-sampling probability mass.
        max_tokens: Maximum number of new tokens to generate.

    Returns:
        The decoded model reply with surrounding whitespace stripped,
        or "" for an empty message.
    """
    # Robustness: skip generation entirely for a blank message.
    if not user_input or not user_input.strip():
        return ""

    # BUG FIX: Gemma's chat template rejects the "system" role
    # (apply_chat_template raises "System role not supported"), so the
    # original messages list crashed on every call. The documented
    # workaround is to prepend the system prompt to the first user turn.
    if system_prompt and system_prompt.strip():
        content = f"{system_prompt.strip()}\n\n{user_input}"
    else:
        content = user_input
    messages = [{"role": "user", "content": content}]

    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Inference only — no gradient tracking needed.
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=int(max_tokens),
            do_sample=True,
            temperature=float(temperature),
            top_p=float(top_p),
            repetition_penalty=1.1,
            eos_token_id=tokenizer.eos_token_id,
            # Gemma defines no dedicated pad token; reuse EOS to silence
            # the generate() warning.
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens, not the echoed prompt.
    new_tokens = output[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
# =========================
# GRADIO UI
# =========================
_INTRO_MD = """
# 🐕 DogeAI v1.0
Modelo **experimental**, pequeno e focado em respostas claras.
⚠️ Pode errar ou alucinar
❌ Não use como fonte confiável
✅ Use para estudo e experimentação
"""

# Default instructions shown in the editable System Prompt box.
_DEFAULT_SYSTEM_PROMPT = (
    "Você é o DogeAI, um modelo experimental e honesto. "
    "Se não souber algo, diga claramente que não sabe. "
    "Não invente fatos."
)

# Build the interface: message + reply on the left, system prompt and
# sampling hyperparameters on the right.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(_INTRO_MD)

    with gr.Row():
        with gr.Column(scale=3):
            message_box = gr.Textbox(
                lines=5,
                label="Mensagem",
                placeholder="Converse com o DogeAI 🐶",
            )
            send_button = gr.Button("Enviar 🚀")
            reply_box = gr.Textbox(
                lines=14,
                label="Resposta do modelo",
            )

        with gr.Column(scale=2):
            system_box = gr.Textbox(
                lines=6,
                value=_DEFAULT_SYSTEM_PROMPT,
                label="System Prompt",
            )
            gr.Markdown("### ⚙️ Hiperparâmetros")
            temperature_slider = gr.Slider(
                0.2, 1.5, value=0.7, step=0.05, label="Temperature"
            )
            top_p_slider = gr.Slider(
                0.3, 1.0, value=0.9, step=0.05, label="Top-p"
            )
            max_tokens_slider = gr.Slider(
                32, 512, value=200, step=8, label="Max tokens"
            )

    # Wire the button to the generation function.
    send_button.click(
        chat,
        inputs=[
            message_box,
            system_box,
            temperature_slider,
            top_p_slider,
            max_tokens_slider,
        ],
        outputs=reply_box,
    )

demo.launch()