import os
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
# =========================
# CONFIG
# =========================
MODEL_ID = "google/gemma-2-2b-it"
HF_TOKEN = os.environ.get("HF_TOKEN")
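# Note: google/gemma-2-2b-it is gated on the Hugging Face Hub, so a token
# with accepted terms is needed for the first download. This early warning
# is an added convenience, not part of the original flow:
if HF_TOKEN is None:
    print("⚠️ HF_TOKEN is not set; loading the gated Gemma weights may fail.")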
# =========================
# LOAD TOKENIZER
# =========================
print("🔄 Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_ID,
    token=HF_TOKEN,
    use_fast=False
)
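# Note: use_fast=False selects the slow SentencePiece tokenizer, which needs
# the `sentencepiece` package installed; the fast tokenizer would also work
# here (keeping the original choice).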
# =========================
# LOAD MODEL
# =========================
print("🔄 Loading model...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    token=HF_TOKEN,
    torch_dtype=torch.float32,  # switch to bfloat16 if you have a GPU
    device_map="auto"
)
model.eval()
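# Optional sanity check (an added convenience): confirm where
# device_map="auto" placed the weights and which dtype is active.
print(f"✅ Model on {next(model.parameters()).device}, dtype {next(model.parameters()).dtype}")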
# =========================
# CHAT FUNCTION
# =========================
def chat(user_input, system_prompt, temperature, top_p, max_tokens):
    # Gemma's chat template rejects a separate "system" role, so the system
    # prompt is folded into the user turn to avoid a template error.
    messages = [
        {"role": "user", "content": f"{system_prompt}\n\n{user_input}"},
    ]
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    inputs = tokenizer(
        prompt,
        return_tensors="pt"
    ).to(model.device)
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=int(max_tokens),
            do_sample=True,
            temperature=float(temperature),
            top_p=float(top_p),
            repetition_penalty=1.1,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id
        )
    # Decode only the newly generated tokens, not the echoed prompt
    generated_tokens = output[0][inputs["input_ids"].shape[-1]:]
    decoded = tokenizer.decode(
        generated_tokens,
        skip_special_tokens=True
    )
    return decoded.strip()
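# Quick smoke test (an added, commented-out convenience — uncomment to verify
# generation locally before serving the UI):
# print(chat("Hi, who are you?", "You are DogeAI.", 0.7, 0.9, 64))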
# =========================
# GRADIO UI
# =========================
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # 🐕 DogeAI v1.0
        An **experimental**, small model focused on clear answers.
        ⚠️ It can make mistakes or hallucinate
        ❌ Do not use it as a reliable source
        ✅ Use it for study and experimentation
        """
    )
    with gr.Row():
        with gr.Column(scale=3):
            user_input = gr.Textbox(
                lines=5,
                label="Message",
                placeholder="Chat with DogeAI 🐶"
            )
            submit = gr.Button("Send 🚀")
            output = gr.Textbox(
                lines=14,
                label="Model response"
            )
        with gr.Column(scale=2):
            system_prompt = gr.Textbox(
                lines=6,
                value=(
                    "You are DogeAI, an experimental and honest model. "
                    "If you don't know something, say clearly that you don't know. "
                    "Do not invent facts."
                ),
                label="System Prompt"
            )
            gr.Markdown("### ⚙️ Hyperparameters")
            temperature = gr.Slider(
                0.2, 1.5, value=0.7, step=0.05, label="Temperature"
            )
            top_p = gr.Slider(
                0.3, 1.0, value=0.9, step=0.05, label="Top-p"
            )
            max_tokens = gr.Slider(
                32, 512, value=200, step=8, label="Max tokens"
            )
    submit.click(
        chat,
        inputs=[
            user_input,
            system_prompt,
            temperature,
            top_p,
            max_tokens
        ],
        outputs=output
    )
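# Optional: for many concurrent users, Gradio's built-in request queue can be
# enabled by calling demo.queue() before demo.launch() (an optional tweak,
# not part of the original app).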
demo.launch()