"""Minimal Gradio chat UI for the Gemma-2-2b-it causal LM.

Loads tokenizer + model at import time, exposes a single-turn `chat`
function, and serves a Blocks UI with a message box, system prompt,
and sampling hyperparameters.
"""

import os

import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

# =========================
# CONFIG
# =========================

MODEL_ID = "google/gemma-2-2b-it"
# Gated model: requires a Hugging Face access token in the environment.
HF_TOKEN = os.environ.get("HF_TOKEN")

# =========================
# LOAD TOKENIZER
# =========================

print("🔄 Loading tokenizer...")

tokenizer = AutoTokenizer.from_pretrained(
    MODEL_ID,
    token=HF_TOKEN,
    use_fast=False
)

# =========================
# LOAD MODEL
# =========================

print("🔄 Loading model...")

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    token=HF_TOKEN,
    torch_dtype=torch.float32,  # switch to bfloat16 when a GPU is available
    device_map="auto"
)
model.eval()

# =========================
# CHAT FUNCTION
# =========================


def chat(user_input, system_prompt, temperature, top_p, max_tokens):
    """Generate a single-turn reply from the model.

    Args:
        user_input: The user's message.
        system_prompt: Instructions prepended to the user's message
            (see BUG FIX note below on why it is merged rather than
            sent as a separate "system" turn).
        temperature: Sampling temperature (higher = more random).
        top_p: Nucleus-sampling cutoff.
        max_tokens: Maximum number of new tokens to generate.

    Returns:
        The decoded model reply, stripped of surrounding whitespace.
    """
    # Robustness: don't run generation on an empty prompt.
    if not user_input or not user_input.strip():
        return ""

    # BUG FIX: Gemma's chat template rejects the "system" role
    # (apply_chat_template raises TemplateError: "System role not
    # supported"), which made every call crash. Fold the system
    # prompt into the first user turn instead.
    if system_prompt and system_prompt.strip():
        content = f"{system_prompt.strip()}\n\n{user_input}"
    else:
        content = user_input

    messages = [
        {"role": "user", "content": content},
    ]

    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )

    inputs = tokenizer(
        prompt,
        return_tensors="pt"
    ).to(model.device)

    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=int(max_tokens),
            do_sample=True,
            temperature=float(temperature),
            top_p=float(top_p),
            repetition_penalty=1.1,
            eos_token_id=tokenizer.eos_token_id,
            # Gemma has no dedicated pad token; reuse EOS to silence
            # the generate() warning and keep batching well-defined.
            pad_token_id=tokenizer.eos_token_id
        )

    # Slice off the prompt tokens so only the new completion is decoded.
    generated_tokens = output[0][inputs["input_ids"].shape[-1]:]

    decoded = tokenizer.decode(
        generated_tokens,
        skip_special_tokens=True
    )

    return decoded.strip()


# =========================
# GRADIO UI
# =========================

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
# 🐕 DogeAI v1.0
Modelo **experimental**, pequeno e focado em respostas claras.

⚠️ Pode errar ou alucinar
❌ Não use como fonte confiável
✅ Use para estudo e experimentação
"""
    )

    with gr.Row():
        with gr.Column(scale=3):
            user_input = gr.Textbox(
                lines=5,
                label="Mensagem",
                placeholder="Converse com o DogeAI 🐶"
            )
            submit = gr.Button("Enviar 🚀")
            output = gr.Textbox(
                lines=14,
                label="Resposta do modelo"
            )

        with gr.Column(scale=2):
            system_prompt = gr.Textbox(
                lines=6,
                value=(
                    "Você é o DogeAI, um modelo experimental e honesto. "
                    "Se não souber algo, diga claramente que não sabe. "
                    "Não invente fatos."
                ),
                label="System Prompt"
            )

            gr.Markdown("### ⚙️ Hiperparâmetros")

            temperature = gr.Slider(
                0.2, 1.5, value=0.7, step=0.05,
                label="Temperature"
            )
            top_p = gr.Slider(
                0.3, 1.0, value=0.9, step=0.05,
                label="Top-p"
            )
            max_tokens = gr.Slider(
                32, 512, value=200, step=8,
                label="Max tokens"
            )

    submit.click(
        chat,
        inputs=[
            user_input,
            system_prompt,
            temperature,
            top_p,
            max_tokens
        ],
        outputs=output
    )

demo.launch()