# Generated with AI
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

# =========================
# CONFIG
# =========================
MODEL_ID = "CromIA/MicroLM2-1M"

# =========================
# LOAD MODEL
# =========================
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# Reuse EOS as the padding token when the tokenizer does not define one.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
model.eval()

# Prefer the GPU when one is visible to torch; otherwise run on CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)


# =========================
# GENERATE FUNCTION
# =========================
def generate_text(prompt, max_new_tokens, temperature, top_p):
    """Sample a completion for *prompt* from the loaded model.

    Args:
        prompt: Text to condition the generation on. ``None`` is treated
            as an empty string.
        max_new_tokens: Upper bound on newly generated tokens; coerced to
            ``int`` because the UI slider may deliver a float.
        temperature: Sampling temperature; coerced to ``float``.
        top_p: Nucleus-sampling probability mass; coerced to ``float``.

    Returns:
        The first generated sequence decoded to text with special tokens
        skipped, or ``""`` when the prompt tokenizes to zero tokens.
    """
    inputs = tokenizer(prompt or "", return_tensors="pt").to(device)

    # With no input tokens there is nothing to condition on, and
    # ``generate`` cannot run on a zero-length sequence; return early
    # instead of surfacing a low-level tensor error in the UI.
    if inputs["input_ids"].shape[-1] == 0:
        return ""

    # Use the pad token configured in the loading section, falling back to
    # EOS if the tokenizer still has none.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=int(max_new_tokens),
            do_sample=True,
            temperature=float(temperature),
            top_p=float(top_p),
            repetition_penalty=1.1,
            pad_token_id=pad_token_id,
        )

    return tokenizer.decode(output[0], skip_special_tokens=True)


# =========================
# UI
# =========================
demo = gr.Interface(
    fn=generate_text,
    # Input order must match the positional parameters of generate_text.
    inputs=[
        gr.Textbox(lines=3, placeholder="Digite um prompt..."),
        gr.Slider(10, 200, value=80, label="Max new tokens"),
        gr.Slider(0.1, 1.5, value=0.8, label="Temperature"),
        gr.Slider(0.5, 1.0, value=0.95, label="Top-p"),
    ],
    outputs=gr.Textbox(label="Output"),
    title="MicroLM2-1M",
    description="Modelo de linguagem leve (~1M parâmetros) treinado em 4.5B tokens.",
)

# =========================
# RUN
# =========================
if __name__ == "__main__":
    demo.launch()