import os
import time

import gradio as gr
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

HF_TOKEN = os.getenv("hf_token")
MODEL_ID = "ruSpamModels/ruSpam-Qwen-0.5B-50k"

device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the Qwen2.5-0.5B-Instruct base model and apply the LoRA adapter on top of it.
base_model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2.5-0.5B-Instruct",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    device_map=device,
    trust_remote_code=True,
    token=HF_TOKEN,
)
model = PeftModel.from_pretrained(
    base_model,
    MODEL_ID,
    token=HF_TOKEN,
)
model.eval()

tokenizer = AutoTokenizer.from_pretrained(
    MODEL_ID,
    token=HF_TOKEN,
)


def classify(message):
    prompt = (
        "You are a spam classifier.\n"
        "Answer with one word: spam or ham.\n\n"
        f"Message:\n{message}\n\n"
        "Answer:"
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    start = time.time()
    with torch.no_grad():
        # Greedy decoding of a single new token is enough to distinguish "spam" from "ham".
        out = model.generate(
            **inputs,
            max_new_tokens=1,
            do_sample=False,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )
    elapsed_ms = (time.time() - start) * 1000

    # Decode only the newly generated tokens, ignoring special tokens.
    new_tokens = out[0, inputs["input_ids"].shape[1]:]
    answer = tokenizer.decode(new_tokens, skip_special_tokens=True).strip().lower()

    if answer.startswith("spam"):
        label = "SPAM"
    elif answer.startswith("ham"):
        label = "HAM"
    else:
        label = "UNKNOWN"
    return f"{label} ({elapsed_ms:.1f} ms)"


iface = gr.Interface(
    fn=classify,
    inputs=gr.Textbox(lines=4, label="Message"),
    outputs=gr.Textbox(label="Prediction"),
    title="ruSpam Qwen 0.5B",
)

iface.launch()