Spaces:
Sleeping
Sleeping
File size: 1,740 Bytes
d01ca98 b5cd69e 5a51796 b5cd69e d01ca98 b5cd69e d01ca98 b5cd69e d01ca98 b5cd69e d01ca98 b5cd69e d01ca98 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
import os
import time
import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
# Access token for the Hugging Face Hub, read from the "hf_token" environment
# variable (None when the variable is not set).
HF_TOKEN = os.environ.get("hf_token")
# Hub repository holding the PEFT adapter (and the tokenizer loaded below).
MODEL_ID = "ruSpamModels/ruSpam-Qwen-0.5B-50k"

# Pick the device and the matching precision together: half precision is only
# requested when a CUDA device is available, full precision otherwise.
if torch.cuda.is_available():
    device = "cuda"
    _torch_dtype = torch.float16
else:
    device = "cpu"
    _torch_dtype = torch.float32

# Base causal LM that the adapter is applied on top of.
base_model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2.5-0.5B-Instruct",
    torch_dtype=_torch_dtype,
    device_map=device,
    trust_remote_code=True,
    token=HF_TOKEN,
)

# Attach the adapter published under MODEL_ID and switch to inference mode.
model = PeftModel.from_pretrained(base_model, MODEL_ID, token=HF_TOKEN)
model.eval()

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN)
def classify(message):
    """Classify a message as spam or ham using the loaded model.

    The message is embedded in a fixed instruction prompt, exactly one new
    token is generated greedily, and the decoded token is mapped to a label.

    Args:
        message: Text to classify. ``None`` or whitespace-only input is
            reported as ``UNKNOWN`` without calling the model.

    Returns:
        A string of the form ``"<LABEL> (<elapsed> ms)"``, where ``<LABEL>``
        is ``SPAM``, ``HAM`` or ``UNKNOWN`` and ``<elapsed>`` is the time
        spent in generation, in milliseconds with one decimal place.
    """
    # Nothing to classify: avoid running the model on an empty prompt (or on
    # the literal text "None" that the f-string below would produce).
    if message is None or not str(message).strip():
        return "UNKNOWN (0.0 ms)"

    prompt = (
        "You are a spam classifier.\n"
        "Answer with one word: spam or ham.\n\n"
        f"Message:\n{message}\n\n"
        "Answer:"
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    prompt_len = inputs["input_ids"].shape[1]

    # perf_counter is monotonic, so the measured interval cannot be skewed by
    # wall-clock adjustments the way time.time() can.
    start = time.perf_counter()
    with torch.no_grad():
        out = model.generate(
            **inputs,
            max_new_tokens=1,
            # Greedy decoding. No temperature is passed: it only applies when
            # sampling and is inconsistent with do_sample=False.
            do_sample=False,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )
    elapsed = (time.perf_counter() - start) * 1000

    # The output row holds the prompt tokens followed by the single generated
    # token, so the new token sits at index prompt_len.
    new_token_id = out[0, prompt_len]
    answer = tokenizer.decode(new_token_id).strip().lower()

    if answer.startswith("spam"):
        label = "SPAM"
    elif answer.startswith("ham"):
        label = "HAM"
    else:
        label = "UNKNOWN"
    return f"{label} ({elapsed:.1f} ms)"
# Web UI: a four-line textbox for the message and a textbox showing the
# string returned by classify().
_message_box = gr.Textbox(lines=4)
_result_box = gr.Textbox()
iface = gr.Interface(
    title="ruSpam Qwen 0.5B",
    fn=classify,
    inputs=_message_box,
    outputs=_result_box,
)
iface.launch()
|