| import re |
| import torch |
| import torch.nn.functional as F |
| import gradio as gr |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification |
|
|
# Directory handed to the tokenizer/model ``from_pretrained`` loaders.
MODEL_DIR = "bert_classifier"
# Token cap used when encoding a tweet (the tokenizer is called with truncation).
MAX_LENGTH = 128
# Class index -> label shown to the user. classify() reports index 0 as
# "P(meme)" and index 1 as "P(real)".
# NOTE(review): the leading "π" / "π°" look like emoji that were mis-decoded
# somewhere upstream — confirm the intended characters before changing them.
LABELS = {0: "π Meme", 1: "π° Real Event"}
|
|
# Patterns used by clean_tweet(). Each match is replaced by a single space.
_URL_RE = re.compile(r"https?://\S+|www\.\S+")  # http(s):// links and bare www. links
_MENTION_RE = re.compile(r"@\w+")  # @handles
_HASHTAG_RE = re.compile(r"#")  # only the '#' sign; the tag word itself is kept
_NON_WORD_RE = re.compile(r"[^a-z0-9\s]")  # anything but a-z, 0-9 and whitespace
_WS_RE = re.compile(r"\s+")  # runs of whitespace, collapsed as the final step


def clean_tweet(text: str) -> str:
    """Normalize a raw tweet into lowercase, space-separated ASCII words.

    The text is lowercased, then URLs, @mentions, '#' signs and every
    character outside ``a-z``, ``0-9`` and whitespace are replaced by
    spaces. Finally whitespace runs are collapsed to one space and the
    ends are trimmed.

    Args:
        text: Raw tweet text.

    Returns:
        The cleaned text; an empty string when nothing survives cleaning.
    """
    cleaned = text.lower()
    # Order matters: URLs and mentions must be removed while their
    # punctuation (':', '/', '.', '@') is still present, i.e. before the
    # generic non-word pass strips it.
    for pattern in (_URL_RE, _MENTION_RE, _HASHTAG_RE, _NON_WORD_RE):
        cleaned = pattern.sub(" ", cleaned)
    return _WS_RE.sub(" ", cleaned).strip()
|
|
# Load the tokenizer and classifier once, at import time, so every request
# served by classify() reuses the same objects.
device = "cuda" if torch.cuda.is_available() else "cpu"  # use the GPU when one is visible
tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_DIR)
model.to(device)  # move the weights to the selected device
model.eval()  # inference only: put the module in evaluation mode
|
|
@torch.no_grad()
def classify(text: str) -> dict:
    """Classify a tweet and report the predicted label with probabilities.

    Args:
        text: Raw tweet text as entered by the user. ``None`` is treated
            like an empty string.

    Returns:
        On success, a dict with the keys ``"Label"``, ``"Confidence"``,
        ``"P(meme)"`` and ``"P(real)"``; the probabilities are formatted
        as percentages with one decimal. When the input is blank, or
        nothing is left after cleaning, a dict with the single key
        ``"Error"`` holding a message for the user.
    """
    # Always return a dict. This function feeds a gr.JSON output, which
    # treats a plain str as JSON text to parse, so a bare message such as
    # "Please enter a tweet." would fail to decode instead of being shown.
    if text is None or not text.strip():
        return {"Error": "Please enter a tweet."}

    cleaned = clean_tweet(text)
    if not cleaned:
        # URL-only / emoji-only input cleans down to "", and scoring an
        # empty sequence would produce a meaningless prediction.
        return {"Error": "Nothing left to classify after removing URLs, mentions and symbols."}

    enc = tokenizer(
        cleaned,
        truncation=True,
        max_length=MAX_LENGTH,
        return_tensors="pt",
    ).to(device)
    # One input sequence -> take row 0 of the logits.
    logits = model(**enc).logits[0]
    probs = F.softmax(logits, dim=-1).cpu().numpy()
    pred = int(probs.argmax())
    # Assumes the checkpoint has two output classes ordered as in LABELS
    # (0 = meme, 1 = real event) — TODO confirm against the training code.
    return {
        "Label": LABELS[pred],
        "Confidence": f"{probs[pred]:.1%}",
        "P(meme)": f"{probs[0]:.1%}",
        "P(real)": f"{probs[1]:.1%}",
    }
|
|
# Build the web UI: one text box in, the dict returned by classify() shown
# as JSON out. The app object is bound to a module-level name so it can be
# referenced after construction; it is still launched immediately.
# NOTE(review): the trailing "π" in the second example looks like a
# mis-decoded emoji — confirm the intended character.
demo = gr.Interface(
    fn=classify,
    inputs=gr.Textbox(lines=3, placeholder="Paste a tweet here..."),
    outputs=gr.JSON(),
    title="Meme vs Real Event Classifier",
    examples=[
        ["Massive 6.5 earthquake just hit Istanbul, buildings swaying"],
        ["skibidi toilet ohio rizz level 9000 fr fr π"],
        ["AWS us-east-1 throwing 500s across the board"],
    ],
)
demo.launch()