"""Gradio app that labels a tweet as a meme or a real event.

The tweet is normalised with `clean_tweet`, tokenised, and scored by the
sequence-classification model stored in ``MODEL_DIR``; the result is shown
in a JSON output component.
"""
import re

import torch
import torch.nn.functional as F
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification

MODEL_DIR = "bert_classifier"  # folder you uploaded to the Space
MAX_LENGTH = 128  # token budget per tweet; longer inputs are truncated
LABELS = {0: "🎭 Meme", 1: "📰 Real Event"}

_URL_RE = re.compile(r"https?://\S+|www\.\S+")
_MENTION_RE = re.compile(r"@\w+")
_HASHTAG_RE = re.compile(r"#")
_NON_WORD_RE = re.compile(r"[^a-z0-9\s]")
_WS_RE = re.compile(r"\s+")


def clean_tweet(text: str) -> str:
    """Return a lower-cased, ASCII-alphanumeric-only version of *text*.

    URLs and @mentions are removed entirely, the ``#`` of a hashtag is
    dropped (the tag word itself is kept), every remaining character that
    is not ``a-z``, ``0-9`` or whitespace becomes a space, and runs of
    whitespace are collapsed. The result may be empty, e.g. for input made
    only of emoji or links.
    """
    # NOTE(review): presumably this mirrors the preprocessing used when the
    # model was trained; confirm before changing any step.
    t = text.lower()
    t = _URL_RE.sub(" ", t)
    t = _MENTION_RE.sub(" ", t)
    t = _HASHTAG_RE.sub(" ", t)
    t = _NON_WORD_RE.sub(" ", t)
    return _WS_RE.sub(" ", t).strip()


device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_DIR)
model.to(device).eval()


@torch.no_grad()
def classify(text: str) -> dict:
    """Classify one tweet and return a JSON-serialisable result.

    Args:
        text: Raw tweet text as typed by the user.

    Returns:
        On success, a dict with the keys ``"Label"``, ``"Confidence"``,
        ``"P(meme)"`` and ``"P(real)"`` (probabilities formatted as
        percentages). When there is nothing to classify, a dict with a
        single ``"Error"`` key holding a user-facing message.
    """
    # Always return a dict: the output component is gr.JSON, which treats a
    # str return value as serialised JSON, and a plain sentence is not that.
    if not text or not text.strip():
        return {"Error": "Please enter a tweet."}

    cleaned = clean_tweet(text)
    if not cleaned:
        # Cleaning strips links, mentions, punctuation, emoji and non-ASCII
        # characters; scoring the empty remainder would be meaningless.
        return {
            "Error": (
                "Nothing left to classify after removing links, mentions, "
                "emoji and symbols."
            )
        }

    enc = tokenizer(
        cleaned,
        truncation=True,
        max_length=MAX_LENGTH,
        return_tensors="pt",
    ).to(device)
    logits = model(**enc).logits[0]  # single input, so take the only row
    probs = F.softmax(logits, dim=-1).cpu().tolist()
    # Index of the highest probability; ties resolve to the lowest index.
    pred = max(range(len(probs)), key=probs.__getitem__)

    return {
        "Label": LABELS[pred],
        "Confidence": f"{probs[pred]:.1%}",
        "P(meme)": f"{probs[0]:.1%}",
        "P(real)": f"{probs[1]:.1%}",
    }


demo = gr.Interface(
    fn=classify,
    inputs=gr.Textbox(lines=3, placeholder="Paste a tweet here..."),
    outputs=gr.JSON(),
    title="Meme vs Real Event Classifier",
    examples=[
        ["Massive 6.5 earthquake just hit Istanbul, buildings swaying"],
        ["skibidi toilet ohio rizz level 9000 fr fr 💀"],
        ["AWS us-east-1 throwing 500s across the board"],
    ],
)

# Launch only when executed as a script so the helpers above can be imported
# without starting a web server.
if __name__ == "__main__":
    demo.launch()