Spaces:

Aryan047
/

real_event_vs_meme_classifier

Sleeping

App Files Files Community

real_event_vs_meme_classifier / app.py

Aryan047

Deploy Space with only model weights

6e7acaa 19 days ago

raw

history blame contribute delete

1.82 kB

	import re
	import torch
	import torch.nn.functional as F
	import gradio as gr
	from transformers import AutoTokenizer, AutoModelForSequenceClassification

	# Folder uploaded to the Space that holds the model and tokenizer files.
	MODEL_DIR = "bert_classifier"
	# Upper bound on tokens per input; the tokenizer truncates anything longer.
	MAX_LENGTH = 128
	# Human-readable name for each class index (0 = meme, 1 = real event).
	LABELS = dict(enumerate(("🎭 Meme", "📰 Real Event")))

	_URL_RE = re.compile(r"https?://\S+\|www\.\S+")
	_MENTION_RE = re.compile(r"@\w+")
	_HASHTAG_RE = re.compile(r"#")
	_NON_WORD_RE = re.compile(r"[^a-z0-9\s]")
	_WS_RE = re.compile(r"\s+")

	def clean_tweet(text: str) -> str:
	t = text.lower()
	t = _URL_RE.sub(" ", t)
	t = _MENTION_RE.sub(" ", t)
	t = _HASHTAG_RE.sub(" ", t)
	t = _NON_WORD_RE.sub(" ", t)
	return _WS_RE.sub(" ", t).strip()

	# Use the GPU when CUDA is available, otherwise fall back to the CPU.
	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"

	# Load the tokenizer and the sequence-classification model from MODEL_DIR,
	# move the model to the chosen device and switch it to evaluation mode.
	tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
	model = AutoModelForSequenceClassification.from_pretrained(MODEL_DIR)
	model.to(device)
	model.eval()

	@torch.no_grad()
	def classify(text: str):
	    """Classify a tweet as meme or real event and report class probabilities.

	    Args:
	        text: Raw tweet text as typed by the user. ``None`` and
	            whitespace-only input are treated as empty.

	    Returns:
	        A dict in every case, so the JSON output component can render it:
	        ``{"Error": ...}`` for empty input, otherwise the predicted label,
	        its confidence, and the probability of each class formatted as
	        percentages.
	    """
	    # Guard against None as well as blank strings. The message is wrapped in
	    # a dict because the result is shown in a JSON component, which expects
	    # a dict/list or a valid JSON string, not a plain sentence.
	    if not text or not text.strip():
	        return {"Error": "Please enter a tweet."}
	    cleaned = clean_tweet(text)
	    enc = tokenizer(
	        cleaned,
	        truncation=True,
	        max_length=MAX_LENGTH,
	        return_tensors="pt",
	    ).to(device)
	    # logits[0]: a single text was encoded, so take the first row.
	    probs = F.softmax(model(**enc).logits[0], dim=-1).cpu().numpy()
	    pred = int(probs.argmax())
	    return {
	        "Label": LABELS[pred],
	        "Confidence": f"{probs[pred]:.1%}",
	        "P(meme)": f"{probs[0]:.1%}",
	        "P(real)": f"{probs[1]:.1%}",
	    }

	# Sample tweets offered as clickable examples below the input box.
	example_tweets = [
	    ["Massive 6.5 earthquake just hit Istanbul, buildings swaying"],
	    ["skibidi toilet ohio rizz level 9000 fr fr 💀"],
	    ["AWS us-east-1 throwing 500s across the board"],
	]

	# Build the web UI (one text box in, JSON out) around classify, then serve it.
	demo = gr.Interface(
	    fn=classify,
	    inputs=gr.Textbox(lines=3, placeholder="Paste a tweet here..."),
	    outputs=gr.JSON(),
	    title="Meme vs Real Event Classifier",
	    examples=example_tweets,
	)
	demo.launch()