# Hugging Face Space app.py — "Update app.py", commit 66fb10b (verified), uploaded by jokugeorgin
# Environment knobs must be set before the heavy libraries are imported:
# disable tokenizer thread fan-out and cap BLAS threads for a small CPU Space.
import os
os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
os.environ.setdefault("OMP_NUM_THREADS", "2")
os.environ.setdefault("MKL_NUM_THREADS", "2")
import gradio as gr
import torch
import torch.nn.functional as F
from transformers import (
    DebertaTokenizer,
    DebertaForSequenceClassification,
    T5Tokenizer,
    T5ForConditionalGeneration,
)
# keep CPU predictable
torch.set_num_threads(2)
torch.set_num_interop_threads(1)
# Hugging Face Hub repos: binary detector (DeBERTa) and reframer (T5).
DETECT_REPO = "jokugeorgin/CI_MA_Detect"
REFRAME_REPO = "jokugeorgin/CI_MA_Reframe"
class MicroaggressionPipeline:
    """Two-stage pipeline: DeBERTa microaggression detection + T5 reframing.

    Both models are downloaded from the Hub once at construction time,
    moved to the best available device (CUDA if present, else CPU), and
    kept in eval mode. A tiny warm-up request is run so the first real
    request is fast.
    """

    def __init__(self):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(f"Using device: {self.device}")
        # ---- Load detection (DeBERTa) ----
        print("Loading detection model...")
        self.det_tok = DebertaTokenizer.from_pretrained(DETECT_REPO)
        self.det_mod = DebertaForSequenceClassification.from_pretrained(
            DETECT_REPO, num_labels=2
        ).to(self.device)
        self.det_mod.eval()
        # ---- Load reframing (T5) ----
        print("Loading reframing model...")
        self.ref_tok = T5Tokenizer.from_pretrained(REFRAME_REPO)
        self.ref_mod = T5ForConditionalGeneration.from_pretrained(
            REFRAME_REPO
        ).to(self.device)
        self.ref_mod.eval()
        # warm-up (tiny forward pass so first request is snappy)
        print("Warming up...")
        _ = self.analyze("hello", threshold=0.5, k=1)
        print("Ready!")

    @torch.no_grad()
    def detect(self, text: str, threshold: float = 0.5):
        """Classify *text* as microaggression / not.

        Returns ``(is_micro, confidence, raw_label)``. ``is_micro`` is
        True only when the argmax class is 1 AND its softmax probability
        meets *threshold*; ``raw_label`` is ``"LABEL_0"``/``"LABEL_1"``.
        """
        enc = self.det_tok(
            text,
            max_length=128,
            truncation=True,
            padding="max_length",
            return_tensors="pt",
        )
        enc = {key: val.to(self.device) for key, val in enc.items()}
        logits = self.det_mod(**enc).logits
        probs = F.softmax(logits, dim=1)[0]
        pred_idx = int(torch.argmax(logits, dim=1))
        conf = float(probs[pred_idx])
        # Label index 1 is treated as "microaggression"; gate on threshold.
        is_micro = bool(pred_idx) and (conf >= threshold)
        return is_micro, conf, f"LABEL_{pred_idx}"

    @torch.no_grad()
    def reframe(self, text: str, k: int = 3):
        """Generate up to *k* distinct reframings of *text*.

        Uses beam sampling (4 beams, temperature 0.7), de-duplicates the
        decoded outputs, and pads by repeating the last option so callers
        always receive *k* items (unless nothing non-empty was generated).
        """
        # capped for latency on CPU
        num_beams = 4
        # BUG FIX: the original capped num_return_sequences at 5 while using
        # only 4 beams; transformers requires num_return_sequences <= num_beams
        # for beam search, so any k >= 5 made generate() raise ValueError.
        # Cap at the beam count instead.
        n_seqs = max(1, min(k, num_beams))
        # presumably this matches the task prefix used at fine-tuning — confirm
        pref = f"rephrase: {text}"
        enc = self.ref_tok(
            pref, return_tensors="pt", max_length=192, truncation=True
        )
        enc = {key: val.to(self.device) for key, val in enc.items()}
        out = self.ref_mod.generate(
            **enc,
            max_length=192,
            num_beams=num_beams,
            num_return_sequences=n_seqs,
            no_repeat_ngram_size=2,
            do_sample=True,
            temperature=0.7,
            early_stopping=True,
        )
        # De-duplicate while preserving generation (beam-score) order.
        seen = set()
        options = []
        for seq in out:
            s = self.ref_tok.decode(seq, skip_special_tokens=True).strip()
            if s and s not in seen:
                seen.add(s)
                options.append(s)
            if len(options) >= k:
                break
        # Pad with the last option so the UI always gets k fields.
        while len(options) < k and options:
            options.append(options[-1])
        return options[:k]

    def analyze(self, text: str, threshold: float = 0.5, k: int = 3):
        """Detect first; generate reframings only when *text* is flagged.

        Returns ``(is_micro, confidence, raw_label, options)`` where
        ``options`` is ``[]`` when no microaggression was detected.
        """
        is_micro, conf, raw_label = self.detect(text, threshold=threshold)
        options = self.reframe(text, k=k) if is_micro else []
        return is_micro, conf, raw_label, options
# Module-level singleton: models load (and warm up) once at import time,
# so every Gradio request reuses the same pipeline instance.
PIPELINE = MicroaggressionPipeline()
def gradio_interface(text: str, threshold: float):
    """Gradio callback: analyze *text* and return (header_md, opt1, opt2, opt3).

    Empty/whitespace-only input short-circuits with an error header and
    three blank option fields, without touching the pipeline.
    """
    cleaned = (text or "").strip()
    if not cleaned:
        return "❌ Please enter some text", "", "", ""
    is_micro, conf, raw_label, options = PIPELINE.analyze(
        cleaned, threshold=float(threshold), k=3
    )
    verdict = (
        "⚠️ **Microaggression Detected**"
        if is_micro
        else "✅ **No Microaggression Detected**"
    )
    header = f"{verdict} \nConfidence: {conf:.1%} \nRaw label: {raw_label}"
    # The UI always expects exactly three option fields — pad with blanks.
    padded = (options + ["", "", ""])[:3]
    return (header, *padded)
# ---- UI layout: input column (textbox + threshold slider + button) on the
# left, result markdown on the right, three reframing fields below. ----
with gr.Blocks(title="Microaggression Analyzer") as demo:
    gr.Markdown("# 🔍 Microaggression Analyzer\nDetect and reframe microaggressions in text")
    with gr.Row():
        with gr.Column():
            text_in = gr.Textbox(
                label="Enter text to analyze",
                placeholder="Type or paste text...",
                lines=3,
            )
            # Slider range matches detect()'s confidence gate (0.3–0.9).
            thr = gr.Slider(
                minimum=0.3, maximum=0.9, value=0.5, step=0.1, label="Detection Threshold"
            )
            analyze_btn = gr.Button("Analyze", variant="primary")
        with gr.Column():
            result_md = gr.Markdown(label="Result")
    gr.Markdown("### Suggested Reframings")
    with gr.Row():
        opt1 = gr.Textbox(label="Option 1", lines=2)
        opt2 = gr.Textbox(label="Option 2", lines=2)
        opt3 = gr.Textbox(label="Option 3", lines=2)
    # Clickable example inputs (text + threshold pairs).
    gr.Examples(
        examples=[
            ["You speak good English for someone from there.", 0.5],
            ["Where are you really from?", 0.5],
            ["You're so articulate.", 0.5],
        ],
        inputs=[text_in, thr],
    )
    # Outputs must match gradio_interface's 4-tuple return order.
    analyze_btn.click(
        fn=gradio_interface,
        inputs=[text_in, thr],
        outputs=[result_md, opt1, opt2, opt3],
        # (gradio v5) optional per-event limit:
        # concurrency_limit="default"
    )
# (gradio v5) no concurrency_count; use default_concurrency_limit if you want
demo.queue(default_concurrency_limit=2, max_size=16)
demo.launch(show_api=True)