Spaces:

jokugeorgin
/

microaggression-analyzer

Sleeping

App Files Community

jokugeorgin committed on Oct 15, 2025

Commit

66fb10b

verified ·

1 Parent(s): beb2921

Update app.py

Browse files

Files changed (1) hide show

app.py +99 -65

app.py CHANGED Viewed

@@ -10,111 +10,142 @@ from transformers import (
     DebertaTokenizer,
     DebertaForSequenceClassification,
     T5Tokenizer,
-    T5ForConditionalGeneration
 )
 torch.set_num_threads(2)
 torch.set_num_interop_threads(1)
 class MicroaggressionPipeline:
     def __init__(self):
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         print(f"Using device: {self.device}")
         print("Loading detection model...")
-        self.detection_tokenizer = DebertaTokenizer.from_pretrained("jokugeorgin/CI_MA_Detect")
-        self.detection_model = DebertaForSequenceClassification.from_pretrained(
-            "jokugeorgin/CI_MA_Detect", num_labels=2
         ).to(self.device)
-        self.detection_model.eval()
         print("Loading reframing model...")
-        self.reframing_tokenizer = T5Tokenizer.from_pretrained("jokugeorgin/CI_MA_Reframe")
-        self.reframing_model = T5ForConditionalGeneration.from_pretrained(
-            "jokugeorgin/CI_MA_Reframe"
         ).to(self.device)
-        self.reframing_model.eval()
         print("Warming up...")
-        _ = self.analyze("hello", threshold=0.5)
         print("Ready!")
     @torch.no_grad()
-    def analyze(self, text, threshold=0.5, k=3):
-        enc = self.detection_tokenizer(
-            text, max_length=128, truncation=True, padding="max_length", return_tensors="pt"
         )
         enc = {k: v.to(self.device) for k, v in enc.items()}
-        logits = self.detection_model(**enc).logits
         probs = F.softmax(logits, dim=1)[0]
         pred_idx = int(torch.argmax(logits, dim=1))
-        confidence = float(probs[pred_idx])
-        is_micro = bool(pred_idx) and (confidence >= threshold)
         options = []
-        if is_micro:
-            prefixed = f"rephrase: {text}"
-            genc = self.reframing_tokenizer(
-                prefixed, return_tensors="pt", max_length=192, truncation=True
-            )
-            genc = {k: v.to(self.device) for k, v in genc.items()}
-            out = self.reframing_model.generate(
-                **genc,
-                max_length=192,
-                num_beams=4,
-                num_return_sequences=max(1, min(k, 5)),
-                no_repeat_ngram_size=2,
-                do_sample=True,
-                temperature=0.7,
-                early_stopping=True,
-            )
-            seen = set()
-            for seq in out:
-                s = self.reframing_tokenizer.decode(seq, skip_special_tokens=True).strip()
-                if s and s not in seen:
-                    seen.add(s)
-                    options.append(s)
-                if len(options) >= k:
-                    break
-            while len(options) < k and options:
-                options.append(options[-1])
-        return is_micro, confidence, options[:k]
-pipeline = MicroaggressionPipeline()
-def gradio_interface(text, threshold):
     text = (text or "").strip()
     if not text:
         return "❌ Please enter some text", "", "", ""
-    is_micro, confidence, options = pipeline.analyze(text, threshold=threshold, k=3)
-    result = (
-        f"⚠️ **Microaggression Detected**\n\nConfidence: {confidence:.1%}"
-        if is_micro else
-        f"✅ **No Microaggression Detected**\n\nConfidence: {confidence:.1%}"
     )
     opts = (options + ["", "", ""])[:3]
-    return result, opts[0], opts[1], opts[2]
 with gr.Blocks(title="Microaggression Analyzer") as demo:
     gr.Markdown("# 🔍 Microaggression Analyzer\nDetect and reframe microaggressions in text")
     with gr.Row():
         with gr.Column():
-            text_input = gr.Textbox(label="Enter text to analyze", placeholder="Type or paste text...", lines=3)
-            threshold = gr.Slider(minimum=0.3, maximum=0.9, value=0.5, step=0.1, label="Detection Threshold")
             analyze_btn = gr.Button("Analyze", variant="primary")
         with gr.Column():
-            result_output = gr.Markdown(label="Result")
     gr.Markdown("### Suggested Reframings")
     with gr.Row():
-        option1 = gr.Textbox(label="Option 1", lines=2)
-        option2 = gr.Textbox(label="Option 2", lines=2)
-        option3 = gr.Textbox(label="Option 3", lines=2)
     gr.Examples(
         examples=[
@@ -122,14 +153,17 @@ with gr.Blocks(title="Microaggression Analyzer") as demo:
             ["Where are you really from?", 0.5],
             ["You're so articulate.", 0.5],
         ],
-        inputs=[text_input, threshold],
     )
     analyze_btn.click(
         fn=gradio_interface,
-        inputs=[text_input, threshold],
-        outputs=[result_output, option1, option2, option3],
     )
-demo.queue(concurrency_count=2, max_size=16)
 demo.launch(show_api=True)

     DebertaTokenizer,
     DebertaForSequenceClassification,
     T5Tokenizer,
+    T5ForConditionalGeneration,
 )
+# keep CPU predictable
 torch.set_num_threads(2)
 torch.set_num_interop_threads(1)
+DETECT_REPO = "jokugeorgin/CI_MA_Detect"
+REFRAME_REPO = "jokugeorgin/CI_MA_Reframe"
 class MicroaggressionPipeline:
     def __init__(self):
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         print(f"Using device: {self.device}")
+        # ---- Load detection (DeBERTa) ----
         print("Loading detection model...")
+        self.det_tok = DebertaTokenizer.from_pretrained(DETECT_REPO)
+        self.det_mod = DebertaForSequenceClassification.from_pretrained(
+            DETECT_REPO, num_labels=2
         ).to(self.device)
+        self.det_mod.eval()
+        # ---- Load reframing (T5) ----
         print("Loading reframing model...")
+        self.ref_tok = T5Tokenizer.from_pretrained(REFRAME_REPO)
+        self.ref_mod = T5ForConditionalGeneration.from_pretrained(
+            REFRAME_REPO
         ).to(self.device)
+        self.ref_mod.eval()
+        # warm-up (tiny forward pass so first request is snappy)
         print("Warming up...")
+        _ = self.analyze("hello", threshold=0.5, k=1)
         print("Ready!")
     @torch.no_grad()
+    def detect(self, text: str, threshold: float = 0.5):
+        enc = self.det_tok(
+            text,
+            max_length=128,
+            truncation=True,
+            padding="max_length",
+            return_tensors="pt",
         )
         enc = {k: v.to(self.device) for k, v in enc.items()}
+        logits = self.det_mod(**enc).logits
         probs = F.softmax(logits, dim=1)[0]
         pred_idx = int(torch.argmax(logits, dim=1))
+        conf = float(probs[pred_idx])
+        is_micro = bool(pred_idx) and (conf >= threshold)
+        return is_micro, conf, f"LABEL_{pred_idx}"
+    @torch.no_grad()
+    def reframe(self, text: str, k: int = 3):
+        # capped for latency on CPU
+        pref = f"rephrase: {text}"
+        enc = self.ref_tok(
+            pref, return_tensors="pt", max_length=192, truncation=True
+        )
+        enc = {k: v.to(self.device) for k, v in enc.items()}
+        out = self.ref_mod.generate(
+            **enc,
+            max_length=192,
+            num_beams=4,
+            num_return_sequences=max(1, min(k, 5)),
+            no_repeat_ngram_size=2,
+            do_sample=True,
+            temperature=0.7,
+            early_stopping=True,
+        )
+        seen = set()
         options = []
+        for seq in out:
+            s = self.ref_tok.decode(seq, skip_special_tokens=True).strip()
+            if s and s not in seen:
+                seen.add(s)
+                options.append(s)
+            if len(options) >= k:
+                break
+        while len(options) < k and options:
+            options.append(options[-1])
+        return options[:k]
+    def analyze(self, text: str, threshold: float = 0.5, k: int = 3):
+        is_micro, conf, raw_label = self.detect(text, threshold=threshold)
+        options = self.reframe(text, k=k) if is_micro else []
+        return is_micro, conf, raw_label, options
+PIPELINE = MicroaggressionPipeline()
+def gradio_interface(text: str, threshold: float):
     text = (text or "").strip()
     if not text:
         return "❌ Please enter some text", "", "", ""
+    is_micro, conf, raw_label, options = PIPELINE.analyze(
+        text, threshold=float(threshold), k=3
     )
+    if is_micro:
+        header = f"⚠️ **Microaggression Detected**  \nConfidence: {conf:.1%}  \nRaw label: {raw_label}"
+    else:
+        header = f"✅ **No Microaggression Detected**  \nConfidence: {conf:.1%}  \nRaw label: {raw_label}"
+    # pad to 3 fields for the UI
     opts = (options + ["", "", ""])[:3]
+    return header, opts[0], opts[1], opts[2]
 with gr.Blocks(title="Microaggression Analyzer") as demo:
     gr.Markdown("# 🔍 Microaggression Analyzer\nDetect and reframe microaggressions in text")
     with gr.Row():
         with gr.Column():
+            text_in = gr.Textbox(
+                label="Enter text to analyze",
+                placeholder="Type or paste text...",
+                lines=3,
+            )
+            thr = gr.Slider(
+                minimum=0.3, maximum=0.9, value=0.5, step=0.1, label="Detection Threshold"
+            )
             analyze_btn = gr.Button("Analyze", variant="primary")
         with gr.Column():
+            result_md = gr.Markdown(label="Result")
     gr.Markdown("### Suggested Reframings")
     with gr.Row():
+        opt1 = gr.Textbox(label="Option 1", lines=2)
+        opt2 = gr.Textbox(label="Option 2", lines=2)
+        opt3 = gr.Textbox(label="Option 3", lines=2)
     gr.Examples(
         examples=[
             ["Where are you really from?", 0.5],
             ["You're so articulate.", 0.5],
         ],
+        inputs=[text_in, thr],
     )
     analyze_btn.click(
         fn=gradio_interface,
+        inputs=[text_in, thr],
+        outputs=[result_md, opt1, opt2, opt3],
+        # (gradio v5) optional per-event limit:
+        # concurrency_limit="default"
     )
+# (gradio v5) no concurrency_count; use default_concurrency_limit if you want
+demo.queue(default_concurrency_limit=2, max_size=16)
 demo.launch(show_api=True)