Spaces:

nit454
/

CBDS

Runtime error

App Files Files Community

nit454 committed on Oct 15, 2025

Commit

fb4632b

verified ·

1 Parent(s): 275a605

Update app.py

Browse files

Files changed (1) hide show

app.py +52 -19

app.py CHANGED Viewed

@@ -5,20 +5,29 @@ import easyocr
 from PIL import Image
 import numpy as np
-# Set up model and labels.
-# LABELS is looked up with the predicted class index (classify_text returns
-# LABELS[pred]), so the position of each name below is significant.
 MODEL_NAME = "cardiffnlp/twitter-roberta-base-hate-multiclass-latest"
 LABELS = [
-    "sexism",             # 0
-    "racism",             # 1
-    "disability",         # 2
-    "sexual_orientation", # 3
-    "religion",           # 4
-    "other",              # 5
-    "not_hate"            # 6
 ]
-# Tokenizer and sequence-classification model are both loaded from MODEL_NAME
-# at import time; the EasyOCR reader is created with the ['en'] language list.
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
 reader = easyocr.Reader(['en'])
 def classify_text(text):
@@ -31,39 +40,63 @@ def classify_text(text):
         confidence = float(probs[0][pred])
         return LABELS[pred], confidence
 def ocr_extract(image):
-    # Run the module-level EasyOCR reader on `image` and return the results
-    # joined into one space-separated string.
-    # Convert to a numpy array first if a PIL Image was passed in.
     if isinstance(image, Image.Image):
         image = np.array(image)
-    # NOTE(review): detail=0 presumably makes readtext return plain strings
-    # (the result is joined as strings below) - confirm against EasyOCR docs.
     result = reader.readtext(image, detail=0)
     return ' '.join(result)
 def chatbot(image=None, text=None):
-    # Prioritize image: when an image is supplied, `text` is not consulted.
-    # Returns a (message, label) pair; label is None when nothing was classified.
     if image is not None:
         extracted = ocr_extract(image)
-        # OCR produced nothing but whitespace: report it instead of classifying.
         if not extracted.strip():
-            return "No text found in image.", None
         label, confidence = classify_text(extracted)
-        return f"OCR Extracted: {extracted}\nPrediction: {label} (Confidence: {confidence:.2f})", label
     elif text and text.strip():
         label, confidence = classify_text(text)
-        return f"Text: {text}\nPrediction: {label} (Confidence: {confidence:.2f})", label
     else:
-        # Neither an image nor non-blank text was provided.
-        return "Please provide an image or some text.", None
 iface = gr.Interface(
-    # Inputs are listed in the same order as chatbot's parameters (image, text).
     fn=chatbot,
     inputs=[
         gr.Image(type="pil", label="Upload Screenshot (optional)"),
-        gr.Textbox(lines=2, placeholder="Or, type/paste text here")
     ],
-    # Outputs are listed in the same order as chatbot's return pair (message, label).
     outputs=[
-        gr.Textbox(label="Prediction & OCR"),
-        gr.Label(num_top_classes=7)  # 7 matches the number of entries in LABELS
     ],
-    title="Multiclass Hate Speech Classifier (with OCR)",
-    description="Detects: sexism, racism, disability, sexual_orientation, religion, other, not_hate. Enter text or upload screenshot."
 )
 if __name__ == "__main__":

 from PIL import Image
 import numpy as np
+# Hate Speech model (example uses base CardiffNLP + extended labels for demonstration)
 MODEL_NAME = "cardiffnlp/twitter-roberta-base-hate-multiclass-latest"
 LABELS = [
+    "sexism",
+    "racism",
+    "disability",
+    "sexual_orientation",
+    "religion",
+    "abusive_words",  # added label - simulation only
+    "threat",         # added label - simulation only
+    "harassment",     # added label - simulation only
+    "sarcastic",      # added label - simulation only; we'll do actual sarcasm detection via separate model
+    "not_hate"
 ]
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
+# Sarcasm Detection model (example pretrained; replace with your actual sarcasm model)
+# NOTE(review): the checkpoint id below is marked as an example. Both
+# from_pretrained calls run at import time, so confirm the id resolves
+# before deploying.
+SARCASM_MODEL_NAME = "microsoft/deberta-base-sarcasm"  # example, replace if unavailable
+sarcasm_tokenizer = AutoTokenizer.from_pretrained(SARCASM_MODEL_NAME)
+sarcasm_model = AutoModelForSequenceClassification.from_pretrained(SARCASM_MODEL_NAME)
+# EasyOCR reader created with the ['en'] language list; used by ocr_extract.
 reader = easyocr.Reader(['en'])
 def classify_text(text):
         confidence = float(probs[0][pred])
         return LABELS[pred], confidence
+def is_sarcastic(text):
+    inputs = sarcasm_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
+    with torch.no_grad():
+        outputs = sarcasm_model(**inputs)
+        probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
+        # assuming label 1 means sarcastic; adjust if needed
+        sarcasm_prob = probs[0][1].item()
+        return sarcasm_prob > 0.5, sarcasm_prob
 def ocr_extract(image):
     if isinstance(image, Image.Image):
         image = np.array(image)
     result = reader.readtext(image, detail=0)
     return ' '.join(result)
 def chatbot(image=None, text=None):
+    # Priority: image with OCR, else text box
     if image is not None:
         extracted = ocr_extract(image)
         if not extracted.strip():
+            return "No text found in image.", None, None
         label, confidence = classify_text(extracted)
+        sarcastic, sarcasm_prob = is_sarcastic(extracted)
+        sarcasm_text = "Yes" if sarcastic else "No"
+        return (
+            f"OCR Extracted: {extracted}\nPrediction: {label} (Confidence: {confidence:.2f})\nSarcasm: {sarcasm_text} (Prob: {sarcasm_prob:.2f})",
+            label,
+            sarcasm_text
+        )
     elif text and text.strip():
         label, confidence = classify_text(text)
+        sarcastic, sarcasm_prob = is_sarcastic(text)
+        sarcasm_text = "Yes" if sarcastic else "No"
+        return (
+            f"Text: {text}\nPrediction: {label} (Confidence: {confidence:.2f})\nSarcasm: {sarcasm_text} (Prob: {sarcasm_prob:.2f})",
+            label,
+            sarcasm_text
+        )
     else:
+        return "Please provide an image or some text.", None, None
 iface = gr.Interface(
     fn=chatbot,
     inputs=[
         gr.Image(type="pil", label="Upload Screenshot (optional)"),
+        gr.Textbox(lines=3, placeholder="Or, type/paste text here")
     ],
     outputs=[
+        gr.Textbox(label="Prediction & Sarcasm Detection"),
+        gr.Label(num_top_classes=len(LABELS), label="Hate Speech Class"),
+        gr.Label(num_top_classes=2, label="Sarcasm")
     ],
+    title="Multiclass Hate Speech + Sarcasm Detection Chatbot",
+    description="""
+    Classifies text (or text extracted from image) into hate speech categories including abusive words,
+    threat, harassment, and detects sarcasm separately. Enter text or upload an image screenshot.
+    """
 )
 if __name__ == "__main__":