nit454 committed on
Commit
06b22b6
·
verified ·
1 Parent(s): 7ba1745

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -41
app.py CHANGED
@@ -1,18 +1,21 @@
1
  import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
  import torch
4
- import easyocr
5
- from PIL import Image
6
- import numpy as np
7
 
8
- # Hate Speech Model and Labels
9
- HATE_MODEL_NAME = "cardiffnlp/twitter-roberta-base-hate-multiclass-latest"
10
- HATE_LABELS = [
 
 
 
 
 
 
 
 
 
 
11
  "sexism",
12
- "racism",
13
- "disability",
14
- "sexual_orientation",
15
- "religion",
16
  "other",
17
  "not_hate"
18
  ]
@@ -20,49 +23,46 @@ HATE_LABELS = [
20
  hate_tokenizer = AutoTokenizer.from_pretrained(HATE_MODEL_NAME)
21
  hate_model = AutoModelForSequenceClassification.from_pretrained(HATE_MODEL_NAME)
22
 
23
- reader = easyocr.Reader(['en'], gpu=False)
 
 
 
 
 
 
 
24
 
25
- def classify_text(text):
26
  inputs = hate_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
27
  with torch.no_grad():
28
  outputs = hate_model(**inputs)
29
  probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
30
  pred = torch.argmax(probs).item()
31
  confidence = float(probs[0][pred])
32
- return HATE_LABELS[pred], confidence
 
 
 
 
33
 
34
- def ocr_extract(image):
35
- if isinstance(image, Image.Image):
36
- image = np.array(image)
37
- result = reader.readtext(image, detail=0)
38
- return ' '.join(result)
39
 
40
- def chatbot(image=None, text=None):
41
- if image is not None:
42
- extracted = ocr_extract(image)
43
- if not extracted.strip():
44
- return "No text found in image.", None
45
- label, confidence = classify_text(extracted)
46
- return f"OCR Extracted: {extracted}\nHate Speech: {label} (Confidence: {confidence:.2f})", label
47
- elif text and text.strip():
48
- label, confidence = classify_text(text)
49
- return f"Text: {text}\nHate Speech: {label} (Confidence: {confidence:.2f})", label
50
- else:
51
- return "Please provide an image or some text.", None
52
 
53
  iface = gr.Interface(
54
  fn=chatbot,
55
- inputs=[
56
- gr.Image(type="pil", label="Upload Screenshot (optional)"),
57
- gr.Textbox(lines=3, placeholder="Or, type/paste text here")
58
- ],
59
- outputs=[
60
- gr.Textbox(label="Prediction"),
61
- gr.Label(num_top_classes=len(HATE_LABELS), label="Hate Speech Class"),
62
- ],
63
- title="Hate Speech Detection Chatbot",
64
- description="Detects hate speech categories from text or screenshots."
65
  )
66
 
67
  if __name__ == "__main__":
68
- iface.launch()
 
1
  import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
  import torch
 
 
 
4
 
5
+ # Sarcasm detection model (public and reliable)
6
+ SARCASM_MODEL_NAME = "j-hartmann/emotion-english-distilroberta-base"
7
+ sarcasm_labels = ["not sarcastic", "sarcastic"] # simplified mapping
8
+
9
+ sarcasm_tokenizer = AutoTokenizer.from_pretrained(SARCASM_MODEL_NAME)
10
+ sarcasm_model = AutoModelForSequenceClassification.from_pretrained(SARCASM_MODEL_NAME)
11
+
12
+ # Hate speech classification DeBERTa model fine-tuned for your labels (hypothetical model)
13
+ HATE_MODEL_NAME = "your-username/deberta-hate-speech-custom" # replace with your actual fine-tuned model
14
+ hate_labels = [
15
+ "abusive_words",
16
+ "harassment",
17
+ "religious_hate",
18
  "sexism",
 
 
 
 
19
  "other",
20
  "not_hate"
21
  ]
 
23
  hate_tokenizer = AutoTokenizer.from_pretrained(HATE_MODEL_NAME)
24
  hate_model = AutoModelForSequenceClassification.from_pretrained(HATE_MODEL_NAME)
25
 
26
+ def detect_sarcasm(text):
27
+ inputs = sarcasm_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
28
+ with torch.no_grad():
29
+ outputs = sarcasm_model(**inputs)
30
+ probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
31
+ pred = torch.argmax(probs).item()
32
+ confidence = float(probs[0][pred])
33
+ return sarcasm_labels[pred], confidence
34
 
35
+ def classify_hate(text):
36
  inputs = hate_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
37
  with torch.no_grad():
38
  outputs = hate_model(**inputs)
39
  probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
40
  pred = torch.argmax(probs).item()
41
  confidence = float(probs[0][pred])
42
+ return hate_labels[pred], confidence
43
+
44
+ def chatbot(text):
45
+ if not text or not text.strip():
46
+ return "Please enter text to analyze."
47
 
48
+ sarcasm_label, sarcasm_conf = detect_sarcasm(text)
49
+ if sarcasm_label == "sarcastic":
50
+ return f"Text is detected as SARCASTIC (Confidence: {sarcasm_conf:.2f}). Hate speech classification is skipped."
 
 
51
 
52
+ hate_label, hate_conf = classify_hate(text)
53
+ return (
54
+ f"Text is NOT sarcastic.\n"
55
+ f"Hate Speech Classification: {hate_label} (Confidence: {hate_conf:.2f})"
56
+ )
 
 
 
 
 
 
 
57
 
58
  iface = gr.Interface(
59
  fn=chatbot,
60
+ inputs=gr.Textbox(lines=3, placeholder="Enter text"),
61
+ outputs="text",
62
+ title="Sarcasm-aware Hate Speech Classifier",
63
+ description="""First detects sarcasm, and if no sarcasm, classifies hate speech into
64
+ detailed categories: abusive words, harassment (e.g., body shaming), religious hate, sexism, etc."""
 
 
 
 
 
65
  )
66
 
67
  if __name__ == "__main__":
68
+ iface.launch()