nit454 committed on
Commit
beff589
·
verified ·
1 Parent(s): e532328

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -89
app.py CHANGED
@@ -1,101 +1,45 @@
1
  import gradio as gr
2
- import torch
3
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
4
- import easyocr
5
- from PIL import Image
6
- import numpy as np
7
-
8
# --- Hate-speech classifier (CardiffNLP RoBERTa checkpoint) ---
MODEL_NAME = "cardiffnlp/twitter-roberta-base-hate-multiclass-latest"

# Class names, indexed by the position of the highest-scoring logit.
# NOTE(review): presumably mirrors the checkpoint's own id2label order — confirm.
LABELS = [
    "sexism",
    "racism",
    "disability",
    "sexual_orientation",
    "religion",
    "other",
    "not_hate",
]

# Tokenizer and model are both loaded once, at import time.
tokenizer, model = (
    AutoTokenizer.from_pretrained(MODEL_NAME),
    AutoModelForSequenceClassification.from_pretrained(MODEL_NAME),
)

# --- OCR ---
# English-only EasyOCR reader with GPU use switched off.
reader = easyocr.Reader(["en"], gpu=False)
 
 
 
 
 
 
 
21
 
22
def extract_text(image):
    """Return the text EasyOCR reads from *image* as one space-joined string.

    ``None`` yields an empty string. A PIL image is converted to a NumPy
    array first; any other value is handed to the reader unchanged.
    """
    if image is None:
        return ""
    pixels = np.array(image) if isinstance(image, Image.Image) else image
    # detail=0 is requested so the result can be joined directly as strings.
    fragments = reader.readtext(pixels, detail=0)
    return " ".join(fragments)
30
 
31
- # -------------------------------
32
- # CLASSIFICATION LOGIC
33
- # -------------------------------
def classify_text(text):
    """Label *text* with the hate-speech classifier.

    Returns a ``(summary, label)`` tuple: ``summary`` is the display string
    and ``label`` is the predicted entry of ``LABELS``. Blank input skips the
    model and returns a notice together with ``None`` as the label.
    """
    if text.strip() == "":
        return "No text found for analysis.", None

    encoded = tokenizer(text, return_tensors="pt", truncation=True)
    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        scores = model(**encoded).logits

    distribution = torch.nn.functional.softmax(scores, dim=-1)
    best = torch.argmax(distribution).item()
    certainty = float(distribution[0][best])
    category = LABELS[best]
    summary = f"Category: {category} (Confidence: {certainty:.2f})"
    return summary, category
46
-
47
-
48
- # -------------------------------
49
- # CHATBOT FUNCTION
50
- # -------------------------------
51
def cyberbully_chat(messages, user_message, image=None):
    """Handle one chat turn and return the updated message history.

    Args:
        messages: Existing history as a list of ``{"role", "content"}`` dicts,
            or ``None``/empty to start a new one. A non-empty list is
            appended to in place and returned.
        user_message: Text typed by the user. ``None`` is treated as empty.
        image: Optional uploaded image. When given, its OCR text is
            classified instead of ``user_message``.

    Returns:
        The history with either a user/assistant pair (input text and its
        classification) or a single assistant prompt asking for valid input.
    """
    history = messages or []

    # Compare against None explicitly: truthiness of an image object is not a
    # reliable "was something uploaded" test (array images raise on bool()).
    if image is not None:
        text = extract_text(image)
        content = f"[Extracted from image] {text}" if text else ""
    else:
        # Guard against a None textbox value before calling str methods.
        text = user_message or ""
        content = text.strip()

    # Empty content means nothing usable was typed or read from the image.
    # No sentinel string is compared here, so a user typing the literal
    # "[No readable text found]" is classified like any other message.
    if not content:
        history.append({"role": "assistant", "content": "Please provide valid text or an image with text."})
        return history

    classification, _ = classify_text(text)

    history.append({"role": "user", "content": content})
    history.append({"role": "assistant", "content": classification})
    return history
74
-
75
-
76
- # -------------------------------
77
- # GRADIO ChatGPT-like UI
78
- # -------------------------------
79
# Chat-style layout: a message-format Chatbot, a text box and an optional
# screenshot upload, plus Analyze / Clear buttons.
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 Cyber Bully Detection System")
    gr.Markdown("Upload an image or type text. The system will analyze hate-speech categories using a RoBERTa model specialized for social media context.")

    chatbot = gr.Chatbot(type="messages", label="CyberBully Chat")
    with gr.Row():
        text_input = gr.Textbox(show_label=False, placeholder="Type a message here...")
        # Gradio 4 replaced the `source=` keyword with `sources=[...]`. The
        # message-format Chatbot above needs a Gradio release newer than
        # that change, so the old keyword would fail at construction time.
        image_input = gr.Image(sources=["upload"], type="pil", label="Upload Screenshot (optional)")
    with gr.Row():
        submit_btn = gr.Button("Analyze")
        clear_btn = gr.Button("Clear Chat")

    # Analyze: pass the current history, text and image to the handler and
    # show the returned history in the chatbot.
    submit_btn.click(
        cyberbully_chat,
        [chatbot, text_input, image_input],
        [chatbot],
        queue=True,
    )

    # Clear: reset the chatbot to an empty history.
    clear_btn.click(lambda: [], None, chatbot, queue=False)

if __name__ == "__main__":
    demo.launch()
 
1
  import gradio as gr
 
2
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
+ import torch
 
 
 
 
 
 
 
 
 
 
 
4
 
5
# Microsoft DeBERTa v3 base model (general-purpose, fine-tune recommended).
# NOTE(review): this is a base checkpoint; the sequence-classification head
# created below is expected to be freshly initialised, so predictions are not
# meaningful until the model is fine-tuned or MODEL_NAME points at a
# fine-tuned checkpoint — confirm before relying on the output.
MODEL_NAME = "microsoft/deberta-v3-base"
LABELS = [
    "sexism",
    "racism",
    "disability",
    "sexual_orientation",
    "religion",
    "other",
    "not_hate",
]

# Index <-> name mappings, stored on the model config so the label names
# travel with the model instead of living only in this file.
ID2LABEL = dict(enumerate(LABELS))
LABEL2ID = {name: index for index, name in ID2LABEL.items()}

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=len(LABELS),
    id2label=ID2LABEL,
    label2id=LABEL2ID,
)
 
 
 
 
 
 
19
 
 
 
 
def classify_text(text):
    """Classify *text* and return ``(label, confidence)``.

    Args:
        text: The string to classify.

    Returns:
        A tuple of the predicted entry of ``LABELS`` and the softmax
        probability of that class as a Python float.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        logits = model(**inputs).logits

    # Work on the first row explicitly. A bare torch.argmax over the whole
    # tensor returns an index into the flattened tensor, which would point
    # past LABELS if the tokenizer ever produced more than one row.
    probs = torch.nn.functional.softmax(logits, dim=-1)[0]
    pred = int(torch.argmax(probs))
    confidence = float(probs[pred])
    return LABELS[pred], confidence
29
+
30
def chatbot(text):
    """Return a one-line prediction message for *text*.

    Missing, empty or whitespace-only input gets a prompt asking for text;
    anything else is passed to ``classify_text`` and formatted with the
    confidence shown to two decimal places.
    """
    has_content = bool(text) and bool(text.strip())
    if has_content:
        predicted, score = classify_text(text)
        return f"Prediction: {predicted} (Confidence: {score:.2f})"
    return "Please enter some text."
35
+
36
# Single-textbox front end: the entered text goes to chatbot() and the string
# it returns is displayed as the output.
iface = gr.Interface(
    chatbot,
    gr.Textbox(lines=3, placeholder="Enter text for hate speech classification"),
    "text",
    title="DeBERTa Hate Speech Classifier",
    description="Classifies text into hate speech categories with DeBERTa v3-base model.",
)

# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()