import torch
import pandas as pd
import re
from flask import Flask, render_template, request, jsonify
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from sklearn.metrics import classification_report
import io
import sys

# Define model names
bert_model_name = "bert-base-uncased"
hatebert_model_name = "GroNLP/hateBERT"

# Initialize Flask app
app = Flask(__name__)

class CyberbullyingDetector:
    def __init__(self, model_type="bert"):
        # Load the tokenizer/model pair for the requested backbone
        if model_type == "bert":
            self.tokenizer = AutoTokenizer.from_pretrained(bert_model_name)
            self.model = AutoModelForSequenceClassification.from_pretrained(bert_model_name)
        elif model_type == "hatebert":
            self.tokenizer = AutoTokenizer.from_pretrained(hatebert_model_name)
            self.model = AutoModelForSequenceClassification.from_pretrained(hatebert_model_name)
        else:
            raise ValueError("Invalid model_type. Choose 'bert' or 'hatebert'.")

        # Run on GPU when available
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)

        # Confidence thresholds used to map softmax scores to labels
        self.cyberbullying_threshold = 0.7
        self.borderline_threshold = 0.4

        # Word list that immediately flags a text as cyberbullying when matched
        self.trigger_words = [
            'buang', 'pokpok', 'bogo', 'linte', 'tanga', 'diputa', 'salamat', 'Padayon lang', 'mayo gid', 'Nagapasalamat',
            'gago', 'law-ay', 'bilatibay', 'yudipota', 'pangit', 'tikalon', 'tinikal', 'hambog',
            'batinggilan', 'biga-on', 'bulay-ug', 'agi', 'agitot', 'alpot', 'hangag'
        ]

    def find_triggers(self, text):
        # Case-insensitive substring match; lowercase both sides so
        # mixed-case entries such as 'Padayon lang' can still match
        text_lower = text.lower()
        return [word for word in self.trigger_words if word.lower() in text_lower]
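
    # Example (illustration only): find_triggers("Buang ka gid") returns ['buang'].
    # Because matching is by substring, short entries such as 'agi' also fire
    # inside longer words that contain them (e.g. 'agitot').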

    def predict(self, text):
        triggers = self.find_triggers(text)

        # Tokenize and move the input tensors to the model's device
        inputs = self.tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            max_length=128,
            padding=True
        ).to(self.device)

        # Forward pass without gradient tracking
        with torch.no_grad():
            outputs = self.model(**inputs)

        probs = torch.nn.functional.softmax(outputs.logits, dim=1)
        pred_class = torch.argmax(probs).item()
        confidence = probs[0][pred_class].item()

        # Label the text: high confidence, a positive class prediction,
        # or any trigger-word hit counts as cyberbullying
        if confidence >= self.cyberbullying_threshold or pred_class == 1 or len(triggers) > 0:
            label = "Cyberbullying"
            is_cyberbullying = True
        elif confidence >= self.borderline_threshold:
            label = "Borderline"
            is_cyberbullying = False
        else:
            label = "Safe"
            is_cyberbullying = False

        return {
            "text": text,
            "label": label,
            "confidence": confidence,
            "language": "hil",
            "triggers": triggers,
            "is_cyberbullying": is_cyberbullying
        }
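
# A minimal usage sketch (comments only, for illustration): the detector can be
# exercised directly, outside Flask, e.g.
#   CyberbullyingDetector(model_type="hatebert").predict("buang ka gid")
# Both checkpoints are base models, so AutoModelForSequenceClassification attaches
# a freshly initialised classification head; the softmax confidences are not
# meaningful until the model is fine-tuned on labelled data, and the trigger-word
# check is the main reliable signal before then.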

# Initialize the detector
detector = CyberbullyingDetector(model_type="bert")


@app.route('/')
def index():
    return render_template('index.html', classification_report="Loading...")

@app.route('/predict', methods=['POST'])
def predict():
    data = request.get_json()
    text = data.get('text', '')
    if not text:
        return jsonify({"error": "No text provided"}), 400

    # Make prediction using the model
    result = detector.predict(text)

    # Build a one-sample classification report; the "true" label is a heuristic
    # that only checks whether the literal word "cyberbullying" appears in the text
    true_labels = ["Cyberbullying" if "cyberbullying" in text else "Safe"]
    predicted_labels = [result['label']]
    report = classification_report(true_labels, predicted_labels, zero_division=0)

    # Render the template with the classification report
    return render_template('index.html', classification_report=report)


if __name__ == '__main__':
    app.run(debug=True)
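
# Example request against the /predict route once the server is running
# (route name and JSON payload shape follow the handler above; 5000 is Flask's
# default development port):
#   curl -X POST http://127.0.0.1:5000/predict \
#        -H "Content-Type: application/json" \
#        -d '{"text": "buang ka gid"}'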