import torch
import pandas as pd
import re
from flask import Flask, render_template, request, jsonify
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from sklearn.metrics import classification_report
import io
import sys

# Define model names
bert_model_name = "bert-base-uncased"
hatebert_model_name = "GroNLP/hateBERT"

# Initialize Flask app
app = Flask(__name__)

class CyberbullyingDetector:
    def __init__(self, model_type="bert"):
        # Load the tokenizer/model pair for the requested backbone
        if model_type == "bert":
            self.tokenizer = AutoTokenizer.from_pretrained(bert_model_name)
            self.model = AutoModelForSequenceClassification.from_pretrained(bert_model_name)
        elif model_type == "hatebert":
            self.tokenizer = AutoTokenizer.from_pretrained(hatebert_model_name)
            self.model = AutoModelForSequenceClassification.from_pretrained(hatebert_model_name)
        else:
            raise ValueError("Invalid model_type. Choose 'bert' or 'hatebert'.")

        # Run on GPU when available
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)

        # Confidence thresholds used to map softmax scores to labels
        self.cyberbullying_threshold = 0.7
        self.borderline_threshold = 0.4

        # Word list that immediately flags a text as cyberbullying when matched
        self.trigger_words = [
            'buang', 'pokpok', 'bogo', 'linte', 'tanga', 'diputa', 'salamat', 'Padayon lang', 'mayo gid', 'Nagapasalamat',
            'gago', 'law-ay', 'bilatibay', 'yudipota', 'pangit', 'tikalon', 'tinikal', 'hambog',
            'batinggilan', 'biga-on', 'bulay-ug', 'agi', 'agitot', 'alpot', 'hangag'
        ]

    def find_triggers(self, text):
        # Case-insensitive substring match; lowercase both sides so
        # mixed-case entries such as 'Padayon lang' can still match
        text_lower = text.lower()
        return [word for word in self.trigger_words if word.lower() in text_lower]
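
    # Example (illustration only): find_triggers("Buang ka gid") returns ['buang'].
    # Because matching is by substring, short entries such as 'agi' also fire
    # inside longer words that contain them (e.g. 'agitot').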

    def predict(self, text):
        triggers = self.find_triggers(text)

        # Tokenize and move the input tensors to the model's device
        inputs = self.tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            max_length=128,
            padding=True
        ).to(self.device)

        # Forward pass without gradient tracking
        with torch.no_grad():
            outputs = self.model(**inputs)

        probs = torch.nn.functional.softmax(outputs.logits, dim=1)
        pred_class = torch.argmax(probs).item()
        confidence = probs[0][pred_class].item()

        # Label the text: high confidence, a positive class prediction,
        # or any trigger-word hit counts as cyberbullying
        if confidence >= self.cyberbullying_threshold or pred_class == 1 or len(triggers) > 0:
            label = "Cyberbullying"
            is_cyberbullying = True
        elif confidence >= self.borderline_threshold:
            label = "Borderline"
            is_cyberbullying = False
        else:
            label = "Safe"
            is_cyberbullying = False

        return {
            "text": text,
            "label": label,
            "confidence": confidence,
            "language": "hil",
            "triggers": triggers,
            "is_cyberbullying": is_cyberbullying
        }
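
# A minimal usage sketch (comments only, for illustration): the detector can be
# exercised directly, outside Flask, e.g.
#   CyberbullyingDetector(model_type="hatebert").predict("buang ka gid")
# Both checkpoints are base models, so AutoModelForSequenceClassification attaches
# a freshly initialised classification head; the softmax confidences are not
# meaningful until the model is fine-tuned on labelled data, and the trigger-word
# check is the main reliable signal before then.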

# Initialize the detector
detector = CyberbullyingDetector(model_type="bert")


@app.route('/')
def index():
    return render_template('index.html', classification_report="Loading...")

@app.route('/predict', methods=['POST'])
def predict():
    data = request.get_json()
    text = data.get('text', '')
    if not text:
        return jsonify({"error": "No text provided"}), 400

    # Make prediction using the model
    result = detector.predict(text)

    # Build a one-sample classification report; the "true" label is a heuristic
    # that only checks whether the literal word "cyberbullying" appears in the text
    true_labels = ["Cyberbullying" if "cyberbullying" in text else "Safe"]
    predicted_labels = [result['label']]
    report = classification_report(true_labels, predicted_labels, zero_division=0)

    # Render the template with the classification report
    return render_template('index.html', classification_report=report)


if __name__ == '__main__':
    app.run(debug=True)
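
# Example request against the /predict route once the server is running
# (route name and JSON payload shape follow the handler above; 5000 is Flask's
# default development port):
#   curl -X POST http://127.0.0.1:5000/predict \
#        -H "Content-Type: application/json" \
#        -d '{"text": "buang ka gid"}'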