Spaces:

Abrhaley
/

Tigrinya-harmful-detector

Runtime error

App Files Files Community

Tigrinya-harmful-detector / app.py

Abrhaley

Update app.py

9027a7a verified 7 months ago

raw

history blame contribute delete

2.3 kB

	import gradio as gr
	import joblib
	import pandas as pd
	import numpy as np
	import json
	import re
	from nltk.stem import SnowballStemmer
	from sklearn.feature_extraction.text import TfidfVectorizer

	# Load model and metadata
	# Load the trained classifier once at start-up.
	# NOTE(security): joblib.load unpickles the file, which can execute
	# arbitrary code; only ship a model.joblib that comes from a trusted source.
	model = joblib.load('model.joblib')

	# The JSON file holds, under the "feature_names" key, the column list used
	# by extract_features() to lay out the frame that is passed to the model.
	# The encoding is given explicitly so decoding does not depend on the
	# platform's default locale.
	with open('feature_names.json', 'r', encoding='utf-8') as f:
	    feature_names = json.load(f)['feature_names']

	# Tigrinya text preprocessing
	def preprocess_tigrinya(text):
	    """Normalize raw input text before features are computed from it.

	    Every character that is neither a word character nor whitespace is
	    removed, the remainder is lowercased, each whitespace-separated token
	    is passed through the stemmer, and the stemmed tokens are returned
	    joined by single spaces.
	    """
	    cleaned = re.sub(r'[^\w\s]', '', text).lower()

	    # The Arabic Snowball stemmer stands in as the closest available match
	    # for Tigrinya. A new stemmer instance is created on every call.
	    arabic_stemmer = SnowballStemmer("arabic")
	    return " ".join(map(arabic_stemmer.stem, cleaned.split()))

	# Feature extraction
	def extract_features(text):
	    """Build the single-row feature frame that is fed to the model.

	    The text is first run through preprocess_tigrinya(); four hand-crafted
	    features are then computed from the processed string:

	    * ``word_count`` - number of whitespace-separated tokens
	    * ``unique_words`` - number of distinct tokens
	    * ``char_count`` - length of the processed string
	    * ``contains_hate_keyword`` - 1 if any listed keyword occurs as a
	      substring of the processed string, else 0

	    Returns:
	        A one-row pandas DataFrame whose columns are exactly
	        ``feature_names``, in that order. Expected columns that are not
	        computed here are filled with 0; computed features that are not
	        listed in ``feature_names`` are dropped.
	    """
	    processed = preprocess_tigrinya(text)
	    tokens = processed.split()  # split once, reused by both counts below

	    # Create feature vector (customize based on your original features)
	    features = {
	        "word_count": len(tokens),
	        "unique_words": len(set(tokens)),
	        "char_count": len(processed),
	        "contains_hate_keyword": int(any(kw in processed for kw in ("ዘሕደረ", "ጸረ"))),
	    }

	    # DataFrame.append() was removed in pandas 2.0 and raises
	    # AttributeError there. Building a one-row frame and reindexing it onto
	    # the expected columns yields the same layout without it.
	    return pd.DataFrame([features]).reindex(columns=feature_names, fill_value=0)

	# Prediction function
	def predict(text):
	    """Classify *text* and return the result in the form the UI displays.

	    Returns:
	        A prompt string asking for input when *text* is None, empty or
	        whitespace-only. Otherwise a dict mapping the two display labels
	        to floats taken from the first row of ``model.predict_proba``:
	        index 1 for the harmful label and index 0 for the safe label.
	    """
	    # Guard against None as well as blank strings so a missing value does
	    # not raise AttributeError on .strip().
	    if text is None or not text.strip():
	        return "ጽሑፍ ኣእትዉ! (Please enter text)"

	    features_df = extract_features(text)
	    # NOTE(review): assumes predict_proba column 0 is the safe class and
	    # column 1 the harmful class - confirm against model.classes_.
	    proba = model.predict_proba(features_df)[0]
	    return {
	        "ጉዳት ዘለዎ (Harmful)": float(proba[1]),
	        "ሰላማዊ (Safe)": float(proba[0]),
	    }

	# Gradio interface
	# Input widget: one text box in which the user types the text to classify.
	text_input = gr.Textbox(
	    label="ትግርኛ ጽሑፍ ኣእትዉ (Enter Tigrinya Text)",
	    placeholder="እዚ ጽሑፍ ጉዳት ዘለዎ ይመስል...",
	)

	# Output widget: a label component that shows what predict() returns.
	prediction_output = gr.Label(label="ውጽኢት (Prediction)")

	# Example inputs offered in the UI, one single-field row each.
	sample_inputs = [
	    ["እዚ ጽሑፍ ጥሩ እዩ"],  # Safe example
	    ["ኣፍታዊ ጥልመት ኣለካ!"],  # Harmful example
	]

	# Wire predict() to the widgets and start serving the app.
	demo = gr.Interface(
	    fn=predict,
	    inputs=text_input,
	    outputs=prediction_output,
	    title="ጉዳት ዘለዎ ጽሑፍ ኣሳታሚ ትግርኛ (Tigrinya Harmful Content Detector)",
	    description="ብትግርኛ ዝተጻሕፈ ጉዳት ዘለዎ ጽሑፍ ይለለጥ።",
	    examples=sample_inputs,
	)
	demo.launch()