# Abrhaley's picture
# Update app.py
# 9027a7a verified
import gradio as gr
import joblib
import pandas as pd
import numpy as np
import json
import re
from nltk.stem import SnowballStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
# Load the trained classifier and the list of feature column names used to
# build the model's input frame. Both paths are relative, so they resolve
# against the process's current working directory.
# NOTE: joblib.load unpickles arbitrary objects; only load model files that
# come from a trusted source.
model = joblib.load('model.joblib')
# Pin the encoding: JSON text is UTF-8, and relying on the platform default
# locale can break decoding of non-ASCII feature names.
with open('feature_names.json', 'r', encoding='utf-8') as f:
    feature_names = json.load(f)['feature_names']
# Tigrinya text preprocessing
def preprocess_tigrinya(text):
    """Normalize raw input text before feature extraction.

    Every character that is neither a word character nor whitespace is
    removed, the remainder is lower-cased, and each whitespace-separated
    token is run through NLTK's Snowball stemmer configured for Arabic
    (used by the original author as the closest available match).

    Args:
        text: The raw input string.

    Returns:
        The stemmed tokens joined by single spaces.
    """
    cleaned = re.sub(r'[^\w\s]', '', text).lower()
    # A fresh stemmer per call keeps its internal state private to this call.
    arabic_stemmer = SnowballStemmer("arabic")
    return " ".join(arabic_stemmer.stem(token) for token in cleaned.split())
# Feature extraction
def extract_features(text):
    """Build the single-row feature frame passed to the model.

    The text is preprocessed with ``preprocess_tigrinya`` and four
    hand-crafted features are computed from the result: token count,
    distinct-token count, character count, and a 0/1 flag for whether any
    of the hard-coded keywords occurs as a substring.

    Args:
        text: The raw input string.

    Returns:
        A one-row ``pandas.DataFrame`` whose columns are exactly
        ``feature_names``, in that order. Columns that are not computed
        here are filled with 0; computed features that are not listed in
        ``feature_names`` are dropped.
    """
    processed = preprocess_tigrinya(text)
    tokens = processed.split()
    # NOTE(review): the first keyword literal looks mis-encoded (mojibake);
    # confirm the file is stored as UTF-8 and the keyword is what was intended.
    hate_keywords = ("α‹˜αˆ•α‹°αˆ¨", "ጸረ")
    features = {
        "word_count": len(tokens),
        "unique_words": len(set(tokens)),
        "char_count": len(processed),
        "contains_hate_keyword": int(
            any(kw in processed for kw in hate_keywords)
        ),
    }
    # DataFrame.append was removed in pandas 2.0. Build the row directly and
    # reindex it onto the expected columns so the frame always has the same
    # shape and column order, with 0 for anything not computed above.
    return pd.DataFrame([features]).reindex(columns=feature_names, fill_value=0)
# Prediction function
def predict(text):
    """Score a piece of text and return the class probabilities.

    Args:
        text: The input string. ``None`` and blank/whitespace-only strings
            are treated as "no input".

    Returns:
        For missing or blank input, a prompt string asking for text.
        Otherwise a dict mapping the two display labels to the
        probabilities returned by ``model.predict_proba`` for the single
        input row.
    """
    # Guard None as well as blank strings: calling .strip() on None would
    # raise AttributeError instead of showing the prompt.
    if text is None or not text.strip():
        return "αŒ½αˆ‘α ኣαŠ₯ቡዉ! (Please enter text)"
    features_df = extract_features(text)
    proba = model.predict_proba(features_df)[0]
    # Assumes index 0 is the safe class and index 1 the harmful class —
    # TODO confirm against model.classes_.
    return {"αŒ‰α‹³α‰΅ α‹˜αˆˆα‹Ž (Harmful)": float(proba[1]),
            "αˆ°αˆ‹αˆ›α‹Š (Safe)": float(proba[0])}
# Gradio interface: one text box in, one label component out, wired to
# predict(), then served immediately.
# NOTE(review): several non-ASCII literals below look mis-encoded (mojibake);
# they are reproduced unchanged — confirm the file is stored as UTF-8.
text_input = gr.Textbox(
    label="α‰΅αŒαˆ­αŠ› αŒ½αˆ‘α ኣαŠ₯ቡዉ (Enter Tigrinya Text)",
    placeholder="αŠ₯α‹š αŒ½αˆ‘α αŒ‰α‹³α‰΅ α‹˜αˆˆα‹Ž α‹­αˆ˜αˆ΅αˆ...",
)
prediction_output = gr.Label(label="α‹αŒ½αŠ’α‰΅ (Prediction)")
demo = gr.Interface(
    fn=predict,
    inputs=text_input,
    outputs=prediction_output,
    title="αŒ‰α‹³α‰΅ α‹˜αˆˆα‹Ž αŒ½αˆ‘α αŠ£αˆ³α‰³αˆš α‰΅αŒαˆ­αŠ› (Tigrinya Harmful Content Detector)",
    description="α‰₯α‰΅αŒαˆ­αŠ› α‹α‰°αŒ»αˆ•αˆ αŒ‰α‹³α‰΅ α‹˜αˆˆα‹Ž αŒ½αˆ‘α α‹­αˆˆαˆˆαŒ₯ፒ",
    examples=[
        ["αŠ₯α‹š αŒ½αˆ‘α αŒ₯ሩ αŠ₯α‹©"],  # Safe example
        ["αŠ£αα‰³α‹Š αŒ₯αˆαˆ˜α‰΅ ኣለካ!"],  # Harmful example
    ],
)
demo.launch()