# Hugging Face Spaces page residue, kept as comments:
#   Spaces status: Runtime error
#   File size: 2,296 Bytes
#   Blame gutter hashes: 4337710 9027a7a
import gradio as gr
import joblib
import pandas as pd
import numpy as np
import json
import re
from nltk.stem import SnowballStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
# Load model and metadata
# NOTE: joblib.load unpickles the file, so model.joblib must come from a
# trusted source.
model = joblib.load('model.joblib')
# JSON text is UTF-8; pass the encoding explicitly so the feature names are
# decoded the same way regardless of the host's locale default.
with open('feature_names.json', 'r', encoding='utf-8') as f:
    feature_names = json.load(f)['feature_names']
# Tigrinya text preprocessing
def preprocess_tigrinya(text):
    """Normalise *text* into a space-joined string of stemmed tokens.

    Every character that is neither a word character nor whitespace is
    removed, the remainder is lower-cased, and each whitespace-separated
    token is passed through NLTK's Arabic Snowball stemmer (used as the
    closest available match for Tigrinya).

    Args:
        text: Raw input string.

    Returns:
        The stemmed tokens joined by single spaces.
    """
    # Basic cleaning: strip punctuation/symbols, then lower-case.
    cleaned = re.sub(r'[^\w\s]', '', text).lower()
    # A fresh stemmer is built on every call.
    arabic_stemmer = SnowballStemmer("arabic")
    return " ".join(map(arabic_stemmer.stem, cleaned.split()))
# Feature extraction
def extract_features(text):
    """Build the single-row feature frame that is handed to the model.

    The text is normalised with ``preprocess_tigrinya`` and four summary
    features are computed from the result: token count, distinct-token
    count, character count, and a 0/1 flag that is set when any listed
    keyword occurs as a substring of the processed text.

    Args:
        text: Raw input string.

    Returns:
        A one-row ``pandas.DataFrame`` whose columns are every name in the
        module-level ``feature_names`` (in that order), followed by any
        computed feature not listed there. Columns with no computed value
        hold 0.
    """
    processed = preprocess_tigrinya(text)
    tokens = processed.split()
    # NOTE(review): these keyword literals look mis-encoded in this copy of
    # the file; confirm them against the intended Tigrinya terms.
    hate_keywords = ["ααα°α¨", "αΈα¨"]
    # Create feature vector (customize based on your original features)
    features = {
        "word_count": len(tokens),
        "unique_words": len(set(tokens)),
        "char_count": len(processed),
        "contains_hate_keyword": int(any(kw in processed for kw in hate_keywords)),
    }
    # DataFrame.append was removed in pandas 2.0, so the row is assembled as a
    # plain dict instead: expected features first (defaulting to 0), then the
    # computed values written over / added after them.
    row = dict.fromkeys(feature_names, 0)
    row.update(features)
    return pd.DataFrame([row])
# Prediction function
def predict(text):
    """Score *text* with the loaded model and return label probabilities.

    Args:
        text: Raw input string; ``None`` is treated like empty input.

    Returns:
        A prompt string when *text* is ``None``, empty or only whitespace.
        Otherwise a dict mapping the "Harmful" and "Safe" labels to floats
        taken from the first row of ``model.predict_proba``: index 1 for
        "Harmful" and index 0 for "Safe".
    """
    # Guard None explicitly: calling .strip() on it would raise AttributeError.
    if text is None or not text.strip():
        return "α½αα α£α₯α΅α! (Please enter text)"
    features_df = extract_features(text)
    proba = model.predict_proba(features_df)[0]
    # NOTE(review): assumes a binary model whose class order is
    # [safe, harmful]; confirm against model.classes_.
    return {
        "αα³α΅ ααα (Harmful)": float(proba[1]),
        "α°ααα (Safe)": float(proba[0]),
    }
# Gradio interface
# One text box in, one label widget out, with predict() as the handler.
# The interface is bound to a name so the object stays reachable after it is
# built; the stray trailing "|" that followed launch() has been removed
# because it made the statement a syntax error.
demo = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(
        label="α΅ααα α½αα α£α₯α΅α (Enter Tigrinya Text)",
        placeholder="α₯α α½αα αα³α΅ ααα ααα΅α...",
    ),
    outputs=gr.Label(label="αα½α’α΅ (Prediction)"),
    title="αα³α΅ ααα α½αα α£α³α³α α΅ααα (Tigrinya Harmful Content Detector)",
    description="α₯α΅ααα αα°α»αα αα³α΅ ααα α½αα αααα₯α’",
    examples=[
        ["α₯α α½αα α₯α© α₯α©"],  # Safe example
        ["α£αα³α α₯ααα΅ α£αα«!"],  # Harmful example
    ],
)
# Launched at module level, as in the original script.
demo.launch()