Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import joblib | |
| import pandas as pd | |
| import numpy as np | |
| import json | |
| import re | |
| from nltk.stem import SnowballStemmer | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
# Load the trained classifier and the list of feature columns it expects.
# NOTE(review): joblib.load deserializes arbitrary Python objects; only point
# this at a model file you produced or otherwise trust.
model = joblib.load('model.joblib')
# Read the JSON with an explicit encoding so non-ASCII feature names decode
# the same way regardless of the host's default locale.
with open('feature_names.json', 'r', encoding='utf-8') as f:
    feature_names = json.load(f)['feature_names']
# Tigrinya text preprocessing
def preprocess_tigrinya(text):
    """Normalize raw input text into a space-joined string of stemmed tokens.

    Steps, in order: remove every character that is neither a word character
    nor whitespace, lowercase the result, split on whitespace, and stem each
    token with NLTK's Snowball "arabic" stemmer.

    Args:
        text: The raw input string.

    Returns:
        The stemmed tokens joined by single spaces (empty string when no
        tokens remain).
    """
    # Strip punctuation/symbols first, then fold case.
    cleaned = re.sub(r'[^\w\s]', '', text).lower()

    # NOTE(review): the Arabic stemmer is used as a stand-in for Tigrinya.
    # A fresh instance is built on every call; it is kept that way because
    # sharing one instance across concurrent requests has not been verified
    # to be safe.
    arabic_stemmer = SnowballStemmer("arabic")
    return " ".join(arabic_stemmer.stem(token) for token in cleaned.split())
# Feature extraction
def extract_features(text):
    """Build the single-row feature frame that is handed to the model.

    The text is preprocessed with ``preprocess_tigrinya`` and four
    hand-crafted features are computed from the result. The row is then laid
    out over the columns listed in ``feature_names``; any expected column that
    is not computed here is filled with 0.

    Args:
        text: The raw input string.

    Returns:
        A one-row ``pandas.DataFrame`` whose columns are ``feature_names``
        followed by any computed feature not already listed there.
    """
    processed = preprocess_tigrinya(text)
    tokens = processed.split()

    # Create feature vector (customize based on your original features)
    # NOTE(review): the keyword literals below look mis-encoded (mojibake);
    # confirm them against the original Tigrinya keyword list.
    features = {
        "word_count": len(tokens),
        "unique_words": len(set(tokens)),
        "char_count": len(processed),
        "contains_hate_keyword": int(any(kw in processed for kw in ["ααα°α¨", "αΈα¨"])),
    }

    # DataFrame.append was removed in pandas 2.0, so the row is built directly
    # and reindexed instead. The column layout mirrors what append produced:
    # expected columns first, then any computed feature not among them.
    columns = list(feature_names)
    columns.extend(name for name in features if name not in columns)
    return pd.DataFrame([features]).reindex(columns=columns).fillna(0)
# Prediction function
def predict(text):
    """Classify a piece of text and return per-label probabilities.

    Args:
        text: The user-supplied string. ``None``, an empty string, or a
            whitespace-only string is treated as missing input.

    Returns:
        A prompt string asking for text when the input is missing; otherwise
        a dict mapping the two display labels to the float probabilities
        taken from the first row of ``model.predict_proba``.
    """
    # Guard against None as well as blank strings: calling .strip() on None
    # would raise AttributeError before any message could be shown.
    if text is None or not text.strip():
        return "α½αα α£α₯α΅α! (Please enter text)"

    features_df = extract_features(text)
    proba = model.predict_proba(features_df)[0]
    # NOTE(review): assumes index 1 is the harmful class and index 0 the safe
    # class; confirm against model.classes_.
    return {
        "αα³α΅ ααα (Harmful)": float(proba[1]),
        "α°ααα (Safe)": float(proba[0]),
    }
# Gradio interface: one text box in, one label (class probabilities) out.
text_input = gr.Textbox(
    label="α΅ααα α½αα α£α₯α΅α (Enter Tigrinya Text)",
    placeholder="α₯α α½αα αα³α΅ ααα ααα΅α...",
)
prediction_output = gr.Label(label="αα½α’α΅ (Prediction)")

# Sample inputs offered in the UI.
sample_inputs = [
    ["α₯α α½αα α₯α© α₯α©"],  # Safe example
    ["α£αα³α α₯ααα΅ α£αα«!"],  # Harmful example
]

demo = gr.Interface(
    fn=predict,
    inputs=text_input,
    outputs=prediction_output,
    title="αα³α΅ ααα α½αα α£α³α³α α΅ααα (Tigrinya Harmful Content Detector)",
    description="α₯α΅ααα αα°α»αα αα³α΅ ααα α½αα αααα₯α’",
    examples=sample_inputs,
)

# The app is started as soon as this module is executed.
demo.launch()