Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import joblib | |
| import requests | |
| import os | |
| from lime.lime_text import LimeTextExplainer | |
# Constants

# Class labels, indexed by the integer label the model predicts
# (index 0 = negative class, index 1 = positive class).
CLASSES = ["Non-Hate Speech", "Hate Speech"]

# Function words that are dropped from LIME explanations so that they are
# never reported as "hate words".
# NOTE: every entry must be followed by a comma. Two adjacent string literals
# are silently concatenated by Python into a single (bogus) entry, which is
# what previously happened on the second row.
STOPWORDS = {
    "แแ", "แแ แ", "แแ", "แ แแ", "แ แแแแแ", "แแก", "แแก", "แ แ", "แแแก", "แแแกแ",
    "แจแแแ", "แฉแแแ", "แ แแ", "แ แแขแแ", "แแแ แ", "แแ", "แแฃ", "แแแแก", "แแแแก",
    "แ แแแช", "แแ", "แแแ", "แฎแแ ", "แแแ ", "แ แแแแ แช", "แ แแช", "แ แแแแกแแช",
    "แกแแแแช", "แแฃ", "แ แ", "แ แแแแแ", "แ แแแแแช", "แ แแแแก", "แ แแฆแ", "แแแแ แแ",
    "แแ ", "แแฅ", "แแฅ", "แจแแแแแ", "แกแแ", "แแ", "แจแแ", "แแฅแแแ", "แแแแ ",
    "แแแ", "แ แแแแ ", "แแฃแแแแช", "แ แแแ", "แแกแแแ", "แแแแช", "แ แแขแ",
}
# Remote location of the serialized classifier and the local cache file name.
MODEL_URL = "https://raw.githubusercontent.com/RevazRevazashvili/geo-hate-speech-analysis/main/models/tfidf_logreg_classifier.pkl"
MODEL_PATH = "tfidf_logreg_classifier.pkl"

# Download the model only when no cached copy exists; later starts reuse it.
if not os.path.exists(MODEL_PATH):
    # Bounded wait so an unreachable host cannot hang start-up forever.
    r = requests.get(MODEL_URL, timeout=60)
    # Fail loudly on HTTP errors instead of caching an error page as the model
    # (the cached file would otherwise never be re-downloaded).
    r.raise_for_status()
    # Write to a temporary name and rename afterwards, so an interrupted
    # download never leaves a truncated file at MODEL_PATH that would pass
    # the existence check on the next start.
    _tmp_path = MODEL_PATH + ".part"
    with open(_tmp_path, "wb") as f:
        f.write(r.content)
    os.replace(_tmp_path, MODEL_PATH)

# NOTE(security): joblib.load unpickles the file, and unpickling can execute
# arbitrary code. Only keep MODEL_URL pointed at a source you trust.
model = joblib.load(MODEL_PATH)
def is_undecided(prob):
    """Tell whether *prob* falls strictly inside the "undecided" band.

    The band is the open interval (0.35, 0.7): both end points themselves
    count as decided.
    """
    lower_bound, upper_bound = 0.35, 0.7
    return lower_bound < prob and prob < upper_bound
def get_hate_words(text):
    """Return the words that pushed the classifier toward "Hate Speech".

    The text is classified first. An empty list is returned when the text is
    empty/blank, when the probability of the last class is in the undecided
    band (see ``is_undecided``), or when the predicted class is not
    "Hate Speech". Only for a confident "Hate Speech" prediction is a LIME
    explanation computed; words with a positive LIME weight that are not in
    ``STOPWORDS`` are returned.

    Args:
        text: The input text to analyse.

    Returns:
        A list of words (possibly empty). If the LIME explanation fails, the
        failure is logged and an empty list is returned.
    """
    import logging  # local import keeps this block self-contained

    # Nothing to classify or explain.
    if not text or not text.strip():
        return []

    # Classify before explaining: LIME is by far the most expensive step and
    # its result is discarded unless the verdict is a confident "Hate Speech".
    pred = int(model.predict([text])[0])
    prob = model.predict_proba([text])[0][-1]
    if is_undecided(prob) or CLASSES[pred] != "Hate Speech":
        return []

    explainer = LimeTextExplainer(class_names=CLASSES)
    try:
        explanation = explainer.explain_instance(
            text, model.predict_proba, num_features=10
        )
        influential_words = explanation.as_list()
    except Exception:
        # Best effort: an explanation failure must not break the endpoint,
        # but it should be visible in the logs rather than silently dropped.
        logging.getLogger(__name__).exception("LIME explanation failed")
        return []

    # Keep only non-stopwords whose weight is positive.
    return [
        word
        for word, score in influential_words
        if score > 0 and word not in STOPWORDS
    ]
def api_predict(text):
    """Wrap the result of ``get_hate_words(text)`` in the API response dict.

    Returns a dict with the single key ``"hate_words"``.
    """
    return {"hate_words": get_hate_words(text)}
# UI: one text box in, one JSON panel out, and a button that runs the
# prediction. Components are created in the order they were originally
# declared.
with gr.Blocks() as demo:
    input_text = gr.Textbox(label="Enter Georgian text")
    output_json = gr.JSON(label="Detected Hate Words")
    submit_btn = gr.Button("Predict")
    # api_name registers this click handler as a named API endpoint
    # ("predict") in addition to wiring up the button.
    submit_btn.click(api_predict, input_text, output_json, api_name="predict")

if __name__ == "__main__":
    demo.launch(
        share=True,
        server_name="0.0.0.0",
        server_port=7860,
        ssr_mode=False,
    )