"""Gradio app that returns the words LIME associates with a hate-speech prediction.

On import the module makes sure the pickled classifier is present on disk
(downloading it from ``MODEL_URL`` when missing) and loads it with joblib.
The UI / API takes a piece of text and returns the words that pushed the
classifier towards the "Hate Speech" class.
"""

import logging
import os

import gradio as gr
import joblib
import requests
from lime.lime_text import LimeTextExplainer

logger = logging.getLogger(__name__)

# Constants
CLASSES = ["Non-Hate Speech", "Hate Speech"]

# Words that are never reported as hate words, even when LIME ranks them.
STOPWORDS = {
    "კი", "არა", "და", "რომ", "რადგან", "ის", "ეს", "რო", "მას", "მისი",
    "შენი", "ჩემი", "რად", "რატომ", "მერე", "ან", "აუ", "ამის", "იმის",
    "რომც", "ეე", "ეეე", "ხარ", "ვარ", "როგორც", "რაც", "როდესაც", "სადაც",
    "თუ", "რა", "რომელი", "რომლიც", "როდის", "რაღა", "მაგრამ", "არ", "აქ",
    "იქ", "შემდეგ", "სად", "მე", "შენ", "თქვენ", "მიერ", "ვინ", "როგორ",
    "თუნდაც", "რათა", "ისინი", "ვინც", "რატო",
}

MODEL_URL = "https://raw.githubusercontent.com/RevazRevazashvili/geo-hate-speech-analysis/main/models/tfidf_logreg_classifier.pkl"
MODEL_PATH = "tfidf_logreg_classifier.pkl"

# Seconds to wait for the model download before giving up.
_DOWNLOAD_TIMEOUT = 60


def _download_model(url: str, path: str) -> None:
    """Download *url* to *path*, leaving no partial file behind on failure.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-2xx HTTP status.
    """
    response = requests.get(url, timeout=_DOWNLOAD_TIMEOUT)
    # Without this an HTTP error page would be saved as the "model" and,
    # because the file then exists, never be re-downloaded.
    response.raise_for_status()

    # Write to a sibling temp file and rename so an interrupted write never
    # leaves a truncated pickle at the final path.
    partial_path = f"{path}.part"
    with open(partial_path, "wb") as f:
        f.write(response.content)
    os.replace(partial_path, path)


# Download model if not exists
if not os.path.exists(MODEL_PATH):
    _download_model(MODEL_URL, MODEL_PATH)

# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code. The file comes from MODEL_URL; only point that at a source you trust.
model = joblib.load(MODEL_PATH)


def is_undecided(prob: float) -> bool:
    """Return True when *prob* lies strictly between 0.35 and 0.7."""
    return 0.35 < prob < 0.7


def get_hate_words(text: str) -> list[str]:
    """Return the words that pushed the classifier towards "Hate Speech".

    An empty list is returned when the text is empty, when the model does not
    predict "Hate Speech", when the probability of the last class is in the
    undecided band (see ``is_undecided``), or when the LIME explanation fails.

    Args:
        text: The text to analyse.

    Returns:
        Words with a positive LIME weight that are not in ``STOPWORDS``,
        in the order LIME reports them.
    """
    if not text or not text.strip():
        return []

    pred = int(model.predict([text])[0])
    prob = model.predict_proba([text])[0][-1]

    # Decide first: the LIME explanation below is by far the most expensive
    # step and its result is only used for confident hate-speech predictions.
    if CLASSES[pred] != "Hate Speech" or is_undecided(prob):
        return []

    explainer = LimeTextExplainer(class_names=CLASSES)
    try:
        explanation = explainer.explain_instance(
            text, model.predict_proba, num_features=10
        )
        influential_words = explanation.as_list()
    except Exception:
        # Best effort: a failed explanation yields no words rather than an
        # error response, but the failure is logged instead of swallowed.
        logger.exception("LIME explanation failed")
        return []

    return [
        word
        for word, score in influential_words
        if score > 0 and word not in STOPWORDS
    ]


def api_predict(text: str) -> dict[str, list[str]]:
    """Wrap ``get_hate_words`` in the JSON payload returned by the UI/API."""
    return {"hate_words": get_hate_words(text)}


with gr.Blocks() as demo:
    input_text = gr.Textbox(label="Enter Georgian text")
    output_json = gr.JSON(label="Detected Hate Words")
    submit_btn = gr.Button("Predict")
    submit_btn.click(
        fn=api_predict,
        inputs=input_text,
        outputs=output_json,
        api_name="predict",  # names the API endpoint for this handler
    )


if __name__ == "__main__":
    demo.launch(
        share=True,
        server_name="0.0.0.0",
        server_port=7860,
        ssr_mode=False,
    )