tfidf_hosting / app.py
revaza's picture
Update app.py
bc5fd0a verified
import logging
import os

import gradio as gr
import joblib
import requests
from lime.lime_text import LimeTextExplainer
# Constants
# Class labels, indexed by the integer class the model predicts.
CLASSES = ["Non-Hate Speech", "Hate Speech"]

# Words that are never reported as "hate words", even when the explainer
# assigns them a positive weight.
# NOTE(review): these literals look mis-encoded (mojibake) in this copy of the
# file, while the UI asks for Georgian text -- confirm the repository version
# holds valid Georgian strings.
# Fix: a comma was missing between the 4th and 5th entries of the second row,
# so Python's implicit string concatenation merged them into a single bogus
# entry and neither word was actually filtered.
STOPWORDS = {
    "แƒ™แƒ˜", "แƒแƒ แƒ", "แƒ“แƒ", "แƒ แƒแƒ›", "แƒ แƒแƒ“แƒ’แƒแƒœ", "แƒ˜แƒก", "แƒ”แƒก", "แƒ แƒ", "แƒ›แƒแƒก", "แƒ›แƒ˜แƒกแƒ˜",
    "แƒจแƒ”แƒœแƒ˜", "แƒฉแƒ”แƒ›แƒ˜", "แƒ แƒแƒ“", "แƒ แƒแƒขแƒแƒ›", "แƒ›แƒ”แƒ แƒ”", "แƒแƒœ", "แƒแƒฃ", "แƒแƒ›แƒ˜แƒก", "แƒ˜แƒ›แƒ˜แƒก",
    "แƒ แƒแƒ›แƒช", "แƒ”แƒ”", "แƒ”แƒ”แƒ”", "แƒฎแƒแƒ ", "แƒ•แƒแƒ ", "แƒ แƒแƒ’แƒแƒ แƒช", "แƒ แƒแƒช", "แƒ แƒแƒ“แƒ”แƒกแƒแƒช",
    "แƒกแƒแƒ“แƒแƒช", "แƒ—แƒฃ", "แƒ แƒ", "แƒ แƒแƒ›แƒ”แƒšแƒ˜", "แƒ แƒแƒ›แƒšแƒ˜แƒช", "แƒ แƒแƒ“แƒ˜แƒก", "แƒ แƒแƒฆแƒ", "แƒ›แƒแƒ’แƒ แƒแƒ›",
    "แƒแƒ ", "แƒแƒฅ", "แƒ˜แƒฅ", "แƒจแƒ”แƒ›แƒ“แƒ”แƒ’", "แƒกแƒแƒ“", "แƒ›แƒ”", "แƒจแƒ”แƒœ", "แƒ—แƒฅแƒ•แƒ”แƒœ", "แƒ›แƒ˜แƒ”แƒ ",
    "แƒ•แƒ˜แƒœ", "แƒ แƒแƒ’แƒแƒ ", "แƒ—แƒฃแƒœแƒ“แƒแƒช", "แƒ แƒแƒ—แƒ", "แƒ˜แƒกแƒ˜แƒœแƒ˜", "แƒ•แƒ˜แƒœแƒช", "แƒ แƒแƒขแƒ",
}
# Remote location of the pickled classifier and the local path it is cached at.
MODEL_URL = "https://raw.githubusercontent.com/RevazRevazashvili/geo-hate-speech-analysis/main/models/tfidf_logreg_classifier.pkl"
MODEL_PATH = "tfidf_logreg_classifier.pkl"


def _download_model(url, path, timeout=60):
    """Download ``url`` to ``path`` without ever leaving a bad file at ``path``.

    Args:
        url: HTTP(S) address of the model file.
        path: Destination file path.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: On network failure, timeout, or a
            non-success HTTP status.
        OSError: If the file cannot be written or moved into place.
    """
    response = requests.get(url, timeout=timeout)
    # Without this check an HTTP error page would be saved and then cached
    # forever as the "model", because the file would exist on the next start.
    response.raise_for_status()
    # Write to a sibling file and rename, so an interrupted write cannot leave
    # a truncated file that later passes the os.path.exists() check.
    partial_path = path + ".part"
    with open(partial_path, "wb") as f:
        f.write(response.content)
    os.replace(partial_path, path)


# Download model if not exists
if not os.path.exists(MODEL_PATH):
    _download_model(MODEL_URL, MODEL_PATH)

# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code. The artifact comes from a remote URL; consider pinning a commit hash
# and verifying a checksum before loading.
model = joblib.load(MODEL_PATH)
def is_undecided(prob, *, lower=0.35, upper=0.7):
    """Return True when ``prob`` lies strictly inside the uncertainty band.

    Args:
        prob: Probability to test.
        lower: Exclusive lower bound of the band (default 0.35).
        upper: Exclusive upper bound of the band (default 0.7).

    Returns:
        True if ``lower < prob < upper``, otherwise False. With the default
        bounds this matches the previously hard-coded thresholds.
    """
    return lower < prob < upper
def get_hate_words(text):
    """Return the words that push ``text`` towards the "Hate Speech" class.

    The text is classified first. Words are reported only when the predicted
    class is "Hate Speech" and the probability is outside the undecided band
    (see ``is_undecided``). In that case a LIME explanation is computed and
    the words with a positive weight that are not in ``STOPWORDS`` are
    returned.

    Args:
        text: Input text to analyse.

    Returns:
        A list of words (possibly empty). An empty list is also returned for
        empty/blank input and when the LIME explanation fails.
    """
    # Nothing to explain for empty or whitespace-only input.
    if not text or not text.strip():
        return []

    pred = int(model.predict([text])[0])
    # Last column of predict_proba is the probability of the last class.
    prob = model.predict_proba([text])[0][-1]

    # Decide before explaining: in both of these cases the result is an empty
    # list regardless of the explanation, so the LIME run is skipped entirely.
    if is_undecided(prob) or CLASSES[pred] != "Hate Speech":
        return []

    explainer = LimeTextExplainer(class_names=CLASSES)
    try:
        explanation = explainer.explain_instance(
            text, model.predict_proba, num_features=10
        )
        influential_words = explanation.as_list()
    except Exception:
        # Best effort: a failed explanation yields no words, but the failure
        # is logged instead of being silently discarded. Unlike a bare
        # ``except:``, KeyboardInterrupt/SystemExit still propagate.
        logging.getLogger(__name__).exception("LIME explanation failed")
        return []

    return [
        word
        for word, score in influential_words
        if score > 0 and word not in STOPWORDS
    ]
def api_predict(text):
    """Handler for the predict button: wrap the detected words in a dict.

    Args:
        text: Input text taken from the textbox.

    Returns:
        ``{"hate_words": [...]}`` with the list produced by
        ``get_hate_words``.
    """
    return {"hate_words": get_hate_words(text)}
# UI definition: one textbox in, one JSON panel out, one button to trigger it.
with gr.Blocks() as demo:
    input_text = gr.Textbox(label="Enter Georgian text")
    output_json = gr.JSON(label="Detected Hate Words")
    submit_btn = gr.Button("Predict")
    # api_name gives the click handler an explicit endpoint name ("predict").
    submit_btn.click(
        api_predict,
        input_text,
        output_json,
        api_name="predict",
    )

# Start the server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch(
        share=True,
        server_name="0.0.0.0",
        server_port=7860,
        ssr_mode=False,
    )