Spaces:
Sleeping
Sleeping
File size: 2,657 Bytes
292d884 434c6fc dd1f349 434c6fc 292d884 434c6fc 292d884 434c6fc 292d884 434c6fc 292d884 434c6fc 292d884 434c6fc 292d884 dd1f349 fe3474b dd1f349 423a505 33940f8 bc5fd0a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
import gradio as gr
import joblib
import requests
import os
from lime.lime_text import LimeTextExplainer
# Constants
# Class labels; the model's integer prediction is used as an index into this list.
CLASSES = ["Non-Hate Speech", "Hate Speech"]

# Words that are removed from the list of influential words before it is
# reported (see get_hate_words).
# NOTE(review): these literals look mis-decoded (Georgian UTF-8 text read with
# a Thai code page?) — confirm against the original file before relying on them.
STOPWORDS = {
    "แแ", "แแ แ", "แแ", "แ แแ", "แ แแแแแ", "แแก", "แแก", "แ แ", "แแแก", "แแแกแ",
    # A comma was missing between the 4th and 5th entries of the next line,
    # which silently concatenated them into a single, never-matching stopword.
    "แจแแแ", "แฉแแแ", "แ แแ", "แ แแขแแ", "แแแ แ", "แแ", "แแฃ", "แแแแก", "แแแแก",
    "แ แแแช", "แแ", "แแแ", "แฎแแ ", "แแแ ", "แ แแแแ แช", "แ แแช", "แ แแแแกแแช",
    "แกแแแแช", "แแฃ", "แ แ", "แ แแแแแ", "แ แแแแแช", "แ แแแแก", "แ แแฆแ", "แแแแ แแ",
    "แแ ", "แแฅ", "แแฅ", "แจแแแแแ", "แกแแ", "แแ", "แจแแ", "แแฅแแแ", "แแแแ ",
    "แแแ", "แ แแแแ ", "แแฃแแแแช", "แ แแแ", "แแกแแแ", "แแแแช", "แ แแขแ",
}

# Remote location of the pickled classifier and the local file it is cached in.
MODEL_URL = "https://raw.githubusercontent.com/RevazRevazashvili/geo-hate-speech-analysis/main/models/tfidf_logreg_classifier.pkl"
MODEL_PATH = "tfidf_logreg_classifier.pkl"
# Download the model once; later runs reuse the cached file at MODEL_PATH.
if not os.path.exists(MODEL_PATH):
    # A timeout keeps startup from hanging on a stalled connection, and
    # raise_for_status() stops an HTTP error page from being saved as the model
    # (the existence check above would then keep that bad file forever).
    response = requests.get(MODEL_URL, timeout=60)
    response.raise_for_status()
    # Write under a temporary name and rename afterwards so an interrupted
    # write cannot leave a truncated file at MODEL_PATH.
    partial_path = MODEL_PATH + ".part"
    with open(partial_path, "wb") as f:
        f.write(response.content)
    os.replace(partial_path, MODEL_PATH)

# SECURITY NOTE: joblib.load unpickles the file, and unpickling can execute
# arbitrary code. Keep MODEL_URL pointed only at a source you trust.
model = joblib.load(MODEL_PATH)
def is_undecided(prob, low=0.35, high=0.7):
    """Return True when *prob* lies strictly inside the uncertainty band.

    Args:
        prob: Probability to test.
        low: Exclusive lower bound of the band (default 0.35).
        high: Exclusive upper bound of the band (default 0.7).

    Returns:
        True if ``low < prob < high``; both bounds are excluded.
    """
    return low < prob < high
def get_hate_words(text):
    """Return the words that push *text* towards the "Hate Speech" class.

    The classifier is consulted first and LIME is run only when its result
    will actually be used: LIME evaluates the model on many perturbed copies
    of the text, so it is the costly step.

    Args:
        text: Input string to classify and explain.

    Returns:
        A list of words with a positive LIME weight that are not in
        STOPWORDS. The list is empty when the input is empty or blank, when
        the predicted class is not "Hate Speech", when the probability is in
        the undecided band, or when the explanation step fails.
    """
    import logging

    # Nothing to classify or explain for empty / whitespace-only input.
    if not text or not text.strip():
        return []

    pred = int(model.predict([text])[0])
    # Last column of predict_proba for the single input row; presumably the
    # "Hate Speech" probability given the order of CLASSES — TODO confirm.
    prob = model.predict_proba([text])[0][-1]
    if is_undecided(prob) or CLASSES[pred] != "Hate Speech":
        return []

    explainer = LimeTextExplainer(class_names=CLASSES)
    try:
        explanation = explainer.explain_instance(
            text, model.predict_proba, num_features=10
        )
        influential_words = explanation.as_list()
    except Exception:
        # Deliberate best-effort: a failed explanation yields an empty result
        # instead of an error, but the failure is logged rather than hidden.
        logging.getLogger(__name__).exception("LIME explanation failed")
        return []

    return [
        word
        for word, score in influential_words
        if score > 0 and word not in STOPWORDS
    ]
def api_predict(text):
    """Handler for the predict button: wrap the detected words in a dict.

    Returns a dict with the single key "hate_words" holding the list produced
    by get_hate_words(text).
    """
    return {"hate_words": get_hate_words(text)}
# UI: one text input, one JSON output, and a button that runs api_predict.
with gr.Blocks() as demo:
    input_text = gr.Textbox(label="Enter Georgian text")
    output_json = gr.JSON(label="Detected Hate Words")
    submit_btn = gr.Button("Predict")
    submit_btn.click(
        fn=api_predict,
        inputs=input_text,
        outputs=output_json,
        # Gives the click handler a named API endpoint ("predict") so it can
        # also be called programmatically, not only through the button.
        api_name="predict",
    )

if __name__ == "__main__":
    # Listen on all interfaces at port 7860 with server-side rendering disabled.
    demo.launch(share=True, server_name="0.0.0.0", server_port=7860, ssr_mode=False)