# Spaces: revaza/tfidf_hosting (status: Sleeping)
# File size: 2,657 Bytes

import logging
import os

import gradio as gr
import joblib
import requests
from lime.lime_text import LimeTextExplainer

# Constants
# Class labels; a predicted integer label is used as an index into this list.
CLASSES = ["Non-Hate Speech", "Hate Speech"]
# Georgian function words that are dropped from the list of influential words
# before results are reported. Every entry must be followed by a comma:
# adjacent string literals are silently concatenated into a single entry.
STOPWORDS = {
    "კი", "არა", "და", "რომ", "რადგან", "ის", "ეს", "რო", "მას", "მისი",
    "შენი", "ჩემი", "რად", "რატომ", "მერე", "ან", "აუ", "ამის", "იმის",
    "რომც", "ეე", "ეეე", "ხარ", "ვარ", "როგორც", "რაც", "როდესაც",
    "სადაც", "თუ", "რა", "რომელი", "რომლიც", "როდის", "რაღა", "მაგრამ",
    "არ", "აქ", "იქ", "შემდეგ", "სად", "მე", "შენ", "თქვენ", "მიერ",
    "ვინ", "როგორ", "თუნდაც", "რათა", "ისინი", "ვინც", "რატო",
}

MODEL_URL = "https://raw.githubusercontent.com/RevazRevazashvili/geo-hate-speech-analysis/main/models/tfidf_logreg_classifier.pkl"
MODEL_PATH = "tfidf_logreg_classifier.pkl"

# Download the model once; later starts reuse the cached local file.
if not os.path.exists(MODEL_PATH):
    # A timeout keeps start-up from hanging forever on a stalled connection.
    response = requests.get(MODEL_URL, timeout=60)
    # Fail loudly on HTTP errors instead of caching an error page as the model.
    response.raise_for_status()
    # Write to a temporary name and rename afterwards so an interrupted write
    # never leaves a truncated file at MODEL_PATH that would be reused forever.
    partial_path = MODEL_PATH + ".part"
    with open(partial_path, "wb") as f:
        f.write(response.content)
    os.replace(partial_path, MODEL_PATH)

# SECURITY NOTE: joblib.load unpickles the file, which can execute arbitrary
# code. This is only safe as long as the content behind MODEL_URL is trusted.
model = joblib.load(MODEL_PATH)

def is_undecided(prob):
    """Tell whether a probability lies strictly inside the uncertain band.

    The band is the open interval (0.35, 0.7); both end points themselves
    count as decided.
    """
    lower_bound, upper_bound = 0.35, 0.7
    return lower_bound < prob and prob < upper_bound

def get_hate_words(text):
    """Return the words that push ``text`` towards the "Hate Speech" class.

    The classifier is consulted first. A LIME explanation is only computed
    when the text is confidently classified as hate speech, because the
    explanation is by far the most expensive step and its result would be
    discarded in every other case.

    Args:
        text: The input text to analyse.

    Returns:
        A list of words with a positive LIME weight, excluding entries of
        STOPWORDS. The list is empty when the text is empty or blank, when
        the probability falls in the undecided band, when the predicted class
        is not "Hate Speech", or when the explanation step fails.
    """
    # Nothing to classify or explain for missing/blank input.
    if not text or not text.strip():
        return []

    # The last probability column is used as the hate-speech probability
    # (assumes the model's class order matches CLASSES).
    hate_prob = model.predict_proba([text])[0][-1]
    if is_undecided(hate_prob):
        return []

    predicted_class = CLASSES[int(model.predict([text])[0])]
    if predicted_class != "Hate Speech":
        return []

    explainer = LimeTextExplainer(class_names=CLASSES)
    try:
        explanation = explainer.explain_instance(
            text, model.predict_proba, num_features=10
        )
        influential_words = explanation.as_list()
    except Exception:
        # Best effort: a failed explanation yields no words rather than an
        # error, but the failure is logged instead of being hidden.
        logging.getLogger(__name__).exception("LIME explanation failed")
        return []

    return [
        word
        for word, score in influential_words
        if score > 0 and word not in STOPWORDS
    ]

def api_predict(text):
    """Wrap the detected hate words for ``text`` in a JSON-friendly dict.

    Returns a dict with a single key, "hate_words", holding the list
    produced by get_hate_words.
    """
    return {"hate_words": get_hate_words(text)}

# Build the UI: one text input, one JSON output, and a button that runs
# api_predict on the input and shows the result in the JSON component.
with gr.Blocks() as demo:
    input_text = gr.Textbox(label="Enter Georgian text")
    output_json = gr.JSON(label="Detected Hate Words")
    submit_btn = gr.Button("Predict")
    
    # Wire the button click to the prediction handler.
    submit_btn.click(
        fn=api_predict, 
        inputs=input_text, 
        outputs=output_json,
        api_name="predict"  # Names the API endpoint for this event "predict"
    )

if __name__ == "__main__":
    # Launch settings are collected in one place: listen on all interfaces
    # at port 7860, request a share link, and turn SSR mode off.
    launch_options = {
        "share": True,
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "ssr_mode": False,
    }
    demo.launch(**launch_options)