revaza committed on
Commit
292d884
·
verified ·
1 Parent(s): ccaf647

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +103 -0
app.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import joblib
3
+ import requests
4
+ import os
5
+ from lime.lime_text import LimeTextExplainer
6
+
7
+ # Constants
8
+ CLASSES = ["Non-Hate Speech", "Hate Speech"]
9
+ STOPWORDS = {
10
+ "แƒ™แƒ˜",
11
+ "แƒแƒ แƒ",
12
+ "แƒ“แƒ",
13
+ "แƒ แƒแƒ›",
14
+ "แƒ แƒแƒ“แƒ’แƒแƒœ",
15
+ "แƒ˜แƒก",
16
+ "แƒ”แƒก",
17
+ "แƒ แƒ",
18
+ "แƒ›แƒแƒก",
19
+ "แƒ›แƒ˜แƒกแƒ˜",
20
+ "แƒจแƒ”แƒœแƒ˜",
21
+ "แƒฉแƒ”แƒ›แƒ˜",
22
+ "แƒ แƒแƒ“",
23
+ "แƒ แƒแƒขแƒแƒ›"
24
+ "แƒ›แƒ”แƒ แƒ”",
25
+ "แƒแƒœ",
26
+ "แƒแƒฃ",
27
+ "แƒแƒ›แƒ˜แƒก",
28
+ "แƒ˜แƒ›แƒ˜แƒก",
29
+ "แƒ แƒแƒ›แƒช",
30
+ "แƒ”แƒ”",
31
+ "แƒ”แƒ”แƒ”",
32
+ "แƒฎแƒแƒ ",
33
+ "แƒ•แƒแƒ ",
34
+ "แƒ แƒแƒ’แƒแƒ แƒช",
35
+ "แƒ แƒแƒช",
36
+ "แƒ แƒแƒ“แƒ”แƒกแƒแƒช",
37
+ "แƒกแƒแƒ“แƒแƒช",
38
+ "แƒ—แƒฃ",
39
+ "แƒ แƒ",
40
+ "แƒ แƒแƒ›แƒ”แƒšแƒ˜",
41
+ "แƒ แƒแƒ›แƒšแƒ˜แƒช",
42
+ "แƒ แƒแƒ“แƒ˜แƒก",
43
+ "แƒ แƒแƒฆแƒ",
44
+ "แƒ›แƒแƒ’แƒ แƒแƒ›",
45
+ "แƒแƒ ",
46
+ "แƒแƒฅ",
47
+ "แƒ˜แƒฅ",
48
+ "แƒจแƒ”แƒ›แƒ“แƒ”แƒ’",
49
+ "แƒกแƒแƒ“",
50
+ "แƒ›แƒ”",
51
+ "แƒจแƒ”แƒœ",
52
+ "แƒ—แƒฅแƒ•แƒ”แƒœ",
53
+ "แƒ›แƒ˜แƒ”แƒ ",
54
+ "แƒ•แƒ˜แƒœ",
55
+ "แƒ แƒแƒ’แƒแƒ ",
56
+ "แƒ—แƒฃแƒœแƒ“แƒแƒช",
57
+ "แƒ แƒแƒ—แƒ",
58
+ "แƒ˜แƒกแƒ˜แƒœแƒ˜",
59
+ "แƒ•แƒ˜แƒœแƒช",
60
+ "แƒ แƒแƒขแƒ",
61
+ }
62
+
63
+
64
# Pre-trained TF-IDF + logistic-regression pipeline, fetched once and cached
# on local disk next to the app.
MODEL_URL = "https://raw.githubusercontent.com/RevazRevazashvili/geo-hate-speech-analysis/main/models/tfidf_logreg_classifier.pkl"
MODEL_PATH = "tfidf_logreg_classifier.pkl"

# Download model if not exists
if not os.path.exists(MODEL_PATH):
    # timeout so a stalled connection cannot hang app startup forever;
    # raise_for_status so a 404/500 fails loudly instead of silently
    # writing an HTML error page into the .pkl file.
    r = requests.get(MODEL_URL, timeout=60)
    r.raise_for_status()
    with open(MODEL_PATH, "wb") as f:
        f.write(r.content)

# NOTE(review): joblib.load unpickles arbitrary code — acceptable here only
# because the URL points at the author's own repository.
model = joblib.load(MODEL_PATH)
74
+
75
def is_undecided(prob):
    """Return True when *prob* (hate-speech probability) lies strictly inside
    the ambiguous band (0.35, 0.7), where no explanation should be shown."""
    lower_bound, upper_bound = 0.35, 0.7
    return lower_bound < prob and prob < upper_bound
77
+
78
def get_hate_words(text):
    """Return the words that push *text* toward the "Hate Speech" class.

    Uses LIME to rank the 10 most influential words, drops Georgian
    stopwords, and returns only positively-weighted words — and only when
    the model confidently predicts "Hate Speech". Returns [] for
    non-hate predictions or when the probability is in the undecided band.
    """
    explainer = LimeTextExplainer(class_names=CLASSES)
    try:
        explanation = explainer.explain_instance(
            text, model.predict_proba, num_features=10
        )
        influential_words = explanation.as_list()
        filtered = [
            (word, score)
            for word, score in influential_words
            if word not in STOPWORDS
        ]
    except Exception:
        # FIX: was a bare `except:`, which also swallows SystemExit and
        # KeyboardInterrupt. LIME can legitimately fail on degenerate input
        # (e.g. empty or single-token text), so fall back to no explanation.
        filtered = []

    pred = int(model.predict([text])[0])
    # Probability of the last class — presumably "Hate Speech"; verify
    # against the trained pipeline's class order.
    prob = model.predict_proba([text])[0][-1]
    pred_class = CLASSES[pred]

    # Ambiguous prediction: show no words rather than a misleading list.
    if is_undecided(prob):
        return []

    if pred_class == "Hate Speech":
        return [word for word, score in filtered if score > 0]
    return []
98
+
99
def api_predict(text):
    """Gradio handler: wrap the detected hate words in a JSON-able dict."""
    return {"hate_words": get_hate_words(text)}

demo = gr.Interface(fn=api_predict, inputs=gr.Textbox(), outputs="json")
demo.launch()