revaza committed on
Commit
292d884
·
verified ·
1 Parent(s): ccaf647

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +103 -0
app.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import joblib
3
+ import requests
4
+ import os
5
+ from lime.lime_text import LimeTextExplainer
6
+
7
+ # Constants
8
+ CLASSES = ["Non-Hate Speech", "Hate Speech"]
9
+ STOPWORDS = {
10
+ "แƒ™แƒ˜",
11
+ "แƒแƒ แƒ",
12
+ "แƒ“แƒ",
13
+ "แƒ แƒแƒ›",
14
+ "แƒ แƒแƒ“แƒ’แƒแƒœ",
15
+ "แƒ˜แƒก",
16
+ "แƒ”แƒก",
17
+ "แƒ แƒ",
18
+ "แƒ›แƒแƒก",
19
+ "แƒ›แƒ˜แƒกแƒ˜",
20
+ "แƒจแƒ”แƒœแƒ˜",
21
+ "แƒฉแƒ”แƒ›แƒ˜",
22
+ "แƒ แƒแƒ“",
23
+ "แƒ แƒแƒขแƒแƒ›"
24
+ "แƒ›แƒ”แƒ แƒ”",
25
+ "แƒแƒœ",
26
+ "แƒแƒฃ",
27
+ "แƒแƒ›แƒ˜แƒก",
28
+ "แƒ˜แƒ›แƒ˜แƒก",
29
+ "แƒ แƒแƒ›แƒช",
30
+ "แƒ”แƒ”",
31
+ "แƒ”แƒ”แƒ”",
32
+ "แƒฎแƒแƒ ",
33
+ "แƒ•แƒแƒ ",
34
+ "แƒ แƒแƒ’แƒแƒ แƒช",
35
+ "แƒ แƒแƒช",
36
+ "แƒ แƒแƒ“แƒ”แƒกแƒแƒช",
37
+ "แƒกแƒแƒ“แƒแƒช",
38
+ "แƒ—แƒฃ",
39
+ "แƒ แƒ",
40
+ "แƒ แƒแƒ›แƒ”แƒšแƒ˜",
41
+ "แƒ แƒแƒ›แƒšแƒ˜แƒช",
42
+ "แƒ แƒแƒ“แƒ˜แƒก",
43
+ "แƒ แƒแƒฆแƒ",
44
+ "แƒ›แƒแƒ’แƒ แƒแƒ›",
45
+ "แƒแƒ ",
46
+ "แƒแƒฅ",
47
+ "แƒ˜แƒฅ",
48
+ "แƒจแƒ”แƒ›แƒ“แƒ”แƒ’",
49
+ "แƒกแƒแƒ“",
50
+ "แƒ›แƒ”",
51
+ "แƒจแƒ”แƒœ",
52
+ "แƒ—แƒฅแƒ•แƒ”แƒœ",
53
+ "แƒ›แƒ˜แƒ”แƒ ",
54
+ "แƒ•แƒ˜แƒœ",
55
+ "แƒ แƒแƒ’แƒแƒ ",
56
+ "แƒ—แƒฃแƒœแƒ“แƒแƒช",
57
+ "แƒ แƒแƒ—แƒ",
58
+ "แƒ˜แƒกแƒ˜แƒœแƒ˜",
59
+ "แƒ•แƒ˜แƒœแƒช",
60
+ "แƒ แƒแƒขแƒ",
61
+ }
62
+
63
+
64
# Pre-trained TF-IDF + logistic-regression pipeline, fetched once and cached
# on local disk next to the app.
MODEL_URL = "https://raw.githubusercontent.com/RevazRevazashvili/geo-hate-speech-analysis/main/models/tfidf_logreg_classifier.pkl"
MODEL_PATH = "tfidf_logreg_classifier.pkl"

# Download model if not exists
if not os.path.exists(MODEL_PATH):
    # timeout so a stalled connection cannot hang app startup forever;
    # raise_for_status so a 404/500 fails loudly instead of silently
    # writing an HTML error page into the .pkl file.
    r = requests.get(MODEL_URL, timeout=60)
    r.raise_for_status()
    with open(MODEL_PATH, "wb") as f:
        f.write(r.content)

# NOTE(review): joblib.load unpickles arbitrary code — acceptable here only
# because the URL points at the author's own repository.
model = joblib.load(MODEL_PATH)
74
+
75
def is_undecided(prob):
    """Return True when *prob* (hate-speech probability) lies strictly inside
    the ambiguous band (0.35, 0.7), where no explanation should be shown."""
    lower_bound, upper_bound = 0.35, 0.7
    return lower_bound < prob and prob < upper_bound
77
+
78
def get_hate_words(text):
    """Return the words that push *text* toward the "Hate Speech" class.

    Uses LIME to rank the 10 most influential words, drops Georgian
    stopwords, and returns only positively-weighted words — and only when
    the model confidently predicts "Hate Speech". Returns [] for
    non-hate predictions or when the probability is in the undecided band.
    """
    explainer = LimeTextExplainer(class_names=CLASSES)
    try:
        explanation = explainer.explain_instance(
            text, model.predict_proba, num_features=10
        )
        influential_words = explanation.as_list()
        filtered = [
            (word, score)
            for word, score in influential_words
            if word not in STOPWORDS
        ]
    except Exception:
        # FIX: was a bare `except:`, which also swallows SystemExit and
        # KeyboardInterrupt. LIME can legitimately fail on degenerate input
        # (e.g. empty or single-token text), so fall back to no explanation.
        filtered = []

    pred = int(model.predict([text])[0])
    # Probability of the last class — presumably "Hate Speech"; verify
    # against the trained pipeline's class order.
    prob = model.predict_proba([text])[0][-1]
    pred_class = CLASSES[pred]

    # Ambiguous prediction: show no words rather than a misleading list.
    if is_undecided(prob):
        return []

    if pred_class == "Hate Speech":
        return [word for word, score in filtered if score > 0]
    return []
98
+
99
def api_predict(text):
    """Gradio handler: wrap the detected hate words in a JSON-able dict."""
    return {"hate_words": get_hate_words(text)}

demo = gr.Interface(fn=api_predict, inputs=gr.Textbox(), outputs="json")
demo.launch()