Spaces:
Runtime error
Runtime error
Fixed bugs
Browse files
app.py
CHANGED
|
@@ -12,6 +12,11 @@ from string import punctuation
|
|
| 12 |
API_URL = "https://api-inference.huggingface.co/models/Dabid/abusive-tagalog-profanity-detection"
|
| 13 |
headers = {"Authorization": "Bearer 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"}
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
def read_text(filename, filetype='txt'):
|
| 16 |
words = []
|
| 17 |
|
|
@@ -31,11 +36,6 @@ lookup_words = read_text('lookup_words')
|
|
| 31 |
obj_pronouns = read_text('obj_pronouns')
|
| 32 |
profanities = read_text('profanities', 'json')
|
| 33 |
|
| 34 |
-
def query(text):
|
| 35 |
-
text = {"inputs": text}
|
| 36 |
-
response = requests.post(API_URL, headers=headers, json=text)
|
| 37 |
-
return response.json()
|
| 38 |
-
|
| 39 |
|
| 40 |
# for profanity in profanities:
|
| 41 |
# print(profanity, process.extractOne(profanity, tweet.split(), scorer=fuzz.ratio))
|
|
@@ -77,7 +77,7 @@ def fuzzy_lookup(tweet):
|
|
| 77 |
for pronoun in obj_pronouns:
|
| 78 |
if len(word_split) > 1:
|
| 79 |
if pronoun == word_split[-1]:
|
| 80 |
-
matched_profanity[word] =
|
| 81 |
break
|
| 82 |
|
| 83 |
# Replace each profanities by fuzzy lookup result
|
|
@@ -120,28 +120,30 @@ def preprocess(tweet):
|
|
| 120 |
# Combine list of words back to sentence
|
| 121 |
preprocessed_tweet = ' '.join(filter(None, row_split))
|
| 122 |
|
|
|
|
|
|
|
|
|
|
| 123 |
# Check if output contains single word then return null
|
| 124 |
if len(preprocessed_tweet.split()) == 1:
|
| 125 |
-
return preprocessed_tweet
|
| 126 |
|
| 127 |
# Expand Contractions
|
| 128 |
for i in contractions.items():
|
| 129 |
preprocessed_tweet = re.sub(rf"\b{i[0]}\b", i[1], preprocessed_tweet)
|
| 130 |
|
| 131 |
-
# Fuzzy Lookup
|
| 132 |
-
preprocessed_tweet, matches = fuzzy_lookup(preprocessed_tweet)
|
| 133 |
-
|
| 134 |
return preprocessed_tweet, matches
|
| 135 |
|
| 136 |
|
| 137 |
def predict(tweet):
|
| 138 |
-
|
| 139 |
preprocessed_tweet, matched_profanity = preprocess(tweet)
|
| 140 |
|
| 141 |
prediction = query(preprocessed_tweet)
|
| 142 |
|
| 143 |
if type(prediction) is dict:
|
| 144 |
-
return "Model is still loading. Try again."
|
|
|
|
|
|
|
| 145 |
|
| 146 |
if bool(matched_profanity) == False:
|
| 147 |
return "No profanity found."
|
|
@@ -149,9 +151,9 @@ def predict(tweet):
|
|
| 149 |
prediction = [tuple(i.values()) for i in prediction[0]]
|
| 150 |
prediction = dict((x, y) for x, y in prediction)
|
| 151 |
|
| 152 |
-
print("\
|
| 153 |
-
print(matched_profanity)
|
| 154 |
-
print(prediction, "\n")
|
| 155 |
|
| 156 |
return prediction
|
| 157 |
|
|
@@ -189,18 +191,20 @@ def predict(tweet):
|
|
| 189 |
# # hf_writer = gr.HuggingFaceDatasetSaver('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', 'tagalog-profanity-feedbacks')
|
| 190 |
|
| 191 |
|
| 192 |
-
demo = gr.Interface(
|
| 193 |
-
|
|
|
|
|
|
|
| 194 |
|
| 195 |
-
|
| 196 |
|
| 197 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198 |
|
| 199 |
-
|
| 200 |
-
'Napakainit ngayong araw pakshet namaaan!!',
|
| 201 |
-
'Napakabagal naman ng wifi tangina #PLDC #HelloDITO',
|
| 202 |
-
'Bobo ka ba? napakadali lang nyan eh... 🤡',
|
| 203 |
-
'Uy gago laptrip yung nangyare samen kanina HAHAHA😂😂'],
|
| 204 |
-
)
|
| 205 |
|
| 206 |
-
|
|
|
|
| 12 |
API_URL = "https://api-inference.huggingface.co/models/Dabid/abusive-tagalog-profanity-detection"
|
| 13 |
headers = {"Authorization": "Bearer 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"}
|
| 14 |
|
| 15 |
+
def query(text):
|
| 16 |
+
text = {"inputs": text}
|
| 17 |
+
response = requests.post(API_URL, headers=headers, json=text)
|
| 18 |
+
return response.json()
|
| 19 |
+
|
| 20 |
def read_text(filename, filetype='txt'):
|
| 21 |
words = []
|
| 22 |
|
|
|
|
| 36 |
obj_pronouns = read_text('obj_pronouns')
|
| 37 |
profanities = read_text('profanities', 'json')
|
| 38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
# for profanity in profanities:
|
| 41 |
# print(profanity, process.extractOne(profanity, tweet.split(), scorer=fuzz.ratio))
|
|
|
|
| 77 |
for pronoun in obj_pronouns:
|
| 78 |
if len(word_split) > 1:
|
| 79 |
if pronoun == word_split[-1]:
|
| 80 |
+
matched_profanity[word] = profanity + ' ' + pronoun
|
| 81 |
break
|
| 82 |
|
| 83 |
# Replace each profanities by fuzzy lookup result
|
|
|
|
| 120 |
# Combine list of words back to sentence
|
| 121 |
preprocessed_tweet = ' '.join(filter(None, row_split))
|
| 122 |
|
| 123 |
+
# Fuzzy Lookup
|
| 124 |
+
preprocessed_tweet, matches = fuzzy_lookup(preprocessed_tweet)
|
| 125 |
+
|
| 126 |
# Check if output contains single word then return null
|
| 127 |
if len(preprocessed_tweet.split()) == 1:
|
| 128 |
+
return preprocessed_tweet, matches
|
| 129 |
|
| 130 |
# Expand Contractions
|
| 131 |
for i in contractions.items():
|
| 132 |
preprocessed_tweet = re.sub(rf"\b{i[0]}\b", i[1], preprocessed_tweet)
|
| 133 |
|
|
|
|
|
|
|
|
|
|
| 134 |
return preprocessed_tweet, matches
|
| 135 |
|
| 136 |
|
| 137 |
def predict(tweet):
|
| 138 |
+
|
| 139 |
preprocessed_tweet, matched_profanity = preprocess(tweet)
|
| 140 |
|
| 141 |
prediction = query(preprocessed_tweet)
|
| 142 |
|
| 143 |
if type(prediction) is dict:
|
| 144 |
+
# return "Model is still loading. Try again."
|
| 145 |
+
print("loading")
|
| 146 |
+
predict(tweet)
|
| 147 |
|
| 148 |
if bool(matched_profanity) == False:
|
| 149 |
return "No profanity found."
|
|
|
|
| 151 |
prediction = [tuple(i.values()) for i in prediction[0]]
|
| 152 |
prediction = dict((x, y) for x, y in prediction)
|
| 153 |
|
| 154 |
+
print("\nTWEET:", tweet)
|
| 155 |
+
print("DETECTED PROFANITY:", matched_profanity)
|
| 156 |
+
print("LABELS:", prediction, "\n")
|
| 157 |
|
| 158 |
return prediction
|
| 159 |
|
|
|
|
| 191 |
# # hf_writer = gr.HuggingFaceDatasetSaver('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', 'tagalog-profanity-feedbacks')
|
| 192 |
|
| 193 |
|
| 194 |
+
# demo = gr.Interface(
|
| 195 |
+
# fn=predict,
|
| 196 |
+
|
| 197 |
+
# inputs=[gr.components.Textbox(lines=5, placeholder='Enter your input here', label='INPUT')],
|
| 198 |
|
| 199 |
+
# outputs=[gr.components.Label(num_top_classes=2, label="PREDICTION")],
|
| 200 |
|
| 201 |
+
# examples=['Tangina mo naman sobrang yabang mo gago!!😠😤 @davidrafael',
|
| 202 |
+
# 'Napakainit ngayong araw pakshet namaaan!!',
|
| 203 |
+
# 'Napakabagal naman ng wifi tangina #PLDC #HelloDITO',
|
| 204 |
+
# 'Bobo ka ba? napakadali lang nyan eh... 🤡',
|
| 205 |
+
# 'Uy gago laptrip yung nangyare samen kanina HAHAHA😂😂'],
|
| 206 |
+
# )
|
| 207 |
|
| 208 |
+
# demo.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
|
| 210 |
+
predict("asdasd kgjhgjhgj")
|