Spaces:
Runtime error
Runtime error
Remove trailing punctuation
Browse files
app.py
CHANGED
|
@@ -6,7 +6,7 @@ import json
|
|
| 6 |
from thefuzz import process, fuzz
|
| 7 |
import numpy as np
|
| 8 |
import re
|
| 9 |
-
|
| 10 |
|
| 11 |
|
| 12 |
API_URL = "https://api-inference.huggingface.co/models/Dabid/test2"
|
|
@@ -39,7 +39,8 @@ def fuzzy_lookup(tweet):
|
|
| 39 |
# Loop each word in tweet
|
| 40 |
for word in tweet.split():
|
| 41 |
# Remove punctuation
|
| 42 |
-
|
|
|
|
| 43 |
# Only get digits and letters then lowercase
|
| 44 |
processed_word = re.sub("[^a-zA-Z0-9@]", "", word).lower()
|
| 45 |
scores = []
|
|
@@ -55,15 +56,15 @@ def fuzzy_lookup(tweet):
|
|
| 55 |
if len(scores) > 0:
|
| 56 |
max_score_index = np.argmax(scores)
|
| 57 |
if matched_words[max_score_index] in lookup_profanity:
|
| 58 |
-
matches[
|
| 59 |
|
| 60 |
|
| 61 |
-
for
|
| 62 |
-
word_split =
|
| 63 |
for pronoun in obj_pronouns:
|
| 64 |
if len(word_split) > 1:
|
| 65 |
if pronoun == word_split[-1]:
|
| 66 |
-
matches[
|
| 67 |
break
|
| 68 |
|
| 69 |
# Replace each profanity with its fuzzy lookup result
|
|
|
|
| 6 |
from thefuzz import process, fuzz
|
| 7 |
import numpy as np
|
| 8 |
import re
|
| 9 |
+
from string import punctuation
|
| 10 |
|
| 11 |
|
| 12 |
API_URL = "https://api-inference.huggingface.co/models/Dabid/test2"
|
|
|
|
| 39 |
# Loop each word in tweet
|
| 40 |
for word in tweet.split():
|
| 41 |
# Remove punctuation
|
| 42 |
+
word = word.strip(punctuation)
|
| 43 |
+
|
| 44 |
# Only get digits and letters then lowercase
|
| 45 |
processed_word = re.sub("[^a-zA-Z0-9@]", "", word).lower()
|
| 46 |
scores = []
|
|
|
|
| 56 |
if len(scores) > 0:
|
| 57 |
max_score_index = np.argmax(scores)
|
| 58 |
if matched_words[max_score_index] in lookup_profanity:
|
| 59 |
+
matches[word] = matched_words[max_score_index]
|
| 60 |
|
| 61 |
|
| 62 |
+
for word, matched_profanity in matches.items():
|
| 63 |
+
word_split = word.split(matched_profanity[-2:])
|
| 64 |
for pronoun in obj_pronouns:
|
| 65 |
if len(word_split) > 1:
|
| 66 |
if pronoun == word_split[-1]:
|
| 67 |
+
matches[word] = matched_profanity + ' ' + pronoun
|
| 68 |
break
|
| 69 |
|
| 70 |
# Replace each profanity with its fuzzy lookup result
|