Commit ·
861610c
1
Parent(s): 895197b
Update functions.py
Browse files- functions.py +2 -0
functions.py
CHANGED
|
@@ -1,11 +1,13 @@
|
|
| 1 |
from transformers import AutoTokenizer, AutoConfig, AutoModelForSequenceClassification
|
| 2 |
from scipy.special import softmax
|
|
|
|
| 3 |
# Define the preprocess function
|
| 4 |
def preprocess(text):
    """Mask user mentions and links in ``text`` with placeholder tokens.

    The input is split on single spaces. A token that starts with ``@`` and
    is longer than one character becomes ``@user``; a token that starts with
    ``http`` becomes ``http``. Every other token is kept unchanged, and the
    tokens are joined back together with single spaces.

    Args:
        text: The raw string to normalise.

    Returns:
        The string with mentions and links replaced by placeholders.
    """
    new_text = []
    # Split on a literal space (not on arbitrary whitespace) so that runs of
    # spaces and other whitespace characters survive the split/join round trip.
    for t in text.split(" "):
        # A lone "@" is not a mention, hence the length check.
        t = '@user' if t.startswith('@') and len(t) > 1 else t
        t = 'http' if t.startswith('http') else t
        new_text.append(t)
    return " ".join(new_text)
|
| 11 |
|
|
|
|
| 1 |
from transformers import AutoTokenizer, AutoConfig, AutoModelForSequenceClassification
|
| 2 |
from scipy.special import softmax
|
| 3 |
+
import emoji
|
| 4 |
# Define the preprocess function
|
| 5 |
def preprocess(text):
    """Mask mentions and links in ``text`` and spell out emojis as text.

    The input is split on single spaces. A token that starts with ``@`` and
    is longer than one character becomes ``@user``; a token that starts with
    ``http`` becomes ``http``. Each resulting token is then passed through
    ``emoji.demojize`` to convert emojis to their text representation, and
    the tokens are joined back together with single spaces.

    Args:
        text: The raw string to normalise.

    Returns:
        The string with mentions and links replaced by placeholders and
        emojis replaced by their text representation.
    """
    new_text = []
    # Split on a literal space (not on arbitrary whitespace) so that runs of
    # spaces and other whitespace characters survive the split/join round trip.
    for t in text.split(" "):
        # A lone "@" is not a mention, hence the length check.
        t = '@user' if t.startswith('@') and len(t) > 1 else t
        t = 'http' if t.startswith('http') else t
        t = emoji.demojize(t)  # Convert emojis to text representation
        new_text.append(t)
    return " ".join(new_text)
|
| 13 |
|