Spaces:
Sleeping
Sleeping
Commit ·
64140c9
1
Parent(s): a1716a8
Create functions.py
Browse files- functions.py +36 -0
functions.py
CHANGED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def get_wordnet_pos(treebank_tag):
    """Translate a Treebank-style POS tag into a WordNet POS constant.

    The tag is matched by its first letter: 'J' -> adjective, 'V' -> verb,
    'N' -> noun, 'R' -> adverb.  Any tag that matches none of these
    prefixes (including an empty string) is treated as a noun.

    Args:
        treebank_tag: Tag string; only ``str.startswith`` is used on it.

    Returns:
        One of ``wordnet.ADJ``, ``wordnet.VERB``, ``wordnet.NOUN`` or
        ``wordnet.ADV``.
    """
    # Checked in order; the first matching prefix wins.
    prefix_table = (
        ('J', wordnet.ADJ),
        ('V', wordnet.VERB),
        ('N', wordnet.NOUN),
        ('R', wordnet.ADV),
    )
    for prefix, wordnet_pos in prefix_table:
        if treebank_tag.startswith(prefix):
            return wordnet_pos
    # Fallback for every other tag.
    return wordnet.NOUN
|
| 12 |
+
|
| 13 |
+
# Shared lemmatizer instance, created once at import time and used by
# preprocess_text.
# NOTE(review): WordNetLemmatizer must already be in scope when this line
# runs; no import for it is visible in this file -- confirm.
lemmatizer = WordNetLemmatizer()
|
| 14 |
+
|
| 15 |
+
def preprocess_text(text, *, use_pos_tags=False):
    """Lowercase, tokenize, drop stop words, lemmatize and strip punctuation.

    Args:
        text: Raw input string.
        use_pos_tags: Keyword-only switch for how the lemmatizer's part of
            speech is chosen.

            * ``False`` (default) keeps the historical behaviour: each token
              is itself handed to ``get_wordnet_pos`` in place of a POS tag.
              Because the text is lowercased first, tokens do not normally
              start with 'J', 'V', 'N' or 'R', so in practice every token is
              lemmatized as a noun.
            * ``True`` tags the tokens with ``nltk.pos_tag`` and passes the
              real tag, so verbs, adjectives and adverbs are lemmatized with
              their own part of speech.

            NOTE(review): enabling this changes the produced text.  If a
            model was trained on the default output, keep the default or
            retrain -- confirm with the training pipeline.

    Returns:
        The processed tokens joined by single spaces.  A token made up only
        of punctuation becomes an empty string but is still joined, so the
        result may contain consecutive spaces.
    """
    tokens = word_tokenize(text.lower())

    if use_pos_tags:
        # Imported lazily so the default path does not need the tagger.
        # NOTE(review): pos_tag presumably requires NLTK's perceptron tagger
        # data to be downloaded -- confirm for the deployment environment.
        from nltk import pos_tag
        tagged = pos_tag(tokens)
    else:
        # Legacy path: the token doubles as its own "tag".
        tagged = [(token, token) for token in tokens]

    kept = [
        (token, tag)
        for token, tag in tagged
        if token.lower() not in stopword_list
    ]
    lemmas = [
        lemmatizer.lemmatize(token, get_wordnet_pos(tag))
        for token, tag in kept
    ]

    # Build the punctuation-removal table once rather than once per token.
    strip_punctuation = str.maketrans('', '', string.punctuation)
    return ' '.join(lemma.translate(strip_punctuation) for lemma in lemmas)
|
| 22 |
+
|
| 23 |
+
def text_model(df):
    """Attach a preprocessed-text column to ``df`` and return that column.

    Every value of ``df['Comment']`` is run through ``preprocess_text``.
    The result is stored on ``df`` itself under ``'preprocessed_text'``
    (so the caller's object is modified) and that stored column is returned.

    Args:
        df: Table with a ``'Comment'`` column (presumably a pandas
            DataFrame -- confirm against callers).

    Returns:
        ``df['preprocessed_text']``.
    """
    source_column, target_column = 'Comment', 'preprocessed_text'
    cleaned = df[source_column].apply(preprocess_text)
    df[target_column] = cleaned
    # Return the stored column, not the intermediate, as the original did.
    return df[target_column]
|
| 26 |
+
|
| 27 |
+
def prediction(model, X):
    """Print the predicted emotion for every row of ``X``.

    ``model.predict(X)`` is reduced with ``np.argmax(..., axis=1)`` to one
    class index per row.  Index 0 is reported as FEAR, index 1 as ANGER and
    every other index as JOY.  One line is printed per row; nothing is
    returned.

    Args:
        model: Object exposing ``predict(X)``.
        X: Input passed straight through to ``model.predict``.
    """
    class_ids = np.argmax(model.predict(X), axis=1)
    for position, class_id in enumerate(class_ids):
        if class_id == 0:
            emotion = "FEAR"
        elif class_id == 1:
            emotion = "ANGER"
        else:
            # Catch-all: any index other than 0 or 1 is labelled JOY.
            emotion = "JOY"
        print(f"Text {position} indicates the person is feeling {emotion}")
|