File size: 1,634 Bytes
af6c457
 
 
 
 
 
 
 
 
 
 
 
 
 
64140c9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
af6c457
64140c9
 
af6c457
64140c9
 
 
 
 
10e5f20
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import pandas as pd
import numpy as np
# preprocess
import string
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet
import joblib

# One-time fetch of the NLTK resources this module depends on
# (no-ops if they are already present in the local NLTK data dir).
nltk.download('punkt')  # tokenizer models used by word_tokenize
nltk.download('wordnet')  # lexical database backing WordNetLemmatizer
nltk.download('averaged_perceptron_tagger')  # POS tagger resource

def get_wordnet_pos(treebank_tag):
    """Map a Penn Treebank POS tag to the corresponding WordNet POS constant.

    Any tag that is not an adjective ('J*'), verb ('V*'), noun ('N*'),
    or adverb ('R*') falls back to NOUN, matching the lemmatizer's default.
    """
    tag_to_pos = {
        'J': wordnet.ADJ,
        'V': wordnet.VERB,
        'N': wordnet.NOUN,
        'R': wordnet.ADV,
    }
    # Only the first character of a treebank tag encodes the word class.
    return tag_to_pos.get(treebank_tag[:1], wordnet.NOUN)

lemmatizer = WordNetLemmatizer()

def preprocess_text(text, stopword_list=None):
    """Clean a raw text string for emotion classification.

    Pipeline: lowercase -> word-tokenize -> drop stopwords -> POS-aware
    lemmatization -> strip punctuation -> re-join with spaces.

    Args:
        text: Raw input string.
        stopword_list: Optional iterable of stopwords. When None (the
            default, preserving the original behavior), the persisted
            list is loaded from 'stopword_list.joblib'.

    Returns:
        A single space-joined string of cleaned tokens.
    """
    if stopword_list is None:
        stopword_list = joblib.load('stopword_list.joblib')
    stopwords = set(stopword_list)  # O(1) membership tests in the filter below
    text = text.lower()  # lowercase text
    tokens = word_tokenize(text)  # tokenize
    # Original line was a syntax error ("not in stopword_list= load."); the
    # intended filter is plain stopword membership (tokens are already lowercase).
    filtered_words = [word for word in tokens if word not in stopwords]
    # Tag with Penn Treebank POS so the lemmatizer receives the right word
    # class. The original passed the word itself to get_wordnet_pos, which
    # (on lowercased text) always fell through to NOUN.
    tagged = nltk.pos_tag(filtered_words)
    lemmatized_words = [lemmatizer.lemmatize(word, get_wordnet_pos(tag))
                        for word, tag in tagged]
    punct_table = str.maketrans('', '', string.punctuation)  # hoisted out of the loop
    lemmatized_clean = [word.translate(punct_table) for word in lemmatized_words]
    return ' '.join(lemmatized_clean)

def prediction(model, X):
    """Report the predicted emotion for each sample in X.

    Args:
        model: A trained classifier exposing ``predict(X)`` that returns
            per-class scores of shape (n_samples, n_classes).
        X: Feature matrix of preprocessed texts.

    Returns:
        A newline-joined string with one report line per input sample
        (identical to the original single-line output when X has one row).
    """
    # Use the model the caller passed in. The original shadowed it with
    # load_model('best_model.keras'), but load_model was never imported
    # (NameError at runtime) and the parameter was silently ignored.
    y_pred = model.predict(X)
    predictions = np.argmax(y_pred, axis=1)
    labels = {0: "FEAR", 1: "ANGER"}  # any other class index maps to JOY
    # The original returned inside the loop, so only the first text was
    # ever reported; collect a line for every sample instead.
    lines = [
        f"Text {index} indicates the person is feeling {labels.get(val, 'JOY')}"
        for index, val in enumerate(predictions)
    ]
    return "\n".join(lines)