File size: 8,483 Bytes
e4fe207
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
import re
from sklearn.preprocessing import LabelEncoder
import joblib
import pickle
import gzip
import streamlit as st
import requests
import io


# Load and preprocess the data
def preprocess_text(text):
    """Normalize a raw comment for the bag-of-words sentiment model.

    Coerces the input to ``str`` (so NaN/None/numeric cells from pandas
    don't crash the pipeline), lower-cases it, and strips every character
    that is not an ASCII letter or whitespace.
    """
    normalized = str(text).lower()
    # Keep only letters and whitespace; punctuation/digits carry no signal here.
    return re.sub(r'[^a-zA-Z\s]', '', normalized)

# def nb_clf(sample_comments):
#     # # Load dataset
#     # df = pd.read_csv('/home/aibo/prototype_v1/DATASET/thesis_final_dataset.csv')

#     # # Apply text preprocessing
#     # df['cleaned_text'] = df['text'].apply(preprocess_text)

#     # # Encode labels
#     # le = LabelEncoder()
#     # df['label'] = le.fit_transform(df['label'])

#     # # Split the dataset into training and testing sets
#     # train_df, test_df = train_test_split(df, test_size=0.2) 

#     # # Create Bag-of-Words representation using CountVectorizer
#     # vectorizer = CountVectorizer(max_features=5000)
#     # X_train = vectorizer.fit_transform(train_df['cleaned_text'])
#     # # X_test = vectorizer.transform(test_df['cleaned_text'])
#     # y_train = train_df['label']
#     # # y_test = test_df['label']

#     # # Initialize Naive Bayes classifier
#     # nb_classifier = MultinomialNB()

#     # # Train Naive Bayes classifier
#     # nb_classifier.fit(X_train, y_train)

#     # # # Save the trained model
#     # # joblib.dump(nb_classifier, "/home/aibo/prototype_v1/NAIVE_BAYES/naive_bayes_sentiment_model.pkl")
    
#     # # Save the trained model to a pickle file
#     # with open("/home/aibo/prototype_v1/NAIVE_BAYES/naive_bayes_sentiment_model.pkl", 'wb') as model_file:
#     #     pickle.dump(nb_classifier, model_file)
        
#     # # Save the CountVectorizer to a pickle file
#     # with open('/home/aibo/prototype_v1/NAIVE_BAYES/vectorizer.pkl', 'wb') as vectorizer_file:
#     #     pickle.dump(vectorizer, vectorizer_file)

#     # # Load the trained Naive Bayes model
#     # loaded_nb_model = joblib.load("/home/aibo/prototype_v1/NAIVE_BAYES/naive_bayes_sentiment_model.pkl")
#     # Load the model from the pickle file
    
    
#     #######################################
#     # Load LabelEncoder
#     with gzip.open(r'/home/aibo/prototype_v1/NAIVE_BAYES/label_encoder.pkl', 'rb') as label_encoder_file:
#         le = pickle.load(label_encoder_file)

#     # Load the trained Naive Bayes classifier
#     with gzip.open(r'/home/aibo/prototype_v1/NAIVE_BAYES/naive_bayes_sentiment_model.pkl', 'rb') as model_file:
#         loaded_nb_model = pickle.load(model_file)

#     # Load the CountVectorizer
#     with gzip.open(r'/home/aibo/prototype_v1/NAIVE_BAYES/vectorizer.pkl', 'rb') as vectorizer_file:
#         loaded_vectorizer = pickle.load(vectorizer_file)

#     # Apply text preprocessing to the sample comments
#     sample_comments_preprocessed = [preprocess_text(comment) for comment in sample_comments]

#     # Transform the sample comments using the same CountVectorizer
#     sample_comments_transformed = loaded_vectorizer.transform(sample_comments_preprocessed)

#     # Use the loaded model for inference
#     predicted_labels = loaded_nb_model.predict(sample_comments_transformed)
    
#     predicted_sentiment_label = le.inverse_transform(predicted_labels)

#     # # Decode the predicted labels using the loaded label encoder
#     # le = LabelEncoder()
#     # decoded_labels = le.inverse_transform(predicted_labels)
#     # Mapping function
#     # Map predictions to "negative" (0) and "positive" (1)
#     # predicted_labels = ["negative" if pred == 0 else "positive" for pred in predicted_labels]
    
#     return sample_comments_preprocessed, predicted_sentiment_label


# # sample_comments = [
# #     "The disinterested teaching style of the instructor made it hard to fully comprehend and engage with the material",
# #     "Hindi ko matukoy kung paano mapapakinabangan ang mga kasanayang ito sa totoong buhay",
# #     "The course lacks real-world applications of machine learning that would enhance practical understanding.",
# #     "your positivity is like a ray of sunshine on a cloudy day.",
# #     "I'm grateful for the positive impact you've had on my education",
# #     "The instructors' enthusiasm creates a positive learning environment where everyone feels encouraged to participate and ask questions",
# #     "Hindi ako nakatutok sa lecture na ito",
# #     "You show the true value of education.",
# #     "Ipinapakita mo ang halaga ng pagiging positibo at pagiging bukas sa pagbabago sa aming mga buhay",
# #     "You give meaning to our dreams.",
# #     "Your class has ignited a passion for the subject in me",
# #     "I didn't find the coursework challenging or stimulating",
# #     "Napakahusay mong magbigay ng mga halimbawa na nagpapakita ng tunay na buhay na aplikasyon ng aming natutunan",
# #     "You've provided valuable insights that will stay with me",
# #     "I hoped for more enthusiasm and passion from our instructors",
# #     "Your lessons shed light on our minds.",
# #     "The instructor's lack of enthusiasm is reflected in the students' lack of interest",
# #     "your perseverance in the face of challenges is truly admirable.",
# #     "Minsan nakakalito ang pagkasunod-sunod ng mga topics",
# #     "hindi mo maasahan sa bawat tanong",
# #     "hindi sobrang magaling magturo si sir",
# #     "not so bad, he teaches not very bad",
# # ]


# # print(nb_clf(sample_comments))

def read_bytes_from_url(url, timeout=30):
    """Download a gzip-compressed pickle from *url* and return the unpickled object.

    Args:
        url: Direct download URL (here: Hugging Face Hub ``resolve`` links).
        timeout: Seconds to wait for the HTTP request before giving up;
            without one, ``requests.get`` can block indefinitely.

    Returns:
        The deserialized object, or ``None`` if the request did not return
        HTTP 200 (a message is printed in that case).

    SECURITY NOTE: ``pickle.load`` executes arbitrary code from the payload.
    Only point this at artifacts you control/trust (as done here).
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to fetch URL: {url}. Status code: {response.status_code}")
        return None

    # The Hub artifacts are gzip-compressed pickle files, so wrap the raw
    # bytes in a BytesIO and decompress before unpickling.
    with gzip.open(io.BytesIO(response.content), 'rb') as pickle_file:
        return pickle.load(pickle_file)

@st.cache  # BUG FIX: was a bare `st.cache()` statement, so the decorator was
# never applied and the models were re-downloaded from the Hub on every rerun.
# (On newer Streamlit versions prefer `@st.cache_data` / `@st.cache_resource`.)
def nb_clf(sample_comments):
    """Run Naive Bayes sentiment inference on a list of raw comments.

    Downloads the LabelEncoder, trained MultinomialNB model, and
    CountVectorizer from the Hugging Face Hub, preprocesses the comments,
    and predicts a sentiment label plus class probabilities for each.

    Args:
        sample_comments: Iterable of raw comment strings.

    Returns:
        Tuple of (original comments, decoded sentiment labels,
        list of {'positive': pct, 'negative': pct} dicts per comment).
    """
    # Load the fitted LabelEncoder from the Hugging Face Hub.
    le = read_bytes_from_url('https://huggingface.co/MENG21/studfacultyeval-NAIVEBAYES/resolve/main/label_encoder.pkl')

    # Load the trained Naive Bayes classifier from the Hugging Face Hub.
    loaded_nb_model = read_bytes_from_url('https://huggingface.co/MENG21/studfacultyeval-NAIVEBAYES/resolve/main/model.pkl')

    # Load the CountVectorizer fitted alongside the model.
    loaded_vectorizer = read_bytes_from_url('https://huggingface.co/MENG21/studfacultyeval-NAIVEBAYES/resolve/main/vectorizer.pkl')

    # Apply the same text preprocessing used at training time.
    sample_comments_preprocessed = [preprocess_text(comment) for comment in sample_comments]

    # Vectorize with the fitted CountVectorizer (transform only, no refit).
    sample_comments_transformed = loaded_vectorizer.transform(sample_comments_preprocessed)

    # Class-membership probabilities and hard label predictions.
    predicted_proba = loaded_nb_model.predict_proba(sample_comments_transformed)
    predicted_labels = loaded_nb_model.predict(sample_comments_transformed)

    # Decode integer labels back to their original string names.
    predicted_sentiment_label = le.inverse_transform(predicted_labels)

    # Per-comment percentage scores. Assumes column 0 = negative and
    # column 1 = positive per the LabelEncoder's alphabetical ordering —
    # TODO confirm against `le.classes_`.
    predicted_proba_dict = [
        {'positive': proba[1] * 100, 'negative': proba[0] * 100}
        for proba in predicted_proba
    ]

    return sample_comments, predicted_sentiment_label, predicted_proba_dict

# sample_comments = [Your list of sample comments here]

# Uncomment the following line to print the results
# print(nb_clf(sample_comments))