import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
import re
from sklearn.preprocessing import LabelEncoder
import joblib
import pickle
import gzip
import streamlit as st
import requests
import io
# Load and preprocess the data
def preprocess_text(text):
    """Normalize raw comment text for the bag-of-words pipeline.

    Coerces the value to ``str``, lowercases it, and strips every
    character that is not an ASCII letter or whitespace.
    """
    normalized = str(text).lower()
    return re.sub(r'[^a-zA-Z\s]', '', normalized)
# def nb_clf(sample_comments):
# # # Load dataset
# # df = pd.read_csv('/home/aibo/prototype_v1/DATASET/thesis_final_dataset.csv')
# # # Apply text preprocessing
# # df['cleaned_text'] = df['text'].apply(preprocess_text)
# # # Encode labels
# # le = LabelEncoder()
# # df['label'] = le.fit_transform(df['label'])
# # # Split the dataset into training and testing sets
# # train_df, test_df = train_test_split(df, test_size=0.2)
# # # Create Bag-of-Words representation using CountVectorizer
# # vectorizer = CountVectorizer(max_features=5000)
# # X_train = vectorizer.fit_transform(train_df['cleaned_text'])
# # # X_test = vectorizer.transform(test_df['cleaned_text'])
# # y_train = train_df['label']
# # # y_test = test_df['label']
# # # Initialize Naive Bayes classifier
# # nb_classifier = MultinomialNB()
# # # Train Naive Bayes classifier
# # nb_classifier.fit(X_train, y_train)
# # # # Save the trained model
# # # joblib.dump(nb_classifier, "/home/aibo/prototype_v1/NAIVE_BAYES/naive_bayes_sentiment_model.pkl")
# # # Save the trained model to a pickle file
# # with open("/home/aibo/prototype_v1/NAIVE_BAYES/naive_bayes_sentiment_model.pkl", 'wb') as model_file:
# # pickle.dump(nb_classifier, model_file)
# # # Save the CountVectorizer to a pickle file
# # with open('/home/aibo/prototype_v1/NAIVE_BAYES/vectorizer.pkl', 'wb') as vectorizer_file:
# # pickle.dump(vectorizer, vectorizer_file)
# # # Load the trained Naive Bayes model
# # loaded_nb_model = joblib.load("/home/aibo/prototype_v1/NAIVE_BAYES/naive_bayes_sentiment_model.pkl")
# # Load the model from the pickle file
# #######################################
# # Load LabelEncoder
# with gzip.open(r'/home/aibo/prototype_v1/NAIVE_BAYES/label_encoder.pkl', 'rb') as label_encoder_file:
# le = pickle.load(label_encoder_file)
# # Load the trained Naive Bayes classifier
# with gzip.open(r'/home/aibo/prototype_v1/NAIVE_BAYES/naive_bayes_sentiment_model.pkl', 'rb') as model_file:
# loaded_nb_model = pickle.load(model_file)
# # Load the CountVectorizer
# with gzip.open(r'/home/aibo/prototype_v1/NAIVE_BAYES/vectorizer.pkl', 'rb') as vectorizer_file:
# loaded_vectorizer = pickle.load(vectorizer_file)
# # Apply text preprocessing to the sample comments
# sample_comments_preprocessed = [preprocess_text(comment) for comment in sample_comments]
# # Transform the sample comments using the same CountVectorizer
# sample_comments_transformed = loaded_vectorizer.transform(sample_comments_preprocessed)
# # Use the loaded model for inference
# predicted_labels = loaded_nb_model.predict(sample_comments_transformed)
# predicted_sentiment_label = le.inverse_transform(predicted_labels)
# # # Decode the predicted labels using the loaded label encoder
# # le = LabelEncoder()
# # decoded_labels = le.inverse_transform(predicted_labels)
# # Mapping function
# # Map predictions to "negative" (0) and "positive" (1)
# # predicted_labels = ["negative" if pred == 0 else "positive" for pred in predicted_labels]
# return sample_comments_preprocessed, predicted_sentiment_label
# # sample_comments = [
# # "The disinterested teaching style of the instructor made it hard to fully comprehend and engage with the material",
# # "Hindi ko matukoy kung paano mapapakinabangan ang mga kasanayang ito sa totoong buhay",
# # "The course lacks real-world applications of machine learning that would enhance practical understanding.",
# # "your positivity is like a ray of sunshine on a cloudy day.",
# # "I'm grateful for the positive impact you've had on my education",
# # "The instructors' enthusiasm creates a positive learning environment where everyone feels encouraged to participate and ask questions",
# # "Hindi ako nakatutok sa lecture na ito",
# # "You show the true value of education.",
# # "Ipinapakita mo ang halaga ng pagiging positibo at pagiging bukas sa pagbabago sa aming mga buhay",
# # "You give meaning to our dreams.",
# # "Your class has ignited a passion for the subject in me",
# # "I didn't find the coursework challenging or stimulating",
# # "Napakahusay mong magbigay ng mga halimbawa na nagpapakita ng tunay na buhay na aplikasyon ng aming natutunan",
# # "You've provided valuable insights that will stay with me",
# # "I hoped for more enthusiasm and passion from our instructors",
# # "Your lessons shed light on our minds.",
# # "The instructor's lack of enthusiasm is reflected in the students' lack of interest",
# # "your perseverance in the face of challenges is truly admirable.",
# # "Minsan nakakalito ang pagkasunod-sunod ng mga topics",
# # "hindi mo maasahan sa bawat tanong",
# # "hindi sobrang magaling magturo si sir",
# # "not so bad, he teaches not very bad",
# # ]
# # print(nb_clf(sample_comments))
def read_bytes_from_url(url):
    """Fetch a gzip-compressed pickle from ``url`` and return the unpickled object.

    Returns ``None`` on a non-200 response (best-effort behavior preserved
    from the original; callers must be prepared for ``None``).

    SECURITY NOTE: unpickling bytes fetched over the network executes
    arbitrary code if the remote file is malicious — only use against
    trusted hosts (here: a fixed Hugging Face repository).
    """
    # timeout prevents a dead host from hanging the Streamlit app forever;
    # the original call had no timeout, which requests treats as "wait indefinitely".
    response = requests.get(url, timeout=30)
    if response.status_code == 200:
        # Read the content as bytes
        pickle_bytes = response.content
        # Decompress the gzip stream in memory and unpickle the payload
        with gzip.open(io.BytesIO(pickle_bytes), 'rb') as payload_file:
            value = pickle.load(payload_file)
        return value
    else:
        print(f"Failed to fetch URL: {url}. Status code: {response.status_code}")
        return None
@st.cache()  # FIX: was a bare `st.cache()` statement, which has no effect; it must
# decorate the function so the three pickle downloads below are cached across
# Streamlit reruns. (On Streamlit >= 1.18 prefer @st.cache_resource.)
def nb_clf(sample_comments):
    """Run Naive Bayes sentiment inference on a list of comments.

    Parameters
    ----------
    sample_comments : iterable of str
        Raw comment texts.

    Returns
    -------
    tuple
        ``(sample_comments, predicted_sentiment_label, predicted_proba_dict)``
        where ``predicted_sentiment_label`` holds the decoded class names and
        ``predicted_proba_dict`` is a list of ``{'positive': %, 'negative': %}``.

    Raises
    ------
    RuntimeError
        If any model artifact fails to download from the Hugging Face Hub.
    """
    # Load the LabelEncoder, trained classifier, and CountVectorizer from the
    # Hugging Face Hub (each is a gzip-compressed pickle).
    le = read_bytes_from_url('https://huggingface.co/MENG21/studfacultyeval-NAIVEBAYES/resolve/main/label_encoder.pkl')
    loaded_nb_model = read_bytes_from_url('https://huggingface.co/MENG21/studfacultyeval-NAIVEBAYES/resolve/main/model.pkl')
    loaded_vectorizer = read_bytes_from_url('https://huggingface.co/MENG21/studfacultyeval-NAIVEBAYES/resolve/main/vectorizer.pkl')
    # read_bytes_from_url returns None on a failed request; fail fast with a
    # clear message instead of an opaque AttributeError further down.
    if le is None or loaded_nb_model is None or loaded_vectorizer is None:
        raise RuntimeError('Failed to download one or more model artifacts from the Hugging Face Hub')
    # Clean the comments the same way the training data was cleaned
    sample_comments_preprocessed = [preprocess_text(comment) for comment in sample_comments]
    # Vectorize with the training-time CountVectorizer vocabulary
    sample_comments_transformed = loaded_vectorizer.transform(sample_comments_preprocessed)
    # Class probabilities and hard predictions from the loaded model
    predicted_proba = loaded_nb_model.predict_proba(sample_comments_transformed)
    predicted_labels = loaded_nb_model.predict(sample_comments_transformed)
    predicted_sentiment_label = le.inverse_transform(predicted_labels)
    # Express probabilities as percentages. Column order follows
    # model.classes_; assumes index 0 == negative, 1 == positive, which
    # matches LabelEncoder's alphabetical encoding of {'negative', 'positive'}
    # — TODO confirm against the training artifacts.
    predicted_proba_dict = [
        {'positive': prob_array[1] * 100, 'negative': prob_array[0] * 100}
        for prob_array in predicted_proba
    ]
    return sample_comments, predicted_sentiment_label, predicted_proba_dict
# sample_comments = [Your list of sample comments here]
# Uncomment the following line to print the results
# print(nb_clf(sample_comments))