# studfaceval / app5_selectbox / naive_bayes_cl.py
# MENG21's picture
# Upload 68 files
# e4fe207 verified
import functools
import gzip
import io
import pickle
import re

import joblib
import numpy as np
import pandas as pd
import requests
import streamlit as st
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.preprocessing import LabelEncoder
# Text preprocessing helper
def preprocess_text(text):
    """Normalize a piece of text before vectorization.

    The input is coerced to ``str``, lower-cased, and stripped of every
    character that is neither an ASCII letter nor whitespace.

    Args:
        text: Any object; it is converted with ``str()`` first.

    Returns:
        The cleaned, lower-case string. Whitespace is left untouched.
    """
    lowered = str(text).lower()
    # Removed characters are deleted, not replaced by a space, so
    # "well-known" becomes "wellknown".
    return re.sub(r'[^a-zA-Z\s]', '', lowered)
# def nb_clf(sample_comments):
# # # Load dataset
# # df = pd.read_csv('/home/aibo/prototype_v1/DATASET/thesis_final_dataset.csv')
# # # Apply text preprocessing
# # df['cleaned_text'] = df['text'].apply(preprocess_text)
# # # Encode labels
# # le = LabelEncoder()
# # df['label'] = le.fit_transform(df['label'])
# # # Split the dataset into training and testing sets
# # train_df, test_df = train_test_split(df, test_size=0.2)
# # # Create Bag-of-Words representation using CountVectorizer
# # vectorizer = CountVectorizer(max_features=5000)
# # X_train = vectorizer.fit_transform(train_df['cleaned_text'])
# # # X_test = vectorizer.transform(test_df['cleaned_text'])
# # y_train = train_df['label']
# # # y_test = test_df['label']
# # # Initialize Naive Bayes classifier
# # nb_classifier = MultinomialNB()
# # # Train Naive Bayes classifier
# # nb_classifier.fit(X_train, y_train)
# # # # Save the trained model
# # # joblib.dump(nb_classifier, "/home/aibo/prototype_v1/NAIVE_BAYES/naive_bayes_sentiment_model.pkl")
# # # Save the trained model to a pickle file
# # with open("/home/aibo/prototype_v1/NAIVE_BAYES/naive_bayes_sentiment_model.pkl", 'wb') as model_file:
# # pickle.dump(nb_classifier, model_file)
# # # Save the CountVectorizer to a pickle file
# # with open('/home/aibo/prototype_v1/NAIVE_BAYES/vectorizer.pkl', 'wb') as vectorizer_file:
# # pickle.dump(vectorizer, vectorizer_file)
# # # Load the trained Naive Bayes model
# # loaded_nb_model = joblib.load("/home/aibo/prototype_v1/NAIVE_BAYES/naive_bayes_sentiment_model.pkl")
# # Load the model from the pickle file
# #######################################
# # Load LabelEncoder
# with gzip.open(r'/home/aibo/prototype_v1/NAIVE_BAYES/label_encoder.pkl', 'rb') as label_encoder_file:
# le = pickle.load(label_encoder_file)
# # Load the trained Naive Bayes classifier
# with gzip.open(r'/home/aibo/prototype_v1/NAIVE_BAYES/naive_bayes_sentiment_model.pkl', 'rb') as model_file:
# loaded_nb_model = pickle.load(model_file)
# # Load the CountVectorizer
# with gzip.open(r'/home/aibo/prototype_v1/NAIVE_BAYES/vectorizer.pkl', 'rb') as vectorizer_file:
# loaded_vectorizer = pickle.load(vectorizer_file)
# # Apply text preprocessing to the sample comments
# sample_comments_preprocessed = [preprocess_text(comment) for comment in sample_comments]
# # Transform the sample comments using the same CountVectorizer
# sample_comments_transformed = loaded_vectorizer.transform(sample_comments_preprocessed)
# # Use the loaded model for inference
# predicted_labels = loaded_nb_model.predict(sample_comments_transformed)
# predicted_sentiment_label = le.inverse_transform(predicted_labels)
# # # Decode the predicted labels using the loaded label encoder
# # le = LabelEncoder()
# # decoded_labels = le.inverse_transform(predicted_labels)
# # Mapping function
# # Map predictions to "negative" (0) and "positive" (1)
# # predicted_labels = ["negative" if pred == 0 else "positive" for pred in predicted_labels]
# return sample_comments_preprocessed, predicted_sentiment_label
# # sample_comments = [
# # "The disinterested teaching style of the instructor made it hard to fully comprehend and engage with the material",
# # "Hindi ko matukoy kung paano mapapakinabangan ang mga kasanayang ito sa totoong buhay",
# # "The course lacks real-world applications of machine learning that would enhance practical understanding.",
# # "your positivity is like a ray of sunshine on a cloudy day.",
# # "I'm grateful for the positive impact you've had on my education",
# # "The instructors' enthusiasm creates a positive learning environment where everyone feels encouraged to participate and ask questions",
# # "Hindi ako nakatutok sa lecture na ito",
# # "You show the true value of education.",
# # "Ipinapakita mo ang halaga ng pagiging positibo at pagiging bukas sa pagbabago sa aming mga buhay",
# # "You give meaning to our dreams.",
# # "Your class has ignited a passion for the subject in me",
# # "I didn't find the coursework challenging or stimulating",
# # "Napakahusay mong magbigay ng mga halimbawa na nagpapakita ng tunay na buhay na aplikasyon ng aming natutunan",
# # "You've provided valuable insights that will stay with me",
# # "I hoped for more enthusiasm and passion from our instructors",
# # "Your lessons shed light on our minds.",
# # "The instructor's lack of enthusiasm is reflected in the students' lack of interest",
# # "your perseverance in the face of challenges is truly admirable.",
# # "Minsan nakakalito ang pagkasunod-sunod ng mga topics",
# # "hindi mo maasahan sa bawat tanong",
# # "hindi sobrang magaling magturo si sir",
# # "not so bad, he teaches not very bad",
# # ]
# # print(nb_clf(sample_comments))
def read_bytes_from_url(url, timeout=30):
    """Download a gzip-compressed pickle from *url* and return the object.

    Args:
        url: Address of the gzip-compressed pickle file.
        timeout: Seconds to wait for the server before ``requests`` gives
            up. Without a timeout ``requests.get`` may block indefinitely.

    Returns:
        The unpickled object, or ``None`` when the server responds with a
        status code other than 200 (a message is printed in that case).

    Raises:
        requests.exceptions.RequestException: Propagated from
            ``requests.get`` on connection errors or when *timeout* expires.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to fetch URL: {url}. Status code: {response.status_code}")
        return None

    # SECURITY: unpickling executes code embedded in the payload. Only call
    # this with URLs whose content is fully trusted.
    with gzip.open(io.BytesIO(response.content), 'rb') as payload:
        return pickle.load(payload)
@functools.lru_cache(maxsize=1)
def _load_nb_artifacts():
    """Fetch the label encoder, classifier and vectorizer, once per process.

    The previous bare ``st.cache()`` statement was not applied as a
    decorator, so nothing was cached and every ``nb_clf`` call downloaded
    all three files again. ``lru_cache`` keeps the loaded objects in memory.

    Returns:
        A ``(label_encoder, model, vectorizer)`` tuple.

    Raises:
        RuntimeError: If any artifact could not be downloaded. Raising
            (instead of returning ``None``) also keeps a failed load from
            being cached, so the next call retries.
    """
    artifacts = {
        'label encoder': read_bytes_from_url('https://huggingface.co/MENG21/studfacultyeval-NAIVEBAYES/resolve/main/label_encoder.pkl'),
        'model': read_bytes_from_url('https://huggingface.co/MENG21/studfacultyeval-NAIVEBAYES/resolve/main/model.pkl'),
        'vectorizer': read_bytes_from_url('https://huggingface.co/MENG21/studfacultyeval-NAIVEBAYES/resolve/main/vectorizer.pkl'),
    }
    missing = [name for name, obj in artifacts.items() if obj is None]
    if missing:
        raise RuntimeError(
            "Could not load Naive Bayes artifact(s): " + ", ".join(missing)
        )
    return artifacts['label encoder'], artifacts['model'], artifacts['vectorizer']


def nb_clf(sample_comments):
    """Classify the sentiment of each comment with the Naive Bayes model.

    The label encoder, classifier and vectorizer are fetched from the
    Hugging Face hub on first use and reused afterwards.

    Args:
        sample_comments: Iterable of comments to classify.

    Returns:
        A tuple ``(sample_comments, predicted_sentiment_label,
        predicted_proba_dict)``: the comments exactly as passed in, the
        decoded label for each comment, and a list with one
        ``{'positive': pct, 'negative': pct}`` dict per comment
        (probabilities scaled to 0-100).

    Raises:
        RuntimeError: If a model artifact could not be downloaded.
    """
    le, loaded_nb_model, loaded_vectorizer = _load_nb_artifacts()

    # Clean the comments, then project them onto the vectorizer vocabulary.
    cleaned_comments = [preprocess_text(comment) for comment in sample_comments]
    features = loaded_vectorizer.transform(cleaned_comments)

    predicted_proba = loaded_nb_model.predict_proba(features)
    predicted_labels = loaded_nb_model.predict(features)
    predicted_sentiment_label = le.inverse_transform(predicted_labels)

    # NOTE(review): assumes probability column 0 is the negative class and
    # column 1 the positive class - confirm against loaded_nb_model.classes_.
    predicted_proba_dict = [
        {'positive': row[1] * 100, 'negative': row[0] * 100}
        for row in predicted_proba
    ]

    return sample_comments, predicted_sentiment_label, predicted_proba_dict
# sample_comments = [Your list of sample comments here]
# Uncomment the following line to print the results
# print(nb_clf(sample_comments))