import gzip
import io
import pickle
import re

import joblib
import numpy as np
import pandas as pd
import requests
import streamlit as st
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.preprocessing import LabelEncoder


def preprocess_text(text):
    """Normalize raw comment text for the bag-of-words model.

    Coerces ``text`` to ``str`` first (values may arrive as NaN/None from a
    pandas column), lower-cases it, and strips every character that is not an
    ASCII letter or whitespace.

    Parameters
    ----------
    text : Any
        Raw comment value; anything ``str()`` accepts.

    Returns
    -------
    str
        The cleaned, lower-cased text.
    """
    text = str(text)
    text = text.lower()
    # After lower-casing only a-z remain; keeping A-Z in the class is harmless.
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    return text

# NOTE: a large commented-out one-off training pipeline used to live here
# (load CSV, fit CountVectorizer + MultinomialNB, pickle the model, the
# vectorizer, and the label encoder). It was removed as dead code; the
# pickled artifacts it produced are now downloaded from the Hugging Face Hub
# at inference time.
def read_bytes_from_url(url):
    """Download ``url`` and return the unpickled object from its gzip payload.

    Returns ``None`` (after printing a diagnostic) when the server does not
    answer with HTTP 200, preserving the original best-effort behaviour.
    """
    # NOTE(security): pickle.load on downloaded bytes executes arbitrary code
    # if the host is ever compromised -- only point this at trusted URLs.
    response = requests.get(url, timeout=30)  # timeout so the app cannot hang forever
    if response.status_code == 200:
        with gzip.open(io.BytesIO(response.content), 'rb') as payload:
            return pickle.load(payload)
    print(f"Failed to fetch URL: {url}. Status code: {response.status_code}")
    return None


# BUG FIX: the original executed ``st.cache()`` as a bare statement, which
# builds a decorator and immediately discards it -- nothing was ever cached,
# and every nb_clf() call re-downloaded all three pickles. Caching belongs on
# the artifact download, so each file is fetched once per session.
@st.cache(allow_output_mutation=True)  # output hashing skipped: sklearn objects
def _load_nb_artifacts():
    """Fetch (label encoder, Naive Bayes model, CountVectorizer) from the Hub."""
    base = 'https://huggingface.co/MENG21/studfacultyeval-NAIVEBAYES/resolve/main/'
    le = read_bytes_from_url(base + 'label_encoder.pkl')
    model = read_bytes_from_url(base + 'model.pkl')
    vectorizer = read_bytes_from_url(base + 'vectorizer.pkl')
    return le, model, vectorizer


def nb_clf(sample_comments):
    """Classify the sentiment of each comment with the pickled Naive Bayes model.

    Parameters
    ----------
    sample_comments : iterable of str
        Raw evaluation comments (English or Filipino, per the sample data).

    Returns
    -------
    tuple
        ``(sample_comments, predicted_sentiment_label, predicted_proba_dict)``:
        the original comments, the decoded class labels, and one
        ``{'positive': pct, 'negative': pct}`` dict per comment.
    """
    le, model, vectorizer = _load_nb_artifacts()

    # Same preprocessing as training, then the same CountVectorizer features.
    cleaned = [preprocess_text(comment) for comment in sample_comments]
    features = vectorizer.transform(cleaned)

    predicted_labels = model.predict(features)
    predicted_sentiment_label = le.inverse_transform(predicted_labels)

    # Map predict_proba columns to labels through the encoder instead of
    # assuming column 0 == negative and column 1 == positive, which is only
    # true if the encoder happened to assign the classes in that order.
    # (Assumes the encoder's classes are exactly 'negative'/'positive'.)
    class_names = le.inverse_transform(model.classes_)
    column = {name: i for i, name in enumerate(class_names)}
    predicted_proba_dict = [
        {'positive': row[column['positive']] * 100,
         'negative': row[column['negative']] * 100}
        for row in model.predict_proba(features)
    ]

    return sample_comments, predicted_sentiment_label, predicted_proba_dict

# Example usage:
# sample_comments = ["Your class has ignited a passion for the subject in me"]
# print(nb_clf(sample_comments))