Spaces:
Paused
Paused
| import numpy as np | |
| import pandas as pd | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.feature_extraction.text import CountVectorizer | |
| from sklearn.naive_bayes import MultinomialNB | |
| import re | |
| from sklearn.preprocessing import LabelEncoder | |
| import joblib | |
| import pickle | |
| import gzip | |
| import streamlit as st | |
| import requests | |
| import io | |
| # Load and preprocess the data | |
def preprocess_text(text):
    """Normalize one comment before it is vectorized.

    The input is coerced to ``str`` (so non-string values such as ``None``
    or numbers are accepted), lower-cased, and stripped of every character
    that is neither an ASCII letter nor whitespace. Whitespace itself is
    left exactly as it was.

    Args:
        text: The raw comment; any object accepted by ``str()``.

    Returns:
        The cleaned, lower-case string.
    """
    lowered = str(text).lower()
    # Digits, punctuation and non-ASCII characters are dropped, not replaced.
    return re.sub(r'[^a-zA-Z\s]', '', lowered)
| # def nb_clf(sample_comments): | |
| # # # Load dataset | |
| # # df = pd.read_csv('/home/aibo/prototype_v1/DATASET/thesis_final_dataset.csv') | |
| # # # Apply text preprocessing | |
| # # df['cleaned_text'] = df['text'].apply(preprocess_text) | |
| # # # Encode labels | |
| # # le = LabelEncoder() | |
| # # df['label'] = le.fit_transform(df['label']) | |
| # # # Split the dataset into training and testing sets | |
| # # train_df, test_df = train_test_split(df, test_size=0.2) | |
| # # # Create Bag-of-Words representation using CountVectorizer | |
| # # vectorizer = CountVectorizer(max_features=5000) | |
| # # X_train = vectorizer.fit_transform(train_df['cleaned_text']) | |
| # # # X_test = vectorizer.transform(test_df['cleaned_text']) | |
| # # y_train = train_df['label'] | |
| # # # y_test = test_df['label'] | |
| # # # Initialize Naive Bayes classifier | |
| # # nb_classifier = MultinomialNB() | |
| # # # Train Naive Bayes classifier | |
| # # nb_classifier.fit(X_train, y_train) | |
| # # # # Save the trained model | |
| # # # joblib.dump(nb_classifier, "/home/aibo/prototype_v1/NAIVE_BAYES/naive_bayes_sentiment_model.pkl") | |
| # # # Save the trained model to a pickle file | |
| # # with open("/home/aibo/prototype_v1/NAIVE_BAYES/naive_bayes_sentiment_model.pkl", 'wb') as model_file: | |
| # # pickle.dump(nb_classifier, model_file) | |
| # # # Save the CountVectorizer to a pickle file | |
| # # with open('/home/aibo/prototype_v1/NAIVE_BAYES/vectorizer.pkl', 'wb') as vectorizer_file: | |
| # # pickle.dump(vectorizer, vectorizer_file) | |
| # # # Load the trained Naive Bayes model | |
| # # loaded_nb_model = joblib.load("/home/aibo/prototype_v1/NAIVE_BAYES/naive_bayes_sentiment_model.pkl") | |
| # # Load the model from the pickle file | |
| # ####################################### | |
| # # Load LabelEncoder | |
| # with gzip.open(r'/home/aibo/prototype_v1/NAIVE_BAYES/label_encoder.pkl', 'rb') as label_encoder_file: | |
| # le = pickle.load(label_encoder_file) | |
| # # Load the trained Naive Bayes classifier | |
| # with gzip.open(r'/home/aibo/prototype_v1/NAIVE_BAYES/naive_bayes_sentiment_model.pkl', 'rb') as model_file: | |
| # loaded_nb_model = pickle.load(model_file) | |
| # # Load the CountVectorizer | |
| # with gzip.open(r'/home/aibo/prototype_v1/NAIVE_BAYES/vectorizer.pkl', 'rb') as vectorizer_file: | |
| # loaded_vectorizer = pickle.load(vectorizer_file) | |
| # # Apply text preprocessing to the sample comments | |
| # sample_comments_preprocessed = [preprocess_text(comment) for comment in sample_comments] | |
| # # Transform the sample comments using the same CountVectorizer | |
| # sample_comments_transformed = loaded_vectorizer.transform(sample_comments_preprocessed) | |
| # # Use the loaded model for inference | |
| # predicted_labels = loaded_nb_model.predict(sample_comments_transformed) | |
| # predicted_sentiment_label = le.inverse_transform(predicted_labels) | |
| # # # Decode the predicted labels using the loaded label encoder | |
| # # le = LabelEncoder() | |
| # # decoded_labels = le.inverse_transform(predicted_labels) | |
| # # Mapping function | |
| # # Map predictions to "negative" (0) and "positive" (1) | |
| # # predicted_labels = ["negative" if pred == 0 else "positive" for pred in predicted_labels] | |
| # return sample_comments_preprocessed, predicted_sentiment_label | |
| # # sample_comments = [ | |
| # # "The disinterested teaching style of the instructor made it hard to fully comprehend and engage with the material", | |
| # # "Hindi ko matukoy kung paano mapapakinabangan ang mga kasanayang ito sa totoong buhay", | |
| # # "The course lacks real-world applications of machine learning that would enhance practical understanding.", | |
| # # "your positivity is like a ray of sunshine on a cloudy day.", | |
| # # "I'm grateful for the positive impact you've had on my education", | |
| # # "The instructors' enthusiasm creates a positive learning environment where everyone feels encouraged to participate and ask questions", | |
| # # "Hindi ako nakatutok sa lecture na ito", | |
| # # "You show the true value of education.", | |
| # # "Ipinapakita mo ang halaga ng pagiging positibo at pagiging bukas sa pagbabago sa aming mga buhay", | |
| # # "You give meaning to our dreams.", | |
| # # "Your class has ignited a passion for the subject in me", | |
| # # "I didn't find the coursework challenging or stimulating", | |
| # # "Napakahusay mong magbigay ng mga halimbawa na nagpapakita ng tunay na buhay na aplikasyon ng aming natutunan", | |
| # # "You've provided valuable insights that will stay with me", | |
| # # "I hoped for more enthusiasm and passion from our instructors", | |
| # # "Your lessons shed light on our minds.", | |
| # # "The instructor's lack of enthusiasm is reflected in the students' lack of interest", | |
| # # "your perseverance in the face of challenges is truly admirable.", | |
| # # "Minsan nakakalito ang pagkasunod-sunod ng mga topics", | |
| # # "hindi mo maasahan sa bawat tanong", | |
| # # "hindi sobrang magaling magturo si sir", | |
| # # "not so bad, he teaches not very bad", | |
| # # ] | |
| # # print(nb_clf(sample_comments)) | |
def read_bytes_from_url(url, timeout=30):
    """Download a gzip-compressed pickle and return the unpickled object.

    Args:
        url: Address of the gzip-compressed pickle file to fetch.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument, so that a stalled connection cannot block the app
            forever.

    Returns:
        The object stored in the pickle, or ``None`` when the server
        replies with any status code other than 200 (a message is printed
        in that case).

    Raises:
        requests.exceptions.RequestException: If the request itself fails
            or times out.
        Exception: Whatever ``gzip``/``pickle`` raise when the payload is
            not a valid gzip-compressed pickle.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to fetch URL: {url}. Status code: {response.status_code}")
        return None

    # SECURITY: unpickling executes arbitrary code embedded in the payload.
    # Only call this with URLs whose content is fully trusted.
    with gzip.open(io.BytesIO(response.content), 'rb') as payload:
        return pickle.load(payload)
# The original module called ``st.cache()`` as a bare statement, which
# decorates nothing, so every prediction re-downloaded all three artifacts.
# Prefer st.cache_resource (Streamlit's cache for models and other shared
# objects); fall back to the legacy st.cache on releases that predate it.
if hasattr(st, "cache_resource"):
    _cache_model_artifacts = st.cache_resource
else:
    _cache_model_artifacts = st.cache(allow_output_mutation=True)


@_cache_model_artifacts
def _load_nb_artifacts():
    """Fetch the label encoder, Naive Bayes model and vectorizer from the Hub.

    Streamlit caches the returned tuple, so the downloads are not repeated
    on every call to ``nb_clf``.

    Returns:
        A ``(label_encoder, model, vectorizer)`` tuple.

    Raises:
        RuntimeError: If any artifact could not be downloaded. Raising
            (instead of returning ``None`` values) keeps a failed download
            out of the cache so the next call retries.
    """
    artifacts = {
        'label encoder': read_bytes_from_url('https://huggingface.co/MENG21/studfacultyeval-NAIVEBAYES/resolve/main/label_encoder.pkl'),
        'model': read_bytes_from_url('https://huggingface.co/MENG21/studfacultyeval-NAIVEBAYES/resolve/main/model.pkl'),
        'vectorizer': read_bytes_from_url('https://huggingface.co/MENG21/studfacultyeval-NAIVEBAYES/resolve/main/vectorizer.pkl'),
    }
    missing = [name for name, obj in artifacts.items() if obj is None]
    if missing:
        raise RuntimeError(
            "Could not download Naive Bayes artifact(s): " + ", ".join(missing)
        )
    return artifacts['label encoder'], artifacts['model'], artifacts['vectorizer']


def nb_clf(sample_comments):
    """Classify the sentiment of each comment with the Naive Bayes model.

    Args:
        sample_comments: Iterable of raw comments. Each one is cleaned with
            ``preprocess_text`` before being vectorized.

    Returns:
        A 3-tuple ``(sample_comments, labels, scores)`` where
        ``sample_comments`` is the argument as passed in (not the cleaned
        text), ``labels`` is the result of the label encoder's
        ``inverse_transform`` on the predicted classes, and ``scores`` is a
        list with one ``{'positive': pct, 'negative': pct}`` dict per
        comment, expressed as percentages.

    Raises:
        RuntimeError: If a model artifact could not be downloaded.
    """
    le, loaded_nb_model, loaded_vectorizer = _load_nb_artifacts()

    # Clean and vectorize with the same vectorizer that was loaded alongside
    # the model.
    sample_comments_preprocessed = [preprocess_text(comment) for comment in sample_comments]
    sample_comments_transformed = loaded_vectorizer.transform(sample_comments_preprocessed)

    predicted_proba = loaded_nb_model.predict_proba(sample_comments_transformed)
    predicted_labels = loaded_nb_model.predict(sample_comments_transformed)
    predicted_sentiment_label = le.inverse_transform(predicted_labels)

    # NOTE(review): assumes a two-class model whose probability column 0 is
    # "negative" and column 1 is "positive" -- confirm against the trained
    # model's ``classes_``.
    predicted_proba_dict = [
        {'positive': prob_array[1]*100, 'negative': prob_array[0]*100}
        for prob_array in predicted_proba
    ]

    return sample_comments, predicted_sentiment_label, predicted_proba_dict
| # sample_comments = [Your list of sample comments here] | |
| # Uncomment the following line to print the results | |
| # print(nb_clf(sample_comments)) | |