File size: 8,483 Bytes
e4fe207
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
import re
from sklearn.preprocessing import LabelEncoder
import joblib
import pickle
import gzip
import streamlit as st
import requests
import io


# Load and preprocess the data
def preprocess_text(text):
    """Normalize a raw comment for the bag-of-words sentiment model.

    Coerces the input to ``str`` (so NaN/None/numeric cells from pandas
    don't crash the pipeline), lower-cases it, and strips every character
    that is not an ASCII letter or whitespace.
    """
    normalized = str(text).lower()
    # Keep only letters and whitespace; punctuation/digits carry no signal here.
    return re.sub(r'[^a-zA-Z\s]', '', normalized)

# def nb_clf(sample_comments):
#     # # Load dataset
#     # df = pd.read_csv('/home/aibo/prototype_v1/DATASET/thesis_final_dataset.csv')

#     # # Apply text preprocessing
#     # df['cleaned_text'] = df['text'].apply(preprocess_text)

#     # # Encode labels
#     # le = LabelEncoder()
#     # df['label'] = le.fit_transform(df['label'])

#     # # Split the dataset into training and testing sets
#     # train_df, test_df = train_test_split(df, test_size=0.2) 

#     # # Create Bag-of-Words representation using CountVectorizer
#     # vectorizer = CountVectorizer(max_features=5000)
#     # X_train = vectorizer.fit_transform(train_df['cleaned_text'])
#     # # X_test = vectorizer.transform(test_df['cleaned_text'])
#     # y_train = train_df['label']
#     # # y_test = test_df['label']

#     # # Initialize Naive Bayes classifier
#     # nb_classifier = MultinomialNB()

#     # # Train Naive Bayes classifier
#     # nb_classifier.fit(X_train, y_train)

#     # # # Save the trained model
#     # # joblib.dump(nb_classifier, "/home/aibo/prototype_v1/NAIVE_BAYES/naive_bayes_sentiment_model.pkl")
    
#     # # Save the trained model to a pickle file
#     # with open("/home/aibo/prototype_v1/NAIVE_BAYES/naive_bayes_sentiment_model.pkl", 'wb') as model_file:
#     #     pickle.dump(nb_classifier, model_file)
        
#     # # Save the CountVectorizer to a pickle file
#     # with open('/home/aibo/prototype_v1/NAIVE_BAYES/vectorizer.pkl', 'wb') as vectorizer_file:
#     #     pickle.dump(vectorizer, vectorizer_file)

#     # # Load the trained Naive Bayes model
#     # loaded_nb_model = joblib.load("/home/aibo/prototype_v1/NAIVE_BAYES/naive_bayes_sentiment_model.pkl")
#     # Load the model from the pickle file
    
    
#     #######################################
#     # Load LabelEncoder
#     with gzip.open(r'/home/aibo/prototype_v1/NAIVE_BAYES/label_encoder.pkl', 'rb') as label_encoder_file:
#         le = pickle.load(label_encoder_file)

#     # Load the trained Naive Bayes classifier
#     with gzip.open(r'/home/aibo/prototype_v1/NAIVE_BAYES/naive_bayes_sentiment_model.pkl', 'rb') as model_file:
#         loaded_nb_model = pickle.load(model_file)

#     # Load the CountVectorizer
#     with gzip.open(r'/home/aibo/prototype_v1/NAIVE_BAYES/vectorizer.pkl', 'rb') as vectorizer_file:
#         loaded_vectorizer = pickle.load(vectorizer_file)

#     # Apply text preprocessing to the sample comments
#     sample_comments_preprocessed = [preprocess_text(comment) for comment in sample_comments]

#     # Transform the sample comments using the same CountVectorizer
#     sample_comments_transformed = loaded_vectorizer.transform(sample_comments_preprocessed)

#     # Use the loaded model for inference
#     predicted_labels = loaded_nb_model.predict(sample_comments_transformed)
    
#     predicted_sentiment_label = le.inverse_transform(predicted_labels)

#     # # Decode the predicted labels using the loaded label encoder
#     # le = LabelEncoder()
#     # decoded_labels = le.inverse_transform(predicted_labels)
#     # Mapping function
#     # Map predictions to "negative" (0) and "positive" (1)
#     # predicted_labels = ["negative" if pred == 0 else "positive" for pred in predicted_labels]
    
#     return sample_comments_preprocessed, predicted_sentiment_label


# # sample_comments = [
# #     "The disinterested teaching style of the instructor made it hard to fully comprehend and engage with the material",
# #     "Hindi ko matukoy kung paano mapapakinabangan ang mga kasanayang ito sa totoong buhay",
# #     "The course lacks real-world applications of machine learning that would enhance practical understanding.",
# #     "your positivity is like a ray of sunshine on a cloudy day.",
# #     "I'm grateful for the positive impact you've had on my education",
# #     "The instructors' enthusiasm creates a positive learning environment where everyone feels encouraged to participate and ask questions",
# #     "Hindi ako nakatutok sa lecture na ito",
# #     "You show the true value of education.",
# #     "Ipinapakita mo ang halaga ng pagiging positibo at pagiging bukas sa pagbabago sa aming mga buhay",
# #     "You give meaning to our dreams.",
# #     "Your class has ignited a passion for the subject in me",
# #     "I didn't find the coursework challenging or stimulating",
# #     "Napakahusay mong magbigay ng mga halimbawa na nagpapakita ng tunay na buhay na aplikasyon ng aming natutunan",
# #     "You've provided valuable insights that will stay with me",
# #     "I hoped for more enthusiasm and passion from our instructors",
# #     "Your lessons shed light on our minds.",
# #     "The instructor's lack of enthusiasm is reflected in the students' lack of interest",
# #     "your perseverance in the face of challenges is truly admirable.",
# #     "Minsan nakakalito ang pagkasunod-sunod ng mga topics",
# #     "hindi mo maasahan sa bawat tanong",
# #     "hindi sobrang magaling magturo si sir",
# #     "not so bad, he teaches not very bad",
# # ]


# # print(nb_clf(sample_comments))

def read_bytes_from_url(url, timeout=30):
    """Download a gzip-compressed pickle from *url* and return the unpickled object.

    Args:
        url: Direct download URL (here: Hugging Face Hub ``resolve`` links).
        timeout: Seconds to wait for the HTTP request before giving up;
            without one, ``requests.get`` can block indefinitely.

    Returns:
        The deserialized object, or ``None`` if the request did not return
        HTTP 200 (a message is printed in that case).

    SECURITY NOTE: ``pickle.load`` executes arbitrary code from the payload.
    Only point this at artifacts you control/trust (as done here).
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to fetch URL: {url}. Status code: {response.status_code}")
        return None

    # The Hub artifacts are gzip-compressed pickle files, so wrap the raw
    # bytes in a BytesIO and decompress before unpickling.
    with gzip.open(io.BytesIO(response.content), 'rb') as pickle_file:
        return pickle.load(pickle_file)

@st.cache  # BUG FIX: was a bare `st.cache()` statement, so the decorator was
# never applied and the models were re-downloaded from the Hub on every rerun.
# (On newer Streamlit versions prefer `@st.cache_data` / `@st.cache_resource`.)
def nb_clf(sample_comments):
    """Run Naive Bayes sentiment inference on a list of raw comments.

    Downloads the LabelEncoder, trained MultinomialNB model, and
    CountVectorizer from the Hugging Face Hub, preprocesses the comments,
    and predicts a sentiment label plus class probabilities for each.

    Args:
        sample_comments: Iterable of raw comment strings.

    Returns:
        Tuple of (original comments, decoded sentiment labels,
        list of {'positive': pct, 'negative': pct} dicts per comment).
    """
    # Load the fitted LabelEncoder from the Hugging Face Hub.
    le = read_bytes_from_url('https://huggingface.co/MENG21/studfacultyeval-NAIVEBAYES/resolve/main/label_encoder.pkl')

    # Load the trained Naive Bayes classifier from the Hugging Face Hub.
    loaded_nb_model = read_bytes_from_url('https://huggingface.co/MENG21/studfacultyeval-NAIVEBAYES/resolve/main/model.pkl')

    # Load the CountVectorizer fitted alongside the model.
    loaded_vectorizer = read_bytes_from_url('https://huggingface.co/MENG21/studfacultyeval-NAIVEBAYES/resolve/main/vectorizer.pkl')

    # Apply the same text preprocessing used at training time.
    sample_comments_preprocessed = [preprocess_text(comment) for comment in sample_comments]

    # Vectorize with the fitted CountVectorizer (transform only, no refit).
    sample_comments_transformed = loaded_vectorizer.transform(sample_comments_preprocessed)

    # Class-membership probabilities and hard label predictions.
    predicted_proba = loaded_nb_model.predict_proba(sample_comments_transformed)
    predicted_labels = loaded_nb_model.predict(sample_comments_transformed)

    # Decode integer labels back to their original string names.
    predicted_sentiment_label = le.inverse_transform(predicted_labels)

    # Per-comment percentage scores. Assumes column 0 = negative and
    # column 1 = positive per the LabelEncoder's alphabetical ordering —
    # TODO confirm against `le.classes_`.
    predicted_proba_dict = [
        {'positive': proba[1] * 100, 'negative': proba[0] * 100}
        for proba in predicted_proba
    ]

    return sample_comments, predicted_sentiment_label, predicted_proba_dict

# sample_comments = [Your list of sample comments here]

# Uncomment the following line to print the results
# print(nb_clf(sample_comments))