Spaces:

MENG21
/

studfaceval

Paused

App Files Files Community

studfaceval / app5_selectbox /naive_bayes_cl.py

MENG21

Upload 68 files

e4fe207 verified almost 2 years ago

raw

history blame contribute delete

8.48 kB

	import numpy as np
	import pandas as pd
	from sklearn.model_selection import train_test_split
	from sklearn.feature_extraction.text import CountVectorizer
	from sklearn.naive_bayes import MultinomialNB
	import re
	from sklearn.preprocessing import LabelEncoder
	import joblib
	import pickle
	import gzip
	import streamlit as st
	import requests
	import io


	# Text normalisation applied to every comment before vectorisation
	def preprocess_text(text: object) -> str:
	    """Normalise a raw comment into lowercase letters and whitespace only.

	    The value is coerced with ``str()`` first, so non-string inputs are
	    accepted (``None`` becomes ``"none"``, numbers become their digits and
	    are then stripped).  Every character that is not an ASCII letter or
	    whitespace is removed; whitespace itself is left untouched, so removed
	    punctuation or digits can leave leading, trailing or doubled spaces.

	    Args:
	        text: The raw value to clean.

	    Returns:
	        The case-folded text with all non-letter, non-whitespace
	        characters removed.
	    """
	    # Case folding and normalization
	    lowered = str(text).lower()
	    return re.sub(r'[^a-zA-Z\s]', '', lowered)

	# def nb_clf(sample_comments):
	# # # Load dataset
	# # df = pd.read_csv('/home/aibo/prototype_v1/DATASET/thesis_final_dataset.csv')

	# # # Apply text preprocessing
	# # df['cleaned_text'] = df['text'].apply(preprocess_text)

	# # # Encode labels
	# # le = LabelEncoder()
	# # df['label'] = le.fit_transform(df['label'])

	# # # Split the dataset into training and testing sets
	# # train_df, test_df = train_test_split(df, test_size=0.2)

	# # # Create Bag-of-Words representation using CountVectorizer
	# # vectorizer = CountVectorizer(max_features=5000)
	# # X_train = vectorizer.fit_transform(train_df['cleaned_text'])
	# # # X_test = vectorizer.transform(test_df['cleaned_text'])
	# # y_train = train_df['label']
	# # # y_test = test_df['label']

	# # # Initialize Naive Bayes classifier
	# # nb_classifier = MultinomialNB()

	# # # Train Naive Bayes classifier
	# # nb_classifier.fit(X_train, y_train)

	# # # # Save the trained model
	# # # joblib.dump(nb_classifier, "/home/aibo/prototype_v1/NAIVE_BAYES/naive_bayes_sentiment_model.pkl")

	# # # Save the trained model to a pickle file
	# # with open("/home/aibo/prototype_v1/NAIVE_BAYES/naive_bayes_sentiment_model.pkl", 'wb') as model_file:
	# # pickle.dump(nb_classifier, model_file)

	# # # Save the CountVectorizer to a pickle file
	# # with open('/home/aibo/prototype_v1/NAIVE_BAYES/vectorizer.pkl', 'wb') as vectorizer_file:
	# # pickle.dump(vectorizer, vectorizer_file)

	# # # Load the trained Naive Bayes model
	# # loaded_nb_model = joblib.load("/home/aibo/prototype_v1/NAIVE_BAYES/naive_bayes_sentiment_model.pkl")
	# # Load the model from the pickle file


	# #######################################
	# # Load LabelEncoder
	# with gzip.open(r'/home/aibo/prototype_v1/NAIVE_BAYES/label_encoder.pkl', 'rb') as label_encoder_file:
	# le = pickle.load(label_encoder_file)

	# # Load the trained Naive Bayes classifier
	# with gzip.open(r'/home/aibo/prototype_v1/NAIVE_BAYES/naive_bayes_sentiment_model.pkl', 'rb') as model_file:
	# loaded_nb_model = pickle.load(model_file)

	# # Load the CountVectorizer
	# with gzip.open(r'/home/aibo/prototype_v1/NAIVE_BAYES/vectorizer.pkl', 'rb') as vectorizer_file:
	# loaded_vectorizer = pickle.load(vectorizer_file)

	# # Apply text preprocessing to the sample comments
	# sample_comments_preprocessed = [preprocess_text(comment) for comment in sample_comments]

	# # Transform the sample comments using the same CountVectorizer
	# sample_comments_transformed = loaded_vectorizer.transform(sample_comments_preprocessed)

	# # Use the loaded model for inference
	# predicted_labels = loaded_nb_model.predict(sample_comments_transformed)

	# predicted_sentiment_label = le.inverse_transform(predicted_labels)

	# # # Decode the predicted labels using the loaded label encoder
	# # le = LabelEncoder()
	# # decoded_labels = le.inverse_transform(predicted_labels)
	# # Mapping function
	# # Map predictions to "negative" (0) and "positive" (1)
	# # predicted_labels = ["negative" if pred == 0 else "positive" for pred in predicted_labels]

	# return sample_comments_preprocessed, predicted_sentiment_label


	# # sample_comments = [
	# # "The disinterested teaching style of the instructor made it hard to fully comprehend and engage with the material",
	# # "Hindi ko matukoy kung paano mapapakinabangan ang mga kasanayang ito sa totoong buhay",
	# # "The course lacks real-world applications of machine learning that would enhance practical understanding.",
	# # "your positivity is like a ray of sunshine on a cloudy day.",
	# # "I'm grateful for the positive impact you've had on my education",
	# # "The instructors' enthusiasm creates a positive learning environment where everyone feels encouraged to participate and ask questions",
	# # "Hindi ako nakatutok sa lecture na ito",
	# # "You show the true value of education.",
	# # "Ipinapakita mo ang halaga ng pagiging positibo at pagiging bukas sa pagbabago sa aming mga buhay",
	# # "You give meaning to our dreams.",
	# # "Your class has ignited a passion for the subject in me",
	# # "I didn't find the coursework challenging or stimulating",
	# # "Napakahusay mong magbigay ng mga halimbawa na nagpapakita ng tunay na buhay na aplikasyon ng aming natutunan",
	# # "You've provided valuable insights that will stay with me",
	# # "I hoped for more enthusiasm and passion from our instructors",
	# # "Your lessons shed light on our minds.",
	# # "The instructor's lack of enthusiasm is reflected in the students' lack of interest",
	# # "your perseverance in the face of challenges is truly admirable.",
	# # "Minsan nakakalito ang pagkasunod-sunod ng mga topics",
	# # "hindi mo maasahan sa bawat tanong",
	# # "hindi sobrang magaling magturo si sir",
	# # "not so bad, he teaches not very bad",
	# # ]


	# # print(nb_clf(sample_comments))

	def read_bytes_from_url(url, timeout=30):
	    """Download a gzip-compressed pickle from ``url`` and return the object.

	    Args:
	        url: Address of the gzip-compressed pickle file to fetch.
	        timeout: Seconds ``requests`` waits on the server before giving up.
	            Without it a stalled connection would block the caller forever.

	    Returns:
	        The unpickled object, or ``None`` when the server answers with a
	        status code other than 200 (a message is printed in that case).

	    Raises:
	        requests.RequestException: On connection failures or timeouts.
	        Errors raised by ``gzip`` or ``pickle`` for a malformed payload
	        propagate unchanged.
	    """
	    response = requests.get(url, timeout=timeout)
	    if response.status_code != 200:
	        print(f"Failed to fetch URL: {url}. Status code: {response.status_code}")
	        return None

	    # SECURITY: unpickling executes whatever code the payload contains.
	    # Only call this with URLs whose content is fully trusted.
	    with gzip.open(io.BytesIO(response.content), 'rb') as payload:
	        return pickle.load(payload)

	# NOTE: a bare ``st.cache()`` call used to sit here.  It was not applied as a
	# decorator, so it cached nothing and every call re-downloaded all three
	# artifacts.  The explicit cache below provides the intended behaviour
	# without depending on a particular Streamlit caching API.
	_ARTIFACT_BASE_URL = 'https://huggingface.co/MENG21/studfacultyeval-NAIVEBAYES/resolve/main/'
	_ARTIFACT_CACHE = {}


	def _load_artifact(filename):
	    """Return one model artifact from the Hub, downloading it at most once."""
	    if filename not in _ARTIFACT_CACHE:
	        artifact = read_bytes_from_url(_ARTIFACT_BASE_URL + filename)
	        if artifact is None:
	            # Failed downloads are not cached, so a later call can retry.
	            raise RuntimeError(f"Could not load model artifact: {filename}")
	        _ARTIFACT_CACHE[filename] = artifact
	    return _ARTIFACT_CACHE[filename]


	def nb_clf(sample_comments):
	    """Classify the sentiment of each comment with the Naive Bayes model.

	    The label encoder, the trained classifier and the fitted vectorizer are
	    fetched from the Hugging Face Hub on first use and reused afterwards.

	    Args:
	        sample_comments: Iterable of raw comments; each one is cleaned with
	            ``preprocess_text`` before being vectorised.

	    Returns:
	        A 3-tuple ``(sample_comments, predicted_sentiment_label,
	        predicted_proba_dict)``:
	        the input as received, the decoded label for each comment, and a
	        list with one ``{'positive': pct, 'negative': pct}`` dict per
	        comment (probabilities scaled to 0-100).  For empty input the
	        labels are an empty array and the list is empty.

	    Raises:
	        RuntimeError: If any of the three artifacts cannot be downloaded.
	    """
	    # Apply text preprocessing to the sample comments
	    cleaned_comments = [preprocess_text(comment) for comment in sample_comments]
	    if not cleaned_comments:
	        # Nothing to classify: skip the downloads and the model calls.
	        return sample_comments, np.array([], dtype=object), []

	    le = _load_artifact('label_encoder.pkl')
	    loaded_nb_model = _load_artifact('model.pkl')
	    loaded_vectorizer = _load_artifact('vectorizer.pkl')

	    # Transform the comments using the same vectorizer the model was loaded with
	    features = loaded_vectorizer.transform(cleaned_comments)

	    # Class probabilities and hard labels from the loaded model
	    predicted_proba = loaded_nb_model.predict_proba(features)
	    predicted_labels = loaded_nb_model.predict(features)
	    predicted_sentiment_label = le.inverse_transform(predicted_labels)

	    # NOTE(review): assumes probability column 0 is "negative" and column 1 is
	    # "positive" -- confirm against the encoder's class order.
	    predicted_proba_dict = [
	        {'positive': prob_array[1] * 100, 'negative': prob_array[0] * 100}
	        for prob_array in predicted_proba
	    ]

	    # Return sample comments, predicted labels, and classification scores
	    return sample_comments, predicted_sentiment_label, predicted_proba_dict

	# sample_comments = [Your list of sample comments here]

	# Uncomment the following line to print the results
	# print(nb_clf(sample_comments))