# studfaceval / app5_selectbox / "evaluation_analysis copy.py"
# (Hugging Face Spaces page residue — uploader: MENG21, commit: 9d38733 —
#  commented out so the module is importable.)
import gspread
import pandas as pd
from oauth2client.service_account import ServiceAccountCredentials
from transformers import BertForSequenceClassification, BertTokenizer
import torch
import streamlit as st
from matplotlib import pyplot as plt
import numpy as np
from wordcloud import WordCloud
# from PIL import ImageFont
from app5_selectbox.langchain_llama_gpu import llm_chain
# # Load the model and tokenizer
# model = BertForSequenceClassification.from_pretrained("./sentiment_model")
# tokenizer = BertTokenizer.from_pretrained("./sentiment_model")
# TrueType font used by every word cloud. Hoisted to module level: previously it
# was assigned inside a loop *after* a `continue`, so it could be unbound when
# the all-comments cloud was rendered, raising NameError.
FONT_PATH = "QuartzoBold-W9lv.ttf"  # Replace with the path to your TTF font file


def _predict_sentiments(comments):
    """Classify each comment as 'negative' or 'positive'.

    Uses the fine-tuned BERT checkpoint in ./sentiment_model (binary head —
    there is no neutral class) with the stock bert-base-uncased tokenizer.

    Parameters
    ----------
    comments : list[str]
        Raw student comments.

    Returns
    -------
    list[str]
        One sentiment label per comment, in input order.
    """
    # torch / BertForSequenceClassification / BertTokenizer come from the
    # file-level imports; the old in-function re-imports were redundant.
    model = BertForSequenceClassification.from_pretrained('sentiment_model')
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)
    encodings = tokenizer(list(comments), truncation=True, padding=True,
                          max_length=128, return_tensors='pt')
    with torch.no_grad():
        outputs = model(encodings['input_ids'],
                        attention_mask=encodings['attention_mask'])
    predicted = torch.argmax(outputs.logits, dim=1).tolist()
    sentiment_labels = ['negative', 'positive']  # index order fixed by training
    return [sentiment_labels[label] for label in predicted]


def _render_distribution(predictions):
    """Render the sentiment bar chart (counts, annotated with percentages)."""
    sentiment_counts = pd.Series(np.array(predictions)).value_counts()
    # 'neutral' intentionally omitted: the model is binary.
    desired_order = ['positive', 'negative']
    sentiment_counts = sentiment_counts.reindex(desired_order, fill_value=0)
    # `or 1` guards the empty-comments case (previously ZeroDivisionError).
    percentage_distribution = sentiment_counts / (len(predictions) or 1) * 100

    st.write("### Sentiment Distribution")
    fig, ax = plt.subplots(figsize=(8, 6))
    bars = ax.bar(percentage_distribution.index, sentiment_counts.values,
                  color=['green', 'orange', 'red'])
    ax.set_xlabel('Sentiment')
    ax.set_ylabel('Count')
    ax.set_title('Sentiment Distribution in Sample Predictions')
    plt.xticks(rotation=45)
    for bar, percentage, label in zip(bars, percentage_distribution, desired_order):
        ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height(),
                f'{percentage:.2f}% {label.upper()}', ha='center', va='bottom')
    st.pyplot(fig)


def _render_wordclouds(comments, predictions):
    """Render per-sentiment word clouds plus an all-comments cloud.

    Returns
    -------
    str
        Space-separated keywords taken from the negative cloud's vocabulary
        (empty string when there are no negative comments); this feeds the
        LLM recommendation prompt.
    """
    sentiment_texts = {'positive': [], 'negative': []}
    for text, sentiment in zip(comments, predictions):
        sentiment_texts[sentiment].append(text)

    negative_keywords = ""
    for sentiment, texts in sentiment_texts.items():
        words = ' '.join(texts).split()
        combined = ' '.join(word for word in words if len(word) > 2)
        if combined == "":
            continue  # nothing to plot for this sentiment
        wordcloud = WordCloud(font_path=FONT_PATH, width=800, height=600,
                              background_color='white', max_words=15).generate(combined)
        st.write(f"### Word Cloud for {sentiment} Sentiment")
        plt.figure(figsize=(10, 6))
        plt.imshow(wordcloud, interpolation='bilinear')
        plt.axis('off')
        st.pyplot()
        if sentiment == "negative":
            # The cloud's top words double as the keyword list for the LLM.
            negative_keywords = ' '.join(wordcloud.words_.keys()) + ' '

    # Cloud over every comment regardless of sentiment.
    # NOTE(review): WordCloud.generate raises if the text is empty after
    # filtering — same as the original; callers pass non-empty comment lists.
    all_words = ' '.join(comments).split()
    all_text = ' '.join(word for word in all_words if len(word) > 3)
    st.write("### Word Cloud for All Sentiments")
    wordcloud = WordCloud(font_path=FONT_PATH, width=800, height=800,
                          background_color='white', max_words=200).generate(all_text)
    plt.figure(figsize=(8, 8), facecolor=None)
    plt.imshow(wordcloud)
    plt.axis("off")
    st.pyplot()
    return negative_keywords


def eval_analysis(Instructor, Instructor_comment, criteria_results):
    """Build the sentiment-analysis dashboard for one instructor's feedback.

    Classifies each comment with the fine-tuned BERT model, renders counts,
    a distribution chart and word clouds to the Streamlit page, then asks the
    LLM chain for a recommendation based on the negative-comment keywords.

    Parameters
    ----------
    Instructor : pandas.DataFrame
        Rows for the selected instructor. Currently unused by the active code
        path; kept for interface compatibility with callers.
    Instructor_comment : list[str]
        Free-text comments for the selected instructor.
    criteria_results : Any
        Per-criteria evaluation results, interpolated verbatim into the prompt.

    Side effects only (Streamlit widgets + LLM call); returns None.
    """
    # Must run before the first bare st.pyplot() call (word clouds pass no
    # figure) — previously this was set *after* the first such call.
    st.set_option('deprecation.showPyplotGlobalUse', False)

    predicted_sentiments = _predict_sentiments(Instructor_comment)
    negative_count = predicted_sentiments.count("negative")
    positive_count = predicted_sentiments.count("positive")

    st.title("Sentiment Analysis Dashboard")
    st.sidebar.header("Settings")
    link_text = "Instructor Survey"
    link_url = "https://forms.gle/64n9CXMDRP2NYgZYA"
    st.sidebar.markdown(f"[{link_text}]({link_url})")

    st.write("### Sentiment Counts")
    st.write(f"Negative: {negative_count}")
    st.write(f"Positive: {positive_count}")

    _render_distribution(predicted_sentiments)
    negative_keywords = _render_wordclouds(Instructor_comment, predicted_sentiments)

    # Negative-cloud vocabulary, comma-separated, drives the recommendation.
    keyword_list = ", ".join(negative_keywords.split())
    prompt = f"""
Based from these students' feedback: {keyword_list}. \n
Please generate a recommendation to the instructor. Make it in sentence type and in English language only.
Then give insights about the evaluation report based from different criteria.
Here is the results: {criteria_results}
Your response format-
Recommendation to Instructor:
Insights on Evaluation Report:
"""

    st.title('Recommendation:')
    with st.spinner("Generating Recommendation"):
        st.write(llm_chain.run(prompt))
    st.success("Generation Complete!")