# studfaceval / app5_selectbox / "evaluation_analysis copy.py"
# (Hugging Face Spaces page residue — uploader: MENG21, commit: 9d38733 —
#  commented out so the module is importable.)
import gspread
import pandas as pd
from oauth2client.service_account import ServiceAccountCredentials
from transformers import BertForSequenceClassification, BertTokenizer
import torch
import streamlit as st
from matplotlib import pyplot as plt
import numpy as np
from wordcloud import WordCloud
# from PIL import ImageFont
from app5_selectbox.langchain_llama_gpu import llm_chain
# # Load the model and tokenizer
# model = BertForSequenceClassification.from_pretrained("./sentiment_model")
# tokenizer = BertTokenizer.from_pretrained("./sentiment_model")
# TrueType font used by every word cloud. Hoisted to module level: previously it
# was assigned inside a loop *after* a `continue`, so it could be unbound when
# the all-comments cloud was rendered, raising NameError.
FONT_PATH = "QuartzoBold-W9lv.ttf"  # Replace with the path to your TTF font file


def _predict_sentiments(comments):
    """Classify each comment as 'negative' or 'positive'.

    Uses the fine-tuned BERT checkpoint in ./sentiment_model (binary head —
    there is no neutral class) with the stock bert-base-uncased tokenizer.

    Parameters
    ----------
    comments : list[str]
        Raw student comments.

    Returns
    -------
    list[str]
        One sentiment label per comment, in input order.
    """
    # torch / BertForSequenceClassification / BertTokenizer come from the
    # file-level imports; the old in-function re-imports were redundant.
    model = BertForSequenceClassification.from_pretrained('sentiment_model')
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)
    encodings = tokenizer(list(comments), truncation=True, padding=True,
                          max_length=128, return_tensors='pt')
    with torch.no_grad():
        outputs = model(encodings['input_ids'],
                        attention_mask=encodings['attention_mask'])
    predicted = torch.argmax(outputs.logits, dim=1).tolist()
    sentiment_labels = ['negative', 'positive']  # index order fixed by training
    return [sentiment_labels[label] for label in predicted]


def _render_distribution(predictions):
    """Render the sentiment bar chart (counts, annotated with percentages)."""
    sentiment_counts = pd.Series(np.array(predictions)).value_counts()
    # 'neutral' intentionally omitted: the model is binary.
    desired_order = ['positive', 'negative']
    sentiment_counts = sentiment_counts.reindex(desired_order, fill_value=0)
    # `or 1` guards the empty-comments case (previously ZeroDivisionError).
    percentage_distribution = sentiment_counts / (len(predictions) or 1) * 100

    st.write("### Sentiment Distribution")
    fig, ax = plt.subplots(figsize=(8, 6))
    bars = ax.bar(percentage_distribution.index, sentiment_counts.values,
                  color=['green', 'orange', 'red'])
    ax.set_xlabel('Sentiment')
    ax.set_ylabel('Count')
    ax.set_title('Sentiment Distribution in Sample Predictions')
    plt.xticks(rotation=45)
    for bar, percentage, label in zip(bars, percentage_distribution, desired_order):
        ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height(),
                f'{percentage:.2f}% {label.upper()}', ha='center', va='bottom')
    st.pyplot(fig)


def _render_wordclouds(comments, predictions):
    """Render per-sentiment word clouds plus an all-comments cloud.

    Returns
    -------
    str
        Space-separated keywords taken from the negative cloud's vocabulary
        (empty string when there are no negative comments); this feeds the
        LLM recommendation prompt.
    """
    sentiment_texts = {'positive': [], 'negative': []}
    for text, sentiment in zip(comments, predictions):
        sentiment_texts[sentiment].append(text)

    negative_keywords = ""
    for sentiment, texts in sentiment_texts.items():
        words = ' '.join(texts).split()
        combined = ' '.join(word for word in words if len(word) > 2)
        if combined == "":
            continue  # nothing to plot for this sentiment
        wordcloud = WordCloud(font_path=FONT_PATH, width=800, height=600,
                              background_color='white', max_words=15).generate(combined)
        st.write(f"### Word Cloud for {sentiment} Sentiment")
        plt.figure(figsize=(10, 6))
        plt.imshow(wordcloud, interpolation='bilinear')
        plt.axis('off')
        st.pyplot()
        if sentiment == "negative":
            # The cloud's top words double as the keyword list for the LLM.
            negative_keywords = ' '.join(wordcloud.words_.keys()) + ' '

    # Cloud over every comment regardless of sentiment.
    # NOTE(review): WordCloud.generate raises if the text is empty after
    # filtering — same as the original; callers pass non-empty comment lists.
    all_words = ' '.join(comments).split()
    all_text = ' '.join(word for word in all_words if len(word) > 3)
    st.write("### Word Cloud for All Sentiments")
    wordcloud = WordCloud(font_path=FONT_PATH, width=800, height=800,
                          background_color='white', max_words=200).generate(all_text)
    plt.figure(figsize=(8, 8), facecolor=None)
    plt.imshow(wordcloud)
    plt.axis("off")
    st.pyplot()
    return negative_keywords


def eval_analysis(Instructor, Instructor_comment, criteria_results):
    """Build the sentiment-analysis dashboard for one instructor's feedback.

    Classifies each comment with the fine-tuned BERT model, renders counts,
    a distribution chart and word clouds to the Streamlit page, then asks the
    LLM chain for a recommendation based on the negative-comment keywords.

    Parameters
    ----------
    Instructor : pandas.DataFrame
        Rows for the selected instructor. Currently unused by the active code
        path; kept for interface compatibility with callers.
    Instructor_comment : list[str]
        Free-text comments for the selected instructor.
    criteria_results : Any
        Per-criteria evaluation results, interpolated verbatim into the prompt.

    Side effects only (Streamlit widgets + LLM call); returns None.
    """
    # Must run before the first bare st.pyplot() call (word clouds pass no
    # figure) — previously this was set *after* the first such call.
    st.set_option('deprecation.showPyplotGlobalUse', False)

    predicted_sentiments = _predict_sentiments(Instructor_comment)
    negative_count = predicted_sentiments.count("negative")
    positive_count = predicted_sentiments.count("positive")

    st.title("Sentiment Analysis Dashboard")
    st.sidebar.header("Settings")
    link_text = "Instructor Survey"
    link_url = "https://forms.gle/64n9CXMDRP2NYgZYA"
    st.sidebar.markdown(f"[{link_text}]({link_url})")

    st.write("### Sentiment Counts")
    st.write(f"Negative: {negative_count}")
    st.write(f"Positive: {positive_count}")

    _render_distribution(predicted_sentiments)
    negative_keywords = _render_wordclouds(Instructor_comment, predicted_sentiments)

    # Negative-cloud vocabulary, comma-separated, drives the recommendation.
    keyword_list = ", ".join(negative_keywords.split())
    prompt = f"""
Based from these students' feedback: {keyword_list}. \n
Please generate a recommendation to the instructor. Make it in sentence type and in English language only.
Then give insights about the evaluation report based from different criteria.
Here is the results: {criteria_results}
Your response format-
Recommendation to Instructor:
Insights on Evaluation Report:
"""

    st.title('Recommendation:')
    with st.spinner("Generating Recommendation"):
        st.write(llm_chain.run(prompt))
    st.success("Generation Complete!")