Spaces:
Paused
Paused
| # import gspread | |
| import pandas as pd | |
| # from oauth2client.service_account import ServiceAccountCredentials | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
| import torch | |
| import re | |
| import streamlit as st | |
| import hydralit_components as hc | |
| from matplotlib import pyplot as plt | |
| # import numpy as np | |
| from wordcloud import WordCloud | |
| import plotly.graph_objs as go | |
| # import plotly.express as px | |
| # import plotly.figure_factory as ff | |
| # from PIL import ImageFont | |
| # from app5_selectbox.langchain_llama_gpu import llm_chain | |
| from app5_selectbox.g4f_prompt import g4f_prompt | |
| # from app5_selectbox.llama2_prompt import llama_prompt | |
| from app5_selectbox.naive_bayes_cl import nb_clf | |
| # from HF_inference import analyze_sintement | |
| from HF_pipeline import analyze_sintement | |
# Display names for the selectable classifiers. Index order must line up with
# model_list / model_tokenizer_list below; index 3 (Naive Bayes) has no
# transformer checkpoint and is handled separately in eval_analysis.
models = ['BERT-BASE MODEL', 'BERT-LARGE MODEL', 'DISTILIBERT MODEL', 'NAIVE BAYES MODEL']
# old path
# model_list = [
#     r"/home/aibo/prototype_v1/BERT_BASE/bert_sentiment_model",
#     r"/home/aibo/prototype_v1/BERT_LARGE/bert_sentiment_model",
#     r"/home/aibo/prototype_v1/DISTILIBERT/bert_sentiment_model"
# ]
# new path
# Local fine-tuned checkpoint paths for the three transformer models.
model_list = [
    r"/home/aibo/prototype_v1/HF_MODELS/HUB/stud-fac-eval-bert-base-uncased",
    r"/home/aibo/prototype_v1/HF_MODELS/HUB/stud-fac-eval-bert-large-uncased",
    r"/home/aibo/prototype_v1/HF_MODELS/HUB/stud-fac-eval-distilbert-base-uncased",
]
# Base tokenizer names matching each checkpoint above, index for index.
model_tokenizer_list = ['bert-base-uncased', 'bert-large-uncased', 'distilbert-base-uncased']
# Default model index (0 == BERT-BASE).
# NOTE(review): presumably overridden by the UI selection elsewhere — confirm.
selected_model = 0
# Switch between the two letter-generation backends (see the commented
# g4f_prompt / llama_prompt calls in eval_analysis).
llama2_g4f = False  # true == llama2
# if 'chkbx_selected_model' in st.session_state:
#     st.write("selected model: ", models.index(st.session_state.chkbx_selected_model))
# if 'chkbx_selected_model' not in st.session_state:
#     st.write("no selected!")
def clean_text(text_list):
    """Normalize raw comment values for sentiment classification.

    Each sample is coerced to ``str``, lower-cased (case folding), and
    stripped of every character that is not a letter or whitespace.

    Args:
        text_list: Iterable of raw comment values; non-strings are
            coerced with ``str()``.

    Returns:
        list[str]: Cleaned, lower-cased samples in the original order.
        Empty input yields an empty list.
    """
    # Compile once instead of re-parsing the pattern per sample. After
    # lower() only a-z can remain, but the original character class is kept
    # so behavior is identical for any input.
    non_alpha = re.compile(r'[^a-zA-Z\s]')
    return [non_alpha.sub('', str(sample).lower()) for sample in text_list]
| # # local model | |
| # def classify_sentiments(text_samples, tokenizer, model): | |
| # instructor_comments = [] | |
| # predicted_sentiments = [] | |
| # predicted_sentiments_scores = [] | |
| # # Iterate through the text samples and classify the sentiment | |
| # for idx, text_sample in enumerate(text_samples): | |
| # # Tokenize the text sample | |
| # inputs = tokenizer(text_sample, return_tensors="pt") | |
| # # Perform sentiment classification | |
| # outputs = model(**inputs) | |
| # # Get the predicted sentiment (positive/negative) | |
| # predicted_class = torch.argmax(outputs.logits, dim=1).item() | |
| # # Get the probabilities for each class | |
| # probabilities = torch.softmax(outputs.logits, dim=1).tolist()[0] | |
| # # Store results | |
| # instructor_comments.append(text_sample) | |
| # predicted_sentiments.append("positive" if predicted_class == 1 else "negative") | |
| # predicted_sentiments_scores.append({"positive": probabilities[1]*100, "negative": probabilities[0]*100}) | |
| # return instructor_comments, predicted_sentiments, predicted_sentiments_scores | |
| # inference | |
def classify_sentiments(text_samples, model):
    """Classify each comment via the HF inference pipeline.

    Args:
        text_samples: Sequence of cleaned comment strings.
        model: Model identifier forwarded to ``analyze_sintement``.

    Returns:
        Tuple of three parallel lists:
        - the comments, in input order,
        - sentiment labels ("positive" when the pipeline returns
          "LABEL_1", otherwise "negative"),
        - score dicts with percentage keys "positive" and "negative"
          summing to 100.
    """
    instructor_comments = []
    predicted_sentiments = []
    predicted_sentiments_scores = []
    for text_sample in text_samples:
        # analyze_sintement result: [0] = label, [1] = confidence in [0, 1].
        # NOTE(review): assumed to be the score of the predicted label's
        # positive class, as the original code treated it — confirm against
        # HF_pipeline.analyze_sintement.
        result = analyze_sintement(text_sample, model)
        instructor_comments.append(text_sample)
        predicted_sentiments.append("positive" if result[0] == "LABEL_1" else "negative")
        positive_pct = result[1] * 100
        predicted_sentiments_scores.append(
            {"positive": positive_pct, "negative": 100 - positive_pct}
        )
    return instructor_comments, predicted_sentiments, predicted_sentiments_scores
def calculate_average_scores(probability_list):
    """Average the per-comment positive/negative percentages.

    Args:
        probability_list: List of dicts with "positive" and "negative"
            percentage keys, as produced by ``classify_sentiments``.

    Returns:
        tuple[float, float]: ``(average_positive, average_negative)``.
        Returns ``(0.0, 0.0)`` for an empty list instead of raising
        ZeroDivisionError (previous behavior crashed when an instructor
        had no comments).
    """
    total_comments = len(probability_list)
    if total_comments == 0:
        return 0.0, 0.0
    average_positive = sum(d['positive'] for d in probability_list) / total_comments
    average_negative = sum(d['negative'] for d in probability_list) / total_comments
    return average_positive, average_negative
def eval_analysis(instructor, instructor_comment, criteria_results, selected_model):
    # Render the full sentiment-analysis dashboard for one instructor and
    # generate a follow-up letter prompt (gratitude when there is no negative
    # feedback, recommendations otherwise).
    #
    # Args:
    #   instructor: instructor name, interpolated into the generated letter.
    #   instructor_comment: list of raw student comments for this instructor.
    #   criteria_results: NOTE(review): accepted but never used in this body.
    #   selected_model: index into `models` (0-2 = transformer inference,
    #       3 = Naive Bayes classifier).
    if selected_model < 3:
        ## local model
        # model = model_list[selected_model]
        # model_tokenizer = model_tokenizer_list[selected_model]
        # model_tokenizer = model_list[selected_model]
        # loaded_model = AutoModelForSequenceClassification.from_pretrained(model)
        # tokenizer = AutoTokenizer.from_pretrained(model_tokenizer)
        clean_instructor_comment = clean_text(instructor_comment)
        # print(models[selected_model])
        # predicted_sentiments_transformer = classify_sentiments(clean_instructor_comment, tokenizer, loaded_model) # local model
        predicted_sentiments_transformer = classify_sentiments(clean_instructor_comment, models[selected_model]) # inference
        # Tuple positions: [0] comments, [1] labels, [2] score dicts.
        predicted_sentiments = predicted_sentiments_transformer[1]
        scores = predicted_sentiments_transformer[2]
    elif selected_model == 3:
        try:
            instructor_comment, predicted_sentiments, scores = nb_clf(instructor_comment)
            # scores = scores[1]
        except Exception as e:
            # NOTE(review): the exception is displayed but `scores` and
            # `predicted_sentiments` remain undefined, so the code below
            # raises NameError when nb_clf fails — confirm this is acceptable.
            st.exception(e)
    else: pass  # NOTE(review): same undefined-variable risk for indexes > 3.
    sample_predictions = []  # sentiment label per comment, input order
    comments_data = []       # (comment, label, positive %) rows for the table
    negative_count = 0
    neutral_count = 0
    positive_count = 0
    # average_sintement_score = np.average(scores['positive'])
    average_positive, average_negative = calculate_average_scores(scores)
    # st.text(calculate_average_scores(scores))
    # Tally labels and collect the rows displayed by sentiment_tbl().
    for text, prediction, score in zip(instructor_comment, predicted_sentiments, scores):
        sample_predictions.append(prediction)
        comments_data.append((text, prediction, score['positive']))
        if prediction == "negative":
            negative_count += 1
        elif prediction == "neutral":
            # NOTE(review): the classifiers above only emit
            # positive/negative, so this branch looks unreachable — confirm
            # against nb_clf's label set.
            neutral_count += 1
        else:
            positive_count += 1
    # Bucket raw comments by predicted label for the per-sentiment word clouds.
    sentiment_texts = {
        'positive': [],
        'negative': []
    }
    for text, sentiment in zip(instructor_comment, sample_predictions):
        sentiment_texts[sentiment].append(text)
    text_for_llama = ""  # filled with negative comments, feeds the letter prompt
    def sentiment_tbl():
        # Render every comment with its label and positive score,
        # color-coded green/red per row.
        # Create DataFrame
        comments_df = pd.DataFrame(instructor_comment, columns=["Comments"])
        # Drop index
        comments_df_display = comments_df.copy()
        comments_df_display.reset_index(drop=True, inplace=True)
        # Create DataFrame
        comments_data_df = pd.DataFrame(comments_data, columns=["Comments", "Sentiment", "Score"])
        # Define a function to apply row-wise styling
        def highlight_row(row):
            if row["Sentiment"] == "positive":
                return ['background-color: lightgreen'] * len(row)
            elif row["Sentiment"] == "negative":
                return ['background-color: lightcoral'] * len(row)
            else:
                return [''] * len(row)
        # Set index to start at 1
        comments_data_df.index += 1
        # Apply styling
        styled_df = comments_data_df.style.apply(highlight_row, axis=1)
        # Display styled DataFrame
        st.table(styled_df)
    # NOTE(review): theme_bad/theme_good are never used; the info cards below
    # rely on hydralit's built-in 'good'/'bad' sentiment coloring instead.
    theme_bad = {'bgcolor': '#FFF0F0','title_color': 'red','content_color': 'red','icon_color': 'red', 'icon': 'fa fa-times-circle'}
    theme_good = {'bgcolor': '#EFF8F7','title_color': 'green','content_color': 'green','icon_color': 'green', 'icon': 'fa fa-check-circle'}
    st.write(f"### SENTIMENTS/RECOMENDATION INSIGHTS")
    with st.expander("Sentiment Analysis"):
        st.title("Sentiment Analysis Dashboard")
        st.write(f"## Using {models[selected_model]}")
        st.write("### Sentiment Rating")
        cc = st.columns(2)
        with cc[0]:
            # can just use 'good', 'bad', 'neutral' sentiment to auto color the card
            hc.info_card(title='Positive', content=str(round(average_positive,6))+ '%', sentiment='good', bar_value=round(average_positive,6))
        with cc[1]:
            hc.info_card(title='Negative', content=str(round(average_negative,6))+ '%', sentiment='bad', bar_value=round(average_negative,6))
        # st.write(f"#### Positive: {positive_count} - {round(average_positive,6)} %")
        # st.write(f"#### Negative: {negative_count} - {round(average_negative,6)} %")
        # st.write("### Sentiment Rating")
        # st.write(f"#### Positive: {round(average_positive*100,2)} %")
        # st.write(f"#### Negative: {round(average_negative*100,2)} %")
        # Fixed positive/negative order so bar colors match labels below.
        sentiment_counts = pd.Series(sample_predictions).value_counts()
        desired_order = ['positive', 'negative']
        sentiment_counts = sentiment_counts.reindex(desired_order, fill_value=0)
        percentage_distribution = sentiment_counts / len(sample_predictions) * 100
        sentiment_tbl()
        st.write("### Sentiment Distribution")
        fig = go.Figure(layout=dict(
            autosize=True, # Set autosize to True for automatic adjustment
        ))
        fig.add_trace(go.Bar(
            x=percentage_distribution.index,
            y=sentiment_counts.values,
            marker_color=['green', 'red'],
            text=[f'{percentage:.2f}% {des_order.upper()}' for percentage, des_order in zip(percentage_distribution, desired_order)],
            textposition='auto'
        ))
        fig.update_layout(
            width=600,
            height=500,
            xaxis=dict(title='Sentiment', tickangle=45),
            yaxis=dict(title='Count'),
            title='Sentiment Distribution in Sample Predictions',
        )
        st.plotly_chart(fig)
        # One word cloud per sentiment bucket; buckets that are empty after
        # dropping words of <= 2 characters are skipped.
        for sentiment, texts in sentiment_texts.items():
            combined_texts = ' '.join(texts)
            combined_texts = combined_texts.split()
            filtered_words = [word for word in combined_texts if len(word) > 2]
            combined_texts = ' '.join(filtered_words)
            if combined_texts == "":
                continue
            font_path = "/home/aibo/prototype_v1/prototype/app5_selectbox/QuartzoBold-W9lv.ttf"
            wordcloud = WordCloud(font_path=font_path, width=800, height=600, background_color='white', max_words=15, min_word_length=3, stopwords={}).generate(combined_texts)
            st.write(f"### Word Cloud for {sentiment.capitalize()} Sentiment")
            plt.figure(figsize=(10, 6))
            plt.imshow(wordcloud, interpolation='bilinear')
            plt.axis("off")
            wordcloud_fig = plt.gcf()
            st.pyplot(wordcloud_fig)
            if sentiment == "negative":
                # Negative comments become the input of the letter prompt.
                text_for_llama = sentiment_texts[sentiment]
        # Generate a word cloud from all the text data
        all_text = ' '.join(instructor_comment)
        all_text = all_text.split()
        filtered_words = [word for word in all_text if len(word) > 2]
        all_text = ' '.join(filtered_words)
        st.write("### Word Cloud for All Sentiments")
        # NOTE(review): font_path is only bound inside the loop above, so this
        # call raises NameError if every sentiment bucket was empty — confirm.
        wordcloud = WordCloud(font_path=font_path, width=800, height=800, background_color='white', max_words=200, min_word_length=3, stopwords={}).generate(all_text)
        # Create a Matplotlib figure
        plt.figure(figsize=(8, 8))
        plt.imshow(wordcloud, interpolation='bilinear')
        plt.axis("off")
        wordcloud_fig = plt.gcf()
        st.pyplot(wordcloud_fig)
    if text_for_llama == "":
        # No negative feedback: generate a thank-you letter instead of
        # recommendations.
        with st.expander("Expressing Gratitude and Dedication"):
            st.title("Expressing Gratitude and Dedication")
            text_for_llama = f"""
            There's no negative feedback or comment for the instructor; give him or her a short letter to say.
            [Your Name] = The Management
            [Instructor's Name] = {instructor}
            """
            prompt = text_for_llama
            # Retry until generation succeeds. The actual prompt calls are
            # currently commented out, so this exits on the first pass.
            while True:
                try:
                    with st.spinner("Generating...."):
                        # if not llama2_g4f: st.write(g4f_prompt(prompt)) #################
                        # else: st.write(llama_prompt(prompt)) #################
                        st.success("Generation Complete!")
                    break
                except Exception as e:
                    # NOTE(review): silent unbounded retry — a persistent
                    # failure would spin forever; consider a retry cap.
                    pass
    else:
        with st.expander("Recommendation"):
            # st.title('Recommendation:')
            # text_for_llama = text_for_llama.split()
            # Join the negative comments into a single prompt string.
            text_for_llama = ", ".join(text_for_llama)
            text_for_llama = f"""
            Based on these students' feedback: {str(text_for_llama)}. \n
            Please generate a short letter to the instructor with ten recommendations in bullet format. Make it in sentence type and English only.
            Define the best letter's subject based on the recommendation.
            Subject is Recommendations for Effective Teaching
            Sender's Name is 'The Management'
            receiver's or Instructor's Name is {instructor}
            """
            prompt = text_for_llama
            # Same silent retry pattern as the gratitude branch above.
            while True:
                try:
                    with st.spinner("Generating...."):
                        # if not llama2_g4f: st.write(g4f_prompt(prompt)) #################
                        # else: st.write(llama_prompt(prompt)) #################
                        st.success("Generation Complete!")
                    break
                except Exception as e:
                    pass