File size: 12,568 Bytes
e4fe207
 
 
 
 
 
 
 
 
9d38733
e4fe207
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
import gspread
import pandas as pd
from oauth2client.service_account import ServiceAccountCredentials
from transformers import BertForSequenceClassification, BertTokenizer
import torch
import streamlit as st
from matplotlib import pyplot as plt
import numpy as np
from wordcloud import WordCloud
# from PIL import ImageFont
from app5_selectbox.langchain_llama_gpu import llm_chain

# # Load the model and tokenizer
# model = BertForSequenceClassification.from_pretrained("./sentiment_model")
# tokenizer = BertTokenizer.from_pretrained("./sentiment_model")

def _predict_sentiments(comments):
    """Classify each comment as 'negative' or 'positive'.

    Loads the fine-tuned BERT checkpoint from ./sentiment_model and runs a
    single batched forward pass (max 128 tokens per comment).

    Args:
        comments: list of free-text comment strings.

    Returns:
        List of sentiment labels ('negative' / 'positive'), one per comment.
    """
    import torch
    from transformers import BertTokenizer, BertForSequenceClassification

    model = BertForSequenceClassification.from_pretrained('sentiment_model')
    model.eval()  # disable dropout so inference is deterministic
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

    encodings = tokenizer(comments, truncation=True, padding=True,
                          max_length=128, return_tensors='pt')

    with torch.no_grad():
        outputs = model(encodings['input_ids'],
                        attention_mask=encodings['attention_mask'])

    # Stay in torch end-to-end (the original mixed np.argmax onto a tensor).
    labels = torch.argmax(outputs.logits, dim=1).tolist()
    sentiment_labels = ['negative', 'positive']
    return [sentiment_labels[label] for label in labels]


def _count_sentiments(comments, predictions):
    """Log each prediction and tally sentiment classes.

    Returns:
        (negative_count, neutral_count, positive_count) — 'neutral' is kept
        for interface stability even though the binary model never emits it.
    """
    negative_count = neutral_count = positive_count = 0
    for text, prediction in zip(comments, predictions):
        print(f"Text: {text}")
        print(f"Predicted Sentiment: {prediction}")
        if prediction == "negative":
            negative_count += 1
        elif prediction == "neutral":
            neutral_count += 1
        else:
            positive_count += 1
    return negative_count, neutral_count, positive_count


def _render_distribution_chart(predictions):
    """Render the positive/negative bar chart with percentage labels."""
    sentiment_counts = pd.Series(list(predictions)).value_counts()
    desired_order = ['positive', 'negative']
    sentiment_counts = sentiment_counts.reindex(desired_order, fill_value=0)
    # Guard the empty-comment case (original raised ZeroDivisionError).
    total = max(len(predictions), 1)
    percentage_distribution = sentiment_counts / total * 100

    st.write("### Sentiment Distribution")
    fig, ax = plt.subplots(figsize=(8, 6))
    # One colour per bar; the original passed three colours for two bars,
    # which painted the 'negative' bar orange instead of red.
    bars = ax.bar(percentage_distribution.index, sentiment_counts.values,
                  color=['green', 'red'])
    ax.set_xlabel('Sentiment')
    ax.set_ylabel('Count')
    ax.set_title('Sentiment Distribution in Sample Predictions')
    plt.setp(ax.get_xticklabels(), rotation=45)
    for bar, percentage, label in zip(bars, percentage_distribution, desired_order):
        ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height(),
                f'{percentage:.2f}% {label.upper()}', ha='center', va='bottom')
    st.pyplot(fig)


def _render_sentiment_wordclouds(comments, predictions, font_path):
    """Render one word cloud per sentiment class.

    Returns:
        Space-separated keywords extracted from the *negative* cloud
        (WordCloud.words_), used downstream to seed the LLM prompt.
        Empty string when there are no negative comments.
    """
    sentiment_texts = {'positive': [], 'negative': []}
    for text, sentiment in zip(comments, predictions):
        sentiment_texts[sentiment].append(text)

    text_for_llama = ""
    for sentiment, texts in sentiment_texts.items():
        words = ' '.join(texts).split()
        # Drop very short tokens ('a', 'of', ...) before cloud generation.
        combined_texts = ' '.join(word for word in words if len(word) > 2)
        if combined_texts == "":
            continue

        wordcloud = WordCloud(font_path=font_path, width=800, height=600,
                              background_color='white', max_words=15).generate(combined_texts)
        st.write(f"### Word Cloud for {sentiment} Sentiment")
        fig = plt.figure(figsize=(10, 6))
        plt.imshow(wordcloud, interpolation='bilinear')
        plt.axis('off')
        st.pyplot(fig)

        if sentiment == "negative":
            # words_ maps token -> relative frequency; we only need the tokens.
            for word in wordcloud.words_:
                text_for_llama += str(word) + " "
    return text_for_llama


def _render_overall_wordcloud(comments, font_path):
    """Render a single word cloud over all comments (tokens longer than 3 chars)."""
    words = ' '.join(comments).split()
    all_text = ' '.join(word for word in words if len(word) > 3)
    if not all_text:
        return  # WordCloud.generate("") raises; nothing to draw
    st.write("### Word Cloud for All Sentiments")
    wordcloud = WordCloud(font_path=font_path, width=800, height=800,
                          background_color='white', max_words=200).generate(all_text)
    fig = plt.figure(figsize=(8, 8), facecolor=None)
    plt.imshow(wordcloud)
    plt.axis("off")
    st.pyplot(fig)


def eval_analysis(Instructor, Instructor_comment, criteria_results):
    """Render the instructor sentiment-analysis dashboard in Streamlit.

    Classifies every comment with the fine-tuned BERT model, shows counts,
    a distribution chart and word clouds, then asks the LLM chain for a
    recommendation built from the negative-cloud keywords plus the
    per-criterion results.

    Args:
        Instructor: rows for the selected instructor — presumably a filtered
            DataFrame from the caller; unused here, kept for interface
            compatibility (TODO confirm with callers).
        Instructor_comment: iterable of comment strings for this instructor.
        criteria_results: pre-computed evaluation-criteria results,
            interpolated verbatim into the LLM prompt.

    Side effects: loads ./sentiment_model from disk, writes widgets to the
    active Streamlit page, and invokes llm_chain.run().
    """
    comments = list(Instructor_comment)

    predicted_sentiments = _predict_sentiments(comments)
    negative_count, neutral_count, positive_count = _count_sentiments(
        comments, predicted_sentiments)

    print(f'negative_count {negative_count}')
    print(f'neutral_count {neutral_count}')
    print(f'positive_count {positive_count}')

    # ---- Page scaffold ---------------------------------------------------
    st.title("Sentiment Analysis Dashboard")
    st.sidebar.header("Settings")

    link_text = "Instructor Survey"
    link_url = "https://forms.gle/64n9CXMDRP2NYgZYA"
    st.sidebar.markdown(f"[{link_text}]({link_url})")

    # ---- Counts and distribution ----------------------------------------
    st.write("### Sentiment Counts")
    st.write(f"Negative: {negative_count}")
    st.write(f"Positive: {positive_count}")

    _render_distribution_chart(predicted_sentiments)

    # Defined once up front — the original assigned this inside a loop that
    # could be skipped entirely, leaving the later overall-cloud call with a
    # NameError.
    font_path = "QuartzoBold-W9lv.ttf"  # custom TTF used by every word cloud

    text_for_llama = _render_sentiment_wordclouds(
        comments, predicted_sentiments, font_path)
    _render_overall_wordcloud(comments, font_path)

    # ---- LLM recommendation ---------------------------------------------
    # Re-join the negative-cloud keywords with commas for the prompt.
    text_for_llama = ", ".join(text_for_llama.split())
    prompt = f"""
    Based from these students' feedback: {str(text_for_llama)}. \n
    Please generate a recommendation to the instructor. Make it in sentence type and in English language only.
    Then give insights about the evaluation report based from different criteria.
    Here is the results: {criteria_results}
    Your response format-
    Recommendation to Instructor:
    Insights on Evaluation Report:
    
    """

    st.title('Recommendation:')
    with st.spinner("Generating Recommendation"):
        st.write(llm_chain.run(prompt))
    st.success("Generation Complete!")