File size: 12,568 Bytes
e4fe207
 
 
 
 
 
 
 
 
9d38733
e4fe207
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
import gspread
import pandas as pd
from oauth2client.service_account import ServiceAccountCredentials
from transformers import BertForSequenceClassification, BertTokenizer
import torch
import streamlit as st
from matplotlib import pyplot as plt
import numpy as np
from wordcloud import WordCloud
# from PIL import ImageFont
from app5_selectbox.langchain_llama_gpu import llm_chain

# # Load the model and tokenizer
# model = BertForSequenceClassification.from_pretrained("./sentiment_model")
# tokenizer = BertTokenizer.from_pretrained("./sentiment_model")

def _predict_sentiments(comments):
    """Classify each comment as 'negative' or 'positive'.

    Loads the fine-tuned BERT checkpoint from ./sentiment_model and runs a
    single batched forward pass (max 128 tokens per comment).

    Args:
        comments: list of free-text comment strings.

    Returns:
        List of sentiment labels ('negative' / 'positive'), one per comment.
    """
    import torch
    from transformers import BertTokenizer, BertForSequenceClassification

    model = BertForSequenceClassification.from_pretrained('sentiment_model')
    model.eval()  # disable dropout so inference is deterministic
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

    encodings = tokenizer(comments, truncation=True, padding=True,
                          max_length=128, return_tensors='pt')

    with torch.no_grad():
        outputs = model(encodings['input_ids'],
                        attention_mask=encodings['attention_mask'])

    # Stay in torch end-to-end (the original mixed np.argmax onto a tensor).
    labels = torch.argmax(outputs.logits, dim=1).tolist()
    sentiment_labels = ['negative', 'positive']
    return [sentiment_labels[label] for label in labels]


def _count_sentiments(comments, predictions):
    """Log each prediction and tally sentiment classes.

    Returns:
        (negative_count, neutral_count, positive_count) — 'neutral' is kept
        for interface stability even though the binary model never emits it.
    """
    negative_count = neutral_count = positive_count = 0
    for text, prediction in zip(comments, predictions):
        print(f"Text: {text}")
        print(f"Predicted Sentiment: {prediction}")
        if prediction == "negative":
            negative_count += 1
        elif prediction == "neutral":
            neutral_count += 1
        else:
            positive_count += 1
    return negative_count, neutral_count, positive_count


def _render_distribution_chart(predictions):
    """Render the positive/negative bar chart with percentage labels."""
    sentiment_counts = pd.Series(list(predictions)).value_counts()
    desired_order = ['positive', 'negative']
    sentiment_counts = sentiment_counts.reindex(desired_order, fill_value=0)
    # Guard the empty-comment case (original raised ZeroDivisionError).
    total = max(len(predictions), 1)
    percentage_distribution = sentiment_counts / total * 100

    st.write("### Sentiment Distribution")
    fig, ax = plt.subplots(figsize=(8, 6))
    # One colour per bar; the original passed three colours for two bars,
    # which painted the 'negative' bar orange instead of red.
    bars = ax.bar(percentage_distribution.index, sentiment_counts.values,
                  color=['green', 'red'])
    ax.set_xlabel('Sentiment')
    ax.set_ylabel('Count')
    ax.set_title('Sentiment Distribution in Sample Predictions')
    plt.setp(ax.get_xticklabels(), rotation=45)
    for bar, percentage, label in zip(bars, percentage_distribution, desired_order):
        ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height(),
                f'{percentage:.2f}% {label.upper()}', ha='center', va='bottom')
    st.pyplot(fig)


def _render_sentiment_wordclouds(comments, predictions, font_path):
    """Render one word cloud per sentiment class.

    Returns:
        Space-separated keywords extracted from the *negative* cloud
        (WordCloud.words_), used downstream to seed the LLM prompt.
        Empty string when there are no negative comments.
    """
    sentiment_texts = {'positive': [], 'negative': []}
    for text, sentiment in zip(comments, predictions):
        sentiment_texts[sentiment].append(text)

    text_for_llama = ""
    for sentiment, texts in sentiment_texts.items():
        words = ' '.join(texts).split()
        # Drop very short tokens ('a', 'of', ...) before cloud generation.
        combined_texts = ' '.join(word for word in words if len(word) > 2)
        if combined_texts == "":
            continue

        wordcloud = WordCloud(font_path=font_path, width=800, height=600,
                              background_color='white', max_words=15).generate(combined_texts)
        st.write(f"### Word Cloud for {sentiment} Sentiment")
        fig = plt.figure(figsize=(10, 6))
        plt.imshow(wordcloud, interpolation='bilinear')
        plt.axis('off')
        st.pyplot(fig)

        if sentiment == "negative":
            # words_ maps token -> relative frequency; we only need the tokens.
            for word in wordcloud.words_:
                text_for_llama += str(word) + " "
    return text_for_llama


def _render_overall_wordcloud(comments, font_path):
    """Render a single word cloud over all comments (tokens longer than 3 chars)."""
    words = ' '.join(comments).split()
    all_text = ' '.join(word for word in words if len(word) > 3)
    if not all_text:
        return  # WordCloud.generate("") raises; nothing to draw
    st.write("### Word Cloud for All Sentiments")
    wordcloud = WordCloud(font_path=font_path, width=800, height=800,
                          background_color='white', max_words=200).generate(all_text)
    fig = plt.figure(figsize=(8, 8), facecolor=None)
    plt.imshow(wordcloud)
    plt.axis("off")
    st.pyplot(fig)


def eval_analysis(Instructor, Instructor_comment, criteria_results):
    """Render the instructor sentiment-analysis dashboard in Streamlit.

    Classifies every comment with the fine-tuned BERT model, shows counts,
    a distribution chart and word clouds, then asks the LLM chain for a
    recommendation built from the negative-cloud keywords plus the
    per-criterion results.

    Args:
        Instructor: rows for the selected instructor — presumably a filtered
            DataFrame from the caller; unused here, kept for interface
            compatibility (TODO confirm with callers).
        Instructor_comment: iterable of comment strings for this instructor.
        criteria_results: pre-computed evaluation-criteria results,
            interpolated verbatim into the LLM prompt.

    Side effects: loads ./sentiment_model from disk, writes widgets to the
    active Streamlit page, and invokes llm_chain.run().
    """
    comments = list(Instructor_comment)

    predicted_sentiments = _predict_sentiments(comments)
    negative_count, neutral_count, positive_count = _count_sentiments(
        comments, predicted_sentiments)

    print(f'negative_count {negative_count}')
    print(f'neutral_count {neutral_count}')
    print(f'positive_count {positive_count}')

    # ---- Page scaffold ---------------------------------------------------
    st.title("Sentiment Analysis Dashboard")
    st.sidebar.header("Settings")

    link_text = "Instructor Survey"
    link_url = "https://forms.gle/64n9CXMDRP2NYgZYA"
    st.sidebar.markdown(f"[{link_text}]({link_url})")

    # ---- Counts and distribution ----------------------------------------
    st.write("### Sentiment Counts")
    st.write(f"Negative: {negative_count}")
    st.write(f"Positive: {positive_count}")

    _render_distribution_chart(predicted_sentiments)

    # Defined once up front — the original assigned this inside a loop that
    # could be skipped entirely, leaving the later overall-cloud call with a
    # NameError.
    font_path = "QuartzoBold-W9lv.ttf"  # custom TTF used by every word cloud

    text_for_llama = _render_sentiment_wordclouds(
        comments, predicted_sentiments, font_path)
    _render_overall_wordcloud(comments, font_path)

    # ---- LLM recommendation ---------------------------------------------
    # Re-join the negative-cloud keywords with commas for the prompt.
    text_for_llama = ", ".join(text_for_llama.split())
    prompt = f"""
    Based from these students' feedback: {str(text_for_llama)}. \n
    Please generate a recommendation to the instructor. Make it in sentence type and in English language only.
    Then give insights about the evaluation report based from different criteria.
    Here is the results: {criteria_results}
    Your response format-
    Recommendation to Instructor:
    Insights on Evaluation Report:
    
    """

    st.title('Recommendation:')
    with st.spinner("Generating Recommendation"):
        st.write(llm_chain.run(prompt))
    st.success("Generation Complete!")