BulatF committed on
Commit
b968a85
·
1 Parent(s): a8cc4f6

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -38
app.py CHANGED
@@ -6,7 +6,7 @@ import torch
6
  import io
7
  import base64
8
  from stqdm import stqdm
9
- from wordcloud import WordCloud
10
  import matplotlib.pyplot as plt
11
  import numpy as np
12
 
@@ -17,6 +17,12 @@ model = AutoModelForSequenceClassification.from_pretrained(model_name)
17
  tokenizer = AutoTokenizer.from_pretrained(model_name)
18
  st.set_page_config(layout="wide")
19
 
 
 
 
 
 
 
20
  #defs
21
  def classify_reviews(reviews):
22
  inputs = tokenizer(reviews, return_tensors='pt', truncation=True, padding=True, max_length=512)
@@ -35,6 +41,15 @@ def get_table_download_link(df):
35
  b64 = base64.b64encode(csv.encode()).decode()
36
  return f'<a href="data:file/csv;base64,{b64}" download="data.csv">Download csv file</a>'
37
 
 
 
 
 
 
 
 
 
 
38
  def main():
39
  st.title('Sentiment Analysis')
40
  st.markdown('Upload an Excel file to get sentiment analytics')
@@ -42,6 +57,7 @@ def main():
42
  file = st.file_uploader("Upload an excel file", type=['xlsx'])
43
  review_column = None
44
  df = None
 
45
 
46
  if file is not None:
47
  try:
@@ -53,6 +69,8 @@ def main():
53
  df = df.dropna(how='all')
54
  review_column = st.selectbox('Select the column from your excel file containing text', df.columns)
55
  df[review_column] = df[review_column].astype(str)
 
 
56
  except Exception as e:
57
  st.write("An error occurred while reading the uploaded file. Please make sure it's a valid Excel file.")
58
  return
@@ -64,9 +82,13 @@ def main():
64
  df = df[df[review_column].notna()]
65
  df = df[df[review_column].str.strip() != '']
66
 
 
 
 
 
67
  if review_column in df.columns:
68
  with st.spinner('Performing sentiment analysis...'):
69
- df, df_display = process_reviews(df, review_column)
70
 
71
  display_ratings(df, review_column) # updated this line
72
  display_dataframe(df, df_display)
@@ -76,7 +98,8 @@ def main():
76
 
77
 
78
 
79
- def process_reviews(df, review_column):
 
80
  with st.spinner('Classifying reviews...'):
81
  progress_bar = st.progress(0)
82
  total_reviews = len(df[review_column].tolist())
@@ -91,6 +114,16 @@ def process_reviews(df, review_column):
91
  raw_scores.extend(batch_scores)
92
  review_counter += len(batch_reviews)
93
  progress_bar.progress(review_counter / total_reviews)
 
 
 
 
 
 
 
 
 
 
94
 
95
  df_new = df.copy()
96
  df_new['raw_scores'] = raw_scores
@@ -101,29 +134,15 @@ def process_reviews(df, review_column):
101
  remaining_columns = [col for col in df.columns if col not in [review_column, 'raw_scores', 'Weighted Rating', 'Rating', 'Probability', '1 Star', '2 Star', '3 Star', '4 Star', '5 Star']]
102
 
103
  # Reorder the dataframe with selected columns first, created columns next, then the remaining columns
104
- df_new = df_new[[review_column, 'Weighted Rating', 'Rating', 'Probability', '1 Star', '2 Star', '3 Star', '4 Star', '5 Star'] + remaining_columns]
105
 
106
  # Reorder df_display as well
107
- df_display = df_display[[review_column, 'Weighted Rating', 'Rating', 'Probability', '1 Star', '2 Star', '3 Star', '4 Star', '5 Star'] + remaining_columns]
108
 
109
  return df_new, df_display
110
 
111
- def generate_wordclouds(df, review_column):
112
- st.markdown("# Word Clouds for each rating category")
113
- for i in range(1, 6):
114
- # Create a sub-dataframe for each rating category
115
- sub_df = df[df['Rating'] == i]
116
- # Join all the reviews in this sub-dataframe
117
- text = ' '.join(review for review in sub_df[review_column])
118
- # Generate a word cloud
119
- wordcloud = WordCloud(max_font_size=50, max_words=100, background_color="white").generate(text)
120
- # Display the generated image with matplotlib
121
- plt.figure()
122
- plt.imshow(wordcloud, interpolation="bilinear")
123
- plt.axis("off")
124
- plt.title(f"Rating {i}")
125
- st.pyplot(plt)
126
- plt.close()
127
 
128
 
129
  def scores_to_df(df):
@@ -172,23 +191,6 @@ def display_ratings(df, review_column):
172
  cols[i-1].markdown(f"### {rating_counts}")
173
  cols[i-1].markdown(f"{'⭐' * i}")
174
 
175
- # Generate wordcloud for the given rating category
176
- sub_df = df[df['Rating'] == i]
177
- text = ' '.join(review for review in sub_df[review_column])
178
-
179
- if text.strip(): # Only generate a word cloud if text is not empty
180
- wordcloud = WordCloud(max_font_size=50, max_words=100, background_color="white").generate(text)
181
-
182
- # Display the generated image with matplotlib
183
- plt.figure()
184
- plt.imshow(wordcloud, interpolation="bilinear")
185
- plt.axis("off")
186
- plt.title(f"Rating {i}")
187
- cols[i-1].pyplot(plt)
188
- plt.close()
189
-
190
-
191
-
192
 
193
 
194
  if __name__ == "__main__":
 
6
  import io
7
  import base64
8
  from stqdm import stqdm
9
+
10
  import matplotlib.pyplot as plt
11
  import numpy as np
12
 
 
17
  tokenizer = AutoTokenizer.from_pretrained(model_name)
18
  st.set_page_config(layout="wide")
19
 
20
+ # Import the new model and tokenizer
21
+ class_model_name = 'facebook/bart-large-mnli'
22
+ class_model = AutoModelForSequenceClassification.from_pretrained(class_model_name)
23
+ class_tokenizer = AutoTokenizer.from_pretrained(class_model_name)
24
+
25
+
26
  #defs
27
  def classify_reviews(reviews):
28
  inputs = tokenizer(reviews, return_tensors='pt', truncation=True, padding=True, max_length=512)
 
41
  b64 = base64.b64encode(csv.encode()).decode()
42
  return f'<a href="data:file/csv;base64,{b64}" download="data.csv">Download csv file</a>'
43
 
44
+
45
+ # Function for classifying with the new model
46
def classify_with_new_classes(reviews, class_name, batch_size=16):
    """Score how well each review matches ``class_name`` via zero-shot NLI.

    Each review is paired with the hypothesis ``"This example is
    {class_name}."`` and run through the module-level ``class_model`` /
    ``class_tokenizer`` (``facebook/bart-large-mnli``). The returned score is
    the entailment probability after discarding the "neutral" logit, so the
    result genuinely depends on ``class_name``.

    Args:
        reviews: List of review strings.
        class_name: Candidate label to test every review against.
        batch_size: Number of reviews sent through the model per forward
            pass; keeps memory bounded for large uploads.

    Returns:
        A list of floats in [0, 1], one per review, in input order. An empty
        input yields an empty list without touching the model.
    """
    if not reviews:
        return []

    hypothesis = f"This example is {class_name}."
    class_scores = []
    for start in range(0, len(reviews), batch_size):
        batch = reviews[start:start + batch_size]
        inputs = class_tokenizer(
            batch,
            [hypothesis] * len(batch),
            return_tensors='pt',
            # Only shorten the review; the hypothesis must stay intact.
            truncation='only_first',
            padding=True,
            max_length=512,
        )
        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            logits = class_model(**inputs).logits
        # bart-large-mnli logits are [contradiction, neutral, entailment];
        # drop "neutral" and softmax entailment against contradiction.
        probabilities = torch.softmax(logits[:, [0, 2]], dim=1)
        class_scores.extend(probabilities[:, 1].tolist())
    return class_scores
52
+
53
  def main():
54
  st.title('Sentiment Analysis')
55
  st.markdown('Upload an Excel file to get sentiment analytics')
 
57
  file = st.file_uploader("Upload an excel file", type=['xlsx'])
58
  review_column = None
59
  df = None
60
+ class_names = None # New variable for class names
61
 
62
  if file is not None:
63
  try:
 
69
  df = df.dropna(how='all')
70
  review_column = st.selectbox('Select the column from your excel file containing text', df.columns)
71
  df[review_column] = df[review_column].astype(str)
72
+
73
+ class_names = st.text_input('Enter the possible class names separated by comma') # New input field for class names
74
  except Exception as e:
75
  st.write("An error occurred while reading the uploaded file. Please make sure it's a valid Excel file.")
76
  return
 
82
  df = df[df[review_column].notna()]
83
  df = df[df[review_column].str.strip() != '']
84
 
85
+ class_names = [name.strip() for name in class_names.split(',')] # Split class names into a list
86
+ for name in class_names: # Add a new column for each class name
87
+ df[name] = 0.0
88
+
89
  if review_column in df.columns:
90
  with st.spinner('Performing sentiment analysis...'):
91
+ df, df_display = process_reviews(df, review_column, class_names)
92
 
93
  display_ratings(df, review_column) # updated this line
94
  display_dataframe(df, df_display)
 
98
 
99
 
100
 
101
+
102
+ def process_reviews(df, review_column, class_names):
103
  with st.spinner('Classifying reviews...'):
104
  progress_bar = st.progress(0)
105
  total_reviews = len(df[review_column].tolist())
 
114
  raw_scores.extend(batch_scores)
115
  review_counter += len(batch_reviews)
116
  progress_bar.progress(review_counter / total_reviews)
117
+
118
+ class_scores_dict = {} # New dictionary to store class scores
119
+ for name in class_names:
120
+ with st.spinner(f'Generating classes for {name}...'):
121
+ class_scores = classify_with_new_classes(df[review_column].tolist(), name)
122
+ df[name] = class_scores
123
+ class_scores_dict[name] = class_scores # Store class scores in the dictionary
124
+
125
+ # Add a new column with the class that has the highest score
126
+ df['Highest Class'] = df[class_names].idxmax(axis=1)
127
 
128
  df_new = df.copy()
129
  df_new['raw_scores'] = raw_scores
 
134
  remaining_columns = [col for col in df.columns if col not in [review_column, 'raw_scores', 'Weighted Rating', 'Rating', 'Probability', '1 Star', '2 Star', '3 Star', '4 Star', '5 Star']]
135
 
136
  # Reorder the dataframe with selected columns first, created columns next, then the remaining columns
137
+ df_new = df_new[[review_column, 'Weighted Rating', 'Rating', 'Probability', '1 Star', '2 Star', '3 Star', '4 Star', '5 Star', 'Highest Class'] + remaining_columns]
138
 
139
  # Reorder df_display as well
140
+ df_display = df_display[[review_column, 'Weighted Rating', 'Rating', 'Probability', '1 Star', '2 Star', '3 Star', '4 Star', '5 Star', 'Highest Class'] + remaining_columns]
141
 
142
  return df_new, df_display
143
 
144
+
145
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
 
147
 
148
  def scores_to_df(df):
 
191
  cols[i-1].markdown(f"### {rating_counts}")
192
  cols[i-1].markdown(f"{'⭐' * i}")
193
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
 
195
 
196
  if __name__ == "__main__":