BulatF committed on
Commit
be9422b
·
1 Parent(s): 4ca73d1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -16
app.py CHANGED
@@ -30,12 +30,19 @@ classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnl
30
 
31
 
32
  #defs
33
- def classify_reviews(reviews):
34
- inputs = tokenizer(reviews, return_tensors='pt', truncation=True, padding=True, max_length=512)
35
- outputs = model(**inputs)
36
- probabilities = F.softmax(outputs.logits, dim=1).tolist()
 
 
 
 
 
 
37
  return probabilities
38
 
 
39
  def top_rating(scores):
40
  return scores.index(max(scores)) + 1
41
 
@@ -62,15 +69,17 @@ def process_filter_words(filter_words_input):
62
 
63
 
64
  # Function for classifying with the new model
65
- def classify_with_new_classes(reviews, class_names):
66
  class_scores = []
67
 
68
- for review in reviews:
69
- result = classifier(review, class_names)
70
- scores_dict = dict(zip(result['labels'], result['scores']))
71
- # Reorder scores to match the original class_names order
72
- scores = [scores_dict[name] for name in class_names]
73
- class_scores.append(scores)
 
 
74
 
75
  return class_scores
76
 
@@ -133,24 +142,27 @@ def main():
133
 
134
 
135
 
136
- def process_reviews(df, review_column, class_names):
137
  with st.spinner('Classifying reviews...'):
138
  progress_bar = st.progress(0)
139
  total_reviews = len(df[review_column].tolist())
140
  review_counter = 0
141
 
142
- batch_size = 50
143
  raw_scores = []
144
  reviews = df[review_column].tolist()
145
  for i in range(0, len(reviews), batch_size):
146
  batch_reviews = reviews[i:i+batch_size]
147
- batch_scores = classify_reviews(batch_reviews)
148
  raw_scores.extend(batch_scores)
149
  review_counter += len(batch_reviews)
150
  progress_bar.progress(review_counter / total_reviews)
151
 
152
  with st.spinner('Generating classes...'):
153
- class_scores = classify_with_new_classes(df[review_column].tolist(), class_names)
 
 
 
 
154
 
155
  class_scores_dict = {} # New dictionary to store class scores
156
  for i, name in enumerate(class_names):
@@ -161,7 +173,6 @@ def process_reviews(df, review_column, class_names):
161
  if class_names and not all(name.isspace() for name in class_names):
162
  df['Highest Class'] = df[class_names].idxmax(axis=1)
163
 
164
-
165
  df_new = df.copy()
166
  df_new['raw_scores'] = raw_scores
167
  scores_to_df(df_new)
@@ -181,6 +192,7 @@ def process_reviews(df, review_column, class_names):
181
 
182
 
183
 
 
184
  def scores_to_df(df):
185
  for i in range(1, 6):
186
  df[f'{i} Star'] = df['raw_scores'].apply(lambda scores: scores[i-1]).round(2)
 
30
 
31
 
32
  #defs
33
def classify_reviews(reviews, batch_size=64):
    """Return class probabilities for each review, scored in batches.

    The reviews are split into consecutive chunks of at most ``batch_size``
    items. Each chunk is tokenized with the module-level ``tokenizer``
    (truncated/padded, ``max_length=512``), run through the module-level
    ``model``, and the logits are turned into probabilities with a softmax
    over ``dim=1``.

    Args:
        reviews: Sequence of review texts (callers pass a list of strings).
        batch_size: Maximum number of reviews per forward pass; must be >= 1.

    Returns:
        A list with one entry per review, in input order; each entry is the
        list of softmax probabilities produced for that review.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1. A negative value
            would otherwise make ``range()`` empty and silently drop every
            review.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")
    if len(reviews) == 0:
        return []

    # Local import keeps this block self-contained; the tokenizer already
    # returns PyTorch tensors (return_tensors='pt'), so torch is installed.
    import torch

    probabilities = []
    # Pure inference: disabling gradient tracking avoids building an autograd
    # graph for every batch and does not change the computed probabilities.
    with torch.no_grad():
        for start in range(0, len(reviews), batch_size):
            batch_reviews = reviews[start:start + batch_size]
            inputs = tokenizer(
                batch_reviews,
                return_tensors='pt',
                truncation=True,
                padding=True,
                max_length=512,
            )
            outputs = model(**inputs)
            probabilities.extend(F.softmax(outputs.logits, dim=1).tolist())

    return probabilities
44
 
45
+
46
  def top_rating(scores):
47
  return scores.index(max(scores)) + 1
48
 
 
69
 
70
 
71
  # Function for classifying with the new model
72
def classify_with_new_classes(reviews, class_names, batch_size=64):
    """Score every review against the given class names.

    Each review is passed, together with ``class_names``, to the
    module-level ``classifier``. The returned ``labels``/``scores`` pairs are
    re-ordered so that every row follows the order of ``class_names``.

    Args:
        reviews: Sequence of review texts.
        class_names: Candidate class labels to score each review against.
        batch_size: Kept for interface compatibility. The classifier is
            invoked once per review, so chunking does not change the work
            done; the value is only validated. Must be >= 1.

    Returns:
        A list with one row per review, in input order; each row holds the
        scores for ``class_names`` in the same order. If ``class_names`` is
        empty, every row is an empty list.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1. A negative value
            previously produced an empty result without any error.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    if not class_names:
        # Nothing to score against: return one empty row per review so the
        # result stays aligned with the input instead of handing the
        # classifier an empty label set.
        return [[] for _ in reviews]

    class_scores = []
    for review in reviews:
        result = classifier(review, class_names)
        score_by_label = dict(zip(result['labels'], result['scores']))
        # Reorder scores to match the original class_names order
        class_scores.append([score_by_label[name] for name in class_names])

    return class_scores
85
 
 
142
 
143
 
144
 
145
+ def process_reviews(df, review_column, class_names, batch_size=64):
146
  with st.spinner('Classifying reviews...'):
147
  progress_bar = st.progress(0)
148
  total_reviews = len(df[review_column].tolist())
149
  review_counter = 0
150
 
 
151
  raw_scores = []
152
  reviews = df[review_column].tolist()
153
  for i in range(0, len(reviews), batch_size):
154
  batch_reviews = reviews[i:i+batch_size]
155
+ batch_scores = classify_reviews(batch_reviews, batch_size)
156
  raw_scores.extend(batch_scores)
157
  review_counter += len(batch_reviews)
158
  progress_bar.progress(review_counter / total_reviews)
159
 
160
  with st.spinner('Generating classes...'):
161
+ class_scores = []
162
+ for i in range(0, len(reviews), batch_size):
163
+ batch_reviews = reviews[i:i+batch_size]
164
+ batch_scores = classify_with_new_classes(batch_reviews, class_names, batch_size)
165
+ class_scores.extend(batch_scores)
166
 
167
  class_scores_dict = {} # New dictionary to store class scores
168
  for i, name in enumerate(class_names):
 
173
  if class_names and not all(name.isspace() for name in class_names):
174
  df['Highest Class'] = df[class_names].idxmax(axis=1)
175
 
 
176
  df_new = df.copy()
177
  df_new['raw_scores'] = raw_scores
178
  scores_to_df(df_new)
 
192
 
193
 
194
 
195
+
196
  def scores_to_df(df):
197
  for i in range(1, 6):
198
  df[f'{i} Star'] = df['raw_scores'].apply(lambda scores: scores[i-1]).round(2)