BulatF committed on
Commit
e19bf91
·
1 Parent(s): 6a00b4d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -18
app.py CHANGED
@@ -28,12 +28,14 @@ st.set_page_config(layout="wide")
28
 
29
  classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
30
 
31
-
32
  #defs
33
  def classify_reviews(reviews):
34
- inputs = tokenizer(reviews, return_tensors='pt', truncation=True, padding=True, max_length=512)
35
- outputs = model(**inputs)
36
- probabilities = F.softmax(outputs.logits, dim=1).tolist()
 
 
37
  return probabilities
38
 
39
  def top_rating(scores):
@@ -65,12 +67,14 @@ def process_filter_words(filter_words_input):
65
  def classify_with_new_classes(reviews, class_names):
66
  class_scores = []
67
 
68
- for review in reviews:
69
- result = classifier(review, class_names)
70
- scores_dict = dict(zip(result['labels'], result['scores']))
71
- # Reorder scores to match the original class_names order
72
- scores = [scores_dict[name] for name in class_names]
73
- class_scores.append(scores)
 
 
74
 
75
  return class_scores
76
 
@@ -139,14 +143,9 @@ def process_reviews(df, review_column, class_names):
139
  total_reviews = len(df[review_column].tolist())
140
  review_counter = 0
141
 
142
- batch_size = 50
143
- raw_scores = []
144
- reviews = df[review_column].tolist()
145
- for i in range(0, len(reviews), batch_size):
146
- batch_reviews = reviews[i:i+batch_size]
147
- batch_scores = classify_reviews(batch_reviews)
148
- raw_scores.extend(batch_scores)
149
- review_counter += len(batch_reviews)
150
  progress_bar.progress(review_counter / total_reviews)
151
 
152
  with st.spinner('Generating classes...'):
 
28
 
29
  classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
30
 
31
+ BATCH_SIZE = 100
32
  #defs
33
  def classify_reviews(reviews):
34
+ probabilities = []
35
+ for i in range(0, len(reviews), BATCH_SIZE):
36
+ inputs = tokenizer(reviews[i:i+BATCH_SIZE], return_tensors='pt', truncation=True, padding=True, max_length=512)
37
+ outputs = model(**inputs)
38
+ probabilities.extend(F.softmax(outputs.logits, dim=1).tolist())
39
  return probabilities
40
 
41
  def top_rating(scores):
 
67
  def classify_with_new_classes(reviews, class_names):
68
  class_scores = []
69
 
70
+ for i in range(0, len(reviews), BATCH_SIZE):
71
+ batch_reviews = reviews[i:i+BATCH_SIZE]
72
+ for review in batch_reviews:
73
+ result = classifier(review, class_names)
74
+ scores_dict = dict(zip(result['labels'], result['scores']))
75
+ # Reorder scores to match the original class_names order
76
+ scores = [scores_dict[name] for name in class_names]
77
+ class_scores.append(scores)
78
 
79
  return class_scores
80
 
 
143
  total_reviews = len(df[review_column].tolist())
144
  review_counter = 0
145
 
146
+ raw_scores = classify_reviews(df[review_column].tolist())
147
+ for i in range(0, len(raw_scores), BATCH_SIZE):
148
+ review_counter += BATCH_SIZE
 
 
 
 
 
149
  progress_bar.progress(review_counter / total_reviews)
150
 
151
  with st.spinner('Generating classes...'):