Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -30,12 +30,19 @@ classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnl
|
|
| 30 |
|
| 31 |
|
| 32 |
#defs
|
| 33 |
-
def classify_reviews(reviews):
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
return probabilities
|
| 38 |
|
|
|
|
| 39 |
def top_rating(scores):
    """Return the 1-based position of the highest value in ``scores``.

    Args:
        scores: Non-empty indexable sequence of comparable values
            (list, tuple, ...).

    Returns:
        The position (starting at 1) of the largest value. When several
        entries tie for the maximum, the first one wins.

    Raises:
        ValueError: If ``scores`` is empty.
    """
    # Single pass over the positions; ``max`` keeps the first maximal
    # element, which matches ``scores.index(max(scores))`` for ties.
    return max(range(len(scores)), key=scores.__getitem__) + 1
|
| 41 |
|
|
@@ -62,15 +69,17 @@ def process_filter_words(filter_words_input):
|
|
| 62 |
|
| 63 |
|
| 64 |
# Function for classifying with the new model
|
| 65 |
-
def classify_with_new_classes(reviews, class_names):
|
| 66 |
class_scores = []
|
| 67 |
|
| 68 |
-
for
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
|
|
|
|
|
|
| 74 |
|
| 75 |
return class_scores
|
| 76 |
|
|
@@ -133,24 +142,27 @@ def main():
|
|
| 133 |
|
| 134 |
|
| 135 |
|
| 136 |
-
def process_reviews(df, review_column, class_names):
|
| 137 |
with st.spinner('Classifying reviews...'):
|
| 138 |
progress_bar = st.progress(0)
|
| 139 |
total_reviews = len(df[review_column].tolist())
|
| 140 |
review_counter = 0
|
| 141 |
|
| 142 |
-
batch_size = 50
|
| 143 |
raw_scores = []
|
| 144 |
reviews = df[review_column].tolist()
|
| 145 |
for i in range(0, len(reviews), batch_size):
|
| 146 |
batch_reviews = reviews[i:i+batch_size]
|
| 147 |
-
batch_scores = classify_reviews(batch_reviews)
|
| 148 |
raw_scores.extend(batch_scores)
|
| 149 |
review_counter += len(batch_reviews)
|
| 150 |
progress_bar.progress(review_counter / total_reviews)
|
| 151 |
|
| 152 |
with st.spinner('Generating classes...'):
|
| 153 |
-
class_scores =
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
|
| 155 |
class_scores_dict = {} # New dictionary to store class scores
|
| 156 |
for i, name in enumerate(class_names):
|
|
@@ -161,7 +173,6 @@ def process_reviews(df, review_column, class_names):
|
|
| 161 |
if class_names and not all(name.isspace() for name in class_names):
|
| 162 |
df['Highest Class'] = df[class_names].idxmax(axis=1)
|
| 163 |
|
| 164 |
-
|
| 165 |
df_new = df.copy()
|
| 166 |
df_new['raw_scores'] = raw_scores
|
| 167 |
scores_to_df(df_new)
|
|
@@ -181,6 +192,7 @@ def process_reviews(df, review_column, class_names):
|
|
| 181 |
|
| 182 |
|
| 183 |
|
|
|
|
| 184 |
def scores_to_df(df):
|
| 185 |
for i in range(1, 6):
|
| 186 |
df[f'{i} Star'] = df['raw_scores'].apply(lambda scores: scores[i-1]).round(2)
|
|
|
|
| 30 |
|
| 31 |
|
| 32 |
#defs
|
| 33 |
+
def classify_reviews(reviews, batch_size=64):
    """Return class probabilities for every review.

    The reviews are tokenized and passed through the module-level
    ``tokenizer`` and ``model`` in slices of ``batch_size``. The logits of
    each slice are converted to probabilities with a softmax over
    dimension 1.

    Args:
        reviews: Sequence of review texts.
        batch_size: Number of reviews per forward pass; must be >= 1.

    Returns:
        A list containing one list of probabilities per review, in the
        same order as ``reviews``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        # A negative step would make ``range`` empty and silently drop
        # every review; fail loudly instead.
        raise ValueError("batch_size must be a positive integer")
    if len(reviews) == 0:
        return []

    # Local import keeps this block self-contained regardless of how the
    # file's top-level imports spell the torch dependency.
    import torch

    probabilities = []
    # Inference only: disabling autograd avoids building a graph for every
    # batch, which lowers peak memory without changing the outputs.
    with torch.no_grad():
        for start in range(0, len(reviews), batch_size):
            batch_reviews = reviews[start:start + batch_size]
            inputs = tokenizer(
                batch_reviews,
                return_tensors='pt',
                truncation=True,
                padding=True,
                max_length=512,
            )
            outputs = model(**inputs)
            probabilities.extend(F.softmax(outputs.logits, dim=1).tolist())

    return probabilities
|
| 44 |
|
| 45 |
+
|
| 46 |
def top_rating(scores):
    """Return the 1-based position of the highest value in ``scores``.

    Args:
        scores: Non-empty indexable sequence of comparable values
            (list, tuple, ...).

    Returns:
        The position (starting at 1) of the largest value. When several
        entries tie for the maximum, the first one wins.

    Raises:
        ValueError: If ``scores`` is empty.
    """
    # Single pass over the positions; ``max`` keeps the first maximal
    # element, which matches ``scores.index(max(scores))`` for ties.
    return max(range(len(scores)), key=scores.__getitem__) + 1
|
| 48 |
|
|
|
|
| 69 |
|
| 70 |
|
| 71 |
# Function for classifying with the new model
|
| 72 |
+
def classify_with_new_classes(reviews, class_names, batch_size=64):
    """Score every review against the user-supplied class names.

    Each review is passed to the module-level zero-shot ``classifier``
    together with ``class_names``. The returned label/score pairs are
    re-ordered so that position ``i`` of every score list belongs to
    ``class_names[i]``.

    Args:
        reviews: Iterable of review texts.
        class_names: Candidate labels; defines the order of each score list.
        batch_size: Kept for interface compatibility. The classifier is
            invoked once per review, so this value does not influence the
            result.

    Returns:
        A list with one list of scores per review, in input order. When
        ``class_names`` is empty, every review gets an empty score list.
    """
    if not class_names:
        # The zero-shot pipeline rejects an empty label set; with nothing
        # to score, hand back an empty score list per review instead.
        return [[] for _ in reviews]

    class_scores = []
    for review in reviews:
        result = classifier(review, class_names)
        score_by_label = dict(zip(result['labels'], result['scores']))
        # Reorder scores to match the original class_names order
        class_scores.append([score_by_label[name] for name in class_names])

    return class_scores
|
| 85 |
|
|
|
|
| 142 |
|
| 143 |
|
| 144 |
|
| 145 |
+
def process_reviews(df, review_column, class_names, batch_size=64):
|
| 146 |
with st.spinner('Classifying reviews...'):
|
| 147 |
progress_bar = st.progress(0)
|
| 148 |
total_reviews = len(df[review_column].tolist())
|
| 149 |
review_counter = 0
|
| 150 |
|
|
|
|
| 151 |
raw_scores = []
|
| 152 |
reviews = df[review_column].tolist()
|
| 153 |
for i in range(0, len(reviews), batch_size):
|
| 154 |
batch_reviews = reviews[i:i+batch_size]
|
| 155 |
+
batch_scores = classify_reviews(batch_reviews, batch_size)
|
| 156 |
raw_scores.extend(batch_scores)
|
| 157 |
review_counter += len(batch_reviews)
|
| 158 |
progress_bar.progress(review_counter / total_reviews)
|
| 159 |
|
| 160 |
with st.spinner('Generating classes...'):
|
| 161 |
+
class_scores = []
|
| 162 |
+
for i in range(0, len(reviews), batch_size):
|
| 163 |
+
batch_reviews = reviews[i:i+batch_size]
|
| 164 |
+
batch_scores = classify_with_new_classes(batch_reviews, class_names, batch_size)
|
| 165 |
+
class_scores.extend(batch_scores)
|
| 166 |
|
| 167 |
class_scores_dict = {} # New dictionary to store class scores
|
| 168 |
for i, name in enumerate(class_names):
|
|
|
|
| 173 |
if class_names and not all(name.isspace() for name in class_names):
|
| 174 |
df['Highest Class'] = df[class_names].idxmax(axis=1)
|
| 175 |
|
|
|
|
| 176 |
df_new = df.copy()
|
| 177 |
df_new['raw_scores'] = raw_scores
|
| 178 |
scores_to_df(df_new)
|
|
|
|
| 192 |
|
| 193 |
|
| 194 |
|
| 195 |
+
|
| 196 |
def scores_to_df(df):
|
| 197 |
for i in range(1, 6):
|
| 198 |
df[f'{i} Star'] = df['raw_scores'].apply(lambda scores: scores[i-1]).round(2)
|