BulatF committed on
Commit
9052e90
·
1 Parent(s): 6b1b261

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -9
app.py CHANGED
@@ -22,7 +22,7 @@ from lime import lime_text
22
 
23
  stopwords_list = stopwords.words('english') + ['your_additional_stopwords_here']
24
  st.set_page_config(layout="wide")
25
-
26
  def load_model_and_tokenizer(model_name):
27
  model = AutoModelForSequenceClassification.from_pretrained(model_name)
28
  tokenizer = AutoTokenizer.from_pretrained(model_name)
@@ -30,7 +30,7 @@ def load_model_and_tokenizer(model_name):
30
 
31
  model, tokenizer = load_model_and_tokenizer('nlptown/bert-base-multilingual-uncased-sentiment')
32
 
33
-
34
  def load_pipeline():
35
  classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
36
  return classifier
@@ -226,7 +226,7 @@ def display_dataframe(df, df_display):
226
 
227
  st.dataframe(df_display)
228
 
229
- def important_words(reviews, model, num_words=5):
230
  # Create a LimeTextExplainer
231
  explainer = LimeTextExplainer(class_names=[str(i) for i in range(1, 6)])
232
 
@@ -241,13 +241,17 @@ def important_words(reviews, model, num_words=5):
241
 
242
  for rating in range(1, 6):
243
  important_words_per_rating[rating] = []
244
- for review in reviews:
245
- # Get the explanation for the review
246
- explanation = explainer.explain_instance(review, predict_proba, num_features=num_words, labels=[rating - 1])
247
 
248
- # Get the list of important words
249
- words = [feature[0] for feature in explanation.as_list(rating - 1)]
250
- important_words_per_rating[rating].extend(words)
 
 
 
 
 
 
 
251
 
252
  # Keep only unique words
253
  important_words_per_rating[rating] = list(set(important_words_per_rating[rating]))
@@ -255,6 +259,7 @@ def important_words(reviews, model, num_words=5):
255
  return important_words_per_rating
256
 
257
 
 
258
  def display_ratings(df, review_column):
259
  cols = st.columns(5)
260
 
 
22
 
23
  stopwords_list = stopwords.words('english') + ['your_additional_stopwords_here']
24
  st.set_page_config(layout="wide")
25
+ @st.cache_resource
26
  def load_model_and_tokenizer(model_name):
27
  model = AutoModelForSequenceClassification.from_pretrained(model_name)
28
  tokenizer = AutoTokenizer.from_pretrained(model_name)
 
30
 
31
  model, tokenizer = load_model_and_tokenizer('nlptown/bert-base-multilingual-uncased-sentiment')
32
 
33
+ @st.cache_resource
34
  def load_pipeline():
35
  classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
36
  return classifier
 
226
 
227
  st.dataframe(df_display)
228
 
229
+ def important_words(reviews, model, num_words=5, batch_size=50):
230
  # Create a LimeTextExplainer
231
  explainer = LimeTextExplainer(class_names=[str(i) for i in range(1, 6)])
232
 
 
241
 
242
  for rating in range(1, 6):
243
  important_words_per_rating[rating] = []
 
 
 
244
 
245
+ # Batch processing
246
+ for i in range(0, len(reviews), batch_size):
247
+ batch_reviews = reviews[i:i+batch_size]
248
+ for review in batch_reviews:
249
+ # Get the explanation for the review
250
+ explanation = explainer.explain_instance(review, predict_proba, num_features=num_words, labels=[rating - 1])
251
+
252
+ # Get the list of important words
253
+ words = [feature[0] for feature in explanation.as_list(rating - 1)]
254
+ important_words_per_rating[rating].extend(words)
255
 
256
  # Keep only unique words
257
  important_words_per_rating[rating] = list(set(important_words_per_rating[rating]))
 
259
  return important_words_per_rating
260
 
261
 
262
+
263
  def display_ratings(df, review_column):
264
  cols = st.columns(5)
265