tanish78 committed on
Commit
d14f22e
·
verified ·
1 Parent(s): 4da01c4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -11
app.py CHANGED
@@ -160,7 +160,7 @@ def generate_bar_chart(df, num_clusters_to_display):
160
  img = Image.open(buf)
161
  return img
162
 
163
- def main(file, num_clusters_to_display):
164
  try:
165
  df = pd.read_csv(file)
166
 
@@ -169,26 +169,22 @@ def main(file, num_clusters_to_display):
169
 
170
  df = preprocess_data(df)
171
 
172
- # Get category sizes and sort by size in ascending order
173
  category_sizes = df['Category'].value_counts().reset_index()
174
  category_sizes.columns = ['Category', 'Count']
175
- sorted_categories = category_sizes.sort_values(by='Count', ascending=True)['Category'].tolist()
176
 
177
- # Get the largest x categories as specified by num_clusters_to_display
178
- largest_categories = sorted_categories[:num_clusters_to_display]
179
-
180
- print(largest_categories)
181
 
182
  # Filter the dataframe to include only the largest categories
183
  filtered_df = df[df['Category'].isin(largest_categories)]
184
-
185
-
186
 
187
  # Sort the dataframe by Category
188
  filtered_df = filtered_df.sort_values(by='Category')
189
 
190
- wordcloud_img = generate_wordcloud(filtered_df)
191
- bar_chart_img = generate_bar_chart(df, num_clusters_to_display)
192
 
193
  with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmpfile:
194
  filtered_df.to_csv(tmpfile.name, index=False)
@@ -199,6 +195,8 @@ def main(file, num_clusters_to_display):
199
  print(f"Error: {e}")
200
  return str(e), None, None
201
 
 
 
202
  interface = gr.Interface(
203
  fn=main,
204
  inputs=[
 
160
  img = Image.open(buf)
161
  return img
162
 
163
+ def main(file, num_categories_to_display):
164
  try:
165
  df = pd.read_csv(file)
166
 
 
169
 
170
  df = preprocess_data(df)
171
 
172
+ # Get category sizes and sort by size in descending order
173
  category_sizes = df['Category'].value_counts().reset_index()
174
  category_sizes.columns = ['Category', 'Count']
175
+ sorted_categories = category_sizes.sort_values(by='Count', ascending=False)['Category'].tolist()
176
 
177
+ # Get the largest x categories as specified by num_categories_to_display
178
+ largest_categories = sorted_categories[:num_categories_to_display]
 
 
179
 
180
  # Filter the dataframe to include only the largest categories
181
  filtered_df = df[df['Category'].isin(largest_categories)]
 
 
182
 
183
  # Sort the dataframe by Category
184
  filtered_df = filtered_df.sort_values(by='Category')
185
 
186
+ wordcloud_img = generate_wordcloud(filtered_df) # Word cloud based on the filtered dataframe (largest categories only)
187
+ bar_chart_img = generate_bar_chart(df, num_categories_to_display) # Bar chart receives the entire dataframe plus the number of categories to display
188
 
189
  with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmpfile:
190
  filtered_df.to_csv(tmpfile.name, index=False)
 
195
  print(f"Error: {e}")
196
  return str(e), None, None
197
 
198
+
199
+
200
  interface = gr.Interface(
201
  fn=main,
202
  inputs=[