tanish78 committed on
Commit
7879f67
·
verified ·
1 Parent(s): ba4c821

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -9
app.py CHANGED
@@ -160,7 +160,7 @@ def generate_bar_chart(df, num_clusters_to_display):
160
  img = Image.open(buf)
161
  return img
162
 
163
- def main(file, num_categories_to_display):
164
  try:
165
  df = pd.read_csv(file)
166
 
@@ -169,13 +169,13 @@ def main(file, num_categories_to_display):
169
 
170
  df = preprocess_data(df)
171
 
172
- # Get category sizes and sort by size in descending order
173
  category_sizes = df['Category'].value_counts().reset_index()
174
  category_sizes.columns = ['Category', 'Count']
175
- sorted_categories = category_sizes.sort_values(by='Count', ascending=False)['Category'].tolist()
176
 
177
- # Get the largest x categories as specified by num_categories_to_display
178
- largest_categories = sorted_categories[:num_categories_to_display]
179
 
180
  # Filter the dataframe to include only the largest categories
181
  filtered_df = df[df['Category'].isin(largest_categories)]
@@ -183,11 +183,11 @@ def main(file, num_categories_to_display):
183
  # Sort the dataframe by Category
184
  filtered_df = filtered_df.sort_values(by='Category')
185
 
186
- wordcloud_img = generate_wordcloud(df) # Word cloud based on the entire dataframe
187
- bar_chart_img = generate_bar_chart(df, num_categories_to_display) # Bar chart based on the filtered dataframe
188
 
189
  with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmpfile:
190
- filtered_df.to_csv(tmpfile.name, index=False)
191
  csv_file_path = tmpfile.name
192
 
193
  return csv_file_path, wordcloud_img, bar_chart_img
@@ -199,7 +199,7 @@ interface = gr.Interface(
199
  fn=main,
200
  inputs=[
201
  gr.File(label="Upload CSV File (.csv)"),
202
- gr.Slider(label="Number of Categories to Display", minimum=1, maximum=15, step=1, value=5)
203
  ],
204
  outputs=[
205
  gr.File(label="Categorized Data CSV"),
 
160
  img = Image.open(buf)
161
  return img
162
 
163
+ def main(file, num_clusters_to_display):
164
  try:
165
  df = pd.read_csv(file)
166
 
 
169
 
170
  df = preprocess_data(df)
171
 
172
+ # Get category sizes and sort by size in ascending order
173
  category_sizes = df['Category'].value_counts().reset_index()
174
  category_sizes.columns = ['Category', 'Count']
175
+ sorted_categories = category_sizes.sort_values(by='Count', ascending=True)['Category'].tolist()
176
 
177
+ # Get the largest x categories as specified by num_clusters_to_display
178
+ largest_categories = sorted_categories[:num_clusters_to_display]
179
 
180
  # Filter the dataframe to include only the largest categories
181
  filtered_df = df[df['Category'].isin(largest_categories)]
 
183
  # Sort the dataframe by Category
184
  filtered_df = filtered_df.sort_values(by='Category')
185
 
186
+ wordcloud_img = generate_wordcloud(filtered_df)
187
+ bar_chart_img = generate_bar_chart(filtered_df, num_clusters_to_display)
188
 
189
  with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmpfile:
190
+ filtered_df.to_csv(tmpfile.name, index=False)x
191
  csv_file_path = tmpfile.name
192
 
193
  return csv_file_path, wordcloud_img, bar_chart_img
 
199
  fn=main,
200
  inputs=[
201
  gr.File(label="Upload CSV File (.csv)"),
202
+ gr.Slider(label="Number of Categories to Display", minimum=1, maximum=10, step=1, value=5)
203
  ],
204
  outputs=[
205
  gr.File(label="Categorized Data CSV"),