tanish78 committed on
Commit
7393b38
·
verified ·
1 Parent(s): 60c5bda

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -4
app.py CHANGED
@@ -194,7 +194,14 @@ def main(file, num_clusters_to_display):
194
  df = df[df['Answer'] == 'Fallback Message shown']
195
 
196
  df = preprocess_data(df)
197
-
 
 
 
 
 
 
 
198
  df = df[df['Category'] != 'Miscellaneous']
199
 
200
  # Get category sizes and sort by size in ascending order
@@ -211,8 +218,8 @@ def main(file, num_clusters_to_display):
211
  filtered_df = df[df['Category'].isin(largest_categories)]
212
  filtered_cloud_df = df[df['Category'].isin(smallest_categories)]
213
 
214
- # Sort the dataframe by Category
215
- filtered_df = filtered_df.sort_values(by='Category')
216
  filtered_cloud_df = filtered_cloud_df.sort_values(by='Category')
217
 
218
  wordcloud_img = generate_wordcloud(filtered_cloud_df)
@@ -242,4 +249,3 @@ interface = gr.Interface(
242
  )
243
 
244
  interface.launch(share=True)
245
-
 
194
  df = df[df['Answer'] == 'Fallback Message shown']
195
 
196
  df = preprocess_data(df)
197
+
198
+ # Perform clustering before categorization
199
+ num_clusters = 5 # Or any other appropriate number of clusters
200
+ df, kmeans = cluster_data(df, num_clusters)
201
+
202
+ # Categorize the texts after clustering
203
+ df['Category'] = df['texts'].apply(categorize_question)
204
+
205
  df = df[df['Category'] != 'Miscellaneous']
206
 
207
  # Get category sizes and sort by size in ascending order
 
218
  filtered_df = df[df['Category'].isin(largest_categories)]
219
  filtered_cloud_df = df[df['Category'].isin(smallest_categories)]
220
 
221
+ # Sort the dataframe by Category and Cluster
222
+ filtered_df = filtered_df.sort_values(by=['Category', 'Cluster'])
223
  filtered_cloud_df = filtered_cloud_df.sort_values(by='Category')
224
 
225
  wordcloud_img = generate_wordcloud(filtered_cloud_df)
 
249
  )
250
 
251
  interface.launch(share=True)