tanish78 committed on
Commit
3205271
·
verified ·
1 Parent(s): a653d66

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -9
app.py CHANGED
@@ -137,18 +137,21 @@ def generate_wordcloud(df):
137
  return img
138
 
139
  def generate_bar_chart(df, num_clusters_to_display):
140
- top_clusters = df['Cluster'].value_counts().index[1:num_clusters_to_display+1]
141
- df_top_clusters = df[df['Cluster'].isin(top_clusters)]
142
 
143
- cluster_top_words = df_top_clusters.groupby('Cluster', observed=False)['texts'].apply(lambda x: ' '.join(x)).reset_index()
144
- cluster_top_words['top_word'] = cluster_top_words['texts'].apply(lambda x: pd.Series(x.split()).value_counts().index[0] if len(x.split()) > 0 else '')
145
- cluster_sizes = df_top_clusters['Cluster'].value_counts().reset_index()
146
- cluster_sizes.columns = ['Cluster', 'Count']
147
- cluster_sizes = cluster_sizes.merge(cluster_top_words[['Cluster', 'top_word']], on='Cluster')
148
 
149
- fig = px.bar(cluster_sizes, x='Cluster', y='Count', text='top_word', title='Top Clusters by Frequency with Top Word/Phrase')
 
 
 
 
 
 
150
  fig.update_traces(textposition='outside')
151
- fig.update_layout(xaxis_title='Cluster', yaxis_title='Frequency', showlegend=False)
152
 
153
  buf = BytesIO()
154
  fig.write_image(buf, format='png')
 
137
  return img
138
 
139
  def generate_bar_chart(df, num_clusters_to_display):
140
+ # Exclude common words from the top words
141
+ common_words = {'i', 'you', 'thanks', 'thank', 'ok', 'okay', 'sure', 'done'}
142
 
143
+ top_categories = df['Category'].value_counts().index[:num_clusters_to_display]
144
+ df_top_categories = df[df['Category'].isin(top_categories)]
 
 
 
145
 
146
+ category_top_words = df_top_categories.groupby('Category', observed=False)['texts'].apply(lambda x: ' '.join(x)).reset_index()
147
+ category_top_words['top_word'] = category_top_words['texts'].apply(lambda x: ' '.join([word for word in pd.Series(x.split()).value_counts().index if word not in common_words][:3]))
148
+ category_sizes = df_top_categories['Category'].value_counts().reset_index()
149
+ category_sizes.columns = ['Category', 'Count']
150
+ category_sizes = category_sizes.merge(category_top_words[['Category', 'top_word']], on='Category')
151
+
152
+ fig = px.bar(category_sizes, x='Category', y='Count', text='top_word', title='Category Frequency with Top Words (excluding common words)')
153
  fig.update_traces(textposition='outside')
154
+ fig.update_layout(xaxis_title='Category', yaxis_title='Frequency', showlegend=False)
155
 
156
  buf = BytesIO()
157
  fig.write_image(buf, format='png')