tanish78 committed on
Commit
66d546b
·
verified ·
1 Parent(s): e68783d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -3
app.py CHANGED
@@ -7,6 +7,7 @@ from io import BytesIO
7
  import tempfile
8
  from wordcloud import WordCloud, STOPWORDS
9
  import matplotlib.pyplot as plt
 
10
  from PIL import Image
11
 
12
  def preprocess_data(df):
@@ -99,6 +100,26 @@ def generate_wordcloud(df):
99
  img = Image.open(buf)
100
  return img
101
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  def main(file, num_clusters_to_display):
103
  try:
104
  df = pd.read_csv(file)
@@ -122,15 +143,16 @@ def main(file, num_clusters_to_display):
122
  df = df.sort_values('Cluster')
123
 
124
  wordcloud_img = generate_wordcloud(df)
 
125
 
126
  with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmpfile:
127
  df.to_csv(tmpfile.name, index=False)
128
  csv_file_path = tmpfile.name
129
 
130
- return csv_file_path, wordcloud_img
131
  except Exception as e:
132
  print(f"Error: {e}")
133
- return str(e), None
134
 
135
  interface = gr.Interface(
136
  fn=main,
@@ -140,7 +162,8 @@ interface = gr.Interface(
140
  ],
141
  outputs=[
142
  gr.File(label="Clustered Data CSV"),
143
- gr.Image(label="Word Cloud")
 
144
  ],
145
  title="Unanswered User Queries Clustering",
146
  description="Unanswered User Query Categorization"
 
7
  import tempfile
8
  from wordcloud import WordCloud, STOPWORDS
9
  import matplotlib.pyplot as plt
10
+ import plotly.express as px
11
  from PIL import Image
12
 
13
  def preprocess_data(df):
 
100
  img = Image.open(buf)
101
  return img
102
 
103
def generate_bar_chart(df, num_clusters_to_display):
    """Render a bar chart of cluster sizes labelled with each cluster's top word.

    Args:
        df: DataFrame with a 'Cluster' column and a 'texts' column holding
            whitespace-separated text for every row.
        num_clusters_to_display: How many clusters to plot. Coerced to ``int``
            because a UI number widget may deliver a float, which cannot be
            used as a slice bound.

    Returns:
        A PIL image of the chart, decoded from the PNG bytes Plotly writes.
    """
    num_clusters = int(num_clusters_to_display)

    # NOTE(review): the slice starts at 1, so the single most frequent cluster
    # is left out of the chart. Preserved from the original -- confirm that
    # skipping the largest cluster is intentional.
    top_clusters = df['Cluster'].value_counts().index[1:num_clusters + 1]
    df_top_clusters = df[df['Cluster'].isin(top_clusters)]

    def _top_word(texts):
        """Return the most frequent word across *texts*, or '' if there is none."""
        # Drop missing values and coerce to str so str.join cannot fail on
        # NaN or numeric entries.
        words = ' '.join(texts.dropna().astype(str)).split()
        if not words:
            # Without this guard, .index[0] on an empty count raises IndexError.
            return ''
        return pd.Series(words).value_counts().index[0]

    cluster_top_words = (
        df_top_clusters.groupby('Cluster')['texts']
        .apply(_top_word)
        .reset_index()
        .rename(columns={'texts': 'top_word'})
    )

    cluster_sizes = df_top_clusters['Cluster'].value_counts().reset_index()
    # Name the columns explicitly: the defaults produced by
    # value_counts().reset_index() differ between pandas versions.
    cluster_sizes.columns = ['Cluster', 'Count']
    cluster_sizes = cluster_sizes.merge(
        cluster_top_words[['Cluster', 'top_word']], on='Cluster'
    )

    fig = px.bar(
        cluster_sizes,
        x='Cluster',
        y='Count',
        text='top_word',
        title='Top Clusters by Frequency with Top Word/Phrase',
    )
    fig.update_traces(textposition='outside')
    fig.update_layout(xaxis_title='Cluster', yaxis_title='Frequency', showlegend=False)

    # Render to an in-memory PNG and hand it back as a PIL image.
    buf = BytesIO()
    fig.write_image(buf, format='png')
    buf.seek(0)
    return Image.open(buf)
122
+
123
  def main(file, num_clusters_to_display):
124
  try:
125
  df = pd.read_csv(file)
 
143
  df = df.sort_values('Cluster')
144
 
145
  wordcloud_img = generate_wordcloud(df)
146
+ bar_chart_img = generate_bar_chart(df, num_clusters_to_display)
147
 
148
  with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmpfile:
149
  df.to_csv(tmpfile.name, index=False)
150
  csv_file_path = tmpfile.name
151
 
152
+ return csv_file_path, wordcloud_img, bar_chart_img
153
  except Exception as e:
154
  print(f"Error: {e}")
155
+ return str(e), None, None
156
 
157
  interface = gr.Interface(
158
  fn=main,
 
162
  ],
163
  outputs=[
164
  gr.File(label="Clustered Data CSV"),
165
+ gr.Image(label="Word Cloud"),
166
+ gr.Image(label="Bar Chart")
167
  ],
168
  title="Unanswered User Queries Clustering",
169
  description="Unanswered User Query Categorization"