tanish78 committed on
Commit
faeddba
·
verified ·
1 Parent(s): 904a011

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -3
app.py CHANGED
@@ -132,7 +132,7 @@ def visualize_clusters(df):
132
  plt.ylabel('PCA Component 2')
133
  plt.show()
134
 
135
- def main(file):
136
  try:
137
  df = pd.read_excel(file)
138
  df = preprocess_data(df)
@@ -144,6 +144,14 @@ def main(file):
144
  df['Cluster'] = pd.Categorical(df['Cluster'], categories=sorted_clusters, ordered=True)
145
  df = df.sort_values('Cluster')
146
 
 
 
 
 
 
 
 
 
147
  with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmpfile:
148
  df.to_csv(tmpfile.name, index=False)
149
  return tmpfile.name
@@ -152,10 +160,13 @@ def main(file):
152
 
153
  interface = gr.Interface(
154
  fn=main,
155
- inputs=gr.File(label="Upload Excel File (.xlsx)"),
 
 
 
156
  outputs=gr.File(label="Clustered Data CSV"),
157
  title="Unanswered User Queries Clustering",
158
- description="Upload an Excel file (.xlsx)"
159
  )
160
 
161
  interface.launch()
 
132
  plt.ylabel('PCA Component 2')
133
  plt.show()
134
 
135
+ def main(file, num_clusters_to_display):
136
  try:
137
  df = pd.read_excel(file)
138
  df = preprocess_data(df)
 
144
  df['Cluster'] = pd.Categorical(df['Cluster'], categories=sorted_clusters, ordered=True)
145
  df = df.sort_values('Cluster')
146
 
147
+ # Filter out cluster 0 and get the largest clusters
148
+ filtered_clusters = [cluster for cluster in sorted_clusters if cluster != 0]
149
+ top_clusters = filtered_clusters[:num_clusters_to_display]
150
+
151
+ df = df[df['Cluster'].isin(top_clusters)]
152
+ df['Cluster'] = pd.Categorical(df['Cluster'], categories=top_clusters, ordered=True)
153
+ df = df.sort_values('Cluster')
154
+
155
  with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmpfile:
156
  df.to_csv(tmpfile.name, index=False)
157
  return tmpfile.name
 
160
 
161
  interface = gr.Interface(
162
  fn=main,
163
+ inputs=[
164
+ gr.File(label="Upload Excel File (.xlsx)"),
165
+ gr.Slider(1, 10, step=1, label="Number of Largest Clusters to Display")
166
+ ],
167
  outputs=gr.File(label="Clustered Data CSV"),
168
  title="Unanswered User Queries Clustering",
169
+ description="Upload an Excel file (.xlsx) and select the number of largest clusters to display (excluding cluster 0)"
170
  )
171
 
172
  interface.launch()