jaker86 committed on
Commit
ee936fb
·
verified ·
1 Parent(s): eaca5d0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -1
app.py CHANGED
@@ -171,4 +171,54 @@ def analyze_file(file, label_col, n_clusters):
171
 
172
  with gr.Blocks() as demo:
173
  gr.Markdown("## Data Analysis Explorer")
174
- gr.Markdown("Upload a CSV or XLSX file to explore classification, regression, and clustering. Select a column to predict and the number of
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
 
172
  with gr.Blocks() as demo:
173
  gr.Markdown("## Data Analysis Explorer")
174
+ gr.Markdown("Upload a CSV or XLSX file to explore classification, regression, and clustering. Select a column to predict and the number of clusters!")
175
+
176
+ with gr.Row():
177
+ file_input = gr.File(label="Upload CSV or XLSX", file_types=[".csv", ".xlsx"])
178
+ label_dropdown = gr.Dropdown(label="Select Column to Predict", interactive=True)
179
+ clusters_slider = gr.Slider(minimum=2, maximum=10, step=1, value=3, label="Number of Clusters")
180
+
181
+ # Event handler to update dropdown when file is uploaded
182
+ file_input.change(fn=update_dropdown, inputs=file_input, outputs=label_dropdown)
183
+
184
+ analyze_btn = gr.Button("Analyze")
185
+
186
+ with gr.Tabs():
187
+ with gr.TabItem("Prediction Results"):
188
+ gr.Markdown("### Classification or Regression")
189
+ gr.Markdown("""
190
+ - **Regression**: Predicts numbers (e.g., sales). Uses Random Forest.
191
+ - **Classification**: Predicts categories (e.g., yes/no). Uses Random Forest.
192
+ - Rows with missing values are removed. 70% of data trains the model; 30% tests it.
193
+ """)
194
+ results_textbox = gr.Textbox(label="Performance Metrics", lines=10)
195
+
196
+ with gr.TabItem("Prediction Plot"):
197
+ gr.Markdown("### Prediction Visualization")
198
+ gr.Markdown("Regression shows true vs. predicted values. Classification shows a confusion matrix of correct/incorrect predictions.")
199
+ model_img_output = gr.Image(label="Prediction Output")
200
+
201
+ with gr.TabItem("Feature Importances"):
202
+ gr.Markdown("### Top 10 Key Features")
203
+ gr.Markdown("Shows the most important features for predictions. Higher bars mean bigger impact.")
204
+ fi_output = gr.Image(label="Feature Importances")
205
+
206
+ with gr.TabItem("KMeans Clustering"):
207
+ gr.Markdown("### KMeans Clustering")
208
+ gr.Markdown("Groups similar data points without using the selected column. Colors show clusters in 2D (PCA projection).")
209
+ kmeans_output = gr.Image(label="KMeans Clusters")
210
+
211
+ with gr.TabItem("Agglomerative Clustering"):
212
+ gr.Markdown("### Agglomerative Clustering")
213
+ gr.Markdown("Another way to group data hierarchically. Compare with KMeans to see differences!")
214
+ agg_output = gr.Image(label="Agglomerative Clusters")
215
+
216
+ with gr.TabItem("Cluster Differences"):
217
+ gr.Markdown("### Top 10 Cluster-Differentiating Features")
218
+ gr.Markdown("Shows features that vary most between clusters, helping explain the groupings.")
219
+ diff_output = gr.Image(label="Differentiating Features")
220
+
221
+ analyze_btn.click(fn=analyze_file, inputs=[file_input, label_dropdown, clusters_slider],
222
+ outputs=[results_textbox, model_img_output, fi_output, kmeans_output, agg_output, diff_output])
223
+
224
+ demo.launch()