Spaces: Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -171,4 +171,54 @@ def analyze_file(file, label_col, n_clusters):
|
|
| 171 |
|
| 172 |
with gr.Blocks() as demo:
|
| 173 |
gr.Markdown("## Data Analysis Explorer")
|
| 174 |
-
gr.Markdown("Upload a CSV or XLSX file to explore classification, regression, and clustering. Select a column to predict and the number of
|
| 171 |
|
| 172 |
with gr.Blocks() as demo:
|
| 173 |
gr.Markdown("## Data Analysis Explorer")
|
| 174 |
+
gr.Markdown("Upload a CSV or XLSX file to explore classification, regression, and clustering. Select a column to predict and the number of clusters!")
|
| 175 |
+
|
| 176 |
+
with gr.Row():
|
| 177 |
+
file_input = gr.File(label="Upload CSV or XLSX", file_types=[".csv", ".xlsx"])
|
| 178 |
+
label_dropdown = gr.Dropdown(label="Select Column to Predict", interactive=True)
|
| 179 |
+
clusters_slider = gr.Slider(minimum=2, maximum=10, step=1, value=3, label="Number of Clusters")
|
| 180 |
+
|
| 181 |
+
# Event handler to update dropdown when file is uploaded
|
| 182 |
+
file_input.change(fn=update_dropdown, inputs=file_input, outputs=label_dropdown)
|
| 183 |
+
|
| 184 |
+
analyze_btn = gr.Button("Analyze")
|
| 185 |
+
|
| 186 |
+
with gr.Tabs():
|
| 187 |
+
with gr.TabItem("Prediction Results"):
|
| 188 |
+
gr.Markdown("### Classification or Regression")
|
| 189 |
+
gr.Markdown("""
|
| 190 |
+
- **Regression**: Predicts numbers (e.g., sales). Uses Random Forest.
|
| 191 |
+
- **Classification**: Predicts categories (e.g., yes/no). Uses Random Forest.
|
| 192 |
+
- Rows with missing values are removed. 70% of data trains the model; 30% tests it.
|
| 193 |
+
""")
|
| 194 |
+
results_textbox = gr.Textbox(label="Performance Metrics", lines=10)
|
| 195 |
+
|
| 196 |
+
with gr.TabItem("Prediction Plot"):
|
| 197 |
+
gr.Markdown("### Prediction Visualization")
|
| 198 |
+
gr.Markdown("Regression shows true vs. predicted values. Classification shows a confusion matrix of correct/incorrect predictions.")
|
| 199 |
+
model_img_output = gr.Image(label="Prediction Output")
|
| 200 |
+
|
| 201 |
+
with gr.TabItem("Feature Importances"):
|
| 202 |
+
gr.Markdown("### Top 10 Key Features")
|
| 203 |
+
gr.Markdown("Shows the most important features for predictions. Higher bars mean bigger impact.")
|
| 204 |
+
fi_output = gr.Image(label="Feature Importances")
|
| 205 |
+
|
| 206 |
+
with gr.TabItem("KMeans Clustering"):
|
| 207 |
+
gr.Markdown("### KMeans Clustering")
|
| 208 |
+
gr.Markdown("Groups similar data points without using the selected column. Colors show clusters in 2D (PCA projection).")
|
| 209 |
+
kmeans_output = gr.Image(label="KMeans Clusters")
|
| 210 |
+
|
| 211 |
+
with gr.TabItem("Agglomerative Clustering"):
|
| 212 |
+
gr.Markdown("### Agglomerative Clustering")
|
| 213 |
+
gr.Markdown("Another way to group data hierarchically. Compare with KMeans to see differences!")
|
| 214 |
+
agg_output = gr.Image(label="Agglomerative Clusters")
|
| 215 |
+
|
| 216 |
+
with gr.TabItem("Cluster Differences"):
|
| 217 |
+
gr.Markdown("### Top 10 Cluster-Differentiating Features")
|
| 218 |
+
gr.Markdown("Shows features that vary most between clusters, helping explain the groupings.")
|
| 219 |
+
diff_output = gr.Image(label="Differentiating Features")
|
| 220 |
+
|
| 221 |
+
analyze_btn.click(fn=analyze_file, inputs=[file_input, label_dropdown, clusters_slider],
|
| 222 |
+
outputs=[results_textbox, model_img_output, fi_output, kmeans_output, agg_output, diff_output])
|
| 223 |
+
|
| 224 |
+
demo.launch()
|