Spaces:

jaker86
/

data_science_crash_course

Sleeping

App Files Files Community

jaker86 committed on Feb 25, 2025

Commit

ee936fb

verified ·

1 Parent(s): eaca5d0

Update app.py

Browse files

Files changed (1) hide show

app.py +51 -1

app.py CHANGED Viewed

@@ -171,4 +171,54 @@ def analyze_file(file, label_col, n_clusters):
 with gr.Blocks() as demo:
     gr.Markdown("## Data Analysis Explorer")
-    gr.Markdown("Upload a CSV or XLSX file to explore classification, regression, and clustering. Select a column to predict and the number of

 with gr.Blocks() as demo:
     gr.Markdown("## Data Analysis Explorer")
+    gr.Markdown("Upload a CSV or XLSX file to explore classification, regression, and clustering. Select a column to predict and the number of clusters!")
+    with gr.Row():
+        file_input = gr.File(label="Upload CSV or XLSX", file_types=[".csv", ".xlsx"])
+        label_dropdown = gr.Dropdown(label="Select Column to Predict", interactive=True)
+        clusters_slider = gr.Slider(minimum=2, maximum=10, step=1, value=3, label="Number of Clusters")
+    # Event handler to update dropdown when file is uploaded
+    file_input.change(fn=update_dropdown, inputs=file_input, outputs=label_dropdown)
+    analyze_btn = gr.Button("Analyze")
+    with gr.Tabs():
+        with gr.TabItem("Prediction Results"):
+            gr.Markdown("### Classification or Regression")
+            gr.Markdown("""
+            - **Regression**: Predicts numbers (e.g., sales). Uses Random Forest.
+            - **Classification**: Predicts categories (e.g., yes/no). Uses Random Forest.
+            - Rows with missing values are removed. 70% of data trains the model; 30% tests it.
+            """)
+            results_textbox = gr.Textbox(label="Performance Metrics", lines=10)
+        with gr.TabItem("Prediction Plot"):
+            gr.Markdown("### Prediction Visualization")
+            gr.Markdown("Regression shows true vs. predicted values. Classification shows a confusion matrix of correct/incorrect predictions.")
+            model_img_output = gr.Image(label="Prediction Output")
+        with gr.TabItem("Feature Importances"):
+            gr.Markdown("### Top 10 Key Features")
+            gr.Markdown("Shows the most important features for predictions. Higher bars mean bigger impact.")
+            fi_output = gr.Image(label="Feature Importances")
+        with gr.TabItem("KMeans Clustering"):
+            gr.Markdown("### KMeans Clustering")
+            gr.Markdown("Groups similar data points without using the selected column. Colors show clusters in 2D (PCA projection).")
+            kmeans_output = gr.Image(label="KMeans Clusters")
+        with gr.TabItem("Agglomerative Clustering"):
+            gr.Markdown("### Agglomerative Clustering")
+            gr.Markdown("Another way to group data hierarchically. Compare with KMeans to see differences!")
+            agg_output = gr.Image(label="Agglomerative Clusters")
+        with gr.TabItem("Cluster Differences"):
+            gr.Markdown("### Top 10 Cluster-Differentiating Features")
+            gr.Markdown("Shows features that vary most between clusters, helping explain the groupings.")
+            diff_output = gr.Image(label="Differentiating Features")
+    analyze_btn.click(fn=analyze_file, inputs=[file_input, label_dropdown, clusters_slider],
+                      outputs=[results_textbox, model_img_output, fi_output, kmeans_output, agg_output, diff_output])
+demo.launch()