Spaces:

damndeepesh
/

AutoML

Sleeping

App Files Files Community

damndeepesh committed on Jun 5, 2025

Commit

3641375

verified ·

1 Parent(s): 04606f5

Upload app.py

Browse files

Files changed (1) hide show

app.py +39 -18

app.py CHANGED Viewed

@@ -730,11 +730,24 @@ def model_training_page():
                 # Determine best model
                 if st.session_state.problem_type == "Classification":
-                    best_model_name = max(model_scores_dict, key=lambda k: (model_scores_dict[k]['Test Accuracy'] or 0, model_scores_dict[k]['Test AUC'] or 0))
                 else: # Regression
-                    # Ensure 'R2' exists and provide a default if not (e.g., for models where R2 might not be applicable or calculable)
-                    best_model_name = max(model_scores_dict, key=lambda k: model_scores_dict[k].get('R2', -float('inf')))
                 st.session_state.best_model_info = {
                     'name': best_model_name,
                     'model': trained_models[best_model_name],
@@ -752,32 +765,40 @@ def model_comparison_page():
         st.warning("⚠️ Please train models first.")
         return
-    scores_df = pd.DataFrame(st.session_state.model_scores).T.fillna(0) # Fill NaN with 0 for display
-    scores_df = scores_df.round(4)
-    st.subheader("🏆 Model Leaderboard")
     if st.session_state.problem_type == "Classification":
         sort_by = 'Test Accuracy'
         display_cols = ['CV Mean Score', 'Test Accuracy', 'Test F1-score', 'Test AUC']
     else: # Regression
         sort_by = 'R2'
-        display_cols = ['CV Mean Score', 'R2', 'MSE'] # Add other relevant regression metrics if needed
-        # Ensure MSE is present, if not, it will be filled with 0 by .fillna(0) earlier or handle missing more gracefully if needed
     leaderboard = scores_df[display_cols].sort_values(by=sort_by, ascending=False)
     leaderboard['Rank'] = range(1, len(leaderboard) + 1)
     leaderboard = leaderboard[['Rank'] + display_cols]
     st.dataframe(leaderboard.style.background_gradient(subset=[sort_by], cmap='RdYlGn'), use_container_width=True)
-    best_model_name = st.session_state.best_model_info['name']
-    best_metric_val = st.session_state.best_model_info['metrics'].get(sort_by, 'N/A')
-    st.markdown(f"<div class='success-message'><h4>🥇 Best Model: {best_model_name} ({sort_by}: {best_metric_val:.4f})</h4></div>", unsafe_allow_html=True)
-    st.subheader("📈 Performance Visualization")
-    fig, ax = plt.subplots(figsize=(10, 6))
-    plot_data = scores_df[sort_by].sort_values(ascending=True)
-    bars = ax.barh(plot_data.index, plot_data.values, color=['#ff6b6b' if idx == best_model_name else '#4ecdc4' for idx in plot_data.index])
-    ax.set_xlabel(sort_by)
     ax.set_title('Model Performance Comparison')
     st.pyplot(fig)

                 # Determine best model
                 if st.session_state.problem_type == "Classification":
+                    # Safely get metrics with default values if missing
+                    best_model_name = max(
+                        model_scores_dict,
+                        key=lambda k: (
+                            model_scores_dict[k].get('Test Accuracy', 0) or 0,
+                            model_scores_dict[k].get('Test AUC', 0) or 0
+                        )
+                    )
                 else: # Regression
+                    best_model_name = max(
+                        model_scores_dict,
+                        key=lambda k: model_scores_dict[k].get('R2', -float('inf'))
+                    )
+                if not model_scores_dict:
+                    st.error("No models were successfully trained. Please check your data and try again.")
+                    return
                 st.session_state.best_model_info = {
                     'name': best_model_name,
                     'model': trained_models[best_model_name],
         st.warning("⚠️ Please train models first.")
         return
+    # Build the scores table first; missing display columns are added and NaN is filled with 0 further below
+    scores_df = pd.DataFrame(st.session_state.model_scores).T
     if st.session_state.problem_type == "Classification":
         sort_by = 'Test Accuracy'
         display_cols = ['CV Mean Score', 'Test Accuracy', 'Test F1-score', 'Test AUC']
     else: # Regression
         sort_by = 'R2'
+        display_cols = ['CV Mean Score', 'R2', 'MSE']
+    # Ensure all display columns exist, add them with NaN if missing
+    for col in display_cols:
+        if col not in scores_df.columns:
+            scores_df[col] = np.nan
+    scores_df = scores_df.fillna(0).round(4)
+    st.subheader("🏆 Model Leaderboard")
     leaderboard = scores_df[display_cols].sort_values(by=sort_by, ascending=False)
     leaderboard['Rank'] = range(1, len(leaderboard) + 1)
     leaderboard = leaderboard[['Rank'] + display_cols]
     st.dataframe(leaderboard.style.background_gradient(subset=[sort_by], cmap='RdYlGn'), use_container_width=True)
+    if st.session_state.best_model_info:
+        best_model_name = st.session_state.best_model_info['name']
+        best_metric_val = st.session_state.best_model_info['metrics'].get(sort_by, 0)
+        st.markdown(f"<div class='success-message'><h4>🥇 Best Model: {best_model_name} ({sort_by}: {best_metric_val:.4f})</h4></div>", unsafe_allow_html=True)
+        st.subheader("📈 Performance Visualization")
+        fig, ax = plt.subplots(figsize=(10, 6))
+        plot_data = scores_df[sort_by].sort_values(ascending=True)
+        bars = ax.barh(plot_data.index, plot_data.values,
+                      color=['#ff6b6b' if idx == best_model_name else '#4ecdc4' for idx in plot_data.index])
+        ax.set_xlabel(sort_by)
     ax.set_title('Model Performance Comparison')
     st.pyplot(fig)