"""Gradio dashboard that compares several scikit-learn regressors on one dataset.

The user picks a built-in dataset or uploads a CSV, chooses K for the KNN
model, and gets a markdown score table plus three Plotly charts.
"""

import gradio as gr
import pandas as pd
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.datasets import load_diabetes, fetch_california_housing

# Radio label that selects the "bring your own data" path.
_UPLOAD_CHOICE = "Upload your own CSV"

# Target column of each built-in dataset, keyed by its radio label. Kept as a
# table so the UI can show the target name without loading the dataset itself.
_BUILTIN_TARGETS = {
    "Diabetes Dataset": "target",
    "California Housing": "MedHouseVal",
}

# Dataset pre-selected when the page loads.
_DEFAULT_DATASET = "Diabetes Dataset"

# With test_size=0.2 the test split holds ceil(0.2 * n) rows; n >= 6 is the
# smallest size that yields two test rows, which R² needs to be defined.
_MIN_ROWS = 6


# Load built-in datasets
def load_builtin_dataset(choice):
    """Return ``(frame, target_column)`` for a built-in dataset label.

    Args:
        choice: One of the keys of ``_BUILTIN_TARGETS``.

    Returns:
        The dataset as a DataFrame together with the name of its target
        column, or ``(empty DataFrame, None)`` for an unrecognised label.
    """
    if choice == "Diabetes Dataset":
        data = load_diabetes(as_frame=True)
    elif choice == "California Housing":
        data = fetch_california_housing(as_frame=True)
    else:
        return pd.DataFrame(), None
    return data.frame, _BUILTIN_TARGETS[choice]


def _error(message):
    """Build the 4-tuple the click handler must return when no charts exist.

    The tuple length has to match the four output components wired to
    ``compare_regressors`` (markdown + three plots).
    """
    return message, None, None, None


def _prepare_xy(df, target_column):
    """Turn a DataFrame into numeric ``X``/``y`` arrays ready for modelling.

    Non-numeric feature columns are ignored and rows with missing values are
    dropped, because the scaler and the regressors only accept complete
    numeric input.

    Args:
        df: Source data; must contain ``target_column``.
        target_column: Name of the column to predict.

    Returns:
        ``(X, y, notes)`` where ``notes`` is a list of human-readable remarks
        about columns or rows that were left out (empty when nothing was).

    Raises:
        ValueError: If the target is not numeric, no numeric feature remains,
            or fewer than ``_MIN_ROWS`` complete rows are available.
    """
    if not pd.api.types.is_numeric_dtype(df[target_column]):
        raise ValueError(f"Target column '{target_column}' must be numeric.")

    features = df.drop(columns=[target_column])
    numeric = features.select_dtypes(include="number")
    if numeric.shape[1] == 0:
        raise ValueError("No numeric feature columns found in the dataset.")

    notes = []
    ignored = [col for col in features.columns if col not in numeric.columns]
    if ignored:
        notes.append(f"Ignored non-numeric columns: {ignored}")

    table = pd.concat([numeric, df[[target_column]]], axis=1).dropna()
    missing_rows = len(df) - len(table)
    if missing_rows:
        notes.append(f"Dropped {missing_rows} row(s) with missing values")

    if len(table) < _MIN_ROWS:
        raise ValueError(
            f"Need at least {_MIN_ROWS} complete rows to train and evaluate, "
            f"got {len(table)}."
        )

    X = table[numeric.columns].to_numpy()
    y = table[target_column].to_numpy()
    return X, y, notes


# Core comparison function with charts
def compare_regressors(dataset_choice, file, target_column, n_neighbors=5):
    """Train seven regressors on one dataset and report how they compare.

    Args:
        dataset_choice: Radio label; either a built-in dataset name or
            "Upload your own CSV".
        file: Uploaded file from ``gr.File`` (only used for the upload path).
            Either an object exposing the path as ``.name`` or the path itself.
        target_column: Column to predict. Ignored for built-in datasets,
            whose target is fixed.
        n_neighbors: K for the KNN regressor; cast to ``int`` and clamped to
            the number of training rows.

    Returns:
        A 4-tuple ``(markdown_summary, r2_bar_fig, mse_bar_fig, knn_scatter_fig)``.
        On invalid input the first element is an error message and the three
        figures are ``None``.
    """
    # Load dataset
    if dataset_choice != _UPLOAD_CHOICE:
        df, target_column = load_builtin_dataset(dataset_choice)
        if target_column is None:
            # Unknown / cleared selection: nothing to model.
            return _error("⚠️ Please choose a dataset source.")
    else:
        if file is None:
            return _error("⚠️ Please upload a CSV file.")
        # NOTE(review): what gr.File passes depends on the Gradio version;
        # accept both an object with `.name` and a plain path.
        csv_path = getattr(file, "name", file)
        try:
            df = pd.read_csv(csv_path)
        except (
            OSError,
            UnicodeDecodeError,
            pd.errors.EmptyDataError,
            pd.errors.ParserError,
        ) as exc:
            return _error(f"❌ Could not read the CSV file: {exc}")
        # Tolerate stray whitespace typed around the column name.
        target_column = (target_column or "").strip()
        if target_column not in df.columns:
            return _error(
                f"❌ Target column '{target_column}' not found! "
                f"Columns:\n{list(df.columns)}"
            )

    # Split features and target
    try:
        X, y, notes = _prepare_xy(df, target_column)
    except ValueError as exc:
        return _error(f"❌ {exc}")

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Standardize features (fit on the training split only to avoid leakage)
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # KNN cannot use more neighbours than there are training rows, and the
    # estimator rejects non-integer values.
    k = max(1, min(int(n_neighbors), len(X_train)))
    if k != n_neighbors:
        notes.append(f"K adjusted to {k}")

    # Define models
    models = {
        "KNN Regressor": KNeighborsRegressor(n_neighbors=k),
        "Linear Regression": LinearRegression(),
        "Ridge Regression": Ridge(alpha=1.0),
        "Lasso Regression": Lasso(alpha=0.01),
        "Decision Tree Regressor": DecisionTreeRegressor(random_state=42),
        "Random Forest Regressor": RandomForestRegressor(random_state=42),
        "Support Vector Regressor (SVR)": SVR(),
    }

    results = {}
    y_pred_knn = None
    for name, model in models.items():
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        results[name] = {
            "MSE": round(mean_squared_error(y_test, y_pred), 3),
            "R²": round(r2_score(y_test, y_pred), 3),
        }
        if name == "KNN Regressor":
            # Kept for the predicted-vs-actual scatter plot below.
            y_pred_knn = y_pred

    # Convert to DataFrame (one row per model, best R² first)
    df_results = pd.DataFrame(results).T.sort_values("R²", ascending=False)

    # Identify best model
    best_model_name = df_results["R²"].idxmax()
    knn_r2 = results["KNN Regressor"]["R²"]

    summary_text = f"### Dataset: {dataset_choice}\n**Target:** {target_column}\n\n"
    # DataFrame.to_markdown relies on the optional `tabulate` package.
    summary_text += df_results.to_markdown()
    summary_text += f"\n\n✅ Best performing model: **{best_model_name}**"
    summary_text += f"\n🔹 KNN Regressor R²: {knn_r2}"
    for note in notes:
        summary_text += f"\n\nℹ️ {note}"

    # Create charts using Plotly
    fig_r2 = px.bar(
        df_results,
        x=df_results.index,
        y="R²",
        text="R²",
        title="R² Scores by Model",
        color="R²",
    )
    fig_mse = px.bar(
        df_results,
        x=df_results.index,
        y="MSE",
        text="MSE",
        title="MSE by Model",
        color="MSE",
    )

    # Scatter plot for KNN predictions vs actual
    fig_scatter = px.scatter(
        x=y_test,
        y=y_pred_knn,
        labels={"x": "Actual", "y": "Predicted"},
        title="KNN Predicted vs Actual",
    )
    # Dashed diagonal = perfect prediction (predicted == actual).
    low, high = float(y_test.min()), float(y_test.max())
    fig_scatter.add_shape(
        type="line",
        x0=low,
        x1=high,
        y0=low,
        y1=high,
        line=dict(color="red", dash="dash"),
    )

    return summary_text, fig_r2, fig_mse, fig_scatter


# Auto-update target column
def update_target_box(dataset_choice):
    """Return the textbox update matching the selected dataset source.

    For uploads the box is cleared and made editable; for built-in datasets
    it shows the fixed target name read-only. The name comes from
    ``_BUILTIN_TARGETS`` so no dataset has to be loaded just to label the box.
    """
    if dataset_choice == _UPLOAD_CHOICE:
        return gr.update(
            value="",
            visible=True,
            interactive=True,
            label="Target Column Name (y)",
        )
    return gr.update(
        value=_BUILTIN_TARGETS.get(dataset_choice),
        visible=True,
        interactive=False,
        label="Auto Target Column",
    )


# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## ⚙️ Regression Model Comparison Dashboard")
    gr.Markdown(
        "Upload or select a dataset and compare regression models "
        "(KNN, Random Forest, SVR, etc.)"
    )

    dataset_choice = gr.Radio(
        ["Upload your own CSV", "Diabetes Dataset", "California Housing"],
        label="Choose Dataset Source",
        value=_DEFAULT_DATASET,
    )
    file_input = gr.File(file_types=[".csv"], label="Upload CSV (if selected above)")
    # Start in the same state update_target_box produces for the default
    # dataset, so the box is not empty/editable before the first radio change.
    target_input = gr.Textbox(
        label="Auto Target Column",
        value=_BUILTIN_TARGETS[_DEFAULT_DATASET],
        visible=True,
        interactive=False,
    )
    k_slider = gr.Slider(1, 20, value=5, step=1, label="K (for KNN)")

    output_table = gr.Markdown()
    output_r2 = gr.Plot()
    output_mse = gr.Plot()
    output_scatter = gr.Plot()

    dataset_choice.change(
        fn=update_target_box, inputs=dataset_choice, outputs=target_input
    )

    gr.Button("Compare Models 🚀").click(
        fn=compare_regressors,
        inputs=[dataset_choice, file_input, target_input, k_slider],
        outputs=[output_table, output_r2, output_mse, output_scatter],
    )

# Launched at import time, as in the original script.
demo.launch()