import gradio as gr
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error
import io
from PIL import Image
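
# Assumed install line (not part of the original file):
#   pip install gradio numpy matplotlib scikit-learn pillow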


class BiasVarianceDemo:
    def __init__(self):
        np.random.seed(42)

    def generate_data(self, n_samples=50, noise_level=0.5):
        """Generate synthetic data with a known true underlying function"""
        X = np.sort(np.random.uniform(0, 10, n_samples))
        # True function: sinusoidal with a slight quadratic trend
        y_true = 2 * np.sin(X) + 0.1 * X**2 - 5
        # Add Gaussian noise
        y = y_true + np.random.normal(0, noise_level, n_samples)
        return X, y, y_true

    def fit_polynomial(self, X, y, degree):
        """Fit polynomial regression of given degree"""
        model = make_pipeline(PolynomialFeatures(degree), LinearRegression())
        model.fit(X.reshape(-1, 1), y)
        return model
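
    # The expected squared test error at a point decomposes as
    #   E[(y - f_hat(x))^2] = Bias[f_hat(x)]^2 + Var[f_hat(x)] + sigma^2
    # where sigma^2 is the irreducible noise variance. The method below
    # estimates the first two terms empirically by refitting the model on
    # many independently generated training sets.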
    def calculate_bias_variance(self, X_test, y_true_test, n_iterations=100, degree=1, noise_level=0.5):
        """Estimate bias and variance by refitting on freshly simulated training sets"""
        predictions = []
        for _ in range(n_iterations):
            # Generate new training data with the same noise level
            X_train, y_train, _ = self.generate_data(n_samples=50, noise_level=noise_level)
            # Fit model
            model = self.fit_polynomial(X_train, y_train, degree)
            # Predict on the fixed test grid
            y_pred = model.predict(X_test.reshape(-1, 1))
            predictions.append(y_pred)
        predictions = np.array(predictions)
        # Bias² and variance of the ensemble of fits
        mean_prediction = np.mean(predictions, axis=0)
        bias_squared = np.mean((mean_prediction - y_true_test) ** 2)
        variance = np.mean(np.var(predictions, axis=0))
        return bias_squared, variance, predictions
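
    # Hypothetical quick check (illustrative only, not called by the app):
    #   demo = BiasVarianceDemo()
    #   grid = np.linspace(0, 10, 100)
    #   truth = 2 * np.sin(grid) + 0.1 * grid**2 - 5
    #   b2, var, _ = demo.calculate_bias_variance(grid, truth, degree=3)
    #   print(f"Bias²={b2:.3f}, Variance={var:.3f}")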

    def visualize_fitting(self, degree, noise_level, n_samples):
        """Create visualization showing fitting quality"""
        fig = plt.figure(figsize=(20, 12))
        gs = fig.add_gridspec(3, 3, hspace=0.3, wspace=0.3)
        # Generate data
        X, y, y_true = self.generate_data(n_samples=n_samples, noise_level=noise_level)
        X_plot = np.linspace(0, 10, 200)
        y_true_plot = 2 * np.sin(X_plot) + 0.1 * X_plot**2 - 5
        # Fit models for different scenarios
        degrees = [1, degree, 15]  # underfitting, user's choice, overfitting
        titles = ['UNDERFITTING (Low Complexity)', f'YOUR MODEL (Degree {degree})', 'OVERFITTING (High Complexity)']
        # Top row: fitting comparison
        for idx, (deg, title) in enumerate(zip(degrees, titles)):
            ax = fig.add_subplot(gs[0, idx])
            # Fit model
            model = self.fit_polynomial(X, y, deg)
            y_pred_plot = model.predict(X_plot.reshape(-1, 1))
            # Plot
            ax.scatter(X, y, color='green', s=80, alpha=0.6, edgecolors='black', linewidth=1.5, label='Training Data')
            ax.plot(X_plot, y_true_plot, 'b--', linewidth=3, label='True Function', alpha=0.7)
            ax.plot(X_plot, y_pred_plot, 'r-', linewidth=3, label=f'Model (degree={deg})')
            # Calculate training error
            y_pred_train = model.predict(X.reshape(-1, 1))
            train_mse = mean_squared_error(y, y_pred_train)
            ax.set_xlabel('X', fontsize=12, fontweight='bold')
            ax.set_ylabel('Y', fontsize=12, fontweight='bold')
            ax.set_title(title, fontsize=14, fontweight='bold', pad=10)
            ax.legend(fontsize=10)
            ax.grid(True, alpha=0.3)
            ax.set_ylim(-10, 5)  # limit y-axis range
            ax.text(0.02, 0.98, f'Train MSE: {train_mse:.3f}',
                    transform=ax.transAxes, fontsize=11, verticalalignment='top',
                    bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.7))
        # Middle row: bias-variance tradeoff visualization
        X_test = np.linspace(0, 10, 100)
        y_true_test = 2 * np.sin(X_test) + 0.1 * X_test**2 - 5
        for idx, deg in enumerate(degrees):
            ax = fig.add_subplot(gs[1, idx])
            # Calculate bias and variance
            bias_sq, variance, predictions = self.calculate_bias_variance(
                X_test, y_true_test, n_iterations=50, degree=deg, noise_level=noise_level
            )
            # Plot multiple predictions (showing variance)
            for i in range(min(20, len(predictions))):
                ax.plot(X_test, predictions[i], 'purple', alpha=0.15, linewidth=1)
            # Plot mean prediction and true function
            mean_pred = np.mean(predictions, axis=0)
            ax.plot(X_test, y_true_test, 'b--', linewidth=3, label='True Function', alpha=0.8)
            ax.plot(X_test, mean_pred, 'r-', linewidth=3, label='Mean Prediction')
            # Add confidence band (±1 std)
            std_pred = np.std(predictions, axis=0)
            ax.fill_between(X_test, mean_pred - std_pred, mean_pred + std_pred,
                            color='red', alpha=0.2, label='±1 Std Dev')
            ax.set_xlabel('X', fontsize=12, fontweight='bold')
            ax.set_ylabel('Y', fontsize=12, fontweight='bold')
            ax.set_title(f'Bias-Variance (degree={deg})', fontsize=13, fontweight='bold')
            ax.legend(fontsize=9)
            ax.grid(True, alpha=0.3)
            ax.set_ylim(-10, 5)  # limit y-axis range
            # Add bias-variance stats
            total_error = bias_sq + variance
            stats_text = f'Bias²: {bias_sq:.3f}\nVariance: {variance:.3f}\nTotal: {total_error:.3f}'
            ax.text(0.02, 0.98, stats_text, transform=ax.transAxes, fontsize=10,
                    verticalalignment='top', bbox=dict(boxstyle='round', facecolor='lightblue', alpha=0.7))
        # Bottom row: bullseye diagrams for bias and variance
        bullseye_data = []
        for deg in degrees:
            bias_sq, variance, _ = self.calculate_bias_variance(
                X_test, y_true_test, n_iterations=50, degree=deg, noise_level=noise_level
            )
            bullseye_data.append((bias_sq, variance))
        # Panel titles are derived below from the measured bias/variance levels
        for idx, (bias_sq, variance) in enumerate(bullseye_data):
            ax = fig.add_subplot(gs[2, idx])
            # Create bullseye target
            circles = [plt.Circle((0, 0), r, color='lightblue', fill=True, alpha=0.3)
                       for r in [3, 2, 1]]
            for circle in circles[::-1]:
                ax.add_patch(circle)
            # Add center (true target)
            ax.plot(0, 0, 'r*', markersize=30, label='True Target', zorder=10)
            # Generate sample points representing predictions
            n_points = 30
            # Bias determines offset from center
            bias_offset = np.sqrt(bias_sq) * 2  # scaled for visibility
            # Variance determines spread
            variance_spread = np.sqrt(variance) * 1.5  # scaled for visibility
            # Generate points around the biased center
            angles = np.random.uniform(0, 2 * np.pi, n_points)
            radii = np.random.normal(0, variance_spread, n_points)
            x_points = bias_offset + radii * np.cos(angles)
            y_points = radii * np.sin(angles)
            ax.scatter(x_points, y_points, color='purple', s=100, alpha=0.6,
                       edgecolors='black', linewidth=1.5, label='Predictions', zorder=5)
            # Add mean prediction point
            mean_x, mean_y = np.mean(x_points), np.mean(y_points)
            ax.plot(mean_x, mean_y, 'go', markersize=15, label='Mean Prediction', zorder=8)
            ax.set_xlim(-4, 4)
            ax.set_ylim(-4, 4)
            ax.set_aspect('equal')
            ax.grid(True, alpha=0.3)
            ax.set_xlabel('Prediction Error Dimension 1', fontsize=10)
            ax.set_ylabel('Prediction Error Dimension 2', fontsize=10)
            # Determine bias/variance category
            bias_level = 'High' if bias_sq > 0.5 else 'Low'
            var_level = 'High' if variance > 0.5 else 'Low'
            title = f'{bias_level} Bias, {var_level} Variance\n(Degree {degrees[idx]})'
            ax.set_title(title, fontsize=12, fontweight='bold')
            ax.legend(fontsize=9, loc='upper right')
            # Add text box with values
            stats_text = f'Bias²: {bias_sq:.3f}\nVariance: {variance:.3f}'
            ax.text(0.02, 0.02, stats_text, transform=ax.transAxes, fontsize=10,
                    verticalalignment='bottom', bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.8))
        # Add overall title
        fig.suptitle('Bias-Variance Tradeoff Visualization', fontsize=18, fontweight='bold', y=0.98)
        # Convert the figure to a PIL image
        buf = io.BytesIO()
        plt.savefig(buf, format='png', dpi=100, bbox_inches='tight')
        buf.seek(0)
        img = Image.open(buf)
        plt.close(fig)
        return img

    def create_summary_stats(self, degree, noise_level, n_samples):
        """Generate summary statistics text"""
        X_test = np.linspace(0, 10, 100)
        y_true_test = 2 * np.sin(X_test) + 0.1 * X_test**2 - 5
        # Calculate for the selected degree
        bias_sq, variance, _ = self.calculate_bias_variance(
            X_test, y_true_test, n_iterations=50, degree=degree, noise_level=noise_level
        )
        total_error = bias_sq + variance
        # Determine model quality
        if degree <= 2:
            quality = "UNDERFITTING (High Bias)"
            recommendation = "Increase model complexity"
        elif degree <= 6:
            quality = "GOOD BALANCE"
            recommendation = "Model complexity is appropriate"
        else:
            quality = "OVERFITTING (High Variance)"
            recommendation = "Reduce model complexity or add regularization"
        # Since y = f(x) + eps with eps ~ N(0, noise_level**2), the irreducible error is noise_level**2
        summary = f"""
╔══════════════════════════════════════════════════════════╗
║             BIAS-VARIANCE ANALYSIS SUMMARY               ║
╚══════════════════════════════════════════════════════════╝

Model Configuration:
• Polynomial Degree: {degree}
• Training Samples: {n_samples}
• Noise Level: {noise_level}

Performance Metrics:
• Bias² (Underfitting): {bias_sq:.4f}
• Variance (Overfitting): {variance:.4f}
• Total Error: {total_error:.4f}
• Irreducible Error: {noise_level**2:.4f}

Model Assessment: {quality}
Recommendation: {recommendation}

Key Insights:
• Low degree (1-2): High bias, low variance → Underfitting
• Medium degree (3-6): Balanced bias-variance → Optimal
• High degree (7+): Low bias, high variance → Overfitting

Tradeoff:
↑ Model Complexity → ↓ Bias, ↑ Variance
↓ Model Complexity → ↑ Bias, ↓ Variance
"""
        return summary


# Create demo instance
demo_instance = BiasVarianceDemo()

# Create Gradio interface
with gr.Blocks(title="Bias-Variance Tradeoff Demo", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 🎯 Bias-Variance Tradeoff Interactive Demo
    Explore the fundamental tradeoff between bias and variance in machine learning!
    """)
    with gr.Row():
        with gr.Column(scale=1):
            degree_slider = gr.Slider(
                minimum=1,
                maximum=15,
                value=4,
                step=1,
                label="🔧 Model Complexity (Polynomial Degree)",
                info="Low = Underfitting, Medium = Optimal, High = Overfitting"
            )
            noise_slider = gr.Slider(
                minimum=0.1,
                maximum=2.0,
                value=0.5,
                step=0.1,
                label="📊 Noise Level",
                info="Amount of random variation in the data"
            )
            samples_slider = gr.Slider(
                minimum=20,
                maximum=100,
                value=50,
                step=10,
                label="📈 Training Samples",
                info="Number of data points for training"
            )
            update_btn = gr.Button("🔄 Update Visualization", variant="primary", size="lg")
            gr.Markdown("""
            ### 💡 Quick Guide:

            **Underfitting** (Degree 1-2):
            - Model too simple
            - High bias, low variance
            - Poor on both train & test

            **Good Fit** (Degree 3-6):
            - Balanced complexity
            - Moderate bias & variance
            - Best generalization

            **Overfitting** (Degree 7+):
            - Model too complex
            - Low bias, high variance
            - Great on train, poor on test
            """)
            summary_text = gr.Textbox(
                label="📋 Analysis Summary",
                lines=25,
                max_lines=30,
                interactive=False
            )
        with gr.Column(scale=2):
            output_image = gr.Image(label="Visualization", height=900)

    def update_all(degree, noise, samples):
        img = demo_instance.visualize_fitting(int(degree), noise, int(samples))
        summary = demo_instance.create_summary_stats(int(degree), noise, int(samples))
        return img, summary
    # Update visualization on button click
    update_btn.click(
        fn=update_all,
        inputs=[degree_slider, noise_slider, samples_slider],
        outputs=[output_image, summary_text]
    )
    # Also update whenever any slider changes
    for slider in (degree_slider, noise_slider, samples_slider):
        slider.change(
            fn=update_all,
            inputs=[degree_slider, noise_slider, samples_slider],
            outputs=[output_image, summary_text]
        )
    # Initial visualization on page load
    demo.load(
        fn=update_all,
        inputs=[degree_slider, noise_slider, samples_slider],
        outputs=[output_image, summary_text]
    )

# Launch the app
if __name__ == "__main__":
    demo.launch()
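
# Assumed usage note (not from the original file): run `python app.py` locally;
# Gradio serves the UI at http://127.0.0.1:7860 by default. On Hugging Face
# Spaces, app.py is picked up automatically as the entry point.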