import gradio as gr import numpy as np import matplotlib.pyplot as plt from sklearn.preprocessing import PolynomialFeatures from sklearn.linear_model import LinearRegression from sklearn.pipeline import make_pipeline from sklearn.metrics import mean_squared_error import io from PIL import Image class BiasVarianceDemo: def __init__(self): np.random.seed(42) def generate_data(self, n_samples=50, noise_level=0.5): """Generate synthetic data with true underlying function""" X = np.sort(np.random.uniform(0, 10, n_samples)) # True function: sinusoidal with slight quadratic trend y_true = 2 * np.sin(X) + 0.1 * X**2 - 5 # Add noise y = y_true + np.random.normal(0, noise_level, n_samples) return X, y, y_true def fit_polynomial(self, X, y, degree): """Fit polynomial regression of given degree""" model = make_pipeline(PolynomialFeatures(degree), LinearRegression()) model.fit(X.reshape(-1, 1), y) return model def calculate_bias_variance(self, X_test, y_true_test, n_iterations=100, degree=1, noise_level=0.5): """Calculate bias and variance through bootstrap sampling""" predictions = [] for _ in range(n_iterations): # Generate new training data with same noise level X_train, y_train, _ = self.generate_data(n_samples=50, noise_level=noise_level) # Fit model model = self.fit_polynomial(X_train, y_train, degree) # Predict on test set y_pred = model.predict(X_test.reshape(-1, 1)) predictions.append(y_pred) predictions = np.array(predictions) # Calculate bias and variance mean_prediction = np.mean(predictions, axis=0) bias_squared = np.mean((mean_prediction - y_true_test) ** 2) variance = np.mean(np.var(predictions, axis=0)) return bias_squared, variance, predictions def visualize_fitting(self, degree, noise_level, n_samples): """Create visualization showing fitting quality""" fig = plt.figure(figsize=(20, 12)) gs = fig.add_gridspec(3, 3, hspace=0.3, wspace=0.3) # Generate data X, y, y_true = self.generate_data(n_samples=n_samples, noise_level=noise_level) X_plot = np.linspace(0, 10, 200) y_true_plot = 2 * np.sin(X_plot) + 0.1 * X_plot**2 - 5 # Fit models for different scenarios degrees = [1, degree, 15] # Underfitting, User choice, Overfitting titles = ['UNDERFITTING (Low Complexity)', f'YOUR MODEL (Degree {degree})', 'OVERFITTING (High Complexity)'] # Top row: Fitting comparison for idx, (deg, title) in enumerate(zip(degrees, titles)): ax = fig.add_subplot(gs[0, idx]) # Fit model model = self.fit_polynomial(X, y, deg) y_pred_plot = model.predict(X_plot.reshape(-1, 1)) # Plot ax.scatter(X, y, color='green', s=80, alpha=0.6, edgecolors='black', linewidth=1.5, label='Training Data') ax.plot(X_plot, y_true_plot, 'b--', linewidth=3, label='True Function', alpha=0.7) ax.plot(X_plot, y_pred_plot, 'r-', linewidth=3, label=f'Model (degree={deg})') # Calculate training error y_pred_train = model.predict(X.reshape(-1, 1)) train_mse = mean_squared_error(y, y_pred_train) ax.set_xlabel('X', fontsize=12, fontweight='bold') ax.set_ylabel('Y', fontsize=12, fontweight='bold') ax.set_title(title, fontsize=14, fontweight='bold', pad=10) ax.legend(fontsize=10) ax.grid(True, alpha=0.3) ax.set_ylim(-10, 5) # Limit y-axis range ax.text(0.02, 0.98, f'Train MSE: {train_mse:.3f}', transform=ax.transAxes, fontsize=11, verticalalignment='top', bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.7)) # Middle row: Bias-Variance Tradeoff Visualization X_test = np.linspace(0, 10, 100) y_true_test = 2 * np.sin(X_test) + 0.1 * X_test**2 - 5 for idx, deg in enumerate(degrees): ax = fig.add_subplot(gs[1, idx]) # Calculate bias and variance bias_sq, variance, predictions = self.calculate_bias_variance( X_test, y_true_test, n_iterations=50, degree=deg, noise_level=noise_level ) # Plot multiple predictions (showing variance) for i in range(min(20, len(predictions))): ax.plot(X_test, predictions[i], 'purple', alpha=0.15, linewidth=1) # Plot mean prediction and true function mean_pred = np.mean(predictions, axis=0) ax.plot(X_test, y_true_test, 'b--', linewidth=3, label='True Function', alpha=0.8) ax.plot(X_test, mean_pred, 'r-', linewidth=3, label='Mean Prediction') # Add confidence band (±1 std) std_pred = np.std(predictions, axis=0) ax.fill_between(X_test, mean_pred - std_pred, mean_pred + std_pred, color='red', alpha=0.2, label='±1 Std Dev') ax.set_xlabel('X', fontsize=12, fontweight='bold') ax.set_ylabel('Y', fontsize=12, fontweight='bold') ax.set_title(f'Bias-Variance (degree={deg})', fontsize=13, fontweight='bold') ax.legend(fontsize=9) ax.grid(True, alpha=0.3) ax.set_ylim(-10, 5) # Limit y-axis range # Add bias-variance stats total_error = bias_sq + variance stats_text = f'Bias²: {bias_sq:.3f}\nVariance: {variance:.3f}\nTotal: {total_error:.3f}' ax.text(0.02, 0.98, stats_text, transform=ax.transAxes, fontsize=10, verticalalignment='top', bbox=dict(boxstyle='round', facecolor='lightblue', alpha=0.7)) # Bottom row: Bullseye diagrams for bias-variance bullseye_data = [] for deg in degrees: bias_sq, variance, _ = self.calculate_bias_variance( X_test, y_true_test, n_iterations=50, degree=deg, noise_level=noise_level ) bullseye_data.append((bias_sq, variance)) bullseye_titles = [ 'Low Bias, High Variance', f'Degree {degree} Model', 'High Bias, Low Variance' if degrees[0] < degrees[2] else 'Low Bias, High Variance' ] # Adjust bullseye titles based on actual bias/variance for idx, (bias_sq, variance) in enumerate(bullseye_data): ax = fig.add_subplot(gs[2, idx]) # Create bullseye target circles = [plt.Circle((0, 0), r, color='lightblue', fill=True, alpha=0.3) for r in [3, 2, 1]] for circle in circles[::-1]: ax.add_patch(circle) # Add center (true target) ax.plot(0, 0, 'r*', markersize=30, label='True Target', zorder=10) # Generate sample points representing predictions n_points = 30 # Bias determines offset from center bias_offset = np.sqrt(bias_sq) * 2 # Scale for visibility # Variance determines spread variance_spread = np.sqrt(variance) * 1.5 # Scale for visibility # Generate points around biased center angles = np.random.uniform(0, 2*np.pi, n_points) radii = np.random.normal(0, variance_spread, n_points) x_points = bias_offset + radii * np.cos(angles) y_points = radii * np.sin(angles) ax.scatter(x_points, y_points, color='purple', s=100, alpha=0.6, edgecolors='black', linewidth=1.5, label='Predictions', zorder=5) # Add mean prediction point mean_x, mean_y = np.mean(x_points), np.mean(y_points) ax.plot(mean_x, mean_y, 'go', markersize=15, label='Mean Prediction', zorder=8) ax.set_xlim(-4, 4) ax.set_ylim(-4, 4) ax.set_aspect('equal') ax.grid(True, alpha=0.3) ax.set_xlabel('Prediction Error Dimension 1', fontsize=10) ax.set_ylabel('Prediction Error Dimension 2', fontsize=10) # Determine bias/variance category bias_level = 'High' if bias_sq > 0.5 else 'Low' var_level = 'High' if variance > 0.5 else 'Low' title = f'{bias_level} Bias, {var_level} Variance\n(Degree {degrees[idx]})' ax.set_title(title, fontsize=12, fontweight='bold') ax.legend(fontsize=9, loc='upper right') # Add text box with values stats_text = f'Bias²: {bias_sq:.3f}\nVariance: {variance:.3f}' ax.text(0.02, 0.02, stats_text, transform=ax.transAxes, fontsize=10, verticalalignment='bottom', bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.8)) # Add overall title fig.suptitle('Bias-Variance Tradeoff Visualization', fontsize=18, fontweight='bold', y=0.98) # Convert to image buf = io.BytesIO() plt.savefig(buf, format='png', dpi=100, bbox_inches='tight') buf.seek(0) img = Image.open(buf) plt.close() return img def create_summary_stats(self, degree, noise_level, n_samples): """Generate summary statistics text""" X, y, y_true = self.generate_data(n_samples=n_samples, noise_level=noise_level) X_test = np.linspace(0, 10, 100) y_true_test = 2 * np.sin(X_test) + 0.1 * X_test**2 - 5 # Calculate for selected degree bias_sq, variance, _ = self.calculate_bias_variance( X_test, y_true_test, n_iterations=50, degree=degree, noise_level=noise_level ) total_error = bias_sq + variance # Determine model quality if degree <= 2: quality = "UNDERFITTING (High Bias)" recommendation = "Increase model complexity" elif degree <= 6: quality = "GOOD BALANCE" recommendation = "Model complexity is appropriate" else: quality = "OVERFITTING (High Variance)" recommendation = "Reduce model complexity or add regularization" summary = f""" ╔══════════════════════════════════════════════════════════╗ ║ BIAS-VARIANCE ANALYSIS SUMMARY ║ ╚══════════════════════════════════════════════════════════╝ Model Configuration: • Polynomial Degree: {degree} • Training Samples: {n_samples} • Noise Level: {noise_level} Performance Metrics: • Bias² (Underfitting): {bias_sq:.4f} • Variance (Overfitting): {variance:.4f} • Total Error: {total_error:.4f} • Irreducible Error: {noise_level**2:.4f} Model Assessment: {quality} Recommendation: {recommendation} Key Insights: • Low degree (1-2): High bias, low variance → Underfitting • Medium degree (3-6): Balanced bias-variance → Optimal • High degree (7+): Low bias, high variance → Overfitting Tradeoff: ↑ Model Complexity → ↓ Bias, ↑ Variance ↓ Model Complexity → ↑ Bias, ↓ Variance """ return summary # Create demo instance demo_instance = BiasVarianceDemo() # Create Gradio interface with gr.Blocks(title="Bias-Variance Tradeoff Demo", theme=gr.themes.Soft()) as demo: gr.Markdown(""" # 🎯 Bias-Variance Tradeoff Interactive Demo Explore the fundamental tradeoff between bias and variance in machine learning! """) with gr.Row(): with gr.Column(scale=1): degree_slider = gr.Slider( minimum=1, maximum=15, value=4, step=1, label="🔧 Model Complexity (Polynomial Degree)", info="Low = Underfitting, Medium = Optimal, High = Overfitting" ) noise_slider = gr.Slider( minimum=0.1, maximum=2.0, value=0.5, step=0.1, label="📊 Noise Level", info="Amount of random variation in the data" ) samples_slider = gr.Slider( minimum=20, maximum=100, value=50, step=10, label="📈 Training Samples", info="Number of data points for training" ) update_btn = gr.Button("🔄 Update Visualization", variant="primary", size="lg") gr.Markdown(""" ### 💡 Quick Guide: **Underfitting** (Degree 1-2): - Model too simple - High bias, low variance - Poor on both train & test **Good Fit** (Degree 3-6): - Balanced complexity - Moderate bias & variance - Best generalization **Overfitting** (Degree 7+): - Model too complex - Low bias, high variance - Great on train, poor on test """) summary_text = gr.Textbox( label="📋 Analysis Summary", lines=25, max_lines=30, interactive=False ) with gr.Column(scale=2): output_image = gr.Image(label="Visualization", height=900) def update_all(degree, noise, samples): img = demo_instance.visualize_fitting(int(degree), noise, int(samples)) summary = demo_instance.create_summary_stats(int(degree), noise, int(samples)) return img, summary # Update visualization update_btn.click( fn=update_all, inputs=[degree_slider, noise_slider, samples_slider], outputs=[output_image, summary_text] ) # Also update on slider change degree_slider.change( fn=update_all, inputs=[degree_slider, noise_slider, samples_slider], outputs=[output_image, summary_text] ) # Initial visualization demo.load( fn=update_all, inputs=[degree_slider, noise_slider, samples_slider], outputs=[output_image, summary_text] ) # Launch the app if __name__ == "__main__": demo.launch()