Spaces:

rinabuoy
/

model-fitting-quality

Sleeping

File size: 21,660 Bytes

f0b33ab

{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "f33e5de7",
   "metadata": {},
   "source": [
    "# Bias-Variance Tradeoff Interactive Demo\n",
    "\n",
    "This notebook demonstrates the fundamental **bias-variance tradeoff** in machine learning through interactive visualizations.\n",
    "\n",
    "## Key Concepts:\n",
    "\n",
    "### 🎯 Bias\n",
    "- Error from overly simplistic assumptions\n",
    "- High bias → **Underfitting**\n",
    "- Model misses relevant patterns in the data\n",
    "\n",
    "### 📊 Variance\n",
    "- Error from sensitivity to training data fluctuations\n",
    "- High variance → **Overfitting**\n",
    "- Model learns noise instead of signal\n",
    "\n",
    "### ⚖️ The Tradeoff\n",
    "- **Total Error = Bias² + Variance + Irreducible Error**\n",
    "- As model complexity increases:\n",
    "  - Bias decreases ↓\n",
    "  - Variance increases ↑\n",
    "- Goal: Find the sweet spot!\n",
    "\n",
    "## Visualizations:\n",
    "\n",
    "1. **Fitting Comparison**: See underfitting vs optimal vs overfitting\n",
    "2. **Prediction Spread**: Visualize how predictions vary across different training sets\n",
    "3. **Bullseye Diagrams**: Intuitive representation of bias (offset) and variance (spread)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "b9c6cdbe",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "c:\\Users\\rinab\\miniforge3\\envs\\WORK\\lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "* Running on local URL:  http://127.0.0.1:7860\n",
      "* Running on public URL: https://3bab683affa1571f93.gradio.live\n",
      "\n",
      "This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div><iframe src=\"https://3bab683affa1571f93.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": []
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import gradio as gr\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn.preprocessing import PolynomialFeatures\n",
    "from sklearn.linear_model import LinearRegression\n",
    "from sklearn.pipeline import make_pipeline\n",
    "from sklearn.metrics import mean_squared_error\n",
    "import io\n",
    "from PIL import Image\n",
    "\n",
    "class BiasVarianceDemo:\n",
    "    def __init__(self):\n",
    "        np.random.seed(42)\n",
    "        \n",
    "    def generate_data(self, n_samples=50, noise_level=0.5):\n",
    "        \"\"\"Generate synthetic data with true underlying function\"\"\"\n",
    "        X = np.sort(np.random.uniform(0, 10, n_samples))\n",
    "        # True function: sinusoidal with slight quadratic trend\n",
    "        y_true = 2 * np.sin(X) + 0.1 * X**2 - 5\n",
    "        # Add noise\n",
    "        y = y_true + np.random.normal(0, noise_level, n_samples)\n",
    "        return X, y, y_true\n",
    "    \n",
    "    def fit_polynomial(self, X, y, degree):\n",
    "        \"\"\"Fit polynomial regression of given degree\"\"\"\n",
    "        model = make_pipeline(PolynomialFeatures(degree), LinearRegression())\n",
    "        model.fit(X.reshape(-1, 1), y)\n",
    "        return model\n",
    "    \n",
    "    def calculate_bias_variance(self, X_test, y_true_test, n_iterations=100, degree=1, noise_level=0.5):\n",
    "        \"\"\"Calculate bias and variance through bootstrap sampling\"\"\"\n",
    "        predictions = []\n",
    "        \n",
    "        for _ in range(n_iterations):\n",
    "            # Generate new training data with same noise level\n",
    "            X_train, y_train, _ = self.generate_data(n_samples=50, noise_level=noise_level)\n",
    "            \n",
    "            # Fit model\n",
    "            model = self.fit_polynomial(X_train, y_train, degree)\n",
    "            \n",
    "            # Predict on test set\n",
    "            y_pred = model.predict(X_test.reshape(-1, 1))\n",
    "            predictions.append(y_pred)\n",
    "        \n",
    "        predictions = np.array(predictions)\n",
    "        \n",
    "        # Calculate bias and variance\n",
    "        mean_prediction = np.mean(predictions, axis=0)\n",
    "        bias_squared = np.mean((mean_prediction - y_true_test) ** 2)\n",
    "        variance = np.mean(np.var(predictions, axis=0))\n",
    "        \n",
    "        return bias_squared, variance, predictions\n",
    "    \n",
    "    def visualize_fitting(self, degree, noise_level, n_samples):\n",
    "        \"\"\"Create visualization showing fitting quality\"\"\"\n",
    "        fig = plt.figure(figsize=(20, 12))\n",
    "        gs = fig.add_gridspec(3, 3, hspace=0.3, wspace=0.3)\n",
    "        \n",
    "        # Generate data\n",
    "        X, y, y_true = self.generate_data(n_samples=n_samples, noise_level=noise_level)\n",
    "        X_plot = np.linspace(0, 10, 200)\n",
    "        y_true_plot = 2 * np.sin(X_plot) + 0.1 * X_plot**2 - 5\n",
    "        \n",
    "        # Fit models for different scenarios\n",
    "        degrees = [1, degree, 15]  # Underfitting, User choice, Overfitting\n",
    "        titles = ['UNDERFITTING (Low Complexity)', f'YOUR MODEL (Degree {degree})', 'OVERFITTING (High Complexity)']\n",
    "        \n",
    "        # Top row: Fitting comparison\n",
    "        for idx, (deg, title) in enumerate(zip(degrees, titles)):\n",
    "            ax = fig.add_subplot(gs[0, idx])\n",
    "            \n",
    "            # Fit model\n",
    "            model = self.fit_polynomial(X, y, deg)\n",
    "            y_pred_plot = model.predict(X_plot.reshape(-1, 1))\n",
    "            \n",
    "            # Plot\n",
    "            ax.scatter(X, y, color='green', s=80, alpha=0.6, edgecolors='black', linewidth=1.5, label='Training Data')\n",
    "            ax.plot(X_plot, y_true_plot, 'b--', linewidth=3, label='True Function', alpha=0.7)\n",
    "            ax.plot(X_plot, y_pred_plot, 'r-', linewidth=3, label=f'Model (degree={deg})')\n",
    "            \n",
    "            # Calculate training error\n",
    "            y_pred_train = model.predict(X.reshape(-1, 1))\n",
    "            train_mse = mean_squared_error(y, y_pred_train)\n",
    "            \n",
    "            ax.set_xlabel('X', fontsize=12, fontweight='bold')\n",
    "            ax.set_ylabel('Y', fontsize=12, fontweight='bold')\n",
    "            ax.set_title(title, fontsize=14, fontweight='bold', pad=10)\n",
    "            ax.legend(fontsize=10)\n",
    "            ax.grid(True, alpha=0.3)\n",
    "            ax.set_ylim(-10, 5)  # Limit y-axis range\n",
    "            ax.text(0.02, 0.98, f'Train MSE: {train_mse:.3f}', \n",
    "                   transform=ax.transAxes, fontsize=11, verticalalignment='top',\n",
    "                   bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.7))\n",
    "        \n",
    "        # Middle row: Bias-Variance Tradeoff Visualization\n",
    "        X_test = np.linspace(0, 10, 100)\n",
    "        y_true_test = 2 * np.sin(X_test) + 0.1 * X_test**2 - 5\n",
    "        \n",
    "        for idx, deg in enumerate(degrees):\n",
    "            ax = fig.add_subplot(gs[1, idx])\n",
    "            \n",
    "            # Calculate bias and variance\n",
    "            bias_sq, variance, predictions = self.calculate_bias_variance(\n",
    "                X_test, y_true_test, n_iterations=50, degree=deg, noise_level=noise_level\n",
    "            )\n",
    "            \n",
    "            # Plot multiple predictions (showing variance)\n",
    "            for i in range(min(20, len(predictions))):\n",
    "                ax.plot(X_test, predictions[i], 'purple', alpha=0.15, linewidth=1)\n",
    "            \n",
    "            # Plot mean prediction and true function\n",
    "            mean_pred = np.mean(predictions, axis=0)\n",
    "            ax.plot(X_test, y_true_test, 'b--', linewidth=3, label='True Function', alpha=0.8)\n",
    "            ax.plot(X_test, mean_pred, 'r-', linewidth=3, label='Mean Prediction')\n",
    "            \n",
    "            # Add confidence band (±1 std)\n",
    "            std_pred = np.std(predictions, axis=0)\n",
    "            ax.fill_between(X_test, mean_pred - std_pred, mean_pred + std_pred, \n",
    "                           color='red', alpha=0.2, label='±1 Std Dev')\n",
    "            \n",
    "            ax.set_xlabel('X', fontsize=12, fontweight='bold')\n",
    "            ax.set_ylabel('Y', fontsize=12, fontweight='bold')\n",
    "            ax.set_title(f'Bias-Variance (degree={deg})', fontsize=13, fontweight='bold')\n",
    "            ax.legend(fontsize=9)\n",
    "            ax.grid(True, alpha=0.3)\n",
    "            ax.set_ylim(-10, 5)  # Limit y-axis range\n",
    "            \n",
    "            # Add bias-variance stats\n",
    "            total_error = bias_sq + variance\n",
    "            stats_text = f'Bias²: {bias_sq:.3f}\\nVariance: {variance:.3f}\\nTotal: {total_error:.3f}'\n",
    "            ax.text(0.02, 0.98, stats_text, transform=ax.transAxes, fontsize=10,\n",
    "                   verticalalignment='top', bbox=dict(boxstyle='round', facecolor='lightblue', alpha=0.7))\n",
    "        \n",
    "        # Bottom row: Bullseye diagrams for bias-variance\n",
    "        bullseye_data = []\n",
    "        for deg in degrees:\n",
    "            bias_sq, variance, _ = self.calculate_bias_variance(\n",
    "                X_test, y_true_test, n_iterations=50, degree=deg, noise_level=noise_level\n",
    "            )\n",
    "            bullseye_data.append((bias_sq, variance))\n",
    "        \n",
    "        bullseye_titles = [\n",
    "            'Low Bias, High Variance',\n",
    "            f'Degree {degree} Model',\n",
    "            'High Bias, Low Variance' if degrees[0] < degrees[2] else 'Low Bias, High Variance'\n",
    "        ]\n",
    "        \n",
    "        # Adjust bullseye titles based on actual bias/variance\n",
    "        for idx, (bias_sq, variance) in enumerate(bullseye_data):\n",
    "            ax = fig.add_subplot(gs[2, idx])\n",
    "            \n",
    "            # Create bullseye target\n",
    "            circles = [plt.Circle((0, 0), r, color='lightblue', fill=True, alpha=0.3) \n",
    "                      for r in [3, 2, 1]]\n",
    "            for circle in circles[::-1]:\n",
    "                ax.add_patch(circle)\n",
    "            \n",
    "            # Add center (true target)\n",
    "            ax.plot(0, 0, 'r*', markersize=30, label='True Target', zorder=10)\n",
    "            \n",
    "            # Generate sample points representing predictions\n",
    "            n_points = 30\n",
    "            # Bias determines offset from center\n",
    "            bias_offset = np.sqrt(bias_sq) * 2  # Scale for visibility\n",
    "            # Variance determines spread\n",
    "            variance_spread = np.sqrt(variance) * 1.5  # Scale for visibility\n",
    "            \n",
    "            # Generate points around biased center\n",
    "            angles = np.random.uniform(0, 2*np.pi, n_points)\n",
    "            radii = np.random.normal(0, variance_spread, n_points)\n",
    "            \n",
    "            x_points = bias_offset + radii * np.cos(angles)\n",
    "            y_points = radii * np.sin(angles)\n",
    "            \n",
    "            ax.scatter(x_points, y_points, color='purple', s=100, alpha=0.6, \n",
    "                      edgecolors='black', linewidth=1.5, label='Predictions', zorder=5)\n",
    "            \n",
    "            # Add mean prediction point\n",
    "            mean_x, mean_y = np.mean(x_points), np.mean(y_points)\n",
    "            ax.plot(mean_x, mean_y, 'go', markersize=15, label='Mean Prediction', zorder=8)\n",
    "            \n",
    "            ax.set_xlim(-4, 4)\n",
    "            ax.set_ylim(-4, 4)\n",
    "            ax.set_aspect('equal')\n",
    "            ax.grid(True, alpha=0.3)\n",
    "            ax.set_xlabel('Prediction Error Dimension 1', fontsize=10)\n",
    "            ax.set_ylabel('Prediction Error Dimension 2', fontsize=10)\n",
    "            \n",
    "            # Determine bias/variance category\n",
    "            bias_level = 'High' if bias_sq > 0.5 else 'Low'\n",
    "            var_level = 'High' if variance > 0.5 else 'Low'\n",
    "            title = f'{bias_level} Bias, {var_level} Variance\\n(Degree {degrees[idx]})'\n",
    "            \n",
    "            ax.set_title(title, fontsize=12, fontweight='bold')\n",
    "            ax.legend(fontsize=9, loc='upper right')\n",
    "            \n",
    "            # Add text box with values\n",
    "            stats_text = f'Bias²: {bias_sq:.3f}\\nVariance: {variance:.3f}'\n",
    "            ax.text(0.02, 0.02, stats_text, transform=ax.transAxes, fontsize=10,\n",
    "                   verticalalignment='bottom', bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.8))\n",
    "        \n",
    "        # Add overall title\n",
    "        fig.suptitle('Bias-Variance Tradeoff Visualization', fontsize=18, fontweight='bold', y=0.98)\n",
    "        \n",
    "        # Convert to image\n",
    "        buf = io.BytesIO()\n",
    "        plt.savefig(buf, format='png', dpi=100, bbox_inches='tight')\n",
    "        buf.seek(0)\n",
    "        img = Image.open(buf)\n",
    "        plt.close()\n",
    "        \n",
    "        return img\n",
    "    \n",
    "    def create_summary_stats(self, degree, noise_level, n_samples):\n",
    "        \"\"\"Generate summary statistics text\"\"\"\n",
    "        X, y, y_true = self.generate_data(n_samples=n_samples, noise_level=noise_level)\n",
    "        X_test = np.linspace(0, 10, 100)\n",
    "        y_true_test = 2 * np.sin(X_test) + 0.1 * X_test**2 - 5\n",
    "        \n",
    "        # Calculate for selected degree\n",
    "        bias_sq, variance, _ = self.calculate_bias_variance(\n",
    "            X_test, y_true_test, n_iterations=50, degree=degree, noise_level=noise_level\n",
    "        )\n",
    "        \n",
    "        total_error = bias_sq + variance\n",
    "        \n",
    "        # Determine model quality\n",
    "        if degree <= 2:\n",
    "            quality = \"UNDERFITTING (High Bias)\"\n",
    "            recommendation = \"Increase model complexity\"\n",
    "        elif degree <= 6:\n",
    "            quality = \"GOOD BALANCE\"\n",
    "            recommendation = \"Model complexity is appropriate\"\n",
    "        else:\n",
    "            quality = \"OVERFITTING (High Variance)\"\n",
    "            recommendation = \"Reduce model complexity or add regularization\"\n",
    "        \n",
    "        summary = f\"\"\"\n",
    "╔══════════════════════════════════════════════════════════╗\n",
    "║           BIAS-VARIANCE ANALYSIS SUMMARY                ║\n",
    "╚══════════════════════════════════════════════════════════╝\n",
    "\n",
    "Model Configuration:\n",
    "  • Polynomial Degree: {degree}\n",
    "  • Training Samples: {n_samples}\n",
    "  • Noise Level: {noise_level}\n",
    "\n",
    "Performance Metrics:\n",
    "  • Bias² (Underfitting): {bias_sq:.4f}\n",
    "  • Variance (Overfitting): {variance:.4f}\n",
    "  • Total Error: {total_error:.4f}\n",
    "  • Irreducible Error: {noise_level**2:.4f}\n",
    "\n",
    "Model Assessment: {quality}\n",
    "Recommendation: {recommendation}\n",
    "\n",
    "Key Insights:\n",
    "  • Low degree (1-2): High bias, low variance → Underfitting\n",
    "  • Medium degree (3-6): Balanced bias-variance → Optimal\n",
    "  • High degree (7+): Low bias, high variance → Overfitting\n",
    "\n",
    "Tradeoff:\n",
    "  ↑ Model Complexity → ↓ Bias, ↑ Variance\n",
    "  ↓ Model Complexity → ↑ Bias, ↓ Variance\n",
    "        \"\"\"\n",
    "        \n",
    "        return summary\n",
    "\n",
    "# Create demo instance\n",
    "demo_instance = BiasVarianceDemo()\n",
    "\n",
    "# Create Gradio interface\n",
    "with gr.Blocks(title=\"Bias-Variance Tradeoff Demo\", theme=gr.themes.Soft()) as demo:\n",
    "    gr.Markdown(\"\"\"\n",
    "    # 🎯 Bias-Variance Tradeoff Interactive Demo\n",
    "    \n",
    "    Explore the fundamental tradeoff between bias and variance in machine learning!\n",
    "    \n",
    "    \"\"\")\n",
    "    \n",
    "    with gr.Row():\n",
    "        # Left column: controls, quick guide and text summary.\n",
    "        with gr.Column(scale=1):\n",
    "            degree_slider = gr.Slider(\n",
    "                minimum=1,\n",
    "                maximum=15,\n",
    "                value=4,\n",
    "                step=1,\n",
    "                label=\"🔧 Model Complexity (Polynomial Degree)\",\n",
    "                info=\"Low = Underfitting, Medium = Optimal, High = Overfitting\"\n",
    "            )\n",
    "            \n",
    "            noise_slider = gr.Slider(\n",
    "                minimum=0.1,\n",
    "                maximum=2.0,\n",
    "                value=0.5,\n",
    "                step=0.1,\n",
    "                label=\"📊 Noise Level\",\n",
    "                info=\"Amount of random variation in the data\"\n",
    "            )\n",
    "            \n",
    "            samples_slider = gr.Slider(\n",
    "                minimum=20,\n",
    "                maximum=100,\n",
    "                value=50,\n",
    "                step=10,\n",
    "                label=\"📈 Training Samples\",\n",
    "                info=\"Number of data points for training\"\n",
    "            )\n",
    "            \n",
    "            update_btn = gr.Button(\"🔄 Update Visualization\", variant=\"primary\", size=\"lg\")\n",
    "            \n",
    "            gr.Markdown(\"\"\"\n",
    "            ### 💡 Quick Guide:\n",
    "            \n",
    "            **Underfitting** (Degree 1-2):\n",
    "            - Model too simple\n",
    "            - High bias, low variance\n",
    "            - Poor on both train & test\n",
    "            \n",
    "            **Good Fit** (Degree 3-6):\n",
    "            - Balanced complexity\n",
    "            - Moderate bias & variance\n",
    "            - Best generalization\n",
    "            \n",
    "            **Overfitting** (Degree 7+):\n",
    "            - Model too complex\n",
    "            - Low bias, high variance\n",
    "            - Great on train, poor on test\n",
    "            \"\"\")\n",
    "            \n",
    "            summary_text = gr.Textbox(\n",
    "                label=\"📋 Analysis Summary\",\n",
    "                lines=25,\n",
    "                max_lines=30,\n",
    "                interactive=False\n",
    "            )\n",
    "        \n",
    "        # Right column: the rendered figure.\n",
    "        with gr.Column(scale=2):\n",
    "            output_image = gr.Image(label=\"Visualization\", height=900)\n",
    "    \n",
    "    def update_all(degree, noise, samples):\n",
    "        \"\"\"Re-render the figure and the text summary for the current slider values.\n",
    "\n",
    "        ``degree`` and ``samples`` are cast to int before use (slider values may\n",
    "        arrive as floats - TODO confirm). Returns ``(image, summary)`` in the\n",
    "        order of the registered outputs.\n",
    "        \"\"\"\n",
    "        img = demo_instance.visualize_fitting(int(degree), noise, int(samples))\n",
    "        summary = demo_instance.create_summary_stats(int(degree), noise, int(samples))\n",
    "        return img, summary\n",
    "    \n",
    "    # Every trigger below reads the same three controls and fills the same two outputs.\n",
    "    control_inputs = [degree_slider, noise_slider, samples_slider]\n",
    "    view_outputs = [output_image, summary_text]\n",
    "    \n",
    "    # Update visualization\n",
    "    update_btn.click(fn=update_all, inputs=control_inputs, outputs=view_outputs)\n",
    "    \n",
    "    # Also update on slider change: wire all three sliders, not only the degree\n",
    "    # slider, so noise and sample-count edits are reflected without the button.\n",
    "    for slider in control_inputs:\n",
    "        slider.change(fn=update_all, inputs=control_inputs, outputs=view_outputs)\n",
    "    \n",
    "    # Initial visualization\n",
    "    demo.load(fn=update_all, inputs=control_inputs, outputs=view_outputs)\n",
    "\n",
    "# Launch the app. share=True additionally requests a temporary public URL,\n",
    "# so anyone holding that link can reach this running app.\n",
    "demo.launch(share=True)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "WORK",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}