rinabuoy committed on
Commit
f0b33ab
Β·
1 Parent(s): e17651f

overfitting

Browse files
Files changed (3) hide show
  1. app.py +369 -0
  2. overfitting.ipynb +476 -0
  3. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,369 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import matplotlib.pyplot as plt
4
+ from sklearn.preprocessing import PolynomialFeatures
5
+ from sklearn.linear_model import LinearRegression
6
+ from sklearn.pipeline import make_pipeline
7
+ from sklearn.metrics import mean_squared_error
8
+ import io
9
+ from PIL import Image
10
+
11
class BiasVarianceDemo:
    """Interactive bias-variance tradeoff demonstration.

    Draws noisy samples from a fixed ground-truth function
    (``2*sin(x) + 0.1*x**2 - 5`` on [0, 10]), fits polynomial models of
    varying degree, and renders three diagnostic rows: fitted curves,
    prediction spread over resampled training sets, and bullseye diagrams.
    """

    def __init__(self):
        # Seed once so the first data draw is reproducible across runs.
        # NOTE(review): subsequent calls keep advancing the global RNG state,
        # so repeated UI updates intentionally produce fresh resamples.
        np.random.seed(42)

    @staticmethod
    def _true_function(X):
        """Noiseless target: sinusoid plus a slight quadratic trend."""
        return 2 * np.sin(X) + 0.1 * X**2 - 5

    def generate_data(self, n_samples=50, noise_level=0.5):
        """Generate synthetic data with the true underlying function.

        Parameters
        ----------
        n_samples : int
            Number of observations to draw.
        noise_level : float
            Standard deviation of the additive Gaussian noise.

        Returns
        -------
        X : ndarray
            Sorted abscissae drawn uniformly from [0, 10].
        y : ndarray
            Noisy targets at ``X``.
        y_true : ndarray
            Noiseless targets at ``X``.
        """
        X = np.sort(np.random.uniform(0, 10, n_samples))
        y_true = self._true_function(X)
        # Add noise
        y = y_true + np.random.normal(0, noise_level, n_samples)
        return X, y, y_true

    def fit_polynomial(self, X, y, degree):
        """Fit a least-squares polynomial regression of the given degree."""
        model = make_pipeline(PolynomialFeatures(degree), LinearRegression())
        model.fit(X.reshape(-1, 1), y)
        return model

    def calculate_bias_variance(self, X_test, y_true_test, n_iterations=100,
                                degree=1, noise_level=0.5, n_samples=50):
        """Estimate bias² and variance by refitting on fresh training draws.

        Parameters
        ----------
        X_test, y_true_test : ndarray
            Evaluation grid and the noiseless targets on it.
        n_iterations : int
            Number of independent training sets to fit.
        degree : int
            Polynomial degree of each fitted model.
        noise_level : float
            Noise standard deviation used when resampling training data.
        n_samples : int
            Training-set size per iteration (new parameter; default 50
            matches the previous hard-coded value, so existing callers
            are unaffected — fixes the "Training Samples" slider being
            ignored by the bias/variance estimates).

        Returns
        -------
        (bias_squared, variance, predictions) where ``predictions`` has
        shape ``(n_iterations, len(X_test))``.
        """
        predictions = []
        for _ in range(n_iterations):
            # Generate new training data with the same noise level.
            X_train, y_train, _ = self.generate_data(
                n_samples=n_samples, noise_level=noise_level
            )
            model = self.fit_polynomial(X_train, y_train, degree)
            predictions.append(model.predict(X_test.reshape(-1, 1)))

        predictions = np.array(predictions)

        # Bias² = squared gap between the average prediction and the truth;
        # variance = average pointwise spread of the individual predictions.
        mean_prediction = np.mean(predictions, axis=0)
        bias_squared = np.mean((mean_prediction - y_true_test) ** 2)
        variance = np.mean(np.var(predictions, axis=0))

        return bias_squared, variance, predictions

    def visualize_fitting(self, degree, noise_level, n_samples):
        """Render the 3x3 diagnostic figure and return it as a PIL image.

        Rows: (1) fitted curve vs. true function for degrees
        ``[1, degree, 15]``; (2) prediction spread over bootstrap training
        sets; (3) bullseye diagrams (bias = offset, variance = spread).
        """
        fig = plt.figure(figsize=(20, 12))
        gs = fig.add_gridspec(3, 3, hspace=0.3, wspace=0.3)

        # Generate one training sample plus a dense grid of the truth.
        X, y, y_true = self.generate_data(n_samples=n_samples, noise_level=noise_level)
        X_plot = np.linspace(0, 10, 200)
        y_true_plot = self._true_function(X_plot)

        # Columns: underfitting baseline, the user's model, overfitting baseline.
        degrees = [1, degree, 15]
        titles = ['UNDERFITTING (Low Complexity)', f'YOUR MODEL (Degree {degree})', 'OVERFITTING (High Complexity)']

        # Top row: fitting comparison on the single training sample.
        for idx, (deg, title) in enumerate(zip(degrees, titles)):
            ax = fig.add_subplot(gs[0, idx])

            model = self.fit_polynomial(X, y, deg)
            y_pred_plot = model.predict(X_plot.reshape(-1, 1))

            ax.scatter(X, y, color='green', s=80, alpha=0.6, edgecolors='black', linewidth=1.5, label='Training Data')
            ax.plot(X_plot, y_true_plot, 'b--', linewidth=3, label='True Function', alpha=0.7)
            ax.plot(X_plot, y_pred_plot, 'r-', linewidth=3, label=f'Model (degree={deg})')

            # Training error on the very sample the model was fit to.
            y_pred_train = model.predict(X.reshape(-1, 1))
            train_mse = mean_squared_error(y, y_pred_train)

            ax.set_xlabel('X', fontsize=12, fontweight='bold')
            ax.set_ylabel('Y', fontsize=12, fontweight='bold')
            ax.set_title(title, fontsize=14, fontweight='bold', pad=10)
            ax.legend(fontsize=10)
            ax.grid(True, alpha=0.3)
            ax.set_ylim(-10, 5)  # Limit y-axis range so wild overfit curves stay comparable
            ax.text(0.02, 0.98, f'Train MSE: {train_mse:.3f}',
                    transform=ax.transAxes, fontsize=11, verticalalignment='top',
                    bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.7))

        # Middle row: bias-variance tradeoff visualization.
        X_test = np.linspace(0, 10, 100)
        y_true_test = self._true_function(X_test)

        # Collect (bias², variance) per degree so the bullseye row can reuse
        # them instead of re-running the 50-fit bootstrap a second time.
        bullseye_data = []
        for idx, deg in enumerate(degrees):
            ax = fig.add_subplot(gs[1, idx])

            bias_sq, variance, predictions = self.calculate_bias_variance(
                X_test, y_true_test, n_iterations=50, degree=deg,
                noise_level=noise_level, n_samples=n_samples
            )
            bullseye_data.append((bias_sq, variance))

            # Plot a subsample of individual fits to show the variance.
            for i in range(min(20, len(predictions))):
                ax.plot(X_test, predictions[i], 'purple', alpha=0.15, linewidth=1)

            # Plot mean prediction and true function.
            mean_pred = np.mean(predictions, axis=0)
            ax.plot(X_test, y_true_test, 'b--', linewidth=3, label='True Function', alpha=0.8)
            ax.plot(X_test, mean_pred, 'r-', linewidth=3, label='Mean Prediction')

            # Add confidence band (±1 std).
            std_pred = np.std(predictions, axis=0)
            ax.fill_between(X_test, mean_pred - std_pred, mean_pred + std_pred,
                            color='red', alpha=0.2, label='±1 Std Dev')

            ax.set_xlabel('X', fontsize=12, fontweight='bold')
            ax.set_ylabel('Y', fontsize=12, fontweight='bold')
            ax.set_title(f'Bias-Variance (degree={deg})', fontsize=13, fontweight='bold')
            ax.legend(fontsize=9)
            ax.grid(True, alpha=0.3)
            ax.set_ylim(-10, 5)  # Limit y-axis range

            # Add bias-variance stats.
            total_error = bias_sq + variance
            stats_text = f'Bias²: {bias_sq:.3f}\nVariance: {variance:.3f}\nTotal: {total_error:.3f}'
            ax.text(0.02, 0.98, stats_text, transform=ax.transAxes, fontsize=10,
                    verticalalignment='top', bbox=dict(boxstyle='round', facecolor='lightblue', alpha=0.7))

        # Bottom row: bullseye diagrams (reusing the stats computed above).
        for idx, (bias_sq, variance) in enumerate(bullseye_data):
            ax = fig.add_subplot(gs[2, idx])

            # Concentric rings form the target; draw largest first.
            circles = [plt.Circle((0, 0), r, color='lightblue', fill=True, alpha=0.3)
                       for r in [3, 2, 1]]
            for circle in circles[::-1]:
                ax.add_patch(circle)

            # Add center (true target).
            ax.plot(0, 0, 'r*', markersize=30, label='True Target', zorder=10)

            # Synthetic scatter: bias shifts the cloud, variance spreads it.
            n_points = 30
            bias_offset = np.sqrt(bias_sq) * 2  # Scale for visibility
            variance_spread = np.sqrt(variance) * 1.5  # Scale for visibility

            angles = np.random.uniform(0, 2*np.pi, n_points)
            radii = np.random.normal(0, variance_spread, n_points)

            x_points = bias_offset + radii * np.cos(angles)
            y_points = radii * np.sin(angles)

            ax.scatter(x_points, y_points, color='purple', s=100, alpha=0.6,
                       edgecolors='black', linewidth=1.5, label='Predictions', zorder=5)

            # Add mean prediction point.
            mean_x, mean_y = np.mean(x_points), np.mean(y_points)
            ax.plot(mean_x, mean_y, 'go', markersize=15, label='Mean Prediction', zorder=8)

            ax.set_xlim(-4, 4)
            ax.set_ylim(-4, 4)
            ax.set_aspect('equal')
            ax.grid(True, alpha=0.3)
            ax.set_xlabel('Prediction Error Dimension 1', fontsize=10)
            ax.set_ylabel('Prediction Error Dimension 2', fontsize=10)

            # Categorize bias/variance with a simple 0.5 threshold for the title.
            bias_level = 'High' if bias_sq > 0.5 else 'Low'
            var_level = 'High' if variance > 0.5 else 'Low'
            title = f'{bias_level} Bias, {var_level} Variance\n(Degree {degrees[idx]})'

            ax.set_title(title, fontsize=12, fontweight='bold')
            ax.legend(fontsize=9, loc='upper right')

            # Add text box with the numeric values.
            stats_text = f'Bias²: {bias_sq:.3f}\nVariance: {variance:.3f}'
            ax.text(0.02, 0.02, stats_text, transform=ax.transAxes, fontsize=10,
                    verticalalignment='bottom', bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.8))

        fig.suptitle('Bias-Variance Tradeoff Visualization', fontsize=18, fontweight='bold', y=0.98)

        # Rasterise to a PIL image for Gradio.
        buf = io.BytesIO()
        plt.savefig(buf, format='png', dpi=100, bbox_inches='tight')
        buf.seek(0)
        img = Image.open(buf)
        plt.close(fig)  # close this figure explicitly so repeated calls don't leak figures

        return img

    def create_summary_stats(self, degree, noise_level, n_samples):
        """Generate the plain-text analysis summary for the chosen settings."""
        X_test = np.linspace(0, 10, 100)
        y_true_test = self._true_function(X_test)

        # Calculate for the selected degree (n_samples now honoured).
        bias_sq, variance, _ = self.calculate_bias_variance(
            X_test, y_true_test, n_iterations=50, degree=degree,
            noise_level=noise_level, n_samples=n_samples
        )

        total_error = bias_sq + variance

        # Rule-of-thumb model-quality assessment, matching the UI's guide text.
        if degree <= 2:
            quality = "UNDERFITTING (High Bias)"
            recommendation = "Increase model complexity"
        elif degree <= 6:
            quality = "GOOD BALANCE"
            recommendation = "Model complexity is appropriate"
        else:
            quality = "OVERFITTING (High Variance)"
            recommendation = "Reduce model complexity or add regularization"

        summary = f"""
╔══════════════════════════════════════════════════════════╗
║              BIAS-VARIANCE ANALYSIS SUMMARY              ║
╚══════════════════════════════════════════════════════════╝

Model Configuration:
  • Polynomial Degree: {degree}
  • Training Samples: {n_samples}
  • Noise Level: {noise_level}

Performance Metrics:
  • Bias² (Underfitting): {bias_sq:.4f}
  • Variance (Overfitting): {variance:.4f}
  • Total Error: {total_error:.4f}
  • Irreducible Error: {noise_level**2:.4f}

Model Assessment: {quality}
Recommendation: {recommendation}

Key Insights:
  • Low degree (1-2): High bias, low variance → Underfitting
  • Medium degree (3-6): Balanced bias-variance → Optimal
  • High degree (7+): Low bias, high variance → Overfitting

Tradeoff:
  ↑ Model Complexity → ↓ Bias, ↑ Variance
  ↓ Model Complexity → ↑ Bias, ↓ Variance
"""

        return summary
268
+
269
# Create demo instance
demo_instance = BiasVarianceDemo()

# Create Gradio interface.
# NOTE: inside gr.Blocks, the nesting/order of the `with` contexts IS the
# page layout — components appear where they are constructed.
with gr.Blocks(title="Bias-Variance Tradeoff Demo", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 🎯 Bias-Variance Tradeoff Interactive Demo
    
    Explore the fundamental tradeoff between bias and variance in machine learning!
    
    """)
    
    with gr.Row():
        # Left column (narrow): control sliders, quick guide, text summary.
        with gr.Column(scale=1):
            # Polynomial degree drives model complexity in all three plot rows.
            degree_slider = gr.Slider(
                minimum=1,
                maximum=15,
                value=4,
                step=1,
                label="🔧 Model Complexity (Polynomial Degree)",
                info="Low = Underfitting, Medium = Optimal, High = Overfitting"
            )
            
            # Standard deviation of the Gaussian noise added to the data.
            noise_slider = gr.Slider(
                minimum=0.1,
                maximum=2.0,
                value=0.5,
                step=0.1,
                label="📊 Noise Level",
                info="Amount of random variation in the data"
            )
            
            # Size of each training sample.
            samples_slider = gr.Slider(
                minimum=20,
                maximum=100,
                value=50,
                step=10,
                label="📈 Training Samples",
                info="Number of data points for training"
            )
            
            update_btn = gr.Button("🔄 Update Visualization", variant="primary", size="lg")
            
            gr.Markdown("""
            ### 💡 Quick Guide:
            
            **Underfitting** (Degree 1-2):
            - Model too simple
            - High bias, low variance
            - Poor on both train & test
            
            **Good Fit** (Degree 3-6):
            - Balanced complexity
            - Moderate bias & variance
            - Best generalization
            
            **Overfitting** (Degree 7+):
            - Model too complex
            - Low bias, high variance
            - Great on train, poor on test
            """)
            
            # Read-only text box filled by create_summary_stats().
            summary_text = gr.Textbox(
                label="📋 Analysis Summary",
                lines=25,
                max_lines=30,
                interactive=False
            )
        
        # Right column (wide): the rendered 3x3 diagnostic figure.
        with gr.Column(scale=2):
            output_image = gr.Image(label="Visualization", height=900)
    
    def update_all(degree, noise, samples):
        """Recompute figure and summary from the current slider values.

        Sliders deliver floats; degree/samples are cast to int before use.
        """
        img = demo_instance.visualize_fitting(int(degree), noise, int(samples))
        summary = demo_instance.create_summary_stats(int(degree), noise, int(samples))
        return img, summary
    
    # Update visualization on button press.
    update_btn.click(
        fn=update_all,
        inputs=[degree_slider, noise_slider, samples_slider],
        outputs=[output_image, summary_text]
    )
    
    # Also update live when the degree slider moves.
    # NOTE(review): noise_slider/samples_slider have no .change handler —
    # their edits only take effect via the button; presumably intentional
    # since recomputation is expensive.
    degree_slider.change(
        fn=update_all,
        inputs=[degree_slider, noise_slider, samples_slider],
        outputs=[output_image, summary_text]
    )
    
    # Initial visualization rendered when the page loads.
    demo.load(
        fn=update_all,
        inputs=[degree_slider, noise_slider, samples_slider],
        outputs=[output_image, summary_text]
    )

# Launch the app
if __name__ == "__main__":
    demo.launch()
overfitting.ipynb ADDED
@@ -0,0 +1,476 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "f33e5de7",
6
+ "metadata": {},
7
+ "source": [
8
+ "# Bias-Variance Tradeoff Interactive Demo\n",
9
+ "\n",
10
+ "This notebook demonstrates the fundamental **bias-variance tradeoff** in machine learning through interactive visualizations.\n",
11
+ "\n",
12
+ "## Key Concepts:\n",
13
+ "\n",
14
+ "### 🎯 Bias\n",
15
+ "- Error from overly simplistic assumptions\n",
16
+ "- High bias β†’ **Underfitting**\n",
17
+ "- Model misses relevant patterns in the data\n",
18
+ "\n",
19
+ "### πŸ“Š Variance\n",
20
+ "- Error from sensitivity to training data fluctuations\n",
21
+ "- High variance β†’ **Overfitting**\n",
22
+ "- Model learns noise instead of signal\n",
23
+ "\n",
24
+ "### βš–οΈ The Tradeoff\n",
25
+ "- **Total Error = BiasΒ² + Variance + Irreducible Error**\n",
26
+ "- As model complexity increases:\n",
27
+ " - Bias decreases ↓\n",
28
+ " - Variance increases ↑\n",
29
+ "- Goal: Find the sweet spot!\n",
30
+ "\n",
31
+ "## Visualizations:\n",
32
+ "\n",
33
+ "1. **Fitting Comparison**: See underfitting vs optimal vs overfitting\n",
34
+ "2. **Prediction Spread**: Visualize how predictions vary across different training sets\n",
35
+ "3. **Bullseye Diagrams**: Intuitive representation of bias (offset) and variance (spread)"
36
+ ]
37
+ },
38
+ {
39
+ "cell_type": "code",
40
+ "execution_count": 1,
41
+ "id": "b9c6cdbe",
42
+ "metadata": {},
43
+ "outputs": [
44
+ {
45
+ "name": "stderr",
46
+ "output_type": "stream",
47
+ "text": [
48
+ "c:\\Users\\rinab\\miniforge3\\envs\\WORK\\lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
49
+ " from .autonotebook import tqdm as notebook_tqdm\n"
50
+ ]
51
+ },
52
+ {
53
+ "name": "stdout",
54
+ "output_type": "stream",
55
+ "text": [
56
+ "* Running on local URL: http://127.0.0.1:7860\n",
57
+ "* Running on public URL: https://3bab683affa1571f93.gradio.live\n",
58
+ "\n",
59
+ "This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n"
60
+ ]
61
+ },
62
+ {
63
+ "data": {
64
+ "text/html": [
65
+ "<div><iframe src=\"https://3bab683affa1571f93.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
66
+ ],
67
+ "text/plain": [
68
+ "<IPython.core.display.HTML object>"
69
+ ]
70
+ },
71
+ "metadata": {},
72
+ "output_type": "display_data"
73
+ },
74
+ {
75
+ "data": {
76
+ "text/plain": []
77
+ },
78
+ "execution_count": 1,
79
+ "metadata": {},
80
+ "output_type": "execute_result"
81
+ }
82
+ ],
83
+ "source": [
84
+ "import gradio as gr\n",
85
+ "import numpy as np\n",
86
+ "import matplotlib.pyplot as plt\n",
87
+ "from sklearn.preprocessing import PolynomialFeatures\n",
88
+ "from sklearn.linear_model import LinearRegression\n",
89
+ "from sklearn.pipeline import make_pipeline\n",
90
+ "from sklearn.metrics import mean_squared_error\n",
91
+ "import io\n",
92
+ "from PIL import Image\n",
93
+ "\n",
94
+ "class BiasVarianceDemo:\n",
95
+ " def __init__(self):\n",
96
+ " np.random.seed(42)\n",
97
+ " \n",
98
+ " def generate_data(self, n_samples=50, noise_level=0.5):\n",
99
+ " \"\"\"Generate synthetic data with true underlying function\"\"\"\n",
100
+ " X = np.sort(np.random.uniform(0, 10, n_samples))\n",
101
+ " # True function: sinusoidal with slight quadratic trend\n",
102
+ " y_true = 2 * np.sin(X) + 0.1 * X**2 - 5\n",
103
+ " # Add noise\n",
104
+ " y = y_true + np.random.normal(0, noise_level, n_samples)\n",
105
+ " return X, y, y_true\n",
106
+ " \n",
107
+ " def fit_polynomial(self, X, y, degree):\n",
108
+ " \"\"\"Fit polynomial regression of given degree\"\"\"\n",
109
+ " model = make_pipeline(PolynomialFeatures(degree), LinearRegression())\n",
110
+ " model.fit(X.reshape(-1, 1), y)\n",
111
+ " return model\n",
112
+ " \n",
113
+ " def calculate_bias_variance(self, X_test, y_true_test, n_iterations=100, degree=1, noise_level=0.5):\n",
114
+ " \"\"\"Calculate bias and variance through bootstrap sampling\"\"\"\n",
115
+ " predictions = []\n",
116
+ " \n",
117
+ " for _ in range(n_iterations):\n",
118
+ " # Generate new training data with same noise level\n",
119
+ " X_train, y_train, _ = self.generate_data(n_samples=50, noise_level=noise_level)\n",
120
+ " \n",
121
+ " # Fit model\n",
122
+ " model = self.fit_polynomial(X_train, y_train, degree)\n",
123
+ " \n",
124
+ " # Predict on test set\n",
125
+ " y_pred = model.predict(X_test.reshape(-1, 1))\n",
126
+ " predictions.append(y_pred)\n",
127
+ " \n",
128
+ " predictions = np.array(predictions)\n",
129
+ " \n",
130
+ " # Calculate bias and variance\n",
131
+ " mean_prediction = np.mean(predictions, axis=0)\n",
132
+ " bias_squared = np.mean((mean_prediction - y_true_test) ** 2)\n",
133
+ " variance = np.mean(np.var(predictions, axis=0))\n",
134
+ " \n",
135
+ " return bias_squared, variance, predictions\n",
136
+ " \n",
137
+ " def visualize_fitting(self, degree, noise_level, n_samples):\n",
138
+ " \"\"\"Create visualization showing fitting quality\"\"\"\n",
139
+ " fig = plt.figure(figsize=(20, 12))\n",
140
+ " gs = fig.add_gridspec(3, 3, hspace=0.3, wspace=0.3)\n",
141
+ " \n",
142
+ " # Generate data\n",
143
+ " X, y, y_true = self.generate_data(n_samples=n_samples, noise_level=noise_level)\n",
144
+ " X_plot = np.linspace(0, 10, 200)\n",
145
+ " y_true_plot = 2 * np.sin(X_plot) + 0.1 * X_plot**2 - 5\n",
146
+ " \n",
147
+ " # Fit models for different scenarios\n",
148
+ " degrees = [1, degree, 15] # Underfitting, User choice, Overfitting\n",
149
+ " titles = ['UNDERFITTING (Low Complexity)', f'YOUR MODEL (Degree {degree})', 'OVERFITTING (High Complexity)']\n",
150
+ " \n",
151
+ " # Top row: Fitting comparison\n",
152
+ " for idx, (deg, title) in enumerate(zip(degrees, titles)):\n",
153
+ " ax = fig.add_subplot(gs[0, idx])\n",
154
+ " \n",
155
+ " # Fit model\n",
156
+ " model = self.fit_polynomial(X, y, deg)\n",
157
+ " y_pred_plot = model.predict(X_plot.reshape(-1, 1))\n",
158
+ " \n",
159
+ " # Plot\n",
160
+ " ax.scatter(X, y, color='green', s=80, alpha=0.6, edgecolors='black', linewidth=1.5, label='Training Data')\n",
161
+ " ax.plot(X_plot, y_true_plot, 'b--', linewidth=3, label='True Function', alpha=0.7)\n",
162
+ " ax.plot(X_plot, y_pred_plot, 'r-', linewidth=3, label=f'Model (degree={deg})')\n",
163
+ " \n",
164
+ " # Calculate training error\n",
165
+ " y_pred_train = model.predict(X.reshape(-1, 1))\n",
166
+ " train_mse = mean_squared_error(y, y_pred_train)\n",
167
+ " \n",
168
+ " ax.set_xlabel('X', fontsize=12, fontweight='bold')\n",
169
+ " ax.set_ylabel('Y', fontsize=12, fontweight='bold')\n",
170
+ " ax.set_title(title, fontsize=14, fontweight='bold', pad=10)\n",
171
+ " ax.legend(fontsize=10)\n",
172
+ " ax.grid(True, alpha=0.3)\n",
173
+ " ax.set_ylim(-10, 5) # Limit y-axis range\n",
174
+ " ax.text(0.02, 0.98, f'Train MSE: {train_mse:.3f}', \n",
175
+ " transform=ax.transAxes, fontsize=11, verticalalignment='top',\n",
176
+ " bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.7))\n",
177
+ " \n",
178
+ " # Middle row: Bias-Variance Tradeoff Visualization\n",
179
+ " X_test = np.linspace(0, 10, 100)\n",
180
+ " y_true_test = 2 * np.sin(X_test) + 0.1 * X_test**2 - 5\n",
181
+ " \n",
182
+ " for idx, deg in enumerate(degrees):\n",
183
+ " ax = fig.add_subplot(gs[1, idx])\n",
184
+ " \n",
185
+ " # Calculate bias and variance\n",
186
+ " bias_sq, variance, predictions = self.calculate_bias_variance(\n",
187
+ " X_test, y_true_test, n_iterations=50, degree=deg, noise_level=noise_level\n",
188
+ " )\n",
189
+ " \n",
190
+ " # Plot multiple predictions (showing variance)\n",
191
+ " for i in range(min(20, len(predictions))):\n",
192
+ " ax.plot(X_test, predictions[i], 'purple', alpha=0.15, linewidth=1)\n",
193
+ " \n",
194
+ " # Plot mean prediction and true function\n",
195
+ " mean_pred = np.mean(predictions, axis=0)\n",
196
+ " ax.plot(X_test, y_true_test, 'b--', linewidth=3, label='True Function', alpha=0.8)\n",
197
+ " ax.plot(X_test, mean_pred, 'r-', linewidth=3, label='Mean Prediction')\n",
198
+ " \n",
199
+ " # Add confidence band (Β±1 std)\n",
200
+ " std_pred = np.std(predictions, axis=0)\n",
201
+ " ax.fill_between(X_test, mean_pred - std_pred, mean_pred + std_pred, \n",
202
+ " color='red', alpha=0.2, label='Β±1 Std Dev')\n",
203
+ " \n",
204
+ " ax.set_xlabel('X', fontsize=12, fontweight='bold')\n",
205
+ " ax.set_ylabel('Y', fontsize=12, fontweight='bold')\n",
206
+ " ax.set_title(f'Bias-Variance (degree={deg})', fontsize=13, fontweight='bold')\n",
207
+ " ax.legend(fontsize=9)\n",
208
+ " ax.grid(True, alpha=0.3)\n",
209
+ " ax.set_ylim(-10, 5) # Limit y-axis range\n",
210
+ " \n",
211
+ " # Add bias-variance stats\n",
212
+ " total_error = bias_sq + variance\n",
213
+ " stats_text = f'BiasΒ²: {bias_sq:.3f}\\nVariance: {variance:.3f}\\nTotal: {total_error:.3f}'\n",
214
+ " ax.text(0.02, 0.98, stats_text, transform=ax.transAxes, fontsize=10,\n",
215
+ " verticalalignment='top', bbox=dict(boxstyle='round', facecolor='lightblue', alpha=0.7))\n",
216
+ " \n",
217
+ " # Bottom row: Bullseye diagrams for bias-variance\n",
218
+ " bullseye_data = []\n",
219
+ " for deg in degrees:\n",
220
+ " bias_sq, variance, _ = self.calculate_bias_variance(\n",
221
+ " X_test, y_true_test, n_iterations=50, degree=deg, noise_level=noise_level\n",
222
+ " )\n",
223
+ " bullseye_data.append((bias_sq, variance))\n",
224
+ " \n",
225
+ " bullseye_titles = [\n",
226
+ " 'Low Bias, High Variance',\n",
227
+ " f'Degree {degree} Model',\n",
228
+ " 'High Bias, Low Variance' if degrees[0] < degrees[2] else 'Low Bias, High Variance'\n",
229
+ " ]\n",
230
+ " \n",
231
+ " # Adjust bullseye titles based on actual bias/variance\n",
232
+ " for idx, (bias_sq, variance) in enumerate(bullseye_data):\n",
233
+ " ax = fig.add_subplot(gs[2, idx])\n",
234
+ " \n",
235
+ " # Create bullseye target\n",
236
+ " circles = [plt.Circle((0, 0), r, color='lightblue', fill=True, alpha=0.3) \n",
237
+ " for r in [3, 2, 1]]\n",
238
+ " for circle in circles[::-1]:\n",
239
+ " ax.add_patch(circle)\n",
240
+ " \n",
241
+ " # Add center (true target)\n",
242
+ " ax.plot(0, 0, 'r*', markersize=30, label='True Target', zorder=10)\n",
243
+ " \n",
244
+ " # Generate sample points representing predictions\n",
245
+ " n_points = 30\n",
246
+ " # Bias determines offset from center\n",
247
+ " bias_offset = np.sqrt(bias_sq) * 2 # Scale for visibility\n",
248
+ " # Variance determines spread\n",
249
+ " variance_spread = np.sqrt(variance) * 1.5 # Scale for visibility\n",
250
+ " \n",
251
+ " # Generate points around biased center\n",
252
+ " angles = np.random.uniform(0, 2*np.pi, n_points)\n",
253
+ " radii = np.random.normal(0, variance_spread, n_points)\n",
254
+ " \n",
255
+ " x_points = bias_offset + radii * np.cos(angles)\n",
256
+ " y_points = radii * np.sin(angles)\n",
257
+ " \n",
258
+ " ax.scatter(x_points, y_points, color='purple', s=100, alpha=0.6, \n",
259
+ " edgecolors='black', linewidth=1.5, label='Predictions', zorder=5)\n",
260
+ " \n",
261
+ " # Add mean prediction point\n",
262
+ " mean_x, mean_y = np.mean(x_points), np.mean(y_points)\n",
263
+ " ax.plot(mean_x, mean_y, 'go', markersize=15, label='Mean Prediction', zorder=8)\n",
264
+ " \n",
265
+ " ax.set_xlim(-4, 4)\n",
266
+ " ax.set_ylim(-4, 4)\n",
267
+ " ax.set_aspect('equal')\n",
268
+ " ax.grid(True, alpha=0.3)\n",
269
+ " ax.set_xlabel('Prediction Error Dimension 1', fontsize=10)\n",
270
+ " ax.set_ylabel('Prediction Error Dimension 2', fontsize=10)\n",
271
+ " \n",
272
+ " # Determine bias/variance category\n",
273
+ " bias_level = 'High' if bias_sq > 0.5 else 'Low'\n",
274
+ " var_level = 'High' if variance > 0.5 else 'Low'\n",
275
+ " title = f'{bias_level} Bias, {var_level} Variance\\n(Degree {degrees[idx]})'\n",
276
+ " \n",
277
+ " ax.set_title(title, fontsize=12, fontweight='bold')\n",
278
+ " ax.legend(fontsize=9, loc='upper right')\n",
279
+ " \n",
280
+ " # Add text box with values\n",
281
+ " stats_text = f'BiasΒ²: {bias_sq:.3f}\\nVariance: {variance:.3f}'\n",
282
+ " ax.text(0.02, 0.02, stats_text, transform=ax.transAxes, fontsize=10,\n",
283
+ " verticalalignment='bottom', bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.8))\n",
284
+ " \n",
285
+ " # Add overall title\n",
286
+ " fig.suptitle('Bias-Variance Tradeoff Visualization', fontsize=18, fontweight='bold', y=0.98)\n",
287
+ " \n",
288
+ " # Convert to image\n",
289
+ " buf = io.BytesIO()\n",
290
+ " plt.savefig(buf, format='png', dpi=100, bbox_inches='tight')\n",
291
+ " buf.seek(0)\n",
292
+ " img = Image.open(buf)\n",
293
+ " plt.close()\n",
294
+ " \n",
295
+ " return img\n",
296
+ " \n",
297
+ " def create_summary_stats(self, degree, noise_level, n_samples):\n",
298
+ " \"\"\"Generate summary statistics text\"\"\"\n",
299
+ " X, y, y_true = self.generate_data(n_samples=n_samples, noise_level=noise_level)\n",
300
+ " X_test = np.linspace(0, 10, 100)\n",
301
+ " y_true_test = 2 * np.sin(X_test) + 0.1 * X_test**2 - 5\n",
302
+ " \n",
303
+ " # Calculate for selected degree\n",
304
+ " bias_sq, variance, _ = self.calculate_bias_variance(\n",
305
+ " X_test, y_true_test, n_iterations=50, degree=degree, noise_level=noise_level\n",
306
+ " )\n",
307
+ " \n",
308
+ " total_error = bias_sq + variance\n",
309
+ " \n",
310
+ " # Determine model quality\n",
311
+ " if degree <= 2:\n",
312
+ " quality = \"UNDERFITTING (High Bias)\"\n",
313
+ " recommendation = \"Increase model complexity\"\n",
314
+ " elif degree <= 6:\n",
315
+ " quality = \"GOOD BALANCE\"\n",
316
+ " recommendation = \"Model complexity is appropriate\"\n",
317
+ " else:\n",
318
+ " quality = \"OVERFITTING (High Variance)\"\n",
319
+ " recommendation = \"Reduce model complexity or add regularization\"\n",
320
+ " \n",
321
+ " summary = f\"\"\"\n",
322
+ "╔══════════════════════════════════════════════════════════╗\n",
323
+ "β•‘ BIAS-VARIANCE ANALYSIS SUMMARY β•‘\n",
324
+ "β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•\n",
325
+ "\n",
326
+ "Model Configuration:\n",
327
+ " β€’ Polynomial Degree: {degree}\n",
328
+ " β€’ Training Samples: {n_samples}\n",
329
+ " β€’ Noise Level: {noise_level}\n",
330
+ "\n",
331
+ "Performance Metrics:\n",
332
+ " β€’ BiasΒ² (Underfitting): {bias_sq:.4f}\n",
333
+ " β€’ Variance (Overfitting): {variance:.4f}\n",
334
+ " β€’ Total Error: {total_error:.4f}\n",
335
+ " β€’ Irreducible Error: {noise_level**2:.4f}\n",
336
+ "\n",
337
+ "Model Assessment: {quality}\n",
338
+ "Recommendation: {recommendation}\n",
339
+ "\n",
340
+ "Key Insights:\n",
341
+ " β€’ Low degree (1-2): High bias, low variance β†’ Underfitting\n",
342
+ " β€’ Medium degree (3-6): Balanced bias-variance β†’ Optimal\n",
343
+ " β€’ High degree (7+): Low bias, high variance β†’ Overfitting\n",
344
+ "\n",
345
+ "Tradeoff:\n",
346
+ " ↑ Model Complexity β†’ ↓ Bias, ↑ Variance\n",
347
+ " ↓ Model Complexity β†’ ↑ Bias, ↓ Variance\n",
348
+ " \"\"\"\n",
349
+ " \n",
350
+ " return summary\n",
351
+ "\n",
352
+ "# Create demo instance\n",
353
+ "demo_instance = BiasVarianceDemo()\n",
354
+ "\n",
355
+ "# Create Gradio interface\n",
356
+ "with gr.Blocks(title=\"Bias-Variance Tradeoff Demo\", theme=gr.themes.Soft()) as demo:\n",
357
+ " gr.Markdown(\"\"\"\n",
358
+ " # 🎯 Bias-Variance Tradeoff Interactive Demo\n",
359
+ " \n",
360
+ " Explore the fundamental tradeoff between bias and variance in machine learning!\n",
361
+ " \n",
362
+ " \"\"\")\n",
363
+ " \n",
364
+ " with gr.Row():\n",
365
+ " with gr.Column(scale=1):\n",
366
+ " degree_slider = gr.Slider(\n",
367
+ " minimum=1,\n",
368
+ " maximum=15,\n",
369
+ " value=4,\n",
370
+ " step=1,\n",
371
+ " label=\"πŸ”§ Model Complexity (Polynomial Degree)\",\n",
372
+ " info=\"Low = Underfitting, Medium = Optimal, High = Overfitting\"\n",
373
+ " )\n",
374
+ " \n",
375
+ " noise_slider = gr.Slider(\n",
376
+ " minimum=0.1,\n",
377
+ " maximum=2.0,\n",
378
+ " value=0.5,\n",
379
+ " step=0.1,\n",
380
+ " label=\"πŸ“Š Noise Level\",\n",
381
+ " info=\"Amount of random variation in the data\"\n",
382
+ " )\n",
383
+ " \n",
384
+ " samples_slider = gr.Slider(\n",
385
+ " minimum=20,\n",
386
+ " maximum=100,\n",
387
+ " value=50,\n",
388
+ " step=10,\n",
389
+ " label=\"πŸ“ˆ Training Samples\",\n",
390
+ " info=\"Number of data points for training\"\n",
391
+ " )\n",
392
+ " \n",
393
+ " update_btn = gr.Button(\"πŸ”„ Update Visualization\", variant=\"primary\", size=\"lg\")\n",
394
+ " \n",
395
+ " gr.Markdown(\"\"\"\n",
396
+ " ### πŸ’‘ Quick Guide:\n",
397
+ " \n",
398
+ " **Underfitting** (Degree 1-2):\n",
399
+ " - Model too simple\n",
400
+ " - High bias, low variance\n",
401
+ " - Poor on both train & test\n",
402
+ " \n",
403
+ " **Good Fit** (Degree 3-6):\n",
404
+ " - Balanced complexity\n",
405
+ " - Moderate bias & variance\n",
406
+ " - Best generalization\n",
407
+ " \n",
408
+ " **Overfitting** (Degree 7+):\n",
409
+ " - Model too complex\n",
410
+ " - Low bias, high variance\n",
411
+ " - Great on train, poor on test\n",
412
+ " \"\"\")\n",
413
+ " \n",
414
+ " summary_text = gr.Textbox(\n",
415
+ " label=\"πŸ“‹ Analysis Summary\",\n",
416
+ " lines=25,\n",
417
+ " max_lines=30,\n",
418
+ " interactive=False\n",
419
+ " )\n",
420
+ " \n",
421
+ " with gr.Column(scale=2):\n",
422
+ " output_image = gr.Image(label=\"Visualization\", height=900)\n",
423
+ " \n",
424
+ " def update_all(degree, noise, samples):\n",
425
+ " img = demo_instance.visualize_fitting(int(degree), noise, int(samples))\n",
426
+ " summary = demo_instance.create_summary_stats(int(degree), noise, int(samples))\n",
427
+ " return img, summary\n",
428
+ " \n",
429
+ " # Update visualization\n",
430
+ " update_btn.click(\n",
431
+ " fn=update_all,\n",
432
+ " inputs=[degree_slider, noise_slider, samples_slider],\n",
433
+ " outputs=[output_image, summary_text]\n",
434
+ " )\n",
435
+ " \n",
436
+ " # Also update on slider change\n",
437
+ " degree_slider.change(\n",
438
+ " fn=update_all,\n",
439
+ " inputs=[degree_slider, noise_slider, samples_slider],\n",
440
+ " outputs=[output_image, summary_text]\n",
441
+ " )\n",
442
+ " \n",
443
+ " # Initial visualization\n",
444
+ " demo.load(\n",
445
+ " fn=update_all,\n",
446
+ " inputs=[degree_slider, noise_slider, samples_slider],\n",
447
+ " outputs=[output_image, summary_text]\n",
448
+ " )\n",
449
+ "\n",
450
+ "# Launch the app\n",
451
+ "demo.launch(share=True)"
452
+ ]
453
+ }
454
+ ],
455
+ "metadata": {
456
+ "kernelspec": {
457
+ "display_name": "WORK",
458
+ "language": "python",
459
+ "name": "python3"
460
+ },
461
+ "language_info": {
462
+ "codemirror_mode": {
463
+ "name": "ipython",
464
+ "version": 3
465
+ },
466
+ "file_extension": ".py",
467
+ "mimetype": "text/x-python",
468
+ "name": "python",
469
+ "nbconvert_exporter": "python",
470
+ "pygments_lexer": "ipython3",
471
+ "version": "3.10.18"
472
+ }
473
+ },
474
+ "nbformat": 4,
475
+ "nbformat_minor": 5
476
+ }
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio
2
+ numpy
3
+ matplotlib
4
+ scikit-learn
5
+ Pillow