import gradio as gr
import numpy as np
# Fallout Terminal Theme CSS
# Color palette:
# - Pip-Boy Amber: #f0b030 (warm, readable headers)
# - Terminal Green: #4ade80 (softer green, easy on eyes)
# - Vault-Tec Blue: #5b9bd5 (trusty Vault-Tec corporate blue)
# - Background: #0c0c0c (near-black terminal)
# - Panel BG: #141414 (slightly lifted for depth)
# Custom stylesheet injected into the Gradio app when it is started (see the
# bottom of this file). Defines the palette documented above as CSS variables
# and uses `!important` throughout to override Gradio's built-in theme rules.
FALLOUT_CSS: str = """
@import url('https://fonts.googleapis.com/css2?family=VT323&display=swap');
@import url('https://fonts.googleapis.com/css2?family=Share+Tech+Mono&display=swap');
:root {
--pip-amber: #f0b030;
--pip-amber-dim: #c49028;
--terminal-green: #4ade80;
--terminal-green-dim: #22c55e;
--vault-blue: #5b9bd5;
--vault-blue-dim: #4080b8;
--bg-dark: #0c0c0c;
--bg-panel: #141414;
--bg-input: #1a1a1a;
--text-muted: #888888;
}
* {
font-family: 'Share Tech Mono', 'VT323', monospace !important;
font-size: 20px !important;
line-height: 1.6 !important;
}
h1 { font-size: 36px !important; }
h2 { font-size: 30px !important; }
h3 { font-size: 24px !important; }
h4, h5 { font-size: 22px !important; }
code, pre { font-size: 18px !important; }
body, .gradio-container {
background-color: var(--bg-dark) !important;
}
.gradio-container {
max-width: 1200px !important;
}
/* Main text - soft green, NO glow */
.markdown-text, .prose, p, span, label, .label-wrap {
color: var(--terminal-green) !important;
}
/* Headers - warm amber for hierarchy */
h1 {
color: var(--pip-amber) !important;
border-bottom: 2px solid var(--pip-amber-dim) !important;
padding-bottom: 8px !important;
}
h2 {
color: var(--pip-amber) !important;
border-bottom: 1px solid var(--pip-amber-dim) !important;
padding-bottom: 4px !important;
}
h3, h4, h5 {
color: var(--vault-blue) !important;
border-bottom: none !important;
}
/* Tab styling - Vault-Tec blue for navigation */
.tabs {
background-color: var(--bg-dark) !important;
border: 1px solid var(--vault-blue-dim) !important;
border-radius: 4px !important;
}
.tab-nav {
background-color: var(--bg-panel) !important;
border-bottom: 2px solid var(--vault-blue-dim) !important;
}
.tab-nav button {
background-color: var(--bg-panel) !important;
color: var(--vault-blue) !important;
border: none !important;
border-right: 1px solid var(--bg-dark) !important;
padding: 10px 16px !important;
transition: all 0.2s ease !important;
}
.tab-nav button:hover {
background-color: #1e3a5f !important;
color: #8ec5fc !important;
}
.tab-nav button.selected {
background-color: #1a3550 !important;
color: #8ec5fc !important;
border-bottom: 2px solid var(--pip-amber) !important;
}
/* Input/Output boxes - subtle with green text */
.textbox, textarea, input {
background-color: var(--bg-input) !important;
color: var(--terminal-green) !important;
border: 1px solid #333 !important;
border-radius: 3px !important;
}
.textbox:focus, textarea:focus, input:focus {
border-color: var(--terminal-green-dim) !important;
outline: none !important;
}
/* Buttons - amber accent for actions */
.primary, .secondary, button {
background-color: #2a2010 !important;
color: var(--pip-amber) !important;
border: 1px solid var(--pip-amber-dim) !important;
border-radius: 3px !important;
transition: all 0.2s ease !important;
}
button:hover {
background-color: #3d2e15 !important;
border-color: var(--pip-amber) !important;
}
/* Sliders - amber accent */
input[type="range"] {
accent-color: var(--pip-amber) !important;
}
/* Number inputs */
.number-input input {
background-color: var(--bg-input) !important;
color: var(--terminal-green) !important;
border: 1px solid #333 !important;
}
/* Code blocks - slightly blue-tinted for distinction */
code, pre {
background-color: #0d1520 !important;
color: var(--terminal-green) !important;
border: 1px solid #2a4060 !important;
border-left: 3px solid var(--vault-blue) !important;
border-radius: 3px !important;
padding: 2px 6px !important;
}
pre {
padding: 12px !important;
}
/* Tables */
table {
border-collapse: collapse !important;
}
th {
background-color: #1a2a3a !important;
color: var(--pip-amber) !important;
border: 1px solid #2a4060 !important;
padding: 8px !important;
}
td {
background-color: var(--bg-panel) !important;
color: var(--terminal-green) !important;
border: 1px solid #2a4060 !important;
padding: 8px !important;
}
/* Strong/bold text - amber for emphasis */
strong, b {
color: var(--pip-amber) !important;
font-weight: bold !important;
}
/* Links */
a {
color: var(--vault-blue) !important;
}
a:hover {
color: #8ec5fc !important;
}
/* Radio buttons and checkboxes */
.radio-group label, .checkbox-group label {
color: var(--terminal-green) !important;
}
/* Scrollbar - subtle */
::-webkit-scrollbar {
width: 8px;
height: 8px;
background-color: var(--bg-dark);
}
::-webkit-scrollbar-thumb {
background-color: #333;
border-radius: 4px;
}
::-webkit-scrollbar-thumb:hover {
background-color: #444;
}
/* Subtle scanlines - very light, not distracting */
.gradio-container::before {
content: "";
position: fixed;
top: 0;
left: 0;
width: 100%;
height: 100%;
background: repeating-linear-gradient(
0deg,
rgba(0, 0, 0, 0.03),
rgba(0, 0, 0, 0.03) 1px,
transparent 1px,
transparent 2px
);
pointer-events: none;
z-index: 1000;
}
/* Horizontal rules - amber accent */
hr {
border: none !important;
border-top: 1px solid var(--pip-amber-dim) !important;
margin: 16px 0 !important;
}
/* Blockquotes - for terminal prompts */
blockquote {
border-left: 3px solid var(--pip-amber) !important;
background-color: var(--bg-panel) !important;
padding: 8px 16px !important;
margin: 8px 0 !important;
color: var(--pip-amber) !important;
}
/* Muted/secondary text */
.secondary-text, .hint {
color: var(--text-muted) !important;
}
"""
# ============================================================================
# SVG DIAGRAM GENERATORS
# ============================================================================
def generate_forward_svg(x1, x2, w1, w2, b, z, y):
    """Generate an SVG diagram showing the forward pass with actual values.

    Args:
        x1, x2: input values.
        w1, w2: weights applied to x1 and x2.
        b: bias term.
        z: pre-activation value (w1*x1 + w2*x2 + b), computed by the caller.
        y: sigmoid(z), computed by the caller.

    Returns:
        str: SVG markup, rendered by the UI through a gr.HTML component.
    """
    # Colors matching our theme (same palette as FALLOUT_CSS at top of file).
    bg = "#0c0c0c"             # terminal background
    node_fill = "#1a2a3a"      # computation-node fill (blue tint)
    node_stroke = "#5b9bd5"    # Vault-Tec blue outline
    input_fill = "#1a3a2a"     # input-node fill (green tint)
    input_stroke = "#4ade80"   # terminal green outline
    output_fill = "#2a2a1a"    # output-node fill (amber tint)
    output_stroke = "#f0b030"  # Pip-Boy amber outline
    text_color = "#4ade80"
    label_color = "#5b9bd5"
    arrow_color = "#5b9bd5"
    value_color = "#f0b030"
    # NOTE(review): the f-string below contains no markup in this copy of the
    # file, so the color variables above appear unused here — the SVG body may
    # have been stripped during extraction; confirm against the original source.
    svg = f'''
'''
    return svg
def generate_backward_svg(x1, x2, w1, w2, b, y_true, z, y_pred, dL_dy, dy_dz, dL_dz, dL_dw1, dL_dw2, dL_db, loss):
    """Generate an SVG diagram showing backward pass with gradients.

    All numeric arguments are precomputed by the caller (backward_pass_demo):
    the inputs/weights/bias, the forward-pass values (z, y_pred, loss), and
    every intermediate gradient of the chain rule.

    Returns:
        str: SVG markup, rendered by the UI through a gr.HTML component.
    """
    # Palette mirrors FALLOUT_CSS; red (#ff6b6b) is reserved for the loss node
    # and gradient (backward) flow.
    bg = "#0c0c0c"
    node_fill = "#1a2a3a"
    node_stroke = "#5b9bd5"
    input_fill = "#1a3a2a"
    input_stroke = "#4ade80"
    loss_fill = "#3a1a1a"
    loss_stroke = "#ff6b6b"
    text_color = "#4ade80"
    label_color = "#5b9bd5"
    forward_arrow = "#5b9bd5"
    backward_arrow = "#ff6b6b"
    value_color = "#f0b030"
    gradient_color = "#ff6b6b"
    # NOTE(review): the f-string below is empty in this copy of the file, so
    # the variables above appear unused here — the SVG markup may have been
    # stripped during extraction; confirm against the original source.
    svg = f'''
'''
    return svg
# ============================================================================
# TAB 1: FORWARD PASS
# ============================================================================
def forward_pass_demo(x1, x2, w1, w2, b):
    """Walk through one forward pass of a single sigmoid neuron.

    Returns a (svg_markup, markdown_explanation) pair for the UI to display.
    """
    # Pre-activation: inputs scaled by their weights, shifted by the bias.
    weighted_sum = w1 * x1 + w2 * x2 + b
    # Sigmoid squashes the pre-activation into (0, 1); keep e^(-z) around
    # because the walkthrough text shows it as an intermediate value.
    exp_term = np.exp(-weighted_sum)
    activation = 1 / (1 + exp_term)
    # Diagram of the computation graph with the concrete numbers filled in.
    svg_diagram = generate_forward_svg(x1, x2, w1, w2, b, weighted_sum, activation)
    explanation = f"""
## FORWARD PASS CALCULATION
===============================================
### STEP 1: The Weighted Sum (z)
The neuron computes a **weighted sum** of inputs plus a bias:
```
z = w1*x1 + w2*x2 + b
z = ({w1:.2f})*({x1:.2f}) + ({w2:.2f})*({x2:.2f}) + ({b:.2f})
z = {w1*x1:.4f} + {w2*x2:.4f} + {b:.2f}
z = {weighted_sum:.4f}
```
**What's happening:** Each input is scaled by its weight,
then we add them up. The bias shifts the whole thing.
-----------------------------------------------
### STEP 2: The Sigmoid Activation Function
We squash z through the **sigmoid function** to get output in (0,1):
```
sigmoid(z) = 1 / (1 + e^(-z))
= 1 / (1 + e^(-{weighted_sum:.4f}))
= 1 / (1 + {exp_term:.4f})
= 1 / {1 + exp_term:.4f}
= {activation:.4f}
```
**Why sigmoid?** It smoothly maps any real number to (0,1).
- z >> 0 --> sigmoid(z) ≈ 1
- z << 0 --> sigmoid(z) ≈ 0
- z = 0 --> sigmoid(z) = 0.5
-----------------------------------------------
### SUMMARY
```
Inputs: x1={x1:.2f}, x2={x2:.2f}
Weights: w1={w1:.2f}, w2={w2:.2f}
Bias: b={b:.2f}
z = {weighted_sum:.4f}
y = sigmoid(z) = {activation:.4f}
```
**Interpretation:** Output of {activation:.4f} means
{activation * 100:.1f}% probability of class 1.
"""
    return svg_diagram, explanation
# Static markup shown at the top of Tab 1 via gr.HTML.
# NOTE(review): both literals below are empty in this copy of the file — the
# SVG/HTML content appears to have been stripped; confirm against the original.
FORWARD_INTRO_SVG: str = '''
'''
FORWARD_INTRO: str = f"""
{FORWARD_INTRO_SVG}
"""
# ============================================================================
# TAB 2: CHAIN RULE FUNDAMENTALS
# ============================================================================
# Static markup shown at the top of Tab 2 via gr.HTML.
# NOTE(review): both literals below are empty in this copy of the file — the
# SVG/HTML content appears to have been stripped; confirm against the original.
CHAIN_RULE_INTRO_SVG: str = '''
'''
CHAIN_RULE_INTRO: str = f"""
{CHAIN_RULE_INTRO_SVG}
"""
def generate_chain_rule_svg(a, b, x_val, u, y, du_dx, dy_du, dy_dx):
    """Generate SVG showing chain rule visually.

    Args:
        a, b: coefficients of the inner function u = a*x + b.
        x_val: the evaluation point x.
        u, y: inner and outer values, computed by the caller.
        du_dx, dy_du, dy_dx: the piecewise and chained derivatives.

    Returns:
        str: SVG markup, rendered by the UI through a gr.HTML component.
    """
    # Palette mirrors FALLOUT_CSS; red (#ff6b6b) marks derivative annotations.
    bg = "#0c0c0c"
    node_fill = "#1a2a3a"
    node_stroke = "#5b9bd5"
    input_fill = "#1a3a2a"
    input_stroke = "#4ade80"
    output_fill = "#2a2a1a"
    output_stroke = "#f0b030"
    text_color = "#4ade80"
    label_color = "#5b9bd5"
    arrow_color = "#5b9bd5"
    value_color = "#f0b030"
    deriv_color = "#ff6b6b"
    # NOTE(review): the f-string below is empty in this copy of the file, so
    # the variables above appear unused here — the SVG markup may have been
    # stripped during extraction; confirm against the original source.
    svg = f'''
'''
    return svg
def chain_rule_calculator(a, b, x_val):
    """Demonstrate the chain rule on y = (ax + b)^2 at a given x.

    Decomposes the function into inner u = ax + b and outer y = u^2,
    differentiates each piece, chains them, and returns an
    (svg_markup, markdown_explanation) pair for display.
    """
    # Inner and outer values of the composition at x_val.
    inner = a * x_val + b
    outer = inner ** 2
    # Piecewise derivatives, then their product via the chain rule.
    slope_inner = a            # du/dx
    slope_outer = 2 * inner    # dy/du
    slope_total = slope_outer * slope_inner  # dy/dx
    # Diagram of the composition with concrete values.
    svg_diagram = generate_chain_rule_svg(a, b, x_val, inner, outer, slope_inner, slope_outer, slope_total)
    # Finite-difference values used by the VERIFICATION section below.
    x_shifted = x_val + 0.001
    outer_shifted = (a * x_shifted + b) ** 2
    explanation = f"""
## CHAIN RULE CALCULATION: y = ({a}x + {b})^2
===============================================
### Setting up the composition:
```
Inner function: u = {a}x + {b}
Outer function: y = u^2
```
At x = {x_val}:
```
u = {a}*{x_val} + {b} = {inner}
y = ({inner})^2 = {outer}
```
-----------------------------------------------
### Step 1: Find du/dx (derivative of inner function)
```
u = {a}x + {b}
du/dx = {a} (coefficient of x)
```
-----------------------------------------------
### Step 2: Find dy/du (derivative of outer function)
```
y = u^2
dy/du = 2u = 2*({inner}) = {slope_outer}
```
-----------------------------------------------
### Step 3: Apply the Chain Rule!
```
dy/dx = (dy/du) * (du/dx)
= {slope_outer} * {slope_inner}
= {slope_total}
```
-----------------------------------------------
### VERIFICATION (optional sanity check)
If x increases by tiny amount h=0.001:
```
y(x+h) = ({a}*{x_shifted} + {b})^2 = {outer_shifted:.6f}
y(x) = {outer}
Slope ≈ (y(x+h) - y(x)) / h
= {(outer_shifted - outer) / 0.001:.4f}
Our dy/dx = {slope_total}
```
The chain rule works!
"""
    return svg_diagram, explanation
# ============================================================================
# TAB 3: DERIVATIVES OF KEY FUNCTIONS
# ============================================================================
# Static markup shown at the top of Tab 3 via gr.HTML.
# NOTE(review): both literals below are empty in this copy of the file — the
# SVG/HTML content appears to have been stripped; confirm against the original.
DERIVATIVES_INTRO_SVG: str = '''
'''
DERIVATIVES_INTRO: str = f"""
{DERIVATIVES_INTRO_SVG}
"""
def generate_sigmoid_svg(z, sig, dsig):
    """Generate SVG showing sigmoid function and derivative visually.

    Args:
        z: the point on the x-axis being inspected.
        sig: sigmoid(z), computed by the caller.
        dsig: sigmoid'(z) = sig * (1 - sig), computed by the caller.

    Returns:
        str: SVG markup, rendered by the UI through a gr.HTML component.
    """
    bg = "#0c0c0c"
    curve_color = "#4ade80"
    deriv_color = "#ff6b6b"
    point_color = "#f0b030"
    grid_color = "#333"
    text_color = "#4ade80"
    label_color = "#5b9bd5"
    # Generate sigmoid curve points (101 samples across z in [-5, 5]).
    curve_points = []
    for i in range(-50, 51):
        x_pt = i / 10  # -5 to 5
        y_pt = 1 / (1 + np.exp(-x_pt))
        # Map to SVG coordinates: x: -5..5 -> 100..500, y: 0..1 -> 250..50
        # (SVG y grows downward, hence the subtraction from 250).
        svg_x = 100 + (x_pt + 5) * 40
        svg_y = 250 - y_pt * 200
        curve_points.append(f"{svg_x:.1f},{svg_y:.1f}")
    curve_path = " ".join(curve_points)
    # Current point coordinates (same axis mapping as the curve).
    pt_x = 100 + (z + 5) * 40
    pt_y = 250 - sig * 200
    # Tangent line (slope = dsig, in SVG coordinates)
    # The slope in data space is dsig, but in SVG space y is inverted:
    # 40 px = 1 unit of z, 200 px = 1 unit of sigmoid output.
    tangent_dx = 40
    tangent_dy = -dsig * 200
    t_x1 = pt_x - tangent_dx
    t_y1 = pt_y - tangent_dy
    t_x2 = pt_x + tangent_dx
    t_y2 = pt_y + tangent_dy
    # NOTE(review): the f-string below is empty in this copy of the file, so
    # curve_path and the tangent endpoints appear unused here — the SVG markup
    # may have been stripped during extraction; confirm against the original.
    svg = f'''
'''
    return svg
def sigmoid_derivative_demo(z):
    """Compute sigmoid(z) and its derivative, with a worked explanation.

    Returns a (svg_markup, markdown_explanation) pair for the UI to display.
    """
    # Hoist e^(-z) since both the value and the walkthrough text use it.
    exp_neg = np.exp(-z)
    s_val = 1 / (1 + exp_neg)      # sigmoid(z)
    d_val = s_val * (1 - s_val)    # dσ/dz via the classic identity
    svg_diagram = generate_sigmoid_svg(z, s_val, d_val)
    explanation = f"""
## SIGMOID DERIVATIVE AT z = {z}
===============================================
### Step 1: Compute sigmoid(z)
```
σ(z) = 1 / (1 + e^(-z))
= 1 / (1 + e^(-{z}))
= 1 / (1 + {exp_neg:.6f})
= 1 / {1 + exp_neg:.6f}
= {s_val:.6f}
```
-----------------------------------------------
### Step 2: Compute the derivative
Using the formula: dσ/dz = σ(z) * (1 - σ(z))
```
dσ/dz = σ(z) * (1 - σ(z))
= {s_val:.6f} * (1 - {s_val:.6f})
= {s_val:.6f} * {1 - s_val:.6f}
= {d_val:.6f}
```
-----------------------------------------------
### Interpretation
At z = {z}:
- Sigmoid output: {s_val:.4f} (how confident the neuron is)
- Derivative: {d_val:.4f} (how sensitive output is to z)
**Key insight:** The derivative is LARGEST when z≈0 (sigmoid≈0.5)
and SMALLEST when |z| is large. This is the "vanishing gradient"
problem - extreme values barely update!
```
z = 0 --> σ = 0.5, dσ/dz = 0.25 (maximum!)
z = 5 --> σ ≈ 0.99, dσ/dz ≈ 0.007 (tiny!)
z = -5 --> σ ≈ 0.01, dσ/dz ≈ 0.007 (tiny!)
```
"""
    return svg_diagram, explanation
# ============================================================================
# TAB 4: BACKWARD PASS (THE MAIN EVENT)
# ============================================================================
# Static markup shown at the top of Tab 4 via gr.HTML.
# NOTE(review): both literals below are empty in this copy of the file — the
# SVG/HTML content appears to have been stripped; confirm against the original.
BACKWARD_INTRO_SVG: str = '''
'''
BACKWARD_INTRO: str = f"""
{BACKWARD_INTRO_SVG}
"""
def backward_pass_demo(x1, x2, w1, w2, b, y_true):
    """Complete forward + backward pass with detailed chain rule.

    Runs a single sigmoid neuron forward (weighted sum -> sigmoid -> binary
    cross-entropy loss), then derives every gradient by hand via the chain
    rule, and returns both an SVG diagram and a step-by-step markdown trace
    that mirrors the computation line by line.

    Args:
        x1, x2: input values.
        w1, w2: weights.
        b: bias term.
        y_true: target label (0 or 1, from the UI slider).

    Returns:
        tuple: (svg_markup, markdown_explanation).
    """
    # Forward pass
    z = w1 * x1 + w2 * x2 + b
    y_pred = 1 / (1 + np.exp(-z))
    # Binary cross-entropy loss (with small epsilon for numerical stability;
    # clipping keeps log() and the 1/y_pred division below finite).
    eps = 1e-7
    y_pred_clipped = np.clip(y_pred, eps, 1 - eps)
    loss = -(y_true * np.log(y_pred_clipped) + (1 - y_true) * np.log(1 - y_pred_clipped))
    # Backward pass - compute all gradients
    # dL/dy_pred (uses the clipped prediction so the divisions are safe)
    dL_dy = -y_true / y_pred_clipped + (1 - y_true) / (1 - y_pred_clipped)
    # dy_pred/dz (sigmoid derivative; note: uses the UNCLIPPED y_pred)
    dy_dz = y_pred * (1 - y_pred)
    # dz/dw1, dz/dw2, dz/db — linear layer, so these are just the inputs (and 1)
    dz_dw1 = x1
    dz_dw2 = x2
    dz_db = 1
    # Chain rule to get final gradients
    dL_dz = dL_dy * dy_dz  # This is the "upstream gradient"
    dL_dw1 = dL_dz * dz_dw1
    dL_dw2 = dL_dz * dz_dw2
    dL_db = dL_dz * dz_db
    # Generate SVG diagram
    svg_diagram = generate_backward_svg(
        x1, x2, w1, w2, b, y_true, z, y_pred,
        dL_dy, dy_dz, dL_dz, dL_dw1, dL_dw2, dL_db, loss
    )
    # Markdown walkthrough; every numeric shown is interpolated from the
    # values computed above so the text always matches the diagram.
    explanation = f"""
## COMPLETE BACKPROP WALKTHROUGH
===============================================
### GIVEN:
```
Inputs: x1 = {x1}, x2 = {x2}
Weights: w1 = {w1}, w2 = {w2}
Bias: b = {b}
True label: y_true = {y_true}
```
===============================================
## PART 1: FORWARD PASS (review)
===============================================
**Step 1a: Weighted sum**
```
z = w1*x1 + w2*x2 + b
= ({w1})*({x1}) + ({w2})*({x2}) + ({b})
= {z:.6f}
```
**Step 1b: Sigmoid activation**
```
y_pred = sigmoid(z) = 1/(1+e^(-z))
= 1/(1+e^(-{z:.4f}))
= {y_pred:.6f}
```
**Step 1c: Binary Cross-Entropy Loss**
```
L = -[y_true*log(y_pred) + (1-y_true)*log(1-y_pred)]
= -[{y_true}*log({y_pred:.6f}) + {1-y_true}*log({1-y_pred:.6f})]
= -[{y_true * np.log(y_pred_clipped):.6f} + {(1-y_true) * np.log(1-y_pred_clipped):.6f}]
= {loss:.6f}
```
===============================================
## PART 2: BACKWARD PASS (reversing the flow)
===============================================
We need: dL/dw1, dL/dw2, dL/db
**The computation graph:**
```
w1,x1,w2,x2,b --> z --> y_pred --> L
| | |
dz/dw dy/dz dL/dy
```
We work BACKWARDS from Loss to weights.
-----------------------------------------------
### STEP 2a: dL/dy_pred (how loss changes with prediction)
```
L = -y_true*log(y_pred) - (1-y_true)*log(1-y_pred)
dL/dy_pred = -y_true/y_pred + (1-y_true)/(1-y_pred)
= -{y_true}/{y_pred:.6f} + {1-y_true}/{1-y_pred:.6f}
= {-y_true/y_pred_clipped:.6f} + {(1-y_true)/(1-y_pred_clipped):.6f}
= {dL_dy:.6f}
```
-----------------------------------------------
### STEP 2b: dy_pred/dz (sigmoid derivative)
Using: d/dz[sigmoid(z)] = sigmoid(z)*(1-sigmoid(z))
```
dy/dz = y_pred * (1 - y_pred)
= {y_pred:.6f} * (1 - {y_pred:.6f})
= {y_pred:.6f} * {1-y_pred:.6f}
= {dy_dz:.6f}
```
-----------------------------------------------
### STEP 2c: dz/dw1, dz/dw2, dz/db
Since z = w1*x1 + w2*x2 + b:
```
dz/dw1 = x1 = {dz_dw1}
dz/dw2 = x2 = {dz_dw2}
dz/db = 1 = {dz_db}
```
-----------------------------------------------
### STEP 2d: CHAIN RULE - Put it together!
First, compute dL/dz (the "upstream gradient"):
```
dL/dz = (dL/dy_pred) * (dy_pred/dz)
= {dL_dy:.6f} * {dy_dz:.6f}
= {dL_dz:.6f}
```
Now chain to each weight:
```
dL/dw1 = (dL/dz) * (dz/dw1)
= {dL_dz:.6f} * {dz_dw1}
= {dL_dw1:.6f}
dL/dw2 = (dL/dz) * (dz/dw2)
= {dL_dz:.6f} * {dz_dw2}
= {dL_dw2:.6f}
dL/db = (dL/dz) * (dz/db)
= {dL_dz:.6f} * {dz_db}
= {dL_db:.6f}
```
===============================================
## PART 3: GRADIENT DESCENT UPDATE
===============================================
With learning rate α = 0.1:
```
w1_new = w1 - α * dL/dw1
= {w1} - 0.1 * {dL_dw1:.6f}
= {w1 - 0.1 * dL_dw1:.6f}
w2_new = w2 - α * dL/dw2
= {w2} - 0.1 * {dL_dw2:.6f}
= {w2 - 0.1 * dL_dw2:.6f}
b_new = b - α * dL/db
= {b} - 0.1 * {dL_db:.6f}
= {b - 0.1 * dL_db:.6f}
```
**We've completed one step of learning!**
===============================================
## SUMMARY TABLE
===============================================
| Gradient | Value | Meaning |
|----------|-------|---------|
| dL/dy | {dL_dy:.4f} | Loss sensitivity to prediction |
| dy/dz | {dy_dz:.4f} | Sigmoid sensitivity |
| dL/dz | {dL_dz:.4f} | "Upstream gradient" |
| dL/dw1 | {dL_dw1:.4f} | How to adjust w1 |
| dL/dw2 | {dL_dw2:.4f} | How to adjust w2 |
| dL/db | {dL_db:.4f} | How to adjust bias |
"""
    return svg_diagram, explanation
# ============================================================================
# TAB 5: PRACTICE PROBLEMS
# ============================================================================
# Markdown shown at the top of the practice tab (Tab 5).
PRACTICE_INTRO: str = """
# PRACTICE: COMPUTE BY HAND FIRST!
===============================================
Welcome to the Gradient Occupational Aptitude Test (G.O.A.T.).
Per Vault-Tec guidelines, pencil-and-paper practice builds neural
pathways (the biological kind). Complete these problems to determine
your future as a Machine Learning Specialist.
## TIPS FOR HAND CALCULATION
1. **Draw the computation graph** - boxes for operations,
arrows for data flow
2. **Forward pass first** - compute all intermediate values
3. **Backward pass** - start from loss, work backwards
4. **Check dimensions** - gradient of scalar w.r.t. vector
has same shape as the vector
5. **Verify numerically** - if unsure, use tiny h to approximate:
df/dx ≈ (f(x+h) - f(x)) / h
## PRACTICE PROBLEMS
Select a problem below and try it before clicking "Check Answer"!
"""
# Problem bank built ONCE at import time. The original rebuilt this large
# constant dict (including the f-string solution of problem 2, which calls
# np.exp) on every call to practice_problem; hoisting it is a free win.
# Keys are problem numbers; each entry maps "question"/"solution" to markdown.
_PRACTICE_PROBLEMS = {
    1: {
        "question": """
### Problem 1: Simple Chain Rule
Compute dy/dx where:
```
y = (2x + 3)^3
```
at x = 1.
**Hint:** Let u = 2x + 3, so y = u^3
""",
        "solution": """
### Solution to Problem 1
**Step 1: Identify the composition**
```
u = 2x + 3 (inner)
y = u^3 (outer)
```
**Step 2: Find individual derivatives**
```
du/dx = 2
dy/du = 3u^2
```
**Step 3: Apply chain rule**
```
dy/dx = (dy/du) * (du/dx)
= 3u^2 * 2
= 6u^2
= 6(2x + 3)^2
```
**Step 4: Evaluate at x = 1**
```
dy/dx = 6(2*1 + 3)^2
= 6(5)^2
= 6 * 25
= 150
```
**Answer: dy/dx = 150 at x = 1**
"""
    },
    2: {
        "question": """
### Problem 2: Sigmoid Derivative
Given z = 2, compute:
1. sigmoid(z)
2. d/dz[sigmoid(z)]
**Reminder:** sigmoid(z) = 1/(1+e^(-z))
d/dz[sigmoid(z)] = sigmoid(z) * (1 - sigmoid(z))
""",
        # f-string: the numeric steps are evaluated once, at import time.
        "solution": f"""
### Solution to Problem 2
**Step 1: Compute sigmoid(2)**
```
sigmoid(2) = 1/(1 + e^(-2))
= 1/(1 + {np.exp(-2):.6f})
= 1/{1 + np.exp(-2):.6f}
= {1/(1+np.exp(-2)):.6f}
```
**Step 2: Compute derivative**
```
Let s = sigmoid(2) = {1/(1+np.exp(-2)):.6f}
ds/dz = s * (1 - s)
= {1/(1+np.exp(-2)):.6f} * (1 - {1/(1+np.exp(-2)):.6f})
= {1/(1+np.exp(-2)):.6f} * {1 - 1/(1+np.exp(-2)):.6f}
= {(1/(1+np.exp(-2))) * (1 - 1/(1+np.exp(-2))):.6f}
```
**Answers:**
- sigmoid(2) ≈ 0.8808
- d/dz[sigmoid(2)] ≈ 0.1050
"""
    },
    3: {
        "question": """
### Problem 3: Full Backprop (Mini Version)
Single neuron with:
```
x = 2
w = 0.5
b = -1
y_true = 1
```
Using sigmoid activation and BCE loss, find dL/dw.
**Steps to follow:**
1. Forward: z = wx + b
2. Forward: y_pred = sigmoid(z)
3. Forward: L = BCE(y_true, y_pred)
4. Backward: Apply chain rule
""",
        "solution": """
### Solution to Problem 3
**Forward Pass:**
```
z = w*x + b = 0.5*2 + (-1) = 0
y_pred = sigmoid(0) = 0.5
L = -[1*log(0.5) + 0*log(0.5)]
= -log(0.5)
= 0.693
```
**Backward Pass:**
dL/dy_pred:
```
= -y_true/y_pred + (1-y_true)/(1-y_pred)
= -1/0.5 + 0/0.5
= -2
```
dy_pred/dz:
```
= y_pred * (1 - y_pred)
= 0.5 * 0.5 = 0.25
```
dz/dw:
```
= x = 2
```
**Chain Rule:**
```
dL/dw = (dL/dy) * (dy/dz) * (dz/dw)
= (-2) * (0.25) * (2)
= -1.0
```
**Answer: dL/dw = -1.0**
**Interpretation:** Negative gradient means we should
INCREASE w to reduce loss (moving opposite to gradient).
"""
    }
}


def practice_problem(problem_num):
    """Return (question, solution) markdown for the given practice problem.

    Args:
        problem_num: 1, 2, or 3. Any other value silently falls back to
            problem 1 (preserving the original behavior).

    Returns:
        tuple[str, str]: (question_markdown, solution_markdown).
    """
    prob = _PRACTICE_PROBLEMS.get(problem_num, _PRACTICE_PROBLEMS[1])
    return prob["question"], prob["solution"]
# ============================================================================
# BUILD THE GRADIO APP
# ============================================================================
# FIX: `Blocks.launch()` does not accept `css` or `js` keyword arguments —
# they are constructor parameters of `gr.Blocks`. Passing them to `launch()`
# fails (and the terminal theme was never applied). Both are now supplied to
# the `gr.Blocks(...)` constructor, and launch() keeps only server options.
with gr.Blocks(
    title="BACKPROP TERMINAL v1.0",
    css=FALLOUT_CSS,
    # Force dark mode on page load and hide the theme toggle so the terminal
    # styling cannot be flipped to a light background.
    js="""
    () => {
        // Force dark mode and hide theme toggle
        document.body.classList.add('dark');
        const style = document.createElement('style');
        style.textContent = `
            .dark-mode-toggle, [aria-label="Toggle dark mode"],
            button[title*="theme"], .theme-toggle { display: none !important; }
        `;
        document.head.appendChild(style);
    }
    """,
) as demo:
    # Terminal-style banner shown above the tab strip.
    gr.Markdown("""
# > VAULT-TEC NEURAL NETWORK TRAINING TERMINAL
## > SECURITY CLEARANCE: STAT 3106
### > INITIALIZING BACKPROPAGATION MODULES...
""")
    with gr.Tabs():
        # TAB 1: Forward Pass
        with gr.TabItem("01: FORWARD PASS"):
            gr.HTML(FORWARD_INTRO)
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### INPUT PARAMETERS")
                    x1_input = gr.Slider(minimum=-5, maximum=5, value=1.0, step=0.1, label="x1 (input 1)")
                    x2_input = gr.Slider(minimum=-5, maximum=5, value=2.0, step=0.1, label="x2 (input 2)")
                    w1_input = gr.Slider(minimum=-2, maximum=2, value=0.5, step=0.1, label="w1 (weight 1)")
                    w2_input = gr.Slider(minimum=-2, maximum=2, value=-0.3, step=0.1, label="w2 (weight 2)")
                    b_input = gr.Slider(minimum=-2, maximum=2, value=0.1, step=0.1, label="b (bias)")
                    forward_btn = gr.Button(">> EXECUTE FORWARD PASS <<")
                with gr.Column(scale=2):
                    forward_svg = gr.HTML(label="Computation Graph")
                    forward_output = gr.Markdown(label="Calculation")
            forward_btn.click(
                forward_pass_demo,
                inputs=[x1_input, x2_input, w1_input, w2_input, b_input],
                outputs=[forward_svg, forward_output]
            )
        # TAB 2: Chain Rule
        with gr.TabItem("02: CHAIN RULE"):
            gr.HTML(CHAIN_RULE_INTRO)
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### FUNCTION: y = (ax + b)²")
                    a_input = gr.Slider(minimum=-5, maximum=5, value=3.0, step=0.1, label="a (coefficient)")
                    b2_input = gr.Slider(minimum=-5, maximum=5, value=2.0, step=0.1, label="b (constant)")
                    x_input = gr.Slider(minimum=-5, maximum=5, value=1.0, step=0.1, label="x (evaluation point)")
                    chain_btn = gr.Button(">> APPLY CHAIN RULE <<")
                with gr.Column(scale=2):
                    chain_svg = gr.HTML(label="Chain Rule Visualization")
                    chain_output = gr.Markdown(label="Chain Rule Breakdown")
            chain_btn.click(
                chain_rule_calculator,
                inputs=[a_input, b2_input, x_input],
                outputs=[chain_svg, chain_output]
            )
        # TAB 3: Key Derivatives
        with gr.TabItem("03: KEY DERIVATIVES"):
            gr.HTML(DERIVATIVES_INTRO)
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### SIGMOID DERIVATIVE CALCULATOR")
                    z_input = gr.Slider(
                        minimum=-5, maximum=5, value=0, step=0.1,
                        label="z value"
                    )
                    sigmoid_btn = gr.Button(">> COMPUTE SIGMOID DERIVATIVE <<")
                with gr.Column(scale=2):
                    sigmoid_svg = gr.HTML(label="Sigmoid Visualization")
                    sigmoid_output = gr.Markdown(label="Derivative Calculation")
            sigmoid_btn.click(
                sigmoid_derivative_demo,
                inputs=[z_input],
                outputs=[sigmoid_svg, sigmoid_output]
            )
        # TAB 4: Backward Pass
        with gr.TabItem("04: BACKWARD PASS"):
            gr.HTML(BACKWARD_INTRO)
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### NETWORK CONFIGURATION")
                    bx1 = gr.Slider(minimum=-5, maximum=5, value=1.0, step=0.1, label="x1")
                    bx2 = gr.Slider(minimum=-5, maximum=5, value=2.0, step=0.1, label="x2")
                    bw1 = gr.Slider(minimum=-2, maximum=2, value=0.5, step=0.1, label="w1")
                    bw2 = gr.Slider(minimum=-2, maximum=2, value=-0.3, step=0.1, label="w2")
                    bb = gr.Slider(minimum=-2, maximum=2, value=0.1, step=0.1, label="bias")
                    by_true = gr.Slider(minimum=0, maximum=1, value=1, step=1, label="y_true (0 or 1)")
                    back_btn = gr.Button(">> EXECUTE FULL BACKPROP <<")
                with gr.Column(scale=2):
                    back_svg = gr.HTML(label="Backprop Graph")
                    back_output = gr.Markdown(label="Complete Backprop Trace")
            back_btn.click(
                backward_pass_demo,
                inputs=[bx1, bx2, bw1, bw2, bb, by_true],
                outputs=[back_svg, back_output]
            )
        # TAB 5: Practice
        with gr.TabItem("05: PRACTICE"):
            gr.Markdown(PRACTICE_INTRO)
            with gr.Row():
                with gr.Column():
                    problem_select = gr.Radio(
                        choices=["Problem 1: Chain Rule", "Problem 2: Sigmoid", "Problem 3: Full Backprop"],
                        label="Select Problem",
                        value="Problem 1: Chain Rule"
                    )
                    show_problem_btn = gr.Button(">> SHOW PROBLEM <<")
                    show_answer_btn = gr.Button(">> REVEAL SOLUTION <<")
                with gr.Column():
                    problem_display = gr.Markdown(label="Problem")
                    solution_display = gr.Markdown(label="Solution", visible=False)

            def show_problem(selection):
                """Show the chosen question and blank/hide the solution pane."""
                # Radio labels look like "Problem 1: Chain Rule" -> take "1".
                prob_num = int(selection.split(":")[0].split()[-1])
                q, _ = practice_problem(prob_num)
                return q, gr.update(visible=False, value="")

            def show_solution(selection):
                """Reveal the worked solution for the chosen problem."""
                prob_num = int(selection.split(":")[0].split()[-1])
                _, s = practice_problem(prob_num)
                return gr.update(visible=True, value=s)

            show_problem_btn.click(
                show_problem,
                inputs=[problem_select],
                outputs=[problem_display, solution_display]
            )
            show_answer_btn.click(
                show_solution,
                inputs=[problem_select],
                outputs=[solution_display]
            )
        # TAB 6: Quick Reference
        with gr.TabItem("06: REFERENCE"):
            gr.Markdown("""
# QUICK REFERENCE CARD
===============================================
## CHAIN RULE
```
y = f(g(x))
dy/dx = (df/dg) * (dg/dx)
```
For longer chains: just multiply all the derivatives!
-----------------------------------------------
## COMMON DERIVATIVES
| Function | Derivative |
|----------|------------|
| x^n | n*x^(n-1) |
| e^x | e^x |
| log(x) | 1/x |
| sigmoid(x) | sigmoid(x)*(1-sigmoid(x)) |
| ReLU(x) | 1 if x>0, else 0 |
-----------------------------------------------
## NEURAL NETWORK CHAIN
For a single neuron with sigmoid:
```
z = Σ(wi*xi) + b
y = sigmoid(z)
L = loss(y, y_true)
dL/dwi = (dL/dy) * (dy/dz) * (dz/dwi)
= (dL/dy) * sigmoid'(z) * xi
```
-----------------------------------------------
## GRADIENT DESCENT
```
w_new = w_old - learning_rate * dL/dw
```
The gradient points UPHILL; we go opposite direction.
-----------------------------------------------
## BCE LOSS GRADIENT (sigmoid output)
For BCE loss with sigmoid output:
```
dL/dz = y_pred - y_true
```
This clean result comes from cancellation in the chain!
-----------------------------------------------
## DEBUGGING TIPS
1. **Gradient check:** Compare with numerical gradient
```
dL/dw ≈ [L(w+h) - L(w-h)] / (2h)
```
2. **Shapes must match:** gradient of L w.r.t. W has same shape as W
3. **Large gradients?** Try gradient clipping or smaller learning rate
4. **Vanishing gradients?** Consider ReLU or residual connections
""")
    # Terminal-style footer shown below the tab strip.
    gr.Markdown("""
---
> TERMINAL SESSION ACTIVE
> VAULT-TEC WISHES YOU A PLEASANT TRAINING EXPERIENCE
""")

if __name__ == "__main__":
    # Theme CSS/JS are wired into gr.Blocks above; launch() takes only
    # server configuration.
    demo.launch(server_port=7860)