import gradio as gr import numpy as np # Fallout Terminal Theme CSS # Color palette: # - Pip-Boy Amber: #f0b030 (warm, readable headers) # - Terminal Green: #4ade80 (softer green, easy on eyes) # - Vault-Tec Blue: #5b9bd5 (trusty Vault-Tec corporate blue) # - Background: #0c0c0c (near-black terminal) # - Panel BG: #141414 (slightly lifted for depth) FALLOUT_CSS = """ @import url('https://fonts.googleapis.com/css2?family=VT323&display=swap'); @import url('https://fonts.googleapis.com/css2?family=Share+Tech+Mono&display=swap'); :root { --pip-amber: #f0b030; --pip-amber-dim: #c49028; --terminal-green: #4ade80; --terminal-green-dim: #22c55e; --vault-blue: #5b9bd5; --vault-blue-dim: #4080b8; --bg-dark: #0c0c0c; --bg-panel: #141414; --bg-input: #1a1a1a; --text-muted: #888888; } * { font-family: 'Share Tech Mono', 'VT323', monospace !important; font-size: 20px !important; line-height: 1.6 !important; } h1 { font-size: 36px !important; } h2 { font-size: 30px !important; } h3 { font-size: 24px !important; } h4, h5 { font-size: 22px !important; } code, pre { font-size: 18px !important; } body, .gradio-container { background-color: var(--bg-dark) !important; } .gradio-container { max-width: 1200px !important; } /* Main text - soft green, NO glow */ .markdown-text, .prose, p, span, label, .label-wrap { color: var(--terminal-green) !important; } /* Headers - warm amber for hierarchy */ h1 { color: var(--pip-amber) !important; border-bottom: 2px solid var(--pip-amber-dim) !important; padding-bottom: 8px !important; } h2 { color: var(--pip-amber) !important; border-bottom: 1px solid var(--pip-amber-dim) !important; padding-bottom: 4px !important; } h3, h4, h5 { color: var(--vault-blue) !important; border-bottom: none !important; } /* Tab styling - Vault-Tec blue for navigation */ .tabs { background-color: var(--bg-dark) !important; border: 1px solid var(--vault-blue-dim) !important; border-radius: 4px !important; } .tab-nav { background-color: var(--bg-panel) !important; border-bottom: 2px solid var(--vault-blue-dim) !important; } .tab-nav button { background-color: var(--bg-panel) !important; color: var(--vault-blue) !important; border: none !important; border-right: 1px solid var(--bg-dark) !important; padding: 10px 16px !important; transition: all 0.2s ease !important; } .tab-nav button:hover { background-color: #1e3a5f !important; color: #8ec5fc !important; } .tab-nav button.selected { background-color: #1a3550 !important; color: #8ec5fc !important; border-bottom: 2px solid var(--pip-amber) !important; } /* Input/Output boxes - subtle with green text */ .textbox, textarea, input { background-color: var(--bg-input) !important; color: var(--terminal-green) !important; border: 1px solid #333 !important; border-radius: 3px !important; } .textbox:focus, textarea:focus, input:focus { border-color: var(--terminal-green-dim) !important; outline: none !important; } /* Buttons - amber accent for actions */ .primary, .secondary, button { background-color: #2a2010 !important; color: var(--pip-amber) !important; border: 1px solid var(--pip-amber-dim) !important; border-radius: 3px !important; transition: all 0.2s ease !important; } button:hover { background-color: #3d2e15 !important; border-color: var(--pip-amber) !important; } /* Sliders - amber accent */ input[type="range"] { accent-color: var(--pip-amber) !important; } /* Number inputs */ .number-input input { background-color: var(--bg-input) !important; color: var(--terminal-green) !important; border: 1px solid #333 !important; } /* Code blocks - slightly blue-tinted for distinction */ code, pre { background-color: #0d1520 !important; color: var(--terminal-green) !important; border: 1px solid #2a4060 !important; border-left: 3px solid var(--vault-blue) !important; border-radius: 3px !important; padding: 2px 6px !important; } pre { padding: 12px !important; } /* Tables */ table { border-collapse: collapse !important; } th { background-color: #1a2a3a !important; color: var(--pip-amber) !important; border: 1px solid #2a4060 !important; padding: 8px !important; } td { background-color: var(--bg-panel) !important; color: var(--terminal-green) !important; border: 1px solid #2a4060 !important; padding: 8px !important; } /* Strong/bold text - amber for emphasis */ strong, b { color: var(--pip-amber) !important; font-weight: bold !important; } /* Links */ a { color: var(--vault-blue) !important; } a:hover { color: #8ec5fc !important; } /* Radio buttons and checkboxes */ .radio-group label, .checkbox-group label { color: var(--terminal-green) !important; } /* Scrollbar - subtle */ ::-webkit-scrollbar { width: 8px; height: 8px; background-color: var(--bg-dark); } ::-webkit-scrollbar-thumb { background-color: #333; border-radius: 4px; } ::-webkit-scrollbar-thumb:hover { background-color: #444; } /* Subtle scanlines - very light, not distracting */ .gradio-container::before { content: ""; position: fixed; top: 0; left: 0; width: 100%; height: 100%; background: repeating-linear-gradient( 0deg, rgba(0, 0, 0, 0.03), rgba(0, 0, 0, 0.03) 1px, transparent 1px, transparent 2px ); pointer-events: none; z-index: 1000; } /* Horizontal rules - amber accent */ hr { border: none !important; border-top: 1px solid var(--pip-amber-dim) !important; margin: 16px 0 !important; } /* Blockquotes - for terminal prompts */ blockquote { border-left: 3px solid var(--pip-amber) !important; background-color: var(--bg-panel) !important; padding: 8px 16px !important; margin: 8px 0 !important; color: var(--pip-amber) !important; } /* Muted/secondary text */ .secondary-text, .hint { color: var(--text-muted) !important; } """ # ============================================================================ # SVG DIAGRAM GENERATORS # ============================================================================ def generate_forward_svg(x1, x2, w1, w2, b, z, y): """Generate an SVG diagram showing the forward pass with actual values.""" # Colors matching our theme bg = "#0c0c0c" node_fill = "#1a2a3a" node_stroke = "#5b9bd5" input_fill = "#1a3a2a" input_stroke = "#4ade80" output_fill = "#2a2a1a" output_stroke = "#f0b030" text_color = "#4ade80" label_color = "#5b9bd5" arrow_color = "#5b9bd5" value_color = "#f0b030" svg = f''' FORWARD PASS: Data Flow x₁ {x1:.2f} x₂ {x2:.2f} w₁={w1:.2f} w₂={w2:.2f} Σ + b b={b:.2f} z={z:.3f} σ(z) 1/(1+e⁻ᶻ) ŷ={y:.4f} OUTPUT {y:.4f} Inputs Operations Output ■ Computed Values ''' return svg def generate_backward_svg(x1, x2, w1, w2, b, y_true, z, y_pred, dL_dy, dy_dz, dL_dz, dL_dw1, dL_dw2, dL_db, loss): """Generate an SVG diagram showing backward pass with gradients.""" bg = "#0c0c0c" node_fill = "#1a2a3a" node_stroke = "#5b9bd5" input_fill = "#1a3a2a" input_stroke = "#4ade80" loss_fill = "#3a1a1a" loss_stroke = "#ff6b6b" text_color = "#4ade80" label_color = "#5b9bd5" forward_arrow = "#5b9bd5" backward_arrow = "#ff6b6b" value_color = "#f0b030" gradient_color = "#ff6b6b" svg = f''' GRADIENT FLOW DIAGRAM x1 {x1:.2f} w1={w1:.1f} x2 {x2:.2f} w2={w2:.1f} Sum+b z={z:.2f} sigmoid y={y_pred:.3f} BCE L={loss:.4f} y_true={y_true} BACKWARD PASS (gradients) dL/dy={dL_dy:.2f} dy/dz={dy_dz:.3f} dL/dz={dL_dz:.3f} COMPUTED GRADIENTS dL/dw1 = {dL_dw1:.4f} dL/dw2 = {dL_dw2:.4f} dL/db = {dL_db:.4f} CHAIN RULE COMPUTATION dL/dw1 = dL/dy * dy/dz * dz/dw1 = ({dL_dy:.2f}) * ({dy_dz:.4f}) * ({x1:.2f}) = {dL_dw1:.4f} Key: dz/dw1 = x1, dz/dw2 = x2, dz/db = 1 The input values become gradients! dL/dw2 = dL/dz * x2 = {dL_dz:.3f} * {x2:.2f} = {dL_dw2:.4f} LEGEND Forward (data) Backward (grads) Gradient values ''' return svg # ============================================================================ # TAB 1: FORWARD PASS # ============================================================================ def forward_pass_demo(x1, x2, w1, w2, b): """Step-by-step forward pass calculation.""" # Step 1: Weighted sum z = w1 * x1 + w2 * x2 + b # Step 2: Sigmoid activation sigmoid_z = 1 / (1 + np.exp(-z)) # Generate SVG diagram svg_diagram = generate_forward_svg(x1, x2, w1, w2, b, z, sigmoid_z) explanation = f""" ## FORWARD PASS CALCULATION =============================================== ### STEP 1: The Weighted Sum (z) The neuron computes a **weighted sum** of inputs plus a bias: ``` z = w1*x1 + w2*x2 + b z = ({w1:.2f})*({x1:.2f}) + ({w2:.2f})*({x2:.2f}) + ({b:.2f}) z = {w1*x1:.4f} + {w2*x2:.4f} + {b:.2f} z = {z:.4f} ``` **What's happening:** Each input is scaled by its weight, then we add them up. The bias shifts the whole thing. ----------------------------------------------- ### STEP 2: The Sigmoid Activation Function We squash z through the **sigmoid function** to get output in (0,1): ``` sigmoid(z) = 1 / (1 + e^(-z)) = 1 / (1 + e^(-{z:.4f})) = 1 / (1 + {np.exp(-z):.4f}) = 1 / {1 + np.exp(-z):.4f} = {sigmoid_z:.4f} ``` **Why sigmoid?** It smoothly maps any real number to (0,1). - z >> 0 --> sigmoid(z) ≈ 1 - z << 0 --> sigmoid(z) ≈ 0 - z = 0 --> sigmoid(z) = 0.5 ----------------------------------------------- ### SUMMARY ``` Inputs: x1={x1:.2f}, x2={x2:.2f} Weights: w1={w1:.2f}, w2={w2:.2f} Bias: b={b:.2f} z = {z:.4f} y = sigmoid(z) = {sigmoid_z:.4f} ``` **Interpretation:** Output of {sigmoid_z:.4f} means {sigmoid_z*100:.1f}% probability of class 1. """ return svg_diagram, explanation FORWARD_INTRO_SVG = ''' FORWARD PASS: DATA IN → PREDICTION OUT THE SINGLE NEURON x₁ x₂ ×w₁ ×w₂ Σ +b z σ(z) sigmoid ŷ STEP 1 Weighted Sum STEP 2 Activation THE MATH Step 1: Weighted Sum z = w₁x₁ + w₂x₂ + b Step 2: Sigmoid ŷ = σ(z) = 1/(1+e⁻ᶻ) Output ŷ ∈ (0,1) = probability Squashes any real number to (0,1) ▼ INTERACTIVE TERMINAL ▼ Adjust inputs (x₁, x₂), weights (w₁, w₂), and bias (b) Click "EXECUTE FORWARD PASS" to see the values [Vault-Tec recommends saving your work before experiments] ''' FORWARD_INTRO = f""" {FORWARD_INTRO_SVG} """ # ============================================================================ # TAB 2: CHAIN RULE FUNDAMENTALS # ============================================================================ CHAIN_RULE_INTRO_SVG = ''' THE CHAIN RULE 1. THE BASIC IDEA x g(x) u f(u) y u = g(x) y = f(g(x)) Chain Rule Formula: dy/dx = (dy/du) × (du/dx) 2. WHY IT WORKS Think of it like fractions: dy dx = dy du × du dx = dy dx 3. EXAMPLE: y = (3x + 2)² Inner: u = 3x+2 → du/dx = 3 Outer: y = u² → dy/du = 2u Chain: dy/dx = 2u × 3 = 6(3x+2) ▼ INTERACTIVE TERMINAL ▼ Adjust a, b, and x with the sliders Click "APPLY CHAIN RULE" to see values flow through [Remember: derivatives chain together like Vault access codes] ''' CHAIN_RULE_INTRO = f""" {CHAIN_RULE_INTRO_SVG} """ def generate_chain_rule_svg(a, b, x_val, u, y, du_dx, dy_du, dy_dx): """Generate SVG showing chain rule visually.""" bg = "#0c0c0c" node_fill = "#1a2a3a" node_stroke = "#5b9bd5" input_fill = "#1a3a2a" input_stroke = "#4ade80" output_fill = "#2a2a1a" output_stroke = "#f0b030" text_color = "#4ade80" label_color = "#5b9bd5" arrow_color = "#5b9bd5" value_color = "#f0b030" deriv_color = "#ff6b6b" svg = f''' CHAIN RULE: y = ({a}x + {b})² x {x_val:.2f} g(x) u = {a}x + {b} u = {u:.2f} f(u) y = u² y = {y:.2f} dy/du = {dy_du:.2f} du/dx = {du_dx:.2f} CHAIN RULE dy/dx = dy/du × du/dx = {dy_du:.2f} × {du_dx:.2f} = {dy_dx:.2f} Forward Derivatives (multiply!) ''' return svg def chain_rule_calculator(a, b, x_val): """Demonstrate chain rule with y = (ax + b)^2""" # u = ax + b u = a * x_val + b # y = u^2 y = u ** 2 # Derivatives du_dx = a dy_du = 2 * u dy_dx = dy_du * du_dx # Generate SVG svg_diagram = generate_chain_rule_svg(a, b, x_val, u, y, du_dx, dy_du, dy_dx) explanation = f""" ## CHAIN RULE CALCULATION: y = ({a}x + {b})^2 =============================================== ### Setting up the composition: ``` Inner function: u = {a}x + {b} Outer function: y = u^2 ``` At x = {x_val}: ``` u = {a}*{x_val} + {b} = {u} y = ({u})^2 = {y} ``` ----------------------------------------------- ### Step 1: Find du/dx (derivative of inner function) ``` u = {a}x + {b} du/dx = {a} (coefficient of x) ``` ----------------------------------------------- ### Step 2: Find dy/du (derivative of outer function) ``` y = u^2 dy/du = 2u = 2*({u}) = {dy_du} ``` ----------------------------------------------- ### Step 3: Apply the Chain Rule! ``` dy/dx = (dy/du) * (du/dx) = {dy_du} * {du_dx} = {dy_dx} ``` ----------------------------------------------- ### VERIFICATION (optional sanity check) If x increases by tiny amount h=0.001: ``` y(x+h) = ({a}*{x_val+0.001} + {b})^2 = {(a*(x_val+0.001) + b)**2:.6f} y(x) = {y} Slope ≈ (y(x+h) - y(x)) / h = {((a*(x_val+0.001) + b)**2 - y) / 0.001:.4f} Our dy/dx = {dy_dx} ``` The chain rule works! """ return svg_diagram, explanation # ============================================================================ # TAB 3: DERIVATIVES OF KEY FUNCTIONS # ============================================================================ DERIVATIVES_INTRO_SVG = ''' KEY DERIVATIVES YOU NEED TO KNOW 1. SIGMOID FUNCTION z=0 → σ=0.5 Function: σ(z) = 1/(1+e⁻ᶻ) Derivative: dσ/dz = σ(z)(1-σ(z)) Derivative uses the function itself! Already have σ(z)? No extra work needed. 2. BINARY CROSS-ENTROPY Loss Function: L = -[y·log(ŷ) + (1-y)·log(1-ŷ)] Derivative w.r.t. ŷ: dL/dŷ = -y/ŷ + (1-y)/(1-ŷ) Combined with sigmoid: dL/dz = ŷ - y (Vault-Tec approved) 3. DERIVATIVE PATTERNS Powers d/dx[xⁿ] = n·xⁿ⁻¹ Exponential d/dx[eˣ] = eˣ Logarithm d/dx[log(x)] = 1/x Sigmoid d/dx[σ(x)] = σ(1-σ) Chain Rule d/dx[f(g(x))] = f'(g(x)) · g'(x) INTERACTIVE TERMINAL Move the z slider to see sigmoid and its derivative Derivative peaks at z=0, vanishes at extremes ''' DERIVATIVES_INTRO = f""" {DERIVATIVES_INTRO_SVG} """ def generate_sigmoid_svg(z, sig, dsig): """Generate SVG showing sigmoid function and derivative visually.""" bg = "#0c0c0c" curve_color = "#4ade80" deriv_color = "#ff6b6b" point_color = "#f0b030" grid_color = "#333" text_color = "#4ade80" label_color = "#5b9bd5" # Generate sigmoid curve points curve_points = [] for i in range(-50, 51): x_pt = i / 10 # -5 to 5 y_pt = 1 / (1 + np.exp(-x_pt)) # Map to SVG coordinates: x: -5..5 -> 100..500, y: 0..1 -> 250..50 svg_x = 100 + (x_pt + 5) * 40 svg_y = 250 - y_pt * 200 curve_points.append(f"{svg_x:.1f},{svg_y:.1f}") curve_path = " ".join(curve_points) # Current point coordinates pt_x = 100 + (z + 5) * 40 pt_y = 250 - sig * 200 # Tangent line (slope = dsig, in SVG coordinates) # The slope in data space is dsig, but in SVG space y is inverted tangent_dx = 40 tangent_dy = -dsig * 200 t_x1 = pt_x - tangent_dx t_y1 = pt_y - tangent_dy t_x2 = pt_x + tangent_dx t_y2 = pt_y + tangent_dy svg = f''' SIGMOID FUNCTION & DERIVATIVE 0.5 z=0 z σ(z) 1.0 0.0 -5 5 z={z:.1f} σ={sig:.3f} VALUES z = {z:.2f} σ(z) = {sig:.4f} dσ/dz = {dsig:.4f} = σ(1-σ) = {sig:.3f}×{1-sig:.3f} σ(z) tangent ''' return svg def sigmoid_derivative_demo(z): """Show sigmoid and its derivative.""" sig = 1 / (1 + np.exp(-z)) dsig = sig * (1 - sig) svg_diagram = generate_sigmoid_svg(z, sig, dsig) explanation = f""" ## SIGMOID DERIVATIVE AT z = {z} =============================================== ### Step 1: Compute sigmoid(z) ``` σ(z) = 1 / (1 + e^(-z)) = 1 / (1 + e^(-{z})) = 1 / (1 + {np.exp(-z):.6f}) = 1 / {1 + np.exp(-z):.6f} = {sig:.6f} ``` ----------------------------------------------- ### Step 2: Compute the derivative Using the formula: dσ/dz = σ(z) * (1 - σ(z)) ``` dσ/dz = σ(z) * (1 - σ(z)) = {sig:.6f} * (1 - {sig:.6f}) = {sig:.6f} * {1-sig:.6f} = {dsig:.6f} ``` ----------------------------------------------- ### Interpretation At z = {z}: - Sigmoid output: {sig:.4f} (how confident the neuron is) - Derivative: {dsig:.4f} (how sensitive output is to z) **Key insight:** The derivative is LARGEST when z≈0 (sigmoid≈0.5) and SMALLEST when |z| is large. This is the "vanishing gradient" problem - extreme values barely update! ``` z = 0 --> σ = 0.5, dσ/dz = 0.25 (maximum!) z = 5 --> σ ≈ 0.99, dσ/dz ≈ 0.007 (tiny!) z = -5 --> σ ≈ 0.01, dσ/dz ≈ 0.007 (tiny!) ``` """ return svg_diagram, explanation # ============================================================================ # TAB 4: BACKWARD PASS (THE MAIN EVENT) # ============================================================================ BACKWARD_INTRO_SVG = ''' BACKPROPAGATION: LEARNING FROM MISTAKES THE BIG PICTURE FWD: x z y L Data flows BWD: dL/dx dL/dz dL/dy 1 Grads flow GOAL: Find dL/dw1, dL/dw2, dL/db to update weights KEY INSIGHT Same computation graph, opposite direction! At each node multiply: upstream × local derivative CHAIN RULE IN ACTION To find dL/dw1: dL/dw1 = dL/dy * dy/dz * dz/dw1 Multiply derivatives along the path! VISUAL MULTIPLICATION dL/dy × dy/dz × dz/dw1 = dL/dw1 upstream × local = pass backward INTERACTIVE TERMINAL Blue arrows = forward data | Red arrows = backward gradients Click "EXECUTE FULL BACKPROP" to see all values calculated [Vault-Tec tip: errors propagate backward, just like rumors in the cafeteria] ''' BACKWARD_INTRO = f""" {BACKWARD_INTRO_SVG} """ def backward_pass_demo(x1, x2, w1, w2, b, y_true): """Complete forward + backward pass with detailed chain rule.""" # Forward pass z = w1 * x1 + w2 * x2 + b y_pred = 1 / (1 + np.exp(-z)) # Binary cross-entropy loss (with small epsilon for numerical stability) eps = 1e-7 y_pred_clipped = np.clip(y_pred, eps, 1 - eps) loss = -(y_true * np.log(y_pred_clipped) + (1 - y_true) * np.log(1 - y_pred_clipped)) # Backward pass - compute all gradients # dL/dy_pred dL_dy = -y_true / y_pred_clipped + (1 - y_true) / (1 - y_pred_clipped) # dy_pred/dz (sigmoid derivative) dy_dz = y_pred * (1 - y_pred) # dz/dw1, dz/dw2, dz/db dz_dw1 = x1 dz_dw2 = x2 dz_db = 1 # Chain rule to get final gradients dL_dz = dL_dy * dy_dz # This is the "upstream gradient" dL_dw1 = dL_dz * dz_dw1 dL_dw2 = dL_dz * dz_dw2 dL_db = dL_dz * dz_db # Generate SVG diagram svg_diagram = generate_backward_svg( x1, x2, w1, w2, b, y_true, z, y_pred, dL_dy, dy_dz, dL_dz, dL_dw1, dL_dw2, dL_db, loss ) explanation = f""" ## COMPLETE BACKPROP WALKTHROUGH =============================================== ### GIVEN: ``` Inputs: x1 = {x1}, x2 = {x2} Weights: w1 = {w1}, w2 = {w2} Bias: b = {b} True label: y_true = {y_true} ``` =============================================== ## PART 1: FORWARD PASS (review) =============================================== **Step 1a: Weighted sum** ``` z = w1*x1 + w2*x2 + b = ({w1})*({x1}) + ({w2})*({x2}) + ({b}) = {z:.6f} ``` **Step 1b: Sigmoid activation** ``` y_pred = sigmoid(z) = 1/(1+e^(-z)) = 1/(1+e^(-{z:.4f})) = {y_pred:.6f} ``` **Step 1c: Binary Cross-Entropy Loss** ``` L = -[y_true*log(y_pred) + (1-y_true)*log(1-y_pred)] = -[{y_true}*log({y_pred:.6f}) + {1-y_true}*log({1-y_pred:.6f})] = -[{y_true * np.log(y_pred_clipped):.6f} + {(1-y_true) * np.log(1-y_pred_clipped):.6f}] = {loss:.6f} ``` =============================================== ## PART 2: BACKWARD PASS (reversing the flow) =============================================== We need: dL/dw1, dL/dw2, dL/db **The computation graph:** ``` w1,x1,w2,x2,b --> z --> y_pred --> L | | | dz/dw dy/dz dL/dy ``` We work BACKWARDS from Loss to weights. ----------------------------------------------- ### STEP 2a: dL/dy_pred (how loss changes with prediction) ``` L = -y_true*log(y_pred) - (1-y_true)*log(1-y_pred) dL/dy_pred = -y_true/y_pred + (1-y_true)/(1-y_pred) = -{y_true}/{y_pred:.6f} + {1-y_true}/{1-y_pred:.6f} = {-y_true/y_pred_clipped:.6f} + {(1-y_true)/(1-y_pred_clipped):.6f} = {dL_dy:.6f} ``` ----------------------------------------------- ### STEP 2b: dy_pred/dz (sigmoid derivative) Using: d/dz[sigmoid(z)] = sigmoid(z)*(1-sigmoid(z)) ``` dy/dz = y_pred * (1 - y_pred) = {y_pred:.6f} * (1 - {y_pred:.6f}) = {y_pred:.6f} * {1-y_pred:.6f} = {dy_dz:.6f} ``` ----------------------------------------------- ### STEP 2c: dz/dw1, dz/dw2, dz/db Since z = w1*x1 + w2*x2 + b: ``` dz/dw1 = x1 = {dz_dw1} dz/dw2 = x2 = {dz_dw2} dz/db = 1 = {dz_db} ``` ----------------------------------------------- ### STEP 2d: CHAIN RULE - Put it together! First, compute dL/dz (the "upstream gradient"): ``` dL/dz = (dL/dy_pred) * (dy_pred/dz) = {dL_dy:.6f} * {dy_dz:.6f} = {dL_dz:.6f} ``` Now chain to each weight: ``` dL/dw1 = (dL/dz) * (dz/dw1) = {dL_dz:.6f} * {dz_dw1} = {dL_dw1:.6f} dL/dw2 = (dL/dz) * (dz/dw2) = {dL_dz:.6f} * {dz_dw2} = {dL_dw2:.6f} dL/db = (dL/dz) * (dz/db) = {dL_dz:.6f} * {dz_db} = {dL_db:.6f} ``` =============================================== ## PART 3: GRADIENT DESCENT UPDATE =============================================== With learning rate α = 0.1: ``` w1_new = w1 - α * dL/dw1 = {w1} - 0.1 * {dL_dw1:.6f} = {w1 - 0.1 * dL_dw1:.6f} w2_new = w2 - α * dL/dw2 = {w2} - 0.1 * {dL_dw2:.6f} = {w2 - 0.1 * dL_dw2:.6f} b_new = b - α * dL/db = {b} - 0.1 * {dL_db:.6f} = {b - 0.1 * dL_db:.6f} ``` **We've completed one step of learning!** =============================================== ## SUMMARY TABLE =============================================== | Gradient | Value | Meaning | |----------|-------|---------| | dL/dy | {dL_dy:.4f} | Loss sensitivity to prediction | | dy/dz | {dy_dz:.4f} | Sigmoid sensitivity | | dL/dz | {dL_dz:.4f} | "Upstream gradient" | | dL/dw1 | {dL_dw1:.4f} | How to adjust w1 | | dL/dw2 | {dL_dw2:.4f} | How to adjust w2 | | dL/db | {dL_db:.4f} | How to adjust bias | """ return svg_diagram, explanation # ============================================================================ # TAB 5: PRACTICE PROBLEMS # ============================================================================ PRACTICE_INTRO = """ # PRACTICE: COMPUTE BY HAND FIRST! =============================================== Welcome to the Gradient Occupational Aptitude Test (G.O.A.T.). Per Vault-Tec guidelines, pencil-and-paper practice builds neural pathways (the biological kind). Complete these problems to determine your future as a Machine Learning Specialist. ## TIPS FOR HAND CALCULATION 1. **Draw the computation graph** - boxes for operations, arrows for data flow 2. **Forward pass first** - compute all intermediate values 3. **Backward pass** - start from loss, work backwards 4. **Check dimensions** - gradient of scalar w.r.t. vector has same shape as the vector 5. **Verify numerically** - if unsure, use tiny h to approximate: df/dx ≈ (f(x+h) - f(x)) / h ## PRACTICE PROBLEMS Select a problem below and try it before clicking "Check Answer"! """ def practice_problem(problem_num): """Generate practice problems with solutions.""" problems = { 1: { "question": """ ### Problem 1: Simple Chain Rule Compute dy/dx where: ``` y = (2x + 3)^3 ``` at x = 1. **Hint:** Let u = 2x + 3, so y = u^3 """, "solution": """ ### Solution to Problem 1 **Step 1: Identify the composition** ``` u = 2x + 3 (inner) y = u^3 (outer) ``` **Step 2: Find individual derivatives** ``` du/dx = 2 dy/du = 3u^2 ``` **Step 3: Apply chain rule** ``` dy/dx = (dy/du) * (du/dx) = 3u^2 * 2 = 6u^2 = 6(2x + 3)^2 ``` **Step 4: Evaluate at x = 1** ``` dy/dx = 6(2*1 + 3)^2 = 6(5)^2 = 6 * 25 = 150 ``` **Answer: dy/dx = 150 at x = 1** """ }, 2: { "question": """ ### Problem 2: Sigmoid Derivative Given z = 2, compute: 1. sigmoid(z) 2. d/dz[sigmoid(z)] **Reminder:** sigmoid(z) = 1/(1+e^(-z)) d/dz[sigmoid(z)] = sigmoid(z) * (1 - sigmoid(z)) """, "solution": f""" ### Solution to Problem 2 **Step 1: Compute sigmoid(2)** ``` sigmoid(2) = 1/(1 + e^(-2)) = 1/(1 + {np.exp(-2):.6f}) = 1/{1 + np.exp(-2):.6f} = {1/(1+np.exp(-2)):.6f} ``` **Step 2: Compute derivative** ``` Let s = sigmoid(2) = {1/(1+np.exp(-2)):.6f} ds/dz = s * (1 - s) = {1/(1+np.exp(-2)):.6f} * (1 - {1/(1+np.exp(-2)):.6f}) = {1/(1+np.exp(-2)):.6f} * {1 - 1/(1+np.exp(-2)):.6f} = {(1/(1+np.exp(-2))) * (1 - 1/(1+np.exp(-2))):.6f} ``` **Answers:** - sigmoid(2) ≈ 0.8808 - d/dz[sigmoid(2)] ≈ 0.1050 """ }, 3: { "question": """ ### Problem 3: Full Backprop (Mini Version) Single neuron with: ``` x = 2 w = 0.5 b = -1 y_true = 1 ``` Using sigmoid activation and BCE loss, find dL/dw. **Steps to follow:** 1. Forward: z = wx + b 2. Forward: y_pred = sigmoid(z) 3. Forward: L = BCE(y_true, y_pred) 4. Backward: Apply chain rule """, "solution": """ ### Solution to Problem 3 **Forward Pass:** ``` z = w*x + b = 0.5*2 + (-1) = 0 y_pred = sigmoid(0) = 0.5 L = -[1*log(0.5) + 0*log(0.5)] = -log(0.5) = 0.693 ``` **Backward Pass:** dL/dy_pred: ``` = -y_true/y_pred + (1-y_true)/(1-y_pred) = -1/0.5 + 0/0.5 = -2 ``` dy_pred/dz: ``` = y_pred * (1 - y_pred) = 0.5 * 0.5 = 0.25 ``` dz/dw: ``` = x = 2 ``` **Chain Rule:** ``` dL/dw = (dL/dy) * (dy/dz) * (dz/dw) = (-2) * (0.25) * (2) = -1.0 ``` **Answer: dL/dw = -1.0** **Interpretation:** Negative gradient means we should INCREASE w to reduce loss (moving opposite to gradient). """ } } prob = problems.get(problem_num, problems[1]) return prob["question"], prob["solution"] # ============================================================================ # BUILD THE GRADIO APP # ============================================================================ with gr.Blocks(title="BACKPROP TERMINAL v1.0") as demo: gr.Markdown(""" # > VAULT-TEC NEURAL NETWORK TRAINING TERMINAL ## > SECURITY CLEARANCE: STAT 3106 ### > INITIALIZING BACKPROPAGATION MODULES... """) with gr.Tabs(): # TAB 1: Forward Pass with gr.TabItem("01: FORWARD PASS"): gr.HTML(FORWARD_INTRO) with gr.Row(): with gr.Column(scale=1): gr.Markdown("### INPUT PARAMETERS") x1_input = gr.Slider(minimum=-5, maximum=5, value=1.0, step=0.1, label="x1 (input 1)") x2_input = gr.Slider(minimum=-5, maximum=5, value=2.0, step=0.1, label="x2 (input 2)") w1_input = gr.Slider(minimum=-2, maximum=2, value=0.5, step=0.1, label="w1 (weight 1)") w2_input = gr.Slider(minimum=-2, maximum=2, value=-0.3, step=0.1, label="w2 (weight 2)") b_input = gr.Slider(minimum=-2, maximum=2, value=0.1, step=0.1, label="b (bias)") forward_btn = gr.Button(">> EXECUTE FORWARD PASS <<") with gr.Column(scale=2): forward_svg = gr.HTML(label="Computation Graph") forward_output = gr.Markdown(label="Calculation") forward_btn.click( forward_pass_demo, inputs=[x1_input, x2_input, w1_input, w2_input, b_input], outputs=[forward_svg, forward_output] ) # TAB 2: Chain Rule with gr.TabItem("02: CHAIN RULE"): gr.HTML(CHAIN_RULE_INTRO) with gr.Row(): with gr.Column(scale=1): gr.Markdown("### FUNCTION: y = (ax + b)²") a_input = gr.Slider(minimum=-5, maximum=5, value=3.0, step=0.1, label="a (coefficient)") b2_input = gr.Slider(minimum=-5, maximum=5, value=2.0, step=0.1, label="b (constant)") x_input = gr.Slider(minimum=-5, maximum=5, value=1.0, step=0.1, label="x (evaluation point)") chain_btn = gr.Button(">> APPLY CHAIN RULE <<") with gr.Column(scale=2): chain_svg = gr.HTML(label="Chain Rule Visualization") chain_output = gr.Markdown(label="Chain Rule Breakdown") chain_btn.click( chain_rule_calculator, inputs=[a_input, b2_input, x_input], outputs=[chain_svg, chain_output] ) # TAB 3: Key Derivatives with gr.TabItem("03: KEY DERIVATIVES"): gr.HTML(DERIVATIVES_INTRO) with gr.Row(): with gr.Column(scale=1): gr.Markdown("### SIGMOID DERIVATIVE CALCULATOR") z_input = gr.Slider( minimum=-5, maximum=5, value=0, step=0.1, label="z value" ) sigmoid_btn = gr.Button(">> COMPUTE SIGMOID DERIVATIVE <<") with gr.Column(scale=2): sigmoid_svg = gr.HTML(label="Sigmoid Visualization") sigmoid_output = gr.Markdown(label="Derivative Calculation") sigmoid_btn.click( sigmoid_derivative_demo, inputs=[z_input], outputs=[sigmoid_svg, sigmoid_output] ) # TAB 4: Backward Pass with gr.TabItem("04: BACKWARD PASS"): gr.HTML(BACKWARD_INTRO) with gr.Row(): with gr.Column(scale=1): gr.Markdown("### NETWORK CONFIGURATION") bx1 = gr.Slider(minimum=-5, maximum=5, value=1.0, step=0.1, label="x1") bx2 = gr.Slider(minimum=-5, maximum=5, value=2.0, step=0.1, label="x2") bw1 = gr.Slider(minimum=-2, maximum=2, value=0.5, step=0.1, label="w1") bw2 = gr.Slider(minimum=-2, maximum=2, value=-0.3, step=0.1, label="w2") bb = gr.Slider(minimum=-2, maximum=2, value=0.1, step=0.1, label="bias") by_true = gr.Slider(minimum=0, maximum=1, value=1, step=1, label="y_true (0 or 1)") back_btn = gr.Button(">> EXECUTE FULL BACKPROP <<") with gr.Column(scale=2): back_svg = gr.HTML(label="Backprop Graph") back_output = gr.Markdown(label="Complete Backprop Trace") back_btn.click( backward_pass_demo, inputs=[bx1, bx2, bw1, bw2, bb, by_true], outputs=[back_svg, back_output] ) # TAB 5: Practice with gr.TabItem("05: PRACTICE"): gr.Markdown(PRACTICE_INTRO) with gr.Row(): with gr.Column(): problem_select = gr.Radio( choices=["Problem 1: Chain Rule", "Problem 2: Sigmoid", "Problem 3: Full Backprop"], label="Select Problem", value="Problem 1: Chain Rule" ) show_problem_btn = gr.Button(">> SHOW PROBLEM <<") show_answer_btn = gr.Button(">> REVEAL SOLUTION <<") with gr.Column(): problem_display = gr.Markdown(label="Problem") solution_display = gr.Markdown(label="Solution", visible=False) def show_problem(selection): prob_num = int(selection.split(":")[0].split()[-1]) q, _ = practice_problem(prob_num) return q, gr.update(visible=False, value="") def show_solution(selection): prob_num = int(selection.split(":")[0].split()[-1]) _, s = practice_problem(prob_num) return gr.update(visible=True, value=s) show_problem_btn.click( show_problem, inputs=[problem_select], outputs=[problem_display, solution_display] ) show_answer_btn.click( show_solution, inputs=[problem_select], outputs=[solution_display] ) # TAB 6: Quick Reference with gr.TabItem("06: REFERENCE"): gr.Markdown(""" # QUICK REFERENCE CARD =============================================== ## CHAIN RULE ``` y = f(g(x)) dy/dx = (df/dg) * (dg/dx) ``` For longer chains: just multiply all the derivatives! ----------------------------------------------- ## COMMON DERIVATIVES | Function | Derivative | |----------|------------| | x^n | n*x^(n-1) | | e^x | e^x | | log(x) | 1/x | | sigmoid(x) | sigmoid(x)*(1-sigmoid(x)) | | ReLU(x) | 1 if x>0, else 0 | ----------------------------------------------- ## NEURAL NETWORK CHAIN For a single neuron with sigmoid: ``` z = Σ(wi*xi) + b y = sigmoid(z) L = loss(y, y_true) dL/dwi = (dL/dy) * (dy/dz) * (dz/dwi) = (dL/dy) * sigmoid'(z) * xi ``` ----------------------------------------------- ## GRADIENT DESCENT ``` w_new = w_old - learning_rate * dL/dw ``` The gradient points UPHILL; we go opposite direction. ----------------------------------------------- ## BCE LOSS GRADIENT (sigmoid output) For BCE loss with sigmoid output: ``` dL/dz = y_pred - y_true ``` This clean result comes from cancellation in the chain! ----------------------------------------------- ## DEBUGGING TIPS 1. **Gradient check:** Compare with numerical gradient ``` dL/dw ≈ [L(w+h) - L(w-h)] / (2h) ``` 2. **Shapes must match:** gradient of L w.r.t. W has same shape as W 3. **Large gradients?** Try gradient clipping or smaller learning rate 4. **Vanishing gradients?** Consider ReLU or residual connections """) gr.Markdown(""" --- > TERMINAL SESSION ACTIVE > VAULT-TEC WISHES YOU A PLEASANT TRAINING EXPERIENCE """) if __name__ == "__main__": demo.launch( server_port=7860, css=FALLOUT_CSS, js=""" () => { // Force dark mode and hide theme toggle document.body.classList.add('dark'); const style = document.createElement('style'); style.textContent = ` .dark-mode-toggle, [aria-label="Toggle dark mode"], button[title*="theme"], .theme-toggle { display: none !important; } `; document.head.appendChild(style); } """ )