diff --git "a/Python/app.js" "b/Python/app.js"
new file mode 100644--- /dev/null
+++ "b/Python/app.js"
@@ -0,0 +1,1876 @@
+const modules = [
+    {
+        id: "python-fundamentals",
+        title: "Python Fundamentals for DS",
+        icon: "🐍",
+        category: "Foundations",
+        description: "Data structures, comprehensions, file I/O, virtual environments"
+    },
+    {
+        id: "numpy",
+        title: "NumPy & Scientific Computing",
+        icon: "🔢",
+        category: "Scientific",
+        description: "ndarrays, broadcasting, vectorization, linear algebra"
+    },
+    {
+        id: "pandas",
+        title: "Pandas & Data Manipulation",
+        icon: "🐼",
+        category: "Data Wrangling",
+        description: "DataFrames, groupby, pivot, time series, merging"
+    },
+    {
+        id: "visualization",
+        title: "Data Visualization",
+        icon: "📊",
+        category: "Visualization",
+        description: "Matplotlib, Seaborn, Plotly — from basics to publication-ready"
+    },
+    {
+        id: "advanced-python",
+        title: "Advanced Python",
+        icon: "🎯",
+        category: "Advanced",
+        description: "OOP, decorators, async, multiprocessing, type hints"
+    },
+    {
+        id: "sklearn",
+        title: "Python for ML (Scikit-learn)",
+        icon: "🤖",
+        category: "Machine Learning",
+        description: "Pipelines, transformers, cross-validation, hyperparameter tuning"
+    },
+    {
+        id: "pytorch",
+        title: "Deep Learning with PyTorch",
+        icon: "🔥",
+        category: "Deep Learning",
+        description: "Tensors, autograd, nn.Module, training loops, transfer learning"
+    },
+    {
+        id: "tensorflow",
+        title: "TensorFlow & Keras",
+        icon: "🧠",
+        category: "Deep Learning",
+        description: "Sequential/Functional API, callbacks, TensorBoard, deployment"
+    },
+    {
+        id: "production",
+        title: "Production Python",
+        icon: "📦",
+        category: "Engineering",
+        description: "Testing, packaging, logging, FastAPI for model serving"
+    },
+    {
+        id: "optimization",
+        title: "Performance & Optimization",
+        icon: "⚡",
+        category: "Optimization",
+        description: "Profiling, Numba, Cython, memory optimization, Dask"
+    }
+];
+
+const MODULE_CONTENT = {
+    "python-fundamentals": {
+        concepts: `
+            <div class="section">
+                <h2>Python Data Structures for DS</h2>
+
+                <table>
+                    <tr><th>Type</th><th>Mutable</th><th>Ordered</th><th>Hashable</th><th>Use Case</th></tr>
+                    <tr><td><strong>list</strong></td><td>✓</td><td>✓</td><td>✗</td><td>Sequential data, time series, feature lists</td></tr>
+                    <tr><td><strong>tuple</strong></td><td>✗</td><td>✓</td><td>✓</td><td>Fixed records, dict keys, DataFrame rows</td></tr>
+                    <tr><td><strong>dict</strong></td><td>✓</td><td>✓ (3.7+)</td><td>✗</td><td>Lookup tables, JSON, config, caches</td></tr>
+                    <tr><td><strong>set</strong></td><td>✓</td><td>✗</td><td>✗</td><td>Unique values, membership testing O(1)</td></tr>
+                    <tr><td><strong>frozenset</strong></td><td>✗</td><td>✗</td><td>✓</td><td>Immutable set, usable as dict keys</td></tr>
+                    <tr><td><strong>deque</strong></td><td>✓</td><td>✓</td><td>✗</td><td>O(1) append/pop both ends, sliding windows</td></tr>
+                </table>
+
+                <h3>🧠 Python Memory Model — What No One Teaches You</h3>
+                <div class="info-box">
+                    <div class="box-title">⚡ Everything Is An Object</div>
+                    <div class="box-content">
+                        In Python, <strong>every value is an object</strong> on the heap. Variables are just references (pointers) to objects. When you write <code>a = [1, 2, 3]</code>, the list lives on the heap; <code>a</code> is a name that points to it. This is why <code>b = a</code> makes both point to the <strong>same list</strong> — no copy is made.
+                    </div>
+                </div>
+                <p><strong>Reference Counting:</strong> Python uses reference counting + cyclic garbage collector. Each object tracks how many names point to it. When count hits 0, memory is freed immediately. This is why <code>del</code> doesn't always free memory — it just decrements the reference count.</p>
+                <p><strong>Integer Interning:</strong> Python caches integers from <strong>-5 to 256</strong> and short strings. So <code>a = 100; b = 100; a is b</code> is <code>True</code>, but <code>a = 1000; b = 1000; a is b</code> may be <code>False</code>. Never use <code>is</code> for value comparison — always use <code>==</code>.</p>
+
+                <h3>collections Module — The Power Tools</h3>
+                <table>
+                    <tr><th>Class</th><th>Purpose</th><th>Why It Matters in DS</th></tr>
+                    <tr><td><strong>defaultdict</strong></td><td>Dict with default factory</td><td>Group data without KeyError: <code>defaultdict(list)</code></td></tr>
+                    <tr><td><strong>Counter</strong></td><td>Count hashable objects</td><td>Label distribution: <code>Counter(y_train)</code></td></tr>
+                    <tr><td><strong>namedtuple</strong></td><td>Lightweight immutable class</td><td>Return multiple values with names, not indices</td></tr>
+                    <tr><td><strong>OrderedDict</strong></td><td>Dict remembering insertion order</td><td>Legacy (dicts are ordered in 3.7+), but useful for <code>move_to_end()</code></td></tr>
+                    <tr><td><strong>deque</strong></td><td>Double-ended queue</td><td>Sliding window computations, BFS algorithms</td></tr>
+                    <tr><td><strong>ChainMap</strong></td><td>Stack multiple dicts</td><td>Layer config: defaults → env → CLI overrides</td></tr>
+                </table>
+
+                <h3>itertools — Memory-Efficient Data Pipelines</h3>
+                <div class="info-box">
+                    <div class="box-title">🔄 Lazy Evaluation Is King</div>
+                    <div class="box-content">
+                        <code>itertools</code> functions return <strong>iterators</strong>, not lists. They consume O(1) memory regardless of input size. This matters when processing millions of records.
+                    </div>
+                </div>
+                <table>
+                    <tr><th>Function</th><th>What It Does</th><th>DS Use Case</th></tr>
+                    <tr><td><code>chain()</code></td><td>Concatenate iterables</td><td>Merge multiple data files lazily</td></tr>
+                    <tr><td><code>islice()</code></td><td>Slice any iterator</td><td>Take first N records from generator</td></tr>
+                    <tr><td><code>groupby()</code></td><td>Group consecutive elements</td><td>Process sorted log entries by date</td></tr>
+                    <tr><td><code>product()</code></td><td>Cartesian product</td><td>Generate hyperparameter grid</td></tr>
+                    <tr><td><code>combinations()</code></td><td>All r-length combos</td><td>Feature interaction pairs</td></tr>
+                    <tr><td><code>starmap()</code></td><td>map() with unpacked args</td><td>Apply function to paired data</td></tr>
+                </table>
+
+                <h3>pathlib — Modern File Handling</h3>
+                <p>Stop using <code>os.path.join()</code>. Use <code>pathlib.Path</code> — it's object-oriented, cross-platform, and reads like English:</p>
+                <ul>
+                    <li><code>Path('data') / 'train' / 'images'</code> → builds paths with <code>/</code> operator</li>
+                    <li><code>path.glob('*.csv')</code> → find all CSV files</li>
+                    <li><code>path.stem</code>, <code>path.suffix</code>, <code>path.parent</code> → parse without regex</li>
+                    <li><code>path.read_text()</code> / <code>path.write_text()</code> → no need for <code>open()</code></li>
+                </ul>
+
+                <h3>Error Handling Patterns for Data Pipelines</h3>
+                <div class="callout warning">
+                    <div class="callout-title">⚠️ Never Do This</div>
+                    Bare <code>except:</code> catches <code>SystemExit</code> and <code>KeyboardInterrupt</code>. Always catch <strong>specific exceptions</strong>. In DS pipelines, catch <code>ValueError</code> (bad data), <code>FileNotFoundError</code> (missing files), <code>KeyError</code> (missing columns).
+                </div>
+                <p><strong>LBYL vs EAFP:</strong> Python prefers <em>"Easier to Ask Forgiveness than Permission"</em> (EAFP). Use <code>try/except</code> instead of checking conditions first. It's faster when exceptions are rare (which they usually are).</p>
+
+                <h3>Virtual Environments — Dependency Isolation</h3>
+                <table>
+                    <tr><th>Tool</th><th>Best For</th><th>Create</th><th>Key Feature</th></tr>
+                    <tr><td>venv</td><td>Simple projects</td><td><code>python -m venv env</code></td><td>Built-in, lightweight</td></tr>
+                    <tr><td>conda</td><td>DS/ML (C dependencies)</td><td><code>conda create -n myenv python=3.11</code></td><td>Handles non-Python deps (CUDA, MKL)</td></tr>
+                    <tr><td>poetry</td><td>Modern packaging</td><td><code>poetry init</code></td><td>Lock files, deterministic builds</td></tr>
+                    <tr><td>uv</td><td>Speed (Rust-based)</td><td><code>uv venv</code></td><td>10-100x faster than pip</td></tr>
+                </table>
+            </div>
+        `,
+        code: `
+            <div class="section">
+                <h2>💻 Essential Code Examples</h2>
+
+                <h3>collections In Action</h3>
+                <div class="code-block">
+<span class="keyword">from</span> collections <span class="keyword">import</span> defaultdict, Counter, namedtuple, deque
+
+<span class="comment"># defaultdict — Group samples by label (no KeyError!)</span>
+samples_by_label = defaultdict(list)
+<span class="keyword">for</span> feature, label <span class="keyword">in</span> <span class="function">zip</span>(features, labels):
+    samples_by_label[label].append(feature)
+<span class="comment"># {'cat': [f1, f3], 'dog': [f2, f4]} — no if/else needed</span>
+
+<span class="comment"># Counter — Class distribution analysis</span>
+y_train = [<span class="number">0</span>, <span class="number">1</span>, <span class="number">1</span>, <span class="number">0</span>, <span class="number">1</span>, <span class="number">2</span>, <span class="number">0</span>, <span class="number">1</span>]
+dist = Counter(y_train)
+<span class="function">print</span>(dist)  <span class="comment"># Counter({1: 4, 0: 3, 2: 1})</span>
+<span class="function">print</span>(dist.most_common(<span class="number">2</span>))  <span class="comment"># [(1, 4), (0, 3)]</span>
+
+<span class="comment"># namedtuple — Return multiple values with names</span>
+ModelResult = namedtuple(<span class="string">'ModelResult'</span>, [<span class="string">'accuracy'</span>, <span class="string">'precision'</span>, <span class="string">'recall'</span>, <span class="string">'f1'</span>])
+result = ModelResult(accuracy=<span class="number">0.95</span>, precision=<span class="number">0.93</span>, recall=<span class="number">0.91</span>, f1=<span class="number">0.92</span>)
+<span class="function">print</span>(result.accuracy)  <span class="comment"># 0.95 — much clearer than result[0]</span>
+
+<span class="comment"># deque — Sliding window for streaming data</span>
+window = deque(maxlen=<span class="number">5</span>)
+<span class="keyword">for</span> value <span class="keyword">in</span> data_stream:
+    window.append(value)
+    moving_avg = <span class="function">sum</span>(window) / <span class="function">len</span>(window)
+                </div>
+
+                <h3>itertools for Data Pipelines</h3>
+                <div class="code-block">
+<span class="keyword">from</span> itertools <span class="keyword">import</span> chain, islice, product, combinations
+
+<span class="comment"># chain — Merge multiple data files lazily (no memory explosion)</span>
+<span class="keyword">def</span> <span class="function">read_csv_lines</span>(filepath):
+    <span class="keyword">with</span> <span class="function">open</span>(filepath) <span class="keyword">as</span> f:
+<span class="keyword">next</span>(f)  <span class="comment"># skip header</span>
+<span class="keyword">yield from</span> f
+
+all_data = chain(
+    read_csv_lines(<span class="string">'jan.csv'</span>),
+    read_csv_lines(<span class="string">'feb.csv'</span>),
+    read_csv_lines(<span class="string">'mar.csv'</span>)
+)
+<span class="comment"># Processes millions of lines with O(1) memory!</span>
+
+<span class="comment"># product — Generate hyperparameter grid</span>
+learning_rates = [<span class="number">0.001</span>, <span class="number">0.01</span>, <span class="number">0.1</span>]
+batch_sizes = [<span class="number">16</span>, <span class="number">32</span>, <span class="number">64</span>]
+dropouts = [<span class="number">0.1</span>, <span class="number">0.3</span>, <span class="number">0.5</span>]
+<span class="keyword">for</span> lr, bs, do <span class="keyword">in</span> product(learning_rates, batch_sizes, dropouts):
+    train_model(lr=lr, batch_size=bs, dropout=do)
+<span class="comment"># 27 combinations without nested loops</span>
+
+<span class="comment"># combinations — Feature interaction pairs</span>
+features = [<span class="string">'age'</span>, <span class="string">'income'</span>, <span class="string">'score'</span>, <span class="string">'tenure'</span>]
+<span class="keyword">for</span> f1, f2 <span class="keyword">in</span> combinations(features, <span class="number">2</span>):
+    df[<span class="string">f'{f1}_x_{f2}'</span>] = df[f1] * df[f2]
+<span class="comment"># Creates: age_x_income, age_x_score, ... (6 pairs)</span>
+                </div>
+
+                <h3>pathlib — Modern File Management</h3>
+                <div class="code-block">
+<span class="keyword">from</span> pathlib <span class="keyword">import</span> Path
+
+<span class="comment"># Build paths naturally (cross-platform)</span>
+data_dir = Path(<span class="string">'data'</span>) / <span class="string">'processed'</span> / <span class="string">'v2'</span>
+data_dir.mkdir(parents=<span class="keyword">True</span>, exist_ok=<span class="keyword">True</span>)
+
+<span class="comment"># Find all CSV files recursively</span>
+csv_files = <span class="function">list</span>(data_dir.glob(<span class="string">'**/*.csv'</span>))
+<span class="function">print</span>(<span class="string">f"Found {len(csv_files)} CSV files"</span>)
+
+<span class="comment"># Parse file names without string hacking</span>
+<span class="keyword">for</span> f <span class="keyword">in</span> csv_files:
+    <span class="function">print</span>(<span class="string">f"Name: {f.stem}, Extension: {f.suffix}, Parent: {f.parent}"</span>)
+
+<span class="comment"># Read/write without open()</span>
+config = Path(<span class="string">'config.json'</span>).read_text()
+Path(<span class="string">'output.txt'</span>).write_text(<span class="string">'Results: 95.2% accuracy'</span>)
+                </div>
+
+                <h3>Advanced Comprehensions &amp; Unpacking</h3>
+                <div class="code-block">
+<span class="comment"># Nested comprehension — Flatten list of lists</span>
+nested = [[<span class="number">1</span>, <span class="number">2</span>], [<span class="number">3</span>, <span class="number">4</span>], [<span class="number">5</span>, <span class="number">6</span>]]
+flat = [x <span class="keyword">for</span> sublist <span class="keyword">in</span> nested <span class="keyword">for</span> x <span class="keyword">in</span> sublist]
+<span class="comment"># [1, 2, 3, 4, 5, 6]</span>
+
+<span class="comment"># Dict comprehension — Invert a mapping</span>
+label_to_id = {<span class="string">'cat'</span>: <span class="number">0</span>, <span class="string">'dog'</span>: <span class="number">1</span>, <span class="string">'bird'</span>: <span class="number">2</span>}
+id_to_label = {v: k <span class="keyword">for</span> k, v <span class="keyword">in</span> label_to_id.items()}
+
+<span class="comment"># Set comprehension — Unique words from documents</span>
+docs = [<span class="string">"hello world"</span>, <span class="string">"world of python"</span>]
+vocab = {word <span class="keyword">for</span> doc <span class="keyword">in</span> docs <span class="keyword">for</span> word <span class="keyword">in</span> doc.split()}
+
+<span class="comment"># Walrus operator (:=) — Assign + use in expression (3.8+)</span>
+<span class="keyword">if</span> (n := <span class="function">len</span>(data)) &gt; <span class="number">1000</span>:
+    <span class="function">print</span>(<span class="string">f"Large dataset: {n} samples"</span>)
+
+<span class="comment"># Extended unpacking — Split data elegantly</span>
+first, *middle, last = sorted(scores)
+<span class="function">print</span>(<span class="string">f"Min: {first}, Max: {last}, Middle: {middle}"</span>)
+                </div>
+
+                <h3>Robust Error Handling for Pipelines</h3>
+                <div class="code-block">
+<span class="keyword">import</span> logging
+logger = logging.getLogger(__name__)
+
+<span class="keyword">def</span> <span class="function">load_and_validate</span>(filepath):
+    <span class="string">"""Production-grade data loading with proper error handling."""</span>
+    <span class="keyword">try</span>:
+df = pd.read_csv(filepath)
+    <span class="keyword">except</span> <span class="function">FileNotFoundError</span>:
+logger.error(<span class="string">f"File not found: {filepath}"</span>)
+<span class="keyword">raise</span>
+    <span class="keyword">except</span> pd.errors.EmptyDataError:
+logger.warning(<span class="string">f"Empty file: {filepath}"</span>)
+<span class="keyword">return</span> pd.DataFrame()
+    <span class="keyword">except</span> pd.errors.ParserError <span class="keyword">as</span> e:
+logger.error(<span class="string">f"Parse error in {filepath}: {e}"</span>)
+<span class="keyword">raise</span> ValueError(<span class="string">f"Corrupted CSV: {filepath}"</span>) <span class="keyword">from</span> e
+    
+    <span class="comment"># Validate required columns</span>
+    required = {<span class="string">'id'</span>, <span class="string">'target'</span>, <span class="string">'timestamp'</span>}
+    missing = required - <span class="function">set</span>(df.columns)
+    <span class="keyword">if</span> missing:
+<span class="keyword">raise</span> <span class="function">KeyError</span>(<span class="string">f"Missing columns: {missing}"</span>)
+    
+    <span class="keyword">return</span> df
+                </div>
+            </div>
+        `,
+        interview: `
+            <div class="section">
+                <h2>🎯 Interview Questions</h2>
+
+                <div class="interview-box">
+                    <strong>Q1: What's the difference between a list and a tuple? When would you use each in DS?</strong>
+                    <p><strong>Answer:</strong> Lists are mutable, tuples immutable. But the deeper answer: tuples are <strong>hashable</strong> (can be dict keys), use <strong>less memory</strong> (no over-allocation), and signal <strong>intent</strong> ("this shouldn't change"). Use tuples for (lat, lon) pairs, function return values, dict keys for caching. Use lists for feature collections that grow.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q2: How does Python's GIL affect data science workflows?</strong>
+                    <p><strong>Answer:</strong> The GIL prevents true multi-threading for CPU-bound tasks. But here's what most people miss: <strong>NumPy, Pandas, and scikit-learn release the GIL</strong> during C-level computations. So vectorized operations ARE parallel at the C level. For pure Python CPU work, use <code>multiprocessing</code>. For I/O (API calls, file reads), threading works fine because the GIL is released during I/O waits.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q3: Explain the difference between <code>is</code> and <code>==</code>. Why does this matter?</strong>
+                    <p><strong>Answer:</strong> <code>==</code> checks value equality (<code>__eq__</code>). <code>is</code> checks <strong>identity</strong> (same memory address). Python interns small integers (-5 to 256) and some strings, so <code>300 is 300</code> may be <code>False</code>. Always use <code>==</code> for values. Only use <code>is</code> for <code>None</code> checks: <code>if x is None</code>.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q4: How would you handle a 10GB CSV that doesn't fit in memory?</strong>
+                    <p><strong>Answer:</strong> 5 strategies, from simplest to most powerful: (1) <code>pd.read_csv(chunksize=50000)</code> — process in batches, (2) <code>usecols=['needed_cols']</code> — load only what you need, (3) <code>dtype={'col': 'int32'}</code> — use smaller types, (4) Dask — lazy Pandas-like API, (5) DuckDB — SQL on CSV files with zero memory overhead.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q5: What's the time complexity of dict lookup vs list search? Why?</strong>
+                    <p><strong>Answer:</strong> Dict: <strong>O(1)</strong> average via hash tables (Python's dict uses open addressing). List: <strong>O(n)</strong> linear scan. Internally, dict hashes the key to compute a slot index, then handles collisions via probing. Sets use the same mechanism. This is why <code>x in my_set</code> is fast but <code>x in my_list</code> is slow.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q6: Explain shallow vs deep copy. Give a real DS scenario where this matters.</strong>
+                    <p><strong>Answer:</strong> <code>copy.copy()</code> copies outer container but shares inner objects. <code>copy.deepcopy()</code> recursively copies everything. <strong>Real scenario:</strong> You have a list of dicts (config per experiment). Shallow copy means modifying one experiment's config changes all of them. Deep copy gives independent configs. Pandas <code>.copy()</code> is deep by default — but <code>df2 = df</code> is NOT a copy at all.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q7: What is a defaultdict and when would you use it over a regular dict?</strong>
+                    <p><strong>Answer:</strong> <code>defaultdict(factory)</code> auto-creates default values for missing keys. Use <code>defaultdict(list)</code> to group items without <code>if key not in dict</code> checks. Use <code>defaultdict(int)</code> to count. It's cleaner and ~20% faster than <code>dict.setdefault()</code> for grouping operations in data processing.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q8: What are generators and why are they critical for large-scale data processing?</strong>
+                    <p><strong>Answer:</strong> Generators yield values one at a time using <code>yield</code>, consuming <strong>O(1) memory</strong> regardless of data size. A list of 1 billion items = ~8GB RAM. A generator of 1 billion items = ~100 bytes. Critical for: reading large files, streaming data, batch training. <code>yield from</code> delegates to sub-generators.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q9: How would you remove duplicates from a list while preserving order?</strong>
+                    <p><strong>Answer:</strong> <code>list(dict.fromkeys(my_list))</code> — uses dict's insertion-order guarantee (3.7+), runs in O(n). Old approach: <code>seen = set(); [x for x in lst if not (x in seen or seen.add(x))]</code>. For DataFrames: <code>df.drop_duplicates(subset=['key_col'])</code>.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q10: Explain Python's garbage collection mechanism.</strong>
+                    <p><strong>Answer:</strong> Two mechanisms: (1) <strong>Reference counting</strong> — each object has a count; freed when count hits 0. Immediate cleanup. (2) <strong>Cyclic garbage collector</strong> — detects reference cycles (A → B → A) that refcount can't handle. Runs periodically on generations (gen0, gen1, gen2). You can force it with <code>gc.collect()</code> — useful after deleting large ML models.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q11: What's the difference between <code>__str__</code> and <code>__repr__</code>?</strong>
+                    <p><strong>Answer:</strong> <code>__str__</code> is for end users (readable), <code>__repr__</code> is for developers (unambiguous, ideally <code>eval</code>-able). If only one is defined, implement <code>__repr__</code> — Python falls back to it for <code>str()</code> too. In ML: <code>__repr__</code> should show model params: <code>LinearRegression(lr=0.01, reg=l2)</code>.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q12: How does <code>*args</code> and <code>**kwargs</code> help in ML code?</strong>
+                    <p><strong>Answer:</strong> They enable flexible function signatures. <code>*args</code>: variable positional args (multiple datasets). <code>**kwargs</code>: variable keyword args (hyperparameters). Essential for: wrapper functions, decorators, scikit-learn's <code>set_params(**params)</code>, and <code>model.fit(X, y, **fit_params)</code>.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q13: What are f-strings and why are they preferred over .format() and %?</strong>
+                    <p><strong>Answer:</strong> f-strings (3.6+) are <strong>fastest</strong>, most readable formatting. They support expressions: <code>f"{accuracy:.2%}"</code> → "95.23%", <code>f"{x=}"</code> (3.8+) → "x=42" for debugging. <code>.format()</code> is slower and more verbose. <code>%</code> formatting is legacy C-style. Always use f-strings in modern Python.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q14: Explain the LEGB scope rule.</strong>
+                    <p><strong>Answer:</strong> Python resolves names in order: <strong>L</strong>ocal → <strong>E</strong>nclosing function → <strong>G</strong>lobal → <strong>B</strong>uilt-in. This is why you can accidentally shadow built-ins: <code>list = [1,2]</code> breaks <code>list()</code>. Use <code>nonlocal</code> to modify enclosing scope, <code>global</code> for module scope (but avoid globals in production code).</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q15: What's the difference between <code>append()</code> and <code>extend()</code>?</strong>
+                    <p><strong>Answer:</strong> <code>append(x)</code> adds x as a single element. <code>extend(iterable)</code> unpacks and adds each element. <code>[1,2].append([3,4])</code> → <code>[1,2,[3,4]]</code>. <code>[1,2].extend([3,4])</code> → <code>[1,2,3,4]</code>. Use <code>extend()</code> when merging feature lists; <code>append()</code> when adding one item to results.</p>
+                </div>
+            </div>
+        `
+    },
+
+    "numpy": {
+        concepts: `
+            <div class="section">
+                <h2>NumPy ndarray Fundamentals</h2>
+
+                <h3>🧠 Why NumPy Is 50-100x Faster Than Python Lists</h3>
+                <table>
+                    <tr><th>Feature</th><th>Python List</th><th>NumPy ndarray</th></tr>
+                    <tr><td>Storage</td><td>Array of pointers to objects scattered in memory</td><td>Contiguous block of raw typed data</td></tr>
+                    <tr><td>Type</td><td>Each element can be different type</td><td>Homogeneous — all elements same dtype</td></tr>
+                    <tr><td>Operations</td><td>Python loop (bytecode interpretation)</td><td>Compiled C/Fortran loops</td></tr>
+                    <tr><td>Memory</td><td>~28 bytes per int + pointer overhead</td><td>8 bytes per int64 (no overhead)</td></tr>
+                    <tr><td>SIMD</td><td>Not possible</td><td>Uses CPU vector instructions (SSE/AVX)</td></tr>
+                </table>
+
+                <h3>Memory Layout: C-Order vs Fortran-Order</h3>
+                <div class="info-box">
+                    <div class="box-title">⚡ Performance-Critical Knowledge</div>
+                    <div class="box-content">
+                        <strong>C-order (row-major):</strong> Rows stored contiguously. <code>arr[0,0], arr[0,1], arr[0,2], arr[1,0]...</code><br>
+                        <strong>Fortran-order (col-major):</strong> Columns stored contiguously. <code>arr[0,0], arr[1,0], arr[2,0], arr[0,1]...</code><br>
+                        NumPy defaults to C-order. <strong>Iterating along the last axis is fastest</strong> (cache-friendly). Fortran-order is preferred when interfacing with LAPACK/BLAS (used internally by NumPy's linear algebra).
+                    </div>
+                </div>
+
+                <h3>Strides: The Secret Behind Views</h3>
+                <p>Every ndarray has a <strong>strides</strong> tuple — bytes to jump in each dimension. For a <code>(3,4)</code> float64 array: strides = <code>(32, 8)</code> means jump 32 bytes for next row, 8 bytes for next column. Slicing creates <strong>views</strong> (no copy) by adjusting strides. <code>arr[::2]</code> doubles the row stride.</p>
+
+                <h3>Broadcasting Rules — The Complete Picture</h3>
+                <div class="info-box">
+                    <div class="box-title">🎯 Broadcasting Rules (Right to Left)</div>
+                    <div class="box-content">
+                        Two arrays are compatible when, <strong>for each trailing dimension</strong>: (1) Dimensions are equal, OR (2) One of them is 1.<br>
+                        <strong>Example:</strong> (5, 3, 1) + (1, 4) → shape (5, 3, 4). The (1,) dims are "stretched" virtually — no memory is copied.
+                    </div>
+                </div>
+
+                <h3>Key dtype Choices for DS</h3>
+                <table>
+                    <tr><th>dtype</th><th>Bytes</th><th>Range</th><th>When to Use</th></tr>
+                    <tr><td>float32</td><td>4</td><td>±3.4e38</td><td>Deep learning (GPU prefers this), 50% less memory</td></tr>
+                    <tr><td>float64</td><td>8</td><td>±1.8e308</td><td>Default. Scientific computing, high-precision stats</td></tr>
+                    <tr><td>int32</td><td>4</td><td>±2.1 billion</td><td>Indices, counts, most integer data</td></tr>
+                    <tr><td>bool</td><td>1</td><td>True/False</td><td>Masks for filtering</td></tr>
+                    <tr><td>category (Pandas)</td><td>Varies</td><td>Finite set</td><td>Repeated strings → 90% memory savings</td></tr>
+                </table>
+
+                <h3>np.einsum — Einstein Summation (Power Tool)</h3>
+                <p><code>np.einsum</code> can express <strong>any</strong> tensor operation in one call: matrix multiply, trace, transpose, batch ops. Often faster than chaining NumPy functions because it avoids intermediate arrays.</p>
+
+                <h3>Linear Algebra for ML</h3>
+                <ul>
+                    <li><code>np.linalg.inv(X.T @ X) @ X.T @ y</code> → Normal equation (linear regression)</li>
+                    <li><code>U, S, Vt = np.linalg.svd(X)</code> → PCA, dimensionality reduction</li>
+                    <li><code>eigenvals, eigenvecs = np.linalg.eigh(cov)</code> → Covariance eigenvectors</li>
+                    <li><code>np.linalg.norm(X, axis=1)</code> → L2 norms for distance computation</li>
+                </ul>
+            </div>
+        `,
+        code: `
+            <div class="section">
+                <h2>💻 NumPy Code Examples</h2>
+
+                <h3>Array Creation &amp; Inspection</h3>
+                <div class="code-block">
+<span class="keyword">import</span> numpy <span class="keyword">as</span> np
+
+<span class="comment"># Create with specific dtypes for memory efficiency</span>
+X = np.random.randn(<span class="number">1000</span>, <span class="number">10</span>).astype(np.float32)  <span class="comment"># 40KB vs 80KB</span>
+y = np.random.randint(<span class="number">0</span>, <span class="number">2</span>, size=<span class="number">1000</span>, dtype=np.int8)  <span class="comment"># 1KB vs 8KB</span>
+
+<span class="comment"># Inspect memory layout</span>
+<span class="function">print</span>(<span class="string">f"Shape: {X.shape}"</span>)           <span class="comment"># (1000, 10)</span>
+<span class="function">print</span>(<span class="string">f"Strides: {X.strides}"</span>)       <span class="comment"># (40, 4) bytes</span>
+<span class="function">print</span>(<span class="string">f"C-contiguous: {X.flags['C_CONTIGUOUS']}"</span>)  <span class="comment"># True</span>
+<span class="function">print</span>(<span class="string">f"Memory: {X.nbytes / 1024:.1f} KB"</span>)  <span class="comment"># 39.1 KB</span>
+                </div>
+
+                <h3>Broadcasting for Feature Normalization</h3>
+                <div class="code-block">
+<span class="comment"># Normalize each feature (mean=0, std=1)</span>
+X = np.random.randn(<span class="number">1000</span>, <span class="number">5</span>)  <span class="comment"># 1000 samples, 5 features</span>
+
+mean = X.mean(axis=<span class="number">0</span>)  <span class="comment"># shape (5,)</span>
+std = X.std(axis=<span class="number">0</span>)    <span class="comment"># shape (5,)</span>
+
+X_normalized = (X - mean) / std  <span class="comment"># Broadcasting! (1000,5) - (5,) works</span>
+
+<span class="comment"># Min-Max scaling to [0, 1]</span>
+X_min = X.min(axis=<span class="number">0</span>)
+X_max = X.max(axis=<span class="number">0</span>)
+X_scaled = (X - X_min) / (X_max - X_min + <span class="number">1e-8</span>)  <span class="comment"># epsilon avoids /0</span>
+                </div>
+
+                <h3>Advanced Indexing &amp; Boolean Masking</h3>
+                <div class="code-block">
+<span class="comment"># Boolean mask — filter outliers (3 sigma rule)</span>
+data = np.random.randn(<span class="number">10000</span>)
+mask = np.abs(data) &lt; <span class="number">3</span>  <span class="comment"># True where within 3 std devs</span>
+clean = data[mask]  <span class="comment"># Only non-outlier values</span>
+<span class="function">print</span>(<span class="string">f"Removed {len(data) - len(clean)} outliers"</span>)
+
+<span class="comment"># Fancy indexing — select specific rows/columns</span>
+X = np.random.randn(<span class="number">100</span>, <span class="number">10</span>)
+important_features = [<span class="number">0</span>, <span class="number">3</span>, <span class="number">7</span>]  <span class="comment"># indices of best features</span>
+X_selected = X[:, important_features]  <span class="comment"># shape (100, 3)</span>
+
+<span class="comment"># np.where — Conditional replacement</span>
+predictions = np.array([<span class="number">0.3</span>, <span class="number">0.7</span>, <span class="number">0.1</span>, <span class="number">0.9</span>])
+labels = np.where(predictions &gt; <span class="number">0.5</span>, <span class="number">1</span>, <span class="number">0</span>)  <span class="comment"># [0, 1, 0, 1]</span>
+                </div>
+
+                <h3>np.einsum — One Function to Rule Them All</h3>
+                <div class="code-block">
+A = np.random.randn(<span class="number">3</span>, <span class="number">4</span>)
+B = np.random.randn(<span class="number">4</span>, <span class="number">5</span>)
+
+<span class="comment"># Matrix multiply: C_ij = sum_k A_ik * B_kj</span>
+C = np.einsum(<span class="string">'ik,kj-&gt;ij'</span>, A, B)  <span class="comment"># same as A @ B</span>
+
+<span class="comment"># Trace: sum of diagonal</span>
+trace = np.einsum(<span class="string">'ii-&gt;'</span>, np.eye(<span class="number">4</span>))  <span class="comment"># 4.0</span>
+
+<span class="comment"># Batch matrix multiply (common in deep learning)</span>
+batch_A = np.random.randn(<span class="number">32</span>, <span class="number">10</span>, <span class="number">20</span>)  <span class="comment"># 32 matrices</span>
+batch_B = np.random.randn(<span class="number">32</span>, <span class="number">20</span>, <span class="number">5</span>)
+result = np.einsum(<span class="string">'bij,bjk-&gt;bik'</span>, batch_A, batch_B)  <span class="comment"># (32,10,5)</span>
+
+<span class="comment"># Dot product of each row pair</span>
+X = np.random.randn(<span class="number">100</span>, <span class="number">768</span>)  <span class="comment"># embeddings</span>
+similarity = np.einsum(<span class="string">'ij,kj-&gt;ik'</span>, X, X)  <span class="comment"># cosine sim matrix</span>
+                </div>
+
+                <h3>Linear Regression — The NumPy Way</h3>
+                <div class="code-block">
+<span class="comment"># Solve linear regression: y = Xβ + ε</span>
+<span class="comment"># Normal equation: β = (X^T X)^{-1} X^T y</span>
+X = np.column_stack([np.ones(<span class="number">100</span>), np.random.randn(<span class="number">100</span>, <span class="number">3</span>)])
+y = np.random.randn(<span class="number">100</span>)
+
+<span class="comment"># Method 1: Direct (numerically unstable for large X)</span>
+beta = np.linalg.inv(X.T @ X) @ X.T @ y
+
+<span class="comment"># Method 2: lstsq (stable, handles rank-deficient X)</span>
+beta, residuals, rank, sv = np.linalg.lstsq(X, y, rcond=<span class="keyword">None</span>)
+
+<span class="comment"># Method 3: SVD decomposition (most stable)</span>
+U, S, Vt = np.linalg.svd(X, full_matrices=<span class="keyword">False</span>)
+beta = Vt.T @ np.diag(<span class="number">1</span>/S) @ U.T @ y
+                </div>
+
+                <h3>Memory-Mapped Files for Huge Arrays</h3>
+                <div class="code-block">
+<span class="comment"># Process arrays larger than RAM</span>
+<span class="comment"># Create memory-mapped file</span>
+big_array = np.memmap(<span class="string">'huge_data.npy'</span>, dtype=np.float32,
+              mode=<span class="string">'w+'</span>, shape=(<span class="number">1000000</span>, <span class="number">100</span>))
+
+<span class="comment"># Write data in chunks</span>
+<span class="keyword">for</span> i <span class="keyword">in</span> <span class="function">range</span>(<span class="number">0</span>, <span class="number">1000000</span>, <span class="number">10000</span>):
+    big_array[i:i+<span class="number">10000</span>] = np.random.randn(<span class="number">10000</span>, <span class="number">100</span>)
+
+<span class="comment"># Read slices without loading the entire file</span>
+subset = big_array[<span class="number">5000</span>:<span class="number">6000</span>]  <span class="comment"># Only reads 1000 rows from disk</span>
+<span class="function">print</span>(subset.mean())
+                </div>
+            </div>
+        `,
+        interview: `
+            <div class="section">
+                <h2>🎯 NumPy Interview Questions</h2>
+
+                <div class="interview-box">
+                    <strong>Q1: Why is NumPy faster than Python lists for numerical operations?</strong>
+                    <p><strong>Answer:</strong> Three reasons: (1) <strong>Contiguous memory</strong> — CPU cache-friendly, no pointer chasing. (2) <strong>Compiled C loops</strong> — operations run in compiled C, not interpreted Python. (3) <strong>SIMD instructions</strong> — modern CPUs process 4-8 floats simultaneously (AVX). Together: 50-100x speedup.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q2: What's the difference between a view and a copy? Why does it matter?</strong>
+                    <p><strong>Answer:</strong> Views share data (slicing creates views). Copies duplicate data. <code>arr[::2]</code> is a view — modifying it modifies the original. <code>arr[[0,2,4]]</code> (fancy indexing) is a copy. Views are fast and memory-efficient. Use <code>np.shares_memory(a, b)</code> to check. Always <code>.copy()</code> when you need independent data.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q3: Explain broadcasting rules with an example.</strong>
+                    <p><strong>Answer:</strong> Compare shapes <strong>right-to-left</strong>. Dimensions must be equal or one must be 1. Example: <code>(3,1)</code> + <code>(1,4)</code> → <code>(3,4)</code>. Each (3,1) row is "stretched" to match 4 columns. No memory is actually copied — NumPy adjusts strides internally. <strong>Gotcha:</strong> <code>(3,)</code> + <code>(3,4)</code> fails — need to reshape to <code>(3,1)</code> first.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q4: What is axis=0 vs axis=1?</strong>
+                    <p><strong>Answer:</strong> <code>axis=0</code> = operate down rows (column-wise). <code>axis=1</code> = across columns (row-wise). Think: axis=0 <strong>collapses rows</strong>, axis=1 <strong>collapses columns</strong>. For (100,5) array: <code>mean(axis=0)</code> → shape (5,) — one mean per feature. <code>mean(axis=1)</code> → shape (100,) — one mean per sample.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q5: How would you implement PCA using only NumPy?</strong>
+                    <p><strong>Answer:</strong> (1) Center data: <code>X_c = X - X.mean(axis=0)</code>, (2) Covariance: <code>cov = X_c.T @ X_c / (n-1)</code>, (3) Eigendecomposition: <code>vals, vecs = np.linalg.eigh(cov)</code>, (4) Sort by eigenvalue descending, (5) Project: <code>X_pca = X_c @ vecs[:, -k:]</code>. Alternatively use SVD directly: <code>U, S, Vt = np.linalg.svd(X_c)</code>.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q6: What's the difference between np.dot, np.matmul (@), and np.einsum?</strong>
+                    <p><strong>Answer:</strong> <code>np.dot</code>: flattens for 1D, matrix multiply for 2D, but <strong>confusing for higher dims</strong>. <code>@ (matmul)</code>: clean matrix multiply, broadcasts over batch dims. <code>einsum</code>: most flexible — express any contraction. Use <code>@</code> for readability, <code>einsum</code> for complex ops. Avoid <code>np.dot</code> for 3D+ arrays.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q7: How do you handle NaN values in NumPy arrays?</strong>
+                    <p><strong>Answer:</strong> <code>np.isnan(arr)</code> detects NaNs. <code>np.nanmean(arr)</code>, <code>np.nanstd(arr)</code> — nan-safe aggregations. Replace: <code>arr[np.isnan(arr)] = 0</code>. <strong>Gotcha:</strong> <code>np.nan == np.nan</code> is <code>False</code>! NaN poisons comparisons. This is IEEE 754 standard.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q8: What's structured arrays and when would you use them over Pandas?</strong>
+                    <p><strong>Answer:</strong> Structured arrays have named fields with mixed dtypes: <code>np.dtype([('name', 'U10'), ('age', 'i4'), ('score', 'f8')])</code>. Use when: (1) You need NumPy speed without Pandas overhead, (2) Interfacing with binary file formats (HDF5, FITS), (3) Processing millions of records where Pandas is too slow.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q9: Explain the performance difference between C-order and Fortran-order.</strong>
+                    <p><strong>Answer:</strong> C-order stores rows contiguously; Fortran stores columns. Iterating along the <strong>last axis</strong> of C-order arrays is fastest because adjacent elements are in adjacent memory (cache-friendly). For column-heavy operations, Fortran order can be faster. NumPy defaults to C-order. <code>np.asfortranarray()</code> converts.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q10: How would you vectorize a custom function that doesn't have a NumPy equivalent?</strong>
+                    <p><strong>Answer:</strong> Three options in order of speed: (1) <code>np.vectorize(func)</code> — convenience wrapper, NOT actually vectorized (still Python loops), (2) Rewrite using broadcasting + boolean masks, (3) Use <code>@numba.jit(nopython=True)</code> for true compiled speed. Always prefer option 2 when possible.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q11: What's np.random.seed() vs np.random.RandomState vs np.random.default_rng()?</strong>
+                    <p><strong>Answer:</strong> <code>np.random.seed(42)</code>: global state, not thread-safe. <code>RandomState(42)</code>: isolated state, legacy. <code>default_rng(42)</code>: modern (NumPy 1.17+), uses PCG64, thread-safe, better statistical properties. <strong>Always use <code>default_rng()</code></strong> in new code.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q12: How do you compute pairwise distances between all points efficiently?</strong>
+                    <p><strong>Answer:</strong> Use the expansion: ||a-b||² = ||a||² + ||b||² - 2a·b. Code: <code>dists = np.sum(X**2, axis=1)[:,None] + np.sum(X**2, axis=1)[None,:] - 2 * X @ X.T</code>. This avoids the O(n²×d) explicit loop and leverages BLAS matrix multiply. <code>scipy.spatial.distance.cdist</code> wraps this.</p>
+                </div>
+            </div>
+        `
+    },
+
+    "pandas": {
+        concepts: `
+            <div class="section">
+                <h2>Pandas Core Concepts</h2>
+
+                <h3>🧠 DataFrame Internals — What Actually Happens Under the Hood</h3>
+                <div class="info-box">
+                    <div class="box-title">⚡ BlockManager Architecture</div>
+                    <div class="box-content">
+                        A DataFrame is NOT a 2D array. Internally, Pandas uses a <strong>BlockManager</strong> — columns of the same dtype are stored together in contiguous NumPy arrays (blocks). When you add a column of a different type, a new block is created. This is why <strong>column operations are fast</strong> (same block) but <strong>row iteration is slow</strong> (crosses blocks).
+                    </div>
+                </div>
+
+                <h3>DataFrame vs Series</h3>
+                <table>
+                    <tr><th>Feature</th><th>Series</th><th>DataFrame</th></tr>
+                    <tr><td>Dimensions</td><td>1D labeled array</td><td>2D labeled table</td></tr>
+                    <tr><td>Analogy</td><td>A column in a spreadsheet</td><td>The entire spreadsheet</td></tr>
+                    <tr><td>Index</td><td>Single index</td><td>Row index + column index</td></tr>
+                    <tr><td>Creation</td><td><code>pd.Series([1,2,3])</code></td><td><code>pd.DataFrame({'a': [1,2]})</code></td></tr>
+                </table>
+
+                <h3>The .loc vs .iloc Decision Tree</h3>
+                <div class="info-box">
+                    <div class="box-title">🎯 Golden Rule</div>
+                    <div class="box-content">
+                        <strong>.loc</strong> = Label-based. Use when you know column/row names. Inclusive on both ends.<br>
+                        <strong>.iloc</strong> = Integer-based. Use when you know positions. Exclusive on end (like Python slicing).<br>
+                        <strong>Gotcha:</strong> <code>df.loc[0:5]</code> includes row 5. <code>df.iloc[0:5]</code> excludes row 5. This trips up everyone.
+                    </div>
+                </div>
+
+                <h3>The SettingWithCopyWarning — Finally Explained</h3>
+                <p>When you chain indexing (<code>df[df.x > 0]['y'] = 5</code>), Pandas may create a temporary copy. Your assignment modifies the copy, not the original. <strong>Fix:</strong> Always use <code>.loc</code>: <code>df.loc[df.x > 0, 'y'] = 5</code>. In Pandas 2.0+, Copy-on-Write mode eliminates this issue entirely.</p>
+
+                <h3>GroupBy Split-Apply-Combine</h3>
+                <p>GroupBy is the most powerful Pandas operation. It follows three steps: (1) <strong>Split</strong> data into groups, (2) <strong>Apply</strong> a function to each group independently, (3) <strong>Combine</strong> results. The key insight: GroupBy is lazy — no computation happens until you call an aggregation.</p>
+
+                <h3>Method Chaining — The Pandas Way</h3>
+                <p>Fluent API style chains multiple operations. More readable, no intermediate variables, and enables <code>.pipe()</code> for custom functions. Use <code>.assign()</code> instead of <code>df['col'] = ...</code> for chainability.</p>
+
+                <h3>Memory Optimization Strategies</h3>
+                <table>
+                    <tr><th>Strategy</th><th>Savings</th><th>When to Use</th></tr>
+                    <tr><td>Category dtype</td><td>90%+</td><td>Columns with few unique strings (gender, country)</td></tr>
+                    <tr><td>Downcast numerics</td><td>50-75%</td><td>int64 to int32/int16 when range allows</td></tr>
+                    <tr><td>Sparse arrays</td><td>80%+</td><td>Columns that are mostly zeros/NaN</td></tr>
+                    <tr><td>Read in chunks</td><td>N/A</td><td>Files larger than RAM</td></tr>
+                </table>
+            </div>
+        `,
+        code: `
+            <div class="section">
+                <h2>💻 Pandas Code Examples</h2>
+
+                <h3>Method Chaining — Production Pattern</h3>
+                <div class="code-block">
+<span class="keyword">import</span> pandas <span class="keyword">as</span> pd
+<span class="keyword">import</span> numpy <span class="keyword">as</span> np
+
+<span class="comment"># Method chaining — clean, readable data pipeline</span>
+result = (
+    pd.read_csv(<span class="string">'sales.csv'</span>)
+    .rename(columns=<span class="keyword">str</span>.lower)
+    .assign(
+date=<span class="keyword">lambda</span> df: pd.to_datetime(df[<span class="string">'date'</span>]),
+revenue=<span class="keyword">lambda</span> df: df[<span class="string">'price'</span>] * df[<span class="string">'quantity'</span>],
+month=<span class="keyword">lambda</span> df: df[<span class="string">'date'</span>].dt.month
+    )
+    .query(<span class="string">'revenue > 100'</span>)
+    .groupby(<span class="string">'month'</span>)
+    .agg({<span class="string">'revenue'</span>: [<span class="string">'sum'</span>, <span class="string">'mean'</span>, <span class="string">'count'</span>]})
+    .sort_values((<span class="string">'revenue'</span>, <span class="string">'sum'</span>), ascending=<span class="keyword">False</span>)
+)
+                </div>
+
+                <h3>GroupBy — Beyond Basic Aggregation</h3>
+                <div class="code-block">
+<span class="comment"># Multi-aggregation with named columns</span>
+summary = df.groupby(<span class="string">'category'</span>).agg(
+    total_sales=(<span class="string">'revenue'</span>, <span class="string">'sum'</span>),
+    avg_price=(<span class="string">'price'</span>, <span class="string">'mean'</span>),
+    num_orders=(<span class="string">'order_id'</span>, <span class="string">'nunique'</span>),
+    top_product=(<span class="string">'product'</span>, <span class="keyword">lambda</span> x: x.mode().iloc[<span class="number">0</span>])
+)
+
+<span class="comment"># Transform — apply function, keep original shape</span>
+df[<span class="string">'pct_of_group'</span>] = df.groupby(<span class="string">'category'</span>)[<span class="string">'revenue'</span>].transform(
+    <span class="keyword">lambda</span> x: x / x.sum() * <span class="number">100</span>
+)
+
+<span class="comment"># Filter — keep only groups meeting criteria</span>
+big_groups = df.groupby(<span class="string">'category'</span>).filter(
+    <span class="keyword">lambda</span> g: len(g) >= <span class="number">10</span>
+)
+                </div>
+
+                <h3>Merge Patterns — SQL Joins in Pandas</h3>
+                <div class="code-block">
+<span class="comment"># LEFT JOIN with indicator to find unmatched</span>
+merged = pd.merge(
+    orders, customers,
+    on=<span class="string">'customer_id'</span>,
+    how=<span class="string">'left'</span>,
+    indicator=<span class="keyword">True</span>  <span class="comment"># adds _merge column</span>
+)
+orphan_orders = merged[merged[<span class="string">'_merge'</span>] == <span class="string">'left_only'</span>]
+
+<span class="comment"># Merge on different column names</span>
+result = pd.merge(
+    df1, df2,
+    left_on=<span class="string">'user_id'</span>,
+    right_on=<span class="string">'id'</span>,
+    suffixes=(<span class="string">'_orders'</span>, <span class="string">'_users'</span>)
+)
+                </div>
+
+                <h3>Time Series Operations</h3>
+                <div class="code-block">
+<span class="comment"># Resample — change frequency</span>
+daily = df.set_index(<span class="string">'date'</span>)
+weekly = daily[<span class="string">'revenue'</span>].resample(<span class="string">'W'</span>).sum()
+monthly = daily[<span class="string">'revenue'</span>].resample(<span class="string">'M'</span>).agg([<span class="string">'sum'</span>, <span class="string">'mean'</span>, <span class="string">'count'</span>])
+
+<span class="comment"># Rolling windows — moving averages</span>
+df[<span class="string">'ma_7'</span>] = df[<span class="string">'revenue'</span>].rolling(<span class="number">7</span>).mean()
+df[<span class="string">'ma_30'</span>] = df[<span class="string">'revenue'</span>].rolling(<span class="number">30</span>).mean()
+df[<span class="string">'expanding_mean'</span>] = df[<span class="string">'revenue'</span>].expanding().mean()
+
+<span class="comment"># Shift — create lag features for ML</span>
+df[<span class="string">'prev_day'</span>] = df[<span class="string">'revenue'</span>].shift(<span class="number">1</span>)
+df[<span class="string">'pct_change'</span>] = df[<span class="string">'revenue'</span>].pct_change()
+                </div>
+
+                <h3>Memory Optimization</h3>
+                <div class="code-block">
+<span class="comment"># Reduce DataFrame memory by 70%+</span>
+<span class="keyword">def</span> <span class="function">optimize_dtypes</span>(df):
+    <span class="keyword">for</span> col <span class="keyword">in</span> df.select_dtypes(include=[<span class="string">'int'</span>]).columns:
+df[col] = pd.to_numeric(df[col], downcast=<span class="string">'integer'</span>)
+    <span class="keyword">for</span> col <span class="keyword">in</span> df.select_dtypes(include=[<span class="string">'float'</span>]).columns:
+df[col] = pd.to_numeric(df[col], downcast=<span class="string">'float'</span>)
+    <span class="keyword">for</span> col <span class="keyword">in</span> df.select_dtypes(include=[<span class="string">'object'</span>]).columns:
+<span class="keyword">if</span> df[col].nunique() / len(df) < <span class="number">0.5</span>:
+    df[col] = df[col].astype(<span class="string">'category'</span>)
+    <span class="keyword">return</span> df
+
+<span class="comment"># Before: 800 MB → After: 200 MB</span>
+df = optimize_dtypes(df)
+<span class="function">print</span>(df.memory_usage(deep=<span class="keyword">True</span>).sum() / <span class="number">1e6</span>, <span class="string">"MB"</span>)
+                </div>
+            </div>
+        `,
+        interview: `
+            <div class="section">
+                <h2>🎯 Pandas Interview Questions</h2>
+
+                <div class="interview-box">
+                    <strong>Q1: What causes SettingWithCopyWarning and how do you fix it?</strong>
+                    <p><strong>Answer:</strong> Chained indexing (<code>df[mask]['col'] = val</code>) may modify a copy, not the original. Fix: use <code>df.loc[mask, 'col'] = val</code>. In Pandas 2.0+, enable Copy-on-Write: <code>pd.options.mode.copy_on_write = True</code>. This makes all indexing return views until modification, then copies automatically.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q2: How do you handle a 10GB CSV that doesn't fit in memory?</strong>
+                    <p><strong>Answer:</strong> 5 strategies: (1) <code>pd.read_csv(chunksize=50000)</code> — process in batches, (2) <code>usecols=['needed_cols']</code> — load only what you need, (3) <code>dtype={'col': 'int32'}</code> — use smaller types, (4) Dask — lazy Pandas-like API, (5) DuckDB — SQL on CSV files with zero memory overhead. Polars is also excellent for out-of-core processing.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q3: Explain the difference between merge, join, and concat.</strong>
+                    <p><strong>Answer:</strong> <code>merge()</code>: SQL-style joins on columns (most flexible). <code>join()</code>: joins on index (convenience wrapper). <code>concat()</code>: stack DataFrames along axis (union/append). Use <code>merge</code> for column-based joins, <code>concat</code> for stacking rows/columns. <code>join</code> is just <code>merge</code> with index.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q4: What's the difference between apply, map, and applymap?</strong>
+                    <p><strong>Answer:</strong> <code>map()</code>: Series only, element-wise. <code>apply()</code>: works on rows/columns of DataFrame or elements of Series. <code>applymap()</code>: element-wise on entire DataFrame (renamed to <code>map()</code> in Pandas 2.1). <strong>Performance tip:</strong> all three are slow — prefer vectorized operations whenever possible.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q5: How does GroupBy transform differ from agg?</strong>
+                    <p><strong>Answer:</strong> <code>agg()</code> reduces — returns one value per group (changes shape). <code>transform()</code> broadcasts — returns same shape as input. Example: <code>df.groupby('dept')['salary'].transform('mean')</code> fills every row with its department's average salary, while <code>.agg('mean')</code> returns one row per department.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q6: What is MultiIndex and when would you use it?</strong>
+                    <p><strong>Answer:</strong> Hierarchical indexing — multiple levels of row/column labels. Use for: pivot table results, panel data (entity + time), groupby with multiple keys. Access with <code>.xs()</code> or tuple slicing: <code>df.loc[('A', 2023)]</code>. Convert back with <code>.reset_index()</code>.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q7: How do you handle missing data in production?</strong>
+                    <p><strong>Answer:</strong> Strategy depends on context: (1) <code>dropna(thresh=N)</code> — keep rows with at least N non-null values, (2) <code>fillna(method='ffill')</code> — forward fill for time series, (3) <code>fillna(df.median())</code> — impute with median for ML, (4) <code>interpolate(method='time')</code> — time-weighted interpolation. Always check <code>df.isna().sum()</code> first.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q8: What is the category dtype and when should you use it?</strong>
+                    <p><strong>Answer:</strong> Stores repeated strings as integer codes + lookup table. Use when a column has few unique values relative to total rows (e.g., 50 countries in 1M rows). Benefits: 90%+ memory savings, faster groupby. Gotcha: operations that create new values (like string concatenation) convert back to object dtype.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q9: Pandas vs Polars vs DuckDB — when to use each?</strong>
+                    <p><strong>Answer:</strong> <strong>Pandas:</strong> best ecosystem, most tutorials, sufficient for &lt;1GB. <strong>Polars:</strong> 10-100x faster, lazy evaluation, multi-threaded, no GIL issues — use for 1-100GB. <strong>DuckDB:</strong> SQL interface, out-of-core, great for analytical queries — use when SQL is more natural or data exceeds RAM.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q10: How do you create lag features and rolling statistics for time series ML?</strong>
+                    <p><strong>Answer:</strong> <code>df['lag_1'] = df['value'].shift(1)</code> for lag features. <code>df['rolling_mean_7'] = df['value'].rolling(7).mean()</code> for rolling stats. <code>df['ewm_mean'] = df['value'].ewm(span=7).mean()</code> for exponential weighted. Always sort by time first, use <code>groupby().shift()</code> for multi-entity data to avoid data leakage.</p>
+                </div>
+            </div>
+        `
+    },
+
+    "visualization": {
+        concepts: `
+            <div class="section">
+                <h2>Data Visualization Principles</h2>
+
+                <h3>🧠 The Grammar of Graphics</h3>
+                <div class="info-box">
+                    <div class="box-title">⚡ Every Chart = Data + Aesthetics + Geometry</div>
+                    <div class="box-content">
+                        Leland Wilkinson's framework: <strong>Data</strong> (what to plot), <strong>Aesthetics</strong> (x, y, color, size mappings), <strong>Geometry</strong> (bars, lines, points), <strong>Statistics</strong> (binning, smoothing), <strong>Coordinates</strong> (cartesian, polar), <strong>Facets</strong> (subplots). Seaborn and Plotly follow this pattern. Understanding it means you can build any chart.
+                    </div>
+                </div>
+
+                <h3>Choosing the Right Chart</h3>
+                <table>
+                    <tr><th>Question</th><th>Chart Type</th><th>Library</th></tr>
+                    <tr><td>Distribution of one variable?</td><td>Histogram, KDE, Box plot</td><td>Seaborn</td></tr>
+                    <tr><td>Relationship between two variables?</td><td>Scatter, Hexbin, Regression</td><td>Seaborn/Plotly</td></tr>
+                    <tr><td>Comparison across categories?</td><td>Bar, Grouped bar, Violin</td><td>Seaborn</td></tr>
+                    <tr><td>Trend over time?</td><td>Line chart, Area chart</td><td>Plotly/Matplotlib</td></tr>
+                    <tr><td>Correlation matrix?</td><td>Heatmap</td><td>Seaborn</td></tr>
+                    <tr><td>Part of whole?</td><td>Pie, Treemap, Sunburst</td><td>Plotly</td></tr>
+                    <tr><td>Geographic data?</td><td>Choropleth, Scatter mapbox</td><td>Plotly/Folium</td></tr>
+                </table>
+
+                <h3>Matplotlib Architecture</h3>
+                <p>Three layers: <strong>Backend</strong> (rendering engine), <strong>Artist</strong> (everything drawn), <strong>Scripting</strong> (pyplot). The Figure contains Axes (subplots). Each Axes has Axis objects. Always prefer the object-oriented API (<code>fig, ax = plt.subplots()</code>) over pyplot for production code.</p>
+
+                <h3>Seaborn — Statistical Visualization</h3>
+                <p>Built on Matplotlib with statistical intelligence. Three API levels: <strong>Figure-level</strong> (relplot, catplot, displot — create their own figure), <strong>Axes-level</strong> (scatterplot, boxplot — plot on existing axes), <strong>Objects API</strong> (new in 0.12, more composable).</p>
+
+                <h3>Plotly — Interactive Dashboards</h3>
+                <p>JavaScript-powered charts with hover, zoom, selection. <code>plotly.express</code> for quick plots, <code>plotly.graph_objects</code> for full control. Integrates with Dash for production dashboards. Supports 3D plots, maps, and animations.</p>
+
+                <h3>Common Visualization Mistakes</h3>
+                <ul>
+                    <li>Truncated y-axis making differences look larger than they are</li>
+                    <li>Using pie charts for more than 5 categories</li>
+                    <li>Rainbow colormaps (poor for colorblind users) — use viridis/cividis</li>
+                    <li>Overplotting in scatter plots — use alpha, hexbin, or KDE instead</li>
+                    <li>Missing axis labels, titles, and units</li>
+                </ul>
+            </div>
+        `,
+        code: `
+            <div class="section">
+                <h2>💻 Visualization Code Examples</h2>
+
+                <h3>Matplotlib — Publication-Quality Figures</h3>
+                <div class="code-block">
+<span class="keyword">import</span> matplotlib.pyplot <span class="keyword">as</span> plt
+<span class="keyword">import</span> numpy <span class="keyword">as</span> np
+
+<span class="comment"># Professional figure setup</span>
+fig, axes = plt.subplots(<span class="number">1</span>, <span class="number">3</span>, figsize=(<span class="number">15</span>, <span class="number">5</span>))
+
+<span class="comment"># Subplot 1: Distribution</span>
+data = np.random.randn(<span class="number">1000</span>)
+axes[<span class="number">0</span>].hist(data, bins=<span class="number">30</span>, alpha=<span class="number">0.7</span>, color=<span class="string">'steelblue'</span>, edgecolor=<span class="string">'white'</span>)
+axes[<span class="number">0</span>].set_title(<span class="string">'Distribution'</span>, fontsize=<span class="number">14</span>, fontweight=<span class="string">'bold'</span>)
+axes[<span class="number">0</span>].axvline(data.mean(), color=<span class="string">'red'</span>, linestyle=<span class="string">'--'</span>, label=<span class="string">'Mean'</span>)
+
+<span class="comment"># Subplot 2: Scatter with colormap</span>
+x, y = np.random.randn(<span class="number">2</span>, <span class="number">100</span>)
+scatter = axes[<span class="number">1</span>].scatter(x, y, c=y, cmap=<span class="string">'viridis'</span>, alpha=<span class="number">0.7</span>)
+plt.colorbar(scatter, ax=axes[<span class="number">1</span>])
+
+<span class="comment"># Subplot 3: Line with confidence interval</span>
+x = np.linspace(<span class="number">0</span>, <span class="number">10</span>, <span class="number">100</span>)
+y = np.sin(x)
+axes[<span class="number">2</span>].plot(x, y, <span class="string">'b-'</span>, linewidth=<span class="number">2</span>)
+axes[<span class="number">2</span>].fill_between(x, y-<span class="number">0.3</span>, y+<span class="number">0.3</span>, alpha=<span class="number">0.2</span>)
+
+plt.tight_layout()
+plt.savefig(<span class="string">'figure.png'</span>, dpi=<span class="number">300</span>, bbox_inches=<span class="string">'tight'</span>)
+                </div>
+
+                <h3>Seaborn — Statistical Plots</h3>
+                <div class="code-block">
+<span class="keyword">import</span> seaborn <span class="keyword">as</span> sns
+
+<span class="comment"># Pair plot — see all relationships at once</span>
+sns.pairplot(df, hue=<span class="string">'target'</span>, diag_kind=<span class="string">'kde'</span>,
+     plot_kws={<span class="string">'alpha'</span>: <span class="number">0.6</span>})
+
+<span class="comment"># Correlation heatmap with annotations</span>
+fig, ax = plt.subplots(figsize=(<span class="number">10</span>, <span class="number">8</span>))
+mask = np.triu(np.ones_like(df.corr(), dtype=<span class="keyword">bool</span>))
+sns.heatmap(df.corr(), mask=mask, annot=<span class="keyword">True</span>, fmt=<span class="string">'.2f'</span>,
+    cmap=<span class="string">'RdBu_r'</span>, center=<span class="number">0</span>, square=<span class="keyword">True</span>)
+
+<span class="comment"># Violin + strip plot — distribution + individual points</span>
+fig, ax = plt.subplots(figsize=(<span class="number">10</span>, <span class="number">6</span>))
+sns.violinplot(x=<span class="string">'category'</span>, y=<span class="string">'value'</span>, data=df, inner=<span class="keyword">None</span>, alpha=<span class="number">0.3</span>)
+sns.stripplot(x=<span class="string">'category'</span>, y=<span class="string">'value'</span>, data=df, size=<span class="number">3</span>, jitter=<span class="keyword">True</span>)
+                </div>
+
+                <h3>Plotly — Interactive Visualizations</h3>
+                <div class="code-block">
+<span class="keyword">import</span> plotly.express <span class="keyword">as</span> px
+<span class="keyword">import</span> plotly.graph_objects <span class="keyword">as</span> go
+
+<span class="comment"># Interactive scatter with hover info</span>
+fig = px.scatter(df, x=<span class="string">'feature1'</span>, y=<span class="string">'feature2'</span>,
+         color=<span class="string">'target'</span>, size=<span class="string">'importance'</span>,
+         hover_data=[<span class="string">'name'</span>],
+         title=<span class="string">'Feature Analysis'</span>)
+
+<span class="comment"># Animated chart — data over time</span>
+fig = px.scatter(df, x=<span class="string">'gdp'</span>, y=<span class="string">'life_exp'</span>,
+         animation_frame=<span class="string">'year'</span>,
+         size=<span class="string">'population'</span>, color=<span class="string">'continent'</span>,
+         hover_name=<span class="string">'country'</span>,
+         size_max=<span class="number">60</span>)
+fig.show()
+                </div>
+            </div>
+        `,
+        interview: `
+            <div class="section">
+                <h2>🎯 Visualization Interview Questions</h2>
+
+                <div class="interview-box">
+                    <strong>Q1: When would you use Matplotlib vs Seaborn vs Plotly?</strong>
+                    <p><strong>Answer:</strong> <strong>Matplotlib:</strong> full control, publication figures, custom layouts. <strong>Seaborn:</strong> statistical plots, quick EDA, beautiful defaults. <strong>Plotly:</strong> interactive dashboards, web apps, 3D/maps. Rule of thumb: Seaborn for EDA, Matplotlib for papers, Plotly for stakeholders.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q2: How do you visualize high-dimensional data?</strong>
+                    <p><strong>Answer:</strong> (1) PCA/t-SNE/UMAP to 2D then scatter plot, (2) Pair plots for feature pairs, (3) Parallel coordinates, (4) Heatmap of correlation matrix, (5) SHAP summary plots for feature importance. For 100+ features, start with correlation heatmap to identify groups.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q3: How do you handle overplotting in scatter plots?</strong>
+                    <p><strong>Answer:</strong> (1) Reduce alpha: <code>alpha=0.1</code>, (2) Hexbin plots: <code>plt.hexbin()</code>, (3) 2D KDE: <code>sns.kdeplot()</code>, (4) Random sampling for display, (5) Datashader for millions of points. The key is encoding density visually.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q4: What makes a good visualization for non-technical stakeholders?</strong>
+                    <p><strong>Answer:</strong> (1) Clear title stating the conclusion, not the method, (2) Minimal chart junk — remove gridlines, borders, legends when obvious, (3) Annotate key data points directly, (4) Use color consistently and meaningfully, (5) Tell a story — what action should they take? Keep it to one insight per chart.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q5: Explain the Figure and Axes API in Matplotlib.</strong>
+                    <p><strong>Answer:</strong> <code>Figure</code> is the entire window/canvas. <code>Axes</code> is a single plot area within the figure. <code>fig, axes = plt.subplots(2,2)</code> creates 4 plots. Always use the OO API for production — <code>ax.plot()</code> not <code>plt.plot()</code>. This gives you explicit control over which subplot you're modifying.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q6: How do you make accessible visualizations?</strong>
+                    <p><strong>Answer:</strong> (1) Use colorblind-safe palettes (viridis, cividis), (2) Don't rely on color alone — add shapes/patterns, (3) Sufficient contrast ratios, (4) Alt text for web charts, (5) Large enough font sizes (12pt minimum). Test with colorblindness simulators.</p>
+                </div>
+            </div>
+        `
+    },
+
+    "advanced-python": {
+        concepts: `
+            <div class="section">
+                <h2>Advanced Python Engineering</h2>
+
+                <h3>🧠 Professional Decorators — Beyond "Hello World"</h3>
+                <div class="info-box">
+                    <div class="box-title">⚡ Closures & Wrappers</div>
+                    <div class="box-content">
+                        Decorators are higher-order functions that modify behavior without changing code. Professional implementation tools: Use <code>functools.wraps</code> to preserve metadata (name, docstring), handle both positional and keyword arguments, and support decorators with parameters (factories).
+                    </div>
+                </div>
+
+                <h3>Context Managers (The Pythonic Way)</h3>
+                <p>Managing resources (files, locks, DB connections) reliably. <code>with</code> blocks guarantee cleanup even on errors. Implementation options: (1) Class-based with <code>__enter__</code> and <code>__exit__</code>, (2) Function-based with <code>@contextlib.contextmanager</code> and <code>yield</code>.</p>
+
+                <h3>Iterators & Generators — Memory Efficiency</h3>
+                <div class="callout tip">
+                    <div class="callout-title">💡 Why Generators?</div>
+                    Generators use <strong>lazy evaluation</strong>. They produce values one at a time using <code>yield</code>, using constant memory O(1) regardless of dataset size. Ideal for processing huge datasets or infinite streams.
+                </div>
+
+                <h3>Object-Oriented Design for Data Science</h3>
+                <table>
+                    <tr><th>Concept</th><th>Data Science Use Case</th></tr>
+                    <tr><td>Inheritance</td><td>BaseModel → LinearModel → LogisticRegression</td></tr>
+                    <tr><td>Abstract Base Classes</td><td>Defining mandatory methods like <code>fit()</code>/<code>predict()</code></td></tr>
+                    <tr><td>Properties</td><td>Validating input parameters (e.g., learning rate > 0)</td></tr>
+                    <tr><td>Dunder Methods</td><td><code>__call__</code> for making models callable, <code>__getitem__</code> for datasets</td></tr>
+                </table>
+
+                <h3>Metaclasses & Dynamic Programming</h3>
+                <p>Classes are objects too! Classes define how instances behave; <strong>Metaclasses</strong> define how classes behave. Useful for registry patterns (auto-registering models) or enforcement of interface standards across a codebase. <code>type</code> is the default metaclass.</p>
+            </div>
+        `,
+        code: `
+            <div class="section">
+                <h2>💻 Advanced Python Code Examples</h2>
+
+                <h3>The Production-Grade Decorator</h3>
+                <div class="code-block">
+<span class="keyword">from</span> functools <span class="keyword">import</span> wraps
+<span class="keyword">import</span> time
+<span class="keyword">import</span> logging
+
+<span class="keyword">def</span> <span class="function">timer_with_logging</span>(logger):
+    <span class="keyword">def</span> <span class="function">decorator</span>(func):
+<span class="preprocessor">@wraps</span>(func)
+<span class="keyword">def</span> <span class="function">wrapper</span>(*args, **kwargs):
+    start = time.perf_counter()
+    <span class="keyword">try</span>:
+        result = func(*args, **kwargs)
+        <span class="keyword">return</span> result
+    <span class="keyword">finally</span>:
+        duration = time.perf_counter() - start
+        logger.info(<span class="string">f"Executed {func.__name__} in {duration:.4f}s"</span>)
+<span class="keyword">return</span> wrapper
+    <span class="keyword">return</span> decorator
+
+<span class="preprocessor">@timer_with_logging</span>(logging.getLogger(__name__))
+<span class="keyword">def</span> <span class="function">train_model</span>(X, y):
+    <span class="comment"># Simulate training</span>
+    time.sleep(<span class="number">1.5</span>)
+                </div>
+
+                <h3>Custom Context Manager for GPU Lock</h3>
+                <div class="code-block">
+<span class="keyword">from</span> contextlib <span class="keyword">import</span> contextmanager
+
+<span class="preprocessor">@contextmanager</span>
+<span class="keyword">def</span> <span class="function">gpu_lock</span>(device_id):
+    <span class="function">print</span>(<span class="string">f"Acquiring lock for GPU {device_id}"</span>)
+    <span class="keyword">try</span>:
+<span class="keyword">yield</span> <span class="string">f"GPU_{device_id}_CONTEXT"</span>
+    <span class="keyword">finally</span>:
+<span class="function">print</span>(<span class="string">f"Releasing GPU {device_id}"</span>)
+
+<span class="keyword">with</span> gpu_lock(<span class="number">0</span>) <span class="keyword">as</span> ctx:
+    <span class="function">print</span>(<span class="string">f"Training with {ctx}"</span>)
+                </div>
+
+                <h3>ABC & Protocol — Enforcing Interfaces</h3>
+                <div class="code-block">
+<span class="keyword">from</span> abc <span class="keyword">import</span> ABC, abstractmethod
+<span class="keyword">from</span> typing <span class="keyword">import</span> Protocol
+
+<span class="keyword">class</span> <span class="class">Predictor</span>(Protocol):
+    <span class="keyword">def</span> <span class="function">predict</span>(self, X: np.ndarray) -> np.ndarray: ...
+
+<span class="keyword">class</span> <span class="class">BaseModel</span>(ABC):
+    <span class="preprocessor">@abstractmethod</span>
+    <span class="keyword">def</span> <span class="function">fit</span>(self, X, y):
+<span class="keyword">pass</span>
+
+<span class="keyword">class</span> <span class="class">MyModel</span>(BaseModel):
+    <span class="keyword">def</span> <span class="function">fit</span>(self, X, y):
+<span class="function">print</span>(<span class="string">"Fitting..."</span>)
+    
+    <span class="keyword">def</span> <span class="function">predict</span>(self, X):
+<span class="keyword">return</span> X @ self.weights
+                </div>
+
+                <h3>Functional Pipelines with itertools</h3>
+                <div class="code-block">
+<span class="keyword">import</span> itertools
+
+<span class="comment"># Process infinite stream in batches</span>
+<span class="keyword">def</span> <span class="function">get_batches</span>(stream, size):
+    it = <span class="keyword">iter</span>(stream)
+    <span class="keyword">while</span> <span class="keyword">True</span>:
+batch = <span class="keyword">list</span>(itertools.islice(it, size))
+<span class="keyword">if</span> <span class="keyword">not</span> batch: <span class="keyword">break</span>
+<span class="keyword">yield</span> batch
+
+<span class="comment"># Data pipeline: chain -> filter -> map -> batch</span>
+processed = get_batches(
+    <span class="keyword">map</span>(<span class="keyword">str</span>.upper, <span class="keyword">filter</span>(<span class="keyword">lambda</span> x: <span class="keyword">len</span>(x) > <span class="number">5</span>, stream)),
+    batch_size=<span class="number">64</span>
+)
+                </div>
+            </div>
+        `,
+        interview: `
+            <div class="section">
+                <h2>🎯 Advanced Python Interview Questions</h2>
+
+                <div class="interview-box">
+                    <strong>Q1: What's the difference between <code>__str__</code> and <code>__repr__</code>?</strong>
+                    <p><strong>Answer:</strong> <code>__str__</code> is for end-users (informal, readable). <code>__repr__</code> is for developers (detailed, unambiguous, "eval-able"). For data science, always implement <code>__repr__</code> for models to show hyperparameters when printed.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q2: Explain Python's MRO (Method Resolution Order).</strong>
+                    <p><strong>Answer:</strong> C3 Linearization algorithm. It determines the search order for methods in multiple inheritance. Access it via <code>ClassName.mro()</code>. Python ensures that bases are searched after their subclasses and the order of bases in the class definition is preserved.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q3: How do you implement a Singleton pattern in Python?</strong>
+                    <p><strong>Answer:</strong> Several ways: (1) Overriding <code>__new__</code>, (2) Using a Metaclass (cleanest), (3) Module-level variables (simplest). Example with Metaclass: <code>class Singleton(type): ...</code> then <code>class Database(metaclass=Singleton): ...</code>.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q4: Decorators: How to handle <code>@timer(unit='ms')</code>?</strong>
+                    <p><strong>Answer:</strong> This is a decorator factory. You need three levels of functions: (1) Factory takes parameters and returns a decorator, (2) Decorator takes the function and returns a wrapper, (3) Wrapper takes args/kwargs and executes the logic.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q5: What are <code>*args</code> and <code>**kwargs</code> and when to use them?</strong>
+                    <p><strong>Answer:</strong> <code>*args</code> collects positional arguments into a tuple. <code>**kwargs</code> collects keyword arguments into a dictionary. Crucial for wrapping functions, implementing decorators, or creating flexible API interfaces like Scikit-learn's <code>__init__(**params)</code>.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q6: Explain the difference between <code>is</code> and <code>==</code>.</strong>
+                    <p><strong>Answer:</strong> <code>==</code> checks for <strong>equality</strong> (values are the same). <code>is</code> checks for <strong>identity</strong> (objects occupy the same memory address). Use <code>is</code> for Singletons like <code>None</code> or <code>bool</code>. Example: <code>a = [1]; b = [1]; a == b is True, a is b is False</code>.</p>
+                </div>
+            </div>
+        `
+    },
+    "sklearn": {
+        concepts: `
+            <div class="section">
+                <h2>Scikit-learn & ML Engineering</h2>
+
+                <h3>🧠 The Estimator API — Unified Interface</h3>
+                <div class="info-box">
+                    <div class="box-title">⚡ Consistency is King</div>
+                    <div class="box-content">
+                        Scikit-learn's brilliance lies in its interface consistency. <strong>Estimators</strong> have <code>fit(X, y)</code>, <strong>Transformers</strong> have <code>transform(X)</code>, and <strong>Predictors</strong> have <code>predict(X)</code>. This design allows for seamless swapping of models and preprocessing steps.
+                    </div>
+                </div>
+
+                <h3>Production Pipelines — Avoiding Data Leakage</h3>
+                <p>A <code>Pipeline</code> bundles preprocessing and modeling into a single object. <strong>Crucial Benefit:</strong> It ensures that transformers are fit <strong>only on the training fold</strong> during cross-validation, preventing information from the validation set (like mean/std) from "leaking" into training. Always use pipelines in production.</p>
+
+                <h3>ColumnTransformer — Heterogeneous Data</h3>
+                <p>Most real-world data is a mix of types. <code>ColumnTransformer</code> allows you to apply different preprocessing pipelines to different columns (e.g., OneHotEncode categories, Scale numerics) and then concatenate them for the model.</p>
+
+                <h3>Model Evaluation Beyond Accuracy</h3>
+                <table>
+                    <tr><th>Metric</th><th>Use Case</th><th>Scikit-learn Name</th></tr>
+                    <tr><td>F1-Score</td><td>Imbalanced classification (Precision-Recall balance)</td><td><code>f1_score</code></td></tr>
+                    <tr><td>ROC-AUC</td><td>Probability ranking / classifier quality</td><td><code>roc_auc_score</code></td></tr>
+                    <tr><td>MSE / MAE</td><td>Regression error magnitude</td><td><code>mean_squared_error</code></td></tr>
+                    <tr><td>R2 Score</td><td>Variance explained by model</td><td><code>r2_score</code></td></tr>
+                    <tr><td>Log Loss</td><td>Probabilistic predictions confidence</td><td><code>log_loss</code></td></tr>
+                </table>
+
+                <h3>Cross-Validation Strategies</h3>
+                <p>(1) <strong>K-Fold:</strong> standard, (2) <strong>Stratified K-Fold:</strong> for imbalanced data, (3) <strong>TimeSeriesSplit:</strong> for temporal data (preventing looking into the future), (4) <strong>GroupKFold:</strong> to ensure samples from the same group aren't split across train/test.</p>
+            </div>
+        `,
+        code: `
+            <div class="section">
+                <h2>💻 Scikit-learn Code Examples</h2>
+
+                <h3>The Modular Pipeline Pattern</h3>
+                <div class="code-block">
+<span class="keyword">from</span> sklearn.pipeline <span class="keyword">import</span> Pipeline
+<span class="keyword">from</span> sklearn.compose <span class="keyword">import</span> ColumnTransformer
+<span class="keyword">from</span> sklearn.preprocessing <span class="keyword">import</span> StandardScaler, OneHotEncoder
+<span class="keyword">from</span> sklearn.ensemble <span class="keyword">import</span> RandomForestClassifier
+
+<span class="comment"># Define preprocessing for different feature types</span>
+numeric_transformer = Pipeline(steps=[
+    (<span class="string">'scaler'</span>, StandardScaler())
+])
+
+categorical_transformer = Pipeline(steps=[
+    (<span class="string">'onehot'</span>, OneHotEncoder(handle_unknown=<span class="string">'ignore'</span>))
+])
+
+preprocessor = ColumnTransformer(transformers=[
+    (<span class="string">'num'</span>, numeric_transformer, numeric_features),
+    (<span class="string">'cat'</span>, categorical_transformer, categorical_features)
+])
+
+<span class="comment"># Create full pipeline</span>
+clf = Pipeline(steps=[
+    (<span class="string">'preprocessor'</span>, preprocessor),
+    (<span class="string">'classifier'</span>, RandomForestClassifier(n_estimators=<span class="number">100</span>))
+])
+
+<span class="comment"># Entire workflow in one object</span>
+clf.fit(X_train, y_train)
+preds = clf.predict(X_test)
+                </div>
+
+                <h3>Custom Transformers — Industry Standard</h3>
+                <div class="code-block">
+<span class="keyword">from</span> sklearn.base <span class="keyword">import</span> BaseEstimator, TransformerMixin
+
+<span class="keyword">class</span> <span class="class">LogTransformer</span>(BaseEstimator, TransformerMixin):
+    <span class="keyword">def</span> <span class="function">__init__</span>(self, columns=None):
+self.columns = columns
+
+    <span class="keyword">def</span> <span class="function">fit</span>(self, X, y=None):
+<span class="keyword">return</span> self
+
+    <span class="keyword">def</span> <span class="function">transform</span>(self, X):
+X_out = X.copy()
+<span class="keyword">for</span> col <span class="keyword">in</span> self.columns:
+    X_out[col] = np.log1p(X_out[col])
+<span class="keyword">return</span> X_out
+
+<span class="comment"># Now usable in any Pipeline</span>
+pipeline = Pipeline([
+    (<span class="string">'log'</span>, LogTransformer(columns=[<span class="string">'revenue'</span>])),
+    (<span class="string">'model'</span>, LinearRegression())
+])
+                </div>
+
+                <h3>Hyperparameter Optimization (Advanced)</h3>
+                <div class="code-block">
+<span class="keyword">from</span> sklearn.model_selection <span class="keyword">import</span> RandomizedSearchCV
+<span class="keyword">from</span> scipy.stats <span class="keyword">import</span> randint
+
+param_dist = {
+    <span class="string">'classifier__n_estimators'</span>: randint(<span class="number">50</span>, <span class="number">500</span>),
+    <span class="string">'classifier__max_depth'</span>: [<span class="number">5</span>, <span class="number">10</span>, <span class="number">20</span>, <span class="keyword">None</span>],
+    <span class="string">'preprocessor__num__scaler'</span>: [StandardScaler(), RobustScaler()]
+}
+
+search = RandomizedSearchCV(clf, param_dist, n_iter=<span class="number">50</span>, cv=<span class="number">3</span>)
+search.fit(X_train, y_train)
+
+<span class="function">print</span>(<span class="string">f"Best Score: {search.best_score_}"</span>)
+<span class="function">print</span>(<span class="string">f"Best Params: {search.best_params_}"</span>)
+                </div>
+            </div>
+        `,
+        interview: `
+            <div class="section">
+                <h2>🎯 Scikit-learn Interview Questions</h2>
+
+                <div class="interview-box">
+                    <strong>Q1: Why use <code>fit_transform</code> on train but only <code>transform</code> on test?</strong>
+                    <p><strong>Answer:</strong> To prevent <strong>Data Leakage</strong>. Mean/variance for scaling must be learned ONLY from training data. Applying <code>fit</code> to test data uses future information about the test distribution, leading to overly optimistic results.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q2: When would you use <code>predict_proba</code> instead of <code>predict</code>?</strong>
+                    <p><strong>Answer:</strong> When you need the <strong>uncertainty</strong> of the model or need to adjust the decision threshold. For cost-sensitive problems (e.g., fraud), you might flag anything with >10% probability, rather than the default 50%.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q3: Explain the bias-variance tradeoff in terms of Complexity.</strong>
+                    <p><strong>Answer:</strong> <strong>Underfitting</strong> (High Bias) happens when the model is too simple (e.g., linear on non-linear data). <strong>Overfitting</strong> (High Variance) happens when the model is too complex and captures noise. Regularization (Alpha/C parameters) is used to find the "sweet spot".</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q4: How do you handle imbalanced datasets in Sklearn?</strong>
+                    <p><strong>Answer:</strong> (1) <code>class_weight='balanced'</code> inside estimators, (2) Stratified cross-validation, (3) Focus on Precision-Recall curves/AUC instead of Accuracy, (4) Resampling (using <code>imblearn</code> library which is Sklearn-compatible).</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q5: What's the difference between L1 (Lasso) and L2 (Ridge) regularization?</strong>
+                    <p><strong>Answer:</strong> L1 adds absolute value penalty; it results in <strong>sparse models</strong> (coefficents become exactly zero), effectively performing feature selection. L2 adds squared penalty; it shrinks coefficients towards zero but rarely to zero, good for handling multicollinearity.</p>
+                </div>
+            </div>
+        `
+    },
+    "pytorch": {
+        concepts: `
+            <div class="section">
+                <h2>PyTorch & Deep Learning Primitives</h2>
+
+                <h3>🧠 Computational Graphs & Autograd</h3>
+                <div class="info-box">
+                    <div class="box-title">⚡ Dynamic vs Static</div>
+                    <div class="box-content">
+                        PyTorch uses <strong>Dynamic Computational Graphs</strong> (Define-by-Run). The graph is built on-the-fly as operations are performed. <code>Autograd</code> tracks every operation on tensors with <code>requires_grad=True</code> and automatically computes gradients using the chain rule during <code>.backward()</code>.
+                    </div>
+                </div>
+
+                <h3>Tensors — The Heart of PyTorch</h3>
+                <p>Tensors are multi-dimensional arrays (like NumPy) but with two superpowers: (1) <strong>GPU Acceleration</strong> (move to 'cuda' or 'mps'), (2) <strong>Automatic Differentiation</strong>. Bridging to NumPy is zero-copy for CPU tensors.</p>
+
+                <h3>Modular Architecture (nn.Module)</h3>
+                <p>Every model in PyTorch inherits from <code>nn.Module</code>. You define parameters/layers in <code>__init__</code> and the forward pass logic in <code>forward()</code>. This design promotes recursive composition — models can contain other modules.</p>
+
+                <h3>Data Engineering: Dataset & DataLoader</h3>
+                <table>
+                    <tr><th>Component</th><th>Responsibility</th></tr>
+                    <tr><td>Dataset</td><td>Defines HOW to load a single sample (__getitem__) and total count (__len__)</td></tr>
+                    <tr><td>DataLoader</td><td>Handles batching, shuffling, multi-process loading, and memory pinning</td></tr>
+                    <tr><td>Transforms</td><td>On-the-fly augmentation (cropping, flipping, normalizing)</td></tr>
+                </table>
+
+                <h3>The Optimization Loop Essentials</h3>
+                <p>Standard pattern: (1) Zero gradients, (2) Forward pass, (3) Compute Loss, (4) Backward pass (backprop), (5) Optimizer step. Don't forget <code>model.train()</code> and <code>model.eval()</code> to toggle dropout and batch norm behavior.</p>
+            </div>
+        `,
+        code: `
+            <div class="section">
+                <h2>💻 PyTorch Code Examples</h2>
+
+                <h3>The Ultimate Training Boilerplate</h3>
+                <div class="code-block">
+<span class="keyword">import</span> torch
+<span class="keyword">import</span> torch.nn <span class="keyword">as</span> nn
+<span class="keyword">import</span> torch.optim <span class="keyword">as</span> optim
+
+<span class="comment"># Device agnostic code</span>
+device = torch.device(<span class="string">"cuda"</span> <span class="keyword">if</span> torch.cuda.is_available() <span class="keyword">else</span> <span class="string">"cpu"</span>)
+
+<span class="comment"># 1. Define Architecture</span>
+<span class="keyword">class</span> <span class="class">SimpleNet</span>(nn.Module):
+    <span class="keyword">def</span> <span class="function">__init__</span>(self):
+super().__init__()
+self.flatten = nn.Flatten()
+self.fc = nn.Sequential(
+    nn.Linear(<span class="number">28</span>*<span class="number">28</span>, <span class="number">512</span>),
+    nn.ReLU(),
+    nn.Dropout(<span class="number">0.2</span>),
+    nn.Linear(<span class="number">512</span>, <span class="number">10</span>)
+)
+
+    <span class="keyword">def</span> <span class="function">forward</span>(self, x):
+x = self.flatten(x)
+<span class="keyword">return</span> self.fc(x)
+
+model = SimpleNet().to(device)
+optimizer = optim.Adam(model.parameters(), lr=<span class="number">1e-3</span>)
+criterion = nn.CrossEntropyLoss()
+
+<span class="comment"># 2. Training Loop</span>
+model.train()
+<span class="keyword">for</span> batch, (X, y) <span class="keyword">in</span> <span class="keyword">enumerate</span>(dataloader):
+    X, y = X.to(device), y.to(device)
+    
+    <span class="comment"># Zero -> Forward -> Backward -> Step</span>
+    optimizer.zero_grad()
+    pred = model(X)
+    loss = criterion(pred, y)
+    loss.backward()
+    optimizer.step()
+                </div>
+
+                <h3>Custom Dataset Implementation</h3>
+                <div class="code-block">
+<span class="keyword">from</span> torch.utils.data <span class="keyword">import</span> Dataset
+
+<span class="keyword">class</span> <span class="class">ImageDataset</span>(Dataset):
+    <span class="keyword">def</span> <span class="function">__init__</span>(self, annotations_file, img_dir, transform=None):
+self.img_labels = pd.read_csv(annotations_file)
+self.img_dir = img_dir
+self.transform = transform
+
+    <span class="keyword">def</span> <span class="function">__len__</span>(self):
+<span class="keyword">return</span> len(self.img_labels)
+
+    <span class="keyword">def</span> <span class="function">__getitem__</span>(self, idx):
+img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, <span class="number">0</span>])
+image = read_image(img_path)
+label = self.img_labels.iloc[idx, <span class="number">1</span>]
+<span class="keyword">if</span> self.transform:
+    image = self.transform(image)
+<span class="keyword">return</span> image, label
+                </div>
+
+                <h3>Transfer Learning — Freezing Layers</h3>
+                <div class="code-block">
+<span class="keyword">from</span> torchvision <span class="keyword">import</span> models
+
+model = models.resnet18(pretrained=<span class="keyword">True</span>)
+
+<span class="comment"># Freeze all weights</span>
+<span class="keyword">for</span> param <span class="keyword">in</span> model.parameters():
+    param.requires_grad = <span class="keyword">False</span>
+
+<span class="comment"># Replace final head (newly initialized, so requires_grad=True)</span>
+num_ftrs = model.fc.in_features
+model.fc = nn.Linear(num_ftrs, <span class="number">2</span>) 
+
+model = model.to(device)
+<span class="comment"># Only model.fc.parameters() will be updated</span>
+optimizer = optim.SGD(model.fc.parameters(), lr=<span class="number">0.001</span>)
+                </div>
+            </div>
+        `,
+        interview: `
+            <div class="section">
+                <h2>🎯 PyTorch Interview Questions</h2>
+
+                <div class="interview-box">
+                    <strong>Q1: Why is <code>optimizer.zero_grad()</code> necessary?</strong>
+                    <p><strong>Answer:</strong> By default, PyTorch <strong>accumulates gradients</strong> on every <code>.backward()</code> call. This is useful for RNNs or training with effectively larger batch sizes than memory allows. If you don't zero them out, gradients from previous batches will influence the current update, leading to incorrect training.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q2: What is the difference between <code>model.train()</code> and <code>model.eval()</code>?</strong>
+                    <p><strong>Answer:</strong> They set the mode for specific layers. <code>.train()</code> enables <strong>Dropout</strong> and <strong>Batch Normalization</strong> (calculates stats for current batch). <code>.eval()</code> disables dropout and uses running averages for Batch Norm. Forgetting <code>.eval()</code> during testing will lead to inconsistent/bad predictions.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q3: Explain the role of <code>torch.no_grad()</code>.</strong>
+                    <p><strong>Answer:</strong> It's a context manager that disables gradient calculation. Use it during <strong>inference</strong> or <strong>validation</strong> to save memory and compute resources. It prevents the creation of the computational graph for those operations.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q4: PyTorch vs TensorFlow — technical tradeoffs?</strong>
+                    <p><strong>Answer:</strong> PyTorch (Dynamic graph) is more Pythonic, easier to debug with standard tools, and highly favored in research. TensorFlow (Static graph/Keras) historically had better deployment tools (TFLite, TFServing) and massive industry scale, though the gap has significantly narrowed with PyTorch 2.0 and TorchServe.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q5: What is "Tensor Broadcasting" in PyTorch?</strong>
+                    <p><strong>Answer:</strong> Same as NumPy. If dimensions don't match, PyTorch automatically expands the smaller tensor (by repeating values) to match the larger one, provided they are compatible (trailing dimensions match or are 1). This happens without actual memory copying.</p>
+                </div>
+            </div>
+        `
+    },
+    "tensorflow": {
+        concepts: `
+            <div class="section">
+                <h2>TensorFlow & Production DL</h2>
+
+                <h3>🧠 The Keras Ecosystem</h3>
+                <div class="info-box">
+                    <div class="box-title">⚡ User-First API</div>
+                    <div class="box-content">
+                        Keras is TensorFlow's high-level API. It focuses on <strong>Developer Experience</strong> (DX) — minimizing the number of user actions for common use cases. <code>tf.keras</code> supports three ways to build models: (1) Sequential (simple stacks), (2) Functional (DAGs, multi-input/output), (3) Subclassing (full control).
+                    </div>
+                </div>
+
+                <h3>tf.data — Performance Pipelines</h3>
+                <p>Loading data is often the bottleneck. <code>tf.data.Dataset</code> enables "ETL" pipelines: <strong>Extract</strong> (from disk/cloud), <strong>Transform</strong> (shuffle, batch, repeat), <strong>Load</strong> (map to GPU). Concepts like <code>prefetch</code> and <code>interleave</code> ensure the GPU is never waiting for the CPU.</p>
+
+                <h3>Static Graphs & tf.function</h3>
+                <p>TensorFlow can convert Python code into a <strong>Static Computational Graph</strong> using <code>@tf.function</code>. This enables significant optimizations like constant folding and makes models exportable to environments without Python (C++, Java, JS).</p>
+
+                <h3>Monitoring with TensorBoard</h3>
+                <table>
+                    <tr><th>Component</th><th>Visualized metric</th></tr>
+                    <tr><td>Scalars</td><td>Loss/Accuracy curves in real-time</td></tr>
+                    <tr><td>Histograms</td><td>Weights/Gradients distribution (checking for vanishing/exploding)</td></tr>
+                    <tr><td>Graphs</td><td>The internal model architecture</td></tr>
+                    <tr><td>Projector</td><td>High-dimensional embeddings (t-SNE/PCA)</td></tr>
+                </table>
+
+                <h3>Deployment Architecture (TFX)</h3>
+                <p>TensorFlow Extended (TFX) is for end-to-end ML. Key components: <strong>TF Serving</strong> (for APIs), <strong>TF Lite</strong> (for mobile/edge), <strong>TFJS</strong> (for web browsers). TF Serving supports model versioning and A/B testing out of the box.</p>
+            </div>
+        `,
+        code: `
+            <div class="section">
+                <h2>💻 TensorFlow Code Examples</h2>
+
+                <h3>The Functional API Pattern</h3>
+                <div class="code-block">
+<span class="keyword">import</span> tensorflow <span class="keyword">as</span> tf
+<span class="keyword">from</span> tensorflow <span class="keyword">import</span> keras
+<span class="keyword">from</span> tensorflow.keras <span class="keyword">import</span> layers
+
+<span class="comment"># Functional API — best for production model logic</span>
+inputs = keras.Input(shape=(<span class="number">784</span>,))
+x = layers.Dense(<span class="number">64</span>, activation=<span class="string">"relu"</span>)(inputs)
+x = layers.Dense(<span class="number">64</span>, activation=<span class="string">"relu"</span>)(x)
+outputs = layers.Dense(<span class="number">10</span>, activation=<span class="string">"softmax"</span>)(x)
+
+model = keras.Model(inputs=inputs, outputs=outputs, name=<span class="string">"mnist_model"</span>)
+
+model.compile(
+    loss=keras.losses.SparseCategoricalCrossentropy(),
+    optimizer=keras.optimizers.RMSprop(),
+    metrics=[<span class="string">"accuracy"</span>],
+)
+
+history = model.fit(x_train, y_train, batch_size=<span class="number">64</span>, epochs=<span class="number">2</span>, validation_split=<span class="number">0.2</span>)
+                </div>
+
+                <h3>High-Performance Data Pipeline</h3>
+                <div class="code-block">
+<span class="keyword">def</span> <span class="function">load_and_preprocess</span>(path, label):
+    image = tf.io.read_file(path)
+    image = tf.image.decode_jpeg(image, channels=<span class="number">3</span>)
+    image = tf.image.resize(image, [<span class="number">224</span>, <span class="number">224</span>])
+    <span class="keyword">return</span> image / <span class="number">255.0</span>, label
+
+dataset = tf.data.Dataset.from_tensor_slices((image_paths, labels))
+dataset = (dataset
+    .shuffle(<span class="number">1000</span>)
+    .map(load_and_preprocess, num_parallel_calls=tf.data.AUTOTUNE)
+    .batch(<span class="number">32</span>)
+    .prefetch(tf.data.AUTOTUNE) <span class="comment"># Overlap training and preprocessing</span>
+)
+                </div>
+
+                <h3>Custom Layers & Training Loops</h3>
+                <div class="code-block">
+<span class="comment"># Custom Training Loop (GradientTape)</span>
+optimizer = tf.keras.optimizers.Adam()
+loss_fn = tf.keras.losses.BinaryCrossentropy()
+
+<span class="preprocessor">@tf.function</span>  <span class="comment"># Compiles to static graph for speed</span>
+<span class="keyword">def</span> <span class="function">train_step</span>(x, y):
+    <span class="keyword">with</span> tf.GradientTape() <span class="keyword">as</span> tape:
+logits = model(x, training=<span class="keyword">True</span>)
+loss_value = loss_fn(y, logits)
+    
+    grads = tape.gradient(loss_value, model.trainable_weights)
+    optimizer.apply_gradients(zip(grads, model.trainable_weights))
+    <span class="keyword">return</span> loss_value
+                </div>
+            </div>
+        `,
+        interview: `
+            <div class="section">
+                <h2>🎯 TensorFlow Interview Questions</h2>
+
+                <div class="interview-box">
+                    <strong>Q1: What is <code>tf.function</code> and AutoGraph?</strong>
+                    <p><strong>Answer:</strong> <code>tf.function</code> is a decorator that converts a regular Python function into a TensorFlow <strong>static graph</strong>. AutoGraph is the internal tool that translates Python control flow (<code>if</code>, <code>while</code>) into TF graph ops. This allows for compiler-level optimizations and easy deployment without a Python environment.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q2: Why use <code>tf.data.AUTOTUNE</code>?</strong>
+                    <p><strong>Answer:</strong> It allows TensorFlow to <strong>dynamically adjust</strong> the level of parallelism and buffer sizes based on your CPU/disk hardware. It ensures that data preprocessing (CPU) is always one step ahead of model training (GPU), preventing hardware starvation.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q3: Functional API vs Sequential vs Subclassing?</strong>
+                    <p><strong>Answer:</strong> <strong>Sequential:</strong> purely linear stacks. <strong>Functional:</strong> most common for production, supports non-linear topology (shared layers, multiple inputs/outputs). <strong>Subclassing:</strong> full control over the forward pass, best for complex research/custom logic. Functional is generally preferred for its balance of power and debugging ease.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q4: How do you prevent overfitting in TensorFlow?</strong>
+                    <p><strong>Answer:</strong> (1) EarlyStopping callback, (2) Dropout layers, (3) L1/L2 kernels regularizers, (4) Data augmentation (via <code>tf.image</code> or <code>keras.layers</code>), (5) Learning rate schedules via <code>callbacks.ReduceLROnPlateau</code>.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q5: What is SavedModel format?</strong>
+                    <p><strong>Answer:</strong> The language-neutral, hermetic serialization format for TF models. It includes the model architecture, weights, and the computational graph (signatures). It is the standard format for TF Serving and TFLite conversion.</p>
+                </div>
+            </div>
+        `
+    },
+    "production": {
+        concepts: `
+            <div class="section">
+                <h2>Production Python & MLOps</h2>
+
+                <h3>🧠 FastAPI — The Modern Standard</h3>
+                <div class="info-box">
+                    <div class="box-title">⚡ High Performance APIs</div>
+                    <div class="box-content">
+                        FastAPI is built on <strong>Starlette</strong> and <strong>Pydantic</strong>. It supports <code>async/await</code> for handling concurrent requests without blocking, uses type hints for automatic validation, and generates interactive OpenAPI (Swagger) documentation. It is the gold standard for serving ML models today.
+                    </div>
+                </div>
+
+                <h3>Pydantic & Data Validation</h3>
+                <p>In production, you cannot trust input data. Pydantic enforces <strong>strict type checking</strong> and validation at runtime. If a JSON request arrives with a string instead of a float for a model feature, Pydantic catches it immediately and returns a clear error before the model even sees it.</p>
+
+                <h3>The ML Model Serving Lifecycle</h3>
+                <table>
+                    <tr><th>Stage</th><th>Responsibility</th><th>Tools</th></tr>
+                    <tr><td>Initialization</td><td>Loading model weights into memory (once)</td><td>FastAPI Lifespan</td></tr>
+                    <tr><td>Inference</td><td>Preprocessing input and getting prediction</td><td>NumPy/Pydantic</td></tr>
+                    <tr><td>Post-processing</td><td>Formatting prediction for the client</td><td>JSON/Protobuf</td></tr>
+                    <tr><td>Observability</td><td>Logging latency, inputs, and drift</td><td>Prometheus/ELK</td></tr>
+                </table>
+
+                <h3>Dependency Management & Docker</h3>
+                <p><strong>Conda vs Pip:</strong> Pip is standard for Python; Conda is better for C-extensions/CUDA. <strong>Docker:</strong> Containerizing the environment ensures it "works on my machine" translates to "works in the cloud". Use lightweight base images (python:3.10-slim) to minimize security risks and build times.</p>
+
+                <h3>Testing ML Applications</h3>
+                <p>(1) <strong>Unit tests:</strong> for preprocessing logic, (2) <strong>Integration tests:</strong> for the API endpoints, (3) <strong>Model Quality tests:</strong> ensuring the model meets a minimum accuracy threshold on a benchmark dataset before deployment.</p>
+            </div>
+        `,
+        code: `
+            <div class="section">
+                <h2>💻 Production Python Code Examples</h2>
+
+                <h3>The FastAPI Model Server Pattern</h3>
+                <div class="code-block">
+<span class="keyword">from</span> fastapi <span class="keyword">import</span> FastAPI, HTTPException
+<span class="keyword">from</span> pydantic <span class="keyword">import</span> BaseModel
+<span class="keyword">import</span> joblib
+
+app = FastAPI(title=<span class="string">"ML Model API"</span>)
+
+<span class="comment"># 1. Prediction Schema</span>
+<span class="keyword">class</span> <span class="class">PredictionInput</span>(BaseModel):
+    feature_1: <span class="keyword">float</span>
+    feature_2: <span class="keyword">float</span>
+    category: <span class="keyword">str</span>
+
+<span class="comment"># 2. Global Predictor Registry</span>
+model = <span class="keyword">None</span>
+
+<span class="preprocessor">@app.on_event</span>(<span class="string">"startup"</span>)
+<span class="keyword">def</span> <span class="function">load_model</span>():
+    <span class="keyword">global</span> model
+    model = joblib.load(<span class="string">'model.joblib'</span>)
+
+<span class="preprocessor">@app.post</span>(<span class="string">"/predict"</span>)
+<span class="keyword">async def</span> <span class="function">predict</span>(data: PredictionInput):
+    <span class="keyword">try</span>:
+features = [[data.feature_1, data.feature_2]]
+prediction = model.predict(features)
+<span class="keyword">return</span> {<span class="string">"prediction"</span>: <span class="keyword">float</span>(prediction[<span class="number">0</span>])}
+    <span class="keyword">except</span> Exception <span class="keyword">as</span> e:
+<span class="keyword">raise</span> HTTPException(status_code=<span class="number">500</span>, detail=<span class="keyword">str</span>(e))
+                </div>
+
+                <h3>Robust Logging Strategy</h3>
+                <div class="code-block">
+<span class="keyword">import</span> logging
+<span class="keyword">import</span> sys
+
+<span class="keyword">def</span> <span class="function">get_logger</span>(name):
+    logger = logging.getLogger(name)
+    logger.setLevel(logging.INFO)
+    
+    <span class="comment"># JSON formatter for easier ELK/Splunk ingestion</span>
+    handler = logging.StreamHandler(sys.stdout)
+    formatter = logging.Formatter(
+<span class="string">'{"time":"%(asctime)s", "name":"%(name)s", "level":"%(levelname)s", "msg":"%(message)s"}'</span>
+    )
+    handler.setFormatter(formatter)
+    logger.addHandler(handler)
+    <span class="keyword">return</span> logger
+                </div>
+
+                <h3>Docker Configuration (ML Specific)</h3>
+                <div class="code-block">
+<span class="comment"># Dockerfile for ML Service</span>
+FROM python:<span class="number">3.10-slim</span>
+
+WORKDIR /app
+COPY requirements.txt .
+
+<span class="comment"># Install dependencies without cache</span>
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY . .
+
+<span class="comment"># Expose port and run server</span>
+EXPOSE <span class="number">8000</span>
+CMD [<span class="string">"uvicorn"</span>, <span class="string">"main:app"</span>, <span class="string">"--host"</span>, <span class="string">"0.0.0.0"</span>, <span class="string">"--port"</span>, <span class="string">"8000"</span>]
+                </div>
+            </div>
+        `,
+        interview: `
+            <div class="section">
+                <h2>🎯 Production Interview Questions</h2>
+
+                <div class="interview-box">
+                    <strong>Q1: Why use FastAPI over Flask for ML models?</strong>
+                    <p><strong>Answer:</strong> (1) Native <code>async</code> support (handles concurrent requests better), (2) Automatically generates Swagger UI for testing, (3) Pydantic integration for data validation, (4) Significantly higher throughput (close to Go/Node.js levels), (5) Built-in support for WebSockets and background tasks.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q2: How do you handle model versioning in production?</strong>
+                    <p><strong>Answer:</strong> (1) URL versioning (<code>/v1/predict</code>), (2) Model registry (MLflow/SageMaker) with aliases like <code>production</code> or <code>staging</code>, (3) Blue-green deployment — route traffic to the new version only after validation, (4) Embed the model version in the API response metadata for debugging.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q3: What is "Dependency Hell" and how do you solve it?</strong>
+                    <p><strong>Answer:</strong> It occurs when multiple libraries require conflicting versions of the same dependency. Solved by: (1) Using virtual environments (<code>venv</code>/<code>conda</code>), (2) pinning exact versions in <code>requirements.txt</code> or <code>poetry.lock</code>, (3) Docker to isolate the entire OS environment.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q4: How do you log inputs/outputs without violating privacy?</strong>
+                    <p><strong>Answer:</strong> (1) <strong>PII Masking:</strong> remove names/emails/IDs before logging, (2) Hash sensitive fields if they are needed for troubleshooting, (3) Separate logging of model metadata from raw data, (4) Use specialized monitoring tools like <strong>Arize</strong> or <strong>Whylogs</strong> for drift detection without full data capture.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q5: What's the role of CI/CD in Machine Learning?</strong>
+                    <p><strong>Answer:</strong> Beyond standard code tests, ML CI/CD (MLOps) includes <strong>Data Validation</strong> (is the incoming data schema correct?), <strong>Model Validation</strong> (is accuracy >= 90%?), and automated <strong>deployment to staging</strong> for human-in-the-loop review.</p>
+                </div>
+            </div>
+        `
+    },
+    "optimization": {
+        concepts: `
+            <div class="section">
+                <h2>Python High Performance & Optimization</h2>
+
+                <h3>🧠 The GIL (Global Interpreter Lock) — Deep Dive</h3>
+                <div class="info-box">
+                    <div class="box-title">⚡ The Bottleneck of Python</div>
+                    <div class="box-content">
+                        The GIL is a mutex that protects access to Python objects, preventing multiple native threads from executing Python bytecodes at once. <strong>Critical for DS:</strong> NumPy and Pandas release the GIL during C-level computations. Therefore, vectorized code IS truly parallel across CPU cores even with the GIL.
+                    </div>
+                </div>
+
+                <h3>Profiling: Finding the Real Bottleneck</h3>
+                <p>Never optimize without measuring. (1) <strong>cProfile:</strong> for function-level timing, (2) <strong>line_profiler:</strong> for line-by-line analysis in "hot" functions, (3) <strong>memory_profiler:</strong> to detect memory leaks and peak usage, (4) <strong>Py-Spy:</strong> a sampling profiler for zero-instrumentation production profiling.</p>
+
+                <h3>Numba — JIT Compilation for NumPy</h3>
+                <p>Numba translates a subset of Python and NumPy code into fast machine code using LLVM. By simply adding <code>@njit</code>, you can achieve C/Fortran-like speeds for math-heavy loops that cannot be vectorized with pure NumPy.</p>
+
+                <h3>Concurrency Models in Python</h3>
+                <table>
+                    <tr><th>Model</th><th>Best for...</th><th>Mechanism</th></tr>
+                    <tr><td>Threading</td><td>I/O-bound (APIs, DBs)</td><td>Concurrent but not parallel (GIL)</td></tr>
+                    <tr><td>Multiprocessing</td><td>CPU-bound (Training, Math)</td><td>True parallelism (separate OS processes)</td></tr>
+                    <tr><td>asyncio</td><td>High-concurrency I/O</td><td>Single-threaded cooperative multitasking</td></tr>
+                </table>
+
+                <h3>Vectorization & SIMD</h3>
+                <p>Single Instruction, Multiple Data (SIMD) allows a CPU to perform the same operation on multiple data points in one clock cycle. Modern NumPy leverages AVX-512 and MKL/OpenBLAS to ensure your <code>a + b</code> is as fast as the hardware allows.</p>
+
+                <h3>Cython — When All Else Fails</h3>
+                <p>Cython is a superset of Python that compiles to C. It allows you to call C functions directly and use static typing. Use it for complex algorithms that require low-level memory control (e.g., custom tree models or graph algorithms).</p>
+            </div>
+        `,
+        code: `
+            <div class="section">
+                <h2>💻 Performance & Optimization Code Examples</h2>
+
+                <h3>Numba — JIT Speedups</h3>
+                <div class="code-block">
+<span class="keyword">from</span> numba <span class="keyword">import</span> njit
+<span class="keyword">import</span> numpy <span class="keyword">as</span> np
+
+<span class="comment"># This loop is 100x slower in pure Python</span>
+<span class="preprocessor">@njit</span>(parallel=<span class="keyword">True</span>)
+<span class="keyword">def</span> <span class="function">monte_carlo_pi</span>(nsamples):
+    acc = <span class="number">0</span>
+    <span class="keyword">for</span> i <span class="keyword">in</span> range(nsamples):
+x = np.random.random()
+y = np.random.random()
+<span class="keyword">if</span> x**<span class="number">2</span> + y**<span class="number">2</span> < <span class="number">1.0</span>:
+    acc += <span class="number">1</span>
+    <span class="keyword">return</span> <span class="number">4.0</span> * acc / nsamples
+                </div>
+
+                <h3>Multiprocessing for Data Prep</h3>
+                <div class="code-block">
+<span class="keyword">from</span> multiprocessing <span class="keyword">import</span> Pool
+
+<span class="keyword">def</span> <span class="function">heavy_image_prep</span>(file_path):
+    <span class="comment"># Complex transform logic here</span>
+    <span class="keyword">return</span> processed_img
+
+<span class="comment"># Use all available cores</span>
+<span class="keyword">if</span> __name__ == <span class="string">'__main__'</span>:
+    <span class="keyword">with</span> Pool() <span class="keyword">as</span> p:
+results = p.map(heavy_image_prep, all_files)
+                </div>
+
+                <h3>Memory Optimization with __slots__</h3>
+                <div class="code-block">
+<span class="keyword">class</span> <span class="class">Observation</span>:
+    <span class="comment"># Prevents creation of __dict__, saving significant RAM</span>
+    __slots__ = (<span class="string">'timestamp'</span>, <span class="string">'value'</span>, <span class="string">'sensor_id'</span>)
+    
+    <span class="keyword">def</span> <span class="function">__init__</span>(self, ts, val, sid):
+self.timestamp = ts
+self.value = val
+self.sensor_id = sid
+
+<span class="comment"># 1 million instances: ~60MB vs ~160MB without __slots__</span>
+data = [Observation(i, i*<span class="number">1.1</span>, <span class="string">'S1'</span>) <span class="keyword">for</span> i <span class="keyword">in</span> <span class="keyword">range</span>(<span class="number">1000000</span>)]
+                </div>
+            </div>
+        `,
+        interview: `
+            <div class="section">
+                <h2>🎯 Performance Interview Questions</h2>
+
+                <div class="interview-box">
+                    <strong>Q1: Why does Python have a GIL?</strong>
+                    <p><strong>Answer:</strong> It simplifies implementation by making the memory management (reference counting) thread-safe without needing granular locks. It also makes single-threaded code faster and C-extension integration easier. Removing it is difficult because it effectively requires a rewrite of the interpreter (see: "no-gil" Python 3.13 proposal).</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q2: How do you optimize a function with a nested loop?</strong>
+                    <p><strong>Answer:</strong> (1) <strong>Vectorize</strong> with NumPy (broadcast), (2) If logic is too complex for NumPy, use <strong>Numba</strong> JIT, (3) Use <strong>Cython</strong> if you need C-level types, (4) Use <code>multiprocessing</code> if the iterations are independent and CPU-bound.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q3: Explain the "cProfile" overhead.</strong>
+                    <p><strong>Answer:</strong> cProfile is a deterministic profiler; it hooks into every function call. While very accurate, it adds significant overhead (sometimes 2x slowdown). For production systems, "Sampling Profilers" (like Py-Spy) are better as they only inspect the stack every few milliseconds, adding negligible overhead.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q4: When is Threading faster than Multiprocessing?</strong>
+                    <p><strong>Answer:</strong> For <strong>I/O-bound</strong> tasks (Network/Disk). Threading has much lower overhead (shared memory) compared to Multiprocessing (separate memory spaces, requires serialization/pickling of data between processes). For downloading 1000 images, threads are superior.</p>
+                </div>
+
+                <div class="interview-box">
+                    <strong>Q5: What is "Cache Locality" and how does NumPy help?</strong>
+                    <p><strong>Answer:</strong> CPUs are fastest when accessing contiguous memory (Spatial Locality). NumPy's C-contiguous arrays ensure that when one value is loaded into the CPU cache, the next values are also loaded, minimizing "Cache Misses" compared to Python lists of scattered objects.</p>
+                </div>
+            </div>
+        `
+    }
+};
+
+// Render dashboard cards
+function renderDashboard() {
+    const grid = document.getElementById('modulesGrid');
+    grid.innerHTML = modules.map(module => `
+        <div class="card" onclick="showModule('${module.id}')">
+            <div class="card-icon">${module.icon}</div>
+            <h3>${module.title}</h3>
+            <p>${module.description}</p>
+            <span class="category-label">${module.category}</span>
+        </div>
+    `).join('');
+}
+
+// Show specific module
+function showModule(moduleId) {
+    const module = modules.find(m => m.id === moduleId);
+    const content = MODULE_CONTENT[moduleId];
+
+    document.getElementById('dashboard').classList.remove('active');
+
+    const moduleHTML = `
+        <div class="module active" id="module-${moduleId}">
+            <button class="btn-back" onclick="backToDashboard()">← Back to Dashboard</button>
+            <header>
+                <h1>${module.icon} ${module.title}</h1>
+                <p class="subtitle">${module.description}</p>
+            </header>
+            
+            <div class="tabs">
+                <button class="tab-btn active" onclick="switchTab('${moduleId}', 'concepts', event)">📖 Key Concepts</button>
+                <button class="tab-btn" onclick="switchTab('${moduleId}', 'code', event)">💻 Code Examples</button>
+                <button class="tab-btn" onclick="switchTab('${moduleId}', 'interview', event)">🎯 Interview Questions</button>
+            </div>
+            
+            <div id="${moduleId}-concepts" class="tab active">${content.concepts}</div>
+            <div id="${moduleId}-code" class="tab">${content.code}</div>
+            <div id="${moduleId}-interview" class="tab">${content.interview}</div>
+        </div>
+    `;
+
+    document.getElementById('modulesContainer').innerHTML = moduleHTML;
+}
+
+// Switch tabs
+function switchTab(moduleId, tabName, e) {
+    const moduleEl = document.getElementById(`module-${moduleId}`);
+
+    // Update tab buttons
+    moduleEl.querySelectorAll('.tab-btn').forEach(btn => btn.classList.remove('active'));
+    if (e && e.target) {
+        e.target.classList.add('active');
+    } else {
+        // Fallback: find the button by tab name
+        const tabNames = ['concepts', 'code', 'interview'];
+        const idx = tabNames.indexOf(tabName);
+        if (idx !== -1) moduleEl.querySelectorAll('.tab-btn')[idx]?.classList.add('active');
+    }
+
+    // Update tab content
+    moduleEl.querySelectorAll('.tab').forEach(tab => tab.classList.remove('active'));
+    document.getElementById(`${moduleId}-${tabName}`).classList.add('active');
+}
+
+// Back to dashboard
+function backToDashboard() {
+    document.querySelectorAll('.module').forEach(m => m.remove());
+    document.getElementById('dashboard').classList.add('active');
+}
+
+// Initialize
+
+document.addEventListener('DOMContentLoaded', renderDashboard);

Type	Mutable	Ordered	Hashable	Use Case
list	✓	✓	✗	Sequential data, time series, feature lists
tuple	✗	✓	✓	Fixed records, dict keys, DataFrame rows
dict	✓	✓ (3.7+)	✗	Lookup tables, JSON, config, caches
set	✓	✗	✗	Unique values, membership testing O(1)
frozenset	✗	✗	✓	Immutable set, usable as dict keys
deque	✓	✓	✗	O(1) append/pop both ends, sliding windows
Class	Purpose	Why It Matters in DS
defaultdict	Dict with default factory	Group data without KeyError: `defaultdict(list)`
Counter	Count hashable objects	Label distribution: `Counter(y_train)`
namedtuple	Lightweight immutable class	Return multiple values with names, not indices
OrderedDict	Dict remembering insertion order	Legacy (dicts are ordered in 3.7+), but useful for `move_to_end()`
deque	Double-ended queue	Sliding window computations, BFS algorithms
ChainMap	Stack multiple dicts	Layer config: defaults → env → CLI overrides
Function	What It Does	DS Use Case
`chain()`	Concatenate iterables	Merge multiple data files lazily
`islice()`	Slice any iterator	Take first N records from generator
`groupby()`	Group consecutive elements	Process sorted log entries by date
`product()`	Cartesian product	Generate hyperparameter grid
`combinations()`	All r-length combos	Feature interaction pairs
`starmap()`	map() with unpacked args	Apply function to paired data
Tool	Best For	Create	Key Feature
venv	Simple projects	`python -m venv env`	Built-in, lightweight
conda	DS/ML (C dependencies)	`conda create -n myenv python=3.11`	Handles non-Python deps (CUDA, MKL)
poetry	Modern packaging	`poetry init`	Lock files, deterministic builds
uv	Speed (Rust-based)	`uv venv`	10-100x faster than pip
Feature	Python List	NumPy ndarray
Storage	Array of pointers to objects scattered in memory	Contiguous block of raw typed data
Type	Each element can be different type	Homogeneous — all elements same dtype
Operations	Python loop (bytecode interpretation)	Compiled C/Fortran loops
Memory	~28 bytes per int + pointer overhead	8 bytes per int64 (no overhead)
SIMD	Not possible	Uses CPU vector instructions (SSE/AVX)
dtype	Bytes	Range	When to Use
float32	4	±3.4e38	Deep learning (GPU prefers this), 50% less memory
float64	8	±1.8e308	Default. Scientific computing, high-precision stats
int32	4	±2.1 billion	Indices, counts, most integer data
bool	1	True/False	Masks for filtering
category (Pandas)	Varies	Finite set	Repeated strings → 90% memory savings
Feature	Series	DataFrame
Dimensions	1D labeled array	2D labeled table
Analogy	A column in a spreadsheet	The entire spreadsheet
Index	Single index	Row index + column index
Creation	`pd.Series([1,2,3])`	`pd.DataFrame({'a': [1,2]})`
Strategy	Savings	When to Use
Category dtype	90%+	Columns with few unique strings (gender, country)
Downcast numerics	50-75%	int64 to int32/int16 when range allows
Sparse arrays	80%+	Columns that are mostly zeros/NaN
Read in chunks	N/A	Files larger than RAM
Question	Chart Type	Library
Distribution of one variable?	Histogram, KDE, Box plot	Seaborn
Relationship between two variables?	Scatter, Hexbin, Regression	Seaborn/Plotly
Comparison across categories?	Bar, Grouped bar, Violin	Seaborn
Trend over time?	Line chart, Area chart	Plotly/Matplotlib
Correlation matrix?	Heatmap	Seaborn
Part of whole?	Pie, Treemap, Sunburst	Plotly
Geographic data?	Choropleth, Scatter mapbox	Plotly/Folium
Concept	Data Science Use Case
Inheritance	BaseModel → LinearModel → LogisticRegression
Abstract Base Classes	Defining mandatory methods like `fit()`/`predict()`
Properties	Validating input parameters (e.g., learning rate > 0)
Dunder Methods	`__call__` for making models callable, `__getitem__` for datasets