Spaces:

Executor-Tyrant-Framework
/

NuWave

Running

App Files Files Community

Executor-Tyrant-Framework committed 28 days ago

Commit

5fa3f26

verified ·

1 Parent(s): ffd85b6

Sync from GitHub: 2d1f218e331d6687f61ad13796b3478380b66891

Browse files

Files changed (3) hide show

app.py +5 -5
nuwave/benchmark_loader.py +154 -81
nuwave/benchmark_pool.yaml +913 -0

app.py CHANGED Viewed

@@ -117,7 +117,7 @@ logger.info("Both clients ready. chat=%s | extractor=%s",
 from nuwave.organism import NuWaveOrganism
 from nuwave.kiss import KISSFilter, KISSConfig
 from nuwave.pith import PithPipeline, PithConfig
-from nuwave.benchmark_loader import sample_pairs as _sample_benchmark_pairs
 from nuwave.benchmark_loader import describe_sample as _describe_benchmark_sample
 from nuwave.benchmark_loader import load_pool as _load_benchmark_pool
 from nuwave.splat_engine import decompose_layer, SplatConfig, GaussianSplats
@@ -1230,16 +1230,16 @@ def on_interleaved_benchmark(
     # substrate nodes from non-sampled categories to be force-mapped to
     # whatever centroid was closest, garbling the diagnostic metrics.
     _full_benchmark_pool = _load_benchmark_pool()
-    _pool_interleaved, _pool_same_cat_pairs, _pool_meta = _sample_benchmark_pairs(
         pool=_full_benchmark_pool,
-        n_pairs=8,
     )
     INTERLEAVED_QUESTIONS = _pool_interleaved
     _INTERLEAVED_SAME_CAT_PAIRS = _pool_same_cat_pairs
     _pool_summary = _describe_benchmark_sample(_pool_meta)
     logger.info(
-        "Phase A pool sampled: %d pairs / %d turns | cats=%s | threads=%s",
-        _pool_summary["n_pairs"],
         _pool_summary["n_turns"],
         _pool_summary["categories_sampled"],
         _pool_summary["threads_sampled"],

 from nuwave.organism import NuWaveOrganism
 from nuwave.kiss import KISSFilter, KISSConfig
 from nuwave.pith import PithPipeline, PithConfig
+from nuwave.benchmark_loader import sample_chains as _sample_benchmark_chains
 from nuwave.benchmark_loader import describe_sample as _describe_benchmark_sample
 from nuwave.benchmark_loader import load_pool as _load_benchmark_pool
 from nuwave.splat_engine import decompose_layer, SplatConfig, GaussianSplats
     # substrate nodes from non-sampled categories to be force-mapped to
     # whatever centroid was closest, garbling the diagnostic metrics.
     _full_benchmark_pool = _load_benchmark_pool()
+    _pool_interleaved, _pool_same_cat_pairs, _pool_meta = _sample_benchmark_chains(
         pool=_full_benchmark_pool,
+        n_chains=8,
     )
     INTERLEAVED_QUESTIONS = _pool_interleaved
     _INTERLEAVED_SAME_CAT_PAIRS = _pool_same_cat_pairs
     _pool_summary = _describe_benchmark_sample(_pool_meta)
     logger.info(
+        "Phase B pool sampled: %d chains / %d turns | cats=%s | threads=%s",
+        _pool_summary["n_chains"],
         _pool_summary["n_turns"],
         _pool_summary["categories_sampled"],
         _pool_summary["threads_sampled"],

nuwave/benchmark_loader.py CHANGED Viewed

@@ -1,31 +1,40 @@
 """
-benchmark_loader.py — Load and sample from the Phase A benchmark prompt pool.
-The pool (benchmark_pool.yaml) contains 40 Q1/Q2 pairs across 10 categories
-with 8 conceptual threads woven through. This loader:
   1. Loads the pool YAML
-  2. Pairs Q2s with their parent Q1s by `parent` field
-  3. Samples N pairs per run with stratification discipline:
-     - At most 2 pairs per category (prevents category dominance)
      - At least 3 threads with 2+ representatives (cross-category co-firing)
-     - Multi-complexity coverage (rejected if all same complexity)
-  4. Returns interleaved Q1/Q2 turn sequence in the same shape the benchmark's
-     existing code expects:
-       turns 0..N-1: Q1s (one per sampled pair)
-       turns N..2N-1: Q2s (matching, in same order)
-       same-cat pairs: [(i, i+N) for i in range(N)]
 # ---- Changelog ----
-# [2026-05-10] Claude Opus 4.7 — Phase A loader
-# What: Wraps benchmark_pool.yaml with rejection-sampling stratification
-# Why: Replaces hardcoded 4-category 8-prompt INTERLEAVED_QUESTIONS with
-#      diversified 10-category 80-prompt pool. Per-run variance in which
-#      pairs get sampled is itself substrate diversification — the substrate
-#      sees different content / different concept threads dominating each
-#      run, which the canonical co-firing-discovery and predictive-coding
-#      mechanisms need to fire (Run 42 sidecar inspection diagnosis).
-# How: Pure-stdlib + pyyaml. No coupling to app.py or substrate code.
 # -------------------
 """
@@ -49,36 +58,45 @@ def load_pool(path: Optional[str] = None) -> Dict[str, Any]:
     """Load the benchmark pool YAML.
     Returns a dict with keys:
-        threads:           list of thread names (e.g. "energy", "memory", ...)
-        complexity_levels: list of complexity tags (casual..pop_culture)
-        q1_layer:          list of 40 Q1 dicts (id, category, thread, complexity, text)
-        q2_layer:          list of 40 Q2 dicts (id, parent, category, thread, complexity, text)
     """
     p = path or _DEFAULT_POOL_PATH
     with open(p) as f:
         return yaml.safe_load(f)
-def _build_pairs(
     pool: Dict[str, Any],
-) -> List[Tuple[Dict[str, Any], Dict[str, Any]]]:
-    """Build (q1, q2) tuples from the pool.
-    Each Q2's `parent` field references its Q1's `id`. Pairs without a
-    matching Q2 are skipped (Phase A discipline guarantees full pairing,
-    but this stays defensive).
     """
     q2_by_parent = {q["parent"]: q for q in pool.get("q2_layer", [])}
-    pairs: List[Tuple[Dict[str, Any], Dict[str, Any]]] = []
     for q1 in pool.get("q1_layer", []):
         q2 = q2_by_parent.get(q1["id"])
-        if q2 is not None:
-            pairs.append((q1, q2))
-    return pairs
 def _validate_sample(
-    sample: List[Tuple[Dict[str, Any], Dict[str, Any]]],
     max_per_category: int = 2,
     min_threads_with_dups: int = 3,
     min_distinct_complexity_levels: int = 3,
@@ -86,40 +104,43 @@ def _validate_sample(
     """Stratification discipline check.
     Returns True if the sample respects:
-        - max `max_per_category` pairs per category (default 2)
-        - at least `min_threads_with_dups` threads with 2+ instances (default 3)
-        - at least `min_distinct_complexity_levels` distinct complexity tags
-          across the sample's q1+q2 levels (default 3)
-    Counted across Q1+Q2 pairs (each pair contributes its q1 thread, which
-    Q2 inherits — but Q2 has its own complexity, so complexity check pulls
-    from both layers).
     """
     if not sample:
         return False
-    cats = Counter(q1["category"] for q1, _q2 in sample)
     if any(count > max_per_category for count in cats.values()):
         return False
-    threads = Counter(q1["thread"] for q1, _q2 in sample)
-    threads_with_dups = sum(1 for count in threads.values() if count >= 2)
     if threads_with_dups < min_threads_with_dups:
         return False
     complexities: set = set()
-    for q1, q2 in sample:
         complexities.add(q1["complexity"])
         complexities.add(q2["complexity"])
     if len(complexities) < min_distinct_complexity_levels:
         return False
     return True
-def sample_pairs(
     pool: Optional[Dict[str, Any]] = None,
-    n_pairs: int = 8,
     seed: Optional[int] = None,
     max_attempts: int = 200,
 ) -> Tuple[
@@ -127,70 +148,117 @@ def sample_pairs(
     List[Tuple[int, int]],
     List[Dict[str, Any]],
 ]:
-    """Sample `n_pairs` pairs with stratification discipline.
     Args:
         pool: Pre-loaded pool dict. If None, loads from default path.
-        n_pairs: Number of Q1/Q2 pairs to sample. Each pair contributes
-                 2 turns (one Q1, one Q2), so total turns = 2 * n_pairs.
         seed: RNG seed for reproducibility. None = nondeterministic.
-        max_attempts: Rejection-sampling retry budget. After this many
-                      attempts without a valid sample, falls back to last
-                      candidate (rare on a 40-pair pool).
     Returns:
         interleaved_questions: list of (category, prompt_text) tuples,
-                               2*n_pairs entries. First n_pairs are Q1s,
-                               last n_pairs are matching Q2s.
         same_cat_pairs:        list of (q1_turn_idx, q2_turn_idx) tuples,
-                               n_pairs entries. Always [(i, i+n_pairs)].
-        sample_meta:           list of n_pairs dicts with q1_id, q2_id,
-                               category, thread, q1_complexity,
-                               q2_complexity (for logging/JSON output).
     """
     if pool is None:
         pool = load_pool()
-    pairs = _build_pairs(pool)
-    if len(pairs) < n_pairs:
         raise ValueError(
-            f"Pool has {len(pairs)} pairs, cannot sample {n_pairs}"
         )
     rng = random.Random(seed)
-    selected: Optional[List[Tuple[Dict[str, Any], Dict[str, Any]]]] = None
     for _attempt in range(max_attempts):
-        candidate = rng.sample(pairs, n_pairs)
         if _validate_sample(candidate):
             selected = candidate
             break
     if selected is None:
-        # Fallback — pool size + n_pairs combo can't satisfy constraints.
-        # Take the most recent candidate; partial constraint satisfaction is
-        # better than aborting the run.
-        selected = rng.sample(pairs, n_pairs)
     interleaved: List[Tuple[str, str]] = []
-    for q1, _q2 in selected:
         interleaved.append((q1["category"], q1["text"]))
-    for _q1, q2 in selected:
         interleaved.append((q2["category"], q2["text"]))
     same_cat_pairs: List[Tuple[int, int]] = [
-        (i, i + n_pairs) for i in range(n_pairs)
     ]
     sample_meta: List[Dict[str, Any]] = []
-    for q1, q2 in selected:
         sample_meta.append({
             "q1_id": q1["id"],
             "q2_id": q2["id"],
             "category": q1["category"],
             "thread": q1["thread"],
             "q1_complexity": q1["complexity"],
             "q2_complexity": q2["complexity"],
         })
     return interleaved, same_cat_pairs, sample_meta
@@ -202,21 +270,26 @@ def describe_sample(
     """Produce a small structured summary of a sample for logging.
     Used by the benchmark to surface in JSON output what was actually
-    sampled this run — useful for debugging variance and confirming the
-    stratification discipline produced the expected distribution.
     """
     cats = Counter(m["category"] for m in sample_meta)
     threads = Counter(m["thread"] for m in sample_meta)
-    complexities = Counter()
     for m in sample_meta:
         complexities[m["q1_complexity"]] += 1
         complexities[m["q2_complexity"]] += 1
     return {
-        "n_pairs": len(sample_meta),
-        "n_turns": 2 * len(sample_meta),
         "categories_sampled": dict(cats),
         "threads_sampled": dict(threads),
         "complexity_distribution": dict(complexities),
-        "pair_ids": [(m["q1_id"], m["q2_id"]) for m in sample_meta],
     }

 """
+benchmark_loader.py — Load and sample from the Phase B benchmark prompt pool.
+The pool (benchmark_pool.yaml) contains 17 categories × 4 chains × 3 layers
+= 204 prompts, with 8 conceptual threads woven through in a near-uniform
+bipartite thread-to-category mapping (each thread spans 7-10 categories).
+Subversion is in priority_categories so it's force-included in every
+per-run sample.
+This loader:
   1. Loads the pool YAML
+  2. Builds (Q1, Q2, Q3) chain triples — each Q2 and Q3 references the
+     same parent Q1 by `parent` field (Q3 is a sibling to Q2 under Q1,
+     not a Q1→Q2→Q3 lineage)
+  3. Samples N chains per run with stratification discipline:
+     - Force-include 1 chain from each priority category (subversion)
+     - At most 2 chains per category (prevents category dominance)
      - At least 3 threads with 2+ representatives (cross-category co-firing)
+     - Multi-complexity coverage across all 3 layers
+  4. Returns interleaved Q1/Q2/Q3 turn sequence:
+       turns 0..N-1:    Q1s (one per sampled chain, in chain order)
+       turns N..2N-1:   matching Q2s (same chain order)
+       turns 2N..3N-1:  matching Q3s (same chain order)
+  5. Returns same-cat pair indices for the heatmap math. Phase A semantics
+     preserved: pairs are Q1↔Q2 only `[(i, i+N) for i in range(N)]`.
+     Q3 turns contribute to substrate but aren't part of the strict same-
+     cat-reselect calculation. Future work (Option B) can add Q1↔Q3 and
+     Q2↔Q3 pairings.
 # ---- Changelog ----
+# [2026-05-10] Claude Opus 4.7 — Phase A loader (Q1/Q2 pairs, 10 cats)
+# [2026-05-11] Claude Opus 4.7 — Phase B loader (Q1/Q2/Q3 chains, 17 cats,
+#              priority_categories). Function renamed sample_pairs →
+#              sample_chains. Returns 24-turn interleave (3 layers × 8
+#              chains). Subversion is forced in every sample to give
+#              substrate consistent expectation-subverting content
+#              exposure for the surprise-axis hypothesis test.
 # -------------------
 """
     """Load the benchmark pool YAML.
     Returns a dict with keys:
+        threads:             list of thread names (8 entries)
+        complexity_levels:   list of complexity tags (6 entries)
+        priority_categories: list of categories that must appear in every
+                             per-run sample (typically just ["subversion"])
+        q1_layer:            list of 68 Q1 dicts (id, category, thread,
+                             complexity, text)
+        q2_layer:            list of 68 Q2 dicts (adds: parent → Q1 id)
+        q3_layer:            list of 68 Q3 dicts (parent → Q1 id; Q3 is
+                             sibling to Q2 under Q1)
     """
     p = path or _DEFAULT_POOL_PATH
     with open(p) as f:
         return yaml.safe_load(f)
+def _build_chains(
     pool: Dict[str, Any],
+) -> List[Tuple[Dict[str, Any], Dict[str, Any], Dict[str, Any]]]:
+    """Build (Q1, Q2, Q3) chain triples from the pool.
+    Each Q2's and Q3's `parent` field references its Q1's `id`. Chains
+    without a complete (Q1, Q2, Q3) triple are skipped; Phase B
+    discipline guarantees full triples but defensive code stays.
     """
     q2_by_parent = {q["parent"]: q for q in pool.get("q2_layer", [])}
+    q3_by_parent = {q["parent"]: q for q in pool.get("q3_layer", [])}
+    chains: List[
+        Tuple[Dict[str, Any], Dict[str, Any], Dict[str, Any]]
+    ] = []
     for q1 in pool.get("q1_layer", []):
         q2 = q2_by_parent.get(q1["id"])
+        q3 = q3_by_parent.get(q1["id"])
+        if q2 is not None and q3 is not None:
+            chains.append((q1, q2, q3))
+    return chains
 def _validate_sample(
+    sample: List[Tuple[Dict[str, Any], Dict[str, Any], Dict[str, Any]]],
     max_per_category: int = 2,
     min_threads_with_dups: int = 3,
     min_distinct_complexity_levels: int = 3,
     """Stratification discipline check.
     Returns True if the sample respects:
+        - max `max_per_category` chains per category (default 2)
+        - at least `min_threads_with_dups` threads with 2+ instances (3)
+        - at least `min_distinct_complexity_levels` distinct complexity
+          tags across the sample's combined Q1+Q2+Q3 levels (3)
+    Counted across (Q1, Q2, Q3) chain triples. Each chain contributes
+    its (single) thread once and contributes 3 complexity tags (one per
+    layer).
     """
     if not sample:
         return False
+    cats = Counter(q1["category"] for q1, _q2, _q3 in sample)
     if any(count > max_per_category for count in cats.values()):
         return False
+    threads = Counter(q1["thread"] for q1, _q2, _q3 in sample)
+    threads_with_dups = sum(
+        1 for count in threads.values() if count >= 2
+    )
     if threads_with_dups < min_threads_with_dups:
         return False
     complexities: set = set()
+    for q1, q2, q3 in sample:
         complexities.add(q1["complexity"])
         complexities.add(q2["complexity"])
+        complexities.add(q3["complexity"])
     if len(complexities) < min_distinct_complexity_levels:
         return False
     return True
+def sample_chains(
     pool: Optional[Dict[str, Any]] = None,
+    n_chains: int = 8,
     seed: Optional[int] = None,
     max_attempts: int = 200,
 ) -> Tuple[
     List[Tuple[int, int]],
     List[Dict[str, Any]],
 ]:
+    """Sample `n_chains` chains with stratification + priority discipline.
     Args:
         pool: Pre-loaded pool dict. If None, loads from default path.
+        n_chains: Total number of (Q1, Q2, Q3) chains to sample. Each
+                  chain contributes 3 turns, so total turns = 3 * n_chains.
+                  Phase B default 8 chains → 24 turns/run.
         seed: RNG seed for reproducibility. None = nondeterministic.
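+        max_attempts: Rejection-sampling retry budget for the draw that
+                      fills the slots left after the forced chains. If no
+                      draw passes validation within the budget, one more
+                      unvalidated random draw is used instead of raising.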
     Returns:
         interleaved_questions: list of (category, prompt_text) tuples,
+                               3*n_chains entries. Turn structure:
+                                  0..n-1:    Q1s
+                                  n..2n-1:   Q2s (matching, same order)
+                                  2n..3n-1:  Q3s (matching, same order)
         same_cat_pairs:        list of (q1_turn_idx, q2_turn_idx) tuples,
+                               n_chains entries. Phase A semantics:
+                               always [(i, i+n_chains) for i in range(n)].
+                               Q3 turns aren't paired here (Option A from
+                               2026-05-11; future Option B can add Q1↔Q3
+                               and Q2↔Q3 pairs).
+        sample_meta:           list of n_chains dicts with q1_id, q2_id,
+                               q3_id, category, thread, q1_complexity,
+                               q2_complexity, q3_complexity.
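+    Priority categories (from pool["priority_categories"]) are force-
+    included: one chain from each priority category is pre-selected
+    before rejection sampling fills the remaining slots. Only the
+    forced chains themselves are excluded from that draw, so other
+    chains from a priority category stay eligible (subject to the
+    per-category cap checked by validation). A priority category with
+    no chains in the pool is skipped. Stratification is checked on the
+    COMBINED final sample, so the forced chain's thread/complexity
+    contribute to the constraint accounting.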
     """
     if pool is None:
         pool = load_pool()
+    chains = _build_chains(pool)
+    if len(chains) < n_chains:
         raise ValueError(
+            f"Pool has {len(chains)} chains, cannot sample {n_chains}"
         )
+    priority_cats: List[str] = pool.get("priority_categories", []) or []
     rng = random.Random(seed)
+    # Step 1 — Pre-select forced chains from priority categories
+    forced: List[
+        Tuple[Dict[str, Any], Dict[str, Any], Dict[str, Any]]
+    ] = []
+    for cat in priority_cats:
+        cat_chains = [c for c in chains if c[0]["category"] == cat]
+        if cat_chains:
+            forced.append(rng.choice(cat_chains))
+    # Step 2 — Fill remaining slots from the chains not already forced
+    # (other chains in a priority category stay eligible), via rejection
+    # sampling against the COMBINED (forced + sampled) total
+    n_remaining = n_chains - len(forced)
+    if n_remaining < 0:
+        raise ValueError(
+            f"More priority categories ({len(forced)}) than n_chains "
+            f"({n_chains}); reduce priority list or raise n_chains"
+        )
+    forced_ids = {c[0]["id"] for c in forced}
+    non_priority = [c for c in chains if c[0]["id"] not in forced_ids]
+    selected: Optional[
+        List[Tuple[Dict[str, Any], Dict[str, Any], Dict[str, Any]]]
+    ] = None
     for _attempt in range(max_attempts):
+        if n_remaining > 0:
+            candidate_remaining = rng.sample(non_priority, n_remaining)
+        else:
+            candidate_remaining = []
+        candidate = forced + candidate_remaining
         if _validate_sample(candidate):
             selected = candidate
             break
     if selected is None:
+        # Fallback — accept partial constraint satisfaction rather than
+        # aborting. Forced chains still included; remaining slots filled
+        # by best-effort random draw.
+        if n_remaining > 0:
+            selected = forced + rng.sample(non_priority, n_remaining)
+        else:
+            selected = list(forced)
     interleaved: List[Tuple[str, str]] = []
+    for q1, _q2, _q3 in selected:
         interleaved.append((q1["category"], q1["text"]))
+    for _q1, q2, _q3 in selected:
         interleaved.append((q2["category"], q2["text"]))
+    for _q1, _q2, q3 in selected:
+        interleaved.append((q3["category"], q3["text"]))
     same_cat_pairs: List[Tuple[int, int]] = [
+        (i, i + n_chains) for i in range(n_chains)
     ]
     sample_meta: List[Dict[str, Any]] = []
+    for q1, q2, q3 in selected:
         sample_meta.append({
             "q1_id": q1["id"],
             "q2_id": q2["id"],
+            "q3_id": q3["id"],
             "category": q1["category"],
             "thread": q1["thread"],
             "q1_complexity": q1["complexity"],
             "q2_complexity": q2["complexity"],
+            "q3_complexity": q3["complexity"],
         })
     return interleaved, same_cat_pairs, sample_meta
     """Produce a small structured summary of a sample for logging.
     Used by the benchmark to surface in JSON output what was actually
+    sampled this run — useful for correlating per-run substrate
+    behavior with which threads / categories / complexity registers
+    were exercised, and for confirming priority_categories are
+    being respected.
     """
     cats = Counter(m["category"] for m in sample_meta)
     threads = Counter(m["thread"] for m in sample_meta)
+    complexities: Counter = Counter()
     for m in sample_meta:
         complexities[m["q1_complexity"]] += 1
         complexities[m["q2_complexity"]] += 1
+        complexities[m["q3_complexity"]] += 1
     return {
+        "n_chains": len(sample_meta),
+        "n_turns": 3 * len(sample_meta),
         "categories_sampled": dict(cats),
         "threads_sampled": dict(threads),
         "complexity_distribution": dict(complexities),
+        "chain_ids": [
+            (m["q1_id"], m["q2_id"], m["q3_id"]) for m in sample_meta
+        ],
     }

nuwave/benchmark_pool.yaml CHANGED Viewed

@@ -56,6 +56,15 @@ threads:
   - conflict
   - beauty
 complexity_levels:
   - casual           # offhand curiosity, low cognitive load
   - practical        # how-to, applied
@@ -333,6 +342,189 @@ q1_layer:
     complexity: pop_culture
     text: "Apollo 11 was 60 years ago and it still has cultural weight that newer space stuff doesn't really match. Why does that one stick?"
 # ────────────────────────────────────────────────────────────────────
 # Q2 LAYER — pending Q1 layer approval
 # ────────────────────────────────────────────────────────────────────
@@ -640,3 +832,724 @@ q2_layer:
     thread: memory
     complexity: conceptual
     text: "When SpaceX lands a booster perfectly, it's incredible engineering but doesn't land the same way emotionally. What does Apollo have that the modern stuff doesn't?"

   - conflict
   - beauty
+# Categories that MUST appear in every per-run sample. Subversion is here
+# so the substrate gets consistent exposure to expectation-subverting
+# content across runs — testing whether activation-pattern shifts produced
+# by riddle/twist/counter-intuitive prompts are strong enough to cause
+# canonical predictive-coding errors (the substrate-level "surprise"
+# events that have been silent across all runs to date).
+priority_categories:
+  - subversion
 complexity_levels:
   - casual           # offhand curiosity, low cognitive load
   - practical        # how-to, applied
     complexity: pop_culture
     text: "Apollo 11 was 60 years ago and it still has cultural weight that newer space stuff doesn't really match. Why does that one stick?"
+  # ───────────────────────────────────────────────────────────────────
+  # Phase B additions — 7 new categories × 4 Q1s each = 28 new Q1s
+  # Bipartite expansion: each existing thread now spans 7-10 categories
+  # instead of Phase A's 5. Subversion is in priority_categories so
+  # every per-run sample includes one of its 4 chains; the other 6 new
+  # categories rotate through with the existing 10.
+  # ───────────────────────────────────────────────────────────────────
+  # ─── Chemistry ─────────────────────────────────────────────────────
+  - id: chm-q1-1
+    category: chemistry
+    thread: energy
+    complexity: casual
+    text: "When I dump baking soda into vinegar and it foams over, where does that energy actually come from?"
+  - id: chm-q1-2
+    category: chemistry
+    thread: connection
+    complexity: conceptual
+    text: "Ice floats while most things get denser when they freeze. What's special about how water molecules connect that makes it the exception?"
+  - id: chm-q1-3
+    category: chemistry
+    thread: growth
+    complexity: practical
+    text: "Walk me through how a battery grows weaker over time — what's actually happening to the chemistry inside as it cycles?"
+  - id: chm-q1-4
+    category: chemistry
+    thread: pattern
+    complexity: theoretical
+    text: "Walk me through how chemists predict whether a reaction will be exothermic or endothermic just from the molecular structures."
+  # ─── Language / Linguistics ────────────────────────────────────────
+  - id: lng-q1-1
+    category: language
+    thread: pattern
+    complexity: casual
+    text: "People from totally different cultures still smile when they're happy. Real human universal, or have we just exported it?"
+  - id: lng-q1-2
+    category: language
+    thread: memory
+    complexity: practical
+    text: "Why is it easier to learn a third language once you've learned a second, even if the third is completely unrelated?"
+  - id: lng-q1-3
+    category: language
+    thread: conflict
+    complexity: deep
+    text: "Linguists fight over whether language shapes thought or thought shapes language. Where does that argument actually land in the evidence?"
+  - id: lng-q1-4
+    category: language
+    thread: time
+    complexity: pop_culture
+    text: "Slang moves so fast — 'bet,' 'no cap,' 'mid' — but somehow some words from the 90s never leave. What makes a slang term last?"
+  # ─── Psychology ────────────────────────────────────────────────────
+  - id: psy-q1-1
+    category: psychology
+    thread: memory
+    complexity: practical
+    text: "I forget where I put my keys five minutes ago, but I can still recite a poem from third grade. What's actually happening with memory there?"
+  - id: psy-q1-2
+    category: psychology
+    thread: connection
+    complexity: conceptual
+    text: "People in long-term relationships supposedly start to look like each other over time. Real effect, or confirmation bias dressed up as one?"
+  - id: psy-q1-3
+    category: psychology
+    thread: conflict
+    complexity: deep
+    text: "Cognitive dissonance — when your behavior doesn't match your beliefs, your mind rewrites the beliefs. How does that actually work, and is there any way to short-circuit it?"
+  - id: psy-q1-4
+    category: psychology
+    thread: growth
+    complexity: theoretical
+    text: "Personality is supposedly mostly locked in by 30 — the 'Big Five' calcified or whatever. What does the research actually say about how much it shifts in adulthood?"
+  # ─── Games / Strategy ──────────────────────────────────────────────
+  - id: gms-q1-1
+    category: games
+    thread: pattern
+    complexity: casual
+    text: "Why is Minesweeper still the perfect 5-minute lunch break game? What did it nail that newer games miss?"
+  - id: gms-q1-2
+    category: games
+    thread: conflict
+    complexity: practical
+    text: "When poker players talk about reading 'tells,' how much of that is real psychology and how much is movie nonsense?"
+  - id: gms-q1-3
+    category: games
+    thread: time
+    complexity: conceptual
+    text: "Chess endgames are mostly tablebase territory now — computers have solved them. Why does the middle game still resist that?"
+  - id: gms-q1-4
+    category: games
+    thread: beauty
+    complexity: pop_culture
+    text: "Tetris feels weirdly good and you're literally just sorting blocks. Flow state, dopamine timing, something else? What's actually doing the work?"
+  # ─── Travel / Geography ────────────────────────────────────────────
+  - id: trv-q1-1
+    category: travel
+    thread: connection
+    complexity: casual
+    text: "A place feels familiar the second time even if you only spent two days there years ago. What's the brain actually doing with that thin slice of input?"
+  - id: trv-q1-2
+    category: travel
+    thread: memory
+    complexity: practical
+    text: "When you travel, why are food memories the most vivid ones years later? More than the views, more than the museums?"
+  - id: trv-q1-3
+    category: travel
+    thread: growth
+    complexity: conceptual
+    text: "Some cities feel like they're 'still themselves' after a hundred years and some feel completely transformed. What makes a place keep its character vs lose it?"
+  - id: trv-q1-4
+    category: travel
+    thread: time
+    complexity: deep
+    text: "People cry at airports and it's not really about the leaving — something about that specific liminal space catches people off guard. What is it about that space?"
+  # ─── Art (Visual) ──────────────────────────────────────────────────
+  - id: art-q1-1
+    category: art
+    thread: pattern
+    complexity: practical
+    text: "Most photographers say the rule of thirds 'works' even though most great photos break it. What's actually doing the work — the rule, or the deviation from it?"
+  - id: art-q1-2
+    category: art
+    thread: beauty
+    complexity: conceptual
+    text: "Some paintings make you stop walking past them in a museum. Most don't. There's no obvious pattern — what's actually pulling people in?"
+  - id: art-q1-3
+    category: art
+    thread: energy
+    complexity: deep
+    text: "Some Rothkos make people cry. They're just colored rectangles. What's actually happening when people respond to abstract art that strongly?"
+  - id: art-q1-4
+    category: art
+    thread: memory
+    complexity: pop_culture
+    text: "The shower scene in Psycho still scares people even though they've seen it parodied a hundred times before ever seeing the original. How does that work?"
+  # ─── Subversion (priority_category — always sampled) ───────────────
+  - id: sub-q1-1
+    category: subversion
+    thread: pattern
+    complexity: casual
+    text: "What has keys but can't open locks, space but no rooms, and you can enter but can't go inside? Why does that riddle work where others fall flat?"
+  - id: sub-q1-2
+    category: subversion
+    thread: conflict
+    complexity: conceptual
+    text: "Counterintuitive thing about heat loss: most of it doesn't actually go through your head, despite what we tell kids. So why does putting on a hat warm you up so much?"
+  - id: sub-q1-3
+    category: subversion
+    thread: beauty
+    complexity: deep
+    text: "If you could remove all uncertainty from your decisions, would you actually be better off? Or does decision-making partly work because you can't predict the outcome?"
+  - id: sub-q1-4
+    category: subversion
+    thread: memory
+    complexity: pop_culture
+    text: "The Sixth Sense's twist still works on rewatch even though you know it's coming. Most twist movies don't survive the rewatch. What makes that one different?"
 # ────────────────────────────────────────────────────────────────────
 # Q2 LAYER — pending Q1 layer approval
 # ────────────────────────────────────────────────────────────────────
     thread: memory
     complexity: conceptual
     text: "When SpaceX lands a booster perfectly, it's incredible engineering but doesn't land the same way emotionally. What does Apollo have that the modern stuff doesn't?"
+  # ───────────────────────────────────────────────────────────────────
+  # Phase B Q2 additions — 7 new categories × 4 Q2s each = 28 new Q2s
+  # Each Q2 inherits its Q1's category and thread; complexity register
+  # shifts (typically harder than Q1) and follow-up shape varies across
+  # the standard patterns: deepen / contrast / specific-case / apply /
+  # stress-test.
+  # ───────────────────────────────────────────────────────────────────
+  # ─── Chemistry ─────────────────────────────────────────────────────
+  - id: chm-q2-1
+    parent: chm-q1-1
+    category: chemistry
+    thread: energy
+    complexity: deep
+    text: "If chemical bonds store energy and breaking them releases it, where did the energy come from in the first place — back through stars, supernovas, the Big Bang?"
+  - id: chm-q2-2
+    parent: chm-q1-2
+    category: chemistry
+    thread: connection
+    complexity: practical
+    text: "If ice didn't float because of those weird hydrogen bonds, would lakes survive winters? How dependent is life on that one quirk of water?"
+  - id: chm-q2-3
+    parent: chm-q1-3
+    category: chemistry
+    thread: growth
+    complexity: deep
+    text: "Lithium-ion batteries degrade no matter what — even sitting unused. Is that thermodynamically inevitable for any energy storage, or is it just current chemistry?"
+  - id: chm-q2-4
+    parent: chm-q1-4
+    category: chemistry
+    thread: pattern
+    complexity: conceptual
+    text: "Some reactions are so reliable chemists call them 'click chemistry' — Nobel Prize-level reliable. What pattern makes a reaction click together that cleanly when most reactions are messy?"
+  # ─── Language / Linguistics ────────────────────────────────────────
+  - id: lng-q2-1
+    parent: lng-q1-1
+    category: language
+    thread: pattern
+    complexity: conceptual
+    text: "If smiling is universal, what about laughter? Is the pattern as cross-cultural, or do different cultures laugh at totally different things?"
+  - id: lng-q2-2
+    parent: lng-q1-2
+    category: language
+    thread: memory
+    complexity: conceptual
+    text: "Adults learning a new language always sound 'foreign' — kids don't. What's actually happening at the brain level that locks in around puberty?"
+  - id: lng-q2-3
+    parent: lng-q1-3
+    category: language
+    thread: conflict
+    complexity: theoretical
+    text: "If language genuinely shapes thought, what experiment could actually prove it — beyond the 'people see colors differently if they have different color words' kind of stuff?"
+  - id: lng-q2-4
+    parent: lng-q1-4
+    category: language
+    thread: time
+    complexity: deep
+    text: "Some words from older slang are now formal English — 'cool,' 'dude,' even 'awesome.' What separates the slang that climbs into the language from the slang that dies in five years?"
+  # ─── Psychology ────────────────────────────────────────────────────
+  - id: psy-q2-1
+    parent: psy-q1-1
+    category: psychology
+    thread: memory
+    complexity: deep
+    text: "Working memory and long-term memory feel like different systems entirely. Are they actually different, or different views of the same underlying thing?"
+  - id: psy-q2-2
+    parent: psy-q1-2
+    category: psychology
+    thread: connection
+    complexity: practical
+    text: "If we DO start to mirror people we're around, what does that mean for who you become if you switch your friend group every five years?"
+  - id: psy-q2-3
+    parent: psy-q1-3
+    category: psychology
+    thread: conflict
+    complexity: pop_culture
+    text: "Sunk cost fallacy is basically dissonance in disguise — you'd quit a bad relationship if you'd just started, but two years in you can't. Does naming the bias actually help anyone escape it?"
+  - id: psy-q2-4
+    parent: psy-q1-4
+    category: psychology
+    thread: growth
+    complexity: conceptual
+    text: "If the Big Five doesn't shift much in adulthood, why do trauma and major life events seem to genuinely change people? Is it personality changing, or something else doing the work?"
+  # ─── Games / Strategy ──────────────────────────────────────────────
+  - id: gms-q2-1
+    parent: gms-q1-1
+    category: games
+    thread: pattern
+    complexity: conceptual
+    text: "Games like Minesweeper, Solitaire, Wordle — they all share something structural that makes them perfect 5-minute games. What's the actual recipe?"
+  - id: gms-q2-2
+    parent: gms-q1-2
+    category: games
+    thread: conflict
+    complexity: conceptual
+    text: "Online poker has no tells, no body language. Is the game actually different at high levels, or do the math players win in both formats?"
+  - id: gms-q2-3
+    parent: gms-q1-3
+    category: games
+    thread: time
+    complexity: deep
+    text: "If chess endgames are solved and openings are deeply theorized, will the middle game eventually be solved too? Or is there something fundamentally different about that part of the game?"
+  - id: gms-q2-4
+    parent: gms-q1-4
+    category: games
+    thread: beauty
+    complexity: conceptual
+    text: "There's a 'Tetris effect' where people see falling blocks when they close their eyes after long sessions. What does that say about what the game's actually doing to your brain?"
+  # ─── Travel / Geography ────────────────────────────────────────────
+  - id: trv-q2-1
+    parent: trv-q1-1
+    category: travel
+    thread: connection
+    complexity: conceptual
+    text: "Sometimes a place you've never been feels familiar — déjà vu for locations. What's the brain doing when somewhere genuinely new feels remembered?"
+  - id: trv-q2-2
+    parent: trv-q1-2
+    category: travel
+    thread: memory
+    complexity: deep
+    text: "If food memories are the strongest travel memories, does that mean the rest of travel is forgettable by comparison? Or is food just punching above its weight?"
+  - id: trv-q2-3
+    parent: trv-q1-3
+    category: travel
+    thread: growth
+    complexity: practical
+    text: "Tokyo and Rome both have 2000 years of history. One feels ancient at every corner; the other feels like a brand-new city wearing some old buildings. What's the actual difference?"
+  - id: trv-q2-4
+    parent: trv-q1-4
+    category: travel
+    thread: time
+    complexity: pop_culture
+    text: "Train stations don't have the same emotional weight airports do, even though they involve the same departures. Is it the speed, the distance, or something about flight specifically?"
+  # ─── Art (Visual) ──────────────────────────────────────────────────
+  - id: art-q2-1
+    parent: art-q1-1
+    category: art
+    thread: pattern
+    complexity: conceptual
+    text: "Composition rules feel arbitrary until you violate them and the image breaks. Is there an underlying principle the rules are pointing at, or did we just train ourselves to expect them?"
+  - id: art-q2-2
+    parent: art-q1-2
+    category: art
+    thread: beauty
+    complexity: deep
+    text: "If great art has some deep structural property that makes it 'work,' you'd think we could engineer it. We can't, reliably. Does that mean the property doesn't exist, or that we just can't see it directly?"
+  - id: art-q2-3
+    parent: art-q1-3
+    category: art
+    thread: energy
+    complexity: theoretical
+    text: "Aesthetic experience produces measurable physiological responses — heart rate, pupil dilation, skin conductance. Does that mean we could in principle measure 'how much' a piece of art affects someone?"
+  - id: art-q2-4
+    parent: art-q1-4
+    category: art
+    thread: memory
+    complexity: conceptual
+    text: "If parody-first exposure doesn't immunize you from the original's impact, what does that say about how cinematic memory actually works?"
+  # ─── Subversion ────────────────────────────────────────────────────
+  - id: sub-q2-1
+    parent: sub-q1-1
+    category: subversion
+    thread: pattern
+    complexity: conceptual
+    text: "Riddles that work usually have one specific structural move — misdirection toward an obvious wrong answer. Are there other structural moves that produce the same satisfaction?"
+  - id: sub-q2-2
+    parent: sub-q1-2
+    category: subversion
+    thread: conflict
+    complexity: practical
+    text: "When you find out a 'fact' you've believed forever is wrong, some people update easily and others double down. What's actually different between those two reactions?"
+  - id: sub-q2-3
+    parent: sub-q1-3
+    category: subversion
+    thread: beauty
+    complexity: pop_culture
+    text: "Stories that subvert your expectations feel beautiful when they work — but the same trick used cynically feels gimmicky. What's the actual line between earned subversion and a cheap twist?"
+  - id: sub-q2-4
+    parent: sub-q1-4
+    category: subversion
+    thread: memory
+    complexity: deep
+    text: "If a twist's power survives knowing it, what's actually being subverted on rewatch? Not the surprise — something else. What?"
+# ────────────────────────────────────────────────────────────────────
+# Q3 LAYER — third-tier follow-ups, 17 categories × 4 chains = 68 Q3s
+# ────────────────────────────────────────────────────────────────────
+#
+# Each Q3 inherits its chain's category and thread. Q3 follow-up shapes
+# vary across the standard patterns: Apply (given Q1+Q2, would X work?),
+# Stress test (where does this break down?), Connect (how does this
+# relate to an other-domain idea?), Personal (what does this mean for me?),
+# Synthesize (combining Q1+Q2, what's the takeaway?), Open question
+# (what should we be asking that we aren't?).
+#
+# Q3 turns occupy turns 16-23 (0-indexed) of the 24-turn run sample
+# (turns 0-7 are Q1, 8-15 are Q2, 16-23 are Q3). Q3-pulls-Q1 in pith tests
+# substrate recall across a 16+-turn gap — a more demanding test than
+# Phase A's 8-turn Q2-pulls-Q1 measurement.
+q3_layer:
+  # ─── Biology ───────────────────────────────────────────────────────
+  - id: bio-q3-1
+    parent: bio-q1-1
+    category: biology
+    thread: growth
+    complexity: pop_culture
+    text: "Bats are tiny but live 30+ years — completely breaks the lifespan-vs-body-size rule that holds for most mammals. What did bats find that everyone else missed?"
+  - id: bio-q3-2
+    parent: bio-q1-2
+    category: biology
+    thread: memory
+    complexity: deep
+    text: "If memories are stored in synaptic strengths, what happens to memories when individual neurons die? Are they re-stored elsewhere automatically, or do you lose pieces?"
+  - id: bio-q3-3
+    parent: bio-q1-3
+    category: biology
+    thread: connection
+    complexity: pop_culture
+    text: "Cancer evades the immune system by hijacking the same self/non-self signals it normally responds to — like learning to whisper your password back. How close is that analogy to what's actually happening?"
+  - id: bio-q3-4
+    parent: bio-q1-4
+    category: biology
+    thread: energy
+    complexity: practical
+    text: "When I hit a wall on hard cardio and feel like I can't push through, is the mitochondrial limit actually being reached, or is the brain shutting things down protectively before then?"
+  # ─── Physics ───────────────────────────────────────────────────────
+  - id: phy-q3-1
+    parent: phy-q1-1
+    category: physics
+    thread: time
+    complexity: pop_culture
+    text: "If entropy gives time its arrow, what happens to the arrow inside a black hole — where the entropy is supposedly already maxed out?"
+  - id: phy-q3-2
+    parent: phy-q1-2
+    category: physics
+    thread: beauty
+    complexity: practical
+    text: "Some physicists say a theory's mathematical beauty predicts its truth. Other physicists say that's exactly how you fool yourself for a generation. Which side has more recent track record?"
+  - id: phy-q3-3
+    parent: phy-q1-3
+    category: physics
+    thread: energy
+    complexity: deep
+    text: "Hawking radiation lets black holes evaporate over astronomical timescales. If you're falling in right before that happens, do you experience the evaporation as it occurs, or does time work differently for you?"
+  - id: phy-q3-4
+    parent: phy-q1-4
+    category: physics
+    thread: pattern
+    complexity: pop_culture
+    text: "If sci-fi gets entanglement wrong, are there other quantum mechanics phenomena it actually gets RIGHT? Is there any film or book that nails the physics?"
+  # ─── Computing ─────────────────────────────────────────────────────
+  - id: cmp-q3-1
+    parent: cmp-q1-1
+    category: computing
+    thread: pattern
+    complexity: deep
+    text: "If 'standard pattern' instincts came from old text editors into modern game UIs, are there modern interfaces stuck with bad patterns just because everyone copied them once?"
+  - id: cmp-q3-2
+    parent: cmp-q1-2
+    category: computing
+    thread: memory
+    complexity: theoretical
+    text: "Cache hierarchies are entirely about latency-vs-capacity tradeoffs. If we had infinite-bandwidth memory at any distance, would the hierarchy collapse, or are there other reasons we'd still need it?"
+  - id: cmp-q3-3
+    parent: cmp-q1-3
+    category: computing
+    thread: beauty
+    complexity: pop_culture
+    text: "Knuth famously wrote 'beware of bugs in the above code, I have only proved it correct, not tried it.' Is there beauty in code that's elegantly wrong? Or is beauty inseparable from correctness?"
+  - id: cmp-q3-4
+    parent: cmp-q1-4
+    category: computing
+    thread: connection
+    complexity: practical
+    text: "If graph databases are great for highly-connected data, why hasn't social media moved to them entirely? What's keeping the major platforms on relational?"
+  # ─── Math ──────────────────────────────────────────────────────────
+  - id: mth-q3-1
+    parent: mth-q1-1
+    category: math
+    thread: pattern
+    complexity: conceptual
+    text: "Probability paradoxes — Monty Hall, the birthday problem, the boy-girl paradox — share a structural feature. Is there a unifying way to see why intuition fails on all of them at once?"
+  - id: mth-q3-2
+    parent: mth-q1-2
+    category: math
+    thread: growth
+    complexity: theoretical
+    text: "Compound growth feels infinite when graphed but real-world growth always hits limits. What does the math actually say about when exponential turns into logistic?"
+  - id: mth-q3-3
+    parent: mth-q1-3
+    category: math
+    thread: conflict
+    complexity: deep
+    text: "0.999... = 1 mostly stops bothering people once they've worked through the limit definition. Are there other math 'truths' where understanding the rigor never makes them feel intuitive?"
+  - id: mth-q3-4
+    parent: mth-q1-4
+    category: math
+    thread: beauty
+    complexity: pop_culture
+    text: "If the Riemann hypothesis turns out to be false, would that change what 'beautiful proof' means? Or would the proof of its falsity itself be considered beautiful?"
+  # ─── Philosophy ────────────────────────────────────────────────────
+  - id: phi-q3-1
+    parent: phi-q1-1
+    category: philosophy
+    thread: conflict
+    complexity: pop_culture
+    text: "Most people who claim 'lying is always wrong' would still lie to a Nazi at the door asking about hidden refugees. Does that mean their stated belief is wrong, or that their stated belief is actually a useful default with exceptions?"
+  - id: phi-q3-2
+    parent: phi-q1-2
+    category: philosophy
+    thread: time
+    complexity: deep
+    text: "Compatibilism saves free will by redefining it. Is that intellectually satisfying, or does it feel like changing the goalposts? When have other philosophical positions done the same trick well?"
+  - id: phi-q3-3
+    parent: phi-q1-3
+    category: philosophy
+    thread: energy
+    complexity: theoretical
+    text: "If consciousness emerges from organized energy patterns, could you in principle build a conscious being from a different substrate — quantum fields, magnetic patterns, anything besides neurons?"
+  - id: phi-q3-4
+    parent: phi-q1-4
+    category: philosophy
+    thread: connection
+    complexity: deep
+    text: "If the Cave's prisoners get out and see the sun, would they just be in a bigger cave with a different unseen shadow-source? At what point are you actually 'out'?"
+  # ─── Film / TV ─────────────────────────────────────────────────────
+  - id: flm-q3-1
+    parent: flm-q1-1
+    category: film_tv
+    thread: memory
+    complexity: practical
+    text: "If Ratatouille's meal scene works because it triggers food-memory in viewers, would the same scene with a culture's food you've never tasted hit as hard, or is the cultural specificity load-bearing?"
+  - id: flm-q3-2
+    parent: flm-q1-2
+    category: film_tv
+    thread: beauty
+    complexity: theoretical
+    text: "Three-act structure lives because it works on the brain at a level we don't articulate. Are there other narrative structures from other cultures (kishōtenketsu, dastan) that do the same thing differently?"
+  - id: flm-q3-3
+    parent: flm-q1-3
+    category: film_tv
+    thread: growth
+    complexity: pop_culture
+    text: "Walter White's slide is recognizable as how moral collapse works. Are there real historical figures whose decline mirrors his arc closely enough that watching the show feels like history?"
+  - id: flm-q3-4
+    parent: flm-q1-4
+    category: film_tv
+    thread: conflict
+    complexity: conceptual
+    text: "Breaking Bad and The Sopranos and Mad Men all share the 'man getting worse over time' arc. What changed in storytelling around 2000 that made that arc dominant?"
+  # ─── Relationships ─────────────────────────────────────────────────
+  - id: rel-q3-1
+    parent: rel-q1-1
+    category: relationships
+    thread: time
+    complexity: pop_culture
+    text: "Some couples have years of comfortable silence. Some never get there. Is comfortable silence learned, earned, or is it a personality match thing that exists from the start?"
+  - id: rel-q3-2
+    parent: rel-q1-2
+    category: relationships
+    thread: connection
+    complexity: practical
+    text: "Long-friendships-survive-divergent-paths makes sense for childhood friends. Does it work the same for friendships you make in your 30s, or are those structurally different?"
+  - id: rel-q3-3
+    parent: rel-q1-3
+    category: relationships
+    thread: conflict
+    complexity: theoretical
+    text: "If apologizing right is genuinely hard, are there cultures or institutions that teach the skill explicitly? What does that pedagogy look like?"
+  - id: rel-q3-4
+    parent: rel-q1-4
+    category: relationships
+    thread: memory
+    complexity: pop_culture
+    text: "Shared memory bonds people. But two people often remember the same event differently. Does the act of telling and retelling together generate a 'shared version' that becomes more bonding than the original event?"
+  # ─── Music ─────────────────────────────────────────────────────────
+  - id: mus-q3-1
+    parent: mus-q1-1
+    category: music
+    thread: pattern
+    complexity: conceptual
+    text: "If teen-years music hits different because of brain wiring, what about people who didn't have access to music as teens — late refugees, people in restricted environments? What's THEIR equivalent attachment?"
+  - id: mus-q3-2
+    parent: mus-q1-2
+    category: music
+    thread: energy
+    complexity: theoretical
+    text: "If high-energy songs can be engineered, can low-energy songs be engineered the same way? Why does ambient music feel like it resists that kind of analysis?"
+  - id: mus-q3-3
+    parent: mus-q1-3
+    category: music
+    thread: time
+    complexity: conceptual
+    text: "Some odd time signatures (5/4, 7/8) feel jarring; others feel natural after a few listens. What makes 'Take Five' or 'Money' feel resolved despite the unusual count?"
+  - id: mus-q3-4
+    parent: mus-q1-4
+    category: music
+    thread: conflict
+    complexity: practical
+    text: "If genre-collision works in Bohemian Rhapsody, when does it fail catastrophically? What's a clear example of a song trying the same trick and feeling like a medley instead?"
+  # ─── Food / Cooking ────────────────────────────────────────────────
+  - id: fud-q3-1
+    parent: fud-q1-1
+    category: food
+    thread: growth
+    complexity: deep
+    text: "If long ferments give bread depth that fast methods can't match, why isn't slow-fermented bread the default in every bakery? What's the actual tradeoff?"
+  - id: fud-q3-2
+    parent: fud-q1-2
+    category: food
+    thread: energy
+    complexity: theoretical
+    text: "If energy drinks and coffee hit differently despite same caffeine, what does that say about caffeine itself? Is 'caffeine' actually multiple things grouped under one name?"
+  - id: fud-q3-3
+    parent: fud-q1-3
+    category: food
+    thread: beauty
+    complexity: pop_culture
+    text: "If plating changes taste, can it change taste in negative directions too? Could you ruin an excellent dish by plating it badly enough?"
+  - id: fud-q3-4
+    parent: fud-q1-4
+    category: food
+    thread: pattern
+    complexity: practical
+    text: "Maillard reaction needs ~140°C. Why don't we get great Maillard at home — is it temperature, technique, or pan thermal mass?"
+  # ─── History ───────────────────────────────────────────────────────
+  - id: his-q3-1
+    parent: his-q1-1
+    category: history
+    thread: growth
+    complexity: conceptual
+    text: "If Britain's Industrial Revolution required specific local conditions, are there countries today with similar setups that haven't industrialized fully? What's missing?"
+  - id: his-q3-2
+    parent: his-q1-2
+    category: history
+    thread: connection
+    complexity: theoretical
+    text: "If the Silk Road's real value was idea/religion transmission, are there modern equivalents — networks where the trade is cover and the real exchange is information?"
+  - id: his-q3-3
+    parent: his-q1-3
+    category: history
+    thread: time
+    complexity: practical
+    text: "If Roman patterns recur in democracies, what specifically should US-watchers be looking at right now? Or is 'pattern recognition' across centuries always cherry-picked retroactively?"
+  - id: his-q3-4
+    parent: his-q1-4
+    category: history
+    thread: memory
+    complexity: deep
+    text: "Apollo's emotional weight came partly from being live, communal, uncertain. Is the modern parallel watching SpaceX online — or is it impossible to recreate that weight when engineering improvements make things more reliable?"
+  # ─── Chemistry ─────────────────────────────────────────────────────
+  - id: chm-q3-1
+    parent: chm-q1-1
+    category: chemistry
+    thread: energy
+    complexity: pop_culture
+    text: "Photosynthesis stores sun energy in chemical bonds. Burning fossil fuels releases that energy back. If we capture and re-store it through industrial processes, are we functionally just doing photosynthesis at industrial speed?"
+  - id: chm-q3-2
+    parent: chm-q1-2
+    category: chemistry
+    thread: connection
+    complexity: deep
+    text: "Hydrogen bonds give water its anomalies. Are there other 'hydrogen-bond-equivalent' weak interactions that give other molecules unexpected behavior? What molecule has the most surprising emergent property from a small detail?"
+  - id: chm-q3-3
+    parent: chm-q1-3
+    category: chemistry
+    thread: growth
+    complexity: pop_culture
+    text: "Battery degradation is partly mechanical (electrode cracking) and partly chemical (SEI layer growth). Solid-state batteries solve some of that. Where does the next bottleneck show up?"
+  - id: chm-q3-4
+    parent: chm-q1-4
+    category: chemistry
+    thread: pattern
+    complexity: deep
+    text: "If click chemistry's reliability comes from specific structural features, can the principle be extended — could there be 'click biology' for protein engineering?"
+  # ─── Language / Linguistics ────────────────────────────────────────
+  - id: lng-q3-1
+    parent: lng-q1-1
+    category: language
+    thread: pattern
+    complexity: theoretical
+    text: "If smiles and laughter are universal but humor isn't (a joke that crosses cultures fine often falls flat in another), what's actually transferable about emotional expression vs not?"
+  - id: lng-q3-2
+    parent: lng-q1-2
+    category: language
+    thread: memory
+    complexity: pop_culture
+    text: "Adult language learning produces an accent because the auditory cortex stops re-tuning around puberty. Is that lock-in for ALL phonemes, or just the phonemes you weren't exposed to early enough?"
+  - id: lng-q3-3
+    parent: lng-q1-3
+    category: language
+    thread: conflict
+    complexity: pop_culture
+    text: "If Sapir-Whorf is hard to test rigorously, what would convince a hardcore universalist that language DOES shape thought? Or is the position effectively unfalsifiable?"
+  - id: lng-q3-4
+    parent: lng-q1-4
+    category: language
+    thread: time
+    complexity: conceptual
+    text: "Some slang dies because the social group dies; some lives because it filled a real gap in the language. 'Cool' filled a gap. What real gap does 'mid' fill, and will it stay?"
+  # ─── Psychology ────────────────────────────────────────────────────
+  - id: psy-q3-1
+    parent: psy-q1-1
+    category: psychology
+    thread: memory
+    complexity: theoretical
+    text: "If working and long-term memory are different views of the same system, what's actually happening in 'memory consolidation' during sleep? Does sleep just reduce interference, or is something more specific going on?"
+  - id: psy-q3-2
+    parent: psy-q1-2
+    category: psychology
+    thread: connection
+    complexity: pop_culture
+    text: "If we mirror people we're around, what about people we DON'T like? Do we anti-mirror them — adopt opposite traits to differentiate? Is that even a thing?"
+  - id: psy-q3-3
+    parent: psy-q1-3
+    category: psychology
+    thread: conflict
+    complexity: conceptual
+    text: "Cognitive dissonance handles a single inconsistency. What about when you hold dozens of inconsistent beliefs at once — does the brain dissonance-resolve all of them simultaneously, or just whichever one becomes salient?"
+  - id: psy-q3-4
+    parent: psy-q1-4
+    category: psychology
+    thread: growth
+    complexity: deep
+    text: "If personality is mostly stable but experiences can change behavior dramatically, where does that leave the question of 'who you really are'? Is 'real self' a useful concept or a confused one?"
+  # ─── Games / Strategy ──────────────────────────────────────────────
+  - id: gms-q3-1
+    parent: gms-q1-1
+    category: games
+    thread: pattern
+    complexity: deep
+    text: "If 5-minute-game design has a recipe, why have so many flagship mobile games failed at it? Is the recipe necessary-but-not-sufficient?"
+  - id: gms-q3-2
+    parent: gms-q1-2
+    category: games
+    thread: conflict
+    complexity: theoretical
+    text: "If high-level online poker is essentially solved at the math level, what does 'getting better' even mean for top pros now? What's the next skill ceiling?"
+  - id: gms-q3-3
+    parent: gms-q1-3
+    category: games
+    thread: time
+    complexity: pop_culture
+    text: "Computers solved chess endgames by brute force. Is the middle game resistant because it has more positions, or because evaluation in the middle game requires something other than position-counting?"
+  - id: gms-q3-4
+    parent: gms-q1-4
+    category: games
+    thread: beauty
+    complexity: deep
+    text: "The Tetris effect (seeing falling blocks after long play sessions) shows up in other games too. What's it indicating about how the brain is restructuring during play, and is that restructuring useful elsewhere?"
+  # ─── Travel / Geography ────────────────────────────────────────────
+  - id: trv-q3-1
+    parent: trv-q1-1
+    category: travel
+    thread: connection
+    complexity: deep
+    text: "If a place feels familiar from a brief past visit, what about places that feel familiar from books or movies you've never visited? Is that a different mechanism or the same?"
+  - id: trv-q3-2
+    parent: trv-q1-2
+    category: travel
+    thread: memory
+    complexity: theoretical
+    text: "If food travel-memories are vivid, what about smell memories from a place? Walking into a city and recognizing its smell years later — is that as durable as food memory or more fragile?"
+  - id: trv-q3-3
+    parent: trv-q1-3
+    category: travel
+    thread: growth
+    complexity: pop_culture
+    text: "Tokyo and Rome show the difference. Apply the question: which modern city today is going to feel like a 'still itself' city in 100 years, and which will feel transformed beyond recognition?"
+  - id: trv-q3-4
+    parent: trv-q1-4
+    category: travel
+    thread: time
+    complexity: theoretical
+    text: "If airport-crying is partly about flight specifically — speed, distance, irreversibility — what's the equivalent for an interstellar departure? Would astronauts on a generation ship cry differently than people on a flight?"
+  # ─── Art (Visual) ──────────────────────────────────────────────────
+  - id: art-q3-1
+    parent: art-q1-1
+    category: art
+    thread: pattern
+    complexity: theoretical
+    text: "Composition rules might be inherited from natural-vision statistics — humans evolved to find certain spatial arrangements meaningful. Does that mean an AI trained on different visual statistics would produce alien art that humans literally couldn't see as art?"
+  - id: art-q3-2
+    parent: art-q1-2
+    category: art
+    thread: beauty
+    complexity: pop_culture
+    text: "If great art's structural property exists but eludes engineering, is that because it's emergent — only visible after the fact — or because it's actually socially/culturally constructed and there IS no underlying property?"
+  - id: art-q3-3
+    parent: art-q1-3
+    category: art
+    thread: energy
+    complexity: practical
+    text: "If we measure aesthetic response physiologically, could we discover that people who say they love a piece actually have weaker physiological response than people who say they don't? What would that mean about 'taste'?"
+  - id: art-q3-4
+    parent: art-q1-4
+    category: art
+    thread: memory
+    complexity: deep
+    text: "If parody-first exposure doesn't immunize you from the original's impact, what's the threshold for actual immunization? Would seeing it 50 times before the original do it? 500?"
+  # ─── Subversion ────────────────────────────────────────────────────
+  - id: sub-q3-1
+    parent: sub-q1-1
+    category: subversion
+    thread: pattern
+    complexity: theoretical
+    text: "Beyond misdirection, are there riddle structures based on changing a word's meaning mid-sentence, or on requiring impossible knowledge that turns out trivial? What other 'riddle types' actually exist?"
+  - id: sub-q3-2
+    parent: sub-q1-2
+    category: subversion
+    thread: conflict
+    complexity: deep
+    text: "If updating-vs-doubling-down is really about identity protection (the belief is part of who you think you are), are there real techniques for separating beliefs from identity? Or is that fighting human nature?"
+  - id: sub-q3-3
+    parent: sub-q1-3
+    category: subversion
+    thread: beauty
+    complexity: conceptual
+    text: "If earned subversion has a structural prerequisite, what is it? The story has to set up its own subversion fairly — the camera shot that shows the twist but you didn't see it the first time."
+  - id: sub-q3-4
+    parent: sub-q1-4
+    category: subversion
+    thread: memory
+    complexity: practical
+    text: "On rewatch, you notice the actor's choices that signal the truth without revealing it. Is the rewatch experience BETTER than the first watch in some way? Most twist movies aren't — what makes The Sixth Sense different?"