Spaces:

juiceb0xc0de
/

forge

Sleeping

App Files Files Community

forge / frontend /data.js

juiceb0xc0de's picture

Forge: initial deploy — API + static UI over prebuilt forge.db (seeded at build)

00cabba about 1 month ago

History Blame Contribute Delete

26.4 kB

	// Forge — mock graph + resolve engine
	// Matches the FastAPI shapes in the handoff doc, so swap to fetch() later.

	// Component catalogue for the mock graph.
	// Rows are grouped by node type; each row is [canonical, name, description, tier]
	// and is expanded into { canonical, name, type, description, tier }.
	window.FORGE_NODES = [
	  ["optimizer", [
	    ["adamw", "AdamW", "Workhorse decoupled-weight-decay optimizer. Safe default.", 1],
	    ["adamw_8bit", "AdamW 8-bit", "bitsandbytes 8-bit AdamW. Saves VRAM. Default param groups can fight per-layer LR tricks.", 1],
	    ["paged_adamw", "Paged AdamW", "CPU-paged optimizer states. For when you really can't fit.", 2],
	    ["lion", "Lion", "Sign-momentum optimizer. Lower memory than AdamW; needs lower LR.", 2],
	    ["muon", "Muon", "Newton–Schulz orthogonalized momentum. Faster convergence on hidden weights.", 2],
	    ["sophia", "Sophia-G", "Hessian-informed second-order. Promising for LLMs.", 3],
	    ["adafactor", "Adafactor", "Memory-light. Tricky LR schedule.", 2],
	  ]],

	  ["scheduler", [
	    ["cosine", "Cosine", "Cosine decay with warmup. Boring; works.", 1],
	    ["onecycle", "OneCycle", "Aggressive warm-then-anneal. Faster but can overshoot on long runs.", 2],
	    ["dlrs", "DLRS", "Rick's dynamic LR scheduler. #1 on val_loss in the bench.", 1],
	    ["linear", "Linear", "Linear warmup → linear decay.", 1],
	    ["wsd", "WSD", "Warmup–Stable–Decay. Continual-pretrain friendly.", 2],
	    ["constant", "Constant", "Flat. Combine with manual restarts.", 1],
	  ]],

	  ["technique", [
	    ["qlora", "QLoRA", "4-bit base + LoRA adapters. Lets a 70B fit on one card.", 1],
	    ["lora", "LoRA", "Low-rank adapters. Cheap, composable, the default PEFT.", 1],
	    ["fft", "Full Fine-Tune", "Update every parameter. Hungry. Mutually exclusive with adapter methods.", 1],
	    ["grad_ckpt", "Grad Checkpoint", "Trade FLOPs for VRAM. Must be off for vLLM gen during training.", 1],
	    ["per_layer_lr_rotation", "Per-Layer LR Rotation", "Rick's trick: rotate LR across layer bands per step. Needs custom param groups.", 1],
	    ["chaos_inject", "Chaos Injectors", "Activation perturbation at hidden layers. NaN-prone without staged melt-in.", 1],
	    ["jacobian_reg", "Jacobian Reg", "Smoothness penalty via Jacobian. Forward pass corrupts the injector cache.", 1],
	    ["fsdp", "FSDP", "Fully-Sharded Data Parallel. Sharding for big models.", 1],
	    ["ddp", "DDP", "Vanilla data-parallel. Cheap when the model fits.", 1],
	    ["deepspeed_z3", "DeepSpeed ZeRO-3", "ZeRO stage 3 partitioning. Battle-tested.", 1],
	    ["unsloth", "Unsloth", "Fused kernels for LoRA/QLoRA. PyTorch-only; tight coupling to bnb.", 2],
	  ]],

	  ["quantization", [
	    ["bnb_4bit", "bnb 4-bit", "bitsandbytes NF4. The QLoRA base.", 1],
	    ["bnb_8bit", "bnb 8-bit", "LLM.int8(). Inference-leaning; training works.", 1],
	    ["gptq", "GPTQ", "Post-training quant. Inference-only for our purposes.", 2],
	    ["awq", "AWQ", "Activation-aware weight quant. Inference-time.", 2],
	  ]],

	  ["architecture", [
	    ["llama3", "Llama-3", "Llama-3 8B / 70B family.", 1],
	    ["mistral", "Mistral", "Mistral / Mixtral.", 1],
	    ["qwen2", "Qwen-2.5", "Strong open multilingual base.", 1],
	    ["distilbert", "DistilBERT", "The bench model. SST-2 sandbox.", 1],
	  ]],

	  ["inference", [
	    ["vllm", "vLLM", "PagedAttention server. Needs grad-ckpt off during in-train generation.", 1],
	    ["sglang", "SGLang", "Structured-gen server.", 2],
	  ]],
	].flatMap(([type, rows]) =>
	  rows.map(([canonical, name, description, tier]) => ({ canonical, name, type, description, tier })),
	);

	// Relationship edges between catalogue nodes.
	// relation is one of: REQUIRES, CONDITIONAL, COMPATIBLE, DEGRADES, BREAKS.
	// Edges are directed from → to. For REQUIRES that reads "from needs to to work".
	// BREAKS is symmetric in display but stored once.
	window.FORGE_EDGES = (() => {
	  // Evidence URLs cited by more than one edge.
	  const TRAINER_NOTES = "mempalace://chaos-injection-trainer-notes";
	  const QLORA_PAPER = "https://arxiv.org/abs/2305.14314";
	  const PEFT_DOCS = "https://huggingface.co/docs/peft";
	  const MUON_POST = "https://kellerjordan.github.io/posts/muon/";
	  const UNSLOTH_REPO = "https://github.com/unslothai/unsloth";
	  const LR_BENCH = "https://huggingface.co/spaces/juiceb0xc0de/lr-scheduler-benchmark";

	  // One evidence record: { url, quote, source_type, tier }.
	  const cite = (source_type, tier, url, quote) => ({ url, quote, source_type, tier });

	  // A COMPATIBLE edge that carries neither fix text nor evidence.
	  const bareCompatible = (from, to, tier, confidence) => ({
	    from, to, relation: "COMPATIBLE", tier, confidence, fix: "", evidence: [],
	  });

	  return [
	    // --- T1 verified seed (from the backend doc) ---------------------------
	    {
	      from: "per_layer_lr_rotation", to: "adamw_8bit", relation: "BREAKS", tier: 1, confidence: 1.0,
	      fix: "Pass custom optimizer_grouped_parameters — adamw_8bit's default decay/no-decay split overrides your per-layer LR bands.",
	      evidence: [cite("practitioner_run", 1, TRAINER_NOTES,
	        "adamw_8bit produces decay/no-decay param groups by default; per-layer LR rotation silently no-ops unless you pass optimizer_grouped_parameters yourself.")],
	    },
	    {
	      from: "jacobian_reg", to: "chaos_inject", relation: "BREAKS", tier: 1, confidence: 1.0,
	      fix: "Jacobian-reg's extra forward pass corrupts the injector activation cache → ortho_loss is poisoned. Disable one.",
	      evidence: [cite("practitioner_run", 1, TRAINER_NOTES,
	        "the jacobian reg forward overwrites the activation cache that chaos_inject samples from; ortho_loss explodes.")],
	    },
	    {
	      from: "chaos_inject", to: "staged_meltin", relation: "REQUIRES", tier: 1, confidence: 1.0,
	      fix: "Stage injectors in over ~100 steps; NaN at layer ~6 if injected from step 0.",
	      evidence: [cite("practitioner_run", 1, TRAINER_NOTES,
	        "NaN at injector layer ~6 if run from step 0; staged melt-in over 100 steps fixed it.")],
	    },
	    {
	      from: "grad_ckpt", to: "vllm", relation: "BREAKS", tier: 1, confidence: 1.0,
	      fix: "Disable gradient_checkpointing for the in-train vLLM gen pass. TRL-documented.",
	      evidence: [cite("official_docs", 1, "https://huggingface.co/docs/trl",
	        "gradient_checkpointing must be disabled when generating with vLLM during training.")],
	    },

	    // --- core technique mutex / requires -----------------------------------
	    {
	      from: "qlora", to: "fft", relation: "BREAKS", tier: 1, confidence: 1.0,
	      fix: "QLoRA freezes the base model; Full Fine-Tune updates it. Pick one.",
	      evidence: [cite("paper", 1, QLORA_PAPER,
	        "QLoRA backprops gradients through a frozen 4-bit quantized model into low-rank adapters.")],
	    },
	    {
	      from: "qlora", to: "bnb_4bit", relation: "REQUIRES", tier: 1, confidence: 1.0,
	      fix: "QLoRA is defined as 4-bit NF4 base + LoRA adapters — load the base in bnb 4-bit.",
	      evidence: [cite("official_docs", 1, PEFT_DOCS,
	        "QLoRA fine-tunes a 4-bit quantized base model loaded via bitsandbytes NF4.")],
	    },
	    {
	      from: "qlora", to: "lora", relation: "REQUIRES", tier: 1, confidence: 1.0,
	      fix: "QLoRA = 4-bit base + LoRA adapters. The adapter rank is your hyperparameter.",
	      evidence: [cite("paper", 1, QLORA_PAPER,
	        "QLoRA augments the frozen quantized model with Low-Rank Adapters.")],
	    },
	    {
	      from: "lora", to: "fft", relation: "BREAKS", tier: 2, confidence: 0.85,
	      fix: "Adapter-method and full fine-tune are mutually exclusive within one run.",
	      evidence: [cite("official_docs", 2, PEFT_DOCS,
	        "PEFT methods freeze the base; choose either full fine-tuning or a PEFT method per run.")],
	    },

	    // --- optimizer ↔ quantization ------------------------------------------
	    {
	      from: "muon", to: "adamw_8bit", relation: "BREAKS", tier: 2, confidence: 0.8,
	      fix: "Muon owns the optimizer step for hidden weights; 8-bit AdamW state is incompatible with the Newton–Schulz update.",
	      evidence: [cite("blog", 2, MUON_POST,
	        "Muon replaces the optimizer update for 2D weights; use AdamW for the rest, not its 8-bit variant.")],
	    },
	    {
	      from: "muon", to: "adamw", relation: "REQUIRES", tier: 2, confidence: 0.9,
	      fix: "Muon only updates 2D hidden weights — embeddings + biases still need AdamW.",
	      evidence: [cite("blog", 2, MUON_POST,
	        "non-hidden parameters (embeddings, scalars) are handled by a standard AdamW.")],
	    },
	    {
	      from: "lion", to: "bnb_8bit", relation: "DEGRADES", tier: 3, confidence: 0.5,
	      fix: "Lion sign-update interacts poorly with 8-bit state quant — drop to 16-bit moments.",
	      evidence: [cite("issue", 3, "https://github.com/bitsandbytes-foundation/bitsandbytes/issues",
	        "single user report: Lion+8-bit moments diverged at step 4k on a 7B base.")],
	    },

	    // --- unsloth / quant family --------------------------------------------
	    {
	      from: "unsloth", to: "bnb_4bit", relation: "REQUIRES", tier: 1, confidence: 1.0,
	      fix: "Unsloth's fused kernels assume a bnb 4-bit base.",
	      evidence: [cite("official_docs", 1, UNSLOTH_REPO,
	        "Unsloth supports 4-bit quantized models via bitsandbytes for QLoRA fine-tuning.")],
	    },
	    {
	      from: "unsloth", to: "lora", relation: "REQUIRES", tier: 1, confidence: 1.0,
	      fix: "Unsloth's fast path is the LoRA / QLoRA path.",
	      evidence: [cite("official_docs", 1, UNSLOTH_REPO,
	        "Unsloth accelerates LoRA and QLoRA fine-tuning with custom Triton kernels.")],
	    },
	    {
	      from: "unsloth", to: "fsdp", relation: "BREAKS", tier: 2, confidence: 0.8,
	      fix: "Unsloth's custom kernels don't compose with FSDP sharding hooks today.",
	      evidence: [cite("issue", 2, "https://github.com/unslothai/unsloth/issues",
	        "FSDP is not currently supported alongside Unsloth's fused kernels.")],
	    },

	    // --- inference quants (category errors) --------------------------------
	    {
	      from: "awq", to: "fft", relation: "BREAKS", tier: 2, confidence: 0.85,
	      fix: "AWQ is an inference-time weight quant. You can't fine-tune through it.",
	      evidence: [cite("official_docs", 2, "https://github.com/casper-hansen/AutoAWQ",
	        "AWQ is intended for post-training quantization for inference.")],
	    },
	    {
	      from: "gptq", to: "fft", relation: "BREAKS", tier: 2, confidence: 0.85,
	      fix: "GPTQ is post-training quant — frozen base only.",
	      evidence: [cite("paper", 2, "https://arxiv.org/abs/2210.17323",
	        "GPTQ is a one-shot post-training quantization method.")],
	    },

	    // --- distributed -------------------------------------------------------
	    {
	      from: "fsdp", to: "bnb_4bit", relation: "CONDITIONAL", tier: 2, confidence: 0.8,
	      conditions: { plugin: "bnb-fsdp" },
	      fix: "Works only with the bnb-FSDP plugin; vanilla FSDP shards over uninitialized 4-bit weights.",
	      evidence: [cite("official_docs", 2, "https://huggingface.co/docs/accelerate",
	        "FSDP + bitsandbytes 4-bit requires the bnb-fsdp wrap policy.")],
	    },
	    {
	      from: "deepspeed_z3", to: "bnb_8bit", relation: "DEGRADES", tier: 3, confidence: 0.5,
	      fix: "Reports of slowdown / hangs on multi-node Z3 + 8-bit. Use bf16 weights, 8-bit optimizer states only.",
	      evidence: [cite("issue", 3, "https://github.com/microsoft/DeepSpeed/issues",
	        "Z3 + 8-bit weights hang on the param-gather step in some configs.")],
	    },

	    // --- scheduler benchmark relations -------------------------------------
	    {
	      from: "dlrs", to: "distilbert", relation: "COMPATIBLE", tier: 1, confidence: 1.0,
	      fix: "DLRS #1 on the SST-2 bench: val_loss 0.2653, val_acc 0.890, steps_to_target 266.7 (n=3 seeds).",
	      evidence: [cite("benchmark", 1, LR_BENCH,
	        "DLRS leads on val_loss across 3 seeds on distilbert/sst2.")],
	    },
	    {
	      from: "onecycle", to: "distilbert", relation: "DEGRADES", tier: 1, confidence: 1.0,
	      fix: "OneCycle underperforms on the SST-2 bench: val_loss 0.4284 vs cohort cutoff 0.4022.",
	      evidence: [cite("benchmark", 1, LR_BENCH,
	        "OneCycle val_loss 0.4284 is above the cohort cutoff 0.4022.")],
	    },

	    // --- compatible/positive edges (just so the graph isn't all conflict) --
	    bareCompatible("lora", "bnb_8bit", 2, 0.8),
	    bareCompatible("lora", "bnb_4bit", 1, 1.0),
	    bareCompatible("grad_ckpt", "fsdp", 1, 1.0),
	    bareCompatible("grad_ckpt", "qlora", 1, 1.0),
	    bareCompatible("cosine", "adamw", 1, 1.0),
	    bareCompatible("dlrs", "adamw", 1, 1.0),
	    bareCompatible("fsdp", "llama3", 1, 1.0),
	    bareCompatible("qlora", "llama3", 1, 1.0),
	    bareCompatible("qlora", "mistral", 1, 1.0),
	    bareCompatible("qlora", "qwen2", 1, 1.0),
	    bareCompatible("lora", "distilbert", 1, 1.0),
	    bareCompatible("vllm", "llama3", 1, 1.0),
	    bareCompatible("vllm", "mistral", 1, 1.0),
	    bareCompatible("deepspeed_z3", "fft", 1, 1.0),
	    bareCompatible("fsdp", "fft", 1, 1.0),
	    bareCompatible("muon", "llama3", 2, 0.8),
	  ];
	})();

	// add "staged melt-in" as a hidden technique reachable as a fix-target
	{
	  // Appended after the main catalogue literal rather than listed inside it.
	  const stagedMeltIn = {
	    canonical: "staged_meltin",
	    name: "Staged Melt-In",
	    type: "technique",
	    description: "Linear ramp-in of chaos injectors over N steps. Prevents NaN at layer ~6.",
	    tier: 1,
	  };
	  window.FORGE_NODES.push(stagedMeltIn);
	}

	// ============================================================================
	// RESOLVE ENGINE
	// ============================================================================
	// For each non-selected node N, look at every edge between N and a selected S:
	//   BREAKS (either direction)      → N is blocked
	//   CONDITIONAL (either direction) → N is conditional; the edge's conditions are attached
	//   REQUIRES(S → N)                → S needs N; N is marked "needed by S" (conditional)
	//   REQUIRES(N → S)                → N needs S and S is selected → satisfied, ignored
	//   DEGRADES (either direction)    → N stays available but carries a warning
	//   COMPATIBLE                     → no effect; N stays available
	// Edges whose other endpoint is unselected, or missing from FORGE_NODES, are ignored.
	// A node's own REQUIRES(N → X) with X unselected does NOT change N's status here.

	/**
	 * Compute the status of every node in window.FORGE_NODES for a given selection,
	 * using the edges in window.FORGE_EDGES.
	 *
	 * @param {Iterable<string>} [selectedCanons] canonical ids currently selected.
	 * @returns {Object<string, {status: string, reasons: Array<Object>, warnings: Array<Object>}>}
	 *   Map from canonical id to its resolution. status is "selected", "blocked",
	 *   "conditional" or "available"; "blocked" wins over "conditional".
	 */
	window.forgeResolve = function (selectedCanons) {
	  const sel = new Set(selectedCanons);
	  const out = {};

	  // Maps rather than plain objects: a canonical id such as "constructor" or
	  // "toString" must not collide with members inherited from Object.prototype.
	  const nodeByCanon = new Map(window.FORGE_NODES.map((n) => [n.canonical, n]));
	  const outgoing = new Map();
	  const incoming = new Map();
	  const append = (index, key, edge) => {
	    const list = index.get(key);
	    if (list) list.push(edge);
	    else index.set(key, [edge]);
	  };
	  for (const e of window.FORGE_EDGES) {
	    append(outgoing, e.from, e);
	    append(incoming, e.to, e);
	  }

	  for (const n of window.FORGE_NODES) {
	    if (sel.has(n.canonical)) {
	      out[n.canonical] = { status: "selected", reasons: [], warnings: [] };
	      continue;
	    }
	    const reasons = [];
	    const warnings = [];

	    // Every edge touching n: outgoing first, then incoming.
	    const adj = [...(outgoing.get(n.canonical) || []), ...(incoming.get(n.canonical) || [])];

	    for (const e of adj) {
	      // "out" means n -> other, "in" means other -> n.
	      const direction = e.from === n.canonical ? "out" : "in";
	      const otherCanon = direction === "out" ? e.to : e.from;
	      const other = nodeByCanon.get(otherCanon);
	      // Only edges to a known, currently selected node affect n.
	      if (!other || !sel.has(otherCanon)) continue;

	      if (e.relation === "BREAKS") {
	        reasons.push({
	          kind: "blocked",
	          label: `breaks with ${other.name}`,
	          fix: e.fix,
	          evidence: e.evidence,
	          tier: e.tier,
	          confidence: e.confidence,
	          otherCanon,
	        });
	      } else if (e.relation === "CONDITIONAL") {
	        reasons.push({
	          kind: "conditional",
	          label: `conditional on ${other.name}`,
	          fix: e.fix,
	          conditions: e.conditions,
	          evidence: e.evidence,
	          tier: e.tier,
	          otherCanon,
	        });
	      } else if (e.relation === "REQUIRES" && direction === "in") {
	        // other -> n means "other requires n". other is selected, n is not,
	        // so n is a missing ingredient.
	        reasons.push({
	          kind: "needed-by",
	          label: `needed by ${other.name}`,
	          fix: e.fix,
	          evidence: e.evidence,
	          tier: e.tier,
	          otherCanon,
	        });
	      } else if (e.relation === "DEGRADES") {
	        warnings.push({
	          label: `degrades with ${other.name}`,
	          fix: e.fix,
	          evidence: e.evidence,
	          tier: e.tier,
	          otherCanon,
	        });
	      }
	    }

	    let status = "available";
	    if (reasons.some((r) => r.kind === "blocked")) status = "blocked";
	    else if (reasons.some((r) => r.kind === "conditional" || r.kind === "needed-by")) status = "conditional";

	    out[n.canonical] = { status, reasons, warnings };
	  }

	  return out;
	};

	// recipe payload
	/**
	 * Validate a selection against window.FORGE_EDGES and build the recipe payload.
	 *
	 * - conflicts: BREAKS edges whose two endpoints are both selected.
	 * - warnings:  DEGRADES edges whose two endpoints are both selected.
	 * - unmet:     REQUIRES edges whose `from` is selected but whose `to` is not.
	 *   (A REQUIRES edge whose `from` is not selected imposes nothing.)
	 *
	 * Node names are looked up in window.FORGE_NODES; when an edge references a
	 * canonical id that has no node, the canonical id itself is used as the name
	 * instead of throwing.
	 *
	 * @param {Array<string>} selectedCanons canonical ids currently selected.
	 * @returns {{selected: Array<string>, valid: boolean, conflicts: Array<Object>,
	 *   unmet: Array<Object>, warnings: Array<Object>, scheduler_picks: Array<Object>,
	 *   scheduler_warnings: Array<Object>}} valid is true only when there are no
	 *   conflicts and no unmet requirements.
	 */
	window.forgeRecipe = function (selectedCanons) {
	  const sel = new Set(selectedCanons);
	  const nodeByCanon = new Map(window.FORGE_NODES.map((n) => [n.canonical, n]));
	  // Display name for a canonical id; falls back to the id for unknown nodes.
	  const nameOf = (canon) => {
	    const node = nodeByCanon.get(canon);
	    return node ? node.name : canon;
	  };

	  const conflicts = [];
	  const unmet = [];
	  const warnings = [];

	  for (const e of window.FORGE_EDGES) {
	    const a = sel.has(e.from);
	    const b = sel.has(e.to);
	    if (a && b) {
	      if (e.relation === "BREAKS") {
	        conflicts.push({ a: nameOf(e.from), b: nameOf(e.to), fix: e.fix, tier: e.tier, evidence: e.evidence });
	      }
	      if (e.relation === "DEGRADES") {
	        warnings.push({ a: nameOf(e.from), b: nameOf(e.to), fix: e.fix, tier: e.tier, evidence: e.evidence });
	      }
	    } else if (a && !b && e.relation === "REQUIRES") {
	      unmet.push({ from: nameOf(e.from), missing: nameOf(e.to), missingCanon: e.to, fix: e.fix, tier: e.tier, evidence: e.evidence });
	    }
	  }

	  // scheduler benchmark cohort (mock real bench data); returned regardless of the selection
	  const scheduler_picks = [
	    { component: "DLRS", val_loss: 0.26528, val_acc: 0.89029, steps_to_target: 266.7, n_seeds: 3, source: "https://huggingface.co/spaces/juiceb0xc0de/lr-scheduler-benchmark", canonical: "dlrs" },
	    { component: "Cosine", val_loss: 0.31417, val_acc: 0.87959, steps_to_target: 312.0, n_seeds: 3, source: "https://huggingface.co/spaces/juiceb0xc0de/lr-scheduler-benchmark", canonical: "cosine" },
	    { component: "WSD", val_loss: 0.34081, val_acc: 0.86790, steps_to_target: 348.3, n_seeds: 3, source: "https://huggingface.co/spaces/juiceb0xc0de/lr-scheduler-benchmark", canonical: "wsd" },
	  ];
	  const scheduler_warnings = [
	    { component: "OneCycle", val_loss: 0.42844, cutoff: 0.4022, canonical: "onecycle" },
	  ];

	  const valid = conflicts.length === 0 && unmet.length === 0;

	  return { selected: selectedCanons, valid, conflicts, unmet, warnings, scheduler_picks, scheduler_warnings };
	};

	// ============================================================================
	// LIVE DISCOVERIES FEED (mock SSE)
	// ============================================================================
	// Initial feed entries. Each row is [a, relation, b, tier, source, ts_offset].
	// NOTE(review): ts_offset looks like seconds relative to "now" (negative = in the
	// past) — confirm against the feed renderer.
	window.FORGE_DISCOVERIES = [
	  ["Gradient Checkpointing", "COMPATIBLE", "LoRA", 2, "huggingface.co/docs/peft", -8],
	  ["Muon", "REQUIRES", "AdamW", 2, "kellerjordan.github.io", -22],
	  ["DeepSpeed ZeRO-3", "DEGRADES", "bnb 8-bit", 3, "github.com/microsoft/DeepSpeed", -41],
	  ["QLoRA", "COMPATIBLE", "Qwen-2.5", 2, "qwenlm.github.io", -67],
	  ["FSDP", "CONDITIONAL", "bnb 4-bit", 2, "huggingface.co/docs/accelerate", -94],
	  ["Sophia-G", "DEGRADES", "bnb 8-bit", 3, "github.com/Liuhong99/Sophia", -128],
	  ["WSD", "COMPATIBLE", "Continual PT", 2, "arxiv.org/abs/2404.06395", -171],
	  ["Unsloth", "BREAKS", "FSDP", 2, "github.com/unslothai/unsloth", -210],
	  ["AWQ", "BREAKS", "Full Fine-Tune", 2, "github.com/casper-hansen/AutoAWQ", -266],
	  ["OneCycle", "DEGRADES", "distilbert/sst2", 1, "hf.co/spaces/juiceb0xc0de/lr-scheduler-benchmark", -311],
	].map(([a, relation, b, tier, source, ts_offset]) => ({ a, relation, b, tier, source, ts_offset }));

	// New discoveries intended to stream into the feed over time.
	// Each row is [a, relation, b, tier, source]. The entries themselves carry no
	// ts_offset — presumably the consumer timestamps them on emission; confirm there.
	window.FORGE_FUTURE_DISCOVERIES = [
	  ["Per-Layer LR Rotation", "BREAKS", "AdamW 8-bit", 1, "mempalace://rick-notes"],
	  ["Jacobian Reg", "BREAKS", "Chaos Injectors", 1, "mempalace://rick-notes"],
	  ["Lion", "DEGRADES", "bnb 8-bit", 3, "github.com/bitsandbytes-foundation/bitsandbytes"],
	  ["Paged AdamW", "COMPATIBLE", "QLoRA", 2, "huggingface.co/docs/transformers"],
	  ["DLRS", "COMPATIBLE", "WSD", 2, "hf.co/spaces/juiceb0xc0de/lr-scheduler-benchmark"],
	  ["Cosine", "COMPATIBLE", "AdamW", 1, "pytorch.org/docs"],
	  ["FlashAttention-3", "COMPATIBLE", "Llama-3", 2, "github.com/Dao-AILab/flash-attention"],
	  ["Liger Kernel", "COMPATIBLE", "Unsloth", 3, "github.com/linkedin/Liger-Kernel"],
	].map(([a, relation, b, tier, source]) => ({ a, relation, b, tier, source }));

	// ============================================================================
	// LIVE API BOOTSTRAP — prefer the live Forge API; fall back to the bundled mock.
	// Default base is same-origin (the API serves this frontend in prod).
	// Split-port dev: ?api=http://localhost:8010
	// ============================================================================
	// Base URL of the Forge API: the value of the first `api` query parameter, or ""
	// (same-origin) when the parameter is absent or empty.
	window.FORGE_API = (function () {
	  // URLSearchParams decodes the value itself and never throws on a malformed
	  // "%" escape; decodeURIComponent would raise URIError here and abort the
	  // rest of this script at load time.
	  const api = new URLSearchParams(location.search).get("api");
	  return api || "";
	})();
	// True once the graph has been replaced by data from the live API.
	window.FORGE_LIVE = false;

	/**
	 * Try to replace the bundled mock data with data from the live Forge API.
	 *
	 * Fetches `<FORGE_API>/graph` and, when it returns a non-empty `nodes` array and
	 * an `edges` array, overwrites window.FORGE_NODES / window.FORGE_EDGES and sets
	 * window.FORGE_LIVE. Then fetches `<FORGE_API>/feed?n=12` and, when it returns a
	 * non-empty array, overwrites window.FORGE_DISCOVERIES.
	 *
	 * Any network, HTTP-status or JSON error is logged with console.warn and
	 * swallowed; whatever data was in place at that point is kept.
	 *
	 * @returns {Promise<boolean>} the value of window.FORGE_LIVE after the attempt.
	 */
	window.forgeBootstrap = async function () {
	  // Drop one trailing slash so base + "/graph" never contains "//".
	  const base = window.FORGE_API.replace(/\/$/, "");

	  // GET a JSON document, bypassing the HTTP cache. A non-2xx response is a
	  // failure even if its body happens to be valid JSON.
	  const getJson = async (path) => {
	    const res = await fetch(base + path, { cache: "no-store" });
	    if (!res.ok) throw new Error(`HTTP ${res.status} from ${path}`);
	    return res.json();
	  };

	  try {
	    const g = await getJson("/graph");
	    if (g && Array.isArray(g.nodes) && g.nodes.length && Array.isArray(g.edges)) {
	      // A node's tier is the lowest tier among the edges that touch it;
	      // an edge without a tier counts as 2, as does a node without edges.
	      const tierByNode = new Map();
	      for (const e of g.edges) {
	        for (const c of [e.from_canon, e.to_canon]) {
	          const seen = tierByNode.get(c);
	          tierByNode.set(c, Math.min(seen == null ? 9 : seen, e.tier || 2));
	        }
	      }
	      // Translate the API field names into the shapes the mock data uses.
	      window.FORGE_NODES = g.nodes.map((n) => ({
	        canonical: n.canonical, name: n.name, type: n.type,
	        description: n.description || "", tier: tierByNode.get(n.canonical) || 2,
	      }));
	      window.FORGE_EDGES = g.edges.map((e) => ({
	        from: e.from_canon, to: e.to_canon, relation: e.relation, tier: e.tier,
	        confidence: e.confidence, fix: e.fix || "", conditions: e.conditions || {},
	        evidence: e.evidence || [],
	      }));
	      window.FORGE_LIVE = true;
	    }
	    const feed = await getJson("/feed?n=12");
	    if (Array.isArray(feed) && feed.length) {
	      window.FORGE_DISCOVERIES = feed.map((f) => ({
	        a: f.a, relation: f.relation, b: f.b, tier: f.tier, source: "bright data", ts_offset: 0,
	      }));
	    }
	  } catch (err) {
	    console.warn("[forge] live API unavailable, using bundled mock:", err.message);
	  }
	  return window.FORGE_LIVE;
	};