// [Page chrome captured with this file — Hugging Face "Spaces" banner, status: Sleeping. Not part of the script.]
// Forge — mock graph + resolve engine
// Matches the FastAPI shapes in the handoff doc, so swap to fetch() later.

/**
 * Bundled mock node catalogue.
 *
 * Each entry has:
 *   canonical   — string id; FORGE_EDGES refers to nodes by this value in `from` / `to`
 *   name        — display label (used in the reason labels built by forgeResolve)
 *   type        — one of "optimizer", "scheduler", "technique", "quantization",
 *                 "architecture", "inference" in the bundled data
 *   description — short free-text blurb
 *   tier        — integer 1–3 in the bundled data
 *                 (NOTE(review): presumably an evidence/verification tier — confirm)
 */
window.FORGE_NODES = [
  // optimizers
  { canonical: "adamw", name: "AdamW", type: "optimizer", description: "Workhorse decoupled-weight-decay optimizer. Safe default.", tier: 1 },
  { canonical: "adamw_8bit", name: "AdamW 8-bit", type: "optimizer", description: "bitsandbytes 8-bit AdamW. Saves VRAM. Default param groups can fight per-layer LR tricks.", tier: 1 },
  { canonical: "paged_adamw", name: "Paged AdamW", type: "optimizer", description: "CPU-paged optimizer states. For when you really can't fit.", tier: 2 },
  { canonical: "lion", name: "Lion", type: "optimizer", description: "Sign-momentum optimizer. Lower memory than AdamW; needs lower LR.", tier: 2 },
  { canonical: "muon", name: "Muon", type: "optimizer", description: "Newton–Schulz orthogonalized momentum. Faster convergence on hidden weights.", tier: 2 },
  { canonical: "sophia", name: "Sophia-G", type: "optimizer", description: "Hessian-informed second-order. Promising for LLMs.", tier: 3 },
  { canonical: "adafactor", name: "Adafactor", type: "optimizer", description: "Memory-light. Tricky LR schedule.", tier: 2 },
  // schedulers
  { canonical: "cosine", name: "Cosine", type: "scheduler", description: "Cosine decay with warmup. Boring; works.", tier: 1 },
  { canonical: "onecycle", name: "OneCycle", type: "scheduler", description: "Aggressive warm-then-anneal. Faster but can overshoot on long runs.", tier: 2 },
  { canonical: "dlrs", name: "DLRS", type: "scheduler", description: "Rick's dynamic LR scheduler. #1 on val_loss in the bench.", tier: 1 },
  { canonical: "linear", name: "Linear", type: "scheduler", description: "Linear warmup → linear decay.", tier: 1 },
  { canonical: "wsd", name: "WSD", type: "scheduler", description: "Warmup–Stable–Decay. Continual-pretrain friendly.", tier: 2 },
  { canonical: "constant", name: "Constant", type: "scheduler", description: "Flat. Combine with manual restarts.", tier: 1 },
  // techniques
  { canonical: "qlora", name: "QLoRA", type: "technique", description: "4-bit base + LoRA adapters. Lets a 70B fit on one card.", tier: 1 },
  { canonical: "lora", name: "LoRA", type: "technique", description: "Low-rank adapters. Cheap, composable, the default PEFT.", tier: 1 },
  { canonical: "fft", name: "Full Fine-Tune", type: "technique", description: "Update every parameter. Hungry. Mutually exclusive with adapter methods.", tier: 1 },
  { canonical: "grad_ckpt", name: "Grad Checkpoint", type: "technique", description: "Trade FLOPs for VRAM. Must be off for vLLM gen during training.", tier: 1 },
  { canonical: "per_layer_lr_rotation", name: "Per-Layer LR Rotation", type: "technique", description: "Rick's trick: rotate LR across layer bands per step. Needs custom param groups.", tier: 1 },
  { canonical: "chaos_inject", name: "Chaos Injectors", type: "technique", description: "Activation perturbation at hidden layers. NaN-prone without staged melt-in.", tier: 1 },
  { canonical: "jacobian_reg", name: "Jacobian Reg", type: "technique", description: "Smoothness penalty via Jacobian. Forward pass corrupts the injector cache.", tier: 1 },
  { canonical: "fsdp", name: "FSDP", type: "technique", description: "Fully-Sharded Data Parallel. Sharding for big models.", tier: 1 },
  { canonical: "ddp", name: "DDP", type: "technique", description: "Vanilla data-parallel. Cheap when the model fits.", tier: 1 },
  { canonical: "deepspeed_z3", name: "DeepSpeed ZeRO-3", type: "technique", description: "ZeRO stage 3 partitioning. Battle-tested.", tier: 1 },
  { canonical: "unsloth", name: "Unsloth", type: "technique", description: "Fused kernels for LoRA/QLoRA. PyTorch-only; tight coupling to bnb.", tier: 2 },
  // quantization
  { canonical: "bnb_4bit", name: "bnb 4-bit", type: "quantization", description: "bitsandbytes NF4. The QLoRA base.", tier: 1 },
  { canonical: "bnb_8bit", name: "bnb 8-bit", type: "quantization", description: "LLM.int8(). Inference-leaning; training works.", tier: 1 },
  { canonical: "gptq", name: "GPTQ", type: "quantization", description: "Post-training quant. Inference-only for our purposes.", tier: 2 },
  { canonical: "awq", name: "AWQ", type: "quantization", description: "Activation-aware weight quant. Inference-time.", tier: 2 },
  // architectures
  { canonical: "llama3", name: "Llama-3", type: "architecture", description: "Llama-3 8B / 70B family.", tier: 1 },
  { canonical: "mistral", name: "Mistral", type: "architecture", description: "Mistral / Mixtral.", tier: 1 },
  { canonical: "qwen2", name: "Qwen-2.5", type: "architecture", description: "Strong open multilingual base.", tier: 1 },
  { canonical: "distilbert", name: "DistilBERT", type: "architecture", description: "The bench model. SST-2 sandbox.", tier: 1 },
  // inference
  { canonical: "vllm", name: "vLLM", type: "inference", description: "PagedAttention server. Needs grad-ckpt off during in-train generation.", tier: 1 },
  { canonical: "sglang", name: "SGLang", type: "inference", description: "Structured-gen server.", tier: 2 },
];
/**
 * Bundled mock edges between nodes (endpoints are node `canonical` ids).
 *
 * relation is one of: REQUIRES, CONDITIONAL, COMPATIBLE, DEGRADES, BREAKS.
 * Direction is from → to. For REQUIRES: `from` needs `to` in order to work.
 * BREAKS is symmetric in display but stored once.
 *
 * Each entry has: from, to, relation, tier, confidence (0–1 in the bundled data),
 * fix (free text, "" when there is nothing to say), evidence (array of
 * { url, quote, source_type, tier }), and — on the CONDITIONAL edge — conditions.
 *
 * Note: the "staged_meltin" endpoint is not listed in the main FORGE_NODES literal;
 * it is appended to FORGE_NODES by a separate push after this array.
 */
window.FORGE_EDGES = [
  // --- T1 verified seed (from the backend doc) -----------------------------
  { from: "per_layer_lr_rotation", to: "adamw_8bit", relation: "BREAKS", tier: 1, confidence: 1.0,
    fix: "Pass custom optimizer_grouped_parameters — adamw_8bit's default decay/no-decay split overrides your per-layer LR bands.",
    evidence: [{ url: "mempalace://chaos-injection-trainer-notes", quote: "adamw_8bit produces decay/no-decay param groups by default; per-layer LR rotation silently no-ops unless you pass optimizer_grouped_parameters yourself.", source_type: "practitioner_run", tier: 1 }] },
  { from: "jacobian_reg", to: "chaos_inject", relation: "BREAKS", tier: 1, confidence: 1.0,
    fix: "Jacobian-reg's extra forward pass corrupts the injector activation cache → ortho_loss is poisoned. Disable one.",
    evidence: [{ url: "mempalace://chaos-injection-trainer-notes", quote: "the jacobian reg forward overwrites the activation cache that chaos_inject samples from; ortho_loss explodes.", source_type: "practitioner_run", tier: 1 }] },
  { from: "chaos_inject", to: "staged_meltin", relation: "REQUIRES", tier: 1, confidence: 1.0,
    fix: "Stage injectors in over ~100 steps; NaN at layer ~6 if injected from step 0.",
    evidence: [{ url: "mempalace://chaos-injection-trainer-notes", quote: "NaN at injector layer ~6 if run from step 0; staged melt-in over 100 steps fixed it.", source_type: "practitioner_run", tier: 1 }] },
  { from: "grad_ckpt", to: "vllm", relation: "BREAKS", tier: 1, confidence: 1.0,
    fix: "Disable gradient_checkpointing for the in-train vLLM gen pass. TRL-documented.",
    evidence: [{ url: "https://huggingface.co/docs/trl", quote: "gradient_checkpointing must be disabled when generating with vLLM during training.", source_type: "official_docs", tier: 1 }] },
  // --- core technique mutex / requires -------------------------------------
  { from: "qlora", to: "fft", relation: "BREAKS", tier: 1, confidence: 1.0,
    fix: "QLoRA freezes the base model; Full Fine-Tune updates it. Pick one.",
    evidence: [{ url: "https://arxiv.org/abs/2305.14314", quote: "QLoRA backprops gradients through a frozen 4-bit quantized model into low-rank adapters.", source_type: "paper", tier: 1 }] },
  { from: "qlora", to: "bnb_4bit", relation: "REQUIRES", tier: 1, confidence: 1.0,
    fix: "QLoRA is defined as 4-bit NF4 base + LoRA adapters — load the base in bnb 4-bit.",
    evidence: [{ url: "https://huggingface.co/docs/peft", quote: "QLoRA fine-tunes a 4-bit quantized base model loaded via bitsandbytes NF4.", source_type: "official_docs", tier: 1 }] },
  { from: "qlora", to: "lora", relation: "REQUIRES", tier: 1, confidence: 1.0,
    fix: "QLoRA = 4-bit base + LoRA adapters. The adapter rank is your hyperparameter.",
    evidence: [{ url: "https://arxiv.org/abs/2305.14314", quote: "QLoRA augments the frozen quantized model with Low-Rank Adapters.", source_type: "paper", tier: 1 }] },
  { from: "lora", to: "fft", relation: "BREAKS", tier: 2, confidence: 0.85,
    fix: "Adapter-method and full fine-tune are mutually exclusive within one run.",
    evidence: [{ url: "https://huggingface.co/docs/peft", quote: "PEFT methods freeze the base; choose either full fine-tuning or a PEFT method per run.", source_type: "official_docs", tier: 2 }] },
  // --- optimizer ↔ quantization --------------------------------------------
  { from: "muon", to: "adamw_8bit", relation: "BREAKS", tier: 2, confidence: 0.8,
    fix: "Muon owns the optimizer step for hidden weights; 8-bit AdamW state is incompatible with the Newton–Schulz update.",
    evidence: [{ url: "https://kellerjordan.github.io/posts/muon/", quote: "Muon replaces the optimizer update for 2D weights; use AdamW for the rest, not its 8-bit variant.", source_type: "blog", tier: 2 }] },
  { from: "muon", to: "adamw", relation: "REQUIRES", tier: 2, confidence: 0.9,
    fix: "Muon only updates 2D hidden weights — embeddings + biases still need AdamW.",
    evidence: [{ url: "https://kellerjordan.github.io/posts/muon/", quote: "non-hidden parameters (embeddings, scalars) are handled by a standard AdamW.", source_type: "blog", tier: 2 }] },
  { from: "lion", to: "bnb_8bit", relation: "DEGRADES", tier: 3, confidence: 0.5,
    fix: "Lion sign-update interacts poorly with 8-bit state quant — drop to 16-bit moments.",
    evidence: [{ url: "https://github.com/bitsandbytes-foundation/bitsandbytes/issues", quote: "single user report: Lion+8-bit moments diverged at step 4k on a 7B base.", source_type: "issue", tier: 3 }] },
  // --- unsloth / quant family ----------------------------------------------
  { from: "unsloth", to: "bnb_4bit", relation: "REQUIRES", tier: 1, confidence: 1.0,
    fix: "Unsloth's fused kernels assume a bnb 4-bit base.",
    evidence: [{ url: "https://github.com/unslothai/unsloth", quote: "Unsloth supports 4-bit quantized models via bitsandbytes for QLoRA fine-tuning.", source_type: "official_docs", tier: 1 }] },
  { from: "unsloth", to: "lora", relation: "REQUIRES", tier: 1, confidence: 1.0,
    fix: "Unsloth's fast path is the LoRA / QLoRA path.",
    evidence: [{ url: "https://github.com/unslothai/unsloth", quote: "Unsloth accelerates LoRA and QLoRA fine-tuning with custom Triton kernels.", source_type: "official_docs", tier: 1 }] },
  { from: "unsloth", to: "fsdp", relation: "BREAKS", tier: 2, confidence: 0.8,
    fix: "Unsloth's custom kernels don't compose with FSDP sharding hooks today.",
    evidence: [{ url: "https://github.com/unslothai/unsloth/issues", quote: "FSDP is not currently supported alongside Unsloth's fused kernels.", source_type: "issue", tier: 2 }] },
  // --- inference quants (category errors) ----------------------------------
  { from: "awq", to: "fft", relation: "BREAKS", tier: 2, confidence: 0.85,
    fix: "AWQ is an inference-time weight quant. You can't fine-tune through it.",
    evidence: [{ url: "https://github.com/casper-hansen/AutoAWQ", quote: "AWQ is intended for post-training quantization for inference.", source_type: "official_docs", tier: 2 }] },
  { from: "gptq", to: "fft", relation: "BREAKS", tier: 2, confidence: 0.85,
    fix: "GPTQ is post-training quant — frozen base only.",
    evidence: [{ url: "https://arxiv.org/abs/2210.17323", quote: "GPTQ is a one-shot post-training quantization method.", source_type: "paper", tier: 2 }] },
  // --- distributed -----------------------------------------------------------
  { from: "fsdp", to: "bnb_4bit", relation: "CONDITIONAL", tier: 2, confidence: 0.8, conditions: { plugin: "bnb-fsdp" },
    fix: "Works only with the bnb-FSDP plugin; vanilla FSDP shards over uninitialized 4-bit weights.",
    evidence: [{ url: "https://huggingface.co/docs/accelerate", quote: "FSDP + bitsandbytes 4-bit requires the bnb-fsdp wrap policy.", source_type: "official_docs", tier: 2 }] },
  { from: "deepspeed_z3", to: "bnb_8bit", relation: "DEGRADES", tier: 3, confidence: 0.5,
    fix: "Reports of slowdown / hangs on multi-node Z3 + 8-bit. Use bf16 weights, 8-bit optimizer states only.",
    evidence: [{ url: "https://github.com/microsoft/DeepSpeed/issues", quote: "Z3 + 8-bit weights hang on the param-gather step in some configs.", source_type: "issue", tier: 3 }] },
  // --- scheduler benchmark relations ---------------------------------------
  { from: "dlrs", to: "distilbert", relation: "COMPATIBLE", tier: 1, confidence: 1.0,
    fix: "DLRS #1 on the SST-2 bench: val_loss 0.2653, val_acc 0.890, steps_to_target 266.7 (n=3 seeds).",
    evidence: [{ url: "https://huggingface.co/spaces/juiceb0xc0de/lr-scheduler-benchmark", quote: "DLRS leads on val_loss across 3 seeds on distilbert/sst2.", source_type: "benchmark", tier: 1 }] },
  { from: "onecycle", to: "distilbert", relation: "DEGRADES", tier: 1, confidence: 1.0,
    fix: "OneCycle underperforms on the SST-2 bench: val_loss 0.4284 vs cohort cutoff 0.4022.",
    evidence: [{ url: "https://huggingface.co/spaces/juiceb0xc0de/lr-scheduler-benchmark", quote: "OneCycle val_loss 0.4284 is above the cohort cutoff 0.4022.", source_type: "benchmark", tier: 1 }] },
  // --- compatible/positive edges (just so the graph isn't all conflict) ----
  { from: "lora", to: "bnb_8bit", relation: "COMPATIBLE", tier: 2, confidence: 0.8, fix: "", evidence: [] },
  { from: "lora", to: "bnb_4bit", relation: "COMPATIBLE", tier: 1, confidence: 1.0, fix: "", evidence: [] },
  { from: "grad_ckpt", to: "fsdp", relation: "COMPATIBLE", tier: 1, confidence: 1.0, fix: "", evidence: [] },
  { from: "grad_ckpt", to: "qlora", relation: "COMPATIBLE", tier: 1, confidence: 1.0, fix: "", evidence: [] },
  { from: "cosine", to: "adamw", relation: "COMPATIBLE", tier: 1, confidence: 1.0, fix: "", evidence: [] },
  { from: "dlrs", to: "adamw", relation: "COMPATIBLE", tier: 1, confidence: 1.0, fix: "", evidence: [] },
  { from: "fsdp", to: "llama3", relation: "COMPATIBLE", tier: 1, confidence: 1.0, fix: "", evidence: [] },
  { from: "qlora", to: "llama3", relation: "COMPATIBLE", tier: 1, confidence: 1.0, fix: "", evidence: [] },
  { from: "qlora", to: "mistral", relation: "COMPATIBLE", tier: 1, confidence: 1.0, fix: "", evidence: [] },
  { from: "qlora", to: "qwen2", relation: "COMPATIBLE", tier: 1, confidence: 1.0, fix: "", evidence: [] },
  { from: "lora", to: "distilbert", relation: "COMPATIBLE", tier: 1, confidence: 1.0, fix: "", evidence: [] },
  { from: "vllm", to: "llama3", relation: "COMPATIBLE", tier: 1, confidence: 1.0, fix: "", evidence: [] },
  { from: "vllm", to: "mistral", relation: "COMPATIBLE", tier: 1, confidence: 1.0, fix: "", evidence: [] },
  { from: "deepspeed_z3", to: "fft", relation: "COMPATIBLE", tier: 1, confidence: 1.0, fix: "", evidence: [] },
  { from: "fsdp", to: "fft", relation: "COMPATIBLE", tier: 1, confidence: 1.0, fix: "", evidence: [] },
  { from: "muon", to: "llama3", relation: "COMPATIBLE", tier: 2, confidence: 0.8, fix: "", evidence: [] },
];
// Append "Staged Melt-In" after the main catalogue. It is the target of the
// chaos_inject → staged_meltin REQUIRES edge, so it must exist as a node for
// that requirement to be resolvable and reachable as a fix-target.
window.FORGE_NODES.push({
  canonical: "staged_meltin",
  name: "Staged Melt-In",
  type: "technique",
  description: "Linear ramp-in of chaos injectors over N steps. Prevents NaN at layer ~6.",
  tier: 1,
});
// ============================================================================
// RESOLVE ENGINE
// ============================================================================
/**
 * Classify every node in window.FORGE_NODES relative to the current selection.
 *
 * Selected nodes get status "selected". For every other node N, only edges whose
 * opposite endpoint S is a known node AND is selected are considered:
 *   BREAKS (either direction)      → reason kind "blocked"
 *   CONDITIONAL (either direction) → reason kind "conditional" (carries the edge's conditions)
 *   REQUIRES S → N                 → reason kind "needed-by" (S needs N and N is missing)
 *   REQUIRES N → S                 → ignored (N's requirement is already satisfied)
 *   DEGRADES (either direction)    → warning only
 *   COMPATIBLE / anything else     → no entry
 *
 * Status is "blocked" if any reason is blocked, otherwise "conditional" if any
 * reason is conditional or needed-by, otherwise "available".
 *
 * NOTE: N's own unmet requirements (REQUIRES N → X with X unselected) are NOT
 * surfaced here; forgeRecipe reports them as `unmet` once N is selected.
 * TODO(review): an earlier comment described a "needs X" rule for that case —
 * decide whether it should be implemented.
 *
 * @param {Iterable<string>} [selectedCanons] canonical ids of the selected nodes
 * @returns {Object<string, {status: string, reasons: Object[], warnings: Object[]}>}
 *          one entry per node in window.FORGE_NODES, keyed by canonical id
 */
window.forgeResolve = function (selectedCanons) {
  const selected = new Set(selectedCanons);

  // Maps rather than plain objects: canonical ids can come from the live API, and
  // an id such as "constructor" would otherwise resolve to an Object.prototype
  // member and break the `.push` below.
  const nodeByCanon = new Map(window.FORGE_NODES.map((n) => [n.canonical, n]));
  const outgoing = new Map();
  const incoming = new Map();
  const append = (index, key, edge) => {
    const bucket = index.get(key);
    if (bucket) bucket.push(edge);
    else index.set(key, [edge]);
  };
  for (const e of window.FORGE_EDGES) {
    append(outgoing, e.from, e);
    append(incoming, e.to, e);
  }

  const out = {};
  for (const n of window.FORGE_NODES) {
    if (selected.has(n.canonical)) {
      out[n.canonical] = { status: "selected", reasons: [], warnings: [] };
      continue;
    }

    const reasons = [];
    const warnings = [];
    // Outgoing edges first, then incoming — this fixes the order of reasons.
    const adjacent = [...(outgoing.get(n.canonical) || []), ...(incoming.get(n.canonical) || [])];

    for (const e of adjacent) {
      const isOutgoing = e.from === n.canonical; // n → other, else other → n
      const otherCanon = isOutgoing ? e.to : e.from;
      const other = nodeByCanon.get(otherCanon);
      // Edges to unknown or unselected nodes do not affect n.
      if (!other || !selected.has(otherCanon)) continue;

      // Payload shared by every reason / warning built from this edge.
      const details = {
        fix: e.fix,
        evidence: e.evidence,
        tier: e.tier,
        confidence: e.confidence,
        otherCanon,
      };

      switch (e.relation) {
        case "BREAKS":
          reasons.push({ kind: "blocked", label: `breaks with ${other.name}`, ...details });
          break;
        case "CONDITIONAL":
          reasons.push({
            kind: "conditional",
            label: `conditional on ${other.name}`,
            conditions: e.conditions,
            ...details,
          });
          break;
        case "REQUIRES":
          // other → n means "other requires n": other is selected, n is the missing ingredient.
          if (!isOutgoing) {
            reasons.push({ kind: "needed-by", label: `needed by ${other.name}`, ...details });
          }
          break;
        case "DEGRADES":
          warnings.push({ label: `degrades with ${other.name}`, ...details });
          break;
        default:
          break;
      }
    }

    let status = "available";
    if (reasons.some((r) => r.kind === "blocked")) {
      status = "blocked";
    } else if (reasons.some((r) => r.kind === "conditional" || r.kind === "needed-by")) {
      status = "conditional";
    }
    out[n.canonical] = { status, reasons, warnings };
  }
  return out;
};
/**
 * Build the recipe payload for a selection.
 *
 * Walks window.FORGE_EDGES once:
 *   - BREAKS with both endpoints selected       → `conflicts`
 *   - DEGRADES with both endpoints selected     → `warnings`
 *   - REQUIRES with `from` selected, `to` not   → `unmet`
 * `valid` is true only when there are no conflicts and no unmet requirements.
 * `scheduler_picks` / `scheduler_warnings` are fixed mock benchmark rows and do
 * not depend on the selection.
 *
 * Edge endpoints that have no entry in window.FORGE_NODES are reported by their
 * canonical id instead of throwing (forgeResolve tolerates such edges as well).
 *
 * @param {string[]} selectedCanons canonical ids of the selected nodes
 * @returns {{selected: string[], valid: boolean, conflicts: Object[], unmet: Object[],
 *            warnings: Object[], scheduler_picks: Object[], scheduler_warnings: Object[]}}
 */
window.forgeRecipe = function (selectedCanons) {
  const selected = new Set(selectedCanons);
  const nodeByCanon = new Map(window.FORGE_NODES.map((n) => [n.canonical, n]));
  // Display name for a canonical id; falls back to the id for unknown nodes.
  const displayName = (canon) => {
    const node = nodeByCanon.get(canon);
    return node ? node.name : canon;
  };

  const conflicts = [];
  const unmet = [];
  const warnings = [];
  for (const e of window.FORGE_EDGES) {
    const hasFrom = selected.has(e.from);
    const hasTo = selected.has(e.to);
    if (hasFrom && hasTo) {
      const pair = { a: displayName(e.from), b: displayName(e.to), fix: e.fix, tier: e.tier, evidence: e.evidence };
      if (e.relation === "BREAKS") conflicts.push(pair);
      if (e.relation === "DEGRADES") warnings.push(pair);
    } else if (hasFrom && !hasTo && e.relation === "REQUIRES") {
      // `from` needs `to`, and `to` is not part of the selection.
      unmet.push({
        from: displayName(e.from),
        missing: displayName(e.to),
        missingCanon: e.to,
        fix: e.fix,
        tier: e.tier,
        evidence: e.evidence,
      });
    }
    // REQUIRES with only `to` selected is not unmet: the node that needs it is not selected.
  }

  // scheduler benchmark cohort (mock real bench data)
  const scheduler_picks = [
    { component: "DLRS", val_loss: 0.26528, val_acc: 0.89029, steps_to_target: 266.7, n_seeds: 3, source: "https://huggingface.co/spaces/juiceb0xc0de/lr-scheduler-benchmark", canonical: "dlrs" },
    { component: "Cosine", val_loss: 0.31417, val_acc: 0.87959, steps_to_target: 312.0, n_seeds: 3, source: "https://huggingface.co/spaces/juiceb0xc0de/lr-scheduler-benchmark", canonical: "cosine" },
    { component: "WSD", val_loss: 0.34081, val_acc: 0.86790, steps_to_target: 348.3, n_seeds: 3, source: "https://huggingface.co/spaces/juiceb0xc0de/lr-scheduler-benchmark", canonical: "wsd" },
  ];
  const scheduler_warnings = [
    { component: "OneCycle", val_loss: 0.42844, cutoff: 0.4022, canonical: "onecycle" },
  ];

  const valid = conflicts.length === 0 && unmet.length === 0;
  return { selected: selectedCanons, valid, conflicts, unmet, warnings, scheduler_picks, scheduler_warnings };
};
// ============================================================================
// LIVE DISCOVERIES FEED (mock SSE)
// ============================================================================
/**
 * Seed rows for the discoveries feed.
 *
 * Each row: a / b (display names of the two components), relation, tier,
 * source (host/path the finding is attributed to), ts_offset.
 * NOTE(review): ts_offset is negative in every bundled row — presumably seconds
 * relative to "now" (i.e. how long ago the discovery arrived); confirm against
 * the feed renderer.
 */
window.FORGE_DISCOVERIES = [
  { a: "Gradient Checkpointing", relation: "COMPATIBLE", b: "LoRA", tier: 2, source: "huggingface.co/docs/peft", ts_offset: -8 },
  { a: "Muon", relation: "REQUIRES", b: "AdamW", tier: 2, source: "kellerjordan.github.io", ts_offset: -22 },
  { a: "DeepSpeed ZeRO-3", relation: "DEGRADES", b: "bnb 8-bit", tier: 3, source: "github.com/microsoft/DeepSpeed", ts_offset: -41 },
  { a: "QLoRA", relation: "COMPATIBLE", b: "Qwen-2.5", tier: 2, source: "qwenlm.github.io", ts_offset: -67 },
  { a: "FSDP", relation: "CONDITIONAL", b: "bnb 4-bit", tier: 2, source: "huggingface.co/docs/accelerate", ts_offset: -94 },
  { a: "Sophia-G", relation: "DEGRADES", b: "bnb 8-bit", tier: 3, source: "github.com/Liuhong99/Sophia", ts_offset: -128 },
  { a: "WSD", relation: "COMPATIBLE", b: "Continual PT", tier: 2, source: "arxiv.org/abs/2404.06395", ts_offset: -171 },
  { a: "Unsloth", relation: "BREAKS", b: "FSDP", tier: 2, source: "github.com/unslothai/unsloth", ts_offset: -210 },
  { a: "AWQ", relation: "BREAKS", b: "Full Fine-Tune", tier: 2, source: "github.com/casper-hansen/AutoAWQ", ts_offset: -266 },
  { a: "OneCycle", relation: "DEGRADES", b: "distilbert/sst2", tier: 1, source: "hf.co/spaces/juiceb0xc0de/lr-scheduler-benchmark", ts_offset: -311 },
];
/**
 * New "live" discoveries that stream in over time.
 *
 * Same row shape as the seed feed (a, relation, b, tier, source) but without a
 * ts_offset field.
 * NOTE(review): presumably the consumer stamps each row with a time when it is
 * emitted — confirm against the feed renderer.
 */
window.FORGE_FUTURE_DISCOVERIES = [
  { a: "Per-Layer LR Rotation", relation: "BREAKS", b: "AdamW 8-bit", tier: 1, source: "mempalace://rick-notes" },
  { a: "Jacobian Reg", relation: "BREAKS", b: "Chaos Injectors", tier: 1, source: "mempalace://rick-notes" },
  { a: "Lion", relation: "DEGRADES", b: "bnb 8-bit", tier: 3, source: "github.com/bitsandbytes-foundation/bitsandbytes" },
  { a: "Paged AdamW", relation: "COMPATIBLE", b: "QLoRA", tier: 2, source: "huggingface.co/docs/transformers" },
  { a: "DLRS", relation: "COMPATIBLE", b: "WSD", tier: 2, source: "hf.co/spaces/juiceb0xc0de/lr-scheduler-benchmark" },
  { a: "Cosine", relation: "COMPATIBLE", b: "AdamW", tier: 1, source: "pytorch.org/docs" },
  { a: "FlashAttention-3", relation: "COMPATIBLE", b: "Llama-3", tier: 2, source: "github.com/Dao-AILab/flash-attention" },
  { a: "Liger Kernel", relation: "COMPATIBLE", b: "Unsloth", tier: 3, source: "github.com/linkedin/Liger-Kernel" },
];
// ============================================================================
// LIVE API BOOTSTRAP — prefer the live Forge API; fall back to the bundled mock.
// Default base is same-origin (the API serves this frontend in prod).
// Split-port dev: ?api=http://localhost:8010
// ============================================================================
// Base URL of the live API: the decoded value of the `api` query parameter, or ""
// (same-origin) when the parameter is absent, empty, or not decodable.
window.FORGE_API = (function () {
  const m = location.search.match(/[?&]api=([^&]+)/);
  if (!m) return "";
  try {
    return decodeURIComponent(m[1]);
  } catch (err) {
    // decodeURIComponent throws URIError on a malformed percent-escape. Without
    // this guard a bad ?api= value would abort the script before forgeBootstrap
    // is even defined, so fall back to same-origin instead.
    console.warn("[forge] ignoring malformed ?api= override:", err.message);
    return "";
  }
})();
// Flipped to true by forgeBootstrap once the live graph has been loaded.
window.FORGE_LIVE = false;
/**
 * Try to replace the bundled mock data with data from the live API.
 *
 * GET {base}/graph    — expects { nodes: [...], edges: [...] }; edges use
 *                       from_canon / to_canon, which are mapped to from / to.
 *                       A node's tier is the lowest tier among its edges
 *                       (edges without a tier count as 2; nodes without edges get 2).
 * GET {base}/feed?n=12 — expects an array of { a, relation, b, tier }; each row is
 *                       stored with source "bright data" and ts_offset 0.
 *
 * The graph is only applied when it has a non-empty `nodes` array and an `edges`
 * array; both globals are built first and then swapped together. If the graph
 * request fails the bundled mock stays in place and the feed is not requested.
 * A feed failure leaves the current discoveries untouched and does not affect
 * the live graph.
 *
 * @returns {Promise<boolean>} the value of window.FORGE_LIVE after the attempt
 */
window.forgeBootstrap = async function () {
  const base = (window.FORGE_API || "").replace(/\/$/, "");

  // Fetch JSON from the API, treating non-2xx responses as failures.
  const getJson = async (path) => {
    const res = await fetch(base + path, { cache: "no-store" });
    if (!res.ok) throw new Error(`${path} responded with HTTP ${res.status}`);
    return res.json();
  };

  try {
    const g = await getJson("/graph");
    if (g && Array.isArray(g.nodes) && g.nodes.length && Array.isArray(g.edges)) {
      // Lowest edge tier seen per node; 9 is just a "nothing seen yet" ceiling.
      const tierByNode = new Map();
      for (const e of g.edges) {
        for (const c of [e.from_canon, e.to_canon]) {
          const seen = tierByNode.has(c) ? tierByNode.get(c) : 9;
          tierByNode.set(c, Math.min(seen, e.tier || 2));
        }
      }
      const nodes = g.nodes.map((n) => ({
        canonical: n.canonical,
        name: n.name,
        type: n.type,
        description: n.description || "",
        tier: tierByNode.get(n.canonical) || 2,
      }));
      const edges = g.edges.map((e) => ({
        from: e.from_canon,
        to: e.to_canon,
        relation: e.relation,
        tier: e.tier,
        confidence: e.confidence,
        fix: e.fix || "",
        conditions: e.conditions || {},
        evidence: e.evidence || [],
      }));
      // Swap both globals only after both were built, so they never disagree.
      window.FORGE_NODES = nodes;
      window.FORGE_EDGES = edges;
      window.FORGE_LIVE = true;
    }
  } catch (err) {
    console.warn("[forge] live API unavailable, using bundled mock:", err.message);
    return window.FORGE_LIVE;
  }

  try {
    const feed = await getJson("/feed?n=12");
    if (Array.isArray(feed) && feed.length) {
      window.FORGE_DISCOVERIES = feed.map((f) => ({
        a: f.a,
        relation: f.relation,
        b: f.b,
        tier: f.tier,
        source: "bright data",
        ts_offset: 0,
      }));
    }
  } catch (err) {
    // Reported separately: the graph may well be live even though the feed failed.
    console.warn("[forge] live feed unavailable, keeping current discoveries:", err.message);
  }
  return window.FORGE_LIVE;
};