forge / frontend /data.js
juiceb0xc0de's picture
Forge: initial deploy — API + static UI over prebuilt forge.db (seeded at build)
00cabba
Raw
History Blame Contribute Delete
26.4 kB
// Forge — mock graph + resolve engine
// Matches the FastAPI shapes in the handoff doc, so swap to fetch() later.
// Component catalogue for the mock graph.
// Rows are grouped by node type; each row is [canonical, name, description, tier]
// and is expanded into { canonical, name, type, description, tier } below.
// Group order and row order define the order of the resulting array.
window.FORGE_NODES = Object.entries({
  optimizer: [
    ["adamw", "AdamW", "Workhorse decoupled-weight-decay optimizer. Safe default.", 1],
    ["adamw_8bit", "AdamW 8-bit", "bitsandbytes 8-bit AdamW. Saves VRAM. Default param groups can fight per-layer LR tricks.", 1],
    ["paged_adamw", "Paged AdamW", "CPU-paged optimizer states. For when you really can't fit.", 2],
    ["lion", "Lion", "Sign-momentum optimizer. Lower memory than AdamW; needs lower LR.", 2],
    ["muon", "Muon", "Newton–Schulz orthogonalized momentum. Faster convergence on hidden weights.", 2],
    ["sophia", "Sophia-G", "Hessian-informed second-order. Promising for LLMs.", 3],
    ["adafactor", "Adafactor", "Memory-light. Tricky LR schedule.", 2],
  ],
  scheduler: [
    ["cosine", "Cosine", "Cosine decay with warmup. Boring; works.", 1],
    ["onecycle", "OneCycle", "Aggressive warm-then-anneal. Faster but can overshoot on long runs.", 2],
    ["dlrs", "DLRS", "Rick's dynamic LR scheduler. #1 on val_loss in the bench.", 1],
    ["linear", "Linear", "Linear warmup → linear decay.", 1],
    ["wsd", "WSD", "Warmup–Stable–Decay. Continual-pretrain friendly.", 2],
    ["constant", "Constant", "Flat. Combine with manual restarts.", 1],
  ],
  technique: [
    ["qlora", "QLoRA", "4-bit base + LoRA adapters. Lets a 70B fit on one card.", 1],
    ["lora", "LoRA", "Low-rank adapters. Cheap, composable, the default PEFT.", 1],
    ["fft", "Full Fine-Tune", "Update every parameter. Hungry. Mutually exclusive with adapter methods.", 1],
    ["grad_ckpt", "Grad Checkpoint", "Trade FLOPs for VRAM. Must be off for vLLM gen during training.", 1],
    ["per_layer_lr_rotation", "Per-Layer LR Rotation", "Rick's trick: rotate LR across layer bands per step. Needs custom param groups.", 1],
    ["chaos_inject", "Chaos Injectors", "Activation perturbation at hidden layers. NaN-prone without staged melt-in.", 1],
    ["jacobian_reg", "Jacobian Reg", "Smoothness penalty via Jacobian. Forward pass corrupts the injector cache.", 1],
    ["fsdp", "FSDP", "Fully-Sharded Data Parallel. Sharding for big models.", 1],
    ["ddp", "DDP", "Vanilla data-parallel. Cheap when the model fits.", 1],
    ["deepspeed_z3", "DeepSpeed ZeRO-3", "ZeRO stage 3 partitioning. Battle-tested.", 1],
    ["unsloth", "Unsloth", "Fused kernels for LoRA/QLoRA. PyTorch-only; tight coupling to bnb.", 2],
  ],
  quantization: [
    ["bnb_4bit", "bnb 4-bit", "bitsandbytes NF4. The QLoRA base.", 1],
    ["bnb_8bit", "bnb 8-bit", "LLM.int8(). Inference-leaning; training works.", 1],
    ["gptq", "GPTQ", "Post-training quant. Inference-only for our purposes.", 2],
    ["awq", "AWQ", "Activation-aware weight quant. Inference-time.", 2],
  ],
  architecture: [
    ["llama3", "Llama-3", "Llama-3 8B / 70B family.", 1],
    ["mistral", "Mistral", "Mistral / Mixtral.", 1],
    ["qwen2", "Qwen-2.5", "Strong open multilingual base.", 1],
    ["distilbert", "DistilBERT", "The bench model. SST-2 sandbox.", 1],
  ],
  inference: [
    ["vllm", "vLLM", "PagedAttention server. Needs grad-ckpt off during in-train generation.", 1],
    ["sglang", "SGLang", "Structured-gen server.", 2],
  ],
}).flatMap(([type, rows]) =>
  rows.map(([canonical, name, description, tier]) => ({ canonical, name, type, description, tier }))
);
// Edge list for the mock compatibility graph.
// `relation` is one of: REQUIRES, CONDITIONAL, COMPATIBLE, DEGRADES, BREAKS.
// Direction is from → to. For REQUIRES, read it as: `from` needs `to` in order to work.
// BREAKS is displayed symmetrically, but each pair is stored only once.
// Every edge carries tier, confidence, fix (may be "") and evidence (may be []);
// the CONDITIONAL edge additionally carries a `conditions` object.
// NOTE(review): tier looks like an evidence-strength rank (1 strongest … 3 weakest) — confirm against the backend doc.
window.FORGE_EDGES = [
// --- T1 verified seed (from the backend doc) -----------------------------
{ from: "per_layer_lr_rotation", to: "adamw_8bit", relation: "BREAKS", tier: 1, confidence: 1.0,
fix: "Pass custom optimizer_grouped_parameters — adamw_8bit's default decay/no-decay split overrides your per-layer LR bands.",
evidence: [{ url: "mempalace://chaos-injection-trainer-notes", quote: "adamw_8bit produces decay/no-decay param groups by default; per-layer LR rotation silently no-ops unless you pass optimizer_grouped_parameters yourself.", source_type: "practitioner_run", tier: 1 }] },
{ from: "jacobian_reg", to: "chaos_inject", relation: "BREAKS", tier: 1, confidence: 1.0,
fix: "Jacobian-reg's extra forward pass corrupts the injector activation cache → ortho_loss is poisoned. Disable one.",
evidence: [{ url: "mempalace://chaos-injection-trainer-notes", quote: "the jacobian reg forward overwrites the activation cache that chaos_inject samples from; ortho_loss explodes.", source_type: "practitioner_run", tier: 1 }] },
// NOTE: "staged_meltin" is not part of the FORGE_NODES literal; it is appended
// by the FORGE_NODES.push(...) statement that follows this array.
{ from: "chaos_inject", to: "staged_meltin", relation: "REQUIRES", tier: 1, confidence: 1.0,
fix: "Stage injectors in over ~100 steps; NaN at layer ~6 if injected from step 0.",
evidence: [{ url: "mempalace://chaos-injection-trainer-notes", quote: "NaN at injector layer ~6 if run from step 0; staged melt-in over 100 steps fixed it.", source_type: "practitioner_run", tier: 1 }] },
{ from: "grad_ckpt", to: "vllm", relation: "BREAKS", tier: 1, confidence: 1.0,
fix: "Disable gradient_checkpointing for the in-train vLLM gen pass. TRL-documented.",
evidence: [{ url: "https://huggingface.co/docs/trl", quote: "gradient_checkpointing must be disabled when generating with vLLM during training.", source_type: "official_docs", tier: 1 }] },
// --- core technique mutex / requires -------------------------------------
{ from: "qlora", to: "fft", relation: "BREAKS", tier: 1, confidence: 1.0,
fix: "QLoRA freezes the base model; Full Fine-Tune updates it. Pick one.",
evidence: [{ url: "https://arxiv.org/abs/2305.14314", quote: "QLoRA backprops gradients through a frozen 4-bit quantized model into low-rank adapters.", source_type: "paper", tier: 1 }] },
{ from: "qlora", to: "bnb_4bit", relation: "REQUIRES", tier: 1, confidence: 1.0,
fix: "QLoRA is defined as 4-bit NF4 base + LoRA adapters — load the base in bnb 4-bit.",
evidence: [{ url: "https://huggingface.co/docs/peft", quote: "QLoRA fine-tunes a 4-bit quantized base model loaded via bitsandbytes NF4.", source_type: "official_docs", tier: 1 }] },
{ from: "qlora", to: "lora", relation: "REQUIRES", tier: 1, confidence: 1.0,
fix: "QLoRA = 4-bit base + LoRA adapters. The adapter rank is your hyperparameter.",
evidence: [{ url: "https://arxiv.org/abs/2305.14314", quote: "QLoRA augments the frozen quantized model with Low-Rank Adapters.", source_type: "paper", tier: 1 }] },
{ from: "lora", to: "fft", relation: "BREAKS", tier: 2, confidence: 0.85,
fix: "Adapter-method and full fine-tune are mutually exclusive within one run.",
evidence: [{ url: "https://huggingface.co/docs/peft", quote: "PEFT methods freeze the base; choose either full fine-tuning or a PEFT method per run.", source_type: "official_docs", tier: 2 }] },
// --- optimizer ↔ quantization --------------------------------------------
{ from: "muon", to: "adamw_8bit", relation: "BREAKS", tier: 2, confidence: 0.8,
fix: "Muon owns the optimizer step for hidden weights; 8-bit AdamW state is incompatible with the Newton–Schulz update.",
evidence: [{ url: "https://kellerjordan.github.io/posts/muon/", quote: "Muon replaces the optimizer update for 2D weights; use AdamW for the rest, not its 8-bit variant.", source_type: "blog", tier: 2 }] },
{ from: "muon", to: "adamw", relation: "REQUIRES", tier: 2, confidence: 0.9,
fix: "Muon only updates 2D hidden weights — embeddings + biases still need AdamW.",
evidence: [{ url: "https://kellerjordan.github.io/posts/muon/", quote: "non-hidden parameters (embeddings, scalars) are handled by a standard AdamW.", source_type: "blog", tier: 2 }] },
{ from: "lion", to: "bnb_8bit", relation: "DEGRADES", tier: 3, confidence: 0.5,
fix: "Lion sign-update interacts poorly with 8-bit state quant — drop to 16-bit moments.",
evidence: [{ url: "https://github.com/bitsandbytes-foundation/bitsandbytes/issues", quote: "single user report: Lion+8-bit moments diverged at step 4k on a 7B base.", source_type: "issue", tier: 3 }] },
// --- unsloth / quant family ----------------------------------------------
{ from: "unsloth", to: "bnb_4bit", relation: "REQUIRES", tier: 1, confidence: 1.0,
fix: "Unsloth's fused kernels assume a bnb 4-bit base.",
evidence: [{ url: "https://github.com/unslothai/unsloth", quote: "Unsloth supports 4-bit quantized models via bitsandbytes for QLoRA fine-tuning.", source_type: "official_docs", tier: 1 }] },
{ from: "unsloth", to: "lora", relation: "REQUIRES", tier: 1, confidence: 1.0,
fix: "Unsloth's fast path is the LoRA / QLoRA path.",
evidence: [{ url: "https://github.com/unslothai/unsloth", quote: "Unsloth accelerates LoRA and QLoRA fine-tuning with custom Triton kernels.", source_type: "official_docs", tier: 1 }] },
{ from: "unsloth", to: "fsdp", relation: "BREAKS", tier: 2, confidence: 0.8,
fix: "Unsloth's custom kernels don't compose with FSDP sharding hooks today.",
evidence: [{ url: "https://github.com/unslothai/unsloth/issues", quote: "FSDP is not currently supported alongside Unsloth's fused kernels.", source_type: "issue", tier: 2 }] },
// --- inference quants (category errors) ----------------------------------
{ from: "awq", to: "fft", relation: "BREAKS", tier: 2, confidence: 0.85,
fix: "AWQ is an inference-time weight quant. You can't fine-tune through it.",
evidence: [{ url: "https://github.com/casper-hansen/AutoAWQ", quote: "AWQ is intended for post-training quantization for inference.", source_type: "official_docs", tier: 2 }] },
{ from: "gptq", to: "fft", relation: "BREAKS", tier: 2, confidence: 0.85,
fix: "GPTQ is post-training quant — frozen base only.",
evidence: [{ url: "https://arxiv.org/abs/2210.17323", quote: "GPTQ is a one-shot post-training quantization method.", source_type: "paper", tier: 2 }] },
// --- distributed -----------------------------------------------------------
// The only CONDITIONAL edge in this mock; `conditions` names what must hold for the pair to work.
{ from: "fsdp", to: "bnb_4bit", relation: "CONDITIONAL", tier: 2, confidence: 0.8, conditions: { plugin: "bnb-fsdp" },
fix: "Works only with the bnb-FSDP plugin; vanilla FSDP shards over uninitialized 4-bit weights.",
evidence: [{ url: "https://huggingface.co/docs/accelerate", quote: "FSDP + bitsandbytes 4-bit requires the bnb-fsdp wrap policy.", source_type: "official_docs", tier: 2 }] },
{ from: "deepspeed_z3", to: "bnb_8bit", relation: "DEGRADES", tier: 3, confidence: 0.5,
fix: "Reports of slowdown / hangs on multi-node Z3 + 8-bit. Use bf16 weights, 8-bit optimizer states only.",
evidence: [{ url: "https://github.com/microsoft/DeepSpeed/issues", quote: "Z3 + 8-bit weights hang on the param-gather step in some configs.", source_type: "issue", tier: 3 }] },
// --- scheduler benchmark relations ---------------------------------------
// For these two edges, `fix` carries the benchmark figures rather than a remedy.
{ from: "dlrs", to: "distilbert", relation: "COMPATIBLE", tier: 1, confidence: 1.0,
fix: "DLRS #1 on the SST-2 bench: val_loss 0.2653, val_acc 0.890, steps_to_target 266.7 (n=3 seeds).",
evidence: [{ url: "https://huggingface.co/spaces/juiceb0xc0de/lr-scheduler-benchmark", quote: "DLRS leads on val_loss across 3 seeds on distilbert/sst2.", source_type: "benchmark", tier: 1 }] },
{ from: "onecycle", to: "distilbert", relation: "DEGRADES", tier: 1, confidence: 1.0,
fix: "OneCycle underperforms on the SST-2 bench: val_loss 0.4284 vs cohort cutoff 0.4022.",
evidence: [{ url: "https://huggingface.co/spaces/juiceb0xc0de/lr-scheduler-benchmark", quote: "OneCycle val_loss 0.4284 is above the cohort cutoff 0.4022.", source_type: "benchmark", tier: 1 }] },
// --- compatible/positive edges (just so the graph isn't all conflict) ----
// These carry no fix text and no evidence entries.
{ from: "lora", to: "bnb_8bit", relation: "COMPATIBLE", tier: 2, confidence: 0.8, fix: "", evidence: [] },
{ from: "lora", to: "bnb_4bit", relation: "COMPATIBLE", tier: 1, confidence: 1.0, fix: "", evidence: [] },
{ from: "grad_ckpt", to: "fsdp", relation: "COMPATIBLE", tier: 1, confidence: 1.0, fix: "", evidence: [] },
{ from: "grad_ckpt", to: "qlora",relation: "COMPATIBLE", tier: 1, confidence: 1.0, fix: "", evidence: [] },
{ from: "cosine", to: "adamw", relation: "COMPATIBLE", tier: 1, confidence: 1.0, fix: "", evidence: [] },
{ from: "dlrs", to: "adamw", relation: "COMPATIBLE", tier: 1, confidence: 1.0, fix: "", evidence: [] },
{ from: "fsdp", to: "llama3", relation: "COMPATIBLE", tier: 1, confidence: 1.0, fix: "", evidence: [] },
{ from: "qlora", to: "llama3", relation: "COMPATIBLE", tier: 1, confidence: 1.0, fix: "", evidence: [] },
{ from: "qlora", to: "mistral", relation: "COMPATIBLE", tier: 1, confidence: 1.0, fix: "", evidence: [] },
{ from: "qlora", to: "qwen2", relation: "COMPATIBLE", tier: 1, confidence: 1.0, fix: "", evidence: [] },
{ from: "lora", to: "distilbert", relation: "COMPATIBLE", tier: 1, confidence: 1.0, fix: "", evidence: [] },
{ from: "vllm", to: "llama3", relation: "COMPATIBLE", tier: 1, confidence: 1.0, fix: "", evidence: [] },
{ from: "vllm", to: "mistral", relation: "COMPATIBLE", tier: 1, confidence: 1.0, fix: "", evidence: [] },
{ from: "deepspeed_z3", to: "fft", relation: "COMPATIBLE", tier: 1, confidence: 1.0, fix: "", evidence: [] },
{ from: "fsdp", to: "fft", relation: "COMPATIBLE", tier: 1, confidence: 1.0, fix: "", evidence: [] },
{ from: "muon", to: "llama3", relation: "COMPATIBLE", tier: 2, confidence: 0.8, fix: "", evidence: [] },
];
// "Staged Melt-In" is appended to the catalogue separately: it is a hidden
// technique that is reachable as a fix-target (the REQUIRES target of chaos_inject).
{
  const stagedMeltIn = {
    canonical: "staged_meltin",
    name: "Staged Melt-In",
    type: "technique",
    description: "Linear ramp-in of chaos injectors over N steps. Prevents NaN at layer ~6.",
    tier: 1,
  };
  window.FORGE_NODES.push(stagedMeltIn);
}
// ============================================================================
// RESOLVE ENGINE
// ============================================================================
// Resolve a status for every node in FORGE_NODES given the selected canonicals.
//
// Selected nodes get status "selected". For every other node N, each edge that
// joins N to a *selected* node S contributes:
//   BREAKS            → reason "blocked"      (red)
//   CONDITIONAL       → reason "conditional"  (amber, carries the edge's conditions)
//   REQUIRES (S → N)  → reason "needed-by"    (amber: S needs N and N is missing)
//   REQUIRES (N → S)  → nothing; S is selected, so the requirement is satisfied
//   DEGRADES          → a warning; the status is not affected
//   COMPATIBLE        → nothing
// Edges whose other end is unselected, or is not a known node, are skipped.
// That means N's own unmet requirements (REQUIRES N → X with X unselected)
// are not evaluated here and do not change N's status.
//
// Status precedence: "blocked" > "conditional" > "available".
// Returns an object keyed by canonical: { status, reasons, warnings }.
window.forgeResolve = function (selectedCanons) {
  const chosen = new Set(selectedCanons);
  const nodes = window.FORGE_NODES;
  const byCanon = Object.fromEntries(nodes.map((node) => [node.canonical, node]));

  // Index edges by both endpoints so each node only visits its own edges.
  const edgesFrom = {};
  const edgesInto = {};
  window.FORGE_EDGES.forEach((edge) => {
    if (!edgesFrom[edge.from]) edgesFrom[edge.from] = [];
    edgesFrom[edge.from].push(edge);
    if (!edgesInto[edge.to]) edgesInto[edge.to] = [];
    edgesInto[edge.to].push(edge);
  });

  const result = {};
  for (const node of nodes) {
    const canon = node.canonical;
    if (chosen.has(canon)) {
      result[canon] = { status: "selected", reasons: [], warnings: [] };
      continue;
    }

    const reasons = [];
    const warnings = [];
    // Outgoing edges first, then incoming — this fixes the order of the reasons.
    const touching = (edgesFrom[canon] || []).concat(edgesInto[canon] || []);
    touching.forEach((edge) => {
      const pointsOut = edge.from === canon; // true: node → other, false: other → node
      const otherCanon = pointsOut ? edge.to : edge.from;
      const other = byCanon[otherCanon];
      if (!other || !chosen.has(otherCanon)) return;

      switch (edge.relation) {
        case "BREAKS":
          reasons.push({
            kind: "blocked",
            label: `breaks with ${other.name}`,
            fix: edge.fix,
            evidence: edge.evidence,
            tier: edge.tier,
            confidence: edge.confidence,
            otherCanon,
          });
          break;
        case "CONDITIONAL":
          reasons.push({
            kind: "conditional",
            label: `conditional on ${other.name}`,
            fix: edge.fix,
            conditions: edge.conditions,
            evidence: edge.evidence,
            tier: edge.tier,
            otherCanon,
          });
          break;
        case "REQUIRES":
          // Only "other requires node" matters: other is selected, node is the
          // missing ingredient. The opposite direction is already satisfied.
          if (!pointsOut) {
            reasons.push({
              kind: "needed-by",
              label: `needed by ${other.name}`,
              fix: edge.fix,
              evidence: edge.evidence,
              tier: edge.tier,
              otherCanon,
            });
          }
          break;
        case "DEGRADES":
          warnings.push({
            label: `degrades with ${other.name}`,
            fix: edge.fix,
            evidence: edge.evidence,
            tier: edge.tier,
            otherCanon,
          });
          break;
        default:
          break;
      }
    });

    const kinds = new Set(reasons.map((reason) => reason.kind));
    let status;
    if (kinds.has("blocked")) {
      status = "blocked";
    } else if (kinds.has("conditional") || kinds.has("needed-by")) {
      status = "conditional";
    } else {
      status = "available";
    }
    result[canon] = { status, reasons, warnings };
  }
  return result;
};
// Build the recipe payload for a selection of canonical ids.
//
// Walks every edge once:
//   - both ends selected + BREAKS    → entry in `conflicts`
//   - both ends selected + DEGRADES  → entry in `warnings`
//   - `from` selected, `to` not, REQUIRES → entry in `unmet` (from needs the missing `to`)
//   - `to` selected, `from` not, REQUIRES → nothing: the node that has the
//     requirement is not part of the selection
// `valid` is true only when there are no conflicts and no unmet requirements;
// warnings do not affect it. `scheduler_picks` / `scheduler_warnings` are fixed
// mock benchmark rows returned with every payload.
//
// Display names fall back to the canonical id when an edge (or the selection)
// references a canonical that has no entry in FORGE_NODES, instead of throwing.
window.forgeRecipe = function (selectedCanons) {
  const sel = new Set(selectedCanons);
  const nodeByCanon = new Map(window.FORGE_NODES.map((n) => [n.canonical, n]));
  const nameOf = (canon) => {
    const node = nodeByCanon.get(canon);
    return node ? node.name : canon;
  };

  const conflicts = [];
  const unmet = [];
  const warnings = [];
  for (const e of window.FORGE_EDGES) {
    const hasFrom = sel.has(e.from);
    const hasTo = sel.has(e.to);
    if (hasFrom && hasTo) {
      if (e.relation === "BREAKS") {
        conflicts.push({ a: nameOf(e.from), b: nameOf(e.to), fix: e.fix, tier: e.tier, evidence: e.evidence });
      } else if (e.relation === "DEGRADES") {
        warnings.push({ a: nameOf(e.from), b: nameOf(e.to), fix: e.fix, tier: e.tier, evidence: e.evidence });
      }
    } else if (hasFrom && e.relation === "REQUIRES") {
      unmet.push({ from: nameOf(e.from), missing: nameOf(e.to), missingCanon: e.to, fix: e.fix, tier: e.tier, evidence: e.evidence });
    }
  }

  // scheduler benchmark cohort (mock real bench data)
  const scheduler_picks = [
    { component: "DLRS", val_loss: 0.26528, val_acc: 0.89029, steps_to_target: 266.7, n_seeds: 3, source: "https://huggingface.co/spaces/juiceb0xc0de/lr-scheduler-benchmark", canonical: "dlrs" },
    { component: "Cosine", val_loss: 0.31417, val_acc: 0.87959, steps_to_target: 312.0, n_seeds: 3, source: "https://huggingface.co/spaces/juiceb0xc0de/lr-scheduler-benchmark", canonical: "cosine" },
    { component: "WSD", val_loss: 0.34081, val_acc: 0.86790, steps_to_target: 348.3, n_seeds: 3, source: "https://huggingface.co/spaces/juiceb0xc0de/lr-scheduler-benchmark", canonical: "wsd" },
  ];
  const scheduler_warnings = [
    { component: "OneCycle", val_loss: 0.42844, cutoff: 0.4022, canonical: "onecycle" },
  ];

  const valid = conflicts.length === 0 && unmet.length === 0;
  return { selected: selectedCanons, valid, conflicts, unmet, warnings, scheduler_picks, scheduler_warnings };
};
// ============================================================================
// LIVE DISCOVERIES FEED (mock SSE)
// ============================================================================
// Seed entries for the mock discoveries feed.
// Each row is [a, relation, b, tier, source, ts_offset] and is expanded into an
// object with those keys. ts_offset values are negative and grow in magnitude
// down the list — presumably seconds relative to "now"; confirm against the UI.
window.FORGE_DISCOVERIES = [
  ["Gradient Checkpointing", "COMPATIBLE", "LoRA", 2, "huggingface.co/docs/peft", -8],
  ["Muon", "REQUIRES", "AdamW", 2, "kellerjordan.github.io", -22],
  ["DeepSpeed ZeRO-3", "DEGRADES", "bnb 8-bit", 3, "github.com/microsoft/DeepSpeed", -41],
  ["QLoRA", "COMPATIBLE", "Qwen-2.5", 2, "qwenlm.github.io", -67],
  ["FSDP", "CONDITIONAL", "bnb 4-bit", 2, "huggingface.co/docs/accelerate", -94],
  ["Sophia-G", "DEGRADES", "bnb 8-bit", 3, "github.com/Liuhong99/Sophia", -128],
  ["WSD", "COMPATIBLE", "Continual PT", 2, "arxiv.org/abs/2404.06395", -171],
  ["Unsloth", "BREAKS", "FSDP", 2, "github.com/unslothai/unsloth", -210],
  ["AWQ", "BREAKS", "Full Fine-Tune", 2, "github.com/casper-hansen/AutoAWQ", -266],
  ["OneCycle", "DEGRADES", "distilbert/sst2", 1, "hf.co/spaces/juiceb0xc0de/lr-scheduler-benchmark", -311],
].map(([a, relation, b, tier, source, ts_offset]) => ({ a, relation, b, tier, source, ts_offset }));
// Queue of mock discoveries meant to stream into the feed over time.
// Each row is [a, relation, b, tier, source]; unlike FORGE_DISCOVERIES these
// entries carry no ts_offset.
window.FORGE_FUTURE_DISCOVERIES = [
  ["Per-Layer LR Rotation", "BREAKS", "AdamW 8-bit", 1, "mempalace://rick-notes"],
  ["Jacobian Reg", "BREAKS", "Chaos Injectors", 1, "mempalace://rick-notes"],
  ["Lion", "DEGRADES", "bnb 8-bit", 3, "github.com/bitsandbytes-foundation/bitsandbytes"],
  ["Paged AdamW", "COMPATIBLE", "QLoRA", 2, "huggingface.co/docs/transformers"],
  ["DLRS", "COMPATIBLE", "WSD", 2, "hf.co/spaces/juiceb0xc0de/lr-scheduler-benchmark"],
  ["Cosine", "COMPATIBLE", "AdamW", 1, "pytorch.org/docs"],
  ["FlashAttention-3", "COMPATIBLE", "Llama-3", 2, "github.com/Dao-AILab/flash-attention"],
  ["Liger Kernel", "COMPATIBLE", "Unsloth", 3, "github.com/linkedin/Liger-Kernel"],
].map(([a, relation, b, tier, source]) => ({ a, relation, b, tier, source }));
// ============================================================================
// LIVE API BOOTSTRAP — prefer the live Forge API; fall back to the bundled mock.
// Default base is same-origin (the API serves this frontend in prod).
// Split-port dev: ?api=http://localhost:8010
// ============================================================================
// Base URL of the Forge API, taken from the `api` query parameter.
// An empty string means same-origin. A malformed percent-escape in the
// parameter (e.g. `?api=%`) makes decodeURIComponent throw; that is caught here
// so a bad URL cannot abort the rest of this script, and same-origin is used.
window.FORGE_API = (function () {
  const m = location.search.match(/[?&]api=([^&]+)/);
  if (!m) return "";
  try {
    return decodeURIComponent(m[1]);
  } catch (err) {
    console.warn("[forge] ignoring malformed ?api= override:", err.message);
    return "";
  }
})();
// Becomes true once forgeBootstrap has installed a graph from the live API.
window.FORGE_LIVE = false;
// Try to replace the bundled mock data with live data from the Forge API.
//
// 1. GET {FORGE_API}/graph — when it returns a non-empty `nodes` array and an
//    `edges` array, FORGE_NODES / FORGE_EDGES are replaced (API field names
//    from_canon / to_canon are mapped to from / to) and FORGE_LIVE is set to true.
//    A node's tier is the lowest tier among the edges touching it, defaulting to 2.
// 2. GET {FORGE_API}/feed?n=12 — when it returns a non-empty array,
//    FORGE_DISCOVERIES is replaced.
//
// Failures are logged with console.warn and never thrown. fetch() resolves on
// 4xx/5xx, so the HTTP status is checked explicitly. A failing /graph request
// skips the feed request; a failing /feed request leaves an already-installed
// live graph in place.
// Resolves to the current value of window.FORGE_LIVE.
window.forgeBootstrap = async function () {
  const base = window.FORGE_API.replace(/\/$/, "");

  const getJson = async (path) => {
    const res = await fetch(base + path, { cache: "no-store" });
    if (!res.ok) throw new Error(`${path} responded with HTTP ${res.status}`);
    return res.json();
  };
  const reasonOf = (err) => (err instanceof Error ? err.message : String(err));

  try {
    const g = await getJson("/graph");
    if (g && Array.isArray(g.nodes) && g.nodes.length && Array.isArray(g.edges)) {
      // Map rather than a plain object: canonical ids come from the server.
      const tierByNode = new Map();
      for (const e of g.edges) {
        const edgeTier = e.tier || 2;
        for (const c of [e.from_canon, e.to_canon]) {
          tierByNode.set(c, Math.min(tierByNode.has(c) ? tierByNode.get(c) : 9, edgeTier));
        }
      }
      window.FORGE_NODES = g.nodes.map((n) => ({
        canonical: n.canonical, name: n.name, type: n.type,
        description: n.description || "", tier: tierByNode.get(n.canonical) || 2,
      }));
      window.FORGE_EDGES = g.edges.map((e) => ({
        from: e.from_canon, to: e.to_canon, relation: e.relation, tier: e.tier,
        confidence: e.confidence, fix: e.fix || "", conditions: e.conditions || {},
        evidence: e.evidence || [],
      }));
      window.FORGE_LIVE = true;
    }
  } catch (err) {
    console.warn("[forge] live API unavailable, using bundled mock:", reasonOf(err));
    return window.FORGE_LIVE;
  }

  try {
    const feed = await getJson("/feed?n=12");
    if (Array.isArray(feed) && feed.length) {
      window.FORGE_DISCOVERIES = feed.map((f) => ({
        a: f.a, relation: f.relation, b: f.b, tier: f.tier, source: "bright data", ts_offset: 0,
      }));
    }
  } catch (err) {
    console.warn("[forge] live feed unavailable, keeping current discoveries:", reasonOf(err));
  }
  return window.FORGE_LIVE;
};