model-architectures / blt.diff.svg
ArthurZ's picture
ArthurZ HF Staff
Staged data-flow view for codecs/conv/pipelines (meta-only, memory-safe) with shapes + exposed internals
1fc060f verified
|
Raw
History Blame Contribute Delete
32.7 kB
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 1200 2040" width="1200" height="2040" font-size="14">
<style>
  /* Light-theme palette. Component families come as a fill variable plus a
     matching "-s" stroke variable; the remaining variables colour edges,
     diff status (added / over / deleted) and layer types. */
  :root {
    --bg: #ffffff; --fg: #1b1f24; --muted: #6b7280; --panel: #f6f8fa; --grid: #e5e7eb;
    --embed: #dbeafe; --embed-s: #3b82f6;
    --attn: #cffafe; --attn-s: #06b6d4;
    --mamba: #dcfce7; --mamba-s: #22c55e;
    --linattn: #fce7f3; --linattn-s: #ec4899;
    --recur: #ede9fe; --recur-s: #8b5cf6;
    --moe: #ffedd5; --moe-s: #f97316;
    --mlp: #ede9fe; --mlp-s: #8b5cf6;
    --norm: #e5e7eb; --norm-s: #9ca3af;
    --head: #fee2e2; --head-s: #ef4444;
    --config: #f1f5f9; --config-s: #64748b;
    --rope: #fef9c3; --rope-s: #eab308;
    --layer: #f8fafc; --layer-s: #cbd5e1;
    --io: #f1f5f9; --io-s: #94a3b8;
    --soft: #fae8ff; --soft-s: #c026d3;
    --add: #ffffff; --add-s: #475569;
    --block-s: #94a3b8;
    --residual: #f59e0b;
    --added: #16a34a; --over: #d97706; --deleted: #dc2626;
    --lt-full: #06b6d4; --lt-sliding: #3b82f6; --lt-chunked: #8b5cf6;
    --lt-compressed: #f97316; --lt-heavy: #dc2626; --lt-linear: #ec4899; --lt-mamba: #22c55e;
    --cell-on: #0ea5e9; --cell-off: #e5e7eb;
    --vision: #dcfce7; --vision-s: #16a34a; --audio: #fae8ff; --audio-s: #c026d3;
    --proj: #fef3c7; --proj-s: #d97706; --xattn: #db2777;
    --conv: #d1fae5; --conv-s: #10b981; --act: #ecfccb; --act-s: #65a30d;
    --pool: #e0f2fe; --pool-s: #0284c7; --quant: #fae8ff; --quant-s: #c026d3;
  }

  /* Dark theme: only the base colours and the fill variables are overridden;
     the "-s" stroke variables keep the values declared above. */
  @media (prefers-color-scheme: dark) {
    :root {
      --bg: #0d1117; --fg: #e6edf3; --muted: #8b949e; --panel: #161b22; --grid: #30363d;
      --embed: #172554; --attn: #083344; --mamba: #052e16; --linattn: #500724;
      --recur: #2e1065; --moe: #431407; --mlp: #2e1065; --norm: #21262d; --head: #450a0a;
      --config: #1e293b; --rope: #422006; --layer: #161b22;
      --conv: #022c22; --act: #1a2e05; --pool: #082f49; --quant: #3b0764; --proj: #422006;
      --io: #1e293b; --soft: #3b0764; --add: #0d1117; --cell-off: #21262d;
    }
  }

  /* Canvas and typography. */
  .bg { fill: var(--bg); }
  text { font-family: ui-sans-serif, -apple-system, "Segoe UI", Roboto, sans-serif; fill: var(--fg); }
  .title { font-size: 22px; font-weight: 700; }
  .subtitle { font-size: 13px; fill: var(--muted); }
  .box-label { font-size: 14px; font-weight: 600; }
  .box-label.sm { font-size: 12.5px; }
  .box-sub { font-size: 11px; fill: var(--muted); }
  .glyph { font-size: 18px; font-weight: 700; fill: var(--add-s); }
  .badge { font-size: 12px; font-weight: 700; fill: var(--fg); }
  .facts-k { font-size: 11.5px; fill: var(--muted); }
  .facts-v { font-size: 11.5px; font-weight: 600; }
  .legend-t { font-size: 11.5px; fill: var(--fg); }
  .panel { fill: var(--panel); stroke: var(--grid); }

  /* Shared box outline. No corner radius is set here on purpose: every
     rect.b in this file carries its own rx attribute (9, 11 or 24), and in
     renderers that treat rx as a CSS property an author rule outranks the
     presentation attribute, which would flatten all boxes to one radius. */
  rect.b { stroke-width: 1.6; }

  /* Component fills and strokes, one class per palette family. */
  .c-embed { fill: var(--embed); stroke: var(--embed-s); }
  .c-attn { fill: var(--attn); stroke: var(--attn-s); }
  .c-mamba { fill: var(--mamba); stroke: var(--mamba-s); }
  .c-linattn { fill: var(--linattn); stroke: var(--linattn-s); }
  .c-recur { fill: var(--recur); stroke: var(--recur-s); }
  .c-moe { fill: var(--moe); stroke: var(--moe-s); }
  .c-mlp { fill: var(--mlp); stroke: var(--mlp-s); }
  .c-norm { fill: var(--norm); stroke: var(--norm-s); }
  .c-head { fill: var(--head); stroke: var(--head-s); }
  .c-config { fill: var(--config); stroke: var(--config-s); }
  .c-rope { fill: var(--rope); stroke: var(--rope-s); }
  .c-proj { fill: var(--proj); stroke: var(--proj-s); }
  .c-conv { fill: var(--conv); stroke: var(--conv-s); }
  .c-act { fill: var(--act); stroke: var(--act-s); }
  .c-pool { fill: var(--pool); stroke: var(--pool-s); }
  .c-quant { fill: var(--quant); stroke: var(--quant-s); }
  .c-layer { fill: var(--layer); stroke: var(--layer-s); }
  .c-io { fill: var(--io); stroke: var(--io-s); }
  .c-soft { fill: var(--soft); stroke: var(--soft-s); }
  .c-add { fill: var(--add); stroke: var(--add-s); }
  .c-block { fill: none; stroke: var(--block-s); stroke-width: 1.6; stroke-dasharray: 7 5; }

  /* Layer-type swatches: fill and stroke share one colour. */
  .c-lt-full { fill: var(--lt-full); stroke: var(--lt-full); }
  .c-lt-sliding { fill: var(--lt-sliding); stroke: var(--lt-sliding); }
  .c-lt-chunked { fill: var(--lt-chunked); stroke: var(--lt-chunked); }
  .c-lt-compressed { fill: var(--lt-compressed); stroke: var(--lt-compressed); }
  .c-lt-heavy { fill: var(--lt-heavy); stroke: var(--lt-heavy); }
  .c-lt-linear { fill: var(--lt-linear); stroke: var(--lt-linear); }
  .c-lt-mamba { fill: var(--lt-mamba); stroke: var(--lt-mamba); }

  /* Grid and mask cells. */
  .cell-on { fill: var(--cell-on); }
  .cell-off { fill: var(--cell-off); }
  .grid-frame { fill: none; stroke: var(--grid); stroke-width: 1; }
  .mask-bg { fill: var(--cell-off); }
  .mask-on { fill: #22c55e; }
  .mask-div { stroke: var(--fg); stroke-width: 1.5; stroke-dasharray: 3 2; }

  /* Modality fills and the nested section frame with its header bar.
     (.c-proj used to be declared a second time here with identical
     declarations; the single definition above is sufficient.) */
  .c-vision { fill: var(--vision); stroke: var(--vision-s); }
  .c-audio { fill: var(--audio); stroke: var(--audio-s); }
  .c-sub { fill: var(--bg); stroke: var(--block-s); stroke-width: 1.2; }
  .sec-h { font-size: 12px; font-weight: 700; }
  .sec-hbar { fill: var(--bg); opacity: 0.82; }

  /* Two-class selector so the cross-attention colour wins over .residual
     regardless of rule order. */
  .residual.xattn { stroke: var(--xattn); stroke-width: 2.4; }

  /* Dimmed rendering applied to whole groups. */
  .ghost { opacity: 0.32; stroke-dasharray: 4 3; }

  /* Diff-status outlines; the class may sit on the box or on an ancestor. */
  .ch-added rect.b, rect.b.ch-added { stroke: var(--added); stroke-width: 3.2; }
  .ch-over rect.b, rect.b.ch-over { stroke: var(--over); stroke-width: 3.2; }
  .ch-deleted rect.b, rect.b.ch-deleted { stroke: var(--deleted); stroke-width: 3.2; }

  /* Connectors. */
  .edge { stroke: var(--grid); stroke-width: 2; }
  .flow { stroke: var(--grid); stroke-width: 2; fill: none; }
  .residual { stroke: var(--residual); stroke-width: 2; fill: none; }
  .rope { stroke: var(--rope-s); stroke-width: 2.2; fill: none; }
  .xattn { stroke: var(--xattn); stroke-width: 2.4; fill: none; }

  /* Miscellaneous glyph colours. */
  .cell-idx { font-size: 9px; fill: #ffffff; font-weight: 600; }
  .sky { fill: #bae6fd; }
  .sun { fill: #fde047; }
  .hill { fill: #4ade80; }
</style>
<defs>
  <!-- Arrowhead markers, one per connector colour. All four share the same
       triangle geometry and differ only in id and fill. Within this file
       only ah-flow is referenced (by the flow polylines). -->
  <marker id="ah-flow" viewBox="0 0 10 10" refX="8" refY="5" markerWidth="7" markerHeight="7" orient="auto-start-reverse">
    <path d="M0,0 L10,5 L0,10 z" fill="var(--grid)"/>
  </marker>
  <marker id="ah-residual" viewBox="0 0 10 10" refX="8" refY="5" markerWidth="7" markerHeight="7" orient="auto-start-reverse">
    <path d="M0,0 L10,5 L0,10 z" fill="var(--residual)"/>
  </marker>
  <marker id="ah-rope" viewBox="0 0 10 10" refX="8" refY="5" markerWidth="7" markerHeight="7" orient="auto-start-reverse">
    <path d="M0,0 L10,5 L0,10 z" fill="var(--rope-s)"/>
  </marker>
  <marker id="ah-xattn" viewBox="0 0 10 10" refX="8" refY="5" markerWidth="7" markerHeight="7" orient="auto-start-reverse">
    <path d="M0,0 L10,5 L0,10 z" fill="var(--xattn)"/>
  </marker>
</defs>
<!-- Full-canvas background (matches the 1200 x 2040 viewBox) and the heading. -->
<rect x="0" y="0" width="1200" height="2040" class="bg"/>
<text x="24" y="34" class="title">blt</text>
<text x="24" y="54" class="subtitle">diff vs mllama (+llama) · 12 overridden · 27 added · 0 deleted · 6 new · 2 inherited-as-is</text>
<!-- Section frames. Each group is a frame rect, a translucent header bar and
     a header label. Groups with class ghost are dimmed by the .ghost rule;
     frames with ch-over get the amber diff outline. Document order is kept:
     an enclosing frame is always painted before the frames nested in it. -->

<!-- Top-level module frames. -->
<g class="ghost">
  <rect class="b c-sub" x="300" y="181" width="580" height="552" rx="11"/>
  <rect class="sec-hbar" x="305" y="185" width="570" height="18" rx="5"/>
  <text class="sec-h" x="312" y="198">local_encoder · LocalEncoder</text>
</g>
<g class="ghost">
  <rect class="b c-sub" x="300" y="1103" width="580" height="447" rx="11"/>
  <rect class="sec-hbar" x="305" y="1107" width="570" height="18" rx="5"/>
  <text class="sec-h" x="312" y="1120">local_decoder · LocalDecoder</text>
</g>
<g class="ghost">
  <rect class="b c-sub" x="300" y="1632" width="580" height="380" rx="11"/>
  <rect class="sec-hbar" x="305" y="1636" width="570" height="18" rx="5"/>
  <text class="sec-h" x="312" y="1649">patcher · Patcher</text>
</g>
<g class="ghost">
  <rect class="b c-sub" x="300" y="757" width="580" height="322" rx="11"/>
  <rect class="sec-hbar" x="305" y="761" width="570" height="18" rx="5"/>
  <text class="sec-h" x="312" y="774">global_transformer · GlobalTransformer</text>
</g>

<!-- Layer-list frames. -->
<g class="ghost">
  <rect class="b c-sub" x="310" y="209" width="560" height="319" rx="11"/>
  <rect class="sec-hbar" x="315" y="213" width="550" height="18" rx="5"/>
  <text class="sec-h" x="322" y="226">layers · ModuleList</text>
</g>
<g>
  <rect class="b c-sub ch-over" x="318" y="234" width="544" height="281" rx="11"/>
  <rect class="sec-hbar" x="323" y="238" width="534" height="18" rx="5"/>
  <text class="sec-h" x="330" y="251">0 · BltTransformerLayer</text>
</g>
<g>
  <rect class="b c-sub ch-over" x="310" y="785" width="560" height="252" rx="11"/>
  <rect class="sec-hbar" x="315" y="789" width="550" height="18" rx="5"/>
  <text class="sec-h" x="322" y="802">layers · 25× BltTransformerLayer</text>
</g>
<g>
  <rect class="b c-sub ch-over" x="310" y="1131" width="560" height="252" rx="11"/>
  <rect class="sec-hbar" x="315" y="1135" width="550" height="18" rx="5"/>
  <text class="sec-h" x="322" y="1148">layers · 9× BltTransformerLayer</text>
</g>
<g>
  <rect class="b c-sub ch-over" x="310" y="1689" width="560" height="252" rx="11"/>
  <rect class="sec-hbar" x="315" y="1693" width="550" height="18" rx="5"/>
  <text class="sec-h" x="322" y="1706">layers · 14× BltTransformerLayer</text>
</g>

<!-- Cross-attention frames. -->
<g class="ghost">
  <rect class="b c-sub" x="310" y="593" width="560" height="127" rx="11"/>
  <rect class="sec-hbar" x="315" y="597" width="550" height="18" rx="5"/>
  <text class="sec-h" x="322" y="610">cross_attn_layers · ModuleList</text>
</g>
<g>
  <rect class="b c-sub ch-over" x="310" y="1448" width="560" height="89" rx="11"/>
  <rect class="sec-hbar" x="315" y="1452" width="550" height="18" rx="5"/>
  <text class="sec-h" x="322" y="1465">cross_attn_layers · 9× BltCrossAttention</text>
</g>
<g>
  <rect class="b c-sub ch-over" x="318" y="618" width="544" height="89" rx="11"/>
  <rect class="sec-hbar" x="323" y="622" width="534" height="18" rx="5"/>
  <text class="sec-h" x="330" y="635">0 · BltCrossAttention</text>
</g>

<!-- self_attn / mlp frame pairs, in source order. -->
<g>
  <rect class="b c-sub ch-over" x="318" y="810" width="544" height="89" rx="11"/>
  <rect class="sec-hbar" x="323" y="814" width="534" height="18" rx="5"/>
  <text class="sec-h" x="330" y="827">self_attn · BltSelfAttention</text>
</g>
<g class="ghost">
  <rect class="b c-sub" x="318" y="906" width="544" height="89" rx="11"/>
  <rect class="sec-hbar" x="323" y="910" width="534" height="18" rx="5"/>
  <text class="sec-h" x="330" y="923">mlp · MllamaMLP ↩ inherited</text>
</g>
<g>
  <rect class="b c-sub ch-over" x="318" y="1156" width="544" height="89" rx="11"/>
  <rect class="sec-hbar" x="323" y="1160" width="534" height="18" rx="5"/>
  <text class="sec-h" x="330" y="1173">self_attn · BltSelfAttention</text>
</g>
<g class="ghost">
  <rect class="b c-sub" x="318" y="1252" width="544" height="89" rx="11"/>
  <rect class="sec-hbar" x="323" y="1256" width="534" height="18" rx="5"/>
  <text class="sec-h" x="330" y="1269">mlp · MllamaMLP ↩ inherited</text>
</g>
<g>
  <rect class="b c-sub ch-over" x="318" y="1714" width="544" height="89" rx="11"/>
  <rect class="sec-hbar" x="323" y="1718" width="534" height="18" rx="5"/>
  <text class="sec-h" x="330" y="1731">self_attn · BltSelfAttention</text>
</g>
<g class="ghost">
  <rect class="b c-sub" x="318" y="1810" width="544" height="89" rx="11"/>
  <rect class="sec-hbar" x="323" y="1814" width="534" height="18" rx="5"/>
  <text class="sec-h" x="330" y="1827">mlp · MllamaMLP ↩ inherited</text>
</g>
<g>
  <rect class="b c-sub ch-over" x="326" y="259" width="528" height="89" rx="11"/>
  <rect class="sec-hbar" x="331" y="263" width="518" height="18" rx="5"/>
  <text class="sec-h" x="338" y="276">self_attn · BltSelfAttention</text>
</g>
<g class="ghost">
  <rect class="b c-sub" x="326" y="355" width="528" height="89" rx="11"/>
  <rect class="sec-hbar" x="331" y="359" width="518" height="18" rx="5"/>
  <text class="sec-h" x="338" y="372">mlp · MllamaMLP ↩ inherited</text>
</g>
<!-- Vertical connectors on the centre line x = 590. Each one spans the
     24-unit gap between the bottom edge of one top-level box and the top
     edge of the next, and ends in the ah-flow arrowhead. -->
<polyline points="590.0,157 590.0,181" class="flow" marker-end="url(#ah-flow)"/>
<polyline points="590.0,733 590.0,757" class="flow" marker-end="url(#ah-flow)"/>
<polyline points="590.0,1079 590.0,1103" class="flow" marker-end="url(#ah-flow)"/>
<polyline points="590.0,1550 590.0,1574" class="flow" marker-end="url(#ah-flow)"/>
<polyline points="590.0,1608 590.0,1632" class="flow" marker-end="url(#ah-flow)"/>
<!-- Entry node at the top of the flow: a pill-shaped box (rx 24 on a
     49-unit-high rect) with a label and a sub-label. -->
<g>
  <rect class="b c-io" x="300" y="108" width="580" height="49" rx="24"/>
  <text class="box-label sm" x="590.0" y="130.5" text-anchor="middle">inputs</text>
  <text class="box-sub" x="590.0" y="145.5" text-anchor="middle">input_ids / pixel_values / input_values</text>
</g>
<!-- Leaf boxes positioned inside the local_encoder frame (y 181 to 733).
     Each group is a hover title, a box and a centred label; non-ghost
     groups also carry a small status dot in the box's top-left corner. -->

<!-- Layer 0 self_attn projections. -->
<g class="ghost">
  <title>q_proj: Linear [1024→1024]</title>
  <rect class="b c-proj" x="334" y="284" width="166" height="22" rx="9"/>
  <text class="box-label sm" x="417.0" y="299.5" text-anchor="middle">q_proj [1024→1024]</text>
</g>
<g class="ghost">
  <title>k_proj: Linear [1024→1024]</title>
  <rect class="b c-proj" x="507" y="284" width="166" height="22" rx="9"/>
  <text class="box-label sm" x="590.0" y="299.5" text-anchor="middle">k_proj [1024→1024]</text>
</g>
<g class="ghost">
  <title>v_proj: Linear [1024→1024]</title>
  <rect class="b c-proj" x="680" y="284" width="166" height="22" rx="9"/>
  <text class="box-label sm" x="763.0" y="299.5" text-anchor="middle">v_proj [1024→1024]</text>
</g>
<g class="ghost">
  <title>o_proj: Linear [1024→1024]</title>
  <rect class="b c-proj" x="334" y="313" width="512" height="22" rx="9"/>
  <text class="box-label sm" x="590.0" y="328.5" text-anchor="middle">o_proj [1024→1024]</text>
</g>

<!-- Layer 0 mlp. -->
<g class="ghost">
  <title>gate_proj: Linear [1024→2730]</title>
  <rect class="b c-proj" x="334" y="380" width="166" height="22" rx="9"/>
  <text class="box-label sm" x="417.0" y="395.5" text-anchor="middle">gate_proj [1024→2730]</text>
</g>
<g class="ghost">
  <title>up_proj: Linear [1024→2730]</title>
  <rect class="b c-proj" x="507" y="380" width="166" height="22" rx="9"/>
  <text class="box-label sm" x="590.0" y="395.5" text-anchor="middle">up_proj [1024→2730]</text>
</g>
<g class="ghost">
  <title>down_proj: Linear [2730→1024]</title>
  <rect class="b c-proj" x="680" y="380" width="166" height="22" rx="9"/>
  <text class="box-label sm" x="763.0" y="395.5" text-anchor="middle">down_proj [2730→1024]</text>
</g>
<g class="ghost">
  <title>act_fn: SiLUActivation</title>
  <rect class="b c-act" x="334" y="409" width="512" height="22" rx="9"/>
  <text class="box-label sm" x="590.0" y="424.5" text-anchor="middle">act_fn SiLU</text>
</g>

<!-- Layer 0 norms (outlined as added). -->
<g>
  <title>input_layernorm: BltRMSNorm 1024</title>
  <rect class="b c-norm ch-added" x="326" y="451" width="528" height="22" rx="9"/>
  <text class="box-label sm" x="590.0" y="466.5" text-anchor="middle">input_layernorm RMSNorm 1024</text>
  <circle cx="338" cy="463" r="5" fill="var(--added)"/>
</g>
<g>
  <title>post_attention_layernorm: BltRMSNorm 1024</title>
  <rect class="b c-norm ch-added" x="326" y="480" width="528" height="22" rx="9"/>
  <text class="box-label sm" x="590.0" y="495.5" text-anchor="middle">post_attention_layernorm RMSNorm 1024</text>
  <circle cx="338" cy="492" r="5" fill="var(--added)"/>
</g>

<!-- Boxes placed below the layers frame. -->
<g>
  <title>rotary_emb: BltRotaryEmbedding</title>
  <rect class="b c-rope ch-over" x="310" y="535" width="276" height="22" rx="9"/>
  <text class="box-label sm" x="448.0" y="550.5" text-anchor="middle">rotary_emb RotaryEmbedding</text>
  <circle cx="322" cy="547" r="5" fill="var(--over)"/>
</g>
<g>
  <title>patch_embedding_projection: Linear [1024→2048]</title>
  <rect class="b c-proj ch-added" x="593" y="535" width="276" height="22" rx="9"/>
  <text class="box-label sm" x="731.0" y="550.5" text-anchor="middle">patch_embedding_projection [1024→2048]</text>
  <circle cx="605" cy="547" r="5" fill="var(--added)"/>
</g>
<g>
  <title>embed_tokens: Embedding [260×1024]</title>
  <rect class="b c-embed ch-over" x="310" y="564" width="560" height="22" rx="9"/>
  <text class="box-label sm" x="590.0" y="579.5" text-anchor="middle">embed_tokens Embedding [260×1024]</text>
  <circle cx="322" cy="576" r="5" fill="var(--over)"/>
</g>
<!-- Leaf boxes inside the "0 · BltCrossAttention" frame of the local
     encoder (y 618 to 707): three projections on the first row, then
     o_proj and the two norms on the second. All are dimmed groups. -->
<g class="ghost">
  <title>q_proj: Linear [1024→1024]</title>
  <rect class="b c-proj" x="326" y="643" width="171" height="22" rx="9"/>
  <text class="box-label sm" x="411.5" y="658.5" text-anchor="middle">q_proj [1024→1024]</text>
</g>
<g class="ghost">
  <title>k_proj: Linear [1024→1024]</title>
  <rect class="b c-proj" x="504" y="643" width="171" height="22" rx="9"/>
  <text class="box-label sm" x="589.5" y="658.5" text-anchor="middle">k_proj [1024→1024]</text>
</g>
<g class="ghost">
  <title>v_proj: Linear [1024→1024]</title>
  <rect class="b c-proj" x="682" y="643" width="171" height="22" rx="9"/>
  <text class="box-label sm" x="767.5" y="658.5" text-anchor="middle">v_proj [1024→1024]</text>
</g>
<g class="ghost">
  <title>o_proj: Linear [1024→1024]</title>
  <rect class="b c-proj" x="326" y="672" width="171" height="22" rx="9"/>
  <text class="box-label sm" x="411.5" y="687.5" text-anchor="middle">o_proj [1024→1024]</text>
</g>
<g class="ghost">
  <title>q_norm: BltRMSNorm 1024</title>
  <rect class="b c-norm" x="504" y="672" width="171" height="22" rx="9"/>
  <text class="box-label sm" x="589.5" y="687.5" text-anchor="middle">q_norm RMSNorm 1024</text>
</g>
<g class="ghost">
  <title>k_norm: BltRMSNorm 1024</title>
  <rect class="b c-norm" x="682" y="672" width="171" height="22" rx="9"/>
  <text class="box-label sm" x="767.5" y="687.5" text-anchor="middle">k_norm RMSNorm 1024</text>
</g>
<!-- Leaf boxes positioned inside the global_transformer frame
     (y 757 to 1079). -->

<!-- self_attn projections. -->
<g class="ghost">
  <title>q_proj: Linear [2048→2048]</title>
  <rect class="b c-proj" x="326" y="835" width="171" height="22" rx="9"/>
  <text class="box-label sm" x="411.5" y="850.5" text-anchor="middle">q_proj [2048→2048]</text>
</g>
<g class="ghost">
  <title>k_proj: Linear [2048→2048]</title>
  <rect class="b c-proj" x="504" y="835" width="171" height="22" rx="9"/>
  <text class="box-label sm" x="589.5" y="850.5" text-anchor="middle">k_proj [2048→2048]</text>
</g>
<g class="ghost">
  <title>v_proj: Linear [2048→2048]</title>
  <rect class="b c-proj" x="682" y="835" width="171" height="22" rx="9"/>
  <text class="box-label sm" x="767.5" y="850.5" text-anchor="middle">v_proj [2048→2048]</text>
</g>
<g class="ghost">
  <title>o_proj: Linear [2048→2048]</title>
  <rect class="b c-proj" x="326" y="864" width="528" height="22" rx="9"/>
  <text class="box-label sm" x="590.0" y="879.5" text-anchor="middle">o_proj [2048→2048]</text>
</g>

<!-- mlp. -->
<g class="ghost">
  <title>gate_proj: Linear [2048→5632]</title>
  <rect class="b c-proj" x="326" y="931" width="171" height="22" rx="9"/>
  <text class="box-label sm" x="411.5" y="946.5" text-anchor="middle">gate_proj [2048→5632]</text>
</g>
<g class="ghost">
  <title>up_proj: Linear [2048→5632]</title>
  <rect class="b c-proj" x="504" y="931" width="171" height="22" rx="9"/>
  <text class="box-label sm" x="589.5" y="946.5" text-anchor="middle">up_proj [2048→5632]</text>
</g>
<g class="ghost">
  <title>down_proj: Linear [5632→2048]</title>
  <rect class="b c-proj" x="682" y="931" width="171" height="22" rx="9"/>
  <text class="box-label sm" x="767.5" y="946.5" text-anchor="middle">down_proj [5632→2048]</text>
</g>
<g class="ghost">
  <title>act_fn: SiLUActivation</title>
  <rect class="b c-act" x="326" y="960" width="528" height="22" rx="9"/>
  <text class="box-label sm" x="590.0" y="975.5" text-anchor="middle">act_fn SiLU</text>
</g>

<!-- Layer norms, side by side (outlined as added). -->
<g>
  <title>input_layernorm: BltRMSNorm 2048</title>
  <rect class="b c-norm ch-added" x="318" y="1002" width="268" height="22" rx="9"/>
  <text class="box-label sm" x="452.0" y="1017.5" text-anchor="middle">input_layernorm RMSNorm 2048</text>
  <circle cx="330" cy="1014" r="5" fill="var(--added)"/>
</g>
<g>
  <title>post_attention_layernorm: BltRMSNorm 2048</title>
  <rect class="b c-norm ch-added" x="593" y="1002" width="268" height="22" rx="9"/>
  <text class="box-label sm" x="727.0" y="1017.5" text-anchor="middle">post_attention_layernorm RMSNorm 2048</text>
  <circle cx="605" cy="1014" r="5" fill="var(--added)"/>
</g>

<!-- Boxes placed below the layers frame. -->
<g>
  <title>rotary_emb: BltRotaryEmbedding</title>
  <rect class="b c-rope ch-over" x="310" y="1044" width="276" height="22" rx="9"/>
  <text class="box-label sm" x="448.0" y="1059.5" text-anchor="middle">rotary_emb RotaryEmbedding</text>
  <circle cx="322" cy="1056" r="5" fill="var(--over)"/>
</g>
<g>
  <title>token_embedding_projection: Identity</title>
  <rect class="b c-sub ch-added" x="593" y="1044" width="276" height="22" rx="9"/>
  <text class="box-label sm" x="731.0" y="1059.5" text-anchor="middle">token_embedding_projection Identity</text>
  <circle cx="605" cy="1056" r="5" fill="var(--added)"/>
</g>
<!-- Leaf boxes positioned inside the local_decoder frame (y 1103 to 1550). -->

<!-- self_attn projections. -->
<g class="ghost">
  <title>q_proj: Linear [1024→1024]</title>
  <rect class="b c-proj" x="326" y="1181" width="171" height="22" rx="9"/>
  <text class="box-label sm" x="411.5" y="1196.5" text-anchor="middle">q_proj [1024→1024]</text>
</g>
<g class="ghost">
  <title>k_proj: Linear [1024→1024]</title>
  <rect class="b c-proj" x="504" y="1181" width="171" height="22" rx="9"/>
  <text class="box-label sm" x="589.5" y="1196.5" text-anchor="middle">k_proj [1024→1024]</text>
</g>
<g class="ghost">
  <title>v_proj: Linear [1024→1024]</title>
  <rect class="b c-proj" x="682" y="1181" width="171" height="22" rx="9"/>
  <text class="box-label sm" x="767.5" y="1196.5" text-anchor="middle">v_proj [1024→1024]</text>
</g>
<g class="ghost">
  <title>o_proj: Linear [1024→1024]</title>
  <rect class="b c-proj" x="326" y="1210" width="528" height="22" rx="9"/>
  <text class="box-label sm" x="590.0" y="1225.5" text-anchor="middle">o_proj [1024→1024]</text>
</g>

<!-- mlp. -->
<g class="ghost">
  <title>gate_proj: Linear [1024→2816]</title>
  <rect class="b c-proj" x="326" y="1277" width="171" height="22" rx="9"/>
  <text class="box-label sm" x="411.5" y="1292.5" text-anchor="middle">gate_proj [1024→2816]</text>
</g>
<g class="ghost">
  <title>up_proj: Linear [1024→2816]</title>
  <rect class="b c-proj" x="504" y="1277" width="171" height="22" rx="9"/>
  <text class="box-label sm" x="589.5" y="1292.5" text-anchor="middle">up_proj [1024→2816]</text>
</g>
<g class="ghost">
  <title>down_proj: Linear [2816→1024]</title>
  <rect class="b c-proj" x="682" y="1277" width="171" height="22" rx="9"/>
  <text class="box-label sm" x="767.5" y="1292.5" text-anchor="middle">down_proj [2816→1024]</text>
</g>
<g class="ghost">
  <title>act_fn: SiLUActivation</title>
  <rect class="b c-act" x="326" y="1306" width="528" height="22" rx="9"/>
  <text class="box-label sm" x="590.0" y="1321.5" text-anchor="middle">act_fn SiLU</text>
</g>

<!-- Layer norms, side by side (outlined as added). -->
<g>
  <title>input_layernorm: BltRMSNorm 1024</title>
  <rect class="b c-norm ch-added" x="318" y="1348" width="268" height="22" rx="9"/>
  <text class="box-label sm" x="452.0" y="1363.5" text-anchor="middle">input_layernorm RMSNorm 1024</text>
  <circle cx="330" cy="1360" r="5" fill="var(--added)"/>
</g>
<g>
  <title>post_attention_layernorm: BltRMSNorm 1024</title>
  <rect class="b c-norm ch-added" x="593" y="1348" width="268" height="22" rx="9"/>
  <text class="box-label sm" x="727.0" y="1363.5" text-anchor="middle">post_attention_layernorm RMSNorm 1024</text>
  <circle cx="605" cy="1360" r="5" fill="var(--added)"/>
</g>

<!-- Boxes placed between the layers frame and the cross-attention frame. -->
<g>
  <title>rotary_emb: BltRotaryEmbedding</title>
  <rect class="b c-rope ch-over" x="310" y="1390" width="276" height="22" rx="9"/>
  <text class="box-label sm" x="448.0" y="1405.5" text-anchor="middle">rotary_emb RotaryEmbedding</text>
  <circle cx="322" cy="1402" r="5" fill="var(--over)"/>
</g>
<g>
  <title>patch_embedding_projection: Linear [2048→2048]</title>
  <rect class="b c-proj ch-added" x="593" y="1390" width="276" height="22" rx="9"/>
  <text class="box-label sm" x="731.0" y="1405.5" text-anchor="middle">patch_embedding_projection [2048→2048]</text>
  <circle cx="605" cy="1402" r="5" fill="var(--added)"/>
</g>
<g>
  <title>norm: BltRMSNorm 1024</title>
  <rect class="b c-norm ch-added" x="310" y="1419" width="560" height="22" rx="9"/>
  <text class="box-label sm" x="590.0" y="1434.5" text-anchor="middle">norm RMSNorm 1024</text>
  <circle cx="322" cy="1431" r="5" fill="var(--added)"/>
</g>

<!-- Leaves inside the "cross_attn_layers · 9× BltCrossAttention" frame. -->
<g class="ghost">
  <title>q_proj: Linear [1024→1024]</title>
  <rect class="b c-proj" x="318" y="1473" width="176" height="22" rx="9"/>
  <text class="box-label sm" x="406.0" y="1488.5" text-anchor="middle">q_proj [1024→1024]</text>
</g>
<g class="ghost">
  <title>k_proj: Linear [1024→1024]</title>
  <rect class="b c-proj" x="501" y="1473" width="176" height="22" rx="9"/>
  <text class="box-label sm" x="589.0" y="1488.5" text-anchor="middle">k_proj [1024→1024]</text>
</g>
<g class="ghost">
  <title>v_proj: Linear [1024→1024]</title>
  <rect class="b c-proj" x="685" y="1473" width="176" height="22" rx="9"/>
  <text class="box-label sm" x="773.0" y="1488.5" text-anchor="middle">v_proj [1024→1024]</text>
</g>
<g class="ghost">
  <title>o_proj: Linear [1024→1024]</title>
  <rect class="b c-proj" x="318" y="1502" width="176" height="22" rx="9"/>
  <text class="box-label sm" x="406.0" y="1517.5" text-anchor="middle">o_proj [1024→1024]</text>
</g>
<g class="ghost">
  <title>q_norm: BltRMSNorm 1024</title>
  <rect class="b c-norm" x="501" y="1502" width="176" height="22" rx="9"/>
  <text class="box-label sm" x="589.0" y="1517.5" text-anchor="middle">q_norm RMSNorm 1024</text>
</g>
<g class="ghost">
  <title>k_norm: BltRMSNorm 1024</title>
  <rect class="b c-norm" x="685" y="1502" width="176" height="22" rx="9"/>
  <text class="box-label sm" x="773.0" y="1517.5" text-anchor="middle">k_norm RMSNorm 1024</text>
</g>
<!-- Full-width node between the local_decoder and patcher frames; it sits
     on the flow line and carries the amber diff outline and status dot. -->
<g>
  <rect class="b c-embed ch-over" x="300" y="1574" width="580" height="34" rx="9"/>
  <text class="box-label sm" x="590.0" y="1595.5" text-anchor="middle">encoder_hash_tok_embedding · Embedding</text>
  <circle cx="312" cy="1586" r="5" fill="var(--over)"/>
</g>
<!-- Leaf boxes positioned inside the patcher frame (y 1632 to 2012). -->

<!-- Placed above the layers frame. -->
<g>
  <title>rotary_emb: BltRotaryEmbedding</title>
  <rect class="b c-rope ch-over" x="310" y="1660" width="560" height="22" rx="9"/>
  <text class="box-label sm" x="590.0" y="1675.5" text-anchor="middle">rotary_emb RotaryEmbedding</text>
  <circle cx="322" cy="1672" r="5" fill="var(--over)"/>
</g>

<!-- self_attn projections. -->
<g class="ghost">
  <title>q_proj: Linear [768→768]</title>
  <rect class="b c-proj" x="326" y="1739" width="171" height="22" rx="9"/>
  <text class="box-label sm" x="411.5" y="1754.5" text-anchor="middle">q_proj [768→768]</text>
</g>
<g class="ghost">
  <title>k_proj: Linear [768→768]</title>
  <rect class="b c-proj" x="504" y="1739" width="171" height="22" rx="9"/>
  <text class="box-label sm" x="589.5" y="1754.5" text-anchor="middle">k_proj [768→768]</text>
</g>
<g class="ghost">
  <title>v_proj: Linear [768→768]</title>
  <rect class="b c-proj" x="682" y="1739" width="171" height="22" rx="9"/>
  <text class="box-label sm" x="767.5" y="1754.5" text-anchor="middle">v_proj [768→768]</text>
</g>
<g class="ghost">
  <title>o_proj: Linear [768→768]</title>
  <rect class="b c-proj" x="326" y="1768" width="528" height="22" rx="9"/>
  <text class="box-label sm" x="590.0" y="1783.5" text-anchor="middle">o_proj [768→768]</text>
</g>

<!-- mlp. -->
<g class="ghost">
  <title>gate_proj: Linear [768→2048]</title>
  <rect class="b c-proj" x="326" y="1835" width="171" height="22" rx="9"/>
  <text class="box-label sm" x="411.5" y="1850.5" text-anchor="middle">gate_proj [768→2048]</text>
</g>
<g class="ghost">
  <title>up_proj: Linear [768→2048]</title>
  <rect class="b c-proj" x="504" y="1835" width="171" height="22" rx="9"/>
  <text class="box-label sm" x="589.5" y="1850.5" text-anchor="middle">up_proj [768→2048]</text>
</g>
<g class="ghost">
  <title>down_proj: Linear [2048→768]</title>
  <rect class="b c-proj" x="682" y="1835" width="171" height="22" rx="9"/>
  <text class="box-label sm" x="767.5" y="1850.5" text-anchor="middle">down_proj [2048→768]</text>
</g>
<g class="ghost">
  <title>act_fn: SiLUActivation</title>
  <rect class="b c-act" x="326" y="1864" width="528" height="22" rx="9"/>
  <text class="box-label sm" x="590.0" y="1879.5" text-anchor="middle">act_fn SiLU</text>
</g>

<!-- Layer norms, side by side (outlined as added). -->
<g>
  <title>input_layernorm: BltRMSNorm 768</title>
  <rect class="b c-norm ch-added" x="318" y="1906" width="268" height="22" rx="9"/>
  <text class="box-label sm" x="452.0" y="1921.5" text-anchor="middle">input_layernorm RMSNorm 768</text>
  <circle cx="330" cy="1918" r="5" fill="var(--added)"/>
</g>
<g>
  <title>post_attention_layernorm: BltRMSNorm 768</title>
  <rect class="b c-norm ch-added" x="593" y="1906" width="268" height="22" rx="9"/>
  <text class="box-label sm" x="727.0" y="1921.5" text-anchor="middle">post_attention_layernorm RMSNorm 768</text>
  <circle cx="605" cy="1918" r="5" fill="var(--added)"/>
</g>

<!-- Placed below the layers frame. -->
<g>
  <title>embed_tokens: Embedding [260×768]</title>
  <rect class="b c-embed ch-over" x="310" y="1948" width="276" height="22" rx="9"/>
  <text class="box-label sm" x="448.0" y="1963.5" text-anchor="middle">embed_tokens Embedding [260×768]</text>
  <circle cx="322" cy="1960" r="5" fill="var(--over)"/>
</g>
<g>
  <title>norm: BltRMSNorm 768</title>
  <rect class="b c-norm ch-added" x="593" y="1948" width="276" height="22" rx="9"/>
  <text class="box-label sm" x="731.0" y="1963.5" text-anchor="middle">norm RMSNorm 768</text>
  <circle cx="605" cy="1960" r="5" fill="var(--added)"/>
</g>
<g>
  <title>lm_head: Linear [768→260]</title>
  <rect class="b c-head ch-over" x="310" y="1977" width="560" height="22" rx="9"/>
  <text class="box-label sm" x="590.0" y="1992.5" text-anchor="middle">lm_head [768→260]</text>
  <circle cx="322" cy="1989" r="5" fill="var(--over)"/>
</g>
<!-- Facts panel: a key / value table in the right-hand column. Keys are
     left-aligned at x = 926, values right-aligned at x = 1162, with rows
     22 units apart. The counts repeat those in the subtitle. -->
<rect class="panel" x="912" y="88" width="264" height="174" rx="8"/>
<g>
  <text x="926" y="112" class="facts-k">model id</text>
  <text x="1162" y="112" class="facts-v" text-anchor="end">itazap/blt-1b-hf</text>
</g>
<g>
  <text x="926" y="134" class="facts-k">parent</text>
  <text x="1162" y="134" class="facts-v" text-anchor="end">mllama</text>
</g>
<g>
  <text x="926" y="156" class="facts-k">classes</text>
  <text x="1162" y="156" class="facts-v" text-anchor="end">13</text>
</g>
<g>
  <text x="926" y="178" class="facts-k">overridden</text>
  <text x="1162" y="178" class="facts-v" text-anchor="end">12</text>
</g>
<g>
  <text x="926" y="200" class="facts-k">added</text>
  <text x="1162" y="200" class="facts-v" text-anchor="end">27</text>
</g>
<g>
  <text x="926" y="222" class="facts-k">new classes</text>
  <text x="1162" y="222" class="facts-v" text-anchor="end">6</text>
</g>
<g>
  <text x="926" y="244" class="facts-k">inherited as-is</text>
  <text x="1162" y="244" class="facts-v" text-anchor="end">2</text>
</g>
<!-- Legend: one outlined swatch per diff status, each followed by its
     caption. Swatch strokes use the same colour variables as the ch-added,
     ch-over and ch-deleted outlines; the last swatch uses the grid colour. -->
<text x="914" y="282" class="legend-t" font-weight="700">legend</text>
<g>
  <rect x="916" y="293" width="16" height="12" rx="2" fill="none" stroke="var(--added)" stroke-width="3"/>
  <text x="940" y="303" class="legend-t">new submodule (vs parent)</text>
</g>
<g>
  <rect x="916" y="311" width="16" height="12" rx="2" fill="none" stroke="var(--over)" stroke-width="3"/>
  <text x="940" y="321" class="legend-t">changed / redefined</text>
</g>
<g>
  <rect x="916" y="329" width="16" height="12" rx="2" fill="none" stroke="var(--deleted)" stroke-width="3"/>
  <text x="940" y="339" class="legend-t">deleted</text>
</g>
<g>
  <rect x="916" y="347" width="16" height="12" rx="2" fill="none" stroke="var(--grid)" stroke-width="3"/>
  <text x="940" y="357" class="legend-t">inherited / copy-pasted</text>
</g>
<!-- Per-class change list. Each entry is a status dot, the class name and a
     one-line summary, with entries 28 units apart. The first five dots use
     the "over" colour, the remaining six the "added" colour. -->
<text x="914" y="380" class="legend-t" font-weight="700">changes by class</text>

<!-- Entries with the "over" dot. -->
<g>
  <circle cx="920" cy="393" r="4" fill="var(--over)"/>
  <text x="932" y="396" class="facts-v">BltCrossAttention</text>
  <text x="932" y="409" class="box-sub">ovr __init__,forward</text>
</g>
<g>
  <circle cx="920" cy="421" r="4" fill="var(--over)"/>
  <text x="932" y="424" class="facts-v">BltPreTrainedModel</text>
  <text x="932" y="437" class="box-sub">ovr _init_weights; 6 attr</text>
</g>
<g>
  <circle cx="920" cy="449" r="4" fill="var(--over)"/>
  <text x="932" y="452" class="facts-v">BltRotaryEmbedding</text>
  <text x="932" y="465" class="box-sub">ovr forward</text>
</g>
<g>
  <circle cx="920" cy="477" r="4" fill="var(--over)"/>
  <text x="932" y="480" class="facts-v">BltSelfAttention</text>
  <text x="932" y="493" class="box-sub">ovr __init__</text>
</g>
<g>
  <circle cx="920" cy="505" r="4" fill="var(--over)"/>
  <text x="932" y="508" class="facts-v">BltTransformerLayer</text>
  <text x="932" y="521" class="box-sub">ovr __init__</text>
</g>

<!-- Entries with the "added" dot. -->
<g>
  <circle cx="920" cy="533" r="4" fill="var(--added)"/>
  <text x="932" y="536" class="facts-v">BltForCausalLM</text>
  <text x="932" y="549" class="box-sub">add __init__,forward; 4 attr</text>
</g>
<g>
  <circle cx="920" cy="561" r="4" fill="var(--added)"/>
  <text x="932" y="564" class="facts-v">BltGlobalTransformer</text>
  <text x="932" y="577" class="box-sub">add __init__,forward; 2 attr</text>
</g>
<g>
  <circle cx="920" cy="589" r="4" fill="var(--added)"/>
  <text x="932" y="592" class="facts-v">BltLocalDecoder</text>
  <text x="932" y="605" class="box-sub">add __init__,forward; 1 attr</text>
</g>
<g>
  <circle cx="920" cy="617" r="4" fill="var(--added)"/>
  <text x="932" y="620" class="facts-v">BltLocalEncoder</text>
  <text x="932" y="633" class="box-sub">add __init__,forward,patch_reduce; 2 attr</text>
</g>
<g>
  <circle cx="920" cy="645" r="4" fill="var(--added)"/>
  <text x="932" y="648" class="facts-v">BltModel</text>
  <text x="932" y="661" class="box-sub">add __init__,_patch_ids_from_lengths,forward…</text>
</g>
<g>
  <circle cx="920" cy="673" r="4" fill="var(--added)"/>
  <text x="932" y="676" class="facts-v">BltPatcher</text>
  <text x="932" y="689" class="box-sub">add __init__,forward,patch_lengths_from_entr…</text>
</g>
</svg>