| --- |
| title: SAGE — End-to-End Operational Flow |
| --- |
| flowchart LR |
|
|
| %% ========================================================= |
| %% SAGE Operational Flow — Stage by Stage |
| %% Left-to-right: raw input → trained model → live serving |
| %% ========================================================= |
|
|
| %% Human operator node; the main-flow edges start from here and fan out to the Stage 0 inputs. |
| actor["👤 Operator"] |
|
|
| %% Stage 0: the three inputs of the flow (raw JSONL files, architecture params, training settings). |
| %% NOTE(review): labels carry only the detail text present in the source; a heading line per node may have been lost. Confirm against the original diagram. |
| subgraph stage0["📥 Stage 0 — Inputs"] |
| raw["data/raw/*.jsonl"] |
| cfg_m["architecture params"] |
| cfg_t["LR · warmup · ckpts"] |
| end |
|
|
| %% Stage 1: tokenizer chain. Training step, then the stored tokenizer model (cylinder), then validation. |
| %% NOTE(review): labels carry only the detail text present in the source; a heading line per node may have been lost. Confirm against the original diagram. |
| subgraph stage1["🔤 Stage 1 — Tokenizer"] |
| direction TB |
| tok_train_s["SentencePiece BPE"] |
| tok_model_s[("vocab_size = 50k")] |
| tok_val_s["roundtrip checks"] |
| tok_train_s --> tok_model_s --> tok_val_s |
| end |
|
|
| %% Stage 2: linear data pipeline. Ingest, filter, dedup, shard, ending in the Parquet shard store (cylinder). |
| %% NOTE(review): labels carry only the detail text present in the source; a heading line per node may have been lost. Confirm against the original diagram. |
| subgraph stage2["🔧 Stage 2 — Data Pipeline"] |
| direction TB |
| ingest_s["stream JSONL records<br/>add domain/quality tags"] |
| filter_s["quality · lang · PII<br/>safety · license"] |
| dedup_s["exact + near-dup removal"] |
| shard_s["tokenize → pack<br/>write Parquet shards"] |
| parquet_s[("shard-NNNNN.parquet<br/>input_ids · labels · mask")] |
| ingest_s --> filter_s --> dedup_s --> shard_s --> parquet_s |
| end |
|
|
| %% Stage 3: model assembly. The config node fans out to the components, which combine into a block and then the transformer. |
| %% NOTE(review): labels carry only the detail text present in the source; a heading line per node may have been lost. Confirm against the original diagram. |
| %% NOTE(review): a RoPE base is normally a dimensionless value, so the "kHz" suffix on base_freq looks like a unit slip. Confirm before changing the label. |
| subgraph stage3["🧠 Stage 3 — Model"] |
| direction TB |
| config_s["24L · 2048d · GQA 16/8"] |
| rope_s["base_freq=500kHz<br/>context=4096"] |
| attn_s["fused QKV · Flash-SDPA<br/>KV-cache enabled"] |
| mlp_s["dim=5632"] |
| norm_s["pre-norm · eps=1e-5"] |
| block_s["residual connections"] |
| transformer_s["forward → logits + KV"] |
| config_s --> rope_s & norm_s & attn_s & mlp_s |
| rope_s --> attn_s |
| norm_s & attn_s & mlp_s --> block_s --> transformer_s |
| end |
|
|
| %% Stage 4: training. Hardware setup, optimizer schedule and loss feed the loop, which emits checkpoints and metrics (cylinder). |
| %% NOTE(review): labels carry only the detail text present in the source; a heading line per node may have been lost. Confirm against the original diagram. |
| subgraph stage4["🏋 Stage 4 — Training"] |
| direction TB |
| hw_s["auto: device · dtype<br/>batch · grad_accum"] |
| opt_s["peak 3e-4 → 3e-5<br/>warmup 2000 steps"] |
| loss_s["next-token prediction"] |
| loop_s["AMP · grad clip 1.0<br/>log every 10 steps"] |
| ckpt_s["every 1000 steps<br/>keep last 5"] |
| metrics_s[("metrics.jsonl<br/>W&B optional")] |
| hw_s & opt_s & loss_s --> loop_s --> ckpt_s & metrics_s |
| end |
|
|
| %% Stage 5: evaluation. Three independent evaluation nodes all feed the regression (checkpoint comparison) node. |
| %% NOTE(review): labels carry only the detail text present in the source; a heading line per node may have been lost. Confirm against the original diagram. |
| subgraph stage5["📊 Stage 5 — Evaluation"] |
| direction TB |
| ppl_s["val loss on held-out shards"] |
| bench_s["task harness registry"] |
| long_s["needle-in-haystack"] |
| regress_s["checkpoint comparison"] |
| ppl_s & bench_s & long_s --> regress_s |
| end |
|
|
| %% Stage 6: serving. The KV-cache path feeds the GPU API, the quantized export feeds the CPU API, and both APIs feed the IDE node. |
| %% NOTE(review): labels carry only the detail text present in the source; a heading line per node may have been lost. Confirm against the original diagram. |
| %% NOTE(review): "O(1) per-token gen" presumably refers to skipping K/V recomputation; attention over the cache still grows with context length. Confirm wording. |
| subgraph stage6["🚀 Stage 6 — Serving"] |
| direction TB |
| kv_s["O(1) per-token gen"] |
| quant_s["INT8 / GGUF export"] |
| gpu_api_s["/health<br/>/chat<br/>/generate"] |
| cpu_api_s["control-plane only"] |
| ide_s["Chat · Presets · CLI<br/>Inspector · Settings"] |
| kv_s --> gpu_api_s |
| quant_s --> cpu_api_s |
| gpu_api_s --> ide_s |
| cpu_api_s --> ide_s |
| end |
|
|
| %% ===================== MAIN FLOW ===================== |
| %% Cross-stage edges, grouped by the stage they feed. Edge order is kept as originally declared. |
| %% Operator to the three Stage 0 inputs. |
| actor --> raw |
| actor --> cfg_m |
| actor --> cfg_t |
| %% Inputs and tokenizer model into the tokenizer, data pipeline and model stages. |
| raw --> stage1 & stage2 |
| cfg_m --> stage3 |
| tok_model_s --> stage2 & stage3 |
|
|
| %% Training config, Parquet shards and the transformer into training. |
| cfg_t & parquet_s & transformer_s --> stage4 |
|
|
| %% Transformer and checkpoints into evaluation. |
| transformer_s & ckpt_s --> stage5 |
|
|
| %% Checkpoints and transformer into the serving nodes. |
| ckpt_s --> gpu_api_s |
| transformer_s --> kv_s |
| transformer_s --> quant_s |
|
|
| %% ===================== STYLE ===================== |
| %% Each colour class is declared and then applied straight away to the nodes that use it. |
| classDef actor fill:#1d4ed8,stroke:#93c5fd,color:#ffffff |
| class actor actor |
|
| %% Inputs and stored artifacts (the Stage 0 nodes plus the Parquet and metrics cylinders). |
| classDef io fill:#111827,stroke:#6b7280,color:#d1d5db |
| class raw,cfg_m,cfg_t,parquet_s,metrics_s io |
|
| classDef token fill:#0d5467,stroke:#67e8f9,color:#ffffff |
| class tok_train_s,tok_model_s,tok_val_s token |
|
| classDef data fill:#0f766e,stroke:#5eead4,color:#ffffff |
| class ingest_s,filter_s,dedup_s,shard_s data |
|
| classDef model fill:#4c1d95,stroke:#c4b5fd,color:#ffffff |
| class config_s,rope_s,attn_s,mlp_s,norm_s,block_s,transformer_s model |
|
| classDef train fill:#78350f,stroke:#fcd34d,color:#ffffff |
| class hw_s,opt_s,loss_s,loop_s,ckpt_s train |
|
| classDef eval fill:#7f1d1d,stroke:#fca5a5,color:#ffffff |
| class ppl_s,bench_s,long_s,regress_s eval |
|
| classDef serve fill:#065f46,stroke:#86efac,color:#ffffff |
| class kv_s,quant_s,gpu_api_s,cpu_api_s,ide_s serve |
|
|