sage / docs /flow_llm.mmd
sage002's picture
feat: add authenticated remote control UI and ngrok launcher
15af856 verified
---
title: SAGE — End-to-End Operational Flow
---
flowchart LR
%% =========================================================
%% SAGE Operational Flow — Stage by Stage
%% Left-to-right: raw input → trained model → live serving
%% =========================================================
%% Operator node: the human entry point of the diagram. The MAIN FLOW section
%% links it to the three Stage 0 input nodes (raw, cfg_m, cfg_t).
actor["👤 Operator"]
%% Stage 0 groups the three inputs the operator supplies: the raw JSONL corpus
%% and two YAML configs (model architecture, training schedule).
%% The subgraph declares no internal edges; its nodes are only linked from the MAIN FLOW section.
subgraph stage0["📥 Stage 0 — Inputs"]
raw["Raw Text / JSONL
data/raw/*.jsonl"]
cfg_m["configs/model/1b.yaml
architecture params"]
cfg_t["configs/train/schedule.yaml
LR · warmup · ckpts"]
end
%% Stage 1: tokenizer. The training script produces the tokenizer.model artifact,
%% which the validation script then checks.
subgraph stage1["🔤 Stage 1 — Tokenizer"]
direction TB
tok_train_s["train_tokenizer.py
SentencePiece BPE"]
%% The [( ... )] brackets select Mermaid's cylinder shape, used here for a stored artifact.
tok_model_s[("tokenizer.model
vocab_size = 50k")]
tok_val_s["validate_tokenizer.py
roundtrip checks"]
%% Single chained statement: train -> model artifact -> validation.
tok_train_s --> tok_model_s --> tok_val_s
end
%% Stage 2: data pipeline. A strictly linear chain of four scripts
%% (ingest, filter, dedup, shard) that ends in the Parquet shard store.
subgraph stage2["🔧 Stage 2 — Data Pipeline"]
direction TB
ingest_s["ingest.py
stream JSONL records
add domain/quality tags"]
filter_s["filter.py
quality · lang · PII
safety · license"]
dedup_s["dedup.py
exact + near-dup removal"]
shard_s["shard.py
tokenize → pack
write Parquet shards"]
%% Cylinder-shaped node for the on-disk output; its label lists the shard columns.
parquet_s[("data/processed/
shard-NNNNN.parquet
input_ids · labels · mask")]
%% One chained statement defines the whole pipeline order.
ingest_s --> filter_s --> dedup_s --> shard_s --> parquet_s
end
%% Stage 3: model architecture. ModelConfig feeds the RoPE cache, RMSNorm,
%% GQA attention and SwiGLU MLP nodes; norm, attention and MLP are combined
%% into the transformer block node, which feeds SageTransformer.
subgraph stage3["🧠 Stage 3 — Model"]
direction TB
config_s["ModelConfig
24L · 2048d · GQA 16/8"]
%% The RoPE base is a dimensionless constant (not a frequency in Hz),
%% so the label states it without a unit.
rope_s["RoPE Cache
base_freq=500k
context=4096"]
attn_s["GQAAttention
fused QKV · Flash-SDPA
KV-cache enabled"]
mlp_s["SwiGLU MLP
dim=5632"]
norm_s["RMSNorm
pre-norm · eps=1e-5"]
block_s["TransformerBlock × 24
residual connections"]
transformer_s["SageTransformer
forward → logits + KV"]
%% Fan-out: the config parameterizes every component.
config_s --> rope_s & norm_s & attn_s & mlp_s
%% Rotary embeddings are consumed by attention only.
rope_s --> attn_s
%% Fan-in: the three components form a block; the blocks form the model.
norm_s & attn_s & mlp_s --> block_s --> transformer_s
end
%% Stage 4: training. Hardware config, optimizer/schedule and loss feed the
%% training loop, which emits checkpoints and a metrics store.
subgraph stage4["🏋 Stage 4 — Training"]
direction TB
hw_s["HardwareConfig
auto: device · dtype
batch · grad_accum"]
%% NOTE(review): the label previously read "peak 3e-43e-5" (two values fused together).
%% Reconstructed as peak LR 3e-4 decaying to 3e-5 — confirm against configs/train/schedule.yaml.
opt_s["AdamW + Cosine LR
peak 3e-4 → 3e-5
warmup 2000 steps"]
loss_s["masked_cross_entropy
next-token prediction"]
loop_s["Training Loop
AMP · grad clip 1.0
log every 10 steps"]
ckpt_s["Checkpoints
every 1000 steps
keep last 5"]
%% Cylinder-shaped node for the per-run metrics output.
metrics_s[("runs/name/
metrics.jsonl
W&B optional")]
%% Fan-in to the loop, then fan-out to its two outputs.
hw_s & opt_s & loss_s --> loop_s --> ckpt_s & metrics_s
end
%% Stage 5: evaluation. Three independent evaluations (perplexity, benchmarks,
%% long context) all feed the regression / checkpoint-comparison node.
subgraph stage5["📊 Stage 5 — Evaluation"]
direction TB
ppl_s["Perplexity
val loss on held-out shards"]
bench_s["Benchmarks
task harness registry"]
long_s["Long Context
needle-in-haystack"]
regress_s["Regression
checkpoint comparison"]
%% Fan-in: every evaluation result flows into the regression node.
ppl_s & bench_s & long_s --> regress_s
end
%% Stage 6: serving. Two server nodes (GPU FastAPI, CPU control-plane) both
%% feed the browser IDE node; the KV cache feeds the GPU server and the
%% quantize/export node feeds the CPU server.
subgraph stage6["🚀 Stage 6 — Serving"]
direction TB
%% NOTE(review): "O(1) per-token gen" — attention over a KV cache still reads every
%% cached position, so per-token cost normally grows with context length; confirm the intended wording.
kv_s["KV Cache
O(1) per-token gen"]
quant_s["Quantize
INT8 / GGUF export"]
%% The label lists the three HTTP routes shown for the GPU server.
gpu_api_s["GPU FastAPI Server
/health
/chat
/generate"]
cpu_api_s["CPU Server
control-plane only"]
ide_s["SAGE Browser IDE
Chat · Presets · CLI
Inspector · Settings"]
kv_s --> gpu_api_s
quant_s --> cpu_api_s
%% Both servers connect to the same IDE front end.
gpu_api_s --> ide_s
cpu_api_s --> ide_s
end
%% ===================== MAIN FLOW =====================
%% Cross-stage edges. A target written as stageN attaches the edge to the whole
%% subgraph; a target written as a node id attaches it to that specific node.
%% NOTE(review): Mermaid documents that a subgraph's own direction is ignored when
%% any of its nodes is linked from outside the subgraph. Several edges below start
%% or end on nodes inside stages that declare direction TB — confirm the rendered layout.
%% The operator provides all three Stage 0 inputs.
actor --> raw & cfg_m & cfg_t
%% Raw data feeds both tokenizer training and the data pipeline.
raw --> stage1
raw --> stage2
cfg_m --> stage3
%% The trained tokenizer is consumed by the data pipeline and by the model stage.
tok_model_s --> stage2
tok_model_s --> stage3
%% Training takes the schedule config, the Parquet shards and the model.
cfg_t --> stage4
parquet_s --> stage4
transformer_s --> stage4
%% Evaluation takes the model and the checkpoints.
transformer_s --> stage5
ckpt_s --> stage5
%% Serving: checkpoints go to the GPU server; the model feeds the KV cache and quantize nodes.
ckpt_s --> gpu_api_s
transformer_s --> kv_s & quant_s
%% ===================== STYLE =====================
%% One classDef per category: dark fill, lighter stroke, light text.
classDef actor fill:#1d4ed8,stroke:#93c5fd,color:#ffffff
classDef io fill:#111827,stroke:#6b7280,color:#d1d5db
classDef token fill:#0d5467,stroke:#67e8f9,color:#ffffff
classDef data fill:#0f766e,stroke:#5eead4,color:#ffffff
classDef model fill:#4c1d95,stroke:#c4b5fd,color:#ffffff
classDef train fill:#78350f,stroke:#fcd34d,color:#ffffff
classDef eval fill:#7f1d1d,stroke:#fca5a5,color:#ffffff
classDef serve fill:#065f46,stroke:#86efac,color:#ffffff
%% Class assignments. In the first line below, the first "actor" is the node id
%% and the second is the class name.
class actor actor
%% The Stage 0 inputs plus the parquet_s and metrics_s stores share the io style
%% instead of the style of the stage they sit in.
class raw,cfg_m,cfg_t,parquet_s,metrics_s io
class tok_train_s,tok_model_s,tok_val_s token
class ingest_s,filter_s,dedup_s,shard_s data
class config_s,rope_s,attn_s,mlp_s,norm_s,block_s,transformer_s model
class hw_s,opt_s,loss_s,loop_s,ckpt_s train
class ppl_s,bench_s,long_s,regress_s eval
class kv_s,quant_s,gpu_api_s,cpu_api_s,ide_s serve