---
title: SAGE 1B — System Architecture
---
flowchart TB
%% =========================================================
%% SAGE 1B End-to-End System Architecture
%% All modules, data flows, and runtime outputs
%% =========================================================
actor["👤 Developer / Operator"]
subgraph repo["SAGE Repository"]
direction TB
%% configs/ : YAML configuration nodes. cfg_model summarizes the model
%% architecture file (1b.yaml) and cfg_train the training schedule file.
%% NOTE(review): the label text RoPE 500kHz presumably denotes a RoPE base
%% (theta) of 500k rather than a frequency in kHz; confirm against model/1b.yaml.
subgraph configs["configs/"]
cfg_model["model/1b.yaml
24 layers · 2048 d_model
16Q / 8KV heads · 4096 ctx
vocab 50k · RoPE 500kHz"]
cfg_train["train/schedule.yaml
LR 3e-4 → 3e-5 · warmup 2000
ckpt every 1000 · eval every 1000"]
end
%% tokenizer/ : tokenizer training script, tokenizer validation script, and a
%% cylinder-shaped artifact node for the tokenizer.model / tokenizer.vocab files.
%% NOTE(review): tok_model is not the source or target of any edge visible in
%% this diagram; the flows use tok_out in the Runtime Outputs subgraph instead.
%% Confirm whether tok_model should be linked or dropped.
subgraph tokenizer_pkg["tokenizer/"]
tok_train["train_tokenizer.py
SentencePiece BPE
--vocab-size --model-prefix"]
tok_validate["validate_tokenizer.py
roundtrip + edge-case checks"]
tok_model[("tokenizer.model
tokenizer.vocab")]
end
%% data/ : one node per data-preparation module. Labels list the file name
%% followed by the key classes/functions or responsibilities of that module.
subgraph data_pkg["data/"]
%% Starter corpus generation (five named sources).
bootstrap["bootstrap.py
5-source starter JSONL
general_web · code · math
multilingual · synthetic"]
%% Record-level stages: ingest, filter, dedup.
ingest["ingest.py
SOURCE_REGISTRY
stream_source()
stable_record_id()"]
filter_mod["filter.py
FilterConfig
filter_record()
quality · lang · PII · safety"]
dedup["dedup.py
deduplicate_records()
exact + near-duplicate removal"]
%% Tokenized shard writing and the dataset reader used for training.
shard["shard.py
ShardConfig
write_shards()
tokenize → Parquet rows"]
dataset_mod["dataset.py
DatasetConfig
PackedDataset
skip(n_batches)"]
%% Command-line entry point for the pipeline.
pipeline_cli["pipeline.py
build_records()
run_pipeline()
CLI entry point"]
end
%% model/ : one node per model source file, from the config dataclass through
%% the individual layer components up to the full SageTransformer in model.py.
subgraph model_pkg["model/"]
model_cfg["config.py
ModelConfig dataclass
from_yaml() · to_dict()"]
%% Layer building blocks.
rmsnorm["rmsnorm.py
RMSNorm
float32 accumulation"]
rope_mod["rope.py
build_rope_cache()
rotate_half()
apply_rope()"]
attn_mod["attention.py
GQAAttention
fused QKV proj
repeat_kv()
Flash-SDPA + KV-cache"]
mlp_mod["mlp.py
SwiGLUMLP
gate × up → silu → down"]
%% Composite modules: the transformer block and the top-level model.
block_mod["block.py
TransformerBlock
pre-norm residual"]
sage_model["model.py
SageTransformer
_reset_parameters()
forward() → logits + KV"]
end
%% train/ : one node per training module: hardware detection, distributed
%% strategy selection, optimizer/scheduler, loss, checkpointing, and the trainer.
subgraph train_pkg["train/"]
%% Environment and parallelism setup.
hw["hardware.py
HardwareConfig
auto device · dtype · VRAM
micro_batch · grad_accum"]
dist["distributed.py
get_training_strategy()
cpu / mps / single
ddp / fsdp + ZeRO"]
%% Optimization components.
opt_mod["optimizer.py
ScheduleConfig
create_optimizer() AdamW
lr_lambda() cosine
create_scheduler()"]
loss_mod["loss.py
masked_cross_entropy()
next-token prediction"]
%% Checkpoint persistence and the main training loop.
ckpt_mod["checkpoint.py
save_checkpoint()
load_latest_checkpoint()
_prune_old_checkpoints()"]
trainer_mod["trainer.py
TrainerConfig
train() main loop
AMP · grad clip · W&B
collate_batch()
create_dataloader()"]
end
%% eval/ : evaluation modules (perplexity, benchmark harness, long-context
%% probes, checkpoint regression comparison) plus the benchmark CLI entry point.
subgraph eval_pkg["eval/"]
ppl_mod["perplexity.py
evaluate_perplexity()
val loss + ppl score"]
bench_mod["benchmarks.py
benchmark harness registry
run_all_benchmarks()"]
longctx_mod["long_context.py
needle-in-haystack probes
multi-length position tests"]
regress_mod["regression.py
compare_checkpoints()
metric delta reporting"]
run_bench["run_benchmarks.py
CLI entry point"]
end
%% serve/ : inference-side modules: KV cache, quantization/export helpers,
%% the control plane, the GPU and CPU FastAPI apps, and the static web IDE.
subgraph serve_pkg["serve/"]
%% Inference support modules.
kv_mod["kv_cache.py
KVCache dataclass
empty() · append()"]
quant_mod["quantize.py
export_int8_state_dict()
gguf_conversion_command()"]
%% Control plane: presets, jobs, router construction, and session auth.
ctrl["control_plane.py
CommandPreset · PresetField
CommandJob · CommandManager
build_control_router()
_build_presets()
_build_command_for_preset()
HMAC session auth"]
%% HTTP applications: the GPU app and the CPU app labelled control-plane only.
gpu_server["server.py
FastAPI GPU app
get_model() lazy load
get_tokenizer()
_generate_token_ids()
chat_status()"]
cpu_server["server_cpu.py
FastAPI CPU app
control-plane only"]
%% Browser front end served from static/.
ide["static/index.html
SAGE IDE
Chat · Presets · Jobs
CLI Terminal · Inspector
Settings · Command Palette"]
end
%% scripts/ : shell launcher scripts, one node per script. Each node is the
%% source of exactly one edge in the flow sections further down the diagram.
subgraph scripts_pkg["scripts/"]
s_data["run_data_pipeline.sh"]
s_train["run_training.sh"]
s_eval["run_eval.sh"]
s_serve["run_serve.sh"]
s_serve_cpu["run_serve_cpu.sh"]
s_tok["run_validate_tokenizer.sh"]
end
%% Runtime Outputs : on-disk artifacts, drawn with the cylinder shape [( )]:
%% raw corpus files, processed Parquet shards with a manifest, tokenizer files,
%% per-run metrics, and per-run checkpoints.
subgraph outputs["Runtime Outputs"]
raw_data[("data/raw/*.jsonl
5 source corpus files")]
processed[("data/processed/
shard-NNNNN.parquet
manifest.json")]
tok_out[("tokenizer/
tokenizer.model
tokenizer.vocab")]
run_metrics[("runs/name/
metrics.jsonl")]
%% The label records a retention of the last 5 checkpoints.
run_ckpt[("runs/name/
ckpt_step_XXXXXXX.pt
keep last 5")]
end
end
%% ---------------------------------------------------------
%% ACTOR FLOWS
%% Everything the developer/operator touches directly: the
%% bootstrap and tokenizer-training scripts, the pipeline and
%% benchmark CLIs, the trainer, the GPU server and the IDE.
%% ---------------------------------------------------------
actor --> bootstrap & tok_train & pipeline_cli & trainer_mod & run_bench & gpu_server & ide
%% ---------------------------------------------------------
%% TOKENIZER
%% Raw corpus plus the model config feed tokenizer training;
%% the resulting tokenizer files feed validation and sharding.
%% ---------------------------------------------------------
bootstrap --> raw_data --> tok_train
cfg_model --> tok_train --> tok_out --> tok_validate & shard
%% ---------------------------------------------------------
%% DATA PIPELINE
%% Linear chain from raw JSONL to the dataset reader, with
%% the pipeline CLI shown entering the chain at ingest.
%% ---------------------------------------------------------
raw_data --> ingest --> filter_mod --> dedup --> shard --> processed --> dataset_mod
pipeline_cli --> ingest
%% ---------------------------------------------------------
%% MODEL CONSTRUCTION
%% The YAML config feeds config.py, which fans out to the
%% layer components; norm, attention and MLP converge on the
%% transformer block, which feeds the full model.
%% ---------------------------------------------------------
cfg_model --> model_cfg --> rmsnorm & rope_mod & attn_mod & mlp_mod
rope_mod --> attn_mod
rmsnorm & attn_mod & mlp_mod --> block_mod --> sage_model
%% ---------------------------------------------------------
%% TRAINING
%% Inputs converge on trainer.py, which emits metrics and
%% checkpoints and points at the perplexity evaluator.
%% ---------------------------------------------------------
%% Schedule config consumers.
cfg_train --> opt_mod & trainer_mod & ckpt_mod
%% Data, model and hardware settings into the trainer.
dataset_mod & sage_model & hw --> trainer_mod
%% NOTE(review): this arrow runs from distributed.py to hardware.py;
%% confirm that direction is the intended one.
dist --> hw
%% Optimizer, loss and checkpoint helpers into the trainer.
opt_mod & loss_mod & ckpt_mod --> trainer_mod
%% Trainer outputs.
trainer_mod --> run_metrics & run_ckpt & ppl_mod
%% Shell launcher.
s_train --> trainer_mod
%% ---------------------------------------------------------
%% EVALUATION
%% The model feeds three evaluators, all of which feed the
%% regression comparison; the benchmark CLI and its launcher
%% script are drawn as the way into the benchmark harness.
%% ---------------------------------------------------------
sage_model --> ppl_mod & bench_mod & longctx_mod
ppl_mod & bench_mod & longctx_mod --> regress_mod
run_bench --> bench_mod
s_eval --> run_bench
%% ---------------------------------------------------------
%% SERVING
%% Checkpoints, tokenizer files and the KV cache feed the GPU
%% app; quantization feeds the CPU app; the control plane
%% feeds both apps, and both apps feed the IDE.
%% ---------------------------------------------------------
run_ckpt & tok_out --> gpu_server
sage_model --> kv_mod & quant_mod
kv_mod --> gpu_server
quant_mod --> cpu_server
ctrl --> gpu_server & cpu_server
gpu_server & cpu_server --> ide
%% Serve launchers.
s_serve --> gpu_server
s_serve_cpu --> cpu_server
%% Remaining launchers: data pipeline and tokenizer validation.
s_data --> pipeline_cli
s_tok --> tok_validate
%% ===================== STYLE =====================
%% One classDef per architectural layer. Each sets the node fill color,
%% the border (stroke) color, and the label text color.
classDef actor fill:#1d4ed8,stroke:#bfdbfe,color:#ffffff
classDef config fill:#1f2937,stroke:#93c5fd,color:#ffffff
classDef pipeline fill:#0f766e,stroke:#5eead4,color:#ffffff
classDef model fill:#4c1d95,stroke:#c4b5fd,color:#ffffff
classDef train fill:#92400e,stroke:#fcd34d,color:#ffffff
classDef eval fill:#7f1d1d,stroke:#fca5a5,color:#ffffff
classDef serve fill:#065f46,stroke:#86efac,color:#ffffff
classDef io fill:#111827,stroke:#9ca3af,color:#d1d5db
classDef script fill:#374151,stroke:#6b7280,color:#d1d5db
%% Class assignments: each line attaches a comma-separated list of node ids
%% to one of the classes defined above.
%% The first assignment applies the class named actor to the node whose id is also actor.
class actor actor
class cfg_model,cfg_train config
%% The tokenizer scripts tok_train and tok_validate share the pipeline palette with the data/ modules.
class bootstrap,ingest,filter_mod,dedup,shard,dataset_mod,pipeline_cli,tok_train,tok_validate pipeline
class model_cfg,rmsnorm,rope_mod,attn_mod,mlp_mod,block_mod,sage_model model
class hw,dist,opt_mod,loss_mod,ckpt_mod,trainer_mod train
class ppl_mod,bench_mod,longctx_mod,regress_mod,run_bench eval
class kv_mod,quant_mod,ctrl,gpu_server,cpu_server,ide serve
%% io covers the cylinder-shaped artifact nodes, including tok_model from the tokenizer/ subgraph.
class raw_data,processed,tok_out,tok_model,run_metrics,run_ckpt io
class s_data,s_train,s_eval,s_serve,s_serve_cpu,s_tok script
%% End of diagram.