---
title: SAGE 1B — System Architecture
---
flowchart TB
    %% =========================================================
    %% SAGE 1B End-to-End System Architecture
    %% All modules, data flows, and runtime outputs
    %% =========================================================
    %% Reading guide:
    %%   - Each subgraph inside "SAGE Repository" is one top-level directory.
    %%   - Rectangular nodes are source files, configs, or scripts.
    %%   - Cylinder nodes are files or directories on disk (styled with class io).
    %%   - Declaration order of nodes and edges is kept stable on purpose,
    %%     because it influences the rendered layout.

    actor["👤 Developer / Operator"]

    subgraph repo["SAGE Repository"]
        direction TB

        subgraph configs["configs/"]
            cfg_model["model/1b.yaml 24 layers · 2048 d_model 16Q / 8KV heads · 4096 ctx vocab 50k · RoPE 500kHz"]
            cfg_train["train/schedule.yaml LR 3e-4 → 3e-5 · warmup 2000 ckpt every 1000 · eval every 1000"]
        end

        subgraph tokenizer_pkg["tokenizer/"]
            tok_train["train_tokenizer.py SentencePiece BPE --vocab-size --model-prefix"]
            tok_validate["validate_tokenizer.py roundtrip + edge-case checks"]
            %% NOTE(review): tok_model has no incoming or outgoing edges below;
            %% the same two files are also shown as tok_out under Runtime Outputs.
            %% Confirm whether one of the two nodes is redundant.
            tok_model[("tokenizer.model tokenizer.vocab")]
        end

        subgraph data_pkg["data/"]
            bootstrap["bootstrap.py 5-source starter JSONL general_web · code · math multilingual · synthetic"]
            ingest["ingest.py SOURCE_REGISTRY stream_source() stable_record_id()"]
            filter_mod["filter.py FilterConfig filter_record() quality · lang · PII · safety"]
            dedup["dedup.py deduplicate_records() exact + near-duplicate removal"]
            shard["shard.py ShardConfig write_shards() tokenize → Parquet rows"]
            dataset_mod["dataset.py DatasetConfig PackedDataset skip(n_batches)"]
            pipeline_cli["pipeline.py build_records() run_pipeline() CLI entry point"]
        end

        subgraph model_pkg["model/"]
            model_cfg["config.py ModelConfig dataclass from_yaml() · to_dict()"]
            rmsnorm["rmsnorm.py RMSNorm float32 accumulation"]
            rope_mod["rope.py build_rope_cache() rotate_half() apply_rope()"]
            attn_mod["attention.py GQAAttention fused QKV proj repeat_kv() Flash-SDPA + KV-cache"]
            mlp_mod["mlp.py SwiGLUMLP gate × up → silu → down"]
            block_mod["block.py TransformerBlock pre-norm residual"]
            sage_model["model.py SageTransformer _reset_parameters() forward() → logits + KV"]
        end

        subgraph train_pkg["train/"]
            hw["hardware.py HardwareConfig auto device · dtype · VRAM micro_batch · grad_accum"]
            dist["distributed.py get_training_strategy() cpu / mps / single ddp / fsdp + ZeRO"]
            opt_mod["optimizer.py ScheduleConfig create_optimizer() AdamW lr_lambda() cosine create_scheduler()"]
            loss_mod["loss.py masked_cross_entropy() next-token prediction"]
            ckpt_mod["checkpoint.py save_checkpoint() load_latest_checkpoint() _prune_old_checkpoints()"]
            trainer_mod["trainer.py TrainerConfig train() main loop AMP · grad clip · W&B collate_batch() create_dataloader()"]
        end

        subgraph eval_pkg["eval/"]
            ppl_mod["perplexity.py evaluate_perplexity() val loss + ppl score"]
            bench_mod["benchmarks.py benchmark harness registry run_all_benchmarks()"]
            longctx_mod["long_context.py needle-in-haystack probes multi-length position tests"]
            regress_mod["regression.py compare_checkpoints() metric delta reporting"]
            run_bench["run_benchmarks.py CLI entry point"]
        end

        subgraph serve_pkg["serve/"]
            kv_mod["kv_cache.py KVCache dataclass empty() · append()"]
            quant_mod["quantize.py export_int8_state_dict() gguf_conversion_command()"]
            ctrl["control_plane.py CommandPreset · PresetField CommandJob · CommandManager build_control_router() _build_presets() _build_command_for_preset() HMAC session auth"]
            gpu_server["server.py FastAPI GPU app get_model() lazy load get_tokenizer() _generate_token_ids() chat_status()"]
            cpu_server["server_cpu.py FastAPI CPU app control-plane only"]
            ide["static/index.html SAGE IDE Chat · Presets · Jobs CLI Terminal · Inspector Settings · Command Palette"]
        end

        subgraph scripts_pkg["scripts/"]
            s_data["run_data_pipeline.sh"]
            s_train["run_training.sh"]
            s_eval["run_eval.sh"]
            s_serve["run_serve.sh"]
            s_serve_cpu["run_serve_cpu.sh"]
            s_tok["run_validate_tokenizer.sh"]
        end

        subgraph outputs["Runtime Outputs"]
            raw_data[("data/raw/*.jsonl 5 source corpus files")]
            processed[("data/processed/ shard-NNNNN.parquet manifest.json")]
            tok_out[("tokenizer/ tokenizer.model tokenizer.vocab")]
            run_metrics[("runs/name/ metrics.jsonl")]
            run_ckpt[("runs/name/ ckpt_step_XXXXXXX.pt keep last 5")]
        end
    end

    %% ===================== ACTOR FLOWS =====================
    actor --> bootstrap
    actor --> tok_train
    actor --> pipeline_cli
    actor --> trainer_mod
    actor --> run_bench
    actor --> gpu_server
    actor --> ide

    %% ===================== TOKENIZER =====================
    bootstrap --> raw_data
    raw_data --> tok_train
    cfg_model --> tok_train
    tok_train --> tok_out
    tok_out --> tok_validate
    tok_out --> shard

    %% ===================== DATA PIPELINE =====================
    raw_data --> ingest
    ingest --> filter_mod
    filter_mod --> dedup
    dedup --> shard
    shard --> processed
    processed --> dataset_mod
    pipeline_cli --> ingest

    %% ===================== MODEL CONSTRUCTION =====================
    cfg_model --> model_cfg
    model_cfg --> rmsnorm
    model_cfg --> rope_mod
    model_cfg --> attn_mod
    model_cfg --> mlp_mod
    rope_mod --> attn_mod
    rmsnorm --> block_mod
    attn_mod --> block_mod
    mlp_mod --> block_mod
    block_mod --> sage_model

    %% ===================== TRAINING =====================
    cfg_train --> opt_mod
    cfg_train --> trainer_mod
    cfg_train --> ckpt_mod
    dataset_mod --> trainer_mod
    sage_model --> trainer_mod
    hw --> trainer_mod
    dist --> hw
    opt_mod --> trainer_mod
    loss_mod --> trainer_mod
    ckpt_mod --> trainer_mod
    trainer_mod --> run_metrics
    trainer_mod --> run_ckpt
    trainer_mod --> ppl_mod
    s_train --> trainer_mod

    %% ===================== EVALUATION =====================
    sage_model --> ppl_mod
    sage_model --> bench_mod
    sage_model --> longctx_mod
    ppl_mod --> regress_mod
    bench_mod --> regress_mod
    longctx_mod --> regress_mod
    run_bench --> bench_mod
    s_eval --> run_bench

    %% ===================== SERVING =====================
    run_ckpt --> gpu_server
    tok_out --> gpu_server
    sage_model --> kv_mod
    sage_model --> quant_mod
    kv_mod --> gpu_server
    quant_mod --> cpu_server
    ctrl --> gpu_server
    ctrl --> cpu_server
    gpu_server --> ide
    cpu_server --> ide
    s_serve --> gpu_server
    s_serve_cpu --> cpu_server
    s_data --> pipeline_cli
    s_tok --> tok_validate

    %% ===================== STYLE =====================
    %% One classDef per node category; the class statements below assign
    %% every declared node to exactly one category.
    classDef actor fill:#1d4ed8,stroke:#bfdbfe,color:#ffffff
    classDef config fill:#1f2937,stroke:#93c5fd,color:#ffffff
    classDef pipeline fill:#0f766e,stroke:#5eead4,color:#ffffff
    classDef model fill:#4c1d95,stroke:#c4b5fd,color:#ffffff
    classDef train fill:#92400e,stroke:#fcd34d,color:#ffffff
    classDef eval fill:#7f1d1d,stroke:#fca5a5,color:#ffffff
    classDef serve fill:#065f46,stroke:#86efac,color:#ffffff
    classDef io fill:#111827,stroke:#9ca3af,color:#d1d5db
    classDef script fill:#374151,stroke:#6b7280,color:#d1d5db

    class actor actor
    class cfg_model,cfg_train config
    class bootstrap,ingest,filter_mod,dedup,shard,dataset_mod,pipeline_cli,tok_train,tok_validate pipeline
    class model_cfg,rmsnorm,rope_mod,attn_mod,mlp_mod,block_mod,sage_model model
    class hw,dist,opt_mod,loss_mod,ckpt_mod,trainer_mod train
    class ppl_mod,bench_mod,longctx_mod,regress_mod,run_bench eval
    class kv_mod,quant_mod,ctrl,gpu_server,cpu_server,ide serve
    class raw_data,processed,tok_out,tok_model,run_metrics,run_ckpt io
    class s_data,s_train,s_eval,s_serve,s_serve_cpu,s_tok script