| --- |
| title: SAGE 1B — System Architecture |
| --- |
| flowchart TB |
|
|
| %% ========================================================= |
| %% SAGE 1B End-to-End System Architecture |
| %% All modules, data flows, and runtime outputs |
| %% ========================================================= |
|
|
| %% Human entry point: the single node declared outside the "SAGE Repository" subgraph. |
| actor["👤 Developer / Operator"] |
|
|
| subgraph repo["SAGE Repository"] |
| direction TB |
|
|
| subgraph configs["configs/"] |
| %% Configuration nodes: model hyper-parameters and training schedule. |
| %% Each label is quoted and closed on one line; <br/> separates the display lines. |
| %% NOTE(review): labels are rebuilt from the text lines that survived in this file; |
| %% a leading title line (probably the config file name) may be missing — confirm. |
|   cfg_model["24 layers · 2048 d_model<br/>16Q / 8KV heads · 4096 ctx<br/>vocab 50k · RoPE 500kHz"] |
|   cfg_train["LR 3e-4 → 3e-5 · warmup 2000<br/>ckpt every 1000 · eval every 1000"] |
| end |
|
|
| subgraph tokenizer_pkg["tokenizer/"] |
| %% Tokenizer training and validation steps plus the tokenizer artifact (cylinder shape). |
| %% Labels are quoted so the "+" and "--flag" text cannot be read as diagram syntax. |
| %% NOTE(review): labels are rebuilt from the surviving text lines; a leading title |
| %% line per node may be missing — confirm. |
|   tok_train["SentencePiece BPE<br/>--vocab-size --model-prefix"] |
|   tok_validate["roundtrip + edge-case checks"] |
| %% NOTE(review): no edge in this file references tok_model; tok_out under |
| %% "Runtime Outputs" lists the same tokenizer.vocab artifact — confirm whether both are intended. |
|   tok_model[("tokenizer.vocab")] |
| end |
|
|
| subgraph data_pkg["data/"] |
| %% Data pipeline stages, declared in the order the pipeline edges connect them: |
| %% bootstrap, ingest, filter, dedup, shard, dataset, plus the CLI driver. |
| %% Labels must be quoted because they contain "(" and ")" from function names. |
| %% NOTE(review): labels are rebuilt from the surviving text lines; a leading title |
| %% line per node (probably the module file name) may be missing — confirm. |
|   bootstrap["5-source starter JSONL<br/>general_web · code · math<br/>multilingual · synthetic"] |
|   ingest["SOURCE_REGISTRY<br/>stream_source()<br/>stable_record_id()"] |
|   filter_mod["FilterConfig<br/>filter_record()<br/>quality · lang · PII · safety"] |
|   dedup["deduplicate_records()<br/>exact + near-duplicate removal"] |
|   shard["ShardConfig<br/>write_shards()<br/>tokenize → Parquet rows"] |
|   dataset_mod["DatasetConfig<br/>PackedDataset<br/>skip(n_batches)"] |
|   pipeline_cli["build_records()<br/>run_pipeline()<br/>CLI entry point"] |
| end |
|
|
| subgraph model_pkg["model/"] |
| %% Model building blocks, from the config dataclass up to the full SageTransformer. |
| %% Labels must be quoted because they contain "(" and ")" from function names. |
| %% NOTE(review): labels are rebuilt from the surviving text lines; a leading title |
| %% line per node (probably the module file name) may be missing — confirm. |
|   model_cfg["ModelConfig dataclass<br/>from_yaml() · to_dict()"] |
|   rmsnorm["RMSNorm<br/>float32 accumulation"] |
|   rope_mod["build_rope_cache()<br/>rotate_half()<br/>apply_rope()"] |
|   attn_mod["GQAAttention<br/>fused QKV proj<br/>repeat_kv()<br/>Flash-SDPA + KV-cache"] |
|   mlp_mod["SwiGLUMLP<br/>gate × up → silu → down"] |
|   block_mod["TransformerBlock<br/>pre-norm residual"] |
|   sage_model["SageTransformer<br/>_reset_parameters()<br/>forward() → logits + KV"] |
| end |
|
|
| subgraph train_pkg["train/"] |
| %% Training components: hardware detection, distribution strategy, optimizer and |
| %% schedule, loss, checkpointing, and the trainer main loop. |
| %% Labels must be quoted because they contain "(", ")" and "/" characters. |
| %% NOTE(review): labels are rebuilt from the surviving text lines; a leading title |
| %% line per node (probably the module file name) may be missing — confirm. |
|   hw["HardwareConfig<br/>auto device · dtype · VRAM<br/>micro_batch · grad_accum"] |
|   dist["get_training_strategy()<br/>cpu / mps / single<br/>ddp / fsdp + ZeRO"] |
|   opt_mod["ScheduleConfig<br/>create_optimizer() AdamW<br/>lr_lambda() cosine<br/>create_scheduler()"] |
|   loss_mod["masked_cross_entropy()<br/>next-token prediction"] |
|   ckpt_mod["save_checkpoint()<br/>load_latest_checkpoint()<br/>_prune_old_checkpoints()"] |
|   trainer_mod["TrainerConfig<br/>train() main loop<br/>AMP · grad clip · W&B<br/>collate_batch()<br/>create_dataloader()"] |
| end |
|
|
| subgraph eval_pkg["eval/"] |
| %% Evaluation components: perplexity, benchmark harness, long-context probes, |
| %% checkpoint regression comparison, and the benchmark CLI. |
| %% Labels must be quoted because they contain "(" and ")" from function names. |
| %% NOTE(review): labels are rebuilt from the surviving text lines; a leading title |
| %% line per node (probably the module file name) may be missing — confirm. |
|   ppl_mod["evaluate_perplexity()<br/>val loss + ppl score"] |
|   bench_mod["benchmark harness registry<br/>run_all_benchmarks()"] |
|   longctx_mod["needle-in-haystack probes<br/>multi-length position tests"] |
|   regress_mod["compare_checkpoints()<br/>metric delta reporting"] |
|   run_bench["CLI entry point"] |
| end |
|
|
| subgraph serve_pkg["serve/"] |
| %% Serving components: KV cache, quantized export, command/control router, |
| %% the GPU and CPU FastAPI apps, and the IDE front end. |
| %% Labels must be quoted because they contain "(" and ")" from function names. |
| %% NOTE(review): labels are rebuilt from the surviving text lines; a leading title |
| %% line per node (probably the module file name) may be missing — confirm. |
|   kv_mod["KVCache dataclass<br/>empty() · append()"] |
|   quant_mod["export_int8_state_dict()<br/>gguf_conversion_command()"] |
|   ctrl["CommandPreset · PresetField<br/>CommandJob · CommandManager<br/>build_control_router()<br/>_build_presets()<br/>_build_command_for_preset()<br/>HMAC session auth"] |
|   gpu_server["FastAPI GPU app<br/>get_model() lazy load<br/>get_tokenizer()<br/>_generate_token_ids()<br/>chat_status()"] |
|   cpu_server["FastAPI CPU app<br/>control-plane only"] |
|   ide["SAGE IDE<br/>Chat · Presets · Jobs<br/>CLI Terminal · Inspector<br/>Settings · Command Palette"] |
| end |
|
|
| %% Shell launcher scripts. Each node is the source of exactly one edge in the |
| %% edge sections of this file: s_data -> pipeline_cli, s_train -> trainer_mod, |
| %% s_eval -> run_bench, s_serve -> gpu_server, s_serve_cpu -> cpu_server, |
| %% s_tok -> tok_validate. |
| subgraph scripts_pkg["scripts/"] |
| s_data["run_data_pipeline.sh"] |
| s_train["run_training.sh"] |
| s_eval["run_eval.sh"] |
| s_serve["run_serve.sh"] |
| s_serve_cpu["run_serve_cpu.sh"] |
| s_tok["run_validate_tokenizer.sh"] |
| end |
|
|
| subgraph outputs["Runtime Outputs"] |
| %% Artifacts drawn as cylinders: the [( ... )] shape, closed and quoted on one line. |
| %% NOTE(review): labels are rebuilt from the surviving text lines; a leading title |
| %% line per node (probably the output directory) may be missing — confirm. |
|   raw_data[("5 source corpus files")] |
|   processed[("shard-NNNNN.parquet<br/>manifest.json")] |
|   tok_out[("tokenizer.model<br/>tokenizer.vocab")] |
|   run_metrics[("metrics.jsonl")] |
|   run_ckpt[("ckpt_step_XXXXXXX.pt<br/>keep last 5")] |
| end |
| end |
|
|
| %% ===================== ACTOR FLOWS ===================== |
| %% Everything the developer/operator touches directly, as one fan-out. |
| %% The "&" list expands left to right, so the seven edges keep their original order. |
| actor --> bootstrap & tok_train & pipeline_cli & trainer_mod & run_bench & gpu_server & ide |
|
|
| %% ===================== TOKENIZER ===================== |
| %% bootstrap produces the raw corpus node; tok_train takes both the raw corpus |
| %% and the model config as inputs. |
| bootstrap --> raw_data |
| raw_data --> tok_train |
| cfg_model --> tok_train |
| %% The tokenizer artifact (tok_out) fans out to validation and to the shard stage. |
| tok_train --> tok_out |
| tok_out --> tok_validate |
| tok_out --> shard |
|
|
| %% ===================== DATA PIPELINE ===================== |
| %% Linear chain: raw_data -> ingest -> filter_mod -> dedup -> shard -> processed -> dataset_mod. |
| raw_data --> ingest |
| ingest --> filter_mod |
| filter_mod --> dedup |
| dedup --> shard |
| shard --> processed |
| processed --> dataset_mod |
| %% The CLI node is drawn as entering the chain at the ingest stage. |
| pipeline_cli --> ingest |
|
|
| %% ===================== MODEL CONSTRUCTION ===================== |
| %% Config flows into ModelConfig, which feeds each layer component. |
| cfg_model --> model_cfg |
| model_cfg --> rmsnorm & rope_mod & attn_mod & mlp_mod |
| %% RoPE feeds attention; norm, attention and MLP combine into the block, which feeds the model. |
| rope_mod --> attn_mod |
| rmsnorm & attn_mod & mlp_mod --> block_mod |
| block_mod --> sage_model |
|
|
| %% ===================== TRAINING ===================== |
| %% Training config fans out to the optimizer, the trainer and checkpointing. |
| cfg_train --> opt_mod & trainer_mod & ckpt_mod |
| %% Inputs converging on the trainer; "&" groups keep the original edge order. |
| dataset_mod & sage_model & hw --> trainer_mod |
| dist --> hw |
| opt_mod & loss_mod & ckpt_mod --> trainer_mod |
| %% Trainer outputs: metrics, checkpoints, and the perplexity evaluation node. |
| trainer_mod --> run_metrics & run_ckpt & ppl_mod |
| s_train --> trainer_mod |
|
|
| %% ===================== EVALUATION ===================== |
| %% The model feeds all three evaluators; all three feed the regression comparison. |
| sage_model --> ppl_mod & bench_mod & longctx_mod |
| ppl_mod & bench_mod & longctx_mod --> regress_mod |
| %% Launch path: eval script -> benchmark CLI -> benchmark harness. |
| run_bench --> bench_mod |
| s_eval --> run_bench |
|
|
| %% ===================== SERVING ===================== |
| %% GPU server inputs: checkpoint, tokenizer artifact, and the KV cache node. |
| run_ckpt --> gpu_server |
| tok_out --> gpu_server |
| sage_model --> kv_mod |
| sage_model --> quant_mod |
| kv_mod --> gpu_server |
| %% The quantized export node is wired to the CPU server only. |
| quant_mod --> cpu_server |
| %% The control router node feeds both servers, and both servers feed the IDE. |
| ctrl --> gpu_server |
| ctrl --> cpu_server |
| gpu_server --> ide |
| cpu_server --> ide |
| s_serve --> gpu_server |
| s_serve_cpu --> cpu_server |
| %% NOTE(review): the next two launcher edges target the data pipeline CLI and the |
| %% tokenizer validator, not serving nodes; they sit under the SERVING header only by position. |
| s_data --> pipeline_cli |
| s_tok --> tok_validate |
|
|
| %% ===================== STYLE ===================== |
| %% Colour palette: one classDef per node category. Each sets fill, stroke and text colour. |
| %% The class named "actor" shares its name with the node id "actor". |
| classDef actor fill:#1d4ed8,stroke:#bfdbfe,color:#ffffff |
| classDef config fill:#1f2937,stroke:#93c5fd,color:#ffffff |
| classDef pipeline fill:#0f766e,stroke:#5eead4,color:#ffffff |
| classDef model fill:#4c1d95,stroke:#c4b5fd,color:#ffffff |
| classDef train fill:#92400e,stroke:#fcd34d,color:#ffffff |
| classDef eval fill:#7f1d1d,stroke:#fca5a5,color:#ffffff |
| classDef serve fill:#065f46,stroke:#86efac,color:#ffffff |
| %% io and script use light-grey text (#d1d5db) rather than white. |
| classDef io fill:#111827,stroke:#9ca3af,color:#d1d5db |
| classDef script fill:#374151,stroke:#6b7280,color:#d1d5db |
|
|
| %% Attach each node id to its category class (syntax: class <id list> <className>). |
| class actor actor |
| class cfg_model,cfg_train config |
| %% The tokenizer steps (tok_train, tok_validate) share the pipeline colour with the data stages. |
| class bootstrap,ingest,filter_mod,dedup,shard,dataset_mod,pipeline_cli,tok_train,tok_validate pipeline |
| class model_cfg,rmsnorm,rope_mod,attn_mod,mlp_mod,block_mod,sage_model model |
| class hw,dist,opt_mod,loss_mod,ckpt_mod,trainer_mod train |
| class ppl_mod,bench_mod,longctx_mod,regress_mod,run_bench eval |
| class kv_mod,quant_mod,ctrl,gpu_server,cpu_server,ide serve |
| %% tok_model is styled here although it is declared in the tokenizer/ subgraph, not Runtime Outputs. |
| class raw_data,processed,tok_out,tok_model,run_metrics,run_ckpt io |
| class s_data,s_train,s_eval,s_serve,s_serve_cpu,s_tok script |
|
|