File size: 9,462 Bytes
ef18673
15af856
ef18673
 
 
 
15af856
 
ef18673
 
15af856
ef18673
 
 
 
 
15af856
 
 
 
 
 
 
ef18673
 
15af856
 
 
 
 
 
 
 
ef18673
 
15af856
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ef18673
 
15af856
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ef18673
 
15af856
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ef18673
 
15af856
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ef18673
 
15af856
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ef18673
 
15af856
ef18673
 
 
15af856
 
 
ef18673
 
 
15af856
 
 
 
 
 
 
 
 
 
 
 
 
ef18673
 
 
15af856
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ef18673
15af856
 
 
ef18673
15af856
ef18673
 
15af856
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ef18673
15af856
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
---
title: SAGE 1B — System Architecture
---
flowchart TB

    %% =========================================================
    %% SAGE 1B End-to-End System Architecture
    %% All modules, data flows, and runtime outputs
    %% =========================================================

    actor["👤 Developer / Operator"]

    subgraph repo["SAGE Repository"]
        direction TB

        subgraph configs["configs/"]
            cfg_model["model/1b.yaml
            24 layers · 2048 d_model
            16Q / 8KV heads · 4096 ctx
            vocab 50k · RoPE θ 500k"]
            cfg_train["train/schedule.yaml
            LR 3e-4 → 3e-5 · warmup 2000
            ckpt every 1000 · eval every 1000"]
        end

        subgraph tokenizer_pkg["tokenizer/"]
            tok_train["train_tokenizer.py
            SentencePiece BPE
            --vocab-size --model-prefix"]
            tok_validate["validate_tokenizer.py
            roundtrip + edge-case checks"]
            tok_model[("tokenizer.model
            tokenizer.vocab")]
        end

        subgraph data_pkg["data/"]
            bootstrap["bootstrap.py
            5-source starter JSONL
            general_web · code · math
            multilingual · synthetic"]
            ingest["ingest.py
            SOURCE_REGISTRY
            stream_source()
            stable_record_id()"]
            filter_mod["filter.py
            FilterConfig
            filter_record()
            quality · lang · PII · safety"]
            dedup["dedup.py
            deduplicate_records()
            exact + near-duplicate removal"]
            shard["shard.py
            ShardConfig
            write_shards()
            tokenize → Parquet rows"]
            dataset_mod["dataset.py
            DatasetConfig
            PackedDataset
            skip(n_batches)"]
            pipeline_cli["pipeline.py
            build_records()
            run_pipeline()
            CLI entry point"]
        end

        subgraph model_pkg["model/"]
            model_cfg["config.py
            ModelConfig dataclass
            from_yaml() · to_dict()"]
            rmsnorm["rmsnorm.py
            RMSNorm
            float32 accumulation"]
            rope_mod["rope.py
            build_rope_cache()
            rotate_half()
            apply_rope()"]
            attn_mod["attention.py
            GQAAttention
            fused QKV proj
            repeat_kv()
            Flash-SDPA + KV-cache"]
            mlp_mod["mlp.py
            SwiGLUMLP
            gate × up → silu → down"]
            block_mod["block.py
            TransformerBlock
            pre-norm residual"]
            sage_model["model.py
            SageTransformer
            _reset_parameters()
            forward() → logits + KV"]
        end

        subgraph train_pkg["train/"]
            hw["hardware.py
            HardwareConfig
            auto device · dtype · VRAM
            micro_batch · grad_accum"]
            dist["distributed.py
            get_training_strategy()
            cpu / mps / single
            ddp / fsdp + ZeRO"]
            opt_mod["optimizer.py
            ScheduleConfig
            create_optimizer() AdamW
            lr_lambda() cosine
            create_scheduler()"]
            loss_mod["loss.py
            masked_cross_entropy()
            next-token prediction"]
            ckpt_mod["checkpoint.py
            save_checkpoint()
            load_latest_checkpoint()
            _prune_old_checkpoints()"]
            trainer_mod["trainer.py
            TrainerConfig
            train() main loop
            AMP · grad clip · W&B
            collate_batch()
            create_dataloader()"]
        end

        subgraph eval_pkg["eval/"]
            ppl_mod["perplexity.py
            evaluate_perplexity()
            val loss + ppl score"]
            bench_mod["benchmarks.py
            benchmark harness registry
            run_all_benchmarks()"]
            longctx_mod["long_context.py
            needle-in-haystack probes
            multi-length position tests"]
            regress_mod["regression.py
            compare_checkpoints()
            metric delta reporting"]
            run_bench["run_benchmarks.py
            CLI entry point"]
        end

        subgraph serve_pkg["serve/"]
            kv_mod["kv_cache.py
            KVCache dataclass
            empty() · append()"]
            quant_mod["quantize.py
            export_int8_state_dict()
            gguf_conversion_command()"]
            ctrl["control_plane.py
            CommandPreset · PresetField
            CommandJob · CommandManager
            build_control_router()
            _build_presets()
            _build_command_for_preset()
            HMAC session auth"]
            gpu_server["server.py
            FastAPI GPU app
            get_model() lazy load
            get_tokenizer()
            _generate_token_ids()
            chat_status()"]
            cpu_server["server_cpu.py
            FastAPI CPU app
            control-plane only"]
            ide["static/index.html
            SAGE IDE
            Chat · Presets · Jobs
            CLI Terminal · Inspector
            Settings · Command Palette"]
        end

        subgraph scripts_pkg["scripts/"]
            s_data["run_data_pipeline.sh"]
            s_train["run_training.sh"]
            s_eval["run_eval.sh"]
            s_serve["run_serve.sh"]
            s_serve_cpu["run_serve_cpu.sh"]
            s_tok["run_validate_tokenizer.sh"]
        end

        subgraph outputs["Runtime Outputs"]
            raw_data[("data/raw/*.jsonl
            5 source corpus files")]
            processed[("data/processed/
            shard-NNNNN.parquet
            manifest.json")]
            tok_out[("tokenizer/
            tokenizer.model
            tokenizer.vocab")]
            run_metrics[("runs/name/
            metrics.jsonl")]
            run_ckpt[("runs/name/
            ckpt_step_XXXXXXX.pt
            keep last 5")]
        end
    end

    %% ===================== ACTOR FLOWS =====================
    actor --> bootstrap
    actor --> tok_train
    actor --> pipeline_cli
    actor --> trainer_mod
    actor --> run_bench
    actor --> gpu_server
    actor --> ide

    %% ===================== TOKENIZER =====================
    bootstrap --> raw_data
    raw_data --> tok_train
    cfg_model --> tok_train
    tok_train --> tok_out
    tok_out --> tok_validate
    tok_out --> shard

    %% ===================== DATA PIPELINE =====================
    raw_data --> ingest
    ingest --> filter_mod
    filter_mod --> dedup
    dedup --> shard
    shard --> processed
    processed --> dataset_mod
    pipeline_cli --> ingest

    %% ===================== MODEL CONSTRUCTION =====================
    cfg_model --> model_cfg
    model_cfg --> rmsnorm
    model_cfg --> rope_mod
    model_cfg --> attn_mod
    model_cfg --> mlp_mod
    rope_mod --> attn_mod
    rmsnorm --> block_mod
    attn_mod --> block_mod
    mlp_mod --> block_mod
    block_mod --> sage_model

    %% ===================== TRAINING =====================
    cfg_train --> opt_mod
    cfg_train --> trainer_mod
    cfg_train --> ckpt_mod
    dataset_mod --> trainer_mod
    sage_model --> trainer_mod
    hw --> trainer_mod
    dist --> hw
    opt_mod --> trainer_mod
    loss_mod --> trainer_mod
    ckpt_mod --> trainer_mod
    trainer_mod --> run_metrics
    trainer_mod --> run_ckpt
    trainer_mod --> ppl_mod
    s_train --> trainer_mod

    %% ===================== EVALUATION =====================
    sage_model --> ppl_mod
    sage_model --> bench_mod
    sage_model --> longctx_mod
    ppl_mod --> regress_mod
    bench_mod --> regress_mod
    longctx_mod --> regress_mod
    run_bench --> bench_mod
    s_eval --> run_bench

    %% ===================== SERVING =====================
    %% Checkpoints and tokenizer artifacts feed the GPU server; the KV cache
    %% goes to the GPU server and the quantize module to the CPU server.
    %% Both servers take the control plane as input and link to the IDE node.
    run_ckpt --> gpu_server
    tok_out --> gpu_server
    sage_model --> kv_mod
    sage_model --> quant_mod
    kv_mod --> gpu_server
    quant_mod --> cpu_server
    ctrl --> gpu_server
    ctrl --> cpu_server
    gpu_server --> ide
    cpu_server --> ide
    s_serve --> gpu_server
    s_serve_cpu --> cpu_server
    %% NOTE: the two edges below are script wrappers pointing at pipeline.py
    %% and validate_tokenizer.py; they are listed under SERVING but are not
    %% serving edges.
    s_data --> pipeline_cli
    s_tok --> tok_validate

    %% ===================== STYLE =====================
    classDef actor    fill:#1d4ed8,stroke:#bfdbfe,color:#ffffff
    classDef config   fill:#1f2937,stroke:#93c5fd,color:#ffffff
    classDef pipeline fill:#0f766e,stroke:#5eead4,color:#ffffff
    classDef model    fill:#4c1d95,stroke:#c4b5fd,color:#ffffff
    classDef train    fill:#92400e,stroke:#fcd34d,color:#ffffff
    classDef eval     fill:#7f1d1d,stroke:#fca5a5,color:#ffffff
    classDef serve    fill:#065f46,stroke:#86efac,color:#ffffff
    classDef io       fill:#111827,stroke:#9ca3af,color:#d1d5db
    classDef script   fill:#374151,stroke:#6b7280,color:#d1d5db

    class actor actor
    class cfg_model,cfg_train config
    class bootstrap,ingest,filter_mod,dedup,shard,dataset_mod,pipeline_cli,tok_train,tok_validate pipeline
    class model_cfg,rmsnorm,rope_mod,attn_mod,mlp_mod,block_mod,sage_model model
    class hw,dist,opt_mod,loss_mod,ckpt_mod,trainer_mod train
    class ppl_mod,bench_mod,longctx_mod,regress_mod,run_bench eval
    class kv_mod,quant_mod,ctrl,gpu_server,cpu_server,ide serve
    class raw_data,processed,tok_out,tok_model,run_metrics,run_ckpt io
    class s_data,s_train,s_eval,s_serve,s_serve_cpu,s_tok script