{
"model_type": "atome-lm",
"architecture": "routed-ternary-3pathway",
"_comment": "Atome LM is a custom architecture, NOT a transformers AutoModel. Load with atome_llm.core.atome_lm.AtomeLM from github.com/TilelliLab/atome-lm. This config documents the bundled checkpoints; it is not consumed by transformers.",
"checkpoints": {
"atome_944k.bin": {
"format": "ATOME01 packed C-engine blob (4 trits/byte)",
"precision": "ternary {-alpha, 0, +alpha} per tensor (BitNet b1.58 style)",
"bits_per_weight": 1.58,
"params": 944640,
"disk_bytes": 276655,
"loadable_by": "Atome C99 engine (atome_load)",
"derived_from": "atome_1m_v1.pt"
},
"atome_1m_v1.pt": {
"format": "PyTorch state_dict",
"precision": "fp32 source (export to ternary via scripts/export_to_atome.py)",
"params": 944640,
"config": {
"vocab_size": 256,
"d_model": 256,
"n_layers": 8,
"d_head": 64,
"top_k": 4,
"kernel_size": 5,
"n_pathways": 3
},
"tokenizer": "byte-level (no vocab file; ids 0-255)",
"final_val_loss": 1.0545,
"final_val_ppl": 2.87
},
"vanilla_1m_v1.pt": {
"format": "PyTorch state_dict",
"precision": "fp32",
"role": "param-fair vanilla GPT baseline for the 944K reversal A/B in HONEST_RESULTS.md",
"params": 950608,
"config": {
"kind": "vanilla_transformer_fp32",
"vocab_size": 256,
"d_model": 152,
"n_layers": 3,
"n_heads": 4,
"d_ff": 608,
"max_seq": 256
},
"final_val_loss": 0.9337,
"final_val_ppl": 2.54
}
},
"engine_default_config": {
"_comment": "The C99 engine compile-time #defines; ~60K params, the MCU target regime (NOT the 944K bundled checkpoint).",
"vocab_size": 256,
"d_model": 64,
"n_layers": 4,
"d_head": 16,
"top_k": 4,
"kernel_size": 5,
"n_pathways": 3
},
"training": {
"corpus": "TinyStories (train.txt + valid.txt concatenated)",
"steps": 30000,
"seq_len": 256,
"batch_size": 64,
"accum_steps": 4,
"optimizer": "AdamW lr 3e-4->3e-5 cosine, warmup 1000, weight_decay 0.1",
"precision": "bf16 autocast",
"seed": 0,
"seeds_note": "single seed only; multi-seed variance not yet measured"
},
"license": "Apache-2.0",
"version": "0.3.0",
"source_repository": "https://github.com/TilelliLab/atome-lm",
"project_home": "https://atomelm.com"
}