| { |
| "model_type": "atome-lm", |
| "architecture": "routed-ternary-3pathway", |
| "_comment": "Atome LM is a custom architecture, NOT a transformers AutoModel. Load it with atome_llm.core.atome_lm.AtomeLM from https://github.com/TilelliLab/atome-lm. This config only documents the bundled checkpoints; it is not consumed by transformers.", |
|
|
| "checkpoints": { |
| "atome_944k.bin": { |
| "format": "ATOME01 packed C-engine blob (4 trits/byte)", |
| "precision": "ternary {-alpha, 0, +alpha} per tensor (BitNet b1.58 style)", |
| "bits_per_weight": 1.58, |
| "params": 944640, |
| "disk_bytes": 276655, |
| "loadable_by": "Atome C99 engine (atome_load)", |
| "derived_from": "atome_1m_v1.pt" |
| }, |
| "atome_1m_v1.pt": { |
| "format": "PyTorch state_dict", |
| "precision": "fp32 source (export to ternary via scripts/export_to_atome.py)", |
| "params": 944640, |
| "config": { |
| "vocab_size": 256, |
| "d_model": 256, |
| "n_layers": 8, |
| "d_head": 64, |
| "top_k": 4, |
| "kernel_size": 5, |
| "n_pathways": 3 |
| }, |
| "tokenizer": "byte-level (no vocab file; ids 0-255)", |
| "final_val_loss": 1.0545, |
| "final_val_ppl": 2.87 |
| }, |
| "vanilla_1m_v1.pt": { |
| "format": "PyTorch state_dict", |
| "precision": "fp32", |
| "role": "param-fair vanilla GPT baseline for the 944K reversal A/B in HONEST_RESULTS.md", |
| "params": 950608, |
| "config": { |
| "kind": "vanilla_transformer_fp32", |
| "vocab_size": 256, |
| "d_model": 152, |
| "n_layers": 3, |
| "n_heads": 4, |
| "d_ff": 608, |
| "max_seq": 256 |
| }, |
| "final_val_loss": 0.9337, |
| "final_val_ppl": 2.54 |
| } |
| }, |
|
|
| "engine_default_config": { |
| "_comment": "Default values of the C99 engine's compile-time #defines: a ~60K-param model for the MCU target regime. These do NOT describe the bundled 944K checkpoint.", |
| "vocab_size": 256, |
| "d_model": 64, |
| "n_layers": 4, |
| "d_head": 16, |
| "top_k": 4, |
| "kernel_size": 5, |
| "n_pathways": 3 |
| }, |
|
|
| "training": { |
| "corpus": "TinyStories (train.txt + valid.txt concatenated)", |
| "steps": 30000, |
| "seq_len": 256, |
| "batch_size": 64, |
| "accum_steps": 4, |
| "optimizer": "AdamW lr 3e-4->3e-5 cosine, warmup 1000, weight_decay 0.1", |
| "precision": "bf16 autocast", |
| "seed": 0, |
| "seeds_note": "single seed only; multi-seed variance not yet measured" |
| }, |
|
|
| "license": "Apache-2.0", |
| "version": "0.3.0", |
| "source_repository": "https://github.com/TilelliLab/atome-lm", |
| "project_home": "https://atomelm.com" |
| } |
|
|