TilelliLab
/

atome-lm

Text Generation

microcontroller

routed-architecture

Model card Files and versions

atome-lm / config.json

TilelliLab's picture

Atome LM v0.3.0 — checkpoints + honest model card

9e3a160 verified 1 day ago

history blame contribute delete

2.51 kB

	{
	"model_type": "atome-lm",
	"architecture": "routed-ternary-3pathway",
	"_comment": "Atome LM is a custom architecture, NOT a transformers AutoModel. Load with atome_llm.core.atome_lm.AtomeLM from github.com/TilelliLab/atome-lm. This config documents the bundled checkpoints; it is not consumed by transformers.",

	"checkpoints": {
	"atome_944k.bin": {
	"format": "ATOME01 packed C-engine blob (4 trits/byte)",
	"precision": "ternary {-alpha, 0, +alpha} per tensor (BitNet b1.58 style)",
	"bits_per_weight": 1.58,
	"params": 944640,
	"disk_bytes": 276655,
	"loadable_by": "Atome C99 engine (atome_load)",
	"derived_from": "atome_1m_v1.pt"
	},
	"atome_1m_v1.pt": {
	"format": "PyTorch state_dict",
	"precision": "fp32 source (export to ternary via scripts/export_to_atome.py)",
	"params": 944640,
	"config": {
	"vocab_size": 256,
	"d_model": 256,
	"n_layers": 8,
	"d_head": 64,
	"top_k": 4,
	"kernel_size": 5,
	"n_pathways": 3
	},
	"tokenizer": "byte-level (no vocab file; ids 0-255)",
	"final_val_loss": 1.0545,
	"final_val_ppl": 2.87
	},
	"vanilla_1m_v1.pt": {
	"format": "PyTorch state_dict",
	"precision": "fp32",
	"role": "param-fair vanilla GPT baseline for the 944K reversal A/B in HONEST_RESULTS.md",
	"params": 950608,
	"config": {
	"kind": "vanilla_transformer_fp32",
	"vocab_size": 256,
	"d_model": 152,
	"n_layers": 3,
	"n_heads": 4,
	"d_ff": 608,
	"max_seq": 256
	},
	"final_val_loss": 0.9337,
	"final_val_ppl": 2.54
	}
	},

	"engine_default_config": {
	"_comment": "The C99 engine compile-time #defines; ~60K params, the MCU target regime (NOT the 944K bundled checkpoint).",
	"vocab_size": 256,
	"d_model": 64,
	"n_layers": 4,
	"d_head": 16,
	"top_k": 4,
	"kernel_size": 5,
	"n_pathways": 3
	},

	"training": {
	"corpus": "TinyStories (train.txt + valid.txt concatenated)",
	"steps": 30000,
	"seq_len": 256,
	"batch_size": 64,
	"accum_steps": 4,
	"optimizer": "AdamW lr 3e-4->3e-5 cosine, warmup 1000, weight_decay 0.1",
	"precision": "bf16 autocast",
	"seed": 0,
	"seeds_note": "single seed only; multi-seed variance not yet measured"
	},

	"license": "Apache-2.0",
	"version": "0.3.0",
	"source_repository": "https://github.com/TilelliLab/atome-lm",
	"project_home": "https://atomelm.com"
	}