astroPT_sparse / astropt /p32k10 /config.json
Smith42's picture
movement
307c015
{
"model_name": "AstroPT-0097.4M",
"time": 1740584479,
"log_via_wandb": true,
"log_emissions": false,
"out_dir": "logs/astropt070M_sparse_32patch",
"eval_interval": 100,
"log_interval": 10,
"checkpoint_interval": 500,
"eval_iters": 100,
"eval_only": false,
"always_save_checkpoint": false,
"init_from": "scratch",
"hf_url": "smith42/galaxies",
"stream_hf_dataset": false,
"gradient_accumulation_steps": 40,
"batch_size": 64,
"spiral": true,
"block_size": 64,
"image_size": 256,
"num_workers": 64,
"n_layer": 12,
"n_head": 12,
"n_embd": 768,
"n_chan": 3,
"dropout": 0.0,
"patch_size": 32,
"bias": false,
"attn_type": "causal",
"k_ratio": 0.1,
"learning_rate": 0.0006,
"max_iters": 3500,
"weight_decay": 0.1,
"beta1": 0.9,
"beta2": 0.95,
"grad_clip": 1.0,
"decay_lr": true,
"warmup_iters": 2000,
"lr_decay_iters": 3300.0000000000005,
"min_lr": "5.9999999999999995e-05",
"backend": "nccl",
"device": "cuda",
"dtype": "bfloat16",
"compile": true,
"model_type": "astroPT",
"architectures": [
"astroPT"
]
}