{
  "model_name": "AstroPT-0097.4M",
  "time": 1740584479,
  "log_via_wandb": true,
  "log_emissions": false,
  "out_dir": "logs/astropt070M_sparse_32patch",
  "eval_interval": 100,
  "log_interval": 10,
  "checkpoint_interval": 500,
  "eval_iters": 100,
  "eval_only": false,
  "always_save_checkpoint": false,
  "init_from": "scratch",
  "hf_url": "smith42/galaxies",
  "stream_hf_dataset": false,
  "gradient_accumulation_steps": 40,
  "batch_size": 64,
  "spiral": true,
  "block_size": 64,
  "image_size": 256,
  "num_workers": 64,
  "n_layer": 12,
  "n_head": 12,
  "n_embd": 768,
  "n_chan": 3,
  "dropout": 0.0,
  "patch_size": 32,
  "bias": false,
  "attn_type": "causal",
  "k_ratio": 0.1,
  "learning_rate": 0.0006,
  "max_iters": 3500,
  "weight_decay": 0.1,
  "beta1": 0.9,
  "beta2": 0.95,
  "grad_clip": 1.0,
  "decay_lr": true,
  "warmup_iters": 2000,
  "lr_decay_iters": 3300.0000000000005,
  "min_lr": 5.9999999999999995e-05,
  "backend": "nccl",
  "device": "cuda",
  "dtype": "bfloat16",
  "compile": true,
  "model_type": "astroPT",
  "architectures": [
    "astroPT"
  ]
}