| { | |
| "step": 14400, | |
| "val_bpb": 1.2370320890571345, | |
| "model_config": { | |
| "sequence_len": 2048, | |
| "vocab_size": 32768, | |
| "n_layer": 34, | |
| "n_head": 17, | |
| "n_kv_head": 17, | |
| "n_embd": 2176, | |
| "window_pattern": "L" | |
| }, | |
| "user_config": { | |
| "run": "pre1905_physics_clm_d34_expanded", | |
| "device_type": "", | |
| "dtype": "bfloat16", | |
| "model_tag": null, | |
| "model_step": null, | |
| "data_dir": "/opt/dlami/nvme/gpt1905_training/physics_clm_data", | |
| "num_epochs": 10, | |
| "num_iterations": -1, | |
| "max_seq_len": 2048, | |
| "device_batch_size": 4, | |
| "total_batch_size": 65536, | |
| "embedding_lr": 0.05, | |
| "unembedding_lr": 0.001, | |
| "matrix_lr": 0.005, | |
| "weight_decay": 0.0, | |
| "eval_every": 50, | |
| "eval_tokens": 655360, | |
| "save_every": 4800, | |
| "output_tag": "pre1905_physics_clm_d34_expanded", | |
| "dry_run": false, | |
| "physics_eval": true, | |
| "judge_model": "claude-sonnet-4-20250514", | |
| "max_concurrent_api": 8 | |
| } | |
| } |