| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.00019859395480001588, | |
| "global_step": 10, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 1.5873015873015873e-06, | |
| "loss": 10.5611, | |
| "theoretical_loss": 20.84128112621979, | |
| "tokens_seen": 65536 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 3.1746031746031746e-06, | |
| "loss": 10.5541, | |
| "theoretical_loss": 17.594664429001284, | |
| "tokens_seen": 131072 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.7619047619047615e-06, | |
| "loss": 10.4474, | |
| "theoretical_loss": 15.967940417509208, | |
| "tokens_seen": 196608 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 6.349206349206349e-06, | |
| "loss": 10.2462, | |
| "theoretical_loss": 14.92078008066121, | |
| "tokens_seen": 262144 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 7.936507936507936e-06, | |
| "loss": 10.0164, | |
| "theoretical_loss": 14.164680262435617, | |
| "tokens_seen": 327680 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 9.523809523809523e-06, | |
| "loss": 9.716, | |
| "theoretical_loss": 13.581024797222863, | |
| "tokens_seen": 393216 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 1.1111111111111112e-05, | |
| "loss": 9.4783, | |
| "theoretical_loss": 13.11027232607383, | |
| "tokens_seen": 458752 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 1.2698412698412699e-05, | |
| "loss": 9.3542, | |
| "theoretical_loss": 12.718592950155966, | |
| "tokens_seen": 524288 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 1.4285714285714285e-05, | |
| "loss": 9.1909, | |
| "theoretical_loss": 12.385055788546264, | |
| "tokens_seen": 589824 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 1.5873015873015872e-05, | |
| "loss": 9.0334, | |
| "theoretical_loss": 12.09587593170772, | |
| "tokens_seen": 655360 | |
| } | |
| ], | |
| "max_steps": 50354, | |
| "num_train_epochs": 9223372036854775807, | |
| "total_flos": 334453800960000.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |