| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.00019859395480001588, |
| "global_step": 10, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0, |
| "learning_rate": 1.5873015873015873e-06, |
| "loss": 10.5611, |
| "theoretical_loss": 20.84128112621979, |
| "tokens_seen": 65536 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 3.1746031746031746e-06, |
| "loss": 10.5541, |
| "theoretical_loss": 17.594664429001284, |
| "tokens_seen": 131072 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.7619047619047615e-06, |
| "loss": 10.4474, |
| "theoretical_loss": 15.967940417509208, |
| "tokens_seen": 196608 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 6.349206349206349e-06, |
| "loss": 10.2462, |
| "theoretical_loss": 14.92078008066121, |
| "tokens_seen": 262144 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 7.936507936507936e-06, |
| "loss": 10.0164, |
| "theoretical_loss": 14.164680262435617, |
| "tokens_seen": 327680 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 9.523809523809523e-06, |
| "loss": 9.716, |
| "theoretical_loss": 13.581024797222863, |
| "tokens_seen": 393216 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 1.1111111111111112e-05, |
| "loss": 9.4783, |
| "theoretical_loss": 13.11027232607383, |
| "tokens_seen": 458752 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 1.2698412698412699e-05, |
| "loss": 9.3542, |
| "theoretical_loss": 12.718592950155966, |
| "tokens_seen": 524288 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 1.4285714285714285e-05, |
| "loss": 9.1909, |
| "theoretical_loss": 12.385055788546264, |
| "tokens_seen": 589824 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 1.5873015873015872e-05, |
| "loss": 9.0334, |
| "theoretical_loss": 12.09587593170772, |
| "tokens_seen": 655360 |
| } |
| ], |
| "max_steps": 50354, |
| "num_train_epochs": 9223372036854775807, |
| "total_flos": 334453800960000.0, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|