| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.2, |
| "eval_steps": 500, |
| "global_step": 500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01, |
| "grad_norm": 21.125, |
| "learning_rate": 1.2000000000000002e-06, |
| "loss": 2.357309112548828, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 10.625, |
| "learning_rate": 2.4500000000000003e-06, |
| "loss": 2.2842657470703127, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 7.09375, |
| "learning_rate": 3.7e-06, |
| "loss": 1.831963348388672, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 3.734375, |
| "learning_rate": 4.95e-06, |
| "loss": 1.6006643676757812, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 3.453125, |
| "learning_rate": 4.998766400914329e-06, |
| "loss": 1.4665501403808594, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 3.859375, |
| "learning_rate": 4.994859202020212e-06, |
| "loss": 1.4475660705566407, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 3.328125, |
| "learning_rate": 4.988280445403164e-06, |
| "loss": 1.643369140625, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 3.21875, |
| "learning_rate": 4.979037175760548e-06, |
| "loss": 1.5347616577148437, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 3.484375, |
| "learning_rate": 4.967139291017018e-06, |
| "loss": 1.3884881591796876, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 3.59375, |
| "learning_rate": 4.9525995317255675e-06, |
| "loss": 1.4483978271484375, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 3.625, |
| "learning_rate": 4.935433467424624e-06, |
| "loss": 1.47322265625, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 3.109375, |
| "learning_rate": 4.915659479965806e-06, |
| "loss": 1.4528363037109375, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 3.25, |
| "learning_rate": 4.893298743830168e-06, |
| "loss": 1.4053765869140624, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 3.640625, |
| "learning_rate": 4.868375203454041e-06, |
| "loss": 1.4791285705566406, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 3.390625, |
| "learning_rate": 4.840915547588725e-06, |
| "loss": 1.3204701232910157, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 3.796875, |
| "learning_rate": 4.81094918072151e-06, |
| "loss": 1.423112335205078, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 2.734375, |
| "learning_rate": 4.778508191588613e-06, |
| "loss": 1.3661781311035157, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 3.140625, |
| "learning_rate": 4.743627318813757e-06, |
| "loss": 1.3950372314453126, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 3.046875, |
| "learning_rate": 4.706343913709178e-06, |
| "loss": 1.4158805847167968, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 3.71875, |
| "learning_rate": 4.66669790027891e-06, |
| "loss": 1.3910885620117188, |
| "step": 500 |
| } |
| ], |
| "logging_steps": 25, |
| "max_steps": 2500, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 9223372036854775807, |
| "save_steps": 250, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3171365683200000.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|