{
  "best_metric": 0.01243716301208047,
  "best_model_checkpoint": "./checkpoints/easyrec-small",
  "epoch": 1.733531451213472,
  "eval_steps": 1000,
  "global_step": 14000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.06191183754333829,
      "grad_norm": 24.75,
      "learning_rate": 4.896813604094436e-05,
      "loss": 3.2576,
      "step": 500
    },
    {
      "epoch": 0.12382367508667658,
      "grad_norm": 21.75,
      "learning_rate": 4.793627208188873e-05,
      "loss": 2.7834,
      "step": 1000
    },
    {
      "epoch": 0.18573551263001487,
      "grad_norm": 21.25,
      "learning_rate": 4.6904408122833086e-05,
      "loss": 2.6955,
      "step": 1500
    },
    {
      "epoch": 0.24764735017335315,
      "grad_norm": 24.375,
      "learning_rate": 4.587254416377745e-05,
      "loss": 2.5928,
      "step": 2000
    },
    {
      "epoch": 0.30955918771669144,
      "grad_norm": 27.875,
      "learning_rate": 4.484068020472182e-05,
      "loss": 2.5848,
      "step": 2500
    },
    {
      "epoch": 0.37147102526002973,
      "grad_norm": 21.625,
      "learning_rate": 4.3808816245666175e-05,
      "loss": 2.5582,
      "step": 3000
    },
    {
      "epoch": 0.433382862803368,
      "grad_norm": 27.125,
      "learning_rate": 4.2776952286610534e-05,
      "loss": 2.5386,
      "step": 3500
    },
    {
      "epoch": 0.4952947003467063,
      "grad_norm": 29.625,
      "learning_rate": 4.17450883275549e-05,
      "loss": 2.5143,
      "step": 4000
    },
    {
      "epoch": 0.5572065378900446,
      "grad_norm": 20.0,
      "learning_rate": 4.071322436849926e-05,
      "loss": 2.5071,
      "step": 4500
    },
    {
      "epoch": 0.6191183754333829,
      "grad_norm": 26.25,
      "learning_rate": 3.968136040944362e-05,
      "loss": 2.4754,
      "step": 5000
    },
    {
      "epoch": 0.6810302129767212,
      "grad_norm": 23.0,
      "learning_rate": 3.864949645038798e-05,
      "loss": 2.4846,
      "step": 5500
    },
    {
      "epoch": 0.7429420505200595,
      "grad_norm": 24.5,
      "learning_rate": 3.761763249133234e-05,
      "loss": 2.4806,
      "step": 6000
    },
    {
      "epoch": 0.8048538880633977,
      "grad_norm": 24.125,
      "learning_rate": 3.658576853227671e-05,
      "loss": 2.4532,
      "step": 6500
    },
    {
      "epoch": 0.866765725606736,
      "grad_norm": 24.25,
      "learning_rate": 3.555390457322107e-05,
      "loss": 2.4561,
      "step": 7000
    },
    {
      "epoch": 0.9286775631500743,
      "grad_norm": 30.75,
      "learning_rate": 3.452204061416543e-05,
      "loss": 2.4607,
      "step": 7500
    },
    {
      "epoch": 0.9905894006934126,
      "grad_norm": 26.375,
      "learning_rate": 3.349017665510979e-05,
      "loss": 2.4332,
      "step": 8000
    },
    {
      "epoch": 1.052501238236751,
      "grad_norm": 29.375,
      "learning_rate": 3.2458312696054156e-05,
      "loss": 2.4328,
      "step": 8500
    },
    {
      "epoch": 1.1144130757800892,
      "grad_norm": 25.0,
      "learning_rate": 3.1426448736998515e-05,
      "loss": 2.4362,
      "step": 9000
    },
    {
      "epoch": 1.1763249133234275,
      "grad_norm": 24.25,
      "learning_rate": 3.0394584777942874e-05,
      "loss": 2.4432,
      "step": 9500
    },
    {
      "epoch": 1.2382367508667658,
      "grad_norm": 34.75,
      "learning_rate": 2.936272081888724e-05,
      "loss": 2.427,
      "step": 10000
    },
    {
      "epoch": 1.300148588410104,
      "grad_norm": 24.5,
      "learning_rate": 2.83308568598316e-05,
      "loss": 2.4219,
      "step": 10500
    },
    {
      "epoch": 1.3620604259534423,
      "grad_norm": 35.0,
      "learning_rate": 2.729899290077596e-05,
      "loss": 2.4213,
      "step": 11000
    },
    {
      "epoch": 1.4239722634967806,
      "grad_norm": 28.75,
      "learning_rate": 2.6267128941720326e-05,
      "loss": 2.424,
      "step": 11500
    },
    {
      "epoch": 1.485884101040119,
      "grad_norm": 26.0,
      "learning_rate": 2.5235264982664684e-05,
      "loss": 2.4192,
      "step": 12000
    },
    {
      "epoch": 1.5477959385834572,
      "grad_norm": 22.75,
      "learning_rate": 2.420340102360905e-05,
      "loss": 2.4032,
      "step": 12500
    },
    {
      "epoch": 1.6097077761267955,
      "grad_norm": 35.0,
      "learning_rate": 2.3171537064553412e-05,
      "loss": 2.4317,
      "step": 13000
    },
    {
      "epoch": 1.6716196136701336,
      "grad_norm": 20.875,
      "learning_rate": 2.213967310549777e-05,
      "loss": 2.432,
      "step": 13500
    },
    {
      "epoch": 1.733531451213472,
      "grad_norm": 25.625,
      "learning_rate": 2.1107809146442133e-05,
      "loss": 2.4309,
      "step": 14000
    }
  ],
  "logging_steps": 500,
  "max_steps": 24228,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}