{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.924302788844622, "eval_steps": 500, "global_step": 310, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1593625498007968, "grad_norm": 1.439923644065857, "learning_rate": 6.25e-06, "loss": 0.8005, "step": 10 }, { "epoch": 0.3187250996015936, "grad_norm": 1.1607290506362915, "learning_rate": 9.995433337085492e-06, "loss": 0.6192, "step": 20 }, { "epoch": 0.47808764940239046, "grad_norm": 0.735303521156311, "learning_rate": 9.944154131125643e-06, "loss": 0.5349, "step": 30 }, { "epoch": 0.6374501992031872, "grad_norm": 1.0546119213104248, "learning_rate": 9.836474315195148e-06, "loss": 0.5105, "step": 40 }, { "epoch": 0.796812749003984, "grad_norm": 0.655317485332489, "learning_rate": 9.673622250534155e-06, "loss": 0.4992, "step": 50 }, { "epoch": 0.9561752988047809, "grad_norm": 0.8014914393424988, "learning_rate": 9.457455677726447e-06, "loss": 0.4943, "step": 60 }, { "epoch": 1.1115537848605577, "grad_norm": 0.8364565372467041, "learning_rate": 9.190440524459203e-06, "loss": 0.4301, "step": 70 }, { "epoch": 1.2709163346613546, "grad_norm": 0.6313614249229431, "learning_rate": 8.87562277536726e-06, "loss": 0.3869, "step": 80 }, { "epoch": 1.4302788844621515, "grad_norm": 0.7729827761650085, "learning_rate": 8.516593724857598e-06, "loss": 0.3895, "step": 90 }, { "epoch": 1.5896414342629481, "grad_norm": 0.5305516123771667, "learning_rate": 8.117449009293668e-06, "loss": 0.3809, "step": 100 }, { "epoch": 1.749003984063745, "grad_norm": 0.6976670026779175, "learning_rate": 7.682741885881314e-06, "loss": 0.3707, "step": 110 }, { "epoch": 1.908366533864542, "grad_norm": 0.5881310701370239, "learning_rate": 7.217431291229068e-06, "loss": 0.3831, "step": 120 }, { "epoch": 2.0637450199203187, "grad_norm": 0.5917549729347229, "learning_rate": 6.726825272106539e-06, "loss": 0.3343, "step": 130 }, { "epoch": 2.2231075697211153, "grad_norm": 0.6392484903335571, "learning_rate": 6.216520433716544e-06, "loss": 0.2776, "step": 140 }, { "epoch": 2.3824701195219125, "grad_norm": 0.5469350814819336, "learning_rate": 5.69233809622687e-06, "loss": 0.2751, "step": 150 }, { "epoch": 2.541832669322709, "grad_norm": 0.5329071879386902, "learning_rate": 5.160257887858278e-06, "loss": 0.2758, "step": 160 }, { "epoch": 2.7011952191235062, "grad_norm": 0.608709454536438, "learning_rate": 4.626349532067879e-06, "loss": 0.2711, "step": 170 }, { "epoch": 2.860557768924303, "grad_norm": 0.5087049603462219, "learning_rate": 4.096703606968007e-06, "loss": 0.2685, "step": 180 }, { "epoch": 3.0159362549800797, "grad_norm": 0.7022324800491333, "learning_rate": 3.5773620668448384e-06, "loss": 0.2626, "step": 190 }, { "epoch": 3.1752988047808763, "grad_norm": 0.5048023462295532, "learning_rate": 3.074249318355046e-06, "loss": 0.1978, "step": 200 }, { "epoch": 3.3346613545816735, "grad_norm": 0.4734826385974884, "learning_rate": 2.5931046376510875e-06, "loss": 0.1886, "step": 210 }, { "epoch": 3.49402390438247, "grad_norm": 0.6656137108802795, "learning_rate": 2.139416699389153e-06, "loss": 0.1918, "step": 220 }, { "epoch": 3.653386454183267, "grad_norm": 0.4610200524330139, "learning_rate": 1.7183609644824096e-06, "loss": 0.1908, "step": 230 }, { "epoch": 3.812749003984064, "grad_norm": 0.5110896229743958, "learning_rate": 1.3347406408508695e-06, "loss": 0.1758, "step": 240 }, { "epoch": 3.9721115537848606, "grad_norm": 0.4129928946495056, "learning_rate": 9.929318906602176e-07, "loss": 0.1944, "step": 250 }, { "epoch": 4.127490039840637, "grad_norm": 0.39533188939094543, "learning_rate": 6.968339090999188e-07, "loss": 0.1561, "step": 260 }, { "epoch": 4.286852589641434, "grad_norm": 0.4790317118167877, "learning_rate": 4.4982444417866753e-07, "loss": 0.1381, "step": 270 }, { "epoch": 4.446215139442231, "grad_norm": 0.39792048931121826, "learning_rate": 2.547212649466568e-07, "loss": 0.1532, "step": 280 }, { "epoch": 4.605577689243028, "grad_norm": 0.4457632899284363, "learning_rate": 1.1375001769728e-07, "loss": 0.153, "step": 290 }, { "epoch": 4.764940239043825, "grad_norm": 0.36862707138061523, "learning_rate": 2.8518836829732332e-08, "loss": 0.1556, "step": 300 }, { "epoch": 4.924302788844622, "grad_norm": 0.44045692682266235, "learning_rate": 0.0, "loss": 0.1487, "step": 310 }, { "epoch": 4.924302788844622, "step": 310, "total_flos": 238832327327744.0, "train_loss": 0.3164117013254473, "train_runtime": 47203.2069, "train_samples_per_second": 0.212, "train_steps_per_second": 0.007 } ], "logging_steps": 10, "max_steps": 310, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 238832327327744.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }