| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.924302788844622, | |
| "eval_steps": 500, | |
| "global_step": 310, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.1593625498007968, | |
| "grad_norm": 1.439923644065857, | |
| "learning_rate": 6.25e-06, | |
| "loss": 0.8005, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.3187250996015936, | |
| "grad_norm": 1.1607290506362915, | |
| "learning_rate": 9.995433337085492e-06, | |
| "loss": 0.6192, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.47808764940239046, | |
| "grad_norm": 0.735303521156311, | |
| "learning_rate": 9.944154131125643e-06, | |
| "loss": 0.5349, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.6374501992031872, | |
| "grad_norm": 1.0546119213104248, | |
| "learning_rate": 9.836474315195148e-06, | |
| "loss": 0.5105, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.796812749003984, | |
| "grad_norm": 0.655317485332489, | |
| "learning_rate": 9.673622250534155e-06, | |
| "loss": 0.4992, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.9561752988047809, | |
| "grad_norm": 0.8014914393424988, | |
| "learning_rate": 9.457455677726447e-06, | |
| "loss": 0.4943, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 1.1115537848605577, | |
| "grad_norm": 0.8364565372467041, | |
| "learning_rate": 9.190440524459203e-06, | |
| "loss": 0.4301, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 1.2709163346613546, | |
| "grad_norm": 0.6313614249229431, | |
| "learning_rate": 8.87562277536726e-06, | |
| "loss": 0.3869, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 1.4302788844621515, | |
| "grad_norm": 0.7729827761650085, | |
| "learning_rate": 8.516593724857598e-06, | |
| "loss": 0.3895, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 1.5896414342629481, | |
| "grad_norm": 0.5305516123771667, | |
| "learning_rate": 8.117449009293668e-06, | |
| "loss": 0.3809, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.749003984063745, | |
| "grad_norm": 0.6976670026779175, | |
| "learning_rate": 7.682741885881314e-06, | |
| "loss": 0.3707, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 1.908366533864542, | |
| "grad_norm": 0.5881310701370239, | |
| "learning_rate": 7.217431291229068e-06, | |
| "loss": 0.3831, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 2.0637450199203187, | |
| "grad_norm": 0.5917549729347229, | |
| "learning_rate": 6.726825272106539e-06, | |
| "loss": 0.3343, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 2.2231075697211153, | |
| "grad_norm": 0.6392484903335571, | |
| "learning_rate": 6.216520433716544e-06, | |
| "loss": 0.2776, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 2.3824701195219125, | |
| "grad_norm": 0.5469350814819336, | |
| "learning_rate": 5.69233809622687e-06, | |
| "loss": 0.2751, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 2.541832669322709, | |
| "grad_norm": 0.5329071879386902, | |
| "learning_rate": 5.160257887858278e-06, | |
| "loss": 0.2758, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 2.7011952191235062, | |
| "grad_norm": 0.608709454536438, | |
| "learning_rate": 4.626349532067879e-06, | |
| "loss": 0.2711, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 2.860557768924303, | |
| "grad_norm": 0.5087049603462219, | |
| "learning_rate": 4.096703606968007e-06, | |
| "loss": 0.2685, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 3.0159362549800797, | |
| "grad_norm": 0.7022324800491333, | |
| "learning_rate": 3.5773620668448384e-06, | |
| "loss": 0.2626, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 3.1752988047808763, | |
| "grad_norm": 0.5048023462295532, | |
| "learning_rate": 3.074249318355046e-06, | |
| "loss": 0.1978, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 3.3346613545816735, | |
| "grad_norm": 0.4734826385974884, | |
| "learning_rate": 2.5931046376510875e-06, | |
| "loss": 0.1886, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 3.49402390438247, | |
| "grad_norm": 0.6656137108802795, | |
| "learning_rate": 2.139416699389153e-06, | |
| "loss": 0.1918, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 3.653386454183267, | |
| "grad_norm": 0.4610200524330139, | |
| "learning_rate": 1.7183609644824096e-06, | |
| "loss": 0.1908, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 3.812749003984064, | |
| "grad_norm": 0.5110896229743958, | |
| "learning_rate": 1.3347406408508695e-06, | |
| "loss": 0.1758, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 3.9721115537848606, | |
| "grad_norm": 0.4129928946495056, | |
| "learning_rate": 9.929318906602176e-07, | |
| "loss": 0.1944, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 4.127490039840637, | |
| "grad_norm": 0.39533188939094543, | |
| "learning_rate": 6.968339090999188e-07, | |
| "loss": 0.1561, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 4.286852589641434, | |
| "grad_norm": 0.4790317118167877, | |
| "learning_rate": 4.4982444417866753e-07, | |
| "loss": 0.1381, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 4.446215139442231, | |
| "grad_norm": 0.39792048931121826, | |
| "learning_rate": 2.547212649466568e-07, | |
| "loss": 0.1532, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 4.605577689243028, | |
| "grad_norm": 0.4457632899284363, | |
| "learning_rate": 1.1375001769728e-07, | |
| "loss": 0.153, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 4.764940239043825, | |
| "grad_norm": 0.36862707138061523, | |
| "learning_rate": 2.8518836829732332e-08, | |
| "loss": 0.1556, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 4.924302788844622, | |
| "grad_norm": 0.44045692682266235, | |
| "learning_rate": 0.0, | |
| "loss": 0.1487, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 4.924302788844622, | |
| "step": 310, | |
| "total_flos": 238832327327744.0, | |
| "train_loss": 0.3164117013254473, | |
| "train_runtime": 47203.2069, | |
| "train_samples_per_second": 0.212, | |
| "train_steps_per_second": 0.007 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 310, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 238832327327744.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |