| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.9983268265476855, |
| "eval_steps": 500, |
| "global_step": 672, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.044617958728388175, |
| "grad_norm": 0.44321257723607643, |
| "learning_rate": 5e-06, |
| "loss": 0.7473, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.08923591745677635, |
| "grad_norm": 1.338530090223911, |
| "learning_rate": 5e-06, |
| "loss": 0.6749, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.13385387618516453, |
| "grad_norm": 0.286982469204418, |
| "learning_rate": 5e-06, |
| "loss": 0.6518, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.1784718349135527, |
| "grad_norm": 0.22191132313947864, |
| "learning_rate": 5e-06, |
| "loss": 0.6309, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.22308979364194087, |
| "grad_norm": 0.22313526614443369, |
| "learning_rate": 5e-06, |
| "loss": 0.6242, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.26770775237032907, |
| "grad_norm": 0.21768014018210444, |
| "learning_rate": 5e-06, |
| "loss": 0.6177, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.3123257110987172, |
| "grad_norm": 0.20654128227881519, |
| "learning_rate": 5e-06, |
| "loss": 0.607, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.3569436698271054, |
| "grad_norm": 0.2096515326020643, |
| "learning_rate": 5e-06, |
| "loss": 0.608, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.4015616285554936, |
| "grad_norm": 0.214474799126283, |
| "learning_rate": 5e-06, |
| "loss": 0.5995, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.44617958728388174, |
| "grad_norm": 0.2204481199549279, |
| "learning_rate": 5e-06, |
| "loss": 0.6024, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.49079754601226994, |
| "grad_norm": 0.22134589080039557, |
| "learning_rate": 5e-06, |
| "loss": 0.584, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.5354155047406581, |
| "grad_norm": 0.21238352336285804, |
| "learning_rate": 5e-06, |
| "loss": 0.5869, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.5800334634690463, |
| "grad_norm": 0.22914773839860497, |
| "learning_rate": 5e-06, |
| "loss": 0.5869, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.6246514221974344, |
| "grad_norm": 0.23025215144092048, |
| "learning_rate": 5e-06, |
| "loss": 0.5874, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.6692693809258227, |
| "grad_norm": 0.21502067226392538, |
| "learning_rate": 5e-06, |
| "loss": 0.5779, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.7138873396542108, |
| "grad_norm": 0.22383555331430666, |
| "learning_rate": 5e-06, |
| "loss": 0.5841, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.758505298382599, |
| "grad_norm": 0.22090378539108488, |
| "learning_rate": 5e-06, |
| "loss": 0.5779, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.8031232571109872, |
| "grad_norm": 0.22032432955026485, |
| "learning_rate": 5e-06, |
| "loss": 0.574, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.8477412158393753, |
| "grad_norm": 0.24073566899355314, |
| "learning_rate": 5e-06, |
| "loss": 0.5733, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.8923591745677635, |
| "grad_norm": 0.2140757527533294, |
| "learning_rate": 5e-06, |
| "loss": 0.572, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.9369771332961517, |
| "grad_norm": 0.22679886715463268, |
| "learning_rate": 5e-06, |
| "loss": 0.5717, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.9815950920245399, |
| "grad_norm": 0.2529726282581025, |
| "learning_rate": 5e-06, |
| "loss": 0.5714, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.9994422755158952, |
| "eval_loss": 0.5709418058395386, |
| "eval_runtime": 223.3167, |
| "eval_samples_per_second": 27.042, |
| "eval_steps_per_second": 0.425, |
| "step": 224 |
| }, |
| { |
| "epoch": 1.0262130507529281, |
| "grad_norm": 0.2607396745733898, |
| "learning_rate": 5e-06, |
| "loss": 0.6088, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.0708310094813163, |
| "grad_norm": 0.2163251119039884, |
| "learning_rate": 5e-06, |
| "loss": 0.5447, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.1154489682097044, |
| "grad_norm": 0.22159094074691163, |
| "learning_rate": 5e-06, |
| "loss": 0.5438, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.1600669269380925, |
| "grad_norm": 0.21627035107572698, |
| "learning_rate": 5e-06, |
| "loss": 0.5458, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.2046848856664807, |
| "grad_norm": 0.23271897339521683, |
| "learning_rate": 5e-06, |
| "loss": 0.5483, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.2493028443948688, |
| "grad_norm": 0.24002851008302872, |
| "learning_rate": 5e-06, |
| "loss": 0.5442, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.2939208031232572, |
| "grad_norm": 0.2453568380812131, |
| "learning_rate": 5e-06, |
| "loss": 0.5474, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.3385387618516453, |
| "grad_norm": 0.21246695532602308, |
| "learning_rate": 5e-06, |
| "loss": 0.5327, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.3831567205800335, |
| "grad_norm": 0.23086082729025076, |
| "learning_rate": 5e-06, |
| "loss": 0.5388, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.4277746793084216, |
| "grad_norm": 0.22600113921388443, |
| "learning_rate": 5e-06, |
| "loss": 0.5392, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.4723926380368098, |
| "grad_norm": 0.2289968612823342, |
| "learning_rate": 5e-06, |
| "loss": 0.5353, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.5170105967651981, |
| "grad_norm": 0.23941957998072452, |
| "learning_rate": 5e-06, |
| "loss": 0.5388, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.561628555493586, |
| "grad_norm": 0.2302773313085982, |
| "learning_rate": 5e-06, |
| "loss": 0.5321, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.6062465142219744, |
| "grad_norm": 0.23125753346116468, |
| "learning_rate": 5e-06, |
| "loss": 0.5355, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.6508644729503625, |
| "grad_norm": 0.22766972806947575, |
| "learning_rate": 5e-06, |
| "loss": 0.5327, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.6954824316787507, |
| "grad_norm": 0.21840397423259322, |
| "learning_rate": 5e-06, |
| "loss": 0.5405, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.7401003904071388, |
| "grad_norm": 0.22699613496146168, |
| "learning_rate": 5e-06, |
| "loss": 0.5337, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.784718349135527, |
| "grad_norm": 0.23126490164298122, |
| "learning_rate": 5e-06, |
| "loss": 0.5325, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.8293363078639153, |
| "grad_norm": 0.23691741692227147, |
| "learning_rate": 5e-06, |
| "loss": 0.5315, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.8739542665923032, |
| "grad_norm": 0.22142710480950437, |
| "learning_rate": 5e-06, |
| "loss": 0.5281, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.9185722253206916, |
| "grad_norm": 0.29222348999396186, |
| "learning_rate": 5e-06, |
| "loss": 0.5316, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.9631901840490797, |
| "grad_norm": 0.2348302665783151, |
| "learning_rate": 5e-06, |
| "loss": 0.5317, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.9988845510317903, |
| "eval_loss": 0.5504088997840881, |
| "eval_runtime": 225.1824, |
| "eval_samples_per_second": 26.818, |
| "eval_steps_per_second": 0.422, |
| "step": 448 |
| }, |
| { |
| "epoch": 2.007808142777468, |
| "grad_norm": 0.3168047860383918, |
| "learning_rate": 5e-06, |
| "loss": 0.5741, |
| "step": 450 |
| }, |
| { |
| "epoch": 2.0524261015058562, |
| "grad_norm": 0.2834626392548171, |
| "learning_rate": 5e-06, |
| "loss": 0.5027, |
| "step": 460 |
| }, |
| { |
| "epoch": 2.097044060234244, |
| "grad_norm": 0.2569670194124399, |
| "learning_rate": 5e-06, |
| "loss": 0.5015, |
| "step": 470 |
| }, |
| { |
| "epoch": 2.1416620189626325, |
| "grad_norm": 0.23763594409899297, |
| "learning_rate": 5e-06, |
| "loss": 0.4997, |
| "step": 480 |
| }, |
| { |
| "epoch": 2.1862799776910204, |
| "grad_norm": 0.23783136640612776, |
| "learning_rate": 5e-06, |
| "loss": 0.5066, |
| "step": 490 |
| }, |
| { |
| "epoch": 2.230897936419409, |
| "grad_norm": 0.23752043284775223, |
| "learning_rate": 5e-06, |
| "loss": 0.5, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.275515895147797, |
| "grad_norm": 0.2204952509796257, |
| "learning_rate": 5e-06, |
| "loss": 0.5048, |
| "step": 510 |
| }, |
| { |
| "epoch": 2.320133853876185, |
| "grad_norm": 0.24946351851573337, |
| "learning_rate": 5e-06, |
| "loss": 0.4972, |
| "step": 520 |
| }, |
| { |
| "epoch": 2.3647518126045735, |
| "grad_norm": 0.22710269863348215, |
| "learning_rate": 5e-06, |
| "loss": 0.512, |
| "step": 530 |
| }, |
| { |
| "epoch": 2.4093697713329614, |
| "grad_norm": 0.24573604962686163, |
| "learning_rate": 5e-06, |
| "loss": 0.5102, |
| "step": 540 |
| }, |
| { |
| "epoch": 2.4539877300613497, |
| "grad_norm": 0.2454775848592334, |
| "learning_rate": 5e-06, |
| "loss": 0.5103, |
| "step": 550 |
| }, |
| { |
| "epoch": 2.4986056887897377, |
| "grad_norm": 0.2410815694953051, |
| "learning_rate": 5e-06, |
| "loss": 0.496, |
| "step": 560 |
| }, |
| { |
| "epoch": 2.543223647518126, |
| "grad_norm": 0.23866340070030165, |
| "learning_rate": 5e-06, |
| "loss": 0.5098, |
| "step": 570 |
| }, |
| { |
| "epoch": 2.5878416062465144, |
| "grad_norm": 0.24075487463764883, |
| "learning_rate": 5e-06, |
| "loss": 0.4983, |
| "step": 580 |
| }, |
| { |
| "epoch": 2.6324595649749023, |
| "grad_norm": 0.2502533834047079, |
| "learning_rate": 5e-06, |
| "loss": 0.5024, |
| "step": 590 |
| }, |
| { |
| "epoch": 2.6770775237032907, |
| "grad_norm": 0.2660873218372233, |
| "learning_rate": 5e-06, |
| "loss": 0.5018, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.721695482431679, |
| "grad_norm": 0.260710830944657, |
| "learning_rate": 5e-06, |
| "loss": 0.4986, |
| "step": 610 |
| }, |
| { |
| "epoch": 2.766313441160067, |
| "grad_norm": 0.23395448772416363, |
| "learning_rate": 5e-06, |
| "loss": 0.5044, |
| "step": 620 |
| }, |
| { |
| "epoch": 2.810931399888455, |
| "grad_norm": 0.24309792455207027, |
| "learning_rate": 5e-06, |
| "loss": 0.4959, |
| "step": 630 |
| }, |
| { |
| "epoch": 2.8555493586168432, |
| "grad_norm": 0.23050931374816222, |
| "learning_rate": 5e-06, |
| "loss": 0.5051, |
| "step": 640 |
| }, |
| { |
| "epoch": 2.9001673173452316, |
| "grad_norm": 0.23428496858539824, |
| "learning_rate": 5e-06, |
| "loss": 0.4967, |
| "step": 650 |
| }, |
| { |
| "epoch": 2.9447852760736195, |
| "grad_norm": 0.22967426268231442, |
| "learning_rate": 5e-06, |
| "loss": 0.4981, |
| "step": 660 |
| }, |
| { |
| "epoch": 2.989403234802008, |
| "grad_norm": 0.2561574384965489, |
| "learning_rate": 5e-06, |
| "loss": 0.4944, |
| "step": 670 |
| }, |
| { |
| "epoch": 2.9983268265476855, |
| "eval_loss": 0.5417667031288147, |
| "eval_runtime": 227.6813, |
| "eval_samples_per_second": 26.524, |
| "eval_steps_per_second": 0.417, |
| "step": 672 |
| }, |
| { |
| "epoch": 2.9983268265476855, |
| "step": 672, |
| "total_flos": 1125415649280000.0, |
| "train_loss": 0.5495298178423018, |
| "train_runtime": 36802.8232, |
| "train_samples_per_second": 9.353, |
| "train_steps_per_second": 0.018 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 672, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1125415649280000.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|