| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 750, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.013348907058234607, |
| "grad_norm": 74.82529080509352, |
| "learning_rate": 6.000000000000001e-07, |
| "loss": 0.6277, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.026697814116469213, |
| "grad_norm": 1.6096919343173548, |
| "learning_rate": 1.2666666666666669e-06, |
| "loss": 0.6328, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.04004672117470382, |
| "grad_norm": 0.9314352408083703, |
| "learning_rate": 1.9333333333333336e-06, |
| "loss": 0.6041, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.053395628232938426, |
| "grad_norm": 0.811733547929806, |
| "learning_rate": 2.6e-06, |
| "loss": 0.6042, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.06674453529117304, |
| "grad_norm": 0.7093539476838706, |
| "learning_rate": 3.266666666666667e-06, |
| "loss": 0.603, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.08009344234940764, |
| "grad_norm": 0.7170335789165264, |
| "learning_rate": 3.9333333333333335e-06, |
| "loss": 0.6155, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.09344234940764225, |
| "grad_norm": 0.6815670283453077, |
| "learning_rate": 4.600000000000001e-06, |
| "loss": 0.6002, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.10679125646587685, |
| "grad_norm": 0.656703252575977, |
| "learning_rate": 4.99956677884892e-06, |
| "loss": 0.61, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.12014016352411146, |
| "grad_norm": 0.5920011648125949, |
| "learning_rate": 4.994694765050121e-06, |
| "loss": 0.6102, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.13348907058234608, |
| "grad_norm": 0.5227326374455843, |
| "learning_rate": 4.984419797901491e-06, |
| "loss": 0.6009, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.14683797764058068, |
| "grad_norm": 0.5495253836256794, |
| "learning_rate": 4.9687641306938766e-06, |
| "loss": 0.5938, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.16018688469881529, |
| "grad_norm": 0.48232788709453134, |
| "learning_rate": 4.94776167011629e-06, |
| "loss": 0.5871, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.1735357917570499, |
| "grad_norm": 0.506411321536645, |
| "learning_rate": 4.921457902821578e-06, |
| "loss": 0.5974, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.1868846988152845, |
| "grad_norm": 0.4596419488812572, |
| "learning_rate": 4.889909796912454e-06, |
| "loss": 0.5867, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.2002336058735191, |
| "grad_norm": 0.4945179863512727, |
| "learning_rate": 4.85318567856128e-06, |
| "loss": 0.5829, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.2135825129317537, |
| "grad_norm": 0.48473847553604443, |
| "learning_rate": 4.811365084030784e-06, |
| "loss": 0.59, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.2269314199899883, |
| "grad_norm": 0.5177422654479074, |
| "learning_rate": 4.764538587416233e-06, |
| "loss": 0.5961, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.2402803270482229, |
| "grad_norm": 0.5448020887198345, |
| "learning_rate": 4.712807604482108e-06, |
| "loss": 0.5687, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.25362923410645755, |
| "grad_norm": 0.4464063495342875, |
| "learning_rate": 4.656284173018144e-06, |
| "loss": 0.5858, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.26697814116469215, |
| "grad_norm": 0.45099478856513986, |
| "learning_rate": 4.595090710190419e-06, |
| "loss": 0.6043, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.28032704822292676, |
| "grad_norm": 0.44848286804261916, |
| "learning_rate": 4.529359747413038e-06, |
| "loss": 0.591, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.29367595528116136, |
| "grad_norm": 0.4411730495621085, |
| "learning_rate": 4.4592336433146e-06, |
| "loss": 0.5944, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.30702486233939597, |
| "grad_norm": 0.48448789063177666, |
| "learning_rate": 4.384864275421109e-06, |
| "loss": 0.5999, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.32037376939763057, |
| "grad_norm": 0.444274267493019, |
| "learning_rate": 4.30641271122307e-06, |
| "loss": 0.6094, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.3337226764558652, |
| "grad_norm": 0.4863069333544111, |
| "learning_rate": 4.224048859339175e-06, |
| "loss": 0.6174, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.3470715835140998, |
| "grad_norm": 0.45811760037842997, |
| "learning_rate": 4.1379511015320625e-06, |
| "loss": 0.6011, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.3604204905723344, |
| "grad_norm": 0.4519496506749116, |
| "learning_rate": 4.048305906373151e-06, |
| "loss": 0.6139, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.373769397630569, |
| "grad_norm": 0.46096665359482364, |
| "learning_rate": 3.955307425393224e-06, |
| "loss": 0.5747, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.3871183046888036, |
| "grad_norm": 0.43711465245434705, |
| "learning_rate": 3.859157072593459e-06, |
| "loss": 0.5878, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.4004672117470382, |
| "grad_norm": 0.4882102831091009, |
| "learning_rate": 3.760063088227542e-06, |
| "loss": 0.6029, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.4138161188052728, |
| "grad_norm": 0.4342997927660038, |
| "learning_rate": 3.658240087799655e-06, |
| "loss": 0.5778, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.4271650258635074, |
| "grad_norm": 0.43187112453168347, |
| "learning_rate": 3.5539085972550786e-06, |
| "loss": 0.5992, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.440513932921742, |
| "grad_norm": 0.471747382817541, |
| "learning_rate": 3.4472945753701038e-06, |
| "loss": 0.5953, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.4538628399799766, |
| "grad_norm": 0.44915302997798534, |
| "learning_rate": 3.338628924375638e-06, |
| "loss": 0.5794, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.4672117470382112, |
| "grad_norm": 0.4619611771891075, |
| "learning_rate": 3.228146989874389e-06, |
| "loss": 0.5877, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.4805606540964458, |
| "grad_norm": 0.4885767281577149, |
| "learning_rate": 3.116088051134695e-06, |
| "loss": 0.5849, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.4939095611546805, |
| "grad_norm": 0.46385582406105635, |
| "learning_rate": 3.002694802864912e-06, |
| "loss": 0.6113, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.5072584682129151, |
| "grad_norm": 0.4631046992756232, |
| "learning_rate": 2.888212829590719e-06, |
| "loss": 0.6048, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.5206073752711496, |
| "grad_norm": 0.43044083063105687, |
| "learning_rate": 2.77289007377372e-06, |
| "loss": 0.5776, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.5339562823293843, |
| "grad_norm": 0.48516134620233986, |
| "learning_rate": 2.6569762988232838e-06, |
| "loss": 0.604, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.5473051893876189, |
| "grad_norm": 0.4819838364354133, |
| "learning_rate": 2.5407225481646146e-06, |
| "loss": 0.5734, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.5606540964458535, |
| "grad_norm": 0.4651999207262788, |
| "learning_rate": 2.4243806015345988e-06, |
| "loss": 0.5838, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.5740030035040881, |
| "grad_norm": 0.4376372826414532, |
| "learning_rate": 2.3082024296829538e-06, |
| "loss": 0.5692, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.5873519105623227, |
| "grad_norm": 0.45235705289954437, |
| "learning_rate": 2.192439648659699e-06, |
| "loss": 0.5807, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.6007008176205573, |
| "grad_norm": 0.4787657223646769, |
| "learning_rate": 2.0773429748708153e-06, |
| "loss": 0.5954, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.6140497246787919, |
| "grad_norm": 0.4533551445182391, |
| "learning_rate": 1.963161682082342e-06, |
| "loss": 0.5884, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.6273986317370265, |
| "grad_norm": 0.4272140082533887, |
| "learning_rate": 1.850143061548907e-06, |
| "loss": 0.5823, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.6407475387952611, |
| "grad_norm": 0.4392811733084597, |
| "learning_rate": 1.7385318864359304e-06, |
| "loss": 0.5881, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.6540964458534958, |
| "grad_norm": 0.45353341871174696, |
| "learning_rate": 1.6285698816954626e-06, |
| "loss": 0.5975, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.6674453529117303, |
| "grad_norm": 0.4996106273981709, |
| "learning_rate": 1.520495200543754e-06, |
| "loss": 0.5933, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.680794259969965, |
| "grad_norm": 0.4365504487522075, |
| "learning_rate": 1.41454190867443e-06, |
| "loss": 0.5967, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.6941431670281996, |
| "grad_norm": 0.44793327079373074, |
| "learning_rate": 1.3109394773243117e-06, |
| "loss": 0.5848, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.7074920740864342, |
| "grad_norm": 0.42862128316402553, |
| "learning_rate": 1.2099122862898214e-06, |
| "loss": 0.6007, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.7208409811446688, |
| "grad_norm": 0.432473388398012, |
| "learning_rate": 1.1116791379703032e-06, |
| "loss": 0.6002, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.7341898882029034, |
| "grad_norm": 0.4365401035182129, |
| "learning_rate": 1.0164527834907468e-06, |
| "loss": 0.5881, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.747538795261138, |
| "grad_norm": 0.44113418564253065, |
| "learning_rate": 9.244394619302338e-07, |
| "loss": 0.5681, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.7608877023193726, |
| "grad_norm": 0.4530120280115404, |
| "learning_rate": 8.35838453654009e-07, |
| "loss": 0.6012, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.7742366093776072, |
| "grad_norm": 0.43315844481160726, |
| "learning_rate": 7.508416487165862e-07, |
| "loss": 0.5864, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.7875855164358418, |
| "grad_norm": 0.43418599980747014, |
| "learning_rate": 6.696331312706245e-07, |
| "loss": 0.573, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.8009344234940764, |
| "grad_norm": 0.393826774521195, |
| "learning_rate": 5.923887808816373e-07, |
| "loss": 0.5887, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.8142833305523111, |
| "grad_norm": 0.447594763094874, |
| "learning_rate": 5.192758916120236e-07, |
| "loss": 0.5824, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.8276322376105456, |
| "grad_norm": 0.41723158327426507, |
| "learning_rate": 4.5045280969937847e-07, |
| "loss": 0.5913, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.8409811446687803, |
| "grad_norm": 0.42914391176019, |
| "learning_rate": 3.86068590613804e-07, |
| "loss": 0.5901, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.8543300517270148, |
| "grad_norm": 0.42096167751505276, |
| "learning_rate": 3.262626762369525e-07, |
| "loss": 0.5972, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.8676789587852495, |
| "grad_norm": 0.4194093214775328, |
| "learning_rate": 2.7116459286195887e-07, |
| "loss": 0.6007, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.881027865843484, |
| "grad_norm": 0.43327319840700385, |
| "learning_rate": 2.208936706683351e-07, |
| "loss": 0.5838, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.8943767729017187, |
| "grad_norm": 0.4359893596297785, |
| "learning_rate": 1.7555878527937164e-07, |
| "loss": 0.5914, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.9077256799599532, |
| "grad_norm": 0.415818533124484, |
| "learning_rate": 1.352581219617824e-07, |
| "loss": 0.5777, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.9210745870181879, |
| "grad_norm": 0.41684651467396844, |
| "learning_rate": 1.0007896297828113e-07, |
| "loss": 0.5804, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.9344234940764224, |
| "grad_norm": 0.4339073826453879, |
| "learning_rate": 7.009749855363457e-08, |
| "loss": 0.5962, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.9477724011346571, |
| "grad_norm": 0.4254475467695668, |
| "learning_rate": 4.537866186360207e-08, |
| "loss": 0.5783, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.9611213081928917, |
| "grad_norm": 0.43873911481956157, |
| "learning_rate": 2.59759884041369e-08, |
| "loss": 0.5809, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.9744702152511263, |
| "grad_norm": 0.42847736934790803, |
| "learning_rate": 1.193150004542204e-08, |
| "loss": 0.5988, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.987819122309361, |
| "grad_norm": 0.4233057016442379, |
| "learning_rate": 3.275614021857609e-09, |
| "loss": 0.5788, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.4142778830762649, |
| "learning_rate": 2.7077055103075233e-11, |
| "loss": 0.5609, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 750, |
| "total_flos": 9.236595014945997e+17, |
| "train_loss": 0.5925992005666098, |
| "train_runtime": 15477.022, |
| "train_samples_per_second": 24.78, |
| "train_steps_per_second": 0.048 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 750, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 9.236595014945997e+17, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|