{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500.0,
  "global_step": 619,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0016155088852988692,
      "grad_norm": 17.625,
      "learning_rate": 1.0526315789473685e-06,
      "loss": 1.1847093105316162,
      "step": 1,
      "token_acc": 0.7405614714424008
    },
    {
      "epoch": 0.01615508885298869,
      "grad_norm": 7.3125,
      "learning_rate": 1.0526315789473684e-05,
      "loss": 0.9536339441935221,
      "step": 10,
      "token_acc": 0.7746876697447039
    },
    {
      "epoch": 0.03231017770597738,
      "grad_norm": 5.21875,
      "learning_rate": 1.999986292247427e-05,
      "loss": 0.7083949089050293,
      "step": 20,
      "token_acc": 0.817535314174379
    },
    {
      "epoch": 0.048465266558966075,
      "grad_norm": 5.09375,
      "learning_rate": 1.9983418166140286e-05,
      "loss": 0.6189894676208496,
      "step": 30,
      "token_acc": 0.8316753668866345
    },
    {
      "epoch": 0.06462035541195477,
      "grad_norm": 5.25,
      "learning_rate": 1.99396095545518e-05,
      "loss": 0.595418643951416,
      "step": 40,
      "token_acc": 0.8332687463669831
    },
    {
      "epoch": 0.08077544426494346,
      "grad_norm": 4.96875,
      "learning_rate": 1.9868557164068073e-05,
      "loss": 0.5784546852111816,
      "step": 50,
      "token_acc": 0.8330070477682068
    },
    {
      "epoch": 0.09693053311793215,
      "grad_norm": 4.78125,
      "learning_rate": 1.977045574435264e-05,
      "loss": 0.5632569313049316,
      "step": 60,
      "token_acc": 0.8422178988326848
    },
    {
      "epoch": 0.11308562197092084,
      "grad_norm": 4.53125,
      "learning_rate": 1.9645574184577982e-05,
      "loss": 0.5411516189575195,
      "step": 70,
      "token_acc": 0.8414407360281884
    },
    {
      "epoch": 0.12924071082390953,
      "grad_norm": 5.125,
      "learning_rate": 1.949425477641904e-05,
      "loss": 0.5536646366119384,
      "step": 80,
      "token_acc": 0.8417469117790098
    },
    {
      "epoch": 0.14539579967689822,
      "grad_norm": 5.375,
      "learning_rate": 1.931691227585549e-05,
      "loss": 0.552952766418457,
      "step": 90,
      "token_acc": 0.8431372549019608
    },
    {
      "epoch": 0.16155088852988692,
      "grad_norm": 4.875,
      "learning_rate": 1.9114032766354453e-05,
      "loss": 0.5462624549865722,
      "step": 100,
      "token_acc": 0.843832148768377
    },
    {
      "epoch": 0.1777059773828756,
      "grad_norm": 4.4375,
      "learning_rate": 1.888617232654949e-05,
      "loss": 0.5441291809082032,
      "step": 110,
      "token_acc": 0.8419827335338054
    },
    {
      "epoch": 0.1938610662358643,
      "grad_norm": 4.3125,
      "learning_rate": 1.8633955506067717e-05,
      "loss": 0.5480209350585937,
      "step": 120,
      "token_acc": 0.8493486139680674
    },
    {
      "epoch": 0.210016155088853,
      "grad_norm": 5.28125,
      "learning_rate": 1.8358073613682705e-05,
      "loss": 0.553197193145752,
      "step": 130,
      "token_acc": 0.8403582205782147
    },
    {
      "epoch": 0.22617124394184168,
      "grad_norm": 5.4375,
      "learning_rate": 1.805928282248516e-05,
      "loss": 0.558505630493164,
      "step": 140,
      "token_acc": 0.8416950803701899
    },
    {
      "epoch": 0.24232633279483037,
      "grad_norm": 4.5625,
      "learning_rate": 1.7738402097265063e-05,
      "loss": 0.5198162078857422,
      "step": 150,
      "token_acc": 0.8472963789923308
    },
    {
      "epoch": 0.25848142164781907,
      "grad_norm": 4.875,
      "learning_rate": 1.73963109497861e-05,
      "loss": 0.5393078804016114,
      "step": 160,
      "token_acc": 0.844424985405721
    },
    {
      "epoch": 0.27463651050080773,
      "grad_norm": 4.90625,
      "learning_rate": 1.703394702810504e-05,
      "loss": 0.5661784172058105,
      "step": 170,
      "token_acc": 0.8364490554957424
    },
    {
      "epoch": 0.29079159935379645,
      "grad_norm": 4.5625,
      "learning_rate": 1.665230354654361e-05,
      "loss": 0.5104843139648437,
      "step": 180,
      "token_acc": 0.8500578927055191
    },
    {
      "epoch": 0.3069466882067851,
      "grad_norm": 4.71875,
      "learning_rate": 1.6252426563357054e-05,
      "loss": 0.5394854545593262,
      "step": 190,
      "token_acc": 0.8471082454083626
    },
    {
      "epoch": 0.32310177705977383,
      "grad_norm": 4.46875,
      "learning_rate": 1.5835412113561176e-05,
      "loss": 0.5530763149261475,
      "step": 200,
      "token_acc": 0.8418517790446236
    },
    {
      "epoch": 0.3392568659127625,
      "grad_norm": 4.40625,
      "learning_rate": 1.5402403204776552e-05,
      "loss": 0.5297245979309082,
      "step": 210,
      "token_acc": 0.8469646691391762
    },
    {
      "epoch": 0.3554119547657512,
      "grad_norm": 4.75,
      "learning_rate": 1.4954586684324077e-05,
      "loss": 0.5240192413330078,
      "step": 220,
      "token_acc": 0.8480382588327152
    },
    {
      "epoch": 0.3715670436187399,
      "grad_norm": 4.625,
      "learning_rate": 1.4493189986158966e-05,
      "loss": 0.5166411399841309,
      "step": 230,
      "token_acc": 0.8447706065318819
    },
    {
      "epoch": 0.3877221324717286,
      "grad_norm": 4.375,
      "learning_rate": 1.4019477766559604e-05,
      "loss": 0.5292513847351075,
      "step": 240,
      "token_acc": 0.8480617127233668
    },
    {
      "epoch": 0.40387722132471726,
      "grad_norm": 5.0,
      "learning_rate": 1.3534748437792573e-05,
      "loss": 0.5284510135650635,
      "step": 250,
      "token_acc": 0.8485742379547689
    },
    {
      "epoch": 0.420032310177706,
      "grad_norm": 4.28125,
      "learning_rate": 1.3040330609254903e-05,
      "loss": 0.5196167945861816,
      "step": 260,
      "token_acc": 0.8470174753490188
    },
    {
      "epoch": 0.43618739903069464,
      "grad_norm": 4.9375,
      "learning_rate": 1.2537579445848058e-05,
      "loss": 0.5236739635467529,
      "step": 270,
      "token_acc": 0.8470025385666862
    },
    {
      "epoch": 0.45234248788368336,
      "grad_norm": 4.625,
      "learning_rate": 1.2027872953565125e-05,
      "loss": 0.5147537231445313,
      "step": 280,
      "token_acc": 0.8483849063924725
    },
    {
      "epoch": 0.46849757673667203,
      "grad_norm": 4.53125,
      "learning_rate": 1.1512608202472195e-05,
      "loss": 0.5182638168334961,
      "step": 290,
      "token_acc": 0.8484908071969325
    },
    {
      "epoch": 0.48465266558966075,
      "grad_norm": 4.5625,
      "learning_rate": 1.0993197497436392e-05,
      "loss": 0.5263832092285157,
      "step": 300,
      "token_acc": 0.848084030344291
    },
    {
      "epoch": 0.5008077544426495,
      "grad_norm": 4.28125,
      "learning_rate": 1.0471064507096427e-05,
      "loss": 0.5192934036254883,
      "step": 310,
      "token_acc": 0.8522849069472863
    },
    {
      "epoch": 0.5169628432956381,
      "grad_norm": 4.46875,
      "learning_rate": 9.947640361685805e-06,
      "loss": 0.49528074264526367,
      "step": 320,
      "token_acc": 0.853359489946052
    },
    {
      "epoch": 0.5331179321486268,
      "grad_norm": 4.625,
      "learning_rate": 9.424359730404329e-06,
      "loss": 0.5119996070861816,
      "step": 330,
      "token_acc": 0.8492686234621718
    },
    {
      "epoch": 0.5492730210016155,
      "grad_norm": 4.5625,
      "learning_rate": 8.902656889089548e-06,
      "loss": 0.5261263847351074,
      "step": 340,
      "token_acc": 0.8463559404495475
    },
    {
      "epoch": 0.5654281098546042,
      "grad_norm": 4.375,
      "learning_rate": 8.38396178896639e-06,
      "loss": 0.5155754566192627,
      "step": 350,
      "token_acc": 0.853113410111263
    },
    {
      "epoch": 0.5815831987075929,
      "grad_norm": 4.59375,
      "learning_rate": 7.869696137250235e-06,
      "loss": 0.5171246528625488,
      "step": 360,
      "token_acc": 0.8516091166976426
    },
    {
      "epoch": 0.5977382875605816,
      "grad_norm": 4.6875,
      "learning_rate": 7.361269500346274e-06,
      "loss": 0.5315141677856445,
      "step": 370,
      "token_acc": 0.8478004713275726
    },
    {
      "epoch": 0.6138933764135702,
      "grad_norm": 5.0625,
      "learning_rate": 6.860075440325951e-06,
      "loss": 0.5140278816223145,
      "step": 380,
      "token_acc": 0.8488246287006983
    },
    {
      "epoch": 0.630048465266559,
      "grad_norm": 4.78125,
      "learning_rate": 6.367487695270218e-06,
      "loss": 0.5044659614562989,
      "step": 390,
      "token_acc": 0.8520192591136877
    },
    {
      "epoch": 0.6462035541195477,
      "grad_norm": 5.0,
      "learning_rate": 5.884856413948913e-06,
      "loss": 0.5070418357849121,
      "step": 400,
      "token_acc": 0.8493284017909286
    },
    {
      "epoch": 0.6623586429725363,
      "grad_norm": 4.59375,
      "learning_rate": 5.413504455156855e-06,
      "loss": 0.5049197196960449,
      "step": 410,
      "token_acc": 0.8500828540793449
    },
    {
      "epoch": 0.678513731825525,
      "grad_norm": 4.4375,
      "learning_rate": 4.954723761849809e-06,
      "loss": 0.4989957332611084,
      "step": 420,
      "token_acc": 0.8527764127764128
    },
    {
      "epoch": 0.6946688206785138,
      "grad_norm": 5.125,
      "learning_rate": 4.509771820018682e-06,
      "loss": 0.49854435920715334,
      "step": 430,
      "token_acc": 0.8521560574948666
    },
    {
      "epoch": 0.7108239095315024,
      "grad_norm": 4.46875,
      "learning_rate": 4.0798682120078046e-06,
      "loss": 0.5228716373443604,
      "step": 440,
      "token_acc": 0.8458528951486698
    },
    {
      "epoch": 0.7269789983844911,
      "grad_norm": 5.25,
      "learning_rate": 3.6661912737244996e-06,
      "loss": 0.5174414157867432,
      "step": 450,
      "token_acc": 0.8437163814180929
    },
    {
      "epoch": 0.7431340872374798,
      "grad_norm": 5.28125,
      "learning_rate": 3.2698748649022693e-06,
      "loss": 0.4988471508026123,
      "step": 460,
      "token_acc": 0.8513395031660984
    },
    {
      "epoch": 0.7592891760904685,
      "grad_norm": 4.59375,
      "learning_rate": 2.8920052612700755e-06,
      "loss": 0.5026087760925293,
      "step": 470,
      "token_acc": 0.8537626066718387
    },
    {
      "epoch": 0.7754442649434572,
      "grad_norm": 4.40625,
      "learning_rate": 2.5336181771460877e-06,
      "loss": 0.49767255783081055,
      "step": 480,
      "token_acc": 0.8517356344381252
    },
    {
      "epoch": 0.7915993537964459,
      "grad_norm": 4.8125,
      "learning_rate": 2.195695926616702e-06,
      "loss": 0.5065960884094238,
      "step": 490,
      "token_acc": 0.8521747648902821
    },
    {
      "epoch": 0.8077544426494345,
      "grad_norm": 4.4375,
      "learning_rate": 1.8791647310819371e-06,
      "loss": 0.5075524330139161,
      "step": 500,
      "token_acc": 0.8482975174173291
    },
    {
      "epoch": 0.8239095315024233,
      "grad_norm": 5.25,
      "learning_rate": 1.5848921805469396e-06,
      "loss": 0.5103363990783691,
      "step": 510,
      "token_acc": 0.8537348455220962
    },
    {
      "epoch": 0.840064620355412,
      "grad_norm": 4.9375,
      "learning_rate": 1.3136848556180893e-06,
      "loss": 0.527727460861206,
      "step": 520,
      "token_acc": 0.8470945020543925
    },
    {
      "epoch": 0.8562197092084006,
      "grad_norm": 4.375,
      "learning_rate": 1.0662861167216243e-06,
      "loss": 0.4919741630554199,
      "step": 530,
      "token_acc": 0.8542659791808541
    },
    {
      "epoch": 0.8723747980613893,
      "grad_norm": 6.90625,
      "learning_rate": 8.433740666043899e-07,
      "loss": 0.5399604797363281,
      "step": 540,
      "token_acc": 0.8415647921760391
    },
    {
      "epoch": 0.8885298869143781,
      "grad_norm": 4.40625,
      "learning_rate": 6.455596917013274e-07,
      "loss": 0.5133064270019532,
      "step": 550,
      "token_acc": 0.8489405331510594
    },
    {
      "epoch": 0.9046849757673667,
      "grad_norm": 4.875,
      "learning_rate": 4.733851874641382e-07,
      "loss": 0.5087760925292969,
      "step": 560,
      "token_acc": 0.8505905511811024
    },
    {
      "epoch": 0.9208400646203554,
      "grad_norm": 4.625,
      "learning_rate": 3.273224722412327e-07,
      "loss": 0.4963532447814941,
      "step": 570,
      "token_acc": 0.8557692307692307
    },
    {
      "epoch": 0.9369951534733441,
      "grad_norm": 4.65625,
      "learning_rate": 2.077718937823414e-07,
      "loss": 0.5168875694274903,
      "step": 580,
      "token_acc": 0.8515441751368257
    },
    {
      "epoch": 0.9531502423263328,
      "grad_norm": 4.09375,
      "learning_rate": 1.1506113191316447e-07,
      "loss": 0.5301695823669433,
      "step": 590,
      "token_acc": 0.8447769953051644
    },
    {
      "epoch": 0.9693053311793215,
      "grad_norm": 4.53125,
      "learning_rate": 4.944430038773762e-08,
      "loss": 0.507291316986084,
      "step": 600,
      "token_acc": 0.8520109332292073
    },
    {
      "epoch": 0.9854604200323102,
      "grad_norm": 4.28125,
      "learning_rate": 1.1101250380300965e-08,
      "loss": 0.48858890533447263,
      "step": 610,
      "token_acc": 0.8529469355786176
    }
  ],
  "logging_steps": 10,
  "max_steps": 619,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.618575333317673e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}