| { |
| "best_global_step": 5780, |
| "best_metric": 3.4774351119995117, |
| "best_model_checkpoint": "sindhibert_session6/checkpoint-5780", |
| "epoch": 2.0, |
| "eval_steps": 2890, |
| "global_step": 5780, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.03460207612456748, |
| "grad_norm": 5.271186828613281, |
| "learning_rate": 8.583815028901734e-07, |
| "loss": 14.7112890625, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.06920415224913495, |
| "grad_norm": 5.328507900238037, |
| "learning_rate": 1.7254335260115608e-06, |
| "loss": 14.65601806640625, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.10380622837370242, |
| "grad_norm": 5.2988433837890625, |
| "learning_rate": 2.592485549132948e-06, |
| "loss": 14.6437841796875, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.1384083044982699, |
| "grad_norm": 5.144472122192383, |
| "learning_rate": 2.9992958916410005e-06, |
| "loss": 14.644112548828126, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.17301038062283736, |
| "grad_norm": 5.255625247955322, |
| "learning_rate": 2.994135629312096e-06, |
| "loss": 14.592845458984375, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.20761245674740483, |
| "grad_norm": 5.365286827087402, |
| "learning_rate": 2.9839827352831524e-06, |
| "loss": 14.59123779296875, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.2422145328719723, |
| "grad_norm": 5.114892959594727, |
| "learning_rate": 2.96887113529664e-06, |
| "loss": 14.588660888671875, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.2768166089965398, |
| "grad_norm": 5.162683486938477, |
| "learning_rate": 2.948851324536296e-06, |
| "loss": 14.5647998046875, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.31141868512110726, |
| "grad_norm": 5.2245635986328125, |
| "learning_rate": 2.9239901988982294e-06, |
| "loss": 14.566002197265625, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.3460207612456747, |
| "grad_norm": 5.22702169418335, |
| "learning_rate": 2.8943708314592917e-06, |
| "loss": 14.56361328125, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.3806228373702422, |
| "grad_norm": 5.353369235992432, |
| "learning_rate": 2.8600921948896393e-06, |
| "loss": 14.56337890625, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.41522491349480967, |
| "grad_norm": 5.189014911651611, |
| "learning_rate": 2.821268830737051e-06, |
| "loss": 14.566427001953125, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.44982698961937717, |
| "grad_norm": 5.108746528625488, |
| "learning_rate": 2.7780304666880683e-06, |
| "loss": 14.5394287109375, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.4844290657439446, |
| "grad_norm": 5.1513895988464355, |
| "learning_rate": 2.7305215830848867e-06, |
| "loss": 14.531617431640624, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.5190311418685121, |
| "grad_norm": 5.2011942863464355, |
| "learning_rate": 2.678900930146467e-06, |
| "loss": 14.5282666015625, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.5536332179930796, |
| "grad_norm": 5.219590663909912, |
| "learning_rate": 2.6233409975070707e-06, |
| "loss": 14.489625244140624, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.5882352941176471, |
| "grad_norm": 5.411579132080078, |
| "learning_rate": 2.5640274378447445e-06, |
| "loss": 14.49999755859375, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.6228373702422145, |
| "grad_norm": 5.120180606842041, |
| "learning_rate": 2.5011584465256946e-06, |
| "loss": 14.515745849609376, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.657439446366782, |
| "grad_norm": 5.3347883224487305, |
| "learning_rate": 2.434944099337454e-06, |
| "loss": 14.52021728515625, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.6920415224913494, |
| "grad_norm": 4.978041172027588, |
| "learning_rate": 2.365605650523803e-06, |
| "loss": 14.50703857421875, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.726643598615917, |
| "grad_norm": 5.3864898681640625, |
| "learning_rate": 2.293374793467048e-06, |
| "loss": 14.493427734375, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.7612456747404844, |
| "grad_norm": 5.149505138397217, |
| "learning_rate": 2.2184928864880712e-06, |
| "loss": 14.453104248046875, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.7958477508650519, |
| "grad_norm": 5.451559066772461, |
| "learning_rate": 2.1412101463511406e-06, |
| "loss": 14.48880126953125, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.8304498269896193, |
| "grad_norm": 5.231166839599609, |
| "learning_rate": 2.0617848121683582e-06, |
| "loss": 14.46824951171875, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.8650519031141869, |
| "grad_norm": 5.132116794586182, |
| "learning_rate": 1.9804822824975567e-06, |
| "loss": 14.45693603515625, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.8996539792387543, |
| "grad_norm": 5.454078197479248, |
| "learning_rate": 1.8975742285170185e-06, |
| "loss": 14.468741455078124, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.9342560553633218, |
| "grad_norm": 5.184955596923828, |
| "learning_rate": 1.8133376862403233e-06, |
| "loss": 14.482393798828125, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.9688581314878892, |
| "grad_norm": 5.030216693878174, |
| "learning_rate": 1.7280541308046812e-06, |
| "loss": 14.4536962890625, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 3.484435558319092, |
| "eval_runtime": 11.6834, |
| "eval_samples_per_second": 639.538, |
| "eval_steps_per_second": 10.014, |
| "step": 2890 |
| }, |
| { |
| "epoch": 1.0034602076124568, |
| "grad_norm": 5.57059383392334, |
| "learning_rate": 1.642008535926004e-06, |
| "loss": 14.452493896484375, |
| "step": 2900 |
| }, |
| { |
| "epoch": 1.0380622837370241, |
| "grad_norm": 5.143775939941406, |
| "learning_rate": 1.555488421663523e-06, |
| "loss": 14.467264404296875, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.0726643598615917, |
| "grad_norm": 5.192799091339111, |
| "learning_rate": 1.4687828936758435e-06, |
| "loss": 14.464970703125, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.1072664359861593, |
| "grad_norm": 5.265404224395752, |
| "learning_rate": 1.3821816771787413e-06, |
| "loss": 14.4964404296875, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.1418685121107266, |
| "grad_norm": 5.28303337097168, |
| "learning_rate": 1.295974148832716e-06, |
| "loss": 14.460384521484375, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.1764705882352942, |
| "grad_norm": 5.456825256347656, |
| "learning_rate": 1.2104483697952423e-06, |
| "loss": 14.472330322265625, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.2110726643598615, |
| "grad_norm": 5.192461013793945, |
| "learning_rate": 1.1258901231687475e-06, |
| "loss": 14.4242919921875, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.245674740484429, |
| "grad_norm": 5.02662992477417, |
| "learning_rate": 1.0425819590606824e-06, |
| "loss": 14.4378662109375, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.2802768166089966, |
| "grad_norm": 5.156518459320068, |
| "learning_rate": 9.608022504465906e-07, |
| "loss": 14.4332080078125, |
| "step": 3700 |
| }, |
| { |
| "epoch": 1.314878892733564, |
| "grad_norm": 5.137354850769043, |
| "learning_rate": 8.808242629910054e-07, |
| "loss": 14.42055419921875, |
| "step": 3800 |
| }, |
| { |
| "epoch": 1.3494809688581315, |
| "grad_norm": 4.931822299957275, |
| "learning_rate": 8.029152419343472e-07, |
| "loss": 14.446268310546875, |
| "step": 3900 |
| }, |
| { |
| "epoch": 1.3840830449826989, |
| "grad_norm": 5.438608169555664, |
| "learning_rate": 7.273355190969783e-07, |
| "loss": 14.436180419921875, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.4186851211072664, |
| "grad_norm": 5.530498027801514, |
| "learning_rate": 6.54337642984345e-07, |
| "loss": 14.427529296875, |
| "step": 4100 |
| }, |
| { |
| "epoch": 1.453287197231834, |
| "grad_norm": 5.223482608795166, |
| "learning_rate": 5.841655348999535e-07, |
| "loss": 14.415745849609374, |
| "step": 4200 |
| }, |
| { |
| "epoch": 1.4878892733564013, |
| "grad_norm": 5.245316982269287, |
| "learning_rate": 5.170536738860046e-07, |
| "loss": 14.42922119140625, |
| "step": 4300 |
| }, |
| { |
| "epoch": 1.5224913494809689, |
| "grad_norm": 5.100404739379883, |
| "learning_rate": 4.532263132152101e-07, |
| "loss": 14.461243896484374, |
| "step": 4400 |
| }, |
| { |
| "epoch": 1.5570934256055362, |
| "grad_norm": 5.252062797546387, |
| "learning_rate": 3.928967310518557e-07, |
| "loss": 14.419617919921874, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.5916955017301038, |
| "grad_norm": 5.039636611938477, |
| "learning_rate": 3.362665177860309e-07, |
| "loss": 14.477161865234375, |
| "step": 4600 |
| }, |
| { |
| "epoch": 1.6262975778546713, |
| "grad_norm": 5.289979457855225, |
| "learning_rate": 2.8352490242237476e-07, |
| "loss": 14.38912109375, |
| "step": 4700 |
| }, |
| { |
| "epoch": 1.6608996539792389, |
| "grad_norm": 5.163676738739014, |
| "learning_rate": 2.348481202742086e-07, |
| "loss": 14.403753662109375, |
| "step": 4800 |
| }, |
| { |
| "epoch": 1.6955017301038062, |
| "grad_norm": 5.600104331970215, |
| "learning_rate": 1.9039882407588976e-07, |
| "loss": 14.447159423828126, |
| "step": 4900 |
| }, |
| { |
| "epoch": 1.7301038062283736, |
| "grad_norm": 5.240525722503662, |
| "learning_rate": 1.503255404811511e-07, |
| "loss": 14.401494140625, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.7647058823529411, |
| "grad_norm": 5.186587810516357, |
| "learning_rate": 1.1476217376352293e-07, |
| "loss": 14.4153125, |
| "step": 5100 |
| }, |
| { |
| "epoch": 1.7993079584775087, |
| "grad_norm": 5.226868152618408, |
| "learning_rate": 8.382755837722234e-08, |
| "loss": 14.4057421875, |
| "step": 5200 |
| }, |
| { |
| "epoch": 1.8339100346020762, |
| "grad_norm": 5.335669994354248, |
| "learning_rate": 5.762506187361205e-08, |
| "loss": 14.435169677734375, |
| "step": 5300 |
| }, |
| { |
| "epoch": 1.8685121107266436, |
| "grad_norm": 5.08687686920166, |
| "learning_rate": 3.624223950008548e-08, |
| "loss": 14.43446533203125, |
| "step": 5400 |
| }, |
| { |
| "epoch": 1.903114186851211, |
| "grad_norm": 5.379976272583008, |
| "learning_rate": 1.975054163552692e-08, |
| "loss": 14.415914306640625, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.9377162629757785, |
| "grad_norm": 5.047989845275879, |
| "learning_rate": 8.205075039945553e-09, |
| "loss": 14.41682861328125, |
| "step": 5600 |
| }, |
| { |
| "epoch": 1.972318339100346, |
| "grad_norm": 5.379687786102295, |
| "learning_rate": 1.6444187160679635e-09, |
| "loss": 14.4242138671875, |
| "step": 5700 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 3.4774351119995117, |
| "eval_runtime": 11.6654, |
| "eval_samples_per_second": 640.524, |
| "eval_steps_per_second": 10.03, |
| "step": 5780 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 5780, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 2890, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.8943456160818586e+17, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|