| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.226646248085758, |
| "eval_steps": 100, |
| "global_step": 400, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.015313935681470138, |
| "grad_norm": 3.8326407564846536, |
| "learning_rate": 6.134969325153375e-07, |
| "loss": 1.3339, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.030627871362940276, |
| "grad_norm": 3.5496788557416803, |
| "learning_rate": 1.226993865030675e-06, |
| "loss": 1.3517, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.045941807044410414, |
| "grad_norm": 2.292172802820395, |
| "learning_rate": 1.8404907975460124e-06, |
| "loss": 1.3142, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.06125574272588055, |
| "grad_norm": 1.7775340318261912, |
| "learning_rate": 2.45398773006135e-06, |
| "loss": 1.2477, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.07656967840735068, |
| "grad_norm": 1.4368794370978828, |
| "learning_rate": 3.0674846625766875e-06, |
| "loss": 1.2037, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.09188361408882083, |
| "grad_norm": 1.061354555879628, |
| "learning_rate": 3.680981595092025e-06, |
| "loss": 1.1505, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.10719754977029096, |
| "grad_norm": 0.8377616484610274, |
| "learning_rate": 4.294478527607362e-06, |
| "loss": 1.1179, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.1225114854517611, |
| "grad_norm": 0.7884987790196414, |
| "learning_rate": 4.9079754601227e-06, |
| "loss": 1.0735, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.13782542113323124, |
| "grad_norm": 0.7279957151761491, |
| "learning_rate": 5.521472392638038e-06, |
| "loss": 1.0556, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.15313935681470137, |
| "grad_norm": 0.6692858930956382, |
| "learning_rate": 6.134969325153375e-06, |
| "loss": 1.0294, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.16845329249617153, |
| "grad_norm": 0.6401840707040195, |
| "learning_rate": 6.748466257668712e-06, |
| "loss": 1.0175, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.18376722817764166, |
| "grad_norm": 0.6584204446837412, |
| "learning_rate": 7.36196319018405e-06, |
| "loss": 1.0283, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.1990811638591118, |
| "grad_norm": 0.6647811794468166, |
| "learning_rate": 7.975460122699386e-06, |
| "loss": 0.9799, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.21439509954058192, |
| "grad_norm": 0.6774675226534475, |
| "learning_rate": 8.588957055214725e-06, |
| "loss": 0.969, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.22970903522205208, |
| "grad_norm": 0.6918496726012217, |
| "learning_rate": 9.202453987730062e-06, |
| "loss": 0.9787, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.2450229709035222, |
| "grad_norm": 0.6740082248654927, |
| "learning_rate": 9.8159509202454e-06, |
| "loss": 0.9596, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.26033690658499237, |
| "grad_norm": 0.6791518891702232, |
| "learning_rate": 1.0429447852760737e-05, |
| "loss": 0.9813, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.27565084226646247, |
| "grad_norm": 0.7029695620068616, |
| "learning_rate": 1.1042944785276076e-05, |
| "loss": 0.9703, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.29096477794793263, |
| "grad_norm": 0.6355433423695775, |
| "learning_rate": 1.1656441717791411e-05, |
| "loss": 0.9375, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.30627871362940273, |
| "grad_norm": 0.6551855479751819, |
| "learning_rate": 1.226993865030675e-05, |
| "loss": 0.952, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.30627871362940273, |
| "eval_loss": 0.964697003364563, |
| "eval_runtime": 3.4019, |
| "eval_samples_per_second": 36.45, |
| "eval_steps_per_second": 2.352, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.3215926493108729, |
| "grad_norm": 0.6655305965463972, |
| "learning_rate": 1.2883435582822085e-05, |
| "loss": 0.9237, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.33690658499234305, |
| "grad_norm": 0.7257201177401903, |
| "learning_rate": 1.3496932515337424e-05, |
| "loss": 0.9511, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.35222052067381315, |
| "grad_norm": 0.7424667090609683, |
| "learning_rate": 1.4110429447852763e-05, |
| "loss": 0.9323, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.3675344563552833, |
| "grad_norm": 0.7336582339806711, |
| "learning_rate": 1.47239263803681e-05, |
| "loss": 0.9449, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.38284839203675347, |
| "grad_norm": 0.6851538651091853, |
| "learning_rate": 1.5337423312883436e-05, |
| "loss": 0.9133, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.3981623277182236, |
| "grad_norm": 0.7424313941044212, |
| "learning_rate": 1.5950920245398772e-05, |
| "loss": 0.9173, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.41347626339969373, |
| "grad_norm": 0.7937170928985942, |
| "learning_rate": 1.656441717791411e-05, |
| "loss": 0.9348, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.42879019908116384, |
| "grad_norm": 0.7668730085289572, |
| "learning_rate": 1.717791411042945e-05, |
| "loss": 0.9091, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.444104134762634, |
| "grad_norm": 0.713212902268727, |
| "learning_rate": 1.7791411042944788e-05, |
| "loss": 0.9054, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.45941807044410415, |
| "grad_norm": 0.8266274957992115, |
| "learning_rate": 1.8404907975460123e-05, |
| "loss": 0.9165, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.47473200612557426, |
| "grad_norm": 0.7773849012810377, |
| "learning_rate": 1.9018404907975462e-05, |
| "loss": 0.9261, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.4900459418070444, |
| "grad_norm": 0.7698543962501324, |
| "learning_rate": 1.96319018404908e-05, |
| "loss": 0.8848, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.5053598774885145, |
| "grad_norm": 0.7326492641075325, |
| "learning_rate": 1.9999908278985548e-05, |
| "loss": 0.9002, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.5206738131699847, |
| "grad_norm": 0.725915155149828, |
| "learning_rate": 1.9998876436895888e-05, |
| "loss": 0.9064, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.5359877488514548, |
| "grad_norm": 0.7137681931522117, |
| "learning_rate": 1.99966982201439e-05, |
| "loss": 0.9083, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.5513016845329249, |
| "grad_norm": 0.8244990854189328, |
| "learning_rate": 1.9993373878462894e-05, |
| "loss": 0.8988, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.5666156202143952, |
| "grad_norm": 0.7883005110592801, |
| "learning_rate": 1.99889037929898e-05, |
| "loss": 0.9021, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.5819295558958653, |
| "grad_norm": 0.7224060026030372, |
| "learning_rate": 1.9983288476221482e-05, |
| "loss": 0.8881, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.5972434915773354, |
| "grad_norm": 0.7010662098507998, |
| "learning_rate": 1.9976528571955946e-05, |
| "loss": 0.8889, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.6125574272588055, |
| "grad_norm": 0.7154815490781321, |
| "learning_rate": 1.9968624855218578e-05, |
| "loss": 0.8884, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.6125574272588055, |
| "eval_loss": 0.9119902849197388, |
| "eval_runtime": 3.4604, |
| "eval_samples_per_second": 35.834, |
| "eval_steps_per_second": 2.312, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.6278713629402757, |
| "grad_norm": 0.6984184509316927, |
| "learning_rate": 1.995957823217325e-05, |
| "loss": 0.9033, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.6431852986217458, |
| "grad_norm": 0.7120998555383481, |
| "learning_rate": 1.9949389740018438e-05, |
| "loss": 0.8988, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.6584992343032159, |
| "grad_norm": 0.66696186884436, |
| "learning_rate": 1.993806054686832e-05, |
| "loss": 0.8735, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.6738131699846861, |
| "grad_norm": 0.7395425655626215, |
| "learning_rate": 1.9925591951618822e-05, |
| "loss": 0.9127, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.6891271056661562, |
| "grad_norm": 0.6960401538019414, |
| "learning_rate": 1.9911985383798737e-05, |
| "loss": 0.875, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.7044410413476263, |
| "grad_norm": 0.7195874036136765, |
| "learning_rate": 1.9897242403405792e-05, |
| "loss": 0.8782, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.7197549770290965, |
| "grad_norm": 0.7129931764921758, |
| "learning_rate": 1.9881364700727827e-05, |
| "loss": 0.8943, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.7350689127105666, |
| "grad_norm": 0.690645634714852, |
| "learning_rate": 1.9864354096148966e-05, |
| "loss": 0.8782, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.7503828483920367, |
| "grad_norm": 0.7055958514936376, |
| "learning_rate": 1.9846212539940955e-05, |
| "loss": 0.8867, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.7656967840735069, |
| "grad_norm": 0.7517885221069497, |
| "learning_rate": 1.982694211203952e-05, |
| "loss": 0.8843, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.781010719754977, |
| "grad_norm": 0.7270711746255435, |
| "learning_rate": 1.9806545021805922e-05, |
| "loss": 0.8757, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.7963246554364471, |
| "grad_norm": 0.707252502344583, |
| "learning_rate": 1.9785023607773655e-05, |
| "loss": 0.8842, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.8116385911179173, |
| "grad_norm": 0.8083132789219781, |
| "learning_rate": 1.976238033738033e-05, |
| "loss": 0.8578, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.8269525267993875, |
| "grad_norm": 0.7287731737231247, |
| "learning_rate": 1.9738617806684767e-05, |
| "loss": 0.911, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.8422664624808576, |
| "grad_norm": 0.7683997901360534, |
| "learning_rate": 1.9713738740069384e-05, |
| "loss": 0.8607, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.8575803981623277, |
| "grad_norm": 0.6883196631913017, |
| "learning_rate": 1.9687745989927823e-05, |
| "loss": 0.8781, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.8728943338437979, |
| "grad_norm": 0.7137416791751114, |
| "learning_rate": 1.966064253633793e-05, |
| "loss": 0.8675, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.888208269525268, |
| "grad_norm": 0.800711538711323, |
| "learning_rate": 1.963243148672009e-05, |
| "loss": 0.8696, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.9035222052067381, |
| "grad_norm": 0.7017619169785259, |
| "learning_rate": 1.960311607548096e-05, |
| "loss": 0.8724, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.9188361408882083, |
| "grad_norm": 0.7347091076360852, |
| "learning_rate": 1.957269966364263e-05, |
| "loss": 0.8753, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.9188361408882083, |
| "eval_loss": 0.8846515417098999, |
| "eval_runtime": 3.4431, |
| "eval_samples_per_second": 36.014, |
| "eval_steps_per_second": 2.323, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.9341500765696784, |
| "grad_norm": 0.6840257399223311, |
| "learning_rate": 1.9541185738457304e-05, |
| "loss": 0.8661, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.9494640122511485, |
| "grad_norm": 0.6350789314835292, |
| "learning_rate": 1.9508577913007475e-05, |
| "loss": 0.866, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.9647779479326187, |
| "grad_norm": 0.742666392443459, |
| "learning_rate": 1.9474879925791665e-05, |
| "loss": 0.8712, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.9800918836140888, |
| "grad_norm": 0.7707700113231902, |
| "learning_rate": 1.944009564029584e-05, |
| "loss": 0.8656, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.9954058192955589, |
| "grad_norm": 0.7026681496550549, |
| "learning_rate": 1.9404229044550432e-05, |
| "loss": 0.8707, |
| "step": 325 |
| }, |
| { |
| "epoch": 1.0122511485451762, |
| "grad_norm": 0.8698658929763603, |
| "learning_rate": 1.9367284250673126e-05, |
| "loss": 1.0103, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.0275650842266462, |
| "grad_norm": 0.7696705674515646, |
| "learning_rate": 1.9329265494397386e-05, |
| "loss": 0.8254, |
| "step": 335 |
| }, |
| { |
| "epoch": 1.0428790199081164, |
| "grad_norm": 0.703085270940529, |
| "learning_rate": 1.929017713458685e-05, |
| "loss": 0.7994, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.0581929555895866, |
| "grad_norm": 0.7281489106195598, |
| "learning_rate": 1.9250023652735573e-05, |
| "loss": 0.7997, |
| "step": 345 |
| }, |
| { |
| "epoch": 1.0735068912710566, |
| "grad_norm": 0.6721859356177987, |
| "learning_rate": 1.920880965245422e-05, |
| "loss": 0.8028, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.0888208269525268, |
| "grad_norm": 0.6999456629939552, |
| "learning_rate": 1.9166539858942258e-05, |
| "loss": 0.7923, |
| "step": 355 |
| }, |
| { |
| "epoch": 1.104134762633997, |
| "grad_norm": 0.761057382772568, |
| "learning_rate": 1.912321911844622e-05, |
| "loss": 0.8023, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.119448698315467, |
| "grad_norm": 0.7613985157569584, |
| "learning_rate": 1.907885239770408e-05, |
| "loss": 0.8171, |
| "step": 365 |
| }, |
| { |
| "epoch": 1.1347626339969372, |
| "grad_norm": 0.7130602486133167, |
| "learning_rate": 1.9033444783375806e-05, |
| "loss": 0.8136, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.1500765696784074, |
| "grad_norm": 0.7290477717213912, |
| "learning_rate": 1.8987001481460177e-05, |
| "loss": 0.7956, |
| "step": 375 |
| }, |
| { |
| "epoch": 1.1653905053598774, |
| "grad_norm": 0.6942113670274667, |
| "learning_rate": 1.8939527816697917e-05, |
| "loss": 0.795, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.1807044410413476, |
| "grad_norm": 0.7278550945084655, |
| "learning_rate": 1.8891029231961208e-05, |
| "loss": 0.8038, |
| "step": 385 |
| }, |
| { |
| "epoch": 1.1960183767228179, |
| "grad_norm": 0.7637922924754829, |
| "learning_rate": 1.8841511287629667e-05, |
| "loss": 0.7941, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.2113323124042878, |
| "grad_norm": 0.7227667767094055, |
| "learning_rate": 1.8790979660952832e-05, |
| "loss": 0.7899, |
| "step": 395 |
| }, |
| { |
| "epoch": 1.226646248085758, |
| "grad_norm": 0.6856723635729023, |
| "learning_rate": 1.8739440145399295e-05, |
| "loss": 0.8037, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.226646248085758, |
| "eval_loss": 0.8724085092544556, |
| "eval_runtime": 3.6883, |
| "eval_samples_per_second": 33.619, |
| "eval_steps_per_second": 2.169, |
| "step": 400 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 1630, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 42520176230400.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|