{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9973045822102425,
  "eval_steps": 500,
  "global_step": 370,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.013477088948787063,
      "grad_norm": 0.5035669564032814,
      "learning_rate": 2.324299151359584e-05,
      "loss": 0.9005,
      "step": 5
    },
    {
      "epoch": 0.026954177897574125,
      "grad_norm": 0.24763188697993135,
      "learning_rate": 5.229673090559063e-05,
      "loss": 0.7632,
      "step": 10
    },
    {
      "epoch": 0.04043126684636118,
      "grad_norm": 0.20013031966113695,
      "learning_rate": 8.135047029758543e-05,
      "loss": 0.6906,
      "step": 15
    },
    {
      "epoch": 0.05390835579514825,
      "grad_norm": 0.1251478415546449,
      "learning_rate": 0.00011040420968958023,
      "loss": 0.7043,
      "step": 20
    },
    {
      "epoch": 0.0673854447439353,
      "grad_norm": 0.11919120868369729,
      "learning_rate": 0.00013945794908157503,
      "loss": 0.6721,
      "step": 25
    },
    {
      "epoch": 0.08086253369272237,
      "grad_norm": 0.11002800921737756,
      "learning_rate": 0.00016851168847356983,
      "loss": 0.6547,
      "step": 30
    },
    {
      "epoch": 0.09433962264150944,
      "grad_norm": 0.10585232318571937,
      "learning_rate": 0.00019756542786556462,
      "loss": 0.6431,
      "step": 35
    },
    {
      "epoch": 0.1078167115902965,
      "grad_norm": 0.09430517129254268,
      "learning_rate": 0.00020337099396794078,
      "loss": 0.6525,
      "step": 40
    },
    {
      "epoch": 0.12129380053908356,
      "grad_norm": 0.09827237994214541,
      "learning_rate": 0.00020334994420963612,
      "loss": 0.6534,
      "step": 45
    },
    {
      "epoch": 0.1347708894878706,
      "grad_norm": 0.093132225400241,
      "learning_rate": 0.00020331270707393218,
      "loss": 0.6139,
      "step": 50
    },
    {
      "epoch": 0.14824797843665768,
      "grad_norm": 0.09685064409380317,
      "learning_rate": 0.00020325929046708394,
      "loss": 0.6531,
      "step": 55
    },
    {
      "epoch": 0.16172506738544473,
      "grad_norm": 0.09546000097274944,
      "learning_rate": 0.0002031897057306004,
      "loss": 0.6238,
      "step": 60
    },
    {
      "epoch": 0.1752021563342318,
      "grad_norm": 0.0939477020249697,
      "learning_rate": 0.0002031039676388368,
      "loss": 0.6164,
      "step": 65
    },
    {
      "epoch": 0.18867924528301888,
      "grad_norm": 0.0946418762320291,
      "learning_rate": 0.00020300209439585746,
      "loss": 0.6319,
      "step": 70
    },
    {
      "epoch": 0.20215633423180593,
      "grad_norm": 0.09873544291494986,
      "learning_rate": 0.00020288410763157072,
      "loss": 0.6444,
      "step": 75
    },
    {
      "epoch": 0.215633423180593,
      "grad_norm": 0.09945318963647702,
      "learning_rate": 0.00020275003239713643,
      "loss": 0.6289,
      "step": 80
    },
    {
      "epoch": 0.22911051212938005,
      "grad_norm": 0.09005837678182746,
      "learning_rate": 0.00020259989715964725,
      "loss": 0.646,
      "step": 85
    },
    {
      "epoch": 0.24258760107816713,
      "grad_norm": 0.08562686306523551,
      "learning_rate": 0.00020243373379608405,
      "loss": 0.6021,
      "step": 90
    },
    {
      "epoch": 0.2560646900269542,
      "grad_norm": 0.09631216348489038,
      "learning_rate": 0.00020225157758654811,
      "loss": 0.636,
      "step": 95
    },
    {
      "epoch": 0.2695417789757412,
      "grad_norm": 0.0873288065850411,
      "learning_rate": 0.00020205346720677026,
      "loss": 0.6126,
      "step": 100
    },
    {
      "epoch": 0.2830188679245283,
      "grad_norm": 0.09563091511300963,
      "learning_rate": 0.00020183944471989908,
      "loss": 0.6417,
      "step": 105
    },
    {
      "epoch": 0.29649595687331537,
      "grad_norm": 0.09341897730064971,
      "learning_rate": 0.00020160955556757014,
      "loss": 0.6515,
      "step": 110
    },
    {
      "epoch": 0.30997304582210244,
      "grad_norm": 0.09408032623188706,
      "learning_rate": 0.0002013638485602576,
      "loss": 0.6334,
      "step": 115
    },
    {
      "epoch": 0.32345013477088946,
      "grad_norm": 0.09175833278712385,
      "learning_rate": 0.0002011023758669108,
      "loss": 0.6231,
      "step": 120
    },
    {
      "epoch": 0.33692722371967654,
      "grad_norm": 0.10127664235816392,
      "learning_rate": 0.00020082519300387768,
      "loss": 0.6665,
      "step": 125
    },
    {
      "epoch": 0.3504043126684636,
      "grad_norm": 0.0992200486615755,
      "learning_rate": 0.00020053235882311723,
      "loss": 0.6404,
      "step": 130
    },
    {
      "epoch": 0.3638814016172507,
      "grad_norm": 0.09666032486747288,
      "learning_rate": 0.0002002239354997043,
      "loss": 0.6387,
      "step": 135
    },
    {
      "epoch": 0.37735849056603776,
      "grad_norm": 0.08965912130325993,
      "learning_rate": 0.00019989998851862806,
      "loss": 0.6207,
      "step": 140
    },
    {
      "epoch": 0.3908355795148248,
      "grad_norm": 0.09303656357445633,
      "learning_rate": 0.00019956058666088837,
      "loss": 0.5836,
      "step": 145
    },
    {
      "epoch": 0.40431266846361186,
      "grad_norm": 0.09290309402167178,
      "learning_rate": 0.00019920580198889217,
      "loss": 0.6091,
      "step": 150
    },
    {
      "epoch": 0.41778975741239893,
      "grad_norm": 0.09536333588714274,
      "learning_rate": 0.00019883570983115278,
      "loss": 0.6203,
      "step": 155
    },
    {
      "epoch": 0.431266846361186,
      "grad_norm": 0.08960543617684208,
      "learning_rate": 0.00019845038876629612,
      "loss": 0.6045,
      "step": 160
    },
    {
      "epoch": 0.444743935309973,
      "grad_norm": 0.11391645242680655,
      "learning_rate": 0.0001980499206063769,
      "loss": 0.6672,
      "step": 165
    },
    {
      "epoch": 0.4582210242587601,
      "grad_norm": 0.10716911606612207,
      "learning_rate": 0.000197634390379508,
      "loss": 0.6524,
      "step": 170
    },
    {
      "epoch": 0.4716981132075472,
      "grad_norm": 0.10208726337254333,
      "learning_rate": 0.00019720388631180708,
      "loss": 0.6491,
      "step": 175
    },
    {
      "epoch": 0.48517520215633425,
      "grad_norm": 0.1047282945224739,
      "learning_rate": 0.00019675849980866448,
      "loss": 0.6064,
      "step": 180
    },
    {
      "epoch": 0.49865229110512127,
      "grad_norm": 0.10429634621609762,
      "learning_rate": 0.00019629832543533569,
      "loss": 0.6585,
      "step": 185
    },
    {
      "epoch": 0.5121293800539084,
      "grad_norm": 0.10614609188558548,
      "learning_rate": 0.0001958234608968631,
      "loss": 0.6219,
      "step": 190
    },
    {
      "epoch": 0.5256064690026954,
      "grad_norm": 0.10790871458104308,
      "learning_rate": 0.00019533400701733116,
      "loss": 0.6221,
      "step": 195
    },
    {
      "epoch": 0.5390835579514824,
      "grad_norm": 0.09975657956979572,
      "learning_rate": 0.00019483006771845913,
      "loss": 0.616,
      "step": 200
    },
    {
      "epoch": 0.5525606469002695,
      "grad_norm": 0.10497666053319826,
      "learning_rate": 0.0001943117499975364,
      "loss": 0.6151,
      "step": 205
    },
    {
      "epoch": 0.5660377358490566,
      "grad_norm": 0.10335630823488554,
      "learning_rate": 0.00019377916390470444,
      "loss": 0.6019,
      "step": 210
    },
    {
      "epoch": 0.5795148247978437,
      "grad_norm": 0.09830734078899045,
      "learning_rate": 0.00019323242251959095,
      "loss": 0.6314,
      "step": 215
    },
    {
      "epoch": 0.5929919137466307,
      "grad_norm": 0.10158788434700039,
      "learning_rate": 0.0001926716419273004,
      "loss": 0.6363,
      "step": 220
    },
    {
      "epoch": 0.6064690026954178,
      "grad_norm": 0.11188559384408059,
      "learning_rate": 0.0001920969411937668,
      "loss": 0.6188,
      "step": 225
    },
    {
      "epoch": 0.6199460916442049,
      "grad_norm": 0.10765336307796565,
      "learning_rate": 0.0001915084423404733,
      "loss": 0.5823,
      "step": 230
    },
    {
      "epoch": 0.633423180592992,
      "grad_norm": 0.13912098964156255,
      "learning_rate": 0.00019090627031854437,
      "loss": 0.6585,
      "step": 235
    },
    {
      "epoch": 0.6469002695417789,
      "grad_norm": 0.10060391197082363,
      "learning_rate": 0.0001902905529822161,
      "loss": 0.6208,
      "step": 240
    },
    {
      "epoch": 0.660377358490566,
      "grad_norm": 0.10049822759259563,
      "learning_rate": 0.00018966142106168973,
      "loss": 0.5969,
      "step": 245
    },
    {
      "epoch": 0.6738544474393531,
      "grad_norm": 0.10153721056570988,
      "learning_rate": 0.00018901900813537504,
      "loss": 0.618,
      "step": 250
    },
    {
      "epoch": 0.6873315363881402,
      "grad_norm": 0.10180657751376652,
      "learning_rate": 0.0001883634506015285,
      "loss": 0.6125,
      "step": 255
    },
    {
      "epoch": 0.7008086253369272,
      "grad_norm": 0.1169413543524632,
      "learning_rate": 0.00018769488764929304,
      "loss": 0.6475,
      "step": 260
    },
    {
      "epoch": 0.7142857142857143,
      "grad_norm": 0.10929936844812664,
      "learning_rate": 0.00018701346122914532,
      "loss": 0.6044,
      "step": 265
    },
    {
      "epoch": 0.7277628032345014,
      "grad_norm": 0.10942460270891054,
      "learning_rate": 0.00018631931602275633,
      "loss": 0.6216,
      "step": 270
    },
    {
      "epoch": 0.7412398921832885,
      "grad_norm": 0.10903788409577728,
      "learning_rate": 0.00018561259941227238,
      "loss": 0.64,
      "step": 275
    },
    {
      "epoch": 0.7547169811320755,
      "grad_norm": 0.10307110500873073,
      "learning_rate": 0.00018489346144902268,
      "loss": 0.6312,
      "step": 280
    },
    {
      "epoch": 0.7681940700808625,
      "grad_norm": 0.11437232081812504,
      "learning_rate": 0.0001841620548216603,
      "loss": 0.633,
      "step": 285
    },
    {
      "epoch": 0.7816711590296496,
      "grad_norm": 0.10000818634803642,
      "learning_rate": 0.00018341853482374272,
      "loss": 0.5924,
      "step": 290
    },
    {
      "epoch": 0.7951482479784366,
      "grad_norm": 0.09769544101966106,
      "learning_rate": 0.00018266305932076,
      "loss": 0.6098,
      "step": 295
    },
    {
      "epoch": 0.8086253369272237,
      "grad_norm": 0.1070595651142661,
      "learning_rate": 0.0001818957887166163,
      "loss": 0.6126,
      "step": 300
    },
    {
      "epoch": 0.8221024258760108,
      "grad_norm": 0.10482485236668332,
      "learning_rate": 0.00018111688591957256,
      "loss": 0.6089,
      "step": 305
    },
    {
      "epoch": 0.8355795148247979,
      "grad_norm": 0.10072850256867404,
      "learning_rate": 0.00018032651630765773,
      "loss": 0.6103,
      "step": 310
    },
    {
      "epoch": 0.8490566037735849,
      "grad_norm": 0.12922630942139549,
      "learning_rate": 0.00017952484769355524,
      "loss": 0.6235,
      "step": 315
    },
    {
      "epoch": 0.862533692722372,
      "grad_norm": 0.11602744996730874,
      "learning_rate": 0.00017871205028897292,
      "loss": 0.6203,
      "step": 320
    },
    {
      "epoch": 0.876010781671159,
      "grad_norm": 0.10543663711204088,
      "learning_rate": 0.000177888296668503,
      "loss": 0.6166,
      "step": 325
    },
    {
      "epoch": 0.889487870619946,
      "grad_norm": 0.1044857066174908,
      "learning_rate": 0.00017705376173298123,
      "loss": 0.6244,
      "step": 330
    },
    {
      "epoch": 0.9029649595687331,
      "grad_norm": 0.10952273362511196,
      "learning_rate": 0.00017620862267235112,
      "loss": 0.5959,
      "step": 335
    },
    {
      "epoch": 0.9164420485175202,
      "grad_norm": 0.11050618881134476,
      "learning_rate": 0.0001753530589280431,
      "loss": 0.6095,
      "step": 340
    },
    {
      "epoch": 0.9299191374663073,
      "grad_norm": 0.11078328129844443,
      "learning_rate": 0.00017448725215487489,
      "loss": 0.6091,
      "step": 345
    },
    {
      "epoch": 0.9433962264150944,
      "grad_norm": 0.11188327922579694,
      "learning_rate": 0.00017361138618248234,
      "loss": 0.6191,
      "step": 350
    },
    {
      "epoch": 0.9568733153638814,
      "grad_norm": 0.10268029783589272,
      "learning_rate": 0.00017272564697628855,
      "loss": 0.5958,
      "step": 355
    },
    {
      "epoch": 0.9703504043126685,
      "grad_norm": 0.11689512866574439,
      "learning_rate": 0.00017183022259801897,
      "loss": 0.632,
      "step": 360
    },
    {
      "epoch": 0.9838274932614556,
      "grad_norm": 0.10470402952181684,
      "learning_rate": 0.0001709253031657722,
      "loss": 0.5921,
      "step": 365
    },
    {
      "epoch": 0.9973045822102425,
      "grad_norm": 0.10948497086974719,
      "learning_rate": 0.0001700110808136535,
      "loss": 0.6054,
      "step": 370
    },
    {
      "epoch": 0.9973045822102425,
      "eval_loss": 0.6348516941070557,
      "eval_runtime": 30.4297,
      "eval_samples_per_second": 2.333,
      "eval_steps_per_second": 0.592,
      "step": 370
    }
  ],
  "logging_steps": 5,
  "max_steps": 1113,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 221573000724480.0,
  "train_batch_size": 18,
  "trial_name": null,
  "trial_params": null
}
|
|