| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.1981819998002197, |
| "eval_steps": 500, |
| "global_step": 1500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0079912096693637, |
| "grad_norm": 3.2677111625671387, |
| "learning_rate": 4e-05, |
| "loss": 1.8334, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0159824193387274, |
| "grad_norm": 2.2300846576690674, |
| "learning_rate": 8.444444444444444e-05, |
| "loss": 1.1382, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0239736290080911, |
| "grad_norm": 0.9214928150177002, |
| "learning_rate": 0.00012888888888888892, |
| "loss": 0.9079, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0319648386774548, |
| "grad_norm": 0.7646894454956055, |
| "learning_rate": 0.00017333333333333334, |
| "loss": 0.8226, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0399560483468185, |
| "grad_norm": 0.5976511240005493, |
| "learning_rate": 0.00019999627041039135, |
| "loss": 0.7371, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.0479472580161822, |
| "grad_norm": 0.5931047797203064, |
| "learning_rate": 0.00019995431572214454, |
| "loss": 0.8082, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.055938467685545896, |
| "grad_norm": 0.6012359857559204, |
| "learning_rate": 0.00019986576398242566, |
| "loss": 0.7508, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.0639296773549096, |
| "grad_norm": 0.5190461277961731, |
| "learning_rate": 0.00019973065647259348, |
| "loss": 0.7647, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.0719208870242733, |
| "grad_norm": 0.5790061354637146, |
| "learning_rate": 0.00019954905617753814, |
| "loss": 0.7418, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.079912096693637, |
| "grad_norm": 0.428479939699173, |
| "learning_rate": 0.00019932104775631846, |
| "loss": 0.6965, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.0879033063630007, |
| "grad_norm": 0.539878249168396, |
| "learning_rate": 0.00019904673750269537, |
| "loss": 0.7899, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.0958945160323644, |
| "grad_norm": 0.5310686230659485, |
| "learning_rate": 0.00019872625329557953, |
| "loss": 0.6959, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.1038857257017281, |
| "grad_norm": 0.4262120723724365, |
| "learning_rate": 0.0001983597445394162, |
| "loss": 0.7349, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.11187693537109179, |
| "grad_norm": 0.4776265621185303, |
| "learning_rate": 0.00019794738209453545, |
| "loss": 0.7591, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.1198681450404555, |
| "grad_norm": 0.3852095305919647, |
| "learning_rate": 0.00019748935819749987, |
| "loss": 0.6843, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.1278593547098192, |
| "grad_norm": 0.534788191318512, |
| "learning_rate": 0.00019698588637148703, |
| "loss": 0.7827, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.1358505643791829, |
| "grad_norm": 0.35920798778533936, |
| "learning_rate": 0.00019643720132674856, |
| "loss": 0.7002, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.1438417740485466, |
| "grad_norm": 0.403860479593277, |
| "learning_rate": 0.00019584355885119196, |
| "loss": 0.7003, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.1518329837179103, |
| "grad_norm": 0.5393890738487244, |
| "learning_rate": 0.00019520523569113677, |
| "loss": 0.6816, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.159824193387274, |
| "grad_norm": 0.3763524293899536, |
| "learning_rate": 0.0001945225294222997, |
| "loss": 0.6774, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.1678154030566377, |
| "grad_norm": 0.36979958415031433, |
| "learning_rate": 0.00019379575831106994, |
| "loss": 0.6983, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.1758066127260014, |
| "grad_norm": 0.384091854095459, |
| "learning_rate": 0.00019302526116613864, |
| "loss": 0.7057, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.1837978223953651, |
| "grad_norm": 0.45156919956207275, |
| "learning_rate": 0.0001922113971805517, |
| "loss": 0.7439, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.1917890320647288, |
| "grad_norm": 0.4209638833999634, |
| "learning_rate": 0.0001913545457642601, |
| "loss": 0.7141, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.1997802417340925, |
| "grad_norm": 0.5019676685333252, |
| "learning_rate": 0.0001904551063672452, |
| "loss": 0.7205, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.2077714514034562, |
| "grad_norm": 0.43800297379493713, |
| "learning_rate": 0.00018951349829330168, |
| "loss": 0.7181, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.2157626610728199, |
| "grad_norm": 0.40507107973098755, |
| "learning_rate": 0.0001885301605045651, |
| "loss": 0.7303, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.22375387074218359, |
| "grad_norm": 0.3452669084072113, |
| "learning_rate": 0.000187505551416875, |
| "loss": 0.647, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.2317450804115473, |
| "grad_norm": 0.3924757242202759, |
| "learning_rate": 0.00018644014868606895, |
| "loss": 0.6721, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.239736290080911, |
| "grad_norm": 0.5480809807777405, |
| "learning_rate": 0.0001853344489853074, |
| "loss": 0.7755, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.2477274997502747, |
| "grad_norm": 0.456853985786438, |
| "learning_rate": 0.0001841889677735327, |
| "loss": 0.7203, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.2557187094196384, |
| "grad_norm": 0.40140455961227417, |
| "learning_rate": 0.0001830042390551708, |
| "loss": 0.643, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.2637099190890021, |
| "grad_norm": 0.47574156522750854, |
| "learning_rate": 0.00018178081513118706, |
| "loss": 0.7128, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.2717011287583658, |
| "grad_norm": 0.43806251883506775, |
| "learning_rate": 0.00018051926634161282, |
| "loss": 0.6922, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.2796923384277295, |
| "grad_norm": 0.4530179500579834, |
| "learning_rate": 0.0001792201807996622, |
| "loss": 0.7907, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.2876835480970932, |
| "grad_norm": 0.40421706438064575, |
| "learning_rate": 0.00017788416411756338, |
| "loss": 0.7358, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.2956747577664569, |
| "grad_norm": 0.41535094380378723, |
| "learning_rate": 0.00017651183912423228, |
| "loss": 0.7031, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.3036659674358206, |
| "grad_norm": 0.3584170341491699, |
| "learning_rate": 0.00017510384557492, |
| "loss": 0.7208, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.3116571771051843, |
| "grad_norm": 0.42786943912506104, |
| "learning_rate": 0.00017366083985296947, |
| "loss": 0.7615, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.319648386774548, |
| "grad_norm": 0.4445035457611084, |
| "learning_rate": 0.00017218349466382023, |
| "loss": 0.7002, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.3276395964439117, |
| "grad_norm": 0.5019694566726685, |
| "learning_rate": 0.0001706724987214045, |
| "loss": 0.7204, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.3356308061132754, |
| "grad_norm": 0.3719067871570587, |
| "learning_rate": 0.00016912855642708, |
| "loss": 0.6981, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.3436220157826391, |
| "grad_norm": 0.47156500816345215, |
| "learning_rate": 0.00016755238754124965, |
| "loss": 0.6733, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.3516132254520028, |
| "grad_norm": 0.4605729579925537, |
| "learning_rate": 0.0001659447268478212, |
| "loss": 0.74, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.3596044351213665, |
| "grad_norm": 0.4625272750854492, |
| "learning_rate": 0.00016430632381166305, |
| "loss": 0.7508, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.3675956447907302, |
| "grad_norm": 0.4062426686286926, |
| "learning_rate": 0.0001626379422292162, |
| "loss": 0.7178, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.3755868544600939, |
| "grad_norm": 0.42503005266189575, |
| "learning_rate": 0.00016094035987242484, |
| "loss": 0.6757, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.3835780641294576, |
| "grad_norm": 0.4469659924507141, |
| "learning_rate": 0.00015921436812615204, |
| "loss": 0.723, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.3915692737988213, |
| "grad_norm": 0.3277670443058014, |
| "learning_rate": 0.00015746077161924905, |
| "loss": 0.7035, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.399560483468185, |
| "grad_norm": 0.4804005026817322, |
| "learning_rate": 0.00015568038784945077, |
| "loss": 0.7347, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.4075516931375487, |
| "grad_norm": 0.47554656863212585, |
| "learning_rate": 0.00015387404680227175, |
| "loss": 0.7332, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.4155429028069124, |
| "grad_norm": 0.47048240900039673, |
| "learning_rate": 0.00015204259056408046, |
| "loss": 0.7516, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.4235341124762761, |
| "grad_norm": 0.4335585832595825, |
| "learning_rate": 0.00015018687292953293, |
| "loss": 0.6726, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.4315253221456398, |
| "grad_norm": 0.30512747168540955, |
| "learning_rate": 0.00014830775900354735, |
| "loss": 0.6954, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.4395165318150035, |
| "grad_norm": 0.3644169867038727, |
| "learning_rate": 0.00014640612479800686, |
| "loss": 0.6699, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.44750774148436717, |
| "grad_norm": 0.7686610221862793, |
| "learning_rate": 0.00014448285682337682, |
| "loss": 0.6825, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.4554989511537309, |
| "grad_norm": 0.42735007405281067, |
| "learning_rate": 0.00014253885167542866, |
| "loss": 0.7192, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.4634901608230946, |
| "grad_norm": 0.3812963664531708, |
| "learning_rate": 0.00014057501561726157, |
| "loss": 0.708, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.4714813704924583, |
| "grad_norm": 0.3944273591041565, |
| "learning_rate": 0.0001385922641568175, |
| "loss": 0.7389, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.479472580161822, |
| "grad_norm": 0.44416099786758423, |
| "learning_rate": 0.00013659152162008676, |
| "loss": 0.7025, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.4874637898311857, |
| "grad_norm": 0.43281784653663635, |
| "learning_rate": 0.0001345737207202023, |
| "loss": 0.7012, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.4954549995005494, |
| "grad_norm": 0.44126081466674805, |
| "learning_rate": 0.0001325398021226242, |
| "loss": 0.6811, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.5034462091699131, |
| "grad_norm": 0.39465758204460144, |
| "learning_rate": 0.00013049071400661716, |
| "loss": 0.7229, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.5114374188392768, |
| "grad_norm": 0.4265965223312378, |
| "learning_rate": 0.00012842741162322487, |
| "loss": 0.66, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.5194286285086405, |
| "grad_norm": 0.3862599730491638, |
| "learning_rate": 0.00012635085684994767, |
| "loss": 0.7013, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.5274198381780042, |
| "grad_norm": 0.5364603400230408, |
| "learning_rate": 0.00012426201774233135, |
| "loss": 0.7172, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.5354110478473679, |
| "grad_norm": 0.5026273727416992, |
| "learning_rate": 0.00012216186808267546, |
| "loss": 0.7058, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.5434022575167315, |
| "grad_norm": 0.39891964197158813, |
| "learning_rate": 0.0001200513869260721, |
| "loss": 0.7015, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.5513934671860953, |
| "grad_norm": 0.45894622802734375, |
| "learning_rate": 0.00011793155814398674, |
| "loss": 0.753, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.559384676855459, |
| "grad_norm": 0.34293729066848755, |
| "learning_rate": 0.00011580336996559343, |
| "loss": 0.6815, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.5673758865248227, |
| "grad_norm": 0.4446139931678772, |
| "learning_rate": 0.00011366781451707879, |
| "loss": 0.6742, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.5753670961941864, |
| "grad_norm": 0.48648640513420105, |
| "learning_rate": 0.0001115258873591291, |
| "loss": 0.6994, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.5833583058635501, |
| "grad_norm": 0.41053032875061035, |
| "learning_rate": 0.00010937858702281631, |
| "loss": 0.7079, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.5913495155329138, |
| "grad_norm": 0.4511827230453491, |
| "learning_rate": 0.00010722691454409943, |
| "loss": 0.708, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.5993407252022775, |
| "grad_norm": 0.37551945447921753, |
| "learning_rate": 0.00010507187299715815, |
| "loss": 0.7, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.6073319348716412, |
| "grad_norm": 0.38525089621543884, |
| "learning_rate": 0.00010291446702677599, |
| "loss": 0.6866, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.6153231445410049, |
| "grad_norm": 0.3816082179546356, |
| "learning_rate": 0.0001007557023799917, |
| "loss": 0.7071, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.6233143542103686, |
| "grad_norm": 0.48344865441322327, |
| "learning_rate": 9.859658543723659e-05, |
| "loss": 0.7181, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.6313055638797322, |
| "grad_norm": 0.4207400977611542, |
| "learning_rate": 9.643812274317644e-05, |
| "loss": 0.7565, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.639296773549096, |
| "grad_norm": 0.5104153752326965, |
| "learning_rate": 9.428132053747712e-05, |
| "loss": 0.7211, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.6472879832184597, |
| "grad_norm": 0.40380504727363586, |
| "learning_rate": 9.212718428571231e-05, |
| "loss": 0.6808, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.6552791928878234, |
| "grad_norm": 0.53224778175354, |
| "learning_rate": 8.997671821063191e-05, |
| "loss": 0.6786, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.663270402557187, |
| "grad_norm": 0.42555850744247437, |
| "learning_rate": 8.783092482401005e-05, |
| "loss": 0.7767, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.6712616122265508, |
| "grad_norm": 0.4147053360939026, |
| "learning_rate": 8.569080445929073e-05, |
| "loss": 0.6728, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.6792528218959145, |
| "grad_norm": 0.3875350058078766, |
| "learning_rate": 8.355735480524874e-05, |
| "loss": 0.651, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.6872440315652782, |
| "grad_norm": 0.36827635765075684, |
| "learning_rate": 8.143157044088377e-05, |
| "loss": 0.6989, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.6952352412346419, |
| "grad_norm": 0.39672666788101196, |
| "learning_rate": 7.931444237176398e-05, |
| "loss": 0.6997, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.7032264509040056, |
| "grad_norm": 0.4494044780731201, |
| "learning_rate": 7.72069575680357e-05, |
| "loss": 0.6977, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.7112176605733693, |
| "grad_norm": 0.4261849522590637, |
| "learning_rate": 7.5110098504314e-05, |
| "loss": 0.7528, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.719208870242733, |
| "grad_norm": 0.3963007926940918, |
| "learning_rate": 7.30248427016697e-05, |
| "loss": 0.7152, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.7272000799120967, |
| "grad_norm": 0.346351683139801, |
| "learning_rate": 7.095216227192467e-05, |
| "loss": 0.6679, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.7351912895814604, |
| "grad_norm": 0.32863008975982666, |
| "learning_rate": 6.889302346446969e-05, |
| "loss": 0.6647, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.7431824992508241, |
| "grad_norm": 0.3735399544239044, |
| "learning_rate": 6.684838621581478e-05, |
| "loss": 0.6917, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.7511737089201878, |
| "grad_norm": 0.5078648924827576, |
| "learning_rate": 6.481920370208274e-05, |
| "loss": 0.7392, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.7591649185895515, |
| "grad_norm": 0.4455859065055847, |
| "learning_rate": 6.28064218946542e-05, |
| "loss": 0.6857, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.7671561282589152, |
| "grad_norm": 0.41593629121780396, |
| "learning_rate": 6.0810979119171254e-05, |
| "loss": 0.676, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.775147337928279, |
| "grad_norm": 0.3919152319431305, |
| "learning_rate": 5.883380561810563e-05, |
| "loss": 0.707, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.7831385475976426, |
| "grad_norm": 0.34168556332588196, |
| "learning_rate": 5.6875823117095025e-05, |
| "loss": 0.6813, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.7911297572670063, |
| "grad_norm": 0.3636936545372009, |
| "learning_rate": 5.493794439524979e-05, |
| "loss": 0.6822, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.79912096693637, |
| "grad_norm": 0.38939976692199707, |
| "learning_rate": 5.302107285963045e-05, |
| "loss": 0.7016, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.8071121766057336, |
| "grad_norm": 0.4251338243484497, |
| "learning_rate": 5.1126102124094064e-05, |
| "loss": 0.662, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.8151033862750974, |
| "grad_norm": 0.4065021276473999, |
| "learning_rate": 4.9253915592706515e-05, |
| "loss": 0.6864, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.8230945959444611, |
| "grad_norm": 0.38323187828063965, |
| "learning_rate": 4.74053860479137e-05, |
| "loss": 0.6989, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.8310858056138248, |
| "grad_norm": 0.36701148748397827, |
| "learning_rate": 4.558137524366533e-05, |
| "loss": 0.7326, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.8390770152831885, |
| "grad_norm": 0.3849141299724579, |
| "learning_rate": 4.3782733503678886e-05, |
| "loss": 0.7265, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.8470682249525522, |
| "grad_norm": 0.42157188057899475, |
| "learning_rate": 4.2010299325033034e-05, |
| "loss": 0.6975, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.8550594346219159, |
| "grad_norm": 0.42658373713493347, |
| "learning_rate": 4.026489898727419e-05, |
| "loss": 0.6891, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.8630506442912796, |
| "grad_norm": 0.605895459651947, |
| "learning_rate": 3.854734616721852e-05, |
| "loss": 0.7375, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.8710418539606433, |
| "grad_norm": 0.40604451298713684, |
| "learning_rate": 3.6858441559629306e-05, |
| "loss": 0.7395, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.879033063630007, |
| "grad_norm": 0.3668944537639618, |
| "learning_rate": 3.519897250394612e-05, |
| "loss": 0.6727, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.8870242732993707, |
| "grad_norm": 0.42747315764427185, |
| "learning_rate": 3.3569712617240435e-05, |
| "loss": 0.6856, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.8950154829687343, |
| "grad_norm": 0.4526374936103821, |
| "learning_rate": 3.197142143356787e-05, |
| "loss": 0.6866, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.9030066926380981, |
| "grad_norm": 0.3767329454421997, |
| "learning_rate": 3.040484404988614e-05, |
| "loss": 0.667, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.9109979023074618, |
| "grad_norm": 0.4605715572834015, |
| "learning_rate": 2.8870710778703103e-05, |
| "loss": 0.7107, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.9189891119768255, |
| "grad_norm": 0.5346247553825378, |
| "learning_rate": 2.736973680761702e-05, |
| "loss": 0.7104, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.9269803216461892, |
| "grad_norm": 0.4917076528072357, |
| "learning_rate": 2.590262186590805e-05, |
| "loss": 0.7009, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.9349715313155529, |
| "grad_norm": 0.4159565269947052, |
| "learning_rate": 2.447004989833599e-05, |
| "loss": 0.6347, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.9429627409849166, |
| "grad_norm": 0.3852473795413971, |
| "learning_rate": 2.307268874629649e-05, |
| "loss": 0.7313, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.9509539506542803, |
| "grad_norm": 0.4686223268508911, |
| "learning_rate": 2.1711189836484314e-05, |
| "loss": 0.6376, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.958945160323644, |
| "grad_norm": 0.4293384552001953, |
| "learning_rate": 2.038618787720925e-05, |
| "loss": 0.686, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.9669363699930077, |
| "grad_norm": 0.49944257736206055, |
| "learning_rate": 1.9098300562505266e-05, |
| "loss": 0.7029, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.9749275796623714, |
| "grad_norm": 0.3824506998062134, |
| "learning_rate": 1.784812828417197e-05, |
| "loss": 0.7253, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.982918789331735, |
| "grad_norm": 0.3724282383918762, |
| "learning_rate": 1.663625385188182e-05, |
| "loss": 0.7033, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.9909099990010988, |
| "grad_norm": 0.47136667370796204, |
| "learning_rate": 1.5463242221483743e-05, |
| "loss": 0.66, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.9989012086704625, |
| "grad_norm": 0.42494097352027893, |
| "learning_rate": 1.432964023163028e-05, |
| "loss": 0.6818, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.006392967735491, |
| "grad_norm": 0.48029494285583496, |
| "learning_rate": 1.3235976348850165e-05, |
| "loss": 0.656, |
| "step": 1260 |
| }, |
| { |
| "epoch": 1.0143841774048548, |
| "grad_norm": 0.3681392967700958, |
| "learning_rate": 1.218276042118629e-05, |
| "loss": 0.6828, |
| "step": 1270 |
| }, |
| { |
| "epoch": 1.0223753870742183, |
| "grad_norm": 0.3528028726577759, |
| "learning_rate": 1.1170483440512614e-05, |
| "loss": 0.6531, |
| "step": 1280 |
| }, |
| { |
| "epoch": 1.030366596743582, |
| "grad_norm": 0.3827133774757385, |
| "learning_rate": 1.0199617313642063e-05, |
| "loss": 0.6469, |
| "step": 1290 |
| }, |
| { |
| "epoch": 1.0383578064129457, |
| "grad_norm": 0.4323211908340454, |
| "learning_rate": 9.270614642331376e-06, |
| "loss": 0.6718, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.0463490160823095, |
| "grad_norm": 0.4139029085636139, |
| "learning_rate": 8.383908512285555e-06, |
| "loss": 0.6629, |
| "step": 1310 |
| }, |
| { |
| "epoch": 1.0543402257516732, |
| "grad_norm": 0.4098852872848511, |
| "learning_rate": 7.5399122912605095e-06, |
| "loss": 0.7382, |
| "step": 1320 |
| }, |
| { |
| "epoch": 1.062331435421037, |
| "grad_norm": 0.3670465648174286, |
| "learning_rate": 6.739019436357774e-06, |
| "loss": 0.6502, |
| "step": 1330 |
| }, |
| { |
| "epoch": 1.0703226450904006, |
| "grad_norm": 0.456601619720459, |
| "learning_rate": 5.981603310601414e-06, |
| "loss": 0.6587, |
| "step": 1340 |
| }, |
| { |
| "epoch": 1.0783138547597642, |
| "grad_norm": 0.36275264620780945, |
| "learning_rate": 5.2680170088822425e-06, |
| "loss": 0.674, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.0863050644291279, |
| "grad_norm": 0.5391157865524292, |
| "learning_rate": 4.5985931933508754e-06, |
| "loss": 0.6754, |
| "step": 1360 |
| }, |
| { |
| "epoch": 1.0942962740984916, |
| "grad_norm": 0.3605053424835205, |
| "learning_rate": 3.973643938336113e-06, |
| "loss": 0.6801, |
| "step": 1370 |
| }, |
| { |
| "epoch": 1.1022874837678553, |
| "grad_norm": 0.4948176443576813, |
| "learning_rate": 3.393460584861008e-06, |
| "loss": 0.7102, |
| "step": 1380 |
| }, |
| { |
| "epoch": 1.110278693437219, |
| "grad_norm": 0.4405811131000519, |
| "learning_rate": 2.8583136048245697e-06, |
| "loss": 0.6844, |
| "step": 1390 |
| }, |
| { |
| "epoch": 1.1182699031065828, |
| "grad_norm": 0.42184901237487793, |
| "learning_rate": 2.368452474912153e-06, |
| "loss": 0.6668, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.1262611127759465, |
| "grad_norm": 0.5292870998382568, |
| "learning_rate": 1.9241055602935877e-06, |
| "loss": 0.6738, |
| "step": 1410 |
| }, |
| { |
| "epoch": 1.13425232244531, |
| "grad_norm": 0.393926203250885, |
| "learning_rate": 1.5254800081630826e-06, |
| "loss": 0.684, |
| "step": 1420 |
| }, |
| { |
| "epoch": 1.1422435321146738, |
| "grad_norm": 0.2781499922275543, |
| "learning_rate": 1.1727616511706508e-06, |
| "loss": 0.7076, |
| "step": 1430 |
| }, |
| { |
| "epoch": 1.1502347417840375, |
| "grad_norm": 0.43156924843788147, |
| "learning_rate": 8.661149207899844e-07, |
| "loss": 0.6329, |
| "step": 1440 |
| }, |
| { |
| "epoch": 1.1582259514534012, |
| "grad_norm": 0.40006959438323975, |
| "learning_rate": 6.056827706632185e-07, |
| "loss": 0.6547, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.166217161122765, |
| "grad_norm": 0.40933167934417725, |
| "learning_rate": 3.9158660995830545e-07, |
| "loss": 0.7007, |
| "step": 1460 |
| }, |
| { |
| "epoch": 1.1742083707921287, |
| "grad_norm": 0.4096595346927643, |
| "learning_rate": 2.2392624677004536e-07, |
| "loss": 0.6493, |
| "step": 1470 |
| }, |
| { |
| "epoch": 1.1821995804614924, |
| "grad_norm": 0.3870149850845337, |
| "learning_rate": 1.0277984159122733e-07, |
| "loss": 0.6752, |
| "step": 1480 |
| }, |
| { |
| "epoch": 1.1901907901308562, |
| "grad_norm": 0.41296494007110596, |
| "learning_rate": 2.820387087548726e-08, |
| "loss": 0.7173, |
| "step": 1490 |
| }, |
| { |
| "epoch": 1.1981819998002197, |
| "grad_norm": 0.38857489824295044, |
| "learning_rate": 2.331007089351189e-10, |
| "loss": 0.7149, |
| "step": 1500 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 1500, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 200, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.6210611577054822e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |