{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9997473684210526, "eval_steps": 500, "global_step": 1484, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0006736842105263158, "grad_norm": 3.6967623233795166, "learning_rate": 1.3422818791946309e-06, "loss": 2.4093, "step": 1 }, { "epoch": 0.006736842105263158, "grad_norm": 2.5490193367004395, "learning_rate": 1.3422818791946309e-05, "loss": 2.4939, "step": 10 }, { "epoch": 0.013473684210526317, "grad_norm": 0.18483224511146545, "learning_rate": 2.6845637583892618e-05, "loss": 1.1877, "step": 20 }, { "epoch": 0.020210526315789474, "grad_norm": 0.2031693309545517, "learning_rate": 4.026845637583892e-05, "loss": 0.8909, "step": 30 }, { "epoch": 0.026947368421052633, "grad_norm": 0.6876732707023621, "learning_rate": 5.3691275167785237e-05, "loss": 0.7581, "step": 40 }, { "epoch": 0.03368421052631579, "grad_norm": 0.09247241914272308, "learning_rate": 6.711409395973155e-05, "loss": 0.7594, "step": 50 }, { "epoch": 0.04042105263157895, "grad_norm": 0.1324968934059143, "learning_rate": 8.053691275167784e-05, "loss": 0.7405, "step": 60 }, { "epoch": 0.04715789473684211, "grad_norm": 0.05673883110284805, "learning_rate": 9.395973154362417e-05, "loss": 0.7065, "step": 70 }, { "epoch": 0.053894736842105266, "grad_norm": 0.04617280140519142, "learning_rate": 0.00010738255033557047, "loss": 0.6817, "step": 80 }, { "epoch": 0.06063157894736842, "grad_norm": 0.04381496459245682, "learning_rate": 0.0001208053691275168, "loss": 0.6789, "step": 90 }, { "epoch": 0.06736842105263158, "grad_norm": 0.07428538799285889, "learning_rate": 0.0001342281879194631, "loss": 0.6816, "step": 100 }, { "epoch": 0.07410526315789474, "grad_norm": 0.04249708354473114, "learning_rate": 0.00014765100671140942, "loss": 0.6997, "step": 110 }, { "epoch": 0.0808421052631579, "grad_norm": 0.05957937240600586, "learning_rate": 0.0001610738255033557, "loss": 0.6807, "step": 120 }, { "epoch": 0.08757894736842105, "grad_norm": 0.03975442424416542, "learning_rate": 0.000174496644295302, "loss": 0.6733, "step": 130 }, { "epoch": 0.09431578947368421, "grad_norm": 0.04079463332891464, "learning_rate": 0.00018791946308724833, "loss": 0.6556, "step": 140 }, { "epoch": 0.10105263157894737, "grad_norm": 0.04245497286319733, "learning_rate": 0.00019985018726591762, "loss": 0.6575, "step": 150 }, { "epoch": 0.10778947368421053, "grad_norm": 0.09695123136043549, "learning_rate": 0.00019835205992509364, "loss": 0.6916, "step": 160 }, { "epoch": 0.11452631578947368, "grad_norm": 0.03505201265215874, "learning_rate": 0.00019685393258426966, "loss": 0.6622, "step": 170 }, { "epoch": 0.12126315789473684, "grad_norm": 0.02820334956049919, "learning_rate": 0.0001953558052434457, "loss": 0.6497, "step": 180 }, { "epoch": 0.128, "grad_norm": 0.04135354235768318, "learning_rate": 0.00019385767790262173, "loss": 0.6671, "step": 190 }, { "epoch": 0.13473684210526315, "grad_norm": 0.031461067497730255, "learning_rate": 0.00019235955056179775, "loss": 0.657, "step": 200 }, { "epoch": 0.1414736842105263, "grad_norm": 0.04208710789680481, "learning_rate": 0.0001908614232209738, "loss": 0.6766, "step": 210 }, { "epoch": 0.1482105263157895, "grad_norm": 3.495147705078125, "learning_rate": 0.00018936329588014982, "loss": 3.9378, "step": 220 }, { "epoch": 0.15494736842105264, "grad_norm": 0.18893112242221832, "learning_rate": 0.00018786516853932586, "loss": 7.1374, "step": 230 }, { "epoch": 0.1616842105263158, "grad_norm": 0.0959916040301323, "learning_rate": 0.00018636704119850189, "loss": 5.8104, "step": 240 }, { "epoch": 0.16842105263157894, "grad_norm": 0.08286964148283005, "learning_rate": 0.0001848689138576779, "loss": 4.7292, "step": 250 }, { "epoch": 0.1751578947368421, "grad_norm": 0.04510454833507538, "learning_rate": 0.00018337078651685393, "loss": 4.9858, "step": 260 }, { "epoch": 0.18189473684210528, "grad_norm": 0.2256896197795868, "learning_rate": 0.00018187265917602997, "loss": 4.7463, "step": 270 }, { "epoch": 0.18863157894736843, "grad_norm": 0.06342379748821259, "learning_rate": 0.00018037453183520602, "loss": 4.517, "step": 280 }, { "epoch": 0.19536842105263158, "grad_norm": 0.07497289776802063, "learning_rate": 0.00017887640449438204, "loss": 4.4052, "step": 290 }, { "epoch": 0.20210526315789473, "grad_norm": 0.08952877670526505, "learning_rate": 0.00017737827715355806, "loss": 3.9614, "step": 300 }, { "epoch": 0.20884210526315788, "grad_norm": 0.044066932052373886, "learning_rate": 0.00017588014981273408, "loss": 4.5861, "step": 310 }, { "epoch": 0.21557894736842106, "grad_norm": 0.08251778781414032, "learning_rate": 0.0001743820224719101, "loss": 4.5163, "step": 320 }, { "epoch": 0.22231578947368422, "grad_norm": 0.04723803699016571, "learning_rate": 0.00017288389513108615, "loss": 4.1904, "step": 330 }, { "epoch": 0.22905263157894737, "grad_norm": 0.09082615375518799, "learning_rate": 0.0001713857677902622, "loss": 4.1982, "step": 340 }, { "epoch": 0.23578947368421052, "grad_norm": 0.04866361245512962, "learning_rate": 0.00016988764044943822, "loss": 3.8506, "step": 350 }, { "epoch": 0.24252631578947367, "grad_norm": 0.04515402019023895, "learning_rate": 0.00016838951310861424, "loss": 4.4254, "step": 360 }, { "epoch": 0.24926315789473685, "grad_norm": 0.14205284416675568, "learning_rate": 0.00016689138576779026, "loss": 4.4111, "step": 370 }, { "epoch": 0.256, "grad_norm": 0.16082021594047546, "learning_rate": 0.0001653932584269663, "loss": 4.1119, "step": 380 }, { "epoch": 0.26273684210526316, "grad_norm": 0.061411116272211075, "learning_rate": 0.00016389513108614235, "loss": 4.059, "step": 390 }, { "epoch": 0.2694736842105263, "grad_norm": 0.058379318565130234, "learning_rate": 0.00016239700374531837, "loss": 3.7307, "step": 400 }, { "epoch": 0.27621052631578946, "grad_norm": 0.048859789967536926, "learning_rate": 0.0001608988764044944, "loss": 4.3039, "step": 410 }, { "epoch": 0.2829473684210526, "grad_norm": 0.06003361940383911, "learning_rate": 0.0001594007490636704, "loss": 4.2032, "step": 420 }, { "epoch": 0.28968421052631577, "grad_norm": 0.10120591521263123, "learning_rate": 0.00015790262172284646, "loss": 3.9567, "step": 430 }, { "epoch": 0.296421052631579, "grad_norm": 0.21033401787281036, "learning_rate": 0.00015640449438202248, "loss": 3.9369, "step": 440 }, { "epoch": 0.3031578947368421, "grad_norm": 0.06378967314958572, "learning_rate": 0.00015490636704119852, "loss": 3.6318, "step": 450 }, { "epoch": 0.3098947368421053, "grad_norm": 0.042198359966278076, "learning_rate": 0.00015340823970037455, "loss": 4.1789, "step": 460 }, { "epoch": 0.31663157894736843, "grad_norm": 0.053648848086595535, "learning_rate": 0.00015191011235955057, "loss": 4.1562, "step": 470 }, { "epoch": 0.3233684210526316, "grad_norm": 0.0808805301785469, "learning_rate": 0.00015041198501872659, "loss": 3.8883, "step": 480 }, { "epoch": 0.33010526315789473, "grad_norm": 0.13895294070243835, "learning_rate": 0.00014891385767790263, "loss": 3.9055, "step": 490 }, { "epoch": 0.3368421052631579, "grad_norm": 0.11999215185642242, "learning_rate": 0.00014741573033707865, "loss": 3.6025, "step": 500 }, { "epoch": 0.34357894736842104, "grad_norm": 0.0969998687505722, "learning_rate": 0.0001459176029962547, "loss": 4.2401, "step": 510 }, { "epoch": 0.3503157894736842, "grad_norm": 0.2578948438167572, "learning_rate": 0.00014441947565543072, "loss": 4.1355, "step": 520 }, { "epoch": 0.35705263157894734, "grad_norm": 0.067634217441082, "learning_rate": 0.00014292134831460674, "loss": 3.8735, "step": 530 }, { "epoch": 0.36378947368421055, "grad_norm": 0.1961352676153183, "learning_rate": 0.0001414232209737828, "loss": 3.7641, "step": 540 }, { "epoch": 0.3705263157894737, "grad_norm": 0.07940343767404556, "learning_rate": 0.0001399250936329588, "loss": 3.5177, "step": 550 }, { "epoch": 0.37726315789473686, "grad_norm": 1.3029491901397705, "learning_rate": 0.00013842696629213483, "loss": 4.1854, "step": 560 }, { "epoch": 0.384, "grad_norm": 0.10544762760400772, "learning_rate": 0.00013692883895131088, "loss": 4.3064, "step": 570 }, { "epoch": 0.39073684210526316, "grad_norm": 0.150394469499588, "learning_rate": 0.0001354307116104869, "loss": 3.9517, "step": 580 }, { "epoch": 0.3974736842105263, "grad_norm": 0.06921563297510147, "learning_rate": 0.00013393258426966294, "loss": 3.8917, "step": 590 }, { "epoch": 0.40421052631578946, "grad_norm": 0.06402010470628738, "learning_rate": 0.00013243445692883896, "loss": 3.5635, "step": 600 }, { "epoch": 0.4109473684210526, "grad_norm": 0.08918313682079315, "learning_rate": 0.00013093632958801498, "loss": 4.1197, "step": 610 }, { "epoch": 0.41768421052631577, "grad_norm": 0.054397523403167725, "learning_rate": 0.000129438202247191, "loss": 4.0442, "step": 620 }, { "epoch": 0.4244210526315789, "grad_norm": 0.068702831864357, "learning_rate": 0.00012794007490636705, "loss": 3.7506, "step": 630 }, { "epoch": 0.43115789473684213, "grad_norm": 0.14575353264808655, "learning_rate": 0.0001264419475655431, "loss": 3.7359, "step": 640 }, { "epoch": 0.4378947368421053, "grad_norm": 0.1481335461139679, "learning_rate": 0.00012494382022471912, "loss": 3.3705, "step": 650 }, { "epoch": 0.44463157894736843, "grad_norm": 0.06438197940587997, "learning_rate": 0.00012344569288389514, "loss": 4.0248, "step": 660 }, { "epoch": 0.4513684210526316, "grad_norm": 0.38855019211769104, "learning_rate": 0.00012194756554307116, "loss": 4.0265, "step": 670 }, { "epoch": 0.45810526315789474, "grad_norm": 0.20793034136295319, "learning_rate": 0.00012044943820224719, "loss": 3.7305, "step": 680 }, { "epoch": 0.4648421052631579, "grad_norm": 0.11011853814125061, "learning_rate": 0.00011895131086142324, "loss": 3.6933, "step": 690 }, { "epoch": 0.47157894736842104, "grad_norm": 0.06795340031385422, "learning_rate": 0.00011745318352059926, "loss": 3.3734, "step": 700 }, { "epoch": 0.4783157894736842, "grad_norm": 0.07788679003715515, "learning_rate": 0.00011595505617977529, "loss": 3.9053, "step": 710 }, { "epoch": 0.48505263157894735, "grad_norm": 0.07339611649513245, "learning_rate": 0.00011445692883895131, "loss": 3.8685, "step": 720 }, { "epoch": 0.4917894736842105, "grad_norm": 0.16048288345336914, "learning_rate": 0.00011295880149812735, "loss": 3.5673, "step": 730 }, { "epoch": 0.4985263157894737, "grad_norm": 0.2596355974674225, "learning_rate": 0.00011146067415730337, "loss": 3.5684, "step": 740 }, { "epoch": 0.5052631578947369, "grad_norm": 0.10115884989500046, "learning_rate": 0.00010996254681647941, "loss": 3.2226, "step": 750 }, { "epoch": 0.512, "grad_norm": 0.13997367024421692, "learning_rate": 0.00010846441947565545, "loss": 3.8579, "step": 760 }, { "epoch": 0.5187368421052632, "grad_norm": 0.08359155058860779, "learning_rate": 0.00010696629213483147, "loss": 3.8313, "step": 770 }, { "epoch": 0.5254736842105263, "grad_norm": 0.2407791018486023, "learning_rate": 0.0001054681647940075, "loss": 3.5257, "step": 780 }, { "epoch": 0.5322105263157895, "grad_norm": 0.34615418314933777, "learning_rate": 0.00010397003745318352, "loss": 3.5113, "step": 790 }, { "epoch": 0.5389473684210526, "grad_norm": 0.06987264007329941, "learning_rate": 0.00010247191011235954, "loss": 3.1525, "step": 800 }, { "epoch": 0.5456842105263158, "grad_norm": 0.07933894544839859, "learning_rate": 0.00010097378277153558, "loss": 3.718, "step": 810 }, { "epoch": 0.5524210526315789, "grad_norm": 0.12424171715974808, "learning_rate": 9.947565543071161e-05, "loss": 3.6641, "step": 820 }, { "epoch": 0.5591578947368421, "grad_norm": 0.2515564262866974, "learning_rate": 9.797752808988764e-05, "loss": 3.4268, "step": 830 }, { "epoch": 0.5658947368421052, "grad_norm": 0.30851560831069946, "learning_rate": 9.647940074906368e-05, "loss": 3.3856, "step": 840 }, { "epoch": 0.5726315789473684, "grad_norm": 0.05149822682142258, "learning_rate": 9.49812734082397e-05, "loss": 3.1259, "step": 850 }, { "epoch": 0.5793684210526315, "grad_norm": 0.17960771918296814, "learning_rate": 9.348314606741574e-05, "loss": 3.6767, "step": 860 }, { "epoch": 0.5861052631578947, "grad_norm": 0.17523854970932007, "learning_rate": 9.198501872659176e-05, "loss": 3.5995, "step": 870 }, { "epoch": 0.592842105263158, "grad_norm": 0.3186163008213043, "learning_rate": 9.04868913857678e-05, "loss": 3.3966, "step": 880 }, { "epoch": 0.5995789473684211, "grad_norm": 0.21263690292835236, "learning_rate": 8.898876404494383e-05, "loss": 3.3526, "step": 890 }, { "epoch": 0.6063157894736843, "grad_norm": 0.10399254411458969, "learning_rate": 8.749063670411985e-05, "loss": 3.0519, "step": 900 }, { "epoch": 0.6130526315789474, "grad_norm": 0.13143524527549744, "learning_rate": 8.599250936329589e-05, "loss": 3.629, "step": 910 }, { "epoch": 0.6197894736842106, "grad_norm": 0.15374666452407837, "learning_rate": 8.449438202247192e-05, "loss": 3.6895, "step": 920 }, { "epoch": 0.6265263157894737, "grad_norm": 0.23757484555244446, "learning_rate": 8.299625468164794e-05, "loss": 3.3622, "step": 930 }, { "epoch": 0.6332631578947369, "grad_norm": 0.1661984622478485, "learning_rate": 8.149812734082397e-05, "loss": 3.3248, "step": 940 }, { "epoch": 0.64, "grad_norm": 0.08603614568710327, "learning_rate": 8e-05, "loss": 3.0086, "step": 950 }, { "epoch": 0.6467368421052632, "grad_norm": 0.07694745808839798, "learning_rate": 7.850187265917604e-05, "loss": 3.5162, "step": 960 }, { "epoch": 0.6534736842105263, "grad_norm": 0.16395558416843414, "learning_rate": 7.700374531835206e-05, "loss": 3.4812, "step": 970 }, { "epoch": 0.6602105263157895, "grad_norm": 0.13817398250102997, "learning_rate": 7.55056179775281e-05, "loss": 3.2516, "step": 980 }, { "epoch": 0.6669473684210526, "grad_norm": 0.25807198882102966, "learning_rate": 7.400749063670413e-05, "loss": 3.2101, "step": 990 }, { "epoch": 0.6736842105263158, "grad_norm": 0.06848172843456268, "learning_rate": 7.250936329588015e-05, "loss": 2.93, "step": 1000 }, { "epoch": 0.6804210526315789, "grad_norm": 1.089575171470642, "learning_rate": 7.101123595505618e-05, "loss": 3.4925, "step": 1010 }, { "epoch": 0.6871578947368421, "grad_norm": 0.20126965641975403, "learning_rate": 6.951310861423222e-05, "loss": 3.4603, "step": 1020 }, { "epoch": 0.6938947368421052, "grad_norm": 0.21779027581214905, "learning_rate": 6.801498127340824e-05, "loss": 3.1723, "step": 1030 }, { "epoch": 0.7006315789473684, "grad_norm": 0.18239159882068634, "learning_rate": 6.651685393258428e-05, "loss": 3.1903, "step": 1040 }, { "epoch": 0.7073684210526315, "grad_norm": 0.06677573919296265, "learning_rate": 6.50187265917603e-05, "loss": 2.8445, "step": 1050 }, { "epoch": 0.7141052631578947, "grad_norm": 0.42619746923446655, "learning_rate": 6.352059925093634e-05, "loss": 3.4319, "step": 1060 }, { "epoch": 0.7208421052631578, "grad_norm": 0.12023507058620453, "learning_rate": 6.202247191011237e-05, "loss": 3.3826, "step": 1070 }, { "epoch": 0.7275789473684211, "grad_norm": 0.15099403262138367, "learning_rate": 6.052434456928839e-05, "loss": 3.1425, "step": 1080 }, { "epoch": 0.7343157894736843, "grad_norm": 0.3474717438220978, "learning_rate": 5.902621722846442e-05, "loss": 3.1279, "step": 1090 }, { "epoch": 0.7410526315789474, "grad_norm": 0.12225649505853653, "learning_rate": 5.752808988764046e-05, "loss": 2.9033, "step": 1100 }, { "epoch": 0.7477894736842106, "grad_norm": 0.19639068841934204, "learning_rate": 5.6029962546816485e-05, "loss": 3.3681, "step": 1110 }, { "epoch": 0.7545263157894737, "grad_norm": 0.10571427643299103, "learning_rate": 5.453183520599251e-05, "loss": 3.335, "step": 1120 }, { "epoch": 0.7612631578947369, "grad_norm": 0.5154901146888733, "learning_rate": 5.3033707865168545e-05, "loss": 3.0952, "step": 1130 }, { "epoch": 0.768, "grad_norm": 0.6122628450393677, "learning_rate": 5.153558052434457e-05, "loss": 3.1269, "step": 1140 }, { "epoch": 0.7747368421052632, "grad_norm": 0.19698569178581238, "learning_rate": 5.00374531835206e-05, "loss": 2.8233, "step": 1150 }, { "epoch": 0.7814736842105263, "grad_norm": 0.13018374145030975, "learning_rate": 4.853932584269663e-05, "loss": 3.3094, "step": 1160 }, { "epoch": 0.7882105263157895, "grad_norm": 0.09522128850221634, "learning_rate": 4.704119850187266e-05, "loss": 3.2765, "step": 1170 }, { "epoch": 0.7949473684210526, "grad_norm": 0.10098107159137726, "learning_rate": 4.554307116104869e-05, "loss": 3.0807, "step": 1180 }, { "epoch": 0.8016842105263158, "grad_norm": 0.18019132316112518, "learning_rate": 4.404494382022472e-05, "loss": 3.0332, "step": 1190 }, { "epoch": 0.8084210526315789, "grad_norm": 0.16289708018302917, "learning_rate": 4.2546816479400754e-05, "loss": 2.7374, "step": 1200 }, { "epoch": 0.8151578947368421, "grad_norm": 0.12666673958301544, "learning_rate": 4.104868913857678e-05, "loss": 3.2118, "step": 1210 }, { "epoch": 0.8218947368421052, "grad_norm": 0.16891352832317352, "learning_rate": 3.955056179775281e-05, "loss": 3.1902, "step": 1220 }, { "epoch": 0.8286315789473684, "grad_norm": 0.10958009213209152, "learning_rate": 3.805243445692884e-05, "loss": 2.9862, "step": 1230 }, { "epoch": 0.8353684210526315, "grad_norm": 0.10642745345830917, "learning_rate": 3.655430711610487e-05, "loss": 3.0052, "step": 1240 }, { "epoch": 0.8421052631578947, "grad_norm": 0.05656813085079193, "learning_rate": 3.50561797752809e-05, "loss": 2.723, "step": 1250 }, { "epoch": 0.8488421052631578, "grad_norm": 0.08322717994451523, "learning_rate": 3.355805243445693e-05, "loss": 3.234, "step": 1260 }, { "epoch": 0.8555789473684211, "grad_norm": 0.13246551156044006, "learning_rate": 3.2059925093632956e-05, "loss": 3.212, "step": 1270 }, { "epoch": 0.8623157894736843, "grad_norm": 0.10225304961204529, "learning_rate": 3.056179775280899e-05, "loss": 2.9484, "step": 1280 }, { "epoch": 0.8690526315789474, "grad_norm": 0.19440552592277527, "learning_rate": 2.9063670411985024e-05, "loss": 2.9266, "step": 1290 }, { "epoch": 0.8757894736842106, "grad_norm": 0.08913037180900574, "learning_rate": 2.7565543071161047e-05, "loss": 2.6801, "step": 1300 }, { "epoch": 0.8825263157894737, "grad_norm": 0.10815408080816269, "learning_rate": 2.606741573033708e-05, "loss": 3.1505, "step": 1310 }, { "epoch": 0.8892631578947369, "grad_norm": 0.14371147751808167, "learning_rate": 2.4569288389513108e-05, "loss": 3.1293, "step": 1320 }, { "epoch": 0.896, "grad_norm": 0.1680973470211029, "learning_rate": 2.3071161048689138e-05, "loss": 2.8961, "step": 1330 }, { "epoch": 0.9027368421052632, "grad_norm": 0.19012019038200378, "learning_rate": 2.157303370786517e-05, "loss": 2.9096, "step": 1340 }, { "epoch": 0.9094736842105263, "grad_norm": 0.060957688838243484, "learning_rate": 2.00749063670412e-05, "loss": 2.6879, "step": 1350 }, { "epoch": 0.9162105263157895, "grad_norm": 0.15055014193058014, "learning_rate": 1.857677902621723e-05, "loss": 3.108, "step": 1360 }, { "epoch": 0.9229473684210526, "grad_norm": 0.1378874033689499, "learning_rate": 1.707865168539326e-05, "loss": 3.0428, "step": 1370 }, { "epoch": 0.9296842105263158, "grad_norm": 0.14901022613048553, "learning_rate": 1.558052434456929e-05, "loss": 2.8589, "step": 1380 }, { "epoch": 0.9364210526315789, "grad_norm": 0.17515867948532104, "learning_rate": 1.4082397003745318e-05, "loss": 2.8563, "step": 1390 }, { "epoch": 0.9431578947368421, "grad_norm": 0.11909812688827515, "learning_rate": 1.258426966292135e-05, "loss": 2.5759, "step": 1400 }, { "epoch": 0.9498947368421052, "grad_norm": 0.16348549723625183, "learning_rate": 1.1086142322097379e-05, "loss": 3.089, "step": 1410 }, { "epoch": 0.9566315789473684, "grad_norm": 0.08107765763998032, "learning_rate": 9.588014981273409e-06, "loss": 3.0145, "step": 1420 }, { "epoch": 0.9633684210526315, "grad_norm": 0.13251617550849915, "learning_rate": 8.089887640449438e-06, "loss": 2.8256, "step": 1430 }, { "epoch": 0.9701052631578947, "grad_norm": 0.10319063812494278, "learning_rate": 6.591760299625469e-06, "loss": 2.8456, "step": 1440 }, { "epoch": 0.9768421052631578, "grad_norm": 0.08950542658567429, "learning_rate": 5.093632958801498e-06, "loss": 2.605, "step": 1450 }, { "epoch": 0.983578947368421, "grad_norm": 0.08379487693309784, "learning_rate": 3.5955056179775286e-06, "loss": 3.0334, "step": 1460 }, { "epoch": 0.9903157894736843, "grad_norm": 0.1561821848154068, "learning_rate": 2.097378277153558e-06, "loss": 3.0357, "step": 1470 }, { "epoch": 0.9970526315789474, "grad_norm": 0.07574011385440826, "learning_rate": 5.992509363295881e-07, "loss": 2.7458, "step": 1480 } ], "logging_steps": 10, "max_steps": 1484, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.32780044727799e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }