| { |
| "best_metric": 0.2870275791624106, |
| "best_model_checkpoint": "/media/cse/HDD/Shawon/shawon/MY DATA/ViViT_WLASL_200_epochs_p20/checkpoint-57160", |
| "epoch": 51.005000699888015, |
| "eval_steps": 500, |
| "global_step": 92885, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0002799552071668533, |
| "grad_norm": 46.26704025268555, |
| "learning_rate": 1.3577827547592386e-07, |
| "loss": 30.8454, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.0005599104143337066, |
| "grad_norm": 45.19619369506836, |
| "learning_rate": 2.757558790593505e-07, |
| "loss": 30.9112, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.0008398656215005599, |
| "grad_norm": 44.790931701660156, |
| "learning_rate": 4.1573348264277716e-07, |
| "loss": 30.966, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.0011198208286674132, |
| "grad_norm": 46.15544509887695, |
| "learning_rate": 5.557110862262039e-07, |
| "loss": 30.9534, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.0013997760358342665, |
| "grad_norm": 45.963539123535156, |
| "learning_rate": 6.956886898096305e-07, |
| "loss": 30.9846, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.0016797312430011197, |
| "grad_norm": 46.944801330566406, |
| "learning_rate": 8.356662933930571e-07, |
| "loss": 30.8929, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.001959686450167973, |
| "grad_norm": 44.26920700073242, |
| "learning_rate": 9.756438969764837e-07, |
| "loss": 30.7879, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.0022396416573348264, |
| "grad_norm": 44.74138259887695, |
| "learning_rate": 1.1156215005599104e-06, |
| "loss": 30.8412, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.0025195968645016797, |
| "grad_norm": 44.76321792602539, |
| "learning_rate": 1.2555991041433373e-06, |
| "loss": 30.8549, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.002799552071668533, |
| "grad_norm": 44.44749450683594, |
| "learning_rate": 1.395576707726764e-06, |
| "loss": 31.0151, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.003079507278835386, |
| "grad_norm": 44.81037139892578, |
| "learning_rate": 1.5355543113101904e-06, |
| "loss": 30.8179, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.0033594624860022394, |
| "grad_norm": 44.556419372558594, |
| "learning_rate": 1.675531914893617e-06, |
| "loss": 30.923, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.003639417693169093, |
| "grad_norm": 44.91850280761719, |
| "learning_rate": 1.815509518477044e-06, |
| "loss": 30.8516, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.003919372900335946, |
| "grad_norm": 42.26573181152344, |
| "learning_rate": 1.9554871220604704e-06, |
| "loss": 30.8223, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.0041993281075028, |
| "grad_norm": 42.67766189575195, |
| "learning_rate": 2.095464725643897e-06, |
| "loss": 30.8226, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.004479283314669653, |
| "grad_norm": 39.56558609008789, |
| "learning_rate": 2.2354423292273237e-06, |
| "loss": 30.7627, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.004759238521836506, |
| "grad_norm": 36.79159164428711, |
| "learning_rate": 2.3754199328107503e-06, |
| "loss": 30.7847, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.005, |
| "eval_accuracy": 0.0002553626149131767, |
| "eval_f1": 2.9184298847220195e-05, |
| "eval_loss": 7.671034336090088, |
| "eval_precision": 1.5476522115950103e-05, |
| "eval_recall": 0.0002553626149131767, |
| "eval_runtime": 261.8693, |
| "eval_samples_per_second": 14.954, |
| "eval_steps_per_second": 7.477, |
| "eval_top_10_accuracy": 0.004341164453524004, |
| "eval_top_1_accuracy": 0.0002553626149131767, |
| "eval_top_5_accuracy": 0.0012768130745658835, |
| "step": 1786 |
| }, |
| { |
| "epoch": 1.0000384938409854, |
| "grad_norm": 37.38892364501953, |
| "learning_rate": 2.515397536394177e-06, |
| "loss": 30.7364, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.0003184490481523, |
| "grad_norm": 38.72145080566406, |
| "learning_rate": 2.6553751399776037e-06, |
| "loss": 30.544, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.000598404255319, |
| "grad_norm": 38.69617462158203, |
| "learning_rate": 2.7953527435610303e-06, |
| "loss": 30.5909, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.000878359462486, |
| "grad_norm": 35.71845626831055, |
| "learning_rate": 2.933930571108623e-06, |
| "loss": 30.5322, |
| "step": 2100 |
| }, |
| { |
| "epoch": 1.0011583146696528, |
| "grad_norm": 37.04778289794922, |
| "learning_rate": 3.0739081746920493e-06, |
| "loss": 30.5843, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.0014382698768196, |
| "grad_norm": 34.91965866088867, |
| "learning_rate": 3.213885778275476e-06, |
| "loss": 30.4873, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.0017182250839867, |
| "grad_norm": 31.99736785888672, |
| "learning_rate": 3.353863381858903e-06, |
| "loss": 30.4577, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.0019981802911535, |
| "grad_norm": 32.1821403503418, |
| "learning_rate": 3.4938409854423293e-06, |
| "loss": 30.521, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.0022781354983203, |
| "grad_norm": 31.080778121948242, |
| "learning_rate": 3.633818589025756e-06, |
| "loss": 30.4738, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.0025580907054872, |
| "grad_norm": 33.43671417236328, |
| "learning_rate": 3.773796192609183e-06, |
| "loss": 30.518, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.002838045912654, |
| "grad_norm": 30.197689056396484, |
| "learning_rate": 3.913773796192609e-06, |
| "loss": 30.4191, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.0031180011198209, |
| "grad_norm": 33.55823516845703, |
| "learning_rate": 4.053751399776036e-06, |
| "loss": 30.4412, |
| "step": 2900 |
| }, |
| { |
| "epoch": 1.0033979563269877, |
| "grad_norm": 33.3959846496582, |
| "learning_rate": 4.193729003359463e-06, |
| "loss": 30.4303, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.0036779115341545, |
| "grad_norm": 33.3631706237793, |
| "learning_rate": 4.333706606942889e-06, |
| "loss": 30.3455, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.0039578667413214, |
| "grad_norm": 31.35999870300293, |
| "learning_rate": 4.473684210526316e-06, |
| "loss": 30.3703, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.0042378219484882, |
| "grad_norm": 34.44354248046875, |
| "learning_rate": 4.613661814109743e-06, |
| "loss": 30.5096, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.004517777155655, |
| "grad_norm": 34.77235794067383, |
| "learning_rate": 4.753639417693169e-06, |
| "loss": 30.4935, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.0047977323628219, |
| "grad_norm": 34.465576171875, |
| "learning_rate": 4.893617021276596e-06, |
| "loss": 30.4187, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.0049993001119821, |
| "eval_accuracy": 0.0020429009193054137, |
| "eval_f1": 0.0003999515192909813, |
| "eval_loss": 7.604901313781738, |
| "eval_precision": 0.0002671184173273292, |
| "eval_recall": 0.0020429009193054137, |
| "eval_runtime": 221.6582, |
| "eval_samples_per_second": 17.667, |
| "eval_steps_per_second": 8.833, |
| "eval_top_10_accuracy": 0.011491317671092951, |
| "eval_top_1_accuracy": 0.0020429009193054137, |
| "eval_top_5_accuracy": 0.0056179775280898875, |
| "step": 3572 |
| }, |
| { |
| "epoch": 2.000076987681971, |
| "grad_norm": 33.962181091308594, |
| "learning_rate": 5.0335946248600226e-06, |
| "loss": 30.4182, |
| "step": 3600 |
| }, |
| { |
| "epoch": 2.000356942889138, |
| "grad_norm": 41.75550079345703, |
| "learning_rate": 5.173572228443449e-06, |
| "loss": 30.1018, |
| "step": 3700 |
| }, |
| { |
| "epoch": 2.0006368980963045, |
| "grad_norm": 37.583744049072266, |
| "learning_rate": 5.313549832026876e-06, |
| "loss": 30.0925, |
| "step": 3800 |
| }, |
| { |
| "epoch": 2.0009168533034716, |
| "grad_norm": 38.75505447387695, |
| "learning_rate": 5.4535274356103025e-06, |
| "loss": 30.0771, |
| "step": 3900 |
| }, |
| { |
| "epoch": 2.001196808510638, |
| "grad_norm": 42.10888671875, |
| "learning_rate": 5.593505039193729e-06, |
| "loss": 30.072, |
| "step": 4000 |
| }, |
| { |
| "epoch": 2.0014767637178053, |
| "grad_norm": 39.6405029296875, |
| "learning_rate": 5.7320828667413216e-06, |
| "loss": 30.0103, |
| "step": 4100 |
| }, |
| { |
| "epoch": 2.001756718924972, |
| "grad_norm": 37.14402389526367, |
| "learning_rate": 5.872060470324748e-06, |
| "loss": 30.0757, |
| "step": 4200 |
| }, |
| { |
| "epoch": 2.002036674132139, |
| "grad_norm": 41.2797737121582, |
| "learning_rate": 6.012038073908175e-06, |
| "loss": 29.9727, |
| "step": 4300 |
| }, |
| { |
| "epoch": 2.0023166293393055, |
| "grad_norm": 42.102169036865234, |
| "learning_rate": 6.1520156774916016e-06, |
| "loss": 30.1018, |
| "step": 4400 |
| }, |
| { |
| "epoch": 2.0025965845464726, |
| "grad_norm": 44.31977462768555, |
| "learning_rate": 6.291993281075029e-06, |
| "loss": 29.9593, |
| "step": 4500 |
| }, |
| { |
| "epoch": 2.002876539753639, |
| "grad_norm": 41.01313781738281, |
| "learning_rate": 6.431970884658455e-06, |
| "loss": 30.0683, |
| "step": 4600 |
| }, |
| { |
| "epoch": 2.0031564949608063, |
| "grad_norm": 45.978580474853516, |
| "learning_rate": 6.5719484882418815e-06, |
| "loss": 30.031, |
| "step": 4700 |
| }, |
| { |
| "epoch": 2.0034364501679733, |
| "grad_norm": 43.09092712402344, |
| "learning_rate": 6.711926091825309e-06, |
| "loss": 29.9496, |
| "step": 4800 |
| }, |
| { |
| "epoch": 2.00371640537514, |
| "grad_norm": 48.287994384765625, |
| "learning_rate": 6.851903695408735e-06, |
| "loss": 29.9667, |
| "step": 4900 |
| }, |
| { |
| "epoch": 2.003996360582307, |
| "grad_norm": 40.08522033691406, |
| "learning_rate": 6.9918812989921615e-06, |
| "loss": 29.8607, |
| "step": 5000 |
| }, |
| { |
| "epoch": 2.0042763157894736, |
| "grad_norm": 43.70709991455078, |
| "learning_rate": 7.131858902575589e-06, |
| "loss": 29.7901, |
| "step": 5100 |
| }, |
| { |
| "epoch": 2.0045562709966407, |
| "grad_norm": 44.53886413574219, |
| "learning_rate": 7.271836506159015e-06, |
| "loss": 29.8422, |
| "step": 5200 |
| }, |
| { |
| "epoch": 2.0048362262038073, |
| "grad_norm": 46.5408821105957, |
| "learning_rate": 7.4118141097424415e-06, |
| "loss": 29.7427, |
| "step": 5300 |
| }, |
| { |
| "epoch": 2.0049986002239644, |
| "eval_accuracy": 0.003575076608784474, |
| "eval_f1": 0.0008090265726319296, |
| "eval_loss": 7.486084938049316, |
| "eval_precision": 0.0008795780348085499, |
| "eval_recall": 0.003575076608784474, |
| "eval_runtime": 222.3048, |
| "eval_samples_per_second": 17.615, |
| "eval_steps_per_second": 8.808, |
| "eval_top_10_accuracy": 0.027068437180796732, |
| "eval_top_1_accuracy": 0.003575076608784474, |
| "eval_top_5_accuracy": 0.016087844739530132, |
| "step": 5358 |
| }, |
| { |
| "epoch": 3.0001154815229563, |
| "grad_norm": 44.443878173828125, |
| "learning_rate": 7.551791713325867e-06, |
| "loss": 29.5441, |
| "step": 5400 |
| }, |
| { |
| "epoch": 3.0003954367301233, |
| "grad_norm": 52.79859924316406, |
| "learning_rate": 7.691769316909295e-06, |
| "loss": 28.9231, |
| "step": 5500 |
| }, |
| { |
| "epoch": 3.00067539193729, |
| "grad_norm": 47.09991455078125, |
| "learning_rate": 7.831746920492722e-06, |
| "loss": 29.1026, |
| "step": 5600 |
| }, |
| { |
| "epoch": 3.000955347144457, |
| "grad_norm": 49.0944709777832, |
| "learning_rate": 7.971724524076148e-06, |
| "loss": 29.1107, |
| "step": 5700 |
| }, |
| { |
| "epoch": 3.0012353023516236, |
| "grad_norm": 49.614070892333984, |
| "learning_rate": 8.111702127659574e-06, |
| "loss": 29.0765, |
| "step": 5800 |
| }, |
| { |
| "epoch": 3.0015152575587907, |
| "grad_norm": 48.07614517211914, |
| "learning_rate": 8.251679731243001e-06, |
| "loss": 29.0133, |
| "step": 5900 |
| }, |
| { |
| "epoch": 3.0017952127659573, |
| "grad_norm": 48.23404312133789, |
| "learning_rate": 8.391657334826427e-06, |
| "loss": 28.8593, |
| "step": 6000 |
| }, |
| { |
| "epoch": 3.0020751679731243, |
| "grad_norm": 49.85590744018555, |
| "learning_rate": 8.53023516237402e-06, |
| "loss": 28.9609, |
| "step": 6100 |
| }, |
| { |
| "epoch": 3.002355123180291, |
| "grad_norm": 50.691890716552734, |
| "learning_rate": 8.670212765957447e-06, |
| "loss": 28.9257, |
| "step": 6200 |
| }, |
| { |
| "epoch": 3.002635078387458, |
| "grad_norm": 49.11813735961914, |
| "learning_rate": 8.810190369540875e-06, |
| "loss": 28.7913, |
| "step": 6300 |
| }, |
| { |
| "epoch": 3.0029150335946246, |
| "grad_norm": 49.65032958984375, |
| "learning_rate": 8.9501679731243e-06, |
| "loss": 28.8195, |
| "step": 6400 |
| }, |
| { |
| "epoch": 3.0031949888017917, |
| "grad_norm": 49.955528259277344, |
| "learning_rate": 9.090145576707728e-06, |
| "loss": 28.7372, |
| "step": 6500 |
| }, |
| { |
| "epoch": 3.0034749440089588, |
| "grad_norm": 48.818843841552734, |
| "learning_rate": 9.230123180291154e-06, |
| "loss": 28.705, |
| "step": 6600 |
| }, |
| { |
| "epoch": 3.0037548992161254, |
| "grad_norm": 48.66638946533203, |
| "learning_rate": 9.37010078387458e-06, |
| "loss": 28.6832, |
| "step": 6700 |
| }, |
| { |
| "epoch": 3.0040348544232924, |
| "grad_norm": 50.39780044555664, |
| "learning_rate": 9.510078387458007e-06, |
| "loss": 28.6613, |
| "step": 6800 |
| }, |
| { |
| "epoch": 3.004314809630459, |
| "grad_norm": 51.30269241333008, |
| "learning_rate": 9.650055991041433e-06, |
| "loss": 28.4991, |
| "step": 6900 |
| }, |
| { |
| "epoch": 3.004594764837626, |
| "grad_norm": 51.30686569213867, |
| "learning_rate": 9.79003359462486e-06, |
| "loss": 28.5717, |
| "step": 7000 |
| }, |
| { |
| "epoch": 3.0048747200447927, |
| "grad_norm": 50.400569915771484, |
| "learning_rate": 9.930011198208288e-06, |
| "loss": 28.4534, |
| "step": 7100 |
| }, |
| { |
| "epoch": 3.005000699888018, |
| "eval_accuracy": 0.011491317671092951, |
| "eval_f1": 0.004413303303826739, |
| "eval_loss": 7.249152183532715, |
| "eval_precision": 0.0048748011754680045, |
| "eval_recall": 0.011491317671092951, |
| "eval_runtime": 220.6205, |
| "eval_samples_per_second": 17.75, |
| "eval_steps_per_second": 8.875, |
| "eval_top_10_accuracy": 0.06052093973442288, |
| "eval_top_1_accuracy": 0.011491317671092951, |
| "eval_top_5_accuracy": 0.037538304392236974, |
| "step": 7145 |
| }, |
| { |
| "epoch": 4.000153975363942, |
| "grad_norm": 50.86111068725586, |
| "learning_rate": 1.0069988801791714e-05, |
| "loss": 27.9795, |
| "step": 7200 |
| }, |
| { |
| "epoch": 4.000433930571108, |
| "grad_norm": 49.65926742553711, |
| "learning_rate": 1.020996640537514e-05, |
| "loss": 27.7397, |
| "step": 7300 |
| }, |
| { |
| "epoch": 4.000713885778276, |
| "grad_norm": 50.528770446777344, |
| "learning_rate": 1.0349944008958567e-05, |
| "loss": 27.5468, |
| "step": 7400 |
| }, |
| { |
| "epoch": 4.000993840985442, |
| "grad_norm": 50.71484375, |
| "learning_rate": 1.0489921612541993e-05, |
| "loss": 27.4879, |
| "step": 7500 |
| }, |
| { |
| "epoch": 4.001273796192609, |
| "grad_norm": 49.60582733154297, |
| "learning_rate": 1.062989921612542e-05, |
| "loss": 27.4364, |
| "step": 7600 |
| }, |
| { |
| "epoch": 4.001553751399776, |
| "grad_norm": 50.794559478759766, |
| "learning_rate": 1.0769876819708848e-05, |
| "loss": 27.3306, |
| "step": 7700 |
| }, |
| { |
| "epoch": 4.001833706606943, |
| "grad_norm": 52.04481887817383, |
| "learning_rate": 1.0909854423292274e-05, |
| "loss": 27.1802, |
| "step": 7800 |
| }, |
| { |
| "epoch": 4.00211366181411, |
| "grad_norm": 53.98943328857422, |
| "learning_rate": 1.1049832026875701e-05, |
| "loss": 27.4401, |
| "step": 7900 |
| }, |
| { |
| "epoch": 4.002393617021276, |
| "grad_norm": 50.8287467956543, |
| "learning_rate": 1.1189809630459127e-05, |
| "loss": 27.3483, |
| "step": 8000 |
| }, |
| { |
| "epoch": 4.002673572228444, |
| "grad_norm": 47.586692810058594, |
| "learning_rate": 1.132838745800672e-05, |
| "loss": 27.3851, |
| "step": 8100 |
| }, |
| { |
| "epoch": 4.0029535274356105, |
| "grad_norm": 50.97801971435547, |
| "learning_rate": 1.1468365061590145e-05, |
| "loss": 27.0601, |
| "step": 8200 |
| }, |
| { |
| "epoch": 4.003233482642777, |
| "grad_norm": 49.2835693359375, |
| "learning_rate": 1.1608342665173573e-05, |
| "loss": 27.1775, |
| "step": 8300 |
| }, |
| { |
| "epoch": 4.003513437849944, |
| "grad_norm": 49.225276947021484, |
| "learning_rate": 1.1748320268756998e-05, |
| "loss": 27.1897, |
| "step": 8400 |
| }, |
| { |
| "epoch": 4.003793393057111, |
| "grad_norm": 50.65304946899414, |
| "learning_rate": 1.1888297872340426e-05, |
| "loss": 27.2348, |
| "step": 8500 |
| }, |
| { |
| "epoch": 4.004073348264278, |
| "grad_norm": 51.88926315307617, |
| "learning_rate": 1.2028275475923854e-05, |
| "loss": 26.9358, |
| "step": 8600 |
| }, |
| { |
| "epoch": 4.0043533034714445, |
| "grad_norm": 53.61826705932617, |
| "learning_rate": 1.216825307950728e-05, |
| "loss": 26.8982, |
| "step": 8700 |
| }, |
| { |
| "epoch": 4.004633258678611, |
| "grad_norm": 48.89099884033203, |
| "learning_rate": 1.2308230683090707e-05, |
| "loss": 26.8073, |
| "step": 8800 |
| }, |
| { |
| "epoch": 4.004913213885779, |
| "grad_norm": 49.341552734375, |
| "learning_rate": 1.2448208286674133e-05, |
| "loss": 26.8561, |
| "step": 8900 |
| }, |
| { |
| "epoch": 4.005, |
| "eval_accuracy": 0.031920326864147086, |
| "eval_f1": 0.01254087277909593, |
| "eval_loss": 6.92970085144043, |
| "eval_precision": 0.011491031769049, |
| "eval_recall": 0.031920326864147086, |
| "eval_runtime": 224.0829, |
| "eval_samples_per_second": 17.476, |
| "eval_steps_per_second": 8.738, |
| "eval_top_10_accuracy": 0.12078651685393259, |
| "eval_top_1_accuracy": 0.031664964249233915, |
| "eval_top_5_accuracy": 0.07660878447395301, |
| "step": 8931 |
| }, |
| { |
| "epoch": 5.0001924692049275, |
| "grad_norm": 53.48250961303711, |
| "learning_rate": 1.2588185890257558e-05, |
| "loss": 26.0557, |
| "step": 9000 |
| }, |
| { |
| "epoch": 5.000472424412094, |
| "grad_norm": 49.2156982421875, |
| "learning_rate": 1.2728163493840986e-05, |
| "loss": 25.6844, |
| "step": 9100 |
| }, |
| { |
| "epoch": 5.000752379619261, |
| "grad_norm": 51.55862808227539, |
| "learning_rate": 1.2868141097424413e-05, |
| "loss": 25.5654, |
| "step": 9200 |
| }, |
| { |
| "epoch": 5.001032334826427, |
| "grad_norm": 54.36677551269531, |
| "learning_rate": 1.3008118701007841e-05, |
| "loss": 25.6638, |
| "step": 9300 |
| }, |
| { |
| "epoch": 5.001312290033595, |
| "grad_norm": 50.89421463012695, |
| "learning_rate": 1.3148096304591265e-05, |
| "loss": 25.8723, |
| "step": 9400 |
| }, |
| { |
| "epoch": 5.0015922452407615, |
| "grad_norm": 51.31764602661133, |
| "learning_rate": 1.3288073908174693e-05, |
| "loss": 25.5003, |
| "step": 9500 |
| }, |
| { |
| "epoch": 5.001872200447928, |
| "grad_norm": 50.556793212890625, |
| "learning_rate": 1.3428051511758118e-05, |
| "loss": 25.6165, |
| "step": 9600 |
| }, |
| { |
| "epoch": 5.002152155655095, |
| "grad_norm": 51.41532897949219, |
| "learning_rate": 1.3568029115341546e-05, |
| "loss": 25.5103, |
| "step": 9700 |
| }, |
| { |
| "epoch": 5.002432110862262, |
| "grad_norm": 50.99569320678711, |
| "learning_rate": 1.3708006718924973e-05, |
| "loss": 25.2506, |
| "step": 9800 |
| }, |
| { |
| "epoch": 5.002712066069429, |
| "grad_norm": 51.33198165893555, |
| "learning_rate": 1.3847984322508401e-05, |
| "loss": 25.4653, |
| "step": 9900 |
| }, |
| { |
| "epoch": 5.0029920212765955, |
| "grad_norm": 49.71004867553711, |
| "learning_rate": 1.3987961926091825e-05, |
| "loss": 25.4142, |
| "step": 10000 |
| }, |
| { |
| "epoch": 5.003271976483763, |
| "grad_norm": 49.55411911010742, |
| "learning_rate": 1.4126539753639417e-05, |
| "loss": 25.2711, |
| "step": 10100 |
| }, |
| { |
| "epoch": 5.00355193169093, |
| "grad_norm": 50.0079345703125, |
| "learning_rate": 1.4266517357222845e-05, |
| "loss": 25.3369, |
| "step": 10200 |
| }, |
| { |
| "epoch": 5.003831886898096, |
| "grad_norm": 49.790077209472656, |
| "learning_rate": 1.4406494960806272e-05, |
| "loss": 25.2757, |
| "step": 10300 |
| }, |
| { |
| "epoch": 5.004111842105263, |
| "grad_norm": 49.824241638183594, |
| "learning_rate": 1.4546472564389698e-05, |
| "loss": 25.0612, |
| "step": 10400 |
| }, |
| { |
| "epoch": 5.00439179731243, |
| "grad_norm": 50.10408020019531, |
| "learning_rate": 1.4686450167973126e-05, |
| "loss": 25.0615, |
| "step": 10500 |
| }, |
| { |
| "epoch": 5.004671752519597, |
| "grad_norm": 50.95100021362305, |
| "learning_rate": 1.4826427771556553e-05, |
| "loss": 25.2522, |
| "step": 10600 |
| }, |
| { |
| "epoch": 5.004951707726764, |
| "grad_norm": 50.80625915527344, |
| "learning_rate": 1.4966405375139977e-05, |
| "loss": 24.997, |
| "step": 10700 |
| }, |
| { |
| "epoch": 5.004999300111982, |
| "eval_accuracy": 0.045965270684371805, |
| "eval_f1": 0.020995371903437376, |
| "eval_loss": 6.5685930252075195, |
| "eval_precision": 0.021173968378675568, |
| "eval_recall": 0.045965270684371805, |
| "eval_runtime": 221.4663, |
| "eval_samples_per_second": 17.682, |
| "eval_steps_per_second": 8.841, |
| "eval_top_10_accuracy": 0.19458631256384065, |
| "eval_top_1_accuracy": 0.045965270684371805, |
| "eval_top_5_accuracy": 0.12921348314606743, |
| "step": 10717 |
| }, |
| { |
| "epoch": 6.0002309630459125, |
| "grad_norm": 50.683738708496094, |
| "learning_rate": 1.5106382978723405e-05, |
| "loss": 24.0698, |
| "step": 10800 |
| }, |
| { |
| "epoch": 6.000510918253079, |
| "grad_norm": 51.920692443847656, |
| "learning_rate": 1.5246360582306832e-05, |
| "loss": 23.6795, |
| "step": 10900 |
| }, |
| { |
| "epoch": 6.000790873460247, |
| "grad_norm": 51.51482391357422, |
| "learning_rate": 1.538633818589026e-05, |
| "loss": 23.793, |
| "step": 11000 |
| }, |
| { |
| "epoch": 6.001070828667413, |
| "grad_norm": 53.38042068481445, |
| "learning_rate": 1.5526315789473686e-05, |
| "loss": 23.7737, |
| "step": 11100 |
| }, |
| { |
| "epoch": 6.00135078387458, |
| "grad_norm": 51.36591720581055, |
| "learning_rate": 1.566629339305711e-05, |
| "loss": 23.4639, |
| "step": 11200 |
| }, |
| { |
| "epoch": 6.0016307390817465, |
| "grad_norm": 51.339813232421875, |
| "learning_rate": 1.5806270996640537e-05, |
| "loss": 23.6826, |
| "step": 11300 |
| }, |
| { |
| "epoch": 6.001910694288914, |
| "grad_norm": 51.70831298828125, |
| "learning_rate": 1.5946248600223963e-05, |
| "loss": 23.3406, |
| "step": 11400 |
| }, |
| { |
| "epoch": 6.002190649496081, |
| "grad_norm": 52.543033599853516, |
| "learning_rate": 1.6086226203807392e-05, |
| "loss": 23.6466, |
| "step": 11500 |
| }, |
| { |
| "epoch": 6.002470604703247, |
| "grad_norm": 50.490440368652344, |
| "learning_rate": 1.6226203807390818e-05, |
| "loss": 23.6253, |
| "step": 11600 |
| }, |
| { |
| "epoch": 6.002750559910415, |
| "grad_norm": 51.624053955078125, |
| "learning_rate": 1.6366181410974247e-05, |
| "loss": 23.296, |
| "step": 11700 |
| }, |
| { |
| "epoch": 6.003030515117581, |
| "grad_norm": 48.22161102294922, |
| "learning_rate": 1.650615901455767e-05, |
| "loss": 23.2675, |
| "step": 11800 |
| }, |
| { |
| "epoch": 6.003310470324748, |
| "grad_norm": 49.2001838684082, |
| "learning_rate": 1.66461366181411e-05, |
| "loss": 23.3034, |
| "step": 11900 |
| }, |
| { |
| "epoch": 6.003590425531915, |
| "grad_norm": 46.579803466796875, |
| "learning_rate": 1.6786114221724525e-05, |
| "loss": 23.2177, |
| "step": 12000 |
| }, |
| { |
| "epoch": 6.003870380739082, |
| "grad_norm": 51.24250411987305, |
| "learning_rate": 1.692469204927212e-05, |
| "loss": 23.0834, |
| "step": 12100 |
| }, |
| { |
| "epoch": 6.004150335946249, |
| "grad_norm": 53.71959686279297, |
| "learning_rate": 1.7064669652855545e-05, |
| "loss": 23.277, |
| "step": 12200 |
| }, |
| { |
| "epoch": 6.004430291153415, |
| "grad_norm": 51.754024505615234, |
| "learning_rate": 1.720464725643897e-05, |
| "loss": 23.0179, |
| "step": 12300 |
| }, |
| { |
| "epoch": 6.004710246360582, |
| "grad_norm": 52.60391616821289, |
| "learning_rate": 1.7344624860022396e-05, |
| "loss": 23.0741, |
| "step": 12400 |
| }, |
| { |
| "epoch": 6.004990201567749, |
| "grad_norm": 51.15227127075195, |
| "learning_rate": 1.7484602463605822e-05, |
| "loss": 22.8381, |
| "step": 12500 |
| }, |
| { |
| "epoch": 6.004998600223964, |
| "eval_accuracy": 0.07482124616956078, |
| "eval_f1": 0.03821121445416449, |
| "eval_loss": 6.204948425292969, |
| "eval_precision": 0.03864142268606078, |
| "eval_recall": 0.07482124616956078, |
| "eval_runtime": 224.4924, |
| "eval_samples_per_second": 17.444, |
| "eval_steps_per_second": 8.722, |
| "eval_top_10_accuracy": 0.2604698672114402, |
| "eval_top_1_accuracy": 0.0745658835546476, |
| "eval_top_5_accuracy": 0.18590398365679264, |
| "step": 12503 |
| }, |
| { |
| "epoch": 7.000269456886898, |
| "grad_norm": 51.505096435546875, |
| "learning_rate": 1.762458006718925e-05, |
| "loss": 21.753, |
| "step": 12600 |
| }, |
| { |
| "epoch": 7.000549412094065, |
| "grad_norm": 53.034732818603516, |
| "learning_rate": 1.7764557670772677e-05, |
| "loss": 21.4792, |
| "step": 12700 |
| }, |
| { |
| "epoch": 7.000829367301232, |
| "grad_norm": 53.07753372192383, |
| "learning_rate": 1.7904535274356103e-05, |
| "loss": 21.5145, |
| "step": 12800 |
| }, |
| { |
| "epoch": 7.001109322508398, |
| "grad_norm": 52.74058532714844, |
| "learning_rate": 1.8044512877939532e-05, |
| "loss": 21.4575, |
| "step": 12900 |
| }, |
| { |
| "epoch": 7.001389277715566, |
| "grad_norm": 53.695003509521484, |
| "learning_rate": 1.8184490481522955e-05, |
| "loss": 21.6702, |
| "step": 13000 |
| }, |
| { |
| "epoch": 7.001669232922732, |
| "grad_norm": 54.33465576171875, |
| "learning_rate": 1.8324468085106384e-05, |
| "loss": 21.3756, |
| "step": 13100 |
| }, |
| { |
| "epoch": 7.001949188129899, |
| "grad_norm": 55.21706008911133, |
| "learning_rate": 1.846444568868981e-05, |
| "loss": 21.2789, |
| "step": 13200 |
| }, |
| { |
| "epoch": 7.0022291433370665, |
| "grad_norm": 52.99767303466797, |
| "learning_rate": 1.860442329227324e-05, |
| "loss": 21.119, |
| "step": 13300 |
| }, |
| { |
| "epoch": 7.002509098544233, |
| "grad_norm": 52.56631851196289, |
| "learning_rate": 1.8744400895856665e-05, |
| "loss": 21.3768, |
| "step": 13400 |
| }, |
| { |
| "epoch": 7.0027890537514, |
| "grad_norm": 59.85367965698242, |
| "learning_rate": 1.888437849944009e-05, |
| "loss": 21.1635, |
| "step": 13500 |
| }, |
| { |
| "epoch": 7.003069008958566, |
| "grad_norm": 53.9185905456543, |
| "learning_rate": 1.9024356103023516e-05, |
| "loss": 21.239, |
| "step": 13600 |
| }, |
| { |
| "epoch": 7.003348964165734, |
| "grad_norm": 53.914737701416016, |
| "learning_rate": 1.9164333706606942e-05, |
| "loss": 21.2056, |
| "step": 13700 |
| }, |
| { |
| "epoch": 7.0036289193729, |
| "grad_norm": 53.44260025024414, |
| "learning_rate": 1.930431131019037e-05, |
| "loss": 20.8527, |
| "step": 13800 |
| }, |
| { |
| "epoch": 7.003908874580067, |
| "grad_norm": 50.94429016113281, |
| "learning_rate": 1.9444288913773797e-05, |
| "loss": 21.0278, |
| "step": 13900 |
| }, |
| { |
| "epoch": 7.004188829787234, |
| "grad_norm": 51.96830749511719, |
| "learning_rate": 1.9584266517357226e-05, |
| "loss": 20.979, |
| "step": 14000 |
| }, |
| { |
| "epoch": 7.004468784994401, |
| "grad_norm": 51.84593200683594, |
| "learning_rate": 1.9722844344904817e-05, |
| "loss": 21.0503, |
| "step": 14100 |
| }, |
| { |
| "epoch": 7.004748740201568, |
| "grad_norm": 52.195037841796875, |
| "learning_rate": 1.986142217245241e-05, |
| "loss": 20.56, |
| "step": 14200 |
| }, |
| { |
| "epoch": 7.005000699888018, |
| "eval_accuracy": 0.10214504596527069, |
| "eval_f1": 0.05936071255358349, |
| "eval_loss": 5.805036544799805, |
| "eval_precision": 0.05913221935096704, |
| "eval_recall": 0.10214504596527069, |
| "eval_runtime": 223.6709, |
| "eval_samples_per_second": 17.508, |
| "eval_steps_per_second": 8.754, |
| "eval_top_10_accuracy": 0.34627170582226763, |
| "eval_top_1_accuracy": 0.10214504596527069, |
| "eval_top_5_accuracy": 0.2553626149131767, |
| "step": 14290 |
| }, |
| { |
| "epoch": 8.000027995520716, |
| "grad_norm": 51.8380012512207, |
| "learning_rate": 2.0001399776035833e-05, |
| "loss": 20.5048, |
| "step": 14300 |
| }, |
| { |
| "epoch": 8.000307950727883, |
| "grad_norm": 56.783084869384766, |
| "learning_rate": 2.0141377379619263e-05, |
| "loss": 19.3519, |
| "step": 14400 |
| }, |
| { |
| "epoch": 8.00058790593505, |
| "grad_norm": 54.16028594970703, |
| "learning_rate": 2.028135498320269e-05, |
| "loss": 18.8881, |
| "step": 14500 |
| }, |
| { |
| "epoch": 8.000867861142217, |
| "grad_norm": 52.2464714050293, |
| "learning_rate": 2.0421332586786114e-05, |
| "loss": 19.0802, |
| "step": 14600 |
| }, |
| { |
| "epoch": 8.001147816349384, |
| "grad_norm": 54.0433464050293, |
| "learning_rate": 2.0561310190369543e-05, |
| "loss": 19.1179, |
| "step": 14700 |
| }, |
| { |
| "epoch": 8.001427771556552, |
| "grad_norm": 54.63380432128906, |
| "learning_rate": 2.070128779395297e-05, |
| "loss": 19.0229, |
| "step": 14800 |
| }, |
| { |
| "epoch": 8.001707726763717, |
| "grad_norm": 56.0116081237793, |
| "learning_rate": 2.0841265397536395e-05, |
| "loss": 19.1359, |
| "step": 14900 |
| }, |
| { |
| "epoch": 8.001987681970885, |
| "grad_norm": 52.73674392700195, |
| "learning_rate": 2.098124300111982e-05, |
| "loss": 18.7772, |
| "step": 15000 |
| }, |
| { |
| "epoch": 8.002267637178052, |
| "grad_norm": 48.63119888305664, |
| "learning_rate": 2.112122060470325e-05, |
| "loss": 18.9625, |
| "step": 15100 |
| }, |
| { |
| "epoch": 8.002547592385218, |
| "grad_norm": 52.9703369140625, |
| "learning_rate": 2.1261198208286676e-05, |
| "loss": 18.926, |
| "step": 15200 |
| }, |
| { |
| "epoch": 8.002827547592386, |
| "grad_norm": 54.72795104980469, |
| "learning_rate": 2.14011758118701e-05, |
| "loss": 18.4723, |
| "step": 15300 |
| }, |
| { |
| "epoch": 8.003107502799551, |
| "grad_norm": 52.77252197265625, |
| "learning_rate": 2.1541153415453527e-05, |
| "loss": 18.5272, |
| "step": 15400 |
| }, |
| { |
| "epoch": 8.003387458006719, |
| "grad_norm": 56.37332534790039, |
| "learning_rate": 2.1681131019036953e-05, |
| "loss": 18.6133, |
| "step": 15500 |
| }, |
| { |
| "epoch": 8.003667413213886, |
| "grad_norm": 55.63984680175781, |
| "learning_rate": 2.1821108622620383e-05, |
| "loss": 18.7277, |
| "step": 15600 |
| }, |
| { |
| "epoch": 8.003947368421052, |
| "grad_norm": 53.248497009277344, |
| "learning_rate": 2.196108622620381e-05, |
| "loss": 18.8904, |
| "step": 15700 |
| }, |
| { |
| "epoch": 8.00422732362822, |
| "grad_norm": 56.491886138916016, |
| "learning_rate": 2.2101063829787234e-05, |
| "loss": 18.7138, |
| "step": 15800 |
| }, |
| { |
| "epoch": 8.004507278835387, |
| "grad_norm": 53.40477752685547, |
| "learning_rate": 2.2241041433370663e-05, |
| "loss": 18.5271, |
| "step": 15900 |
| }, |
| { |
| "epoch": 8.004787234042553, |
| "grad_norm": 54.428314208984375, |
| "learning_rate": 2.2381019036954086e-05, |
| "loss": 18.323, |
| "step": 16000 |
| }, |
| { |
| "epoch": 8.005, |
| "eval_accuracy": 0.13253319713993872, |
| "eval_f1": 0.07727072976763863, |
| "eval_loss": 5.4230194091796875, |
| "eval_precision": 0.072025240634506, |
| "eval_recall": 0.13253319713993872, |
| "eval_runtime": 228.6261, |
| "eval_samples_per_second": 17.128, |
| "eval_steps_per_second": 8.564, |
| "eval_top_10_accuracy": 0.4101123595505618, |
| "eval_top_1_accuracy": 0.1317671092951992, |
| "eval_top_5_accuracy": 0.31128702757916243, |
| "step": 16076 |
| }, |
| { |
| "epoch": 9.000066489361702, |
| "grad_norm": 58.01615905761719, |
| "learning_rate": 2.2520996640537515e-05, |
| "loss": 18.0109, |
| "step": 16100 |
| }, |
| { |
| "epoch": 9.00034644456887, |
| "grad_norm": 55.21884536743164, |
| "learning_rate": 2.266097424412094e-05, |
| "loss": 16.6007, |
| "step": 16200 |
| }, |
| { |
| "epoch": 9.000626399776035, |
| "grad_norm": 55.47819137573242, |
| "learning_rate": 2.280095184770437e-05, |
| "loss": 16.6149, |
| "step": 16300 |
| }, |
| { |
| "epoch": 9.000906354983202, |
| "grad_norm": 53.711814880371094, |
| "learning_rate": 2.2940929451287796e-05, |
| "loss": 16.6412, |
| "step": 16400 |
| }, |
| { |
| "epoch": 9.00118631019037, |
| "grad_norm": 54.73662185668945, |
| "learning_rate": 2.308090705487122e-05, |
| "loss": 16.2739, |
| "step": 16500 |
| }, |
| { |
| "epoch": 9.001466265397536, |
| "grad_norm": 58.08010482788086, |
| "learning_rate": 2.3220884658454647e-05, |
| "loss": 16.38, |
| "step": 16600 |
| }, |
| { |
| "epoch": 9.001746220604703, |
| "grad_norm": 55.03322982788086, |
| "learning_rate": 2.3360862262038073e-05, |
| "loss": 16.4484, |
| "step": 16700 |
| }, |
| { |
| "epoch": 9.00202617581187, |
| "grad_norm": 57.478553771972656, |
| "learning_rate": 2.3500839865621502e-05, |
| "loss": 16.333, |
| "step": 16800 |
| }, |
| { |
| "epoch": 9.002306131019036, |
| "grad_norm": 56.241336822509766, |
| "learning_rate": 2.3640817469204928e-05, |
| "loss": 16.2893, |
| "step": 16900 |
| }, |
| { |
| "epoch": 9.002586086226204, |
| "grad_norm": 54.41828536987305, |
| "learning_rate": 2.3780795072788357e-05, |
| "loss": 16.3389, |
| "step": 17000 |
| }, |
| { |
| "epoch": 9.002866041433371, |
| "grad_norm": 55.62673568725586, |
| "learning_rate": 2.392077267637178e-05, |
| "loss": 16.1732, |
| "step": 17100 |
| }, |
| { |
| "epoch": 9.003145996640537, |
| "grad_norm": 57.23749542236328, |
| "learning_rate": 2.406075027995521e-05, |
| "loss": 16.371, |
| "step": 17200 |
| }, |
| { |
| "epoch": 9.003425951847705, |
| "grad_norm": 54.90141677856445, |
| "learning_rate": 2.4200727883538635e-05, |
| "loss": 16.3029, |
| "step": 17300 |
| }, |
| { |
| "epoch": 9.00370590705487, |
| "grad_norm": 53.32387924194336, |
| "learning_rate": 2.434070548712206e-05, |
| "loss": 15.9532, |
| "step": 17400 |
| }, |
| { |
| "epoch": 9.003985862262038, |
| "grad_norm": 57.22005844116211, |
| "learning_rate": 2.448068309070549e-05, |
| "loss": 16.1949, |
| "step": 17500 |
| }, |
| { |
| "epoch": 9.004265817469205, |
| "grad_norm": 53.49818420410156, |
| "learning_rate": 2.4620660694288916e-05, |
| "loss": 16.0044, |
| "step": 17600 |
| }, |
| { |
| "epoch": 9.004545772676371, |
| "grad_norm": 56.9859619140625, |
| "learning_rate": 2.476063829787234e-05, |
| "loss": 15.9484, |
| "step": 17700 |
| }, |
| { |
| "epoch": 9.004825727883539, |
| "grad_norm": 53.9587516784668, |
| "learning_rate": 2.4900615901455767e-05, |
| "loss": 15.752, |
| "step": 17800 |
| }, |
| { |
| "epoch": 9.004999300111981, |
| "eval_accuracy": 0.16189989785495404, |
| "eval_f1": 0.10126287636515371, |
| "eval_loss": 5.050577163696289, |
| "eval_precision": 0.0953715780728942, |
| "eval_recall": 0.16189989785495404, |
| "eval_runtime": 231.5088, |
| "eval_samples_per_second": 16.915, |
| "eval_steps_per_second": 8.458, |
| "eval_top_10_accuracy": 0.4846782431052094, |
| "eval_top_1_accuracy": 0.16215526046986722, |
| "eval_top_5_accuracy": 0.359805924412666, |
| "step": 17862 |
| }, |
| { |
| "epoch": 10.000104983202688, |
| "grad_norm": 56.3593635559082, |
| "learning_rate": 2.503919372900336e-05, |
| "loss": 15.1907, |
| "step": 17900 |
| }, |
| { |
| "epoch": 10.000384938409855, |
| "grad_norm": 53.651336669921875, |
| "learning_rate": 2.5179171332586787e-05, |
| "loss": 13.7771, |
| "step": 18000 |
| }, |
| { |
| "epoch": 10.00066489361702, |
| "grad_norm": 58.42094421386719, |
| "learning_rate": 2.5319148936170213e-05, |
| "loss": 13.9784, |
| "step": 18100 |
| }, |
| { |
| "epoch": 10.000944848824188, |
| "grad_norm": 55.46380615234375, |
| "learning_rate": 2.5459126539753642e-05, |
| "loss": 13.5654, |
| "step": 18200 |
| }, |
| { |
| "epoch": 10.001224804031356, |
| "grad_norm": 58.18601608276367, |
| "learning_rate": 2.5599104143337065e-05, |
| "loss": 13.6288, |
| "step": 18300 |
| }, |
| { |
| "epoch": 10.001504759238522, |
| "grad_norm": 58.32529067993164, |
| "learning_rate": 2.5739081746920497e-05, |
| "loss": 13.7221, |
| "step": 18400 |
| }, |
| { |
| "epoch": 10.001784714445689, |
| "grad_norm": 56.736839294433594, |
| "learning_rate": 2.587905935050392e-05, |
| "loss": 13.6705, |
| "step": 18500 |
| }, |
| { |
| "epoch": 10.002064669652855, |
| "grad_norm": 58.743064880371094, |
| "learning_rate": 2.6019036954087345e-05, |
| "loss": 13.9223, |
| "step": 18600 |
| }, |
| { |
| "epoch": 10.002344624860022, |
| "grad_norm": 55.71434783935547, |
| "learning_rate": 2.6159014557670775e-05, |
| "loss": 13.5166, |
| "step": 18700 |
| }, |
| { |
| "epoch": 10.00262458006719, |
| "grad_norm": 51.18729019165039, |
| "learning_rate": 2.62989921612542e-05, |
| "loss": 13.3252, |
| "step": 18800 |
| }, |
| { |
| "epoch": 10.002904535274356, |
| "grad_norm": 56.28409194946289, |
| "learning_rate": 2.643896976483763e-05, |
| "loss": 13.422, |
| "step": 18900 |
| }, |
| { |
| "epoch": 10.003184490481523, |
| "grad_norm": 56.53131866455078, |
| "learning_rate": 2.6578947368421052e-05, |
| "loss": 13.6352, |
| "step": 19000 |
| }, |
| { |
| "epoch": 10.00346444568869, |
| "grad_norm": 54.37139892578125, |
| "learning_rate": 2.6718924972004485e-05, |
| "loss": 13.7026, |
| "step": 19100 |
| }, |
| { |
| "epoch": 10.003744400895856, |
| "grad_norm": 54.172210693359375, |
| "learning_rate": 2.6858902575587907e-05, |
| "loss": 13.3519, |
| "step": 19200 |
| }, |
| { |
| "epoch": 10.004024356103024, |
| "grad_norm": 59.11737823486328, |
| "learning_rate": 2.6998880179171333e-05, |
| "loss": 13.4746, |
| "step": 19300 |
| }, |
| { |
| "epoch": 10.00430431131019, |
| "grad_norm": 60.201297760009766, |
| "learning_rate": 2.7138857782754762e-05, |
| "loss": 13.135, |
| "step": 19400 |
| }, |
| { |
| "epoch": 10.004584266517357, |
| "grad_norm": 62.821327209472656, |
| "learning_rate": 2.7278835386338188e-05, |
| "loss": 13.5812, |
| "step": 19500 |
| }, |
| { |
| "epoch": 10.004864221724524, |
| "grad_norm": 56.00190734863281, |
| "learning_rate": 2.7418812989921617e-05, |
| "loss": 13.1613, |
| "step": 19600 |
| }, |
| { |
| "epoch": 10.004998600223963, |
| "eval_accuracy": 0.1940755873340143, |
| "eval_f1": 0.13521619871280463, |
| "eval_loss": 4.658937454223633, |
| "eval_precision": 0.1293644795170035, |
| "eval_recall": 0.1940755873340143, |
| "eval_runtime": 218.1856, |
| "eval_samples_per_second": 17.948, |
| "eval_steps_per_second": 8.974, |
| "eval_top_10_accuracy": 0.556435137895812, |
| "eval_top_1_accuracy": 0.19330949948927478, |
| "eval_top_5_accuracy": 0.432073544433095, |
| "step": 19648 |
| }, |
| { |
| "epoch": 11.000143477043673, |
| "grad_norm": 50.50157928466797, |
| "learning_rate": 2.755879059350504e-05, |
| "loss": 12.0765, |
| "step": 19700 |
| }, |
| { |
| "epoch": 11.00042343225084, |
| "grad_norm": 60.919158935546875, |
| "learning_rate": 2.7698768197088465e-05, |
| "loss": 10.9856, |
| "step": 19800 |
| }, |
| { |
| "epoch": 11.000703387458007, |
| "grad_norm": 55.98445510864258, |
| "learning_rate": 2.7838745800671895e-05, |
| "loss": 11.1553, |
| "step": 19900 |
| }, |
| { |
| "epoch": 11.000983342665174, |
| "grad_norm": 53.11532211303711, |
| "learning_rate": 2.797872340425532e-05, |
| "loss": 10.8397, |
| "step": 20000 |
| }, |
| { |
| "epoch": 11.00126329787234, |
| "grad_norm": 58.633155822753906, |
| "learning_rate": 2.811870100783875e-05, |
| "loss": 11.1483, |
| "step": 20100 |
| }, |
| { |
| "epoch": 11.001543253079507, |
| "grad_norm": 55.615665435791016, |
| "learning_rate": 2.8258678611422172e-05, |
| "loss": 11.0123, |
| "step": 20200 |
| }, |
| { |
| "epoch": 11.001823208286675, |
| "grad_norm": 57.123165130615234, |
| "learning_rate": 2.8398656215005598e-05, |
| "loss": 10.5229, |
| "step": 20300 |
| }, |
| { |
| "epoch": 11.00210316349384, |
| "grad_norm": 51.48651885986328, |
| "learning_rate": 2.8538633818589027e-05, |
| "loss": 11.0409, |
| "step": 20400 |
| }, |
| { |
| "epoch": 11.002383118701008, |
| "grad_norm": 59.33769226074219, |
| "learning_rate": 2.8678611422172453e-05, |
| "loss": 11.1909, |
| "step": 20500 |
| }, |
| { |
| "epoch": 11.002663073908174, |
| "grad_norm": 56.830570220947266, |
| "learning_rate": 2.8818589025755882e-05, |
| "loss": 11.045, |
| "step": 20600 |
| }, |
| { |
| "epoch": 11.002943029115341, |
| "grad_norm": 59.105621337890625, |
| "learning_rate": 2.8958566629339308e-05, |
| "loss": 11.0254, |
| "step": 20700 |
| }, |
| { |
| "epoch": 11.003222984322509, |
| "grad_norm": 57.58058547973633, |
| "learning_rate": 2.9098544232922737e-05, |
| "loss": 11.1625, |
| "step": 20800 |
| }, |
| { |
| "epoch": 11.003502939529675, |
| "grad_norm": 60.10997009277344, |
| "learning_rate": 2.923852183650616e-05, |
| "loss": 10.6298, |
| "step": 20900 |
| }, |
| { |
| "epoch": 11.003782894736842, |
| "grad_norm": 54.05520248413086, |
| "learning_rate": 2.9378499440089585e-05, |
| "loss": 10.8325, |
| "step": 21000 |
| }, |
| { |
| "epoch": 11.00406284994401, |
| "grad_norm": 50.31863784790039, |
| "learning_rate": 2.9518477043673014e-05, |
| "loss": 10.8705, |
| "step": 21100 |
| }, |
| { |
| "epoch": 11.004342805151175, |
| "grad_norm": 58.63640594482422, |
| "learning_rate": 2.965845464725644e-05, |
| "loss": 10.7095, |
| "step": 21200 |
| }, |
| { |
| "epoch": 11.004622760358343, |
| "grad_norm": 56.01993179321289, |
| "learning_rate": 2.979703247480403e-05, |
| "loss": 10.4509, |
| "step": 21300 |
| }, |
| { |
| "epoch": 11.00490271556551, |
| "grad_norm": 61.992340087890625, |
| "learning_rate": 2.9937010078387464e-05, |
| "loss": 10.1883, |
| "step": 21400 |
| }, |
| { |
| "epoch": 11.005000699888019, |
| "eval_accuracy": 0.22625127681307455, |
| "eval_f1": 0.16842648729319404, |
| "eval_loss": 4.3067474365234375, |
| "eval_precision": 0.16332781901519114, |
| "eval_recall": 0.22625127681307455, |
| "eval_runtime": 226.1299, |
| "eval_samples_per_second": 17.317, |
| "eval_steps_per_second": 8.659, |
| "eval_top_10_accuracy": 0.6138917262512769, |
| "eval_top_1_accuracy": 0.22625127681307455, |
| "eval_top_5_accuracy": 0.4946373850868233, |
| "step": 21435 |
| }, |
| { |
| "epoch": 12.00018197088466, |
| "grad_norm": 44.793460845947266, |
| "learning_rate": 3.0076987681970886e-05, |
| "loss": 9.2143, |
| "step": 21500 |
| }, |
| { |
| "epoch": 12.000461926091825, |
| "grad_norm": 53.28424072265625, |
| "learning_rate": 3.0216965285554312e-05, |
| "loss": 8.0788, |
| "step": 21600 |
| }, |
| { |
| "epoch": 12.000741881298993, |
| "grad_norm": 56.62223434448242, |
| "learning_rate": 3.035694288913774e-05, |
| "loss": 8.3003, |
| "step": 21700 |
| }, |
| { |
| "epoch": 12.001021836506158, |
| "grad_norm": 50.635013580322266, |
| "learning_rate": 3.0496920492721163e-05, |
| "loss": 8.0193, |
| "step": 21800 |
| }, |
| { |
| "epoch": 12.001301791713326, |
| "grad_norm": 46.23341751098633, |
| "learning_rate": 3.0636898096304596e-05, |
| "loss": 8.2961, |
| "step": 21900 |
| }, |
| { |
| "epoch": 12.001581746920493, |
| "grad_norm": 61.92805862426758, |
| "learning_rate": 3.077687569988802e-05, |
| "loss": 8.3955, |
| "step": 22000 |
| }, |
| { |
| "epoch": 12.001861702127659, |
| "grad_norm": 59.257286071777344, |
| "learning_rate": 3.091685330347144e-05, |
| "loss": 8.398, |
| "step": 22100 |
| }, |
| { |
| "epoch": 12.002141657334827, |
| "grad_norm": 58.87281799316406, |
| "learning_rate": 3.1056830907054873e-05, |
| "loss": 8.2248, |
| "step": 22200 |
| }, |
| { |
| "epoch": 12.002421612541994, |
| "grad_norm": 60.63299560546875, |
| "learning_rate": 3.11968085106383e-05, |
| "loss": 8.4425, |
| "step": 22300 |
| }, |
| { |
| "epoch": 12.00270156774916, |
| "grad_norm": 60.67166519165039, |
| "learning_rate": 3.1336786114221725e-05, |
| "loss": 8.2765, |
| "step": 22400 |
| }, |
| { |
| "epoch": 12.002981522956327, |
| "grad_norm": 53.16400146484375, |
| "learning_rate": 3.147676371780515e-05, |
| "loss": 8.1763, |
| "step": 22500 |
| }, |
| { |
| "epoch": 12.003261478163493, |
| "grad_norm": 56.289981842041016, |
| "learning_rate": 3.161674132138858e-05, |
| "loss": 8.4768, |
| "step": 22600 |
| }, |
| { |
| "epoch": 12.00354143337066, |
| "grad_norm": 59.1424560546875, |
| "learning_rate": 3.175671892497201e-05, |
| "loss": 8.1219, |
| "step": 22700 |
| }, |
| { |
| "epoch": 12.003821388577828, |
| "grad_norm": 65.8774642944336, |
| "learning_rate": 3.189669652855543e-05, |
| "loss": 8.3595, |
| "step": 22800 |
| }, |
| { |
| "epoch": 12.004101343784994, |
| "grad_norm": 62.18092727661133, |
| "learning_rate": 3.203667413213886e-05, |
| "loss": 8.351, |
| "step": 22900 |
| }, |
| { |
| "epoch": 12.004381298992161, |
| "grad_norm": 58.203758239746094, |
| "learning_rate": 3.217665173572229e-05, |
| "loss": 8.0542, |
| "step": 23000 |
| }, |
| { |
| "epoch": 12.004661254199329, |
| "grad_norm": 57.017539978027344, |
| "learning_rate": 3.231662933930571e-05, |
| "loss": 8.0881, |
| "step": 23100 |
| }, |
| { |
| "epoch": 12.004941209406494, |
| "grad_norm": 60.21388244628906, |
| "learning_rate": 3.245660694288914e-05, |
| "loss": 8.1852, |
| "step": 23200 |
| }, |
| { |
| "epoch": 12.005, |
| "eval_accuracy": 0.24719101123595505, |
| "eval_f1": 0.19268227724832526, |
| "eval_loss": 4.012407302856445, |
| "eval_precision": 0.18863082228029204, |
| "eval_recall": 0.24719101123595505, |
| "eval_runtime": 226.6718, |
| "eval_samples_per_second": 17.276, |
| "eval_steps_per_second": 8.638, |
| "eval_top_10_accuracy": 0.6527068437180796, |
| "eval_top_1_accuracy": 0.24744637385086823, |
| "eval_top_5_accuracy": 0.5314096016343207, |
| "step": 23221 |
| }, |
| { |
| "epoch": 13.000220464725643, |
| "grad_norm": 40.455081939697266, |
| "learning_rate": 3.2596584546472564e-05, |
| "loss": 6.7682, |
| "step": 23300 |
| }, |
| { |
| "epoch": 13.000500419932811, |
| "grad_norm": 41.39524841308594, |
| "learning_rate": 3.2736562150056e-05, |
| "loss": 5.8189, |
| "step": 23400 |
| }, |
| { |
| "epoch": 13.000780375139978, |
| "grad_norm": 58.26043701171875, |
| "learning_rate": 3.2876539753639416e-05, |
| "loss": 6.1814, |
| "step": 23500 |
| }, |
| { |
| "epoch": 13.001060330347144, |
| "grad_norm": 50.91895294189453, |
| "learning_rate": 3.301511758118701e-05, |
| "loss": 6.0026, |
| "step": 23600 |
| }, |
| { |
| "epoch": 13.001340285554312, |
| "grad_norm": 63.752769470214844, |
| "learning_rate": 3.315509518477044e-05, |
| "loss": 5.9956, |
| "step": 23700 |
| }, |
| { |
| "epoch": 13.001620240761477, |
| "grad_norm": 51.161293029785156, |
| "learning_rate": 3.329507278835387e-05, |
| "loss": 5.9912, |
| "step": 23800 |
| }, |
| { |
| "epoch": 13.001900195968645, |
| "grad_norm": 48.86064147949219, |
| "learning_rate": 3.343505039193729e-05, |
| "loss": 5.9778, |
| "step": 23900 |
| }, |
| { |
| "epoch": 13.002180151175812, |
| "grad_norm": 57.04310607910156, |
| "learning_rate": 3.357502799552072e-05, |
| "loss": 6.0001, |
| "step": 24000 |
| }, |
| { |
| "epoch": 13.002460106382978, |
| "grad_norm": 46.83991241455078, |
| "learning_rate": 3.3715005599104146e-05, |
| "loss": 5.8923, |
| "step": 24100 |
| }, |
| { |
| "epoch": 13.002740061590146, |
| "grad_norm": 59.89580154418945, |
| "learning_rate": 3.385498320268757e-05, |
| "loss": 6.0239, |
| "step": 24200 |
| }, |
| { |
| "epoch": 13.003020016797313, |
| "grad_norm": 58.387611389160156, |
| "learning_rate": 3.3994960806271e-05, |
| "loss": 6.1917, |
| "step": 24300 |
| }, |
| { |
| "epoch": 13.003299972004479, |
| "grad_norm": 59.23344802856445, |
| "learning_rate": 3.413493840985442e-05, |
| "loss": 5.6954, |
| "step": 24400 |
| }, |
| { |
| "epoch": 13.003579927211646, |
| "grad_norm": 63.508567810058594, |
| "learning_rate": 3.427491601343785e-05, |
| "loss": 5.912, |
| "step": 24500 |
| }, |
| { |
| "epoch": 13.003859882418814, |
| "grad_norm": 58.91965103149414, |
| "learning_rate": 3.4414893617021275e-05, |
| "loss": 6.0244, |
| "step": 24600 |
| }, |
| { |
| "epoch": 13.00413983762598, |
| "grad_norm": 57.9660530090332, |
| "learning_rate": 3.455487122060471e-05, |
| "loss": 5.9531, |
| "step": 24700 |
| }, |
| { |
| "epoch": 13.004419792833147, |
| "grad_norm": 51.39337921142578, |
| "learning_rate": 3.469484882418813e-05, |
| "loss": 5.8885, |
| "step": 24800 |
| }, |
| { |
| "epoch": 13.004699748040313, |
| "grad_norm": 43.0623779296875, |
| "learning_rate": 3.483482642777156e-05, |
| "loss": 5.954, |
| "step": 24900 |
| }, |
| { |
| "epoch": 13.00497970324748, |
| "grad_norm": 62.9230842590332, |
| "learning_rate": 3.4974804031354985e-05, |
| "loss": 5.88, |
| "step": 25000 |
| }, |
| { |
| "epoch": 13.004999300111981, |
| "eval_accuracy": 0.26506639427987744, |
| "eval_f1": 0.21861788640092267, |
| "eval_loss": 3.7566869258880615, |
| "eval_precision": 0.21875192535304894, |
| "eval_recall": 0.26506639427987744, |
| "eval_runtime": 227.0931, |
| "eval_samples_per_second": 17.244, |
| "eval_steps_per_second": 8.622, |
| "eval_top_10_accuracy": 0.6884576098059244, |
| "eval_top_1_accuracy": 0.26506639427987744, |
| "eval_top_5_accuracy": 0.5720122574055159, |
| "step": 25007 |
| }, |
| { |
| "epoch": 14.00025895856663, |
| "grad_norm": 33.66314697265625, |
| "learning_rate": 3.511478163493841e-05, |
| "loss": 4.0538, |
| "step": 25100 |
| }, |
| { |
| "epoch": 14.000538913773797, |
| "grad_norm": 51.570316314697266, |
| "learning_rate": 3.5254759238521836e-05, |
| "loss": 3.9683, |
| "step": 25200 |
| }, |
| { |
| "epoch": 14.000818868980963, |
| "grad_norm": 39.41959762573242, |
| "learning_rate": 3.539473684210526e-05, |
| "loss": 4.1115, |
| "step": 25300 |
| }, |
| { |
| "epoch": 14.00109882418813, |
| "grad_norm": 46.20742416381836, |
| "learning_rate": 3.5534714445688695e-05, |
| "loss": 4.2485, |
| "step": 25400 |
| }, |
| { |
| "epoch": 14.001378779395298, |
| "grad_norm": 32.3474235534668, |
| "learning_rate": 3.567469204927212e-05, |
| "loss": 4.0841, |
| "step": 25500 |
| }, |
| { |
| "epoch": 14.001658734602463, |
| "grad_norm": 50.43800735473633, |
| "learning_rate": 3.581326987681971e-05, |
| "loss": 3.9711, |
| "step": 25600 |
| }, |
| { |
| "epoch": 14.00193868980963, |
| "grad_norm": 42.714744567871094, |
| "learning_rate": 3.5953247480403134e-05, |
| "loss": 4.0905, |
| "step": 25700 |
| }, |
| { |
| "epoch": 14.002218645016796, |
| "grad_norm": 50.72501754760742, |
| "learning_rate": 3.6093225083986566e-05, |
| "loss": 4.2036, |
| "step": 25800 |
| }, |
| { |
| "epoch": 14.002498600223964, |
| "grad_norm": 44.00167465209961, |
| "learning_rate": 3.623320268756999e-05, |
| "loss": 3.9163, |
| "step": 25900 |
| }, |
| { |
| "epoch": 14.002778555431131, |
| "grad_norm": 36.54977798461914, |
| "learning_rate": 3.637318029115342e-05, |
| "loss": 4.1644, |
| "step": 26000 |
| }, |
| { |
| "epoch": 14.003058510638297, |
| "grad_norm": 31.148479461669922, |
| "learning_rate": 3.6513157894736844e-05, |
| "loss": 4.1178, |
| "step": 26100 |
| }, |
| { |
| "epoch": 14.003338465845465, |
| "grad_norm": 50.2994384765625, |
| "learning_rate": 3.665313549832027e-05, |
| "loss": 3.9082, |
| "step": 26200 |
| }, |
| { |
| "epoch": 14.003618421052632, |
| "grad_norm": 53.5435905456543, |
| "learning_rate": 3.6793113101903695e-05, |
| "loss": 4.2392, |
| "step": 26300 |
| }, |
| { |
| "epoch": 14.003898376259798, |
| "grad_norm": 32.26350402832031, |
| "learning_rate": 3.693309070548712e-05, |
| "loss": 4.026, |
| "step": 26400 |
| }, |
| { |
| "epoch": 14.004178331466965, |
| "grad_norm": 51.61219024658203, |
| "learning_rate": 3.7073068309070554e-05, |
| "loss": 4.1701, |
| "step": 26500 |
| }, |
| { |
| "epoch": 14.004458286674133, |
| "grad_norm": 60.40871810913086, |
| "learning_rate": 3.721304591265398e-05, |
| "loss": 4.1327, |
| "step": 26600 |
| }, |
| { |
| "epoch": 14.004738241881299, |
| "grad_norm": 55.58240509033203, |
| "learning_rate": 3.7353023516237405e-05, |
| "loss": 4.5173, |
| "step": 26700 |
| }, |
| { |
| "epoch": 14.004998600223963, |
| "eval_accuracy": 0.2763023493360572, |
| "eval_f1": 0.2372876851202756, |
| "eval_loss": 3.6064484119415283, |
| "eval_precision": 0.24398663297186698, |
| "eval_recall": 0.2763023493360572, |
| "eval_runtime": 223.8625, |
| "eval_samples_per_second": 17.493, |
| "eval_steps_per_second": 8.746, |
| "eval_top_10_accuracy": 0.7124616956077631, |
| "eval_top_1_accuracy": 0.2763023493360572, |
| "eval_top_5_accuracy": 0.5926966292134831, |
| "step": 26793 |
| }, |
| { |
| "epoch": 15.000017497200448, |
| "grad_norm": 46.20799255371094, |
| "learning_rate": 3.749300111982083e-05, |
| "loss": 4.0528, |
| "step": 26800 |
| }, |
| { |
| "epoch": 15.000297452407615, |
| "grad_norm": 52.91650390625, |
| "learning_rate": 3.763297872340426e-05, |
| "loss": 2.4598, |
| "step": 26900 |
| }, |
| { |
| "epoch": 15.00057740761478, |
| "grad_norm": 29.833009719848633, |
| "learning_rate": 3.777295632698768e-05, |
| "loss": 2.5363, |
| "step": 27000 |
| }, |
| { |
| "epoch": 15.000857362821948, |
| "grad_norm": 42.154937744140625, |
| "learning_rate": 3.791293393057111e-05, |
| "loss": 2.4495, |
| "step": 27100 |
| }, |
| { |
| "epoch": 15.001137318029116, |
| "grad_norm": 40.74216079711914, |
| "learning_rate": 3.805291153415454e-05, |
| "loss": 2.5793, |
| "step": 27200 |
| }, |
| { |
| "epoch": 15.001417273236282, |
| "grad_norm": 41.93460464477539, |
| "learning_rate": 3.819288913773797e-05, |
| "loss": 2.6526, |
| "step": 27300 |
| }, |
| { |
| "epoch": 15.001697228443449, |
| "grad_norm": 29.095109939575195, |
| "learning_rate": 3.8332866741321386e-05, |
| "loss": 2.7321, |
| "step": 27400 |
| }, |
| { |
| "epoch": 15.001977183650617, |
| "grad_norm": 46.61332702636719, |
| "learning_rate": 3.847284434490482e-05, |
| "loss": 2.8783, |
| "step": 27500 |
| }, |
| { |
| "epoch": 15.002257138857782, |
| "grad_norm": 47.55717086791992, |
| "learning_rate": 3.8612821948488244e-05, |
| "loss": 3.0291, |
| "step": 27600 |
| }, |
| { |
| "epoch": 15.00253709406495, |
| "grad_norm": 35.126800537109375, |
| "learning_rate": 3.875279955207167e-05, |
| "loss": 2.8221, |
| "step": 27700 |
| }, |
| { |
| "epoch": 15.002817049272117, |
| "grad_norm": 33.11594772338867, |
| "learning_rate": 3.8891377379619264e-05, |
| "loss": 2.8732, |
| "step": 27800 |
| }, |
| { |
| "epoch": 15.003097004479283, |
| "grad_norm": 39.2805290222168, |
| "learning_rate": 3.903135498320269e-05, |
| "loss": 2.6688, |
| "step": 27900 |
| }, |
| { |
| "epoch": 15.00337695968645, |
| "grad_norm": 47.020259857177734, |
| "learning_rate": 3.9171332586786116e-05, |
| "loss": 2.8454, |
| "step": 28000 |
| }, |
| { |
| "epoch": 15.003656914893616, |
| "grad_norm": 28.50092124938965, |
| "learning_rate": 3.931131019036954e-05, |
| "loss": 2.9415, |
| "step": 28100 |
| }, |
| { |
| "epoch": 15.003936870100784, |
| "grad_norm": 51.885154724121094, |
| "learning_rate": 3.945128779395297e-05, |
| "loss": 2.94, |
| "step": 28200 |
| }, |
| { |
| "epoch": 15.004216825307951, |
| "grad_norm": 37.81270217895508, |
| "learning_rate": 3.95912653975364e-05, |
| "loss": 2.8525, |
| "step": 28300 |
| }, |
| { |
| "epoch": 15.004496780515117, |
| "grad_norm": 38.692378997802734, |
| "learning_rate": 3.973124300111982e-05, |
| "loss": 2.9696, |
| "step": 28400 |
| }, |
| { |
| "epoch": 15.004776735722285, |
| "grad_norm": 52.6309928894043, |
| "learning_rate": 3.987122060470325e-05, |
| "loss": 2.851, |
| "step": 28500 |
| }, |
| { |
| "epoch": 15.005000699888019, |
| "eval_accuracy": 0.27783452502553624, |
| "eval_f1": 0.2434374171463338, |
| "eval_loss": 3.5264151096343994, |
| "eval_precision": 0.25338318332444987, |
| "eval_recall": 0.27783452502553624, |
| "eval_runtime": 226.3306, |
| "eval_samples_per_second": 17.302, |
| "eval_steps_per_second": 8.651, |
| "eval_top_10_accuracy": 0.719611848825332, |
| "eval_top_1_accuracy": 0.2775791624106231, |
| "eval_top_5_accuracy": 0.608273748723187, |
| "step": 28580 |
| }, |
| { |
| "epoch": 16.00005599104143, |
| "grad_norm": 35.61888885498047, |
| "learning_rate": 4.001119820828668e-05, |
| "loss": 2.6647, |
| "step": 28600 |
| }, |
| { |
| "epoch": 16.0003359462486, |
| "grad_norm": 33.327659606933594, |
| "learning_rate": 4.0151175811870103e-05, |
| "loss": 1.6989, |
| "step": 28700 |
| }, |
| { |
| "epoch": 16.000615901455767, |
| "grad_norm": 18.319982528686523, |
| "learning_rate": 4.029115341545353e-05, |
| "loss": 1.6956, |
| "step": 28800 |
| }, |
| { |
| "epoch": 16.000895856662932, |
| "grad_norm": 25.141307830810547, |
| "learning_rate": 4.0431131019036955e-05, |
| "loss": 1.715, |
| "step": 28900 |
| }, |
| { |
| "epoch": 16.0011758118701, |
| "grad_norm": 43.47077560424805, |
| "learning_rate": 4.057110862262039e-05, |
| "loss": 2.0691, |
| "step": 29000 |
| }, |
| { |
| "epoch": 16.001455767077267, |
| "grad_norm": 16.283327102661133, |
| "learning_rate": 4.071108622620381e-05, |
| "loss": 1.8248, |
| "step": 29100 |
| }, |
| { |
| "epoch": 16.001735722284433, |
| "grad_norm": 35.769107818603516, |
| "learning_rate": 4.085106382978723e-05, |
| "loss": 2.0076, |
| "step": 29200 |
| }, |
| { |
| "epoch": 16.002015677491602, |
| "grad_norm": 49.17667770385742, |
| "learning_rate": 4.0991041433370665e-05, |
| "loss": 2.0197, |
| "step": 29300 |
| }, |
| { |
| "epoch": 16.002295632698768, |
| "grad_norm": 57.587589263916016, |
| "learning_rate": 4.113101903695409e-05, |
| "loss": 1.9761, |
| "step": 29400 |
| }, |
| { |
| "epoch": 16.002575587905934, |
| "grad_norm": 49.32463836669922, |
| "learning_rate": 4.127099664053752e-05, |
| "loss": 1.9658, |
| "step": 29500 |
| }, |
| { |
| "epoch": 16.002855543113103, |
| "grad_norm": 38.368656158447266, |
| "learning_rate": 4.141097424412094e-05, |
| "loss": 2.1054, |
| "step": 29600 |
| }, |
| { |
| "epoch": 16.00313549832027, |
| "grad_norm": 43.137908935546875, |
| "learning_rate": 4.155095184770437e-05, |
| "loss": 2.1054, |
| "step": 29700 |
| }, |
| { |
| "epoch": 16.003415453527435, |
| "grad_norm": 34.75947952270508, |
| "learning_rate": 4.1690929451287794e-05, |
| "loss": 2.1547, |
| "step": 29800 |
| }, |
| { |
| "epoch": 16.003695408734604, |
| "grad_norm": 55.931907653808594, |
| "learning_rate": 4.183090705487122e-05, |
| "loss": 2.0834, |
| "step": 29900 |
| }, |
| { |
| "epoch": 16.00397536394177, |
| "grad_norm": 22.097393035888672, |
| "learning_rate": 4.197088465845465e-05, |
| "loss": 1.9993, |
| "step": 30000 |
| }, |
| { |
| "epoch": 16.004255319148935, |
| "grad_norm": 36.39588928222656, |
| "learning_rate": 4.211086226203808e-05, |
| "loss": 2.208, |
| "step": 30100 |
| }, |
| { |
| "epoch": 16.004535274356105, |
| "grad_norm": 25.969524383544922, |
| "learning_rate": 4.2249440089585666e-05, |
| "loss": 2.025, |
| "step": 30200 |
| }, |
| { |
| "epoch": 16.00481522956327, |
| "grad_norm": 30.56204605102539, |
| "learning_rate": 4.238941769316909e-05, |
| "loss": 2.2493, |
| "step": 30300 |
| }, |
| { |
| "epoch": 16.005, |
| "eval_accuracy": 0.27247191011235955, |
| "eval_f1": 0.23831057018449936, |
| "eval_loss": 3.500147581100464, |
| "eval_precision": 0.24710587923462707, |
| "eval_recall": 0.27247191011235955, |
| "eval_runtime": 226.3004, |
| "eval_samples_per_second": 17.304, |
| "eval_steps_per_second": 8.652, |
| "eval_top_10_accuracy": 0.7183350357507661, |
| "eval_top_1_accuracy": 0.27247191011235955, |
| "eval_top_5_accuracy": 0.6085291113381001, |
| "step": 30366 |
| }, |
| { |
| "epoch": 17.000094484882418, |
| "grad_norm": 9.153155326843262, |
| "learning_rate": 4.2529395296752524e-05, |
| "loss": 1.7014, |
| "step": 30400 |
| }, |
| { |
| "epoch": 17.000374440089587, |
| "grad_norm": 23.366758346557617, |
| "learning_rate": 4.266937290033595e-05, |
| "loss": 1.2216, |
| "step": 30500 |
| }, |
| { |
| "epoch": 17.000654395296753, |
| "grad_norm": 41.90321350097656, |
| "learning_rate": 4.2809350503919376e-05, |
| "loss": 1.3224, |
| "step": 30600 |
| }, |
| { |
| "epoch": 17.00093435050392, |
| "grad_norm": 16.407875061035156, |
| "learning_rate": 4.29493281075028e-05, |
| "loss": 1.3376, |
| "step": 30700 |
| }, |
| { |
| "epoch": 17.001214305711088, |
| "grad_norm": 16.32012176513672, |
| "learning_rate": 4.3089305711086234e-05, |
| "loss": 1.2108, |
| "step": 30800 |
| }, |
| { |
| "epoch": 17.001494260918253, |
| "grad_norm": 38.008888244628906, |
| "learning_rate": 4.322928331466965e-05, |
| "loss": 1.5091, |
| "step": 30900 |
| }, |
| { |
| "epoch": 17.00177421612542, |
| "grad_norm": 20.518709182739258, |
| "learning_rate": 4.336926091825308e-05, |
| "loss": 1.3844, |
| "step": 31000 |
| }, |
| { |
| "epoch": 17.00205417133259, |
| "grad_norm": 46.348655700683594, |
| "learning_rate": 4.350923852183651e-05, |
| "loss": 1.6624, |
| "step": 31100 |
| }, |
| { |
| "epoch": 17.002334126539754, |
| "grad_norm": 32.697288513183594, |
| "learning_rate": 4.364921612541993e-05, |
| "loss": 1.4262, |
| "step": 31200 |
| }, |
| { |
| "epoch": 17.00261408174692, |
| "grad_norm": 27.621122360229492, |
| "learning_rate": 4.378919372900336e-05, |
| "loss": 1.6205, |
| "step": 31300 |
| }, |
| { |
| "epoch": 17.00289403695409, |
| "grad_norm": 32.8564338684082, |
| "learning_rate": 4.392917133258679e-05, |
| "loss": 1.7276, |
| "step": 31400 |
| }, |
| { |
| "epoch": 17.003173992161255, |
| "grad_norm": 32.846004486083984, |
| "learning_rate": 4.4069148936170215e-05, |
| "loss": 1.5652, |
| "step": 31500 |
| }, |
| { |
| "epoch": 17.00345394736842, |
| "grad_norm": 43.118534088134766, |
| "learning_rate": 4.420912653975364e-05, |
| "loss": 1.7943, |
| "step": 31600 |
| }, |
| { |
| "epoch": 17.003733902575586, |
| "grad_norm": 37.35938262939453, |
| "learning_rate": 4.4349104143337066e-05, |
| "loss": 1.5109, |
| "step": 31700 |
| }, |
| { |
| "epoch": 17.004013857782756, |
| "grad_norm": 16.283411026000977, |
| "learning_rate": 4.44890817469205e-05, |
| "loss": 1.7202, |
| "step": 31800 |
| }, |
| { |
| "epoch": 17.00429381298992, |
| "grad_norm": 57.66909408569336, |
| "learning_rate": 4.462905935050392e-05, |
| "loss": 2.0454, |
| "step": 31900 |
| }, |
| { |
| "epoch": 17.004573768197087, |
| "grad_norm": 30.193222045898438, |
| "learning_rate": 4.4769036954087344e-05, |
| "loss": 1.8866, |
| "step": 32000 |
| }, |
| { |
| "epoch": 17.004853723404256, |
| "grad_norm": 44.54677200317383, |
| "learning_rate": 4.4909014557670776e-05, |
| "loss": 1.9096, |
| "step": 32100 |
| }, |
| { |
| "epoch": 17.004999300111983, |
| "eval_accuracy": 0.2814096016343207, |
| "eval_f1": 0.243171253299741, |
| "eval_loss": 3.4932773113250732, |
| "eval_precision": 0.24868229744378875, |
| "eval_recall": 0.2814096016343207, |
| "eval_runtime": 226.1393, |
| "eval_samples_per_second": 17.317, |
| "eval_steps_per_second": 8.658, |
| "eval_top_10_accuracy": 0.7127170582226762, |
| "eval_top_1_accuracy": 0.2808988764044944, |
| "eval_top_5_accuracy": 0.6013789581205311, |
| "step": 32152 |
| }, |
| { |
| "epoch": 18.000132978723403, |
| "grad_norm": 37.95355224609375, |
| "learning_rate": 4.50489921612542e-05, |
| "loss": 1.5346, |
| "step": 32200 |
| }, |
| { |
| "epoch": 18.000412933930573, |
| "grad_norm": 15.054861068725586, |
| "learning_rate": 4.518896976483763e-05, |
| "loss": 0.9482, |
| "step": 32300 |
| }, |
| { |
| "epoch": 18.00069288913774, |
| "grad_norm": 40.022178649902344, |
| "learning_rate": 4.5328947368421054e-05, |
| "loss": 0.9161, |
| "step": 32400 |
| }, |
| { |
| "epoch": 18.000972844344904, |
| "grad_norm": 22.443218231201172, |
| "learning_rate": 4.5468924972004486e-05, |
| "loss": 1.1182, |
| "step": 32500 |
| }, |
| { |
| "epoch": 18.00125279955207, |
| "grad_norm": 51.938507080078125, |
| "learning_rate": 4.5608902575587905e-05, |
| "loss": 1.188, |
| "step": 32600 |
| }, |
| { |
| "epoch": 18.00153275475924, |
| "grad_norm": 17.9680118560791, |
| "learning_rate": 4.574888017917133e-05, |
| "loss": 1.1892, |
| "step": 32700 |
| }, |
| { |
| "epoch": 18.001812709966405, |
| "grad_norm": 16.573867797851562, |
| "learning_rate": 4.5888857782754764e-05, |
| "loss": 1.2303, |
| "step": 32800 |
| }, |
| { |
| "epoch": 18.00209266517357, |
| "grad_norm": 8.892263412475586, |
| "learning_rate": 4.602883538633819e-05, |
| "loss": 1.1585, |
| "step": 32900 |
| }, |
| { |
| "epoch": 18.00237262038074, |
| "grad_norm": 8.436017036437988, |
| "learning_rate": 4.6168812989921615e-05, |
| "loss": 1.2501, |
| "step": 33000 |
| }, |
| { |
| "epoch": 18.002652575587906, |
| "grad_norm": 52.22962188720703, |
| "learning_rate": 4.630879059350504e-05, |
| "loss": 1.3674, |
| "step": 33100 |
| }, |
| { |
| "epoch": 18.00293253079507, |
| "grad_norm": 9.320150375366211, |
| "learning_rate": 4.644876819708847e-05, |
| "loss": 1.3614, |
| "step": 33200 |
| }, |
| { |
| "epoch": 18.00321248600224, |
| "grad_norm": 11.164777755737305, |
| "learning_rate": 4.658874580067189e-05, |
| "loss": 1.5928, |
| "step": 33300 |
| }, |
| { |
| "epoch": 18.003492441209406, |
| "grad_norm": 39.596923828125, |
| "learning_rate": 4.672872340425532e-05, |
| "loss": 1.5899, |
| "step": 33400 |
| }, |
| { |
| "epoch": 18.003772396416572, |
| "grad_norm": 14.535404205322266, |
| "learning_rate": 4.686870100783875e-05, |
| "loss": 1.467, |
| "step": 33500 |
| }, |
| { |
| "epoch": 18.00405235162374, |
| "grad_norm": 20.554059982299805, |
| "learning_rate": 4.700867861142218e-05, |
| "loss": 1.5696, |
| "step": 33600 |
| }, |
| { |
| "epoch": 18.004332306830907, |
| "grad_norm": 52.87431335449219, |
| "learning_rate": 4.7147256438969764e-05, |
| "loss": 1.5463, |
| "step": 33700 |
| }, |
| { |
| "epoch": 18.004612262038073, |
| "grad_norm": 31.22138786315918, |
| "learning_rate": 4.728723404255319e-05, |
| "loss": 1.595, |
| "step": 33800 |
| }, |
| { |
| "epoch": 18.004892217245242, |
| "grad_norm": 50.54197311401367, |
| "learning_rate": 4.742721164613662e-05, |
| "loss": 1.7109, |
| "step": 33900 |
| }, |
| { |
| "epoch": 18.004998600223963, |
| "eval_accuracy": 0.27783452502553624, |
| "eval_f1": 0.24180912707733293, |
| "eval_loss": 3.517486810684204, |
| "eval_precision": 0.25168317098858467, |
| "eval_recall": 0.27783452502553624, |
| "eval_runtime": 227.7357, |
| "eval_samples_per_second": 17.195, |
| "eval_steps_per_second": 8.598, |
| "eval_top_10_accuracy": 0.7068437180796732, |
| "eval_top_1_accuracy": 0.2775791624106231, |
| "eval_top_5_accuracy": 0.5919305413687436, |
| "step": 33938 |
| }, |
| { |
| "epoch": 19.00017147256439, |
| "grad_norm": 46.21772003173828, |
| "learning_rate": 4.756718924972005e-05, |
| "loss": 1.2682, |
| "step": 34000 |
| }, |
| { |
| "epoch": 19.000451427771555, |
| "grad_norm": 4.63399600982666, |
| "learning_rate": 4.7707166853303474e-05, |
| "loss": 0.8455, |
| "step": 34100 |
| }, |
| { |
| "epoch": 19.000731382978724, |
| "grad_norm": 13.730817794799805, |
| "learning_rate": 4.78471444568869e-05, |
| "loss": 0.8606, |
| "step": 34200 |
| }, |
| { |
| "epoch": 19.00101133818589, |
| "grad_norm": 10.35786247253418, |
| "learning_rate": 4.798712206047033e-05, |
| "loss": 1.0884, |
| "step": 34300 |
| }, |
| { |
| "epoch": 19.001291293393056, |
| "grad_norm": 43.86582565307617, |
| "learning_rate": 4.812709966405375e-05, |
| "loss": 1.3047, |
| "step": 34400 |
| }, |
| { |
| "epoch": 19.001571248600225, |
| "grad_norm": 5.779742240905762, |
| "learning_rate": 4.826707726763718e-05, |
| "loss": 1.0745, |
| "step": 34500 |
| }, |
| { |
| "epoch": 19.00185120380739, |
| "grad_norm": 9.534099578857422, |
| "learning_rate": 4.840705487122061e-05, |
| "loss": 1.3069, |
| "step": 34600 |
| }, |
| { |
| "epoch": 19.002131159014557, |
| "grad_norm": 40.05844497680664, |
| "learning_rate": 4.8547032474804036e-05, |
| "loss": 0.9951, |
| "step": 34700 |
| }, |
| { |
| "epoch": 19.002411114221726, |
| "grad_norm": 35.630767822265625, |
| "learning_rate": 4.868701007838746e-05, |
| "loss": 1.1791, |
| "step": 34800 |
| }, |
| { |
| "epoch": 19.00269106942889, |
| "grad_norm": 23.145824432373047, |
| "learning_rate": 4.882698768197089e-05, |
| "loss": 1.1415, |
| "step": 34900 |
| }, |
| { |
| "epoch": 19.002971024636057, |
| "grad_norm": 5.915820598602295, |
| "learning_rate": 4.8966965285554314e-05, |
| "loss": 1.1801, |
| "step": 35000 |
| }, |
| { |
| "epoch": 19.003250979843227, |
| "grad_norm": 7.371408939361572, |
| "learning_rate": 4.910694288913774e-05, |
| "loss": 1.2856, |
| "step": 35100 |
| }, |
| { |
| "epoch": 19.003530935050392, |
| "grad_norm": 22.583425521850586, |
| "learning_rate": 4.9246920492721165e-05, |
| "loss": 1.3024, |
| "step": 35200 |
| }, |
| { |
| "epoch": 19.003810890257558, |
| "grad_norm": 33.190792083740234, |
| "learning_rate": 4.93868980963046e-05, |
| "loss": 1.2717, |
| "step": 35300 |
| }, |
| { |
| "epoch": 19.004090845464727, |
| "grad_norm": 37.30441665649414, |
| "learning_rate": 4.952687569988802e-05, |
| "loss": 1.4424, |
| "step": 35400 |
| }, |
| { |
| "epoch": 19.004370800671893, |
| "grad_norm": 25.534439086914062, |
| "learning_rate": 4.966685330347144e-05, |
| "loss": 1.5503, |
| "step": 35500 |
| }, |
| { |
| "epoch": 19.00465075587906, |
| "grad_norm": 17.24628257751465, |
| "learning_rate": 4.9806830907054875e-05, |
| "loss": 1.6463, |
| "step": 35600 |
| }, |
| { |
| "epoch": 19.004930711086224, |
| "grad_norm": 15.461166381835938, |
| "learning_rate": 4.99468085106383e-05, |
| "loss": 1.4479, |
| "step": 35700 |
| }, |
| { |
| "epoch": 19.00500069988802, |
| "eval_accuracy": 0.28345250255362614, |
| "eval_f1": 0.24778589478265647, |
| "eval_loss": 3.518174171447754, |
| "eval_precision": 0.25799831269060075, |
| "eval_recall": 0.28345250255362614, |
| "eval_runtime": 225.6187, |
| "eval_samples_per_second": 17.357, |
| "eval_steps_per_second": 8.678, |
| "eval_top_10_accuracy": 0.715270684371808, |
| "eval_top_1_accuracy": 0.28345250255362614, |
| "eval_top_5_accuracy": 0.5985699693564862, |
| "step": 35725 |
| }, |
| { |
| "epoch": 20.000209966405375, |
| "grad_norm": 14.617806434631348, |
| "learning_rate": 4.999035709841981e-05, |
| "loss": 1.0037, |
| "step": 35800 |
| }, |
| { |
| "epoch": 20.00048992161254, |
| "grad_norm": 31.072298049926758, |
| "learning_rate": 4.997480403135499e-05, |
| "loss": 0.9614, |
| "step": 35900 |
| }, |
| { |
| "epoch": 20.00076987681971, |
| "grad_norm": 17.07422637939453, |
| "learning_rate": 4.995925096429016e-05, |
| "loss": 0.9069, |
| "step": 36000 |
| }, |
| { |
| "epoch": 20.001049832026876, |
| "grad_norm": 7.916373252868652, |
| "learning_rate": 4.9943697897225334e-05, |
| "loss": 0.8297, |
| "step": 36100 |
| }, |
| { |
| "epoch": 20.00132978723404, |
| "grad_norm": 15.103555679321289, |
| "learning_rate": 4.992814483016051e-05, |
| "loss": 0.9506, |
| "step": 36200 |
| }, |
| { |
| "epoch": 20.00160974244121, |
| "grad_norm": 11.108366012573242, |
| "learning_rate": 4.9912591763095685e-05, |
| "loss": 1.2248, |
| "step": 36300 |
| }, |
| { |
| "epoch": 20.001889697648377, |
| "grad_norm": 39.42802047729492, |
| "learning_rate": 4.989703869603086e-05, |
| "loss": 1.3035, |
| "step": 36400 |
| }, |
| { |
| "epoch": 20.002169652855542, |
| "grad_norm": 11.509014129638672, |
| "learning_rate": 4.9881485628966035e-05, |
| "loss": 0.9473, |
| "step": 36500 |
| }, |
| { |
| "epoch": 20.00244960806271, |
| "grad_norm": 4.467772483825684, |
| "learning_rate": 4.986593256190121e-05, |
| "loss": 1.2713, |
| "step": 36600 |
| }, |
| { |
| "epoch": 20.002729563269877, |
| "grad_norm": 5.143234729766846, |
| "learning_rate": 4.9850379494836386e-05, |
| "loss": 1.1735, |
| "step": 36700 |
| }, |
| { |
| "epoch": 20.003009518477043, |
| "grad_norm": 3.546262502670288, |
| "learning_rate": 4.983482642777156e-05, |
| "loss": 1.2637, |
| "step": 36800 |
| }, |
| { |
| "epoch": 20.00328947368421, |
| "grad_norm": 42.26567840576172, |
| "learning_rate": 4.9819273360706736e-05, |
| "loss": 1.1088, |
| "step": 36900 |
| }, |
| { |
| "epoch": 20.003569428891378, |
| "grad_norm": 32.91080856323242, |
| "learning_rate": 4.980387582431256e-05, |
| "loss": 1.4778, |
| "step": 37000 |
| }, |
| { |
| "epoch": 20.003849384098544, |
| "grad_norm": 3.737553834915161, |
| "learning_rate": 4.978832275724773e-05, |
| "loss": 1.2936, |
| "step": 37100 |
| }, |
| { |
| "epoch": 20.00412933930571, |
| "grad_norm": 14.842428207397461, |
| "learning_rate": 4.977276969018291e-05, |
| "loss": 1.3193, |
| "step": 37200 |
| }, |
| { |
| "epoch": 20.00440929451288, |
| "grad_norm": 39.189353942871094, |
| "learning_rate": 4.975721662311808e-05, |
| "loss": 1.1632, |
| "step": 37300 |
| }, |
| { |
| "epoch": 20.004689249720045, |
| "grad_norm": 45.225460052490234, |
| "learning_rate": 4.974166355605326e-05, |
| "loss": 1.1817, |
| "step": 37400 |
| }, |
| { |
| "epoch": 20.00496920492721, |
| "grad_norm": 31.468297958374023, |
| "learning_rate": 4.972611048898843e-05, |
| "loss": 1.2076, |
| "step": 37500 |
| }, |
| { |
| "epoch": 20.005, |
| "eval_accuracy": 0.28421859039836567, |
| "eval_f1": 0.2493302460198215, |
| "eval_loss": 3.538327217102051, |
| "eval_precision": 0.2599856384394178, |
| "eval_recall": 0.28421859039836567, |
| "eval_runtime": 228.2995, |
| "eval_samples_per_second": 17.153, |
| "eval_steps_per_second": 8.576, |
| "eval_top_10_accuracy": 0.7070990806945863, |
| "eval_top_1_accuracy": 0.28421859039836567, |
| "eval_top_5_accuracy": 0.5980592441266599, |
| "step": 37511 |
| }, |
| { |
| "epoch": 21.00024846024636, |
| "grad_norm": 39.312591552734375, |
| "learning_rate": 4.9710557421923605e-05, |
| "loss": 0.8217, |
| "step": 37600 |
| }, |
| { |
| "epoch": 21.000528415453527, |
| "grad_norm": 40.887447357177734, |
| "learning_rate": 4.9695004354858784e-05, |
| "loss": 0.7459, |
| "step": 37700 |
| }, |
| { |
| "epoch": 21.000808370660696, |
| "grad_norm": 31.419702529907227, |
| "learning_rate": 4.9679451287793955e-05, |
| "loss": 0.9545, |
| "step": 37800 |
| }, |
| { |
| "epoch": 21.001088325867862, |
| "grad_norm": 32.3198127746582, |
| "learning_rate": 4.9663898220729134e-05, |
| "loss": 0.923, |
| "step": 37900 |
| }, |
| { |
| "epoch": 21.001368281075028, |
| "grad_norm": 5.489797592163086, |
| "learning_rate": 4.9648345153664306e-05, |
| "loss": 0.7275, |
| "step": 38000 |
| }, |
| { |
| "epoch": 21.001648236282193, |
| "grad_norm": 14.01230525970459, |
| "learning_rate": 4.963279208659948e-05, |
| "loss": 1.002, |
| "step": 38100 |
| }, |
| { |
| "epoch": 21.001928191489363, |
| "grad_norm": 32.620460510253906, |
| "learning_rate": 4.9617239019534657e-05, |
| "loss": 0.936, |
| "step": 38200 |
| }, |
| { |
| "epoch": 21.00220814669653, |
| "grad_norm": 39.93928909301758, |
| "learning_rate": 4.960168595246983e-05, |
| "loss": 0.9593, |
| "step": 38300 |
| }, |
| { |
| "epoch": 21.002488101903694, |
| "grad_norm": 40.96775817871094, |
| "learning_rate": 4.958613288540501e-05, |
| "loss": 0.8924, |
| "step": 38400 |
| }, |
| { |
| "epoch": 21.002768057110863, |
| "grad_norm": 24.79659652709961, |
| "learning_rate": 4.957057981834018e-05, |
| "loss": 1.0028, |
| "step": 38500 |
| }, |
| { |
| "epoch": 21.00304801231803, |
| "grad_norm": 11.152565956115723, |
| "learning_rate": 4.955502675127535e-05, |
| "loss": 1.1162, |
| "step": 38600 |
| }, |
| { |
| "epoch": 21.003327967525195, |
| "grad_norm": 37.34316635131836, |
| "learning_rate": 4.953947368421053e-05, |
| "loss": 1.1321, |
| "step": 38700 |
| }, |
| { |
| "epoch": 21.003607922732364, |
| "grad_norm": 5.6185526847839355, |
| "learning_rate": 4.95239206171457e-05, |
| "loss": 1.2126, |
| "step": 38800 |
| }, |
| { |
| "epoch": 21.00388787793953, |
| "grad_norm": 2.8256924152374268, |
| "learning_rate": 4.950836755008088e-05, |
| "loss": 0.9701, |
| "step": 38900 |
| }, |
| { |
| "epoch": 21.004167833146695, |
| "grad_norm": 23.547252655029297, |
| "learning_rate": 4.949281448301605e-05, |
| "loss": 0.9903, |
| "step": 39000 |
| }, |
| { |
| "epoch": 21.004447788353865, |
| "grad_norm": 46.85295486450195, |
| "learning_rate": 4.9477261415951224e-05, |
| "loss": 1.3198, |
| "step": 39100 |
| }, |
| { |
| "epoch": 21.00472774356103, |
| "grad_norm": 47.5512580871582, |
| "learning_rate": 4.9461863879557054e-05, |
| "loss": 1.4093, |
| "step": 39200 |
| }, |
| { |
| "epoch": 21.004999300111983, |
| "eval_accuracy": 0.27017364657814097, |
| "eval_f1": 0.23594364949777136, |
| "eval_loss": 3.6272244453430176, |
| "eval_precision": 0.2458368630291511, |
| "eval_recall": 0.27017364657814097, |
| "eval_runtime": 218.2444, |
| "eval_samples_per_second": 17.943, |
| "eval_steps_per_second": 8.972, |
| "eval_top_10_accuracy": 0.6917773237997957, |
| "eval_top_1_accuracy": 0.26991828396322776, |
| "eval_top_5_accuracy": 0.5898876404494382, |
| "step": 39297 |
| }, |
| { |
| "epoch": 22.000006998880178, |
| "grad_norm": 53.94511413574219, |
| "learning_rate": 4.9446310812492226e-05, |
| "loss": 1.3015, |
| "step": 39300 |
| }, |
| { |
| "epoch": 22.000286954087347, |
| "grad_norm": 6.432727336883545, |
| "learning_rate": 4.9430757745427405e-05, |
| "loss": 0.7464, |
| "step": 39400 |
| }, |
| { |
| "epoch": 22.000566909294513, |
| "grad_norm": 35.21794509887695, |
| "learning_rate": 4.941520467836258e-05, |
| "loss": 0.596, |
| "step": 39500 |
| }, |
| { |
| "epoch": 22.00084686450168, |
| "grad_norm": 41.56246566772461, |
| "learning_rate": 4.939965161129775e-05, |
| "loss": 0.7766, |
| "step": 39600 |
| }, |
| { |
| "epoch": 22.001126819708848, |
| "grad_norm": 2.1987500190734863, |
| "learning_rate": 4.938409854423293e-05, |
| "loss": 0.7652, |
| "step": 39700 |
| }, |
| { |
| "epoch": 22.001406774916013, |
| "grad_norm": 40.78813934326172, |
| "learning_rate": 4.93685454771681e-05, |
| "loss": 0.889, |
| "step": 39800 |
| }, |
| { |
| "epoch": 22.00168673012318, |
| "grad_norm": 57.54431915283203, |
| "learning_rate": 4.935299241010328e-05, |
| "loss": 0.9513, |
| "step": 39900 |
| }, |
| { |
| "epoch": 22.00196668533035, |
| "grad_norm": 22.238609313964844, |
| "learning_rate": 4.933743934303845e-05, |
| "loss": 0.9372, |
| "step": 40000 |
| }, |
| { |
| "epoch": 22.002246640537514, |
| "grad_norm": 10.823844909667969, |
| "learning_rate": 4.932188627597362e-05, |
| "loss": 0.7748, |
| "step": 40100 |
| }, |
| { |
| "epoch": 22.00252659574468, |
| "grad_norm": 70.64641571044922, |
| "learning_rate": 4.93063332089088e-05, |
| "loss": 0.9411, |
| "step": 40200 |
| }, |
| { |
| "epoch": 22.00280655095185, |
| "grad_norm": 31.34319305419922, |
| "learning_rate": 4.929078014184397e-05, |
| "loss": 1.337, |
| "step": 40300 |
| }, |
| { |
| "epoch": 22.003086506159015, |
| "grad_norm": 24.273786544799805, |
| "learning_rate": 4.927522707477915e-05, |
| "loss": 1.2788, |
| "step": 40400 |
| }, |
| { |
| "epoch": 22.00336646136618, |
| "grad_norm": 12.706284523010254, |
| "learning_rate": 4.925967400771432e-05, |
| "loss": 1.3021, |
| "step": 40500 |
| }, |
| { |
| "epoch": 22.00364641657335, |
| "grad_norm": 13.552841186523438, |
| "learning_rate": 4.9244120940649495e-05, |
| "loss": 0.8038, |
| "step": 40600 |
| }, |
| { |
| "epoch": 22.003926371780516, |
| "grad_norm": 55.14998245239258, |
| "learning_rate": 4.9228567873584674e-05, |
| "loss": 1.3207, |
| "step": 40700 |
| }, |
| { |
| "epoch": 22.00420632698768, |
| "grad_norm": 46.6702995300293, |
| "learning_rate": 4.9213014806519845e-05, |
| "loss": 1.3924, |
| "step": 40800 |
| }, |
| { |
| "epoch": 22.00448628219485, |
| "grad_norm": 4.238155841827393, |
| "learning_rate": 4.9197461739455024e-05, |
| "loss": 1.0947, |
| "step": 40900 |
| }, |
| { |
| "epoch": 22.004766237402016, |
| "grad_norm": 11.425141334533691, |
| "learning_rate": 4.9181908672390196e-05, |
| "loss": 1.3122, |
| "step": 41000 |
| }, |
| { |
| "epoch": 22.004998600223963, |
| "eval_accuracy": 0.2722165474974464, |
| "eval_f1": 0.23637877440977845, |
| "eval_loss": 3.679060459136963, |
| "eval_precision": 0.24200232503648353, |
| "eval_recall": 0.2722165474974464, |
| "eval_runtime": 226.0774, |
| "eval_samples_per_second": 17.322, |
| "eval_steps_per_second": 8.661, |
| "eval_top_10_accuracy": 0.695097037793667, |
| "eval_top_1_accuracy": 0.27247191011235955, |
| "eval_top_5_accuracy": 0.5822267620020429, |
| "step": 41083 |
| }, |
| { |
| "epoch": 23.000045492721163, |
| "grad_norm": 7.7932448387146, |
| "learning_rate": 4.916635560532537e-05, |
| "loss": 1.0572, |
| "step": 41100 |
| }, |
| { |
| "epoch": 23.000325447928333, |
| "grad_norm": 3.319699764251709, |
| "learning_rate": 4.9150802538260547e-05, |
| "loss": 0.669, |
| "step": 41200 |
| }, |
| { |
| "epoch": 23.0006054031355, |
| "grad_norm": 38.11969757080078, |
| "learning_rate": 4.913524947119572e-05, |
| "loss": 0.884, |
| "step": 41300 |
| }, |
| { |
| "epoch": 23.000885358342664, |
| "grad_norm": 37.383209228515625, |
| "learning_rate": 4.91196964041309e-05, |
| "loss": 0.6125, |
| "step": 41400 |
| }, |
| { |
| "epoch": 23.001165313549834, |
| "grad_norm": 21.38405418395996, |
| "learning_rate": 4.910429886773672e-05, |
| "loss": 0.987, |
| "step": 41500 |
| }, |
| { |
| "epoch": 23.001445268757, |
| "grad_norm": 44.750972747802734, |
| "learning_rate": 4.908874580067189e-05, |
| "loss": 0.8492, |
| "step": 41600 |
| }, |
| { |
| "epoch": 23.001725223964165, |
| "grad_norm": 27.360767364501953, |
| "learning_rate": 4.907319273360707e-05, |
| "loss": 0.8555, |
| "step": 41700 |
| }, |
| { |
| "epoch": 23.002005179171334, |
| "grad_norm": 34.80257797241211, |
| "learning_rate": 4.905763966654224e-05, |
| "loss": 0.9643, |
| "step": 41800 |
| }, |
| { |
| "epoch": 23.0022851343785, |
| "grad_norm": 34.58405685424805, |
| "learning_rate": 4.904208659947742e-05, |
| "loss": 0.748, |
| "step": 41900 |
| }, |
| { |
| "epoch": 23.002565089585666, |
| "grad_norm": 45.360355377197266, |
| "learning_rate": 4.9026533532412594e-05, |
| "loss": 1.115, |
| "step": 42000 |
| }, |
| { |
| "epoch": 23.00284504479283, |
| "grad_norm": 53.12576675415039, |
| "learning_rate": 4.9010980465347766e-05, |
| "loss": 1.0141, |
| "step": 42100 |
| }, |
| { |
| "epoch": 23.003125, |
| "grad_norm": 6.911433696746826, |
| "learning_rate": 4.8995427398282944e-05, |
| "loss": 1.0746, |
| "step": 42200 |
| }, |
| { |
| "epoch": 23.003404955207166, |
| "grad_norm": 2.8777873516082764, |
| "learning_rate": 4.8979874331218116e-05, |
| "loss": 1.088, |
| "step": 42300 |
| }, |
| { |
| "epoch": 23.003684910414332, |
| "grad_norm": 22.604394912719727, |
| "learning_rate": 4.8964321264153295e-05, |
| "loss": 1.1257, |
| "step": 42400 |
| }, |
| { |
| "epoch": 23.0039648656215, |
| "grad_norm": 15.631820678710938, |
| "learning_rate": 4.894876819708847e-05, |
| "loss": 1.1837, |
| "step": 42500 |
| }, |
| { |
| "epoch": 23.004244820828667, |
| "grad_norm": 55.372398376464844, |
| "learning_rate": 4.893321513002364e-05, |
| "loss": 1.2726, |
| "step": 42600 |
| }, |
| { |
| "epoch": 23.004524776035833, |
| "grad_norm": 33.215152740478516, |
| "learning_rate": 4.891766206295882e-05, |
| "loss": 1.2988, |
| "step": 42700 |
| }, |
| { |
| "epoch": 23.004804731243002, |
| "grad_norm": 3.4601495265960693, |
| "learning_rate": 4.890210899589399e-05, |
| "loss": 1.4774, |
| "step": 42800 |
| }, |
| { |
| "epoch": 23.00500069988802, |
| "eval_accuracy": 0.276046986721144, |
| "eval_f1": 0.24171610224981013, |
| "eval_loss": 3.7163209915161133, |
| "eval_precision": 0.25483022396965194, |
| "eval_recall": 0.276046986721144, |
| "eval_runtime": 223.2644, |
| "eval_samples_per_second": 17.54, |
| "eval_steps_per_second": 8.77, |
| "eval_top_10_accuracy": 0.6940755873340143, |
| "eval_top_1_accuracy": 0.27579162410623087, |
| "eval_top_5_accuracy": 0.5812053115423902, |
| "step": 42870 |
| }, |
| { |
| "epoch": 24.00008398656215, |
| "grad_norm": 6.2556962966918945, |
| "learning_rate": 4.888655592882917e-05, |
| "loss": 0.8567, |
| "step": 42900 |
| }, |
| { |
| "epoch": 24.00036394176932, |
| "grad_norm": 0.7903256416320801, |
| "learning_rate": 4.887100286176434e-05, |
| "loss": 0.7553, |
| "step": 43000 |
| }, |
| { |
| "epoch": 24.000643896976484, |
| "grad_norm": 2.442410945892334, |
| "learning_rate": 4.885544979469951e-05, |
| "loss": 0.7151, |
| "step": 43100 |
| }, |
| { |
| "epoch": 24.00092385218365, |
| "grad_norm": 0.9351925849914551, |
| "learning_rate": 4.883989672763469e-05, |
| "loss": 0.7546, |
| "step": 43200 |
| }, |
| { |
| "epoch": 24.001203807390816, |
| "grad_norm": 36.959136962890625, |
| "learning_rate": 4.882434366056986e-05, |
| "loss": 0.6823, |
| "step": 43300 |
| }, |
| { |
| "epoch": 24.001483762597985, |
| "grad_norm": 35.2205696105957, |
| "learning_rate": 4.880879059350504e-05, |
| "loss": 0.9348, |
| "step": 43400 |
| }, |
| { |
| "epoch": 24.00176371780515, |
| "grad_norm": 27.507863998413086, |
| "learning_rate": 4.8793393057110865e-05, |
| "loss": 0.6786, |
| "step": 43500 |
| }, |
| { |
| "epoch": 24.002043673012317, |
| "grad_norm": 4.061860084533691, |
| "learning_rate": 4.8777839990046037e-05, |
| "loss": 1.0083, |
| "step": 43600 |
| }, |
| { |
| "epoch": 24.002323628219486, |
| "grad_norm": 44.9168701171875, |
| "learning_rate": 4.8762286922981215e-05, |
| "loss": 0.9745, |
| "step": 43700 |
| }, |
| { |
| "epoch": 24.00260358342665, |
| "grad_norm": 1.4848639965057373, |
| "learning_rate": 4.874673385591639e-05, |
| "loss": 1.0088, |
| "step": 43800 |
| }, |
| { |
| "epoch": 24.002883538633817, |
| "grad_norm": 33.28787612915039, |
| "learning_rate": 4.8731180788851566e-05, |
| "loss": 0.7772, |
| "step": 43900 |
| }, |
| { |
| "epoch": 24.003163493840987, |
| "grad_norm": 46.953208923339844, |
| "learning_rate": 4.871562772178674e-05, |
| "loss": 1.189, |
| "step": 44000 |
| }, |
| { |
| "epoch": 24.003443449048152, |
| "grad_norm": 3.805981159210205, |
| "learning_rate": 4.870007465472191e-05, |
| "loss": 1.1122, |
| "step": 44100 |
| }, |
| { |
| "epoch": 24.003723404255318, |
| "grad_norm": 5.7398505210876465, |
| "learning_rate": 4.868452158765709e-05, |
| "loss": 0.9651, |
| "step": 44200 |
| }, |
| { |
| "epoch": 24.004003359462487, |
| "grad_norm": 42.19569778442383, |
| "learning_rate": 4.866896852059226e-05, |
| "loss": 1.1016, |
| "step": 44300 |
| }, |
| { |
| "epoch": 24.004283314669653, |
| "grad_norm": 32.88911437988281, |
| "learning_rate": 4.865341545352744e-05, |
| "loss": 0.821, |
| "step": 44400 |
| }, |
| { |
| "epoch": 24.00456326987682, |
| "grad_norm": 6.485898971557617, |
| "learning_rate": 4.863786238646261e-05, |
| "loss": 1.2985, |
| "step": 44500 |
| }, |
| { |
| "epoch": 24.004843225083988, |
| "grad_norm": 46.899261474609375, |
| "learning_rate": 4.862230931939778e-05, |
| "loss": 1.1338, |
| "step": 44600 |
| }, |
| { |
| "epoch": 24.005, |
| "eval_accuracy": 0.267364657814096, |
| "eval_f1": 0.23461128701357323, |
| "eval_loss": 3.7576823234558105, |
| "eval_precision": 0.2471144003255852, |
| "eval_recall": 0.267364657814096, |
| "eval_runtime": 228.6369, |
| "eval_samples_per_second": 17.128, |
| "eval_steps_per_second": 8.564, |
| "eval_top_10_accuracy": 0.6897344228804902, |
| "eval_top_1_accuracy": 0.2676200204290092, |
| "eval_top_5_accuracy": 0.5817160367722165, |
| "step": 44656 |
| }, |
| { |
| "epoch": 25.000122480403135, |
| "grad_norm": 29.33512306213379, |
| "learning_rate": 4.860675625233296e-05, |
| "loss": 0.884, |
| "step": 44700 |
| }, |
| { |
| "epoch": 25.0004024356103, |
| "grad_norm": 29.168378829956055, |
| "learning_rate": 4.859120318526813e-05, |
| "loss": 0.7048, |
| "step": 44800 |
| }, |
| { |
| "epoch": 25.00068239081747, |
| "grad_norm": 4.4006547927856445, |
| "learning_rate": 4.857565011820331e-05, |
| "loss": 0.5229, |
| "step": 44900 |
| }, |
| { |
| "epoch": 25.000962346024636, |
| "grad_norm": 0.7437628507614136, |
| "learning_rate": 4.8560097051138484e-05, |
| "loss": 0.6759, |
| "step": 45000 |
| }, |
| { |
| "epoch": 25.0012423012318, |
| "grad_norm": 1.8170311450958252, |
| "learning_rate": 4.854454398407366e-05, |
| "loss": 0.7961, |
| "step": 45100 |
| }, |
| { |
| "epoch": 25.00152225643897, |
| "grad_norm": 65.61835479736328, |
| "learning_rate": 4.8528990917008834e-05, |
| "loss": 0.7743, |
| "step": 45200 |
| }, |
| { |
| "epoch": 25.001802211646137, |
| "grad_norm": 38.23652267456055, |
| "learning_rate": 4.851343784994401e-05, |
| "loss": 1.0217, |
| "step": 45300 |
| }, |
| { |
| "epoch": 25.002082166853302, |
| "grad_norm": 6.596869468688965, |
| "learning_rate": 4.8497884782879185e-05, |
| "loss": 0.8333, |
| "step": 45400 |
| }, |
| { |
| "epoch": 25.00236212206047, |
| "grad_norm": 42.07466506958008, |
| "learning_rate": 4.8482331715814364e-05, |
| "loss": 0.7881, |
| "step": 45500 |
| }, |
| { |
| "epoch": 25.002642077267637, |
| "grad_norm": 2.7012946605682373, |
| "learning_rate": 4.8466778648749536e-05, |
| "loss": 0.9458, |
| "step": 45600 |
| }, |
| { |
| "epoch": 25.002922032474803, |
| "grad_norm": 30.443016052246094, |
| "learning_rate": 4.845138111235536e-05, |
| "loss": 0.9779, |
| "step": 45700 |
| }, |
| { |
| "epoch": 25.003201987681972, |
| "grad_norm": 10.28647518157959, |
| "learning_rate": 4.843582804529053e-05, |
| "loss": 1.0701, |
| "step": 45800 |
| }, |
| { |
| "epoch": 25.003481942889138, |
| "grad_norm": 41.59596252441406, |
| "learning_rate": 4.842027497822571e-05, |
| "loss": 1.08, |
| "step": 45900 |
| }, |
| { |
| "epoch": 25.003761898096304, |
| "grad_norm": 33.521087646484375, |
| "learning_rate": 4.840472191116088e-05, |
| "loss": 1.1218, |
| "step": 46000 |
| }, |
| { |
| "epoch": 25.004041853303473, |
| "grad_norm": 1.2339781522750854, |
| "learning_rate": 4.8389168844096054e-05, |
| "loss": 1.1475, |
| "step": 46100 |
| }, |
| { |
| "epoch": 25.00432180851064, |
| "grad_norm": 1.9241328239440918, |
| "learning_rate": 4.837361577703123e-05, |
| "loss": 1.1846, |
| "step": 46200 |
| }, |
| { |
| "epoch": 25.004601763717805, |
| "grad_norm": 37.55961608886719, |
| "learning_rate": 4.8358062709966404e-05, |
| "loss": 1.348, |
| "step": 46300 |
| }, |
| { |
| "epoch": 25.00488171892497, |
| "grad_norm": 3.4552130699157715, |
| "learning_rate": 4.834250964290158e-05, |
| "loss": 1.1446, |
| "step": 46400 |
| }, |
| { |
| "epoch": 25.004999300111983, |
| "eval_accuracy": 0.274514811031665, |
| "eval_f1": 0.24301366205430505, |
| "eval_loss": 3.76619553565979, |
| "eval_precision": 0.2533079046421743, |
| "eval_recall": 0.274514811031665, |
| "eval_runtime": 228.3549, |
| "eval_samples_per_second": 17.149, |
| "eval_steps_per_second": 8.574, |
| "eval_top_10_accuracy": 0.7073544433094995, |
| "eval_top_1_accuracy": 0.274514811031665, |
| "eval_top_5_accuracy": 0.5957609805924413, |
| "step": 46442 |
| }, |
| { |
| "epoch": 26.00016097424412, |
| "grad_norm": 0.36602893471717834, |
| "learning_rate": 4.8326956575836755e-05, |
| "loss": 0.652, |
| "step": 46500 |
| }, |
| { |
| "epoch": 26.000440929451287, |
| "grad_norm": 3.7436516284942627, |
| "learning_rate": 4.8311403508771927e-05, |
| "loss": 0.5429, |
| "step": 46600 |
| }, |
| { |
| "epoch": 26.000720884658456, |
| "grad_norm": 34.448280334472656, |
| "learning_rate": 4.8295850441707105e-05, |
| "loss": 0.6339, |
| "step": 46700 |
| }, |
| { |
| "epoch": 26.001000839865622, |
| "grad_norm": 1.2372946739196777, |
| "learning_rate": 4.828029737464228e-05, |
| "loss": 0.7041, |
| "step": 46800 |
| }, |
| { |
| "epoch": 26.001280795072788, |
| "grad_norm": 7.009436130523682, |
| "learning_rate": 4.8264744307577456e-05, |
| "loss": 0.6515, |
| "step": 46900 |
| }, |
| { |
| "epoch": 26.001560750279957, |
| "grad_norm": 26.225847244262695, |
| "learning_rate": 4.8249191240512635e-05, |
| "loss": 0.7411, |
| "step": 47000 |
| }, |
| { |
| "epoch": 26.001840705487123, |
| "grad_norm": 50.131893157958984, |
| "learning_rate": 4.8233638173447806e-05, |
| "loss": 0.9812, |
| "step": 47100 |
| }, |
| { |
| "epoch": 26.00212066069429, |
| "grad_norm": 31.176666259765625, |
| "learning_rate": 4.8218085106382985e-05, |
| "loss": 0.7793, |
| "step": 47200 |
| }, |
| { |
| "epoch": 26.002400615901454, |
| "grad_norm": 36.463321685791016, |
| "learning_rate": 4.820253203931816e-05, |
| "loss": 0.9363, |
| "step": 47300 |
| }, |
| { |
| "epoch": 26.002680571108623, |
| "grad_norm": 0.5882710218429565, |
| "learning_rate": 4.8186978972253336e-05, |
| "loss": 0.8734, |
| "step": 47400 |
| }, |
| { |
| "epoch": 26.00296052631579, |
| "grad_norm": 56.16053771972656, |
| "learning_rate": 4.817142590518851e-05, |
| "loss": 0.8313, |
| "step": 47500 |
| }, |
| { |
| "epoch": 26.003240481522955, |
| "grad_norm": 27.409542083740234, |
| "learning_rate": 4.815587283812368e-05, |
| "loss": 1.0755, |
| "step": 47600 |
| }, |
| { |
| "epoch": 26.003520436730124, |
| "grad_norm": 37.072975158691406, |
| "learning_rate": 4.814031977105886e-05, |
| "loss": 1.1273, |
| "step": 47700 |
| }, |
| { |
| "epoch": 26.00380039193729, |
| "grad_norm": 12.40834903717041, |
| "learning_rate": 4.8124922234664675e-05, |
| "loss": 1.0032, |
| "step": 47800 |
| }, |
| { |
| "epoch": 26.004080347144455, |
| "grad_norm": 33.848052978515625, |
| "learning_rate": 4.8109369167599854e-05, |
| "loss": 1.1193, |
| "step": 47900 |
| }, |
| { |
| "epoch": 26.004360302351625, |
| "grad_norm": 35.6231803894043, |
| "learning_rate": 4.8093816100535026e-05, |
| "loss": 1.1809, |
| "step": 48000 |
| }, |
| { |
| "epoch": 26.00464025755879, |
| "grad_norm": 53.72087860107422, |
| "learning_rate": 4.80782630334702e-05, |
| "loss": 1.2457, |
| "step": 48100 |
| }, |
| { |
| "epoch": 26.004920212765956, |
| "grad_norm": 27.564422607421875, |
| "learning_rate": 4.8062709966405376e-05, |
| "loss": 1.0876, |
| "step": 48200 |
| }, |
| { |
| "epoch": 26.004998600223963, |
| "eval_accuracy": 0.27860061287027577, |
| "eval_f1": 0.24350325505841336, |
| "eval_loss": 3.8689017295837402, |
| "eval_precision": 0.25253788318139697, |
| "eval_recall": 0.27860061287027577, |
| "eval_runtime": 228.9607, |
| "eval_samples_per_second": 17.103, |
| "eval_steps_per_second": 8.552, |
| "eval_top_10_accuracy": 0.6851378958120531, |
| "eval_top_1_accuracy": 0.278855975485189, |
| "eval_top_5_accuracy": 0.5781409601634321, |
| "step": 48228 |
| }, |
| { |
| "epoch": 27.000199468085107, |
| "grad_norm": 8.388084411621094, |
| "learning_rate": 4.804715689934055e-05, |
| "loss": 0.7608, |
| "step": 48300 |
| }, |
| { |
| "epoch": 27.000479423292273, |
| "grad_norm": 1.5461616516113281, |
| "learning_rate": 4.803160383227573e-05, |
| "loss": 0.7676, |
| "step": 48400 |
| }, |
| { |
| "epoch": 27.00075937849944, |
| "grad_norm": 2.8455636501312256, |
| "learning_rate": 4.80160507652109e-05, |
| "loss": 0.5519, |
| "step": 48500 |
| }, |
| { |
| "epoch": 27.001039333706608, |
| "grad_norm": 37.09521484375, |
| "learning_rate": 4.800049769814608e-05, |
| "loss": 0.738, |
| "step": 48600 |
| }, |
| { |
| "epoch": 27.001319288913773, |
| "grad_norm": 24.946674346923828, |
| "learning_rate": 4.798494463108125e-05, |
| "loss": 0.8532, |
| "step": 48700 |
| }, |
| { |
| "epoch": 27.00159924412094, |
| "grad_norm": 1.9327168464660645, |
| "learning_rate": 4.796939156401643e-05, |
| "loss": 1.0297, |
| "step": 48800 |
| }, |
| { |
| "epoch": 27.00187919932811, |
| "grad_norm": 37.39794921875, |
| "learning_rate": 4.79538384969516e-05, |
| "loss": 0.7483, |
| "step": 48900 |
| }, |
| { |
| "epoch": 27.002159154535274, |
| "grad_norm": 1.6338342428207397, |
| "learning_rate": 4.793828542988678e-05, |
| "loss": 0.7587, |
| "step": 49000 |
| }, |
| { |
| "epoch": 27.00243910974244, |
| "grad_norm": 13.684679985046387, |
| "learning_rate": 4.792273236282195e-05, |
| "loss": 0.7818, |
| "step": 49100 |
| }, |
| { |
| "epoch": 27.00271906494961, |
| "grad_norm": 31.797103881835938, |
| "learning_rate": 4.790717929575713e-05, |
| "loss": 0.8512, |
| "step": 49200 |
| }, |
| { |
| "epoch": 27.002999020156775, |
| "grad_norm": 41.46651840209961, |
| "learning_rate": 4.78916262286923e-05, |
| "loss": 1.3144, |
| "step": 49300 |
| }, |
| { |
| "epoch": 27.00327897536394, |
| "grad_norm": 6.321496963500977, |
| "learning_rate": 4.787607316162748e-05, |
| "loss": 0.9538, |
| "step": 49400 |
| }, |
| { |
| "epoch": 27.00355893057111, |
| "grad_norm": 0.416471391916275, |
| "learning_rate": 4.786052009456265e-05, |
| "loss": 1.2089, |
| "step": 49500 |
| }, |
| { |
| "epoch": 27.003838885778276, |
| "grad_norm": 31.824552536010742, |
| "learning_rate": 4.784496702749782e-05, |
| "loss": 1.0261, |
| "step": 49600 |
| }, |
| { |
| "epoch": 27.00411884098544, |
| "grad_norm": 51.06576156616211, |
| "learning_rate": 4.7829413960433e-05, |
| "loss": 1.1528, |
| "step": 49700 |
| }, |
| { |
| "epoch": 27.00439879619261, |
| "grad_norm": 20.590635299682617, |
| "learning_rate": 4.7813860893368174e-05, |
| "loss": 0.9639, |
| "step": 49800 |
| }, |
| { |
| "epoch": 27.004678751399776, |
| "grad_norm": 6.22679328918457, |
| "learning_rate": 4.779830782630335e-05, |
| "loss": 1.1261, |
| "step": 49900 |
| }, |
| { |
| "epoch": 27.004958706606942, |
| "grad_norm": 49.03010559082031, |
| "learning_rate": 4.7782754759238525e-05, |
| "loss": 1.0755, |
| "step": 50000 |
| }, |
| { |
| "epoch": 27.00500069988802, |
| "eval_accuracy": 0.2773237997957099, |
| "eval_f1": 0.24651992442655182, |
| "eval_loss": 3.8659050464630127, |
| "eval_precision": 0.2626311954133711, |
| "eval_recall": 0.2773237997957099, |
| "eval_runtime": 217.8846, |
| "eval_samples_per_second": 17.973, |
| "eval_steps_per_second": 8.986, |
| "eval_top_10_accuracy": 0.70097037793667, |
| "eval_top_1_accuracy": 0.2773237997957099, |
| "eval_top_5_accuracy": 0.5855464759959143, |
| "step": 50015 |
| }, |
| { |
| "epoch": 28.000237961926093, |
| "grad_norm": 0.523884117603302, |
| "learning_rate": 4.776735722284434e-05, |
| "loss": 0.6872, |
| "step": 50100 |
| }, |
| { |
| "epoch": 28.00051791713326, |
| "grad_norm": 23.129125595092773, |
| "learning_rate": 4.775180415577952e-05, |
| "loss": 0.5709, |
| "step": 50200 |
| }, |
| { |
| "epoch": 28.000797872340424, |
| "grad_norm": 33.83433532714844, |
| "learning_rate": 4.773625108871469e-05, |
| "loss": 0.7503, |
| "step": 50300 |
| }, |
| { |
| "epoch": 28.001077827547594, |
| "grad_norm": 0.6037617325782776, |
| "learning_rate": 4.772069802164987e-05, |
| "loss": 0.9381, |
| "step": 50400 |
| }, |
| { |
| "epoch": 28.00135778275476, |
| "grad_norm": 0.9508163332939148, |
| "learning_rate": 4.770514495458505e-05, |
| "loss": 0.7982, |
| "step": 50500 |
| }, |
| { |
| "epoch": 28.001637737961925, |
| "grad_norm": 5.3412251472473145, |
| "learning_rate": 4.768959188752022e-05, |
| "loss": 0.8581, |
| "step": 50600 |
| }, |
| { |
| "epoch": 28.001917693169094, |
| "grad_norm": 44.1540641784668, |
| "learning_rate": 4.76740388204554e-05, |
| "loss": 0.9748, |
| "step": 50700 |
| }, |
| { |
| "epoch": 28.00219764837626, |
| "grad_norm": 122.2665786743164, |
| "learning_rate": 4.765848575339057e-05, |
| "loss": 0.8151, |
| "step": 50800 |
| }, |
| { |
| "epoch": 28.002477603583426, |
| "grad_norm": 1.555612564086914, |
| "learning_rate": 4.764293268632575e-05, |
| "loss": 0.827, |
| "step": 50900 |
| }, |
| { |
| "epoch": 28.002757558790595, |
| "grad_norm": 27.170162200927734, |
| "learning_rate": 4.762737961926092e-05, |
| "loss": 0.7203, |
| "step": 51000 |
| }, |
| { |
| "epoch": 28.00303751399776, |
| "grad_norm": 32.98384094238281, |
| "learning_rate": 4.7611826552196094e-05, |
| "loss": 0.9387, |
| "step": 51100 |
| }, |
| { |
| "epoch": 28.003317469204926, |
| "grad_norm": 4.3118157386779785, |
| "learning_rate": 4.759627348513127e-05, |
| "loss": 0.9075, |
| "step": 51200 |
| }, |
| { |
| "epoch": 28.003597424412096, |
| "grad_norm": 18.410526275634766, |
| "learning_rate": 4.7580720418066445e-05, |
| "loss": 1.0489, |
| "step": 51300 |
| }, |
| { |
| "epoch": 28.00387737961926, |
| "grad_norm": 0.6224632859230042, |
| "learning_rate": 4.7565167351001623e-05, |
| "loss": 0.777, |
| "step": 51400 |
| }, |
| { |
| "epoch": 28.004157334826427, |
| "grad_norm": 21.2976016998291, |
| "learning_rate": 4.7549614283936795e-05, |
| "loss": 0.8111, |
| "step": 51500 |
| }, |
| { |
| "epoch": 28.004437290033593, |
| "grad_norm": 39.73042678833008, |
| "learning_rate": 4.753406121687197e-05, |
| "loss": 1.1039, |
| "step": 51600 |
| }, |
| { |
| "epoch": 28.004717245240762, |
| "grad_norm": 38.503684997558594, |
| "learning_rate": 4.7518508149807146e-05, |
| "loss": 1.1345, |
| "step": 51700 |
| }, |
| { |
| "epoch": 28.004997200447928, |
| "grad_norm": 1.6712877750396729, |
| "learning_rate": 4.750295508274232e-05, |
| "loss": 1.1092, |
| "step": 51800 |
| }, |
| { |
| "epoch": 28.005, |
| "eval_accuracy": 0.278855975485189, |
| "eval_f1": 0.2448271325501993, |
| "eval_loss": 3.91512393951416, |
| "eval_precision": 0.2539590289973334, |
| "eval_recall": 0.278855975485189, |
| "eval_runtime": 227.3347, |
| "eval_samples_per_second": 17.226, |
| "eval_steps_per_second": 8.613, |
| "eval_top_10_accuracy": 0.6966292134831461, |
| "eval_top_1_accuracy": 0.27860061287027577, |
| "eval_top_5_accuracy": 0.5891215526046987, |
| "step": 51801 |
| }, |
| { |
| "epoch": 29.00027645576708, |
| "grad_norm": 41.43709945678711, |
| "learning_rate": 4.7487402015677497e-05, |
| "loss": 0.6971, |
| "step": 51900 |
| }, |
| { |
| "epoch": 29.000556410974244, |
| "grad_norm": 33.58243942260742, |
| "learning_rate": 4.747184894861267e-05, |
| "loss": 0.5892, |
| "step": 52000 |
| }, |
| { |
| "epoch": 29.00083636618141, |
| "grad_norm": 0.9046162962913513, |
| "learning_rate": 4.745629588154784e-05, |
| "loss": 0.7421, |
| "step": 52100 |
| }, |
| { |
| "epoch": 29.00111632138858, |
| "grad_norm": 1.261534333229065, |
| "learning_rate": 4.744074281448302e-05, |
| "loss": 0.7439, |
| "step": 52200 |
| }, |
| { |
| "epoch": 29.001396276595745, |
| "grad_norm": 25.14644432067871, |
| "learning_rate": 4.742518974741819e-05, |
| "loss": 0.6257, |
| "step": 52300 |
| }, |
| { |
| "epoch": 29.00167623180291, |
| "grad_norm": 30.892152786254883, |
| "learning_rate": 4.740963668035337e-05, |
| "loss": 0.6543, |
| "step": 52400 |
| }, |
| { |
| "epoch": 29.00195618701008, |
| "grad_norm": 38.89350509643555, |
| "learning_rate": 4.739423914395919e-05, |
| "loss": 0.8098, |
| "step": 52500 |
| }, |
| { |
| "epoch": 29.002236142217246, |
| "grad_norm": 1.8801395893096924, |
| "learning_rate": 4.737868607689437e-05, |
| "loss": 0.7963, |
| "step": 52600 |
| }, |
| { |
| "epoch": 29.00251609742441, |
| "grad_norm": 4.187483310699463, |
| "learning_rate": 4.7363133009829544e-05, |
| "loss": 0.9501, |
| "step": 52700 |
| }, |
| { |
| "epoch": 29.002796052631577, |
| "grad_norm": 0.38438934087753296, |
| "learning_rate": 4.7347579942764716e-05, |
| "loss": 1.0176, |
| "step": 52800 |
| }, |
| { |
| "epoch": 29.003076007838747, |
| "grad_norm": 24.411516189575195, |
| "learning_rate": 4.7332026875699894e-05, |
| "loss": 0.8995, |
| "step": 52900 |
| }, |
| { |
| "epoch": 29.003355963045912, |
| "grad_norm": 37.18138885498047, |
| "learning_rate": 4.7316473808635066e-05, |
| "loss": 0.9454, |
| "step": 53000 |
| }, |
| { |
| "epoch": 29.003635918253078, |
| "grad_norm": 34.37055206298828, |
| "learning_rate": 4.7300920741570245e-05, |
| "loss": 1.382, |
| "step": 53100 |
| }, |
| { |
| "epoch": 29.003915873460247, |
| "grad_norm": 0.7984645962715149, |
| "learning_rate": 4.728536767450542e-05, |
| "loss": 1.2185, |
| "step": 53200 |
| }, |
| { |
| "epoch": 29.004195828667413, |
| "grad_norm": 0.9875502586364746, |
| "learning_rate": 4.726981460744059e-05, |
| "loss": 0.9532, |
| "step": 53300 |
| }, |
| { |
| "epoch": 29.00447578387458, |
| "grad_norm": 38.7867317199707, |
| "learning_rate": 4.725426154037577e-05, |
| "loss": 0.9921, |
| "step": 53400 |
| }, |
| { |
| "epoch": 29.004755739081748, |
| "grad_norm": 2.064009666442871, |
| "learning_rate": 4.723870847331094e-05, |
| "loss": 0.9877, |
| "step": 53500 |
| }, |
| { |
| "epoch": 29.004999300111983, |
| "eval_accuracy": 0.283197139938713, |
| "eval_f1": 0.24799378699098912, |
| "eval_loss": 4.014481544494629, |
| "eval_precision": 0.25962104387530993, |
| "eval_recall": 0.283197139938713, |
| "eval_runtime": 225.1257, |
| "eval_samples_per_second": 17.395, |
| "eval_steps_per_second": 8.697, |
| "eval_top_10_accuracy": 0.6902451481103167, |
| "eval_top_1_accuracy": 0.283197139938713, |
| "eval_top_5_accuracy": 0.585291113381001, |
| "step": 53587 |
| }, |
| { |
| "epoch": 30.000034994400895, |
| "grad_norm": 5.7924675941467285, |
| "learning_rate": 4.722315540624611e-05, |
| "loss": 0.7719, |
| "step": 53600 |
| }, |
| { |
| "epoch": 30.00031494960806, |
| "grad_norm": 0.8171272873878479, |
| "learning_rate": 4.720760233918129e-05, |
| "loss": 0.568, |
| "step": 53700 |
| }, |
| { |
| "epoch": 30.00059490481523, |
| "grad_norm": 31.9758358001709, |
| "learning_rate": 4.719204927211646e-05, |
| "loss": 0.7045, |
| "step": 53800 |
| }, |
| { |
| "epoch": 30.000874860022396, |
| "grad_norm": 7.561930179595947, |
| "learning_rate": 4.717649620505164e-05, |
| "loss": 0.589, |
| "step": 53900 |
| }, |
| { |
| "epoch": 30.00115481522956, |
| "grad_norm": 2.9528915882110596, |
| "learning_rate": 4.716094313798681e-05, |
| "loss": 0.8783, |
| "step": 54000 |
| }, |
| { |
| "epoch": 30.00143477043673, |
| "grad_norm": 38.09792709350586, |
| "learning_rate": 4.7145390070921984e-05, |
| "loss": 0.6687, |
| "step": 54100 |
| }, |
| { |
| "epoch": 30.001714725643897, |
| "grad_norm": 17.62958526611328, |
| "learning_rate": 4.712983700385716e-05, |
| "loss": 0.7255, |
| "step": 54200 |
| }, |
| { |
| "epoch": 30.001994680851062, |
| "grad_norm": 47.918392181396484, |
| "learning_rate": 4.7114283936792335e-05, |
| "loss": 0.8257, |
| "step": 54300 |
| }, |
| { |
| "epoch": 30.00227463605823, |
| "grad_norm": 4.136358737945557, |
| "learning_rate": 4.7098730869727513e-05, |
| "loss": 0.9338, |
| "step": 54400 |
| }, |
| { |
| "epoch": 30.002554591265397, |
| "grad_norm": 40.67402267456055, |
| "learning_rate": 4.7083177802662685e-05, |
| "loss": 0.8837, |
| "step": 54500 |
| }, |
| { |
| "epoch": 30.002834546472563, |
| "grad_norm": 28.413721084594727, |
| "learning_rate": 4.7067780266268516e-05, |
| "loss": 1.0067, |
| "step": 54600 |
| }, |
| { |
| "epoch": 30.003114501679732, |
| "grad_norm": 3.6661288738250732, |
| "learning_rate": 4.705222719920369e-05, |
| "loss": 0.94, |
| "step": 54700 |
| }, |
| { |
| "epoch": 30.003394456886898, |
| "grad_norm": 0.3771117329597473, |
| "learning_rate": 4.7036829662809505e-05, |
| "loss": 1.0855, |
| "step": 54800 |
| }, |
| { |
| "epoch": 30.003674412094064, |
| "grad_norm": 68.63765716552734, |
| "learning_rate": 4.702127659574468e-05, |
| "loss": 1.2102, |
| "step": 54900 |
| }, |
| { |
| "epoch": 30.003954367301233, |
| "grad_norm": 1.7344911098480225, |
| "learning_rate": 4.7005723528679855e-05, |
| "loss": 0.8625, |
| "step": 55000 |
| }, |
| { |
| "epoch": 30.0042343225084, |
| "grad_norm": 1.0122687816619873, |
| "learning_rate": 4.6990170461615034e-05, |
| "loss": 1.2124, |
| "step": 55100 |
| }, |
| { |
| "epoch": 30.004514277715565, |
| "grad_norm": 0.7474389672279358, |
| "learning_rate": 4.6974617394550206e-05, |
| "loss": 1.1678, |
| "step": 55200 |
| }, |
| { |
| "epoch": 30.004794232922734, |
| "grad_norm": 6.55318546295166, |
| "learning_rate": 4.695906432748538e-05, |
| "loss": 0.946, |
| "step": 55300 |
| }, |
| { |
| "epoch": 30.004998600223963, |
| "eval_accuracy": 0.2770684371807967, |
| "eval_f1": 0.24776271203710667, |
| "eval_loss": 3.9974467754364014, |
| "eval_precision": 0.2626098273936753, |
| "eval_recall": 0.2770684371807967, |
| "eval_runtime": 224.4315, |
| "eval_samples_per_second": 17.449, |
| "eval_steps_per_second": 8.724, |
| "eval_top_10_accuracy": 0.6915219611848825, |
| "eval_top_1_accuracy": 0.2763023493360572, |
| "eval_top_5_accuracy": 0.5883554647599591, |
| "step": 55373 |
| }, |
| { |
| "epoch": 31.00007348824188, |
| "grad_norm": 1.8461582660675049, |
| "learning_rate": 4.6943511260420556e-05, |
| "loss": 0.7635, |
| "step": 55400 |
| }, |
| { |
| "epoch": 31.000353443449047, |
| "grad_norm": 19.579442977905273, |
| "learning_rate": 4.692795819335573e-05, |
| "loss": 0.4863, |
| "step": 55500 |
| }, |
| { |
| "epoch": 31.000633398656216, |
| "grad_norm": 0.31262636184692383, |
| "learning_rate": 4.691240512629091e-05, |
| "loss": 0.7288, |
| "step": 55600 |
| }, |
| { |
| "epoch": 31.000913353863382, |
| "grad_norm": 0.9600892066955566, |
| "learning_rate": 4.689685205922608e-05, |
| "loss": 0.7114, |
| "step": 55700 |
| }, |
| { |
| "epoch": 31.001193309070548, |
| "grad_norm": 33.14706039428711, |
| "learning_rate": 4.688129899216126e-05, |
| "loss": 0.5404, |
| "step": 55800 |
| }, |
| { |
| "epoch": 31.001473264277717, |
| "grad_norm": 71.96259307861328, |
| "learning_rate": 4.686574592509643e-05, |
| "loss": 1.0186, |
| "step": 55900 |
| }, |
| { |
| "epoch": 31.001753219484883, |
| "grad_norm": 69.26034545898438, |
| "learning_rate": 4.685019285803161e-05, |
| "loss": 0.5707, |
| "step": 56000 |
| }, |
| { |
| "epoch": 31.00203317469205, |
| "grad_norm": 19.351402282714844, |
| "learning_rate": 4.683463979096679e-05, |
| "loss": 0.9263, |
| "step": 56100 |
| }, |
| { |
| "epoch": 31.002313129899218, |
| "grad_norm": 34.91017532348633, |
| "learning_rate": 4.681908672390196e-05, |
| "loss": 1.0285, |
| "step": 56200 |
| }, |
| { |
| "epoch": 31.002593085106383, |
| "grad_norm": 4.248389720916748, |
| "learning_rate": 4.680353365683713e-05, |
| "loss": 0.9372, |
| "step": 56300 |
| }, |
| { |
| "epoch": 31.00287304031355, |
| "grad_norm": 11.464951515197754, |
| "learning_rate": 4.678798058977231e-05, |
| "loss": 1.2444, |
| "step": 56400 |
| }, |
| { |
| "epoch": 31.00315299552072, |
| "grad_norm": 3.1078383922576904, |
| "learning_rate": 4.677242752270748e-05, |
| "loss": 0.9295, |
| "step": 56500 |
| }, |
| { |
| "epoch": 31.003432950727884, |
| "grad_norm": 36.12800979614258, |
| "learning_rate": 4.675687445564266e-05, |
| "loss": 0.9508, |
| "step": 56600 |
| }, |
| { |
| "epoch": 31.00371290593505, |
| "grad_norm": 31.067119598388672, |
| "learning_rate": 4.674132138857783e-05, |
| "loss": 0.9861, |
| "step": 56700 |
| }, |
| { |
| "epoch": 31.003992861142216, |
| "grad_norm": 2.1897337436676025, |
| "learning_rate": 4.6725768321513003e-05, |
| "loss": 1.031, |
| "step": 56800 |
| }, |
| { |
| "epoch": 31.004272816349385, |
| "grad_norm": 32.6429443359375, |
| "learning_rate": 4.671021525444818e-05, |
| "loss": 0.9505, |
| "step": 56900 |
| }, |
| { |
| "epoch": 31.00455277155655, |
| "grad_norm": 24.413639068603516, |
| "learning_rate": 4.6694662187383354e-05, |
| "loss": 0.9977, |
| "step": 57000 |
| }, |
| { |
| "epoch": 31.004832726763716, |
| "grad_norm": 3.9543466567993164, |
| "learning_rate": 4.667910912031853e-05, |
| "loss": 1.1572, |
| "step": 57100 |
| }, |
| { |
| "epoch": 31.00500069988802, |
| "eval_accuracy": 0.2870275791624106, |
| "eval_f1": 0.2544566698227792, |
| "eval_loss": 4.0120158195495605, |
| "eval_precision": 0.26635475344936527, |
| "eval_recall": 0.2870275791624106, |
| "eval_runtime": 226.0085, |
| "eval_samples_per_second": 17.327, |
| "eval_steps_per_second": 8.663, |
| "eval_top_10_accuracy": 0.68488253319714, |
| "eval_top_1_accuracy": 0.28677221654749746, |
| "eval_top_5_accuracy": 0.5786516853932584, |
| "step": 57160 |
| }, |
| { |
| "epoch": 32.00011198208286, |
| "grad_norm": 30.936607360839844, |
| "learning_rate": 4.6663556053253705e-05, |
| "loss": 0.8606, |
| "step": 57200 |
| }, |
| { |
| "epoch": 32.00039193729003, |
| "grad_norm": 0.24527552723884583, |
| "learning_rate": 4.6648002986188877e-05, |
| "loss": 0.4935, |
| "step": 57300 |
| }, |
| { |
| "epoch": 32.0006718924972, |
| "grad_norm": 0.4248438775539398, |
| "learning_rate": 4.6632449919124055e-05, |
| "loss": 0.7442, |
| "step": 57400 |
| }, |
| { |
| "epoch": 32.000951847704364, |
| "grad_norm": 16.248767852783203, |
| "learning_rate": 4.661689685205923e-05, |
| "loss": 0.4462, |
| "step": 57500 |
| }, |
| { |
| "epoch": 32.00123180291153, |
| "grad_norm": 2.6000514030456543, |
| "learning_rate": 4.6601343784994406e-05, |
| "loss": 0.6116, |
| "step": 57600 |
| }, |
| { |
| "epoch": 32.0015117581187, |
| "grad_norm": 0.5838096141815186, |
| "learning_rate": 4.658579071792958e-05, |
| "loss": 0.7441, |
| "step": 57700 |
| }, |
| { |
| "epoch": 32.001791713325865, |
| "grad_norm": 44.025821685791016, |
| "learning_rate": 4.657023765086475e-05, |
| "loss": 0.8305, |
| "step": 57800 |
| }, |
| { |
| "epoch": 32.002071668533034, |
| "grad_norm": 2.842480421066284, |
| "learning_rate": 4.655468458379993e-05, |
| "loss": 0.804, |
| "step": 57900 |
| }, |
| { |
| "epoch": 32.0023516237402, |
| "grad_norm": 38.31273651123047, |
| "learning_rate": 4.65391315167351e-05, |
| "loss": 1.0669, |
| "step": 58000 |
| }, |
| { |
| "epoch": 32.002631578947366, |
| "grad_norm": 2.9648852348327637, |
| "learning_rate": 4.652357844967028e-05, |
| "loss": 0.9346, |
| "step": 58100 |
| }, |
| { |
| "epoch": 32.002911534154535, |
| "grad_norm": 29.513328552246094, |
| "learning_rate": 4.650802538260545e-05, |
| "loss": 0.7173, |
| "step": 58200 |
| }, |
| { |
| "epoch": 32.003191489361704, |
| "grad_norm": 43.86397933959961, |
| "learning_rate": 4.649247231554062e-05, |
| "loss": 1.083, |
| "step": 58300 |
| }, |
| { |
| "epoch": 32.003471444568866, |
| "grad_norm": 24.38750648498535, |
| "learning_rate": 4.64769192484758e-05, |
| "loss": 1.1133, |
| "step": 58400 |
| }, |
| { |
| "epoch": 32.003751399776036, |
| "grad_norm": 2.4515841007232666, |
| "learning_rate": 4.646136618141097e-05, |
| "loss": 0.8093, |
| "step": 58500 |
| }, |
| { |
| "epoch": 32.004031354983205, |
| "grad_norm": 1.9614719152450562, |
| "learning_rate": 4.644581311434615e-05, |
| "loss": 1.1983, |
| "step": 58600 |
| }, |
| { |
| "epoch": 32.00431131019037, |
| "grad_norm": 1.845036506652832, |
| "learning_rate": 4.6430260047281324e-05, |
| "loss": 1.0139, |
| "step": 58700 |
| }, |
| { |
| "epoch": 32.004591265397536, |
| "grad_norm": 37.803550720214844, |
| "learning_rate": 4.64147069802165e-05, |
| "loss": 0.9255, |
| "step": 58800 |
| }, |
| { |
| "epoch": 32.004871220604706, |
| "grad_norm": 3.2317845821380615, |
| "learning_rate": 4.6399309443822326e-05, |
| "loss": 1.0663, |
| "step": 58900 |
| }, |
| { |
| "epoch": 32.005, |
| "eval_accuracy": 0.2763023493360572, |
| "eval_f1": 0.24460618831487968, |
| "eval_loss": 4.123536109924316, |
| "eval_precision": 0.25515447978420397, |
| "eval_recall": 0.2763023493360572, |
| "eval_runtime": 225.915, |
| "eval_samples_per_second": 17.334, |
| "eval_steps_per_second": 8.667, |
| "eval_top_10_accuracy": 0.673391215526047, |
| "eval_top_1_accuracy": 0.2763023493360572, |
| "eval_top_5_accuracy": 0.5702247191011236, |
| "step": 58946 |
| }, |
| { |
| "epoch": 33.00015047592385, |
| "grad_norm": 34.167762756347656, |
| "learning_rate": 4.63837563767575e-05, |
| "loss": 0.8821, |
| "step": 59000 |
| }, |
| { |
| "epoch": 33.00043043113102, |
| "grad_norm": 37.967613220214844, |
| "learning_rate": 4.636820330969268e-05, |
| "loss": 0.5966, |
| "step": 59100 |
| }, |
| { |
| "epoch": 33.000710386338184, |
| "grad_norm": 31.84786605834961, |
| "learning_rate": 4.635265024262785e-05, |
| "loss": 0.8587, |
| "step": 59200 |
| }, |
| { |
| "epoch": 33.000990341545354, |
| "grad_norm": 1.0245041847229004, |
| "learning_rate": 4.633709717556302e-05, |
| "loss": 0.641, |
| "step": 59300 |
| }, |
| { |
| "epoch": 33.00127029675252, |
| "grad_norm": 28.23471450805664, |
| "learning_rate": 4.63215441084982e-05, |
| "loss": 0.8437, |
| "step": 59400 |
| }, |
| { |
| "epoch": 33.001550251959685, |
| "grad_norm": 8.509440422058105, |
| "learning_rate": 4.630599104143337e-05, |
| "loss": 0.5655, |
| "step": 59500 |
| }, |
| { |
| "epoch": 33.001830207166854, |
| "grad_norm": 1.8368055820465088, |
| "learning_rate": 4.629043797436855e-05, |
| "loss": 0.6445, |
| "step": 59600 |
| }, |
| { |
| "epoch": 33.002110162374024, |
| "grad_norm": 1.10395085811615, |
| "learning_rate": 4.627488490730372e-05, |
| "loss": 0.8858, |
| "step": 59700 |
| }, |
| { |
| "epoch": 33.002390117581186, |
| "grad_norm": 31.648338317871094, |
| "learning_rate": 4.6259331840238893e-05, |
| "loss": 0.89, |
| "step": 59800 |
| }, |
| { |
| "epoch": 33.002670072788355, |
| "grad_norm": 37.29691696166992, |
| "learning_rate": 4.624377877317407e-05, |
| "loss": 0.9659, |
| "step": 59900 |
| }, |
| { |
| "epoch": 33.00295002799552, |
| "grad_norm": 0.43430274724960327, |
| "learning_rate": 4.6228225706109244e-05, |
| "loss": 0.8978, |
| "step": 60000 |
| }, |
| { |
| "epoch": 33.00322998320269, |
| "grad_norm": 35.974586486816406, |
| "learning_rate": 4.621267263904442e-05, |
| "loss": 0.8231, |
| "step": 60100 |
| }, |
| { |
| "epoch": 33.003509938409856, |
| "grad_norm": 8.3503999710083, |
| "learning_rate": 4.6197119571979595e-05, |
| "loss": 1.1412, |
| "step": 60200 |
| }, |
| { |
| "epoch": 33.00378989361702, |
| "grad_norm": 34.63167190551758, |
| "learning_rate": 4.6181566504914767e-05, |
| "loss": 1.1708, |
| "step": 60300 |
| }, |
| { |
| "epoch": 33.00406984882419, |
| "grad_norm": 25.57069969177246, |
| "learning_rate": 4.6166013437849945e-05, |
| "loss": 1.0096, |
| "step": 60400 |
| }, |
| { |
| "epoch": 33.00434980403136, |
| "grad_norm": 42.470584869384766, |
| "learning_rate": 4.615046037078512e-05, |
| "loss": 0.9288, |
| "step": 60500 |
| }, |
| { |
| "epoch": 33.00462975923852, |
| "grad_norm": 39.73023986816406, |
| "learning_rate": 4.6134907303720296e-05, |
| "loss": 0.8984, |
| "step": 60600 |
| }, |
| { |
| "epoch": 33.00490971444569, |
| "grad_norm": 32.1846923828125, |
| "learning_rate": 4.611935423665547e-05, |
| "loss": 1.085, |
| "step": 60700 |
| }, |
| { |
| "epoch": 33.00499930011198, |
| "eval_accuracy": 0.27145045965270687, |
| "eval_f1": 0.2402946650128174, |
| "eval_loss": 4.142614364624023, |
| "eval_precision": 0.2528004594086391, |
| "eval_recall": 0.27145045965270687, |
| "eval_runtime": 226.7122, |
| "eval_samples_per_second": 17.273, |
| "eval_steps_per_second": 8.637, |
| "eval_top_10_accuracy": 0.6782431052093973, |
| "eval_top_1_accuracy": 0.27145045965270687, |
| "eval_top_5_accuracy": 0.5717568947906027, |
| "step": 60732 |
| }, |
| { |
| "epoch": 34.000188969764835, |
| "grad_norm": 1.583235502243042, |
| "learning_rate": 4.6103801169590646e-05, |
| "loss": 0.7598, |
| "step": 60800 |
| }, |
| { |
| "epoch": 34.000468924972004, |
| "grad_norm": 41.911720275878906, |
| "learning_rate": 4.608840363319647e-05, |
| "loss": 0.6443, |
| "step": 60900 |
| }, |
| { |
| "epoch": 34.000748880179174, |
| "grad_norm": 0.4762161076068878, |
| "learning_rate": 4.607285056613164e-05, |
| "loss": 0.6395, |
| "step": 61000 |
| }, |
| { |
| "epoch": 34.001028835386336, |
| "grad_norm": 44.91167068481445, |
| "learning_rate": 4.605729749906682e-05, |
| "loss": 0.6762, |
| "step": 61100 |
| }, |
| { |
| "epoch": 34.001308790593505, |
| "grad_norm": 2.2632224559783936, |
| "learning_rate": 4.604174443200199e-05, |
| "loss": 0.6015, |
| "step": 61200 |
| }, |
| { |
| "epoch": 34.001588745800674, |
| "grad_norm": 0.8883789777755737, |
| "learning_rate": 4.6026191364937164e-05, |
| "loss": 0.8364, |
| "step": 61300 |
| }, |
| { |
| "epoch": 34.00186870100784, |
| "grad_norm": 0.21396863460540771, |
| "learning_rate": 4.601063829787234e-05, |
| "loss": 0.8178, |
| "step": 61400 |
| }, |
| { |
| "epoch": 34.002148656215006, |
| "grad_norm": 19.145397186279297, |
| "learning_rate": 4.5995085230807515e-05, |
| "loss": 0.7655, |
| "step": 61500 |
| }, |
| { |
| "epoch": 34.002428611422175, |
| "grad_norm": 34.72240447998047, |
| "learning_rate": 4.5979532163742694e-05, |
| "loss": 0.724, |
| "step": 61600 |
| }, |
| { |
| "epoch": 34.00270856662934, |
| "grad_norm": 0.7193199396133423, |
| "learning_rate": 4.5963979096677865e-05, |
| "loss": 0.8898, |
| "step": 61700 |
| }, |
| { |
| "epoch": 34.00298852183651, |
| "grad_norm": 41.06938934326172, |
| "learning_rate": 4.594842602961304e-05, |
| "loss": 0.9544, |
| "step": 61800 |
| }, |
| { |
| "epoch": 34.003268477043676, |
| "grad_norm": 31.43454933166504, |
| "learning_rate": 4.5932872962548216e-05, |
| "loss": 0.8788, |
| "step": 61900 |
| }, |
| { |
| "epoch": 34.00354843225084, |
| "grad_norm": 51.27229309082031, |
| "learning_rate": 4.591731989548339e-05, |
| "loss": 0.7306, |
| "step": 62000 |
| }, |
| { |
| "epoch": 34.00382838745801, |
| "grad_norm": 1.796972632408142, |
| "learning_rate": 4.590176682841857e-05, |
| "loss": 0.9033, |
| "step": 62100 |
| }, |
| { |
| "epoch": 34.00410834266518, |
| "grad_norm": 39.01441955566406, |
| "learning_rate": 4.588621376135374e-05, |
| "loss": 0.9423, |
| "step": 62200 |
| }, |
| { |
| "epoch": 34.00438829787234, |
| "grad_norm": 0.1421416848897934, |
| "learning_rate": 4.587066069428892e-05, |
| "loss": 1.0109, |
| "step": 62300 |
| }, |
| { |
| "epoch": 34.00466825307951, |
| "grad_norm": 32.059959411621094, |
| "learning_rate": 4.585510762722409e-05, |
| "loss": 1.0664, |
| "step": 62400 |
| }, |
| { |
| "epoch": 34.00494820828668, |
| "grad_norm": 47.05455780029297, |
| "learning_rate": 4.583955456015927e-05, |
| "loss": 1.3799, |
| "step": 62500 |
| }, |
| { |
| "epoch": 34.00499860022396, |
| "eval_accuracy": 0.27553626149131766, |
| "eval_f1": 0.24451187615318176, |
| "eval_loss": 4.101657390594482, |
| "eval_precision": 0.25940964549613826, |
| "eval_recall": 0.27553626149131766, |
| "eval_runtime": 226.0648, |
| "eval_samples_per_second": 17.322, |
| "eval_steps_per_second": 8.661, |
| "eval_top_10_accuracy": 0.6838610827374872, |
| "eval_top_1_accuracy": 0.27553626149131766, |
| "eval_top_5_accuracy": 0.5743105209397344, |
| "step": 62518 |
| }, |
| { |
| "epoch": 35.000227463605825, |
| "grad_norm": 39.157588958740234, |
| "learning_rate": 4.582400149309444e-05, |
| "loss": 0.5925, |
| "step": 62600 |
| }, |
| { |
| "epoch": 35.00050741881299, |
| "grad_norm": 13.6599760055542, |
| "learning_rate": 4.580844842602962e-05, |
| "loss": 0.6346, |
| "step": 62700 |
| }, |
| { |
| "epoch": 35.000787374020156, |
| "grad_norm": 0.17690366506576538, |
| "learning_rate": 4.579289535896479e-05, |
| "loss": 0.7364, |
| "step": 62800 |
| }, |
| { |
| "epoch": 35.001067329227325, |
| "grad_norm": 5.204614162445068, |
| "learning_rate": 4.577734229189997e-05, |
| "loss": 0.6949, |
| "step": 62900 |
| }, |
| { |
| "epoch": 35.00134728443449, |
| "grad_norm": 0.2207849770784378, |
| "learning_rate": 4.5761944755505786e-05, |
| "loss": 0.7559, |
| "step": 63000 |
| }, |
| { |
| "epoch": 35.00162723964166, |
| "grad_norm": 41.076332092285156, |
| "learning_rate": 4.5746391688440964e-05, |
| "loss": 0.7937, |
| "step": 63100 |
| }, |
| { |
| "epoch": 35.001907194848826, |
| "grad_norm": 47.679779052734375, |
| "learning_rate": 4.5730838621376136e-05, |
| "loss": 0.9563, |
| "step": 63200 |
| }, |
| { |
| "epoch": 35.00218715005599, |
| "grad_norm": 40.40637969970703, |
| "learning_rate": 4.571528555431131e-05, |
| "loss": 0.9855, |
| "step": 63300 |
| }, |
| { |
| "epoch": 35.00246710526316, |
| "grad_norm": 41.624244689941406, |
| "learning_rate": 4.569973248724649e-05, |
| "loss": 0.7511, |
| "step": 63400 |
| }, |
| { |
| "epoch": 35.00274706047033, |
| "grad_norm": 32.867408752441406, |
| "learning_rate": 4.568417942018166e-05, |
| "loss": 0.7931, |
| "step": 63500 |
| }, |
| { |
| "epoch": 35.00302701567749, |
| "grad_norm": 28.88155174255371, |
| "learning_rate": 4.566862635311684e-05, |
| "loss": 0.7677, |
| "step": 63600 |
| }, |
| { |
| "epoch": 35.00330697088466, |
| "grad_norm": 3.3230557441711426, |
| "learning_rate": 4.565307328605201e-05, |
| "loss": 0.9521, |
| "step": 63700 |
| }, |
| { |
| "epoch": 35.00358692609183, |
| "grad_norm": 56.24114227294922, |
| "learning_rate": 4.563752021898718e-05, |
| "loss": 0.9281, |
| "step": 63800 |
| }, |
| { |
| "epoch": 35.00386688129899, |
| "grad_norm": 0.7060623168945312, |
| "learning_rate": 4.562196715192236e-05, |
| "loss": 0.8661, |
| "step": 63900 |
| }, |
| { |
| "epoch": 35.00414683650616, |
| "grad_norm": 0.19642525911331177, |
| "learning_rate": 4.560641408485753e-05, |
| "loss": 1.1495, |
| "step": 64000 |
| }, |
| { |
| "epoch": 35.00442679171333, |
| "grad_norm": 0.7218245267868042, |
| "learning_rate": 4.559086101779271e-05, |
| "loss": 1.2618, |
| "step": 64100 |
| }, |
| { |
| "epoch": 35.00470674692049, |
| "grad_norm": 0.09785791486501694, |
| "learning_rate": 4.557530795072788e-05, |
| "loss": 0.8072, |
| "step": 64200 |
| }, |
| { |
| "epoch": 35.00498670212766, |
| "grad_norm": 2.7851946353912354, |
| "learning_rate": 4.555975488366306e-05, |
| "loss": 0.8419, |
| "step": 64300 |
| }, |
| { |
| "epoch": 35.005000699888015, |
| "eval_accuracy": 0.27936670071501535, |
| "eval_f1": 0.24866188033961273, |
| "eval_loss": 4.176891803741455, |
| "eval_precision": 0.261978146413795, |
| "eval_recall": 0.27936670071501535, |
| "eval_runtime": 226.5401, |
| "eval_samples_per_second": 17.286, |
| "eval_steps_per_second": 8.643, |
| "eval_top_10_accuracy": 0.683605720122574, |
| "eval_top_1_accuracy": 0.27936670071501535, |
| "eval_top_5_accuracy": 0.5727783452502554, |
| "step": 64305 |
| }, |
| { |
| "epoch": 36.00026595744681, |
| "grad_norm": 0.516681969165802, |
| "learning_rate": 4.554420181659824e-05, |
| "loss": 0.506, |
| "step": 64400 |
| }, |
| { |
| "epoch": 36.000545912653976, |
| "grad_norm": 0.44977912306785583, |
| "learning_rate": 4.552864874953341e-05, |
| "loss": 0.5155, |
| "step": 64500 |
| }, |
| { |
| "epoch": 36.000825867861145, |
| "grad_norm": 36.72551727294922, |
| "learning_rate": 4.551309568246859e-05, |
| "loss": 0.7122, |
| "step": 64600 |
| }, |
| { |
| "epoch": 36.00110582306831, |
| "grad_norm": 4.889037609100342, |
| "learning_rate": 4.549754261540376e-05, |
| "loss": 0.5901, |
| "step": 64700 |
| }, |
| { |
| "epoch": 36.00138577827548, |
| "grad_norm": 1.347294569015503, |
| "learning_rate": 4.5481989548338934e-05, |
| "loss": 0.7308, |
| "step": 64800 |
| }, |
| { |
| "epoch": 36.001665733482646, |
| "grad_norm": 27.506364822387695, |
| "learning_rate": 4.546643648127411e-05, |
| "loss": 0.8497, |
| "step": 64900 |
| }, |
| { |
| "epoch": 36.00194568868981, |
| "grad_norm": 10.64461898803711, |
| "learning_rate": 4.5450883414209285e-05, |
| "loss": 0.613, |
| "step": 65000 |
| }, |
| { |
| "epoch": 36.00222564389698, |
| "grad_norm": 46.96599197387695, |
| "learning_rate": 4.543548587781511e-05, |
| "loss": 1.1151, |
| "step": 65100 |
| }, |
| { |
| "epoch": 36.00250559910414, |
| "grad_norm": 2.2545366287231445, |
| "learning_rate": 4.541993281075028e-05, |
| "loss": 0.7595, |
| "step": 65200 |
| }, |
| { |
| "epoch": 36.00278555431131, |
| "grad_norm": 0.24742832779884338, |
| "learning_rate": 4.540437974368545e-05, |
| "loss": 1.0187, |
| "step": 65300 |
| }, |
| { |
| "epoch": 36.00306550951848, |
| "grad_norm": 0.34370434284210205, |
| "learning_rate": 4.538882667662063e-05, |
| "loss": 0.748, |
| "step": 65400 |
| }, |
| { |
| "epoch": 36.00334546472564, |
| "grad_norm": 0.0902349129319191, |
| "learning_rate": 4.53732736095558e-05, |
| "loss": 0.8016, |
| "step": 65500 |
| }, |
| { |
| "epoch": 36.00362541993281, |
| "grad_norm": 5.595888137817383, |
| "learning_rate": 4.535772054249098e-05, |
| "loss": 1.2848, |
| "step": 65600 |
| }, |
| { |
| "epoch": 36.00390537513998, |
| "grad_norm": 0.23979386687278748, |
| "learning_rate": 4.534216747542615e-05, |
| "loss": 0.8729, |
| "step": 65700 |
| }, |
| { |
| "epoch": 36.00418533034714, |
| "grad_norm": 2.2355868816375732, |
| "learning_rate": 4.532661440836133e-05, |
| "loss": 0.8902, |
| "step": 65800 |
| }, |
| { |
| "epoch": 36.00446528555431, |
| "grad_norm": 1.7017910480499268, |
| "learning_rate": 4.5311061341296504e-05, |
| "loss": 1.1558, |
| "step": 65900 |
| }, |
| { |
| "epoch": 36.00474524076148, |
| "grad_norm": 0.6865089535713196, |
| "learning_rate": 4.529550827423168e-05, |
| "loss": 1.1308, |
| "step": 66000 |
| }, |
| { |
| "epoch": 36.005, |
| "eval_accuracy": 0.27936670071501535, |
| "eval_f1": 0.24678017904603605, |
| "eval_loss": 4.133280277252197, |
| "eval_precision": 0.2568876401518128, |
| "eval_recall": 0.27936670071501535, |
| "eval_runtime": 227.4478, |
| "eval_samples_per_second": 17.217, |
| "eval_steps_per_second": 8.609, |
| "eval_top_10_accuracy": 0.6889683350357507, |
| "eval_top_1_accuracy": 0.2796220633299285, |
| "eval_top_5_accuracy": 0.5804392236976507, |
| "step": 66091 |
| }, |
| { |
| "epoch": 37.00002449608063, |
| "grad_norm": 4.969370365142822, |
| "learning_rate": 4.5279955207166854e-05, |
| "loss": 0.9023, |
| "step": 66100 |
| }, |
| { |
| "epoch": 37.000304451287796, |
| "grad_norm": 3.5516724586486816, |
| "learning_rate": 4.526440214010203e-05, |
| "loss": 0.4191, |
| "step": 66200 |
| }, |
| { |
| "epoch": 37.00058440649496, |
| "grad_norm": 1.1282627582550049, |
| "learning_rate": 4.5248849073037205e-05, |
| "loss": 0.5857, |
| "step": 66300 |
| }, |
| { |
| "epoch": 37.00086436170213, |
| "grad_norm": 0.38395920395851135, |
| "learning_rate": 4.5233296005972384e-05, |
| "loss": 0.5167, |
| "step": 66400 |
| }, |
| { |
| "epoch": 37.0011443169093, |
| "grad_norm": 0.11064700782299042, |
| "learning_rate": 4.5217742938907556e-05, |
| "loss": 0.4823, |
| "step": 66500 |
| }, |
| { |
| "epoch": 37.00142427211646, |
| "grad_norm": 1.7022432088851929, |
| "learning_rate": 4.5202189871842734e-05, |
| "loss": 0.8452, |
| "step": 66600 |
| }, |
| { |
| "epoch": 37.00170422732363, |
| "grad_norm": 34.14882278442383, |
| "learning_rate": 4.5186636804777906e-05, |
| "loss": 0.6892, |
| "step": 66700 |
| }, |
| { |
| "epoch": 37.0019841825308, |
| "grad_norm": 29.29656219482422, |
| "learning_rate": 4.517108373771308e-05, |
| "loss": 0.7937, |
| "step": 66800 |
| }, |
| { |
| "epoch": 37.00226413773796, |
| "grad_norm": 23.31835174560547, |
| "learning_rate": 4.515553067064826e-05, |
| "loss": 0.8147, |
| "step": 66900 |
| }, |
| { |
| "epoch": 37.00254409294513, |
| "grad_norm": 29.693347930908203, |
| "learning_rate": 4.513997760358343e-05, |
| "loss": 0.9029, |
| "step": 67000 |
| }, |
| { |
| "epoch": 37.0028240481523, |
| "grad_norm": 32.401817321777344, |
| "learning_rate": 4.512458006718925e-05, |
| "loss": 0.8017, |
| "step": 67100 |
| }, |
| { |
| "epoch": 37.00310400335946, |
| "grad_norm": 66.0666732788086, |
| "learning_rate": 4.510933806146572e-05, |
| "loss": 0.9821, |
| "step": 67200 |
| }, |
| { |
| "epoch": 37.00338395856663, |
| "grad_norm": 2.4841063022613525, |
| "learning_rate": 4.50937849944009e-05, |
| "loss": 0.755, |
| "step": 67300 |
| }, |
| { |
| "epoch": 37.0036639137738, |
| "grad_norm": 28.219749450683594, |
| "learning_rate": 4.507823192733607e-05, |
| "loss": 1.0117, |
| "step": 67400 |
| }, |
| { |
| "epoch": 37.00394386898096, |
| "grad_norm": 0.13683520257472992, |
| "learning_rate": 4.506267886027124e-05, |
| "loss": 0.9562, |
| "step": 67500 |
| }, |
| { |
| "epoch": 37.00422382418813, |
| "grad_norm": 0.6364304423332214, |
| "learning_rate": 4.504712579320642e-05, |
| "loss": 1.1528, |
| "step": 67600 |
| }, |
| { |
| "epoch": 37.0045037793953, |
| "grad_norm": 2.150608539581299, |
| "learning_rate": 4.5031572726141594e-05, |
| "loss": 0.8858, |
| "step": 67700 |
| }, |
| { |
| "epoch": 37.00478373460246, |
| "grad_norm": 5.197592258453369, |
| "learning_rate": 4.501601965907677e-05, |
| "loss": 0.9785, |
| "step": 67800 |
| }, |
| { |
| "epoch": 37.00499930011198, |
| "eval_accuracy": 0.2770684371807967, |
| "eval_f1": 0.2470607814773372, |
| "eval_loss": 4.239144325256348, |
| "eval_precision": 0.26169534456877486, |
| "eval_recall": 0.2770684371807967, |
| "eval_runtime": 222.8835, |
| "eval_samples_per_second": 17.57, |
| "eval_steps_per_second": 8.785, |
| "eval_top_10_accuracy": 0.6654749744637385, |
| "eval_top_1_accuracy": 0.2770684371807967, |
| "eval_top_5_accuracy": 0.559244126659857, |
| "step": 67877 |
| }, |
| { |
| "epoch": 38.00006298992161, |
| "grad_norm": 19.93975067138672, |
| "learning_rate": 4.5000466592011944e-05, |
| "loss": 1.0973, |
| "step": 67900 |
| }, |
| { |
| "epoch": 38.00034294512878, |
| "grad_norm": 3.1067473888397217, |
| "learning_rate": 4.498491352494712e-05, |
| "loss": 0.5088, |
| "step": 68000 |
| }, |
| { |
| "epoch": 38.00062290033595, |
| "grad_norm": 11.543988227844238, |
| "learning_rate": 4.4969360457882295e-05, |
| "loss": 0.6467, |
| "step": 68100 |
| }, |
| { |
| "epoch": 38.00090285554311, |
| "grad_norm": 0.6623213291168213, |
| "learning_rate": 4.4953807390817474e-05, |
| "loss": 0.5682, |
| "step": 68200 |
| }, |
| { |
| "epoch": 38.00118281075028, |
| "grad_norm": 30.84617805480957, |
| "learning_rate": 4.4938254323752646e-05, |
| "loss": 0.6804, |
| "step": 68300 |
| }, |
| { |
| "epoch": 38.00146276595745, |
| "grad_norm": 44.52362823486328, |
| "learning_rate": 4.4922701256687824e-05, |
| "loss": 0.6082, |
| "step": 68400 |
| }, |
| { |
| "epoch": 38.00174272116461, |
| "grad_norm": 0.663332998752594, |
| "learning_rate": 4.4907148189622996e-05, |
| "loss": 0.6514, |
| "step": 68500 |
| }, |
| { |
| "epoch": 38.00202267637178, |
| "grad_norm": 33.60304641723633, |
| "learning_rate": 4.4891595122558175e-05, |
| "loss": 0.7611, |
| "step": 68600 |
| }, |
| { |
| "epoch": 38.00230263157895, |
| "grad_norm": 0.861744225025177, |
| "learning_rate": 4.487604205549335e-05, |
| "loss": 0.64, |
| "step": 68700 |
| }, |
| { |
| "epoch": 38.00258258678611, |
| "grad_norm": 37.43878936767578, |
| "learning_rate": 4.4860488988428525e-05, |
| "loss": 0.7982, |
| "step": 68800 |
| }, |
| { |
| "epoch": 38.00286254199328, |
| "grad_norm": 18.1073055267334, |
| "learning_rate": 4.48449359213637e-05, |
| "loss": 1.1362, |
| "step": 68900 |
| }, |
| { |
| "epoch": 38.00314249720045, |
| "grad_norm": 32.217166900634766, |
| "learning_rate": 4.482938285429887e-05, |
| "loss": 0.6346, |
| "step": 69000 |
| }, |
| { |
| "epoch": 38.00342245240761, |
| "grad_norm": 19.9949951171875, |
| "learning_rate": 4.481382978723405e-05, |
| "loss": 1.0563, |
| "step": 69100 |
| }, |
| { |
| "epoch": 38.00370240761478, |
| "grad_norm": 6.759098052978516, |
| "learning_rate": 4.479827672016922e-05, |
| "loss": 1.0793, |
| "step": 69200 |
| }, |
| { |
| "epoch": 38.00398236282195, |
| "grad_norm": 31.205677032470703, |
| "learning_rate": 4.47827236531044e-05, |
| "loss": 0.9697, |
| "step": 69300 |
| }, |
| { |
| "epoch": 38.00426231802911, |
| "grad_norm": 16.609729766845703, |
| "learning_rate": 4.476717058603957e-05, |
| "loss": 0.9377, |
| "step": 69400 |
| }, |
| { |
| "epoch": 38.00454227323628, |
| "grad_norm": 0.45362696051597595, |
| "learning_rate": 4.475161751897474e-05, |
| "loss": 1.1853, |
| "step": 69500 |
| }, |
| { |
| "epoch": 38.00482222844345, |
| "grad_norm": 47.770931243896484, |
| "learning_rate": 4.473606445190992e-05, |
| "loss": 0.923, |
| "step": 69600 |
| }, |
| { |
| "epoch": 38.00499860022396, |
| "eval_accuracy": 0.28038815117466803, |
| "eval_f1": 0.24841193213143883, |
| "eval_loss": 4.227442264556885, |
| "eval_precision": 0.25828937193211965, |
| "eval_recall": 0.28038815117466803, |
| "eval_runtime": 223.4537, |
| "eval_samples_per_second": 17.525, |
| "eval_steps_per_second": 8.762, |
| "eval_top_10_accuracy": 0.6682839632277835, |
| "eval_top_1_accuracy": 0.28038815117466803, |
| "eval_top_5_accuracy": 0.5768641470888662, |
| "step": 69663 |
| }, |
| { |
| "epoch": 39.0001014837626, |
| "grad_norm": 14.46917724609375, |
| "learning_rate": 4.472051138484509e-05, |
| "loss": 0.7307, |
| "step": 69700 |
| }, |
| { |
| "epoch": 39.00038143896977, |
| "grad_norm": 1.327642798423767, |
| "learning_rate": 4.470495831778027e-05, |
| "loss": 0.5555, |
| "step": 69800 |
| }, |
| { |
| "epoch": 39.00066139417693, |
| "grad_norm": 20.620176315307617, |
| "learning_rate": 4.4689405250715443e-05, |
| "loss": 0.4745, |
| "step": 69900 |
| }, |
| { |
| "epoch": 39.0009413493841, |
| "grad_norm": 0.19782160222530365, |
| "learning_rate": 4.4673852183650615e-05, |
| "loss": 0.6574, |
| "step": 70000 |
| }, |
| { |
| "epoch": 39.00122130459127, |
| "grad_norm": 1.6866756677627563, |
| "learning_rate": 4.4658299116585794e-05, |
| "loss": 0.6071, |
| "step": 70100 |
| }, |
| { |
| "epoch": 39.00150125979843, |
| "grad_norm": 0.5778133273124695, |
| "learning_rate": 4.4642746049520966e-05, |
| "loss": 0.6044, |
| "step": 70200 |
| }, |
| { |
| "epoch": 39.0017812150056, |
| "grad_norm": 0.14593109488487244, |
| "learning_rate": 4.4627192982456145e-05, |
| "loss": 0.8319, |
| "step": 70300 |
| }, |
| { |
| "epoch": 39.00206117021276, |
| "grad_norm": 29.488271713256836, |
| "learning_rate": 4.4611639915391316e-05, |
| "loss": 1.0373, |
| "step": 70400 |
| }, |
| { |
| "epoch": 39.00234112541993, |
| "grad_norm": 36.36933135986328, |
| "learning_rate": 4.459608684832649e-05, |
| "loss": 0.7747, |
| "step": 70500 |
| }, |
| { |
| "epoch": 39.0026210806271, |
| "grad_norm": 0.10845132917165756, |
| "learning_rate": 4.458053378126167e-05, |
| "loss": 0.7393, |
| "step": 70600 |
| }, |
| { |
| "epoch": 39.00290103583426, |
| "grad_norm": 10.269367218017578, |
| "learning_rate": 4.456498071419684e-05, |
| "loss": 0.8106, |
| "step": 70700 |
| }, |
| { |
| "epoch": 39.00318099104143, |
| "grad_norm": 27.30816078186035, |
| "learning_rate": 4.454942764713202e-05, |
| "loss": 1.0046, |
| "step": 70800 |
| }, |
| { |
| "epoch": 39.0034609462486, |
| "grad_norm": 0.13129769265651703, |
| "learning_rate": 4.453387458006719e-05, |
| "loss": 1.0303, |
| "step": 70900 |
| }, |
| { |
| "epoch": 39.003740901455764, |
| "grad_norm": 1.489894986152649, |
| "learning_rate": 4.451832151300236e-05, |
| "loss": 1.0482, |
| "step": 71000 |
| }, |
| { |
| "epoch": 39.00402085666293, |
| "grad_norm": 38.621490478515625, |
| "learning_rate": 4.450276844593754e-05, |
| "loss": 0.7289, |
| "step": 71100 |
| }, |
| { |
| "epoch": 39.0043008118701, |
| "grad_norm": 1.9483338594436646, |
| "learning_rate": 4.448721537887271e-05, |
| "loss": 0.8243, |
| "step": 71200 |
| }, |
| { |
| "epoch": 39.004580767077265, |
| "grad_norm": 41.24127197265625, |
| "learning_rate": 4.447166231180789e-05, |
| "loss": 1.0618, |
| "step": 71300 |
| }, |
| { |
| "epoch": 39.004860722284434, |
| "grad_norm": 0.1476653516292572, |
| "learning_rate": 4.445610924474307e-05, |
| "loss": 0.9857, |
| "step": 71400 |
| }, |
| { |
| "epoch": 39.005000699888015, |
| "eval_accuracy": 0.2819203268641471, |
| "eval_f1": 0.24980865661407028, |
| "eval_loss": 4.2032647132873535, |
| "eval_precision": 0.2609707892266626, |
| "eval_recall": 0.2819203268641471, |
| "eval_runtime": 222.7035, |
| "eval_samples_per_second": 17.584, |
| "eval_steps_per_second": 8.792, |
| "eval_top_10_accuracy": 0.6861593462717058, |
| "eval_top_1_accuracy": 0.2814096016343207, |
| "eval_top_5_accuracy": 0.5829928498467825, |
| "step": 71450 |
| }, |
| { |
| "epoch": 40.00013997760358, |
| "grad_norm": 0.46101224422454834, |
| "learning_rate": 4.444055617767824e-05, |
| "loss": 0.8026, |
| "step": 71500 |
| }, |
| { |
| "epoch": 40.00041993281075, |
| "grad_norm": 1.734413504600525, |
| "learning_rate": 4.442500311061342e-05, |
| "loss": 0.4295, |
| "step": 71600 |
| }, |
| { |
| "epoch": 40.00069988801792, |
| "grad_norm": 0.5759927034378052, |
| "learning_rate": 4.440945004354859e-05, |
| "loss": 0.5855, |
| "step": 71700 |
| }, |
| { |
| "epoch": 40.00097984322508, |
| "grad_norm": 0.35544517636299133, |
| "learning_rate": 4.4394052507154415e-05, |
| "loss": 0.6253, |
| "step": 71800 |
| }, |
| { |
| "epoch": 40.00125979843225, |
| "grad_norm": 1.3745167255401611, |
| "learning_rate": 4.437849944008959e-05, |
| "loss": 0.7667, |
| "step": 71900 |
| }, |
| { |
| "epoch": 40.00153975363942, |
| "grad_norm": 38.11959457397461, |
| "learning_rate": 4.436294637302476e-05, |
| "loss": 0.7042, |
| "step": 72000 |
| }, |
| { |
| "epoch": 40.00181970884658, |
| "grad_norm": 0.17603681981563568, |
| "learning_rate": 4.434739330595994e-05, |
| "loss": 0.6599, |
| "step": 72100 |
| }, |
| { |
| "epoch": 40.00209966405375, |
| "grad_norm": 15.766741752624512, |
| "learning_rate": 4.433184023889511e-05, |
| "loss": 1.0079, |
| "step": 72200 |
| }, |
| { |
| "epoch": 40.00237961926092, |
| "grad_norm": 5.181120872497559, |
| "learning_rate": 4.431628717183029e-05, |
| "loss": 0.7113, |
| "step": 72300 |
| }, |
| { |
| "epoch": 40.00265957446808, |
| "grad_norm": 0.3363553285598755, |
| "learning_rate": 4.430073410476546e-05, |
| "loss": 0.8276, |
| "step": 72400 |
| }, |
| { |
| "epoch": 40.00293952967525, |
| "grad_norm": 33.76218795776367, |
| "learning_rate": 4.428518103770063e-05, |
| "loss": 0.8144, |
| "step": 72500 |
| }, |
| { |
| "epoch": 40.00321948488242, |
| "grad_norm": 0.08487240225076675, |
| "learning_rate": 4.426962797063581e-05, |
| "loss": 0.8064, |
| "step": 72600 |
| }, |
| { |
| "epoch": 40.003499440089584, |
| "grad_norm": 28.670927047729492, |
| "learning_rate": 4.425407490357098e-05, |
| "loss": 0.8729, |
| "step": 72700 |
| }, |
| { |
| "epoch": 40.00377939529675, |
| "grad_norm": 25.651275634765625, |
| "learning_rate": 4.423852183650616e-05, |
| "loss": 0.8185, |
| "step": 72800 |
| }, |
| { |
| "epoch": 40.00405935050392, |
| "grad_norm": 0.2041385918855667, |
| "learning_rate": 4.4222968769441333e-05, |
| "loss": 1.0474, |
| "step": 72900 |
| }, |
| { |
| "epoch": 40.004339305711085, |
| "grad_norm": 27.669574737548828, |
| "learning_rate": 4.420741570237651e-05, |
| "loss": 0.8934, |
| "step": 73000 |
| }, |
| { |
| "epoch": 40.004619260918254, |
| "grad_norm": 0.12979120016098022, |
| "learning_rate": 4.4191862635311684e-05, |
| "loss": 1.1923, |
| "step": 73100 |
| }, |
| { |
| "epoch": 40.00489921612542, |
| "grad_norm": 56.33195877075195, |
| "learning_rate": 4.417630956824686e-05, |
| "loss": 0.7679, |
| "step": 73200 |
| }, |
| { |
| "epoch": 40.005, |
| "eval_accuracy": 0.2844739530132789, |
| "eval_f1": 0.25178392253063286, |
| "eval_loss": 4.198288440704346, |
| "eval_precision": 0.2640017201048866, |
| "eval_recall": 0.2844739530132789, |
| "eval_runtime": 225.0507, |
| "eval_samples_per_second": 17.401, |
| "eval_steps_per_second": 8.7, |
| "eval_top_10_accuracy": 0.6833503575076609, |
| "eval_top_1_accuracy": 0.2844739530132789, |
| "eval_top_5_accuracy": 0.5860572012257406, |
| "step": 73236 |
| }, |
| { |
| "epoch": 41.00017847144457, |
| "grad_norm": 40.168697357177734, |
| "learning_rate": 4.4160756501182035e-05, |
| "loss": 0.8715, |
| "step": 73300 |
| }, |
| { |
| "epoch": 41.00045842665173, |
| "grad_norm": 20.46622085571289, |
| "learning_rate": 4.414520343411721e-05, |
| "loss": 0.5017, |
| "step": 73400 |
| }, |
| { |
| "epoch": 41.0007383818589, |
| "grad_norm": 0.17491298913955688, |
| "learning_rate": 4.4129650367052385e-05, |
| "loss": 0.6206, |
| "step": 73500 |
| }, |
| { |
| "epoch": 41.00101833706607, |
| "grad_norm": 19.896448135375977, |
| "learning_rate": 4.4114097299987564e-05, |
| "loss": 0.6051, |
| "step": 73600 |
| }, |
| { |
| "epoch": 41.00129829227323, |
| "grad_norm": 0.23201890289783478, |
| "learning_rate": 4.4098544232922736e-05, |
| "loss": 0.5626, |
| "step": 73700 |
| }, |
| { |
| "epoch": 41.0015782474804, |
| "grad_norm": 26.803802490234375, |
| "learning_rate": 4.4082991165857914e-05, |
| "loss": 0.7498, |
| "step": 73800 |
| }, |
| { |
| "epoch": 41.00185820268757, |
| "grad_norm": 0.059675004333257675, |
| "learning_rate": 4.4067438098793086e-05, |
| "loss": 0.5219, |
| "step": 73900 |
| }, |
| { |
| "epoch": 41.002138157894734, |
| "grad_norm": 1.5195122957229614, |
| "learning_rate": 4.405188503172826e-05, |
| "loss": 0.5764, |
| "step": 74000 |
| }, |
| { |
| "epoch": 41.0024181131019, |
| "grad_norm": 26.826339721679688, |
| "learning_rate": 4.403633196466344e-05, |
| "loss": 0.8804, |
| "step": 74100 |
| }, |
| { |
| "epoch": 41.00269806830907, |
| "grad_norm": 62.245506286621094, |
| "learning_rate": 4.402077889759861e-05, |
| "loss": 1.0212, |
| "step": 74200 |
| }, |
| { |
| "epoch": 41.002978023516235, |
| "grad_norm": 14.845590591430664, |
| "learning_rate": 4.400522583053379e-05, |
| "loss": 0.7602, |
| "step": 74300 |
| }, |
| { |
| "epoch": 41.003257978723404, |
| "grad_norm": 0.10195853561162949, |
| "learning_rate": 4.398967276346896e-05, |
| "loss": 0.9441, |
| "step": 74400 |
| }, |
| { |
| "epoch": 41.00353793393057, |
| "grad_norm": 20.029823303222656, |
| "learning_rate": 4.397411969640413e-05, |
| "loss": 0.9074, |
| "step": 74500 |
| }, |
| { |
| "epoch": 41.003817889137736, |
| "grad_norm": 0.1141628846526146, |
| "learning_rate": 4.395856662933931e-05, |
| "loss": 0.9763, |
| "step": 74600 |
| }, |
| { |
| "epoch": 41.004097844344905, |
| "grad_norm": 10.608264923095703, |
| "learning_rate": 4.394301356227448e-05, |
| "loss": 0.8389, |
| "step": 74700 |
| }, |
| { |
| "epoch": 41.004377799552074, |
| "grad_norm": 29.61349868774414, |
| "learning_rate": 4.3927460495209654e-05, |
| "loss": 1.0023, |
| "step": 74800 |
| }, |
| { |
| "epoch": 41.004657754759236, |
| "grad_norm": 40.82460403442383, |
| "learning_rate": 4.391190742814483e-05, |
| "loss": 0.783, |
| "step": 74900 |
| }, |
| { |
| "epoch": 41.004937709966406, |
| "grad_norm": 0.5806564092636108, |
| "learning_rate": 4.3896354361080004e-05, |
| "loss": 0.8991, |
| "step": 75000 |
| }, |
| { |
| "epoch": 41.00499930011198, |
| "eval_accuracy": 0.28115423901940756, |
| "eval_f1": 0.25330357027391814, |
| "eval_loss": 4.209905624389648, |
| "eval_precision": 0.2713199055512641, |
| "eval_recall": 0.28115423901940756, |
| "eval_runtime": 222.5071, |
| "eval_samples_per_second": 17.599, |
| "eval_steps_per_second": 8.8, |
| "eval_top_10_accuracy": 0.6882022471910112, |
| "eval_top_1_accuracy": 0.28115423901940756, |
| "eval_top_5_accuracy": 0.5873340143003064, |
| "step": 75022 |
| }, |
| { |
| "epoch": 42.00021696528555, |
| "grad_norm": 1.9568456411361694, |
| "learning_rate": 4.388080129401518e-05, |
| "loss": 0.5599, |
| "step": 75100 |
| }, |
| { |
| "epoch": 42.00049692049272, |
| "grad_norm": 0.06384823471307755, |
| "learning_rate": 4.3865248226950355e-05, |
| "loss": 0.4513, |
| "step": 75200 |
| }, |
| { |
| "epoch": 42.00077687569989, |
| "grad_norm": 37.387027740478516, |
| "learning_rate": 4.384969515988553e-05, |
| "loss": 0.6817, |
| "step": 75300 |
| }, |
| { |
| "epoch": 42.00105683090705, |
| "grad_norm": 3.1402885913848877, |
| "learning_rate": 4.3834142092820705e-05, |
| "loss": 0.579, |
| "step": 75400 |
| }, |
| { |
| "epoch": 42.00133678611422, |
| "grad_norm": 24.56217384338379, |
| "learning_rate": 4.381858902575588e-05, |
| "loss": 0.5672, |
| "step": 75500 |
| }, |
| { |
| "epoch": 42.00161674132139, |
| "grad_norm": 16.695140838623047, |
| "learning_rate": 4.3803035958691056e-05, |
| "loss": 0.7093, |
| "step": 75600 |
| }, |
| { |
| "epoch": 42.001896696528554, |
| "grad_norm": 0.4463357925415039, |
| "learning_rate": 4.378748289162623e-05, |
| "loss": 0.568, |
| "step": 75700 |
| }, |
| { |
| "epoch": 42.002176651735724, |
| "grad_norm": 10.364665985107422, |
| "learning_rate": 4.377208535523206e-05, |
| "loss": 0.7371, |
| "step": 75800 |
| }, |
| { |
| "epoch": 42.002456606942886, |
| "grad_norm": 0.2388359159231186, |
| "learning_rate": 4.375653228816723e-05, |
| "loss": 0.6993, |
| "step": 75900 |
| }, |
| { |
| "epoch": 42.002736562150055, |
| "grad_norm": 1.1912405490875244, |
| "learning_rate": 4.37409792211024e-05, |
| "loss": 0.58, |
| "step": 76000 |
| }, |
| { |
| "epoch": 42.003016517357224, |
| "grad_norm": 0.1253499835729599, |
| "learning_rate": 4.372542615403758e-05, |
| "loss": 0.8355, |
| "step": 76100 |
| }, |
| { |
| "epoch": 42.00329647256439, |
| "grad_norm": 0.13281774520874023, |
| "learning_rate": 4.370987308697275e-05, |
| "loss": 0.9043, |
| "step": 76200 |
| }, |
| { |
| "epoch": 42.003576427771556, |
| "grad_norm": 0.41041648387908936, |
| "learning_rate": 4.369432001990793e-05, |
| "loss": 0.7185, |
| "step": 76300 |
| }, |
| { |
| "epoch": 42.003856382978725, |
| "grad_norm": 50.560123443603516, |
| "learning_rate": 4.36787669528431e-05, |
| "loss": 1.075, |
| "step": 76400 |
| }, |
| { |
| "epoch": 42.00413633818589, |
| "grad_norm": 0.2037896066904068, |
| "learning_rate": 4.3663213885778275e-05, |
| "loss": 0.9476, |
| "step": 76500 |
| }, |
| { |
| "epoch": 42.00441629339306, |
| "grad_norm": 0.8026627898216248, |
| "learning_rate": 4.3647660818713454e-05, |
| "loss": 0.9955, |
| "step": 76600 |
| }, |
| { |
| "epoch": 42.004696248600226, |
| "grad_norm": 21.437389373779297, |
| "learning_rate": 4.3632107751648626e-05, |
| "loss": 1.0739, |
| "step": 76700 |
| }, |
| { |
| "epoch": 42.00497620380739, |
| "grad_norm": 13.451566696166992, |
| "learning_rate": 4.3616554684583804e-05, |
| "loss": 1.1176, |
| "step": 76800 |
| }, |
| { |
| "epoch": 42.00499860022396, |
| "eval_accuracy": 0.2765577119509704, |
| "eval_f1": 0.24552088339676698, |
| "eval_loss": 4.34188985824585, |
| "eval_precision": 0.2595869870982841, |
| "eval_recall": 0.2765577119509704, |
| "eval_runtime": 226.8112, |
| "eval_samples_per_second": 17.265, |
| "eval_steps_per_second": 8.633, |
| "eval_top_10_accuracy": 0.673391215526047, |
| "eval_top_1_accuracy": 0.27681307456588355, |
| "eval_top_5_accuracy": 0.5686925434116445, |
| "step": 76808 |
| }, |
| { |
| "epoch": 43.00025545912654, |
| "grad_norm": 0.03739053010940552, |
| "learning_rate": 4.3601001617518976e-05, |
| "loss": 0.4309, |
| "step": 76900 |
| }, |
| { |
| "epoch": 43.000535414333704, |
| "grad_norm": 0.10674834996461868, |
| "learning_rate": 4.358544855045415e-05, |
| "loss": 0.4421, |
| "step": 77000 |
| }, |
| { |
| "epoch": 43.000815369540874, |
| "grad_norm": 0.10748764872550964, |
| "learning_rate": 4.356989548338933e-05, |
| "loss": 0.6075, |
| "step": 77100 |
| }, |
| { |
| "epoch": 43.00109532474804, |
| "grad_norm": 9.194770812988281, |
| "learning_rate": 4.35543424163245e-05, |
| "loss": 0.6129, |
| "step": 77200 |
| }, |
| { |
| "epoch": 43.001375279955205, |
| "grad_norm": 0.1717982441186905, |
| "learning_rate": 4.353878934925968e-05, |
| "loss": 0.5179, |
| "step": 77300 |
| }, |
| { |
| "epoch": 43.001655235162374, |
| "grad_norm": 0.15550194680690765, |
| "learning_rate": 4.352323628219485e-05, |
| "loss": 0.7215, |
| "step": 77400 |
| }, |
| { |
| "epoch": 43.001935190369544, |
| "grad_norm": 0.15924519300460815, |
| "learning_rate": 4.350768321513002e-05, |
| "loss": 0.5473, |
| "step": 77500 |
| }, |
| { |
| "epoch": 43.002215145576706, |
| "grad_norm": 32.287445068359375, |
| "learning_rate": 4.34921301480652e-05, |
| "loss": 0.801, |
| "step": 77600 |
| }, |
| { |
| "epoch": 43.002495100783875, |
| "grad_norm": 0.5695823431015015, |
| "learning_rate": 4.347657708100037e-05, |
| "loss": 0.8792, |
| "step": 77700 |
| }, |
| { |
| "epoch": 43.002775055991044, |
| "grad_norm": 21.394012451171875, |
| "learning_rate": 4.346102401393555e-05, |
| "loss": 0.8703, |
| "step": 77800 |
| }, |
| { |
| "epoch": 43.00305501119821, |
| "grad_norm": 0.2652227580547333, |
| "learning_rate": 4.3445626477541374e-05, |
| "loss": 0.5404, |
| "step": 77900 |
| }, |
| { |
| "epoch": 43.003334966405376, |
| "grad_norm": 0.10218947380781174, |
| "learning_rate": 4.3430073410476546e-05, |
| "loss": 0.9025, |
| "step": 78000 |
| }, |
| { |
| "epoch": 43.003614921612545, |
| "grad_norm": 28.094913482666016, |
| "learning_rate": 4.3414520343411725e-05, |
| "loss": 0.989, |
| "step": 78100 |
| }, |
| { |
| "epoch": 43.00389487681971, |
| "grad_norm": 0.22375081479549408, |
| "learning_rate": 4.3398967276346897e-05, |
| "loss": 0.8974, |
| "step": 78200 |
| }, |
| { |
| "epoch": 43.00417483202688, |
| "grad_norm": 1.988282322883606, |
| "learning_rate": 4.3383414209282075e-05, |
| "loss": 0.7192, |
| "step": 78300 |
| }, |
| { |
| "epoch": 43.004454787234046, |
| "grad_norm": 32.35314178466797, |
| "learning_rate": 4.336786114221725e-05, |
| "loss": 0.8441, |
| "step": 78400 |
| }, |
| { |
| "epoch": 43.00473474244121, |
| "grad_norm": 27.084787368774414, |
| "learning_rate": 4.335230807515242e-05, |
| "loss": 1.2777, |
| "step": 78500 |
| }, |
| { |
| "epoch": 43.005000699888015, |
| "eval_accuracy": 0.27808988764044945, |
| "eval_f1": 0.2469082169774653, |
| "eval_loss": 4.310375213623047, |
| "eval_precision": 0.26096653920645235, |
| "eval_recall": 0.27808988764044945, |
| "eval_runtime": 223.359, |
| "eval_samples_per_second": 17.532, |
| "eval_steps_per_second": 8.766, |
| "eval_top_10_accuracy": 0.6741573033707865, |
| "eval_top_1_accuracy": 0.2783452502553626, |
| "eval_top_5_accuracy": 0.5773748723186926, |
| "step": 78595 |
| }, |
| { |
| "epoch": 44.000013997760355, |
| "grad_norm": 30.237857818603516, |
| "learning_rate": 4.33367550080876e-05, |
| "loss": 1.0311, |
| "step": 78600 |
| }, |
| { |
| "epoch": 44.000293952967525, |
| "grad_norm": 57.867767333984375, |
| "learning_rate": 4.332120194102277e-05, |
| "loss": 0.3452, |
| "step": 78700 |
| }, |
| { |
| "epoch": 44.000573908174694, |
| "grad_norm": 0.10751175135374069, |
| "learning_rate": 4.330564887395795e-05, |
| "loss": 0.4273, |
| "step": 78800 |
| }, |
| { |
| "epoch": 44.000853863381856, |
| "grad_norm": 3.9307987689971924, |
| "learning_rate": 4.329009580689312e-05, |
| "loss": 0.5516, |
| "step": 78900 |
| }, |
| { |
| "epoch": 44.001133818589025, |
| "grad_norm": 31.563825607299805, |
| "learning_rate": 4.327454273982829e-05, |
| "loss": 0.5658, |
| "step": 79000 |
| }, |
| { |
| "epoch": 44.001413773796195, |
| "grad_norm": 0.5761617422103882, |
| "learning_rate": 4.325898967276347e-05, |
| "loss": 0.8374, |
| "step": 79100 |
| }, |
| { |
| "epoch": 44.00169372900336, |
| "grad_norm": 5.779951572418213, |
| "learning_rate": 4.324343660569864e-05, |
| "loss": 0.6332, |
| "step": 79200 |
| }, |
| { |
| "epoch": 44.001973684210526, |
| "grad_norm": 25.423784255981445, |
| "learning_rate": 4.322788353863382e-05, |
| "loss": 0.6223, |
| "step": 79300 |
| }, |
| { |
| "epoch": 44.002253639417695, |
| "grad_norm": 15.803873062133789, |
| "learning_rate": 4.321233047156899e-05, |
| "loss": 0.5489, |
| "step": 79400 |
| }, |
| { |
| "epoch": 44.00253359462486, |
| "grad_norm": 0.1971481591463089, |
| "learning_rate": 4.3196777404504165e-05, |
| "loss": 0.7783, |
| "step": 79500 |
| }, |
| { |
| "epoch": 44.00281354983203, |
| "grad_norm": 2.9763810634613037, |
| "learning_rate": 4.3181224337439344e-05, |
| "loss": 0.8164, |
| "step": 79600 |
| }, |
| { |
| "epoch": 44.003093505039196, |
| "grad_norm": 26.30331039428711, |
| "learning_rate": 4.316567127037452e-05, |
| "loss": 1.0041, |
| "step": 79700 |
| }, |
| { |
| "epoch": 44.00337346024636, |
| "grad_norm": 44.34019470214844, |
| "learning_rate": 4.3150118203309694e-05, |
| "loss": 0.8961, |
| "step": 79800 |
| }, |
| { |
| "epoch": 44.00365341545353, |
| "grad_norm": 26.827043533325195, |
| "learning_rate": 4.313472066691552e-05, |
| "loss": 0.9644, |
| "step": 79900 |
| }, |
| { |
| "epoch": 44.0039333706607, |
| "grad_norm": 7.778226375579834, |
| "learning_rate": 4.311916759985069e-05, |
| "loss": 1.0374, |
| "step": 80000 |
| }, |
| { |
| "epoch": 44.00421332586786, |
| "grad_norm": 41.65769958496094, |
| "learning_rate": 4.310361453278587e-05, |
| "loss": 0.9236, |
| "step": 80100 |
| }, |
| { |
| "epoch": 44.00449328107503, |
| "grad_norm": 37.12181091308594, |
| "learning_rate": 4.308806146572104e-05, |
| "loss": 0.7795, |
| "step": 80200 |
| }, |
| { |
| "epoch": 44.0047732362822, |
| "grad_norm": 4.020895481109619, |
| "learning_rate": 4.307250839865622e-05, |
| "loss": 0.8072, |
| "step": 80300 |
| }, |
| { |
| "epoch": 44.005, |
| "eval_accuracy": 0.27808988764044945, |
| "eval_f1": 0.2500487491688747, |
| "eval_loss": 4.37076997756958, |
| "eval_precision": 0.2685824383028162, |
| "eval_recall": 0.27808988764044945, |
| "eval_runtime": 221.8318, |
| "eval_samples_per_second": 17.653, |
| "eval_steps_per_second": 8.827, |
| "eval_top_10_accuracy": 0.6616445352400409, |
| "eval_top_1_accuracy": 0.27783452502553624, |
| "eval_top_5_accuracy": 0.5638406537282942, |
| "step": 80381 |
| }, |
| { |
| "epoch": 45.000052491601345, |
| "grad_norm": 5.015646457672119, |
| "learning_rate": 4.305695533159139e-05, |
| "loss": 0.7486, |
| "step": 80400 |
| }, |
| { |
| "epoch": 45.000332446808514, |
| "grad_norm": 10.068531036376953, |
| "learning_rate": 4.304140226452656e-05, |
| "loss": 0.4467, |
| "step": 80500 |
| }, |
| { |
| "epoch": 45.000612402015676, |
| "grad_norm": 26.70047950744629, |
| "learning_rate": 4.302584919746174e-05, |
| "loss": 0.48, |
| "step": 80600 |
| }, |
| { |
| "epoch": 45.000892357222845, |
| "grad_norm": 0.11867395788431168, |
| "learning_rate": 4.3010296130396914e-05, |
| "loss": 0.6253, |
| "step": 80700 |
| }, |
| { |
| "epoch": 45.001172312430015, |
| "grad_norm": 39.205528259277344, |
| "learning_rate": 4.299474306333209e-05, |
| "loss": 0.6083, |
| "step": 80800 |
| }, |
| { |
| "epoch": 45.00145226763718, |
| "grad_norm": 0.874306857585907, |
| "learning_rate": 4.2979189996267264e-05, |
| "loss": 0.5311, |
| "step": 80900 |
| }, |
| { |
| "epoch": 45.001732222844346, |
| "grad_norm": 0.12406778335571289, |
| "learning_rate": 4.2963636929202436e-05, |
| "loss": 0.5437, |
| "step": 81000 |
| }, |
| { |
| "epoch": 45.00201217805151, |
| "grad_norm": 1.986485481262207, |
| "learning_rate": 4.2948083862137615e-05, |
| "loss": 0.6653, |
| "step": 81100 |
| }, |
| { |
| "epoch": 45.00229213325868, |
| "grad_norm": 0.10207056999206543, |
| "learning_rate": 4.2932530795072787e-05, |
| "loss": 0.9323, |
| "step": 81200 |
| }, |
| { |
| "epoch": 45.00257208846585, |
| "grad_norm": 0.07560595870018005, |
| "learning_rate": 4.2916977728007965e-05, |
| "loss": 0.6401, |
| "step": 81300 |
| }, |
| { |
| "epoch": 45.00285204367301, |
| "grad_norm": 29.77736473083496, |
| "learning_rate": 4.290142466094314e-05, |
| "loss": 0.7685, |
| "step": 81400 |
| }, |
| { |
| "epoch": 45.00313199888018, |
| "grad_norm": 0.9332723617553711, |
| "learning_rate": 4.2885871593878316e-05, |
| "loss": 0.8516, |
| "step": 81500 |
| }, |
| { |
| "epoch": 45.00341195408735, |
| "grad_norm": 0.36157646775245667, |
| "learning_rate": 4.287031852681349e-05, |
| "loss": 0.6851, |
| "step": 81600 |
| }, |
| { |
| "epoch": 45.00369190929451, |
| "grad_norm": 1.132853388786316, |
| "learning_rate": 4.2854765459748666e-05, |
| "loss": 0.9181, |
| "step": 81700 |
| }, |
| { |
| "epoch": 45.00397186450168, |
| "grad_norm": 33.407169342041016, |
| "learning_rate": 4.2839212392683845e-05, |
| "loss": 0.8689, |
| "step": 81800 |
| }, |
| { |
| "epoch": 45.00425181970885, |
| "grad_norm": 1.4110562801361084, |
| "learning_rate": 4.282381485628966e-05, |
| "loss": 0.8921, |
| "step": 81900 |
| }, |
| { |
| "epoch": 45.00453177491601, |
| "grad_norm": 0.2189277559518814, |
| "learning_rate": 4.2808261789224834e-05, |
| "loss": 0.8344, |
| "step": 82000 |
| }, |
| { |
| "epoch": 45.00481173012318, |
| "grad_norm": 16.592561721801758, |
| "learning_rate": 4.279270872216001e-05, |
| "loss": 1.1258, |
| "step": 82100 |
| }, |
| { |
| "epoch": 45.00499930011198, |
| "eval_accuracy": 0.2801327885597549, |
| "eval_f1": 0.2509663358999416, |
| "eval_loss": 4.351675987243652, |
| "eval_precision": 0.26884382499806403, |
| "eval_recall": 0.2801327885597549, |
| "eval_runtime": 223.1566, |
| "eval_samples_per_second": 17.548, |
| "eval_steps_per_second": 8.774, |
| "eval_top_10_accuracy": 0.6731358529111338, |
| "eval_top_1_accuracy": 0.27987742594484166, |
| "eval_top_5_accuracy": 0.5707354443309499, |
| "step": 82167 |
| }, |
| { |
| "epoch": 46.00009098544233, |
| "grad_norm": 8.000289916992188, |
| "learning_rate": 4.2777155655095184e-05, |
| "loss": 0.8636, |
| "step": 82200 |
| }, |
| { |
| "epoch": 46.000370940649496, |
| "grad_norm": 0.263664573431015, |
| "learning_rate": 4.276160258803036e-05, |
| "loss": 0.3495, |
| "step": 82300 |
| }, |
| { |
| "epoch": 46.000650895856666, |
| "grad_norm": 20.22283935546875, |
| "learning_rate": 4.2746049520965535e-05, |
| "loss": 0.6638, |
| "step": 82400 |
| }, |
| { |
| "epoch": 46.00093085106383, |
| "grad_norm": 20.6286678314209, |
| "learning_rate": 4.273049645390071e-05, |
| "loss": 0.4812, |
| "step": 82500 |
| }, |
| { |
| "epoch": 46.001210806271, |
| "grad_norm": 1.6630127429962158, |
| "learning_rate": 4.2714943386835886e-05, |
| "loss": 0.654, |
| "step": 82600 |
| }, |
| { |
| "epoch": 46.001490761478166, |
| "grad_norm": 28.354536056518555, |
| "learning_rate": 4.269939031977106e-05, |
| "loss": 0.6758, |
| "step": 82700 |
| }, |
| { |
| "epoch": 46.00177071668533, |
| "grad_norm": 0.040363699197769165, |
| "learning_rate": 4.2683837252706236e-05, |
| "loss": 0.5949, |
| "step": 82800 |
| }, |
| { |
| "epoch": 46.0020506718925, |
| "grad_norm": 0.522068440914154, |
| "learning_rate": 4.266828418564141e-05, |
| "loss": 0.6394, |
| "step": 82900 |
| }, |
| { |
| "epoch": 46.00233062709967, |
| "grad_norm": 0.14378629624843597, |
| "learning_rate": 4.265273111857658e-05, |
| "loss": 0.7237, |
| "step": 83000 |
| }, |
| { |
| "epoch": 46.00261058230683, |
| "grad_norm": 25.07221031188965, |
| "learning_rate": 4.263717805151176e-05, |
| "loss": 0.6259, |
| "step": 83100 |
| }, |
| { |
| "epoch": 46.002890537514, |
| "grad_norm": 3.614312171936035, |
| "learning_rate": 4.262162498444694e-05, |
| "loss": 0.6505, |
| "step": 83200 |
| }, |
| { |
| "epoch": 46.00317049272117, |
| "grad_norm": 23.773239135742188, |
| "learning_rate": 4.260607191738211e-05, |
| "loss": 0.6757, |
| "step": 83300 |
| }, |
| { |
| "epoch": 46.00345044792833, |
| "grad_norm": 19.163311004638672, |
| "learning_rate": 4.259051885031729e-05, |
| "loss": 0.736, |
| "step": 83400 |
| }, |
| { |
| "epoch": 46.0037304031355, |
| "grad_norm": 26.841463088989258, |
| "learning_rate": 4.257496578325246e-05, |
| "loss": 0.7399, |
| "step": 83500 |
| }, |
| { |
| "epoch": 46.00401035834267, |
| "grad_norm": 0.11447547376155853, |
| "learning_rate": 4.255941271618764e-05, |
| "loss": 0.9328, |
| "step": 83600 |
| }, |
| { |
| "epoch": 46.00429031354983, |
| "grad_norm": 0.1793317198753357, |
| "learning_rate": 4.254385964912281e-05, |
| "loss": 0.6467, |
| "step": 83700 |
| }, |
| { |
| "epoch": 46.004570268757, |
| "grad_norm": 37.40285873413086, |
| "learning_rate": 4.252830658205799e-05, |
| "loss": 1.0173, |
| "step": 83800 |
| }, |
| { |
| "epoch": 46.00485022396417, |
| "grad_norm": 0.11013773828744888, |
| "learning_rate": 4.251275351499316e-05, |
| "loss": 0.9476, |
| "step": 83900 |
| }, |
| { |
| "epoch": 46.00499860022396, |
| "eval_accuracy": 0.28115423901940756, |
| "eval_f1": 0.24729637104677424, |
| "eval_loss": 4.371170520782471, |
| "eval_precision": 0.2582944003845434, |
| "eval_recall": 0.28115423901940756, |
| "eval_runtime": 223.6689, |
| "eval_samples_per_second": 17.508, |
| "eval_steps_per_second": 8.754, |
| "eval_top_10_accuracy": 0.6756894790602656, |
| "eval_top_1_accuracy": 0.2814096016343207, |
| "eval_top_5_accuracy": 0.5684371807967313, |
| "step": 83953 |
| }, |
| { |
| "epoch": 47.000129479283316, |
| "grad_norm": 0.257235050201416, |
| "learning_rate": 4.249735597859898e-05, |
| "loss": 0.7448, |
| "step": 84000 |
| }, |
| { |
| "epoch": 47.00040943449048, |
| "grad_norm": 18.50094985961914, |
| "learning_rate": 4.2481802911534156e-05, |
| "loss": 0.3183, |
| "step": 84100 |
| }, |
| { |
| "epoch": 47.00068938969765, |
| "grad_norm": 0.018771247938275337, |
| "learning_rate": 4.246624984446933e-05, |
| "loss": 0.6396, |
| "step": 84200 |
| }, |
| { |
| "epoch": 47.00096934490482, |
| "grad_norm": 0.19734026491641998, |
| "learning_rate": 4.245069677740451e-05, |
| "loss": 0.4782, |
| "step": 84300 |
| }, |
| { |
| "epoch": 47.00124930011198, |
| "grad_norm": 4.257106304168701, |
| "learning_rate": 4.243514371033968e-05, |
| "loss": 0.6645, |
| "step": 84400 |
| }, |
| { |
| "epoch": 47.00152925531915, |
| "grad_norm": 14.478767395019531, |
| "learning_rate": 4.241959064327485e-05, |
| "loss": 0.5289, |
| "step": 84500 |
| }, |
| { |
| "epoch": 47.00180921052632, |
| "grad_norm": 0.04039764776825905, |
| "learning_rate": 4.240403757621003e-05, |
| "loss": 0.6705, |
| "step": 84600 |
| }, |
| { |
| "epoch": 47.00208916573348, |
| "grad_norm": 31.222604751586914, |
| "learning_rate": 4.23884845091452e-05, |
| "loss": 0.5118, |
| "step": 84700 |
| }, |
| { |
| "epoch": 47.00236912094065, |
| "grad_norm": 4.133471488952637, |
| "learning_rate": 4.237293144208038e-05, |
| "loss": 0.716, |
| "step": 84800 |
| }, |
| { |
| "epoch": 47.00264907614782, |
| "grad_norm": 0.13676881790161133, |
| "learning_rate": 4.235737837501555e-05, |
| "loss": 0.6143, |
| "step": 84900 |
| }, |
| { |
| "epoch": 47.00292903135498, |
| "grad_norm": 0.7570433616638184, |
| "learning_rate": 4.234182530795073e-05, |
| "loss": 0.6359, |
| "step": 85000 |
| }, |
| { |
| "epoch": 47.00320898656215, |
| "grad_norm": 30.351444244384766, |
| "learning_rate": 4.23262722408859e-05, |
| "loss": 0.5945, |
| "step": 85100 |
| }, |
| { |
| "epoch": 47.00348894176932, |
| "grad_norm": 0.016572458669543266, |
| "learning_rate": 4.231071917382108e-05, |
| "loss": 0.9843, |
| "step": 85200 |
| }, |
| { |
| "epoch": 47.00376889697648, |
| "grad_norm": 4.526513576507568, |
| "learning_rate": 4.229516610675626e-05, |
| "loss": 0.8844, |
| "step": 85300 |
| }, |
| { |
| "epoch": 47.00404885218365, |
| "grad_norm": 19.184106826782227, |
| "learning_rate": 4.227961303969143e-05, |
| "loss": 0.845, |
| "step": 85400 |
| }, |
| { |
| "epoch": 47.00432880739082, |
| "grad_norm": 0.7215138077735901, |
| "learning_rate": 4.2264059972626604e-05, |
| "loss": 0.7167, |
| "step": 85500 |
| }, |
| { |
| "epoch": 47.00460876259798, |
| "grad_norm": 0.3852091133594513, |
| "learning_rate": 4.224850690556178e-05, |
| "loss": 0.9227, |
| "step": 85600 |
| }, |
| { |
| "epoch": 47.00488871780515, |
| "grad_norm": 20.54586410522461, |
| "learning_rate": 4.2232953838496954e-05, |
| "loss": 1.0757, |
| "step": 85700 |
| }, |
| { |
| "epoch": 47.005000699888015, |
| "eval_accuracy": 0.2676200204290092, |
| "eval_f1": 0.23724926687771428, |
| "eval_loss": 4.428029537200928, |
| "eval_precision": 0.251943858264083, |
| "eval_recall": 0.2676200204290092, |
| "eval_runtime": 219.2253, |
| "eval_samples_per_second": 17.863, |
| "eval_steps_per_second": 8.931, |
| "eval_top_10_accuracy": 0.6667517875383044, |
| "eval_top_1_accuracy": 0.2678753830439224, |
| "eval_top_5_accuracy": 0.5640960163432074, |
| "step": 85740 |
| }, |
| { |
| "epoch": 48.0001679731243, |
| "grad_norm": 1.403875708580017, |
| "learning_rate": 4.221740077143213e-05, |
| "loss": 0.6773, |
| "step": 85800 |
| }, |
| { |
| "epoch": 48.00044792833147, |
| "grad_norm": 1.0571541786193848, |
| "learning_rate": 4.2201847704367305e-05, |
| "loss": 0.4237, |
| "step": 85900 |
| }, |
| { |
| "epoch": 48.00072788353864, |
| "grad_norm": 0.06935632228851318, |
| "learning_rate": 4.218645016797312e-05, |
| "loss": 0.6378, |
| "step": 86000 |
| }, |
| { |
| "epoch": 48.0010078387458, |
| "grad_norm": 0.08270786702632904, |
| "learning_rate": 4.21708971009083e-05, |
| "loss": 0.56, |
| "step": 86100 |
| }, |
| { |
| "epoch": 48.00128779395297, |
| "grad_norm": 19.250934600830078, |
| "learning_rate": 4.215534403384347e-05, |
| "loss": 0.5513, |
| "step": 86200 |
| }, |
| { |
| "epoch": 48.00156774916013, |
| "grad_norm": 0.13220156729221344, |
| "learning_rate": 4.213979096677865e-05, |
| "loss": 0.5792, |
| "step": 86300 |
| }, |
| { |
| "epoch": 48.0018477043673, |
| "grad_norm": 19.1898250579834, |
| "learning_rate": 4.212423789971382e-05, |
| "loss": 0.7476, |
| "step": 86400 |
| }, |
| { |
| "epoch": 48.00212765957447, |
| "grad_norm": 35.42184829711914, |
| "learning_rate": 4.2108684832648995e-05, |
| "loss": 0.9539, |
| "step": 86500 |
| }, |
| { |
| "epoch": 48.00240761478163, |
| "grad_norm": 14.540600776672363, |
| "learning_rate": 4.209313176558417e-05, |
| "loss": 0.5537, |
| "step": 86600 |
| }, |
| { |
| "epoch": 48.0026875699888, |
| "grad_norm": 0.27380481362342834, |
| "learning_rate": 4.207757869851935e-05, |
| "loss": 0.6941, |
| "step": 86700 |
| }, |
| { |
| "epoch": 48.00296752519597, |
| "grad_norm": 0.0291057787835598, |
| "learning_rate": 4.2062025631454524e-05, |
| "loss": 0.989, |
| "step": 86800 |
| }, |
| { |
| "epoch": 48.00324748040313, |
| "grad_norm": 23.343826293945312, |
| "learning_rate": 4.20464725643897e-05, |
| "loss": 0.9426, |
| "step": 86900 |
| }, |
| { |
| "epoch": 48.0035274356103, |
| "grad_norm": 18.877023696899414, |
| "learning_rate": 4.2030919497324875e-05, |
| "loss": 0.6116, |
| "step": 87000 |
| }, |
| { |
| "epoch": 48.00380739081747, |
| "grad_norm": 52.804725646972656, |
| "learning_rate": 4.201536643026005e-05, |
| "loss": 0.8468, |
| "step": 87100 |
| }, |
| { |
| "epoch": 48.00408734602463, |
| "grad_norm": 0.39581963419914246, |
| "learning_rate": 4.1999813363195225e-05, |
| "loss": 0.6277, |
| "step": 87200 |
| }, |
| { |
| "epoch": 48.0043673012318, |
| "grad_norm": 0.08678244054317474, |
| "learning_rate": 4.1984260296130404e-05, |
| "loss": 0.734, |
| "step": 87300 |
| }, |
| { |
| "epoch": 48.00464725643897, |
| "grad_norm": 37.76765060424805, |
| "learning_rate": 4.1968707229065576e-05, |
| "loss": 0.8369, |
| "step": 87400 |
| }, |
| { |
| "epoch": 48.004927211646134, |
| "grad_norm": 25.679229736328125, |
| "learning_rate": 4.195315416200075e-05, |
| "loss": 0.7244, |
| "step": 87500 |
| }, |
| { |
| "epoch": 48.005, |
| "eval_accuracy": 0.267364657814096, |
| "eval_f1": 0.23883580701912732, |
| "eval_loss": 4.437381267547607, |
| "eval_precision": 0.2528897338332987, |
| "eval_recall": 0.267364657814096, |
| "eval_runtime": 227.9408, |
| "eval_samples_per_second": 17.18, |
| "eval_steps_per_second": 8.59, |
| "eval_top_10_accuracy": 0.6685393258426966, |
| "eval_top_1_accuracy": 0.2676200204290092, |
| "eval_top_5_accuracy": 0.5676710929519918, |
| "step": 87526 |
| }, |
| { |
| "epoch": 49.00020646696529, |
| "grad_norm": 0.036201562732458115, |
| "learning_rate": 4.1937601094935926e-05, |
| "loss": 0.4116, |
| "step": 87600 |
| }, |
| { |
| "epoch": 49.00048642217245, |
| "grad_norm": 0.03052523173391819, |
| "learning_rate": 4.19220480278711e-05, |
| "loss": 0.4815, |
| "step": 87700 |
| }, |
| { |
| "epoch": 49.00076637737962, |
| "grad_norm": 77.27123260498047, |
| "learning_rate": 4.190649496080628e-05, |
| "loss": 0.5239, |
| "step": 87800 |
| }, |
| { |
| "epoch": 49.00104633258679, |
| "grad_norm": 36.39849090576172, |
| "learning_rate": 4.189094189374145e-05, |
| "loss": 0.493, |
| "step": 87900 |
| }, |
| { |
| "epoch": 49.00132628779395, |
| "grad_norm": 34.98275375366211, |
| "learning_rate": 4.187538882667662e-05, |
| "loss": 0.7586, |
| "step": 88000 |
| }, |
| { |
| "epoch": 49.00160624300112, |
| "grad_norm": 2.225522041320801, |
| "learning_rate": 4.18598357596118e-05, |
| "loss": 0.4518, |
| "step": 88100 |
| }, |
| { |
| "epoch": 49.00188619820829, |
| "grad_norm": 20.050642013549805, |
| "learning_rate": 4.1844438223217616e-05, |
| "loss": 0.6425, |
| "step": 88200 |
| }, |
| { |
| "epoch": 49.00216615341545, |
| "grad_norm": 26.255083084106445, |
| "learning_rate": 4.1828885156152795e-05, |
| "loss": 0.5254, |
| "step": 88300 |
| }, |
| { |
| "epoch": 49.00244610862262, |
| "grad_norm": 68.98938751220703, |
| "learning_rate": 4.181333208908797e-05, |
| "loss": 0.5639, |
| "step": 88400 |
| }, |
| { |
| "epoch": 49.00272606382979, |
| "grad_norm": 0.42682796716690063, |
| "learning_rate": 4.1797779022023145e-05, |
| "loss": 0.5954, |
| "step": 88500 |
| }, |
| { |
| "epoch": 49.00300601903695, |
| "grad_norm": 0.07504092901945114, |
| "learning_rate": 4.178222595495832e-05, |
| "loss": 0.794, |
| "step": 88600 |
| }, |
| { |
| "epoch": 49.00328597424412, |
| "grad_norm": 2.559800148010254, |
| "learning_rate": 4.1766672887893496e-05, |
| "loss": 0.7073, |
| "step": 88700 |
| }, |
| { |
| "epoch": 49.00356592945129, |
| "grad_norm": 0.014354785904288292, |
| "learning_rate": 4.1751119820828675e-05, |
| "loss": 0.851, |
| "step": 88800 |
| }, |
| { |
| "epoch": 49.00384588465845, |
| "grad_norm": 0.1795697808265686, |
| "learning_rate": 4.1735566753763847e-05, |
| "loss": 0.9604, |
| "step": 88900 |
| }, |
| { |
| "epoch": 49.00412583986562, |
| "grad_norm": 27.99577522277832, |
| "learning_rate": 4.172001368669902e-05, |
| "loss": 0.5985, |
| "step": 89000 |
| }, |
| { |
| "epoch": 49.00440579507279, |
| "grad_norm": 24.083967208862305, |
| "learning_rate": 4.17044606196342e-05, |
| "loss": 0.9174, |
| "step": 89100 |
| }, |
| { |
| "epoch": 49.004685750279954, |
| "grad_norm": 0.6158522963523865, |
| "learning_rate": 4.168890755256937e-05, |
| "loss": 0.6783, |
| "step": 89200 |
| }, |
| { |
| "epoch": 49.00496570548712, |
| "grad_norm": 0.4537813067436218, |
| "learning_rate": 4.167335448550455e-05, |
| "loss": 0.9193, |
| "step": 89300 |
| }, |
| { |
| "epoch": 49.00499930011198, |
| "eval_accuracy": 0.27477017364657813, |
| "eval_f1": 0.2466602161316155, |
| "eval_loss": 4.401386260986328, |
| "eval_precision": 0.2617121945995394, |
| "eval_recall": 0.27477017364657813, |
| "eval_runtime": 219.9162, |
| "eval_samples_per_second": 17.807, |
| "eval_steps_per_second": 8.903, |
| "eval_top_10_accuracy": 0.666241062308478, |
| "eval_top_1_accuracy": 0.27477017364657813, |
| "eval_top_5_accuracy": 0.563585291113381, |
| "step": 89312 |
| }, |
| { |
| "epoch": 50.00024496080627, |
| "grad_norm": 0.04477471113204956, |
| "learning_rate": 4.165780141843972e-05, |
| "loss": 0.3977, |
| "step": 89400 |
| }, |
| { |
| "epoch": 50.00052491601344, |
| "grad_norm": 0.24003936350345612, |
| "learning_rate": 4.164224835137489e-05, |
| "loss": 0.4872, |
| "step": 89500 |
| }, |
| { |
| "epoch": 50.0008048712206, |
| "grad_norm": 27.157386779785156, |
| "learning_rate": 4.162669528431007e-05, |
| "loss": 0.5601, |
| "step": 89600 |
| }, |
| { |
| "epoch": 50.00108482642777, |
| "grad_norm": 0.051918625831604004, |
| "learning_rate": 4.161114221724524e-05, |
| "loss": 0.4453, |
| "step": 89700 |
| }, |
| { |
| "epoch": 50.00136478163494, |
| "grad_norm": 5.718064785003662, |
| "learning_rate": 4.159558915018042e-05, |
| "loss": 0.4906, |
| "step": 89800 |
| }, |
| { |
| "epoch": 50.0016447368421, |
| "grad_norm": 0.8962364792823792, |
| "learning_rate": 4.158003608311559e-05, |
| "loss": 0.5037, |
| "step": 89900 |
| }, |
| { |
| "epoch": 50.00192469204927, |
| "grad_norm": 0.05446602776646614, |
| "learning_rate": 4.1564483016050765e-05, |
| "loss": 0.61, |
| "step": 90000 |
| }, |
| { |
| "epoch": 50.00220464725644, |
| "grad_norm": 19.462753295898438, |
| "learning_rate": 4.154892994898594e-05, |
| "loss": 0.6704, |
| "step": 90100 |
| }, |
| { |
| "epoch": 50.0024846024636, |
| "grad_norm": 0.09495168179273605, |
| "learning_rate": 4.1533376881921115e-05, |
| "loss": 0.6498, |
| "step": 90200 |
| }, |
| { |
| "epoch": 50.00276455767077, |
| "grad_norm": 1.4602558612823486, |
| "learning_rate": 4.151797934552694e-05, |
| "loss": 0.7506, |
| "step": 90300 |
| }, |
| { |
| "epoch": 50.00304451287794, |
| "grad_norm": 21.6412296295166, |
| "learning_rate": 4.150242627846212e-05, |
| "loss": 0.75, |
| "step": 90400 |
| }, |
| { |
| "epoch": 50.003324468085104, |
| "grad_norm": 2.719374418258667, |
| "learning_rate": 4.148687321139729e-05, |
| "loss": 0.708, |
| "step": 90500 |
| }, |
| { |
| "epoch": 50.00360442329227, |
| "grad_norm": 5.177334308624268, |
| "learning_rate": 4.147132014433247e-05, |
| "loss": 0.8531, |
| "step": 90600 |
| }, |
| { |
| "epoch": 50.00388437849944, |
| "grad_norm": 38.69509506225586, |
| "learning_rate": 4.145576707726764e-05, |
| "loss": 0.886, |
| "step": 90700 |
| }, |
| { |
| "epoch": 50.004164333706605, |
| "grad_norm": 0.25815537571907043, |
| "learning_rate": 4.144021401020282e-05, |
| "loss": 0.7835, |
| "step": 90800 |
| }, |
| { |
| "epoch": 50.004444288913774, |
| "grad_norm": 27.770305633544922, |
| "learning_rate": 4.142466094313799e-05, |
| "loss": 0.8142, |
| "step": 90900 |
| }, |
| { |
| "epoch": 50.00472424412094, |
| "grad_norm": 18.81846046447754, |
| "learning_rate": 4.140910787607316e-05, |
| "loss": 0.8372, |
| "step": 91000 |
| }, |
| { |
| "epoch": 50.00499860022396, |
| "eval_accuracy": 0.27349336057201223, |
| "eval_f1": 0.24285441862576393, |
| "eval_loss": 4.476330280303955, |
| "eval_precision": 0.2554687395112048, |
| "eval_recall": 0.27349336057201223, |
| "eval_runtime": 227.1325, |
| "eval_samples_per_second": 17.241, |
| "eval_steps_per_second": 8.621, |
| "eval_top_10_accuracy": 0.6544943820224719, |
| "eval_top_1_accuracy": 0.27349336057201223, |
| "eval_top_5_accuracy": 0.5584780388151175, |
| "step": 91098 |
| }, |
| { |
| "epoch": 51.00000349944009, |
| "grad_norm": 0.6531506180763245, |
| "learning_rate": 4.139355480900834e-05, |
| "loss": 0.8241, |
| "step": 91100 |
| }, |
| { |
| "epoch": 51.00028345464726, |
| "grad_norm": 0.014770712703466415, |
| "learning_rate": 4.137800174194351e-05, |
| "loss": 0.3236, |
| "step": 91200 |
| }, |
| { |
| "epoch": 51.00056340985442, |
| "grad_norm": 27.77800178527832, |
| "learning_rate": 4.136244867487869e-05, |
| "loss": 0.3143, |
| "step": 91300 |
| }, |
| { |
| "epoch": 51.00084336506159, |
| "grad_norm": 0.24872513115406036, |
| "learning_rate": 4.1346895607813863e-05, |
| "loss": 0.3622, |
| "step": 91400 |
| }, |
| { |
| "epoch": 51.00112332026875, |
| "grad_norm": 17.729570388793945, |
| "learning_rate": 4.1331342540749035e-05, |
| "loss": 0.5148, |
| "step": 91500 |
| }, |
| { |
| "epoch": 51.00140327547592, |
| "grad_norm": 0.36014020442962646, |
| "learning_rate": 4.1315789473684214e-05, |
| "loss": 0.4913, |
| "step": 91600 |
| }, |
| { |
| "epoch": 51.00168323068309, |
| "grad_norm": 0.2546406090259552, |
| "learning_rate": 4.1300236406619386e-05, |
| "loss": 0.7763, |
| "step": 91700 |
| }, |
| { |
| "epoch": 51.001963185890254, |
| "grad_norm": 0.051592834293842316, |
| "learning_rate": 4.1284683339554565e-05, |
| "loss": 0.5542, |
| "step": 91800 |
| }, |
| { |
| "epoch": 51.00224314109742, |
| "grad_norm": 0.3881039321422577, |
| "learning_rate": 4.1269130272489737e-05, |
| "loss": 0.6371, |
| "step": 91900 |
| }, |
| { |
| "epoch": 51.00252309630459, |
| "grad_norm": 6.290889739990234, |
| "learning_rate": 4.125357720542491e-05, |
| "loss": 0.6698, |
| "step": 92000 |
| }, |
| { |
| "epoch": 51.002803051511755, |
| "grad_norm": 13.433815002441406, |
| "learning_rate": 4.123802413836009e-05, |
| "loss": 0.8478, |
| "step": 92100 |
| }, |
| { |
| "epoch": 51.003083006718924, |
| "grad_norm": 21.983304977416992, |
| "learning_rate": 4.122247107129526e-05, |
| "loss": 0.7023, |
| "step": 92200 |
| }, |
| { |
| "epoch": 51.00336296192609, |
| "grad_norm": 24.535905838012695, |
| "learning_rate": 4.120691800423044e-05, |
| "loss": 0.7172, |
| "step": 92300 |
| }, |
| { |
| "epoch": 51.003642917133256, |
| "grad_norm": 0.10872181504964828, |
| "learning_rate": 4.119152046783626e-05, |
| "loss": 0.767, |
| "step": 92400 |
| }, |
| { |
| "epoch": 51.003922872340425, |
| "grad_norm": 1.7466179132461548, |
| "learning_rate": 4.117596740077143e-05, |
| "loss": 0.8614, |
| "step": 92500 |
| }, |
| { |
| "epoch": 51.004202827547594, |
| "grad_norm": 24.38710594177246, |
| "learning_rate": 4.116041433370661e-05, |
| "loss": 0.9432, |
| "step": 92600 |
| }, |
| { |
| "epoch": 51.004482782754756, |
| "grad_norm": 0.1896403431892395, |
| "learning_rate": 4.1144861266641784e-05, |
| "loss": 0.611, |
| "step": 92700 |
| }, |
| { |
| "epoch": 51.004762737961926, |
| "grad_norm": 0.058731839060783386, |
| "learning_rate": 4.112930819957696e-05, |
| "loss": 0.7598, |
| "step": 92800 |
| }, |
| { |
| "epoch": 51.005000699888015, |
| "eval_accuracy": 0.2727272727272727, |
| "eval_f1": 0.24191750595869443, |
| "eval_loss": 4.436596393585205, |
| "eval_precision": 0.2553644020829925, |
| "eval_recall": 0.2727272727272727, |
| "eval_runtime": 222.7636, |
| "eval_samples_per_second": 17.579, |
| "eval_steps_per_second": 8.79, |
| "eval_top_10_accuracy": 0.6598569969356486, |
| "eval_top_1_accuracy": 0.27247191011235955, |
| "eval_top_5_accuracy": 0.56511746680286, |
| "step": 92885 |
| }, |
| { |
| "epoch": 51.005000699888015, |
| "step": 92885, |
| "total_flos": 1.9366677482157638e+21, |
| "train_loss": 6.364319215203574, |
| "train_runtime": 102825.8245, |
| "train_samples_per_second": 27.791, |
| "train_steps_per_second": 3.474 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 357200, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 9223372036854775807, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 20, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 20 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.9366677482157638e+21, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|