{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 35.97122302158273, "eval_steps": 500, "global_step": 40000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "action_loss": 1.9846010208129883, "epoch": 0, "step": 0 }, { "epoch": 0.008992805755395683, "grad_norm": 42.47126388549805, "learning_rate": 3.0000000000000004e-07, "loss": 1.5102, "step": 10 }, { "action_loss": 1.0997552871704102, "epoch": 0.008992805755395683, "step": 10 }, { "epoch": 0.017985611510791366, "grad_norm": 40.13206100463867, "learning_rate": 6.333333333333333e-07, "loss": 1.6006, "step": 20 }, { "action_loss": 1.4752774238586426, "epoch": 0.017985611510791366, "step": 20 }, { "epoch": 0.02697841726618705, "grad_norm": 24.512340545654297, "learning_rate": 9.666666666666668e-07, "loss": 1.5612, "step": 30 }, { "action_loss": 1.7262139320373535, "epoch": 0.02697841726618705, "step": 30 }, { "epoch": 0.03597122302158273, "grad_norm": 45.99003601074219, "learning_rate": 1.3e-06, "loss": 1.3915, "step": 40 }, { "action_loss": 1.534085750579834, "epoch": 0.03597122302158273, "step": 40 }, { "epoch": 0.044964028776978415, "grad_norm": 12.87304401397705, "learning_rate": 1.6333333333333333e-06, "loss": 1.2429, "step": 50 }, { "action_loss": 1.0506848096847534, "epoch": 0.044964028776978415, "step": 50 }, { "epoch": 0.0539568345323741, "grad_norm": 13.308693885803223, "learning_rate": 1.9666666666666668e-06, "loss": 0.992, "step": 60 }, { "action_loss": 0.686131477355957, "epoch": 0.0539568345323741, "step": 60 }, { "epoch": 0.06294964028776978, "grad_norm": 8.500147819519043, "learning_rate": 2.3e-06, "loss": 0.6037, "step": 70 }, { "action_loss": 0.4773000180721283, "epoch": 0.06294964028776978, "step": 70 }, { "epoch": 0.07194244604316546, "grad_norm": 2.5783159732818604, "learning_rate": 2.6333333333333337e-06, "loss": 0.4832, "step": 80 }, { "action_loss": 0.3617378771305084, "epoch": 0.07194244604316546, "step": 80 }, { "epoch": 0.08093525179856115, "grad_norm": 2.0823707580566406, "learning_rate": 2.966666666666667e-06, "loss": 0.3554, "step": 90 }, { "action_loss": 0.2483079582452774, "epoch": 0.08093525179856115, "step": 90 }, { "epoch": 0.08992805755395683, "grad_norm": 1.663017749786377, "learning_rate": 3.3e-06, "loss": 0.2659, "step": 100 }, { "action_loss": 0.22090141475200653, "epoch": 0.08992805755395683, "step": 100 }, { "epoch": 0.09892086330935251, "grad_norm": 1.2466670274734497, "learning_rate": 3.633333333333334e-06, "loss": 0.1962, "step": 110 }, { "action_loss": 0.18960440158843994, "epoch": 0.09892086330935251, "step": 110 }, { "epoch": 0.1079136690647482, "grad_norm": 1.054117202758789, "learning_rate": 3.966666666666667e-06, "loss": 0.1762, "step": 120 }, { "action_loss": 0.12490513175725937, "epoch": 0.1079136690647482, "step": 120 }, { "epoch": 0.11690647482014388, "grad_norm": 1.1360236406326294, "learning_rate": 4.2999999999999995e-06, "loss": 0.1249, "step": 130 }, { "action_loss": 0.12125053256750107, "epoch": 0.11690647482014388, "step": 130 }, { "epoch": 0.12589928057553956, "grad_norm": 0.9416245818138123, "learning_rate": 4.633333333333334e-06, "loss": 0.1125, "step": 140 }, { "action_loss": 0.1179327741265297, "epoch": 0.12589928057553956, "step": 140 }, { "epoch": 0.13489208633093525, "grad_norm": 1.0858702659606934, "learning_rate": 4.966666666666667e-06, "loss": 0.1181, "step": 150 }, { "action_loss": 0.22189848124980927, "epoch": 0.13489208633093525, "step": 150 }, { "epoch": 0.14388489208633093, "grad_norm": 0.8978649377822876, "learning_rate": 5.3e-06, "loss": 0.0998, "step": 160 }, { "action_loss": 0.1236891821026802, "epoch": 0.14388489208633093, "step": 160 }, { "epoch": 0.1528776978417266, "grad_norm": 1.1465848684310913, "learning_rate": 5.633333333333333e-06, "loss": 0.0897, "step": 170 }, { "action_loss": 0.08122071623802185, "epoch": 0.1528776978417266, "step": 170 }, { "epoch": 0.1618705035971223, "grad_norm": 0.8090065121650696, "learning_rate": 5.9666666666666666e-06, "loss": 0.0758, "step": 180 }, { "action_loss": 0.09764935821294785, "epoch": 0.1618705035971223, "step": 180 }, { "epoch": 0.17086330935251798, "grad_norm": 1.182331919670105, "learning_rate": 6.300000000000001e-06, "loss": 0.095, "step": 190 }, { "action_loss": 0.06910569220781326, "epoch": 0.17086330935251798, "step": 190 }, { "epoch": 0.17985611510791366, "grad_norm": 1.094026803970337, "learning_rate": 6.633333333333333e-06, "loss": 0.0889, "step": 200 }, { "action_loss": 0.06305773556232452, "epoch": 0.17985611510791366, "step": 200 }, { "epoch": 0.18884892086330934, "grad_norm": 0.6919820308685303, "learning_rate": 6.966666666666667e-06, "loss": 0.0746, "step": 210 }, { "action_loss": 0.11320754140615463, "epoch": 0.18884892086330934, "step": 210 }, { "epoch": 0.19784172661870503, "grad_norm": 1.0625081062316895, "learning_rate": 7.2999999999999996e-06, "loss": 0.0828, "step": 220 }, { "action_loss": 0.05975823104381561, "epoch": 0.19784172661870503, "step": 220 }, { "epoch": 0.2068345323741007, "grad_norm": 0.893651008605957, "learning_rate": 7.633333333333334e-06, "loss": 0.0753, "step": 230 }, { "action_loss": 0.08672492951154709, "epoch": 0.2068345323741007, "step": 230 }, { "epoch": 0.2158273381294964, "grad_norm": 0.6924575567245483, "learning_rate": 7.966666666666666e-06, "loss": 0.071, "step": 240 }, { "action_loss": 0.08947581052780151, "epoch": 0.2158273381294964, "step": 240 }, { "epoch": 0.22482014388489208, "grad_norm": 1.0333690643310547, "learning_rate": 8.3e-06, "loss": 0.0705, "step": 250 }, { "action_loss": 0.07843628525733948, "epoch": 0.22482014388489208, "step": 250 }, { "epoch": 0.23381294964028776, "grad_norm": 0.7722813487052917, "learning_rate": 8.633333333333334e-06, "loss": 0.0735, "step": 260 }, { "action_loss": 0.05789214372634888, "epoch": 0.23381294964028776, "step": 260 }, { "epoch": 0.24280575539568344, "grad_norm": 0.7672290802001953, "learning_rate": 8.966666666666668e-06, "loss": 0.0664, "step": 270 }, { "action_loss": 0.05705097317695618, "epoch": 0.24280575539568344, "step": 270 }, { "epoch": 0.2517985611510791, "grad_norm": 0.9948064684867859, "learning_rate": 9.3e-06, "loss": 0.068, "step": 280 }, { "action_loss": 0.04703124985098839, "epoch": 0.2517985611510791, "step": 280 }, { "epoch": 0.2607913669064748, "grad_norm": 0.8087928891181946, "learning_rate": 9.633333333333335e-06, "loss": 0.0604, "step": 290 }, { "action_loss": 0.07871907949447632, "epoch": 0.2607913669064748, "step": 290 }, { "epoch": 0.2697841726618705, "grad_norm": 0.7508419156074524, "learning_rate": 9.966666666666667e-06, "loss": 0.0705, "step": 300 }, { "action_loss": 0.07245784997940063, "epoch": 0.2697841726618705, "step": 300 }, { "epoch": 0.2787769784172662, "grad_norm": 0.9235150218009949, "learning_rate": 1.03e-05, "loss": 0.0671, "step": 310 }, { "action_loss": 0.10521646589040756, "epoch": 0.2787769784172662, "step": 310 }, { "epoch": 0.28776978417266186, "grad_norm": 0.9546489715576172, "learning_rate": 1.0633333333333334e-05, "loss": 0.0714, "step": 320 }, { "action_loss": 0.053123701363801956, "epoch": 0.28776978417266186, "step": 320 }, { "epoch": 0.29676258992805754, "grad_norm": 0.9042888879776001, "learning_rate": 1.0966666666666666e-05, "loss": 0.063, "step": 330 }, { "action_loss": 0.048521775752305984, "epoch": 0.29676258992805754, "step": 330 }, { "epoch": 0.3057553956834532, "grad_norm": 1.7151509523391724, "learning_rate": 1.13e-05, "loss": 0.0586, "step": 340 }, { "action_loss": 0.05160096660256386, "epoch": 0.3057553956834532, "step": 340 }, { "epoch": 0.3147482014388489, "grad_norm": 0.7376689314842224, "learning_rate": 1.1633333333333334e-05, "loss": 0.0634, "step": 350 }, { "action_loss": 0.06244044378399849, "epoch": 0.3147482014388489, "step": 350 }, { "epoch": 0.3237410071942446, "grad_norm": 0.934843122959137, "learning_rate": 1.1966666666666668e-05, "loss": 0.0689, "step": 360 }, { "action_loss": 0.04397326707839966, "epoch": 0.3237410071942446, "step": 360 }, { "epoch": 0.3327338129496403, "grad_norm": 0.9599816203117371, "learning_rate": 1.23e-05, "loss": 0.0537, "step": 370 }, { "action_loss": 0.0831124410033226, "epoch": 0.3327338129496403, "step": 370 }, { "epoch": 0.34172661870503596, "grad_norm": 0.904229462146759, "learning_rate": 1.2633333333333333e-05, "loss": 0.0588, "step": 380 }, { "action_loss": 0.04709586501121521, "epoch": 0.34172661870503596, "step": 380 }, { "epoch": 0.35071942446043164, "grad_norm": 0.8761948943138123, "learning_rate": 1.2966666666666669e-05, "loss": 0.0657, "step": 390 }, { "action_loss": 0.0283834058791399, "epoch": 0.35071942446043164, "step": 390 }, { "epoch": 0.3597122302158273, "grad_norm": 0.6816421747207642, "learning_rate": 1.3300000000000001e-05, "loss": 0.0516, "step": 400 }, { "action_loss": 0.07040861994028091, "epoch": 0.3597122302158273, "step": 400 }, { "epoch": 0.368705035971223, "grad_norm": 0.8394888043403625, "learning_rate": 1.3633333333333334e-05, "loss": 0.0573, "step": 410 }, { "action_loss": 0.04134225472807884, "epoch": 0.368705035971223, "step": 410 }, { "epoch": 0.3776978417266187, "grad_norm": 0.8784199357032776, "learning_rate": 1.3966666666666666e-05, "loss": 0.0596, "step": 420 }, { "action_loss": 0.05802275612950325, "epoch": 0.3776978417266187, "step": 420 }, { "epoch": 0.38669064748201437, "grad_norm": 0.7314625382423401, "learning_rate": 1.43e-05, "loss": 0.0547, "step": 430 }, { "action_loss": 0.07686466723680496, "epoch": 0.38669064748201437, "step": 430 }, { "epoch": 0.39568345323741005, "grad_norm": 1.1302796602249146, "learning_rate": 1.4633333333333334e-05, "loss": 0.0627, "step": 440 }, { "action_loss": 0.07702836394309998, "epoch": 0.39568345323741005, "step": 440 }, { "epoch": 0.40467625899280574, "grad_norm": 0.7513672709465027, "learning_rate": 1.4966666666666668e-05, "loss": 0.0577, "step": 450 }, { "action_loss": 0.044019535183906555, "epoch": 0.40467625899280574, "step": 450 }, { "epoch": 0.4136690647482014, "grad_norm": 0.9583770632743835, "learning_rate": 1.53e-05, "loss": 0.0528, "step": 460 }, { "action_loss": 0.06755993515253067, "epoch": 0.4136690647482014, "step": 460 }, { "epoch": 0.4226618705035971, "grad_norm": 1.0120878219604492, "learning_rate": 1.563333333333333e-05, "loss": 0.0553, "step": 470 }, { "action_loss": 0.033867236226797104, "epoch": 0.4226618705035971, "step": 470 }, { "epoch": 0.4316546762589928, "grad_norm": 0.9949330687522888, "learning_rate": 1.5966666666666667e-05, "loss": 0.0467, "step": 480 }, { "action_loss": 0.04936627671122551, "epoch": 0.4316546762589928, "step": 480 }, { "epoch": 0.44064748201438847, "grad_norm": 0.8369123339653015, "learning_rate": 1.63e-05, "loss": 0.0537, "step": 490 }, { "action_loss": 0.07674175500869751, "epoch": 0.44064748201438847, "step": 490 }, { "epoch": 0.44964028776978415, "grad_norm": 0.6987815499305725, "learning_rate": 1.6633333333333336e-05, "loss": 0.0448, "step": 500 }, { "action_loss": 0.06336789578199387, "epoch": 0.44964028776978415, "step": 500 }, { "epoch": 0.45863309352517984, "grad_norm": 1.2680033445358276, "learning_rate": 1.6966666666666668e-05, "loss": 0.05, "step": 510 }, { "action_loss": 0.040038034319877625, "epoch": 0.45863309352517984, "step": 510 }, { "epoch": 0.4676258992805755, "grad_norm": 0.9585620760917664, "learning_rate": 1.73e-05, "loss": 0.046, "step": 520 }, { "action_loss": 0.0570034384727478, "epoch": 0.4676258992805755, "step": 520 }, { "epoch": 0.4766187050359712, "grad_norm": 0.7816448211669922, "learning_rate": 1.7633333333333336e-05, "loss": 0.0545, "step": 530 }, { "action_loss": 0.09820006042718887, "epoch": 0.4766187050359712, "step": 530 }, { "epoch": 0.4856115107913669, "grad_norm": 0.7259025573730469, "learning_rate": 1.796666666666667e-05, "loss": 0.0613, "step": 540 }, { "action_loss": 0.0622897706925869, "epoch": 0.4856115107913669, "step": 540 }, { "epoch": 0.49460431654676257, "grad_norm": 1.52064049243927, "learning_rate": 1.83e-05, "loss": 0.0505, "step": 550 }, { "action_loss": 0.06254198402166367, "epoch": 0.49460431654676257, "step": 550 }, { "epoch": 0.5035971223021583, "grad_norm": 0.8620694875717163, "learning_rate": 1.8633333333333333e-05, "loss": 0.0507, "step": 560 }, { "action_loss": 0.038463640958070755, "epoch": 0.5035971223021583, "step": 560 }, { "epoch": 0.512589928057554, "grad_norm": 1.0454161167144775, "learning_rate": 1.896666666666667e-05, "loss": 0.0651, "step": 570 }, { "action_loss": 0.052690815180540085, "epoch": 0.512589928057554, "step": 570 }, { "epoch": 0.5215827338129496, "grad_norm": 1.089349389076233, "learning_rate": 1.93e-05, "loss": 0.054, "step": 580 }, { "action_loss": 0.05766578018665314, "epoch": 0.5215827338129496, "step": 580 }, { "epoch": 0.5305755395683454, "grad_norm": 0.824783205986023, "learning_rate": 1.9633333333333334e-05, "loss": 0.0483, "step": 590 }, { "action_loss": 0.0435149110853672, "epoch": 0.5305755395683454, "step": 590 }, { "epoch": 0.539568345323741, "grad_norm": 0.634839653968811, "learning_rate": 1.9966666666666666e-05, "loss": 0.0539, "step": 600 }, { "action_loss": 0.1412639617919922, "epoch": 0.539568345323741, "step": 600 }, { "epoch": 0.5485611510791367, "grad_norm": 0.6854180097579956, "learning_rate": 2.0300000000000002e-05, "loss": 0.0553, "step": 610 }, { "action_loss": 0.05032153055071831, "epoch": 0.5485611510791367, "step": 610 }, { "epoch": 0.5575539568345323, "grad_norm": 0.8581621050834656, "learning_rate": 2.0633333333333335e-05, "loss": 0.0544, "step": 620 }, { "action_loss": 0.05635926127433777, "epoch": 0.5575539568345323, "step": 620 }, { "epoch": 0.5665467625899281, "grad_norm": 0.8155955076217651, "learning_rate": 2.0966666666666667e-05, "loss": 0.0504, "step": 630 }, { "action_loss": 0.03853419050574303, "epoch": 0.5665467625899281, "step": 630 }, { "epoch": 0.5755395683453237, "grad_norm": 0.6739600300788879, "learning_rate": 2.13e-05, "loss": 0.0523, "step": 640 }, { "action_loss": 0.07915262877941132, "epoch": 0.5755395683453237, "step": 640 }, { "epoch": 0.5845323741007195, "grad_norm": 0.8874326944351196, "learning_rate": 2.1633333333333332e-05, "loss": 0.0498, "step": 650 }, { "action_loss": 0.02477145381271839, "epoch": 0.5845323741007195, "step": 650 }, { "epoch": 0.5935251798561151, "grad_norm": 0.7170674204826355, "learning_rate": 2.1966666666666668e-05, "loss": 0.0497, "step": 660 }, { "action_loss": 0.04419007897377014, "epoch": 0.5935251798561151, "step": 660 }, { "epoch": 0.6025179856115108, "grad_norm": 0.736640989780426, "learning_rate": 2.23e-05, "loss": 0.0376, "step": 670 }, { "action_loss": 0.068401999771595, "epoch": 0.6025179856115108, "step": 670 }, { "epoch": 0.6115107913669064, "grad_norm": 0.755790114402771, "learning_rate": 2.2633333333333336e-05, "loss": 0.0545, "step": 680 }, { "action_loss": 0.05957996845245361, "epoch": 0.6115107913669064, "step": 680 }, { "epoch": 0.6205035971223022, "grad_norm": 1.0454097986221313, "learning_rate": 2.2966666666666668e-05, "loss": 0.0458, "step": 690 }, { "action_loss": 0.04081706330180168, "epoch": 0.6205035971223022, "step": 690 }, { "epoch": 0.6294964028776978, "grad_norm": 0.6021479964256287, "learning_rate": 2.3300000000000004e-05, "loss": 0.0463, "step": 700 }, { "action_loss": 0.027214400470256805, "epoch": 0.6294964028776978, "step": 700 }, { "epoch": 0.6384892086330936, "grad_norm": 0.7980746030807495, "learning_rate": 2.3633333333333336e-05, "loss": 0.0515, "step": 710 }, { "action_loss": 0.028347447514533997, "epoch": 0.6384892086330936, "step": 710 }, { "epoch": 0.6474820143884892, "grad_norm": 1.1282484531402588, "learning_rate": 2.396666666666667e-05, "loss": 0.0413, "step": 720 }, { "action_loss": 0.07588180154561996, "epoch": 0.6474820143884892, "step": 720 }, { "epoch": 0.6564748201438849, "grad_norm": 0.6566957235336304, "learning_rate": 2.43e-05, "loss": 0.0537, "step": 730 }, { "action_loss": 0.08008075505495071, "epoch": 0.6564748201438849, "step": 730 }, { "epoch": 0.6654676258992805, "grad_norm": 0.9644797444343567, "learning_rate": 2.4633333333333334e-05, "loss": 0.0505, "step": 740 }, { "action_loss": 0.046841662377119064, "epoch": 0.6654676258992805, "step": 740 }, { "epoch": 0.6744604316546763, "grad_norm": 0.6028973460197449, "learning_rate": 2.496666666666667e-05, "loss": 0.0492, "step": 750 }, { "action_loss": 0.026886219158768654, "epoch": 0.6744604316546763, "step": 750 }, { "epoch": 0.6834532374100719, "grad_norm": 0.6365982294082642, "learning_rate": 2.5300000000000002e-05, "loss": 0.046, "step": 760 }, { "action_loss": 0.06126298010349274, "epoch": 0.6834532374100719, "step": 760 }, { "epoch": 0.6924460431654677, "grad_norm": 0.6139645576477051, "learning_rate": 2.5633333333333338e-05, "loss": 0.0448, "step": 770 }, { "action_loss": 0.026938028633594513, "epoch": 0.6924460431654677, "step": 770 }, { "epoch": 0.7014388489208633, "grad_norm": 0.777424156665802, "learning_rate": 2.5966666666666667e-05, "loss": 0.0457, "step": 780 }, { "action_loss": 0.019666267558932304, "epoch": 0.7014388489208633, "step": 780 }, { "epoch": 0.710431654676259, "grad_norm": 0.7796051502227783, "learning_rate": 2.6300000000000002e-05, "loss": 0.0404, "step": 790 }, { "action_loss": 0.05442919209599495, "epoch": 0.710431654676259, "step": 790 }, { "epoch": 0.7194244604316546, "grad_norm": 0.7425055503845215, "learning_rate": 2.663333333333333e-05, "loss": 0.047, "step": 800 }, { "action_loss": 0.037181321531534195, "epoch": 0.7194244604316546, "step": 800 }, { "epoch": 0.7284172661870504, "grad_norm": 0.5865107178688049, "learning_rate": 2.6966666666666667e-05, "loss": 0.0435, "step": 810 }, { "action_loss": 0.07274850457906723, "epoch": 0.7284172661870504, "step": 810 }, { "epoch": 0.737410071942446, "grad_norm": 0.7086899280548096, "learning_rate": 2.7300000000000003e-05, "loss": 0.0577, "step": 820 }, { "action_loss": 0.020512809976935387, "epoch": 0.737410071942446, "step": 820 }, { "epoch": 0.7464028776978417, "grad_norm": 0.8458464741706848, "learning_rate": 2.7633333333333332e-05, "loss": 0.0461, "step": 830 }, { "action_loss": 0.03701302781701088, "epoch": 0.7464028776978417, "step": 830 }, { "epoch": 0.7553956834532374, "grad_norm": 0.658902645111084, "learning_rate": 2.7966666666666668e-05, "loss": 0.0415, "step": 840 }, { "action_loss": 0.05045154690742493, "epoch": 0.7553956834532374, "step": 840 }, { "epoch": 0.7643884892086331, "grad_norm": 0.5924118161201477, "learning_rate": 2.83e-05, "loss": 0.0467, "step": 850 }, { "action_loss": 0.04594298079609871, "epoch": 0.7643884892086331, "step": 850 }, { "epoch": 0.7733812949640287, "grad_norm": 0.7038281559944153, "learning_rate": 2.8633333333333336e-05, "loss": 0.0481, "step": 860 }, { "action_loss": 0.04226092994213104, "epoch": 0.7733812949640287, "step": 860 }, { "epoch": 0.7823741007194245, "grad_norm": 0.8451381921768188, "learning_rate": 2.8966666666666668e-05, "loss": 0.0431, "step": 870 }, { "action_loss": 0.047415610402822495, "epoch": 0.7823741007194245, "step": 870 }, { "epoch": 0.7913669064748201, "grad_norm": 0.5565497875213623, "learning_rate": 2.93e-05, "loss": 0.0442, "step": 880 }, { "action_loss": 0.04603298008441925, "epoch": 0.7913669064748201, "step": 880 }, { "epoch": 0.8003597122302158, "grad_norm": 0.6900656819343567, "learning_rate": 2.9633333333333336e-05, "loss": 0.045, "step": 890 }, { "action_loss": 0.028182854875922203, "epoch": 0.8003597122302158, "step": 890 }, { "epoch": 0.8093525179856115, "grad_norm": 0.7165138721466064, "learning_rate": 2.9966666666666672e-05, "loss": 0.0442, "step": 900 }, { "action_loss": 0.06919363886117935, "epoch": 0.8093525179856115, "step": 900 }, { "epoch": 0.8183453237410072, "grad_norm": 0.6043387651443481, "learning_rate": 3.03e-05, "loss": 0.0451, "step": 910 }, { "action_loss": 0.04924764856696129, "epoch": 0.8183453237410072, "step": 910 }, { "epoch": 0.8273381294964028, "grad_norm": 0.6287219524383545, "learning_rate": 3.063333333333334e-05, "loss": 0.0506, "step": 920 }, { "action_loss": 0.03439154848456383, "epoch": 0.8273381294964028, "step": 920 }, { "epoch": 0.8363309352517986, "grad_norm": 0.619234025478363, "learning_rate": 3.096666666666666e-05, "loss": 0.0556, "step": 930 }, { "action_loss": 0.03588929399847984, "epoch": 0.8363309352517986, "step": 930 }, { "epoch": 0.8453237410071942, "grad_norm": 0.6230080127716064, "learning_rate": 3.13e-05, "loss": 0.0372, "step": 940 }, { "action_loss": 0.051580559462308884, "epoch": 0.8453237410071942, "step": 940 }, { "epoch": 0.85431654676259, "grad_norm": 0.7925558090209961, "learning_rate": 3.1633333333333334e-05, "loss": 0.0387, "step": 950 }, { "action_loss": 0.055591631680727005, "epoch": 0.85431654676259, "step": 950 }, { "epoch": 0.8633093525179856, "grad_norm": 0.6787837147712708, "learning_rate": 3.196666666666667e-05, "loss": 0.0468, "step": 960 }, { "action_loss": 0.03578504920005798, "epoch": 0.8633093525179856, "step": 960 }, { "epoch": 0.8723021582733813, "grad_norm": 0.7747719287872314, "learning_rate": 3.2300000000000006e-05, "loss": 0.0385, "step": 970 }, { "action_loss": 0.02706303261220455, "epoch": 0.8723021582733813, "step": 970 }, { "epoch": 0.8812949640287769, "grad_norm": 0.6129839420318604, "learning_rate": 3.263333333333333e-05, "loss": 0.0396, "step": 980 }, { "action_loss": 0.02432807721197605, "epoch": 0.8812949640287769, "step": 980 }, { "epoch": 0.8902877697841727, "grad_norm": 0.6361464262008667, "learning_rate": 3.296666666666667e-05, "loss": 0.0419, "step": 990 }, { "action_loss": 0.024824805557727814, "epoch": 0.8902877697841727, "step": 990 }, { "epoch": 0.8992805755395683, "grad_norm": 0.8641040921211243, "learning_rate": 3.33e-05, "loss": 0.0452, "step": 1000 }, { "action_loss": 0.06908880174160004, "epoch": 0.8992805755395683, "step": 1000 }, { "epoch": 0.908273381294964, "grad_norm": 0.6431705355644226, "learning_rate": 3.3633333333333335e-05, "loss": 0.0518, "step": 1010 }, { "action_loss": 0.03488612920045853, "epoch": 0.908273381294964, "step": 1010 }, { "epoch": 0.9172661870503597, "grad_norm": 0.729756236076355, "learning_rate": 3.396666666666667e-05, "loss": 0.0379, "step": 1020 }, { "action_loss": 0.051537465304136276, "epoch": 0.9172661870503597, "step": 1020 }, { "epoch": 0.9262589928057554, "grad_norm": 0.6819617748260498, "learning_rate": 3.430000000000001e-05, "loss": 0.043, "step": 1030 }, { "action_loss": 0.031779468059539795, "epoch": 0.9262589928057554, "step": 1030 }, { "epoch": 0.935251798561151, "grad_norm": 0.5410462021827698, "learning_rate": 3.463333333333333e-05, "loss": 0.0389, "step": 1040 }, { "action_loss": 0.031538281589746475, "epoch": 0.935251798561151, "step": 1040 }, { "epoch": 0.9442446043165468, "grad_norm": 0.9277002215385437, "learning_rate": 3.496666666666667e-05, "loss": 0.0427, "step": 1050 }, { "action_loss": 0.0465916246175766, "epoch": 0.9442446043165468, "step": 1050 }, { "epoch": 0.9532374100719424, "grad_norm": 0.6739461421966553, "learning_rate": 3.53e-05, "loss": 0.0491, "step": 1060 }, { "action_loss": 0.029435789212584496, "epoch": 0.9532374100719424, "step": 1060 }, { "epoch": 0.9622302158273381, "grad_norm": 0.4525446891784668, "learning_rate": 3.563333333333334e-05, "loss": 0.0344, "step": 1070 }, { "action_loss": 0.06522655487060547, "epoch": 0.9622302158273381, "step": 1070 }, { "epoch": 0.9712230215827338, "grad_norm": 0.7902148962020874, "learning_rate": 3.596666666666667e-05, "loss": 0.0441, "step": 1080 }, { "action_loss": 0.026672454550862312, "epoch": 0.9712230215827338, "step": 1080 }, { "epoch": 0.9802158273381295, "grad_norm": 0.6867257952690125, "learning_rate": 3.63e-05, "loss": 0.043, "step": 1090 }, { "action_loss": 0.04357036575675011, "epoch": 0.9802158273381295, "step": 1090 }, { "epoch": 0.9892086330935251, "grad_norm": 0.47091060876846313, "learning_rate": 3.6633333333333334e-05, "loss": 0.035, "step": 1100 }, { "action_loss": 0.05705428123474121, "epoch": 0.9892086330935251, "step": 1100 }, { "epoch": 0.9982014388489209, "grad_norm": 0.6273035407066345, "learning_rate": 3.6966666666666666e-05, "loss": 0.0364, "step": 1110 }, { "action_loss": 0.027807414531707764, "epoch": 0.9982014388489209, "step": 1110 }, { "epoch": 1.0071942446043165, "grad_norm": 0.6148586273193359, "learning_rate": 3.73e-05, "loss": 0.0362, "step": 1120 }, { "action_loss": 0.05247524753212929, "epoch": 1.0071942446043165, "step": 1120 }, { "epoch": 1.0161870503597121, "grad_norm": 0.6381557583808899, "learning_rate": 3.763333333333334e-05, "loss": 0.0408, "step": 1130 }, { "action_loss": 0.024059878662228584, "epoch": 1.0161870503597121, "step": 1130 }, { "epoch": 1.025179856115108, "grad_norm": 0.673534095287323, "learning_rate": 3.796666666666667e-05, "loss": 0.0378, "step": 1140 }, { "action_loss": 0.05647293105721474, "epoch": 1.025179856115108, "step": 1140 }, { "epoch": 1.0341726618705036, "grad_norm": 0.5943416953086853, "learning_rate": 3.83e-05, "loss": 0.04, "step": 1150 }, { "action_loss": 0.035889554768800735, "epoch": 1.0341726618705036, "step": 1150 }, { "epoch": 1.0431654676258992, "grad_norm": 0.5750225186347961, "learning_rate": 3.8633333333333335e-05, "loss": 0.0378, "step": 1160 }, { "action_loss": 0.051310230046510696, "epoch": 1.0431654676258992, "step": 1160 }, { "epoch": 1.0521582733812949, "grad_norm": 0.6392155289649963, "learning_rate": 3.896666666666667e-05, "loss": 0.0459, "step": 1170 }, { "action_loss": 0.047659117728471756, "epoch": 1.0521582733812949, "step": 1170 }, { "epoch": 1.0611510791366907, "grad_norm": 0.6537225842475891, "learning_rate": 3.9300000000000007e-05, "loss": 0.0428, "step": 1180 }, { "action_loss": 0.04779611900448799, "epoch": 1.0611510791366907, "step": 1180 }, { "epoch": 1.0701438848920863, "grad_norm": 0.5133776664733887, "learning_rate": 3.963333333333333e-05, "loss": 0.04, "step": 1190 }, { "action_loss": 0.07731218636035919, "epoch": 1.0701438848920863, "step": 1190 }, { "epoch": 1.079136690647482, "grad_norm": 0.4992883503437042, "learning_rate": 3.996666666666667e-05, "loss": 0.043, "step": 1200 }, { "action_loss": 0.02444678358733654, "epoch": 1.079136690647482, "step": 1200 }, { "epoch": 1.0881294964028776, "grad_norm": 0.4283023476600647, "learning_rate": 4.0300000000000004e-05, "loss": 0.0448, "step": 1210 }, { "action_loss": 0.043539389967918396, "epoch": 1.0881294964028776, "step": 1210 }, { "epoch": 1.0971223021582734, "grad_norm": 0.6933509707450867, "learning_rate": 4.0633333333333336e-05, "loss": 0.0454, "step": 1220 }, { "action_loss": 0.031839847564697266, "epoch": 1.0971223021582734, "step": 1220 }, { "epoch": 1.106115107913669, "grad_norm": 0.4243057668209076, "learning_rate": 4.096666666666667e-05, "loss": 0.0355, "step": 1230 }, { "action_loss": 0.02862989716231823, "epoch": 1.106115107913669, "step": 1230 }, { "epoch": 1.1151079136690647, "grad_norm": 0.5353140234947205, "learning_rate": 4.13e-05, "loss": 0.0417, "step": 1240 }, { "action_loss": 0.06790728121995926, "epoch": 1.1151079136690647, "step": 1240 }, { "epoch": 1.1241007194244603, "grad_norm": 0.6336038112640381, "learning_rate": 4.1633333333333333e-05, "loss": 0.039, "step": 1250 }, { "action_loss": 0.0231375303119421, "epoch": 1.1241007194244603, "step": 1250 }, { "epoch": 1.1330935251798562, "grad_norm": 0.5072726607322693, "learning_rate": 4.196666666666667e-05, "loss": 0.0291, "step": 1260 }, { "action_loss": 0.0320516973733902, "epoch": 1.1330935251798562, "step": 1260 }, { "epoch": 1.1420863309352518, "grad_norm": 0.5047056674957275, "learning_rate": 4.23e-05, "loss": 0.0322, "step": 1270 }, { "action_loss": 0.042932406067848206, "epoch": 1.1420863309352518, "step": 1270 }, { "epoch": 1.1510791366906474, "grad_norm": 0.455302357673645, "learning_rate": 4.263333333333334e-05, "loss": 0.0353, "step": 1280 }, { "action_loss": 0.02120894193649292, "epoch": 1.1510791366906474, "step": 1280 }, { "epoch": 1.1600719424460433, "grad_norm": 0.6246234178543091, "learning_rate": 4.296666666666666e-05, "loss": 0.0348, "step": 1290 }, { "action_loss": 0.03471735492348671, "epoch": 1.1600719424460433, "step": 1290 }, { "epoch": 1.169064748201439, "grad_norm": 0.6249594688415527, "learning_rate": 4.33e-05, "loss": 0.0402, "step": 1300 }, { "action_loss": 0.017111198976635933, "epoch": 1.169064748201439, "step": 1300 }, { "epoch": 1.1780575539568345, "grad_norm": 0.55198073387146, "learning_rate": 4.3633333333333335e-05, "loss": 0.0345, "step": 1310 }, { "action_loss": 0.0448656789958477, "epoch": 1.1780575539568345, "step": 1310 }, { "epoch": 1.1870503597122302, "grad_norm": 0.7394371628761292, "learning_rate": 4.396666666666667e-05, "loss": 0.0379, "step": 1320 }, { "action_loss": 0.04755084216594696, "epoch": 1.1870503597122302, "step": 1320 }, { "epoch": 1.1960431654676258, "grad_norm": 0.5845442414283752, "learning_rate": 4.43e-05, "loss": 0.0423, "step": 1330 }, { "action_loss": 0.025622190907597542, "epoch": 1.1960431654676258, "step": 1330 }, { "epoch": 1.2050359712230216, "grad_norm": 0.7974011898040771, "learning_rate": 4.463333333333334e-05, "loss": 0.0512, "step": 1340 }, { "action_loss": 0.028192924335598946, "epoch": 1.2050359712230216, "step": 1340 }, { "epoch": 1.2140287769784173, "grad_norm": 0.5436031222343445, "learning_rate": 4.496666666666667e-05, "loss": 0.032, "step": 1350 }, { "action_loss": 0.03954688832163811, "epoch": 1.2140287769784173, "step": 1350 }, { "epoch": 1.223021582733813, "grad_norm": 0.535480797290802, "learning_rate": 4.53e-05, "loss": 0.0475, "step": 1360 }, { "action_loss": 0.04230889678001404, "epoch": 1.223021582733813, "step": 1360 }, { "epoch": 1.2320143884892087, "grad_norm": 0.551406979560852, "learning_rate": 4.5633333333333336e-05, "loss": 0.0355, "step": 1370 }, { "action_loss": 0.021758034825325012, "epoch": 1.2320143884892087, "step": 1370 }, { "epoch": 1.2410071942446044, "grad_norm": 0.5582069158554077, "learning_rate": 4.596666666666667e-05, "loss": 0.0353, "step": 1380 }, { "action_loss": 0.028216758742928505, "epoch": 1.2410071942446044, "step": 1380 }, { "epoch": 1.25, "grad_norm": 0.3741975724697113, "learning_rate": 4.630000000000001e-05, "loss": 0.038, "step": 1390 }, { "action_loss": 0.029805244877934456, "epoch": 1.25, "step": 1390 }, { "epoch": 1.2589928057553956, "grad_norm": 0.5456962585449219, "learning_rate": 4.663333333333333e-05, "loss": 0.0348, "step": 1400 }, { "action_loss": 0.030890250578522682, "epoch": 1.2589928057553956, "step": 1400 }, { "epoch": 1.2679856115107913, "grad_norm": 0.6439878344535828, "learning_rate": 4.696666666666667e-05, "loss": 0.0477, "step": 1410 }, { "action_loss": 0.03227924928069115, "epoch": 1.2679856115107913, "step": 1410 }, { "epoch": 1.276978417266187, "grad_norm": 0.47595712542533875, "learning_rate": 4.73e-05, "loss": 0.042, "step": 1420 }, { "action_loss": 0.0646885335445404, "epoch": 1.276978417266187, "step": 1420 }, { "epoch": 1.2859712230215827, "grad_norm": 0.5156022310256958, "learning_rate": 4.763333333333334e-05, "loss": 0.0438, "step": 1430 }, { "action_loss": 0.06157734990119934, "epoch": 1.2859712230215827, "step": 1430 }, { "epoch": 1.2949640287769784, "grad_norm": 0.6146484017372131, "learning_rate": 4.796666666666667e-05, "loss": 0.033, "step": 1440 }, { "action_loss": 0.05324479565024376, "epoch": 1.2949640287769784, "step": 1440 }, { "epoch": 1.3039568345323742, "grad_norm": 0.5879250168800354, "learning_rate": 4.83e-05, "loss": 0.0431, "step": 1450 }, { "action_loss": 0.033081214874982834, "epoch": 1.3039568345323742, "step": 1450 }, { "epoch": 1.3129496402877698, "grad_norm": 0.5732369422912598, "learning_rate": 4.8633333333333334e-05, "loss": 0.0363, "step": 1460 }, { "action_loss": 0.02063913829624653, "epoch": 1.3129496402877698, "step": 1460 }, { "epoch": 1.3219424460431655, "grad_norm": 0.6337621212005615, "learning_rate": 4.8966666666666667e-05, "loss": 0.031, "step": 1470 }, { "action_loss": 0.0320940800011158, "epoch": 1.3219424460431655, "step": 1470 }, { "epoch": 1.330935251798561, "grad_norm": 0.4033293128013611, "learning_rate": 4.93e-05, "loss": 0.0369, "step": 1480 }, { "action_loss": 0.021541984751820564, "epoch": 1.330935251798561, "step": 1480 }, { "epoch": 1.3399280575539567, "grad_norm": 0.6541376113891602, "learning_rate": 4.963333333333334e-05, "loss": 0.0365, "step": 1490 }, { "action_loss": 0.04601737856864929, "epoch": 1.3399280575539567, "step": 1490 }, { "epoch": 1.3489208633093526, "grad_norm": 0.5119085907936096, "learning_rate": 4.996666666666667e-05, "loss": 0.043, "step": 1500 }, { "action_loss": 0.02843761257827282, "epoch": 1.3489208633093526, "step": 1500 }, { "epoch": 1.3579136690647482, "grad_norm": 0.5209753513336182, "learning_rate": 5.03e-05, "loss": 0.0319, "step": 1510 }, { "action_loss": 0.06987661868333817, "epoch": 1.3579136690647482, "step": 1510 }, { "epoch": 1.3669064748201438, "grad_norm": 0.6293902397155762, "learning_rate": 5.0633333333333335e-05, "loss": 0.036, "step": 1520 }, { "action_loss": 0.030725494027137756, "epoch": 1.3669064748201438, "step": 1520 }, { "epoch": 1.3758992805755397, "grad_norm": 0.44965022802352905, "learning_rate": 5.0966666666666674e-05, "loss": 0.0333, "step": 1530 }, { "action_loss": 0.03546636924147606, "epoch": 1.3758992805755397, "step": 1530 }, { "epoch": 1.3848920863309353, "grad_norm": 0.6537832021713257, "learning_rate": 5.130000000000001e-05, "loss": 0.0338, "step": 1540 }, { "action_loss": 0.026948845013976097, "epoch": 1.3848920863309353, "step": 1540 }, { "epoch": 1.393884892086331, "grad_norm": 0.5501567125320435, "learning_rate": 5.163333333333333e-05, "loss": 0.0447, "step": 1550 }, { "action_loss": 0.03450007736682892, "epoch": 1.393884892086331, "step": 1550 }, { "epoch": 1.4028776978417266, "grad_norm": 0.4552699327468872, "learning_rate": 5.196666666666667e-05, "loss": 0.03, "step": 1560 }, { "action_loss": 0.03332088142633438, "epoch": 1.4028776978417266, "step": 1560 }, { "epoch": 1.4118705035971222, "grad_norm": 0.6523012518882751, "learning_rate": 5.2300000000000004e-05, "loss": 0.0355, "step": 1570 }, { "action_loss": 0.032924819737672806, "epoch": 1.4118705035971222, "step": 1570 }, { "epoch": 1.420863309352518, "grad_norm": 0.5053160190582275, "learning_rate": 5.2633333333333336e-05, "loss": 0.0324, "step": 1580 }, { "action_loss": 0.026709839701652527, "epoch": 1.420863309352518, "step": 1580 }, { "epoch": 1.4298561151079137, "grad_norm": 0.3988085091114044, "learning_rate": 5.296666666666666e-05, "loss": 0.0345, "step": 1590 }, { "action_loss": 0.04479079321026802, "epoch": 1.4298561151079137, "step": 1590 }, { "epoch": 1.4388489208633093, "grad_norm": 0.4690578579902649, "learning_rate": 5.330000000000001e-05, "loss": 0.035, "step": 1600 }, { "action_loss": 0.03818154335021973, "epoch": 1.4388489208633093, "step": 1600 }, { "epoch": 1.4478417266187051, "grad_norm": 0.5943588018417358, "learning_rate": 5.3633333333333334e-05, "loss": 0.0408, "step": 1610 }, { "action_loss": 0.058222513645887375, "epoch": 1.4478417266187051, "step": 1610 }, { "epoch": 1.4568345323741008, "grad_norm": 0.5057544112205505, "learning_rate": 5.3966666666666666e-05, "loss": 0.0367, "step": 1620 }, { "action_loss": 0.026798009872436523, "epoch": 1.4568345323741008, "step": 1620 }, { "epoch": 1.4658273381294964, "grad_norm": 0.6484395265579224, "learning_rate": 5.4300000000000005e-05, "loss": 0.0442, "step": 1630 }, { "action_loss": 0.026172026991844177, "epoch": 1.4658273381294964, "step": 1630 }, { "epoch": 1.474820143884892, "grad_norm": 0.6282773017883301, "learning_rate": 5.463333333333334e-05, "loss": 0.0362, "step": 1640 }, { "action_loss": 0.037262264639139175, "epoch": 1.474820143884892, "step": 1640 }, { "epoch": 1.4838129496402876, "grad_norm": 0.49412378668785095, "learning_rate": 5.496666666666666e-05, "loss": 0.0312, "step": 1650 }, { "action_loss": 0.027168789878487587, "epoch": 1.4838129496402876, "step": 1650 }, { "epoch": 1.4928057553956835, "grad_norm": 0.3215131461620331, "learning_rate": 5.530000000000001e-05, "loss": 0.0445, "step": 1660 }, { "action_loss": 0.029547980055212975, "epoch": 1.4928057553956835, "step": 1660 }, { "epoch": 1.5017985611510791, "grad_norm": 0.5862283110618591, "learning_rate": 5.5633333333333335e-05, "loss": 0.0345, "step": 1670 }, { "action_loss": 0.05824686959385872, "epoch": 1.5017985611510791, "step": 1670 }, { "epoch": 1.5107913669064748, "grad_norm": 0.6398341059684753, "learning_rate": 5.596666666666667e-05, "loss": 0.0356, "step": 1680 }, { "action_loss": 0.02236400544643402, "epoch": 1.5107913669064748, "step": 1680 }, { "epoch": 1.5197841726618706, "grad_norm": 0.7369078397750854, "learning_rate": 5.63e-05, "loss": 0.046, "step": 1690 }, { "action_loss": 0.02641262672841549, "epoch": 1.5197841726618706, "step": 1690 }, { "epoch": 1.5287769784172662, "grad_norm": 0.707495927810669, "learning_rate": 5.663333333333334e-05, "loss": 0.0382, "step": 1700 }, { "action_loss": 0.03766700625419617, "epoch": 1.5287769784172662, "step": 1700 }, { "epoch": 1.5377697841726619, "grad_norm": 0.6171728372573853, "learning_rate": 5.696666666666667e-05, "loss": 0.0328, "step": 1710 }, { "action_loss": 0.036180559545755386, "epoch": 1.5377697841726619, "step": 1710 }, { "epoch": 1.5467625899280577, "grad_norm": 0.5686509013175964, "learning_rate": 5.73e-05, "loss": 0.0374, "step": 1720 }, { "action_loss": 0.03190896660089493, "epoch": 1.5467625899280577, "step": 1720 }, { "epoch": 1.5557553956834531, "grad_norm": 0.5465919375419617, "learning_rate": 5.7633333333333336e-05, "loss": 0.031, "step": 1730 }, { "action_loss": 0.053962934762239456, "epoch": 1.5557553956834531, "step": 1730 }, { "epoch": 1.564748201438849, "grad_norm": 0.4113007187843323, "learning_rate": 5.796666666666667e-05, "loss": 0.0374, "step": 1740 }, { "action_loss": 0.04617628455162048, "epoch": 1.564748201438849, "step": 1740 }, { "epoch": 1.5737410071942446, "grad_norm": 0.39361193776130676, "learning_rate": 5.83e-05, "loss": 0.034, "step": 1750 }, { "action_loss": 0.03190290927886963, "epoch": 1.5737410071942446, "step": 1750 }, { "epoch": 1.5827338129496402, "grad_norm": 0.34758028388023376, "learning_rate": 5.863333333333334e-05, "loss": 0.0321, "step": 1760 }, { "action_loss": 0.03133046254515648, "epoch": 1.5827338129496402, "step": 1760 }, { "epoch": 1.591726618705036, "grad_norm": 0.5439738035202026, "learning_rate": 5.896666666666667e-05, "loss": 0.0366, "step": 1770 }, { "action_loss": 0.01684042066335678, "epoch": 1.591726618705036, "step": 1770 }, { "epoch": 1.6007194244604317, "grad_norm": 0.42772218585014343, "learning_rate": 5.93e-05, "loss": 0.0271, "step": 1780 }, { "action_loss": 0.023494264110922813, "epoch": 1.6007194244604317, "step": 1780 }, { "epoch": 1.6097122302158273, "grad_norm": 0.530819296836853, "learning_rate": 5.9633333333333344e-05, "loss": 0.0399, "step": 1790 }, { "action_loss": 0.021217694506049156, "epoch": 1.6097122302158273, "step": 1790 }, { "epoch": 1.6187050359712232, "grad_norm": 0.5349923968315125, "learning_rate": 5.996666666666667e-05, "loss": 0.0415, "step": 1800 }, { "action_loss": 0.027703477069735527, "epoch": 1.6187050359712232, "step": 1800 }, { "epoch": 1.6276978417266186, "grad_norm": 0.4711857736110687, "learning_rate": 6.03e-05, "loss": 0.0333, "step": 1810 }, { "action_loss": 0.0325213223695755, "epoch": 1.6276978417266186, "step": 1810 }, { "epoch": 1.6366906474820144, "grad_norm": 0.460997611284256, "learning_rate": 6.063333333333333e-05, "loss": 0.0283, "step": 1820 }, { "action_loss": 0.02664881944656372, "epoch": 1.6366906474820144, "step": 1820 }, { "epoch": 1.64568345323741, "grad_norm": 0.4045039713382721, "learning_rate": 6.0966666666666674e-05, "loss": 0.0343, "step": 1830 }, { "action_loss": 0.04241783916950226, "epoch": 1.64568345323741, "step": 1830 }, { "epoch": 1.6546762589928057, "grad_norm": 0.6627966165542603, "learning_rate": 6.13e-05, "loss": 0.0368, "step": 1840 }, { "action_loss": 0.03632060065865517, "epoch": 1.6546762589928057, "step": 1840 }, { "epoch": 1.6636690647482015, "grad_norm": 0.5597823262214661, "learning_rate": 6.163333333333333e-05, "loss": 0.0318, "step": 1850 }, { "action_loss": 0.02323482185602188, "epoch": 1.6636690647482015, "step": 1850 }, { "epoch": 1.6726618705035972, "grad_norm": 0.5141417980194092, "learning_rate": 6.196666666666668e-05, "loss": 0.0357, "step": 1860 }, { "action_loss": 0.029906051233410835, "epoch": 1.6726618705035972, "step": 1860 }, { "epoch": 1.6816546762589928, "grad_norm": 0.5272186398506165, "learning_rate": 6.23e-05, "loss": 0.0296, "step": 1870 }, { "action_loss": 0.040586233139038086, "epoch": 1.6816546762589928, "step": 1870 }, { "epoch": 1.6906474820143886, "grad_norm": 0.405839204788208, "learning_rate": 6.263333333333333e-05, "loss": 0.0368, "step": 1880 }, { "action_loss": 0.023269765079021454, "epoch": 1.6906474820143886, "step": 1880 }, { "epoch": 1.699640287769784, "grad_norm": 0.5084345936775208, "learning_rate": 6.296666666666667e-05, "loss": 0.0418, "step": 1890 }, { "action_loss": 0.04019596055150032, "epoch": 1.699640287769784, "step": 1890 }, { "epoch": 1.70863309352518, "grad_norm": 0.5691424012184143, "learning_rate": 6.330000000000001e-05, "loss": 0.0412, "step": 1900 }, { "action_loss": 0.031151533126831055, "epoch": 1.70863309352518, "step": 1900 }, { "epoch": 1.7176258992805755, "grad_norm": 0.5187909007072449, "learning_rate": 6.363333333333334e-05, "loss": 0.0335, "step": 1910 }, { "action_loss": 0.028628522530198097, "epoch": 1.7176258992805755, "step": 1910 }, { "epoch": 1.7266187050359711, "grad_norm": 0.38961073756217957, "learning_rate": 6.396666666666667e-05, "loss": 0.0279, "step": 1920 }, { "action_loss": 0.06239907816052437, "epoch": 1.7266187050359711, "step": 1920 }, { "epoch": 1.735611510791367, "grad_norm": 0.5035207867622375, "learning_rate": 6.43e-05, "loss": 0.0365, "step": 1930 }, { "action_loss": 0.04597022011876106, "epoch": 1.735611510791367, "step": 1930 }, { "epoch": 1.7446043165467626, "grad_norm": 0.34811949729919434, "learning_rate": 6.463333333333334e-05, "loss": 0.0294, "step": 1940 }, { "action_loss": 0.03976987674832344, "epoch": 1.7446043165467626, "step": 1940 }, { "epoch": 1.7535971223021583, "grad_norm": 0.3528272211551666, "learning_rate": 6.496666666666667e-05, "loss": 0.0341, "step": 1950 }, { "action_loss": 0.0328768752515316, "epoch": 1.7535971223021583, "step": 1950 }, { "epoch": 1.762589928057554, "grad_norm": 0.45649126172065735, "learning_rate": 6.53e-05, "loss": 0.0354, "step": 1960 }, { "action_loss": 0.017510784789919853, "epoch": 1.762589928057554, "step": 1960 }, { "epoch": 1.7715827338129495, "grad_norm": 0.7250106930732727, "learning_rate": 6.563333333333333e-05, "loss": 0.0362, "step": 1970 }, { "action_loss": 0.05538160726428032, "epoch": 1.7715827338129495, "step": 1970 }, { "epoch": 1.7805755395683454, "grad_norm": 0.6517797708511353, "learning_rate": 6.596666666666667e-05, "loss": 0.0366, "step": 1980 }, { "action_loss": 0.023985454812645912, "epoch": 1.7805755395683454, "step": 1980 }, { "epoch": 1.789568345323741, "grad_norm": 0.556811511516571, "learning_rate": 6.630000000000001e-05, "loss": 0.0335, "step": 1990 }, { "action_loss": 0.015087206847965717, "epoch": 1.789568345323741, "step": 1990 }, { "epoch": 1.7985611510791366, "grad_norm": 0.4064406156539917, "learning_rate": 6.663333333333333e-05, "loss": 0.0344, "step": 2000 }, { "action_loss": 0.03436048701405525, "epoch": 1.7985611510791366, "step": 2000 }, { "epoch": 1.8075539568345325, "grad_norm": 0.43612056970596313, "learning_rate": 6.696666666666666e-05, "loss": 0.0331, "step": 2010 }, { "action_loss": 0.04318699240684509, "epoch": 1.8075539568345325, "step": 2010 }, { "epoch": 1.816546762589928, "grad_norm": 0.44221436977386475, "learning_rate": 6.730000000000001e-05, "loss": 0.0296, "step": 2020 }, { "action_loss": 0.025294005870819092, "epoch": 1.816546762589928, "step": 2020 }, { "epoch": 1.8255395683453237, "grad_norm": 0.5068031549453735, "learning_rate": 6.763333333333334e-05, "loss": 0.0323, "step": 2030 }, { "action_loss": 0.017787719145417213, "epoch": 1.8255395683453237, "step": 2030 }, { "epoch": 1.8345323741007196, "grad_norm": 0.40401867032051086, "learning_rate": 6.796666666666666e-05, "loss": 0.0298, "step": 2040 }, { "action_loss": 0.03343471139669418, "epoch": 1.8345323741007196, "step": 2040 }, { "epoch": 1.843525179856115, "grad_norm": 0.4174574315547943, "learning_rate": 6.83e-05, "loss": 0.0318, "step": 2050 }, { "action_loss": 0.014727622270584106, "epoch": 1.843525179856115, "step": 2050 }, { "epoch": 1.8525179856115108, "grad_norm": 0.5196689367294312, "learning_rate": 6.863333333333334e-05, "loss": 0.034, "step": 2060 }, { "action_loss": 0.02792099118232727, "epoch": 1.8525179856115108, "step": 2060 }, { "epoch": 1.8615107913669064, "grad_norm": 0.37597042322158813, "learning_rate": 6.896666666666667e-05, "loss": 0.0314, "step": 2070 }, { "action_loss": 0.02417396567761898, "epoch": 1.8615107913669064, "step": 2070 }, { "epoch": 1.870503597122302, "grad_norm": 0.6821014881134033, "learning_rate": 6.93e-05, "loss": 0.0393, "step": 2080 }, { "action_loss": 0.0384303443133831, "epoch": 1.870503597122302, "step": 2080 }, { "epoch": 1.879496402877698, "grad_norm": 0.6419340372085571, "learning_rate": 6.963333333333334e-05, "loss": 0.0316, "step": 2090 }, { "action_loss": 0.03398263081908226, "epoch": 1.879496402877698, "step": 2090 }, { "epoch": 1.8884892086330936, "grad_norm": 0.577555239200592, "learning_rate": 6.996666666666667e-05, "loss": 0.0312, "step": 2100 }, { "action_loss": 0.024318167939782143, "epoch": 1.8884892086330936, "step": 2100 }, { "epoch": 1.8974820143884892, "grad_norm": 0.4329935312271118, "learning_rate": 7.03e-05, "loss": 0.0393, "step": 2110 }, { "action_loss": 0.02247915416955948, "epoch": 1.8974820143884892, "step": 2110 }, { "epoch": 1.906474820143885, "grad_norm": 0.5825495719909668, "learning_rate": 7.063333333333333e-05, "loss": 0.0481, "step": 2120 }, { "action_loss": 0.02718437649309635, "epoch": 1.906474820143885, "step": 2120 }, { "epoch": 1.9154676258992804, "grad_norm": 0.40720635652542114, "learning_rate": 7.096666666666667e-05, "loss": 0.0405, "step": 2130 }, { "action_loss": 0.031118733808398247, "epoch": 1.9154676258992804, "step": 2130 }, { "epoch": 1.9244604316546763, "grad_norm": 0.5444257855415344, "learning_rate": 7.13e-05, "loss": 0.0369, "step": 2140 }, { "action_loss": 0.03353152796626091, "epoch": 1.9244604316546763, "step": 2140 }, { "epoch": 1.933453237410072, "grad_norm": 0.5383402705192566, "learning_rate": 7.163333333333334e-05, "loss": 0.0331, "step": 2150 }, { "action_loss": 0.021748552098870277, "epoch": 1.933453237410072, "step": 2150 }, { "epoch": 1.9424460431654675, "grad_norm": 0.45270729064941406, "learning_rate": 7.196666666666668e-05, "loss": 0.0431, "step": 2160 }, { "action_loss": 0.04610537365078926, "epoch": 1.9424460431654675, "step": 2160 }, { "epoch": 1.9514388489208634, "grad_norm": 0.6245124340057373, "learning_rate": 7.23e-05, "loss": 0.0295, "step": 2170 }, { "action_loss": 0.037314675748348236, "epoch": 1.9514388489208634, "step": 2170 }, { "epoch": 1.960431654676259, "grad_norm": 0.39634624123573303, "learning_rate": 7.263333333333334e-05, "loss": 0.0256, "step": 2180 }, { "action_loss": 0.03488745912909508, "epoch": 1.960431654676259, "step": 2180 }, { "epoch": 1.9694244604316546, "grad_norm": 0.6089788675308228, "learning_rate": 7.296666666666667e-05, "loss": 0.0335, "step": 2190 }, { "action_loss": 0.03533981367945671, "epoch": 1.9694244604316546, "step": 2190 }, { "epoch": 1.9784172661870505, "grad_norm": 0.41608238220214844, "learning_rate": 7.33e-05, "loss": 0.0321, "step": 2200 }, { "action_loss": 0.04141727089881897, "epoch": 1.9784172661870505, "step": 2200 }, { "epoch": 1.987410071942446, "grad_norm": 0.5215352773666382, "learning_rate": 7.363333333333334e-05, "loss": 0.0381, "step": 2210 }, { "action_loss": 0.03646570444107056, "epoch": 1.987410071942446, "step": 2210 }, { "epoch": 1.9964028776978417, "grad_norm": 0.38608527183532715, "learning_rate": 7.396666666666667e-05, "loss": 0.0332, "step": 2220 }, { "action_loss": 0.03032037615776062, "epoch": 1.9964028776978417, "step": 2220 }, { "epoch": 2.0053956834532376, "grad_norm": 0.6224957704544067, "learning_rate": 7.43e-05, "loss": 0.0358, "step": 2230 }, { "action_loss": 0.05555034801363945, "epoch": 2.0053956834532376, "step": 2230 }, { "epoch": 2.014388489208633, "grad_norm": 0.3323608338832855, "learning_rate": 7.463333333333334e-05, "loss": 0.034, "step": 2240 }, { "action_loss": 0.04010840877890587, "epoch": 2.014388489208633, "step": 2240 }, { "epoch": 2.023381294964029, "grad_norm": 0.5534118413925171, "learning_rate": 7.496666666666667e-05, "loss": 0.0406, "step": 2250 }, { "action_loss": 0.0279714222997427, "epoch": 2.023381294964029, "step": 2250 }, { "epoch": 2.0323741007194243, "grad_norm": 0.46975255012512207, "learning_rate": 7.53e-05, "loss": 0.0451, "step": 2260 }, { "action_loss": 0.020565662533044815, "epoch": 2.0323741007194243, "step": 2260 }, { "epoch": 2.04136690647482, "grad_norm": 0.32420456409454346, "learning_rate": 7.563333333333333e-05, "loss": 0.0383, "step": 2270 }, { "action_loss": 0.04796861112117767, "epoch": 2.04136690647482, "step": 2270 }, { "epoch": 2.050359712230216, "grad_norm": 0.6717978119850159, "learning_rate": 7.596666666666668e-05, "loss": 0.0381, "step": 2280 }, { "action_loss": 0.05045347288250923, "epoch": 2.050359712230216, "step": 2280 }, { "epoch": 2.0593525179856114, "grad_norm": 0.4191593527793884, "learning_rate": 7.630000000000001e-05, "loss": 0.0373, "step": 2290 }, { "action_loss": 0.024668237194418907, "epoch": 2.0593525179856114, "step": 2290 }, { "epoch": 2.068345323741007, "grad_norm": 0.4239371418952942, "learning_rate": 7.663333333333333e-05, "loss": 0.0357, "step": 2300 }, { "action_loss": 0.019539959728717804, "epoch": 2.068345323741007, "step": 2300 }, { "epoch": 2.077338129496403, "grad_norm": 0.566641628742218, "learning_rate": 7.696666666666668e-05, "loss": 0.0338, "step": 2310 }, { "action_loss": 0.024330219253897667, "epoch": 2.077338129496403, "step": 2310 }, { "epoch": 2.0863309352517985, "grad_norm": 0.6705495119094849, "learning_rate": 7.730000000000001e-05, "loss": 0.0376, "step": 2320 }, { "action_loss": 0.035560186952352524, "epoch": 2.0863309352517985, "step": 2320 }, { "epoch": 2.0953237410071943, "grad_norm": 0.3994458019733429, "learning_rate": 7.763333333333334e-05, "loss": 0.0361, "step": 2330 }, { "action_loss": 0.02927137352526188, "epoch": 2.0953237410071943, "step": 2330 }, { "epoch": 2.1043165467625897, "grad_norm": 0.6612066626548767, "learning_rate": 7.796666666666666e-05, "loss": 0.035, "step": 2340 }, { "action_loss": 0.019964154809713364, "epoch": 2.1043165467625897, "step": 2340 }, { "epoch": 2.1133093525179856, "grad_norm": 0.6276342868804932, "learning_rate": 7.83e-05, "loss": 0.0334, "step": 2350 }, { "action_loss": 0.05188046023249626, "epoch": 2.1133093525179856, "step": 2350 }, { "epoch": 2.1223021582733814, "grad_norm": 0.44070470333099365, "learning_rate": 7.863333333333334e-05, "loss": 0.0352, "step": 2360 }, { "action_loss": 0.039194390177726746, "epoch": 2.1223021582733814, "step": 2360 }, { "epoch": 2.131294964028777, "grad_norm": 0.40080583095550537, "learning_rate": 7.896666666666667e-05, "loss": 0.034, "step": 2370 }, { "action_loss": 0.01879771612584591, "epoch": 2.131294964028777, "step": 2370 }, { "epoch": 2.1402877697841727, "grad_norm": 0.5088931322097778, "learning_rate": 7.93e-05, "loss": 0.0351, "step": 2380 }, { "action_loss": 0.05169317126274109, "epoch": 2.1402877697841727, "step": 2380 }, { "epoch": 2.1492805755395685, "grad_norm": 0.3935542702674866, "learning_rate": 7.963333333333334e-05, "loss": 0.045, "step": 2390 }, { "action_loss": 0.025615274906158447, "epoch": 2.1492805755395685, "step": 2390 }, { "epoch": 2.158273381294964, "grad_norm": 0.413339227437973, "learning_rate": 7.996666666666667e-05, "loss": 0.0299, "step": 2400 }, { "action_loss": 0.024779757484793663, "epoch": 2.158273381294964, "step": 2400 }, { "epoch": 2.16726618705036, "grad_norm": 0.45071667432785034, "learning_rate": 8.030000000000001e-05, "loss": 0.0334, "step": 2410 }, { "action_loss": 0.032503049820661545, "epoch": 2.16726618705036, "step": 2410 }, { "epoch": 2.176258992805755, "grad_norm": 0.5145567059516907, "learning_rate": 8.063333333333333e-05, "loss": 0.0376, "step": 2420 }, { "action_loss": 0.05357672646641731, "epoch": 2.176258992805755, "step": 2420 }, { "epoch": 2.185251798561151, "grad_norm": 0.37442249059677124, "learning_rate": 8.096666666666667e-05, "loss": 0.0321, "step": 2430 }, { "action_loss": 0.04829807206988335, "epoch": 2.185251798561151, "step": 2430 }, { "epoch": 2.194244604316547, "grad_norm": 0.4302590489387512, "learning_rate": 8.13e-05, "loss": 0.0311, "step": 2440 }, { "action_loss": 0.03409452363848686, "epoch": 2.194244604316547, "step": 2440 }, { "epoch": 2.2032374100719423, "grad_norm": 0.40215346217155457, "learning_rate": 8.163333333333334e-05, "loss": 0.0409, "step": 2450 }, { "action_loss": 0.026151707395911217, "epoch": 2.2032374100719423, "step": 2450 }, { "epoch": 2.212230215827338, "grad_norm": 0.5210000276565552, "learning_rate": 8.196666666666668e-05, "loss": 0.0339, "step": 2460 }, { "action_loss": 0.030018983408808708, "epoch": 2.212230215827338, "step": 2460 }, { "epoch": 2.221223021582734, "grad_norm": 0.34802570939064026, "learning_rate": 8.23e-05, "loss": 0.0291, "step": 2470 }, { "action_loss": 0.02328537404537201, "epoch": 2.221223021582734, "step": 2470 }, { "epoch": 2.2302158273381294, "grad_norm": 0.4660751223564148, "learning_rate": 8.263333333333334e-05, "loss": 0.0364, "step": 2480 }, { "action_loss": 0.05720973014831543, "epoch": 2.2302158273381294, "step": 2480 }, { "epoch": 2.2392086330935252, "grad_norm": 0.5318640470504761, "learning_rate": 8.296666666666667e-05, "loss": 0.0422, "step": 2490 }, { "action_loss": 0.03181776404380798, "epoch": 2.2392086330935252, "step": 2490 }, { "epoch": 2.2482014388489207, "grad_norm": 0.38271841406822205, "learning_rate": 8.33e-05, "loss": 0.0393, "step": 2500 }, { "action_loss": 0.03751640394330025, "epoch": 2.2482014388489207, "step": 2500 }, { "epoch": 2.2571942446043165, "grad_norm": 0.5068331360816956, "learning_rate": 8.363333333333334e-05, "loss": 0.036, "step": 2510 }, { "action_loss": 0.03477765992283821, "epoch": 2.2571942446043165, "step": 2510 }, { "epoch": 2.2661870503597124, "grad_norm": 0.5735572576522827, "learning_rate": 8.396666666666667e-05, "loss": 0.0273, "step": 2520 }, { "action_loss": 0.05974923074245453, "epoch": 2.2661870503597124, "step": 2520 }, { "epoch": 2.2751798561151078, "grad_norm": 0.33657440543174744, "learning_rate": 8.43e-05, "loss": 0.0296, "step": 2530 }, { "action_loss": 0.06625620275735855, "epoch": 2.2751798561151078, "step": 2530 }, { "epoch": 2.2841726618705036, "grad_norm": 0.47753021121025085, "learning_rate": 8.463333333333335e-05, "loss": 0.0434, "step": 2540 }, { "action_loss": 0.03738829120993614, "epoch": 2.2841726618705036, "step": 2540 }, { "epoch": 2.2931654676258995, "grad_norm": 0.4966445863246918, "learning_rate": 8.496666666666667e-05, "loss": 0.0285, "step": 2550 }, { "action_loss": 0.018879801034927368, "epoch": 2.2931654676258995, "step": 2550 }, { "epoch": 2.302158273381295, "grad_norm": 0.38027775287628174, "learning_rate": 8.53e-05, "loss": 0.0373, "step": 2560 }, { "action_loss": 0.029340563341975212, "epoch": 2.302158273381295, "step": 2560 }, { "epoch": 2.3111510791366907, "grad_norm": 0.47208330035209656, "learning_rate": 8.563333333333333e-05, "loss": 0.0351, "step": 2570 }, { "action_loss": 0.02768884040415287, "epoch": 2.3111510791366907, "step": 2570 }, { "epoch": 2.3201438848920866, "grad_norm": 0.3876637816429138, "learning_rate": 8.596666666666668e-05, "loss": 0.0364, "step": 2580 }, { "action_loss": 0.03581665828824043, "epoch": 2.3201438848920866, "step": 2580 }, { "epoch": 2.329136690647482, "grad_norm": 0.49422159790992737, "learning_rate": 8.63e-05, "loss": 0.0277, "step": 2590 }, { "action_loss": 0.026792526245117188, "epoch": 2.329136690647482, "step": 2590 }, { "epoch": 2.338129496402878, "grad_norm": 0.4093102812767029, "learning_rate": 8.663333333333333e-05, "loss": 0.0339, "step": 2600 }, { "action_loss": 0.03501870855689049, "epoch": 2.338129496402878, "step": 2600 }, { "epoch": 2.347122302158273, "grad_norm": 0.5885116457939148, "learning_rate": 8.696666666666668e-05, "loss": 0.0382, "step": 2610 }, { "action_loss": 0.022165948525071144, "epoch": 2.347122302158273, "step": 2610 }, { "epoch": 2.356115107913669, "grad_norm": 0.42134010791778564, "learning_rate": 8.730000000000001e-05, "loss": 0.0305, "step": 2620 }, { "action_loss": 0.04442025348544121, "epoch": 2.356115107913669, "step": 2620 }, { "epoch": 2.365107913669065, "grad_norm": 0.40540072321891785, "learning_rate": 8.763333333333334e-05, "loss": 0.0343, "step": 2630 }, { "action_loss": 0.05606723949313164, "epoch": 2.365107913669065, "step": 2630 }, { "epoch": 2.3741007194244603, "grad_norm": 0.3981664180755615, "learning_rate": 8.796666666666667e-05, "loss": 0.0339, "step": 2640 }, { "action_loss": 0.06351574510335922, "epoch": 2.3741007194244603, "step": 2640 }, { "epoch": 2.383093525179856, "grad_norm": 0.3240129351615906, "learning_rate": 8.83e-05, "loss": 0.0284, "step": 2650 }, { "action_loss": 0.02897832542657852, "epoch": 2.383093525179856, "step": 2650 }, { "epoch": 2.3920863309352516, "grad_norm": 0.24557599425315857, "learning_rate": 8.863333333333334e-05, "loss": 0.0285, "step": 2660 }, { "action_loss": 0.05978124216198921, "epoch": 2.3920863309352516, "step": 2660 }, { "epoch": 2.4010791366906474, "grad_norm": 0.3374059796333313, "learning_rate": 8.896666666666667e-05, "loss": 0.0387, "step": 2670 }, { "action_loss": 0.015388262458145618, "epoch": 2.4010791366906474, "step": 2670 }, { "epoch": 2.4100719424460433, "grad_norm": 0.29263460636138916, "learning_rate": 8.93e-05, "loss": 0.0285, "step": 2680 }, { "action_loss": 0.02181188017129898, "epoch": 2.4100719424460433, "step": 2680 }, { "epoch": 2.4190647482014387, "grad_norm": 0.3783767819404602, "learning_rate": 8.963333333333333e-05, "loss": 0.0272, "step": 2690 }, { "action_loss": 0.03063620626926422, "epoch": 2.4190647482014387, "step": 2690 }, { "epoch": 2.4280575539568345, "grad_norm": 0.3140490651130676, "learning_rate": 8.996666666666667e-05, "loss": 0.027, "step": 2700 }, { "action_loss": 0.01899881474673748, "epoch": 2.4280575539568345, "step": 2700 }, { "epoch": 2.4370503597122304, "grad_norm": 0.36194276809692383, "learning_rate": 9.030000000000001e-05, "loss": 0.0276, "step": 2710 }, { "action_loss": 0.03877144306898117, "epoch": 2.4370503597122304, "step": 2710 }, { "epoch": 2.446043165467626, "grad_norm": 0.5266439318656921, "learning_rate": 9.063333333333333e-05, "loss": 0.0365, "step": 2720 }, { "action_loss": 0.011904190294444561, "epoch": 2.446043165467626, "step": 2720 }, { "epoch": 2.4550359712230216, "grad_norm": 0.45229965448379517, "learning_rate": 9.096666666666666e-05, "loss": 0.0258, "step": 2730 }, { "action_loss": 0.021771885454654694, "epoch": 2.4550359712230216, "step": 2730 }, { "epoch": 2.4640287769784175, "grad_norm": 0.4263295531272888, "learning_rate": 9.130000000000001e-05, "loss": 0.0274, "step": 2740 }, { "action_loss": 0.03203846141695976, "epoch": 2.4640287769784175, "step": 2740 }, { "epoch": 2.473021582733813, "grad_norm": 0.3759698271751404, "learning_rate": 9.163333333333334e-05, "loss": 0.0331, "step": 2750 }, { "action_loss": 0.06200624629855156, "epoch": 2.473021582733813, "step": 2750 }, { "epoch": 2.4820143884892087, "grad_norm": 0.4975913465023041, "learning_rate": 9.196666666666666e-05, "loss": 0.0344, "step": 2760 }, { "action_loss": 0.04198995232582092, "epoch": 2.4820143884892087, "step": 2760 }, { "epoch": 2.491007194244604, "grad_norm": 0.61861252784729, "learning_rate": 9.230000000000001e-05, "loss": 0.0308, "step": 2770 }, { "action_loss": 0.03699859604239464, "epoch": 2.491007194244604, "step": 2770 }, { "epoch": 2.5, "grad_norm": 0.5437571406364441, "learning_rate": 9.263333333333334e-05, "loss": 0.0306, "step": 2780 }, { "action_loss": 0.03229689970612526, "epoch": 2.5, "step": 2780 }, { "epoch": 2.508992805755396, "grad_norm": 0.48382920026779175, "learning_rate": 9.296666666666667e-05, "loss": 0.0402, "step": 2790 }, { "action_loss": 0.015412864275276661, "epoch": 2.508992805755396, "step": 2790 }, { "epoch": 2.5179856115107913, "grad_norm": 0.3957641124725342, "learning_rate": 9.33e-05, "loss": 0.0348, "step": 2800 }, { "action_loss": 0.03289773687720299, "epoch": 2.5179856115107913, "step": 2800 }, { "epoch": 2.526978417266187, "grad_norm": 0.45754343271255493, "learning_rate": 9.363333333333334e-05, "loss": 0.0335, "step": 2810 }, { "action_loss": 0.03442859649658203, "epoch": 2.526978417266187, "step": 2810 }, { "epoch": 2.5359712230215825, "grad_norm": 0.5171617865562439, "learning_rate": 9.396666666666667e-05, "loss": 0.0285, "step": 2820 }, { "action_loss": 0.013955176807940006, "epoch": 2.5359712230215825, "step": 2820 }, { "epoch": 2.5449640287769784, "grad_norm": 0.4076574444770813, "learning_rate": 9.43e-05, "loss": 0.0408, "step": 2830 }, { "action_loss": 0.03890778124332428, "epoch": 2.5449640287769784, "step": 2830 }, { "epoch": 2.553956834532374, "grad_norm": 0.35382628440856934, "learning_rate": 9.463333333333333e-05, "loss": 0.0354, "step": 2840 }, { "action_loss": 0.021359533071517944, "epoch": 2.553956834532374, "step": 2840 }, { "epoch": 2.56294964028777, "grad_norm": 0.3128947615623474, "learning_rate": 9.496666666666667e-05, "loss": 0.0284, "step": 2850 }, { "action_loss": 0.024072343483567238, "epoch": 2.56294964028777, "step": 2850 }, { "epoch": 2.5719424460431655, "grad_norm": 0.377530038356781, "learning_rate": 9.53e-05, "loss": 0.0347, "step": 2860 }, { "action_loss": 0.02731386013329029, "epoch": 2.5719424460431655, "step": 2860 }, { "epoch": 2.5809352517985613, "grad_norm": 0.3793106973171234, "learning_rate": 9.563333333333334e-05, "loss": 0.0365, "step": 2870 }, { "action_loss": 0.028457408770918846, "epoch": 2.5809352517985613, "step": 2870 }, { "epoch": 2.5899280575539567, "grad_norm": 0.4021304249763489, "learning_rate": 9.596666666666668e-05, "loss": 0.031, "step": 2880 }, { "action_loss": 0.046123404055833817, "epoch": 2.5899280575539567, "step": 2880 }, { "epoch": 2.5989208633093526, "grad_norm": 0.46353742480278015, "learning_rate": 9.63e-05, "loss": 0.0343, "step": 2890 }, { "action_loss": 0.03431477025151253, "epoch": 2.5989208633093526, "step": 2890 }, { "epoch": 2.6079136690647484, "grad_norm": 0.4130677282810211, "learning_rate": 9.663333333333334e-05, "loss": 0.0353, "step": 2900 }, { "action_loss": 0.023716041818261147, "epoch": 2.6079136690647484, "step": 2900 }, { "epoch": 2.616906474820144, "grad_norm": 0.524564266204834, "learning_rate": 9.696666666666667e-05, "loss": 0.0344, "step": 2910 }, { "action_loss": 0.04481560364365578, "epoch": 2.616906474820144, "step": 2910 }, { "epoch": 2.6258992805755397, "grad_norm": 0.47438567876815796, "learning_rate": 9.730000000000001e-05, "loss": 0.033, "step": 2920 }, { "action_loss": 0.03494875505566597, "epoch": 2.6258992805755397, "step": 2920 }, { "epoch": 2.634892086330935, "grad_norm": 0.3633502423763275, "learning_rate": 9.763333333333334e-05, "loss": 0.0323, "step": 2930 }, { "action_loss": 0.04246116802096367, "epoch": 2.634892086330935, "step": 2930 }, { "epoch": 2.643884892086331, "grad_norm": 0.44364237785339355, "learning_rate": 9.796666666666667e-05, "loss": 0.0374, "step": 2940 }, { "action_loss": 0.06808441132307053, "epoch": 2.643884892086331, "step": 2940 }, { "epoch": 2.652877697841727, "grad_norm": 0.37509554624557495, "learning_rate": 9.83e-05, "loss": 0.042, "step": 2950 }, { "action_loss": 0.04140718653798103, "epoch": 2.652877697841727, "step": 2950 }, { "epoch": 2.661870503597122, "grad_norm": 0.4470879137516022, "learning_rate": 9.863333333333334e-05, "loss": 0.0357, "step": 2960 }, { "action_loss": 0.01293111126869917, "epoch": 2.661870503597122, "step": 2960 }, { "epoch": 2.670863309352518, "grad_norm": 0.49237915873527527, "learning_rate": 9.896666666666667e-05, "loss": 0.0316, "step": 2970 }, { "action_loss": 0.05057262256741524, "epoch": 2.670863309352518, "step": 2970 }, { "epoch": 2.6798561151079134, "grad_norm": 0.31388965249061584, "learning_rate": 9.93e-05, "loss": 0.0292, "step": 2980 }, { "action_loss": 0.022580960765480995, "epoch": 2.6798561151079134, "step": 2980 }, { "epoch": 2.6888489208633093, "grad_norm": 0.48115071654319763, "learning_rate": 9.963333333333333e-05, "loss": 0.0337, "step": 2990 }, { "action_loss": 0.035065922886133194, "epoch": 2.6888489208633093, "step": 2990 }, { "epoch": 2.697841726618705, "grad_norm": 0.32735681533813477, "learning_rate": 9.996666666666668e-05, "loss": 0.0428, "step": 3000 }, { "action_loss": 0.02166689746081829, "epoch": 2.697841726618705, "step": 3000 }, { "epoch": 2.706834532374101, "grad_norm": 0.3252633512020111, "learning_rate": 9.999999384858465e-05, "loss": 0.0319, "step": 3010 }, { "action_loss": 0.031416330486536026, "epoch": 2.706834532374101, "step": 3010 }, { "epoch": 2.7158273381294964, "grad_norm": 0.4756034314632416, "learning_rate": 9.999997258443473e-05, "loss": 0.0336, "step": 3020 }, { "action_loss": 0.032278355211019516, "epoch": 2.7158273381294964, "step": 3020 }, { "epoch": 2.7248201438848922, "grad_norm": 0.37490707635879517, "learning_rate": 9.999993613161331e-05, "loss": 0.032, "step": 3030 }, { "action_loss": 0.03867727145552635, "epoch": 2.7248201438848922, "step": 3030 }, { "epoch": 2.7338129496402876, "grad_norm": 0.4816157817840576, "learning_rate": 9.999988449013146e-05, "loss": 0.0336, "step": 3040 }, { "action_loss": 0.034260865300893784, "epoch": 2.7338129496402876, "step": 3040 }, { "epoch": 2.7428057553956835, "grad_norm": 0.39170607924461365, "learning_rate": 9.99998176600049e-05, "loss": 0.04, "step": 3050 }, { "action_loss": 0.024505645036697388, "epoch": 2.7428057553956835, "step": 3050 }, { "epoch": 2.7517985611510793, "grad_norm": 0.31335553526878357, "learning_rate": 9.999973564125389e-05, "loss": 0.0284, "step": 3060 }, { "action_loss": 0.020431816577911377, "epoch": 2.7517985611510793, "step": 3060 }, { "epoch": 2.7607913669064748, "grad_norm": 0.21843942999839783, "learning_rate": 9.999963843390335e-05, "loss": 0.0266, "step": 3070 }, { "action_loss": 0.04215434193611145, "epoch": 2.7607913669064748, "step": 3070 }, { "epoch": 2.7697841726618706, "grad_norm": 0.4590599238872528, "learning_rate": 9.999952603798282e-05, "loss": 0.0426, "step": 3080 }, { "action_loss": 0.04853503406047821, "epoch": 2.7697841726618706, "step": 3080 }, { "epoch": 2.778776978417266, "grad_norm": 0.4399368464946747, "learning_rate": 9.999939845352646e-05, "loss": 0.0319, "step": 3090 }, { "action_loss": 0.02422218956053257, "epoch": 2.778776978417266, "step": 3090 }, { "epoch": 2.787769784172662, "grad_norm": 0.3519996404647827, "learning_rate": 9.999925568057298e-05, "loss": 0.0335, "step": 3100 }, { "action_loss": 0.02722478099167347, "epoch": 2.787769784172662, "step": 3100 }, { "epoch": 2.7967625899280577, "grad_norm": 0.4183286130428314, "learning_rate": 9.999909771916578e-05, "loss": 0.0302, "step": 3110 }, { "action_loss": 0.03578249737620354, "epoch": 2.7967625899280577, "step": 3110 }, { "epoch": 2.805755395683453, "grad_norm": 0.37691259384155273, "learning_rate": 9.999892456935285e-05, "loss": 0.0312, "step": 3120 }, { "action_loss": 0.05414033308625221, "epoch": 2.805755395683453, "step": 3120 }, { "epoch": 2.814748201438849, "grad_norm": 0.35077399015426636, "learning_rate": 9.999873623118679e-05, "loss": 0.0301, "step": 3130 }, { "action_loss": 0.024572907015681267, "epoch": 2.814748201438849, "step": 3130 }, { "epoch": 2.8237410071942444, "grad_norm": 0.2824244201183319, "learning_rate": 9.999853270472479e-05, "loss": 0.0337, "step": 3140 }, { "action_loss": 0.023358548060059547, "epoch": 2.8237410071942444, "step": 3140 }, { "epoch": 2.83273381294964, "grad_norm": 0.3367536664009094, "learning_rate": 9.999831399002871e-05, "loss": 0.0301, "step": 3150 }, { "action_loss": 0.025546088814735413, "epoch": 2.83273381294964, "step": 3150 }, { "epoch": 2.841726618705036, "grad_norm": 0.38753870129585266, "learning_rate": 9.999808008716494e-05, "loss": 0.0278, "step": 3160 }, { "action_loss": 0.05404965952038765, "epoch": 2.841726618705036, "step": 3160 }, { "epoch": 2.850719424460432, "grad_norm": 0.38986876606941223, "learning_rate": 9.999783099620459e-05, "loss": 0.0331, "step": 3170 }, { "action_loss": 0.014846508391201496, "epoch": 2.850719424460432, "step": 3170 }, { "epoch": 2.8597122302158273, "grad_norm": 0.3400036096572876, "learning_rate": 9.999756671722328e-05, "loss": 0.0311, "step": 3180 }, { "action_loss": 0.02283300645649433, "epoch": 2.8597122302158273, "step": 3180 }, { "epoch": 2.868705035971223, "grad_norm": 0.3853002190589905, "learning_rate": 9.99972872503013e-05, "loss": 0.0302, "step": 3190 }, { "action_loss": 0.02913178689777851, "epoch": 2.868705035971223, "step": 3190 }, { "epoch": 2.8776978417266186, "grad_norm": 0.4651801288127899, "learning_rate": 9.999699259552359e-05, "loss": 0.037, "step": 3200 }, { "action_loss": 0.018443187698721886, "epoch": 2.8776978417266186, "step": 3200 }, { "epoch": 2.8866906474820144, "grad_norm": 0.45617103576660156, "learning_rate": 9.99966827529796e-05, "loss": 0.029, "step": 3210 }, { "action_loss": 0.02459556609392166, "epoch": 2.8866906474820144, "step": 3210 }, { "epoch": 2.8956834532374103, "grad_norm": 0.38524913787841797, "learning_rate": 9.999635772276348e-05, "loss": 0.0306, "step": 3220 }, { "action_loss": 0.026449054479599, "epoch": 2.8956834532374103, "step": 3220 }, { "epoch": 2.9046762589928057, "grad_norm": 0.2345365732908249, "learning_rate": 9.999601750497396e-05, "loss": 0.0298, "step": 3230 }, { "action_loss": 0.02481948770582676, "epoch": 2.9046762589928057, "step": 3230 }, { "epoch": 2.9136690647482015, "grad_norm": 0.27162790298461914, "learning_rate": 9.99956620997144e-05, "loss": 0.0269, "step": 3240 }, { "action_loss": 0.02556995116174221, "epoch": 2.9136690647482015, "step": 3240 }, { "epoch": 2.922661870503597, "grad_norm": 0.33995774388313293, "learning_rate": 9.999529150709275e-05, "loss": 0.0338, "step": 3250 }, { "action_loss": 0.021205320954322815, "epoch": 2.922661870503597, "step": 3250 }, { "epoch": 2.931654676258993, "grad_norm": 0.5620662569999695, "learning_rate": 9.999490572722158e-05, "loss": 0.0262, "step": 3260 }, { "action_loss": 0.02165307104587555, "epoch": 2.931654676258993, "step": 3260 }, { "epoch": 2.9406474820143886, "grad_norm": 0.40702757239341736, "learning_rate": 9.99945047602181e-05, "loss": 0.0311, "step": 3270 }, { "action_loss": 0.01818850450217724, "epoch": 2.9406474820143886, "step": 3270 }, { "epoch": 2.949640287769784, "grad_norm": 0.40801700949668884, "learning_rate": 9.99940886062041e-05, "loss": 0.0269, "step": 3280 }, { "action_loss": 0.019220242276787758, "epoch": 2.949640287769784, "step": 3280 }, { "epoch": 2.95863309352518, "grad_norm": 0.3172686994075775, "learning_rate": 9.999365726530599e-05, "loss": 0.0309, "step": 3290 }, { "action_loss": 0.024196097627282143, "epoch": 2.95863309352518, "step": 3290 }, { "epoch": 2.9676258992805753, "grad_norm": 0.36396270990371704, "learning_rate": 9.999321073765481e-05, "loss": 0.0388, "step": 3300 }, { "action_loss": 0.01873696967959404, "epoch": 2.9676258992805753, "step": 3300 }, { "epoch": 2.976618705035971, "grad_norm": 0.36778977513313293, "learning_rate": 9.99927490233862e-05, "loss": 0.0274, "step": 3310 }, { "action_loss": 0.03456832841038704, "epoch": 2.976618705035971, "step": 3310 }, { "epoch": 2.985611510791367, "grad_norm": 0.4241669476032257, "learning_rate": 9.999227212264043e-05, "loss": 0.029, "step": 3320 }, { "action_loss": 0.01874489150941372, "epoch": 2.985611510791367, "step": 3320 }, { "epoch": 2.994604316546763, "grad_norm": 0.5349498391151428, "learning_rate": 9.999178003556236e-05, "loss": 0.0347, "step": 3330 }, { "action_loss": 0.028186233714222908, "epoch": 2.994604316546763, "step": 3330 }, { "epoch": 3.0035971223021583, "grad_norm": 0.5820927023887634, "learning_rate": 9.999127276230146e-05, "loss": 0.0279, "step": 3340 }, { "action_loss": 0.03897271305322647, "epoch": 3.0035971223021583, "step": 3340 }, { "epoch": 3.012589928057554, "grad_norm": 0.45627090334892273, "learning_rate": 9.999075030301184e-05, "loss": 0.0346, "step": 3350 }, { "action_loss": 0.02580629475414753, "epoch": 3.012589928057554, "step": 3350 }, { "epoch": 3.0215827338129495, "grad_norm": 0.5276095271110535, "learning_rate": 9.999021265785221e-05, "loss": 0.0376, "step": 3360 }, { "action_loss": 0.02370782382786274, "epoch": 3.0215827338129495, "step": 3360 }, { "epoch": 3.0305755395683454, "grad_norm": 0.46879589557647705, "learning_rate": 9.998965982698589e-05, "loss": 0.0347, "step": 3370 }, { "action_loss": 0.030709192156791687, "epoch": 3.0305755395683454, "step": 3370 }, { "epoch": 3.039568345323741, "grad_norm": 0.4706876873970032, "learning_rate": 9.998909181058082e-05, "loss": 0.0304, "step": 3380 }, { "action_loss": 0.04932411387562752, "epoch": 3.039568345323741, "step": 3380 }, { "epoch": 3.0485611510791366, "grad_norm": 0.44175204634666443, "learning_rate": 9.998850860880953e-05, "loss": 0.0351, "step": 3390 }, { "action_loss": 0.025992536917328835, "epoch": 3.0485611510791366, "step": 3390 }, { "epoch": 3.0575539568345325, "grad_norm": 0.3120158910751343, "learning_rate": 9.998791022184922e-05, "loss": 0.0279, "step": 3400 }, { "action_loss": 0.0187760628759861, "epoch": 3.0575539568345325, "step": 3400 }, { "epoch": 3.066546762589928, "grad_norm": 0.36204081773757935, "learning_rate": 9.99872966498816e-05, "loss": 0.0255, "step": 3410 }, { "action_loss": 0.03266133368015289, "epoch": 3.066546762589928, "step": 3410 }, { "epoch": 3.0755395683453237, "grad_norm": 0.40855398774147034, "learning_rate": 9.998666789309313e-05, "loss": 0.0332, "step": 3420 }, { "action_loss": 0.013902392238378525, "epoch": 3.0755395683453237, "step": 3420 }, { "epoch": 3.0845323741007196, "grad_norm": 0.39196068048477173, "learning_rate": 9.998602395167475e-05, "loss": 0.0275, "step": 3430 }, { "action_loss": 0.02908685803413391, "epoch": 3.0845323741007196, "step": 3430 }, { "epoch": 3.093525179856115, "grad_norm": 0.2500876188278198, "learning_rate": 9.998536482582213e-05, "loss": 0.0351, "step": 3440 }, { "action_loss": 0.03170985355973244, "epoch": 3.093525179856115, "step": 3440 }, { "epoch": 3.102517985611511, "grad_norm": 0.34220820665359497, "learning_rate": 9.998469051573544e-05, "loss": 0.0337, "step": 3450 }, { "action_loss": 0.04129009693861008, "epoch": 3.102517985611511, "step": 3450 }, { "epoch": 3.1115107913669067, "grad_norm": 0.30246850848197937, "learning_rate": 9.998400102161954e-05, "loss": 0.0305, "step": 3460 }, { "action_loss": 0.029358142986893654, "epoch": 3.1115107913669067, "step": 3460 }, { "epoch": 3.120503597122302, "grad_norm": 0.39586690068244934, "learning_rate": 9.998329634368388e-05, "loss": 0.032, "step": 3470 }, { "action_loss": 0.03412050008773804, "epoch": 3.120503597122302, "step": 3470 }, { "epoch": 3.129496402877698, "grad_norm": 0.4609016478061676, "learning_rate": 9.998257648214253e-05, "loss": 0.0285, "step": 3480 }, { "action_loss": 0.04701752960681915, "epoch": 3.129496402877698, "step": 3480 }, { "epoch": 3.1384892086330933, "grad_norm": 0.3979571461677551, "learning_rate": 9.998184143721417e-05, "loss": 0.035, "step": 3490 }, { "action_loss": 0.027500266209244728, "epoch": 3.1384892086330933, "step": 3490 }, { "epoch": 3.147482014388489, "grad_norm": 0.40863490104675293, "learning_rate": 9.998109120912206e-05, "loss": 0.0268, "step": 3500 }, { "action_loss": 0.043084725737571716, "epoch": 3.147482014388489, "step": 3500 }, { "epoch": 3.156474820143885, "grad_norm": 0.40761488676071167, "learning_rate": 9.998032579809411e-05, "loss": 0.0298, "step": 3510 }, { "action_loss": 0.021269284188747406, "epoch": 3.156474820143885, "step": 3510 }, { "epoch": 3.1654676258992804, "grad_norm": 0.37774792313575745, "learning_rate": 9.997954520436286e-05, "loss": 0.0303, "step": 3520 }, { "action_loss": 0.023330874741077423, "epoch": 3.1654676258992804, "step": 3520 }, { "epoch": 3.1744604316546763, "grad_norm": 0.36262407898902893, "learning_rate": 9.997874942816538e-05, "loss": 0.0282, "step": 3530 }, { "action_loss": 0.04931420460343361, "epoch": 3.1744604316546763, "step": 3530 }, { "epoch": 3.183453237410072, "grad_norm": 0.41383180022239685, "learning_rate": 9.997793846974345e-05, "loss": 0.0369, "step": 3540 }, { "action_loss": 0.0219560656696558, "epoch": 3.183453237410072, "step": 3540 }, { "epoch": 3.1924460431654675, "grad_norm": 0.3705684244632721, "learning_rate": 9.997711232934341e-05, "loss": 0.028, "step": 3550 }, { "action_loss": 0.03112269937992096, "epoch": 3.1924460431654675, "step": 3550 }, { "epoch": 3.2014388489208634, "grad_norm": 0.37710878252983093, "learning_rate": 9.99762710072162e-05, "loss": 0.0298, "step": 3560 }, { "action_loss": 0.03990311175584793, "epoch": 3.2014388489208634, "step": 3560 }, { "epoch": 3.210431654676259, "grad_norm": 0.28950050473213196, "learning_rate": 9.997541450361743e-05, "loss": 0.033, "step": 3570 }, { "action_loss": 0.04461054503917694, "epoch": 3.210431654676259, "step": 3570 }, { "epoch": 3.2194244604316546, "grad_norm": 0.42990684509277344, "learning_rate": 9.997454281880723e-05, "loss": 0.0332, "step": 3580 }, { "action_loss": 0.020163487643003464, "epoch": 3.2194244604316546, "step": 3580 }, { "epoch": 3.2284172661870505, "grad_norm": 0.4126536250114441, "learning_rate": 9.997365595305044e-05, "loss": 0.0298, "step": 3590 }, { "action_loss": 0.04580695554614067, "epoch": 3.2284172661870505, "step": 3590 }, { "epoch": 3.237410071942446, "grad_norm": 0.3394562005996704, "learning_rate": 9.997275390661644e-05, "loss": 0.0314, "step": 3600 }, { "action_loss": 0.01909526437520981, "epoch": 3.237410071942446, "step": 3600 }, { "epoch": 3.2464028776978417, "grad_norm": 0.5070739388465881, "learning_rate": 9.997183667977926e-05, "loss": 0.0338, "step": 3610 }, { "action_loss": 0.024926304817199707, "epoch": 3.2464028776978417, "step": 3610 }, { "epoch": 3.2553956834532376, "grad_norm": 0.4229183495044708, "learning_rate": 9.997090427281752e-05, "loss": 0.0284, "step": 3620 }, { "action_loss": 0.03422583267092705, "epoch": 3.2553956834532376, "step": 3620 }, { "epoch": 3.264388489208633, "grad_norm": 0.4681219458580017, "learning_rate": 9.996995668601448e-05, "loss": 0.0317, "step": 3630 }, { "action_loss": 0.03193238377571106, "epoch": 3.264388489208633, "step": 3630 }, { "epoch": 3.273381294964029, "grad_norm": 0.32914572954177856, "learning_rate": 9.996899391965798e-05, "loss": 0.0246, "step": 3640 }, { "action_loss": 0.030921803787350655, "epoch": 3.273381294964029, "step": 3640 }, { "epoch": 3.2823741007194247, "grad_norm": 0.38526010513305664, "learning_rate": 9.996801597404048e-05, "loss": 0.032, "step": 3650 }, { "action_loss": 0.02616908587515354, "epoch": 3.2823741007194247, "step": 3650 }, { "epoch": 3.29136690647482, "grad_norm": 0.32084619998931885, "learning_rate": 9.996702284945905e-05, "loss": 0.0266, "step": 3660 }, { "action_loss": 0.02872343920171261, "epoch": 3.29136690647482, "step": 3660 }, { "epoch": 3.300359712230216, "grad_norm": 0.38504621386528015, "learning_rate": 9.996601454621539e-05, "loss": 0.0292, "step": 3670 }, { "action_loss": 0.020996445789933205, "epoch": 3.300359712230216, "step": 3670 }, { "epoch": 3.3093525179856114, "grad_norm": 0.3750654458999634, "learning_rate": 9.996499106461577e-05, "loss": 0.0274, "step": 3680 }, { "action_loss": 0.019461018964648247, "epoch": 3.3093525179856114, "step": 3680 }, { "epoch": 3.318345323741007, "grad_norm": 0.36819109320640564, "learning_rate": 9.996395240497112e-05, "loss": 0.0325, "step": 3690 }, { "action_loss": 0.028915025293827057, "epoch": 3.318345323741007, "step": 3690 }, { "epoch": 3.327338129496403, "grad_norm": 0.3478351831436157, "learning_rate": 9.996289856759696e-05, "loss": 0.0292, "step": 3700 }, { "action_loss": 0.05025007203221321, "epoch": 3.327338129496403, "step": 3700 }, { "epoch": 3.3363309352517985, "grad_norm": 0.456193745136261, "learning_rate": 9.996182955281342e-05, "loss": 0.0303, "step": 3710 }, { "action_loss": 0.05873417481780052, "epoch": 3.3363309352517985, "step": 3710 }, { "epoch": 3.3453237410071943, "grad_norm": 0.4726487100124359, "learning_rate": 9.996074536094519e-05, "loss": 0.0326, "step": 3720 }, { "action_loss": 0.04003138095140457, "epoch": 3.3453237410071943, "step": 3720 }, { "epoch": 3.3543165467625897, "grad_norm": 0.269807368516922, "learning_rate": 9.995964599232168e-05, "loss": 0.0263, "step": 3730 }, { "action_loss": 0.04712820053100586, "epoch": 3.3543165467625897, "step": 3730 }, { "epoch": 3.3633093525179856, "grad_norm": 0.409456342458725, "learning_rate": 9.995853144727683e-05, "loss": 0.0308, "step": 3740 }, { "action_loss": 0.03699531778693199, "epoch": 3.3633093525179856, "step": 3740 }, { "epoch": 3.3723021582733814, "grad_norm": 0.33310821652412415, "learning_rate": 9.99574017261492e-05, "loss": 0.0286, "step": 3750 }, { "action_loss": 0.019385920837521553, "epoch": 3.3723021582733814, "step": 3750 }, { "epoch": 3.381294964028777, "grad_norm": 0.36837464570999146, "learning_rate": 9.995625682928198e-05, "loss": 0.0233, "step": 3760 }, { "action_loss": 0.024574005976319313, "epoch": 3.381294964028777, "step": 3760 }, { "epoch": 3.3902877697841727, "grad_norm": 0.39644894003868103, "learning_rate": 9.995509675702295e-05, "loss": 0.0253, "step": 3770 }, { "action_loss": 0.01671035960316658, "epoch": 3.3902877697841727, "step": 3770 }, { "epoch": 3.3992805755395685, "grad_norm": 0.30388233065605164, "learning_rate": 9.995392150972451e-05, "loss": 0.0305, "step": 3780 }, { "action_loss": 0.04253914952278137, "epoch": 3.3992805755395685, "step": 3780 }, { "epoch": 3.408273381294964, "grad_norm": 0.3304659128189087, "learning_rate": 9.995273108774366e-05, "loss": 0.0339, "step": 3790 }, { "action_loss": 0.01858912594616413, "epoch": 3.408273381294964, "step": 3790 }, { "epoch": 3.41726618705036, "grad_norm": 0.4158054292201996, "learning_rate": 9.995152549144205e-05, "loss": 0.0318, "step": 3800 }, { "action_loss": 0.030002504587173462, "epoch": 3.41726618705036, "step": 3800 }, { "epoch": 3.4262589928057556, "grad_norm": 0.3346783220767975, "learning_rate": 9.995030472118587e-05, "loss": 0.0284, "step": 3810 }, { "action_loss": 0.02828087843954563, "epoch": 3.4262589928057556, "step": 3810 }, { "epoch": 3.435251798561151, "grad_norm": 0.32906126976013184, "learning_rate": 9.9949068777346e-05, "loss": 0.0302, "step": 3820 }, { "action_loss": 0.015352529473602772, "epoch": 3.435251798561151, "step": 3820 }, { "epoch": 3.444244604316547, "grad_norm": 0.27659744024276733, "learning_rate": 9.994781766029786e-05, "loss": 0.0279, "step": 3830 }, { "action_loss": 0.04750797152519226, "epoch": 3.444244604316547, "step": 3830 }, { "epoch": 3.4532374100719423, "grad_norm": 0.430625319480896, "learning_rate": 9.994655137042151e-05, "loss": 0.0347, "step": 3840 }, { "action_loss": 0.05751028656959534, "epoch": 3.4532374100719423, "step": 3840 }, { "epoch": 3.462230215827338, "grad_norm": 0.28376466035842896, "learning_rate": 9.99452699081016e-05, "loss": 0.0275, "step": 3850 }, { "action_loss": 0.021751755848526955, "epoch": 3.462230215827338, "step": 3850 }, { "epoch": 3.471223021582734, "grad_norm": 0.24641521275043488, "learning_rate": 9.994397327372743e-05, "loss": 0.0326, "step": 3860 }, { "action_loss": 0.02780805714428425, "epoch": 3.471223021582734, "step": 3860 }, { "epoch": 3.4802158273381294, "grad_norm": 0.2976445257663727, "learning_rate": 9.994266146769286e-05, "loss": 0.0309, "step": 3870 }, { "action_loss": 0.015527352690696716, "epoch": 3.4802158273381294, "step": 3870 }, { "epoch": 3.4892086330935252, "grad_norm": 0.41069427132606506, "learning_rate": 9.994133449039642e-05, "loss": 0.0276, "step": 3880 }, { "action_loss": 0.01564951427280903, "epoch": 3.4892086330935252, "step": 3880 }, { "epoch": 3.4982014388489207, "grad_norm": 0.334350049495697, "learning_rate": 9.993999234224118e-05, "loss": 0.0313, "step": 3890 }, { "action_loss": 0.045259300619363785, "epoch": 3.4982014388489207, "step": 3890 }, { "epoch": 3.5071942446043165, "grad_norm": 0.38123586773872375, "learning_rate": 9.993863502363485e-05, "loss": 0.0268, "step": 3900 }, { "action_loss": 0.023005427792668343, "epoch": 3.5071942446043165, "step": 3900 }, { "epoch": 3.5161870503597124, "grad_norm": 0.3287089169025421, "learning_rate": 9.993726253498976e-05, "loss": 0.025, "step": 3910 }, { "action_loss": 0.01914181374013424, "epoch": 3.5161870503597124, "step": 3910 }, { "epoch": 3.5251798561151078, "grad_norm": 0.3359219431877136, "learning_rate": 9.993587487672282e-05, "loss": 0.025, "step": 3920 }, { "action_loss": 0.021401450037956238, "epoch": 3.5251798561151078, "step": 3920 }, { "epoch": 3.5341726618705036, "grad_norm": 0.46317943930625916, "learning_rate": 9.993447204925558e-05, "loss": 0.0302, "step": 3930 }, { "action_loss": 0.043349865823984146, "epoch": 3.5341726618705036, "step": 3930 }, { "epoch": 3.543165467625899, "grad_norm": 0.37350183725357056, "learning_rate": 9.993305405301416e-05, "loss": 0.0346, "step": 3940 }, { "action_loss": 0.03433537483215332, "epoch": 3.543165467625899, "step": 3940 }, { "epoch": 3.552158273381295, "grad_norm": 0.3471378684043884, "learning_rate": 9.993162088842935e-05, "loss": 0.0315, "step": 3950 }, { "action_loss": 0.05353161692619324, "epoch": 3.552158273381295, "step": 3950 }, { "epoch": 3.5611510791366907, "grad_norm": 0.505804181098938, "learning_rate": 9.993017255593646e-05, "loss": 0.0383, "step": 3960 }, { "action_loss": 0.016265803948044777, "epoch": 3.5611510791366907, "step": 3960 }, { "epoch": 3.5701438848920866, "grad_norm": 0.4195571839809418, "learning_rate": 9.992870905597548e-05, "loss": 0.0339, "step": 3970 }, { "action_loss": 0.04221159219741821, "epoch": 3.5701438848920866, "step": 3970 }, { "epoch": 3.579136690647482, "grad_norm": 0.3751494288444519, "learning_rate": 9.9927230388991e-05, "loss": 0.0259, "step": 3980 }, { "action_loss": 0.027569271624088287, "epoch": 3.579136690647482, "step": 3980 }, { "epoch": 3.588129496402878, "grad_norm": 0.30475571751594543, "learning_rate": 9.992573655543215e-05, "loss": 0.0291, "step": 3990 }, { "action_loss": 0.02312716655433178, "epoch": 3.588129496402878, "step": 3990 }, { "epoch": 3.597122302158273, "grad_norm": 0.255643367767334, "learning_rate": 9.992422755575277e-05, "loss": 0.0241, "step": 4000 }, { "action_loss": 0.036421120166778564, "epoch": 3.597122302158273, "step": 4000 }, { "epoch": 3.606115107913669, "grad_norm": 0.3654440939426422, "learning_rate": 9.992270339041123e-05, "loss": 0.0249, "step": 4010 }, { "action_loss": 0.020651865750551224, "epoch": 3.606115107913669, "step": 4010 }, { "epoch": 3.615107913669065, "grad_norm": 0.39390233159065247, "learning_rate": 9.992116405987053e-05, "loss": 0.0285, "step": 4020 }, { "action_loss": 0.034618962556123734, "epoch": 3.615107913669065, "step": 4020 }, { "epoch": 3.6241007194244603, "grad_norm": 0.5296534895896912, "learning_rate": 9.991960956459828e-05, "loss": 0.0281, "step": 4030 }, { "action_loss": 0.02683197893202305, "epoch": 3.6241007194244603, "step": 4030 }, { "epoch": 3.633093525179856, "grad_norm": 0.3689826428890228, "learning_rate": 9.991803990506669e-05, "loss": 0.0338, "step": 4040 }, { "action_loss": 0.030523838475346565, "epoch": 3.633093525179856, "step": 4040 }, { "epoch": 3.6420863309352516, "grad_norm": 0.3118903934955597, "learning_rate": 9.991645508175258e-05, "loss": 0.0304, "step": 4050 }, { "action_loss": 0.08989042788743973, "epoch": 3.6420863309352516, "step": 4050 }, { "epoch": 3.6510791366906474, "grad_norm": 0.4147702753543854, "learning_rate": 9.99148550951374e-05, "loss": 0.0322, "step": 4060 }, { "action_loss": 0.02255321852862835, "epoch": 3.6510791366906474, "step": 4060 }, { "epoch": 3.6600719424460433, "grad_norm": 0.29337453842163086, "learning_rate": 9.991323994570716e-05, "loss": 0.0389, "step": 4070 }, { "action_loss": 0.013976664282381535, "epoch": 3.6600719424460433, "step": 4070 }, { "epoch": 3.6690647482014387, "grad_norm": 0.38972628116607666, "learning_rate": 9.99116096339525e-05, "loss": 0.0306, "step": 4080 }, { "action_loss": 0.04654046520590782, "epoch": 3.6690647482014387, "step": 4080 }, { "epoch": 3.6780575539568345, "grad_norm": 0.27425992488861084, "learning_rate": 9.990996416036869e-05, "loss": 0.0289, "step": 4090 }, { "action_loss": 0.02963986061513424, "epoch": 3.6780575539568345, "step": 4090 }, { "epoch": 3.68705035971223, "grad_norm": 0.24673345685005188, "learning_rate": 9.990830352545555e-05, "loss": 0.0279, "step": 4100 }, { "action_loss": 0.015602379105985165, "epoch": 3.68705035971223, "step": 4100 }, { "epoch": 3.696043165467626, "grad_norm": 0.31614169478416443, "learning_rate": 9.990662772971756e-05, "loss": 0.0218, "step": 4110 }, { "action_loss": 0.03130681440234184, "epoch": 3.696043165467626, "step": 4110 }, { "epoch": 3.7050359712230216, "grad_norm": 0.35721778869628906, "learning_rate": 9.990493677366376e-05, "loss": 0.0265, "step": 4120 }, { "action_loss": 0.02686975710093975, "epoch": 3.7050359712230216, "step": 4120 }, { "epoch": 3.7140287769784175, "grad_norm": 0.37970051169395447, "learning_rate": 9.990323065780786e-05, "loss": 0.0223, "step": 4130 }, { "action_loss": 0.02629505842924118, "epoch": 3.7140287769784175, "step": 4130 }, { "epoch": 3.723021582733813, "grad_norm": 0.31396129727363586, "learning_rate": 9.990150938266808e-05, "loss": 0.0315, "step": 4140 }, { "action_loss": 0.024396710097789764, "epoch": 3.723021582733813, "step": 4140 }, { "epoch": 3.7320143884892087, "grad_norm": 0.2583223581314087, "learning_rate": 9.989977294876733e-05, "loss": 0.0247, "step": 4150 }, { "action_loss": 0.02246725559234619, "epoch": 3.7320143884892087, "step": 4150 }, { "epoch": 3.741007194244604, "grad_norm": 0.2541853189468384, "learning_rate": 9.989802135663308e-05, "loss": 0.0221, "step": 4160 }, { "action_loss": 0.04331565275788307, "epoch": 3.741007194244604, "step": 4160 }, { "epoch": 3.75, "grad_norm": 0.25699231028556824, "learning_rate": 9.989625460679743e-05, "loss": 0.0234, "step": 4170 }, { "action_loss": 0.04030969366431236, "epoch": 3.75, "step": 4170 }, { "epoch": 3.758992805755396, "grad_norm": 0.368784099817276, "learning_rate": 9.989447269979706e-05, "loss": 0.0239, "step": 4180 }, { "action_loss": 0.022751634940505028, "epoch": 3.758992805755396, "step": 4180 }, { "epoch": 3.7679856115107913, "grad_norm": 0.3524191379547119, "learning_rate": 9.989267563617328e-05, "loss": 0.0278, "step": 4190 }, { "action_loss": 0.008312073536217213, "epoch": 3.7679856115107913, "step": 4190 }, { "epoch": 3.776978417266187, "grad_norm": 0.3151185214519501, "learning_rate": 9.989086341647198e-05, "loss": 0.0228, "step": 4200 }, { "action_loss": 0.033195480704307556, "epoch": 3.776978417266187, "step": 4200 }, { "epoch": 3.7859712230215825, "grad_norm": 0.3018382787704468, "learning_rate": 9.988903604124366e-05, "loss": 0.0239, "step": 4210 }, { "action_loss": 0.010248244740068913, "epoch": 3.7859712230215825, "step": 4210 }, { "epoch": 3.7949640287769784, "grad_norm": 0.2567962408065796, "learning_rate": 9.988719351104343e-05, "loss": 0.0235, "step": 4220 }, { "action_loss": 0.02157588303089142, "epoch": 3.7949640287769784, "step": 4220 }, { "epoch": 3.803956834532374, "grad_norm": 0.38568902015686035, "learning_rate": 9.9885335826431e-05, "loss": 0.0237, "step": 4230 }, { "action_loss": 0.014937974512577057, "epoch": 3.803956834532374, "step": 4230 }, { "epoch": 3.81294964028777, "grad_norm": 0.3115173280239105, "learning_rate": 9.988346298797071e-05, "loss": 0.0243, "step": 4240 }, { "action_loss": 0.021143579855561256, "epoch": 3.81294964028777, "step": 4240 }, { "epoch": 3.8219424460431655, "grad_norm": 0.2777501046657562, "learning_rate": 9.988157499623146e-05, "loss": 0.0317, "step": 4250 }, { "action_loss": 0.021879812702536583, "epoch": 3.8219424460431655, "step": 4250 }, { "epoch": 3.8309352517985613, "grad_norm": 0.3384545147418976, "learning_rate": 9.987967185178677e-05, "loss": 0.0221, "step": 4260 }, { "action_loss": 0.01928863488137722, "epoch": 3.8309352517985613, "step": 4260 }, { "epoch": 3.8399280575539567, "grad_norm": 0.3378330171108246, "learning_rate": 9.987775355521476e-05, "loss": 0.0281, "step": 4270 }, { "action_loss": 0.012686069123446941, "epoch": 3.8399280575539567, "step": 4270 }, { "epoch": 3.8489208633093526, "grad_norm": 0.2699289619922638, "learning_rate": 9.987582010709817e-05, "loss": 0.0232, "step": 4280 }, { "action_loss": 0.07199747115373611, "epoch": 3.8489208633093526, "step": 4280 }, { "epoch": 3.8579136690647484, "grad_norm": 0.39285317063331604, "learning_rate": 9.987387150802431e-05, "loss": 0.0405, "step": 4290 }, { "action_loss": 0.011177587322890759, "epoch": 3.8579136690647484, "step": 4290 }, { "epoch": 3.866906474820144, "grad_norm": 0.2626073658466339, "learning_rate": 9.987190775858517e-05, "loss": 0.0212, "step": 4300 }, { "action_loss": 0.027077415958046913, "epoch": 3.866906474820144, "step": 4300 }, { "epoch": 3.8758992805755397, "grad_norm": 0.3339500427246094, "learning_rate": 9.98699288593772e-05, "loss": 0.0302, "step": 4310 }, { "action_loss": 0.01799231581389904, "epoch": 3.8758992805755397, "step": 4310 }, { "epoch": 3.884892086330935, "grad_norm": 0.36704859137535095, "learning_rate": 9.986793481100161e-05, "loss": 0.025, "step": 4320 }, { "action_loss": 0.0385267399251461, "epoch": 3.884892086330935, "step": 4320 }, { "epoch": 3.893884892086331, "grad_norm": 0.2664698362350464, "learning_rate": 9.986592561406412e-05, "loss": 0.0285, "step": 4330 }, { "action_loss": 0.01619792729616165, "epoch": 3.893884892086331, "step": 4330 }, { "epoch": 3.902877697841727, "grad_norm": 0.2604484260082245, "learning_rate": 9.986390126917503e-05, "loss": 0.021, "step": 4340 }, { "action_loss": 0.018918098881840706, "epoch": 3.902877697841727, "step": 4340 }, { "epoch": 3.911870503597122, "grad_norm": 0.23931583762168884, "learning_rate": 9.986186177694933e-05, "loss": 0.0233, "step": 4350 }, { "action_loss": 0.03502926230430603, "epoch": 3.911870503597122, "step": 4350 }, { "epoch": 3.920863309352518, "grad_norm": 0.39077213406562805, "learning_rate": 9.985980713800656e-05, "loss": 0.0344, "step": 4360 }, { "action_loss": 0.023137060925364494, "epoch": 3.920863309352518, "step": 4360 }, { "epoch": 3.9298561151079134, "grad_norm": 0.2971208095550537, "learning_rate": 9.985773735297084e-05, "loss": 0.0214, "step": 4370 }, { "action_loss": 0.03228767588734627, "epoch": 3.9298561151079134, "step": 4370 }, { "epoch": 3.9388489208633093, "grad_norm": 0.3721123933792114, "learning_rate": 9.985565242247092e-05, "loss": 0.0271, "step": 4380 }, { "action_loss": 0.02978029102087021, "epoch": 3.9388489208633093, "step": 4380 }, { "epoch": 3.947841726618705, "grad_norm": 0.2982925772666931, "learning_rate": 9.985355234714016e-05, "loss": 0.0329, "step": 4390 }, { "action_loss": 0.013008258305490017, "epoch": 3.947841726618705, "step": 4390 }, { "epoch": 3.956834532374101, "grad_norm": 0.3032430112361908, "learning_rate": 9.985143712761652e-05, "loss": 0.0202, "step": 4400 }, { "action_loss": 0.017659595236182213, "epoch": 3.956834532374101, "step": 4400 }, { "epoch": 3.9658273381294964, "grad_norm": 0.4083709418773651, "learning_rate": 9.984930676454252e-05, "loss": 0.0253, "step": 4410 }, { "action_loss": 0.02748696506023407, "epoch": 3.9658273381294964, "step": 4410 }, { "epoch": 3.9748201438848922, "grad_norm": 0.3608955442905426, "learning_rate": 9.984716125856532e-05, "loss": 0.0268, "step": 4420 }, { "action_loss": 0.016579637303948402, "epoch": 3.9748201438848922, "step": 4420 }, { "epoch": 3.9838129496402876, "grad_norm": 0.33221179246902466, "learning_rate": 9.984500061033667e-05, "loss": 0.0214, "step": 4430 }, { "action_loss": 0.019418930634856224, "epoch": 3.9838129496402876, "step": 4430 }, { "epoch": 3.9928057553956835, "grad_norm": 0.26419174671173096, "learning_rate": 9.984282482051293e-05, "loss": 0.0279, "step": 4440 }, { "action_loss": 0.061106204986572266, "epoch": 3.9928057553956835, "step": 4440 }, { "epoch": 4.001798561151079, "grad_norm": 0.3753238916397095, "learning_rate": 9.9840633889755e-05, "loss": 0.0244, "step": 4450 }, { "action_loss": 0.012001090683043003, "epoch": 4.001798561151079, "step": 4450 }, { "epoch": 4.010791366906475, "grad_norm": 0.4583355784416199, "learning_rate": 9.983842781872848e-05, "loss": 0.028, "step": 4460 }, { "action_loss": 0.026535307988524437, "epoch": 4.010791366906475, "step": 4460 }, { "epoch": 4.01978417266187, "grad_norm": 0.36587920784950256, "learning_rate": 9.98362066081035e-05, "loss": 0.0266, "step": 4470 }, { "action_loss": 0.0315113328397274, "epoch": 4.01978417266187, "step": 4470 }, { "epoch": 4.028776978417266, "grad_norm": 0.24483440816402435, "learning_rate": 9.983397025855479e-05, "loss": 0.0213, "step": 4480 }, { "action_loss": 0.03296194598078728, "epoch": 4.028776978417266, "step": 4480 }, { "epoch": 4.037769784172662, "grad_norm": 0.2663133442401886, "learning_rate": 9.983171877076171e-05, "loss": 0.0203, "step": 4490 }, { "action_loss": 0.07411356270313263, "epoch": 4.037769784172662, "step": 4490 }, { "epoch": 4.046762589928058, "grad_norm": 0.2743675410747528, "learning_rate": 9.98294521454082e-05, "loss": 0.0362, "step": 4500 }, { "action_loss": 0.015591899864375591, "epoch": 4.046762589928058, "step": 4500 }, { "epoch": 4.055755395683454, "grad_norm": 0.4251568913459778, "learning_rate": 9.98271703831828e-05, "loss": 0.025, "step": 4510 }, { "action_loss": 0.05422121658921242, "epoch": 4.055755395683454, "step": 4510 }, { "epoch": 4.0647482014388485, "grad_norm": 0.2480478584766388, "learning_rate": 9.982487348477865e-05, "loss": 0.0321, "step": 4520 }, { "action_loss": 0.03683903440833092, "epoch": 4.0647482014388485, "step": 4520 }, { "epoch": 4.073741007194244, "grad_norm": 0.3028353452682495, "learning_rate": 9.982256145089347e-05, "loss": 0.028, "step": 4530 }, { "action_loss": 0.023007238283753395, "epoch": 4.073741007194244, "step": 4530 }, { "epoch": 4.08273381294964, "grad_norm": 0.26530519127845764, "learning_rate": 9.982023428222962e-05, "loss": 0.0219, "step": 4540 }, { "action_loss": 0.06456286460161209, "epoch": 4.08273381294964, "step": 4540 }, { "epoch": 4.091726618705036, "grad_norm": 0.3261609673500061, "learning_rate": 9.981789197949403e-05, "loss": 0.0309, "step": 4550 }, { "action_loss": 0.041591282933950424, "epoch": 4.091726618705036, "step": 4550 }, { "epoch": 4.100719424460432, "grad_norm": 0.262571781873703, "learning_rate": 9.98155345433982e-05, "loss": 0.0268, "step": 4560 }, { "action_loss": 0.04522838070988655, "epoch": 4.100719424460432, "step": 4560 }, { "epoch": 4.109712230215828, "grad_norm": 0.26121267676353455, "learning_rate": 9.981316197465831e-05, "loss": 0.0234, "step": 4570 }, { "action_loss": 0.029592866078019142, "epoch": 4.109712230215828, "step": 4570 }, { "epoch": 4.118705035971223, "grad_norm": 0.20616449415683746, "learning_rate": 9.981077427399504e-05, "loss": 0.0238, "step": 4580 }, { "action_loss": 0.03108775056898594, "epoch": 4.118705035971223, "step": 4580 }, { "epoch": 4.127697841726619, "grad_norm": 0.3405033349990845, "learning_rate": 9.980837144213371e-05, "loss": 0.0232, "step": 4590 }, { "action_loss": 0.026937559247016907, "epoch": 4.127697841726619, "step": 4590 }, { "epoch": 4.136690647482014, "grad_norm": 0.33306002616882324, "learning_rate": 9.980595347980426e-05, "loss": 0.0221, "step": 4600 }, { "action_loss": 0.019454939290881157, "epoch": 4.136690647482014, "step": 4600 }, { "epoch": 4.14568345323741, "grad_norm": 0.3894503712654114, "learning_rate": 9.980352038774119e-05, "loss": 0.0307, "step": 4610 }, { "action_loss": 0.029545187950134277, "epoch": 4.14568345323741, "step": 4610 }, { "epoch": 4.154676258992806, "grad_norm": 0.30639705061912537, "learning_rate": 9.98010721666836e-05, "loss": 0.029, "step": 4620 }, { "action_loss": 0.01890009082853794, "epoch": 4.154676258992806, "step": 4620 }, { "epoch": 4.163669064748201, "grad_norm": 0.3019881248474121, "learning_rate": 9.979860881737523e-05, "loss": 0.0205, "step": 4630 }, { "action_loss": 0.01209858525544405, "epoch": 4.163669064748201, "step": 4630 }, { "epoch": 4.172661870503597, "grad_norm": 0.22516261041164398, "learning_rate": 9.979613034056434e-05, "loss": 0.0262, "step": 4640 }, { "action_loss": 0.06308499723672867, "epoch": 4.172661870503597, "step": 4640 }, { "epoch": 4.181654676258993, "grad_norm": 0.29830029606819153, "learning_rate": 9.979363673700386e-05, "loss": 0.0326, "step": 4650 }, { "action_loss": 0.020686091855168343, "epoch": 4.181654676258993, "step": 4650 }, { "epoch": 4.190647482014389, "grad_norm": 0.49315306544303894, "learning_rate": 9.979112800745124e-05, "loss": 0.0305, "step": 4660 }, { "action_loss": 0.04635298252105713, "epoch": 4.190647482014389, "step": 4660 }, { "epoch": 4.1996402877697845, "grad_norm": 0.39859721064567566, "learning_rate": 9.978860415266861e-05, "loss": 0.0281, "step": 4670 }, { "action_loss": 0.04817410185933113, "epoch": 4.1996402877697845, "step": 4670 }, { "epoch": 4.2086330935251794, "grad_norm": 0.35845381021499634, "learning_rate": 9.978606517342262e-05, "loss": 0.0275, "step": 4680 }, { "action_loss": 0.014876492321491241, "epoch": 4.2086330935251794, "step": 4680 }, { "epoch": 4.217625899280575, "grad_norm": 0.25899040699005127, "learning_rate": 9.978351107048456e-05, "loss": 0.0296, "step": 4690 }, { "action_loss": 0.010759949684143066, "epoch": 4.217625899280575, "step": 4690 }, { "epoch": 4.226618705035971, "grad_norm": 0.3403589427471161, "learning_rate": 9.978094184463029e-05, "loss": 0.0265, "step": 4700 }, { "action_loss": 0.051335036754608154, "epoch": 4.226618705035971, "step": 4700 }, { "epoch": 4.235611510791367, "grad_norm": 0.35009220242500305, "learning_rate": 9.977835749664029e-05, "loss": 0.0246, "step": 4710 }, { "action_loss": 0.013120453804731369, "epoch": 4.235611510791367, "step": 4710 }, { "epoch": 4.244604316546763, "grad_norm": 0.35820508003234863, "learning_rate": 9.97757580272996e-05, "loss": 0.0229, "step": 4720 }, { "action_loss": 0.012310606427490711, "epoch": 4.244604316546763, "step": 4720 }, { "epoch": 4.253597122302159, "grad_norm": 0.2771296799182892, "learning_rate": 9.977314343739786e-05, "loss": 0.0188, "step": 4730 }, { "action_loss": 0.046595022082328796, "epoch": 4.253597122302159, "step": 4730 }, { "epoch": 4.262589928057554, "grad_norm": 0.27601373195648193, "learning_rate": 9.977051372772934e-05, "loss": 0.0287, "step": 4740 }, { "action_loss": 0.022428950294852257, "epoch": 4.262589928057554, "step": 4740 }, { "epoch": 4.2715827338129495, "grad_norm": 0.23307658731937408, "learning_rate": 9.976786889909286e-05, "loss": 0.0197, "step": 4750 }, { "action_loss": 0.02427097223699093, "epoch": 4.2715827338129495, "step": 4750 }, { "epoch": 4.280575539568345, "grad_norm": 0.2995770275592804, "learning_rate": 9.976520895229185e-05, "loss": 0.0247, "step": 4760 }, { "action_loss": 0.02717535011470318, "epoch": 4.280575539568345, "step": 4760 }, { "epoch": 4.289568345323741, "grad_norm": 0.3535856306552887, "learning_rate": 9.976253388813433e-05, "loss": 0.0299, "step": 4770 }, { "action_loss": 0.012439538724720478, "epoch": 4.289568345323741, "step": 4770 }, { "epoch": 4.298561151079137, "grad_norm": 0.38778096437454224, "learning_rate": 9.975984370743293e-05, "loss": 0.0264, "step": 4780 }, { "action_loss": 0.0237265732139349, "epoch": 4.298561151079137, "step": 4780 }, { "epoch": 4.307553956834532, "grad_norm": 0.3751932680606842, "learning_rate": 9.975713841100485e-05, "loss": 0.0271, "step": 4790 }, { "action_loss": 0.018653100356459618, "epoch": 4.307553956834532, "step": 4790 }, { "epoch": 4.316546762589928, "grad_norm": 0.28128379583358765, "learning_rate": 9.975441799967187e-05, "loss": 0.0249, "step": 4800 }, { "action_loss": 0.0214959979057312, "epoch": 4.316546762589928, "step": 4800 }, { "epoch": 4.325539568345324, "grad_norm": 0.30558326840400696, "learning_rate": 9.975168247426039e-05, "loss": 0.0248, "step": 4810 }, { "action_loss": 0.0420108400285244, "epoch": 4.325539568345324, "step": 4810 }, { "epoch": 4.33453237410072, "grad_norm": 0.3146475851535797, "learning_rate": 9.974893183560139e-05, "loss": 0.0209, "step": 4820 }, { "action_loss": 0.022813847288489342, "epoch": 4.33453237410072, "step": 4820 }, { "epoch": 4.343525179856115, "grad_norm": 0.29396742582321167, "learning_rate": 9.974616608453045e-05, "loss": 0.0246, "step": 4830 }, { "action_loss": 0.028268784284591675, "epoch": 4.343525179856115, "step": 4830 }, { "epoch": 4.35251798561151, "grad_norm": 0.33749210834503174, "learning_rate": 9.974338522188772e-05, "loss": 0.0235, "step": 4840 }, { "action_loss": 0.027259672060608864, "epoch": 4.35251798561151, "step": 4840 }, { "epoch": 4.361510791366906, "grad_norm": 0.3311636745929718, "learning_rate": 9.974058924851797e-05, "loss": 0.0226, "step": 4850 }, { "action_loss": 0.03586554154753685, "epoch": 4.361510791366906, "step": 4850 }, { "epoch": 4.370503597122302, "grad_norm": 0.33036962151527405, "learning_rate": 9.973777816527051e-05, "loss": 0.0251, "step": 4860 }, { "action_loss": 0.04632480442523956, "epoch": 4.370503597122302, "step": 4860 }, { "epoch": 4.379496402877698, "grad_norm": 0.22766360640525818, "learning_rate": 9.973495197299931e-05, "loss": 0.0255, "step": 4870 }, { "action_loss": 0.011394384317100048, "epoch": 4.379496402877698, "step": 4870 }, { "epoch": 4.388489208633094, "grad_norm": 0.2806468904018402, "learning_rate": 9.973211067256287e-05, "loss": 0.0265, "step": 4880 }, { "action_loss": 0.016243228688836098, "epoch": 4.388489208633094, "step": 4880 }, { "epoch": 4.39748201438849, "grad_norm": 0.3844643831253052, "learning_rate": 9.97292542648243e-05, "loss": 0.0284, "step": 4890 }, { "action_loss": 0.013037190772593021, "epoch": 4.39748201438849, "step": 4890 }, { "epoch": 4.406474820143885, "grad_norm": 0.39588168263435364, "learning_rate": 9.972638275065131e-05, "loss": 0.0249, "step": 4900 }, { "action_loss": 0.04370058700442314, "epoch": 4.406474820143885, "step": 4900 }, { "epoch": 4.41546762589928, "grad_norm": 0.31053972244262695, "learning_rate": 9.972349613091621e-05, "loss": 0.0283, "step": 4910 }, { "action_loss": 0.03532949462532997, "epoch": 4.41546762589928, "step": 4910 }, { "epoch": 4.424460431654676, "grad_norm": 0.3256993293762207, "learning_rate": 9.972059440649584e-05, "loss": 0.0276, "step": 4920 }, { "action_loss": 0.02303057722747326, "epoch": 4.424460431654676, "step": 4920 }, { "epoch": 4.433453237410072, "grad_norm": 0.3752906024456024, "learning_rate": 9.971767757827168e-05, "loss": 0.0237, "step": 4930 }, { "action_loss": 0.013368750922381878, "epoch": 4.433453237410072, "step": 4930 }, { "epoch": 4.442446043165468, "grad_norm": 0.22153939306735992, "learning_rate": 9.971474564712982e-05, "loss": 0.0207, "step": 4940 }, { "action_loss": 0.021088071167469025, "epoch": 4.442446043165468, "step": 4940 }, { "epoch": 4.451438848920863, "grad_norm": 0.26102203130722046, "learning_rate": 9.971179861396084e-05, "loss": 0.0237, "step": 4950 }, { "action_loss": 0.040090661495923996, "epoch": 4.451438848920863, "step": 4950 }, { "epoch": 4.460431654676259, "grad_norm": 0.3784991502761841, "learning_rate": 9.970883647966003e-05, "loss": 0.0274, "step": 4960 }, { "action_loss": 0.039237845689058304, "epoch": 4.460431654676259, "step": 4960 }, { "epoch": 4.469424460431655, "grad_norm": 0.2537654638290405, "learning_rate": 9.970585924512717e-05, "loss": 0.0236, "step": 4970 }, { "action_loss": 0.014817747287452221, "epoch": 4.469424460431655, "step": 4970 }, { "epoch": 4.4784172661870505, "grad_norm": 0.3798190951347351, "learning_rate": 9.970286691126669e-05, "loss": 0.0213, "step": 4980 }, { "action_loss": 0.014359434135258198, "epoch": 4.4784172661870505, "step": 4980 }, { "epoch": 4.487410071942446, "grad_norm": 0.3307293951511383, "learning_rate": 9.969985947898756e-05, "loss": 0.0235, "step": 4990 }, { "action_loss": 0.04016547277569771, "epoch": 4.487410071942446, "step": 4990 }, { "epoch": 4.496402877697841, "grad_norm": 0.3072323203086853, "learning_rate": 9.969683694920337e-05, "loss": 0.0307, "step": 5000 }, { "action_loss": 0.043090593069791794, "epoch": 4.496402877697841, "step": 5000 }, { "epoch": 4.505395683453237, "grad_norm": 0.3567713797092438, "learning_rate": 9.969379932283228e-05, "loss": 0.0275, "step": 5010 }, { "action_loss": 0.016747593879699707, "epoch": 4.505395683453237, "step": 5010 }, { "epoch": 4.514388489208633, "grad_norm": 0.35104435682296753, "learning_rate": 9.969074660079704e-05, "loss": 0.0258, "step": 5020 }, { "action_loss": 0.04544441029429436, "epoch": 4.514388489208633, "step": 5020 }, { "epoch": 4.523381294964029, "grad_norm": 0.3142980933189392, "learning_rate": 9.968767878402501e-05, "loss": 0.0284, "step": 5030 }, { "action_loss": 0.016103215515613556, "epoch": 4.523381294964029, "step": 5030 }, { "epoch": 4.532374100719425, "grad_norm": 0.351368248462677, "learning_rate": 9.968459587344808e-05, "loss": 0.0213, "step": 5040 }, { "action_loss": 0.014422520995140076, "epoch": 4.532374100719425, "step": 5040 }, { "epoch": 4.5413669064748206, "grad_norm": 0.4075821340084076, "learning_rate": 9.968149787000278e-05, "loss": 0.0242, "step": 5050 }, { "action_loss": 0.009900768287479877, "epoch": 4.5413669064748206, "step": 5050 }, { "epoch": 4.5503597122302155, "grad_norm": 0.3434118926525116, "learning_rate": 9.967838477463018e-05, "loss": 0.0218, "step": 5060 }, { "action_loss": 0.028337059542536736, "epoch": 4.5503597122302155, "step": 5060 }, { "epoch": 4.559352517985611, "grad_norm": 0.3729136288166046, "learning_rate": 9.967525658827597e-05, "loss": 0.0263, "step": 5070 }, { "action_loss": 0.03343038633465767, "epoch": 4.559352517985611, "step": 5070 }, { "epoch": 4.568345323741007, "grad_norm": 0.40618979930877686, "learning_rate": 9.967211331189042e-05, "loss": 0.0317, "step": 5080 }, { "action_loss": 0.029543006792664528, "epoch": 4.568345323741007, "step": 5080 }, { "epoch": 4.577338129496403, "grad_norm": 0.31298118829727173, "learning_rate": 9.966895494642834e-05, "loss": 0.0311, "step": 5090 }, { "action_loss": 0.03569566085934639, "epoch": 4.577338129496403, "step": 5090 }, { "epoch": 4.586330935251799, "grad_norm": 0.3764690160751343, "learning_rate": 9.96657814928492e-05, "loss": 0.0312, "step": 5100 }, { "action_loss": 0.01774132251739502, "epoch": 4.586330935251799, "step": 5100 }, { "epoch": 4.595323741007194, "grad_norm": 0.27753910422325134, "learning_rate": 9.966259295211697e-05, "loss": 0.0269, "step": 5110 }, { "action_loss": 0.029032060876488686, "epoch": 4.595323741007194, "step": 5110 }, { "epoch": 4.60431654676259, "grad_norm": 0.2520783543586731, "learning_rate": 9.965938932520028e-05, "loss": 0.024, "step": 5120 }, { "action_loss": 0.04451432824134827, "epoch": 4.60431654676259, "step": 5120 }, { "epoch": 4.613309352517986, "grad_norm": 0.31757646799087524, "learning_rate": 9.965617061307229e-05, "loss": 0.0236, "step": 5130 }, { "action_loss": 0.03614239767193794, "epoch": 4.613309352517986, "step": 5130 }, { "epoch": 4.622302158273381, "grad_norm": 0.24238833785057068, "learning_rate": 9.965293681671077e-05, "loss": 0.0286, "step": 5140 }, { "action_loss": 0.06847670674324036, "epoch": 4.622302158273381, "step": 5140 }, { "epoch": 4.631294964028777, "grad_norm": 0.3294859826564789, "learning_rate": 9.964968793709804e-05, "loss": 0.0278, "step": 5150 }, { "action_loss": 0.020924469456076622, "epoch": 4.631294964028777, "step": 5150 }, { "epoch": 4.640287769784173, "grad_norm": 0.41062191128730774, "learning_rate": 9.964642397522106e-05, "loss": 0.0245, "step": 5160 }, { "action_loss": 0.03398771584033966, "epoch": 4.640287769784173, "step": 5160 }, { "epoch": 4.649280575539568, "grad_norm": 0.445384681224823, "learning_rate": 9.96431449320713e-05, "loss": 0.0251, "step": 5170 }, { "action_loss": 0.022715123370289803, "epoch": 4.649280575539568, "step": 5170 }, { "epoch": 4.658273381294964, "grad_norm": 0.30253687500953674, "learning_rate": 9.963985080864486e-05, "loss": 0.0277, "step": 5180 }, { "action_loss": 0.032288383692502975, "epoch": 4.658273381294964, "step": 5180 }, { "epoch": 4.66726618705036, "grad_norm": 0.25714609026908875, "learning_rate": 9.96365416059424e-05, "loss": 0.0251, "step": 5190 }, { "action_loss": 0.03475961461663246, "epoch": 4.66726618705036, "step": 5190 }, { "epoch": 4.676258992805756, "grad_norm": 0.38877126574516296, "learning_rate": 9.963321732496919e-05, "loss": 0.0263, "step": 5200 }, { "action_loss": 0.027735481038689613, "epoch": 4.676258992805756, "step": 5200 }, { "epoch": 4.685251798561151, "grad_norm": 0.616713285446167, "learning_rate": 9.962987796673506e-05, "loss": 0.0285, "step": 5210 }, { "action_loss": 0.019792115315794945, "epoch": 4.685251798561151, "step": 5210 }, { "epoch": 4.694244604316546, "grad_norm": 0.37211668491363525, "learning_rate": 9.962652353225438e-05, "loss": 0.0207, "step": 5220 }, { "action_loss": 0.017664708197116852, "epoch": 4.694244604316546, "step": 5220 }, { "epoch": 4.703237410071942, "grad_norm": 0.3874094486236572, "learning_rate": 9.962315402254619e-05, "loss": 0.0266, "step": 5230 }, { "action_loss": 0.022797783836722374, "epoch": 4.703237410071942, "step": 5230 }, { "epoch": 4.712230215827338, "grad_norm": 0.24886102974414825, "learning_rate": 9.9619769438634e-05, "loss": 0.0222, "step": 5240 }, { "action_loss": 0.024476030841469765, "epoch": 4.712230215827338, "step": 5240 }, { "epoch": 4.721223021582734, "grad_norm": 0.323323518037796, "learning_rate": 9.9616369781546e-05, "loss": 0.0204, "step": 5250 }, { "action_loss": 0.01104696560651064, "epoch": 4.721223021582734, "step": 5250 }, { "epoch": 4.73021582733813, "grad_norm": 0.34828707575798035, "learning_rate": 9.961295505231491e-05, "loss": 0.0253, "step": 5260 }, { "action_loss": 0.02304929681122303, "epoch": 4.73021582733813, "step": 5260 }, { "epoch": 4.739208633093525, "grad_norm": 0.2568013072013855, "learning_rate": 9.960952525197804e-05, "loss": 0.0232, "step": 5270 }, { "action_loss": 0.00944867730140686, "epoch": 4.739208633093525, "step": 5270 }, { "epoch": 4.748201438848921, "grad_norm": 0.26070430874824524, "learning_rate": 9.960608038157724e-05, "loss": 0.0177, "step": 5280 }, { "action_loss": 0.028322294354438782, "epoch": 4.748201438848921, "step": 5280 }, { "epoch": 4.7571942446043165, "grad_norm": 0.29231885075569153, "learning_rate": 9.960262044215901e-05, "loss": 0.0223, "step": 5290 }, { "action_loss": 0.03455144539475441, "epoch": 4.7571942446043165, "step": 5290 }, { "epoch": 4.766187050359712, "grad_norm": 0.28578993678092957, "learning_rate": 9.959914543477435e-05, "loss": 0.0305, "step": 5300 }, { "action_loss": 0.07603153586387634, "epoch": 4.766187050359712, "step": 5300 }, { "epoch": 4.775179856115108, "grad_norm": 0.41313251852989197, "learning_rate": 9.959565536047892e-05, "loss": 0.0376, "step": 5310 }, { "action_loss": 0.013117033056914806, "epoch": 4.775179856115108, "step": 5310 }, { "epoch": 4.784172661870503, "grad_norm": 0.26269838213920593, "learning_rate": 9.959215022033288e-05, "loss": 0.0237, "step": 5320 }, { "action_loss": 0.02597043476998806, "epoch": 4.784172661870503, "step": 5320 }, { "epoch": 4.793165467625899, "grad_norm": 0.23776237666606903, "learning_rate": 9.9588630015401e-05, "loss": 0.0273, "step": 5330 }, { "action_loss": 0.024512723088264465, "epoch": 4.793165467625899, "step": 5330 }, { "epoch": 4.802158273381295, "grad_norm": 0.28810831904411316, "learning_rate": 9.958509474675264e-05, "loss": 0.0206, "step": 5340 }, { "action_loss": 0.015724128112196922, "epoch": 4.802158273381295, "step": 5340 }, { "epoch": 4.811151079136691, "grad_norm": 0.21297064423561096, "learning_rate": 9.958154441546171e-05, "loss": 0.0311, "step": 5350 }, { "action_loss": 0.02094857580959797, "epoch": 4.811151079136691, "step": 5350 }, { "epoch": 4.820143884892087, "grad_norm": 0.3140030801296234, "learning_rate": 9.957797902260673e-05, "loss": 0.0265, "step": 5360 }, { "action_loss": 0.03181641921401024, "epoch": 4.820143884892087, "step": 5360 }, { "epoch": 4.829136690647482, "grad_norm": 0.3140665590763092, "learning_rate": 9.957439856927073e-05, "loss": 0.0193, "step": 5370 }, { "action_loss": 0.035071659833192825, "epoch": 4.829136690647482, "step": 5370 }, { "epoch": 4.838129496402877, "grad_norm": 0.2850104868412018, "learning_rate": 9.957080305654139e-05, "loss": 0.0295, "step": 5380 }, { "action_loss": 0.02300841547548771, "epoch": 4.838129496402877, "step": 5380 }, { "epoch": 4.847122302158273, "grad_norm": 0.23641403019428253, "learning_rate": 9.956719248551092e-05, "loss": 0.0265, "step": 5390 }, { "action_loss": 0.0094201834872365, "epoch": 4.847122302158273, "step": 5390 }, { "epoch": 4.856115107913669, "grad_norm": 0.41441959142684937, "learning_rate": 9.956356685727612e-05, "loss": 0.0228, "step": 5400 }, { "action_loss": 0.01888631097972393, "epoch": 4.856115107913669, "step": 5400 }, { "epoch": 4.865107913669065, "grad_norm": 0.39588138461112976, "learning_rate": 9.955992617293836e-05, "loss": 0.023, "step": 5410 }, { "action_loss": 0.014529201202094555, "epoch": 4.865107913669065, "step": 5410 }, { "epoch": 4.874100719424461, "grad_norm": 0.3372920751571655, "learning_rate": 9.955627043360358e-05, "loss": 0.0274, "step": 5420 }, { "action_loss": 0.018793301656842232, "epoch": 4.874100719424461, "step": 5420 }, { "epoch": 4.883093525179856, "grad_norm": 0.33297231793403625, "learning_rate": 9.955259964038231e-05, "loss": 0.026, "step": 5430 }, { "action_loss": 0.010988324880599976, "epoch": 4.883093525179856, "step": 5430 }, { "epoch": 4.892086330935252, "grad_norm": 0.4349965751171112, "learning_rate": 9.954891379438962e-05, "loss": 0.0287, "step": 5440 }, { "action_loss": 0.027994418516755104, "epoch": 4.892086330935252, "step": 5440 }, { "epoch": 4.901079136690647, "grad_norm": 0.3073200583457947, "learning_rate": 9.954521289674519e-05, "loss": 0.0319, "step": 5450 }, { "action_loss": 0.025447459891438484, "epoch": 4.901079136690647, "step": 5450 }, { "epoch": 4.910071942446043, "grad_norm": 0.34126517176628113, "learning_rate": 9.954149694857325e-05, "loss": 0.0261, "step": 5460 }, { "action_loss": 0.0201526191085577, "epoch": 4.910071942446043, "step": 5460 }, { "epoch": 4.919064748201439, "grad_norm": 0.40914055705070496, "learning_rate": 9.953776595100258e-05, "loss": 0.032, "step": 5470 }, { "action_loss": 0.030321165919303894, "epoch": 4.919064748201439, "step": 5470 }, { "epoch": 4.928057553956835, "grad_norm": 0.30689045786857605, "learning_rate": 9.95340199051666e-05, "loss": 0.0222, "step": 5480 }, { "action_loss": 0.02058035507798195, "epoch": 4.928057553956835, "step": 5480 }, { "epoch": 4.93705035971223, "grad_norm": 0.2708892226219177, "learning_rate": 9.953025881220325e-05, "loss": 0.0215, "step": 5490 }, { "action_loss": 0.01394437626004219, "epoch": 4.93705035971223, "step": 5490 }, { "epoch": 4.946043165467626, "grad_norm": 0.33026546239852905, "learning_rate": 9.952648267325504e-05, "loss": 0.0201, "step": 5500 }, { "action_loss": 0.04126463830471039, "epoch": 4.946043165467626, "step": 5500 }, { "epoch": 4.955035971223022, "grad_norm": 0.3559112548828125, "learning_rate": 9.952269148946905e-05, "loss": 0.0248, "step": 5510 }, { "action_loss": 0.013420730829238892, "epoch": 4.955035971223022, "step": 5510 }, { "epoch": 4.9640287769784175, "grad_norm": 0.28709161281585693, "learning_rate": 9.951888526199697e-05, "loss": 0.023, "step": 5520 }, { "action_loss": 0.019362663850188255, "epoch": 4.9640287769784175, "step": 5520 }, { "epoch": 4.9730215827338125, "grad_norm": 0.2631833255290985, "learning_rate": 9.951506399199501e-05, "loss": 0.0246, "step": 5530 }, { "action_loss": 0.011975296773016453, "epoch": 4.9730215827338125, "step": 5530 }, { "epoch": 4.982014388489208, "grad_norm": 0.34168514609336853, "learning_rate": 9.951122768062399e-05, "loss": 0.0216, "step": 5540 }, { "action_loss": 0.01707594282925129, "epoch": 4.982014388489208, "step": 5540 }, { "epoch": 4.991007194244604, "grad_norm": 0.24286334216594696, "learning_rate": 9.950737632904927e-05, "loss": 0.0234, "step": 5550 }, { "action_loss": 0.01335492730140686, "epoch": 4.991007194244604, "step": 5550 }, { "epoch": 5.0, "grad_norm": 0.2563371956348419, "learning_rate": 9.950350993844077e-05, "loss": 0.0232, "step": 5560 }, { "action_loss": 0.027163363993167877, "epoch": 5.0, "step": 5560 }, { "epoch": 5.008992805755396, "grad_norm": 0.3239654004573822, "learning_rate": 9.949962850997303e-05, "loss": 0.0239, "step": 5570 }, { "action_loss": 0.010373239405453205, "epoch": 5.008992805755396, "step": 5570 }, { "epoch": 5.017985611510792, "grad_norm": 0.3309156894683838, "learning_rate": 9.949573204482512e-05, "loss": 0.0206, "step": 5580 }, { "action_loss": 0.024982990697026253, "epoch": 5.017985611510792, "step": 5580 }, { "epoch": 5.026978417266187, "grad_norm": 0.2785381078720093, "learning_rate": 9.949182054418064e-05, "loss": 0.0206, "step": 5590 }, { "action_loss": 0.038673821836709976, "epoch": 5.026978417266187, "step": 5590 }, { "epoch": 5.0359712230215825, "grad_norm": 0.230377197265625, "learning_rate": 9.948789400922787e-05, "loss": 0.0249, "step": 5600 }, { "action_loss": 0.03397976979613304, "epoch": 5.0359712230215825, "step": 5600 }, { "epoch": 5.044964028776978, "grad_norm": 0.2904433608055115, "learning_rate": 9.948395244115953e-05, "loss": 0.0255, "step": 5610 }, { "action_loss": 0.028655685484409332, "epoch": 5.044964028776978, "step": 5610 }, { "epoch": 5.053956834532374, "grad_norm": 0.29690679907798767, "learning_rate": 9.9479995841173e-05, "loss": 0.0257, "step": 5620 }, { "action_loss": 0.023609668016433716, "epoch": 5.053956834532374, "step": 5620 }, { "epoch": 5.06294964028777, "grad_norm": 0.2888384461402893, "learning_rate": 9.947602421047017e-05, "loss": 0.0171, "step": 5630 }, { "action_loss": 0.021924100816249847, "epoch": 5.06294964028777, "step": 5630 }, { "epoch": 5.071942446043165, "grad_norm": 0.39031797647476196, "learning_rate": 9.947203755025753e-05, "loss": 0.024, "step": 5640 }, { "action_loss": 0.029272034764289856, "epoch": 5.071942446043165, "step": 5640 }, { "epoch": 5.080935251798561, "grad_norm": 0.3558548390865326, "learning_rate": 9.946803586174611e-05, "loss": 0.0234, "step": 5650 }, { "action_loss": 0.030739352107048035, "epoch": 5.080935251798561, "step": 5650 }, { "epoch": 5.089928057553957, "grad_norm": 0.2957412600517273, "learning_rate": 9.946401914615151e-05, "loss": 0.0254, "step": 5660 }, { "action_loss": 0.021807851269841194, "epoch": 5.089928057553957, "step": 5660 }, { "epoch": 5.098920863309353, "grad_norm": 0.29546797275543213, "learning_rate": 9.945998740469394e-05, "loss": 0.0187, "step": 5670 }, { "action_loss": 0.06505932658910751, "epoch": 5.098920863309353, "step": 5670 }, { "epoch": 5.107913669064748, "grad_norm": 0.3076685965061188, "learning_rate": 9.945594063859809e-05, "loss": 0.0283, "step": 5680 }, { "action_loss": 0.008646477945148945, "epoch": 5.107913669064748, "step": 5680 }, { "epoch": 5.116906474820144, "grad_norm": 0.36809074878692627, "learning_rate": 9.94518788490933e-05, "loss": 0.0217, "step": 5690 }, { "action_loss": 0.049372587352991104, "epoch": 5.116906474820144, "step": 5690 }, { "epoch": 5.125899280575539, "grad_norm": 0.283243864774704, "learning_rate": 9.944780203741341e-05, "loss": 0.0282, "step": 5700 }, { "action_loss": 0.017598090693354607, "epoch": 5.125899280575539, "step": 5700 }, { "epoch": 5.134892086330935, "grad_norm": 0.2421446293592453, "learning_rate": 9.944371020479686e-05, "loss": 0.0233, "step": 5710 }, { "action_loss": 0.020996399223804474, "epoch": 5.134892086330935, "step": 5710 }, { "epoch": 5.143884892086331, "grad_norm": 0.33596745133399963, "learning_rate": 9.943960335248662e-05, "loss": 0.0207, "step": 5720 }, { "action_loss": 0.012293103151023388, "epoch": 5.143884892086331, "step": 5720 }, { "epoch": 5.152877697841727, "grad_norm": 0.30658337473869324, "learning_rate": 9.943548148173027e-05, "loss": 0.0233, "step": 5730 }, { "action_loss": 0.01500836480408907, "epoch": 5.152877697841727, "step": 5730 }, { "epoch": 5.161870503597123, "grad_norm": 0.21959631145000458, "learning_rate": 9.943134459377992e-05, "loss": 0.018, "step": 5740 }, { "action_loss": 0.015616918914020061, "epoch": 5.161870503597123, "step": 5740 }, { "epoch": 5.170863309352518, "grad_norm": 0.3545985519886017, "learning_rate": 9.942719268989222e-05, "loss": 0.0267, "step": 5750 }, { "action_loss": 0.011574986390769482, "epoch": 5.170863309352518, "step": 5750 }, { "epoch": 5.179856115107913, "grad_norm": 0.28039321303367615, "learning_rate": 9.942302577132844e-05, "loss": 0.0209, "step": 5760 }, { "action_loss": 0.02097093127667904, "epoch": 5.179856115107913, "step": 5760 }, { "epoch": 5.188848920863309, "grad_norm": 0.28143665194511414, "learning_rate": 9.941884383935438e-05, "loss": 0.0189, "step": 5770 }, { "action_loss": 0.045225437730550766, "epoch": 5.188848920863309, "step": 5770 }, { "epoch": 5.197841726618705, "grad_norm": 0.2527715563774109, "learning_rate": 9.941464689524039e-05, "loss": 0.0238, "step": 5780 }, { "action_loss": 0.016556909307837486, "epoch": 5.197841726618705, "step": 5780 }, { "epoch": 5.206834532374101, "grad_norm": 0.44966384768486023, "learning_rate": 9.941043494026139e-05, "loss": 0.0268, "step": 5790 }, { "action_loss": 0.031083764508366585, "epoch": 5.206834532374101, "step": 5790 }, { "epoch": 5.215827338129497, "grad_norm": 0.33609887957572937, "learning_rate": 9.940620797569685e-05, "loss": 0.0218, "step": 5800 }, { "action_loss": 0.025287242606282234, "epoch": 5.215827338129497, "step": 5800 }, { "epoch": 5.224820143884892, "grad_norm": 0.36715584993362427, "learning_rate": 9.940196600283082e-05, "loss": 0.034, "step": 5810 }, { "action_loss": 0.019716376438736916, "epoch": 5.224820143884892, "step": 5810 }, { "epoch": 5.233812949640288, "grad_norm": 0.35699567198753357, "learning_rate": 9.939770902295192e-05, "loss": 0.0313, "step": 5820 }, { "action_loss": 0.012856967747211456, "epoch": 5.233812949640288, "step": 5820 }, { "epoch": 5.2428057553956835, "grad_norm": 0.2896405756473541, "learning_rate": 9.939343703735329e-05, "loss": 0.0273, "step": 5830 }, { "action_loss": 0.017900701612234116, "epoch": 5.2428057553956835, "step": 5830 }, { "epoch": 5.251798561151079, "grad_norm": 0.4234370291233063, "learning_rate": 9.938915004733264e-05, "loss": 0.0222, "step": 5840 }, { "action_loss": 0.022063681855797768, "epoch": 5.251798561151079, "step": 5840 }, { "epoch": 5.260791366906475, "grad_norm": 0.20995420217514038, "learning_rate": 9.938484805419224e-05, "loss": 0.0243, "step": 5850 }, { "action_loss": 0.0197770819067955, "epoch": 5.260791366906475, "step": 5850 }, { "epoch": 5.26978417266187, "grad_norm": 0.25802528858184814, "learning_rate": 9.938053105923894e-05, "loss": 0.0213, "step": 5860 }, { "action_loss": 0.009461917914450169, "epoch": 5.26978417266187, "step": 5860 }, { "epoch": 5.278776978417266, "grad_norm": 0.30027246475219727, "learning_rate": 9.937619906378413e-05, "loss": 0.0194, "step": 5870 }, { "action_loss": 0.013573504984378815, "epoch": 5.278776978417266, "step": 5870 }, { "epoch": 5.287769784172662, "grad_norm": 0.20208975672721863, "learning_rate": 9.937185206914374e-05, "loss": 0.0259, "step": 5880 }, { "action_loss": 0.036222513765096664, "epoch": 5.287769784172662, "step": 5880 }, { "epoch": 5.296762589928058, "grad_norm": 0.3093419373035431, "learning_rate": 9.936749007663829e-05, "loss": 0.0203, "step": 5890 }, { "action_loss": 0.01623586006462574, "epoch": 5.296762589928058, "step": 5890 }, { "epoch": 5.305755395683454, "grad_norm": 0.23794186115264893, "learning_rate": 9.93631130875928e-05, "loss": 0.0213, "step": 5900 }, { "action_loss": 0.043483536690473557, "epoch": 5.305755395683454, "step": 5900 }, { "epoch": 5.3147482014388485, "grad_norm": 0.29831525683403015, "learning_rate": 9.935872110333692e-05, "loss": 0.0289, "step": 5910 }, { "action_loss": 0.026459192857146263, "epoch": 5.3147482014388485, "step": 5910 }, { "epoch": 5.323741007194244, "grad_norm": 0.3436129689216614, "learning_rate": 9.935431412520484e-05, "loss": 0.0262, "step": 5920 }, { "action_loss": 0.012056916020810604, "epoch": 5.323741007194244, "step": 5920 }, { "epoch": 5.33273381294964, "grad_norm": 0.2472950965166092, "learning_rate": 9.934989215453523e-05, "loss": 0.0248, "step": 5930 }, { "action_loss": 0.023303547874093056, "epoch": 5.33273381294964, "step": 5930 }, { "epoch": 5.341726618705036, "grad_norm": 0.25868847966194153, "learning_rate": 9.934545519267139e-05, "loss": 0.0276, "step": 5940 }, { "action_loss": 0.019944779574871063, "epoch": 5.341726618705036, "step": 5940 }, { "epoch": 5.350719424460432, "grad_norm": 0.29980236291885376, "learning_rate": 9.934100324096117e-05, "loss": 0.022, "step": 5950 }, { "action_loss": 0.02083607204258442, "epoch": 5.350719424460432, "step": 5950 }, { "epoch": 5.359712230215827, "grad_norm": 0.2834591865539551, "learning_rate": 9.933653630075692e-05, "loss": 0.02, "step": 5960 }, { "action_loss": 0.032648392021656036, "epoch": 5.359712230215827, "step": 5960 }, { "epoch": 5.368705035971223, "grad_norm": 0.2602221667766571, "learning_rate": 9.93320543734156e-05, "loss": 0.0199, "step": 5970 }, { "action_loss": 0.010791163891553879, "epoch": 5.368705035971223, "step": 5970 }, { "epoch": 5.377697841726619, "grad_norm": 0.25319817662239075, "learning_rate": 9.932755746029871e-05, "loss": 0.0162, "step": 5980 }, { "action_loss": 0.004882641602307558, "epoch": 5.377697841726619, "step": 5980 }, { "epoch": 5.386690647482014, "grad_norm": 0.2691398859024048, "learning_rate": 9.932304556277228e-05, "loss": 0.0244, "step": 5990 }, { "action_loss": 0.0197692159563303, "epoch": 5.386690647482014, "step": 5990 }, { "epoch": 5.39568345323741, "grad_norm": 0.3221348524093628, "learning_rate": 9.93185186822069e-05, "loss": 0.0286, "step": 6000 }, { "action_loss": 0.032617464661598206, "epoch": 5.39568345323741, "step": 6000 }, { "epoch": 5.404676258992806, "grad_norm": 0.2821117341518402, "learning_rate": 9.931397681997773e-05, "loss": 0.0234, "step": 6010 }, { "action_loss": 0.010832064785063267, "epoch": 5.404676258992806, "step": 6010 }, { "epoch": 5.413669064748201, "grad_norm": 0.3076544404029846, "learning_rate": 9.930941997746446e-05, "loss": 0.0212, "step": 6020 }, { "action_loss": 0.023929575458168983, "epoch": 5.413669064748201, "step": 6020 }, { "epoch": 5.422661870503597, "grad_norm": 0.28535735607147217, "learning_rate": 9.930484815605134e-05, "loss": 0.0236, "step": 6030 }, { "action_loss": 0.009751994162797928, "epoch": 5.422661870503597, "step": 6030 }, { "epoch": 5.431654676258993, "grad_norm": 0.2701755464076996, "learning_rate": 9.930026135712717e-05, "loss": 0.0271, "step": 6040 }, { "action_loss": 0.013032264076173306, "epoch": 5.431654676258993, "step": 6040 }, { "epoch": 5.440647482014389, "grad_norm": 0.22849522531032562, "learning_rate": 9.92956595820853e-05, "loss": 0.022, "step": 6050 }, { "action_loss": 0.008632118813693523, "epoch": 5.440647482014389, "step": 6050 }, { "epoch": 5.4496402877697845, "grad_norm": 0.3118283450603485, "learning_rate": 9.929104283232362e-05, "loss": 0.0188, "step": 6060 }, { "action_loss": 0.041184842586517334, "epoch": 5.4496402877697845, "step": 6060 }, { "epoch": 5.4586330935251794, "grad_norm": 0.32384634017944336, "learning_rate": 9.92864111092446e-05, "loss": 0.0222, "step": 6070 }, { "action_loss": 0.010606788098812103, "epoch": 5.4586330935251794, "step": 6070 }, { "epoch": 5.467625899280575, "grad_norm": 0.25588229298591614, "learning_rate": 9.92817644142552e-05, "loss": 0.0177, "step": 6080 }, { "action_loss": 0.01856367476284504, "epoch": 5.467625899280575, "step": 6080 }, { "epoch": 5.476618705035971, "grad_norm": 0.4536917805671692, "learning_rate": 9.927710274876698e-05, "loss": 0.0263, "step": 6090 }, { "action_loss": 0.024468334391713142, "epoch": 5.476618705035971, "step": 6090 }, { "epoch": 5.485611510791367, "grad_norm": 0.2564857602119446, "learning_rate": 9.927242611419603e-05, "loss": 0.0247, "step": 6100 }, { "action_loss": 0.030964696779847145, "epoch": 5.485611510791367, "step": 6100 }, { "epoch": 5.494604316546763, "grad_norm": 0.37238016724586487, "learning_rate": 9.926773451196301e-05, "loss": 0.0287, "step": 6110 }, { "action_loss": 0.014985357411205769, "epoch": 5.494604316546763, "step": 6110 }, { "epoch": 5.503597122302159, "grad_norm": 0.2758253216743469, "learning_rate": 9.926302794349306e-05, "loss": 0.0213, "step": 6120 }, { "action_loss": 0.025090830400586128, "epoch": 5.503597122302159, "step": 6120 }, { "epoch": 5.512589928057554, "grad_norm": 0.39346837997436523, "learning_rate": 9.925830641021594e-05, "loss": 0.0309, "step": 6130 }, { "action_loss": 0.04743014648556709, "epoch": 5.512589928057554, "step": 6130 }, { "epoch": 5.5215827338129495, "grad_norm": 0.34027761220932007, "learning_rate": 9.925356991356593e-05, "loss": 0.0242, "step": 6140 }, { "action_loss": 0.06913794577121735, "epoch": 5.5215827338129495, "step": 6140 }, { "epoch": 5.530575539568345, "grad_norm": 0.3092919886112213, "learning_rate": 9.924881845498184e-05, "loss": 0.0229, "step": 6150 }, { "action_loss": 0.018764933571219444, "epoch": 5.530575539568345, "step": 6150 }, { "epoch": 5.539568345323741, "grad_norm": 0.2506648898124695, "learning_rate": 9.924405203590705e-05, "loss": 0.0233, "step": 6160 }, { "action_loss": 0.026662558317184448, "epoch": 5.539568345323741, "step": 6160 }, { "epoch": 5.548561151079137, "grad_norm": 0.27410635352134705, "learning_rate": 9.923927065778946e-05, "loss": 0.0237, "step": 6170 }, { "action_loss": 0.011806663125753403, "epoch": 5.548561151079137, "step": 6170 }, { "epoch": 5.557553956834532, "grad_norm": 0.3871821463108063, "learning_rate": 9.923447432208154e-05, "loss": 0.0248, "step": 6180 }, { "action_loss": 0.017753256484866142, "epoch": 5.557553956834532, "step": 6180 }, { "epoch": 5.566546762589928, "grad_norm": 0.3457862138748169, "learning_rate": 9.922966303024027e-05, "loss": 0.0226, "step": 6190 }, { "action_loss": 0.021887129172682762, "epoch": 5.566546762589928, "step": 6190 }, { "epoch": 5.575539568345324, "grad_norm": 0.26885175704956055, "learning_rate": 9.922483678372721e-05, "loss": 0.029, "step": 6200 }, { "action_loss": 0.040092889219522476, "epoch": 5.575539568345324, "step": 6200 }, { "epoch": 5.58453237410072, "grad_norm": 0.38036850094795227, "learning_rate": 9.921999558400845e-05, "loss": 0.0248, "step": 6210 }, { "action_loss": 0.0228651762008667, "epoch": 5.58453237410072, "step": 6210 }, { "epoch": 5.593525179856115, "grad_norm": 0.3595182001590729, "learning_rate": 9.92151394325546e-05, "loss": 0.0317, "step": 6220 }, { "action_loss": 0.014207729138433933, "epoch": 5.593525179856115, "step": 6220 }, { "epoch": 5.602517985611511, "grad_norm": 0.4633069336414337, "learning_rate": 9.921026833084084e-05, "loss": 0.0221, "step": 6230 }, { "action_loss": 0.021882519125938416, "epoch": 5.602517985611511, "step": 6230 }, { "epoch": 5.611510791366906, "grad_norm": 0.19384166598320007, "learning_rate": 9.920538228034689e-05, "loss": 0.0225, "step": 6240 }, { "action_loss": 0.03229869529604912, "epoch": 5.611510791366906, "step": 6240 }, { "epoch": 5.620503597122302, "grad_norm": 0.22967325150966644, "learning_rate": 9.920048128255699e-05, "loss": 0.0242, "step": 6250 }, { "action_loss": 0.0196670088917017, "epoch": 5.620503597122302, "step": 6250 }, { "epoch": 5.629496402877698, "grad_norm": 0.35668352246284485, "learning_rate": 9.919556533895995e-05, "loss": 0.0279, "step": 6260 }, { "action_loss": 0.028513386845588684, "epoch": 5.629496402877698, "step": 6260 }, { "epoch": 5.638489208633094, "grad_norm": 0.34619325399398804, "learning_rate": 9.919063445104907e-05, "loss": 0.026, "step": 6270 }, { "action_loss": 0.01851581037044525, "epoch": 5.638489208633094, "step": 6270 }, { "epoch": 5.647482014388489, "grad_norm": 0.49450090527534485, "learning_rate": 9.918568862032227e-05, "loss": 0.0251, "step": 6280 }, { "action_loss": 0.03881227970123291, "epoch": 5.647482014388489, "step": 6280 }, { "epoch": 5.656474820143885, "grad_norm": 0.4213472902774811, "learning_rate": 9.918072784828194e-05, "loss": 0.0225, "step": 6290 }, { "action_loss": 0.03689119219779968, "epoch": 5.656474820143885, "step": 6290 }, { "epoch": 5.66546762589928, "grad_norm": 0.343893826007843, "learning_rate": 9.917575213643501e-05, "loss": 0.0267, "step": 6300 }, { "action_loss": 0.021592533215880394, "epoch": 5.66546762589928, "step": 6300 }, { "epoch": 5.674460431654676, "grad_norm": 0.29965564608573914, "learning_rate": 9.917076148629302e-05, "loss": 0.0204, "step": 6310 }, { "action_loss": 0.01544238906353712, "epoch": 5.674460431654676, "step": 6310 }, { "epoch": 5.683453237410072, "grad_norm": 0.4010874330997467, "learning_rate": 9.916575589937196e-05, "loss": 0.0262, "step": 6320 }, { "action_loss": 0.02700967527925968, "epoch": 5.683453237410072, "step": 6320 }, { "epoch": 5.692446043165468, "grad_norm": 0.40871432423591614, "learning_rate": 9.916073537719239e-05, "loss": 0.0248, "step": 6330 }, { "action_loss": 0.017840635031461716, "epoch": 5.692446043165468, "step": 6330 }, { "epoch": 5.701438848920863, "grad_norm": 0.26491454243659973, "learning_rate": 9.915569992127944e-05, "loss": 0.0241, "step": 6340 }, { "action_loss": 0.017726348713040352, "epoch": 5.701438848920863, "step": 6340 }, { "epoch": 5.710431654676259, "grad_norm": 0.28457701206207275, "learning_rate": 9.915064953316273e-05, "loss": 0.0214, "step": 6350 }, { "action_loss": 0.010474559850990772, "epoch": 5.710431654676259, "step": 6350 }, { "epoch": 5.719424460431655, "grad_norm": 0.2591615915298462, "learning_rate": 9.914558421437645e-05, "loss": 0.0208, "step": 6360 }, { "action_loss": 0.01602018065750599, "epoch": 5.719424460431655, "step": 6360 }, { "epoch": 5.7284172661870505, "grad_norm": 0.30492478609085083, "learning_rate": 9.914050396645929e-05, "loss": 0.0231, "step": 6370 }, { "action_loss": 0.026731377467513084, "epoch": 5.7284172661870505, "step": 6370 }, { "epoch": 5.737410071942446, "grad_norm": 0.4478059411048889, "learning_rate": 9.913540879095452e-05, "loss": 0.026, "step": 6380 }, { "action_loss": 0.04380841553211212, "epoch": 5.737410071942446, "step": 6380 }, { "epoch": 5.746402877697841, "grad_norm": 0.2927929759025574, "learning_rate": 9.913029868940987e-05, "loss": 0.0245, "step": 6390 }, { "action_loss": 0.023691745474934578, "epoch": 5.746402877697841, "step": 6390 }, { "epoch": 5.755395683453237, "grad_norm": 0.32243412733078003, "learning_rate": 9.912517366337772e-05, "loss": 0.0233, "step": 6400 }, { "action_loss": 0.023756960406899452, "epoch": 5.755395683453237, "step": 6400 }, { "epoch": 5.764388489208633, "grad_norm": 0.30569541454315186, "learning_rate": 9.912003371441487e-05, "loss": 0.0261, "step": 6410 }, { "action_loss": 0.02709302492439747, "epoch": 5.764388489208633, "step": 6410 }, { "epoch": 5.773381294964029, "grad_norm": 0.25805872678756714, "learning_rate": 9.911487884408271e-05, "loss": 0.0224, "step": 6420 }, { "action_loss": 0.009675618261098862, "epoch": 5.773381294964029, "step": 6420 }, { "epoch": 5.782374100719425, "grad_norm": 0.4004889726638794, "learning_rate": 9.910970905394719e-05, "loss": 0.0218, "step": 6430 }, { "action_loss": 0.014815590344369411, "epoch": 5.782374100719425, "step": 6430 }, { "epoch": 5.7913669064748206, "grad_norm": 0.43229418992996216, "learning_rate": 9.91045243455787e-05, "loss": 0.0196, "step": 6440 }, { "action_loss": 0.016799382865428925, "epoch": 5.7913669064748206, "step": 6440 }, { "epoch": 5.8003597122302155, "grad_norm": 0.25871285796165466, "learning_rate": 9.909932472055225e-05, "loss": 0.0324, "step": 6450 }, { "action_loss": 0.016483532264828682, "epoch": 5.8003597122302155, "step": 6450 }, { "epoch": 5.809352517985611, "grad_norm": 0.305081307888031, "learning_rate": 9.909411018044734e-05, "loss": 0.0211, "step": 6460 }, { "action_loss": 0.030594585463404655, "epoch": 5.809352517985611, "step": 6460 }, { "epoch": 5.818345323741007, "grad_norm": 0.3675025999546051, "learning_rate": 9.908888072684802e-05, "loss": 0.0273, "step": 6470 }, { "action_loss": 0.03044133447110653, "epoch": 5.818345323741007, "step": 6470 }, { "epoch": 5.827338129496403, "grad_norm": 0.33498746156692505, "learning_rate": 9.908363636134285e-05, "loss": 0.021, "step": 6480 }, { "action_loss": 0.026931369677186012, "epoch": 5.827338129496403, "step": 6480 }, { "epoch": 5.836330935251799, "grad_norm": 0.3314896821975708, "learning_rate": 9.907837708552493e-05, "loss": 0.0195, "step": 6490 }, { "action_loss": 0.02005992829799652, "epoch": 5.836330935251799, "step": 6490 }, { "epoch": 5.845323741007194, "grad_norm": 0.2945694327354431, "learning_rate": 9.90731029009919e-05, "loss": 0.0217, "step": 6500 }, { "action_loss": 0.012496884912252426, "epoch": 5.845323741007194, "step": 6500 }, { "epoch": 5.85431654676259, "grad_norm": 0.3818848431110382, "learning_rate": 9.906781380934589e-05, "loss": 0.0313, "step": 6510 }, { "action_loss": 0.03491685166954994, "epoch": 5.85431654676259, "step": 6510 }, { "epoch": 5.863309352517986, "grad_norm": 0.3203330934047699, "learning_rate": 9.906250981219362e-05, "loss": 0.0241, "step": 6520 }, { "action_loss": 0.011835427023470402, "epoch": 5.863309352517986, "step": 6520 }, { "epoch": 5.872302158273381, "grad_norm": 0.2686821222305298, "learning_rate": 9.905719091114628e-05, "loss": 0.0205, "step": 6530 }, { "action_loss": 0.01895216293632984, "epoch": 5.872302158273381, "step": 6530 }, { "epoch": 5.881294964028777, "grad_norm": 0.2751632630825043, "learning_rate": 9.905185710781964e-05, "loss": 0.024, "step": 6540 }, { "action_loss": 0.018423331901431084, "epoch": 5.881294964028777, "step": 6540 }, { "epoch": 5.890287769784173, "grad_norm": 0.328121542930603, "learning_rate": 9.904650840383392e-05, "loss": 0.0251, "step": 6550 }, { "action_loss": 0.030101656913757324, "epoch": 5.890287769784173, "step": 6550 }, { "epoch": 5.899280575539568, "grad_norm": 0.31350478529930115, "learning_rate": 9.904114480081397e-05, "loss": 0.0218, "step": 6560 }, { "action_loss": 0.020221559330821037, "epoch": 5.899280575539568, "step": 6560 }, { "epoch": 5.908273381294964, "grad_norm": 0.34276047348976135, "learning_rate": 9.903576630038906e-05, "loss": 0.0315, "step": 6570 }, { "action_loss": 0.03022821992635727, "epoch": 5.908273381294964, "step": 6570 }, { "epoch": 5.91726618705036, "grad_norm": 0.3026847839355469, "learning_rate": 9.903037290419309e-05, "loss": 0.0212, "step": 6580 }, { "action_loss": 0.01796599291265011, "epoch": 5.91726618705036, "step": 6580 }, { "epoch": 5.926258992805756, "grad_norm": 0.2856846749782562, "learning_rate": 9.902496461386439e-05, "loss": 0.023, "step": 6590 }, { "action_loss": 0.013438313268125057, "epoch": 5.926258992805756, "step": 6590 }, { "epoch": 5.935251798561151, "grad_norm": 0.3276565968990326, "learning_rate": 9.901954143104588e-05, "loss": 0.0217, "step": 6600 }, { "action_loss": 0.014264971017837524, "epoch": 5.935251798561151, "step": 6600 }, { "epoch": 5.944244604316546, "grad_norm": 0.37891119718551636, "learning_rate": 9.901410335738496e-05, "loss": 0.0177, "step": 6610 }, { "action_loss": 0.031192606315016747, "epoch": 5.944244604316546, "step": 6610 }, { "epoch": 5.953237410071942, "grad_norm": 0.34132033586502075, "learning_rate": 9.900865039453358e-05, "loss": 0.0239, "step": 6620 }, { "action_loss": 0.012119499035179615, "epoch": 5.953237410071942, "step": 6620 }, { "epoch": 5.962230215827338, "grad_norm": 0.3534640371799469, "learning_rate": 9.900318254414821e-05, "loss": 0.0233, "step": 6630 }, { "action_loss": 0.033722516149282455, "epoch": 5.962230215827338, "step": 6630 }, { "epoch": 5.971223021582734, "grad_norm": 0.32759276032447815, "learning_rate": 9.899769980788985e-05, "loss": 0.0198, "step": 6640 }, { "action_loss": 0.012425611726939678, "epoch": 5.971223021582734, "step": 6640 }, { "epoch": 5.98021582733813, "grad_norm": 0.3019165098667145, "learning_rate": 9.899220218742398e-05, "loss": 0.02, "step": 6650 }, { "action_loss": 0.014287900179624557, "epoch": 5.98021582733813, "step": 6650 }, { "epoch": 5.989208633093525, "grad_norm": 0.35920071601867676, "learning_rate": 9.898668968442066e-05, "loss": 0.0279, "step": 6660 }, { "action_loss": 0.03595930337905884, "epoch": 5.989208633093525, "step": 6660 }, { "epoch": 5.998201438848921, "grad_norm": 0.24270375072956085, "learning_rate": 9.898116230055443e-05, "loss": 0.0225, "step": 6670 }, { "action_loss": 0.02236766181886196, "epoch": 5.998201438848921, "step": 6670 }, { "epoch": 6.0071942446043165, "grad_norm": 0.30722928047180176, "learning_rate": 9.897562003750437e-05, "loss": 0.0185, "step": 6680 }, { "action_loss": 0.02159014903008938, "epoch": 6.0071942446043165, "step": 6680 }, { "epoch": 6.016187050359712, "grad_norm": 0.3010164201259613, "learning_rate": 9.897006289695407e-05, "loss": 0.0223, "step": 6690 }, { "action_loss": 0.03897896409034729, "epoch": 6.016187050359712, "step": 6690 }, { "epoch": 6.025179856115108, "grad_norm": 0.34724512696266174, "learning_rate": 9.896449088059164e-05, "loss": 0.0303, "step": 6700 }, { "action_loss": 0.01403208076953888, "epoch": 6.025179856115108, "step": 6700 }, { "epoch": 6.034172661870503, "grad_norm": 0.27239227294921875, "learning_rate": 9.89589039901097e-05, "loss": 0.0213, "step": 6710 }, { "action_loss": 0.017718419432640076, "epoch": 6.034172661870503, "step": 6710 }, { "epoch": 6.043165467625899, "grad_norm": 0.27933797240257263, "learning_rate": 9.895330222720542e-05, "loss": 0.0209, "step": 6720 }, { "action_loss": 0.023236781358718872, "epoch": 6.043165467625899, "step": 6720 }, { "epoch": 6.052158273381295, "grad_norm": 0.25486108660697937, "learning_rate": 9.894768559358047e-05, "loss": 0.0282, "step": 6730 }, { "action_loss": 0.052085090428590775, "epoch": 6.052158273381295, "step": 6730 }, { "epoch": 6.061151079136691, "grad_norm": 0.36789849400520325, "learning_rate": 9.894205409094101e-05, "loss": 0.0288, "step": 6740 }, { "action_loss": 0.02962893806397915, "epoch": 6.061151079136691, "step": 6740 }, { "epoch": 6.070143884892087, "grad_norm": 0.3207709789276123, "learning_rate": 9.893640772099777e-05, "loss": 0.0189, "step": 6750 }, { "action_loss": 0.01976187899708748, "epoch": 6.070143884892087, "step": 6750 }, { "epoch": 6.079136690647482, "grad_norm": 0.3644663095474243, "learning_rate": 9.893074648546595e-05, "loss": 0.0284, "step": 6760 }, { "action_loss": 0.023009462282061577, "epoch": 6.079136690647482, "step": 6760 }, { "epoch": 6.088129496402877, "grad_norm": 0.32413777709007263, "learning_rate": 9.892507038606528e-05, "loss": 0.0255, "step": 6770 }, { "action_loss": 0.025942305102944374, "epoch": 6.088129496402877, "step": 6770 }, { "epoch": 6.097122302158273, "grad_norm": 0.25606146454811096, "learning_rate": 9.891937942452003e-05, "loss": 0.0233, "step": 6780 }, { "action_loss": 0.01247031893581152, "epoch": 6.097122302158273, "step": 6780 }, { "epoch": 6.106115107913669, "grad_norm": 0.27619799971580505, "learning_rate": 9.891367360255895e-05, "loss": 0.0188, "step": 6790 }, { "action_loss": 0.03584517911076546, "epoch": 6.106115107913669, "step": 6790 }, { "epoch": 6.115107913669065, "grad_norm": 0.3604428172111511, "learning_rate": 9.890795292191532e-05, "loss": 0.0301, "step": 6800 }, { "action_loss": 0.028396278619766235, "epoch": 6.115107913669065, "step": 6800 }, { "epoch": 6.124100719424461, "grad_norm": 0.3555194139480591, "learning_rate": 9.890221738432694e-05, "loss": 0.0258, "step": 6810 }, { "action_loss": 0.01672998256981373, "epoch": 6.124100719424461, "step": 6810 }, { "epoch": 6.133093525179856, "grad_norm": 0.3376137614250183, "learning_rate": 9.88964669915361e-05, "loss": 0.0216, "step": 6820 }, { "action_loss": 0.02211812697350979, "epoch": 6.133093525179856, "step": 6820 }, { "epoch": 6.142086330935252, "grad_norm": 0.34414786100387573, "learning_rate": 9.889070174528963e-05, "loss": 0.0268, "step": 6830 }, { "action_loss": 0.01933460496366024, "epoch": 6.142086330935252, "step": 6830 }, { "epoch": 6.151079136690647, "grad_norm": 0.3048882782459259, "learning_rate": 9.888492164733883e-05, "loss": 0.0222, "step": 6840 }, { "action_loss": 0.01859980635344982, "epoch": 6.151079136690647, "step": 6840 }, { "epoch": 6.160071942446043, "grad_norm": 0.31629452109336853, "learning_rate": 9.88791266994396e-05, "loss": 0.0235, "step": 6850 }, { "action_loss": 0.010991898365318775, "epoch": 6.160071942446043, "step": 6850 }, { "epoch": 6.169064748201439, "grad_norm": 0.356024831533432, "learning_rate": 9.887331690335223e-05, "loss": 0.0212, "step": 6860 }, { "action_loss": 0.02618209272623062, "epoch": 6.169064748201439, "step": 6860 }, { "epoch": 6.178057553956835, "grad_norm": 0.27273741364479065, "learning_rate": 9.886749226084163e-05, "loss": 0.0207, "step": 6870 }, { "action_loss": 0.015842987224459648, "epoch": 6.178057553956835, "step": 6870 }, { "epoch": 6.18705035971223, "grad_norm": 0.32432082295417786, "learning_rate": 9.886165277367714e-05, "loss": 0.0257, "step": 6880 }, { "action_loss": 0.026533154770731926, "epoch": 6.18705035971223, "step": 6880 }, { "epoch": 6.196043165467626, "grad_norm": 0.3357093632221222, "learning_rate": 9.885579844363265e-05, "loss": 0.019, "step": 6890 }, { "action_loss": 0.02344350516796112, "epoch": 6.196043165467626, "step": 6890 }, { "epoch": 6.205035971223022, "grad_norm": 0.34798166155815125, "learning_rate": 9.884992927248656e-05, "loss": 0.0221, "step": 6900 }, { "action_loss": 0.010418943129479885, "epoch": 6.205035971223022, "step": 6900 }, { "epoch": 6.2140287769784175, "grad_norm": 0.3184918463230133, "learning_rate": 9.884404526202178e-05, "loss": 0.0249, "step": 6910 }, { "action_loss": 0.027707433328032494, "epoch": 6.2140287769784175, "step": 6910 }, { "epoch": 6.223021582733813, "grad_norm": 0.23153001070022583, "learning_rate": 9.883814641402568e-05, "loss": 0.0205, "step": 6920 }, { "action_loss": 0.014671746641397476, "epoch": 6.223021582733813, "step": 6920 }, { "epoch": 6.232014388489208, "grad_norm": 0.19718283414840698, "learning_rate": 9.88322327302902e-05, "loss": 0.0169, "step": 6930 }, { "action_loss": 0.01502750813961029, "epoch": 6.232014388489208, "step": 6930 }, { "epoch": 6.241007194244604, "grad_norm": 0.2145366072654724, "learning_rate": 9.882630421261176e-05, "loss": 0.018, "step": 6940 }, { "action_loss": 0.012581723742187023, "epoch": 6.241007194244604, "step": 6940 }, { "epoch": 6.25, "grad_norm": 0.14946947991847992, "learning_rate": 9.88203608627913e-05, "loss": 0.0176, "step": 6950 }, { "action_loss": 0.02015654556453228, "epoch": 6.25, "step": 6950 }, { "epoch": 6.258992805755396, "grad_norm": 0.28957825899124146, "learning_rate": 9.881440268263422e-05, "loss": 0.0198, "step": 6960 }, { "action_loss": 0.010862755589187145, "epoch": 6.258992805755396, "step": 6960 }, { "epoch": 6.267985611510792, "grad_norm": 0.3026683032512665, "learning_rate": 9.880842967395048e-05, "loss": 0.0185, "step": 6970 }, { "action_loss": 0.020301582291722298, "epoch": 6.267985611510792, "step": 6970 }, { "epoch": 6.276978417266187, "grad_norm": 0.20700383186340332, "learning_rate": 9.880244183855452e-05, "loss": 0.0194, "step": 6980 }, { "action_loss": 0.014841916970908642, "epoch": 6.276978417266187, "step": 6980 }, { "epoch": 6.2859712230215825, "grad_norm": 0.33426690101623535, "learning_rate": 9.879643917826527e-05, "loss": 0.0302, "step": 6990 }, { "action_loss": 0.045035433024168015, "epoch": 6.2859712230215825, "step": 6990 }, { "epoch": 6.294964028776978, "grad_norm": 0.3017018437385559, "learning_rate": 9.87904216949062e-05, "loss": 0.0186, "step": 7000 }, { "action_loss": 0.024630418047308922, "epoch": 6.294964028776978, "step": 7000 }, { "epoch": 6.303956834532374, "grad_norm": 0.3138062357902527, "learning_rate": 9.878438939030526e-05, "loss": 0.0228, "step": 7010 }, { "action_loss": 0.019860530272126198, "epoch": 6.303956834532374, "step": 7010 }, { "epoch": 6.31294964028777, "grad_norm": 0.242841899394989, "learning_rate": 9.877834226629489e-05, "loss": 0.0209, "step": 7020 }, { "action_loss": 0.009156775660812855, "epoch": 6.31294964028777, "step": 7020 }, { "epoch": 6.321942446043165, "grad_norm": 0.21984292566776276, "learning_rate": 9.877228032471206e-05, "loss": 0.0185, "step": 7030 }, { "action_loss": 0.02642202377319336, "epoch": 6.321942446043165, "step": 7030 }, { "epoch": 6.330935251798561, "grad_norm": 0.2979413866996765, "learning_rate": 9.876620356739823e-05, "loss": 0.0199, "step": 7040 }, { "action_loss": 0.0204203762114048, "epoch": 6.330935251798561, "step": 7040 }, { "epoch": 6.339928057553957, "grad_norm": 0.2807735800743103, "learning_rate": 9.876011199619935e-05, "loss": 0.0199, "step": 7050 }, { "action_loss": 0.006892655044794083, "epoch": 6.339928057553957, "step": 7050 }, { "epoch": 6.348920863309353, "grad_norm": 0.30982258915901184, "learning_rate": 9.875400561296589e-05, "loss": 0.0174, "step": 7060 }, { "action_loss": 0.016153693199157715, "epoch": 6.348920863309353, "step": 7060 }, { "epoch": 6.357913669064748, "grad_norm": 0.25397852063179016, "learning_rate": 9.874788441955278e-05, "loss": 0.0219, "step": 7070 }, { "action_loss": 0.016508806496858597, "epoch": 6.357913669064748, "step": 7070 }, { "epoch": 6.366906474820144, "grad_norm": 0.30077314376831055, "learning_rate": 9.874174841781951e-05, "loss": 0.022, "step": 7080 }, { "action_loss": 0.021306678652763367, "epoch": 6.366906474820144, "step": 7080 }, { "epoch": 6.375899280575539, "grad_norm": 0.24570521712303162, "learning_rate": 9.873559760963003e-05, "loss": 0.0178, "step": 7090 }, { "action_loss": 0.035274114459753036, "epoch": 6.375899280575539, "step": 7090 }, { "epoch": 6.384892086330935, "grad_norm": 0.2831066846847534, "learning_rate": 9.872943199685278e-05, "loss": 0.0203, "step": 7100 }, { "action_loss": 0.021252432838082314, "epoch": 6.384892086330935, "step": 7100 }, { "epoch": 6.393884892086331, "grad_norm": 0.3283224105834961, "learning_rate": 9.872325158136071e-05, "loss": 0.0232, "step": 7110 }, { "action_loss": 0.008344274014234543, "epoch": 6.393884892086331, "step": 7110 }, { "epoch": 6.402877697841727, "grad_norm": 0.3598700165748596, "learning_rate": 9.871705636503128e-05, "loss": 0.0204, "step": 7120 }, { "action_loss": 0.020297368988394737, "epoch": 6.402877697841727, "step": 7120 }, { "epoch": 6.411870503597123, "grad_norm": 0.24526067078113556, "learning_rate": 9.871084634974641e-05, "loss": 0.0228, "step": 7130 }, { "action_loss": 0.0343635268509388, "epoch": 6.411870503597123, "step": 7130 }, { "epoch": 6.420863309352518, "grad_norm": 0.29494667053222656, "learning_rate": 9.870462153739257e-05, "loss": 0.0234, "step": 7140 }, { "action_loss": 0.016751598566770554, "epoch": 6.420863309352518, "step": 7140 }, { "epoch": 6.429856115107913, "grad_norm": 0.315438449382782, "learning_rate": 9.869838192986067e-05, "loss": 0.0212, "step": 7150 }, { "action_loss": 0.018617885187268257, "epoch": 6.429856115107913, "step": 7150 }, { "epoch": 6.438848920863309, "grad_norm": 0.25807973742485046, "learning_rate": 9.869212752904616e-05, "loss": 0.0245, "step": 7160 }, { "action_loss": 0.020835846662521362, "epoch": 6.438848920863309, "step": 7160 }, { "epoch": 6.447841726618705, "grad_norm": 0.4024669826030731, "learning_rate": 9.868585833684894e-05, "loss": 0.0187, "step": 7170 }, { "action_loss": 0.020954489707946777, "epoch": 6.447841726618705, "step": 7170 }, { "epoch": 6.456834532374101, "grad_norm": 0.2684771716594696, "learning_rate": 9.867957435517342e-05, "loss": 0.0188, "step": 7180 }, { "action_loss": 0.023254746571183205, "epoch": 6.456834532374101, "step": 7180 }, { "epoch": 6.465827338129497, "grad_norm": 0.3548455238342285, "learning_rate": 9.867327558592854e-05, "loss": 0.0213, "step": 7190 }, { "action_loss": 0.017449496313929558, "epoch": 6.465827338129497, "step": 7190 }, { "epoch": 6.474820143884892, "grad_norm": 0.2983715236186981, "learning_rate": 9.866696203102766e-05, "loss": 0.0242, "step": 7200 }, { "action_loss": 0.007860168814659119, "epoch": 6.474820143884892, "step": 7200 }, { "epoch": 6.483812949640288, "grad_norm": 0.3338952362537384, "learning_rate": 9.86606336923887e-05, "loss": 0.0168, "step": 7210 }, { "action_loss": 0.016489407047629356, "epoch": 6.483812949640288, "step": 7210 }, { "epoch": 6.4928057553956835, "grad_norm": 0.3433644771575928, "learning_rate": 9.865429057193403e-05, "loss": 0.0162, "step": 7220 }, { "action_loss": 0.03174230828881264, "epoch": 6.4928057553956835, "step": 7220 }, { "epoch": 6.501798561151079, "grad_norm": 0.33528950810432434, "learning_rate": 9.864793267159053e-05, "loss": 0.0192, "step": 7230 }, { "action_loss": 0.008824189193546772, "epoch": 6.501798561151079, "step": 7230 }, { "epoch": 6.510791366906475, "grad_norm": 0.22299011051654816, "learning_rate": 9.864155999328957e-05, "loss": 0.0192, "step": 7240 }, { "action_loss": 0.011988602578639984, "epoch": 6.510791366906475, "step": 7240 }, { "epoch": 6.51978417266187, "grad_norm": 0.34687942266464233, "learning_rate": 9.8635172538967e-05, "loss": 0.0216, "step": 7250 }, { "action_loss": 0.03203071653842926, "epoch": 6.51978417266187, "step": 7250 }, { "epoch": 6.528776978417266, "grad_norm": 0.2956157624721527, "learning_rate": 9.862877031056312e-05, "loss": 0.0194, "step": 7260 }, { "action_loss": 0.015532615594565868, "epoch": 6.528776978417266, "step": 7260 }, { "epoch": 6.537769784172662, "grad_norm": 0.2990027964115143, "learning_rate": 9.862235331002279e-05, "loss": 0.0238, "step": 7270 }, { "action_loss": 0.017529157921671867, "epoch": 6.537769784172662, "step": 7270 }, { "epoch": 6.546762589928058, "grad_norm": 0.2941325902938843, "learning_rate": 9.861592153929533e-05, "loss": 0.0177, "step": 7280 }, { "action_loss": 0.010947409085929394, "epoch": 6.546762589928058, "step": 7280 }, { "epoch": 6.555755395683454, "grad_norm": 0.4692065417766571, "learning_rate": 9.860947500033455e-05, "loss": 0.0196, "step": 7290 }, { "action_loss": 0.04297139123082161, "epoch": 6.555755395683454, "step": 7290 }, { "epoch": 6.564748201438849, "grad_norm": 0.34149131178855896, "learning_rate": 9.86030136950987e-05, "loss": 0.0214, "step": 7300 }, { "action_loss": 0.015882808715105057, "epoch": 6.564748201438849, "step": 7300 }, { "epoch": 6.573741007194244, "grad_norm": 0.22664830088615417, "learning_rate": 9.85965376255506e-05, "loss": 0.0175, "step": 7310 }, { "action_loss": 0.012436280958354473, "epoch": 6.573741007194244, "step": 7310 }, { "epoch": 6.58273381294964, "grad_norm": 0.29965439438819885, "learning_rate": 9.859004679365747e-05, "loss": 0.0154, "step": 7320 }, { "action_loss": 0.017414553090929985, "epoch": 6.58273381294964, "step": 7320 }, { "epoch": 6.591726618705036, "grad_norm": 0.20771104097366333, "learning_rate": 9.858354120139108e-05, "loss": 0.0174, "step": 7330 }, { "action_loss": 0.027338100597262383, "epoch": 6.591726618705036, "step": 7330 }, { "epoch": 6.600719424460432, "grad_norm": 0.26575252413749695, "learning_rate": 9.857702085072764e-05, "loss": 0.0188, "step": 7340 }, { "action_loss": 0.012038412503898144, "epoch": 6.600719424460432, "step": 7340 }, { "epoch": 6.609712230215827, "grad_norm": 0.2960476875305176, "learning_rate": 9.857048574364787e-05, "loss": 0.0185, "step": 7350 }, { "action_loss": 0.010211647488176823, "epoch": 6.609712230215827, "step": 7350 }, { "epoch": 6.618705035971223, "grad_norm": 0.21784254908561707, "learning_rate": 9.856393588213698e-05, "loss": 0.02, "step": 7360 }, { "action_loss": 0.014717415906488895, "epoch": 6.618705035971223, "step": 7360 }, { "epoch": 6.627697841726619, "grad_norm": 0.22652465105056763, "learning_rate": 9.855737126818458e-05, "loss": 0.0201, "step": 7370 }, { "action_loss": 0.007539385464042425, "epoch": 6.627697841726619, "step": 7370 }, { "epoch": 6.636690647482014, "grad_norm": 0.29453378915786743, "learning_rate": 9.855079190378491e-05, "loss": 0.0195, "step": 7380 }, { "action_loss": 0.023827454075217247, "epoch": 6.636690647482014, "step": 7380 }, { "epoch": 6.64568345323741, "grad_norm": 0.3060605823993683, "learning_rate": 9.854419779093655e-05, "loss": 0.0185, "step": 7390 }, { "action_loss": 0.019570020958781242, "epoch": 6.64568345323741, "step": 7390 }, { "epoch": 6.654676258992806, "grad_norm": 0.506902813911438, "learning_rate": 9.853758893164264e-05, "loss": 0.0176, "step": 7400 }, { "action_loss": 0.015384330414235592, "epoch": 6.654676258992806, "step": 7400 }, { "epoch": 6.663669064748201, "grad_norm": 0.23968574404716492, "learning_rate": 9.853096532791078e-05, "loss": 0.0185, "step": 7410 }, { "action_loss": 0.017024297267198563, "epoch": 6.663669064748201, "step": 7410 }, { "epoch": 6.672661870503597, "grad_norm": 0.30809682607650757, "learning_rate": 9.852432698175304e-05, "loss": 0.0264, "step": 7420 }, { "action_loss": 0.014294911175966263, "epoch": 6.672661870503597, "step": 7420 }, { "epoch": 6.681654676258993, "grad_norm": 0.2682138979434967, "learning_rate": 9.851767389518597e-05, "loss": 0.0222, "step": 7430 }, { "action_loss": 0.010263250209391117, "epoch": 6.681654676258993, "step": 7430 }, { "epoch": 6.690647482014389, "grad_norm": 0.3465544581413269, "learning_rate": 9.85110060702306e-05, "loss": 0.0174, "step": 7440 }, { "action_loss": 0.009757877327501774, "epoch": 6.690647482014389, "step": 7440 }, { "epoch": 6.6996402877697845, "grad_norm": 0.27929309010505676, "learning_rate": 9.850432350891245e-05, "loss": 0.0143, "step": 7450 }, { "action_loss": 0.013704399578273296, "epoch": 6.6996402877697845, "step": 7450 }, { "epoch": 6.7086330935251794, "grad_norm": 0.2500741183757782, "learning_rate": 9.84976262132615e-05, "loss": 0.0166, "step": 7460 }, { "action_loss": 0.009545352309942245, "epoch": 6.7086330935251794, "step": 7460 }, { "epoch": 6.717625899280575, "grad_norm": 0.17879438400268555, "learning_rate": 9.849091418531222e-05, "loss": 0.0176, "step": 7470 }, { "action_loss": 0.0211211945861578, "epoch": 6.717625899280575, "step": 7470 }, { "epoch": 6.726618705035971, "grad_norm": 0.3373793065547943, "learning_rate": 9.848418742710353e-05, "loss": 0.0218, "step": 7480 }, { "action_loss": 0.006481905933469534, "epoch": 6.726618705035971, "step": 7480 }, { "epoch": 6.735611510791367, "grad_norm": 0.2858169376850128, "learning_rate": 9.847744594067885e-05, "loss": 0.0173, "step": 7490 }, { "action_loss": 0.02018602378666401, "epoch": 6.735611510791367, "step": 7490 }, { "epoch": 6.744604316546763, "grad_norm": 0.25473859906196594, "learning_rate": 9.847068972808607e-05, "loss": 0.0188, "step": 7500 }, { "action_loss": 0.011279801838099957, "epoch": 6.744604316546763, "step": 7500 }, { "epoch": 6.753597122302159, "grad_norm": 0.3563852906227112, "learning_rate": 9.846391879137756e-05, "loss": 0.0156, "step": 7510 }, { "action_loss": 0.011117897927761078, "epoch": 6.753597122302159, "step": 7510 }, { "epoch": 6.762589928057554, "grad_norm": 0.40119317173957825, "learning_rate": 9.845713313261012e-05, "loss": 0.0235, "step": 7520 }, { "action_loss": 0.02026752382516861, "epoch": 6.762589928057554, "step": 7520 }, { "epoch": 6.7715827338129495, "grad_norm": 0.4130205810070038, "learning_rate": 9.845033275384505e-05, "loss": 0.0255, "step": 7530 }, { "action_loss": 0.012912980280816555, "epoch": 6.7715827338129495, "step": 7530 }, { "epoch": 6.780575539568345, "grad_norm": 0.32953473925590515, "learning_rate": 9.844351765714818e-05, "loss": 0.0191, "step": 7540 }, { "action_loss": 0.03568832203745842, "epoch": 6.780575539568345, "step": 7540 }, { "epoch": 6.789568345323741, "grad_norm": 0.2705666124820709, "learning_rate": 9.843668784458971e-05, "loss": 0.021, "step": 7550 }, { "action_loss": 0.008631044067442417, "epoch": 6.789568345323741, "step": 7550 }, { "epoch": 6.798561151079137, "grad_norm": 0.23703652620315552, "learning_rate": 9.842984331824437e-05, "loss": 0.0154, "step": 7560 }, { "action_loss": 0.017413698136806488, "epoch": 6.798561151079137, "step": 7560 }, { "epoch": 6.807553956834532, "grad_norm": 0.2941708564758301, "learning_rate": 9.842298408019133e-05, "loss": 0.0127, "step": 7570 }, { "action_loss": 0.017051542177796364, "epoch": 6.807553956834532, "step": 7570 }, { "epoch": 6.816546762589928, "grad_norm": 0.3064100444316864, "learning_rate": 9.841611013251429e-05, "loss": 0.0197, "step": 7580 }, { "action_loss": 0.007862796075642109, "epoch": 6.816546762589928, "step": 7580 }, { "epoch": 6.825539568345324, "grad_norm": 0.2552568316459656, "learning_rate": 9.840922147730133e-05, "loss": 0.0193, "step": 7590 }, { "action_loss": 0.02449212409555912, "epoch": 6.825539568345324, "step": 7590 }, { "epoch": 6.83453237410072, "grad_norm": 0.3470834791660309, "learning_rate": 9.840231811664506e-05, "loss": 0.0172, "step": 7600 }, { "action_loss": 0.014202051796019077, "epoch": 6.83453237410072, "step": 7600 }, { "epoch": 6.843525179856115, "grad_norm": 0.2551996111869812, "learning_rate": 9.839540005264252e-05, "loss": 0.0168, "step": 7610 }, { "action_loss": 0.015502442605793476, "epoch": 6.843525179856115, "step": 7610 }, { "epoch": 6.852517985611511, "grad_norm": 0.24726749956607819, "learning_rate": 9.838846728739527e-05, "loss": 0.0136, "step": 7620 }, { "action_loss": 0.009455403313040733, "epoch": 6.852517985611511, "step": 7620 }, { "epoch": 6.861510791366906, "grad_norm": 0.281046062707901, "learning_rate": 9.838151982300927e-05, "loss": 0.0254, "step": 7630 }, { "action_loss": 0.01526570599526167, "epoch": 6.861510791366906, "step": 7630 }, { "epoch": 6.870503597122302, "grad_norm": 0.27580952644348145, "learning_rate": 9.8374557661595e-05, "loss": 0.0152, "step": 7640 }, { "action_loss": 0.03343823179602623, "epoch": 6.870503597122302, "step": 7640 }, { "epoch": 6.879496402877698, "grad_norm": 0.24670690298080444, "learning_rate": 9.836758080526735e-05, "loss": 0.0204, "step": 7650 }, { "action_loss": 0.014633969403803349, "epoch": 6.879496402877698, "step": 7650 }, { "epoch": 6.888489208633094, "grad_norm": 0.2815382182598114, "learning_rate": 9.836058925614575e-05, "loss": 0.0161, "step": 7660 }, { "action_loss": 0.009852384217083454, "epoch": 6.888489208633094, "step": 7660 }, { "epoch": 6.897482014388489, "grad_norm": 0.25321075320243835, "learning_rate": 9.8353583016354e-05, "loss": 0.017, "step": 7670 }, { "action_loss": 0.018100498244166374, "epoch": 6.897482014388489, "step": 7670 }, { "epoch": 6.906474820143885, "grad_norm": 0.2240813821554184, "learning_rate": 9.834656208802044e-05, "loss": 0.0216, "step": 7680 }, { "action_loss": 0.026571378111839294, "epoch": 6.906474820143885, "step": 7680 }, { "epoch": 6.91546762589928, "grad_norm": 0.3247012197971344, "learning_rate": 9.833952647327784e-05, "loss": 0.0206, "step": 7690 }, { "action_loss": 0.007810087408870459, "epoch": 6.91546762589928, "step": 7690 }, { "epoch": 6.924460431654676, "grad_norm": 0.40621548891067505, "learning_rate": 9.833247617426342e-05, "loss": 0.0194, "step": 7700 }, { "action_loss": 0.015748845413327217, "epoch": 6.924460431654676, "step": 7700 }, { "epoch": 6.933453237410072, "grad_norm": 0.2708393633365631, "learning_rate": 9.832541119311889e-05, "loss": 0.0196, "step": 7710 }, { "action_loss": 0.030445903539657593, "epoch": 6.933453237410072, "step": 7710 }, { "epoch": 6.942446043165468, "grad_norm": 0.29381605982780457, "learning_rate": 9.83183315319904e-05, "loss": 0.023, "step": 7720 }, { "action_loss": 0.0460466593503952, "epoch": 6.942446043165468, "step": 7720 }, { "epoch": 6.951438848920863, "grad_norm": 0.31997090578079224, "learning_rate": 9.831123719302855e-05, "loss": 0.0204, "step": 7730 }, { "action_loss": 0.01766912080347538, "epoch": 6.951438848920863, "step": 7730 }, { "epoch": 6.960431654676259, "grad_norm": 0.32427239418029785, "learning_rate": 9.830412817838842e-05, "loss": 0.018, "step": 7740 }, { "action_loss": 0.023873144760727882, "epoch": 6.960431654676259, "step": 7740 }, { "epoch": 6.969424460431655, "grad_norm": 0.3192010819911957, "learning_rate": 9.829700449022956e-05, "loss": 0.019, "step": 7750 }, { "action_loss": 0.03593564033508301, "epoch": 6.969424460431655, "step": 7750 }, { "epoch": 6.9784172661870505, "grad_norm": 0.2118740677833557, "learning_rate": 9.828986613071593e-05, "loss": 0.0171, "step": 7760 }, { "action_loss": 0.010955487377941608, "epoch": 6.9784172661870505, "step": 7760 }, { "epoch": 6.987410071942446, "grad_norm": 0.20989638566970825, "learning_rate": 9.828271310201601e-05, "loss": 0.0154, "step": 7770 }, { "action_loss": 0.010604918003082275, "epoch": 6.987410071942446, "step": 7770 }, { "epoch": 6.996402877697841, "grad_norm": 0.25134581327438354, "learning_rate": 9.827554540630268e-05, "loss": 0.0155, "step": 7780 }, { "action_loss": 0.0283664558082819, "epoch": 6.996402877697841, "step": 7780 }, { "epoch": 7.005395683453237, "grad_norm": 0.24590085446834564, "learning_rate": 9.826836304575329e-05, "loss": 0.0222, "step": 7790 }, { "action_loss": 0.017182521522045135, "epoch": 7.005395683453237, "step": 7790 }, { "epoch": 7.014388489208633, "grad_norm": 0.25064170360565186, "learning_rate": 9.826116602254966e-05, "loss": 0.0153, "step": 7800 }, { "action_loss": 0.021077007055282593, "epoch": 7.014388489208633, "step": 7800 }, { "epoch": 7.023381294964029, "grad_norm": 0.2714018225669861, "learning_rate": 9.825395433887805e-05, "loss": 0.0181, "step": 7810 }, { "action_loss": 0.01939513348042965, "epoch": 7.023381294964029, "step": 7810 }, { "epoch": 7.032374100719425, "grad_norm": 0.28524360060691833, "learning_rate": 9.824672799692917e-05, "loss": 0.0169, "step": 7820 }, { "action_loss": 0.016223514452576637, "epoch": 7.032374100719425, "step": 7820 }, { "epoch": 7.0413669064748206, "grad_norm": 0.3766704201698303, "learning_rate": 9.823948699889823e-05, "loss": 0.0217, "step": 7830 }, { "action_loss": 0.010938149876892567, "epoch": 7.0413669064748206, "step": 7830 }, { "epoch": 7.0503597122302155, "grad_norm": 0.376211941242218, "learning_rate": 9.823223134698483e-05, "loss": 0.0173, "step": 7840 }, { "action_loss": 0.02111765183508396, "epoch": 7.0503597122302155, "step": 7840 }, { "epoch": 7.059352517985611, "grad_norm": 0.2566628158092499, "learning_rate": 9.822496104339303e-05, "loss": 0.0205, "step": 7850 }, { "action_loss": 0.020675357431173325, "epoch": 7.059352517985611, "step": 7850 }, { "epoch": 7.068345323741007, "grad_norm": 0.3175516128540039, "learning_rate": 9.821767609033138e-05, "loss": 0.0191, "step": 7860 }, { "action_loss": 0.02791461907327175, "epoch": 7.068345323741007, "step": 7860 }, { "epoch": 7.077338129496403, "grad_norm": 0.3269846737384796, "learning_rate": 9.821037649001284e-05, "loss": 0.0205, "step": 7870 }, { "action_loss": 0.021144067868590355, "epoch": 7.077338129496403, "step": 7870 }, { "epoch": 7.086330935251799, "grad_norm": 0.42428335547447205, "learning_rate": 9.820306224465486e-05, "loss": 0.0187, "step": 7880 }, { "action_loss": 0.01038882602006197, "epoch": 7.086330935251799, "step": 7880 }, { "epoch": 7.095323741007194, "grad_norm": 0.26876944303512573, "learning_rate": 9.819573335647928e-05, "loss": 0.0175, "step": 7890 }, { "action_loss": 0.008972861804068089, "epoch": 7.095323741007194, "step": 7890 }, { "epoch": 7.10431654676259, "grad_norm": 0.3100148141384125, "learning_rate": 9.818838982771246e-05, "loss": 0.0216, "step": 7900 }, { "action_loss": 0.007310878485441208, "epoch": 7.10431654676259, "step": 7900 }, { "epoch": 7.113309352517986, "grad_norm": 0.25345975160598755, "learning_rate": 9.818103166058514e-05, "loss": 0.016, "step": 7910 }, { "action_loss": 0.019732819870114326, "epoch": 7.113309352517986, "step": 7910 }, { "epoch": 7.122302158273381, "grad_norm": 0.3107471466064453, "learning_rate": 9.817365885733254e-05, "loss": 0.0187, "step": 7920 }, { "action_loss": 0.014871921390295029, "epoch": 7.122302158273381, "step": 7920 }, { "epoch": 7.131294964028777, "grad_norm": 0.24996928870677948, "learning_rate": 9.816627142019434e-05, "loss": 0.0173, "step": 7930 }, { "action_loss": 0.01558010559529066, "epoch": 7.131294964028777, "step": 7930 }, { "epoch": 7.140287769784172, "grad_norm": 0.37798625230789185, "learning_rate": 9.815886935141463e-05, "loss": 0.0198, "step": 7940 }, { "action_loss": 0.011112001724541187, "epoch": 7.140287769784172, "step": 7940 }, { "epoch": 7.149280575539568, "grad_norm": 0.2505195736885071, "learning_rate": 9.8151452653242e-05, "loss": 0.0157, "step": 7950 }, { "action_loss": 0.0114620141685009, "epoch": 7.149280575539568, "step": 7950 }, { "epoch": 7.158273381294964, "grad_norm": 0.3305523097515106, "learning_rate": 9.814402132792939e-05, "loss": 0.0176, "step": 7960 }, { "action_loss": 0.034490521997213364, "epoch": 7.158273381294964, "step": 7960 }, { "epoch": 7.16726618705036, "grad_norm": 0.24461966753005981, "learning_rate": 9.813657537773428e-05, "loss": 0.0179, "step": 7970 }, { "action_loss": 0.01829901523888111, "epoch": 7.16726618705036, "step": 7970 }, { "epoch": 7.176258992805756, "grad_norm": 0.35060420632362366, "learning_rate": 9.812911480491854e-05, "loss": 0.0176, "step": 7980 }, { "action_loss": 0.018363287672400475, "epoch": 7.176258992805756, "step": 7980 }, { "epoch": 7.1852517985611515, "grad_norm": 0.362661212682724, "learning_rate": 9.81216396117485e-05, "loss": 0.0189, "step": 7990 }, { "action_loss": 0.022500017657876015, "epoch": 7.1852517985611515, "step": 7990 }, { "epoch": 7.194244604316546, "grad_norm": 0.2479870319366455, "learning_rate": 9.811414980049491e-05, "loss": 0.0177, "step": 8000 }, { "action_loss": 0.024350540712475777, "epoch": 7.194244604316546, "step": 8000 }, { "epoch": 7.203237410071942, "grad_norm": 0.30506283044815063, "learning_rate": 9.810664537343301e-05, "loss": 0.016, "step": 8010 }, { "action_loss": 0.027921190485358238, "epoch": 7.203237410071942, "step": 8010 }, { "epoch": 7.212230215827338, "grad_norm": 0.2214093655347824, "learning_rate": 9.809912633284243e-05, "loss": 0.0161, "step": 8020 }, { "action_loss": 0.011478248052299023, "epoch": 7.212230215827338, "step": 8020 }, { "epoch": 7.221223021582734, "grad_norm": 0.31081050634384155, "learning_rate": 9.809159268100725e-05, "loss": 0.0154, "step": 8030 }, { "action_loss": 0.01288976613432169, "epoch": 7.221223021582734, "step": 8030 }, { "epoch": 7.23021582733813, "grad_norm": 0.3054218590259552, "learning_rate": 9.808404442021599e-05, "loss": 0.0168, "step": 8040 }, { "action_loss": 0.01948205754160881, "epoch": 7.23021582733813, "step": 8040 }, { "epoch": 7.239208633093525, "grad_norm": 0.36993566155433655, "learning_rate": 9.807648155276163e-05, "loss": 0.0174, "step": 8050 }, { "action_loss": 0.034769345074892044, "epoch": 7.239208633093525, "step": 8050 }, { "epoch": 7.248201438848921, "grad_norm": 0.30476313829421997, "learning_rate": 9.806890408094156e-05, "loss": 0.0186, "step": 8060 }, { "action_loss": 0.00989876501262188, "epoch": 7.248201438848921, "step": 8060 }, { "epoch": 7.2571942446043165, "grad_norm": 0.30374887585639954, "learning_rate": 9.806131200705761e-05, "loss": 0.0161, "step": 8070 }, { "action_loss": 0.02147611789405346, "epoch": 7.2571942446043165, "step": 8070 }, { "epoch": 7.266187050359712, "grad_norm": 0.1779385805130005, "learning_rate": 9.805370533341605e-05, "loss": 0.0187, "step": 8080 }, { "action_loss": 0.02041824348270893, "epoch": 7.266187050359712, "step": 8080 }, { "epoch": 7.275179856115108, "grad_norm": 0.22305338084697723, "learning_rate": 9.804608406232762e-05, "loss": 0.0146, "step": 8090 }, { "action_loss": 0.014676245860755444, "epoch": 7.275179856115108, "step": 8090 }, { "epoch": 7.284172661870503, "grad_norm": 0.20044788718223572, "learning_rate": 9.803844819610741e-05, "loss": 0.0165, "step": 8100 }, { "action_loss": 0.03858426213264465, "epoch": 7.284172661870503, "step": 8100 }, { "epoch": 7.293165467625899, "grad_norm": 0.2696290910243988, "learning_rate": 9.803079773707504e-05, "loss": 0.0163, "step": 8110 }, { "action_loss": 0.02034558169543743, "epoch": 7.293165467625899, "step": 8110 }, { "epoch": 7.302158273381295, "grad_norm": 0.20940834283828735, "learning_rate": 9.802313268755447e-05, "loss": 0.0194, "step": 8120 }, { "action_loss": 0.02189931832253933, "epoch": 7.302158273381295, "step": 8120 }, { "epoch": 7.311151079136691, "grad_norm": 0.32224929332733154, "learning_rate": 9.801545304987419e-05, "loss": 0.0163, "step": 8130 }, { "action_loss": 0.018155667930841446, "epoch": 7.311151079136691, "step": 8130 }, { "epoch": 7.320143884892087, "grad_norm": 0.30877360701560974, "learning_rate": 9.800775882636704e-05, "loss": 0.017, "step": 8140 }, { "action_loss": 0.021677568554878235, "epoch": 7.320143884892087, "step": 8140 }, { "epoch": 7.329136690647482, "grad_norm": 0.24781373143196106, "learning_rate": 9.800005001937034e-05, "loss": 0.0148, "step": 8150 }, { "action_loss": 0.024377690628170967, "epoch": 7.329136690647482, "step": 8150 }, { "epoch": 7.338129496402877, "grad_norm": 0.3374313712120056, "learning_rate": 9.79923266312258e-05, "loss": 0.0162, "step": 8160 }, { "action_loss": 0.024024447426199913, "epoch": 7.338129496402877, "step": 8160 }, { "epoch": 7.347122302158273, "grad_norm": 0.25472816824913025, "learning_rate": 9.79845886642796e-05, "loss": 0.017, "step": 8170 }, { "action_loss": 0.011414088308811188, "epoch": 7.347122302158273, "step": 8170 }, { "epoch": 7.356115107913669, "grad_norm": 0.26729273796081543, "learning_rate": 9.797683612088233e-05, "loss": 0.0152, "step": 8180 }, { "action_loss": 0.0191060658544302, "epoch": 7.356115107913669, "step": 8180 }, { "epoch": 7.365107913669065, "grad_norm": 0.2805665135383606, "learning_rate": 9.796906900338898e-05, "loss": 0.0174, "step": 8190 }, { "action_loss": 0.014708306640386581, "epoch": 7.365107913669065, "step": 8190 }, { "epoch": 7.374100719424461, "grad_norm": 0.318004846572876, "learning_rate": 9.796128731415903e-05, "loss": 0.018, "step": 8200 }, { "action_loss": 0.010518890805542469, "epoch": 7.374100719424461, "step": 8200 }, { "epoch": 7.383093525179856, "grad_norm": 0.3092309236526489, "learning_rate": 9.795349105555634e-05, "loss": 0.0154, "step": 8210 }, { "action_loss": 0.027910655364394188, "epoch": 7.383093525179856, "step": 8210 }, { "epoch": 7.392086330935252, "grad_norm": 0.3386319875717163, "learning_rate": 9.794568022994922e-05, "loss": 0.0187, "step": 8220 }, { "action_loss": 0.011870034039020538, "epoch": 7.392086330935252, "step": 8220 }, { "epoch": 7.401079136690647, "grad_norm": 0.3482937812805176, "learning_rate": 9.793785483971034e-05, "loss": 0.0173, "step": 8230 }, { "action_loss": 0.012087489478290081, "epoch": 7.401079136690647, "step": 8230 }, { "epoch": 7.410071942446043, "grad_norm": 0.3390848636627197, "learning_rate": 9.793001488721691e-05, "loss": 0.0175, "step": 8240 }, { "action_loss": 0.02066199667751789, "epoch": 7.410071942446043, "step": 8240 }, { "epoch": 7.419064748201439, "grad_norm": 0.24785485863685608, "learning_rate": 9.792216037485047e-05, "loss": 0.017, "step": 8250 }, { "action_loss": 0.015661532059311867, "epoch": 7.419064748201439, "step": 8250 }, { "epoch": 7.428057553956835, "grad_norm": 0.25718143582344055, "learning_rate": 9.791429130499704e-05, "loss": 0.0167, "step": 8260 }, { "action_loss": 0.013595446944236755, "epoch": 7.428057553956835, "step": 8260 }, { "epoch": 7.43705035971223, "grad_norm": 0.2624969184398651, "learning_rate": 9.790640768004698e-05, "loss": 0.0177, "step": 8270 }, { "action_loss": 0.01919376291334629, "epoch": 7.43705035971223, "step": 8270 }, { "epoch": 7.446043165467626, "grad_norm": 0.3926408886909485, "learning_rate": 9.789850950239518e-05, "loss": 0.0186, "step": 8280 }, { "action_loss": 0.016618112102150917, "epoch": 7.446043165467626, "step": 8280 }, { "epoch": 7.455035971223022, "grad_norm": 0.2522355616092682, "learning_rate": 9.789059677444089e-05, "loss": 0.018, "step": 8290 }, { "action_loss": 0.007784303277730942, "epoch": 7.455035971223022, "step": 8290 }, { "epoch": 7.4640287769784175, "grad_norm": 0.24170051515102386, "learning_rate": 9.788266949858776e-05, "loss": 0.012, "step": 8300 }, { "action_loss": 0.009321973659098148, "epoch": 7.4640287769784175, "step": 8300 }, { "epoch": 7.473021582733813, "grad_norm": 0.40941959619522095, "learning_rate": 9.787472767724392e-05, "loss": 0.0121, "step": 8310 }, { "action_loss": 0.011716899462044239, "epoch": 7.473021582733813, "step": 8310 }, { "epoch": 7.482014388489208, "grad_norm": 0.2935205399990082, "learning_rate": 9.786677131282185e-05, "loss": 0.0178, "step": 8320 }, { "action_loss": 0.022639663890004158, "epoch": 7.482014388489208, "step": 8320 }, { "epoch": 7.491007194244604, "grad_norm": 0.22961322963237762, "learning_rate": 9.785880040773853e-05, "loss": 0.0161, "step": 8330 }, { "action_loss": 0.008655085228383541, "epoch": 7.491007194244604, "step": 8330 }, { "epoch": 7.5, "grad_norm": 0.31397849321365356, "learning_rate": 9.785081496441527e-05, "loss": 0.0229, "step": 8340 }, { "action_loss": 0.019548693671822548, "epoch": 7.5, "step": 8340 }, { "epoch": 7.508992805755396, "grad_norm": 0.2885136008262634, "learning_rate": 9.784281498527785e-05, "loss": 0.015, "step": 8350 }, { "action_loss": 0.015061800368130207, "epoch": 7.508992805755396, "step": 8350 }, { "epoch": 7.517985611510792, "grad_norm": 0.2868187427520752, "learning_rate": 9.783480047275646e-05, "loss": 0.0254, "step": 8360 }, { "action_loss": 0.019447071477770805, "epoch": 7.517985611510792, "step": 8360 }, { "epoch": 7.5269784172661875, "grad_norm": 0.3353743255138397, "learning_rate": 9.78267714292857e-05, "loss": 0.0218, "step": 8370 }, { "action_loss": 0.006489855702966452, "epoch": 7.5269784172661875, "step": 8370 }, { "epoch": 7.5359712230215825, "grad_norm": 0.41778242588043213, "learning_rate": 9.781872785730454e-05, "loss": 0.0176, "step": 8380 }, { "action_loss": 0.016871517524123192, "epoch": 7.5359712230215825, "step": 8380 }, { "epoch": 7.544964028776978, "grad_norm": 0.34224194288253784, "learning_rate": 9.781066975925646e-05, "loss": 0.0165, "step": 8390 }, { "action_loss": 0.02178754098713398, "epoch": 7.544964028776978, "step": 8390 }, { "epoch": 7.553956834532374, "grad_norm": 0.2847379148006439, "learning_rate": 9.780259713758928e-05, "loss": 0.0168, "step": 8400 }, { "action_loss": 0.011870143003761768, "epoch": 7.553956834532374, "step": 8400 }, { "epoch": 7.56294964028777, "grad_norm": 0.33601659536361694, "learning_rate": 9.779450999475524e-05, "loss": 0.015, "step": 8410 }, { "action_loss": 0.014420772902667522, "epoch": 7.56294964028777, "step": 8410 }, { "epoch": 7.571942446043165, "grad_norm": 0.27354931831359863, "learning_rate": 9.7786408333211e-05, "loss": 0.0168, "step": 8420 }, { "action_loss": 0.011135924607515335, "epoch": 7.571942446043165, "step": 8420 }, { "epoch": 7.580935251798561, "grad_norm": 0.28308865427970886, "learning_rate": 9.777829215541764e-05, "loss": 0.0174, "step": 8430 }, { "action_loss": 0.009727212600409985, "epoch": 7.580935251798561, "step": 8430 }, { "epoch": 7.589928057553957, "grad_norm": 0.22149330377578735, "learning_rate": 9.777016146384064e-05, "loss": 0.0141, "step": 8440 }, { "action_loss": 0.03694990649819374, "epoch": 7.589928057553957, "step": 8440 }, { "epoch": 7.598920863309353, "grad_norm": 0.22869479656219482, "learning_rate": 9.776201626094988e-05, "loss": 0.0144, "step": 8450 }, { "action_loss": 0.013583135791122913, "epoch": 7.598920863309353, "step": 8450 }, { "epoch": 7.607913669064748, "grad_norm": 0.278329074382782, "learning_rate": 9.775385654921965e-05, "loss": 0.0137, "step": 8460 }, { "action_loss": 0.010380740277469158, "epoch": 7.607913669064748, "step": 8460 }, { "epoch": 7.616906474820144, "grad_norm": 0.2141515016555786, "learning_rate": 9.774568233112868e-05, "loss": 0.0152, "step": 8470 }, { "action_loss": 0.012108679860830307, "epoch": 7.616906474820144, "step": 8470 }, { "epoch": 7.625899280575539, "grad_norm": 0.26100894808769226, "learning_rate": 9.773749360916007e-05, "loss": 0.0184, "step": 8480 }, { "action_loss": 0.024235790595412254, "epoch": 7.625899280575539, "step": 8480 }, { "epoch": 7.634892086330935, "grad_norm": 0.2596968114376068, "learning_rate": 9.772929038580134e-05, "loss": 0.0168, "step": 8490 }, { "action_loss": 0.014000189490616322, "epoch": 7.634892086330935, "step": 8490 }, { "epoch": 7.643884892086331, "grad_norm": 0.37136101722717285, "learning_rate": 9.772107266354439e-05, "loss": 0.0182, "step": 8500 }, { "action_loss": 0.02258661389350891, "epoch": 7.643884892086331, "step": 8500 }, { "epoch": 7.652877697841727, "grad_norm": 0.3398173153400421, "learning_rate": 9.77128404448856e-05, "loss": 0.0239, "step": 8510 }, { "action_loss": 0.01846482791006565, "epoch": 7.652877697841727, "step": 8510 }, { "epoch": 7.661870503597123, "grad_norm": 0.3226674199104309, "learning_rate": 9.770459373232565e-05, "loss": 0.0145, "step": 8520 }, { "action_loss": 0.011668853461742401, "epoch": 7.661870503597123, "step": 8520 }, { "epoch": 7.670863309352518, "grad_norm": 0.40215155482292175, "learning_rate": 9.769633252836969e-05, "loss": 0.0181, "step": 8530 }, { "action_loss": 0.009986658580601215, "epoch": 7.670863309352518, "step": 8530 }, { "epoch": 7.679856115107913, "grad_norm": 0.3587574064731598, "learning_rate": 9.768805683552724e-05, "loss": 0.0156, "step": 8540 }, { "action_loss": 0.015356283634901047, "epoch": 7.679856115107913, "step": 8540 }, { "epoch": 7.688848920863309, "grad_norm": 0.40315526723861694, "learning_rate": 9.767976665631228e-05, "loss": 0.0158, "step": 8550 }, { "action_loss": 0.010689586400985718, "epoch": 7.688848920863309, "step": 8550 }, { "epoch": 7.697841726618705, "grad_norm": 0.4041995406150818, "learning_rate": 9.767146199324311e-05, "loss": 0.0184, "step": 8560 }, { "action_loss": 0.013316052965819836, "epoch": 7.697841726618705, "step": 8560 }, { "epoch": 7.706834532374101, "grad_norm": 0.2920028269290924, "learning_rate": 9.766314284884249e-05, "loss": 0.014, "step": 8570 }, { "action_loss": 0.029693253338336945, "epoch": 7.706834532374101, "step": 8570 }, { "epoch": 7.715827338129497, "grad_norm": 0.3241631090641022, "learning_rate": 9.765480922563752e-05, "loss": 0.018, "step": 8580 }, { "action_loss": 0.015031122602522373, "epoch": 7.715827338129497, "step": 8580 }, { "epoch": 7.724820143884892, "grad_norm": 0.2464205026626587, "learning_rate": 9.764646112615978e-05, "loss": 0.0168, "step": 8590 }, { "action_loss": 0.026525447145104408, "epoch": 7.724820143884892, "step": 8590 }, { "epoch": 7.733812949640288, "grad_norm": 0.26374974846839905, "learning_rate": 9.763809855294517e-05, "loss": 0.0181, "step": 8600 }, { "action_loss": 0.014443911612033844, "epoch": 7.733812949640288, "step": 8600 }, { "epoch": 7.7428057553956835, "grad_norm": 0.2885948717594147, "learning_rate": 9.762972150853404e-05, "loss": 0.0147, "step": 8610 }, { "action_loss": 0.013333238661289215, "epoch": 7.7428057553956835, "step": 8610 }, { "epoch": 7.751798561151079, "grad_norm": 0.32168322801589966, "learning_rate": 9.762132999547111e-05, "loss": 0.0141, "step": 8620 }, { "action_loss": 0.013493165373802185, "epoch": 7.751798561151079, "step": 8620 }, { "epoch": 7.760791366906475, "grad_norm": 2.8161258697509766, "learning_rate": 9.761292401630549e-05, "loss": 0.0173, "step": 8630 }, { "action_loss": 0.017732417210936546, "epoch": 7.760791366906475, "step": 8630 }, { "epoch": 7.76978417266187, "grad_norm": 0.1761811226606369, "learning_rate": 9.76045035735907e-05, "loss": 0.014, "step": 8640 }, { "action_loss": 0.010661640204489231, "epoch": 7.76978417266187, "step": 8640 }, { "epoch": 7.778776978417266, "grad_norm": 0.2681007981300354, "learning_rate": 9.759606866988464e-05, "loss": 0.0168, "step": 8650 }, { "action_loss": 0.007512385491281748, "epoch": 7.778776978417266, "step": 8650 }, { "epoch": 7.787769784172662, "grad_norm": 0.2414700984954834, "learning_rate": 9.758761930774963e-05, "loss": 0.0112, "step": 8660 }, { "action_loss": 0.015255431644618511, "epoch": 7.787769784172662, "step": 8660 }, { "epoch": 7.796762589928058, "grad_norm": 0.24801546335220337, "learning_rate": 9.757915548975235e-05, "loss": 0.0135, "step": 8670 }, { "action_loss": 0.027334632351994514, "epoch": 7.796762589928058, "step": 8670 }, { "epoch": 7.805755395683454, "grad_norm": 0.2889747619628906, "learning_rate": 9.757067721846389e-05, "loss": 0.0163, "step": 8680 }, { "action_loss": 0.01559519860893488, "epoch": 7.805755395683454, "step": 8680 }, { "epoch": 7.814748201438849, "grad_norm": 0.3539731800556183, "learning_rate": 9.756218449645971e-05, "loss": 0.0155, "step": 8690 }, { "action_loss": 0.01497850101441145, "epoch": 7.814748201438849, "step": 8690 }, { "epoch": 7.823741007194244, "grad_norm": 0.3132030665874481, "learning_rate": 9.75536773263197e-05, "loss": 0.0155, "step": 8700 }, { "action_loss": 0.016106901690363884, "epoch": 7.823741007194244, "step": 8700 }, { "epoch": 7.83273381294964, "grad_norm": 0.26825204491615295, "learning_rate": 9.75451557106281e-05, "loss": 0.0124, "step": 8710 }, { "action_loss": 0.012613081373274326, "epoch": 7.83273381294964, "step": 8710 }, { "epoch": 7.841726618705036, "grad_norm": 0.30213820934295654, "learning_rate": 9.753661965197354e-05, "loss": 0.0152, "step": 8720 }, { "action_loss": 0.014278543181717396, "epoch": 7.841726618705036, "step": 8720 }, { "epoch": 7.850719424460432, "grad_norm": 0.25694119930267334, "learning_rate": 9.752806915294908e-05, "loss": 0.0162, "step": 8730 }, { "action_loss": 0.017463313415646553, "epoch": 7.850719424460432, "step": 8730 }, { "epoch": 7.859712230215827, "grad_norm": 0.2952820956707001, "learning_rate": 9.75195042161521e-05, "loss": 0.0156, "step": 8740 }, { "action_loss": 0.007991152815520763, "epoch": 7.859712230215827, "step": 8740 }, { "epoch": 7.868705035971223, "grad_norm": 0.23262940347194672, "learning_rate": 9.751092484418442e-05, "loss": 0.0136, "step": 8750 }, { "action_loss": 0.014816679991781712, "epoch": 7.868705035971223, "step": 8750 }, { "epoch": 7.877697841726619, "grad_norm": 0.20498643815517426, "learning_rate": 9.750233103965224e-05, "loss": 0.0171, "step": 8760 }, { "action_loss": 0.02057665027678013, "epoch": 7.877697841726619, "step": 8760 }, { "epoch": 7.886690647482014, "grad_norm": 0.2751021683216095, "learning_rate": 9.749372280516611e-05, "loss": 0.0169, "step": 8770 }, { "action_loss": 0.008064048364758492, "epoch": 7.886690647482014, "step": 8770 }, { "epoch": 7.89568345323741, "grad_norm": 0.3488727807998657, "learning_rate": 9.748510014334097e-05, "loss": 0.0188, "step": 8780 }, { "action_loss": 0.017620651051402092, "epoch": 7.89568345323741, "step": 8780 }, { "epoch": 7.904676258992806, "grad_norm": 0.37899500131607056, "learning_rate": 9.747646305679621e-05, "loss": 0.0193, "step": 8790 }, { "action_loss": 0.011020983569324017, "epoch": 7.904676258992806, "step": 8790 }, { "epoch": 7.913669064748201, "grad_norm": 0.29080185294151306, "learning_rate": 9.74678115481555e-05, "loss": 0.0215, "step": 8800 }, { "action_loss": 0.028356149792671204, "epoch": 7.913669064748201, "step": 8800 }, { "epoch": 7.922661870503597, "grad_norm": 0.2745845317840576, "learning_rate": 9.745914562004696e-05, "loss": 0.019, "step": 8810 }, { "action_loss": 0.007663852069526911, "epoch": 7.922661870503597, "step": 8810 }, { "epoch": 7.931654676258993, "grad_norm": 0.3237253725528717, "learning_rate": 9.745046527510307e-05, "loss": 0.0137, "step": 8820 }, { "action_loss": 0.016631731763482094, "epoch": 7.931654676258993, "step": 8820 }, { "epoch": 7.940647482014389, "grad_norm": 0.36165401339530945, "learning_rate": 9.744177051596068e-05, "loss": 0.0184, "step": 8830 }, { "action_loss": 0.015258729457855225, "epoch": 7.940647482014389, "step": 8830 }, { "epoch": 7.9496402877697845, "grad_norm": 0.3750578463077545, "learning_rate": 9.743306134526105e-05, "loss": 0.0176, "step": 8840 }, { "action_loss": 0.029621073976159096, "epoch": 7.9496402877697845, "step": 8840 }, { "epoch": 7.9586330935251794, "grad_norm": 0.27835798263549805, "learning_rate": 9.742433776564977e-05, "loss": 0.0167, "step": 8850 }, { "action_loss": 0.01762593723833561, "epoch": 7.9586330935251794, "step": 8850 }, { "epoch": 7.967625899280575, "grad_norm": 0.2895296514034271, "learning_rate": 9.741559977977683e-05, "loss": 0.0171, "step": 8860 }, { "action_loss": 0.015743795782327652, "epoch": 7.967625899280575, "step": 8860 }, { "epoch": 7.976618705035971, "grad_norm": 0.3090137839317322, "learning_rate": 9.740684739029661e-05, "loss": 0.0176, "step": 8870 }, { "action_loss": 0.01620536856353283, "epoch": 7.976618705035971, "step": 8870 }, { "epoch": 7.985611510791367, "grad_norm": 0.4235110282897949, "learning_rate": 9.739808059986789e-05, "loss": 0.0247, "step": 8880 }, { "action_loss": 0.0118229566141963, "epoch": 7.985611510791367, "step": 8880 }, { "epoch": 7.994604316546763, "grad_norm": 0.19306069612503052, "learning_rate": 9.738929941115373e-05, "loss": 0.0166, "step": 8890 }, { "action_loss": 0.0051538762636482716, "epoch": 7.994604316546763, "step": 8890 }, { "epoch": 8.003597122302159, "grad_norm": 0.21616046130657196, "learning_rate": 9.738050382682167e-05, "loss": 0.0106, "step": 8900 }, { "action_loss": 0.010436066426336765, "epoch": 8.003597122302159, "step": 8900 }, { "epoch": 8.012589928057555, "grad_norm": 0.28681764006614685, "learning_rate": 9.737169384954355e-05, "loss": 0.0188, "step": 8910 }, { "action_loss": 0.022800864651799202, "epoch": 8.012589928057555, "step": 8910 }, { "epoch": 8.02158273381295, "grad_norm": 0.31380990147590637, "learning_rate": 9.736286948199562e-05, "loss": 0.0147, "step": 8920 }, { "action_loss": 0.010170874185860157, "epoch": 8.02158273381295, "step": 8920 }, { "epoch": 8.030575539568344, "grad_norm": 0.25616541504859924, "learning_rate": 9.735403072685848e-05, "loss": 0.0132, "step": 8930 }, { "action_loss": 0.014576482586562634, "epoch": 8.030575539568344, "step": 8930 }, { "epoch": 8.03956834532374, "grad_norm": 0.25750648975372314, "learning_rate": 9.734517758681712e-05, "loss": 0.0147, "step": 8940 }, { "action_loss": 0.009435021318495274, "epoch": 8.03956834532374, "step": 8940 }, { "epoch": 8.048561151079136, "grad_norm": 0.2712211310863495, "learning_rate": 9.733631006456088e-05, "loss": 0.0132, "step": 8950 }, { "action_loss": 0.010658633895218372, "epoch": 8.048561151079136, "step": 8950 }, { "epoch": 8.057553956834532, "grad_norm": 0.22742637991905212, "learning_rate": 9.732742816278348e-05, "loss": 0.0168, "step": 8960 }, { "action_loss": 0.014753817580640316, "epoch": 8.057553956834532, "step": 8960 }, { "epoch": 8.066546762589928, "grad_norm": 0.22270525991916656, "learning_rate": 9.731853188418302e-05, "loss": 0.0133, "step": 8970 }, { "action_loss": 0.050359975546598434, "epoch": 8.066546762589928, "step": 8970 }, { "epoch": 8.075539568345324, "grad_norm": 0.26259034872055054, "learning_rate": 9.730962123146194e-05, "loss": 0.0176, "step": 8980 }, { "action_loss": 0.0076742395758628845, "epoch": 8.075539568345324, "step": 8980 }, { "epoch": 8.08453237410072, "grad_norm": 0.29395073652267456, "learning_rate": 9.730069620732709e-05, "loss": 0.0152, "step": 8990 }, { "action_loss": 0.008993124589323997, "epoch": 8.08453237410072, "step": 8990 }, { "epoch": 8.093525179856115, "grad_norm": 0.27906903624534607, "learning_rate": 9.72917568144896e-05, "loss": 0.0148, "step": 9000 }, { "action_loss": 0.024637555703520775, "epoch": 8.093525179856115, "step": 9000 }, { "epoch": 8.102517985611511, "grad_norm": 0.31651270389556885, "learning_rate": 9.728280305566509e-05, "loss": 0.0207, "step": 9010 }, { "action_loss": 0.023943884298205376, "epoch": 8.102517985611511, "step": 9010 }, { "epoch": 8.111510791366907, "grad_norm": 0.33438119292259216, "learning_rate": 9.727383493357343e-05, "loss": 0.0201, "step": 9020 }, { "action_loss": 0.018046312034130096, "epoch": 8.111510791366907, "step": 9020 }, { "epoch": 8.120503597122303, "grad_norm": 0.4424048662185669, "learning_rate": 9.726485245093891e-05, "loss": 0.0166, "step": 9030 }, { "action_loss": 0.025741154327988625, "epoch": 8.120503597122303, "step": 9030 }, { "epoch": 8.129496402877697, "grad_norm": 0.28983020782470703, "learning_rate": 9.725585561049018e-05, "loss": 0.0144, "step": 9040 }, { "action_loss": 0.023447489365935326, "epoch": 8.129496402877697, "step": 9040 }, { "epoch": 8.138489208633093, "grad_norm": 0.27154919505119324, "learning_rate": 9.724684441496022e-05, "loss": 0.0179, "step": 9050 }, { "action_loss": 0.009132495149970055, "epoch": 8.138489208633093, "step": 9050 }, { "epoch": 8.147482014388489, "grad_norm": 0.24056293070316315, "learning_rate": 9.72378188670864e-05, "loss": 0.0152, "step": 9060 }, { "action_loss": 0.01092219352722168, "epoch": 8.147482014388489, "step": 9060 }, { "epoch": 8.156474820143885, "grad_norm": 0.23776815831661224, "learning_rate": 9.722877896961047e-05, "loss": 0.0148, "step": 9070 }, { "action_loss": 0.012706820853054523, "epoch": 8.156474820143885, "step": 9070 }, { "epoch": 8.16546762589928, "grad_norm": 0.21050557494163513, "learning_rate": 9.721972472527848e-05, "loss": 0.0145, "step": 9080 }, { "action_loss": 0.014120545238256454, "epoch": 8.16546762589928, "step": 9080 }, { "epoch": 8.174460431654676, "grad_norm": 0.3592073619365692, "learning_rate": 9.721065613684089e-05, "loss": 0.0153, "step": 9090 }, { "action_loss": 0.010376949794590473, "epoch": 8.174460431654676, "step": 9090 }, { "epoch": 8.183453237410072, "grad_norm": 0.3288829028606415, "learning_rate": 9.72015732070525e-05, "loss": 0.0138, "step": 9100 }, { "action_loss": 0.03899763152003288, "epoch": 8.183453237410072, "step": 9100 }, { "epoch": 8.192446043165468, "grad_norm": 0.3563796281814575, "learning_rate": 9.719247593867244e-05, "loss": 0.0138, "step": 9110 }, { "action_loss": 0.01771216280758381, "epoch": 8.192446043165468, "step": 9110 }, { "epoch": 8.201438848920864, "grad_norm": 0.27945080399513245, "learning_rate": 9.718336433446423e-05, "loss": 0.0157, "step": 9120 }, { "action_loss": 0.04624907672405243, "epoch": 8.201438848920864, "step": 9120 }, { "epoch": 8.21043165467626, "grad_norm": 0.2764577269554138, "learning_rate": 9.717423839719574e-05, "loss": 0.0169, "step": 9130 }, { "action_loss": 0.012257334776222706, "epoch": 8.21043165467626, "step": 9130 }, { "epoch": 8.219424460431656, "grad_norm": 0.22022369503974915, "learning_rate": 9.71650981296392e-05, "loss": 0.0143, "step": 9140 }, { "action_loss": 0.019806286320090294, "epoch": 8.219424460431656, "step": 9140 }, { "epoch": 8.22841726618705, "grad_norm": 0.2573086619377136, "learning_rate": 9.715594353457118e-05, "loss": 0.0146, "step": 9150 }, { "action_loss": 0.01263060700148344, "epoch": 8.22841726618705, "step": 9150 }, { "epoch": 8.237410071942445, "grad_norm": 0.2937777042388916, "learning_rate": 9.714677461477257e-05, "loss": 0.0166, "step": 9160 }, { "action_loss": 0.01666339300572872, "epoch": 8.237410071942445, "step": 9160 }, { "epoch": 8.246402877697841, "grad_norm": 0.3900650143623352, "learning_rate": 9.713759137302869e-05, "loss": 0.0157, "step": 9170 }, { "action_loss": 0.012830731458961964, "epoch": 8.246402877697841, "step": 9170 }, { "epoch": 8.255395683453237, "grad_norm": 0.3630370497703552, "learning_rate": 9.712839381212914e-05, "loss": 0.0137, "step": 9180 }, { "action_loss": 0.01060282438993454, "epoch": 8.255395683453237, "step": 9180 }, { "epoch": 8.264388489208633, "grad_norm": 0.34272822737693787, "learning_rate": 9.71191819348679e-05, "loss": 0.0145, "step": 9190 }, { "action_loss": 0.011788922362029552, "epoch": 8.264388489208633, "step": 9190 }, { "epoch": 8.273381294964029, "grad_norm": 0.4053408205509186, "learning_rate": 9.710995574404331e-05, "loss": 0.0173, "step": 9200 }, { "action_loss": 0.0198939498513937, "epoch": 8.273381294964029, "step": 9200 }, { "epoch": 8.282374100719425, "grad_norm": 0.3358152508735657, "learning_rate": 9.710071524245802e-05, "loss": 0.0204, "step": 9210 }, { "action_loss": 0.02193991094827652, "epoch": 8.282374100719425, "step": 9210 }, { "epoch": 8.29136690647482, "grad_norm": 0.22730772197246552, "learning_rate": 9.709146043291906e-05, "loss": 0.013, "step": 9220 }, { "action_loss": 0.007245900575071573, "epoch": 8.29136690647482, "step": 9220 }, { "epoch": 8.300359712230216, "grad_norm": 0.2646942436695099, "learning_rate": 9.70821913182378e-05, "loss": 0.017, "step": 9230 }, { "action_loss": 0.009462644346058369, "epoch": 8.300359712230216, "step": 9230 }, { "epoch": 8.309352517985612, "grad_norm": 0.27173912525177, "learning_rate": 9.707290790122995e-05, "loss": 0.0165, "step": 9240 }, { "action_loss": 0.005204794462770224, "epoch": 8.309352517985612, "step": 9240 }, { "epoch": 8.318345323741006, "grad_norm": 0.22455739974975586, "learning_rate": 9.706361018471557e-05, "loss": 0.0165, "step": 9250 }, { "action_loss": 0.02401755005121231, "epoch": 8.318345323741006, "step": 9250 }, { "epoch": 8.327338129496402, "grad_norm": 0.28450873494148254, "learning_rate": 9.705429817151906e-05, "loss": 0.0156, "step": 9260 }, { "action_loss": 0.02362896502017975, "epoch": 8.327338129496402, "step": 9260 }, { "epoch": 8.336330935251798, "grad_norm": 0.2598603665828705, "learning_rate": 9.704497186446917e-05, "loss": 0.0128, "step": 9270 }, { "action_loss": 0.014918888919055462, "epoch": 8.336330935251798, "step": 9270 }, { "epoch": 8.345323741007194, "grad_norm": 0.37709492444992065, "learning_rate": 9.703563126639896e-05, "loss": 0.0196, "step": 9280 }, { "action_loss": 0.012617439962923527, "epoch": 8.345323741007194, "step": 9280 }, { "epoch": 8.35431654676259, "grad_norm": 0.3176873028278351, "learning_rate": 9.70262763801459e-05, "loss": 0.015, "step": 9290 }, { "action_loss": 0.01851900853216648, "epoch": 8.35431654676259, "step": 9290 }, { "epoch": 8.363309352517986, "grad_norm": 0.44462892413139343, "learning_rate": 9.701690720855171e-05, "loss": 0.0195, "step": 9300 }, { "action_loss": 0.011418578214943409, "epoch": 8.363309352517986, "step": 9300 }, { "epoch": 8.372302158273381, "grad_norm": 0.3393900990486145, "learning_rate": 9.700752375446253e-05, "loss": 0.0159, "step": 9310 }, { "action_loss": 0.08849189430475235, "epoch": 8.372302158273381, "step": 9310 }, { "epoch": 8.381294964028777, "grad_norm": 0.327635794878006, "learning_rate": 9.69981260207288e-05, "loss": 0.0272, "step": 9320 }, { "action_loss": 0.021786659955978394, "epoch": 8.381294964028777, "step": 9320 }, { "epoch": 8.390287769784173, "grad_norm": 0.25665000081062317, "learning_rate": 9.698871401020529e-05, "loss": 0.0189, "step": 9330 }, { "action_loss": 0.008059107698500156, "epoch": 8.390287769784173, "step": 9330 }, { "epoch": 8.399280575539569, "grad_norm": 0.3338135778903961, "learning_rate": 9.697928772575112e-05, "loss": 0.0168, "step": 9340 }, { "action_loss": 0.017449961975216866, "epoch": 8.399280575539569, "step": 9340 }, { "epoch": 8.408273381294965, "grad_norm": 0.2615748643875122, "learning_rate": 9.696984717022976e-05, "loss": 0.013, "step": 9350 }, { "action_loss": 0.00916928332298994, "epoch": 8.408273381294965, "step": 9350 }, { "epoch": 8.417266187050359, "grad_norm": 0.22493122518062592, "learning_rate": 9.6960392346509e-05, "loss": 0.0122, "step": 9360 }, { "action_loss": 0.02703102119266987, "epoch": 8.417266187050359, "step": 9360 }, { "epoch": 8.426258992805755, "grad_norm": 0.22602978348731995, "learning_rate": 9.695092325746097e-05, "loss": 0.0248, "step": 9370 }, { "action_loss": 0.008890918456017971, "epoch": 8.426258992805755, "step": 9370 }, { "epoch": 8.43525179856115, "grad_norm": 0.2968832850456238, "learning_rate": 9.694143990596211e-05, "loss": 0.0126, "step": 9380 }, { "action_loss": 0.010460385121405125, "epoch": 8.43525179856115, "step": 9380 }, { "epoch": 8.444244604316546, "grad_norm": 0.29751864075660706, "learning_rate": 9.693194229489325e-05, "loss": 0.0251, "step": 9390 }, { "action_loss": 0.012921761721372604, "epoch": 8.444244604316546, "step": 9390 }, { "epoch": 8.453237410071942, "grad_norm": 0.2515977919101715, "learning_rate": 9.692243042713944e-05, "loss": 0.0136, "step": 9400 }, { "action_loss": 0.030738914385437965, "epoch": 8.453237410071942, "step": 9400 }, { "epoch": 8.462230215827338, "grad_norm": 0.2143365889787674, "learning_rate": 9.691290430559022e-05, "loss": 0.0141, "step": 9410 }, { "action_loss": 0.020962245762348175, "epoch": 8.462230215827338, "step": 9410 }, { "epoch": 8.471223021582734, "grad_norm": 0.23615248501300812, "learning_rate": 9.690336393313932e-05, "loss": 0.0139, "step": 9420 }, { "action_loss": 0.013244337402284145, "epoch": 8.471223021582734, "step": 9420 }, { "epoch": 8.48021582733813, "grad_norm": 0.2536628544330597, "learning_rate": 9.689380931268487e-05, "loss": 0.0154, "step": 9430 }, { "action_loss": 0.013822562992572784, "epoch": 8.48021582733813, "step": 9430 }, { "epoch": 8.489208633093526, "grad_norm": 0.22932858765125275, "learning_rate": 9.688424044712932e-05, "loss": 0.0133, "step": 9440 }, { "action_loss": 0.008303130976855755, "epoch": 8.489208633093526, "step": 9440 }, { "epoch": 8.498201438848922, "grad_norm": 0.2818961441516876, "learning_rate": 9.687465733937942e-05, "loss": 0.0194, "step": 9450 }, { "action_loss": 0.010462905280292034, "epoch": 8.498201438848922, "step": 9450 }, { "epoch": 8.507194244604317, "grad_norm": 0.42326608300209045, "learning_rate": 9.686505999234627e-05, "loss": 0.0129, "step": 9460 }, { "action_loss": 0.017630212008953094, "epoch": 8.507194244604317, "step": 9460 }, { "epoch": 8.516187050359711, "grad_norm": 0.2871032953262329, "learning_rate": 9.685544840894529e-05, "loss": 0.014, "step": 9470 }, { "action_loss": 0.012748775072395802, "epoch": 8.516187050359711, "step": 9470 }, { "epoch": 8.525179856115107, "grad_norm": 0.23615138232707977, "learning_rate": 9.684582259209624e-05, "loss": 0.0149, "step": 9480 }, { "action_loss": 0.01622861810028553, "epoch": 8.525179856115107, "step": 9480 }, { "epoch": 8.534172661870503, "grad_norm": 0.2479349821805954, "learning_rate": 9.683618254472317e-05, "loss": 0.0178, "step": 9490 }, { "action_loss": 0.015529453754425049, "epoch": 8.534172661870503, "step": 9490 }, { "epoch": 8.543165467625899, "grad_norm": 0.4541880190372467, "learning_rate": 9.682652826975449e-05, "loss": 0.0176, "step": 9500 }, { "action_loss": 0.034114737063646317, "epoch": 8.543165467625899, "step": 9500 }, { "epoch": 8.552158273381295, "grad_norm": 0.20144976675510406, "learning_rate": 9.681685977012291e-05, "loss": 0.017, "step": 9510 }, { "action_loss": 0.014896146953105927, "epoch": 8.552158273381295, "step": 9510 }, { "epoch": 8.56115107913669, "grad_norm": 0.28859594464302063, "learning_rate": 9.680717704876546e-05, "loss": 0.0142, "step": 9520 }, { "action_loss": 0.025107121095061302, "epoch": 8.56115107913669, "step": 9520 }, { "epoch": 8.570143884892087, "grad_norm": 0.38482171297073364, "learning_rate": 9.679748010862349e-05, "loss": 0.0163, "step": 9530 }, { "action_loss": 0.013651569373905659, "epoch": 8.570143884892087, "step": 9530 }, { "epoch": 8.579136690647482, "grad_norm": 0.27759334444999695, "learning_rate": 9.678776895264267e-05, "loss": 0.0158, "step": 9540 }, { "action_loss": 0.016578437760472298, "epoch": 8.579136690647482, "step": 9540 }, { "epoch": 8.588129496402878, "grad_norm": 0.2568219304084778, "learning_rate": 9.6778043583773e-05, "loss": 0.0183, "step": 9550 }, { "action_loss": 0.01297936961054802, "epoch": 8.588129496402878, "step": 9550 }, { "epoch": 8.597122302158274, "grad_norm": 0.22143836319446564, "learning_rate": 9.67683040049688e-05, "loss": 0.0122, "step": 9560 }, { "action_loss": 0.015430773608386517, "epoch": 8.597122302158274, "step": 9560 }, { "epoch": 8.60611510791367, "grad_norm": 0.2309945523738861, "learning_rate": 9.675855021918869e-05, "loss": 0.017, "step": 9570 }, { "action_loss": 0.006331531796604395, "epoch": 8.60611510791367, "step": 9570 }, { "epoch": 8.615107913669064, "grad_norm": 0.26381802558898926, "learning_rate": 9.674878222939561e-05, "loss": 0.013, "step": 9580 }, { "action_loss": 0.005363281816244125, "epoch": 8.615107913669064, "step": 9580 }, { "epoch": 8.62410071942446, "grad_norm": 0.33772361278533936, "learning_rate": 9.673900003855681e-05, "loss": 0.014, "step": 9590 }, { "action_loss": 0.013766467571258545, "epoch": 8.62410071942446, "step": 9590 }, { "epoch": 8.633093525179856, "grad_norm": 0.2226131558418274, "learning_rate": 9.672920364964389e-05, "loss": 0.0211, "step": 9600 }, { "action_loss": 0.023220689967274666, "epoch": 8.633093525179856, "step": 9600 }, { "epoch": 8.642086330935252, "grad_norm": 0.26948854327201843, "learning_rate": 9.671939306563269e-05, "loss": 0.0126, "step": 9610 }, { "action_loss": 0.015093029476702213, "epoch": 8.642086330935252, "step": 9610 }, { "epoch": 8.651079136690647, "grad_norm": 0.2865178883075714, "learning_rate": 9.670956828950345e-05, "loss": 0.0165, "step": 9620 }, { "action_loss": 0.01594362035393715, "epoch": 8.651079136690647, "step": 9620 }, { "epoch": 8.660071942446043, "grad_norm": 0.2638600170612335, "learning_rate": 9.669972932424065e-05, "loss": 0.0156, "step": 9630 }, { "action_loss": 0.008138217031955719, "epoch": 8.660071942446043, "step": 9630 }, { "epoch": 8.66906474820144, "grad_norm": 0.2745717763900757, "learning_rate": 9.668987617283312e-05, "loss": 0.0117, "step": 9640 }, { "action_loss": 0.013805876486003399, "epoch": 8.66906474820144, "step": 9640 }, { "epoch": 8.678057553956835, "grad_norm": 0.3509348928928375, "learning_rate": 9.668000883827397e-05, "loss": 0.0184, "step": 9650 }, { "action_loss": 0.010832361876964569, "epoch": 8.678057553956835, "step": 9650 }, { "epoch": 8.68705035971223, "grad_norm": 0.37665218114852905, "learning_rate": 9.667012732356067e-05, "loss": 0.0173, "step": 9660 }, { "action_loss": 0.013185878284275532, "epoch": 8.68705035971223, "step": 9660 }, { "epoch": 8.696043165467627, "grad_norm": 0.2939209043979645, "learning_rate": 9.666023163169493e-05, "loss": 0.0289, "step": 9670 }, { "action_loss": 0.02575361169874668, "epoch": 8.696043165467627, "step": 9670 }, { "epoch": 8.70503597122302, "grad_norm": 0.22494417428970337, "learning_rate": 9.665032176568281e-05, "loss": 0.016, "step": 9680 }, { "action_loss": 0.01397350337356329, "epoch": 8.70503597122302, "step": 9680 }, { "epoch": 8.714028776978417, "grad_norm": 0.2251196801662445, "learning_rate": 9.664039772853469e-05, "loss": 0.018, "step": 9690 }, { "action_loss": 0.01632741279900074, "epoch": 8.714028776978417, "step": 9690 }, { "epoch": 8.723021582733812, "grad_norm": 0.3009944558143616, "learning_rate": 9.663045952326518e-05, "loss": 0.0159, "step": 9700 }, { "action_loss": 0.01399772148579359, "epoch": 8.723021582733812, "step": 9700 }, { "epoch": 8.732014388489208, "grad_norm": 0.23193494975566864, "learning_rate": 9.662050715289328e-05, "loss": 0.0133, "step": 9710 }, { "action_loss": 0.013377104885876179, "epoch": 8.732014388489208, "step": 9710 }, { "epoch": 8.741007194244604, "grad_norm": 0.21087172627449036, "learning_rate": 9.661054062044226e-05, "loss": 0.0214, "step": 9720 }, { "action_loss": 0.01453069131821394, "epoch": 8.741007194244604, "step": 9720 }, { "epoch": 8.75, "grad_norm": 0.21956925094127655, "learning_rate": 9.660055992893968e-05, "loss": 0.0122, "step": 9730 }, { "action_loss": 0.014812623150646687, "epoch": 8.75, "step": 9730 }, { "epoch": 8.758992805755396, "grad_norm": 0.23387178778648376, "learning_rate": 9.659056508141739e-05, "loss": 0.013, "step": 9740 }, { "action_loss": 0.013273040764033794, "epoch": 8.758992805755396, "step": 9740 }, { "epoch": 8.767985611510792, "grad_norm": 0.25471389293670654, "learning_rate": 9.658055608091161e-05, "loss": 0.0192, "step": 9750 }, { "action_loss": 0.01062085572630167, "epoch": 8.767985611510792, "step": 9750 }, { "epoch": 8.776978417266188, "grad_norm": 0.2911389172077179, "learning_rate": 9.657053293046276e-05, "loss": 0.0166, "step": 9760 }, { "action_loss": 0.016881952062249184, "epoch": 8.776978417266188, "step": 9760 }, { "epoch": 8.785971223021583, "grad_norm": 0.264801025390625, "learning_rate": 9.656049563311564e-05, "loss": 0.0126, "step": 9770 }, { "action_loss": 0.01715932972729206, "epoch": 8.785971223021583, "step": 9770 }, { "epoch": 8.79496402877698, "grad_norm": 0.32437360286712646, "learning_rate": 9.655044419191929e-05, "loss": 0.0179, "step": 9780 }, { "action_loss": 0.007909084670245647, "epoch": 8.79496402877698, "step": 9780 }, { "epoch": 8.803956834532373, "grad_norm": 0.256180077791214, "learning_rate": 9.654037860992711e-05, "loss": 0.0148, "step": 9790 }, { "action_loss": 0.022998793050646782, "epoch": 8.803956834532373, "step": 9790 }, { "epoch": 8.81294964028777, "grad_norm": 0.3321721851825714, "learning_rate": 9.653029889019672e-05, "loss": 0.0193, "step": 9800 }, { "action_loss": 0.01654655486345291, "epoch": 8.81294964028777, "step": 9800 }, { "epoch": 8.821942446043165, "grad_norm": 0.202853262424469, "learning_rate": 9.65202050357901e-05, "loss": 0.0155, "step": 9810 }, { "action_loss": 0.034107279032468796, "epoch": 8.821942446043165, "step": 9810 }, { "epoch": 8.83093525179856, "grad_norm": 0.24163658916950226, "learning_rate": 9.651009704977347e-05, "loss": 0.0153, "step": 9820 }, { "action_loss": 0.006904891226440668, "epoch": 8.83093525179856, "step": 9820 }, { "epoch": 8.839928057553957, "grad_norm": 0.3294869363307953, "learning_rate": 9.649997493521738e-05, "loss": 0.0135, "step": 9830 }, { "action_loss": 0.008740592747926712, "epoch": 8.839928057553957, "step": 9830 }, { "epoch": 8.848920863309353, "grad_norm": 0.2725706398487091, "learning_rate": 9.64898386951967e-05, "loss": 0.0136, "step": 9840 }, { "action_loss": 0.019350174814462662, "epoch": 8.848920863309353, "step": 9840 }, { "epoch": 8.857913669064748, "grad_norm": 0.26930728554725647, "learning_rate": 9.647968833279049e-05, "loss": 0.0155, "step": 9850 }, { "action_loss": 0.013404577039182186, "epoch": 8.857913669064748, "step": 9850 }, { "epoch": 8.866906474820144, "grad_norm": 0.32310450077056885, "learning_rate": 9.646952385108218e-05, "loss": 0.0165, "step": 9860 }, { "action_loss": 0.009574498981237411, "epoch": 8.866906474820144, "step": 9860 }, { "epoch": 8.87589928057554, "grad_norm": 0.21517810225486755, "learning_rate": 9.645934525315951e-05, "loss": 0.0142, "step": 9870 }, { "action_loss": 0.003967047203332186, "epoch": 8.87589928057554, "step": 9870 }, { "epoch": 8.884892086330936, "grad_norm": 0.3131691813468933, "learning_rate": 9.644915254211442e-05, "loss": 0.0137, "step": 9880 }, { "action_loss": 0.012738026678562164, "epoch": 8.884892086330936, "step": 9880 }, { "epoch": 8.89388489208633, "grad_norm": 0.2620600759983063, "learning_rate": 9.643894572104321e-05, "loss": 0.0147, "step": 9890 }, { "action_loss": 0.005503382068127394, "epoch": 8.89388489208633, "step": 9890 }, { "epoch": 8.902877697841726, "grad_norm": 0.34518343210220337, "learning_rate": 9.642872479304644e-05, "loss": 0.017, "step": 9900 }, { "action_loss": 0.01732267625629902, "epoch": 8.902877697841726, "step": 9900 }, { "epoch": 8.911870503597122, "grad_norm": 0.31183403730392456, "learning_rate": 9.641848976122895e-05, "loss": 0.0149, "step": 9910 }, { "action_loss": 0.016506114974617958, "epoch": 8.911870503597122, "step": 9910 }, { "epoch": 8.920863309352518, "grad_norm": 0.6225135922431946, "learning_rate": 9.64082406286999e-05, "loss": 0.0119, "step": 9920 }, { "action_loss": 0.009774325415492058, "epoch": 8.920863309352518, "step": 9920 }, { "epoch": 8.929856115107913, "grad_norm": 0.41228440403938293, "learning_rate": 9.639797739857269e-05, "loss": 0.0231, "step": 9930 }, { "action_loss": 0.008811057545244694, "epoch": 8.929856115107913, "step": 9930 }, { "epoch": 8.93884892086331, "grad_norm": 0.2617577910423279, "learning_rate": 9.638770007396498e-05, "loss": 0.0168, "step": 9940 }, { "action_loss": 0.007305547129362822, "epoch": 8.93884892086331, "step": 9940 }, { "epoch": 8.947841726618705, "grad_norm": 0.26526108384132385, "learning_rate": 9.63774086579988e-05, "loss": 0.0226, "step": 9950 }, { "action_loss": 0.020283540710806847, "epoch": 8.947841726618705, "step": 9950 }, { "epoch": 8.956834532374101, "grad_norm": 0.22789669036865234, "learning_rate": 9.63671031538004e-05, "loss": 0.0136, "step": 9960 }, { "action_loss": 0.010536202229559422, "epoch": 8.956834532374101, "step": 9960 }, { "epoch": 8.965827338129497, "grad_norm": 0.27217745780944824, "learning_rate": 9.635678356450031e-05, "loss": 0.0148, "step": 9970 }, { "action_loss": 0.006112740840762854, "epoch": 8.965827338129497, "step": 9970 }, { "epoch": 8.974820143884893, "grad_norm": 0.29234907031059265, "learning_rate": 9.634644989323336e-05, "loss": 0.0158, "step": 9980 }, { "action_loss": 0.01752369850873947, "epoch": 8.974820143884893, "step": 9980 }, { "epoch": 8.983812949640289, "grad_norm": 0.41500887274742126, "learning_rate": 9.633610214313861e-05, "loss": 0.0169, "step": 9990 }, { "action_loss": 0.00919530913233757, "epoch": 8.983812949640289, "step": 9990 }, { "epoch": 8.992805755395683, "grad_norm": 0.3583548963069916, "learning_rate": 9.632574031735951e-05, "loss": 0.0179, "step": 10000 }, { "action_loss": 0.009599669836461544, "epoch": 8.992805755395683, "step": 10000 }, { "epoch": 9.001798561151078, "grad_norm": 0.35170385241508484, "learning_rate": 9.631536441904364e-05, "loss": 0.014, "step": 10010 }, { "action_loss": 0.015244926325976849, "epoch": 9.001798561151078, "step": 10010 }, { "epoch": 9.010791366906474, "grad_norm": 0.3339064419269562, "learning_rate": 9.630497445134293e-05, "loss": 0.0212, "step": 10020 }, { "action_loss": 0.012066683731973171, "epoch": 9.010791366906474, "step": 10020 }, { "epoch": 9.01978417266187, "grad_norm": 0.23217995464801788, "learning_rate": 9.62945704174136e-05, "loss": 0.0155, "step": 10030 }, { "action_loss": 0.015912296250462532, "epoch": 9.01978417266187, "step": 10030 }, { "epoch": 9.028776978417266, "grad_norm": 0.18029797077178955, "learning_rate": 9.628415232041612e-05, "loss": 0.0174, "step": 10040 }, { "action_loss": 0.03644023835659027, "epoch": 9.028776978417266, "step": 10040 }, { "epoch": 9.037769784172662, "grad_norm": 0.3846881091594696, "learning_rate": 9.627372016351524e-05, "loss": 0.0143, "step": 10050 }, { "action_loss": 0.012707076035439968, "epoch": 9.037769784172662, "step": 10050 }, { "epoch": 9.046762589928058, "grad_norm": 0.2576751708984375, "learning_rate": 9.626327394987995e-05, "loss": 0.0147, "step": 10060 }, { "action_loss": 0.007306512910872698, "epoch": 9.046762589928058, "step": 10060 }, { "epoch": 9.055755395683454, "grad_norm": 0.3153473734855652, "learning_rate": 9.625281368268355e-05, "loss": 0.0132, "step": 10070 }, { "action_loss": 0.026030780747532845, "epoch": 9.055755395683454, "step": 10070 }, { "epoch": 9.06474820143885, "grad_norm": 0.18907904624938965, "learning_rate": 9.624233936510357e-05, "loss": 0.0148, "step": 10080 }, { "action_loss": 0.02066877670586109, "epoch": 9.06474820143885, "step": 10080 }, { "epoch": 9.073741007194245, "grad_norm": 0.3182009160518646, "learning_rate": 9.623185100032187e-05, "loss": 0.0193, "step": 10090 }, { "action_loss": 0.016197625547647476, "epoch": 9.073741007194245, "step": 10090 }, { "epoch": 9.082733812949641, "grad_norm": 0.27858680486679077, "learning_rate": 9.62213485915245e-05, "loss": 0.0125, "step": 10100 }, { "action_loss": 0.008292976766824722, "epoch": 9.082733812949641, "step": 10100 }, { "epoch": 9.091726618705035, "grad_norm": 0.16860298812389374, "learning_rate": 9.621083214190186e-05, "loss": 0.0127, "step": 10110 }, { "action_loss": 0.00814994890242815, "epoch": 9.091726618705035, "step": 10110 }, { "epoch": 9.100719424460431, "grad_norm": 0.282276451587677, "learning_rate": 9.62003016546485e-05, "loss": 0.0179, "step": 10120 }, { "action_loss": 0.01729043386876583, "epoch": 9.100719424460431, "step": 10120 }, { "epoch": 9.109712230215827, "grad_norm": 0.27702537178993225, "learning_rate": 9.618975713296339e-05, "loss": 0.0128, "step": 10130 }, { "action_loss": 0.007735930848866701, "epoch": 9.109712230215827, "step": 10130 }, { "epoch": 9.118705035971223, "grad_norm": 0.2035840004682541, "learning_rate": 9.61791985800496e-05, "loss": 0.0107, "step": 10140 }, { "action_loss": 0.017944438382983208, "epoch": 9.118705035971223, "step": 10140 }, { "epoch": 9.127697841726619, "grad_norm": 0.21746709942817688, "learning_rate": 9.616862599911458e-05, "loss": 0.0154, "step": 10150 }, { "action_loss": 0.01785624772310257, "epoch": 9.127697841726619, "step": 10150 }, { "epoch": 9.136690647482014, "grad_norm": 0.26193687319755554, "learning_rate": 9.615803939337e-05, "loss": 0.0161, "step": 10160 }, { "action_loss": 0.017949111759662628, "epoch": 9.136690647482014, "step": 10160 }, { "epoch": 9.14568345323741, "grad_norm": 0.22704699635505676, "learning_rate": 9.614743876603178e-05, "loss": 0.014, "step": 10170 }, { "action_loss": 0.022887833416461945, "epoch": 9.14568345323741, "step": 10170 }, { "epoch": 9.154676258992806, "grad_norm": 0.14701275527477264, "learning_rate": 9.613682412032013e-05, "loss": 0.0158, "step": 10180 }, { "action_loss": 0.021528875455260277, "epoch": 9.154676258992806, "step": 10180 }, { "epoch": 9.163669064748202, "grad_norm": 0.278414249420166, "learning_rate": 9.612619545945947e-05, "loss": 0.0207, "step": 10190 }, { "action_loss": 0.008918863721191883, "epoch": 9.163669064748202, "step": 10190 }, { "epoch": 9.172661870503598, "grad_norm": 0.2666047513484955, "learning_rate": 9.611555278667852e-05, "loss": 0.0133, "step": 10200 }, { "action_loss": 0.019539475440979004, "epoch": 9.172661870503598, "step": 10200 }, { "epoch": 9.181654676258994, "grad_norm": 0.2201440930366516, "learning_rate": 9.610489610521024e-05, "loss": 0.0141, "step": 10210 }, { "action_loss": 0.010602178983390331, "epoch": 9.181654676258994, "step": 10210 }, { "epoch": 9.190647482014388, "grad_norm": 0.31834548711776733, "learning_rate": 9.609422541829187e-05, "loss": 0.0152, "step": 10220 }, { "action_loss": 0.014314155094325542, "epoch": 9.190647482014388, "step": 10220 }, { "epoch": 9.199640287769784, "grad_norm": 0.23527170717716217, "learning_rate": 9.608354072916486e-05, "loss": 0.0205, "step": 10230 }, { "action_loss": 0.014118176884949207, "epoch": 9.199640287769784, "step": 10230 }, { "epoch": 9.20863309352518, "grad_norm": 0.25004497170448303, "learning_rate": 9.607284204107493e-05, "loss": 0.0162, "step": 10240 }, { "action_loss": 0.006921518128365278, "epoch": 9.20863309352518, "step": 10240 }, { "epoch": 9.217625899280575, "grad_norm": 0.28509363532066345, "learning_rate": 9.606212935727208e-05, "loss": 0.0108, "step": 10250 }, { "action_loss": 0.010754439048469067, "epoch": 9.217625899280575, "step": 10250 }, { "epoch": 9.226618705035971, "grad_norm": 0.24086447060108185, "learning_rate": 9.605140268101052e-05, "loss": 0.0135, "step": 10260 }, { "action_loss": 0.006147764157503843, "epoch": 9.226618705035971, "step": 10260 }, { "epoch": 9.235611510791367, "grad_norm": 0.30929550528526306, "learning_rate": 9.604066201554875e-05, "loss": 0.0125, "step": 10270 }, { "action_loss": 0.013402261771261692, "epoch": 9.235611510791367, "step": 10270 }, { "epoch": 9.244604316546763, "grad_norm": 0.36283227801322937, "learning_rate": 9.60299073641495e-05, "loss": 0.013, "step": 10280 }, { "action_loss": 0.009601406753063202, "epoch": 9.244604316546763, "step": 10280 }, { "epoch": 9.253597122302159, "grad_norm": 0.20909905433654785, "learning_rate": 9.601913873007974e-05, "loss": 0.0101, "step": 10290 }, { "action_loss": 0.010845190845429897, "epoch": 9.253597122302159, "step": 10290 }, { "epoch": 9.262589928057555, "grad_norm": 0.15325471758842468, "learning_rate": 9.60083561166107e-05, "loss": 0.0128, "step": 10300 }, { "action_loss": 0.005100726615637541, "epoch": 9.262589928057555, "step": 10300 }, { "epoch": 9.27158273381295, "grad_norm": 0.14769816398620605, "learning_rate": 9.599755952701783e-05, "loss": 0.0144, "step": 10310 }, { "action_loss": 0.013030854053795338, "epoch": 9.27158273381295, "step": 10310 }, { "epoch": 9.280575539568344, "grad_norm": 0.19538357853889465, "learning_rate": 9.598674896458089e-05, "loss": 0.0138, "step": 10320 }, { "action_loss": 0.009673281572759151, "epoch": 9.280575539568344, "step": 10320 }, { "epoch": 9.28956834532374, "grad_norm": 0.17335225641727448, "learning_rate": 9.597592443258383e-05, "loss": 0.0099, "step": 10330 }, { "action_loss": 0.006605807226151228, "epoch": 9.28956834532374, "step": 10330 }, { "epoch": 9.298561151079136, "grad_norm": 0.20707285404205322, "learning_rate": 9.596508593431483e-05, "loss": 0.0092, "step": 10340 }, { "action_loss": 0.0064182891510427, "epoch": 9.298561151079136, "step": 10340 }, { "epoch": 9.307553956834532, "grad_norm": 0.20443663001060486, "learning_rate": 9.59542334730664e-05, "loss": 0.0135, "step": 10350 }, { "action_loss": 0.011455737985670567, "epoch": 9.307553956834532, "step": 10350 }, { "epoch": 9.316546762589928, "grad_norm": 0.2243579477071762, "learning_rate": 9.594336705213516e-05, "loss": 0.0133, "step": 10360 }, { "action_loss": 0.010342369787395, "epoch": 9.316546762589928, "step": 10360 }, { "epoch": 9.325539568345324, "grad_norm": 0.27718082070350647, "learning_rate": 9.593248667482208e-05, "loss": 0.0161, "step": 10370 }, { "action_loss": 0.01788165047764778, "epoch": 9.325539568345324, "step": 10370 }, { "epoch": 9.33453237410072, "grad_norm": 0.3474188446998596, "learning_rate": 9.592159234443233e-05, "loss": 0.0169, "step": 10380 }, { "action_loss": 0.01235173363238573, "epoch": 9.33453237410072, "step": 10380 }, { "epoch": 9.343525179856115, "grad_norm": 0.2942991256713867, "learning_rate": 9.59106840642753e-05, "loss": 0.0183, "step": 10390 }, { "action_loss": 0.011402343399822712, "epoch": 9.343525179856115, "step": 10390 }, { "epoch": 9.352517985611511, "grad_norm": 0.2738039493560791, "learning_rate": 9.589976183766467e-05, "loss": 0.0161, "step": 10400 }, { "action_loss": 0.009285050444304943, "epoch": 9.352517985611511, "step": 10400 }, { "epoch": 9.361510791366907, "grad_norm": 0.1949215531349182, "learning_rate": 9.58888256679183e-05, "loss": 0.0147, "step": 10410 }, { "action_loss": 0.009130876511335373, "epoch": 9.361510791366907, "step": 10410 }, { "epoch": 9.370503597122303, "grad_norm": 0.40858960151672363, "learning_rate": 9.587787555835832e-05, "loss": 0.0135, "step": 10420 }, { "action_loss": 0.01510347705334425, "epoch": 9.370503597122303, "step": 10420 }, { "epoch": 9.379496402877697, "grad_norm": 0.20046822726726532, "learning_rate": 9.586691151231107e-05, "loss": 0.0146, "step": 10430 }, { "action_loss": 0.010212741792201996, "epoch": 9.379496402877697, "step": 10430 }, { "epoch": 9.388489208633093, "grad_norm": 0.2515557110309601, "learning_rate": 9.585593353310715e-05, "loss": 0.0155, "step": 10440 }, { "action_loss": 0.010706084780395031, "epoch": 9.388489208633093, "step": 10440 }, { "epoch": 9.397482014388489, "grad_norm": 0.30812761187553406, "learning_rate": 9.58449416240814e-05, "loss": 0.0165, "step": 10450 }, { "action_loss": 0.008494461886584759, "epoch": 9.397482014388489, "step": 10450 }, { "epoch": 9.406474820143885, "grad_norm": 0.2670956254005432, "learning_rate": 9.583393578857283e-05, "loss": 0.0143, "step": 10460 }, { "action_loss": 0.008812200278043747, "epoch": 9.406474820143885, "step": 10460 }, { "epoch": 9.41546762589928, "grad_norm": 0.20249950885772705, "learning_rate": 9.582291602992474e-05, "loss": 0.0125, "step": 10470 }, { "action_loss": 0.0065284110605716705, "epoch": 9.41546762589928, "step": 10470 }, { "epoch": 9.424460431654676, "grad_norm": 0.22114431858062744, "learning_rate": 9.581188235148466e-05, "loss": 0.0135, "step": 10480 }, { "action_loss": 0.005766628310084343, "epoch": 9.424460431654676, "step": 10480 }, { "epoch": 9.433453237410072, "grad_norm": 0.20645494759082794, "learning_rate": 9.58008347566043e-05, "loss": 0.0104, "step": 10490 }, { "action_loss": 0.01678459905087948, "epoch": 9.433453237410072, "step": 10490 }, { "epoch": 9.442446043165468, "grad_norm": 0.25247183442115784, "learning_rate": 9.578977324863965e-05, "loss": 0.0125, "step": 10500 }, { "action_loss": 0.006976045202463865, "epoch": 9.442446043165468, "step": 10500 }, { "epoch": 9.451438848920864, "grad_norm": 0.24960513412952423, "learning_rate": 9.577869783095089e-05, "loss": 0.0141, "step": 10510 }, { "action_loss": 0.01977880485355854, "epoch": 9.451438848920864, "step": 10510 }, { "epoch": 9.46043165467626, "grad_norm": 0.22323565185070038, "learning_rate": 9.576760850690245e-05, "loss": 0.0146, "step": 10520 }, { "action_loss": 0.016977310180664062, "epoch": 9.46043165467626, "step": 10520 }, { "epoch": 9.469424460431656, "grad_norm": 0.3006635308265686, "learning_rate": 9.575650527986298e-05, "loss": 0.0154, "step": 10530 }, { "action_loss": 0.0373622290790081, "epoch": 9.469424460431656, "step": 10530 }, { "epoch": 9.47841726618705, "grad_norm": 0.2509262263774872, "learning_rate": 9.574538815320531e-05, "loss": 0.0127, "step": 10540 }, { "action_loss": 0.010132753290235996, "epoch": 9.47841726618705, "step": 10540 }, { "epoch": 9.487410071942445, "grad_norm": 0.25646358728408813, "learning_rate": 9.573425713030656e-05, "loss": 0.0164, "step": 10550 }, { "action_loss": 0.007850833237171173, "epoch": 9.487410071942445, "step": 10550 }, { "epoch": 9.496402877697841, "grad_norm": 0.23576496541500092, "learning_rate": 9.572311221454806e-05, "loss": 0.0121, "step": 10560 }, { "action_loss": 0.012952298857271671, "epoch": 9.496402877697841, "step": 10560 }, { "epoch": 9.505395683453237, "grad_norm": 0.20538116991519928, "learning_rate": 9.57119534093153e-05, "loss": 0.017, "step": 10570 }, { "action_loss": 0.011436159722507, "epoch": 9.505395683453237, "step": 10570 }, { "epoch": 9.514388489208633, "grad_norm": 0.2070694863796234, "learning_rate": 9.570078071799806e-05, "loss": 0.0128, "step": 10580 }, { "action_loss": 0.016361458227038383, "epoch": 9.514388489208633, "step": 10580 }, { "epoch": 9.523381294964029, "grad_norm": 0.15874658524990082, "learning_rate": 9.568959414399028e-05, "loss": 0.0134, "step": 10590 }, { "action_loss": 0.023342693224549294, "epoch": 9.523381294964029, "step": 10590 }, { "epoch": 9.532374100719425, "grad_norm": 0.21971538662910461, "learning_rate": 9.567839369069018e-05, "loss": 0.0137, "step": 10600 }, { "action_loss": 0.007612439338117838, "epoch": 9.532374100719425, "step": 10600 }, { "epoch": 9.54136690647482, "grad_norm": 0.19644223153591156, "learning_rate": 9.566717936150013e-05, "loss": 0.0141, "step": 10610 }, { "action_loss": 0.02117706835269928, "epoch": 9.54136690647482, "step": 10610 }, { "epoch": 9.550359712230216, "grad_norm": 0.27308031916618347, "learning_rate": 9.565595115982678e-05, "loss": 0.0124, "step": 10620 }, { "action_loss": 0.01964186131954193, "epoch": 9.550359712230216, "step": 10620 }, { "epoch": 9.559352517985612, "grad_norm": 0.3006889224052429, "learning_rate": 9.564470908908094e-05, "loss": 0.0145, "step": 10630 }, { "action_loss": 0.020930489525198936, "epoch": 9.559352517985612, "step": 10630 }, { "epoch": 9.568345323741006, "grad_norm": 0.19572119414806366, "learning_rate": 9.563345315267764e-05, "loss": 0.0151, "step": 10640 }, { "action_loss": 0.02211488038301468, "epoch": 9.568345323741006, "step": 10640 }, { "epoch": 9.577338129496402, "grad_norm": 0.2808430790901184, "learning_rate": 9.562218335403616e-05, "loss": 0.0126, "step": 10650 }, { "action_loss": 0.01062668114900589, "epoch": 9.577338129496402, "step": 10650 }, { "epoch": 9.586330935251798, "grad_norm": 0.2696729898452759, "learning_rate": 9.561089969657999e-05, "loss": 0.0122, "step": 10660 }, { "action_loss": 0.015711691230535507, "epoch": 9.586330935251798, "step": 10660 }, { "epoch": 9.595323741007194, "grad_norm": 0.1964394450187683, "learning_rate": 9.559960218373673e-05, "loss": 0.0152, "step": 10670 }, { "action_loss": 0.02040705643594265, "epoch": 9.595323741007194, "step": 10670 }, { "epoch": 9.60431654676259, "grad_norm": 0.3290519118309021, "learning_rate": 9.558829081893836e-05, "loss": 0.0142, "step": 10680 }, { "action_loss": 0.01704164408147335, "epoch": 9.60431654676259, "step": 10680 }, { "epoch": 9.613309352517986, "grad_norm": 0.23969392478466034, "learning_rate": 9.55769656056209e-05, "loss": 0.0134, "step": 10690 }, { "action_loss": 0.013689651153981686, "epoch": 9.613309352517986, "step": 10690 }, { "epoch": 9.622302158273381, "grad_norm": 0.26956436038017273, "learning_rate": 9.556562654722469e-05, "loss": 0.0141, "step": 10700 }, { "action_loss": 0.008389039896428585, "epoch": 9.622302158273381, "step": 10700 }, { "epoch": 9.631294964028777, "grad_norm": 0.22657468914985657, "learning_rate": 9.555427364719422e-05, "loss": 0.0145, "step": 10710 }, { "action_loss": 0.015171900391578674, "epoch": 9.631294964028777, "step": 10710 }, { "epoch": 9.640287769784173, "grad_norm": 0.27602243423461914, "learning_rate": 9.55429069089782e-05, "loss": 0.0112, "step": 10720 }, { "action_loss": 0.017615847289562225, "epoch": 9.640287769784173, "step": 10720 }, { "epoch": 9.649280575539569, "grad_norm": 0.24841587245464325, "learning_rate": 9.553152633602956e-05, "loss": 0.0151, "step": 10730 }, { "action_loss": 0.005979012697935104, "epoch": 9.649280575539569, "step": 10730 }, { "epoch": 9.658273381294965, "grad_norm": 0.2623181939125061, "learning_rate": 9.552013193180543e-05, "loss": 0.0147, "step": 10740 }, { "action_loss": 0.005517503246665001, "epoch": 9.658273381294965, "step": 10740 }, { "epoch": 9.667266187050359, "grad_norm": 0.24899061024188995, "learning_rate": 9.550872369976707e-05, "loss": 0.0133, "step": 10750 }, { "action_loss": 0.014927412383258343, "epoch": 9.667266187050359, "step": 10750 }, { "epoch": 9.676258992805755, "grad_norm": 0.25481465458869934, "learning_rate": 9.549730164338007e-05, "loss": 0.0135, "step": 10760 }, { "action_loss": 0.012329007498919964, "epoch": 9.676258992805755, "step": 10760 }, { "epoch": 9.68525179856115, "grad_norm": 0.25208526849746704, "learning_rate": 9.548586576611408e-05, "loss": 0.0114, "step": 10770 }, { "action_loss": 0.014322844333946705, "epoch": 9.68525179856115, "step": 10770 }, { "epoch": 9.694244604316546, "grad_norm": 0.27076542377471924, "learning_rate": 9.54744160714431e-05, "loss": 0.0149, "step": 10780 }, { "action_loss": 0.011962584219872952, "epoch": 9.694244604316546, "step": 10780 }, { "epoch": 9.703237410071942, "grad_norm": 0.2521994709968567, "learning_rate": 9.546295256284516e-05, "loss": 0.0144, "step": 10790 }, { "action_loss": 0.026935750618577003, "epoch": 9.703237410071942, "step": 10790 }, { "epoch": 9.712230215827338, "grad_norm": 0.21745401620864868, "learning_rate": 9.545147524380265e-05, "loss": 0.0162, "step": 10800 }, { "action_loss": 0.015451833605766296, "epoch": 9.712230215827338, "step": 10800 }, { "epoch": 9.721223021582734, "grad_norm": 0.25876384973526, "learning_rate": 9.543998411780201e-05, "loss": 0.0164, "step": 10810 }, { "action_loss": 0.025385161861777306, "epoch": 9.721223021582734, "step": 10810 }, { "epoch": 9.73021582733813, "grad_norm": 0.42329397797584534, "learning_rate": 9.542847918833397e-05, "loss": 0.0161, "step": 10820 }, { "action_loss": 0.02071521431207657, "epoch": 9.73021582733813, "step": 10820 }, { "epoch": 9.739208633093526, "grad_norm": 0.20520678162574768, "learning_rate": 9.541696045889343e-05, "loss": 0.0159, "step": 10830 }, { "action_loss": 0.00973557773977518, "epoch": 9.739208633093526, "step": 10830 }, { "epoch": 9.748201438848922, "grad_norm": 0.2391536980867386, "learning_rate": 9.540542793297947e-05, "loss": 0.0124, "step": 10840 }, { "action_loss": 0.012020159512758255, "epoch": 9.748201438848922, "step": 10840 }, { "epoch": 9.757194244604317, "grad_norm": 0.29179584980010986, "learning_rate": 9.539388161409537e-05, "loss": 0.013, "step": 10850 }, { "action_loss": 0.02068558894097805, "epoch": 9.757194244604317, "step": 10850 }, { "epoch": 9.766187050359711, "grad_norm": 0.31239399313926697, "learning_rate": 9.538232150574857e-05, "loss": 0.0153, "step": 10860 }, { "action_loss": 0.014846828766167164, "epoch": 9.766187050359711, "step": 10860 }, { "epoch": 9.775179856115107, "grad_norm": 0.21418559551239014, "learning_rate": 9.537074761145076e-05, "loss": 0.0109, "step": 10870 }, { "action_loss": 0.009690686129033566, "epoch": 9.775179856115107, "step": 10870 }, { "epoch": 9.784172661870503, "grad_norm": 0.26868095993995667, "learning_rate": 9.535915993471778e-05, "loss": 0.017, "step": 10880 }, { "action_loss": 0.021935582160949707, "epoch": 9.784172661870503, "step": 10880 }, { "epoch": 9.793165467625899, "grad_norm": 0.3320499658584595, "learning_rate": 9.534755847906964e-05, "loss": 0.0218, "step": 10890 }, { "action_loss": 0.006451235618442297, "epoch": 9.793165467625899, "step": 10890 }, { "epoch": 9.802158273381295, "grad_norm": 0.4892677962779999, "learning_rate": 9.533594324803057e-05, "loss": 0.0124, "step": 10900 }, { "action_loss": 0.013841930776834488, "epoch": 9.802158273381295, "step": 10900 }, { "epoch": 9.81115107913669, "grad_norm": 0.28341931104660034, "learning_rate": 9.532431424512895e-05, "loss": 0.0114, "step": 10910 }, { "action_loss": 0.01373403612524271, "epoch": 9.81115107913669, "step": 10910 }, { "epoch": 9.820143884892087, "grad_norm": 0.3715082108974457, "learning_rate": 9.531267147389741e-05, "loss": 0.0166, "step": 10920 }, { "action_loss": 0.02022123523056507, "epoch": 9.820143884892087, "step": 10920 }, { "epoch": 9.829136690647482, "grad_norm": 0.35122811794281006, "learning_rate": 9.530101493787266e-05, "loss": 0.0147, "step": 10930 }, { "action_loss": 0.03254403546452522, "epoch": 9.829136690647482, "step": 10930 }, { "epoch": 9.838129496402878, "grad_norm": 0.22463774681091309, "learning_rate": 9.528934464059571e-05, "loss": 0.014, "step": 10940 }, { "action_loss": 0.010460850782692432, "epoch": 9.838129496402878, "step": 10940 }, { "epoch": 9.847122302158274, "grad_norm": 0.20687606930732727, "learning_rate": 9.527766058561163e-05, "loss": 0.0119, "step": 10950 }, { "action_loss": 0.01016904879361391, "epoch": 9.847122302158274, "step": 10950 }, { "epoch": 9.85611510791367, "grad_norm": 0.36265531182289124, "learning_rate": 9.526596277646976e-05, "loss": 0.0153, "step": 10960 }, { "action_loss": 0.01694272644817829, "epoch": 9.85611510791367, "step": 10960 }, { "epoch": 9.865107913669064, "grad_norm": 0.2829740345478058, "learning_rate": 9.525425121672358e-05, "loss": 0.0126, "step": 10970 }, { "action_loss": 0.005605766084045172, "epoch": 9.865107913669064, "step": 10970 }, { "epoch": 9.87410071942446, "grad_norm": 0.24765709042549133, "learning_rate": 9.524252590993074e-05, "loss": 0.0117, "step": 10980 }, { "action_loss": 0.005050829146057367, "epoch": 9.87410071942446, "step": 10980 }, { "epoch": 9.883093525179856, "grad_norm": 0.23560455441474915, "learning_rate": 9.523078685965309e-05, "loss": 0.014, "step": 10990 }, { "action_loss": 0.004226927179843187, "epoch": 9.883093525179856, "step": 10990 }, { "epoch": 9.892086330935252, "grad_norm": 0.22552727162837982, "learning_rate": 9.521903406945664e-05, "loss": 0.0128, "step": 11000 }, { "action_loss": 0.014468987472355366, "epoch": 9.892086330935252, "step": 11000 }, { "epoch": 9.901079136690647, "grad_norm": 0.22129720449447632, "learning_rate": 9.520726754291158e-05, "loss": 0.0161, "step": 11010 }, { "action_loss": 0.030841568484902382, "epoch": 9.901079136690647, "step": 11010 }, { "epoch": 9.910071942446043, "grad_norm": 0.25968825817108154, "learning_rate": 9.519548728359227e-05, "loss": 0.0146, "step": 11020 }, { "action_loss": 0.015497633256018162, "epoch": 9.910071942446043, "step": 11020 }, { "epoch": 9.91906474820144, "grad_norm": 0.18054689466953278, "learning_rate": 9.518369329507726e-05, "loss": 0.0115, "step": 11030 }, { "action_loss": 0.005836314056068659, "epoch": 9.91906474820144, "step": 11030 }, { "epoch": 9.928057553956835, "grad_norm": 0.2184695452451706, "learning_rate": 9.51718855809492e-05, "loss": 0.011, "step": 11040 }, { "action_loss": 0.012889781035482883, "epoch": 9.928057553956835, "step": 11040 }, { "epoch": 9.93705035971223, "grad_norm": 0.34998252987861633, "learning_rate": 9.516006414479502e-05, "loss": 0.016, "step": 11050 }, { "action_loss": 0.010892960242927074, "epoch": 9.93705035971223, "step": 11050 }, { "epoch": 9.946043165467627, "grad_norm": 0.3171594738960266, "learning_rate": 9.514822899020572e-05, "loss": 0.0224, "step": 11060 }, { "action_loss": 0.014744110405445099, "epoch": 9.946043165467627, "step": 11060 }, { "epoch": 9.95503597122302, "grad_norm": 0.2461501508951187, "learning_rate": 9.513638012077654e-05, "loss": 0.0141, "step": 11070 }, { "action_loss": 0.009264578111469746, "epoch": 9.95503597122302, "step": 11070 }, { "epoch": 9.964028776978417, "grad_norm": 0.23449251055717468, "learning_rate": 9.512451754010683e-05, "loss": 0.0119, "step": 11080 }, { "action_loss": 0.015227854251861572, "epoch": 9.964028776978417, "step": 11080 }, { "epoch": 9.973021582733812, "grad_norm": 0.26672089099884033, "learning_rate": 9.511264125180013e-05, "loss": 0.0095, "step": 11090 }, { "action_loss": 0.01154357846826315, "epoch": 9.973021582733812, "step": 11090 }, { "epoch": 9.982014388489208, "grad_norm": 0.17987670004367828, "learning_rate": 9.510075125946414e-05, "loss": 0.012, "step": 11100 }, { "action_loss": 0.013908810913562775, "epoch": 9.982014388489208, "step": 11100 }, { "epoch": 9.991007194244604, "grad_norm": 0.1981399953365326, "learning_rate": 9.508884756671075e-05, "loss": 0.0146, "step": 11110 }, { "action_loss": 0.008641299791634083, "epoch": 9.991007194244604, "step": 11110 }, { "epoch": 10.0, "grad_norm": 0.292479008436203, "learning_rate": 9.507693017715596e-05, "loss": 0.0137, "step": 11120 }, { "action_loss": 0.011811405420303345, "epoch": 10.0, "step": 11120 }, { "epoch": 10.008992805755396, "grad_norm": 0.2877936363220215, "learning_rate": 9.506499909441997e-05, "loss": 0.0126, "step": 11130 }, { "action_loss": 0.008882119320333004, "epoch": 10.008992805755396, "step": 11130 }, { "epoch": 10.017985611510792, "grad_norm": 0.22126486897468567, "learning_rate": 9.505305432212713e-05, "loss": 0.0109, "step": 11140 }, { "action_loss": 0.01224460918456316, "epoch": 10.017985611510792, "step": 11140 }, { "epoch": 10.026978417266188, "grad_norm": 0.18172717094421387, "learning_rate": 9.504109586390595e-05, "loss": 0.0122, "step": 11150 }, { "action_loss": 0.004088153131306171, "epoch": 10.026978417266188, "step": 11150 }, { "epoch": 10.035971223021583, "grad_norm": 0.22825057804584503, "learning_rate": 9.502912372338908e-05, "loss": 0.0145, "step": 11160 }, { "action_loss": 0.0066033583134412766, "epoch": 10.035971223021583, "step": 11160 }, { "epoch": 10.04496402877698, "grad_norm": 0.29633384943008423, "learning_rate": 9.501713790421335e-05, "loss": 0.0142, "step": 11170 }, { "action_loss": 0.011242208071053028, "epoch": 10.04496402877698, "step": 11170 }, { "epoch": 10.053956834532373, "grad_norm": 0.2534365952014923, "learning_rate": 9.500513841001974e-05, "loss": 0.0133, "step": 11180 }, { "action_loss": 0.01810399256646633, "epoch": 10.053956834532373, "step": 11180 }, { "epoch": 10.06294964028777, "grad_norm": 0.1940629631280899, "learning_rate": 9.499312524445336e-05, "loss": 0.0163, "step": 11190 }, { "action_loss": 0.005986914038658142, "epoch": 10.06294964028777, "step": 11190 }, { "epoch": 10.071942446043165, "grad_norm": 0.36068078875541687, "learning_rate": 9.498109841116351e-05, "loss": 0.014, "step": 11200 }, { "action_loss": 0.018405860289931297, "epoch": 10.071942446043165, "step": 11200 }, { "epoch": 10.08093525179856, "grad_norm": 0.2573581337928772, "learning_rate": 9.496905791380363e-05, "loss": 0.0207, "step": 11210 }, { "action_loss": 0.013841499574482441, "epoch": 10.08093525179856, "step": 11210 }, { "epoch": 10.089928057553957, "grad_norm": 0.2654922604560852, "learning_rate": 9.495700375603129e-05, "loss": 0.014, "step": 11220 }, { "action_loss": 0.014031249098479748, "epoch": 10.089928057553957, "step": 11220 }, { "epoch": 10.098920863309353, "grad_norm": 0.37586459517478943, "learning_rate": 9.494493594150822e-05, "loss": 0.0156, "step": 11230 }, { "action_loss": 0.009104118682444096, "epoch": 10.098920863309353, "step": 11230 }, { "epoch": 10.107913669064748, "grad_norm": 0.2932160198688507, "learning_rate": 9.493285447390032e-05, "loss": 0.0132, "step": 11240 }, { "action_loss": 0.006175471469759941, "epoch": 10.107913669064748, "step": 11240 }, { "epoch": 10.116906474820144, "grad_norm": 0.2015652060508728, "learning_rate": 9.492075935687761e-05, "loss": 0.0144, "step": 11250 }, { "action_loss": 0.012703520245850086, "epoch": 10.116906474820144, "step": 11250 }, { "epoch": 10.12589928057554, "grad_norm": 0.20594099164009094, "learning_rate": 9.490865059411427e-05, "loss": 0.0164, "step": 11260 }, { "action_loss": 0.032187629491090775, "epoch": 10.12589928057554, "step": 11260 }, { "epoch": 10.134892086330936, "grad_norm": 0.22150835394859314, "learning_rate": 9.489652818928863e-05, "loss": 0.018, "step": 11270 }, { "action_loss": 0.017420414835214615, "epoch": 10.134892086330936, "step": 11270 }, { "epoch": 10.14388489208633, "grad_norm": 0.4165448248386383, "learning_rate": 9.488439214608315e-05, "loss": 0.0242, "step": 11280 }, { "action_loss": 0.014628191478550434, "epoch": 10.14388489208633, "step": 11280 }, { "epoch": 10.152877697841726, "grad_norm": 0.39221900701522827, "learning_rate": 9.487224246818444e-05, "loss": 0.0139, "step": 11290 }, { "action_loss": 0.015542320907115936, "epoch": 10.152877697841726, "step": 11290 }, { "epoch": 10.161870503597122, "grad_norm": 0.29286620020866394, "learning_rate": 9.486007915928325e-05, "loss": 0.014, "step": 11300 }, { "action_loss": 0.009632338769733906, "epoch": 10.161870503597122, "step": 11300 }, { "epoch": 10.170863309352518, "grad_norm": 0.3020247220993042, "learning_rate": 9.484790222307448e-05, "loss": 0.0159, "step": 11310 }, { "action_loss": 0.008353358134627342, "epoch": 10.170863309352518, "step": 11310 }, { "epoch": 10.179856115107913, "grad_norm": 0.3265936076641083, "learning_rate": 9.483571166325716e-05, "loss": 0.0166, "step": 11320 }, { "action_loss": 0.011605610139667988, "epoch": 10.179856115107913, "step": 11320 }, { "epoch": 10.18884892086331, "grad_norm": 0.37030982971191406, "learning_rate": 9.482350748353444e-05, "loss": 0.0158, "step": 11330 }, { "action_loss": 0.011231317184865475, "epoch": 10.18884892086331, "step": 11330 }, { "epoch": 10.197841726618705, "grad_norm": 0.2495119869709015, "learning_rate": 9.481128968761363e-05, "loss": 0.013, "step": 11340 }, { "action_loss": 0.015306870453059673, "epoch": 10.197841726618705, "step": 11340 }, { "epoch": 10.206834532374101, "grad_norm": 0.2657620906829834, "learning_rate": 9.479905827920621e-05, "loss": 0.0125, "step": 11350 }, { "action_loss": 0.01368140708655119, "epoch": 10.206834532374101, "step": 11350 }, { "epoch": 10.215827338129497, "grad_norm": 0.3828417956829071, "learning_rate": 9.478681326202773e-05, "loss": 0.0145, "step": 11360 }, { "action_loss": 0.007733335252851248, "epoch": 10.215827338129497, "step": 11360 }, { "epoch": 10.224820143884893, "grad_norm": 0.19370664656162262, "learning_rate": 9.477455463979791e-05, "loss": 0.0124, "step": 11370 }, { "action_loss": 0.011528458446264267, "epoch": 10.224820143884893, "step": 11370 }, { "epoch": 10.233812949640289, "grad_norm": 0.21426156163215637, "learning_rate": 9.476228241624059e-05, "loss": 0.0203, "step": 11380 }, { "action_loss": 0.006453061010688543, "epoch": 10.233812949640289, "step": 11380 }, { "epoch": 10.242805755395683, "grad_norm": 0.26928991079330444, "learning_rate": 9.474999659508374e-05, "loss": 0.012, "step": 11390 }, { "action_loss": 0.00667656259611249, "epoch": 10.242805755395683, "step": 11390 }, { "epoch": 10.251798561151078, "grad_norm": 0.16921688616275787, "learning_rate": 9.47376971800595e-05, "loss": 0.0097, "step": 11400 }, { "action_loss": 0.0058862511068582535, "epoch": 10.251798561151078, "step": 11400 }, { "epoch": 10.260791366906474, "grad_norm": 0.21566759049892426, "learning_rate": 9.472538417490409e-05, "loss": 0.016, "step": 11410 }, { "action_loss": 0.010218393988907337, "epoch": 10.260791366906474, "step": 11410 }, { "epoch": 10.26978417266187, "grad_norm": 0.2969546616077423, "learning_rate": 9.471305758335784e-05, "loss": 0.0145, "step": 11420 }, { "action_loss": 0.0072292243130505085, "epoch": 10.26978417266187, "step": 11420 }, { "epoch": 10.278776978417266, "grad_norm": 0.28596076369285583, "learning_rate": 9.47007174091653e-05, "loss": 0.0194, "step": 11430 }, { "action_loss": 0.01524729747325182, "epoch": 10.278776978417266, "step": 11430 }, { "epoch": 10.287769784172662, "grad_norm": 0.3254227042198181, "learning_rate": 9.468836365607507e-05, "loss": 0.0166, "step": 11440 }, { "action_loss": 0.019051983952522278, "epoch": 10.287769784172662, "step": 11440 }, { "epoch": 10.296762589928058, "grad_norm": 0.2699733078479767, "learning_rate": 9.467599632783988e-05, "loss": 0.0171, "step": 11450 }, { "action_loss": 0.02342410944402218, "epoch": 10.296762589928058, "step": 11450 }, { "epoch": 10.305755395683454, "grad_norm": 0.20659373700618744, "learning_rate": 9.466361542821662e-05, "loss": 0.0145, "step": 11460 }, { "action_loss": 0.010622210800647736, "epoch": 10.305755395683454, "step": 11460 }, { "epoch": 10.31474820143885, "grad_norm": 0.21020738780498505, "learning_rate": 9.465122096096625e-05, "loss": 0.0107, "step": 11470 }, { "action_loss": 0.027585437521338463, "epoch": 10.31474820143885, "step": 11470 }, { "epoch": 10.323741007194245, "grad_norm": 0.19156794250011444, "learning_rate": 9.463881292985391e-05, "loss": 0.0169, "step": 11480 }, { "action_loss": 0.009720508940517902, "epoch": 10.323741007194245, "step": 11480 }, { "epoch": 10.332733812949641, "grad_norm": 0.26070815324783325, "learning_rate": 9.462639133864881e-05, "loss": 0.0115, "step": 11490 }, { "action_loss": 0.01417639572173357, "epoch": 10.332733812949641, "step": 11490 }, { "epoch": 10.341726618705035, "grad_norm": 0.3022635281085968, "learning_rate": 9.461395619112432e-05, "loss": 0.0139, "step": 11500 }, { "action_loss": 0.017616847530007362, "epoch": 10.341726618705035, "step": 11500 }, { "epoch": 10.350719424460431, "grad_norm": 0.1997356414794922, "learning_rate": 9.460150749105791e-05, "loss": 0.0111, "step": 11510 }, { "action_loss": 0.01407239492982626, "epoch": 10.350719424460431, "step": 11510 }, { "epoch": 10.359712230215827, "grad_norm": 0.19450408220291138, "learning_rate": 9.458904524223116e-05, "loss": 0.0138, "step": 11520 }, { "action_loss": 0.02165806293487549, "epoch": 10.359712230215827, "step": 11520 }, { "epoch": 10.368705035971223, "grad_norm": 0.1778336465358734, "learning_rate": 9.457656944842976e-05, "loss": 0.0157, "step": 11530 }, { "action_loss": 0.005291823763400316, "epoch": 10.368705035971223, "step": 11530 }, { "epoch": 10.377697841726619, "grad_norm": 0.17792950570583344, "learning_rate": 9.456408011344353e-05, "loss": 0.0098, "step": 11540 }, { "action_loss": 0.010928322561085224, "epoch": 10.377697841726619, "step": 11540 }, { "epoch": 10.386690647482014, "grad_norm": 0.30177032947540283, "learning_rate": 9.455157724106643e-05, "loss": 0.0141, "step": 11550 }, { "action_loss": 0.02953912876546383, "epoch": 10.386690647482014, "step": 11550 }, { "epoch": 10.39568345323741, "grad_norm": 0.17657989263534546, "learning_rate": 9.453906083509647e-05, "loss": 0.0161, "step": 11560 }, { "action_loss": 0.011946342885494232, "epoch": 10.39568345323741, "step": 11560 }, { "epoch": 10.404676258992806, "grad_norm": 0.18177644908428192, "learning_rate": 9.45265308993358e-05, "loss": 0.0099, "step": 11570 }, { "action_loss": 0.05454118549823761, "epoch": 10.404676258992806, "step": 11570 }, { "epoch": 10.413669064748202, "grad_norm": 0.23717257380485535, "learning_rate": 9.451398743759071e-05, "loss": 0.0177, "step": 11580 }, { "action_loss": 0.009710841812193394, "epoch": 10.413669064748202, "step": 11580 }, { "epoch": 10.422661870503598, "grad_norm": 0.1835690140724182, "learning_rate": 9.450143045367156e-05, "loss": 0.0113, "step": 11590 }, { "action_loss": 0.03190099820494652, "epoch": 10.422661870503598, "step": 11590 }, { "epoch": 10.431654676258994, "grad_norm": 0.2655782103538513, "learning_rate": 9.448885995139283e-05, "loss": 0.0126, "step": 11600 }, { "action_loss": 0.009849860332906246, "epoch": 10.431654676258994, "step": 11600 }, { "epoch": 10.440647482014388, "grad_norm": 0.19336561858654022, "learning_rate": 9.44762759345731e-05, "loss": 0.0113, "step": 11610 }, { "action_loss": 0.010668516159057617, "epoch": 10.440647482014388, "step": 11610 }, { "epoch": 10.449640287769784, "grad_norm": 0.22994790971279144, "learning_rate": 9.446367840703509e-05, "loss": 0.0127, "step": 11620 }, { "action_loss": 0.01042312290519476, "epoch": 10.449640287769784, "step": 11620 }, { "epoch": 10.45863309352518, "grad_norm": 0.25267690420150757, "learning_rate": 9.445106737260556e-05, "loss": 0.0107, "step": 11630 }, { "action_loss": 0.006683869753032923, "epoch": 10.45863309352518, "step": 11630 }, { "epoch": 10.467625899280575, "grad_norm": 0.3809373080730438, "learning_rate": 9.443844283511543e-05, "loss": 0.013, "step": 11640 }, { "action_loss": 0.010634773410856724, "epoch": 10.467625899280575, "step": 11640 }, { "epoch": 10.476618705035971, "grad_norm": 0.22052891552448273, "learning_rate": 9.442580479839968e-05, "loss": 0.0163, "step": 11650 }, { "action_loss": 0.011615514755249023, "epoch": 10.476618705035971, "step": 11650 }, { "epoch": 10.485611510791367, "grad_norm": 0.3157036602497101, "learning_rate": 9.441315326629745e-05, "loss": 0.0153, "step": 11660 }, { "action_loss": 0.012669173069298267, "epoch": 10.485611510791367, "step": 11660 }, { "epoch": 10.494604316546763, "grad_norm": 0.3313906788825989, "learning_rate": 9.44004882426519e-05, "loss": 0.0151, "step": 11670 }, { "action_loss": 0.0114145427942276, "epoch": 10.494604316546763, "step": 11670 }, { "epoch": 10.503597122302159, "grad_norm": 0.3138423264026642, "learning_rate": 9.438780973131037e-05, "loss": 0.013, "step": 11680 }, { "action_loss": 0.015964001417160034, "epoch": 10.503597122302159, "step": 11680 }, { "epoch": 10.512589928057555, "grad_norm": 0.31140801310539246, "learning_rate": 9.437511773612423e-05, "loss": 0.0161, "step": 11690 }, { "action_loss": 0.010848783887922764, "epoch": 10.512589928057555, "step": 11690 }, { "epoch": 10.52158273381295, "grad_norm": 0.20172418653964996, "learning_rate": 9.436241226094896e-05, "loss": 0.0192, "step": 11700 }, { "action_loss": 0.04668717086315155, "epoch": 10.52158273381295, "step": 11700 }, { "epoch": 10.530575539568346, "grad_norm": 0.19283877313137054, "learning_rate": 9.434969330964418e-05, "loss": 0.0137, "step": 11710 }, { "action_loss": 0.022834300994873047, "epoch": 10.530575539568346, "step": 11710 }, { "epoch": 10.53956834532374, "grad_norm": 0.13141030073165894, "learning_rate": 9.433696088607356e-05, "loss": 0.0132, "step": 11720 }, { "action_loss": 0.0189762394875288, "epoch": 10.53956834532374, "step": 11720 }, { "epoch": 10.548561151079136, "grad_norm": 0.14768444001674652, "learning_rate": 9.432421499410486e-05, "loss": 0.0139, "step": 11730 }, { "action_loss": 0.014778549782931805, "epoch": 10.548561151079136, "step": 11730 }, { "epoch": 10.557553956834532, "grad_norm": 0.18860340118408203, "learning_rate": 9.431145563760998e-05, "loss": 0.0129, "step": 11740 }, { "action_loss": 0.015256802551448345, "epoch": 10.557553956834532, "step": 11740 }, { "epoch": 10.566546762589928, "grad_norm": 0.19113437831401825, "learning_rate": 9.429868282046484e-05, "loss": 0.0176, "step": 11750 }, { "action_loss": 0.011466129682958126, "epoch": 10.566546762589928, "step": 11750 }, { "epoch": 10.575539568345324, "grad_norm": 0.25791892409324646, "learning_rate": 9.428589654654951e-05, "loss": 0.0212, "step": 11760 }, { "action_loss": 0.010279097594320774, "epoch": 10.575539568345324, "step": 11760 }, { "epoch": 10.58453237410072, "grad_norm": 0.3384503722190857, "learning_rate": 9.42730968197481e-05, "loss": 0.0135, "step": 11770 }, { "action_loss": 0.007207546383142471, "epoch": 10.58453237410072, "step": 11770 }, { "epoch": 10.593525179856115, "grad_norm": 0.32429319620132446, "learning_rate": 9.426028364394883e-05, "loss": 0.0129, "step": 11780 }, { "action_loss": 0.009093452244997025, "epoch": 10.593525179856115, "step": 11780 }, { "epoch": 10.602517985611511, "grad_norm": 0.2353161722421646, "learning_rate": 9.424745702304402e-05, "loss": 0.0118, "step": 11790 }, { "action_loss": 0.014799843542277813, "epoch": 10.602517985611511, "step": 11790 }, { "epoch": 10.611510791366907, "grad_norm": 0.26095685362815857, "learning_rate": 9.423461696093006e-05, "loss": 0.0169, "step": 11800 }, { "action_loss": 0.03412802517414093, "epoch": 10.611510791366907, "step": 11800 }, { "epoch": 10.620503597122303, "grad_norm": 0.16808409988880157, "learning_rate": 9.422176346150741e-05, "loss": 0.0124, "step": 11810 }, { "action_loss": 0.010971288196742535, "epoch": 10.620503597122303, "step": 11810 }, { "epoch": 10.629496402877697, "grad_norm": 0.17023073136806488, "learning_rate": 9.420889652868063e-05, "loss": 0.0127, "step": 11820 }, { "action_loss": 0.006448884028941393, "epoch": 10.629496402877697, "step": 11820 }, { "epoch": 10.638489208633093, "grad_norm": 0.19032351672649384, "learning_rate": 9.419601616635836e-05, "loss": 0.0126, "step": 11830 }, { "action_loss": 0.00897483341395855, "epoch": 10.638489208633093, "step": 11830 }, { "epoch": 10.647482014388489, "grad_norm": 0.1974281519651413, "learning_rate": 9.418312237845331e-05, "loss": 0.0143, "step": 11840 }, { "action_loss": 0.006100370083004236, "epoch": 10.647482014388489, "step": 11840 }, { "epoch": 10.656474820143885, "grad_norm": 0.3346518576145172, "learning_rate": 9.417021516888225e-05, "loss": 0.0119, "step": 11850 }, { "action_loss": 0.014067583717405796, "epoch": 10.656474820143885, "step": 11850 }, { "epoch": 10.66546762589928, "grad_norm": 0.34490472078323364, "learning_rate": 9.415729454156608e-05, "loss": 0.0111, "step": 11860 }, { "action_loss": 0.015438606031239033, "epoch": 10.66546762589928, "step": 11860 }, { "epoch": 10.674460431654676, "grad_norm": 0.2513369917869568, "learning_rate": 9.414436050042973e-05, "loss": 0.0132, "step": 11870 }, { "action_loss": 0.010019049979746342, "epoch": 10.674460431654676, "step": 11870 }, { "epoch": 10.683453237410072, "grad_norm": 0.26295387744903564, "learning_rate": 9.413141304940223e-05, "loss": 0.0116, "step": 11880 }, { "action_loss": 0.01454076636582613, "epoch": 10.683453237410072, "step": 11880 }, { "epoch": 10.692446043165468, "grad_norm": 0.25342217087745667, "learning_rate": 9.411845219241666e-05, "loss": 0.0116, "step": 11890 }, { "action_loss": 0.004778428003191948, "epoch": 10.692446043165468, "step": 11890 }, { "epoch": 10.701438848920864, "grad_norm": 0.22184303402900696, "learning_rate": 9.410547793341021e-05, "loss": 0.0117, "step": 11900 }, { "action_loss": 0.006390315946191549, "epoch": 10.701438848920864, "step": 11900 }, { "epoch": 10.71043165467626, "grad_norm": 0.23250272870063782, "learning_rate": 9.409249027632408e-05, "loss": 0.011, "step": 11910 }, { "action_loss": 0.00788886845111847, "epoch": 10.71043165467626, "step": 11910 }, { "epoch": 10.719424460431654, "grad_norm": 0.24382691085338593, "learning_rate": 9.407948922510362e-05, "loss": 0.0119, "step": 11920 }, { "action_loss": 0.007186509668827057, "epoch": 10.719424460431654, "step": 11920 }, { "epoch": 10.72841726618705, "grad_norm": 0.16897423565387726, "learning_rate": 9.406647478369817e-05, "loss": 0.0094, "step": 11930 }, { "action_loss": 0.020649274811148643, "epoch": 10.72841726618705, "step": 11930 }, { "epoch": 10.737410071942445, "grad_norm": 0.22432643175125122, "learning_rate": 9.405344695606118e-05, "loss": 0.0115, "step": 11940 }, { "action_loss": 0.006355782970786095, "epoch": 10.737410071942445, "step": 11940 }, { "epoch": 10.746402877697841, "grad_norm": 0.16621705889701843, "learning_rate": 9.404040574615018e-05, "loss": 0.0112, "step": 11950 }, { "action_loss": 0.019021987915039062, "epoch": 10.746402877697841, "step": 11950 }, { "epoch": 10.755395683453237, "grad_norm": 0.22682897746562958, "learning_rate": 9.402735115792674e-05, "loss": 0.0135, "step": 11960 }, { "action_loss": 0.008715816773474216, "epoch": 10.755395683453237, "step": 11960 }, { "epoch": 10.764388489208633, "grad_norm": 0.2276277393102646, "learning_rate": 9.401428319535649e-05, "loss": 0.0093, "step": 11970 }, { "action_loss": 0.010761258192360401, "epoch": 10.764388489208633, "step": 11970 }, { "epoch": 10.773381294964029, "grad_norm": 0.2462696135044098, "learning_rate": 9.400120186240912e-05, "loss": 0.0132, "step": 11980 }, { "action_loss": 0.013502925634384155, "epoch": 10.773381294964029, "step": 11980 }, { "epoch": 10.782374100719425, "grad_norm": 0.2467980533838272, "learning_rate": 9.398810716305844e-05, "loss": 0.0102, "step": 11990 }, { "action_loss": 0.01619064435362816, "epoch": 10.782374100719425, "step": 11990 }, { "epoch": 10.79136690647482, "grad_norm": 0.22025175392627716, "learning_rate": 9.397499910128222e-05, "loss": 0.0126, "step": 12000 }, { "action_loss": 0.006860597524791956, "epoch": 10.79136690647482, "step": 12000 }, { "epoch": 10.800359712230216, "grad_norm": 0.2627471387386322, "learning_rate": 9.396187768106237e-05, "loss": 0.0105, "step": 12010 }, { "action_loss": 0.015230397693812847, "epoch": 10.800359712230216, "step": 12010 }, { "epoch": 10.809352517985612, "grad_norm": 0.27145111560821533, "learning_rate": 9.394874290638482e-05, "loss": 0.0117, "step": 12020 }, { "action_loss": 0.016189007088541985, "epoch": 10.809352517985612, "step": 12020 }, { "epoch": 10.818345323741006, "grad_norm": 0.2497996836900711, "learning_rate": 9.393559478123959e-05, "loss": 0.0137, "step": 12030 }, { "action_loss": 0.013060599565505981, "epoch": 10.818345323741006, "step": 12030 }, { "epoch": 10.827338129496402, "grad_norm": 0.2474067360162735, "learning_rate": 9.39224333096207e-05, "loss": 0.0125, "step": 12040 }, { "action_loss": 0.0063729495741426945, "epoch": 10.827338129496402, "step": 12040 }, { "epoch": 10.836330935251798, "grad_norm": 0.3422824442386627, "learning_rate": 9.390925849552629e-05, "loss": 0.0109, "step": 12050 }, { "action_loss": 0.012669007293879986, "epoch": 10.836330935251798, "step": 12050 }, { "epoch": 10.845323741007194, "grad_norm": 0.1620996594429016, "learning_rate": 9.389607034295849e-05, "loss": 0.0106, "step": 12060 }, { "action_loss": 0.015758713707327843, "epoch": 10.845323741007194, "step": 12060 }, { "epoch": 10.85431654676259, "grad_norm": 0.23054122924804688, "learning_rate": 9.388286885592355e-05, "loss": 0.0164, "step": 12070 }, { "action_loss": 0.00701918127015233, "epoch": 10.85431654676259, "step": 12070 }, { "epoch": 10.863309352517986, "grad_norm": 0.2511259913444519, "learning_rate": 9.386965403843168e-05, "loss": 0.0119, "step": 12080 }, { "action_loss": 0.0060453820042312145, "epoch": 10.863309352517986, "step": 12080 }, { "epoch": 10.872302158273381, "grad_norm": 0.32730358839035034, "learning_rate": 9.385642589449726e-05, "loss": 0.0116, "step": 12090 }, { "action_loss": 0.0045136758126318455, "epoch": 10.872302158273381, "step": 12090 }, { "epoch": 10.881294964028777, "grad_norm": 0.16785641014575958, "learning_rate": 9.38431844281386e-05, "loss": 0.0088, "step": 12100 }, { "action_loss": 0.006944302935153246, "epoch": 10.881294964028777, "step": 12100 }, { "epoch": 10.890287769784173, "grad_norm": 0.327544242143631, "learning_rate": 9.38299296433781e-05, "loss": 0.011, "step": 12110 }, { "action_loss": 0.013271230272948742, "epoch": 10.890287769784173, "step": 12110 }, { "epoch": 10.899280575539569, "grad_norm": 0.16645805537700653, "learning_rate": 9.381666154424226e-05, "loss": 0.0126, "step": 12120 }, { "action_loss": 0.012530148960649967, "epoch": 10.899280575539569, "step": 12120 }, { "epoch": 10.908273381294965, "grad_norm": 0.2577100396156311, "learning_rate": 9.380338013476157e-05, "loss": 0.0111, "step": 12130 }, { "action_loss": 0.02012321911752224, "epoch": 10.908273381294965, "step": 12130 }, { "epoch": 10.917266187050359, "grad_norm": 0.249300017952919, "learning_rate": 9.379008541897054e-05, "loss": 0.0119, "step": 12140 }, { "action_loss": 0.0055541894398629665, "epoch": 10.917266187050359, "step": 12140 }, { "epoch": 10.926258992805755, "grad_norm": 0.1997479349374771, "learning_rate": 9.377677740090777e-05, "loss": 0.014, "step": 12150 }, { "action_loss": 0.00912472140043974, "epoch": 10.926258992805755, "step": 12150 }, { "epoch": 10.93525179856115, "grad_norm": 0.26821020245552063, "learning_rate": 9.376345608461588e-05, "loss": 0.012, "step": 12160 }, { "action_loss": 0.009067613631486893, "epoch": 10.93525179856115, "step": 12160 }, { "epoch": 10.944244604316546, "grad_norm": 0.24310313165187836, "learning_rate": 9.375012147414155e-05, "loss": 0.0105, "step": 12170 }, { "action_loss": 0.016236087307333946, "epoch": 10.944244604316546, "step": 12170 }, { "epoch": 10.953237410071942, "grad_norm": 0.18759043514728546, "learning_rate": 9.373677357353545e-05, "loss": 0.0116, "step": 12180 }, { "action_loss": 0.013622519560158253, "epoch": 10.953237410071942, "step": 12180 }, { "epoch": 10.962230215827338, "grad_norm": 0.22238586843013763, "learning_rate": 9.372341238685237e-05, "loss": 0.0092, "step": 12190 }, { "action_loss": 0.007018445059657097, "epoch": 10.962230215827338, "step": 12190 }, { "epoch": 10.971223021582734, "grad_norm": 0.2155579924583435, "learning_rate": 9.371003791815102e-05, "loss": 0.0095, "step": 12200 }, { "action_loss": 0.004172974266111851, "epoch": 10.971223021582734, "step": 12200 }, { "epoch": 10.98021582733813, "grad_norm": 0.2383420765399933, "learning_rate": 9.369665017149429e-05, "loss": 0.0101, "step": 12210 }, { "action_loss": 0.013356275856494904, "epoch": 10.98021582733813, "step": 12210 }, { "epoch": 10.989208633093526, "grad_norm": 0.18747790157794952, "learning_rate": 9.368324915094895e-05, "loss": 0.0086, "step": 12220 }, { "action_loss": 0.01454315334558487, "epoch": 10.989208633093526, "step": 12220 }, { "epoch": 10.998201438848922, "grad_norm": 0.2157469540834427, "learning_rate": 9.366983486058591e-05, "loss": 0.0139, "step": 12230 }, { "action_loss": 0.010511855594813824, "epoch": 10.998201438848922, "step": 12230 }, { "epoch": 11.007194244604317, "grad_norm": 0.2523621916770935, "learning_rate": 9.365640730448009e-05, "loss": 0.0117, "step": 12240 }, { "action_loss": 0.007300753612071276, "epoch": 11.007194244604317, "step": 12240 }, { "epoch": 11.016187050359711, "grad_norm": 0.4295431971549988, "learning_rate": 9.36429664867104e-05, "loss": 0.0137, "step": 12250 }, { "action_loss": 0.008021090179681778, "epoch": 11.016187050359711, "step": 12250 }, { "epoch": 11.025179856115107, "grad_norm": 0.29219427704811096, "learning_rate": 9.362951241135982e-05, "loss": 0.0124, "step": 12260 }, { "action_loss": 0.014677825383841991, "epoch": 11.025179856115107, "step": 12260 }, { "epoch": 11.034172661870503, "grad_norm": 0.24315740168094635, "learning_rate": 9.361604508251534e-05, "loss": 0.0112, "step": 12270 }, { "action_loss": 0.008657961152493954, "epoch": 11.034172661870503, "step": 12270 }, { "epoch": 11.043165467625899, "grad_norm": 0.15249501168727875, "learning_rate": 9.360256450426799e-05, "loss": 0.0097, "step": 12280 }, { "action_loss": 0.011071895249187946, "epoch": 11.043165467625899, "step": 12280 }, { "epoch": 11.052158273381295, "grad_norm": 0.23468761146068573, "learning_rate": 9.358907068071279e-05, "loss": 0.0103, "step": 12290 }, { "action_loss": 0.007087660487741232, "epoch": 11.052158273381295, "step": 12290 }, { "epoch": 11.06115107913669, "grad_norm": 0.21666784584522247, "learning_rate": 9.357556361594882e-05, "loss": 0.0113, "step": 12300 }, { "action_loss": 0.00826634094119072, "epoch": 11.06115107913669, "step": 12300 }, { "epoch": 11.070143884892087, "grad_norm": 0.22644583880901337, "learning_rate": 9.356204331407917e-05, "loss": 0.0133, "step": 12310 }, { "action_loss": 0.057997360825538635, "epoch": 11.070143884892087, "step": 12310 }, { "epoch": 11.079136690647482, "grad_norm": 0.2392810732126236, "learning_rate": 9.354850977921094e-05, "loss": 0.0174, "step": 12320 }, { "action_loss": 0.020433582365512848, "epoch": 11.079136690647482, "step": 12320 }, { "epoch": 11.088129496402878, "grad_norm": 0.18660950660705566, "learning_rate": 9.353496301545529e-05, "loss": 0.0143, "step": 12330 }, { "action_loss": 0.0090286610648036, "epoch": 11.088129496402878, "step": 12330 }, { "epoch": 11.097122302158274, "grad_norm": 0.24792979657649994, "learning_rate": 9.352140302692733e-05, "loss": 0.014, "step": 12340 }, { "action_loss": 0.007097234483808279, "epoch": 11.097122302158274, "step": 12340 }, { "epoch": 11.10611510791367, "grad_norm": 0.17772747576236725, "learning_rate": 9.350782981774627e-05, "loss": 0.0127, "step": 12350 }, { "action_loss": 0.007280575577169657, "epoch": 11.10611510791367, "step": 12350 }, { "epoch": 11.115107913669064, "grad_norm": 0.24630776047706604, "learning_rate": 9.349424339203526e-05, "loss": 0.012, "step": 12360 }, { "action_loss": 0.009998099878430367, "epoch": 11.115107913669064, "step": 12360 }, { "epoch": 11.12410071942446, "grad_norm": 0.21328900754451752, "learning_rate": 9.34806437539215e-05, "loss": 0.0133, "step": 12370 }, { "action_loss": 0.017094038426876068, "epoch": 11.12410071942446, "step": 12370 }, { "epoch": 11.133093525179856, "grad_norm": 0.298000305891037, "learning_rate": 9.346703090753622e-05, "loss": 0.0152, "step": 12380 }, { "action_loss": 0.01814722828567028, "epoch": 11.133093525179856, "step": 12380 }, { "epoch": 11.142086330935252, "grad_norm": 0.30183127522468567, "learning_rate": 9.345340485701461e-05, "loss": 0.0159, "step": 12390 }, { "action_loss": 0.005897633731365204, "epoch": 11.142086330935252, "step": 12390 }, { "epoch": 11.151079136690647, "grad_norm": 0.337181031703949, "learning_rate": 9.343976560649595e-05, "loss": 0.0114, "step": 12400 }, { "action_loss": 0.010236662812530994, "epoch": 11.151079136690647, "step": 12400 }, { "epoch": 11.160071942446043, "grad_norm": 0.2844184935092926, "learning_rate": 9.342611316012344e-05, "loss": 0.0123, "step": 12410 }, { "action_loss": 0.013678297400474548, "epoch": 11.160071942446043, "step": 12410 }, { "epoch": 11.16906474820144, "grad_norm": 0.34161999821662903, "learning_rate": 9.341244752204437e-05, "loss": 0.013, "step": 12420 }, { "action_loss": 0.016064735129475594, "epoch": 11.16906474820144, "step": 12420 }, { "epoch": 11.178057553956835, "grad_norm": 0.158321812748909, "learning_rate": 9.339876869640995e-05, "loss": 0.0117, "step": 12430 }, { "action_loss": 0.012173702009022236, "epoch": 11.178057553956835, "step": 12430 }, { "epoch": 11.18705035971223, "grad_norm": 0.21908201277256012, "learning_rate": 9.33850766873755e-05, "loss": 0.013, "step": 12440 }, { "action_loss": 0.021311094984412193, "epoch": 11.18705035971223, "step": 12440 }, { "epoch": 11.196043165467627, "grad_norm": 0.267159640789032, "learning_rate": 9.337137149910028e-05, "loss": 0.0145, "step": 12450 }, { "action_loss": 0.009980720467865467, "epoch": 11.196043165467627, "step": 12450 }, { "epoch": 11.20503597122302, "grad_norm": 0.27515873312950134, "learning_rate": 9.335765313574753e-05, "loss": 0.0107, "step": 12460 }, { "action_loss": 0.00804548803716898, "epoch": 11.20503597122302, "step": 12460 }, { "epoch": 11.214028776978417, "grad_norm": 0.22428317368030548, "learning_rate": 9.334392160148457e-05, "loss": 0.011, "step": 12470 }, { "action_loss": 0.00894415657967329, "epoch": 11.214028776978417, "step": 12470 }, { "epoch": 11.223021582733812, "grad_norm": 0.2996908128261566, "learning_rate": 9.333017690048264e-05, "loss": 0.0119, "step": 12480 }, { "action_loss": 0.00963718444108963, "epoch": 11.223021582733812, "step": 12480 }, { "epoch": 11.232014388489208, "grad_norm": 0.27322882413864136, "learning_rate": 9.331641903691706e-05, "loss": 0.0121, "step": 12490 }, { "action_loss": 0.02152620255947113, "epoch": 11.232014388489208, "step": 12490 }, { "epoch": 11.241007194244604, "grad_norm": 0.2253570258617401, "learning_rate": 9.330264801496707e-05, "loss": 0.0178, "step": 12500 }, { "action_loss": 0.02521231584250927, "epoch": 11.241007194244604, "step": 12500 }, { "epoch": 11.25, "grad_norm": 0.2373943030834198, "learning_rate": 9.328886383881594e-05, "loss": 0.0136, "step": 12510 }, { "action_loss": 0.010687462985515594, "epoch": 11.25, "step": 12510 }, { "epoch": 11.258992805755396, "grad_norm": 0.3247913122177124, "learning_rate": 9.327506651265095e-05, "loss": 0.0157, "step": 12520 }, { "action_loss": 0.013040532357990742, "epoch": 11.258992805755396, "step": 12520 }, { "epoch": 11.267985611510792, "grad_norm": 0.21769241988658905, "learning_rate": 9.326125604066338e-05, "loss": 0.0141, "step": 12530 }, { "action_loss": 0.010813474655151367, "epoch": 11.267985611510792, "step": 12530 }, { "epoch": 11.276978417266188, "grad_norm": 0.24335241317749023, "learning_rate": 9.324743242704847e-05, "loss": 0.0136, "step": 12540 }, { "action_loss": 0.010574751533567905, "epoch": 11.276978417266188, "step": 12540 }, { "epoch": 11.285971223021583, "grad_norm": 0.21646450459957123, "learning_rate": 9.323359567600546e-05, "loss": 0.013, "step": 12550 }, { "action_loss": 0.03561858832836151, "epoch": 11.285971223021583, "step": 12550 }, { "epoch": 11.29496402877698, "grad_norm": 0.3109317421913147, "learning_rate": 9.321974579173761e-05, "loss": 0.0132, "step": 12560 }, { "action_loss": 0.008018912747502327, "epoch": 11.29496402877698, "step": 12560 }, { "epoch": 11.303956834532373, "grad_norm": 0.36417990922927856, "learning_rate": 9.320588277845213e-05, "loss": 0.0189, "step": 12570 }, { "action_loss": 0.013679797761142254, "epoch": 11.303956834532373, "step": 12570 }, { "epoch": 11.31294964028777, "grad_norm": 0.26456180214881897, "learning_rate": 9.319200664036026e-05, "loss": 0.0115, "step": 12580 }, { "action_loss": 0.004963792860507965, "epoch": 11.31294964028777, "step": 12580 }, { "epoch": 11.321942446043165, "grad_norm": 0.29199472069740295, "learning_rate": 9.31781173816772e-05, "loss": 0.0111, "step": 12590 }, { "action_loss": 0.022594278678297997, "epoch": 11.321942446043165, "step": 12590 }, { "epoch": 11.33093525179856, "grad_norm": 0.17772331833839417, "learning_rate": 9.316421500662212e-05, "loss": 0.0137, "step": 12600 }, { "action_loss": 0.004244859330356121, "epoch": 11.33093525179856, "step": 12600 }, { "epoch": 11.339928057553957, "grad_norm": 0.24226108193397522, "learning_rate": 9.31502995194182e-05, "loss": 0.0117, "step": 12610 }, { "action_loss": 0.01218556147068739, "epoch": 11.339928057553957, "step": 12610 }, { "epoch": 11.348920863309353, "grad_norm": 0.2749866247177124, "learning_rate": 9.31363709242926e-05, "loss": 0.014, "step": 12620 }, { "action_loss": 0.010798218660056591, "epoch": 11.348920863309353, "step": 12620 }, { "epoch": 11.357913669064748, "grad_norm": 0.24259242415428162, "learning_rate": 9.312242922547647e-05, "loss": 0.0091, "step": 12630 }, { "action_loss": 0.011391867883503437, "epoch": 11.357913669064748, "step": 12630 }, { "epoch": 11.366906474820144, "grad_norm": 0.3766513466835022, "learning_rate": 9.310847442720492e-05, "loss": 0.0126, "step": 12640 }, { "action_loss": 0.010065059177577496, "epoch": 11.366906474820144, "step": 12640 }, { "epoch": 11.37589928057554, "grad_norm": 0.2353486567735672, "learning_rate": 9.309450653371706e-05, "loss": 0.011, "step": 12650 }, { "action_loss": 0.0156943928450346, "epoch": 11.37589928057554, "step": 12650 }, { "epoch": 11.384892086330936, "grad_norm": 0.31464219093322754, "learning_rate": 9.308052554925595e-05, "loss": 0.013, "step": 12660 }, { "action_loss": 0.031301889568567276, "epoch": 11.384892086330936, "step": 12660 }, { "epoch": 11.39388489208633, "grad_norm": 0.3397119641304016, "learning_rate": 9.306653147806867e-05, "loss": 0.0143, "step": 12670 }, { "action_loss": 0.00417751632630825, "epoch": 11.39388489208633, "step": 12670 }, { "epoch": 11.402877697841726, "grad_norm": 0.24968095123767853, "learning_rate": 9.305252432440622e-05, "loss": 0.0128, "step": 12680 }, { "action_loss": 0.010597646236419678, "epoch": 11.402877697841726, "step": 12680 }, { "epoch": 11.411870503597122, "grad_norm": 0.2923843264579773, "learning_rate": 9.303850409252361e-05, "loss": 0.0123, "step": 12690 }, { "action_loss": 0.007286036387085915, "epoch": 11.411870503597122, "step": 12690 }, { "epoch": 11.420863309352518, "grad_norm": 0.306421160697937, "learning_rate": 9.302447078667985e-05, "loss": 0.0109, "step": 12700 }, { "action_loss": 0.014869936741888523, "epoch": 11.420863309352518, "step": 12700 }, { "epoch": 11.429856115107913, "grad_norm": 0.31967493891716003, "learning_rate": 9.301042441113783e-05, "loss": 0.012, "step": 12710 }, { "action_loss": 0.011156692169606686, "epoch": 11.429856115107913, "step": 12710 }, { "epoch": 11.43884892086331, "grad_norm": 0.21685394644737244, "learning_rate": 9.299636497016451e-05, "loss": 0.0144, "step": 12720 }, { "action_loss": 0.023473970592021942, "epoch": 11.43884892086331, "step": 12720 }, { "epoch": 11.447841726618705, "grad_norm": 0.22883249819278717, "learning_rate": 9.298229246803076e-05, "loss": 0.0125, "step": 12730 }, { "action_loss": 0.006666136439889669, "epoch": 11.447841726618705, "step": 12730 }, { "epoch": 11.456834532374101, "grad_norm": 0.27738428115844727, "learning_rate": 9.296820690901144e-05, "loss": 0.0124, "step": 12740 }, { "action_loss": 0.0059501491487026215, "epoch": 11.456834532374101, "step": 12740 }, { "epoch": 11.465827338129497, "grad_norm": 0.24129392206668854, "learning_rate": 9.295410829738539e-05, "loss": 0.0113, "step": 12750 }, { "action_loss": 0.014142495580017567, "epoch": 11.465827338129497, "step": 12750 }, { "epoch": 11.474820143884893, "grad_norm": 0.28635868430137634, "learning_rate": 9.293999663743535e-05, "loss": 0.0111, "step": 12760 }, { "action_loss": 0.009554491378366947, "epoch": 11.474820143884893, "step": 12760 }, { "epoch": 11.483812949640289, "grad_norm": 0.27012717723846436, "learning_rate": 9.292587193344813e-05, "loss": 0.015, "step": 12770 }, { "action_loss": 0.0053887139074504375, "epoch": 11.483812949640289, "step": 12770 }, { "epoch": 11.492805755395683, "grad_norm": 0.2431098073720932, "learning_rate": 9.291173418971437e-05, "loss": 0.012, "step": 12780 }, { "action_loss": 0.01620277389883995, "epoch": 11.492805755395683, "step": 12780 }, { "epoch": 11.501798561151078, "grad_norm": 0.2163645625114441, "learning_rate": 9.28975834105288e-05, "loss": 0.0126, "step": 12790 }, { "action_loss": 0.019971448928117752, "epoch": 11.501798561151078, "step": 12790 }, { "epoch": 11.510791366906474, "grad_norm": 0.2119782418012619, "learning_rate": 9.288341960019004e-05, "loss": 0.013, "step": 12800 }, { "action_loss": 0.006142882164567709, "epoch": 11.510791366906474, "step": 12800 }, { "epoch": 11.51978417266187, "grad_norm": 0.21699465811252594, "learning_rate": 9.286924276300067e-05, "loss": 0.0149, "step": 12810 }, { "action_loss": 0.006560306530445814, "epoch": 11.51978417266187, "step": 12810 }, { "epoch": 11.528776978417266, "grad_norm": 0.2756546139717102, "learning_rate": 9.285505290326726e-05, "loss": 0.0122, "step": 12820 }, { "action_loss": 0.007467752788215876, "epoch": 11.528776978417266, "step": 12820 }, { "epoch": 11.537769784172662, "grad_norm": 0.19824539124965668, "learning_rate": 9.284085002530027e-05, "loss": 0.0099, "step": 12830 }, { "action_loss": 0.009666386991739273, "epoch": 11.537769784172662, "step": 12830 }, { "epoch": 11.546762589928058, "grad_norm": 0.33725523948669434, "learning_rate": 9.282663413341422e-05, "loss": 0.0161, "step": 12840 }, { "action_loss": 0.00650668004527688, "epoch": 11.546762589928058, "step": 12840 }, { "epoch": 11.555755395683454, "grad_norm": 0.26766619086265564, "learning_rate": 9.281240523192747e-05, "loss": 0.01, "step": 12850 }, { "action_loss": 0.00769910030066967, "epoch": 11.555755395683454, "step": 12850 }, { "epoch": 11.56474820143885, "grad_norm": 0.2506140172481537, "learning_rate": 9.279816332516242e-05, "loss": 0.0107, "step": 12860 }, { "action_loss": 0.017151402309536934, "epoch": 11.56474820143885, "step": 12860 }, { "epoch": 11.573741007194245, "grad_norm": 0.2276519387960434, "learning_rate": 9.278390841744536e-05, "loss": 0.0132, "step": 12870 }, { "action_loss": 0.006606527138501406, "epoch": 11.573741007194245, "step": 12870 }, { "epoch": 11.582733812949641, "grad_norm": 0.20398865640163422, "learning_rate": 9.276964051310658e-05, "loss": 0.0122, "step": 12880 }, { "action_loss": 0.03511577472090721, "epoch": 11.582733812949641, "step": 12880 }, { "epoch": 11.591726618705035, "grad_norm": 0.14954577386379242, "learning_rate": 9.275535961648027e-05, "loss": 0.0095, "step": 12890 }, { "action_loss": 0.009906775318086147, "epoch": 11.591726618705035, "step": 12890 }, { "epoch": 11.600719424460431, "grad_norm": 0.1871732771396637, "learning_rate": 9.274106573190459e-05, "loss": 0.0097, "step": 12900 }, { "action_loss": 0.0093820346519351, "epoch": 11.600719424460431, "step": 12900 }, { "epoch": 11.609712230215827, "grad_norm": 0.19669052958488464, "learning_rate": 9.272675886372168e-05, "loss": 0.0087, "step": 12910 }, { "action_loss": 0.023208187893033028, "epoch": 11.609712230215827, "step": 12910 }, { "epoch": 11.618705035971223, "grad_norm": 0.1711721122264862, "learning_rate": 9.271243901627754e-05, "loss": 0.0125, "step": 12920 }, { "action_loss": 0.009959659539163113, "epoch": 11.618705035971223, "step": 12920 }, { "epoch": 11.627697841726619, "grad_norm": 0.3421352505683899, "learning_rate": 9.269810619392219e-05, "loss": 0.0104, "step": 12930 }, { "action_loss": 0.012014958076179028, "epoch": 11.627697841726619, "step": 12930 }, { "epoch": 11.636690647482014, "grad_norm": 0.23093609511852264, "learning_rate": 9.268376040100955e-05, "loss": 0.0167, "step": 12940 }, { "action_loss": 0.03326217830181122, "epoch": 11.636690647482014, "step": 12940 }, { "epoch": 11.64568345323741, "grad_norm": 0.2429996281862259, "learning_rate": 9.266940164189752e-05, "loss": 0.0107, "step": 12950 }, { "action_loss": 0.02836751751601696, "epoch": 11.64568345323741, "step": 12950 }, { "epoch": 11.654676258992806, "grad_norm": 0.35956010222435, "learning_rate": 9.265502992094787e-05, "loss": 0.0129, "step": 12960 }, { "action_loss": 0.009586526080965996, "epoch": 11.654676258992806, "step": 12960 }, { "epoch": 11.663669064748202, "grad_norm": 0.25157037377357483, "learning_rate": 9.264064524252638e-05, "loss": 0.0094, "step": 12970 }, { "action_loss": 0.006156706716865301, "epoch": 11.663669064748202, "step": 12970 }, { "epoch": 11.672661870503598, "grad_norm": 0.16550837457180023, "learning_rate": 9.262624761100271e-05, "loss": 0.0142, "step": 12980 }, { "action_loss": 0.005836793687194586, "epoch": 11.672661870503598, "step": 12980 }, { "epoch": 11.681654676258994, "grad_norm": 0.22809529304504395, "learning_rate": 9.261183703075051e-05, "loss": 0.0113, "step": 12990 }, { "action_loss": 0.010073759593069553, "epoch": 11.681654676258994, "step": 12990 }, { "epoch": 11.690647482014388, "grad_norm": 0.2879735827445984, "learning_rate": 9.259741350614733e-05, "loss": 0.0111, "step": 13000 }, { "action_loss": 0.009551134891808033, "epoch": 11.690647482014388, "step": 13000 }, { "epoch": 11.699640287769784, "grad_norm": 0.22697895765304565, "learning_rate": 9.258297704157464e-05, "loss": 0.0114, "step": 13010 }, { "action_loss": 0.007721488829702139, "epoch": 11.699640287769784, "step": 13010 }, { "epoch": 11.70863309352518, "grad_norm": 0.29763567447662354, "learning_rate": 9.256852764141786e-05, "loss": 0.0133, "step": 13020 }, { "action_loss": 0.00854636449366808, "epoch": 11.70863309352518, "step": 13020 }, { "epoch": 11.717625899280575, "grad_norm": 0.24202404916286469, "learning_rate": 9.255406531006634e-05, "loss": 0.0099, "step": 13030 }, { "action_loss": 0.013083945959806442, "epoch": 11.717625899280575, "step": 13030 }, { "epoch": 11.726618705035971, "grad_norm": 0.31478971242904663, "learning_rate": 9.253959005191335e-05, "loss": 0.0148, "step": 13040 }, { "action_loss": 0.005998184904456139, "epoch": 11.726618705035971, "step": 13040 }, { "epoch": 11.735611510791367, "grad_norm": 0.23908017575740814, "learning_rate": 9.25251018713561e-05, "loss": 0.0139, "step": 13050 }, { "action_loss": 0.0099075548350811, "epoch": 11.735611510791367, "step": 13050 }, { "epoch": 11.744604316546763, "grad_norm": 0.20250548422336578, "learning_rate": 9.251060077279571e-05, "loss": 0.0107, "step": 13060 }, { "action_loss": 0.0071225292049348354, "epoch": 11.744604316546763, "step": 13060 }, { "epoch": 11.753597122302159, "grad_norm": 0.27801352739334106, "learning_rate": 9.249608676063724e-05, "loss": 0.0116, "step": 13070 }, { "action_loss": 0.05696031451225281, "epoch": 11.753597122302159, "step": 13070 }, { "epoch": 11.762589928057555, "grad_norm": 0.2152371108531952, "learning_rate": 9.248155983928964e-05, "loss": 0.0145, "step": 13080 }, { "action_loss": 0.012199019081890583, "epoch": 11.762589928057555, "step": 13080 }, { "epoch": 11.77158273381295, "grad_norm": 0.26193368434906006, "learning_rate": 9.246702001316583e-05, "loss": 0.0117, "step": 13090 }, { "action_loss": 0.017383143305778503, "epoch": 11.77158273381295, "step": 13090 }, { "epoch": 11.780575539568346, "grad_norm": 0.28330838680267334, "learning_rate": 9.245246728668262e-05, "loss": 0.0127, "step": 13100 }, { "action_loss": 0.011070680804550648, "epoch": 11.780575539568346, "step": 13100 }, { "epoch": 11.78956834532374, "grad_norm": 0.33194980025291443, "learning_rate": 9.243790166426073e-05, "loss": 0.0137, "step": 13110 }, { "action_loss": 0.013420328497886658, "epoch": 11.78956834532374, "step": 13110 }, { "epoch": 11.798561151079136, "grad_norm": 0.1804259717464447, "learning_rate": 9.242332315032484e-05, "loss": 0.0108, "step": 13120 }, { "action_loss": 0.005434712860733271, "epoch": 11.798561151079136, "step": 13120 }, { "epoch": 11.807553956834532, "grad_norm": 0.2874605655670166, "learning_rate": 9.240873174930349e-05, "loss": 0.0157, "step": 13130 }, { "action_loss": 0.01469593495130539, "epoch": 11.807553956834532, "step": 13130 }, { "epoch": 11.816546762589928, "grad_norm": 0.3673083782196045, "learning_rate": 9.239412746562917e-05, "loss": 0.0175, "step": 13140 }, { "action_loss": 0.03268320485949516, "epoch": 11.816546762589928, "step": 13140 }, { "epoch": 11.825539568345324, "grad_norm": 0.3207368552684784, "learning_rate": 9.237951030373828e-05, "loss": 0.0201, "step": 13150 }, { "action_loss": 0.02386474609375, "epoch": 11.825539568345324, "step": 13150 }, { "epoch": 11.83453237410072, "grad_norm": 0.28133416175842285, "learning_rate": 9.236488026807113e-05, "loss": 0.0188, "step": 13160 }, { "action_loss": 0.011382083408534527, "epoch": 11.83453237410072, "step": 13160 }, { "epoch": 11.843525179856115, "grad_norm": 0.25425177812576294, "learning_rate": 9.235023736307193e-05, "loss": 0.0137, "step": 13170 }, { "action_loss": 0.010013873688876629, "epoch": 11.843525179856115, "step": 13170 }, { "epoch": 11.852517985611511, "grad_norm": 0.300441175699234, "learning_rate": 9.233558159318881e-05, "loss": 0.0131, "step": 13180 }, { "action_loss": 0.007067045196890831, "epoch": 11.852517985611511, "step": 13180 }, { "epoch": 11.861510791366907, "grad_norm": 0.24423456192016602, "learning_rate": 9.232091296287382e-05, "loss": 0.0105, "step": 13190 }, { "action_loss": 0.008456406183540821, "epoch": 11.861510791366907, "step": 13190 }, { "epoch": 11.870503597122303, "grad_norm": 0.2685339152812958, "learning_rate": 9.230623147658288e-05, "loss": 0.0123, "step": 13200 }, { "action_loss": 0.00880520697683096, "epoch": 11.870503597122303, "step": 13200 }, { "epoch": 11.879496402877697, "grad_norm": 0.2012057602405548, "learning_rate": 9.229153713877586e-05, "loss": 0.0098, "step": 13210 }, { "action_loss": 0.016232484951615334, "epoch": 11.879496402877697, "step": 13210 }, { "epoch": 11.888489208633093, "grad_norm": 0.16789649426937103, "learning_rate": 9.227682995391649e-05, "loss": 0.0132, "step": 13220 }, { "action_loss": 0.015352356247603893, "epoch": 11.888489208633093, "step": 13220 }, { "epoch": 11.897482014388489, "grad_norm": 0.3280501067638397, "learning_rate": 9.226210992647243e-05, "loss": 0.0136, "step": 13230 }, { "action_loss": 0.009051922708749771, "epoch": 11.897482014388489, "step": 13230 }, { "epoch": 11.906474820143885, "grad_norm": 0.2523534893989563, "learning_rate": 9.224737706091525e-05, "loss": 0.0099, "step": 13240 }, { "action_loss": 0.02901877462863922, "epoch": 11.906474820143885, "step": 13240 }, { "epoch": 11.91546762589928, "grad_norm": 0.4514784812927246, "learning_rate": 9.223263136172039e-05, "loss": 0.0129, "step": 13250 }, { "action_loss": 0.047158557921648026, "epoch": 11.91546762589928, "step": 13250 }, { "epoch": 11.924460431654676, "grad_norm": 0.26622310280799866, "learning_rate": 9.22178728333672e-05, "loss": 0.0156, "step": 13260 }, { "action_loss": 0.007948927581310272, "epoch": 11.924460431654676, "step": 13260 }, { "epoch": 11.933453237410072, "grad_norm": 0.22075802087783813, "learning_rate": 9.220310148033897e-05, "loss": 0.0114, "step": 13270 }, { "action_loss": 0.011694841086864471, "epoch": 11.933453237410072, "step": 13270 }, { "epoch": 11.942446043165468, "grad_norm": 0.2221226543188095, "learning_rate": 9.21883173071228e-05, "loss": 0.0119, "step": 13280 }, { "action_loss": 0.00866122916340828, "epoch": 11.942446043165468, "step": 13280 }, { "epoch": 11.951438848920864, "grad_norm": 0.23074643313884735, "learning_rate": 9.217352031820976e-05, "loss": 0.0088, "step": 13290 }, { "action_loss": 0.020118890330195427, "epoch": 11.951438848920864, "step": 13290 }, { "epoch": 11.96043165467626, "grad_norm": 0.17807811498641968, "learning_rate": 9.215871051809477e-05, "loss": 0.0117, "step": 13300 }, { "action_loss": 0.003357183188199997, "epoch": 11.96043165467626, "step": 13300 }, { "epoch": 11.969424460431654, "grad_norm": 0.29887664318084717, "learning_rate": 9.214388791127666e-05, "loss": 0.0119, "step": 13310 }, { "action_loss": 0.021086186170578003, "epoch": 11.969424460431654, "step": 13310 }, { "epoch": 11.97841726618705, "grad_norm": 0.15928511321544647, "learning_rate": 9.212905250225814e-05, "loss": 0.0116, "step": 13320 }, { "action_loss": 0.008145565167069435, "epoch": 11.97841726618705, "step": 13320 }, { "epoch": 11.987410071942445, "grad_norm": 0.34926745295524597, "learning_rate": 9.211420429554583e-05, "loss": 0.0114, "step": 13330 }, { "action_loss": 0.004602735862135887, "epoch": 11.987410071942445, "step": 13330 }, { "epoch": 11.996402877697841, "grad_norm": 0.21624675393104553, "learning_rate": 9.209934329565022e-05, "loss": 0.0093, "step": 13340 }, { "action_loss": 0.003711448283866048, "epoch": 11.996402877697841, "step": 13340 }, { "epoch": 12.005395683453237, "grad_norm": 0.3764955699443817, "learning_rate": 9.208446950708568e-05, "loss": 0.0145, "step": 13350 }, { "action_loss": 0.028440484777092934, "epoch": 12.005395683453237, "step": 13350 }, { "epoch": 12.014388489208633, "grad_norm": 0.2361287772655487, "learning_rate": 9.20695829343705e-05, "loss": 0.0124, "step": 13360 }, { "action_loss": 0.004994475282728672, "epoch": 12.014388489208633, "step": 13360 }, { "epoch": 12.023381294964029, "grad_norm": 0.25310614705085754, "learning_rate": 9.205468358202678e-05, "loss": 0.0113, "step": 13370 }, { "action_loss": 0.010934297926723957, "epoch": 12.023381294964029, "step": 13370 }, { "epoch": 12.032374100719425, "grad_norm": 0.32450059056282043, "learning_rate": 9.203977145458059e-05, "loss": 0.0146, "step": 13380 }, { "action_loss": 0.016277117654681206, "epoch": 12.032374100719425, "step": 13380 }, { "epoch": 12.04136690647482, "grad_norm": 0.21429571509361267, "learning_rate": 9.202484655656182e-05, "loss": 0.0129, "step": 13390 }, { "action_loss": 0.02328612469136715, "epoch": 12.04136690647482, "step": 13390 }, { "epoch": 12.050359712230216, "grad_norm": 0.2799832224845886, "learning_rate": 9.200990889250427e-05, "loss": 0.0149, "step": 13400 }, { "action_loss": 0.00663284445181489, "epoch": 12.050359712230216, "step": 13400 }, { "epoch": 12.059352517985612, "grad_norm": 0.2247687578201294, "learning_rate": 9.19949584669456e-05, "loss": 0.0115, "step": 13410 }, { "action_loss": 0.007603574078530073, "epoch": 12.059352517985612, "step": 13410 }, { "epoch": 12.068345323741006, "grad_norm": 0.2622045874595642, "learning_rate": 9.197999528442738e-05, "loss": 0.0144, "step": 13420 }, { "action_loss": 0.011339555494487286, "epoch": 12.068345323741006, "step": 13420 }, { "epoch": 12.077338129496402, "grad_norm": 0.20330609381198883, "learning_rate": 9.196501934949499e-05, "loss": 0.0117, "step": 13430 }, { "action_loss": 0.006732931826263666, "epoch": 12.077338129496402, "step": 13430 }, { "epoch": 12.086330935251798, "grad_norm": 0.3156539499759674, "learning_rate": 9.195003066669776e-05, "loss": 0.0122, "step": 13440 }, { "action_loss": 0.008520550094544888, "epoch": 12.086330935251798, "step": 13440 }, { "epoch": 12.095323741007194, "grad_norm": 0.22749853134155273, "learning_rate": 9.193502924058884e-05, "loss": 0.0129, "step": 13450 }, { "action_loss": 0.0093768909573555, "epoch": 12.095323741007194, "step": 13450 }, { "epoch": 12.10431654676259, "grad_norm": 0.3187251389026642, "learning_rate": 9.192001507572526e-05, "loss": 0.016, "step": 13460 }, { "action_loss": 0.019473331049084663, "epoch": 12.10431654676259, "step": 13460 }, { "epoch": 12.113309352517986, "grad_norm": 0.3849773406982422, "learning_rate": 9.190498817666793e-05, "loss": 0.0173, "step": 13470 }, { "action_loss": 0.025170346722006798, "epoch": 12.113309352517986, "step": 13470 }, { "epoch": 12.122302158273381, "grad_norm": 0.2297884225845337, "learning_rate": 9.188994854798163e-05, "loss": 0.0141, "step": 13480 }, { "action_loss": 0.009828987531363964, "epoch": 12.122302158273381, "step": 13480 }, { "epoch": 12.131294964028777, "grad_norm": 0.20044292509555817, "learning_rate": 9.187489619423499e-05, "loss": 0.0119, "step": 13490 }, { "action_loss": 0.008510943502187729, "epoch": 12.131294964028777, "step": 13490 }, { "epoch": 12.140287769784173, "grad_norm": 0.2497270554304123, "learning_rate": 9.185983112000056e-05, "loss": 0.0116, "step": 13500 }, { "action_loss": 0.005108103156089783, "epoch": 12.140287769784173, "step": 13500 }, { "epoch": 12.149280575539569, "grad_norm": 0.24180322885513306, "learning_rate": 9.184475332985464e-05, "loss": 0.0106, "step": 13510 }, { "action_loss": 0.011296920478343964, "epoch": 12.149280575539569, "step": 13510 }, { "epoch": 12.158273381294965, "grad_norm": 0.24624711275100708, "learning_rate": 9.182966282837754e-05, "loss": 0.0122, "step": 13520 }, { "action_loss": 0.008491276763379574, "epoch": 12.158273381294965, "step": 13520 }, { "epoch": 12.167266187050359, "grad_norm": 0.19368144869804382, "learning_rate": 9.18145596201533e-05, "loss": 0.008, "step": 13530 }, { "action_loss": 0.008832038380205631, "epoch": 12.167266187050359, "step": 13530 }, { "epoch": 12.176258992805755, "grad_norm": 0.2307862937450409, "learning_rate": 9.179944370976991e-05, "loss": 0.0113, "step": 13540 }, { "action_loss": 0.012097594328224659, "epoch": 12.176258992805755, "step": 13540 }, { "epoch": 12.18525179856115, "grad_norm": 0.24410530924797058, "learning_rate": 9.178431510181918e-05, "loss": 0.0096, "step": 13550 }, { "action_loss": 0.0065515548922121525, "epoch": 12.18525179856115, "step": 13550 }, { "epoch": 12.194244604316546, "grad_norm": 0.23421710729599, "learning_rate": 9.176917380089675e-05, "loss": 0.0134, "step": 13560 }, { "action_loss": 0.0050657968968153, "epoch": 12.194244604316546, "step": 13560 }, { "epoch": 12.203237410071942, "grad_norm": 0.220724955201149, "learning_rate": 9.175401981160219e-05, "loss": 0.0128, "step": 13570 }, { "action_loss": 0.00551524618640542, "epoch": 12.203237410071942, "step": 13570 }, { "epoch": 12.212230215827338, "grad_norm": 0.2243482768535614, "learning_rate": 9.173885313853885e-05, "loss": 0.01, "step": 13580 }, { "action_loss": 0.007977188564836979, "epoch": 12.212230215827338, "step": 13580 }, { "epoch": 12.221223021582734, "grad_norm": 0.37192586064338684, "learning_rate": 9.172367378631398e-05, "loss": 0.0147, "step": 13590 }, { "action_loss": 0.007737305480986834, "epoch": 12.221223021582734, "step": 13590 }, { "epoch": 12.23021582733813, "grad_norm": 0.2587644159793854, "learning_rate": 9.170848175953866e-05, "loss": 0.0174, "step": 13600 }, { "action_loss": 0.0049790069460868835, "epoch": 12.23021582733813, "step": 13600 }, { "epoch": 12.239208633093526, "grad_norm": 0.35283705592155457, "learning_rate": 9.169327706282784e-05, "loss": 0.0113, "step": 13610 }, { "action_loss": 0.013898685574531555, "epoch": 12.239208633093526, "step": 13610 }, { "epoch": 12.248201438848922, "grad_norm": 0.2251444309949875, "learning_rate": 9.167805970080029e-05, "loss": 0.0106, "step": 13620 }, { "action_loss": 0.02796424925327301, "epoch": 12.248201438848922, "step": 13620 }, { "epoch": 12.257194244604317, "grad_norm": 0.2803332805633545, "learning_rate": 9.166282967807864e-05, "loss": 0.0124, "step": 13630 }, { "action_loss": 0.010778813622891903, "epoch": 12.257194244604317, "step": 13630 }, { "epoch": 12.266187050359711, "grad_norm": 0.32206472754478455, "learning_rate": 9.16475869992894e-05, "loss": 0.0084, "step": 13640 }, { "action_loss": 0.01019508857280016, "epoch": 12.266187050359711, "step": 13640 }, { "epoch": 12.275179856115107, "grad_norm": 0.2137516885995865, "learning_rate": 9.163233166906284e-05, "loss": 0.0081, "step": 13650 }, { "action_loss": 0.006148523185402155, "epoch": 12.275179856115107, "step": 13650 }, { "epoch": 12.284172661870503, "grad_norm": 0.26588818430900574, "learning_rate": 9.161706369203317e-05, "loss": 0.0116, "step": 13660 }, { "action_loss": 0.007537781726568937, "epoch": 12.284172661870503, "step": 13660 }, { "epoch": 12.293165467625899, "grad_norm": 0.22020910680294037, "learning_rate": 9.16017830728384e-05, "loss": 0.0078, "step": 13670 }, { "action_loss": 0.004278406500816345, "epoch": 12.293165467625899, "step": 13670 }, { "epoch": 12.302158273381295, "grad_norm": 0.23832912743091583, "learning_rate": 9.158648981612035e-05, "loss": 0.0113, "step": 13680 }, { "action_loss": 0.008045884780585766, "epoch": 12.302158273381295, "step": 13680 }, { "epoch": 12.31115107913669, "grad_norm": 0.26923036575317383, "learning_rate": 9.157118392652472e-05, "loss": 0.0101, "step": 13690 }, { "action_loss": 0.0169986579567194, "epoch": 12.31115107913669, "step": 13690 }, { "epoch": 12.320143884892087, "grad_norm": 0.2124391496181488, "learning_rate": 9.155586540870104e-05, "loss": 0.0094, "step": 13700 }, { "action_loss": 0.008614831604063511, "epoch": 12.320143884892087, "step": 13700 }, { "epoch": 12.329136690647482, "grad_norm": 0.2481149584054947, "learning_rate": 9.154053426730267e-05, "loss": 0.0102, "step": 13710 }, { "action_loss": 0.007437629159539938, "epoch": 12.329136690647482, "step": 13710 }, { "epoch": 12.338129496402878, "grad_norm": 0.3009470999240875, "learning_rate": 9.15251905069868e-05, "loss": 0.0135, "step": 13720 }, { "action_loss": 0.010145424865186214, "epoch": 12.338129496402878, "step": 13720 }, { "epoch": 12.347122302158274, "grad_norm": 0.22024253010749817, "learning_rate": 9.150983413241446e-05, "loss": 0.0113, "step": 13730 }, { "action_loss": 0.015053960494697094, "epoch": 12.347122302158274, "step": 13730 }, { "epoch": 12.35611510791367, "grad_norm": 0.27834567427635193, "learning_rate": 9.149446514825051e-05, "loss": 0.0147, "step": 13740 }, { "action_loss": 0.008012999780476093, "epoch": 12.35611510791367, "step": 13740 }, { "epoch": 12.365107913669064, "grad_norm": 0.3397466242313385, "learning_rate": 9.147908355916365e-05, "loss": 0.0145, "step": 13750 }, { "action_loss": 0.03232710063457489, "epoch": 12.365107913669064, "step": 13750 }, { "epoch": 12.37410071942446, "grad_norm": 0.23243694007396698, "learning_rate": 9.146368936982642e-05, "loss": 0.0158, "step": 13760 }, { "action_loss": 0.01684962399303913, "epoch": 12.37410071942446, "step": 13760 }, { "epoch": 12.383093525179856, "grad_norm": 0.27829912304878235, "learning_rate": 9.144828258491511e-05, "loss": 0.0138, "step": 13770 }, { "action_loss": 0.01733005791902542, "epoch": 12.383093525179856, "step": 13770 }, { "epoch": 12.392086330935252, "grad_norm": 0.22933849692344666, "learning_rate": 9.143286320910996e-05, "loss": 0.0127, "step": 13780 }, { "action_loss": 0.01096145436167717, "epoch": 12.392086330935252, "step": 13780 }, { "epoch": 12.401079136690647, "grad_norm": 0.20513588190078735, "learning_rate": 9.141743124709491e-05, "loss": 0.0114, "step": 13790 }, { "action_loss": 0.009947034530341625, "epoch": 12.401079136690647, "step": 13790 }, { "epoch": 12.410071942446043, "grad_norm": 0.2164459079504013, "learning_rate": 9.140198670355784e-05, "loss": 0.0133, "step": 13800 }, { "action_loss": 0.016273409128189087, "epoch": 12.410071942446043, "step": 13800 }, { "epoch": 12.41906474820144, "grad_norm": 0.22978852689266205, "learning_rate": 9.138652958319034e-05, "loss": 0.0141, "step": 13810 }, { "action_loss": 0.003234636038541794, "epoch": 12.41906474820144, "step": 13810 }, { "epoch": 12.428057553956835, "grad_norm": 0.23382647335529327, "learning_rate": 9.137105989068791e-05, "loss": 0.01, "step": 13820 }, { "action_loss": 0.010297977365553379, "epoch": 12.428057553956835, "step": 13820 }, { "epoch": 12.43705035971223, "grad_norm": 0.30847880244255066, "learning_rate": 9.135557763074983e-05, "loss": 0.021, "step": 13830 }, { "action_loss": 0.00982830673456192, "epoch": 12.43705035971223, "step": 13830 }, { "epoch": 12.446043165467627, "grad_norm": 0.2639608085155487, "learning_rate": 9.13400828080792e-05, "loss": 0.014, "step": 13840 }, { "action_loss": 0.0028966397512704134, "epoch": 12.446043165467627, "step": 13840 }, { "epoch": 12.45503597122302, "grad_norm": 0.30912303924560547, "learning_rate": 9.132457542738292e-05, "loss": 0.0113, "step": 13850 }, { "action_loss": 0.00822411198168993, "epoch": 12.45503597122302, "step": 13850 }, { "epoch": 12.464028776978417, "grad_norm": 0.1606307327747345, "learning_rate": 9.130905549337174e-05, "loss": 0.0095, "step": 13860 }, { "action_loss": 0.014808309264481068, "epoch": 12.464028776978417, "step": 13860 }, { "epoch": 12.473021582733812, "grad_norm": 0.16191361844539642, "learning_rate": 9.129352301076021e-05, "loss": 0.0102, "step": 13870 }, { "action_loss": 0.011754644103348255, "epoch": 12.473021582733812, "step": 13870 }, { "epoch": 12.482014388489208, "grad_norm": 0.30705320835113525, "learning_rate": 9.127797798426668e-05, "loss": 0.0094, "step": 13880 }, { "action_loss": 0.03806769475340843, "epoch": 12.482014388489208, "step": 13880 }, { "epoch": 12.491007194244604, "grad_norm": 0.20415274798870087, "learning_rate": 9.126242041861333e-05, "loss": 0.0152, "step": 13890 }, { "action_loss": 0.010308545082807541, "epoch": 12.491007194244604, "step": 13890 }, { "epoch": 12.5, "grad_norm": 0.2593599855899811, "learning_rate": 9.124685031852611e-05, "loss": 0.0134, "step": 13900 }, { "action_loss": 0.007477956358343363, "epoch": 12.5, "step": 13900 }, { "epoch": 12.508992805755396, "grad_norm": 0.20404170453548431, "learning_rate": 9.123126768873482e-05, "loss": 0.0119, "step": 13910 }, { "action_loss": 0.007872771471738815, "epoch": 12.508992805755396, "step": 13910 }, { "epoch": 12.517985611510792, "grad_norm": 0.31604909896850586, "learning_rate": 9.121567253397308e-05, "loss": 0.014, "step": 13920 }, { "action_loss": 0.008633049204945564, "epoch": 12.517985611510792, "step": 13920 }, { "epoch": 12.526978417266188, "grad_norm": 0.17841729521751404, "learning_rate": 9.120006485897824e-05, "loss": 0.0106, "step": 13930 }, { "action_loss": 0.004605371970683336, "epoch": 12.526978417266188, "step": 13930 }, { "epoch": 12.535971223021583, "grad_norm": 0.2673490047454834, "learning_rate": 9.118444466849152e-05, "loss": 0.0168, "step": 13940 }, { "action_loss": 0.020306343212723732, "epoch": 12.535971223021583, "step": 13940 }, { "epoch": 12.54496402877698, "grad_norm": 0.3059149384498596, "learning_rate": 9.116881196725793e-05, "loss": 0.0123, "step": 13950 }, { "action_loss": 0.008698499761521816, "epoch": 12.54496402877698, "step": 13950 }, { "epoch": 12.553956834532373, "grad_norm": 0.2541482746601105, "learning_rate": 9.115316676002627e-05, "loss": 0.0118, "step": 13960 }, { "action_loss": 0.018991312012076378, "epoch": 12.553956834532373, "step": 13960 }, { "epoch": 12.56294964028777, "grad_norm": 0.2203819900751114, "learning_rate": 9.113750905154911e-05, "loss": 0.0127, "step": 13970 }, { "action_loss": 0.008545152842998505, "epoch": 12.56294964028777, "step": 13970 }, { "epoch": 12.571942446043165, "grad_norm": 0.2063889503479004, "learning_rate": 9.112183884658289e-05, "loss": 0.0158, "step": 13980 }, { "action_loss": 0.005872846115380526, "epoch": 12.571942446043165, "step": 13980 }, { "epoch": 12.58093525179856, "grad_norm": 0.19738860428333282, "learning_rate": 9.11061561498878e-05, "loss": 0.0091, "step": 13990 }, { "action_loss": 0.018877947703003883, "epoch": 12.58093525179856, "step": 13990 }, { "epoch": 12.589928057553957, "grad_norm": 0.28676837682724, "learning_rate": 9.109046096622779e-05, "loss": 0.0146, "step": 14000 }, { "action_loss": 0.01190924271941185, "epoch": 12.589928057553957, "step": 14000 }, { "epoch": 12.598920863309353, "grad_norm": 0.3188031017780304, "learning_rate": 9.107475330037069e-05, "loss": 0.0119, "step": 14010 }, { "action_loss": 0.019411412999033928, "epoch": 12.598920863309353, "step": 14010 }, { "epoch": 12.607913669064748, "grad_norm": 0.24322617053985596, "learning_rate": 9.105903315708806e-05, "loss": 0.0127, "step": 14020 }, { "action_loss": 0.005736805498600006, "epoch": 12.607913669064748, "step": 14020 }, { "epoch": 12.616906474820144, "grad_norm": 0.24806302785873413, "learning_rate": 9.104330054115524e-05, "loss": 0.0101, "step": 14030 }, { "action_loss": 0.009932088665664196, "epoch": 12.616906474820144, "step": 14030 }, { "epoch": 12.62589928057554, "grad_norm": 0.33651992678642273, "learning_rate": 9.102755545735141e-05, "loss": 0.0101, "step": 14040 }, { "action_loss": 0.010303331539034843, "epoch": 12.62589928057554, "step": 14040 }, { "epoch": 12.634892086330936, "grad_norm": 0.25872722268104553, "learning_rate": 9.10117979104595e-05, "loss": 0.012, "step": 14050 }, { "action_loss": 0.010326973162591457, "epoch": 12.634892086330936, "step": 14050 }, { "epoch": 12.64388489208633, "grad_norm": 0.25871768593788147, "learning_rate": 9.099602790526624e-05, "loss": 0.0125, "step": 14060 }, { "action_loss": 0.013089846819639206, "epoch": 12.64388489208633, "step": 14060 }, { "epoch": 12.652877697841726, "grad_norm": 0.2563643157482147, "learning_rate": 9.098024544656212e-05, "loss": 0.0098, "step": 14070 }, { "action_loss": 0.00661334628239274, "epoch": 12.652877697841726, "step": 14070 }, { "epoch": 12.661870503597122, "grad_norm": 0.3307753801345825, "learning_rate": 9.096445053914148e-05, "loss": 0.0161, "step": 14080 }, { "action_loss": 0.005886258091777563, "epoch": 12.661870503597122, "step": 14080 }, { "epoch": 12.670863309352518, "grad_norm": 0.29055559635162354, "learning_rate": 9.094864318780236e-05, "loss": 0.013, "step": 14090 }, { "action_loss": 0.06850409507751465, "epoch": 12.670863309352518, "step": 14090 }, { "epoch": 12.679856115107913, "grad_norm": 0.2976270318031311, "learning_rate": 9.093282339734663e-05, "loss": 0.0222, "step": 14100 }, { "action_loss": 0.007720213383436203, "epoch": 12.679856115107913, "step": 14100 }, { "epoch": 12.68884892086331, "grad_norm": 0.23707382380962372, "learning_rate": 9.091699117257992e-05, "loss": 0.0105, "step": 14110 }, { "action_loss": 0.005838294979184866, "epoch": 12.68884892086331, "step": 14110 }, { "epoch": 12.697841726618705, "grad_norm": 0.24990737438201904, "learning_rate": 9.090114651831163e-05, "loss": 0.0098, "step": 14120 }, { "action_loss": 0.02990124374628067, "epoch": 12.697841726618705, "step": 14120 }, { "epoch": 12.706834532374101, "grad_norm": 0.2687983512878418, "learning_rate": 9.088528943935497e-05, "loss": 0.0154, "step": 14130 }, { "action_loss": 0.012100324966013432, "epoch": 12.706834532374101, "step": 14130 }, { "epoch": 12.715827338129497, "grad_norm": 0.15641799569129944, "learning_rate": 9.086941994052689e-05, "loss": 0.0091, "step": 14140 }, { "action_loss": 0.007500557228922844, "epoch": 12.715827338129497, "step": 14140 }, { "epoch": 12.724820143884893, "grad_norm": 0.18714559078216553, "learning_rate": 9.085353802664813e-05, "loss": 0.0092, "step": 14150 }, { "action_loss": 0.016127504408359528, "epoch": 12.724820143884893, "step": 14150 }, { "epoch": 12.733812949640289, "grad_norm": 0.22788788378238678, "learning_rate": 9.08376437025432e-05, "loss": 0.0101, "step": 14160 }, { "action_loss": 0.00810652133077383, "epoch": 12.733812949640289, "step": 14160 }, { "epoch": 12.742805755395683, "grad_norm": 0.1981242299079895, "learning_rate": 9.082173697304035e-05, "loss": 0.0121, "step": 14170 }, { "action_loss": 0.010129089467227459, "epoch": 12.742805755395683, "step": 14170 }, { "epoch": 12.751798561151078, "grad_norm": 0.25197651982307434, "learning_rate": 9.080581784297166e-05, "loss": 0.0076, "step": 14180 }, { "action_loss": 0.024153396487236023, "epoch": 12.751798561151078, "step": 14180 }, { "epoch": 12.760791366906474, "grad_norm": 0.2578153908252716, "learning_rate": 9.078988631717291e-05, "loss": 0.0114, "step": 14190 }, { "action_loss": 0.006745573133230209, "epoch": 12.760791366906474, "step": 14190 }, { "epoch": 12.76978417266187, "grad_norm": 0.25685107707977295, "learning_rate": 9.077394240048369e-05, "loss": 0.0096, "step": 14200 }, { "action_loss": 0.007059965282678604, "epoch": 12.76978417266187, "step": 14200 }, { "epoch": 12.778776978417266, "grad_norm": 0.22633937001228333, "learning_rate": 9.075798609774736e-05, "loss": 0.0104, "step": 14210 }, { "action_loss": 0.00304403156042099, "epoch": 12.778776978417266, "step": 14210 }, { "epoch": 12.787769784172662, "grad_norm": 0.19373536109924316, "learning_rate": 9.0742017413811e-05, "loss": 0.0096, "step": 14220 }, { "action_loss": 0.009536311961710453, "epoch": 12.787769784172662, "step": 14220 }, { "epoch": 12.796762589928058, "grad_norm": 0.20348277688026428, "learning_rate": 9.072603635352548e-05, "loss": 0.0101, "step": 14230 }, { "action_loss": 0.008217125199735165, "epoch": 12.796762589928058, "step": 14230 }, { "epoch": 12.805755395683454, "grad_norm": 0.27059224247932434, "learning_rate": 9.071004292174541e-05, "loss": 0.0125, "step": 14240 }, { "action_loss": 0.00959592591971159, "epoch": 12.805755395683454, "step": 14240 }, { "epoch": 12.81474820143885, "grad_norm": 0.30651313066482544, "learning_rate": 9.06940371233292e-05, "loss": 0.0097, "step": 14250 }, { "action_loss": 0.0099340146407485, "epoch": 12.81474820143885, "step": 14250 }, { "epoch": 12.823741007194245, "grad_norm": 0.2185366004705429, "learning_rate": 9.067801896313898e-05, "loss": 0.0126, "step": 14260 }, { "action_loss": 0.005657325964421034, "epoch": 12.823741007194245, "step": 14260 }, { "epoch": 12.832733812949641, "grad_norm": 0.15714889764785767, "learning_rate": 9.066198844604064e-05, "loss": 0.0109, "step": 14270 }, { "action_loss": 0.008231615647673607, "epoch": 12.832733812949641, "step": 14270 }, { "epoch": 12.841726618705035, "grad_norm": 0.2992229163646698, "learning_rate": 9.06459455769038e-05, "loss": 0.0149, "step": 14280 }, { "action_loss": 0.015875186771154404, "epoch": 12.841726618705035, "step": 14280 }, { "epoch": 12.850719424460431, "grad_norm": 0.34321168065071106, "learning_rate": 9.062989036060193e-05, "loss": 0.0179, "step": 14290 }, { "action_loss": 0.011631536297500134, "epoch": 12.850719424460431, "step": 14290 }, { "epoch": 12.859712230215827, "grad_norm": 0.19083981215953827, "learning_rate": 9.061382280201212e-05, "loss": 0.0088, "step": 14300 }, { "action_loss": 0.04836638644337654, "epoch": 12.859712230215827, "step": 14300 }, { "epoch": 12.868705035971223, "grad_norm": 0.15655501186847687, "learning_rate": 9.059774290601528e-05, "loss": 0.0165, "step": 14310 }, { "action_loss": 0.0049276407808065414, "epoch": 12.868705035971223, "step": 14310 }, { "epoch": 12.877697841726619, "grad_norm": 0.15345443785190582, "learning_rate": 9.058165067749606e-05, "loss": 0.0111, "step": 14320 }, { "action_loss": 0.010580095462501049, "epoch": 12.877697841726619, "step": 14320 }, { "epoch": 12.886690647482014, "grad_norm": 0.21543729305267334, "learning_rate": 9.056554612134288e-05, "loss": 0.0125, "step": 14330 }, { "action_loss": 0.010992306284606457, "epoch": 12.886690647482014, "step": 14330 }, { "epoch": 12.89568345323741, "grad_norm": 0.25188860297203064, "learning_rate": 9.054942924244785e-05, "loss": 0.0122, "step": 14340 }, { "action_loss": 0.01085882168263197, "epoch": 12.89568345323741, "step": 14340 }, { "epoch": 12.904676258992806, "grad_norm": 0.3998895585536957, "learning_rate": 9.053330004570686e-05, "loss": 0.0149, "step": 14350 }, { "action_loss": 0.010534670203924179, "epoch": 12.904676258992806, "step": 14350 }, { "epoch": 12.913669064748202, "grad_norm": 0.16919514536857605, "learning_rate": 9.051715853601955e-05, "loss": 0.0115, "step": 14360 }, { "action_loss": 0.01863585039973259, "epoch": 12.913669064748202, "step": 14360 }, { "epoch": 12.922661870503598, "grad_norm": 0.18152520060539246, "learning_rate": 9.050100471828926e-05, "loss": 0.0161, "step": 14370 }, { "action_loss": 0.012085516937077045, "epoch": 12.922661870503598, "step": 14370 }, { "epoch": 12.931654676258994, "grad_norm": 0.20569217205047607, "learning_rate": 9.048483859742311e-05, "loss": 0.0155, "step": 14380 }, { "action_loss": 0.014036157168447971, "epoch": 12.931654676258994, "step": 14380 }, { "epoch": 12.940647482014388, "grad_norm": 0.3050883114337921, "learning_rate": 9.046866017833193e-05, "loss": 0.0105, "step": 14390 }, { "action_loss": 0.010719113983213902, "epoch": 12.940647482014388, "step": 14390 }, { "epoch": 12.949640287769784, "grad_norm": 0.1719186007976532, "learning_rate": 9.045246946593029e-05, "loss": 0.0113, "step": 14400 }, { "action_loss": 0.005042276810854673, "epoch": 12.949640287769784, "step": 14400 }, { "epoch": 12.95863309352518, "grad_norm": 0.23578433692455292, "learning_rate": 9.043626646513652e-05, "loss": 0.0102, "step": 14410 }, { "action_loss": 0.006584996823221445, "epoch": 12.95863309352518, "step": 14410 }, { "epoch": 12.967625899280575, "grad_norm": 0.23993298411369324, "learning_rate": 9.042005118087267e-05, "loss": 0.009, "step": 14420 }, { "action_loss": 0.0030936377588659525, "epoch": 12.967625899280575, "step": 14420 }, { "epoch": 12.976618705035971, "grad_norm": 0.2061760425567627, "learning_rate": 9.040382361806448e-05, "loss": 0.0113, "step": 14430 }, { "action_loss": 0.006193298846483231, "epoch": 12.976618705035971, "step": 14430 }, { "epoch": 12.985611510791367, "grad_norm": 0.1918719857931137, "learning_rate": 9.038758378164148e-05, "loss": 0.0141, "step": 14440 }, { "action_loss": 0.011604671366512775, "epoch": 12.985611510791367, "step": 14440 }, { "epoch": 12.994604316546763, "grad_norm": 0.24416309595108032, "learning_rate": 9.037133167653691e-05, "loss": 0.0091, "step": 14450 }, { "action_loss": 0.013819555751979351, "epoch": 12.994604316546763, "step": 14450 }, { "epoch": 13.003597122302159, "grad_norm": 0.17111288011074066, "learning_rate": 9.035506730768771e-05, "loss": 0.0089, "step": 14460 }, { "action_loss": 0.0052556139416992664, "epoch": 13.003597122302159, "step": 14460 }, { "epoch": 13.012589928057555, "grad_norm": 0.2296372950077057, "learning_rate": 9.033879068003458e-05, "loss": 0.0102, "step": 14470 }, { "action_loss": 0.016311803832650185, "epoch": 13.012589928057555, "step": 14470 }, { "epoch": 13.02158273381295, "grad_norm": 0.13268013298511505, "learning_rate": 9.032250179852193e-05, "loss": 0.0109, "step": 14480 }, { "action_loss": 0.011990183033049107, "epoch": 13.02158273381295, "step": 14480 }, { "epoch": 13.030575539568344, "grad_norm": 0.12153936177492142, "learning_rate": 9.030620066809787e-05, "loss": 0.0088, "step": 14490 }, { "action_loss": 0.006126588676124811, "epoch": 13.030575539568344, "step": 14490 }, { "epoch": 13.03956834532374, "grad_norm": 0.271782785654068, "learning_rate": 9.028988729371428e-05, "loss": 0.0101, "step": 14500 }, { "action_loss": 0.014023877680301666, "epoch": 13.03956834532374, "step": 14500 }, { "epoch": 13.048561151079136, "grad_norm": 0.16505476832389832, "learning_rate": 9.027356168032673e-05, "loss": 0.0104, "step": 14510 }, { "action_loss": 0.005434045102447271, "epoch": 13.048561151079136, "step": 14510 }, { "epoch": 13.057553956834532, "grad_norm": 0.23306868970394135, "learning_rate": 9.02572238328945e-05, "loss": 0.0118, "step": 14520 }, { "action_loss": 0.008070726878941059, "epoch": 13.057553956834532, "step": 14520 }, { "epoch": 13.066546762589928, "grad_norm": 0.2217799872159958, "learning_rate": 9.02408737563806e-05, "loss": 0.0114, "step": 14530 }, { "action_loss": 0.006348100956529379, "epoch": 13.066546762589928, "step": 14530 }, { "epoch": 13.075539568345324, "grad_norm": 0.6270132660865784, "learning_rate": 9.022451145575174e-05, "loss": 0.0116, "step": 14540 }, { "action_loss": 0.006792196538299322, "epoch": 13.075539568345324, "step": 14540 }, { "epoch": 13.08453237410072, "grad_norm": 0.22792796790599823, "learning_rate": 9.02081369359784e-05, "loss": 0.0103, "step": 14550 }, { "action_loss": 0.008327766321599483, "epoch": 13.08453237410072, "step": 14550 }, { "epoch": 13.093525179856115, "grad_norm": 0.23103047907352448, "learning_rate": 9.019175020203465e-05, "loss": 0.0092, "step": 14560 }, { "action_loss": 0.006858964916318655, "epoch": 13.093525179856115, "step": 14560 }, { "epoch": 13.102517985611511, "grad_norm": 0.21650712192058563, "learning_rate": 9.017535125889842e-05, "loss": 0.01, "step": 14570 }, { "action_loss": 0.022163867950439453, "epoch": 13.102517985611511, "step": 14570 }, { "epoch": 13.111510791366907, "grad_norm": 0.1898585706949234, "learning_rate": 9.015894011155124e-05, "loss": 0.0134, "step": 14580 }, { "action_loss": 0.016699522733688354, "epoch": 13.111510791366907, "step": 14580 }, { "epoch": 13.120503597122303, "grad_norm": 0.2547851800918579, "learning_rate": 9.014251676497838e-05, "loss": 0.0158, "step": 14590 }, { "action_loss": 0.006801715586334467, "epoch": 13.120503597122303, "step": 14590 }, { "epoch": 13.129496402877697, "grad_norm": 0.15850858390331268, "learning_rate": 9.012608122416884e-05, "loss": 0.0091, "step": 14600 }, { "action_loss": 0.00610365578904748, "epoch": 13.129496402877697, "step": 14600 }, { "epoch": 13.138489208633093, "grad_norm": 0.23415927588939667, "learning_rate": 9.010963349411529e-05, "loss": 0.0133, "step": 14610 }, { "action_loss": 0.019365357235074043, "epoch": 13.138489208633093, "step": 14610 }, { "epoch": 13.147482014388489, "grad_norm": 0.17642714083194733, "learning_rate": 9.00931735798141e-05, "loss": 0.0133, "step": 14620 }, { "action_loss": 0.006572484504431486, "epoch": 13.147482014388489, "step": 14620 }, { "epoch": 13.156474820143885, "grad_norm": 0.21715454757213593, "learning_rate": 9.00767014862654e-05, "loss": 0.0086, "step": 14630 }, { "action_loss": 0.006429564207792282, "epoch": 13.156474820143885, "step": 14630 }, { "epoch": 13.16546762589928, "grad_norm": 0.2437906414270401, "learning_rate": 9.006021721847295e-05, "loss": 0.0096, "step": 14640 }, { "action_loss": 0.013513305224478245, "epoch": 13.16546762589928, "step": 14640 }, { "epoch": 13.174460431654676, "grad_norm": 0.32988226413726807, "learning_rate": 9.004372078144423e-05, "loss": 0.0125, "step": 14650 }, { "action_loss": 0.018483279272913933, "epoch": 13.174460431654676, "step": 14650 }, { "epoch": 13.183453237410072, "grad_norm": 0.23820607364177704, "learning_rate": 9.002721218019043e-05, "loss": 0.0131, "step": 14660 }, { "action_loss": 0.006969983223825693, "epoch": 13.183453237410072, "step": 14660 }, { "epoch": 13.192446043165468, "grad_norm": 0.16850216686725616, "learning_rate": 9.001069141972642e-05, "loss": 0.0085, "step": 14670 }, { "action_loss": 0.006680784281343222, "epoch": 13.192446043165468, "step": 14670 }, { "epoch": 13.201438848920864, "grad_norm": 0.28216108679771423, "learning_rate": 8.99941585050708e-05, "loss": 0.0128, "step": 14680 }, { "action_loss": 0.010454983450472355, "epoch": 13.201438848920864, "step": 14680 }, { "epoch": 13.21043165467626, "grad_norm": 0.19718502461910248, "learning_rate": 8.997761344124578e-05, "loss": 0.0097, "step": 14690 }, { "action_loss": 0.004636302124708891, "epoch": 13.21043165467626, "step": 14690 }, { "epoch": 13.219424460431656, "grad_norm": 0.18945366144180298, "learning_rate": 8.996105623327737e-05, "loss": 0.0084, "step": 14700 }, { "action_loss": 0.006189648527652025, "epoch": 13.219424460431656, "step": 14700 }, { "epoch": 13.22841726618705, "grad_norm": 0.12576007843017578, "learning_rate": 8.994448688619517e-05, "loss": 0.0096, "step": 14710 }, { "action_loss": 0.006218239665031433, "epoch": 13.22841726618705, "step": 14710 }, { "epoch": 13.237410071942445, "grad_norm": 0.19900667667388916, "learning_rate": 8.992790540503253e-05, "loss": 0.0111, "step": 14720 }, { "action_loss": 0.007311702240258455, "epoch": 13.237410071942445, "step": 14720 }, { "epoch": 13.246402877697841, "grad_norm": 0.190497025847435, "learning_rate": 8.991131179482648e-05, "loss": 0.0088, "step": 14730 }, { "action_loss": 0.012665134854614735, "epoch": 13.246402877697841, "step": 14730 }, { "epoch": 13.255395683453237, "grad_norm": 0.24936853349208832, "learning_rate": 8.989470606061768e-05, "loss": 0.011, "step": 14740 }, { "action_loss": 0.01177298929542303, "epoch": 13.255395683453237, "step": 14740 }, { "epoch": 13.264388489208633, "grad_norm": 0.285243958234787, "learning_rate": 8.987808820745056e-05, "loss": 0.0146, "step": 14750 }, { "action_loss": 0.011541238985955715, "epoch": 13.264388489208633, "step": 14750 }, { "epoch": 13.273381294964029, "grad_norm": 0.249977707862854, "learning_rate": 8.986145824037315e-05, "loss": 0.0102, "step": 14760 }, { "action_loss": 0.019903169944882393, "epoch": 13.273381294964029, "step": 14760 }, { "epoch": 13.282374100719425, "grad_norm": 0.1743282675743103, "learning_rate": 8.984481616443721e-05, "loss": 0.0165, "step": 14770 }, { "action_loss": 0.00939895398914814, "epoch": 13.282374100719425, "step": 14770 }, { "epoch": 13.29136690647482, "grad_norm": 0.2776021659374237, "learning_rate": 8.982816198469815e-05, "loss": 0.01, "step": 14780 }, { "action_loss": 0.0066107227467000484, "epoch": 13.29136690647482, "step": 14780 }, { "epoch": 13.300359712230216, "grad_norm": 0.17749732732772827, "learning_rate": 8.98114957062151e-05, "loss": 0.0128, "step": 14790 }, { "action_loss": 0.006274792831391096, "epoch": 13.300359712230216, "step": 14790 }, { "epoch": 13.309352517985612, "grad_norm": 0.19573774933815002, "learning_rate": 8.97948173340508e-05, "loss": 0.0085, "step": 14800 }, { "action_loss": 0.009219297207891941, "epoch": 13.309352517985612, "step": 14800 }, { "epoch": 13.318345323741006, "grad_norm": 0.22367003560066223, "learning_rate": 8.977812687327172e-05, "loss": 0.0079, "step": 14810 }, { "action_loss": 0.024293014779686928, "epoch": 13.318345323741006, "step": 14810 }, { "epoch": 13.327338129496402, "grad_norm": 0.294828325510025, "learning_rate": 8.976142432894798e-05, "loss": 0.0104, "step": 14820 }, { "action_loss": 0.008657276630401611, "epoch": 13.327338129496402, "step": 14820 }, { "epoch": 13.336330935251798, "grad_norm": 0.15719756484031677, "learning_rate": 8.974470970615336e-05, "loss": 0.0129, "step": 14830 }, { "action_loss": 0.02064882218837738, "epoch": 13.336330935251798, "step": 14830 }, { "epoch": 13.345323741007194, "grad_norm": 0.21846450865268707, "learning_rate": 8.972798300996534e-05, "loss": 0.0113, "step": 14840 }, { "action_loss": 0.005648563150316477, "epoch": 13.345323741007194, "step": 14840 }, { "epoch": 13.35431654676259, "grad_norm": 0.23717983067035675, "learning_rate": 8.971124424546504e-05, "loss": 0.0094, "step": 14850 }, { "action_loss": 0.008182740770280361, "epoch": 13.35431654676259, "step": 14850 }, { "epoch": 13.363309352517986, "grad_norm": 0.21348953247070312, "learning_rate": 8.969449341773724e-05, "loss": 0.0128, "step": 14860 }, { "action_loss": 0.011304083280265331, "epoch": 13.363309352517986, "step": 14860 }, { "epoch": 13.372302158273381, "grad_norm": 0.17157262563705444, "learning_rate": 8.967773053187042e-05, "loss": 0.0163, "step": 14870 }, { "action_loss": 0.013841983862221241, "epoch": 13.372302158273381, "step": 14870 }, { "epoch": 13.381294964028777, "grad_norm": 0.17953577637672424, "learning_rate": 8.966095559295668e-05, "loss": 0.0101, "step": 14880 }, { "action_loss": 0.006487491074949503, "epoch": 13.381294964028777, "step": 14880 }, { "epoch": 13.390287769784173, "grad_norm": 0.23035334050655365, "learning_rate": 8.964416860609184e-05, "loss": 0.0119, "step": 14890 }, { "action_loss": 0.00620088167488575, "epoch": 13.390287769784173, "step": 14890 }, { "epoch": 13.399280575539569, "grad_norm": 0.27289438247680664, "learning_rate": 8.962736957637532e-05, "loss": 0.0087, "step": 14900 }, { "action_loss": 0.0029364535585045815, "epoch": 13.399280575539569, "step": 14900 }, { "epoch": 13.408273381294965, "grad_norm": 0.1751924306154251, "learning_rate": 8.96105585089102e-05, "loss": 0.0074, "step": 14910 }, { "action_loss": 0.027226844802498817, "epoch": 13.408273381294965, "step": 14910 }, { "epoch": 13.417266187050359, "grad_norm": 0.18615563213825226, "learning_rate": 8.959373540880329e-05, "loss": 0.0156, "step": 14920 }, { "action_loss": 0.006610570941120386, "epoch": 13.417266187050359, "step": 14920 }, { "epoch": 13.426258992805755, "grad_norm": 0.17929449677467346, "learning_rate": 8.957690028116495e-05, "loss": 0.0104, "step": 14930 }, { "action_loss": 0.006237198133021593, "epoch": 13.426258992805755, "step": 14930 }, { "epoch": 13.43525179856115, "grad_norm": 0.18619203567504883, "learning_rate": 8.956005313110928e-05, "loss": 0.0098, "step": 14940 }, { "action_loss": 0.008099451661109924, "epoch": 13.43525179856115, "step": 14940 }, { "epoch": 13.444244604316546, "grad_norm": 0.30862438678741455, "learning_rate": 8.9543193963754e-05, "loss": 0.0116, "step": 14950 }, { "action_loss": 0.009838483296334743, "epoch": 13.444244604316546, "step": 14950 }, { "epoch": 13.453237410071942, "grad_norm": 0.2531769573688507, "learning_rate": 8.952632278422048e-05, "loss": 0.0095, "step": 14960 }, { "action_loss": 0.008660838007926941, "epoch": 13.453237410071942, "step": 14960 }, { "epoch": 13.462230215827338, "grad_norm": 0.19345510005950928, "learning_rate": 8.95094395976337e-05, "loss": 0.0121, "step": 14970 }, { "action_loss": 0.011525381356477737, "epoch": 13.462230215827338, "step": 14970 }, { "epoch": 13.471223021582734, "grad_norm": 0.17231369018554688, "learning_rate": 8.949254440912239e-05, "loss": 0.0102, "step": 14980 }, { "action_loss": 0.010068506002426147, "epoch": 13.471223021582734, "step": 14980 }, { "epoch": 13.48021582733813, "grad_norm": 0.2594931125640869, "learning_rate": 8.94756372238188e-05, "loss": 0.0109, "step": 14990 }, { "action_loss": 0.010173849761486053, "epoch": 13.48021582733813, "step": 14990 }, { "epoch": 13.489208633093526, "grad_norm": 0.23685236275196075, "learning_rate": 8.945871804685892e-05, "loss": 0.0102, "step": 15000 }, { "action_loss": 0.010093546472489834, "epoch": 13.489208633093526, "step": 15000 }, { "epoch": 13.498201438848922, "grad_norm": 0.22111020982265472, "learning_rate": 8.944178688338236e-05, "loss": 0.0089, "step": 15010 }, { "action_loss": 0.019468406215310097, "epoch": 13.498201438848922, "step": 15010 }, { "epoch": 13.507194244604317, "grad_norm": 0.2556350529193878, "learning_rate": 8.942484373853233e-05, "loss": 0.0083, "step": 15020 }, { "action_loss": 0.0048954603262245655, "epoch": 13.507194244604317, "step": 15020 }, { "epoch": 13.516187050359711, "grad_norm": 0.19523903727531433, "learning_rate": 8.940788861745572e-05, "loss": 0.0143, "step": 15030 }, { "action_loss": 0.005887482315301895, "epoch": 13.516187050359711, "step": 15030 }, { "epoch": 13.525179856115107, "grad_norm": 0.2501077353954315, "learning_rate": 8.939092152530308e-05, "loss": 0.0111, "step": 15040 }, { "action_loss": 0.004726849962025881, "epoch": 13.525179856115107, "step": 15040 }, { "epoch": 13.534172661870503, "grad_norm": 0.24875408411026, "learning_rate": 8.937394246722853e-05, "loss": 0.008, "step": 15050 }, { "action_loss": 0.0169881209731102, "epoch": 13.534172661870503, "step": 15050 }, { "epoch": 13.543165467625899, "grad_norm": 0.19346854090690613, "learning_rate": 8.935695144838984e-05, "loss": 0.0146, "step": 15060 }, { "action_loss": 0.012566882185637951, "epoch": 13.543165467625899, "step": 15060 }, { "epoch": 13.552158273381295, "grad_norm": 0.22155563533306122, "learning_rate": 8.933994847394849e-05, "loss": 0.0103, "step": 15070 }, { "action_loss": 0.008096255362033844, "epoch": 13.552158273381295, "step": 15070 }, { "epoch": 13.56115107913669, "grad_norm": 0.19473329186439514, "learning_rate": 8.932293354906949e-05, "loss": 0.0081, "step": 15080 }, { "action_loss": 0.009537707082927227, "epoch": 13.56115107913669, "step": 15080 }, { "epoch": 13.570143884892087, "grad_norm": 0.2043183296918869, "learning_rate": 8.930590667892153e-05, "loss": 0.0099, "step": 15090 }, { "action_loss": 0.006654616445302963, "epoch": 13.570143884892087, "step": 15090 }, { "epoch": 13.579136690647482, "grad_norm": 0.22634011507034302, "learning_rate": 8.928886786867696e-05, "loss": 0.0097, "step": 15100 }, { "action_loss": 0.011737977154552937, "epoch": 13.579136690647482, "step": 15100 }, { "epoch": 13.588129496402878, "grad_norm": 0.13515286147594452, "learning_rate": 8.927181712351168e-05, "loss": 0.0093, "step": 15110 }, { "action_loss": 0.00852353498339653, "epoch": 13.588129496402878, "step": 15110 }, { "epoch": 13.597122302158274, "grad_norm": 0.21553152799606323, "learning_rate": 8.925475444860527e-05, "loss": 0.01, "step": 15120 }, { "action_loss": 0.007195642683655024, "epoch": 13.597122302158274, "step": 15120 }, { "epoch": 13.60611510791367, "grad_norm": 0.26905152201652527, "learning_rate": 8.923767984914092e-05, "loss": 0.007, "step": 15130 }, { "action_loss": 0.005144788417965174, "epoch": 13.60611510791367, "step": 15130 }, { "epoch": 13.615107913669064, "grad_norm": 0.2669892907142639, "learning_rate": 8.922059333030545e-05, "loss": 0.0141, "step": 15140 }, { "action_loss": 0.003425148082897067, "epoch": 13.615107913669064, "step": 15140 }, { "epoch": 13.62410071942446, "grad_norm": 0.277300089597702, "learning_rate": 8.920349489728928e-05, "loss": 0.0094, "step": 15150 }, { "action_loss": 0.006604915950447321, "epoch": 13.62410071942446, "step": 15150 }, { "epoch": 13.633093525179856, "grad_norm": 0.18756797909736633, "learning_rate": 8.918638455528646e-05, "loss": 0.0132, "step": 15160 }, { "action_loss": 0.0061837309040129185, "epoch": 13.633093525179856, "step": 15160 }, { "epoch": 13.642086330935252, "grad_norm": 0.2183966040611267, "learning_rate": 8.916926230949468e-05, "loss": 0.0103, "step": 15170 }, { "action_loss": 0.00824018195271492, "epoch": 13.642086330935252, "step": 15170 }, { "epoch": 13.651079136690647, "grad_norm": 0.222181499004364, "learning_rate": 8.915212816511522e-05, "loss": 0.0109, "step": 15180 }, { "action_loss": 0.004725402221083641, "epoch": 13.651079136690647, "step": 15180 }, { "epoch": 13.660071942446043, "grad_norm": 0.2601371705532074, "learning_rate": 8.913498212735296e-05, "loss": 0.0128, "step": 15190 }, { "action_loss": 0.0160373505204916, "epoch": 13.660071942446043, "step": 15190 }, { "epoch": 13.66906474820144, "grad_norm": 0.18498629331588745, "learning_rate": 8.911782420141643e-05, "loss": 0.0134, "step": 15200 }, { "action_loss": 0.005528612527996302, "epoch": 13.66906474820144, "step": 15200 }, { "epoch": 13.678057553956835, "grad_norm": 0.24098651111125946, "learning_rate": 8.910065439251775e-05, "loss": 0.009, "step": 15210 }, { "action_loss": 0.006524829193949699, "epoch": 13.678057553956835, "step": 15210 }, { "epoch": 13.68705035971223, "grad_norm": 0.2607560157775879, "learning_rate": 8.908347270587268e-05, "loss": 0.0108, "step": 15220 }, { "action_loss": 0.003674806095659733, "epoch": 13.68705035971223, "step": 15220 }, { "epoch": 13.696043165467627, "grad_norm": 0.17578767240047455, "learning_rate": 8.906627914670054e-05, "loss": 0.0096, "step": 15230 }, { "action_loss": 0.009686210192739964, "epoch": 13.696043165467627, "step": 15230 }, { "epoch": 13.70503597122302, "grad_norm": 0.13121749460697174, "learning_rate": 8.904907372022427e-05, "loss": 0.0102, "step": 15240 }, { "action_loss": 0.005869031418114901, "epoch": 13.70503597122302, "step": 15240 }, { "epoch": 13.714028776978417, "grad_norm": 0.29902753233909607, "learning_rate": 8.903185643167042e-05, "loss": 0.0106, "step": 15250 }, { "action_loss": 0.003971523139625788, "epoch": 13.714028776978417, "step": 15250 }, { "epoch": 13.723021582733812, "grad_norm": 0.18690557777881622, "learning_rate": 8.901462728626919e-05, "loss": 0.0093, "step": 15260 }, { "action_loss": 0.015371008776128292, "epoch": 13.723021582733812, "step": 15260 }, { "epoch": 13.732014388489208, "grad_norm": 0.26054880023002625, "learning_rate": 8.899738628925429e-05, "loss": 0.0116, "step": 15270 }, { "action_loss": 0.008001143112778664, "epoch": 13.732014388489208, "step": 15270 }, { "epoch": 13.741007194244604, "grad_norm": 0.20228534936904907, "learning_rate": 8.898013344586312e-05, "loss": 0.0088, "step": 15280 }, { "action_loss": 0.041941434144973755, "epoch": 13.741007194244604, "step": 15280 }, { "epoch": 13.75, "grad_norm": 0.1628432422876358, "learning_rate": 8.896286876133661e-05, "loss": 0.016, "step": 15290 }, { "action_loss": 0.008327441290020943, "epoch": 13.75, "step": 15290 }, { "epoch": 13.758992805755396, "grad_norm": 0.2703859210014343, "learning_rate": 8.894559224091933e-05, "loss": 0.0101, "step": 15300 }, { "action_loss": 0.019162284210324287, "epoch": 13.758992805755396, "step": 15300 }, { "epoch": 13.767985611510792, "grad_norm": 0.1710381805896759, "learning_rate": 8.892830388985942e-05, "loss": 0.0125, "step": 15310 }, { "action_loss": 0.006434546783566475, "epoch": 13.767985611510792, "step": 15310 }, { "epoch": 13.776978417266188, "grad_norm": 0.16282899677753448, "learning_rate": 8.891100371340864e-05, "loss": 0.0084, "step": 15320 }, { "action_loss": 0.017453685402870178, "epoch": 13.776978417266188, "step": 15320 }, { "epoch": 13.785971223021583, "grad_norm": 0.18066111207008362, "learning_rate": 8.889369171682231e-05, "loss": 0.0138, "step": 15330 }, { "action_loss": 0.006083623971790075, "epoch": 13.785971223021583, "step": 15330 }, { "epoch": 13.79496402877698, "grad_norm": 0.14879001677036285, "learning_rate": 8.887636790535936e-05, "loss": 0.0087, "step": 15340 }, { "action_loss": 0.005037772003561258, "epoch": 13.79496402877698, "step": 15340 }, { "epoch": 13.803956834532373, "grad_norm": 0.16442808508872986, "learning_rate": 8.885903228428231e-05, "loss": 0.0089, "step": 15350 }, { "action_loss": 0.009699915535748005, "epoch": 13.803956834532373, "step": 15350 }, { "epoch": 13.81294964028777, "grad_norm": 0.2829403877258301, "learning_rate": 8.884168485885727e-05, "loss": 0.0102, "step": 15360 }, { "action_loss": 0.006376301869750023, "epoch": 13.81294964028777, "step": 15360 }, { "epoch": 13.821942446043165, "grad_norm": 0.18104682862758636, "learning_rate": 8.882432563435393e-05, "loss": 0.0106, "step": 15370 }, { "action_loss": 0.009042232297360897, "epoch": 13.821942446043165, "step": 15370 }, { "epoch": 13.83093525179856, "grad_norm": 0.1780705451965332, "learning_rate": 8.880695461604556e-05, "loss": 0.0085, "step": 15380 }, { "action_loss": 0.003148149698972702, "epoch": 13.83093525179856, "step": 15380 }, { "epoch": 13.839928057553957, "grad_norm": 0.19709886610507965, "learning_rate": 8.878957180920901e-05, "loss": 0.0108, "step": 15390 }, { "action_loss": 0.004016393795609474, "epoch": 13.839928057553957, "step": 15390 }, { "epoch": 13.848920863309353, "grad_norm": 0.16950419545173645, "learning_rate": 8.877217721912473e-05, "loss": 0.0105, "step": 15400 }, { "action_loss": 0.004445131402462721, "epoch": 13.848920863309353, "step": 15400 }, { "epoch": 13.857913669064748, "grad_norm": 0.20551595091819763, "learning_rate": 8.875477085107673e-05, "loss": 0.0094, "step": 15410 }, { "action_loss": 0.006220432464033365, "epoch": 13.857913669064748, "step": 15410 }, { "epoch": 13.866906474820144, "grad_norm": 0.24491986632347107, "learning_rate": 8.87373527103526e-05, "loss": 0.0123, "step": 15420 }, { "action_loss": 0.0052312458865344524, "epoch": 13.866906474820144, "step": 15420 }, { "epoch": 13.87589928057554, "grad_norm": 0.23792380094528198, "learning_rate": 8.871992280224353e-05, "loss": 0.0094, "step": 15430 }, { "action_loss": 0.009299023076891899, "epoch": 13.87589928057554, "step": 15430 }, { "epoch": 13.884892086330936, "grad_norm": 0.2011430859565735, "learning_rate": 8.870248113204422e-05, "loss": 0.0092, "step": 15440 }, { "action_loss": 0.009583408012986183, "epoch": 13.884892086330936, "step": 15440 }, { "epoch": 13.89388489208633, "grad_norm": 0.2748376727104187, "learning_rate": 8.868502770505306e-05, "loss": 0.0134, "step": 15450 }, { "action_loss": 0.014187309890985489, "epoch": 13.89388489208633, "step": 15450 }, { "epoch": 13.902877697841726, "grad_norm": 0.13685598969459534, "learning_rate": 8.86675625265719e-05, "loss": 0.0135, "step": 15460 }, { "action_loss": 0.008949688635766506, "epoch": 13.902877697841726, "step": 15460 }, { "epoch": 13.911870503597122, "grad_norm": 0.3290807008743286, "learning_rate": 8.865008560190618e-05, "loss": 0.0091, "step": 15470 }, { "action_loss": 0.03911004588007927, "epoch": 13.911870503597122, "step": 15470 }, { "epoch": 13.920863309352518, "grad_norm": 0.18667295575141907, "learning_rate": 8.863259693636496e-05, "loss": 0.0121, "step": 15480 }, { "action_loss": 0.015702800825238228, "epoch": 13.920863309352518, "step": 15480 }, { "epoch": 13.929856115107913, "grad_norm": 0.22879983484745026, "learning_rate": 8.861509653526083e-05, "loss": 0.0108, "step": 15490 }, { "action_loss": 0.009688816033303738, "epoch": 13.929856115107913, "step": 15490 }, { "epoch": 13.93884892086331, "grad_norm": 0.21944062411785126, "learning_rate": 8.859758440390993e-05, "loss": 0.0103, "step": 15500 }, { "action_loss": 0.009658467955887318, "epoch": 13.93884892086331, "step": 15500 }, { "epoch": 13.947841726618705, "grad_norm": 0.22608374059200287, "learning_rate": 8.858006054763202e-05, "loss": 0.009, "step": 15510 }, { "action_loss": 0.01034872978925705, "epoch": 13.947841726618705, "step": 15510 }, { "epoch": 13.956834532374101, "grad_norm": 0.1610858291387558, "learning_rate": 8.856252497175035e-05, "loss": 0.0111, "step": 15520 }, { "action_loss": 0.0049360948614776134, "epoch": 13.956834532374101, "step": 15520 }, { "epoch": 13.965827338129497, "grad_norm": 0.12734539806842804, "learning_rate": 8.854497768159178e-05, "loss": 0.0066, "step": 15530 }, { "action_loss": 0.010796758346259594, "epoch": 13.965827338129497, "step": 15530 }, { "epoch": 13.974820143884893, "grad_norm": 0.24670104682445526, "learning_rate": 8.852741868248671e-05, "loss": 0.0082, "step": 15540 }, { "action_loss": 0.010541598312556744, "epoch": 13.974820143884893, "step": 15540 }, { "epoch": 13.983812949640289, "grad_norm": 0.22507509589195251, "learning_rate": 8.85098479797691e-05, "loss": 0.0099, "step": 15550 }, { "action_loss": 0.007291846442967653, "epoch": 13.983812949640289, "step": 15550 }, { "epoch": 13.992805755395683, "grad_norm": 0.3104310929775238, "learning_rate": 8.849226557877646e-05, "loss": 0.0096, "step": 15560 }, { "action_loss": 0.006667677313089371, "epoch": 13.992805755395683, "step": 15560 }, { "epoch": 14.001798561151078, "grad_norm": 0.19322626292705536, "learning_rate": 8.84746714848499e-05, "loss": 0.0097, "step": 15570 }, { "action_loss": 0.0060628303326666355, "epoch": 14.001798561151078, "step": 15570 }, { "epoch": 14.010791366906474, "grad_norm": 0.23645132780075073, "learning_rate": 8.845706570333397e-05, "loss": 0.0086, "step": 15580 }, { "action_loss": 0.009021353907883167, "epoch": 14.010791366906474, "step": 15580 }, { "epoch": 14.01978417266187, "grad_norm": 0.25028732419013977, "learning_rate": 8.84394482395769e-05, "loss": 0.0129, "step": 15590 }, { "action_loss": 0.0032961114775389433, "epoch": 14.01978417266187, "step": 15590 }, { "epoch": 14.028776978417266, "grad_norm": 0.18850433826446533, "learning_rate": 8.842181909893038e-05, "loss": 0.0081, "step": 15600 }, { "action_loss": 0.006954707205295563, "epoch": 14.028776978417266, "step": 15600 }, { "epoch": 14.037769784172662, "grad_norm": 0.14318977296352386, "learning_rate": 8.840417828674969e-05, "loss": 0.0088, "step": 15610 }, { "action_loss": 0.005288396030664444, "epoch": 14.037769784172662, "step": 15610 }, { "epoch": 14.046762589928058, "grad_norm": 0.20923607051372528, "learning_rate": 8.838652580839364e-05, "loss": 0.0122, "step": 15620 }, { "action_loss": 0.009235751815140247, "epoch": 14.046762589928058, "step": 15620 }, { "epoch": 14.055755395683454, "grad_norm": 0.22431035339832306, "learning_rate": 8.836886166922458e-05, "loss": 0.0171, "step": 15630 }, { "action_loss": 0.004769779741764069, "epoch": 14.055755395683454, "step": 15630 }, { "epoch": 14.06474820143885, "grad_norm": 0.25215232372283936, "learning_rate": 8.835118587460844e-05, "loss": 0.0103, "step": 15640 }, { "action_loss": 0.014890889637172222, "epoch": 14.06474820143885, "step": 15640 }, { "epoch": 14.073741007194245, "grad_norm": 0.25303465127944946, "learning_rate": 8.83334984299146e-05, "loss": 0.0148, "step": 15650 }, { "action_loss": 0.01586548052728176, "epoch": 14.073741007194245, "step": 15650 }, { "epoch": 14.082733812949641, "grad_norm": 0.2879065275192261, "learning_rate": 8.83157993405161e-05, "loss": 0.0172, "step": 15660 }, { "action_loss": 0.00528493570163846, "epoch": 14.082733812949641, "step": 15660 }, { "epoch": 14.091726618705035, "grad_norm": 0.2805195152759552, "learning_rate": 8.829808861178943e-05, "loss": 0.0107, "step": 15670 }, { "action_loss": 0.00847789365798235, "epoch": 14.091726618705035, "step": 15670 }, { "epoch": 14.100719424460431, "grad_norm": 0.2566278874874115, "learning_rate": 8.828036624911464e-05, "loss": 0.0143, "step": 15680 }, { "action_loss": 0.009280174970626831, "epoch": 14.100719424460431, "step": 15680 }, { "epoch": 14.109712230215827, "grad_norm": 0.2629733979701996, "learning_rate": 8.826263225787532e-05, "loss": 0.0109, "step": 15690 }, { "action_loss": 0.0053116981871426105, "epoch": 14.109712230215827, "step": 15690 }, { "epoch": 14.118705035971223, "grad_norm": 0.21318110823631287, "learning_rate": 8.824488664345858e-05, "loss": 0.01, "step": 15700 }, { "action_loss": 0.01689392328262329, "epoch": 14.118705035971223, "step": 15700 }, { "epoch": 14.127697841726619, "grad_norm": 0.35238876938819885, "learning_rate": 8.822712941125508e-05, "loss": 0.0103, "step": 15710 }, { "action_loss": 0.02162989415228367, "epoch": 14.127697841726619, "step": 15710 }, { "epoch": 14.136690647482014, "grad_norm": 0.16035287082195282, "learning_rate": 8.820936056665898e-05, "loss": 0.011, "step": 15720 }, { "action_loss": 0.011643536388874054, "epoch": 14.136690647482014, "step": 15720 }, { "epoch": 14.14568345323741, "grad_norm": 0.18076346814632416, "learning_rate": 8.819158011506801e-05, "loss": 0.0113, "step": 15730 }, { "action_loss": 0.0038051072042435408, "epoch": 14.14568345323741, "step": 15730 }, { "epoch": 14.154676258992806, "grad_norm": 0.20267944037914276, "learning_rate": 8.81737880618834e-05, "loss": 0.0092, "step": 15740 }, { "action_loss": 0.009753101505339146, "epoch": 14.154676258992806, "step": 15740 }, { "epoch": 14.163669064748202, "grad_norm": 0.20396959781646729, "learning_rate": 8.815598441250987e-05, "loss": 0.0124, "step": 15750 }, { "action_loss": 0.004117417149245739, "epoch": 14.163669064748202, "step": 15750 }, { "epoch": 14.172661870503598, "grad_norm": 0.24128437042236328, "learning_rate": 8.813816917235576e-05, "loss": 0.0098, "step": 15760 }, { "action_loss": 0.01274713221937418, "epoch": 14.172661870503598, "step": 15760 }, { "epoch": 14.181654676258994, "grad_norm": 0.23625800013542175, "learning_rate": 8.812034234683282e-05, "loss": 0.0124, "step": 15770 }, { "action_loss": 0.009679482318460941, "epoch": 14.181654676258994, "step": 15770 }, { "epoch": 14.190647482014388, "grad_norm": 0.288845956325531, "learning_rate": 8.810250394135637e-05, "loss": 0.0108, "step": 15780 }, { "action_loss": 0.01905333437025547, "epoch": 14.190647482014388, "step": 15780 }, { "epoch": 14.199640287769784, "grad_norm": 0.2414684146642685, "learning_rate": 8.808465396134529e-05, "loss": 0.0115, "step": 15790 }, { "action_loss": 0.007342397700995207, "epoch": 14.199640287769784, "step": 15790 }, { "epoch": 14.20863309352518, "grad_norm": 0.2542566955089569, "learning_rate": 8.806679241222189e-05, "loss": 0.0092, "step": 15800 }, { "action_loss": 0.005552105605602264, "epoch": 14.20863309352518, "step": 15800 }, { "epoch": 14.217625899280575, "grad_norm": 0.3933878540992737, "learning_rate": 8.804891929941203e-05, "loss": 0.0137, "step": 15810 }, { "action_loss": 0.011762566864490509, "epoch": 14.217625899280575, "step": 15810 }, { "epoch": 14.226618705035971, "grad_norm": 0.1898978352546692, "learning_rate": 8.803103462834514e-05, "loss": 0.0132, "step": 15820 }, { "action_loss": 0.010355427861213684, "epoch": 14.226618705035971, "step": 15820 }, { "epoch": 14.235611510791367, "grad_norm": 0.24710151553153992, "learning_rate": 8.801313840445408e-05, "loss": 0.0116, "step": 15830 }, { "action_loss": 0.007253700401633978, "epoch": 14.235611510791367, "step": 15830 }, { "epoch": 14.244604316546763, "grad_norm": 0.21159155666828156, "learning_rate": 8.799523063317524e-05, "loss": 0.0106, "step": 15840 }, { "action_loss": 0.006415786687284708, "epoch": 14.244604316546763, "step": 15840 }, { "epoch": 14.253597122302159, "grad_norm": 0.2257319688796997, "learning_rate": 8.797731131994854e-05, "loss": 0.0086, "step": 15850 }, { "action_loss": 0.015983939170837402, "epoch": 14.253597122302159, "step": 15850 }, { "epoch": 14.262589928057555, "grad_norm": 0.3192407786846161, "learning_rate": 8.795938047021739e-05, "loss": 0.0137, "step": 15860 }, { "action_loss": 0.007395199034363031, "epoch": 14.262589928057555, "step": 15860 }, { "epoch": 14.27158273381295, "grad_norm": 0.24406802654266357, "learning_rate": 8.794143808942872e-05, "loss": 0.011, "step": 15870 }, { "action_loss": 0.0144041134044528, "epoch": 14.27158273381295, "step": 15870 }, { "epoch": 14.280575539568344, "grad_norm": 0.48582732677459717, "learning_rate": 8.792348418303296e-05, "loss": 0.0125, "step": 15880 }, { "action_loss": 0.03518524393439293, "epoch": 14.280575539568344, "step": 15880 }, { "epoch": 14.28956834532374, "grad_norm": 0.3031846284866333, "learning_rate": 8.790551875648398e-05, "loss": 0.016, "step": 15890 }, { "action_loss": 0.01797819696366787, "epoch": 14.28956834532374, "step": 15890 }, { "epoch": 14.298561151079136, "grad_norm": 0.2424100637435913, "learning_rate": 8.788754181523926e-05, "loss": 0.017, "step": 15900 }, { "action_loss": 0.049281712621450424, "epoch": 14.298561151079136, "step": 15900 }, { "epoch": 14.307553956834532, "grad_norm": 0.23458759486675262, "learning_rate": 8.78695533647597e-05, "loss": 0.0155, "step": 15910 }, { "action_loss": 0.009438589215278625, "epoch": 14.307553956834532, "step": 15910 }, { "epoch": 14.316546762589928, "grad_norm": 0.2509286403656006, "learning_rate": 8.785155341050972e-05, "loss": 0.0166, "step": 15920 }, { "action_loss": 0.010044742375612259, "epoch": 14.316546762589928, "step": 15920 }, { "epoch": 14.325539568345324, "grad_norm": 0.19068565964698792, "learning_rate": 8.783354195795721e-05, "loss": 0.0112, "step": 15930 }, { "action_loss": 0.015231507830321789, "epoch": 14.325539568345324, "step": 15930 }, { "epoch": 14.33453237410072, "grad_norm": 0.28255340456962585, "learning_rate": 8.78155190125736e-05, "loss": 0.0112, "step": 15940 }, { "action_loss": 0.011446218006312847, "epoch": 14.33453237410072, "step": 15940 }, { "epoch": 14.343525179856115, "grad_norm": 0.2350977212190628, "learning_rate": 8.779748457983378e-05, "loss": 0.0162, "step": 15950 }, { "action_loss": 0.007586787920445204, "epoch": 14.343525179856115, "step": 15950 }, { "epoch": 14.352517985611511, "grad_norm": 0.21246568858623505, "learning_rate": 8.777943866521612e-05, "loss": 0.016, "step": 15960 }, { "action_loss": 0.006897843908518553, "epoch": 14.352517985611511, "step": 15960 }, { "epoch": 14.361510791366907, "grad_norm": 0.20586882531642914, "learning_rate": 8.77613812742025e-05, "loss": 0.0078, "step": 15970 }, { "action_loss": 0.015072452835738659, "epoch": 14.361510791366907, "step": 15970 }, { "epoch": 14.370503597122303, "grad_norm": 0.18809638917446136, "learning_rate": 8.774331241227829e-05, "loss": 0.0146, "step": 15980 }, { "action_loss": 0.005815151613205671, "epoch": 14.370503597122303, "step": 15980 }, { "epoch": 14.379496402877697, "grad_norm": 0.17466336488723755, "learning_rate": 8.772523208493232e-05, "loss": 0.0083, "step": 15990 }, { "action_loss": 0.009886990301311016, "epoch": 14.379496402877697, "step": 15990 }, { "epoch": 14.388489208633093, "grad_norm": 0.24061104655265808, "learning_rate": 8.770714029765692e-05, "loss": 0.0122, "step": 16000 }, { "action_loss": 0.024341508746147156, "epoch": 14.388489208633093, "step": 16000 }, { "epoch": 14.397482014388489, "grad_norm": 0.22189991176128387, "learning_rate": 8.768903705594789e-05, "loss": 0.012, "step": 16010 }, { "action_loss": 0.015531525947153568, "epoch": 14.397482014388489, "step": 16010 }, { "epoch": 14.406474820143885, "grad_norm": 0.2763811945915222, "learning_rate": 8.767092236530453e-05, "loss": 0.0138, "step": 16020 }, { "action_loss": 0.01987144909799099, "epoch": 14.406474820143885, "step": 16020 }, { "epoch": 14.41546762589928, "grad_norm": 0.2855939269065857, "learning_rate": 8.76527962312296e-05, "loss": 0.0132, "step": 16030 }, { "action_loss": 0.005301580298691988, "epoch": 14.41546762589928, "step": 16030 }, { "epoch": 14.424460431654676, "grad_norm": 0.23053398728370667, "learning_rate": 8.763465865922934e-05, "loss": 0.0093, "step": 16040 }, { "action_loss": 0.036037907004356384, "epoch": 14.424460431654676, "step": 16040 }, { "epoch": 14.433453237410072, "grad_norm": 0.3774193227291107, "learning_rate": 8.761650965481347e-05, "loss": 0.0147, "step": 16050 }, { "action_loss": 0.010776795446872711, "epoch": 14.433453237410072, "step": 16050 }, { "epoch": 14.442446043165468, "grad_norm": 0.23442310094833374, "learning_rate": 8.759834922349516e-05, "loss": 0.0103, "step": 16060 }, { "action_loss": 0.0076825241558253765, "epoch": 14.442446043165468, "step": 16060 }, { "epoch": 14.451438848920864, "grad_norm": 0.19531938433647156, "learning_rate": 8.758017737079108e-05, "loss": 0.012, "step": 16070 }, { "action_loss": 0.013743425719439983, "epoch": 14.451438848920864, "step": 16070 }, { "epoch": 14.46043165467626, "grad_norm": 0.2959880232810974, "learning_rate": 8.756199410222137e-05, "loss": 0.0089, "step": 16080 }, { "action_loss": 0.008406239561736584, "epoch": 14.46043165467626, "step": 16080 }, { "epoch": 14.469424460431656, "grad_norm": 0.21631242334842682, "learning_rate": 8.754379942330963e-05, "loss": 0.01, "step": 16090 }, { "action_loss": 0.007757499814033508, "epoch": 14.469424460431656, "step": 16090 }, { "epoch": 14.47841726618705, "grad_norm": 0.2160843163728714, "learning_rate": 8.75255933395829e-05, "loss": 0.0104, "step": 16100 }, { "action_loss": 0.008011925965547562, "epoch": 14.47841726618705, "step": 16100 }, { "epoch": 14.487410071942445, "grad_norm": 0.33324989676475525, "learning_rate": 8.750737585657171e-05, "loss": 0.0126, "step": 16110 }, { "action_loss": 0.007674610707908869, "epoch": 14.487410071942445, "step": 16110 }, { "epoch": 14.496402877697841, "grad_norm": 0.2485661506652832, "learning_rate": 8.748914697981008e-05, "loss": 0.0119, "step": 16120 }, { "action_loss": 0.01405277382582426, "epoch": 14.496402877697841, "step": 16120 }, { "epoch": 14.505395683453237, "grad_norm": 0.2686319053173065, "learning_rate": 8.747090671483542e-05, "loss": 0.0118, "step": 16130 }, { "action_loss": 0.0045008878223598, "epoch": 14.505395683453237, "step": 16130 }, { "epoch": 14.514388489208633, "grad_norm": 0.14808392524719238, "learning_rate": 8.745265506718869e-05, "loss": 0.008, "step": 16140 }, { "action_loss": 0.007504696492105722, "epoch": 14.514388489208633, "step": 16140 }, { "epoch": 14.523381294964029, "grad_norm": 0.27283939719200134, "learning_rate": 8.74343920424142e-05, "loss": 0.011, "step": 16150 }, { "action_loss": 0.011027917265892029, "epoch": 14.523381294964029, "step": 16150 }, { "epoch": 14.532374100719425, "grad_norm": 0.2137925624847412, "learning_rate": 8.741611764605982e-05, "loss": 0.0095, "step": 16160 }, { "action_loss": 0.005167327355593443, "epoch": 14.532374100719425, "step": 16160 }, { "epoch": 14.54136690647482, "grad_norm": 0.5247704982757568, "learning_rate": 8.739783188367682e-05, "loss": 0.0099, "step": 16170 }, { "action_loss": 0.005997943226248026, "epoch": 14.54136690647482, "step": 16170 }, { "epoch": 14.550359712230216, "grad_norm": 0.16892018914222717, "learning_rate": 8.737953476081991e-05, "loss": 0.0087, "step": 16180 }, { "action_loss": 0.010444454848766327, "epoch": 14.550359712230216, "step": 16180 }, { "epoch": 14.559352517985612, "grad_norm": 0.2506927251815796, "learning_rate": 8.73612262830473e-05, "loss": 0.01, "step": 16190 }, { "action_loss": 0.012290634214878082, "epoch": 14.559352517985612, "step": 16190 }, { "epoch": 14.568345323741006, "grad_norm": 0.3235742151737213, "learning_rate": 8.734290645592061e-05, "loss": 0.0108, "step": 16200 }, { "action_loss": 0.010644855909049511, "epoch": 14.568345323741006, "step": 16200 }, { "epoch": 14.577338129496402, "grad_norm": 0.23377928137779236, "learning_rate": 8.732457528500493e-05, "loss": 0.0127, "step": 16210 }, { "action_loss": 0.006487483624368906, "epoch": 14.577338129496402, "step": 16210 }, { "epoch": 14.586330935251798, "grad_norm": 0.21812710165977478, "learning_rate": 8.730623277586875e-05, "loss": 0.0114, "step": 16220 }, { "action_loss": 0.006674082484096289, "epoch": 14.586330935251798, "step": 16220 }, { "epoch": 14.595323741007194, "grad_norm": 0.17506881058216095, "learning_rate": 8.72878789340841e-05, "loss": 0.0102, "step": 16230 }, { "action_loss": 0.005503739695996046, "epoch": 14.595323741007194, "step": 16230 }, { "epoch": 14.60431654676259, "grad_norm": 0.18230471014976501, "learning_rate": 8.726951376522635e-05, "loss": 0.012, "step": 16240 }, { "action_loss": 0.010482375510036945, "epoch": 14.60431654676259, "step": 16240 }, { "epoch": 14.613309352517986, "grad_norm": 0.2898683547973633, "learning_rate": 8.725113727487435e-05, "loss": 0.0107, "step": 16250 }, { "action_loss": 0.01198925543576479, "epoch": 14.613309352517986, "step": 16250 }, { "epoch": 14.622302158273381, "grad_norm": 0.16251803934574127, "learning_rate": 8.723274946861042e-05, "loss": 0.0079, "step": 16260 }, { "action_loss": 0.004122225102037191, "epoch": 14.622302158273381, "step": 16260 }, { "epoch": 14.631294964028777, "grad_norm": 0.1989322155714035, "learning_rate": 8.721435035202026e-05, "loss": 0.0085, "step": 16270 }, { "action_loss": 0.019411325454711914, "epoch": 14.631294964028777, "step": 16270 }, { "epoch": 14.640287769784173, "grad_norm": 0.21940147876739502, "learning_rate": 8.719593993069306e-05, "loss": 0.0135, "step": 16280 }, { "action_loss": 0.010018705390393734, "epoch": 14.640287769784173, "step": 16280 }, { "epoch": 14.649280575539569, "grad_norm": 0.24296163022518158, "learning_rate": 8.717751821022139e-05, "loss": 0.0103, "step": 16290 }, { "action_loss": 0.006845859345048666, "epoch": 14.649280575539569, "step": 16290 }, { "epoch": 14.658273381294965, "grad_norm": 0.20034855604171753, "learning_rate": 8.715908519620134e-05, "loss": 0.0114, "step": 16300 }, { "action_loss": 0.014154368080198765, "epoch": 14.658273381294965, "step": 16300 }, { "epoch": 14.667266187050359, "grad_norm": 0.24003322422504425, "learning_rate": 8.71406408942323e-05, "loss": 0.0115, "step": 16310 }, { "action_loss": 0.011912505142390728, "epoch": 14.667266187050359, "step": 16310 }, { "epoch": 14.676258992805755, "grad_norm": 0.21353699266910553, "learning_rate": 8.712218530991723e-05, "loss": 0.0083, "step": 16320 }, { "action_loss": 0.013258646242320538, "epoch": 14.676258992805755, "step": 16320 }, { "epoch": 14.68525179856115, "grad_norm": 0.30736297369003296, "learning_rate": 8.710371844886241e-05, "loss": 0.0094, "step": 16330 }, { "action_loss": 0.030004078522324562, "epoch": 14.68525179856115, "step": 16330 }, { "epoch": 14.694244604316546, "grad_norm": 0.21284030377864838, "learning_rate": 8.708524031667758e-05, "loss": 0.0115, "step": 16340 }, { "action_loss": 0.02042505517601967, "epoch": 14.694244604316546, "step": 16340 }, { "epoch": 14.703237410071942, "grad_norm": 0.23439352214336395, "learning_rate": 8.706675091897592e-05, "loss": 0.0122, "step": 16350 }, { "action_loss": 0.006865268107503653, "epoch": 14.703237410071942, "step": 16350 }, { "epoch": 14.712230215827338, "grad_norm": 0.20691661536693573, "learning_rate": 8.704825026137404e-05, "loss": 0.0129, "step": 16360 }, { "action_loss": 0.018364889547228813, "epoch": 14.712230215827338, "step": 16360 }, { "epoch": 14.721223021582734, "grad_norm": 0.27207067608833313, "learning_rate": 8.702973834949192e-05, "loss": 0.0135, "step": 16370 }, { "action_loss": 0.007454417645931244, "epoch": 14.721223021582734, "step": 16370 }, { "epoch": 14.73021582733813, "grad_norm": 0.1438688039779663, "learning_rate": 8.701121518895301e-05, "loss": 0.0102, "step": 16380 }, { "action_loss": 0.015101033262908459, "epoch": 14.73021582733813, "step": 16380 }, { "epoch": 14.739208633093526, "grad_norm": 0.23916788399219513, "learning_rate": 8.699268078538414e-05, "loss": 0.0128, "step": 16390 }, { "action_loss": 0.0041299727745354176, "epoch": 14.739208633093526, "step": 16390 }, { "epoch": 14.748201438848922, "grad_norm": 0.22658413648605347, "learning_rate": 8.69741351444156e-05, "loss": 0.009, "step": 16400 }, { "action_loss": 0.009874588809907436, "epoch": 14.748201438848922, "step": 16400 }, { "epoch": 14.757194244604317, "grad_norm": 0.25240957736968994, "learning_rate": 8.695557827168101e-05, "loss": 0.0102, "step": 16410 }, { "action_loss": 0.008615595288574696, "epoch": 14.757194244604317, "step": 16410 }, { "epoch": 14.766187050359711, "grad_norm": 0.17283256351947784, "learning_rate": 8.693701017281753e-05, "loss": 0.012, "step": 16420 }, { "action_loss": 0.012754961848258972, "epoch": 14.766187050359711, "step": 16420 }, { "epoch": 14.775179856115107, "grad_norm": 0.24443376064300537, "learning_rate": 8.691843085346563e-05, "loss": 0.0132, "step": 16430 }, { "action_loss": 0.005871517583727837, "epoch": 14.775179856115107, "step": 16430 }, { "epoch": 14.784172661870503, "grad_norm": 0.18242554366588593, "learning_rate": 8.689984031926919e-05, "loss": 0.0092, "step": 16440 }, { "action_loss": 0.00581279257312417, "epoch": 14.784172661870503, "step": 16440 }, { "epoch": 14.793165467625899, "grad_norm": 0.16670478880405426, "learning_rate": 8.688123857587555e-05, "loss": 0.01, "step": 16450 }, { "action_loss": 0.004637567792087793, "epoch": 14.793165467625899, "step": 16450 }, { "epoch": 14.802158273381295, "grad_norm": 0.23623010516166687, "learning_rate": 8.686262562893544e-05, "loss": 0.012, "step": 16460 }, { "action_loss": 0.007223410066217184, "epoch": 14.802158273381295, "step": 16460 }, { "epoch": 14.81115107913669, "grad_norm": 0.14201359450817108, "learning_rate": 8.684400148410294e-05, "loss": 0.0089, "step": 16470 }, { "action_loss": 0.0035678783897310495, "epoch": 14.81115107913669, "step": 16470 }, { "epoch": 14.820143884892087, "grad_norm": 0.23028802871704102, "learning_rate": 8.682536614703562e-05, "loss": 0.0101, "step": 16480 }, { "action_loss": 0.010506014339625835, "epoch": 14.820143884892087, "step": 16480 }, { "epoch": 14.829136690647482, "grad_norm": 0.23031634092330933, "learning_rate": 8.680671962339437e-05, "loss": 0.0096, "step": 16490 }, { "action_loss": 0.008959825150668621, "epoch": 14.829136690647482, "step": 16490 }, { "epoch": 14.838129496402878, "grad_norm": 0.19084297120571136, "learning_rate": 8.678806191884352e-05, "loss": 0.0098, "step": 16500 }, { "action_loss": 0.01297452300786972, "epoch": 14.838129496402878, "step": 16500 }, { "epoch": 14.847122302158274, "grad_norm": 0.22476555407047272, "learning_rate": 8.67693930390508e-05, "loss": 0.0099, "step": 16510 }, { "action_loss": 0.012541033327579498, "epoch": 14.847122302158274, "step": 16510 }, { "epoch": 14.85611510791367, "grad_norm": 0.16831044852733612, "learning_rate": 8.67507129896873e-05, "loss": 0.0084, "step": 16520 }, { "action_loss": 0.01793571561574936, "epoch": 14.85611510791367, "step": 16520 }, { "epoch": 14.865107913669064, "grad_norm": 0.27709466218948364, "learning_rate": 8.673202177642757e-05, "loss": 0.0155, "step": 16530 }, { "action_loss": 0.01613786444067955, "epoch": 14.865107913669064, "step": 16530 }, { "epoch": 14.87410071942446, "grad_norm": 0.217535138130188, "learning_rate": 8.671331940494945e-05, "loss": 0.0171, "step": 16540 }, { "action_loss": 0.00757959857583046, "epoch": 14.87410071942446, "step": 16540 }, { "epoch": 14.883093525179856, "grad_norm": 0.19612793624401093, "learning_rate": 8.669460588093427e-05, "loss": 0.0116, "step": 16550 }, { "action_loss": 0.0090004438534379, "epoch": 14.883093525179856, "step": 16550 }, { "epoch": 14.892086330935252, "grad_norm": 0.3218361437320709, "learning_rate": 8.667588121006667e-05, "loss": 0.0103, "step": 16560 }, { "action_loss": 0.03109695203602314, "epoch": 14.892086330935252, "step": 16560 }, { "epoch": 14.901079136690647, "grad_norm": 0.2306791990995407, "learning_rate": 8.665714539803475e-05, "loss": 0.0139, "step": 16570 }, { "action_loss": 0.0066493358463048935, "epoch": 14.901079136690647, "step": 16570 }, { "epoch": 14.910071942446043, "grad_norm": 0.1881714016199112, "learning_rate": 8.663839845052993e-05, "loss": 0.008, "step": 16580 }, { "action_loss": 0.008830192498862743, "epoch": 14.910071942446043, "step": 16580 }, { "epoch": 14.91906474820144, "grad_norm": 0.3641505837440491, "learning_rate": 8.661964037324703e-05, "loss": 0.0106, "step": 16590 }, { "action_loss": 0.033625513315200806, "epoch": 14.91906474820144, "step": 16590 }, { "epoch": 14.928057553956835, "grad_norm": 0.1769459843635559, "learning_rate": 8.660087117188427e-05, "loss": 0.0079, "step": 16600 }, { "action_loss": 0.01158375758677721, "epoch": 14.928057553956835, "step": 16600 }, { "epoch": 14.93705035971223, "grad_norm": 0.2621781826019287, "learning_rate": 8.658209085214325e-05, "loss": 0.0115, "step": 16610 }, { "action_loss": 0.013469324447214603, "epoch": 14.93705035971223, "step": 16610 }, { "epoch": 14.946043165467627, "grad_norm": 0.20150762796401978, "learning_rate": 8.656329941972891e-05, "loss": 0.0176, "step": 16620 }, { "action_loss": 0.006154300179332495, "epoch": 14.946043165467627, "step": 16620 }, { "epoch": 14.95503597122302, "grad_norm": 0.16015049815177917, "learning_rate": 8.654449688034963e-05, "loss": 0.0102, "step": 16630 }, { "action_loss": 0.010725826025009155, "epoch": 14.95503597122302, "step": 16630 }, { "epoch": 14.964028776978417, "grad_norm": 0.19630274176597595, "learning_rate": 8.652568323971706e-05, "loss": 0.0117, "step": 16640 }, { "action_loss": 0.004647372290492058, "epoch": 14.964028776978417, "step": 16640 }, { "epoch": 14.973021582733812, "grad_norm": 0.29582110047340393, "learning_rate": 8.650685850354636e-05, "loss": 0.0098, "step": 16650 }, { "action_loss": 0.011551610194146633, "epoch": 14.973021582733812, "step": 16650 }, { "epoch": 14.982014388489208, "grad_norm": 0.2523828148841858, "learning_rate": 8.648802267755593e-05, "loss": 0.0119, "step": 16660 }, { "action_loss": 0.004050752148032188, "epoch": 14.982014388489208, "step": 16660 }, { "epoch": 14.991007194244604, "grad_norm": 0.25901103019714355, "learning_rate": 8.646917576746764e-05, "loss": 0.0112, "step": 16670 }, { "action_loss": 0.006382229272276163, "epoch": 14.991007194244604, "step": 16670 }, { "epoch": 15.0, "grad_norm": 0.21660549938678741, "learning_rate": 8.645031777900666e-05, "loss": 0.0092, "step": 16680 }, { "action_loss": 0.010484539903700352, "epoch": 15.0, "step": 16680 }, { "epoch": 15.008992805755396, "grad_norm": 0.2933081388473511, "learning_rate": 8.643144871790154e-05, "loss": 0.0137, "step": 16690 }, { "action_loss": 0.01024081651121378, "epoch": 15.008992805755396, "step": 16690 }, { "epoch": 15.017985611510792, "grad_norm": 0.2831355631351471, "learning_rate": 8.641256858988424e-05, "loss": 0.0098, "step": 16700 }, { "action_loss": 0.006430538836866617, "epoch": 15.017985611510792, "step": 16700 }, { "epoch": 15.026978417266188, "grad_norm": 0.2382008284330368, "learning_rate": 8.639367740069e-05, "loss": 0.0109, "step": 16710 }, { "action_loss": 0.008966697379946709, "epoch": 15.026978417266188, "step": 16710 }, { "epoch": 15.035971223021583, "grad_norm": 0.25995081663131714, "learning_rate": 8.63747751560575e-05, "loss": 0.0108, "step": 16720 }, { "action_loss": 0.0037057166919112206, "epoch": 15.035971223021583, "step": 16720 }, { "epoch": 15.04496402877698, "grad_norm": 0.23915569484233856, "learning_rate": 8.635586186172871e-05, "loss": 0.0085, "step": 16730 }, { "action_loss": 0.009666066616773605, "epoch": 15.04496402877698, "step": 16730 }, { "epoch": 15.053956834532373, "grad_norm": 0.18584656715393066, "learning_rate": 8.633693752344902e-05, "loss": 0.0087, "step": 16740 }, { "action_loss": 0.008847067132592201, "epoch": 15.053956834532373, "step": 16740 }, { "epoch": 15.06294964028777, "grad_norm": 0.2065318375825882, "learning_rate": 8.631800214696713e-05, "loss": 0.0101, "step": 16750 }, { "action_loss": 0.009491491131484509, "epoch": 15.06294964028777, "step": 16750 }, { "epoch": 15.071942446043165, "grad_norm": 0.24264545738697052, "learning_rate": 8.629905573803511e-05, "loss": 0.0115, "step": 16760 }, { "action_loss": 0.005465380847454071, "epoch": 15.071942446043165, "step": 16760 }, { "epoch": 15.08093525179856, "grad_norm": 0.11694072186946869, "learning_rate": 8.628009830240839e-05, "loss": 0.011, "step": 16770 }, { "action_loss": 0.0051947045139968395, "epoch": 15.08093525179856, "step": 16770 }, { "epoch": 15.089928057553957, "grad_norm": 0.15007494390010834, "learning_rate": 8.626112984584571e-05, "loss": 0.0115, "step": 16780 }, { "action_loss": 0.005531588103622198, "epoch": 15.089928057553957, "step": 16780 }, { "epoch": 15.098920863309353, "grad_norm": 0.16287541389465332, "learning_rate": 8.62421503741092e-05, "loss": 0.0101, "step": 16790 }, { "action_loss": 0.0060801119543612, "epoch": 15.098920863309353, "step": 16790 }, { "epoch": 15.107913669064748, "grad_norm": 0.13021458685398102, "learning_rate": 8.622315989296432e-05, "loss": 0.0072, "step": 16800 }, { "action_loss": 0.003833244787529111, "epoch": 15.107913669064748, "step": 16800 }, { "epoch": 15.116906474820144, "grad_norm": 0.17404602468013763, "learning_rate": 8.62041584081799e-05, "loss": 0.0098, "step": 16810 }, { "action_loss": 0.011150885373353958, "epoch": 15.116906474820144, "step": 16810 }, { "epoch": 15.12589928057554, "grad_norm": 0.20425613224506378, "learning_rate": 8.618514592552807e-05, "loss": 0.0096, "step": 16820 }, { "action_loss": 0.007730359211564064, "epoch": 15.12589928057554, "step": 16820 }, { "epoch": 15.134892086330936, "grad_norm": 0.14734841883182526, "learning_rate": 8.616612245078431e-05, "loss": 0.0083, "step": 16830 }, { "action_loss": 0.012803517282009125, "epoch": 15.134892086330936, "step": 16830 }, { "epoch": 15.14388489208633, "grad_norm": 0.20544999837875366, "learning_rate": 8.614708798972746e-05, "loss": 0.0105, "step": 16840 }, { "action_loss": 0.012042440474033356, "epoch": 15.14388489208633, "step": 16840 }, { "epoch": 15.152877697841726, "grad_norm": 0.21669703722000122, "learning_rate": 8.61280425481397e-05, "loss": 0.0086, "step": 16850 }, { "action_loss": 0.014228160493075848, "epoch": 15.152877697841726, "step": 16850 }, { "epoch": 15.161870503597122, "grad_norm": 0.27732691168785095, "learning_rate": 8.61089861318065e-05, "loss": 0.0094, "step": 16860 }, { "action_loss": 0.009141410700976849, "epoch": 15.161870503597122, "step": 16860 }, { "epoch": 15.170863309352518, "grad_norm": 0.2889055013656616, "learning_rate": 8.608991874651673e-05, "loss": 0.0083, "step": 16870 }, { "action_loss": 0.01285418588668108, "epoch": 15.170863309352518, "step": 16870 }, { "epoch": 15.179856115107913, "grad_norm": 0.23704661428928375, "learning_rate": 8.607084039806255e-05, "loss": 0.0105, "step": 16880 }, { "action_loss": 0.01777210645377636, "epoch": 15.179856115107913, "step": 16880 }, { "epoch": 15.18884892086331, "grad_norm": 0.23408591747283936, "learning_rate": 8.605175109223944e-05, "loss": 0.0089, "step": 16890 }, { "action_loss": 0.010842613875865936, "epoch": 15.18884892086331, "step": 16890 }, { "epoch": 15.197841726618705, "grad_norm": 0.2160569727420807, "learning_rate": 8.603265083484624e-05, "loss": 0.0093, "step": 16900 }, { "action_loss": 0.006694912910461426, "epoch": 15.197841726618705, "step": 16900 }, { "epoch": 15.206834532374101, "grad_norm": 0.23998382687568665, "learning_rate": 8.60135396316851e-05, "loss": 0.0082, "step": 16910 }, { "action_loss": 0.011820182204246521, "epoch": 15.206834532374101, "step": 16910 }, { "epoch": 15.215827338129497, "grad_norm": 0.2000211775302887, "learning_rate": 8.599441748856152e-05, "loss": 0.008, "step": 16920 }, { "action_loss": 0.014515489339828491, "epoch": 15.215827338129497, "step": 16920 }, { "epoch": 15.224820143884893, "grad_norm": 0.2558310329914093, "learning_rate": 8.597528441128427e-05, "loss": 0.01, "step": 16930 }, { "action_loss": 0.00453386502340436, "epoch": 15.224820143884893, "step": 16930 }, { "epoch": 15.233812949640289, "grad_norm": 0.22538691759109497, "learning_rate": 8.595614040566549e-05, "loss": 0.008, "step": 16940 }, { "action_loss": 0.004414958413690329, "epoch": 15.233812949640289, "step": 16940 }, { "epoch": 15.242805755395683, "grad_norm": 0.17366383969783783, "learning_rate": 8.593698547752063e-05, "loss": 0.0096, "step": 16950 }, { "action_loss": 0.0063809906132519245, "epoch": 15.242805755395683, "step": 16950 }, { "epoch": 15.251798561151078, "grad_norm": 0.3405308425426483, "learning_rate": 8.591781963266843e-05, "loss": 0.0195, "step": 16960 }, { "action_loss": 0.0036588404327630997, "epoch": 15.251798561151078, "step": 16960 }, { "epoch": 15.260791366906474, "grad_norm": 0.26123151183128357, "learning_rate": 8.5898642876931e-05, "loss": 0.0091, "step": 16970 }, { "action_loss": 0.022815952077507973, "epoch": 15.260791366906474, "step": 16970 }, { "epoch": 15.26978417266187, "grad_norm": 0.2692583203315735, "learning_rate": 8.587945521613369e-05, "loss": 0.0159, "step": 16980 }, { "action_loss": 0.006749898195266724, "epoch": 15.26978417266187, "step": 16980 }, { "epoch": 15.278776978417266, "grad_norm": 0.2952899634838104, "learning_rate": 8.586025665610524e-05, "loss": 0.0087, "step": 16990 }, { "action_loss": 0.00834197923541069, "epoch": 15.278776978417266, "step": 16990 }, { "epoch": 15.287769784172662, "grad_norm": 0.2423599511384964, "learning_rate": 8.584104720267765e-05, "loss": 0.0106, "step": 17000 }, { "action_loss": 0.012782103382050991, "epoch": 15.287769784172662, "step": 17000 }, { "epoch": 15.296762589928058, "grad_norm": 0.22271522879600525, "learning_rate": 8.582182686168625e-05, "loss": 0.0111, "step": 17010 }, { "action_loss": 0.011186987161636353, "epoch": 15.296762589928058, "step": 17010 }, { "epoch": 15.305755395683454, "grad_norm": 0.23757986724376678, "learning_rate": 8.580259563896967e-05, "loss": 0.0118, "step": 17020 }, { "action_loss": 0.013721674680709839, "epoch": 15.305755395683454, "step": 17020 }, { "epoch": 15.31474820143885, "grad_norm": 0.2770782709121704, "learning_rate": 8.578335354036983e-05, "loss": 0.0116, "step": 17030 }, { "action_loss": 0.016173189505934715, "epoch": 15.31474820143885, "step": 17030 }, { "epoch": 15.323741007194245, "grad_norm": 0.24865581095218658, "learning_rate": 8.576410057173201e-05, "loss": 0.0105, "step": 17040 }, { "action_loss": 0.005086393561214209, "epoch": 15.323741007194245, "step": 17040 }, { "epoch": 15.332733812949641, "grad_norm": 0.2443179488182068, "learning_rate": 8.574483673890474e-05, "loss": 0.0095, "step": 17050 }, { "action_loss": 0.003882194636389613, "epoch": 15.332733812949641, "step": 17050 }, { "epoch": 15.341726618705035, "grad_norm": 0.2613908648490906, "learning_rate": 8.572556204773983e-05, "loss": 0.0088, "step": 17060 }, { "action_loss": 0.007885896600782871, "epoch": 15.341726618705035, "step": 17060 }, { "epoch": 15.350719424460431, "grad_norm": 0.2979147732257843, "learning_rate": 8.570627650409246e-05, "loss": 0.0092, "step": 17070 }, { "action_loss": 0.002916003344580531, "epoch": 15.350719424460431, "step": 17070 }, { "epoch": 15.359712230215827, "grad_norm": 0.21614277362823486, "learning_rate": 8.568698011382107e-05, "loss": 0.0095, "step": 17080 }, { "action_loss": 0.024522289633750916, "epoch": 15.359712230215827, "step": 17080 }, { "epoch": 15.368705035971223, "grad_norm": 0.2562743127346039, "learning_rate": 8.566767288278738e-05, "loss": 0.0114, "step": 17090 }, { "action_loss": 0.004291533958166838, "epoch": 15.368705035971223, "step": 17090 }, { "epoch": 15.377697841726619, "grad_norm": 0.14691361784934998, "learning_rate": 8.56483548168564e-05, "loss": 0.0088, "step": 17100 }, { "action_loss": 0.004397443495690823, "epoch": 15.377697841726619, "step": 17100 }, { "epoch": 15.386690647482014, "grad_norm": 0.19900549948215485, "learning_rate": 8.562902592189648e-05, "loss": 0.0094, "step": 17110 }, { "action_loss": 0.004438679665327072, "epoch": 15.386690647482014, "step": 17110 }, { "epoch": 15.39568345323741, "grad_norm": 0.13796553015708923, "learning_rate": 8.560968620377921e-05, "loss": 0.0082, "step": 17120 }, { "action_loss": 0.02108895219862461, "epoch": 15.39568345323741, "step": 17120 }, { "epoch": 15.404676258992806, "grad_norm": 0.21495136618614197, "learning_rate": 8.559033566837951e-05, "loss": 0.0113, "step": 17130 }, { "action_loss": 0.003942868206650019, "epoch": 15.404676258992806, "step": 17130 }, { "epoch": 15.413669064748202, "grad_norm": 0.5186951756477356, "learning_rate": 8.557097432157551e-05, "loss": 0.0121, "step": 17140 }, { "action_loss": 0.00900996197015047, "epoch": 15.413669064748202, "step": 17140 }, { "epoch": 15.422661870503598, "grad_norm": 0.15883225202560425, "learning_rate": 8.555160216924872e-05, "loss": 0.0088, "step": 17150 }, { "action_loss": 0.009870551526546478, "epoch": 15.422661870503598, "step": 17150 }, { "epoch": 15.431654676258994, "grad_norm": 0.18471090495586395, "learning_rate": 8.55322192172839e-05, "loss": 0.0103, "step": 17160 }, { "action_loss": 0.004489114508032799, "epoch": 15.431654676258994, "step": 17160 }, { "epoch": 15.440647482014388, "grad_norm": 0.1648828685283661, "learning_rate": 8.551282547156902e-05, "loss": 0.0065, "step": 17170 }, { "action_loss": 0.012177656404674053, "epoch": 15.440647482014388, "step": 17170 }, { "epoch": 15.449640287769784, "grad_norm": 0.2533032298088074, "learning_rate": 8.549342093799544e-05, "loss": 0.0108, "step": 17180 }, { "action_loss": 0.0060962773859500885, "epoch": 15.449640287769784, "step": 17180 }, { "epoch": 15.45863309352518, "grad_norm": 0.313279926776886, "learning_rate": 8.547400562245773e-05, "loss": 0.0134, "step": 17190 }, { "action_loss": 0.01859934628009796, "epoch": 15.45863309352518, "step": 17190 }, { "epoch": 15.467625899280575, "grad_norm": 0.24596858024597168, "learning_rate": 8.545457953085374e-05, "loss": 0.0108, "step": 17200 }, { "action_loss": 0.007480008993297815, "epoch": 15.467625899280575, "step": 17200 }, { "epoch": 15.476618705035971, "grad_norm": 0.1863003969192505, "learning_rate": 8.543514266908463e-05, "loss": 0.0089, "step": 17210 }, { "action_loss": 0.009177283383905888, "epoch": 15.476618705035971, "step": 17210 }, { "epoch": 15.485611510791367, "grad_norm": 0.2873975336551666, "learning_rate": 8.541569504305478e-05, "loss": 0.011, "step": 17220 }, { "action_loss": 0.008230752311646938, "epoch": 15.485611510791367, "step": 17220 }, { "epoch": 15.494604316546763, "grad_norm": 0.26422572135925293, "learning_rate": 8.539623665867187e-05, "loss": 0.0107, "step": 17230 }, { "action_loss": 0.023705026134848595, "epoch": 15.494604316546763, "step": 17230 }, { "epoch": 15.503597122302159, "grad_norm": 0.15524283051490784, "learning_rate": 8.537676752184685e-05, "loss": 0.0095, "step": 17240 }, { "action_loss": 0.006777789443731308, "epoch": 15.503597122302159, "step": 17240 }, { "epoch": 15.512589928057555, "grad_norm": 0.1975548267364502, "learning_rate": 8.53572876384939e-05, "loss": 0.0091, "step": 17250 }, { "action_loss": 0.008129540830850601, "epoch": 15.512589928057555, "step": 17250 }, { "epoch": 15.52158273381295, "grad_norm": 0.34275999665260315, "learning_rate": 8.533779701453056e-05, "loss": 0.0108, "step": 17260 }, { "action_loss": 0.031054990366101265, "epoch": 15.52158273381295, "step": 17260 }, { "epoch": 15.530575539568346, "grad_norm": 0.2297963798046112, "learning_rate": 8.53182956558775e-05, "loss": 0.0111, "step": 17270 }, { "action_loss": 0.004517614375799894, "epoch": 15.530575539568346, "step": 17270 }, { "epoch": 15.53956834532374, "grad_norm": 0.21004721522331238, "learning_rate": 8.529878356845877e-05, "loss": 0.0077, "step": 17280 }, { "action_loss": 0.00711780833080411, "epoch": 15.53956834532374, "step": 17280 }, { "epoch": 15.548561151079136, "grad_norm": 0.3668104112148285, "learning_rate": 8.527926075820158e-05, "loss": 0.012, "step": 17290 }, { "action_loss": 0.009537632577121258, "epoch": 15.548561151079136, "step": 17290 }, { "epoch": 15.557553956834532, "grad_norm": 0.1907770335674286, "learning_rate": 8.525972723103648e-05, "loss": 0.014, "step": 17300 }, { "action_loss": 0.006800906267017126, "epoch": 15.557553956834532, "step": 17300 }, { "epoch": 15.566546762589928, "grad_norm": 0.13252948224544525, "learning_rate": 8.524018299289722e-05, "loss": 0.0093, "step": 17310 }, { "action_loss": 0.006272571161389351, "epoch": 15.566546762589928, "step": 17310 }, { "epoch": 15.575539568345324, "grad_norm": 0.35211411118507385, "learning_rate": 8.522062804972083e-05, "loss": 0.0105, "step": 17320 }, { "action_loss": 0.011184747330844402, "epoch": 15.575539568345324, "step": 17320 }, { "epoch": 15.58453237410072, "grad_norm": 0.18581007421016693, "learning_rate": 8.520106240744759e-05, "loss": 0.0096, "step": 17330 }, { "action_loss": 0.0070960731245577335, "epoch": 15.58453237410072, "step": 17330 }, { "epoch": 15.593525179856115, "grad_norm": 0.23663726449012756, "learning_rate": 8.518148607202102e-05, "loss": 0.0128, "step": 17340 }, { "action_loss": 0.009475691244006157, "epoch": 15.593525179856115, "step": 17340 }, { "epoch": 15.602517985611511, "grad_norm": 0.3730270564556122, "learning_rate": 8.51618990493879e-05, "loss": 0.0114, "step": 17350 }, { "action_loss": 0.011923025362193584, "epoch": 15.602517985611511, "step": 17350 }, { "epoch": 15.611510791366907, "grad_norm": 0.25648120045661926, "learning_rate": 8.514230134549823e-05, "loss": 0.0132, "step": 17360 }, { "action_loss": 0.01535051316022873, "epoch": 15.611510791366907, "step": 17360 }, { "epoch": 15.620503597122303, "grad_norm": 0.25358453392982483, "learning_rate": 8.51226929663053e-05, "loss": 0.0105, "step": 17370 }, { "action_loss": 0.013995704241096973, "epoch": 15.620503597122303, "step": 17370 }, { "epoch": 15.629496402877697, "grad_norm": 0.23843896389007568, "learning_rate": 8.51030739177656e-05, "loss": 0.0104, "step": 17380 }, { "action_loss": 0.005525650922209024, "epoch": 15.629496402877697, "step": 17380 }, { "epoch": 15.638489208633093, "grad_norm": 0.2046632617712021, "learning_rate": 8.508344420583889e-05, "loss": 0.0123, "step": 17390 }, { "action_loss": 0.012236557900905609, "epoch": 15.638489208633093, "step": 17390 }, { "epoch": 15.647482014388489, "grad_norm": 0.17085091769695282, "learning_rate": 8.506380383648816e-05, "loss": 0.0089, "step": 17400 }, { "action_loss": 0.0069626737385988235, "epoch": 15.647482014388489, "step": 17400 }, { "epoch": 15.656474820143885, "grad_norm": 0.18665461242198944, "learning_rate": 8.504415281567963e-05, "loss": 0.0084, "step": 17410 }, { "action_loss": 0.005389265716075897, "epoch": 15.656474820143885, "step": 17410 }, { "epoch": 15.66546762589928, "grad_norm": 0.21579976379871368, "learning_rate": 8.502449114938275e-05, "loss": 0.0073, "step": 17420 }, { "action_loss": 0.005245847161859274, "epoch": 15.66546762589928, "step": 17420 }, { "epoch": 15.674460431654676, "grad_norm": 0.18158665299415588, "learning_rate": 8.500481884357025e-05, "loss": 0.0092, "step": 17430 }, { "action_loss": 0.009230188094079494, "epoch": 15.674460431654676, "step": 17430 }, { "epoch": 15.683453237410072, "grad_norm": 0.2860008478164673, "learning_rate": 8.498513590421801e-05, "loss": 0.0088, "step": 17440 }, { "action_loss": 0.031285859644412994, "epoch": 15.683453237410072, "step": 17440 }, { "epoch": 15.692446043165468, "grad_norm": 0.15960803627967834, "learning_rate": 8.496544233730522e-05, "loss": 0.0116, "step": 17450 }, { "action_loss": 0.004768053535372019, "epoch": 15.692446043165468, "step": 17450 }, { "epoch": 15.701438848920864, "grad_norm": 0.22406511008739471, "learning_rate": 8.494573814881426e-05, "loss": 0.0102, "step": 17460 }, { "action_loss": 0.006602596025913954, "epoch": 15.701438848920864, "step": 17460 }, { "epoch": 15.71043165467626, "grad_norm": 0.20406921207904816, "learning_rate": 8.492602334473074e-05, "loss": 0.0078, "step": 17470 }, { "action_loss": 0.010769397020339966, "epoch": 15.71043165467626, "step": 17470 }, { "epoch": 15.719424460431654, "grad_norm": 0.21095730364322662, "learning_rate": 8.49062979310435e-05, "loss": 0.0123, "step": 17480 }, { "action_loss": 0.01351234596222639, "epoch": 15.719424460431654, "step": 17480 }, { "epoch": 15.72841726618705, "grad_norm": 0.20147952437400818, "learning_rate": 8.488656191374458e-05, "loss": 0.0117, "step": 17490 }, { "action_loss": 0.006098552141338587, "epoch": 15.72841726618705, "step": 17490 }, { "epoch": 15.737410071942445, "grad_norm": 0.1590120643377304, "learning_rate": 8.48668152988293e-05, "loss": 0.009, "step": 17500 }, { "action_loss": 0.00767420744523406, "epoch": 15.737410071942445, "step": 17500 }, { "epoch": 15.746402877697841, "grad_norm": 0.23901169002056122, "learning_rate": 8.484705809229612e-05, "loss": 0.01, "step": 17510 }, { "action_loss": 0.019763736054301262, "epoch": 15.746402877697841, "step": 17510 }, { "epoch": 15.755395683453237, "grad_norm": 0.3252105116844177, "learning_rate": 8.482729030014677e-05, "loss": 0.0133, "step": 17520 }, { "action_loss": 0.006700409576296806, "epoch": 15.755395683453237, "step": 17520 }, { "epoch": 15.764388489208633, "grad_norm": 0.14376412332057953, "learning_rate": 8.48075119283862e-05, "loss": 0.0092, "step": 17530 }, { "action_loss": 0.015013921074569225, "epoch": 15.764388489208633, "step": 17530 }, { "epoch": 15.773381294964029, "grad_norm": 0.24405650794506073, "learning_rate": 8.478772298302254e-05, "loss": 0.0103, "step": 17540 }, { "action_loss": 0.007661942858248949, "epoch": 15.773381294964029, "step": 17540 }, { "epoch": 15.782374100719425, "grad_norm": 0.2626705467700958, "learning_rate": 8.476792347006716e-05, "loss": 0.0119, "step": 17550 }, { "action_loss": 0.008981923572719097, "epoch": 15.782374100719425, "step": 17550 }, { "epoch": 15.79136690647482, "grad_norm": 0.2143138200044632, "learning_rate": 8.474811339553462e-05, "loss": 0.0085, "step": 17560 }, { "action_loss": 0.011930788867175579, "epoch": 15.79136690647482, "step": 17560 }, { "epoch": 15.800359712230216, "grad_norm": 0.3408883512020111, "learning_rate": 8.47282927654427e-05, "loss": 0.0152, "step": 17570 }, { "action_loss": 0.005094032268971205, "epoch": 15.800359712230216, "step": 17570 }, { "epoch": 15.809352517985612, "grad_norm": 0.1621675342321396, "learning_rate": 8.470846158581238e-05, "loss": 0.0069, "step": 17580 }, { "action_loss": 0.015179750509560108, "epoch": 15.809352517985612, "step": 17580 }, { "epoch": 15.818345323741006, "grad_norm": 0.20968493819236755, "learning_rate": 8.468861986266787e-05, "loss": 0.0103, "step": 17590 }, { "action_loss": 0.0160656925290823, "epoch": 15.818345323741006, "step": 17590 }, { "epoch": 15.827338129496402, "grad_norm": 0.1625586897134781, "learning_rate": 8.466876760203654e-05, "loss": 0.01, "step": 17600 }, { "action_loss": 0.046507686376571655, "epoch": 15.827338129496402, "step": 17600 }, { "epoch": 15.836330935251798, "grad_norm": 0.16704733669757843, "learning_rate": 8.464890480994898e-05, "loss": 0.0137, "step": 17610 }, { "action_loss": 0.006548552308231592, "epoch": 15.836330935251798, "step": 17610 }, { "epoch": 15.845323741007194, "grad_norm": 0.15581151843070984, "learning_rate": 8.462903149243899e-05, "loss": 0.0084, "step": 17620 }, { "action_loss": 0.007108557969331741, "epoch": 15.845323741007194, "step": 17620 }, { "epoch": 15.85431654676259, "grad_norm": 0.21096472442150116, "learning_rate": 8.460914765554357e-05, "loss": 0.0079, "step": 17630 }, { "action_loss": 0.006329250987619162, "epoch": 15.85431654676259, "step": 17630 }, { "epoch": 15.863309352517986, "grad_norm": 0.3213602602481842, "learning_rate": 8.458925330530288e-05, "loss": 0.0149, "step": 17640 }, { "action_loss": 0.0035150593612343073, "epoch": 15.863309352517986, "step": 17640 }, { "epoch": 15.872302158273381, "grad_norm": 0.19492709636688232, "learning_rate": 8.456934844776032e-05, "loss": 0.0091, "step": 17650 }, { "action_loss": 0.0037809591740369797, "epoch": 15.872302158273381, "step": 17650 }, { "epoch": 15.881294964028777, "grad_norm": 0.3040752410888672, "learning_rate": 8.454943308896246e-05, "loss": 0.0097, "step": 17660 }, { "action_loss": 0.01565525308251381, "epoch": 15.881294964028777, "step": 17660 }, { "epoch": 15.890287769784173, "grad_norm": 0.21043892204761505, "learning_rate": 8.452950723495905e-05, "loss": 0.0076, "step": 17670 }, { "action_loss": 0.008226854726672173, "epoch": 15.890287769784173, "step": 17670 }, { "epoch": 15.899280575539569, "grad_norm": 0.1603776216506958, "learning_rate": 8.450957089180303e-05, "loss": 0.0095, "step": 17680 }, { "action_loss": 0.006192255299538374, "epoch": 15.899280575539569, "step": 17680 }, { "epoch": 15.908273381294965, "grad_norm": 0.34587541222572327, "learning_rate": 8.448962406555055e-05, "loss": 0.0099, "step": 17690 }, { "action_loss": 0.027589725330471992, "epoch": 15.908273381294965, "step": 17690 }, { "epoch": 15.917266187050359, "grad_norm": 0.1664194017648697, "learning_rate": 8.446966676226093e-05, "loss": 0.0128, "step": 17700 }, { "action_loss": 0.005139971151947975, "epoch": 15.917266187050359, "step": 17700 }, { "epoch": 15.926258992805755, "grad_norm": 0.29087594151496887, "learning_rate": 8.444969898799667e-05, "loss": 0.011, "step": 17710 }, { "action_loss": 0.027372220531105995, "epoch": 15.926258992805755, "step": 17710 }, { "epoch": 15.93525179856115, "grad_norm": 0.19632773101329803, "learning_rate": 8.442972074882343e-05, "loss": 0.0109, "step": 17720 }, { "action_loss": 0.015935517847537994, "epoch": 15.93525179856115, "step": 17720 }, { "epoch": 15.944244604316546, "grad_norm": 0.1992701143026352, "learning_rate": 8.44097320508101e-05, "loss": 0.01, "step": 17730 }, { "action_loss": 0.0034303292632102966, "epoch": 15.944244604316546, "step": 17730 }, { "epoch": 15.953237410071942, "grad_norm": 0.21575576066970825, "learning_rate": 8.43897329000287e-05, "loss": 0.0061, "step": 17740 }, { "action_loss": 0.011971097439527512, "epoch": 15.953237410071942, "step": 17740 }, { "epoch": 15.962230215827338, "grad_norm": 0.2506984770298004, "learning_rate": 8.436972330255448e-05, "loss": 0.0101, "step": 17750 }, { "action_loss": 0.005552444141358137, "epoch": 15.962230215827338, "step": 17750 }, { "epoch": 15.971223021582734, "grad_norm": 0.26275578141212463, "learning_rate": 8.434970326446579e-05, "loss": 0.0106, "step": 17760 }, { "action_loss": 0.009339623153209686, "epoch": 15.971223021582734, "step": 17760 }, { "epoch": 15.98021582733813, "grad_norm": 0.2224963903427124, "learning_rate": 8.432967279184418e-05, "loss": 0.0092, "step": 17770 }, { "action_loss": 0.015612625516951084, "epoch": 15.98021582733813, "step": 17770 }, { "epoch": 15.989208633093526, "grad_norm": 0.29470282793045044, "learning_rate": 8.430963189077441e-05, "loss": 0.0086, "step": 17780 }, { "action_loss": 0.004808672238141298, "epoch": 15.989208633093526, "step": 17780 }, { "epoch": 15.998201438848922, "grad_norm": 0.3001865744590759, "learning_rate": 8.428958056734437e-05, "loss": 0.0077, "step": 17790 }, { "action_loss": 0.01112986821681261, "epoch": 15.998201438848922, "step": 17790 }, { "epoch": 16.007194244604317, "grad_norm": 0.24607549607753754, "learning_rate": 8.426951882764513e-05, "loss": 0.0149, "step": 17800 }, { "action_loss": 0.00914299488067627, "epoch": 16.007194244604317, "step": 17800 }, { "epoch": 16.01618705035971, "grad_norm": 0.2267346829175949, "learning_rate": 8.424944667777089e-05, "loss": 0.0078, "step": 17810 }, { "action_loss": 0.006575508508831263, "epoch": 16.01618705035971, "step": 17810 }, { "epoch": 16.02517985611511, "grad_norm": 0.2625866234302521, "learning_rate": 8.422936412381905e-05, "loss": 0.0086, "step": 17820 }, { "action_loss": 0.010831966064870358, "epoch": 16.02517985611511, "step": 17820 }, { "epoch": 16.034172661870503, "grad_norm": 0.21101975440979004, "learning_rate": 8.420927117189017e-05, "loss": 0.0088, "step": 17830 }, { "action_loss": 0.012523583136498928, "epoch": 16.034172661870503, "step": 17830 }, { "epoch": 16.0431654676259, "grad_norm": 0.24337852001190186, "learning_rate": 8.418916782808795e-05, "loss": 0.0087, "step": 17840 }, { "action_loss": 0.006654132157564163, "epoch": 16.0431654676259, "step": 17840 }, { "epoch": 16.052158273381295, "grad_norm": 0.2025495171546936, "learning_rate": 8.416905409851926e-05, "loss": 0.0091, "step": 17850 }, { "action_loss": 0.007533674594014883, "epoch": 16.052158273381295, "step": 17850 }, { "epoch": 16.06115107913669, "grad_norm": 0.12606146931648254, "learning_rate": 8.41489299892941e-05, "loss": 0.0074, "step": 17860 }, { "action_loss": 0.005313577130436897, "epoch": 16.06115107913669, "step": 17860 }, { "epoch": 16.070143884892087, "grad_norm": 0.18742813169956207, "learning_rate": 8.412879550652566e-05, "loss": 0.0085, "step": 17870 }, { "action_loss": 0.0061636981554329395, "epoch": 16.070143884892087, "step": 17870 }, { "epoch": 16.07913669064748, "grad_norm": 0.3275853395462036, "learning_rate": 8.410865065633029e-05, "loss": 0.0096, "step": 17880 }, { "action_loss": 0.0037497959565371275, "epoch": 16.07913669064748, "step": 17880 }, { "epoch": 16.08812949640288, "grad_norm": 0.17808526754379272, "learning_rate": 8.408849544482742e-05, "loss": 0.0107, "step": 17890 }, { "action_loss": 0.012092210352420807, "epoch": 16.08812949640288, "step": 17890 }, { "epoch": 16.097122302158272, "grad_norm": 0.3206789791584015, "learning_rate": 8.406832987813968e-05, "loss": 0.0095, "step": 17900 }, { "action_loss": 0.02131524123251438, "epoch": 16.097122302158272, "step": 17900 }, { "epoch": 16.10611510791367, "grad_norm": 0.20129333436489105, "learning_rate": 8.404815396239286e-05, "loss": 0.0103, "step": 17910 }, { "action_loss": 0.0033640319015830755, "epoch": 16.10611510791367, "step": 17910 }, { "epoch": 16.115107913669064, "grad_norm": 0.2777731418609619, "learning_rate": 8.402796770371587e-05, "loss": 0.0093, "step": 17920 }, { "action_loss": 0.0070523410104215145, "epoch": 16.115107913669064, "step": 17920 }, { "epoch": 16.12410071942446, "grad_norm": 0.26314669847488403, "learning_rate": 8.400777110824071e-05, "loss": 0.0093, "step": 17930 }, { "action_loss": 0.014741440303623676, "epoch": 16.12410071942446, "step": 17930 }, { "epoch": 16.133093525179856, "grad_norm": 0.21615733206272125, "learning_rate": 8.398756418210263e-05, "loss": 0.0096, "step": 17940 }, { "action_loss": 0.004170247353613377, "epoch": 16.133093525179856, "step": 17940 }, { "epoch": 16.142086330935253, "grad_norm": 0.2068701535463333, "learning_rate": 8.396734693143993e-05, "loss": 0.0093, "step": 17950 }, { "action_loss": 0.007653153967112303, "epoch": 16.142086330935253, "step": 17950 }, { "epoch": 16.151079136690647, "grad_norm": 0.21067024767398834, "learning_rate": 8.39471193623941e-05, "loss": 0.0164, "step": 17960 }, { "action_loss": 0.005970851052552462, "epoch": 16.151079136690647, "step": 17960 }, { "epoch": 16.16007194244604, "grad_norm": 0.21479639410972595, "learning_rate": 8.392688148110974e-05, "loss": 0.0101, "step": 17970 }, { "action_loss": 0.004572233185172081, "epoch": 16.16007194244604, "step": 17970 }, { "epoch": 16.16906474820144, "grad_norm": 0.18854427337646484, "learning_rate": 8.390663329373456e-05, "loss": 0.0098, "step": 17980 }, { "action_loss": 0.007647769991308451, "epoch": 16.16906474820144, "step": 17980 }, { "epoch": 16.178057553956833, "grad_norm": 0.21706995368003845, "learning_rate": 8.388637480641944e-05, "loss": 0.0128, "step": 17990 }, { "action_loss": 0.00916984397917986, "epoch": 16.178057553956833, "step": 17990 }, { "epoch": 16.18705035971223, "grad_norm": 0.23807355761528015, "learning_rate": 8.386610602531837e-05, "loss": 0.0127, "step": 18000 }, { "action_loss": 0.0036767572164535522, "epoch": 16.18705035971223, "step": 18000 }, { "epoch": 16.196043165467625, "grad_norm": 0.25997307896614075, "learning_rate": 8.384582695658847e-05, "loss": 0.0092, "step": 18010 }, { "action_loss": 0.014830698259174824, "epoch": 16.196043165467625, "step": 18010 }, { "epoch": 16.205035971223023, "grad_norm": 0.2000599354505539, "learning_rate": 8.382553760638999e-05, "loss": 0.0092, "step": 18020 }, { "action_loss": 0.0072667947970330715, "epoch": 16.205035971223023, "step": 18020 }, { "epoch": 16.214028776978417, "grad_norm": 0.1874963641166687, "learning_rate": 8.380523798088631e-05, "loss": 0.007, "step": 18030 }, { "action_loss": 0.0042214891873300076, "epoch": 16.214028776978417, "step": 18030 }, { "epoch": 16.223021582733814, "grad_norm": 0.14981989562511444, "learning_rate": 8.378492808624389e-05, "loss": 0.0089, "step": 18040 }, { "action_loss": 0.009533163160085678, "epoch": 16.223021582733814, "step": 18040 }, { "epoch": 16.23201438848921, "grad_norm": 1.0224609375, "learning_rate": 8.376460792863237e-05, "loss": 0.0115, "step": 18050 }, { "action_loss": 0.006192393600940704, "epoch": 16.23201438848921, "step": 18050 }, { "epoch": 16.241007194244606, "grad_norm": 0.18594999611377716, "learning_rate": 8.374427751422444e-05, "loss": 0.0062, "step": 18060 }, { "action_loss": 0.005481118336319923, "epoch": 16.241007194244606, "step": 18060 }, { "epoch": 16.25, "grad_norm": 0.2056271880865097, "learning_rate": 8.3723936849196e-05, "loss": 0.0077, "step": 18070 }, { "action_loss": 0.005153298377990723, "epoch": 16.25, "step": 18070 }, { "epoch": 16.258992805755394, "grad_norm": 0.3489379286766052, "learning_rate": 8.370358593972595e-05, "loss": 0.0089, "step": 18080 }, { "action_loss": 0.008045974187552929, "epoch": 16.258992805755394, "step": 18080 }, { "epoch": 16.26798561151079, "grad_norm": 0.29401424527168274, "learning_rate": 8.36832247919964e-05, "loss": 0.0091, "step": 18090 }, { "action_loss": 0.017039425671100616, "epoch": 16.26798561151079, "step": 18090 }, { "epoch": 16.276978417266186, "grad_norm": 0.24287442862987518, "learning_rate": 8.36628534121925e-05, "loss": 0.0125, "step": 18100 }, { "action_loss": 0.005019850563257933, "epoch": 16.276978417266186, "step": 18100 }, { "epoch": 16.285971223021583, "grad_norm": 0.19815832376480103, "learning_rate": 8.364247180650254e-05, "loss": 0.0128, "step": 18110 }, { "action_loss": 0.009811647236347198, "epoch": 16.285971223021583, "step": 18110 }, { "epoch": 16.294964028776977, "grad_norm": 0.18895860016345978, "learning_rate": 8.362207998111794e-05, "loss": 0.0066, "step": 18120 }, { "action_loss": 0.005942077841609716, "epoch": 16.294964028776977, "step": 18120 }, { "epoch": 16.303956834532375, "grad_norm": 0.12601394951343536, "learning_rate": 8.360167794223318e-05, "loss": 0.0082, "step": 18130 }, { "action_loss": 0.010325410403311253, "epoch": 16.303956834532375, "step": 18130 }, { "epoch": 16.31294964028777, "grad_norm": 0.2055911272764206, "learning_rate": 8.358126569604586e-05, "loss": 0.0102, "step": 18140 }, { "action_loss": 0.006974340882152319, "epoch": 16.31294964028777, "step": 18140 }, { "epoch": 16.321942446043167, "grad_norm": 0.16128648817539215, "learning_rate": 8.356084324875668e-05, "loss": 0.0119, "step": 18150 }, { "action_loss": 0.005864445120096207, "epoch": 16.321942446043167, "step": 18150 }, { "epoch": 16.33093525179856, "grad_norm": 0.12938207387924194, "learning_rate": 8.354041060656945e-05, "loss": 0.008, "step": 18160 }, { "action_loss": 0.0023093558847904205, "epoch": 16.33093525179856, "step": 18160 }, { "epoch": 16.33992805755396, "grad_norm": 0.1867188811302185, "learning_rate": 8.351996777569106e-05, "loss": 0.0072, "step": 18170 }, { "action_loss": 0.01158940326422453, "epoch": 16.33992805755396, "step": 18170 }, { "epoch": 16.348920863309353, "grad_norm": 0.21349100768566132, "learning_rate": 8.349951476233148e-05, "loss": 0.0106, "step": 18180 }, { "action_loss": 0.008450666442513466, "epoch": 16.348920863309353, "step": 18180 }, { "epoch": 16.357913669064747, "grad_norm": 0.18581371009349823, "learning_rate": 8.347905157270386e-05, "loss": 0.0077, "step": 18190 }, { "action_loss": 0.015796272084116936, "epoch": 16.357913669064747, "step": 18190 }, { "epoch": 16.366906474820144, "grad_norm": 0.20778490602970123, "learning_rate": 8.345857821302432e-05, "loss": 0.0112, "step": 18200 }, { "action_loss": 0.02002527378499508, "epoch": 16.366906474820144, "step": 18200 }, { "epoch": 16.37589928057554, "grad_norm": 0.2286689430475235, "learning_rate": 8.343809468951213e-05, "loss": 0.0098, "step": 18210 }, { "action_loss": 0.010302604176104069, "epoch": 16.37589928057554, "step": 18210 }, { "epoch": 16.384892086330936, "grad_norm": 0.13486558198928833, "learning_rate": 8.341760100838965e-05, "loss": 0.0123, "step": 18220 }, { "action_loss": 0.00849244650453329, "epoch": 16.384892086330936, "step": 18220 }, { "epoch": 16.39388489208633, "grad_norm": 0.1341772824525833, "learning_rate": 8.339709717588233e-05, "loss": 0.0111, "step": 18230 }, { "action_loss": 0.005878578871488571, "epoch": 16.39388489208633, "step": 18230 }, { "epoch": 16.402877697841728, "grad_norm": 0.23018215596675873, "learning_rate": 8.33765831982187e-05, "loss": 0.0093, "step": 18240 }, { "action_loss": 0.003172955708578229, "epoch": 16.402877697841728, "step": 18240 }, { "epoch": 16.41187050359712, "grad_norm": 0.1793804168701172, "learning_rate": 8.335605908163035e-05, "loss": 0.0065, "step": 18250 }, { "action_loss": 0.007425482850521803, "epoch": 16.41187050359712, "step": 18250 }, { "epoch": 16.42086330935252, "grad_norm": 0.16305290162563324, "learning_rate": 8.333552483235196e-05, "loss": 0.0078, "step": 18260 }, { "action_loss": 0.009237010963261127, "epoch": 16.42086330935252, "step": 18260 }, { "epoch": 16.429856115107913, "grad_norm": 0.2302873283624649, "learning_rate": 8.33149804566213e-05, "loss": 0.0091, "step": 18270 }, { "action_loss": 0.007972832769155502, "epoch": 16.429856115107913, "step": 18270 }, { "epoch": 16.43884892086331, "grad_norm": 0.21266238391399384, "learning_rate": 8.329442596067921e-05, "loss": 0.0085, "step": 18280 }, { "action_loss": 0.002223769435659051, "epoch": 16.43884892086331, "step": 18280 }, { "epoch": 16.447841726618705, "grad_norm": 0.24273672699928284, "learning_rate": 8.32738613507696e-05, "loss": 0.0097, "step": 18290 }, { "action_loss": 0.012153483927249908, "epoch": 16.447841726618705, "step": 18290 }, { "epoch": 16.4568345323741, "grad_norm": 0.18385063111782074, "learning_rate": 8.325328663313946e-05, "loss": 0.0122, "step": 18300 }, { "action_loss": 0.019333234056830406, "epoch": 16.4568345323741, "step": 18300 }, { "epoch": 16.465827338129497, "grad_norm": 0.22007529437541962, "learning_rate": 8.323270181403884e-05, "loss": 0.0094, "step": 18310 }, { "action_loss": 0.013388399966061115, "epoch": 16.465827338129497, "step": 18310 }, { "epoch": 16.47482014388489, "grad_norm": 0.25074583292007446, "learning_rate": 8.321210689972086e-05, "loss": 0.0103, "step": 18320 }, { "action_loss": 0.006054062396287918, "epoch": 16.47482014388489, "step": 18320 }, { "epoch": 16.48381294964029, "grad_norm": 0.2197338491678238, "learning_rate": 8.319150189644174e-05, "loss": 0.0091, "step": 18330 }, { "action_loss": 0.007605769205838442, "epoch": 16.48381294964029, "step": 18330 }, { "epoch": 16.492805755395683, "grad_norm": 0.22155681252479553, "learning_rate": 8.31708868104607e-05, "loss": 0.0163, "step": 18340 }, { "action_loss": 0.013824944384396076, "epoch": 16.492805755395683, "step": 18340 }, { "epoch": 16.50179856115108, "grad_norm": 0.17102102935314178, "learning_rate": 8.315026164804007e-05, "loss": 0.0099, "step": 18350 }, { "action_loss": 0.0054177273996174335, "epoch": 16.50179856115108, "step": 18350 }, { "epoch": 16.510791366906474, "grad_norm": 0.24091823399066925, "learning_rate": 8.312962641544524e-05, "loss": 0.0106, "step": 18360 }, { "action_loss": 0.0034769782796502113, "epoch": 16.510791366906474, "step": 18360 }, { "epoch": 16.519784172661872, "grad_norm": 0.19231140613555908, "learning_rate": 8.310898111894465e-05, "loss": 0.0087, "step": 18370 }, { "action_loss": 0.007193325087428093, "epoch": 16.519784172661872, "step": 18370 }, { "epoch": 16.528776978417266, "grad_norm": 0.14912667870521545, "learning_rate": 8.308832576480977e-05, "loss": 0.0086, "step": 18380 }, { "action_loss": 0.00626642070710659, "epoch": 16.528776978417266, "step": 18380 }, { "epoch": 16.53776978417266, "grad_norm": 0.24825477600097656, "learning_rate": 8.306766035931519e-05, "loss": 0.0113, "step": 18390 }, { "action_loss": 0.007597594987601042, "epoch": 16.53776978417266, "step": 18390 }, { "epoch": 16.546762589928058, "grad_norm": 0.22003011405467987, "learning_rate": 8.304698490873847e-05, "loss": 0.0085, "step": 18400 }, { "action_loss": 0.004743050318211317, "epoch": 16.546762589928058, "step": 18400 }, { "epoch": 16.555755395683452, "grad_norm": 0.20296187698841095, "learning_rate": 8.30262994193603e-05, "loss": 0.0117, "step": 18410 }, { "action_loss": 0.004290398210287094, "epoch": 16.555755395683452, "step": 18410 }, { "epoch": 16.56474820143885, "grad_norm": 0.3224628269672394, "learning_rate": 8.300560389746438e-05, "loss": 0.0084, "step": 18420 }, { "action_loss": 0.01127914059907198, "epoch": 16.56474820143885, "step": 18420 }, { "epoch": 16.573741007194243, "grad_norm": 0.22663269937038422, "learning_rate": 8.298489834933745e-05, "loss": 0.0107, "step": 18430 }, { "action_loss": 0.0028684900607913733, "epoch": 16.573741007194243, "step": 18430 }, { "epoch": 16.58273381294964, "grad_norm": 0.19944612681865692, "learning_rate": 8.296418278126934e-05, "loss": 0.0128, "step": 18440 }, { "action_loss": 0.005739451851695776, "epoch": 16.58273381294964, "step": 18440 }, { "epoch": 16.591726618705035, "grad_norm": 0.17370477318763733, "learning_rate": 8.294345719955284e-05, "loss": 0.0112, "step": 18450 }, { "action_loss": 0.0063750906847417355, "epoch": 16.591726618705035, "step": 18450 }, { "epoch": 16.600719424460433, "grad_norm": 0.22264590859413147, "learning_rate": 8.29227216104839e-05, "loss": 0.0095, "step": 18460 }, { "action_loss": 0.011957096867263317, "epoch": 16.600719424460433, "step": 18460 }, { "epoch": 16.609712230215827, "grad_norm": 0.189870685338974, "learning_rate": 8.290197602036137e-05, "loss": 0.0123, "step": 18470 }, { "action_loss": 0.0038481615483760834, "epoch": 16.609712230215827, "step": 18470 }, { "epoch": 16.618705035971225, "grad_norm": 0.18489935994148254, "learning_rate": 8.288122043548725e-05, "loss": 0.0072, "step": 18480 }, { "action_loss": 0.008116144686937332, "epoch": 16.618705035971225, "step": 18480 }, { "epoch": 16.62769784172662, "grad_norm": 0.20784203708171844, "learning_rate": 8.286045486216657e-05, "loss": 0.0127, "step": 18490 }, { "action_loss": 0.014684710651636124, "epoch": 16.62769784172662, "step": 18490 }, { "epoch": 16.636690647482013, "grad_norm": 0.3081582188606262, "learning_rate": 8.283967930670733e-05, "loss": 0.0098, "step": 18500 }, { "action_loss": 0.012832234613597393, "epoch": 16.636690647482013, "step": 18500 }, { "epoch": 16.64568345323741, "grad_norm": 0.1827545017004013, "learning_rate": 8.281889377542058e-05, "loss": 0.0092, "step": 18510 }, { "action_loss": 0.009356457740068436, "epoch": 16.64568345323741, "step": 18510 }, { "epoch": 16.654676258992804, "grad_norm": 0.21133002638816833, "learning_rate": 8.279809827462045e-05, "loss": 0.0089, "step": 18520 }, { "action_loss": 0.011272042989730835, "epoch": 16.654676258992804, "step": 18520 }, { "epoch": 16.663669064748202, "grad_norm": 0.1629801243543625, "learning_rate": 8.277729281062402e-05, "loss": 0.0104, "step": 18530 }, { "action_loss": 0.005285380408167839, "epoch": 16.663669064748202, "step": 18530 }, { "epoch": 16.672661870503596, "grad_norm": 0.2214611917734146, "learning_rate": 8.27564773897515e-05, "loss": 0.0077, "step": 18540 }, { "action_loss": 0.004915168043226004, "epoch": 16.672661870503596, "step": 18540 }, { "epoch": 16.681654676258994, "grad_norm": 0.21203315258026123, "learning_rate": 8.273565201832602e-05, "loss": 0.0071, "step": 18550 }, { "action_loss": 0.011743300594389439, "epoch": 16.681654676258994, "step": 18550 }, { "epoch": 16.690647482014388, "grad_norm": 0.22288256883621216, "learning_rate": 8.27148167026738e-05, "loss": 0.0083, "step": 18560 }, { "action_loss": 0.011695724911987782, "epoch": 16.690647482014388, "step": 18560 }, { "epoch": 16.699640287769785, "grad_norm": 0.18421587347984314, "learning_rate": 8.269397144912405e-05, "loss": 0.0094, "step": 18570 }, { "action_loss": 0.007239362224936485, "epoch": 16.699640287769785, "step": 18570 }, { "epoch": 16.70863309352518, "grad_norm": 0.14339208602905273, "learning_rate": 8.267311626400899e-05, "loss": 0.0081, "step": 18580 }, { "action_loss": 0.008421077392995358, "epoch": 16.70863309352518, "step": 18580 }, { "epoch": 16.717625899280577, "grad_norm": 0.28478914499282837, "learning_rate": 8.26522511536639e-05, "loss": 0.0115, "step": 18590 }, { "action_loss": 0.010547470301389694, "epoch": 16.717625899280577, "step": 18590 }, { "epoch": 16.72661870503597, "grad_norm": 0.17263726890087128, "learning_rate": 8.263137612442706e-05, "loss": 0.0095, "step": 18600 }, { "action_loss": 0.003828004002571106, "epoch": 16.72661870503597, "step": 18600 }, { "epoch": 16.735611510791365, "grad_norm": 0.31732794642448425, "learning_rate": 8.261049118263971e-05, "loss": 0.0082, "step": 18610 }, { "action_loss": 0.006383488420397043, "epoch": 16.735611510791365, "step": 18610 }, { "epoch": 16.744604316546763, "grad_norm": 0.16240061819553375, "learning_rate": 8.258959633464619e-05, "loss": 0.0104, "step": 18620 }, { "action_loss": 0.01398294884711504, "epoch": 16.744604316546763, "step": 18620 }, { "epoch": 16.753597122302157, "grad_norm": 0.17282043397426605, "learning_rate": 8.256869158679377e-05, "loss": 0.0084, "step": 18630 }, { "action_loss": 0.012958993203938007, "epoch": 16.753597122302157, "step": 18630 }, { "epoch": 16.762589928057555, "grad_norm": 0.22877682745456696, "learning_rate": 8.254777694543278e-05, "loss": 0.0117, "step": 18640 }, { "action_loss": 0.004456240218132734, "epoch": 16.762589928057555, "step": 18640 }, { "epoch": 16.77158273381295, "grad_norm": 0.2063291072845459, "learning_rate": 8.252685241691651e-05, "loss": 0.0096, "step": 18650 }, { "action_loss": 0.019348660483956337, "epoch": 16.77158273381295, "step": 18650 }, { "epoch": 16.780575539568346, "grad_norm": 0.2644779086112976, "learning_rate": 8.250591800760133e-05, "loss": 0.0122, "step": 18660 }, { "action_loss": 0.00790927279740572, "epoch": 16.780575539568346, "step": 18660 }, { "epoch": 16.78956834532374, "grad_norm": 0.22270949184894562, "learning_rate": 8.248497372384649e-05, "loss": 0.0105, "step": 18670 }, { "action_loss": 0.007438526954501867, "epoch": 16.78956834532374, "step": 18670 }, { "epoch": 16.798561151079138, "grad_norm": 0.21741516888141632, "learning_rate": 8.246401957201437e-05, "loss": 0.0084, "step": 18680 }, { "action_loss": 0.010585375130176544, "epoch": 16.798561151079138, "step": 18680 }, { "epoch": 16.807553956834532, "grad_norm": 0.17617350816726685, "learning_rate": 8.244305555847027e-05, "loss": 0.0072, "step": 18690 }, { "action_loss": 0.008565199561417103, "epoch": 16.807553956834532, "step": 18690 }, { "epoch": 16.81654676258993, "grad_norm": 0.19609957933425903, "learning_rate": 8.24220816895825e-05, "loss": 0.0073, "step": 18700 }, { "action_loss": 0.0070334202609956264, "epoch": 16.81654676258993, "step": 18700 }, { "epoch": 16.825539568345324, "grad_norm": 0.20393837988376617, "learning_rate": 8.240109797172237e-05, "loss": 0.0099, "step": 18710 }, { "action_loss": 0.005740316119045019, "epoch": 16.825539568345324, "step": 18710 }, { "epoch": 16.834532374100718, "grad_norm": 0.26195597648620605, "learning_rate": 8.238010441126416e-05, "loss": 0.0148, "step": 18720 }, { "action_loss": 0.015371373854577541, "epoch": 16.834532374100718, "step": 18720 }, { "epoch": 16.843525179856115, "grad_norm": 0.13595840334892273, "learning_rate": 8.23591010145852e-05, "loss": 0.0153, "step": 18730 }, { "action_loss": 0.03870846703648567, "epoch": 16.843525179856115, "step": 18730 }, { "epoch": 16.85251798561151, "grad_norm": 0.23271188139915466, "learning_rate": 8.233808778806571e-05, "loss": 0.0115, "step": 18740 }, { "action_loss": 0.005900323390960693, "epoch": 16.85251798561151, "step": 18740 }, { "epoch": 16.861510791366907, "grad_norm": 0.18180230259895325, "learning_rate": 8.231706473808903e-05, "loss": 0.0103, "step": 18750 }, { "action_loss": 0.007925919257104397, "epoch": 16.861510791366907, "step": 18750 }, { "epoch": 16.8705035971223, "grad_norm": 0.2280338853597641, "learning_rate": 8.229603187104133e-05, "loss": 0.012, "step": 18760 }, { "action_loss": 0.006297904998064041, "epoch": 16.8705035971223, "step": 18760 }, { "epoch": 16.8794964028777, "grad_norm": 0.28047457337379456, "learning_rate": 8.22749891933119e-05, "loss": 0.0103, "step": 18770 }, { "action_loss": 0.017863625660538673, "epoch": 16.8794964028777, "step": 18770 }, { "epoch": 16.888489208633093, "grad_norm": 0.2641085088253021, "learning_rate": 8.225393671129291e-05, "loss": 0.0126, "step": 18780 }, { "action_loss": 0.00582933658733964, "epoch": 16.888489208633093, "step": 18780 }, { "epoch": 16.89748201438849, "grad_norm": 0.18190035223960876, "learning_rate": 8.223287443137957e-05, "loss": 0.0078, "step": 18790 }, { "action_loss": 0.004603276494890451, "epoch": 16.89748201438849, "step": 18790 }, { "epoch": 16.906474820143885, "grad_norm": 0.2260672003030777, "learning_rate": 8.221180235997004e-05, "loss": 0.0095, "step": 18800 }, { "action_loss": 0.00505041005089879, "epoch": 16.906474820143885, "step": 18800 }, { "epoch": 16.915467625899282, "grad_norm": 0.21418555080890656, "learning_rate": 8.219072050346544e-05, "loss": 0.0098, "step": 18810 }, { "action_loss": 0.009438076056540012, "epoch": 16.915467625899282, "step": 18810 }, { "epoch": 16.924460431654676, "grad_norm": 0.349031001329422, "learning_rate": 8.216962886826992e-05, "loss": 0.0097, "step": 18820 }, { "action_loss": 0.004292272962629795, "epoch": 16.924460431654676, "step": 18820 }, { "epoch": 16.93345323741007, "grad_norm": 0.19080720841884613, "learning_rate": 8.214852746079054e-05, "loss": 0.011, "step": 18830 }, { "action_loss": 0.004045136738568544, "epoch": 16.93345323741007, "step": 18830 }, { "epoch": 16.942446043165468, "grad_norm": 0.28479698300361633, "learning_rate": 8.212741628743732e-05, "loss": 0.0081, "step": 18840 }, { "action_loss": 0.0086263632401824, "epoch": 16.942446043165468, "step": 18840 }, { "epoch": 16.951438848920862, "grad_norm": 0.13910533487796783, "learning_rate": 8.210629535462333e-05, "loss": 0.0078, "step": 18850 }, { "action_loss": 0.00692864740267396, "epoch": 16.951438848920862, "step": 18850 }, { "epoch": 16.96043165467626, "grad_norm": 0.2260861098766327, "learning_rate": 8.208516466876453e-05, "loss": 0.0091, "step": 18860 }, { "action_loss": 0.004914450459182262, "epoch": 16.96043165467626, "step": 18860 }, { "epoch": 16.969424460431654, "grad_norm": 0.2220604419708252, "learning_rate": 8.206402423627986e-05, "loss": 0.006, "step": 18870 }, { "action_loss": 0.013022647239267826, "epoch": 16.969424460431654, "step": 18870 }, { "epoch": 16.97841726618705, "grad_norm": 0.24364691972732544, "learning_rate": 8.204287406359124e-05, "loss": 0.0161, "step": 18880 }, { "action_loss": 0.00908965989947319, "epoch": 16.97841726618705, "step": 18880 }, { "epoch": 16.987410071942445, "grad_norm": 0.21344313025474548, "learning_rate": 8.20217141571235e-05, "loss": 0.0093, "step": 18890 }, { "action_loss": 0.0034623704850673676, "epoch": 16.987410071942445, "step": 18890 }, { "epoch": 16.996402877697843, "grad_norm": 0.2197875678539276, "learning_rate": 8.200054452330449e-05, "loss": 0.0073, "step": 18900 }, { "action_loss": 0.0028728535398840904, "epoch": 16.996402877697843, "step": 18900 }, { "epoch": 17.005395683453237, "grad_norm": 0.2382371723651886, "learning_rate": 8.197936516856499e-05, "loss": 0.0104, "step": 18910 }, { "action_loss": 0.009937523864209652, "epoch": 17.005395683453237, "step": 18910 }, { "epoch": 17.014388489208635, "grad_norm": 0.2326676845550537, "learning_rate": 8.195817609933871e-05, "loss": 0.0107, "step": 18920 }, { "action_loss": 0.011093917302787304, "epoch": 17.014388489208635, "step": 18920 }, { "epoch": 17.02338129496403, "grad_norm": 0.22258147597312927, "learning_rate": 8.193697732206233e-05, "loss": 0.0093, "step": 18930 }, { "action_loss": 0.008004039525985718, "epoch": 17.02338129496403, "step": 18930 }, { "epoch": 17.032374100719423, "grad_norm": 0.2522454857826233, "learning_rate": 8.19157688431755e-05, "loss": 0.0113, "step": 18940 }, { "action_loss": 0.00741262873634696, "epoch": 17.032374100719423, "step": 18940 }, { "epoch": 17.04136690647482, "grad_norm": 0.23870806396007538, "learning_rate": 8.189455066912077e-05, "loss": 0.0085, "step": 18950 }, { "action_loss": 0.005295222159475088, "epoch": 17.04136690647482, "step": 18950 }, { "epoch": 17.050359712230215, "grad_norm": 0.17556731402873993, "learning_rate": 8.187332280634369e-05, "loss": 0.0092, "step": 18960 }, { "action_loss": 0.010876978747546673, "epoch": 17.050359712230215, "step": 18960 }, { "epoch": 17.059352517985612, "grad_norm": 0.11615103483200073, "learning_rate": 8.18520852612927e-05, "loss": 0.0095, "step": 18970 }, { "action_loss": 0.00748521089553833, "epoch": 17.059352517985612, "step": 18970 }, { "epoch": 17.068345323741006, "grad_norm": 0.18129996955394745, "learning_rate": 8.183083804041921e-05, "loss": 0.0118, "step": 18980 }, { "action_loss": 0.010215013287961483, "epoch": 17.068345323741006, "step": 18980 }, { "epoch": 17.077338129496404, "grad_norm": 0.17160551249980927, "learning_rate": 8.180958115017757e-05, "loss": 0.0087, "step": 18990 }, { "action_loss": 0.002832920989021659, "epoch": 17.077338129496404, "step": 18990 }, { "epoch": 17.086330935251798, "grad_norm": 0.30005112290382385, "learning_rate": 8.178831459702505e-05, "loss": 0.0164, "step": 19000 }, { "action_loss": 0.0058489576913416386, "epoch": 17.086330935251798, "step": 19000 }, { "epoch": 17.095323741007196, "grad_norm": 0.1654212474822998, "learning_rate": 8.17670383874219e-05, "loss": 0.0122, "step": 19010 }, { "action_loss": 0.02097267657518387, "epoch": 17.095323741007196, "step": 19010 }, { "epoch": 17.10431654676259, "grad_norm": 0.24707384407520294, "learning_rate": 8.174575252783124e-05, "loss": 0.009, "step": 19020 }, { "action_loss": 0.04105601832270622, "epoch": 17.10431654676259, "step": 19020 }, { "epoch": 17.113309352517987, "grad_norm": 0.23225513100624084, "learning_rate": 8.172445702471914e-05, "loss": 0.0135, "step": 19030 }, { "action_loss": 0.005654436070472002, "epoch": 17.113309352517987, "step": 19030 }, { "epoch": 17.12230215827338, "grad_norm": 0.24328231811523438, "learning_rate": 8.170315188455466e-05, "loss": 0.0109, "step": 19040 }, { "action_loss": 0.0105905057862401, "epoch": 17.12230215827338, "step": 19040 }, { "epoch": 17.131294964028775, "grad_norm": 0.28510355949401855, "learning_rate": 8.168183711380969e-05, "loss": 0.0099, "step": 19050 }, { "action_loss": 0.012047712691128254, "epoch": 17.131294964028775, "step": 19050 }, { "epoch": 17.140287769784173, "grad_norm": 0.2379259169101715, "learning_rate": 8.166051271895913e-05, "loss": 0.0109, "step": 19060 }, { "action_loss": 0.006058002356439829, "epoch": 17.140287769784173, "step": 19060 }, { "epoch": 17.149280575539567, "grad_norm": 0.3160102963447571, "learning_rate": 8.163917870648075e-05, "loss": 0.0165, "step": 19070 }, { "action_loss": 0.005674629006534815, "epoch": 17.149280575539567, "step": 19070 }, { "epoch": 17.158273381294965, "grad_norm": 0.22540715336799622, "learning_rate": 8.161783508285526e-05, "loss": 0.0085, "step": 19080 }, { "action_loss": 0.023181358352303505, "epoch": 17.158273381294965, "step": 19080 }, { "epoch": 17.16726618705036, "grad_norm": 0.21591392159461975, "learning_rate": 8.159648185456628e-05, "loss": 0.0102, "step": 19090 }, { "action_loss": 0.004647594876587391, "epoch": 17.16726618705036, "step": 19090 }, { "epoch": 17.176258992805757, "grad_norm": 0.29011496901512146, "learning_rate": 8.157511902810038e-05, "loss": 0.0125, "step": 19100 }, { "action_loss": 0.007497320417314768, "epoch": 17.176258992805757, "step": 19100 }, { "epoch": 17.18525179856115, "grad_norm": 0.26561301946640015, "learning_rate": 8.155374660994701e-05, "loss": 0.0095, "step": 19110 }, { "action_loss": 0.0060380869545042515, "epoch": 17.18525179856115, "step": 19110 }, { "epoch": 17.194244604316548, "grad_norm": 0.22005021572113037, "learning_rate": 8.153236460659857e-05, "loss": 0.0138, "step": 19120 }, { "action_loss": 0.01541776955127716, "epoch": 17.194244604316548, "step": 19120 }, { "epoch": 17.203237410071942, "grad_norm": 0.30434858798980713, "learning_rate": 8.151097302455031e-05, "loss": 0.0084, "step": 19130 }, { "action_loss": 0.00802411139011383, "epoch": 17.203237410071942, "step": 19130 }, { "epoch": 17.21223021582734, "grad_norm": 0.2985208332538605, "learning_rate": 8.148957187030044e-05, "loss": 0.0134, "step": 19140 }, { "action_loss": 0.004628507886081934, "epoch": 17.21223021582734, "step": 19140 }, { "epoch": 17.221223021582734, "grad_norm": 0.19851164519786835, "learning_rate": 8.146816115035006e-05, "loss": 0.0099, "step": 19150 }, { "action_loss": 0.004597961436957121, "epoch": 17.221223021582734, "step": 19150 }, { "epoch": 17.230215827338128, "grad_norm": 0.280388206243515, "learning_rate": 8.14467408712032e-05, "loss": 0.0126, "step": 19160 }, { "action_loss": 0.004645479377359152, "epoch": 17.230215827338128, "step": 19160 }, { "epoch": 17.239208633093526, "grad_norm": 0.2699349820613861, "learning_rate": 8.142531103936678e-05, "loss": 0.0099, "step": 19170 }, { "action_loss": 0.004436276387423277, "epoch": 17.239208633093526, "step": 19170 }, { "epoch": 17.24820143884892, "grad_norm": 0.17565743625164032, "learning_rate": 8.14038716613506e-05, "loss": 0.0058, "step": 19180 }, { "action_loss": 0.018808109685778618, "epoch": 17.24820143884892, "step": 19180 }, { "epoch": 17.257194244604317, "grad_norm": 0.1827138513326645, "learning_rate": 8.138242274366736e-05, "loss": 0.009, "step": 19190 }, { "action_loss": 0.0037263736594468355, "epoch": 17.257194244604317, "step": 19190 }, { "epoch": 17.26618705035971, "grad_norm": 0.2208307832479477, "learning_rate": 8.136096429283271e-05, "loss": 0.0114, "step": 19200 }, { "action_loss": 0.010594993829727173, "epoch": 17.26618705035971, "step": 19200 }, { "epoch": 17.27517985611511, "grad_norm": 0.2380528748035431, "learning_rate": 8.133949631536515e-05, "loss": 0.0128, "step": 19210 }, { "action_loss": 0.011997734196484089, "epoch": 17.27517985611511, "step": 19210 }, { "epoch": 17.284172661870503, "grad_norm": 0.2532579302787781, "learning_rate": 8.131801881778607e-05, "loss": 0.0123, "step": 19220 }, { "action_loss": 0.005245825741440058, "epoch": 17.284172661870503, "step": 19220 }, { "epoch": 17.2931654676259, "grad_norm": 0.21679404377937317, "learning_rate": 8.129653180661978e-05, "loss": 0.0097, "step": 19230 }, { "action_loss": 0.005565930157899857, "epoch": 17.2931654676259, "step": 19230 }, { "epoch": 17.302158273381295, "grad_norm": 0.1446785181760788, "learning_rate": 8.127503528839346e-05, "loss": 0.0104, "step": 19240 }, { "action_loss": 0.016885867342352867, "epoch": 17.302158273381295, "step": 19240 }, { "epoch": 17.31115107913669, "grad_norm": 0.1798495352268219, "learning_rate": 8.125352926963721e-05, "loss": 0.0134, "step": 19250 }, { "action_loss": 0.00469796359539032, "epoch": 17.31115107913669, "step": 19250 }, { "epoch": 17.320143884892087, "grad_norm": 0.2105385810136795, "learning_rate": 8.123201375688395e-05, "loss": 0.0088, "step": 19260 }, { "action_loss": 0.002662096405401826, "epoch": 17.320143884892087, "step": 19260 }, { "epoch": 17.32913669064748, "grad_norm": 0.2202799916267395, "learning_rate": 8.121048875666954e-05, "loss": 0.0093, "step": 19270 }, { "action_loss": 0.0059076338075101376, "epoch": 17.32913669064748, "step": 19270 }, { "epoch": 17.33812949640288, "grad_norm": 0.19014693796634674, "learning_rate": 8.118895427553274e-05, "loss": 0.0066, "step": 19280 }, { "action_loss": 0.0074182734824717045, "epoch": 17.33812949640288, "step": 19280 }, { "epoch": 17.347122302158272, "grad_norm": 0.21913188695907593, "learning_rate": 8.116741032001511e-05, "loss": 0.01, "step": 19290 }, { "action_loss": 0.008713665418326855, "epoch": 17.347122302158272, "step": 19290 }, { "epoch": 17.35611510791367, "grad_norm": 0.22789537906646729, "learning_rate": 8.114585689666114e-05, "loss": 0.0095, "step": 19300 }, { "action_loss": 0.004113106057047844, "epoch": 17.35611510791367, "step": 19300 }, { "epoch": 17.365107913669064, "grad_norm": 0.12772445380687714, "learning_rate": 8.112429401201821e-05, "loss": 0.0066, "step": 19310 }, { "action_loss": 0.03062845766544342, "epoch": 17.365107913669064, "step": 19310 }, { "epoch": 17.37410071942446, "grad_norm": 0.15456581115722656, "learning_rate": 8.110272167263656e-05, "loss": 0.0106, "step": 19320 }, { "action_loss": 0.0067685507237911224, "epoch": 17.37410071942446, "step": 19320 }, { "epoch": 17.383093525179856, "grad_norm": 0.15293893218040466, "learning_rate": 8.108113988506929e-05, "loss": 0.0069, "step": 19330 }, { "action_loss": 0.021721867844462395, "epoch": 17.383093525179856, "step": 19330 }, { "epoch": 17.392086330935253, "grad_norm": 0.16624338924884796, "learning_rate": 8.105954865587235e-05, "loss": 0.0099, "step": 19340 }, { "action_loss": 0.003430613549426198, "epoch": 17.392086330935253, "step": 19340 }, { "epoch": 17.401079136690647, "grad_norm": 0.20897755026817322, "learning_rate": 8.103794799160463e-05, "loss": 0.0094, "step": 19350 }, { "action_loss": 0.025790488347411156, "epoch": 17.401079136690647, "step": 19350 }, { "epoch": 17.41007194244604, "grad_norm": 0.2959343492984772, "learning_rate": 8.101633789882781e-05, "loss": 0.0113, "step": 19360 }, { "action_loss": 0.0059258886612951756, "epoch": 17.41007194244604, "step": 19360 }, { "epoch": 17.41906474820144, "grad_norm": 0.22108019888401031, "learning_rate": 8.099471838410648e-05, "loss": 0.0072, "step": 19370 }, { "action_loss": 0.004303060006350279, "epoch": 17.41906474820144, "step": 19370 }, { "epoch": 17.428057553956833, "grad_norm": 0.23685915768146515, "learning_rate": 8.097308945400806e-05, "loss": 0.009, "step": 19380 }, { "action_loss": 0.003936311695724726, "epoch": 17.428057553956833, "step": 19380 }, { "epoch": 17.43705035971223, "grad_norm": 0.28173378109931946, "learning_rate": 8.095145111510288e-05, "loss": 0.0091, "step": 19390 }, { "action_loss": 0.0048834835179150105, "epoch": 17.43705035971223, "step": 19390 }, { "epoch": 17.446043165467625, "grad_norm": 0.2002529501914978, "learning_rate": 8.092980337396406e-05, "loss": 0.0074, "step": 19400 }, { "action_loss": 0.009486679919064045, "epoch": 17.446043165467625, "step": 19400 }, { "epoch": 17.455035971223023, "grad_norm": 0.19714292883872986, "learning_rate": 8.090814623716763e-05, "loss": 0.0109, "step": 19410 }, { "action_loss": 0.003748035989701748, "epoch": 17.455035971223023, "step": 19410 }, { "epoch": 17.464028776978417, "grad_norm": 0.1689855009317398, "learning_rate": 8.088647971129246e-05, "loss": 0.0112, "step": 19420 }, { "action_loss": 0.04475485160946846, "epoch": 17.464028776978417, "step": 19420 }, { "epoch": 17.473021582733814, "grad_norm": 0.18175248801708221, "learning_rate": 8.086480380292026e-05, "loss": 0.011, "step": 19430 }, { "action_loss": 0.0032858988270163536, "epoch": 17.473021582733814, "step": 19430 }, { "epoch": 17.48201438848921, "grad_norm": 0.25582045316696167, "learning_rate": 8.084311851863562e-05, "loss": 0.0093, "step": 19440 }, { "action_loss": 0.007327117025852203, "epoch": 17.48201438848921, "step": 19440 }, { "epoch": 17.491007194244606, "grad_norm": 0.24707315862178802, "learning_rate": 8.082142386502591e-05, "loss": 0.0078, "step": 19450 }, { "action_loss": 0.01224090438336134, "epoch": 17.491007194244606, "step": 19450 }, { "epoch": 17.5, "grad_norm": 0.2000337690114975, "learning_rate": 8.079971984868145e-05, "loss": 0.0114, "step": 19460 }, { "action_loss": 0.005571372341364622, "epoch": 17.5, "step": 19460 }, { "epoch": 17.508992805755394, "grad_norm": 0.1738201230764389, "learning_rate": 8.077800647619532e-05, "loss": 0.0095, "step": 19470 }, { "action_loss": 0.004229952115565538, "epoch": 17.508992805755394, "step": 19470 }, { "epoch": 17.51798561151079, "grad_norm": 0.1345003843307495, "learning_rate": 8.075628375416345e-05, "loss": 0.007, "step": 19480 }, { "action_loss": 0.0033322153612971306, "epoch": 17.51798561151079, "step": 19480 }, { "epoch": 17.526978417266186, "grad_norm": 0.2893873453140259, "learning_rate": 8.073455168918464e-05, "loss": 0.0103, "step": 19490 }, { "action_loss": 0.005260177422314882, "epoch": 17.526978417266186, "step": 19490 }, { "epoch": 17.535971223021583, "grad_norm": 0.22704876959323883, "learning_rate": 8.071281028786055e-05, "loss": 0.0079, "step": 19500 }, { "action_loss": 0.0056886146776378155, "epoch": 17.535971223021583, "step": 19500 }, { "epoch": 17.544964028776977, "grad_norm": 0.2089758813381195, "learning_rate": 8.069105955679562e-05, "loss": 0.0078, "step": 19510 }, { "action_loss": 0.006711902562528849, "epoch": 17.544964028776977, "step": 19510 }, { "epoch": 17.553956834532375, "grad_norm": 0.2130993902683258, "learning_rate": 8.066929950259713e-05, "loss": 0.0093, "step": 19520 }, { "action_loss": 0.006289638113230467, "epoch": 17.553956834532375, "step": 19520 }, { "epoch": 17.56294964028777, "grad_norm": 0.19351713359355927, "learning_rate": 8.064753013187522e-05, "loss": 0.0093, "step": 19530 }, { "action_loss": 0.01225685328245163, "epoch": 17.56294964028777, "step": 19530 }, { "epoch": 17.571942446043167, "grad_norm": 0.16063925623893738, "learning_rate": 8.062575145124289e-05, "loss": 0.0087, "step": 19540 }, { "action_loss": 0.007875475101172924, "epoch": 17.571942446043167, "step": 19540 }, { "epoch": 17.58093525179856, "grad_norm": 0.1960582435131073, "learning_rate": 8.060396346731587e-05, "loss": 0.0088, "step": 19550 }, { "action_loss": 0.008787919767200947, "epoch": 17.58093525179856, "step": 19550 }, { "epoch": 17.58992805755396, "grad_norm": 0.24369345605373383, "learning_rate": 8.058216618671281e-05, "loss": 0.0074, "step": 19560 }, { "action_loss": 0.008768380619585514, "epoch": 17.58992805755396, "step": 19560 }, { "epoch": 17.598920863309353, "grad_norm": 0.24331483244895935, "learning_rate": 8.056035961605514e-05, "loss": 0.0069, "step": 19570 }, { "action_loss": 0.00510036526247859, "epoch": 17.598920863309353, "step": 19570 }, { "epoch": 17.607913669064747, "grad_norm": 0.19932840764522552, "learning_rate": 8.05385437619671e-05, "loss": 0.0129, "step": 19580 }, { "action_loss": 0.0030231710989028215, "epoch": 17.607913669064747, "step": 19580 }, { "epoch": 17.616906474820144, "grad_norm": 0.203800231218338, "learning_rate": 8.05167186310758e-05, "loss": 0.0064, "step": 19590 }, { "action_loss": 0.008844985626637936, "epoch": 17.616906474820144, "step": 19590 }, { "epoch": 17.62589928057554, "grad_norm": 0.23062504827976227, "learning_rate": 8.049488423001113e-05, "loss": 0.0079, "step": 19600 }, { "action_loss": 0.01645776815712452, "epoch": 17.62589928057554, "step": 19600 }, { "epoch": 17.634892086330936, "grad_norm": 0.1677994430065155, "learning_rate": 8.047304056540581e-05, "loss": 0.0083, "step": 19610 }, { "action_loss": 0.01066769938915968, "epoch": 17.634892086330936, "step": 19610 }, { "epoch": 17.64388489208633, "grad_norm": 0.26818400621414185, "learning_rate": 8.045118764389534e-05, "loss": 0.0106, "step": 19620 }, { "action_loss": 0.0034186148550361395, "epoch": 17.64388489208633, "step": 19620 }, { "epoch": 17.652877697841728, "grad_norm": 0.1411317139863968, "learning_rate": 8.042932547211809e-05, "loss": 0.0057, "step": 19630 }, { "action_loss": 0.010247032158076763, "epoch": 17.652877697841728, "step": 19630 }, { "epoch": 17.66187050359712, "grad_norm": 0.2795291543006897, "learning_rate": 8.04074540567152e-05, "loss": 0.0083, "step": 19640 }, { "action_loss": 0.004414144437760115, "epoch": 17.66187050359712, "step": 19640 }, { "epoch": 17.67086330935252, "grad_norm": 0.19313272833824158, "learning_rate": 8.038557340433063e-05, "loss": 0.0162, "step": 19650 }, { "action_loss": 0.008439202792942524, "epoch": 17.67086330935252, "step": 19650 }, { "epoch": 17.679856115107913, "grad_norm": 0.2037813663482666, "learning_rate": 8.036368352161115e-05, "loss": 0.0082, "step": 19660 }, { "action_loss": 0.01733061857521534, "epoch": 17.679856115107913, "step": 19660 }, { "epoch": 17.68884892086331, "grad_norm": 0.20796361565589905, "learning_rate": 8.034178441520633e-05, "loss": 0.0098, "step": 19670 }, { "action_loss": 0.007011938374489546, "epoch": 17.68884892086331, "step": 19670 }, { "epoch": 17.697841726618705, "grad_norm": 0.32461026310920715, "learning_rate": 8.031987609176852e-05, "loss": 0.0117, "step": 19680 }, { "action_loss": 0.0048201116733253, "epoch": 17.697841726618705, "step": 19680 }, { "epoch": 17.7068345323741, "grad_norm": 0.1746530532836914, "learning_rate": 8.02979585579529e-05, "loss": 0.0111, "step": 19690 }, { "action_loss": 0.015913113951683044, "epoch": 17.7068345323741, "step": 19690 }, { "epoch": 17.715827338129497, "grad_norm": 0.18649034202098846, "learning_rate": 8.027603182041745e-05, "loss": 0.012, "step": 19700 }, { "action_loss": 0.009011824615299702, "epoch": 17.715827338129497, "step": 19700 }, { "epoch": 17.72482014388489, "grad_norm": 0.17812515795230865, "learning_rate": 8.025409588582292e-05, "loss": 0.0113, "step": 19710 }, { "action_loss": 0.005356657784432173, "epoch": 17.72482014388489, "step": 19710 }, { "epoch": 17.73381294964029, "grad_norm": 0.23955130577087402, "learning_rate": 8.023215076083288e-05, "loss": 0.0095, "step": 19720 }, { "action_loss": 0.004949863534420729, "epoch": 17.73381294964029, "step": 19720 }, { "epoch": 17.742805755395683, "grad_norm": 0.18688678741455078, "learning_rate": 8.021019645211367e-05, "loss": 0.0071, "step": 19730 }, { "action_loss": 0.007168920710682869, "epoch": 17.742805755395683, "step": 19730 }, { "epoch": 17.75179856115108, "grad_norm": 0.24726377427577972, "learning_rate": 8.018823296633441e-05, "loss": 0.0085, "step": 19740 }, { "action_loss": 0.015873968601226807, "epoch": 17.75179856115108, "step": 19740 }, { "epoch": 17.760791366906474, "grad_norm": 0.14423313736915588, "learning_rate": 8.016626031016708e-05, "loss": 0.0126, "step": 19750 }, { "action_loss": 0.008878420107066631, "epoch": 17.760791366906474, "step": 19750 }, { "epoch": 17.769784172661872, "grad_norm": 0.1911161243915558, "learning_rate": 8.014427849028636e-05, "loss": 0.0082, "step": 19760 }, { "action_loss": 0.012019089423120022, "epoch": 17.769784172661872, "step": 19760 }, { "epoch": 17.778776978417266, "grad_norm": 0.17485931515693665, "learning_rate": 8.012228751336974e-05, "loss": 0.0103, "step": 19770 }, { "action_loss": 0.009836345911026001, "epoch": 17.778776978417266, "step": 19770 }, { "epoch": 17.78776978417266, "grad_norm": 0.22130590677261353, "learning_rate": 8.01002873860975e-05, "loss": 0.0144, "step": 19780 }, { "action_loss": 0.008010435849428177, "epoch": 17.78776978417266, "step": 19780 }, { "epoch": 17.796762589928058, "grad_norm": 0.4495745897293091, "learning_rate": 8.00782781151527e-05, "loss": 0.0083, "step": 19790 }, { "action_loss": 0.022274889051914215, "epoch": 17.796762589928058, "step": 19790 }, { "epoch": 17.805755395683452, "grad_norm": 0.11634178459644318, "learning_rate": 8.005625970722119e-05, "loss": 0.0112, "step": 19800 }, { "action_loss": 0.005606465507298708, "epoch": 17.805755395683452, "step": 19800 }, { "epoch": 17.81474820143885, "grad_norm": 0.2526608407497406, "learning_rate": 8.003423216899158e-05, "loss": 0.0075, "step": 19810 }, { "action_loss": 0.00342174363322556, "epoch": 17.81474820143885, "step": 19810 }, { "epoch": 17.823741007194243, "grad_norm": 0.19827459752559662, "learning_rate": 8.001219550715522e-05, "loss": 0.0123, "step": 19820 }, { "action_loss": 0.0056924219243228436, "epoch": 17.823741007194243, "step": 19820 }, { "epoch": 17.83273381294964, "grad_norm": 0.17092213034629822, "learning_rate": 7.999014972840632e-05, "loss": 0.015, "step": 19830 }, { "action_loss": 0.009641985408961773, "epoch": 17.83273381294964, "step": 19830 }, { "epoch": 17.841726618705035, "grad_norm": 0.21824924647808075, "learning_rate": 7.996809483944174e-05, "loss": 0.008, "step": 19840 }, { "action_loss": 0.008183944039046764, "epoch": 17.841726618705035, "step": 19840 }, { "epoch": 17.850719424460433, "grad_norm": 0.1760798692703247, "learning_rate": 7.994603084696124e-05, "loss": 0.0053, "step": 19850 }, { "action_loss": 0.02864723466336727, "epoch": 17.850719424460433, "step": 19850 }, { "epoch": 17.859712230215827, "grad_norm": 0.1432153284549713, "learning_rate": 7.992395775766724e-05, "loss": 0.0082, "step": 19860 }, { "action_loss": 0.005709193181246519, "epoch": 17.859712230215827, "step": 19860 }, { "epoch": 17.868705035971225, "grad_norm": 0.11634006351232529, "learning_rate": 7.990187557826497e-05, "loss": 0.0119, "step": 19870 }, { "action_loss": 0.013783000409603119, "epoch": 17.868705035971225, "step": 19870 }, { "epoch": 17.87769784172662, "grad_norm": 0.1724015176296234, "learning_rate": 7.987978431546242e-05, "loss": 0.0103, "step": 19880 }, { "action_loss": 0.008877906948328018, "epoch": 17.87769784172662, "step": 19880 }, { "epoch": 17.886690647482013, "grad_norm": 0.15060782432556152, "learning_rate": 7.985768397597031e-05, "loss": 0.0103, "step": 19890 }, { "action_loss": 0.009010291658341885, "epoch": 17.886690647482013, "step": 19890 }, { "epoch": 17.89568345323741, "grad_norm": 0.2977357506752014, "learning_rate": 7.983557456650216e-05, "loss": 0.0124, "step": 19900 }, { "action_loss": 0.024862416088581085, "epoch": 17.89568345323741, "step": 19900 }, { "epoch": 17.904676258992804, "grad_norm": 0.3505290448665619, "learning_rate": 7.981345609377422e-05, "loss": 0.0135, "step": 19910 }, { "action_loss": 0.014141512103378773, "epoch": 17.904676258992804, "step": 19910 }, { "epoch": 17.913669064748202, "grad_norm": 0.16079692542552948, "learning_rate": 7.97913285645055e-05, "loss": 0.0082, "step": 19920 }, { "action_loss": 0.015198186039924622, "epoch": 17.913669064748202, "step": 19920 }, { "epoch": 17.922661870503596, "grad_norm": 0.19509829580783844, "learning_rate": 7.976919198541776e-05, "loss": 0.0083, "step": 19930 }, { "action_loss": 0.006171118468046188, "epoch": 17.922661870503596, "step": 19930 }, { "epoch": 17.931654676258994, "grad_norm": 0.3407134711742401, "learning_rate": 7.974704636323548e-05, "loss": 0.0106, "step": 19940 }, { "action_loss": 0.00963605660945177, "epoch": 17.931654676258994, "step": 19940 }, { "epoch": 17.940647482014388, "grad_norm": 0.15796855092048645, "learning_rate": 7.972489170468597e-05, "loss": 0.0078, "step": 19950 }, { "action_loss": 0.004826905205845833, "epoch": 17.940647482014388, "step": 19950 }, { "epoch": 17.949640287769785, "grad_norm": 0.27389463782310486, "learning_rate": 7.970272801649918e-05, "loss": 0.0125, "step": 19960 }, { "action_loss": 0.006460535805672407, "epoch": 17.949640287769785, "step": 19960 }, { "epoch": 17.95863309352518, "grad_norm": 0.1410493105649948, "learning_rate": 7.96805553054079e-05, "loss": 0.0102, "step": 19970 }, { "action_loss": 0.002070711925625801, "epoch": 17.95863309352518, "step": 19970 }, { "epoch": 17.967625899280577, "grad_norm": 0.22021792829036713, "learning_rate": 7.965837357814756e-05, "loss": 0.0095, "step": 19980 }, { "action_loss": 0.012165154330432415, "epoch": 17.967625899280577, "step": 19980 }, { "epoch": 17.97661870503597, "grad_norm": 0.2388472557067871, "learning_rate": 7.963618284145643e-05, "loss": 0.0126, "step": 19990 }, { "action_loss": 0.008186462335288525, "epoch": 17.97661870503597, "step": 19990 }, { "epoch": 17.985611510791365, "grad_norm": 0.24513326585292816, "learning_rate": 7.961398310207544e-05, "loss": 0.0117, "step": 20000 }, { "action_loss": 0.005126564297825098, "epoch": 17.985611510791365, "step": 20000 }, { "epoch": 17.994604316546763, "grad_norm": 0.32629385590553284, "learning_rate": 7.95917743667483e-05, "loss": 0.0096, "step": 20010 }, { "action_loss": 0.0072692218236625195, "epoch": 17.994604316546763, "step": 20010 }, { "epoch": 18.003597122302157, "grad_norm": 0.24934010207653046, "learning_rate": 7.956955664222144e-05, "loss": 0.0124, "step": 20020 }, { "action_loss": 0.008273731917142868, "epoch": 18.003597122302157, "step": 20020 }, { "epoch": 18.012589928057555, "grad_norm": 0.2101213037967682, "learning_rate": 7.954732993524399e-05, "loss": 0.0107, "step": 20030 }, { "action_loss": 0.011486474424600601, "epoch": 18.012589928057555, "step": 20030 }, { "epoch": 18.02158273381295, "grad_norm": 0.2476350963115692, "learning_rate": 7.952509425256786e-05, "loss": 0.0101, "step": 20040 }, { "action_loss": 0.006222423166036606, "epoch": 18.02158273381295, "step": 20040 }, { "epoch": 18.030575539568346, "grad_norm": 0.1676161140203476, "learning_rate": 7.950284960094767e-05, "loss": 0.0104, "step": 20050 }, { "action_loss": 0.005001325160264969, "epoch": 18.030575539568346, "step": 20050 }, { "epoch": 18.03956834532374, "grad_norm": 0.2225935310125351, "learning_rate": 7.948059598714076e-05, "loss": 0.0087, "step": 20060 }, { "action_loss": 0.0069574229419231415, "epoch": 18.03956834532374, "step": 20060 }, { "epoch": 18.048561151079138, "grad_norm": 0.3154411315917969, "learning_rate": 7.945833341790717e-05, "loss": 0.0113, "step": 20070 }, { "action_loss": 0.00982277374714613, "epoch": 18.048561151079138, "step": 20070 }, { "epoch": 18.057553956834532, "grad_norm": 0.2768946886062622, "learning_rate": 7.94360619000097e-05, "loss": 0.0125, "step": 20080 }, { "action_loss": 0.00548091484233737, "epoch": 18.057553956834532, "step": 20080 }, { "epoch": 18.06654676258993, "grad_norm": 0.17274028062820435, "learning_rate": 7.941378144021381e-05, "loss": 0.0087, "step": 20090 }, { "action_loss": 0.030523300170898438, "epoch": 18.06654676258993, "step": 20090 }, { "epoch": 18.075539568345324, "grad_norm": 0.22687412798404694, "learning_rate": 7.939149204528777e-05, "loss": 0.0084, "step": 20100 }, { "action_loss": 0.011581700295209885, "epoch": 18.075539568345324, "step": 20100 }, { "epoch": 18.084532374100718, "grad_norm": 0.2360266149044037, "learning_rate": 7.936919372200246e-05, "loss": 0.0109, "step": 20110 }, { "action_loss": 0.007176635321229696, "epoch": 18.084532374100718, "step": 20110 }, { "epoch": 18.093525179856115, "grad_norm": 0.18989917635917664, "learning_rate": 7.934688647713158e-05, "loss": 0.01, "step": 20120 }, { "action_loss": 0.004537501838058233, "epoch": 18.093525179856115, "step": 20120 }, { "epoch": 18.10251798561151, "grad_norm": 0.23362146317958832, "learning_rate": 7.932457031745143e-05, "loss": 0.0088, "step": 20130 }, { "action_loss": 0.07379051297903061, "epoch": 18.10251798561151, "step": 20130 }, { "epoch": 18.111510791366907, "grad_norm": 0.2181040495634079, "learning_rate": 7.930224524974108e-05, "loss": 0.011, "step": 20140 }, { "action_loss": 0.014855044893920422, "epoch": 18.111510791366907, "step": 20140 }, { "epoch": 18.1205035971223, "grad_norm": 0.223477303981781, "learning_rate": 7.927991128078232e-05, "loss": 0.0102, "step": 20150 }, { "action_loss": 0.008297242224216461, "epoch": 18.1205035971223, "step": 20150 }, { "epoch": 18.1294964028777, "grad_norm": 0.17137625813484192, "learning_rate": 7.925756841735958e-05, "loss": 0.0085, "step": 20160 }, { "action_loss": 0.01119390781968832, "epoch": 18.1294964028777, "step": 20160 }, { "epoch": 18.138489208633093, "grad_norm": 0.13868814706802368, "learning_rate": 7.923521666626008e-05, "loss": 0.0078, "step": 20170 }, { "action_loss": 0.02791132591664791, "epoch": 18.138489208633093, "step": 20170 }, { "epoch": 18.14748201438849, "grad_norm": 0.14399966597557068, "learning_rate": 7.921285603427366e-05, "loss": 0.0086, "step": 20180 }, { "action_loss": 0.005000338423997164, "epoch": 18.14748201438849, "step": 20180 }, { "epoch": 18.156474820143885, "grad_norm": 0.2588208317756653, "learning_rate": 7.91904865281929e-05, "loss": 0.0083, "step": 20190 }, { "action_loss": 0.004220695700496435, "epoch": 18.156474820143885, "step": 20190 }, { "epoch": 18.165467625899282, "grad_norm": 0.14708517491817474, "learning_rate": 7.916810815481307e-05, "loss": 0.0077, "step": 20200 }, { "action_loss": 0.0032561852131038904, "epoch": 18.165467625899282, "step": 20200 }, { "epoch": 18.174460431654676, "grad_norm": 0.4302561283111572, "learning_rate": 7.914572092093211e-05, "loss": 0.0087, "step": 20210 }, { "action_loss": 0.01634838432073593, "epoch": 18.174460431654676, "step": 20210 }, { "epoch": 18.18345323741007, "grad_norm": 0.19365449249744415, "learning_rate": 7.912332483335068e-05, "loss": 0.009, "step": 20220 }, { "action_loss": 0.01433591265231371, "epoch": 18.18345323741007, "step": 20220 }, { "epoch": 18.192446043165468, "grad_norm": 0.1738012135028839, "learning_rate": 7.910091989887213e-05, "loss": 0.007, "step": 20230 }, { "action_loss": 0.007740553468465805, "epoch": 18.192446043165468, "step": 20230 }, { "epoch": 18.201438848920862, "grad_norm": 0.2717387080192566, "learning_rate": 7.907850612430248e-05, "loss": 0.0093, "step": 20240 }, { "action_loss": 0.008243859745562077, "epoch": 18.201438848920862, "step": 20240 }, { "epoch": 18.21043165467626, "grad_norm": 0.2542426884174347, "learning_rate": 7.905608351645044e-05, "loss": 0.0075, "step": 20250 }, { "action_loss": 0.009606101550161839, "epoch": 18.21043165467626, "step": 20250 }, { "epoch": 18.219424460431654, "grad_norm": 0.1648532748222351, "learning_rate": 7.90336520821274e-05, "loss": 0.0084, "step": 20260 }, { "action_loss": 0.01267912145704031, "epoch": 18.219424460431654, "step": 20260 }, { "epoch": 18.22841726618705, "grad_norm": 0.15935105085372925, "learning_rate": 7.901121182814746e-05, "loss": 0.007, "step": 20270 }, { "action_loss": 0.002878364874050021, "epoch": 18.22841726618705, "step": 20270 }, { "epoch": 18.237410071942445, "grad_norm": 0.20276951789855957, "learning_rate": 7.898876276132736e-05, "loss": 0.0096, "step": 20280 }, { "action_loss": 0.004663757514208555, "epoch": 18.237410071942445, "step": 20280 }, { "epoch": 18.246402877697843, "grad_norm": 0.12789423763751984, "learning_rate": 7.896630488848654e-05, "loss": 0.0072, "step": 20290 }, { "action_loss": 0.010604034177958965, "epoch": 18.246402877697843, "step": 20290 }, { "epoch": 18.255395683453237, "grad_norm": 0.2513076663017273, "learning_rate": 7.89438382164471e-05, "loss": 0.008, "step": 20300 }, { "action_loss": 0.008899644017219543, "epoch": 18.255395683453237, "step": 20300 }, { "epoch": 18.264388489208635, "grad_norm": 0.23370042443275452, "learning_rate": 7.892136275203383e-05, "loss": 0.0074, "step": 20310 }, { "action_loss": 0.003585333237424493, "epoch": 18.264388489208635, "step": 20310 }, { "epoch": 18.27338129496403, "grad_norm": 0.23704206943511963, "learning_rate": 7.889887850207418e-05, "loss": 0.0088, "step": 20320 }, { "action_loss": 0.01834457740187645, "epoch": 18.27338129496403, "step": 20320 }, { "epoch": 18.282374100719423, "grad_norm": 0.2448732703924179, "learning_rate": 7.887638547339827e-05, "loss": 0.011, "step": 20330 }, { "action_loss": 0.010140648111701012, "epoch": 18.282374100719423, "step": 20330 }, { "epoch": 18.29136690647482, "grad_norm": 0.1795477569103241, "learning_rate": 7.885388367283891e-05, "loss": 0.0082, "step": 20340 }, { "action_loss": 0.006890670862048864, "epoch": 18.29136690647482, "step": 20340 }, { "epoch": 18.300359712230215, "grad_norm": 0.13223864138126373, "learning_rate": 7.88313731072315e-05, "loss": 0.0075, "step": 20350 }, { "action_loss": 0.0054402779787778854, "epoch": 18.300359712230215, "step": 20350 }, { "epoch": 18.309352517985612, "grad_norm": 0.24004212021827698, "learning_rate": 7.88088537834142e-05, "loss": 0.0088, "step": 20360 }, { "action_loss": 0.020507188513875008, "epoch": 18.309352517985612, "step": 20360 }, { "epoch": 18.318345323741006, "grad_norm": 0.17857296764850616, "learning_rate": 7.878632570822778e-05, "loss": 0.0085, "step": 20370 }, { "action_loss": 0.017209799960255623, "epoch": 18.318345323741006, "step": 20370 }, { "epoch": 18.327338129496404, "grad_norm": 0.247942253947258, "learning_rate": 7.876378888851567e-05, "loss": 0.0089, "step": 20380 }, { "action_loss": 0.011834275908768177, "epoch": 18.327338129496404, "step": 20380 }, { "epoch": 18.336330935251798, "grad_norm": 0.21836751699447632, "learning_rate": 7.874124333112396e-05, "loss": 0.01, "step": 20390 }, { "action_loss": 0.020254777744412422, "epoch": 18.336330935251798, "step": 20390 }, { "epoch": 18.345323741007196, "grad_norm": 0.19831669330596924, "learning_rate": 7.871868904290138e-05, "loss": 0.0086, "step": 20400 }, { "action_loss": 0.022091900929808617, "epoch": 18.345323741007196, "step": 20400 }, { "epoch": 18.35431654676259, "grad_norm": 0.19460846483707428, "learning_rate": 7.869612603069935e-05, "loss": 0.0111, "step": 20410 }, { "action_loss": 0.007521296385675669, "epoch": 18.35431654676259, "step": 20410 }, { "epoch": 18.363309352517987, "grad_norm": 0.20090964436531067, "learning_rate": 7.867355430137192e-05, "loss": 0.0087, "step": 20420 }, { "action_loss": 0.006470280233770609, "epoch": 18.363309352517987, "step": 20420 }, { "epoch": 18.37230215827338, "grad_norm": 0.3029523193836212, "learning_rate": 7.865097386177577e-05, "loss": 0.0075, "step": 20430 }, { "action_loss": 0.02553418278694153, "epoch": 18.37230215827338, "step": 20430 }, { "epoch": 18.381294964028775, "grad_norm": 0.20197583734989166, "learning_rate": 7.862838471877023e-05, "loss": 0.0103, "step": 20440 }, { "action_loss": 0.019915951415896416, "epoch": 18.381294964028775, "step": 20440 }, { "epoch": 18.390287769784173, "grad_norm": 0.1561577320098877, "learning_rate": 7.860578687921731e-05, "loss": 0.0109, "step": 20450 }, { "action_loss": 0.011482243426144123, "epoch": 18.390287769784173, "step": 20450 }, { "epoch": 18.399280575539567, "grad_norm": 0.2271159291267395, "learning_rate": 7.858318034998164e-05, "loss": 0.0095, "step": 20460 }, { "action_loss": 0.007799225393682718, "epoch": 18.399280575539567, "step": 20460 }, { "epoch": 18.408273381294965, "grad_norm": 0.1865697205066681, "learning_rate": 7.856056513793046e-05, "loss": 0.0071, "step": 20470 }, { "action_loss": 0.00889469776302576, "epoch": 18.408273381294965, "step": 20470 }, { "epoch": 18.41726618705036, "grad_norm": 0.147688090801239, "learning_rate": 7.85379412499337e-05, "loss": 0.0086, "step": 20480 }, { "action_loss": 0.007767139468342066, "epoch": 18.41726618705036, "step": 20480 }, { "epoch": 18.426258992805757, "grad_norm": 0.20295217633247375, "learning_rate": 7.851530869286389e-05, "loss": 0.0076, "step": 20490 }, { "action_loss": 0.0032030094880610704, "epoch": 18.426258992805757, "step": 20490 }, { "epoch": 18.43525179856115, "grad_norm": 0.24887016415596008, "learning_rate": 7.849266747359619e-05, "loss": 0.0077, "step": 20500 }, { "action_loss": 0.002162693068385124, "epoch": 18.43525179856115, "step": 20500 }, { "epoch": 18.444244604316548, "grad_norm": 0.2418728619813919, "learning_rate": 7.847001759900843e-05, "loss": 0.0059, "step": 20510 }, { "action_loss": 0.003987821284681559, "epoch": 18.444244604316548, "step": 20510 }, { "epoch": 18.453237410071942, "grad_norm": 0.17885825037956238, "learning_rate": 7.844735907598102e-05, "loss": 0.0129, "step": 20520 }, { "action_loss": 0.027897438034415245, "epoch": 18.453237410071942, "step": 20520 }, { "epoch": 18.46223021582734, "grad_norm": 0.11752184480428696, "learning_rate": 7.842469191139703e-05, "loss": 0.0079, "step": 20530 }, { "action_loss": 0.003605502424761653, "epoch": 18.46223021582734, "step": 20530 }, { "epoch": 18.471223021582734, "grad_norm": 0.13788571953773499, "learning_rate": 7.840201611214215e-05, "loss": 0.0082, "step": 20540 }, { "action_loss": 0.015346932224929333, "epoch": 18.471223021582734, "step": 20540 }, { "epoch": 18.480215827338128, "grad_norm": 0.169762521982193, "learning_rate": 7.837933168510469e-05, "loss": 0.0126, "step": 20550 }, { "action_loss": 0.004756185691803694, "epoch": 18.480215827338128, "step": 20550 }, { "epoch": 18.489208633093526, "grad_norm": 0.2631162703037262, "learning_rate": 7.835663863717559e-05, "loss": 0.0103, "step": 20560 }, { "action_loss": 0.013165662996470928, "epoch": 18.489208633093526, "step": 20560 }, { "epoch": 18.49820143884892, "grad_norm": 0.24662083387374878, "learning_rate": 7.833393697524838e-05, "loss": 0.0131, "step": 20570 }, { "action_loss": 0.003745561232790351, "epoch": 18.49820143884892, "step": 20570 }, { "epoch": 18.507194244604317, "grad_norm": 0.3417291045188904, "learning_rate": 7.831122670621922e-05, "loss": 0.0097, "step": 20580 }, { "action_loss": 0.005051580723375082, "epoch": 18.507194244604317, "step": 20580 }, { "epoch": 18.51618705035971, "grad_norm": 0.1898583173751831, "learning_rate": 7.82885078369869e-05, "loss": 0.0096, "step": 20590 }, { "action_loss": 0.005452748853713274, "epoch": 18.51618705035971, "step": 20590 }, { "epoch": 18.52517985611511, "grad_norm": 0.2339041531085968, "learning_rate": 7.826578037445283e-05, "loss": 0.0087, "step": 20600 }, { "action_loss": 0.0068887933157384396, "epoch": 18.52517985611511, "step": 20600 }, { "epoch": 18.534172661870503, "grad_norm": 0.18606799840927124, "learning_rate": 7.824304432552097e-05, "loss": 0.0065, "step": 20610 }, { "action_loss": 0.011911869049072266, "epoch": 18.534172661870503, "step": 20610 }, { "epoch": 18.5431654676259, "grad_norm": 0.15503238141536713, "learning_rate": 7.822029969709798e-05, "loss": 0.0102, "step": 20620 }, { "action_loss": 0.008727394975721836, "epoch": 18.5431654676259, "step": 20620 }, { "epoch": 18.552158273381295, "grad_norm": 0.14382612705230713, "learning_rate": 7.819754649609306e-05, "loss": 0.0128, "step": 20630 }, { "action_loss": 0.0051151118241250515, "epoch": 18.552158273381295, "step": 20630 }, { "epoch": 18.56115107913669, "grad_norm": 0.1330321580171585, "learning_rate": 7.817478472941802e-05, "loss": 0.0091, "step": 20640 }, { "action_loss": 0.004097567871212959, "epoch": 18.56115107913669, "step": 20640 }, { "epoch": 18.570143884892087, "grad_norm": 0.1679443120956421, "learning_rate": 7.815201440398727e-05, "loss": 0.0055, "step": 20650 }, { "action_loss": 0.021587073802947998, "epoch": 18.570143884892087, "step": 20650 }, { "epoch": 18.57913669064748, "grad_norm": 0.215484157204628, "learning_rate": 7.812923552671789e-05, "loss": 0.0099, "step": 20660 }, { "action_loss": 0.006297626998275518, "epoch": 18.57913669064748, "step": 20660 }, { "epoch": 18.58812949640288, "grad_norm": 0.21035686135292053, "learning_rate": 7.810644810452945e-05, "loss": 0.0066, "step": 20670 }, { "action_loss": 0.006686761509627104, "epoch": 18.58812949640288, "step": 20670 }, { "epoch": 18.597122302158272, "grad_norm": 0.2512339651584625, "learning_rate": 7.808365214434417e-05, "loss": 0.0082, "step": 20680 }, { "action_loss": 0.005928585771471262, "epoch": 18.597122302158272, "step": 20680 }, { "epoch": 18.60611510791367, "grad_norm": 0.20226852595806122, "learning_rate": 7.80608476530869e-05, "loss": 0.0088, "step": 20690 }, { "action_loss": 0.004063164349645376, "epoch": 18.60611510791367, "step": 20690 }, { "epoch": 18.615107913669064, "grad_norm": 0.15333811938762665, "learning_rate": 7.8038034637685e-05, "loss": 0.0077, "step": 20700 }, { "action_loss": 0.012052484788000584, "epoch": 18.615107913669064, "step": 20700 }, { "epoch": 18.62410071942446, "grad_norm": 0.16526183485984802, "learning_rate": 7.801521310506848e-05, "loss": 0.0059, "step": 20710 }, { "action_loss": 0.006713879760354757, "epoch": 18.62410071942446, "step": 20710 }, { "epoch": 18.633093525179856, "grad_norm": 0.17503729462623596, "learning_rate": 7.799238306216994e-05, "loss": 0.0053, "step": 20720 }, { "action_loss": 0.011450417339801788, "epoch": 18.633093525179856, "step": 20720 }, { "epoch": 18.642086330935253, "grad_norm": 0.16134242713451385, "learning_rate": 7.796954451592448e-05, "loss": 0.0106, "step": 20730 }, { "action_loss": 0.003997180610895157, "epoch": 18.642086330935253, "step": 20730 }, { "epoch": 18.651079136690647, "grad_norm": 0.24736341834068298, "learning_rate": 7.794669747326992e-05, "loss": 0.0098, "step": 20740 }, { "action_loss": 0.00632501719519496, "epoch": 18.651079136690647, "step": 20740 }, { "epoch": 18.66007194244604, "grad_norm": 0.19312727451324463, "learning_rate": 7.792384194114654e-05, "loss": 0.0072, "step": 20750 }, { "action_loss": 0.008962146006524563, "epoch": 18.66007194244604, "step": 20750 }, { "epoch": 18.66906474820144, "grad_norm": 0.23261593282222748, "learning_rate": 7.790097792649729e-05, "loss": 0.01, "step": 20760 }, { "action_loss": 0.02198917232453823, "epoch": 18.66906474820144, "step": 20760 }, { "epoch": 18.678057553956833, "grad_norm": 0.17243137955665588, "learning_rate": 7.787810543626762e-05, "loss": 0.0077, "step": 20770 }, { "action_loss": 0.01268542930483818, "epoch": 18.678057553956833, "step": 20770 }, { "epoch": 18.68705035971223, "grad_norm": 0.2032301276922226, "learning_rate": 7.785522447740558e-05, "loss": 0.0067, "step": 20780 }, { "action_loss": 0.006179291754961014, "epoch": 18.68705035971223, "step": 20780 }, { "epoch": 18.696043165467625, "grad_norm": 0.21367786824703217, "learning_rate": 7.783233505686182e-05, "loss": 0.0137, "step": 20790 }, { "action_loss": 0.007095439359545708, "epoch": 18.696043165467625, "step": 20790 }, { "epoch": 18.705035971223023, "grad_norm": 0.14992262423038483, "learning_rate": 7.780943718158955e-05, "loss": 0.0065, "step": 20800 }, { "action_loss": 0.00481870211660862, "epoch": 18.705035971223023, "step": 20800 }, { "epoch": 18.714028776978417, "grad_norm": 0.18624518811702728, "learning_rate": 7.778653085854453e-05, "loss": 0.0099, "step": 20810 }, { "action_loss": 0.004388011526316404, "epoch": 18.714028776978417, "step": 20810 }, { "epoch": 18.723021582733814, "grad_norm": 0.16071774065494537, "learning_rate": 7.77636160946851e-05, "loss": 0.0062, "step": 20820 }, { "action_loss": 0.004619579296559095, "epoch": 18.723021582733814, "step": 20820 }, { "epoch": 18.73201438848921, "grad_norm": 0.15230391919612885, "learning_rate": 7.774069289697215e-05, "loss": 0.0094, "step": 20830 }, { "action_loss": 0.007358746603131294, "epoch": 18.73201438848921, "step": 20830 }, { "epoch": 18.741007194244606, "grad_norm": 0.11305782198905945, "learning_rate": 7.771776127236913e-05, "loss": 0.0067, "step": 20840 }, { "action_loss": 0.006906405091285706, "epoch": 18.741007194244606, "step": 20840 }, { "epoch": 18.75, "grad_norm": 0.15819956362247467, "learning_rate": 7.769482122784212e-05, "loss": 0.0064, "step": 20850 }, { "action_loss": 0.01442248746752739, "epoch": 18.75, "step": 20850 }, { "epoch": 18.758992805755394, "grad_norm": 0.2714841961860657, "learning_rate": 7.767187277035963e-05, "loss": 0.0091, "step": 20860 }, { "action_loss": 0.014238952659070492, "epoch": 18.758992805755394, "step": 20860 }, { "epoch": 18.76798561151079, "grad_norm": 0.2036842554807663, "learning_rate": 7.764891590689285e-05, "loss": 0.0096, "step": 20870 }, { "action_loss": 0.007357116788625717, "epoch": 18.76798561151079, "step": 20870 }, { "epoch": 18.776978417266186, "grad_norm": 0.16616401076316833, "learning_rate": 7.762595064441542e-05, "loss": 0.0088, "step": 20880 }, { "action_loss": 0.018369844183325768, "epoch": 18.776978417266186, "step": 20880 }, { "epoch": 18.785971223021583, "grad_norm": 0.19886158406734467, "learning_rate": 7.760297698990362e-05, "loss": 0.0083, "step": 20890 }, { "action_loss": 0.0047739422880113125, "epoch": 18.785971223021583, "step": 20890 }, { "epoch": 18.794964028776977, "grad_norm": 0.1847456395626068, "learning_rate": 7.757999495033623e-05, "loss": 0.0089, "step": 20900 }, { "action_loss": 0.005444550421088934, "epoch": 18.794964028776977, "step": 20900 }, { "epoch": 18.803956834532375, "grad_norm": 0.1727980524301529, "learning_rate": 7.755700453269456e-05, "loss": 0.0051, "step": 20910 }, { "action_loss": 0.005701408255845308, "epoch": 18.803956834532375, "step": 20910 }, { "epoch": 18.81294964028777, "grad_norm": 0.1726711243391037, "learning_rate": 7.753400574396254e-05, "loss": 0.0093, "step": 20920 }, { "action_loss": 0.005699180066585541, "epoch": 18.81294964028777, "step": 20920 }, { "epoch": 18.821942446043167, "grad_norm": 0.2733538746833801, "learning_rate": 7.751099859112655e-05, "loss": 0.0109, "step": 20930 }, { "action_loss": 0.013662486337125301, "epoch": 18.821942446043167, "step": 20930 }, { "epoch": 18.83093525179856, "grad_norm": 0.18407224118709564, "learning_rate": 7.748798308117557e-05, "loss": 0.0067, "step": 20940 }, { "action_loss": 0.005629505962133408, "epoch": 18.83093525179856, "step": 20940 }, { "epoch": 18.83992805755396, "grad_norm": 0.38910314440727234, "learning_rate": 7.746495922110112e-05, "loss": 0.0091, "step": 20950 }, { "action_loss": 0.005844546016305685, "epoch": 18.83992805755396, "step": 20950 }, { "epoch": 18.848920863309353, "grad_norm": 0.20589077472686768, "learning_rate": 7.744192701789723e-05, "loss": 0.0073, "step": 20960 }, { "action_loss": 0.0047689336352050304, "epoch": 18.848920863309353, "step": 20960 }, { "epoch": 18.857913669064747, "grad_norm": 0.17878605425357819, "learning_rate": 7.741888647856046e-05, "loss": 0.008, "step": 20970 }, { "action_loss": 0.009863133542239666, "epoch": 18.857913669064747, "step": 20970 }, { "epoch": 18.866906474820144, "grad_norm": 0.1763806790113449, "learning_rate": 7.739583761008994e-05, "loss": 0.0079, "step": 20980 }, { "action_loss": 0.02046710066497326, "epoch": 18.866906474820144, "step": 20980 }, { "epoch": 18.87589928057554, "grad_norm": 0.21128767728805542, "learning_rate": 7.73727804194873e-05, "loss": 0.0074, "step": 20990 }, { "action_loss": 0.005824835505336523, "epoch": 18.87589928057554, "step": 20990 }, { "epoch": 18.884892086330936, "grad_norm": 0.1995343565940857, "learning_rate": 7.734971491375671e-05, "loss": 0.0068, "step": 21000 }, { "action_loss": 0.0026492441538721323, "epoch": 18.884892086330936, "step": 21000 }, { "epoch": 18.89388489208633, "grad_norm": 0.15942250192165375, "learning_rate": 7.732664109990485e-05, "loss": 0.0078, "step": 21010 }, { "action_loss": 0.009411497972905636, "epoch": 18.89388489208633, "step": 21010 }, { "epoch": 18.902877697841728, "grad_norm": 0.25668880343437195, "learning_rate": 7.730355898494095e-05, "loss": 0.0148, "step": 21020 }, { "action_loss": 0.012968994677066803, "epoch": 18.902877697841728, "step": 21020 }, { "epoch": 18.91187050359712, "grad_norm": 0.20967046916484833, "learning_rate": 7.728046857587673e-05, "loss": 0.0129, "step": 21030 }, { "action_loss": 0.007493140641599894, "epoch": 18.91187050359712, "step": 21030 }, { "epoch": 18.92086330935252, "grad_norm": 0.25970861315727234, "learning_rate": 7.725736987972647e-05, "loss": 0.0089, "step": 21040 }, { "action_loss": 0.01165104191750288, "epoch": 18.92086330935252, "step": 21040 }, { "epoch": 18.929856115107913, "grad_norm": 0.26779571175575256, "learning_rate": 7.723426290350691e-05, "loss": 0.0106, "step": 21050 }, { "action_loss": 0.012318949215114117, "epoch": 18.929856115107913, "step": 21050 }, { "epoch": 18.93884892086331, "grad_norm": 0.16862963140010834, "learning_rate": 7.721114765423736e-05, "loss": 0.009, "step": 21060 }, { "action_loss": 0.00603319751098752, "epoch": 18.93884892086331, "step": 21060 }, { "epoch": 18.947841726618705, "grad_norm": 0.17681342363357544, "learning_rate": 7.718802413893963e-05, "loss": 0.0073, "step": 21070 }, { "action_loss": 0.007388132158666849, "epoch": 18.947841726618705, "step": 21070 }, { "epoch": 18.9568345323741, "grad_norm": 0.18666793406009674, "learning_rate": 7.716489236463802e-05, "loss": 0.0075, "step": 21080 }, { "action_loss": 0.011021911166608334, "epoch": 18.9568345323741, "step": 21080 }, { "epoch": 18.965827338129497, "grad_norm": 0.21403300762176514, "learning_rate": 7.714175233835936e-05, "loss": 0.0069, "step": 21090 }, { "action_loss": 0.0034966059029102325, "epoch": 18.965827338129497, "step": 21090 }, { "epoch": 18.97482014388489, "grad_norm": 0.14463497698307037, "learning_rate": 7.711860406713299e-05, "loss": 0.0074, "step": 21100 }, { "action_loss": 0.007290974259376526, "epoch": 18.97482014388489, "step": 21100 }, { "epoch": 18.98381294964029, "grad_norm": 0.13392770290374756, "learning_rate": 7.70954475579907e-05, "loss": 0.0077, "step": 21110 }, { "action_loss": 0.003756424644961953, "epoch": 18.98381294964029, "step": 21110 }, { "epoch": 18.992805755395683, "grad_norm": 0.21205170452594757, "learning_rate": 7.707228281796688e-05, "loss": 0.0087, "step": 21120 }, { "action_loss": 0.0028859165031462908, "epoch": 18.992805755395683, "step": 21120 }, { "epoch": 19.00179856115108, "grad_norm": 0.2655096650123596, "learning_rate": 7.704910985409833e-05, "loss": 0.0079, "step": 21130 }, { "action_loss": 0.007244945969432592, "epoch": 19.00179856115108, "step": 21130 }, { "epoch": 19.010791366906474, "grad_norm": 0.17650999128818512, "learning_rate": 7.702592867342439e-05, "loss": 0.0089, "step": 21140 }, { "action_loss": 0.007643180433660746, "epoch": 19.010791366906474, "step": 21140 }, { "epoch": 19.019784172661872, "grad_norm": 0.14501003921031952, "learning_rate": 7.700273928298691e-05, "loss": 0.0067, "step": 21150 }, { "action_loss": 0.005257011856883764, "epoch": 19.019784172661872, "step": 21150 }, { "epoch": 19.028776978417266, "grad_norm": 0.3272881507873535, "learning_rate": 7.697954168983021e-05, "loss": 0.0076, "step": 21160 }, { "action_loss": 0.004716851282864809, "epoch": 19.028776978417266, "step": 21160 }, { "epoch": 19.037769784172664, "grad_norm": 0.26269280910491943, "learning_rate": 7.695633590100109e-05, "loss": 0.0092, "step": 21170 }, { "action_loss": 0.0052789063192903996, "epoch": 19.037769784172664, "step": 21170 }, { "epoch": 19.046762589928058, "grad_norm": 0.22928482294082642, "learning_rate": 7.693312192354886e-05, "loss": 0.0088, "step": 21180 }, { "action_loss": 0.007982605136930943, "epoch": 19.046762589928058, "step": 21180 }, { "epoch": 19.055755395683452, "grad_norm": 0.13137094676494598, "learning_rate": 7.690989976452532e-05, "loss": 0.0069, "step": 21190 }, { "action_loss": 0.007149322424083948, "epoch": 19.055755395683452, "step": 21190 }, { "epoch": 19.06474820143885, "grad_norm": 0.2561053931713104, "learning_rate": 7.688666943098475e-05, "loss": 0.0081, "step": 21200 }, { "action_loss": 0.006886918563395739, "epoch": 19.06474820143885, "step": 21200 }, { "epoch": 19.073741007194243, "grad_norm": 0.10856591910123825, "learning_rate": 7.686343092998389e-05, "loss": 0.0064, "step": 21210 }, { "action_loss": 0.004704066086560488, "epoch": 19.073741007194243, "step": 21210 }, { "epoch": 19.08273381294964, "grad_norm": 0.21657340228557587, "learning_rate": 7.684018426858202e-05, "loss": 0.0087, "step": 21220 }, { "action_loss": 0.004533433821052313, "epoch": 19.08273381294964, "step": 21220 }, { "epoch": 19.091726618705035, "grad_norm": 0.2413180023431778, "learning_rate": 7.681692945384084e-05, "loss": 0.0094, "step": 21230 }, { "action_loss": 0.005128027405589819, "epoch": 19.091726618705035, "step": 21230 }, { "epoch": 19.100719424460433, "grad_norm": 0.16075220704078674, "learning_rate": 7.679366649282456e-05, "loss": 0.0062, "step": 21240 }, { "action_loss": 0.012836617417633533, "epoch": 19.100719424460433, "step": 21240 }, { "epoch": 19.109712230215827, "grad_norm": 0.19779072701931, "learning_rate": 7.677039539259983e-05, "loss": 0.0087, "step": 21250 }, { "action_loss": 0.0076679750345647335, "epoch": 19.109712230215827, "step": 21250 }, { "epoch": 19.118705035971225, "grad_norm": 0.19537582993507385, "learning_rate": 7.674711616023581e-05, "loss": 0.0079, "step": 21260 }, { "action_loss": 0.013349774293601513, "epoch": 19.118705035971225, "step": 21260 }, { "epoch": 19.12769784172662, "grad_norm": 0.22264908254146576, "learning_rate": 7.672382880280413e-05, "loss": 0.0084, "step": 21270 }, { "action_loss": 0.0065826233476400375, "epoch": 19.12769784172662, "step": 21270 }, { "epoch": 19.136690647482013, "grad_norm": 0.22084854543209076, "learning_rate": 7.670053332737885e-05, "loss": 0.0076, "step": 21280 }, { "action_loss": 0.00286172516644001, "epoch": 19.136690647482013, "step": 21280 }, { "epoch": 19.14568345323741, "grad_norm": 0.2593780755996704, "learning_rate": 7.667722974103654e-05, "loss": 0.0063, "step": 21290 }, { "action_loss": 0.002874700352549553, "epoch": 19.14568345323741, "step": 21290 }, { "epoch": 19.154676258992804, "grad_norm": 0.18743440508842468, "learning_rate": 7.66539180508562e-05, "loss": 0.0077, "step": 21300 }, { "action_loss": 0.006059533450752497, "epoch": 19.154676258992804, "step": 21300 }, { "epoch": 19.163669064748202, "grad_norm": 0.21506164968013763, "learning_rate": 7.663059826391932e-05, "loss": 0.0107, "step": 21310 }, { "action_loss": 0.0057852123863995075, "epoch": 19.163669064748202, "step": 21310 }, { "epoch": 19.172661870503596, "grad_norm": 0.1760522723197937, "learning_rate": 7.660727038730981e-05, "loss": 0.0085, "step": 21320 }, { "action_loss": 0.017958739772439003, "epoch": 19.172661870503596, "step": 21320 }, { "epoch": 19.181654676258994, "grad_norm": 0.20419539511203766, "learning_rate": 7.65839344281141e-05, "loss": 0.0084, "step": 21330 }, { "action_loss": 0.005917006637901068, "epoch": 19.181654676258994, "step": 21330 }, { "epoch": 19.190647482014388, "grad_norm": 0.2732223570346832, "learning_rate": 7.656059039342101e-05, "loss": 0.0065, "step": 21340 }, { "action_loss": 0.009301885962486267, "epoch": 19.190647482014388, "step": 21340 }, { "epoch": 19.199640287769785, "grad_norm": 0.20364487171173096, "learning_rate": 7.653723829032187e-05, "loss": 0.0098, "step": 21350 }, { "action_loss": 0.004452172666788101, "epoch": 19.199640287769785, "step": 21350 }, { "epoch": 19.20863309352518, "grad_norm": 0.2183813899755478, "learning_rate": 7.65138781259104e-05, "loss": 0.0139, "step": 21360 }, { "action_loss": 0.005985850468277931, "epoch": 19.20863309352518, "step": 21360 }, { "epoch": 19.217625899280577, "grad_norm": 0.1968453824520111, "learning_rate": 7.649050990728279e-05, "loss": 0.0063, "step": 21370 }, { "action_loss": 0.0052959248423576355, "epoch": 19.217625899280577, "step": 21370 }, { "epoch": 19.22661870503597, "grad_norm": 0.2485559731721878, "learning_rate": 7.646713364153774e-05, "loss": 0.0088, "step": 21380 }, { "action_loss": 0.004395440686494112, "epoch": 19.22661870503597, "step": 21380 }, { "epoch": 19.235611510791365, "grad_norm": 0.16294004023075104, "learning_rate": 7.64437493357763e-05, "loss": 0.0075, "step": 21390 }, { "action_loss": 0.006098985206335783, "epoch": 19.235611510791365, "step": 21390 }, { "epoch": 19.244604316546763, "grad_norm": 0.24896365404129028, "learning_rate": 7.642035699710202e-05, "loss": 0.0088, "step": 21400 }, { "action_loss": 0.005133091937750578, "epoch": 19.244604316546763, "step": 21400 }, { "epoch": 19.253597122302157, "grad_norm": 0.1846647411584854, "learning_rate": 7.639695663262089e-05, "loss": 0.007, "step": 21410 }, { "action_loss": 0.01810459978878498, "epoch": 19.253597122302157, "step": 21410 }, { "epoch": 19.262589928057555, "grad_norm": 0.19828101992607117, "learning_rate": 7.637354824944128e-05, "loss": 0.0159, "step": 21420 }, { "action_loss": 0.00805431604385376, "epoch": 19.262589928057555, "step": 21420 }, { "epoch": 19.27158273381295, "grad_norm": 0.17002786695957184, "learning_rate": 7.635013185467408e-05, "loss": 0.0078, "step": 21430 }, { "action_loss": 0.005147634074091911, "epoch": 19.27158273381295, "step": 21430 }, { "epoch": 19.280575539568346, "grad_norm": 0.13336551189422607, "learning_rate": 7.632670745543256e-05, "loss": 0.0072, "step": 21440 }, { "action_loss": 0.004882786888629198, "epoch": 19.280575539568346, "step": 21440 }, { "epoch": 19.28956834532374, "grad_norm": 0.2255191206932068, "learning_rate": 7.630327505883242e-05, "loss": 0.0093, "step": 21450 }, { "action_loss": 0.002694790018722415, "epoch": 19.28956834532374, "step": 21450 }, { "epoch": 19.298561151079138, "grad_norm": 0.20609109103679657, "learning_rate": 7.627983467199182e-05, "loss": 0.0059, "step": 21460 }, { "action_loss": 0.00896462146192789, "epoch": 19.298561151079138, "step": 21460 }, { "epoch": 19.307553956834532, "grad_norm": 0.19809351861476898, "learning_rate": 7.625638630203132e-05, "loss": 0.0112, "step": 21470 }, { "action_loss": 0.007802131120115519, "epoch": 19.307553956834532, "step": 21470 }, { "epoch": 19.31654676258993, "grad_norm": 0.21316871047019958, "learning_rate": 7.623292995607394e-05, "loss": 0.009, "step": 21480 }, { "action_loss": 0.011664905585348606, "epoch": 19.31654676258993, "step": 21480 }, { "epoch": 19.325539568345324, "grad_norm": 0.23708242177963257, "learning_rate": 7.620946564124507e-05, "loss": 0.0093, "step": 21490 }, { "action_loss": 0.008555297739803791, "epoch": 19.325539568345324, "step": 21490 }, { "epoch": 19.334532374100718, "grad_norm": 0.3030557930469513, "learning_rate": 7.618599336467256e-05, "loss": 0.0083, "step": 21500 }, { "action_loss": 0.010437190532684326, "epoch": 19.334532374100718, "step": 21500 }, { "epoch": 19.343525179856115, "grad_norm": 0.24367433786392212, "learning_rate": 7.616251313348666e-05, "loss": 0.0082, "step": 21510 }, { "action_loss": 0.010902986861765385, "epoch": 19.343525179856115, "step": 21510 }, { "epoch": 19.35251798561151, "grad_norm": 0.2771756947040558, "learning_rate": 7.613902495482005e-05, "loss": 0.0108, "step": 21520 }, { "action_loss": 0.006596747320145369, "epoch": 19.35251798561151, "step": 21520 }, { "epoch": 19.361510791366907, "grad_norm": 0.24327388405799866, "learning_rate": 7.611552883580784e-05, "loss": 0.0075, "step": 21530 }, { "action_loss": 0.006108853965997696, "epoch": 19.361510791366907, "step": 21530 }, { "epoch": 19.3705035971223, "grad_norm": 0.12194714695215225, "learning_rate": 7.609202478358748e-05, "loss": 0.0077, "step": 21540 }, { "action_loss": 0.010077365674078465, "epoch": 19.3705035971223, "step": 21540 }, { "epoch": 19.3794964028777, "grad_norm": 0.23517577350139618, "learning_rate": 7.606851280529895e-05, "loss": 0.0083, "step": 21550 }, { "action_loss": 0.005222307052463293, "epoch": 19.3794964028777, "step": 21550 }, { "epoch": 19.388489208633093, "grad_norm": 0.1471744030714035, "learning_rate": 7.604499290808449e-05, "loss": 0.0071, "step": 21560 }, { "action_loss": 0.013375997543334961, "epoch": 19.388489208633093, "step": 21560 }, { "epoch": 19.39748201438849, "grad_norm": 0.2222723513841629, "learning_rate": 7.602146509908888e-05, "loss": 0.0086, "step": 21570 }, { "action_loss": 0.0035063924733549356, "epoch": 19.39748201438849, "step": 21570 }, { "epoch": 19.406474820143885, "grad_norm": 0.1525919884443283, "learning_rate": 7.599792938545921e-05, "loss": 0.0138, "step": 21580 }, { "action_loss": 0.008805987425148487, "epoch": 19.406474820143885, "step": 21580 }, { "epoch": 19.415467625899282, "grad_norm": 0.23246540129184723, "learning_rate": 7.597438577434506e-05, "loss": 0.0085, "step": 21590 }, { "action_loss": 0.009694830514490604, "epoch": 19.415467625899282, "step": 21590 }, { "epoch": 19.424460431654676, "grad_norm": 0.2138703614473343, "learning_rate": 7.595083427289831e-05, "loss": 0.0067, "step": 21600 }, { "action_loss": 0.0029326248914003372, "epoch": 19.424460431654676, "step": 21600 }, { "epoch": 19.43345323741007, "grad_norm": 0.12312845885753632, "learning_rate": 7.59272748882733e-05, "loss": 0.0061, "step": 21610 }, { "action_loss": 0.024205660447478294, "epoch": 19.43345323741007, "step": 21610 }, { "epoch": 19.442446043165468, "grad_norm": 0.0907752513885498, "learning_rate": 7.590370762762675e-05, "loss": 0.0086, "step": 21620 }, { "action_loss": 0.0034496020525693893, "epoch": 19.442446043165468, "step": 21620 }, { "epoch": 19.451438848920862, "grad_norm": 0.1753624975681305, "learning_rate": 7.588013249811777e-05, "loss": 0.012, "step": 21630 }, { "action_loss": 0.00958588533103466, "epoch": 19.451438848920862, "step": 21630 }, { "epoch": 19.46043165467626, "grad_norm": 0.20570842921733856, "learning_rate": 7.585654950690786e-05, "loss": 0.0072, "step": 21640 }, { "action_loss": 0.006258545909076929, "epoch": 19.46043165467626, "step": 21640 }, { "epoch": 19.469424460431654, "grad_norm": 0.24902881681919098, "learning_rate": 7.583295866116091e-05, "loss": 0.0097, "step": 21650 }, { "action_loss": 0.010253609158098698, "epoch": 19.469424460431654, "step": 21650 }, { "epoch": 19.47841726618705, "grad_norm": 0.20655323565006256, "learning_rate": 7.580935996804321e-05, "loss": 0.0108, "step": 21660 }, { "action_loss": 0.009466725401580334, "epoch": 19.47841726618705, "step": 21660 }, { "epoch": 19.487410071942445, "grad_norm": 0.14199502766132355, "learning_rate": 7.57857534347234e-05, "loss": 0.0084, "step": 21670 }, { "action_loss": 0.004184072837233543, "epoch": 19.487410071942445, "step": 21670 }, { "epoch": 19.496402877697843, "grad_norm": 0.1957370787858963, "learning_rate": 7.576213906837254e-05, "loss": 0.0066, "step": 21680 }, { "action_loss": 0.0185465719550848, "epoch": 19.496402877697843, "step": 21680 }, { "epoch": 19.505395683453237, "grad_norm": 0.18445183336734772, "learning_rate": 7.573851687616403e-05, "loss": 0.0089, "step": 21690 }, { "action_loss": 0.01041702926158905, "epoch": 19.505395683453237, "step": 21690 }, { "epoch": 19.514388489208635, "grad_norm": 0.15471501648426056, "learning_rate": 7.571488686527368e-05, "loss": 0.0058, "step": 21700 }, { "action_loss": 0.004746763035655022, "epoch": 19.514388489208635, "step": 21700 }, { "epoch": 19.52338129496403, "grad_norm": 0.14041778445243835, "learning_rate": 7.569124904287968e-05, "loss": 0.0066, "step": 21710 }, { "action_loss": 0.0067334361374378204, "epoch": 19.52338129496403, "step": 21710 }, { "epoch": 19.532374100719423, "grad_norm": 0.14816594123840332, "learning_rate": 7.566760341616254e-05, "loss": 0.0094, "step": 21720 }, { "action_loss": 0.00351141020655632, "epoch": 19.532374100719423, "step": 21720 }, { "epoch": 19.54136690647482, "grad_norm": 0.2905491888523102, "learning_rate": 7.564394999230519e-05, "loss": 0.0088, "step": 21730 }, { "action_loss": 0.008186480961740017, "epoch": 19.54136690647482, "step": 21730 }, { "epoch": 19.550359712230215, "grad_norm": 0.15073134005069733, "learning_rate": 7.562028877849294e-05, "loss": 0.0056, "step": 21740 }, { "action_loss": 0.007572650909423828, "epoch": 19.550359712230215, "step": 21740 }, { "epoch": 19.559352517985612, "grad_norm": 0.20557065308094025, "learning_rate": 7.559661978191341e-05, "loss": 0.0082, "step": 21750 }, { "action_loss": 0.00941926147788763, "epoch": 19.559352517985612, "step": 21750 }, { "epoch": 19.568345323741006, "grad_norm": 0.20909352600574493, "learning_rate": 7.557294300975664e-05, "loss": 0.0081, "step": 21760 }, { "action_loss": 0.005279388278722763, "epoch": 19.568345323741006, "step": 21760 }, { "epoch": 19.577338129496404, "grad_norm": 0.17101126909255981, "learning_rate": 7.554925846921499e-05, "loss": 0.0083, "step": 21770 }, { "action_loss": 0.0028251137118786573, "epoch": 19.577338129496404, "step": 21770 }, { "epoch": 19.586330935251798, "grad_norm": 0.4243297278881073, "learning_rate": 7.552556616748321e-05, "loss": 0.009, "step": 21780 }, { "action_loss": 0.008372331969439983, "epoch": 19.586330935251798, "step": 21780 }, { "epoch": 19.595323741007196, "grad_norm": 0.16207194328308105, "learning_rate": 7.550186611175838e-05, "loss": 0.0104, "step": 21790 }, { "action_loss": 0.006384681910276413, "epoch": 19.595323741007196, "step": 21790 }, { "epoch": 19.60431654676259, "grad_norm": 0.1669103503227234, "learning_rate": 7.547815830923998e-05, "loss": 0.0056, "step": 21800 }, { "action_loss": 0.00979629810899496, "epoch": 19.60431654676259, "step": 21800 }, { "epoch": 19.613309352517987, "grad_norm": 0.2999824285507202, "learning_rate": 7.54544427671298e-05, "loss": 0.0081, "step": 21810 }, { "action_loss": 0.02248980849981308, "epoch": 19.613309352517987, "step": 21810 }, { "epoch": 19.62230215827338, "grad_norm": 0.18277278542518616, "learning_rate": 7.543071949263198e-05, "loss": 0.0081, "step": 21820 }, { "action_loss": 0.005235469434410334, "epoch": 19.62230215827338, "step": 21820 }, { "epoch": 19.631294964028775, "grad_norm": 0.16710613667964935, "learning_rate": 7.540698849295305e-05, "loss": 0.0056, "step": 21830 }, { "action_loss": 0.0043526641093194485, "epoch": 19.631294964028775, "step": 21830 }, { "epoch": 19.640287769784173, "grad_norm": 0.1880234181880951, "learning_rate": 7.538324977530183e-05, "loss": 0.007, "step": 21840 }, { "action_loss": 0.01631828211247921, "epoch": 19.640287769784173, "step": 21840 }, { "epoch": 19.649280575539567, "grad_norm": 0.2531283497810364, "learning_rate": 7.535950334688955e-05, "loss": 0.0092, "step": 21850 }, { "action_loss": 0.0030680957715958357, "epoch": 19.649280575539567, "step": 21850 }, { "epoch": 19.658273381294965, "grad_norm": 0.18195706605911255, "learning_rate": 7.533574921492972e-05, "loss": 0.0076, "step": 21860 }, { "action_loss": 0.0030365053098648787, "epoch": 19.658273381294965, "step": 21860 }, { "epoch": 19.66726618705036, "grad_norm": 0.13976404070854187, "learning_rate": 7.531198738663824e-05, "loss": 0.0058, "step": 21870 }, { "action_loss": 0.0075400336645543575, "epoch": 19.66726618705036, "step": 21870 }, { "epoch": 19.676258992805757, "grad_norm": 0.21928493678569794, "learning_rate": 7.528821786923333e-05, "loss": 0.0068, "step": 21880 }, { "action_loss": 0.010702043771743774, "epoch": 19.676258992805757, "step": 21880 }, { "epoch": 19.68525179856115, "grad_norm": 0.29206258058547974, "learning_rate": 7.52644406699355e-05, "loss": 0.0094, "step": 21890 }, { "action_loss": 0.004339374136179686, "epoch": 19.68525179856115, "step": 21890 }, { "epoch": 19.694244604316548, "grad_norm": 0.23128336668014526, "learning_rate": 7.524065579596766e-05, "loss": 0.0067, "step": 21900 }, { "action_loss": 0.007012244313955307, "epoch": 19.694244604316548, "step": 21900 }, { "epoch": 19.703237410071942, "grad_norm": 0.1872025728225708, "learning_rate": 7.521686325455506e-05, "loss": 0.0067, "step": 21910 }, { "action_loss": 0.0064806812442839146, "epoch": 19.703237410071942, "step": 21910 }, { "epoch": 19.71223021582734, "grad_norm": 0.200097918510437, "learning_rate": 7.51930630529252e-05, "loss": 0.0083, "step": 21920 }, { "action_loss": 0.009184766560792923, "epoch": 19.71223021582734, "step": 21920 }, { "epoch": 19.721223021582734, "grad_norm": 0.2127733677625656, "learning_rate": 7.516925519830797e-05, "loss": 0.0078, "step": 21930 }, { "action_loss": 0.007219628896564245, "epoch": 19.721223021582734, "step": 21930 }, { "epoch": 19.730215827338128, "grad_norm": 0.21835938096046448, "learning_rate": 7.514543969793557e-05, "loss": 0.0086, "step": 21940 }, { "action_loss": 0.005481050815433264, "epoch": 19.730215827338128, "step": 21940 }, { "epoch": 19.739208633093526, "grad_norm": 0.18478375673294067, "learning_rate": 7.512161655904251e-05, "loss": 0.0106, "step": 21950 }, { "action_loss": 0.00283195823431015, "epoch": 19.739208633093526, "step": 21950 }, { "epoch": 19.74820143884892, "grad_norm": 0.21221265196800232, "learning_rate": 7.509778578886563e-05, "loss": 0.008, "step": 21960 }, { "action_loss": 0.0030982906464487314, "epoch": 19.74820143884892, "step": 21960 }, { "epoch": 19.757194244604317, "grad_norm": 0.1720249354839325, "learning_rate": 7.507394739464412e-05, "loss": 0.0078, "step": 21970 }, { "action_loss": 0.005197260063141584, "epoch": 19.757194244604317, "step": 21970 }, { "epoch": 19.76618705035971, "grad_norm": 0.18006108701229095, "learning_rate": 7.50501013836194e-05, "loss": 0.0117, "step": 21980 }, { "action_loss": 0.0020136612001806498, "epoch": 19.76618705035971, "step": 21980 }, { "epoch": 19.77517985611511, "grad_norm": 0.21758705377578735, "learning_rate": 7.50262477630353e-05, "loss": 0.0103, "step": 21990 }, { "action_loss": 0.01317024976015091, "epoch": 19.77517985611511, "step": 21990 }, { "epoch": 19.784172661870503, "grad_norm": 0.22143210470676422, "learning_rate": 7.500238654013794e-05, "loss": 0.0113, "step": 22000 }, { "action_loss": 0.004775590728968382, "epoch": 19.784172661870503, "step": 22000 }, { "epoch": 19.7931654676259, "grad_norm": 0.17564962804317474, "learning_rate": 7.497851772217566e-05, "loss": 0.0082, "step": 22010 }, { "action_loss": 0.007489632349461317, "epoch": 19.7931654676259, "step": 22010 }, { "epoch": 19.802158273381295, "grad_norm": 0.17814713716506958, "learning_rate": 7.495464131639924e-05, "loss": 0.0062, "step": 22020 }, { "action_loss": 0.0073175434954464436, "epoch": 19.802158273381295, "step": 22020 }, { "epoch": 19.81115107913669, "grad_norm": 0.2347571700811386, "learning_rate": 7.493075733006166e-05, "loss": 0.0084, "step": 22030 }, { "action_loss": 0.006015876308083534, "epoch": 19.81115107913669, "step": 22030 }, { "epoch": 19.820143884892087, "grad_norm": 0.24342626333236694, "learning_rate": 7.490686577041828e-05, "loss": 0.0089, "step": 22040 }, { "action_loss": 0.007913434877991676, "epoch": 19.820143884892087, "step": 22040 }, { "epoch": 19.82913669064748, "grad_norm": 0.19371949136257172, "learning_rate": 7.488296664472668e-05, "loss": 0.0127, "step": 22050 }, { "action_loss": 0.00852704606950283, "epoch": 19.82913669064748, "step": 22050 }, { "epoch": 19.83812949640288, "grad_norm": 0.1896480917930603, "learning_rate": 7.485905996024682e-05, "loss": 0.0077, "step": 22060 }, { "action_loss": 0.009158775210380554, "epoch": 19.83812949640288, "step": 22060 }, { "epoch": 19.847122302158272, "grad_norm": 0.23788173496723175, "learning_rate": 7.483514572424093e-05, "loss": 0.0068, "step": 22070 }, { "action_loss": 0.011253059841692448, "epoch": 19.847122302158272, "step": 22070 }, { "epoch": 19.85611510791367, "grad_norm": 0.11846936494112015, "learning_rate": 7.481122394397349e-05, "loss": 0.0075, "step": 22080 }, { "action_loss": 0.007100744638592005, "epoch": 19.85611510791367, "step": 22080 }, { "epoch": 19.865107913669064, "grad_norm": 0.10964561998844147, "learning_rate": 7.478729462671131e-05, "loss": 0.0063, "step": 22090 }, { "action_loss": 0.011941474862396717, "epoch": 19.865107913669064, "step": 22090 }, { "epoch": 19.87410071942446, "grad_norm": 0.24352210760116577, "learning_rate": 7.47633577797235e-05, "loss": 0.0105, "step": 22100 }, { "action_loss": 0.003386145457625389, "epoch": 19.87410071942446, "step": 22100 }, { "epoch": 19.883093525179856, "grad_norm": 0.18886688351631165, "learning_rate": 7.473941341028144e-05, "loss": 0.006, "step": 22110 }, { "action_loss": 0.003445591777563095, "epoch": 19.883093525179856, "step": 22110 }, { "epoch": 19.892086330935253, "grad_norm": 0.1503303349018097, "learning_rate": 7.471546152565879e-05, "loss": 0.0054, "step": 22120 }, { "action_loss": 0.011691411025822163, "epoch": 19.892086330935253, "step": 22120 }, { "epoch": 19.901079136690647, "grad_norm": 0.16096831858158112, "learning_rate": 7.46915021331315e-05, "loss": 0.011, "step": 22130 }, { "action_loss": 0.01738612912595272, "epoch": 19.901079136690647, "step": 22130 }, { "epoch": 19.91007194244604, "grad_norm": 0.22688831388950348, "learning_rate": 7.466753523997778e-05, "loss": 0.0093, "step": 22140 }, { "action_loss": 0.006079282145947218, "epoch": 19.91007194244604, "step": 22140 }, { "epoch": 19.91906474820144, "grad_norm": 0.1736181676387787, "learning_rate": 7.464356085347819e-05, "loss": 0.0088, "step": 22150 }, { "action_loss": 0.004955649375915527, "epoch": 19.91906474820144, "step": 22150 }, { "epoch": 19.928057553956833, "grad_norm": 0.18056143820285797, "learning_rate": 7.461957898091548e-05, "loss": 0.0084, "step": 22160 }, { "action_loss": 0.0030214665457606316, "epoch": 19.928057553956833, "step": 22160 }, { "epoch": 19.93705035971223, "grad_norm": 0.18832792341709137, "learning_rate": 7.459558962957473e-05, "loss": 0.0095, "step": 22170 }, { "action_loss": 0.004896229133009911, "epoch": 19.93705035971223, "step": 22170 }, { "epoch": 19.946043165467625, "grad_norm": 0.22117480635643005, "learning_rate": 7.457159280674326e-05, "loss": 0.0078, "step": 22180 }, { "action_loss": 0.005757184699177742, "epoch": 19.946043165467625, "step": 22180 }, { "epoch": 19.955035971223023, "grad_norm": 0.2187502086162567, "learning_rate": 7.454758851971066e-05, "loss": 0.0088, "step": 22190 }, { "action_loss": 0.009190305136144161, "epoch": 19.955035971223023, "step": 22190 }, { "epoch": 19.964028776978417, "grad_norm": 0.22806048393249512, "learning_rate": 7.45235767757688e-05, "loss": 0.0082, "step": 22200 }, { "action_loss": 0.030458247289061546, "epoch": 19.964028776978417, "step": 22200 }, { "epoch": 19.973021582733814, "grad_norm": 0.19102220237255096, "learning_rate": 7.449955758221183e-05, "loss": 0.0114, "step": 22210 }, { "action_loss": 0.01645147055387497, "epoch": 19.973021582733814, "step": 22210 }, { "epoch": 19.98201438848921, "grad_norm": 0.1548631638288498, "learning_rate": 7.447553094633615e-05, "loss": 0.0076, "step": 22220 }, { "action_loss": 0.007263150066137314, "epoch": 19.98201438848921, "step": 22220 }, { "epoch": 19.991007194244606, "grad_norm": 0.21480408310890198, "learning_rate": 7.445149687544039e-05, "loss": 0.0082, "step": 22230 }, { "action_loss": 0.012338069267570972, "epoch": 19.991007194244606, "step": 22230 }, { "epoch": 20.0, "grad_norm": 0.16842852532863617, "learning_rate": 7.44274553768255e-05, "loss": 0.0081, "step": 22240 }, { "action_loss": 0.0063732280395925045, "epoch": 20.0, "step": 22240 }, { "epoch": 20.008992805755394, "grad_norm": 0.16584336757659912, "learning_rate": 7.440340645779464e-05, "loss": 0.0074, "step": 22250 }, { "action_loss": 0.008919387124478817, "epoch": 20.008992805755394, "step": 22250 }, { "epoch": 20.01798561151079, "grad_norm": 0.14004014432430267, "learning_rate": 7.437935012565322e-05, "loss": 0.0083, "step": 22260 }, { "action_loss": 0.0030624030623584986, "epoch": 20.01798561151079, "step": 22260 }, { "epoch": 20.026978417266186, "grad_norm": 0.1489056497812271, "learning_rate": 7.435528638770893e-05, "loss": 0.0099, "step": 22270 }, { "action_loss": 0.004307681694626808, "epoch": 20.026978417266186, "step": 22270 }, { "epoch": 20.035971223021583, "grad_norm": 0.20055603981018066, "learning_rate": 7.433121525127171e-05, "loss": 0.0125, "step": 22280 }, { "action_loss": 0.019008323550224304, "epoch": 20.035971223021583, "step": 22280 }, { "epoch": 20.044964028776977, "grad_norm": 0.18555326759815216, "learning_rate": 7.430713672365371e-05, "loss": 0.0209, "step": 22290 }, { "action_loss": 0.005928193684667349, "epoch": 20.044964028776977, "step": 22290 }, { "epoch": 20.053956834532375, "grad_norm": 0.18647095561027527, "learning_rate": 7.428305081216938e-05, "loss": 0.0071, "step": 22300 }, { "action_loss": 0.00848074909299612, "epoch": 20.053956834532375, "step": 22300 }, { "epoch": 20.06294964028777, "grad_norm": 0.1998233199119568, "learning_rate": 7.425895752413536e-05, "loss": 0.0085, "step": 22310 }, { "action_loss": 0.03307202085852623, "epoch": 20.06294964028777, "step": 22310 }, { "epoch": 20.071942446043167, "grad_norm": 0.13299287855625153, "learning_rate": 7.423485686687057e-05, "loss": 0.0158, "step": 22320 }, { "action_loss": 0.009341318160295486, "epoch": 20.071942446043167, "step": 22320 }, { "epoch": 20.08093525179856, "grad_norm": 0.2224365919828415, "learning_rate": 7.421074884769616e-05, "loss": 0.0086, "step": 22330 }, { "action_loss": 0.02496633678674698, "epoch": 20.08093525179856, "step": 22330 }, { "epoch": 20.08992805755396, "grad_norm": 0.1961645781993866, "learning_rate": 7.418663347393548e-05, "loss": 0.0112, "step": 22340 }, { "action_loss": 0.01881243847310543, "epoch": 20.08992805755396, "step": 22340 }, { "epoch": 20.098920863309353, "grad_norm": 0.23118136823177338, "learning_rate": 7.416251075291418e-05, "loss": 0.0104, "step": 22350 }, { "action_loss": 0.0039435215294361115, "epoch": 20.098920863309353, "step": 22350 }, { "epoch": 20.107913669064747, "grad_norm": 0.13861456513404846, "learning_rate": 7.413838069196007e-05, "loss": 0.0055, "step": 22360 }, { "action_loss": 0.002753024222329259, "epoch": 20.107913669064747, "step": 22360 }, { "epoch": 20.116906474820144, "grad_norm": 0.24351468682289124, "learning_rate": 7.411424329840324e-05, "loss": 0.0104, "step": 22370 }, { "action_loss": 0.005380488932132721, "epoch": 20.116906474820144, "step": 22370 }, { "epoch": 20.12589928057554, "grad_norm": 0.13353294134140015, "learning_rate": 7.409009857957601e-05, "loss": 0.0155, "step": 22380 }, { "action_loss": 0.025717491284012794, "epoch": 20.12589928057554, "step": 22380 }, { "epoch": 20.134892086330936, "grad_norm": 0.23287691175937653, "learning_rate": 7.40659465428129e-05, "loss": 0.0137, "step": 22390 }, { "action_loss": 0.013813338242471218, "epoch": 20.134892086330936, "step": 22390 }, { "epoch": 20.14388489208633, "grad_norm": 0.17880018055438995, "learning_rate": 7.404178719545063e-05, "loss": 0.0103, "step": 22400 }, { "action_loss": 0.00871275831013918, "epoch": 20.14388489208633, "step": 22400 }, { "epoch": 20.152877697841728, "grad_norm": 0.2713959217071533, "learning_rate": 7.401762054482822e-05, "loss": 0.0142, "step": 22410 }, { "action_loss": 0.009859290905296803, "epoch": 20.152877697841728, "step": 22410 }, { "epoch": 20.16187050359712, "grad_norm": 0.20822373032569885, "learning_rate": 7.39934465982868e-05, "loss": 0.009, "step": 22420 }, { "action_loss": 0.007911301217973232, "epoch": 20.16187050359712, "step": 22420 }, { "epoch": 20.17086330935252, "grad_norm": 0.2765510380268097, "learning_rate": 7.396926536316984e-05, "loss": 0.008, "step": 22430 }, { "action_loss": 0.009154986590147018, "epoch": 20.17086330935252, "step": 22430 }, { "epoch": 20.179856115107913, "grad_norm": 0.19881406426429749, "learning_rate": 7.394507684682293e-05, "loss": 0.01, "step": 22440 }, { "action_loss": 0.005388645920902491, "epoch": 20.179856115107913, "step": 22440 }, { "epoch": 20.18884892086331, "grad_norm": 0.31262803077697754, "learning_rate": 7.392088105659393e-05, "loss": 0.0096, "step": 22450 }, { "action_loss": 0.005937641952186823, "epoch": 20.18884892086331, "step": 22450 }, { "epoch": 20.197841726618705, "grad_norm": 0.2696605622768402, "learning_rate": 7.389667799983284e-05, "loss": 0.0121, "step": 22460 }, { "action_loss": 0.009620709344744682, "epoch": 20.197841726618705, "step": 22460 }, { "epoch": 20.2068345323741, "grad_norm": 0.275890588760376, "learning_rate": 7.387246768389193e-05, "loss": 0.0088, "step": 22470 }, { "action_loss": 0.014946055598556995, "epoch": 20.2068345323741, "step": 22470 }, { "epoch": 20.215827338129497, "grad_norm": 0.15948888659477234, "learning_rate": 7.384825011612563e-05, "loss": 0.0063, "step": 22480 }, { "action_loss": 0.0070164501667022705, "epoch": 20.215827338129497, "step": 22480 }, { "epoch": 20.22482014388489, "grad_norm": 0.20310725271701813, "learning_rate": 7.382402530389066e-05, "loss": 0.0125, "step": 22490 }, { "action_loss": 0.00641770102083683, "epoch": 20.22482014388489, "step": 22490 }, { "epoch": 20.23381294964029, "grad_norm": 0.19080118834972382, "learning_rate": 7.379979325454582e-05, "loss": 0.0091, "step": 22500 }, { "action_loss": 0.004657667130231857, "epoch": 20.23381294964029, "step": 22500 }, { "epoch": 20.242805755395683, "grad_norm": 0.20507794618606567, "learning_rate": 7.37755539754522e-05, "loss": 0.0077, "step": 22510 }, { "action_loss": 0.0072086104191839695, "epoch": 20.242805755395683, "step": 22510 }, { "epoch": 20.25179856115108, "grad_norm": 0.18907855451107025, "learning_rate": 7.375130747397302e-05, "loss": 0.0059, "step": 22520 }, { "action_loss": 0.0029415201861411333, "epoch": 20.25179856115108, "step": 22520 }, { "epoch": 20.260791366906474, "grad_norm": 0.17702436447143555, "learning_rate": 7.372705375747377e-05, "loss": 0.007, "step": 22530 }, { "action_loss": 0.007242890074849129, "epoch": 20.260791366906474, "step": 22530 }, { "epoch": 20.269784172661872, "grad_norm": 0.23854847252368927, "learning_rate": 7.370279283332205e-05, "loss": 0.0071, "step": 22540 }, { "action_loss": 0.01378629356622696, "epoch": 20.269784172661872, "step": 22540 }, { "epoch": 20.278776978417266, "grad_norm": 0.16326451301574707, "learning_rate": 7.36785247088877e-05, "loss": 0.0074, "step": 22550 }, { "action_loss": 0.007287242915481329, "epoch": 20.278776978417266, "step": 22550 }, { "epoch": 20.28776978417266, "grad_norm": 0.12853758037090302, "learning_rate": 7.365424939154275e-05, "loss": 0.008, "step": 22560 }, { "action_loss": 0.008108788169920444, "epoch": 20.28776978417266, "step": 22560 }, { "epoch": 20.296762589928058, "grad_norm": 0.18093673884868622, "learning_rate": 7.362996688866138e-05, "loss": 0.0076, "step": 22570 }, { "action_loss": 0.0032943368423730135, "epoch": 20.296762589928058, "step": 22570 }, { "epoch": 20.305755395683452, "grad_norm": 0.14084206521511078, "learning_rate": 7.360567720761999e-05, "loss": 0.0095, "step": 22580 }, { "action_loss": 0.009153669700026512, "epoch": 20.305755395683452, "step": 22580 }, { "epoch": 20.31474820143885, "grad_norm": 0.1631649136543274, "learning_rate": 7.358138035579711e-05, "loss": 0.0086, "step": 22590 }, { "action_loss": 0.005205696914345026, "epoch": 20.31474820143885, "step": 22590 }, { "epoch": 20.323741007194243, "grad_norm": 0.20120251178741455, "learning_rate": 7.355707634057354e-05, "loss": 0.0083, "step": 22600 }, { "action_loss": 0.007248582784086466, "epoch": 20.323741007194243, "step": 22600 }, { "epoch": 20.33273381294964, "grad_norm": 0.1370997428894043, "learning_rate": 7.353276516933215e-05, "loss": 0.0051, "step": 22610 }, { "action_loss": 0.011012191884219646, "epoch": 20.33273381294964, "step": 22610 }, { "epoch": 20.341726618705035, "grad_norm": 0.147031769156456, "learning_rate": 7.350844684945806e-05, "loss": 0.0093, "step": 22620 }, { "action_loss": 0.0030063565354794264, "epoch": 20.341726618705035, "step": 22620 }, { "epoch": 20.350719424460433, "grad_norm": 0.1504417210817337, "learning_rate": 7.348412138833851e-05, "loss": 0.0064, "step": 22630 }, { "action_loss": 0.0026784788351505995, "epoch": 20.350719424460433, "step": 22630 }, { "epoch": 20.359712230215827, "grad_norm": 0.22641149163246155, "learning_rate": 7.345978879336295e-05, "loss": 0.0064, "step": 22640 }, { "action_loss": 0.01192396879196167, "epoch": 20.359712230215827, "step": 22640 }, { "epoch": 20.368705035971225, "grad_norm": 0.14269457757472992, "learning_rate": 7.343544907192296e-05, "loss": 0.0112, "step": 22650 }, { "action_loss": 0.008679695427417755, "epoch": 20.368705035971225, "step": 22650 }, { "epoch": 20.37769784172662, "grad_norm": 0.13760629296302795, "learning_rate": 7.341110223141235e-05, "loss": 0.0077, "step": 22660 }, { "action_loss": 0.006720038130879402, "epoch": 20.37769784172662, "step": 22660 }, { "epoch": 20.386690647482013, "grad_norm": 0.2044304758310318, "learning_rate": 7.3386748279227e-05, "loss": 0.0069, "step": 22670 }, { "action_loss": 0.010623051784932613, "epoch": 20.386690647482013, "step": 22670 }, { "epoch": 20.39568345323741, "grad_norm": 0.288906067609787, "learning_rate": 7.336238722276501e-05, "loss": 0.0113, "step": 22680 }, { "action_loss": 0.006818966940045357, "epoch": 20.39568345323741, "step": 22680 }, { "epoch": 20.404676258992804, "grad_norm": 0.1850249022245407, "learning_rate": 7.333801906942663e-05, "loss": 0.0075, "step": 22690 }, { "action_loss": 0.007404295261949301, "epoch": 20.404676258992804, "step": 22690 }, { "epoch": 20.413669064748202, "grad_norm": 0.15708298981189728, "learning_rate": 7.331364382661428e-05, "loss": 0.0081, "step": 22700 }, { "action_loss": 0.007177736610174179, "epoch": 20.413669064748202, "step": 22700 }, { "epoch": 20.422661870503596, "grad_norm": 0.20630761981010437, "learning_rate": 7.328926150173248e-05, "loss": 0.0068, "step": 22710 }, { "action_loss": 0.003629835322499275, "epoch": 20.422661870503596, "step": 22710 }, { "epoch": 20.431654676258994, "grad_norm": 0.1579301506280899, "learning_rate": 7.326487210218795e-05, "loss": 0.007, "step": 22720 }, { "action_loss": 0.004335880745202303, "epoch": 20.431654676258994, "step": 22720 }, { "epoch": 20.440647482014388, "grad_norm": 0.1922936886548996, "learning_rate": 7.324047563538955e-05, "loss": 0.0106, "step": 22730 }, { "action_loss": 0.003527480410411954, "epoch": 20.440647482014388, "step": 22730 }, { "epoch": 20.449640287769785, "grad_norm": 0.13886961340904236, "learning_rate": 7.321607210874828e-05, "loss": 0.0093, "step": 22740 }, { "action_loss": 0.01477610319852829, "epoch": 20.449640287769785, "step": 22740 }, { "epoch": 20.45863309352518, "grad_norm": 0.15451768040657043, "learning_rate": 7.31916615296773e-05, "loss": 0.0072, "step": 22750 }, { "action_loss": 0.005959614645689726, "epoch": 20.45863309352518, "step": 22750 }, { "epoch": 20.467625899280577, "grad_norm": 0.17249111831188202, "learning_rate": 7.316724390559188e-05, "loss": 0.0073, "step": 22760 }, { "action_loss": 0.0031710807234048843, "epoch": 20.467625899280577, "step": 22760 }, { "epoch": 20.47661870503597, "grad_norm": 0.14143459498882294, "learning_rate": 7.314281924390946e-05, "loss": 0.0048, "step": 22770 }, { "action_loss": 0.022645920515060425, "epoch": 20.47661870503597, "step": 22770 }, { "epoch": 20.485611510791365, "grad_norm": 0.22136031091213226, "learning_rate": 7.311838755204959e-05, "loss": 0.0083, "step": 22780 }, { "action_loss": 0.0038232996594160795, "epoch": 20.485611510791365, "step": 22780 }, { "epoch": 20.494604316546763, "grad_norm": 0.15315409004688263, "learning_rate": 7.3093948837434e-05, "loss": 0.0071, "step": 22790 }, { "action_loss": 0.002629471942782402, "epoch": 20.494604316546763, "step": 22790 }, { "epoch": 20.503597122302157, "grad_norm": 0.15800617635250092, "learning_rate": 7.306950310748651e-05, "loss": 0.0059, "step": 22800 }, { "action_loss": 0.003591792657971382, "epoch": 20.503597122302157, "step": 22800 }, { "epoch": 20.512589928057555, "grad_norm": 0.2468406707048416, "learning_rate": 7.304505036963311e-05, "loss": 0.0083, "step": 22810 }, { "action_loss": 0.008731418289244175, "epoch": 20.512589928057555, "step": 22810 }, { "epoch": 20.52158273381295, "grad_norm": 0.21900680661201477, "learning_rate": 7.302059063130186e-05, "loss": 0.0076, "step": 22820 }, { "action_loss": 0.009093270637094975, "epoch": 20.52158273381295, "step": 22820 }, { "epoch": 20.530575539568346, "grad_norm": 0.17938974499702454, "learning_rate": 7.2996123899923e-05, "loss": 0.0074, "step": 22830 }, { "action_loss": 0.005184283014386892, "epoch": 20.530575539568346, "step": 22830 }, { "epoch": 20.53956834532374, "grad_norm": 0.18003924190998077, "learning_rate": 7.297165018292886e-05, "loss": 0.0098, "step": 22840 }, { "action_loss": 0.006768319755792618, "epoch": 20.53956834532374, "step": 22840 }, { "epoch": 20.548561151079138, "grad_norm": 0.1822422295808792, "learning_rate": 7.294716948775396e-05, "loss": 0.0073, "step": 22850 }, { "action_loss": 0.010101770050823689, "epoch": 20.548561151079138, "step": 22850 }, { "epoch": 20.557553956834532, "grad_norm": 0.24374370276927948, "learning_rate": 7.292268182183484e-05, "loss": 0.0085, "step": 22860 }, { "action_loss": 0.0063483999110758305, "epoch": 20.557553956834532, "step": 22860 }, { "epoch": 20.56654676258993, "grad_norm": 0.19227305054664612, "learning_rate": 7.28981871926102e-05, "loss": 0.0061, "step": 22870 }, { "action_loss": 0.012185211293399334, "epoch": 20.56654676258993, "step": 22870 }, { "epoch": 20.575539568345324, "grad_norm": 0.14748574793338776, "learning_rate": 7.28736856075209e-05, "loss": 0.009, "step": 22880 }, { "action_loss": 0.006155370268970728, "epoch": 20.575539568345324, "step": 22880 }, { "epoch": 20.584532374100718, "grad_norm": 0.1858328878879547, "learning_rate": 7.284917707400985e-05, "loss": 0.0071, "step": 22890 }, { "action_loss": 0.01668647862970829, "epoch": 20.584532374100718, "step": 22890 }, { "epoch": 20.593525179856115, "grad_norm": 0.16579143702983856, "learning_rate": 7.282466159952212e-05, "loss": 0.0098, "step": 22900 }, { "action_loss": 0.006889352574944496, "epoch": 20.593525179856115, "step": 22900 }, { "epoch": 20.60251798561151, "grad_norm": 0.13470691442489624, "learning_rate": 7.280013919150483e-05, "loss": 0.01, "step": 22910 }, { "action_loss": 0.04196096584200859, "epoch": 20.60251798561151, "step": 22910 }, { "epoch": 20.611510791366907, "grad_norm": 0.20049673318862915, "learning_rate": 7.277560985740728e-05, "loss": 0.0153, "step": 22920 }, { "action_loss": 0.01143342163413763, "epoch": 20.611510791366907, "step": 22920 }, { "epoch": 20.6205035971223, "grad_norm": 0.24437223374843597, "learning_rate": 7.275107360468079e-05, "loss": 0.0096, "step": 22930 }, { "action_loss": 0.012949523515999317, "epoch": 20.6205035971223, "step": 22930 }, { "epoch": 20.6294964028777, "grad_norm": 0.2239946722984314, "learning_rate": 7.272653044077885e-05, "loss": 0.0093, "step": 22940 }, { "action_loss": 0.05094825103878975, "epoch": 20.6294964028777, "step": 22940 }, { "epoch": 20.638489208633093, "grad_norm": 0.23627758026123047, "learning_rate": 7.270198037315703e-05, "loss": 0.0117, "step": 22950 }, { "action_loss": 0.00729367695748806, "epoch": 20.638489208633093, "step": 22950 }, { "epoch": 20.64748201438849, "grad_norm": 0.1496235728263855, "learning_rate": 7.267742340927297e-05, "loss": 0.0067, "step": 22960 }, { "action_loss": 0.0044881789945065975, "epoch": 20.64748201438849, "step": 22960 }, { "epoch": 20.656474820143885, "grad_norm": 0.1499747484922409, "learning_rate": 7.265285955658645e-05, "loss": 0.0048, "step": 22970 }, { "action_loss": 0.008359517902135849, "epoch": 20.656474820143885, "step": 22970 }, { "epoch": 20.665467625899282, "grad_norm": 0.1944257616996765, "learning_rate": 7.26282888225593e-05, "loss": 0.0068, "step": 22980 }, { "action_loss": 0.009795290417969227, "epoch": 20.665467625899282, "step": 22980 }, { "epoch": 20.674460431654676, "grad_norm": 0.22884786128997803, "learning_rate": 7.260371121465548e-05, "loss": 0.007, "step": 22990 }, { "action_loss": 0.005143909249454737, "epoch": 20.674460431654676, "step": 22990 }, { "epoch": 20.68345323741007, "grad_norm": 0.21228009462356567, "learning_rate": 7.2579126740341e-05, "loss": 0.0097, "step": 23000 }, { "action_loss": 0.009120824746787548, "epoch": 20.68345323741007, "step": 23000 }, { "epoch": 20.692446043165468, "grad_norm": 0.18994128704071045, "learning_rate": 7.2554535407084e-05, "loss": 0.0064, "step": 23010 }, { "action_loss": 0.0037620123475790024, "epoch": 20.692446043165468, "step": 23010 }, { "epoch": 20.701438848920862, "grad_norm": 0.17003467679023743, "learning_rate": 7.252993722235464e-05, "loss": 0.0082, "step": 23020 }, { "action_loss": 0.004131094086915255, "epoch": 20.701438848920862, "step": 23020 }, { "epoch": 20.71043165467626, "grad_norm": 0.22276917099952698, "learning_rate": 7.250533219362523e-05, "loss": 0.0059, "step": 23030 }, { "action_loss": 0.011556588113307953, "epoch": 20.71043165467626, "step": 23030 }, { "epoch": 20.719424460431654, "grad_norm": 0.17796197533607483, "learning_rate": 7.248072032837012e-05, "loss": 0.0109, "step": 23040 }, { "action_loss": 0.004908470902591944, "epoch": 20.719424460431654, "step": 23040 }, { "epoch": 20.72841726618705, "grad_norm": 0.23780982196331024, "learning_rate": 7.245610163406575e-05, "loss": 0.0076, "step": 23050 }, { "action_loss": 0.010338310152292252, "epoch": 20.72841726618705, "step": 23050 }, { "epoch": 20.737410071942445, "grad_norm": 0.14592190086841583, "learning_rate": 7.243147611819061e-05, "loss": 0.0086, "step": 23060 }, { "action_loss": 0.002038456266745925, "epoch": 20.737410071942445, "step": 23060 }, { "epoch": 20.746402877697843, "grad_norm": 0.13225057721138, "learning_rate": 7.240684378822531e-05, "loss": 0.0057, "step": 23070 }, { "action_loss": 0.006313402205705643, "epoch": 20.746402877697843, "step": 23070 }, { "epoch": 20.755395683453237, "grad_norm": 0.21513617038726807, "learning_rate": 7.238220465165248e-05, "loss": 0.0068, "step": 23080 }, { "action_loss": 0.00410242797806859, "epoch": 20.755395683453237, "step": 23080 }, { "epoch": 20.764388489208635, "grad_norm": 0.2302303910255432, "learning_rate": 7.235755871595684e-05, "loss": 0.009, "step": 23090 }, { "action_loss": 0.004138943273574114, "epoch": 20.764388489208635, "step": 23090 }, { "epoch": 20.77338129496403, "grad_norm": 0.25199344754219055, "learning_rate": 7.233290598862517e-05, "loss": 0.0082, "step": 23100 }, { "action_loss": 0.006084933876991272, "epoch": 20.77338129496403, "step": 23100 }, { "epoch": 20.782374100719423, "grad_norm": 0.2695482075214386, "learning_rate": 7.230824647714635e-05, "loss": 0.0083, "step": 23110 }, { "action_loss": 0.011256582103669643, "epoch": 20.782374100719423, "step": 23110 }, { "epoch": 20.79136690647482, "grad_norm": 0.18867109715938568, "learning_rate": 7.228358018901124e-05, "loss": 0.008, "step": 23120 }, { "action_loss": 0.011245347559452057, "epoch": 20.79136690647482, "step": 23120 }, { "epoch": 20.800359712230215, "grad_norm": 0.31868648529052734, "learning_rate": 7.225890713171286e-05, "loss": 0.0097, "step": 23130 }, { "action_loss": 0.013539138250052929, "epoch": 20.800359712230215, "step": 23130 }, { "epoch": 20.809352517985612, "grad_norm": 0.2134377658367157, "learning_rate": 7.223422731274618e-05, "loss": 0.0067, "step": 23140 }, { "action_loss": 0.004787426441907883, "epoch": 20.809352517985612, "step": 23140 }, { "epoch": 20.818345323741006, "grad_norm": 0.17153286933898926, "learning_rate": 7.220954073960832e-05, "loss": 0.006, "step": 23150 }, { "action_loss": 0.012258830480277538, "epoch": 20.818345323741006, "step": 23150 }, { "epoch": 20.827338129496404, "grad_norm": 0.2055276334285736, "learning_rate": 7.218484741979838e-05, "loss": 0.0067, "step": 23160 }, { "action_loss": 0.0033874784130603075, "epoch": 20.827338129496404, "step": 23160 }, { "epoch": 20.836330935251798, "grad_norm": 0.191467747092247, "learning_rate": 7.216014736081756e-05, "loss": 0.01, "step": 23170 }, { "action_loss": 0.0038159515243023634, "epoch": 20.836330935251798, "step": 23170 }, { "epoch": 20.845323741007196, "grad_norm": 0.19529403746128082, "learning_rate": 7.213544057016906e-05, "loss": 0.0079, "step": 23180 }, { "action_loss": 0.005057960748672485, "epoch": 20.845323741007196, "step": 23180 }, { "epoch": 20.85431654676259, "grad_norm": 0.19044533371925354, "learning_rate": 7.211072705535819e-05, "loss": 0.0064, "step": 23190 }, { "action_loss": 0.0038686124607920647, "epoch": 20.85431654676259, "step": 23190 }, { "epoch": 20.863309352517987, "grad_norm": 0.16289348900318146, "learning_rate": 7.208600682389224e-05, "loss": 0.0067, "step": 23200 }, { "action_loss": 0.003754387842491269, "epoch": 20.863309352517987, "step": 23200 }, { "epoch": 20.87230215827338, "grad_norm": 0.2513732612133026, "learning_rate": 7.206127988328055e-05, "loss": 0.0077, "step": 23210 }, { "action_loss": 0.00565515598282218, "epoch": 20.87230215827338, "step": 23210 }, { "epoch": 20.881294964028775, "grad_norm": 0.19888390600681305, "learning_rate": 7.203654624103453e-05, "loss": 0.0063, "step": 23220 }, { "action_loss": 0.00987471267580986, "epoch": 20.881294964028775, "step": 23220 }, { "epoch": 20.890287769784173, "grad_norm": 0.21589218080043793, "learning_rate": 7.201180590466761e-05, "loss": 0.0081, "step": 23230 }, { "action_loss": 0.00491236662492156, "epoch": 20.890287769784173, "step": 23230 }, { "epoch": 20.899280575539567, "grad_norm": 0.20656202733516693, "learning_rate": 7.198705888169523e-05, "loss": 0.0103, "step": 23240 }, { "action_loss": 0.0038977109361439943, "epoch": 20.899280575539567, "step": 23240 }, { "epoch": 20.908273381294965, "grad_norm": 0.23845772445201874, "learning_rate": 7.196230517963491e-05, "loss": 0.0071, "step": 23250 }, { "action_loss": 0.025724591687321663, "epoch": 20.908273381294965, "step": 23250 }, { "epoch": 20.91726618705036, "grad_norm": 0.1491488814353943, "learning_rate": 7.193754480600615e-05, "loss": 0.0099, "step": 23260 }, { "action_loss": 0.0043571400456130505, "epoch": 20.91726618705036, "step": 23260 }, { "epoch": 20.926258992805757, "grad_norm": 0.2598053812980652, "learning_rate": 7.19127777683305e-05, "loss": 0.0133, "step": 23270 }, { "action_loss": 0.0037214600015431643, "epoch": 20.926258992805757, "step": 23270 }, { "epoch": 20.93525179856115, "grad_norm": 0.15227746963500977, "learning_rate": 7.188800407413156e-05, "loss": 0.0059, "step": 23280 }, { "action_loss": 0.004179796669632196, "epoch": 20.93525179856115, "step": 23280 }, { "epoch": 20.944244604316548, "grad_norm": 0.16772209107875824, "learning_rate": 7.186322373093489e-05, "loss": 0.0096, "step": 23290 }, { "action_loss": 0.0028528468683362007, "epoch": 20.944244604316548, "step": 23290 }, { "epoch": 20.953237410071942, "grad_norm": 0.14017707109451294, "learning_rate": 7.18384367462681e-05, "loss": 0.0059, "step": 23300 }, { "action_loss": 0.012046821415424347, "epoch": 20.953237410071942, "step": 23300 }, { "epoch": 20.96223021582734, "grad_norm": 0.2913222312927246, "learning_rate": 7.181364312766085e-05, "loss": 0.0093, "step": 23310 }, { "action_loss": 0.007462609559297562, "epoch": 20.96223021582734, "step": 23310 }, { "epoch": 20.971223021582734, "grad_norm": 0.21468494832515717, "learning_rate": 7.178884288264477e-05, "loss": 0.0069, "step": 23320 }, { "action_loss": 0.01244859118014574, "epoch": 20.971223021582734, "step": 23320 }, { "epoch": 20.980215827338128, "grad_norm": 0.2315952330827713, "learning_rate": 7.176403601875353e-05, "loss": 0.0095, "step": 23330 }, { "action_loss": 0.005020382348448038, "epoch": 20.980215827338128, "step": 23330 }, { "epoch": 20.989208633093526, "grad_norm": 0.1817358285188675, "learning_rate": 7.173922254352279e-05, "loss": 0.0089, "step": 23340 }, { "action_loss": 0.0037873368710279465, "epoch": 20.989208633093526, "step": 23340 }, { "epoch": 20.99820143884892, "grad_norm": 0.1469842940568924, "learning_rate": 7.171440246449024e-05, "loss": 0.0067, "step": 23350 }, { "action_loss": 0.004673713818192482, "epoch": 20.99820143884892, "step": 23350 }, { "epoch": 21.007194244604317, "grad_norm": 0.19306716322898865, "learning_rate": 7.168957578919555e-05, "loss": 0.0065, "step": 23360 }, { "action_loss": 0.005602356046438217, "epoch": 21.007194244604317, "step": 23360 }, { "epoch": 21.01618705035971, "grad_norm": 0.15274327993392944, "learning_rate": 7.16647425251804e-05, "loss": 0.0084, "step": 23370 }, { "action_loss": 0.009849070571362972, "epoch": 21.01618705035971, "step": 23370 }, { "epoch": 21.02517985611511, "grad_norm": 0.18460512161254883, "learning_rate": 7.163990267998852e-05, "loss": 0.0096, "step": 23380 }, { "action_loss": 0.008367396891117096, "epoch": 21.02517985611511, "step": 23380 }, { "epoch": 21.034172661870503, "grad_norm": 0.22124935686588287, "learning_rate": 7.161505626116556e-05, "loss": 0.0092, "step": 23390 }, { "action_loss": 0.004540977533906698, "epoch": 21.034172661870503, "step": 23390 }, { "epoch": 21.0431654676259, "grad_norm": 0.27994805574417114, "learning_rate": 7.159020327625923e-05, "loss": 0.0079, "step": 23400 }, { "action_loss": 0.01845858059823513, "epoch": 21.0431654676259, "step": 23400 }, { "epoch": 21.052158273381295, "grad_norm": 0.25097528100013733, "learning_rate": 7.15653437328192e-05, "loss": 0.0127, "step": 23410 }, { "action_loss": 0.004321586340665817, "epoch": 21.052158273381295, "step": 23410 }, { "epoch": 21.06115107913669, "grad_norm": 0.16573040187358856, "learning_rate": 7.154047763839713e-05, "loss": 0.0082, "step": 23420 }, { "action_loss": 0.003927540499716997, "epoch": 21.06115107913669, "step": 23420 }, { "epoch": 21.070143884892087, "grad_norm": 0.23642416298389435, "learning_rate": 7.15156050005467e-05, "loss": 0.0063, "step": 23430 }, { "action_loss": 0.012293796986341476, "epoch": 21.070143884892087, "step": 23430 }, { "epoch": 21.07913669064748, "grad_norm": 0.17267200350761414, "learning_rate": 7.149072582682357e-05, "loss": 0.0068, "step": 23440 }, { "action_loss": 0.009426597505807877, "epoch": 21.07913669064748, "step": 23440 }, { "epoch": 21.08812949640288, "grad_norm": 0.12878663837909698, "learning_rate": 7.146584012478535e-05, "loss": 0.0068, "step": 23450 }, { "action_loss": 0.01661032624542713, "epoch": 21.08812949640288, "step": 23450 }, { "epoch": 21.097122302158272, "grad_norm": 0.20787298679351807, "learning_rate": 7.144094790199169e-05, "loss": 0.0086, "step": 23460 }, { "action_loss": 0.008389174938201904, "epoch": 21.097122302158272, "step": 23460 }, { "epoch": 21.10611510791367, "grad_norm": 0.16765809059143066, "learning_rate": 7.141604916600415e-05, "loss": 0.0076, "step": 23470 }, { "action_loss": 0.011002592742443085, "epoch": 21.10611510791367, "step": 23470 }, { "epoch": 21.115107913669064, "grad_norm": 0.22263625264167786, "learning_rate": 7.139114392438635e-05, "loss": 0.0066, "step": 23480 }, { "action_loss": 0.03737957775592804, "epoch": 21.115107913669064, "step": 23480 }, { "epoch": 21.12410071942446, "grad_norm": 0.2128181904554367, "learning_rate": 7.136623218470382e-05, "loss": 0.0097, "step": 23490 }, { "action_loss": 0.004002816043794155, "epoch": 21.12410071942446, "step": 23490 }, { "epoch": 21.133093525179856, "grad_norm": 0.14899994432926178, "learning_rate": 7.13413139545241e-05, "loss": 0.0072, "step": 23500 }, { "action_loss": 0.009092777036130428, "epoch": 21.133093525179856, "step": 23500 }, { "epoch": 21.142086330935253, "grad_norm": 0.16630885004997253, "learning_rate": 7.131638924141668e-05, "loss": 0.01, "step": 23510 }, { "action_loss": 0.0038644925225526094, "epoch": 21.142086330935253, "step": 23510 }, { "epoch": 21.151079136690647, "grad_norm": 0.21285380423069, "learning_rate": 7.129145805295304e-05, "loss": 0.01, "step": 23520 }, { "action_loss": 0.005112530197948217, "epoch": 21.151079136690647, "step": 23520 }, { "epoch": 21.16007194244604, "grad_norm": 0.1578381061553955, "learning_rate": 7.126652039670661e-05, "loss": 0.0054, "step": 23530 }, { "action_loss": 0.0080324811860919, "epoch": 21.16007194244604, "step": 23530 }, { "epoch": 21.16906474820144, "grad_norm": 0.19920091331005096, "learning_rate": 7.124157628025278e-05, "loss": 0.007, "step": 23540 }, { "action_loss": 0.004845503717660904, "epoch": 21.16906474820144, "step": 23540 }, { "epoch": 21.178057553956833, "grad_norm": 0.16713516414165497, "learning_rate": 7.121662571116894e-05, "loss": 0.0075, "step": 23550 }, { "action_loss": 0.00419691251590848, "epoch": 21.178057553956833, "step": 23550 }, { "epoch": 21.18705035971223, "grad_norm": 0.15361501276493073, "learning_rate": 7.119166869703441e-05, "loss": 0.0061, "step": 23560 }, { "action_loss": 0.0030425291042774916, "epoch": 21.18705035971223, "step": 23560 }, { "epoch": 21.196043165467625, "grad_norm": 0.2369118481874466, "learning_rate": 7.116670524543044e-05, "loss": 0.0096, "step": 23570 }, { "action_loss": 0.005598843097686768, "epoch": 21.196043165467625, "step": 23570 }, { "epoch": 21.205035971223023, "grad_norm": 0.2468172013759613, "learning_rate": 7.114173536394032e-05, "loss": 0.0085, "step": 23580 }, { "action_loss": 0.007778229657560587, "epoch": 21.205035971223023, "step": 23580 }, { "epoch": 21.214028776978417, "grad_norm": 0.16686871647834778, "learning_rate": 7.111675906014917e-05, "loss": 0.0103, "step": 23590 }, { "action_loss": 0.01101483404636383, "epoch": 21.214028776978417, "step": 23590 }, { "epoch": 21.223021582733814, "grad_norm": 0.16228517889976501, "learning_rate": 7.109177634164421e-05, "loss": 0.0057, "step": 23600 }, { "action_loss": 0.0029356598388403654, "epoch": 21.223021582733814, "step": 23600 }, { "epoch": 21.23201438848921, "grad_norm": 0.16599802672863007, "learning_rate": 7.106678721601449e-05, "loss": 0.006, "step": 23610 }, { "action_loss": 0.009498621337115765, "epoch": 21.23201438848921, "step": 23610 }, { "epoch": 21.241007194244606, "grad_norm": 0.17534302175045013, "learning_rate": 7.104179169085103e-05, "loss": 0.0084, "step": 23620 }, { "action_loss": 0.005764516536146402, "epoch": 21.241007194244606, "step": 23620 }, { "epoch": 21.25, "grad_norm": 0.2469436228275299, "learning_rate": 7.101678977374683e-05, "loss": 0.011, "step": 23630 }, { "action_loss": 0.0043459185399115086, "epoch": 21.25, "step": 23630 }, { "epoch": 21.258992805755394, "grad_norm": 0.18762856721878052, "learning_rate": 7.099178147229685e-05, "loss": 0.0074, "step": 23640 }, { "action_loss": 0.013841879554092884, "epoch": 21.258992805755394, "step": 23640 }, { "epoch": 21.26798561151079, "grad_norm": 0.13173328340053558, "learning_rate": 7.096676679409789e-05, "loss": 0.0091, "step": 23650 }, { "action_loss": 0.01250920444726944, "epoch": 21.26798561151079, "step": 23650 }, { "epoch": 21.276978417266186, "grad_norm": 0.21675808727741241, "learning_rate": 7.094174574674877e-05, "loss": 0.0075, "step": 23660 }, { "action_loss": 0.009490559808909893, "epoch": 21.276978417266186, "step": 23660 }, { "epoch": 21.285971223021583, "grad_norm": 0.3795468807220459, "learning_rate": 7.091671833785025e-05, "loss": 0.0059, "step": 23670 }, { "action_loss": 0.004214289598166943, "epoch": 21.285971223021583, "step": 23670 }, { "epoch": 21.294964028776977, "grad_norm": 0.2005041241645813, "learning_rate": 7.089168457500493e-05, "loss": 0.0075, "step": 23680 }, { "action_loss": 0.006988169625401497, "epoch": 21.294964028776977, "step": 23680 }, { "epoch": 21.303956834532375, "grad_norm": 0.1482883095741272, "learning_rate": 7.086664446581747e-05, "loss": 0.0122, "step": 23690 }, { "action_loss": 0.007329729851335287, "epoch": 21.303956834532375, "step": 23690 }, { "epoch": 21.31294964028777, "grad_norm": 0.22309449315071106, "learning_rate": 7.084159801789438e-05, "loss": 0.0071, "step": 23700 }, { "action_loss": 0.0035292841494083405, "epoch": 21.31294964028777, "step": 23700 }, { "epoch": 21.321942446043167, "grad_norm": 0.18454791605472565, "learning_rate": 7.081654523884411e-05, "loss": 0.009, "step": 23710 }, { "action_loss": 0.003915117587894201, "epoch": 21.321942446043167, "step": 23710 }, { "epoch": 21.33093525179856, "grad_norm": 0.18160992860794067, "learning_rate": 7.0791486136277e-05, "loss": 0.0096, "step": 23720 }, { "action_loss": 0.00536201661452651, "epoch": 21.33093525179856, "step": 23720 }, { "epoch": 21.33992805755396, "grad_norm": 0.21383975446224213, "learning_rate": 7.07664207178054e-05, "loss": 0.0112, "step": 23730 }, { "action_loss": 0.003179708728566766, "epoch": 21.33992805755396, "step": 23730 }, { "epoch": 21.348920863309353, "grad_norm": 0.13733917474746704, "learning_rate": 7.074134899104345e-05, "loss": 0.008, "step": 23740 }, { "action_loss": 0.003072229214012623, "epoch": 21.348920863309353, "step": 23740 }, { "epoch": 21.357913669064747, "grad_norm": 0.14658880233764648, "learning_rate": 7.071627096360735e-05, "loss": 0.0069, "step": 23750 }, { "action_loss": 0.004779106471687555, "epoch": 21.357913669064747, "step": 23750 }, { "epoch": 21.366906474820144, "grad_norm": 0.14727815985679626, "learning_rate": 7.069118664311511e-05, "loss": 0.0064, "step": 23760 }, { "action_loss": 0.004908356349915266, "epoch": 21.366906474820144, "step": 23760 }, { "epoch": 21.37589928057554, "grad_norm": 0.12272965908050537, "learning_rate": 7.06660960371867e-05, "loss": 0.0052, "step": 23770 }, { "action_loss": 0.00892298761755228, "epoch": 21.37589928057554, "step": 23770 }, { "epoch": 21.384892086330936, "grad_norm": 0.23779284954071045, "learning_rate": 7.064099915344396e-05, "loss": 0.0091, "step": 23780 }, { "action_loss": 0.004410672467201948, "epoch": 21.384892086330936, "step": 23780 }, { "epoch": 21.39388489208633, "grad_norm": 0.24336405098438263, "learning_rate": 7.061589599951066e-05, "loss": 0.0075, "step": 23790 }, { "action_loss": 0.0033437900710850954, "epoch": 21.39388489208633, "step": 23790 }, { "epoch": 21.402877697841728, "grad_norm": 0.1459948569536209, "learning_rate": 7.05907865830125e-05, "loss": 0.0134, "step": 23800 }, { "action_loss": 0.003850052133202553, "epoch": 21.402877697841728, "step": 23800 }, { "epoch": 21.41187050359712, "grad_norm": 0.1701558530330658, "learning_rate": 7.056567091157703e-05, "loss": 0.0061, "step": 23810 }, { "action_loss": 0.010410338640213013, "epoch": 21.41187050359712, "step": 23810 }, { "epoch": 21.42086330935252, "grad_norm": 0.10179203748703003, "learning_rate": 7.054054899283375e-05, "loss": 0.0087, "step": 23820 }, { "action_loss": 0.005078563001006842, "epoch": 21.42086330935252, "step": 23820 }, { "epoch": 21.429856115107913, "grad_norm": 0.15630625188350677, "learning_rate": 7.051542083441403e-05, "loss": 0.0065, "step": 23830 }, { "action_loss": 0.00592600554227829, "epoch": 21.429856115107913, "step": 23830 }, { "epoch": 21.43884892086331, "grad_norm": 0.19004321098327637, "learning_rate": 7.049028644395113e-05, "loss": 0.008, "step": 23840 }, { "action_loss": 0.0030234914738684893, "epoch": 21.43884892086331, "step": 23840 }, { "epoch": 21.447841726618705, "grad_norm": 0.12230594456195831, "learning_rate": 7.046514582908024e-05, "loss": 0.005, "step": 23850 }, { "action_loss": 0.006866913288831711, "epoch": 21.447841726618705, "step": 23850 }, { "epoch": 21.4568345323741, "grad_norm": 0.12787915766239166, "learning_rate": 7.043999899743838e-05, "loss": 0.0069, "step": 23860 }, { "action_loss": 0.011648669838905334, "epoch": 21.4568345323741, "step": 23860 }, { "epoch": 21.465827338129497, "grad_norm": 0.18746206164360046, "learning_rate": 7.041484595666451e-05, "loss": 0.0101, "step": 23870 }, { "action_loss": 0.007352550979703665, "epoch": 21.465827338129497, "step": 23870 }, { "epoch": 21.47482014388489, "grad_norm": 0.18776001036167145, "learning_rate": 7.038968671439948e-05, "loss": 0.0066, "step": 23880 }, { "action_loss": 0.007439889013767242, "epoch": 21.47482014388489, "step": 23880 }, { "epoch": 21.48381294964029, "grad_norm": 0.1486494094133377, "learning_rate": 7.036452127828596e-05, "loss": 0.0089, "step": 23890 }, { "action_loss": 0.003973896149545908, "epoch": 21.48381294964029, "step": 23890 }, { "epoch": 21.492805755395683, "grad_norm": 0.19819031655788422, "learning_rate": 7.033934965596859e-05, "loss": 0.0058, "step": 23900 }, { "action_loss": 0.012233932502567768, "epoch": 21.492805755395683, "step": 23900 }, { "epoch": 21.50179856115108, "grad_norm": 0.3781743049621582, "learning_rate": 7.031417185509381e-05, "loss": 0.0109, "step": 23910 }, { "action_loss": 0.009913337416946888, "epoch": 21.50179856115108, "step": 23910 }, { "epoch": 21.510791366906474, "grad_norm": 0.20002418756484985, "learning_rate": 7.028898788331e-05, "loss": 0.008, "step": 23920 }, { "action_loss": 0.07120924443006516, "epoch": 21.510791366906474, "step": 23920 }, { "epoch": 21.519784172661872, "grad_norm": 0.19774070382118225, "learning_rate": 7.026379774826736e-05, "loss": 0.015, "step": 23930 }, { "action_loss": 0.011414294131100178, "epoch": 21.519784172661872, "step": 23930 }, { "epoch": 21.528776978417266, "grad_norm": 0.309614896774292, "learning_rate": 7.0238601457618e-05, "loss": 0.0069, "step": 23940 }, { "action_loss": 0.01742647774517536, "epoch": 21.528776978417266, "step": 23940 }, { "epoch": 21.53776978417266, "grad_norm": 0.22058919072151184, "learning_rate": 7.02133990190159e-05, "loss": 0.008, "step": 23950 }, { "action_loss": 0.003889224724844098, "epoch": 21.53776978417266, "step": 23950 }, { "epoch": 21.546762589928058, "grad_norm": 0.2302980273962021, "learning_rate": 7.018819044011687e-05, "loss": 0.0087, "step": 23960 }, { "action_loss": 0.00588051974773407, "epoch": 21.546762589928058, "step": 23960 }, { "epoch": 21.555755395683452, "grad_norm": 0.18192540109157562, "learning_rate": 7.016297572857863e-05, "loss": 0.006, "step": 23970 }, { "action_loss": 0.0035333328414708376, "epoch": 21.555755395683452, "step": 23970 }, { "epoch": 21.56474820143885, "grad_norm": 0.17990249395370483, "learning_rate": 7.013775489206072e-05, "loss": 0.0073, "step": 23980 }, { "action_loss": 0.010865111835300922, "epoch": 21.56474820143885, "step": 23980 }, { "epoch": 21.573741007194243, "grad_norm": 0.20883974432945251, "learning_rate": 7.01125279382246e-05, "loss": 0.0062, "step": 23990 }, { "action_loss": 0.009278252720832825, "epoch": 21.573741007194243, "step": 23990 }, { "epoch": 21.58273381294964, "grad_norm": 0.181902214884758, "learning_rate": 7.008729487473351e-05, "loss": 0.0079, "step": 24000 }, { "action_loss": 0.003895450383424759, "epoch": 21.58273381294964, "step": 24000 }, { "epoch": 21.591726618705035, "grad_norm": 0.15114033222198486, "learning_rate": 7.006205570925263e-05, "loss": 0.0097, "step": 24010 }, { "action_loss": 0.006326522212475538, "epoch": 21.591726618705035, "step": 24010 }, { "epoch": 21.600719424460433, "grad_norm": 0.2100273221731186, "learning_rate": 7.003681044944892e-05, "loss": 0.0071, "step": 24020 }, { "action_loss": 0.0040070838294923306, "epoch": 21.600719424460433, "step": 24020 }, { "epoch": 21.609712230215827, "grad_norm": 0.15865489840507507, "learning_rate": 7.001155910299126e-05, "loss": 0.0074, "step": 24030 }, { "action_loss": 0.022001339122653008, "epoch": 21.609712230215827, "step": 24030 }, { "epoch": 21.618705035971225, "grad_norm": 0.2014824002981186, "learning_rate": 6.99863016775503e-05, "loss": 0.0119, "step": 24040 }, { "action_loss": 0.006638436112552881, "epoch": 21.618705035971225, "step": 24040 }, { "epoch": 21.62769784172662, "grad_norm": 0.18744301795959473, "learning_rate": 6.996103818079859e-05, "loss": 0.008, "step": 24050 }, { "action_loss": 0.010468810796737671, "epoch": 21.62769784172662, "step": 24050 }, { "epoch": 21.636690647482013, "grad_norm": 0.16045624017715454, "learning_rate": 6.993576862041054e-05, "loss": 0.0076, "step": 24060 }, { "action_loss": 0.009822622872889042, "epoch": 21.636690647482013, "step": 24060 }, { "epoch": 21.64568345323741, "grad_norm": 0.21648098528385162, "learning_rate": 6.991049300406235e-05, "loss": 0.0057, "step": 24070 }, { "action_loss": 0.007508756127208471, "epoch": 21.64568345323741, "step": 24070 }, { "epoch": 21.654676258992804, "grad_norm": 0.18839283287525177, "learning_rate": 6.988521133943209e-05, "loss": 0.0073, "step": 24080 }, { "action_loss": 0.021437084302306175, "epoch": 21.654676258992804, "step": 24080 }, { "epoch": 21.663669064748202, "grad_norm": 0.1708398163318634, "learning_rate": 6.985992363419966e-05, "loss": 0.009, "step": 24090 }, { "action_loss": 0.0031278806272894144, "epoch": 21.663669064748202, "step": 24090 }, { "epoch": 21.672661870503596, "grad_norm": 0.3748316466808319, "learning_rate": 6.983462989604682e-05, "loss": 0.0089, "step": 24100 }, { "action_loss": 0.003963697236031294, "epoch": 21.672661870503596, "step": 24100 }, { "epoch": 21.681654676258994, "grad_norm": 0.2077457159757614, "learning_rate": 6.980933013265709e-05, "loss": 0.006, "step": 24110 }, { "action_loss": 0.009133714251220226, "epoch": 21.681654676258994, "step": 24110 }, { "epoch": 21.690647482014388, "grad_norm": 0.30219465494155884, "learning_rate": 6.978402435171592e-05, "loss": 0.0142, "step": 24120 }, { "action_loss": 0.00897566694766283, "epoch": 21.690647482014388, "step": 24120 }, { "epoch": 21.699640287769785, "grad_norm": 0.16504615545272827, "learning_rate": 6.975871256091052e-05, "loss": 0.0092, "step": 24130 }, { "action_loss": 0.0032644979655742645, "epoch": 21.699640287769785, "step": 24130 }, { "epoch": 21.70863309352518, "grad_norm": 0.1591460257768631, "learning_rate": 6.973339476792995e-05, "loss": 0.0064, "step": 24140 }, { "action_loss": 0.0044375816360116005, "epoch": 21.70863309352518, "step": 24140 }, { "epoch": 21.717625899280577, "grad_norm": 0.16439886391162872, "learning_rate": 6.970807098046505e-05, "loss": 0.0062, "step": 24150 }, { "action_loss": 0.0039009058382362127, "epoch": 21.717625899280577, "step": 24150 }, { "epoch": 21.72661870503597, "grad_norm": 0.1950024962425232, "learning_rate": 6.968274120620858e-05, "loss": 0.0081, "step": 24160 }, { "action_loss": 0.0026050356682389975, "epoch": 21.72661870503597, "step": 24160 }, { "epoch": 21.735611510791365, "grad_norm": 0.25988075137138367, "learning_rate": 6.965740545285499e-05, "loss": 0.0057, "step": 24170 }, { "action_loss": 0.0041802856139838696, "epoch": 21.735611510791365, "step": 24170 }, { "epoch": 21.744604316546763, "grad_norm": 0.15007635951042175, "learning_rate": 6.963206372810068e-05, "loss": 0.0071, "step": 24180 }, { "action_loss": 0.007391380611807108, "epoch": 21.744604316546763, "step": 24180 }, { "epoch": 21.753597122302157, "grad_norm": 0.16217945516109467, "learning_rate": 6.960671603964375e-05, "loss": 0.0095, "step": 24190 }, { "action_loss": 0.02685329131782055, "epoch": 21.753597122302157, "step": 24190 }, { "epoch": 21.762589928057555, "grad_norm": 0.16889652609825134, "learning_rate": 6.958136239518418e-05, "loss": 0.008, "step": 24200 }, { "action_loss": 0.002654423238709569, "epoch": 21.762589928057555, "step": 24200 }, { "epoch": 21.77158273381295, "grad_norm": 0.17876559495925903, "learning_rate": 6.955600280242371e-05, "loss": 0.0059, "step": 24210 }, { "action_loss": 0.005593838635832071, "epoch": 21.77158273381295, "step": 24210 }, { "epoch": 21.780575539568346, "grad_norm": 0.14808006584644318, "learning_rate": 6.953063726906596e-05, "loss": 0.0077, "step": 24220 }, { "action_loss": 0.008778471499681473, "epoch": 21.780575539568346, "step": 24220 }, { "epoch": 21.78956834532374, "grad_norm": 0.15167106688022614, "learning_rate": 6.950526580281626e-05, "loss": 0.0067, "step": 24230 }, { "action_loss": 0.003541960148140788, "epoch": 21.78956834532374, "step": 24230 }, { "epoch": 21.798561151079138, "grad_norm": 0.22205618023872375, "learning_rate": 6.947988841138184e-05, "loss": 0.0086, "step": 24240 }, { "action_loss": 0.0032157518435269594, "epoch": 21.798561151079138, "step": 24240 }, { "epoch": 21.807553956834532, "grad_norm": 0.18886157870292664, "learning_rate": 6.945450510247165e-05, "loss": 0.0064, "step": 24250 }, { "action_loss": 0.007425365503877401, "epoch": 21.807553956834532, "step": 24250 }, { "epoch": 21.81654676258993, "grad_norm": 0.16203109920024872, "learning_rate": 6.942911588379647e-05, "loss": 0.0061, "step": 24260 }, { "action_loss": 0.005007555242627859, "epoch": 21.81654676258993, "step": 24260 }, { "epoch": 21.825539568345324, "grad_norm": 0.2342839539051056, "learning_rate": 6.940372076306888e-05, "loss": 0.007, "step": 24270 }, { "action_loss": 0.008742528967559338, "epoch": 21.825539568345324, "step": 24270 }, { "epoch": 21.834532374100718, "grad_norm": 0.19454863667488098, "learning_rate": 6.937831974800326e-05, "loss": 0.0072, "step": 24280 }, { "action_loss": 0.006123280618339777, "epoch": 21.834532374100718, "step": 24280 }, { "epoch": 21.843525179856115, "grad_norm": 0.25106170773506165, "learning_rate": 6.935291284631574e-05, "loss": 0.0095, "step": 24290 }, { "action_loss": 0.01647345907986164, "epoch": 21.843525179856115, "step": 24290 }, { "epoch": 21.85251798561151, "grad_norm": 0.24194934964179993, "learning_rate": 6.932750006572428e-05, "loss": 0.0056, "step": 24300 }, { "action_loss": 0.007764898706227541, "epoch": 21.85251798561151, "step": 24300 }, { "epoch": 21.861510791366907, "grad_norm": 0.12046214938163757, "learning_rate": 6.930208141394863e-05, "loss": 0.0196, "step": 24310 }, { "action_loss": 0.007500977721065283, "epoch": 21.861510791366907, "step": 24310 }, { "epoch": 21.8705035971223, "grad_norm": 0.20936240255832672, "learning_rate": 6.927665689871026e-05, "loss": 0.0067, "step": 24320 }, { "action_loss": 0.012201376259326935, "epoch": 21.8705035971223, "step": 24320 }, { "epoch": 21.8794964028777, "grad_norm": 0.20058490335941315, "learning_rate": 6.925122652773253e-05, "loss": 0.0067, "step": 24330 }, { "action_loss": 0.00446776719763875, "epoch": 21.8794964028777, "step": 24330 }, { "epoch": 21.888489208633093, "grad_norm": 0.16050651669502258, "learning_rate": 6.922579030874046e-05, "loss": 0.0076, "step": 24340 }, { "action_loss": 0.015770575031638145, "epoch": 21.888489208633093, "step": 24340 }, { "epoch": 21.89748201438849, "grad_norm": 0.2056291550397873, "learning_rate": 6.920034824946093e-05, "loss": 0.0108, "step": 24350 }, { "action_loss": 0.005722854286432266, "epoch": 21.89748201438849, "step": 24350 }, { "epoch": 21.906474820143885, "grad_norm": 0.10734368115663528, "learning_rate": 6.917490035762255e-05, "loss": 0.0096, "step": 24360 }, { "action_loss": 0.03336096182465553, "epoch": 21.906474820143885, "step": 24360 }, { "epoch": 21.915467625899282, "grad_norm": 0.18242226541042328, "learning_rate": 6.914944664095573e-05, "loss": 0.012, "step": 24370 }, { "action_loss": 0.0035267763305455446, "epoch": 21.915467625899282, "step": 24370 }, { "epoch": 21.924460431654676, "grad_norm": 0.14238813519477844, "learning_rate": 6.912398710719264e-05, "loss": 0.0071, "step": 24380 }, { "action_loss": 0.004928385838866234, "epoch": 21.924460431654676, "step": 24380 }, { "epoch": 21.93345323741007, "grad_norm": 0.13385052978992462, "learning_rate": 6.90985217640672e-05, "loss": 0.005, "step": 24390 }, { "action_loss": 0.004074884578585625, "epoch": 21.93345323741007, "step": 24390 }, { "epoch": 21.942446043165468, "grad_norm": 0.1934206336736679, "learning_rate": 6.90730506193151e-05, "loss": 0.0071, "step": 24400 }, { "action_loss": 0.006826108321547508, "epoch": 21.942446043165468, "step": 24400 }, { "epoch": 21.951438848920862, "grad_norm": 0.18604226410388947, "learning_rate": 6.904757368067384e-05, "loss": 0.0086, "step": 24410 }, { "action_loss": 0.007064439356327057, "epoch": 21.951438848920862, "step": 24410 }, { "epoch": 21.96043165467626, "grad_norm": 0.22949378192424774, "learning_rate": 6.90220909558826e-05, "loss": 0.0097, "step": 24420 }, { "action_loss": 0.006139734294265509, "epoch": 21.96043165467626, "step": 24420 }, { "epoch": 21.969424460431654, "grad_norm": 0.27015748620033264, "learning_rate": 6.899660245268237e-05, "loss": 0.0072, "step": 24430 }, { "action_loss": 0.002878188854083419, "epoch": 21.969424460431654, "step": 24430 }, { "epoch": 21.97841726618705, "grad_norm": 0.1574489176273346, "learning_rate": 6.897110817881592e-05, "loss": 0.0085, "step": 24440 }, { "action_loss": 0.007045367266982794, "epoch": 21.97841726618705, "step": 24440 }, { "epoch": 21.987410071942445, "grad_norm": 0.2649923861026764, "learning_rate": 6.894560814202769e-05, "loss": 0.0088, "step": 24450 }, { "action_loss": 0.0030259585473686457, "epoch": 21.987410071942445, "step": 24450 }, { "epoch": 21.996402877697843, "grad_norm": 0.24255450069904327, "learning_rate": 6.892010235006394e-05, "loss": 0.0121, "step": 24460 }, { "action_loss": 0.004432907793670893, "epoch": 21.996402877697843, "step": 24460 }, { "epoch": 22.005395683453237, "grad_norm": 0.18661901354789734, "learning_rate": 6.889459081067264e-05, "loss": 0.0062, "step": 24470 }, { "action_loss": 0.013010963797569275, "epoch": 22.005395683453237, "step": 24470 }, { "epoch": 22.014388489208635, "grad_norm": 0.16528061032295227, "learning_rate": 6.886907353160356e-05, "loss": 0.0081, "step": 24480 }, { "action_loss": 0.014159935526549816, "epoch": 22.014388489208635, "step": 24480 }, { "epoch": 22.02338129496403, "grad_norm": 0.15194614231586456, "learning_rate": 6.884355052060814e-05, "loss": 0.0075, "step": 24490 }, { "action_loss": 0.007021886762231588, "epoch": 22.02338129496403, "step": 24490 }, { "epoch": 22.032374100719423, "grad_norm": 0.18309152126312256, "learning_rate": 6.88180217854396e-05, "loss": 0.0075, "step": 24500 }, { "action_loss": 0.0037400107830762863, "epoch": 22.032374100719423, "step": 24500 }, { "epoch": 22.04136690647482, "grad_norm": 0.19072702527046204, "learning_rate": 6.87924873338529e-05, "loss": 0.01, "step": 24510 }, { "action_loss": 0.009073772467672825, "epoch": 22.04136690647482, "step": 24510 }, { "epoch": 22.050359712230215, "grad_norm": 0.32484209537506104, "learning_rate": 6.876694717360475e-05, "loss": 0.0083, "step": 24520 }, { "action_loss": 0.011846080422401428, "epoch": 22.050359712230215, "step": 24520 }, { "epoch": 22.059352517985612, "grad_norm": 0.18768644332885742, "learning_rate": 6.874140131245355e-05, "loss": 0.0083, "step": 24530 }, { "action_loss": 0.011260812170803547, "epoch": 22.059352517985612, "step": 24530 }, { "epoch": 22.068345323741006, "grad_norm": 0.270301878452301, "learning_rate": 6.871584975815948e-05, "loss": 0.0093, "step": 24540 }, { "action_loss": 0.0029678530991077423, "epoch": 22.068345323741006, "step": 24540 }, { "epoch": 22.077338129496404, "grad_norm": 0.16093985736370087, "learning_rate": 6.86902925184844e-05, "loss": 0.0062, "step": 24550 }, { "action_loss": 0.0028567283879965544, "epoch": 22.077338129496404, "step": 24550 }, { "epoch": 22.086330935251798, "grad_norm": 0.24060700833797455, "learning_rate": 6.866472960119195e-05, "loss": 0.0091, "step": 24560 }, { "action_loss": 0.004968944936990738, "epoch": 22.086330935251798, "step": 24560 }, { "epoch": 22.095323741007196, "grad_norm": 0.1252395510673523, "learning_rate": 6.863916101404748e-05, "loss": 0.0067, "step": 24570 }, { "action_loss": 0.0033804134000092745, "epoch": 22.095323741007196, "step": 24570 }, { "epoch": 22.10431654676259, "grad_norm": 0.14150917530059814, "learning_rate": 6.8613586764818e-05, "loss": 0.0059, "step": 24580 }, { "action_loss": 0.002879348350688815, "epoch": 22.10431654676259, "step": 24580 }, { "epoch": 22.113309352517987, "grad_norm": 0.23377811908721924, "learning_rate": 6.858800686127233e-05, "loss": 0.0058, "step": 24590 }, { "action_loss": 0.011698402464389801, "epoch": 22.113309352517987, "step": 24590 }, { "epoch": 22.12230215827338, "grad_norm": 0.12243029475212097, "learning_rate": 6.856242131118097e-05, "loss": 0.0067, "step": 24600 }, { "action_loss": 0.0073942462913692, "epoch": 22.12230215827338, "step": 24600 }, { "epoch": 22.131294964028775, "grad_norm": 0.1988018900156021, "learning_rate": 6.853683012231614e-05, "loss": 0.007, "step": 24610 }, { "action_loss": 0.0021802426781505346, "epoch": 22.131294964028775, "step": 24610 }, { "epoch": 22.140287769784173, "grad_norm": 0.1987009197473526, "learning_rate": 6.851123330245173e-05, "loss": 0.0069, "step": 24620 }, { "action_loss": 0.011484324000775814, "epoch": 22.140287769784173, "step": 24620 }, { "epoch": 22.149280575539567, "grad_norm": 0.16415292024612427, "learning_rate": 6.848563085936343e-05, "loss": 0.0072, "step": 24630 }, { "action_loss": 0.0028865996282547712, "epoch": 22.149280575539567, "step": 24630 }, { "epoch": 22.158273381294965, "grad_norm": 0.11259188503026962, "learning_rate": 6.846002280082853e-05, "loss": 0.0065, "step": 24640 }, { "action_loss": 0.004539028275758028, "epoch": 22.158273381294965, "step": 24640 }, { "epoch": 22.16726618705036, "grad_norm": 0.24010318517684937, "learning_rate": 6.843440913462614e-05, "loss": 0.006, "step": 24650 }, { "action_loss": 0.004156643990427256, "epoch": 22.16726618705036, "step": 24650 }, { "epoch": 22.176258992805757, "grad_norm": 0.22828492522239685, "learning_rate": 6.840878986853698e-05, "loss": 0.0081, "step": 24660 }, { "action_loss": 0.0053710900247097015, "epoch": 22.176258992805757, "step": 24660 }, { "epoch": 22.18525179856115, "grad_norm": 0.22074347734451294, "learning_rate": 6.838316501034352e-05, "loss": 0.0073, "step": 24670 }, { "action_loss": 0.007526783272624016, "epoch": 22.18525179856115, "step": 24670 }, { "epoch": 22.194244604316548, "grad_norm": 0.19530469179153442, "learning_rate": 6.83575345678299e-05, "loss": 0.0057, "step": 24680 }, { "action_loss": 0.00991456862539053, "epoch": 22.194244604316548, "step": 24680 }, { "epoch": 22.203237410071942, "grad_norm": 0.19420164823532104, "learning_rate": 6.833189854878196e-05, "loss": 0.0084, "step": 24690 }, { "action_loss": 0.0032188829500228167, "epoch": 22.203237410071942, "step": 24690 }, { "epoch": 22.21223021582734, "grad_norm": 0.21651877462863922, "learning_rate": 6.83062569609873e-05, "loss": 0.0074, "step": 24700 }, { "action_loss": 0.005433250218629837, "epoch": 22.21223021582734, "step": 24700 }, { "epoch": 22.221223021582734, "grad_norm": 0.18991561233997345, "learning_rate": 6.828060981223512e-05, "loss": 0.0054, "step": 24710 }, { "action_loss": 0.0032891370356082916, "epoch": 22.221223021582734, "step": 24710 }, { "epoch": 22.230215827338128, "grad_norm": 0.2474704533815384, "learning_rate": 6.825495711031634e-05, "loss": 0.0065, "step": 24720 }, { "action_loss": 0.007869507186114788, "epoch": 22.230215827338128, "step": 24720 }, { "epoch": 22.239208633093526, "grad_norm": 0.24457013607025146, "learning_rate": 6.822929886302359e-05, "loss": 0.0059, "step": 24730 }, { "action_loss": 0.002490781946107745, "epoch": 22.239208633093526, "step": 24730 }, { "epoch": 22.24820143884892, "grad_norm": 0.1464802324771881, "learning_rate": 6.820363507815116e-05, "loss": 0.0048, "step": 24740 }, { "action_loss": 0.004855165258049965, "epoch": 22.24820143884892, "step": 24740 }, { "epoch": 22.257194244604317, "grad_norm": 0.20740707218647003, "learning_rate": 6.817796576349501e-05, "loss": 0.0096, "step": 24750 }, { "action_loss": 0.005706506315618753, "epoch": 22.257194244604317, "step": 24750 }, { "epoch": 22.26618705035971, "grad_norm": 0.17627830803394318, "learning_rate": 6.815229092685285e-05, "loss": 0.0093, "step": 24760 }, { "action_loss": 0.008148792199790478, "epoch": 22.26618705035971, "step": 24760 }, { "epoch": 22.27517985611511, "grad_norm": 0.1291944682598114, "learning_rate": 6.812661057602399e-05, "loss": 0.0061, "step": 24770 }, { "action_loss": 0.0059443204663693905, "epoch": 22.27517985611511, "step": 24770 }, { "epoch": 22.284172661870503, "grad_norm": 0.10961876064538956, "learning_rate": 6.810092471880943e-05, "loss": 0.0075, "step": 24780 }, { "action_loss": 0.009718070738017559, "epoch": 22.284172661870503, "step": 24780 }, { "epoch": 22.2931654676259, "grad_norm": 0.23672804236412048, "learning_rate": 6.807523336301187e-05, "loss": 0.0083, "step": 24790 }, { "action_loss": 0.008717664517462254, "epoch": 22.2931654676259, "step": 24790 }, { "epoch": 22.302158273381295, "grad_norm": 0.24058397114276886, "learning_rate": 6.804953651643566e-05, "loss": 0.0093, "step": 24800 }, { "action_loss": 0.0145854027941823, "epoch": 22.302158273381295, "step": 24800 }, { "epoch": 22.31115107913669, "grad_norm": 0.16134314239025116, "learning_rate": 6.802383418688685e-05, "loss": 0.0092, "step": 24810 }, { "action_loss": 0.008707929402589798, "epoch": 22.31115107913669, "step": 24810 }, { "epoch": 22.320143884892087, "grad_norm": 0.21315298974514008, "learning_rate": 6.799812638217309e-05, "loss": 0.0079, "step": 24820 }, { "action_loss": 0.004332589451223612, "epoch": 22.320143884892087, "step": 24820 }, { "epoch": 22.32913669064748, "grad_norm": 0.17766164243221283, "learning_rate": 6.797241311010373e-05, "loss": 0.0079, "step": 24830 }, { "action_loss": 0.019784729927778244, "epoch": 22.32913669064748, "step": 24830 }, { "epoch": 22.33812949640288, "grad_norm": 0.19542817771434784, "learning_rate": 6.794669437848982e-05, "loss": 0.0093, "step": 24840 }, { "action_loss": 0.014186781831085682, "epoch": 22.33812949640288, "step": 24840 }, { "epoch": 22.347122302158272, "grad_norm": 0.16274318099021912, "learning_rate": 6.792097019514402e-05, "loss": 0.0075, "step": 24850 }, { "action_loss": 0.004757207352668047, "epoch": 22.347122302158272, "step": 24850 }, { "epoch": 22.35611510791367, "grad_norm": 0.2633272111415863, "learning_rate": 6.789524056788064e-05, "loss": 0.0079, "step": 24860 }, { "action_loss": 0.009520848281681538, "epoch": 22.35611510791367, "step": 24860 }, { "epoch": 22.365107913669064, "grad_norm": 0.12494265288114548, "learning_rate": 6.786950550451567e-05, "loss": 0.006, "step": 24870 }, { "action_loss": 0.0063351900316774845, "epoch": 22.365107913669064, "step": 24870 }, { "epoch": 22.37410071942446, "grad_norm": 0.15769217908382416, "learning_rate": 6.784376501286676e-05, "loss": 0.0072, "step": 24880 }, { "action_loss": 0.0036963678430765867, "epoch": 22.37410071942446, "step": 24880 }, { "epoch": 22.383093525179856, "grad_norm": 0.1824931800365448, "learning_rate": 6.781801910075316e-05, "loss": 0.005, "step": 24890 }, { "action_loss": 0.009250151924788952, "epoch": 22.383093525179856, "step": 24890 }, { "epoch": 22.392086330935253, "grad_norm": 0.20041118562221527, "learning_rate": 6.779226777599581e-05, "loss": 0.007, "step": 24900 }, { "action_loss": 0.004026030655950308, "epoch": 22.392086330935253, "step": 24900 }, { "epoch": 22.401079136690647, "grad_norm": 0.1305554062128067, "learning_rate": 6.776651104641729e-05, "loss": 0.009, "step": 24910 }, { "action_loss": 0.0076621831394732, "epoch": 22.401079136690647, "step": 24910 }, { "epoch": 22.41007194244604, "grad_norm": 0.30532291531562805, "learning_rate": 6.774074891984183e-05, "loss": 0.0081, "step": 24920 }, { "action_loss": 0.0029895177576690912, "epoch": 22.41007194244604, "step": 24920 }, { "epoch": 22.41906474820144, "grad_norm": 0.14048856496810913, "learning_rate": 6.771498140409526e-05, "loss": 0.0072, "step": 24930 }, { "action_loss": 0.003332085208967328, "epoch": 22.41906474820144, "step": 24930 }, { "epoch": 22.428057553956833, "grad_norm": 0.14345723390579224, "learning_rate": 6.768920850700506e-05, "loss": 0.0055, "step": 24940 }, { "action_loss": 0.0334392786026001, "epoch": 22.428057553956833, "step": 24940 }, { "epoch": 22.43705035971223, "grad_norm": 0.16042396426200867, "learning_rate": 6.766343023640039e-05, "loss": 0.0107, "step": 24950 }, { "action_loss": 0.004634654615074396, "epoch": 22.43705035971223, "step": 24950 }, { "epoch": 22.446043165467625, "grad_norm": 0.12379732728004456, "learning_rate": 6.763764660011198e-05, "loss": 0.0064, "step": 24960 }, { "action_loss": 0.0027959111612290144, "epoch": 22.446043165467625, "step": 24960 }, { "epoch": 22.455035971223023, "grad_norm": 0.18637776374816895, "learning_rate": 6.761185760597223e-05, "loss": 0.0054, "step": 24970 }, { "action_loss": 0.006093184929341078, "epoch": 22.455035971223023, "step": 24970 }, { "epoch": 22.464028776978417, "grad_norm": 0.15709108114242554, "learning_rate": 6.758606326181515e-05, "loss": 0.0075, "step": 24980 }, { "action_loss": 0.003387474687770009, "epoch": 22.464028776978417, "step": 24980 }, { "epoch": 22.473021582733814, "grad_norm": 0.19215330481529236, "learning_rate": 6.75602635754764e-05, "loss": 0.005, "step": 24990 }, { "action_loss": 0.005008829291909933, "epoch": 22.473021582733814, "step": 24990 }, { "epoch": 22.48201438848921, "grad_norm": 0.2436009794473648, "learning_rate": 6.75344585547932e-05, "loss": 0.0089, "step": 25000 }, { "action_loss": 0.003135506296530366, "epoch": 22.48201438848921, "step": 25000 }, { "epoch": 22.491007194244606, "grad_norm": 0.32376688718795776, "learning_rate": 6.750864820760449e-05, "loss": 0.0054, "step": 25010 }, { "action_loss": 0.0031721426639705896, "epoch": 22.491007194244606, "step": 25010 }, { "epoch": 22.5, "grad_norm": 0.2078847438097, "learning_rate": 6.748283254175072e-05, "loss": 0.0069, "step": 25020 }, { "action_loss": 0.01579139195382595, "epoch": 22.5, "step": 25020 }, { "epoch": 22.508992805755394, "grad_norm": 0.152187779545784, "learning_rate": 6.745701156507404e-05, "loss": 0.0074, "step": 25030 }, { "action_loss": 0.0021575551945716143, "epoch": 22.508992805755394, "step": 25030 }, { "epoch": 22.51798561151079, "grad_norm": 0.1215171068906784, "learning_rate": 6.743118528541818e-05, "loss": 0.0061, "step": 25040 }, { "action_loss": 0.003527992172166705, "epoch": 22.51798561151079, "step": 25040 }, { "epoch": 22.526978417266186, "grad_norm": 0.21581298112869263, "learning_rate": 6.740535371062846e-05, "loss": 0.0073, "step": 25050 }, { "action_loss": 0.00615744898095727, "epoch": 22.526978417266186, "step": 25050 }, { "epoch": 22.535971223021583, "grad_norm": 0.16210106015205383, "learning_rate": 6.737951684855185e-05, "loss": 0.0057, "step": 25060 }, { "action_loss": 0.006880644243210554, "epoch": 22.535971223021583, "step": 25060 }, { "epoch": 22.544964028776977, "grad_norm": 0.19416463375091553, "learning_rate": 6.735367470703691e-05, "loss": 0.0059, "step": 25070 }, { "action_loss": 0.005931349005550146, "epoch": 22.544964028776977, "step": 25070 }, { "epoch": 22.553956834532375, "grad_norm": 0.16869153082370758, "learning_rate": 6.732782729393379e-05, "loss": 0.0062, "step": 25080 }, { "action_loss": 0.00519176758825779, "epoch": 22.553956834532375, "step": 25080 }, { "epoch": 22.56294964028777, "grad_norm": 0.17948120832443237, "learning_rate": 6.730197461709425e-05, "loss": 0.0056, "step": 25090 }, { "action_loss": 0.0037317313253879547, "epoch": 22.56294964028777, "step": 25090 }, { "epoch": 22.571942446043167, "grad_norm": 0.14869344234466553, "learning_rate": 6.727611668437164e-05, "loss": 0.0105, "step": 25100 }, { "action_loss": 0.008288188837468624, "epoch": 22.571942446043167, "step": 25100 }, { "epoch": 22.58093525179856, "grad_norm": 0.11118009686470032, "learning_rate": 6.725025350362094e-05, "loss": 0.0056, "step": 25110 }, { "action_loss": 0.0043613794259727, "epoch": 22.58093525179856, "step": 25110 }, { "epoch": 22.58992805755396, "grad_norm": 0.2711292803287506, "learning_rate": 6.72243850826987e-05, "loss": 0.0053, "step": 25120 }, { "action_loss": 0.005017570685595274, "epoch": 22.58992805755396, "step": 25120 }, { "epoch": 22.598920863309353, "grad_norm": 0.2610696256160736, "learning_rate": 6.719851142946305e-05, "loss": 0.0064, "step": 25130 }, { "action_loss": 0.005182316061109304, "epoch": 22.598920863309353, "step": 25130 }, { "epoch": 22.607913669064747, "grad_norm": 0.2656976580619812, "learning_rate": 6.717263255177372e-05, "loss": 0.0104, "step": 25140 }, { "action_loss": 0.005355069879442453, "epoch": 22.607913669064747, "step": 25140 }, { "epoch": 22.616906474820144, "grad_norm": 0.17583343386650085, "learning_rate": 6.714674845749205e-05, "loss": 0.0062, "step": 25150 }, { "action_loss": 0.007698066532611847, "epoch": 22.616906474820144, "step": 25150 }, { "epoch": 22.62589928057554, "grad_norm": 0.16469411551952362, "learning_rate": 6.712085915448092e-05, "loss": 0.0072, "step": 25160 }, { "action_loss": 0.005245862063020468, "epoch": 22.62589928057554, "step": 25160 }, { "epoch": 22.634892086330936, "grad_norm": 0.1597890555858612, "learning_rate": 6.709496465060486e-05, "loss": 0.0071, "step": 25170 }, { "action_loss": 0.004108446184545755, "epoch": 22.634892086330936, "step": 25170 }, { "epoch": 22.64388489208633, "grad_norm": 0.1496053785085678, "learning_rate": 6.706906495372987e-05, "loss": 0.0051, "step": 25180 }, { "action_loss": 0.0037240181118249893, "epoch": 22.64388489208633, "step": 25180 }, { "epoch": 22.652877697841728, "grad_norm": 0.2223612666130066, "learning_rate": 6.704316007172365e-05, "loss": 0.0065, "step": 25190 }, { "action_loss": 0.005589833948761225, "epoch": 22.652877697841728, "step": 25190 }, { "epoch": 22.66187050359712, "grad_norm": 0.22664543986320496, "learning_rate": 6.701725001245539e-05, "loss": 0.0092, "step": 25200 }, { "action_loss": 0.01876606047153473, "epoch": 22.66187050359712, "step": 25200 }, { "epoch": 22.67086330935252, "grad_norm": 0.16628271341323853, "learning_rate": 6.699133478379588e-05, "loss": 0.0064, "step": 25210 }, { "action_loss": 0.005488768219947815, "epoch": 22.67086330935252, "step": 25210 }, { "epoch": 22.679856115107913, "grad_norm": 0.1461479514837265, "learning_rate": 6.69654143936175e-05, "loss": 0.0059, "step": 25220 }, { "action_loss": 0.004341562744230032, "epoch": 22.679856115107913, "step": 25220 }, { "epoch": 22.68884892086331, "grad_norm": 0.18176475167274475, "learning_rate": 6.693948884979419e-05, "loss": 0.0078, "step": 25230 }, { "action_loss": 0.0048492527566850185, "epoch": 22.68884892086331, "step": 25230 }, { "epoch": 22.697841726618705, "grad_norm": 0.27984851598739624, "learning_rate": 6.691355816020142e-05, "loss": 0.0051, "step": 25240 }, { "action_loss": 0.0073423832654953, "epoch": 22.697841726618705, "step": 25240 }, { "epoch": 22.7068345323741, "grad_norm": 0.21961817145347595, "learning_rate": 6.688762233271624e-05, "loss": 0.0069, "step": 25250 }, { "action_loss": 0.005739209707826376, "epoch": 22.7068345323741, "step": 25250 }, { "epoch": 22.715827338129497, "grad_norm": 0.13482393324375153, "learning_rate": 6.68616813752173e-05, "loss": 0.0082, "step": 25260 }, { "action_loss": 0.0027786523569375277, "epoch": 22.715827338129497, "step": 25260 }, { "epoch": 22.72482014388489, "grad_norm": 0.182160422205925, "learning_rate": 6.683573529558477e-05, "loss": 0.0059, "step": 25270 }, { "action_loss": 0.0026588321197777987, "epoch": 22.72482014388489, "step": 25270 }, { "epoch": 22.73381294964029, "grad_norm": 0.154049351811409, "learning_rate": 6.680978410170037e-05, "loss": 0.0059, "step": 25280 }, { "action_loss": 0.0030145731288939714, "epoch": 22.73381294964029, "step": 25280 }, { "epoch": 22.742805755395683, "grad_norm": 0.1341085135936737, "learning_rate": 6.678382780144741e-05, "loss": 0.0063, "step": 25290 }, { "action_loss": 0.013444068841636181, "epoch": 22.742805755395683, "step": 25290 }, { "epoch": 22.75179856115108, "grad_norm": 0.16040809452533722, "learning_rate": 6.675786640271071e-05, "loss": 0.0055, "step": 25300 }, { "action_loss": 0.0015500813024118543, "epoch": 22.75179856115108, "step": 25300 }, { "epoch": 22.760791366906474, "grad_norm": 0.14701315760612488, "learning_rate": 6.673189991337665e-05, "loss": 0.0063, "step": 25310 }, { "action_loss": 0.005601355340331793, "epoch": 22.760791366906474, "step": 25310 }, { "epoch": 22.769784172661872, "grad_norm": 0.20537371933460236, "learning_rate": 6.670592834133317e-05, "loss": 0.0102, "step": 25320 }, { "action_loss": 0.005065672565251589, "epoch": 22.769784172661872, "step": 25320 }, { "epoch": 22.778776978417266, "grad_norm": 0.20282988250255585, "learning_rate": 6.667995169446979e-05, "loss": 0.0058, "step": 25330 }, { "action_loss": 0.007182369474321604, "epoch": 22.778776978417266, "step": 25330 }, { "epoch": 22.78776978417266, "grad_norm": 0.19175919890403748, "learning_rate": 6.665396998067747e-05, "loss": 0.0111, "step": 25340 }, { "action_loss": 0.0071662552654743195, "epoch": 22.78776978417266, "step": 25340 }, { "epoch": 22.796762589928058, "grad_norm": 0.22460147738456726, "learning_rate": 6.66279832078488e-05, "loss": 0.0058, "step": 25350 }, { "action_loss": 0.007447266951203346, "epoch": 22.796762589928058, "step": 25350 }, { "epoch": 22.805755395683452, "grad_norm": 0.20831766724586487, "learning_rate": 6.660199138387786e-05, "loss": 0.0047, "step": 25360 }, { "action_loss": 0.011249467730522156, "epoch": 22.805755395683452, "step": 25360 }, { "epoch": 22.81474820143885, "grad_norm": 0.21179147064685822, "learning_rate": 6.65759945166603e-05, "loss": 0.0104, "step": 25370 }, { "action_loss": 0.00356329046189785, "epoch": 22.81474820143885, "step": 25370 }, { "epoch": 22.823741007194243, "grad_norm": 0.17504015564918518, "learning_rate": 6.654999261409326e-05, "loss": 0.011, "step": 25380 }, { "action_loss": 0.020964229479432106, "epoch": 22.823741007194243, "step": 25380 }, { "epoch": 22.83273381294964, "grad_norm": 0.160370871424675, "learning_rate": 6.652398568407544e-05, "loss": 0.0081, "step": 25390 }, { "action_loss": 0.019145401194691658, "epoch": 22.83273381294964, "step": 25390 }, { "epoch": 22.841726618705035, "grad_norm": 0.3017446994781494, "learning_rate": 6.649797373450707e-05, "loss": 0.0099, "step": 25400 }, { "action_loss": 0.010059191845357418, "epoch": 22.841726618705035, "step": 25400 }, { "epoch": 22.850719424460433, "grad_norm": 0.11364187300205231, "learning_rate": 6.647195677328988e-05, "loss": 0.0061, "step": 25410 }, { "action_loss": 0.0024974618572741747, "epoch": 22.850719424460433, "step": 25410 }, { "epoch": 22.859712230215827, "grad_norm": 0.11117319017648697, "learning_rate": 6.644593480832712e-05, "loss": 0.0064, "step": 25420 }, { "action_loss": 0.0033497719559818506, "epoch": 22.859712230215827, "step": 25420 }, { "epoch": 22.868705035971225, "grad_norm": 0.2060272991657257, "learning_rate": 6.641990784752363e-05, "loss": 0.0076, "step": 25430 }, { "action_loss": 0.005229191854596138, "epoch": 22.868705035971225, "step": 25430 }, { "epoch": 22.87769784172662, "grad_norm": 0.20247027277946472, "learning_rate": 6.639387589878566e-05, "loss": 0.0073, "step": 25440 }, { "action_loss": 0.004508265759795904, "epoch": 22.87769784172662, "step": 25440 }, { "epoch": 22.886690647482013, "grad_norm": 0.21533088386058807, "learning_rate": 6.636783897002103e-05, "loss": 0.0065, "step": 25450 }, { "action_loss": 0.010308276861906052, "epoch": 22.886690647482013, "step": 25450 }, { "epoch": 22.89568345323741, "grad_norm": 0.24611243605613708, "learning_rate": 6.63417970691391e-05, "loss": 0.0076, "step": 25460 }, { "action_loss": 0.002924887463450432, "epoch": 22.89568345323741, "step": 25460 }, { "epoch": 22.904676258992804, "grad_norm": 0.2137889564037323, "learning_rate": 6.63157502040507e-05, "loss": 0.0058, "step": 25470 }, { "action_loss": 0.018409045413136482, "epoch": 22.904676258992804, "step": 25470 }, { "epoch": 22.913669064748202, "grad_norm": 0.1659458577632904, "learning_rate": 6.628969838266819e-05, "loss": 0.0093, "step": 25480 }, { "action_loss": 0.016628919169306755, "epoch": 22.913669064748202, "step": 25480 }, { "epoch": 22.922661870503596, "grad_norm": 0.1570654809474945, "learning_rate": 6.626364161290541e-05, "loss": 0.0073, "step": 25490 }, { "action_loss": 0.0056835138238966465, "epoch": 22.922661870503596, "step": 25490 }, { "epoch": 22.931654676258994, "grad_norm": 0.13597950339317322, "learning_rate": 6.623757990267774e-05, "loss": 0.0076, "step": 25500 }, { "action_loss": 0.011166672222316265, "epoch": 22.931654676258994, "step": 25500 }, { "epoch": 22.940647482014388, "grad_norm": 0.19811466336250305, "learning_rate": 6.621151325990201e-05, "loss": 0.0062, "step": 25510 }, { "action_loss": 0.010392846539616585, "epoch": 22.940647482014388, "step": 25510 }, { "epoch": 22.949640287769785, "grad_norm": 0.12184985727071762, "learning_rate": 6.618544169249657e-05, "loss": 0.0081, "step": 25520 }, { "action_loss": 0.0021552180405706167, "epoch": 22.949640287769785, "step": 25520 }, { "epoch": 22.95863309352518, "grad_norm": 0.16590359807014465, "learning_rate": 6.615936520838133e-05, "loss": 0.008, "step": 25530 }, { "action_loss": 0.0019282953580841422, "epoch": 22.95863309352518, "step": 25530 }, { "epoch": 22.967625899280577, "grad_norm": 0.14127396047115326, "learning_rate": 6.613328381547759e-05, "loss": 0.0041, "step": 25540 }, { "action_loss": 0.0053606475703418255, "epoch": 22.967625899280577, "step": 25540 }, { "epoch": 22.97661870503597, "grad_norm": 0.3037321865558624, "learning_rate": 6.610719752170821e-05, "loss": 0.0166, "step": 25550 }, { "action_loss": 0.003001978388056159, "epoch": 22.97661870503597, "step": 25550 }, { "epoch": 22.985611510791365, "grad_norm": 0.196933776140213, "learning_rate": 6.60811063349975e-05, "loss": 0.007, "step": 25560 }, { "action_loss": 0.003404170274734497, "epoch": 22.985611510791365, "step": 25560 }, { "epoch": 22.994604316546763, "grad_norm": 0.22942976653575897, "learning_rate": 6.605501026327127e-05, "loss": 0.0063, "step": 25570 }, { "action_loss": 0.009617277421057224, "epoch": 22.994604316546763, "step": 25570 }, { "epoch": 23.003597122302157, "grad_norm": 0.16177168488502502, "learning_rate": 6.602890931445685e-05, "loss": 0.0115, "step": 25580 }, { "action_loss": 0.008153991773724556, "epoch": 23.003597122302157, "step": 25580 }, { "epoch": 23.012589928057555, "grad_norm": 0.21570737659931183, "learning_rate": 6.6002803496483e-05, "loss": 0.0091, "step": 25590 }, { "action_loss": 0.0026050040032714605, "epoch": 23.012589928057555, "step": 25590 }, { "epoch": 23.02158273381295, "grad_norm": 0.305534303188324, "learning_rate": 6.597669281727997e-05, "loss": 0.0098, "step": 25600 }, { "action_loss": 0.01769830286502838, "epoch": 23.02158273381295, "step": 25600 }, { "epoch": 23.030575539568346, "grad_norm": 0.17155268788337708, "learning_rate": 6.595057728477949e-05, "loss": 0.0094, "step": 25610 }, { "action_loss": 0.008607055060565472, "epoch": 23.030575539568346, "step": 25610 }, { "epoch": 23.03956834532374, "grad_norm": 0.21638955175876617, "learning_rate": 6.59244569069148e-05, "loss": 0.0073, "step": 25620 }, { "action_loss": 0.014612269587814808, "epoch": 23.03956834532374, "step": 25620 }, { "epoch": 23.048561151079138, "grad_norm": 0.21442373096942902, "learning_rate": 6.589833169162054e-05, "loss": 0.0074, "step": 25630 }, { "action_loss": 0.00579841248691082, "epoch": 23.048561151079138, "step": 25630 }, { "epoch": 23.057553956834532, "grad_norm": 0.2130850851535797, "learning_rate": 6.587220164683291e-05, "loss": 0.0094, "step": 25640 }, { "action_loss": 0.011310514993965626, "epoch": 23.057553956834532, "step": 25640 }, { "epoch": 23.06654676258993, "grad_norm": 0.17699137330055237, "learning_rate": 6.58460667804895e-05, "loss": 0.0071, "step": 25650 }, { "action_loss": 0.007493786513805389, "epoch": 23.06654676258993, "step": 25650 }, { "epoch": 23.075539568345324, "grad_norm": 0.21053552627563477, "learning_rate": 6.581992710052938e-05, "loss": 0.0093, "step": 25660 }, { "action_loss": 0.006743710953742266, "epoch": 23.075539568345324, "step": 25660 }, { "epoch": 23.084532374100718, "grad_norm": 0.24042491614818573, "learning_rate": 6.579378261489311e-05, "loss": 0.0077, "step": 25670 }, { "action_loss": 0.013153756968677044, "epoch": 23.084532374100718, "step": 25670 }, { "epoch": 23.093525179856115, "grad_norm": 0.17000503838062286, "learning_rate": 6.576763333152268e-05, "loss": 0.0086, "step": 25680 }, { "action_loss": 0.009525801986455917, "epoch": 23.093525179856115, "step": 25680 }, { "epoch": 23.10251798561151, "grad_norm": 0.2355850487947464, "learning_rate": 6.574147925836159e-05, "loss": 0.0061, "step": 25690 }, { "action_loss": 0.016708338633179665, "epoch": 23.10251798561151, "step": 25690 }, { "epoch": 23.111510791366907, "grad_norm": 0.24713163077831268, "learning_rate": 6.571532040335472e-05, "loss": 0.0068, "step": 25700 }, { "action_loss": 0.002848467556759715, "epoch": 23.111510791366907, "step": 25700 }, { "epoch": 23.1205035971223, "grad_norm": 0.3123553395271301, "learning_rate": 6.568915677444845e-05, "loss": 0.0069, "step": 25710 }, { "action_loss": 0.01915588229894638, "epoch": 23.1205035971223, "step": 25710 }, { "epoch": 23.1294964028777, "grad_norm": 0.17659570276737213, "learning_rate": 6.56629883795906e-05, "loss": 0.0082, "step": 25720 }, { "action_loss": 0.005022871773689985, "epoch": 23.1294964028777, "step": 25720 }, { "epoch": 23.138489208633093, "grad_norm": 0.121311254799366, "learning_rate": 6.563681522673043e-05, "loss": 0.0068, "step": 25730 }, { "action_loss": 0.009539895690977573, "epoch": 23.138489208633093, "step": 25730 }, { "epoch": 23.14748201438849, "grad_norm": 0.24038323760032654, "learning_rate": 6.561063732381867e-05, "loss": 0.0081, "step": 25740 }, { "action_loss": 0.0062636397778987885, "epoch": 23.14748201438849, "step": 25740 }, { "epoch": 23.156474820143885, "grad_norm": 0.11750558018684387, "learning_rate": 6.558445467880745e-05, "loss": 0.0064, "step": 25750 }, { "action_loss": 0.0044312370009720325, "epoch": 23.156474820143885, "step": 25750 }, { "epoch": 23.165467625899282, "grad_norm": 0.17958366870880127, "learning_rate": 6.55582672996504e-05, "loss": 0.0068, "step": 25760 }, { "action_loss": 0.004670004826039076, "epoch": 23.165467625899282, "step": 25760 }, { "epoch": 23.174460431654676, "grad_norm": 0.20511792600154877, "learning_rate": 6.553207519430253e-05, "loss": 0.0061, "step": 25770 }, { "action_loss": 0.007833369076251984, "epoch": 23.174460431654676, "step": 25770 }, { "epoch": 23.18345323741007, "grad_norm": 0.09098143875598907, "learning_rate": 6.550587837072032e-05, "loss": 0.008, "step": 25780 }, { "action_loss": 0.011267437599599361, "epoch": 23.18345323741007, "step": 25780 }, { "epoch": 23.192446043165468, "grad_norm": 0.12897925078868866, "learning_rate": 6.547967683686166e-05, "loss": 0.0074, "step": 25790 }, { "action_loss": 0.007474867627024651, "epoch": 23.192446043165468, "step": 25790 }, { "epoch": 23.201438848920862, "grad_norm": 0.13759845495224, "learning_rate": 6.545347060068591e-05, "loss": 0.0049, "step": 25800 }, { "action_loss": 0.005593905691057444, "epoch": 23.201438848920862, "step": 25800 }, { "epoch": 23.21043165467626, "grad_norm": 0.20175805687904358, "learning_rate": 6.542725967015382e-05, "loss": 0.0072, "step": 25810 }, { "action_loss": 0.002638584701344371, "epoch": 23.21043165467626, "step": 25810 }, { "epoch": 23.219424460431654, "grad_norm": 0.22185632586479187, "learning_rate": 6.540104405322757e-05, "loss": 0.0048, "step": 25820 }, { "action_loss": 0.0038144588470458984, "epoch": 23.219424460431654, "step": 25820 }, { "epoch": 23.22841726618705, "grad_norm": 0.12406900525093079, "learning_rate": 6.537482375787077e-05, "loss": 0.0087, "step": 25830 }, { "action_loss": 0.003700933651998639, "epoch": 23.22841726618705, "step": 25830 }, { "epoch": 23.237410071942445, "grad_norm": 0.24841922521591187, "learning_rate": 6.534859879204845e-05, "loss": 0.0084, "step": 25840 }, { "action_loss": 0.013388384133577347, "epoch": 23.237410071942445, "step": 25840 }, { "epoch": 23.246402877697843, "grad_norm": 0.18611714243888855, "learning_rate": 6.532236916372709e-05, "loss": 0.0073, "step": 25850 }, { "action_loss": 0.006374584510922432, "epoch": 23.246402877697843, "step": 25850 }, { "epoch": 23.255395683453237, "grad_norm": 0.1813172996044159, "learning_rate": 6.529613488087454e-05, "loss": 0.006, "step": 25860 }, { "action_loss": 0.002507203258574009, "epoch": 23.255395683453237, "step": 25860 }, { "epoch": 23.264388489208635, "grad_norm": 0.23571789264678955, "learning_rate": 6.526989595146009e-05, "loss": 0.0052, "step": 25870 }, { "action_loss": 0.003712643636390567, "epoch": 23.264388489208635, "step": 25870 }, { "epoch": 23.27338129496403, "grad_norm": 0.10846464335918427, "learning_rate": 6.524365238345441e-05, "loss": 0.0052, "step": 25880 }, { "action_loss": 0.01681937463581562, "epoch": 23.27338129496403, "step": 25880 }, { "epoch": 23.282374100719423, "grad_norm": 0.144648477435112, "learning_rate": 6.521740418482964e-05, "loss": 0.0075, "step": 25890 }, { "action_loss": 0.004674294497817755, "epoch": 23.282374100719423, "step": 25890 }, { "epoch": 23.29136690647482, "grad_norm": 0.1599372923374176, "learning_rate": 6.519115136355925e-05, "loss": 0.0067, "step": 25900 }, { "action_loss": 0.008591997437179089, "epoch": 23.29136690647482, "step": 25900 }, { "epoch": 23.300359712230215, "grad_norm": 0.12603536248207092, "learning_rate": 6.51648939276182e-05, "loss": 0.0068, "step": 25910 }, { "action_loss": 0.013803903013467789, "epoch": 23.300359712230215, "step": 25910 }, { "epoch": 23.309352517985612, "grad_norm": 0.1625729203224182, "learning_rate": 6.513863188498277e-05, "loss": 0.0064, "step": 25920 }, { "action_loss": 0.0036650430411100388, "epoch": 23.309352517985612, "step": 25920 }, { "epoch": 23.318345323741006, "grad_norm": 0.16697999835014343, "learning_rate": 6.511236524363068e-05, "loss": 0.0059, "step": 25930 }, { "action_loss": 0.007273148745298386, "epoch": 23.318345323741006, "step": 25930 }, { "epoch": 23.327338129496404, "grad_norm": 0.15562020242214203, "learning_rate": 6.508609401154104e-05, "loss": 0.0073, "step": 25940 }, { "action_loss": 0.0021960437297821045, "epoch": 23.327338129496404, "step": 25940 }, { "epoch": 23.336330935251798, "grad_norm": 0.11927028000354767, "learning_rate": 6.505981819669439e-05, "loss": 0.0072, "step": 25950 }, { "action_loss": 0.01756816916167736, "epoch": 23.336330935251798, "step": 25950 }, { "epoch": 23.345323741007196, "grad_norm": 0.15839827060699463, "learning_rate": 6.503353780707258e-05, "loss": 0.0086, "step": 25960 }, { "action_loss": 0.0030785761773586273, "epoch": 23.345323741007196, "step": 25960 }, { "epoch": 23.35431654676259, "grad_norm": 0.1945168375968933, "learning_rate": 6.500725285065895e-05, "loss": 0.0074, "step": 25970 }, { "action_loss": 0.008338036946952343, "epoch": 23.35431654676259, "step": 25970 }, { "epoch": 23.363309352517987, "grad_norm": 0.22957438230514526, "learning_rate": 6.498096333543813e-05, "loss": 0.0086, "step": 25980 }, { "action_loss": 0.0028473520651459694, "epoch": 23.363309352517987, "step": 25980 }, { "epoch": 23.37230215827338, "grad_norm": 0.2062073051929474, "learning_rate": 6.49546692693962e-05, "loss": 0.0047, "step": 25990 }, { "action_loss": 0.011846408247947693, "epoch": 23.37230215827338, "step": 25990 }, { "epoch": 23.381294964028775, "grad_norm": 0.18735769391059875, "learning_rate": 6.492837066052059e-05, "loss": 0.0099, "step": 26000 }, { "action_loss": 0.0033527191262692213, "epoch": 23.381294964028775, "step": 26000 }, { "epoch": 23.390287769784173, "grad_norm": 0.26462647318840027, "learning_rate": 6.490206751680014e-05, "loss": 0.01, "step": 26010 }, { "action_loss": 0.0021455571986734867, "epoch": 23.390287769784173, "step": 26010 }, { "epoch": 23.399280575539567, "grad_norm": 0.15053246915340424, "learning_rate": 6.487575984622505e-05, "loss": 0.0105, "step": 26020 }, { "action_loss": 0.008739530108869076, "epoch": 23.399280575539567, "step": 26020 }, { "epoch": 23.408273381294965, "grad_norm": 0.15939372777938843, "learning_rate": 6.484944765678689e-05, "loss": 0.0072, "step": 26030 }, { "action_loss": 0.005049699917435646, "epoch": 23.408273381294965, "step": 26030 }, { "epoch": 23.41726618705036, "grad_norm": 0.1715230643749237, "learning_rate": 6.482313095647861e-05, "loss": 0.007, "step": 26040 }, { "action_loss": 0.008892064914107323, "epoch": 23.41726618705036, "step": 26040 }, { "epoch": 23.426258992805757, "grad_norm": 0.17297221720218658, "learning_rate": 6.479680975329451e-05, "loss": 0.009, "step": 26050 }, { "action_loss": 0.0030787617433816195, "epoch": 23.426258992805757, "step": 26050 }, { "epoch": 23.43525179856115, "grad_norm": 0.15224182605743408, "learning_rate": 6.477048405523031e-05, "loss": 0.0068, "step": 26060 }, { "action_loss": 0.0017565869493409991, "epoch": 23.43525179856115, "step": 26060 }, { "epoch": 23.444244604316548, "grad_norm": 0.16950435936450958, "learning_rate": 6.474415387028304e-05, "loss": 0.0082, "step": 26070 }, { "action_loss": 0.0049308291636407375, "epoch": 23.444244604316548, "step": 26070 }, { "epoch": 23.453237410071942, "grad_norm": 0.19186779856681824, "learning_rate": 6.471781920645114e-05, "loss": 0.0052, "step": 26080 }, { "action_loss": 0.010149862617254257, "epoch": 23.453237410071942, "step": 26080 }, { "epoch": 23.46223021582734, "grad_norm": 0.2013535052537918, "learning_rate": 6.469148007173434e-05, "loss": 0.008, "step": 26090 }, { "action_loss": 0.0037928142119199038, "epoch": 23.46223021582734, "step": 26090 }, { "epoch": 23.471223021582734, "grad_norm": 0.18151600658893585, "learning_rate": 6.466513647413381e-05, "loss": 0.0079, "step": 26100 }, { "action_loss": 0.004668459761887789, "epoch": 23.471223021582734, "step": 26100 }, { "epoch": 23.480215827338128, "grad_norm": 0.18460704386234283, "learning_rate": 6.463878842165203e-05, "loss": 0.0073, "step": 26110 }, { "action_loss": 0.002861914224922657, "epoch": 23.480215827338128, "step": 26110 }, { "epoch": 23.489208633093526, "grad_norm": 0.1446414291858673, "learning_rate": 6.461243592229286e-05, "loss": 0.0066, "step": 26120 }, { "action_loss": 0.002122667618095875, "epoch": 23.489208633093526, "step": 26120 }, { "epoch": 23.49820143884892, "grad_norm": 0.11490065604448318, "learning_rate": 6.458607898406146e-05, "loss": 0.0042, "step": 26130 }, { "action_loss": 0.005168154835700989, "epoch": 23.49820143884892, "step": 26130 }, { "epoch": 23.507194244604317, "grad_norm": 0.2060752809047699, "learning_rate": 6.455971761496439e-05, "loss": 0.0043, "step": 26140 }, { "action_loss": 0.003685617120936513, "epoch": 23.507194244604317, "step": 26140 }, { "epoch": 23.51618705035971, "grad_norm": 0.16670259833335876, "learning_rate": 6.453335182300953e-05, "loss": 0.0053, "step": 26150 }, { "action_loss": 0.0075791687704622746, "epoch": 23.51618705035971, "step": 26150 }, { "epoch": 23.52517985611511, "grad_norm": 0.13710521161556244, "learning_rate": 6.450698161620612e-05, "loss": 0.0055, "step": 26160 }, { "action_loss": 0.0022754815872758627, "epoch": 23.52517985611511, "step": 26160 }, { "epoch": 23.534172661870503, "grad_norm": 0.25661200284957886, "learning_rate": 6.448060700256473e-05, "loss": 0.0111, "step": 26170 }, { "action_loss": 0.003253682516515255, "epoch": 23.534172661870503, "step": 26170 }, { "epoch": 23.5431654676259, "grad_norm": 0.11017633229494095, "learning_rate": 6.445422799009726e-05, "loss": 0.0048, "step": 26180 }, { "action_loss": 0.004237747751176357, "epoch": 23.5431654676259, "step": 26180 }, { "epoch": 23.552158273381295, "grad_norm": 0.26733842492103577, "learning_rate": 6.442784458681699e-05, "loss": 0.0077, "step": 26190 }, { "action_loss": 0.00425491901114583, "epoch": 23.552158273381295, "step": 26190 }, { "epoch": 23.56115107913669, "grad_norm": 0.20402055978775024, "learning_rate": 6.440145680073847e-05, "loss": 0.0065, "step": 26200 }, { "action_loss": 0.004019901156425476, "epoch": 23.56115107913669, "step": 26200 }, { "epoch": 23.570143884892087, "grad_norm": 0.2079840749502182, "learning_rate": 6.437506463987762e-05, "loss": 0.0093, "step": 26210 }, { "action_loss": 0.007857399992644787, "epoch": 23.570143884892087, "step": 26210 }, { "epoch": 23.57913669064748, "grad_norm": 0.14626489579677582, "learning_rate": 6.434866811225168e-05, "loss": 0.005, "step": 26220 }, { "action_loss": 0.0038379747420549393, "epoch": 23.57913669064748, "step": 26220 }, { "epoch": 23.58812949640288, "grad_norm": 0.1819944679737091, "learning_rate": 6.432226722587923e-05, "loss": 0.0079, "step": 26230 }, { "action_loss": 0.006439406890422106, "epoch": 23.58812949640288, "step": 26230 }, { "epoch": 23.597122302158272, "grad_norm": 0.17903123795986176, "learning_rate": 6.429586198878015e-05, "loss": 0.0075, "step": 26240 }, { "action_loss": 0.08116664737462997, "epoch": 23.597122302158272, "step": 26240 }, { "epoch": 23.60611510791367, "grad_norm": 0.17144116759300232, "learning_rate": 6.426945240897566e-05, "loss": 0.0136, "step": 26250 }, { "action_loss": 0.012109808623790741, "epoch": 23.60611510791367, "step": 26250 }, { "epoch": 23.615107913669064, "grad_norm": 0.1471407413482666, "learning_rate": 6.424303849448829e-05, "loss": 0.0068, "step": 26260 }, { "action_loss": 0.002007201546803117, "epoch": 23.615107913669064, "step": 26260 }, { "epoch": 23.62410071942446, "grad_norm": 0.19666828215122223, "learning_rate": 6.42166202533419e-05, "loss": 0.0063, "step": 26270 }, { "action_loss": 0.0029909054283052683, "epoch": 23.62410071942446, "step": 26270 }, { "epoch": 23.633093525179856, "grad_norm": 0.16711944341659546, "learning_rate": 6.419019769356164e-05, "loss": 0.0056, "step": 26280 }, { "action_loss": 0.007120687048882246, "epoch": 23.633093525179856, "step": 26280 }, { "epoch": 23.642086330935253, "grad_norm": 0.23754458129405975, "learning_rate": 6.416377082317398e-05, "loss": 0.0066, "step": 26290 }, { "action_loss": 0.004515997599810362, "epoch": 23.642086330935253, "step": 26290 }, { "epoch": 23.651079136690647, "grad_norm": 0.22380876541137695, "learning_rate": 6.413733965020674e-05, "loss": 0.0063, "step": 26300 }, { "action_loss": 0.010014165192842484, "epoch": 23.651079136690647, "step": 26300 }, { "epoch": 23.66007194244604, "grad_norm": 0.18316639959812164, "learning_rate": 6.411090418268896e-05, "loss": 0.005, "step": 26310 }, { "action_loss": 0.0035199541598558426, "epoch": 23.66007194244604, "step": 26310 }, { "epoch": 23.66906474820144, "grad_norm": 0.18718074262142181, "learning_rate": 6.408446442865109e-05, "loss": 0.0048, "step": 26320 }, { "action_loss": 0.002865511691197753, "epoch": 23.66906474820144, "step": 26320 }, { "epoch": 23.678057553956833, "grad_norm": 0.1580868512392044, "learning_rate": 6.405802039612479e-05, "loss": 0.008, "step": 26330 }, { "action_loss": 0.0030180674511939287, "epoch": 23.678057553956833, "step": 26330 }, { "epoch": 23.68705035971223, "grad_norm": 0.1903744786977768, "learning_rate": 6.403157209314308e-05, "loss": 0.0078, "step": 26340 }, { "action_loss": 0.007025512401014566, "epoch": 23.68705035971223, "step": 26340 }, { "epoch": 23.696043165467625, "grad_norm": 0.137442484498024, "learning_rate": 6.400511952774024e-05, "loss": 0.0066, "step": 26350 }, { "action_loss": 0.007002606522291899, "epoch": 23.696043165467625, "step": 26350 }, { "epoch": 23.705035971223023, "grad_norm": 0.135738343000412, "learning_rate": 6.397866270795187e-05, "loss": 0.0069, "step": 26360 }, { "action_loss": 0.005191440228372812, "epoch": 23.705035971223023, "step": 26360 }, { "epoch": 23.714028776978417, "grad_norm": 0.19550731778144836, "learning_rate": 6.395220164181489e-05, "loss": 0.0055, "step": 26370 }, { "action_loss": 0.013365724124014378, "epoch": 23.714028776978417, "step": 26370 }, { "epoch": 23.723021582733814, "grad_norm": 0.18997442722320557, "learning_rate": 6.39257363373674e-05, "loss": 0.0056, "step": 26380 }, { "action_loss": 0.0059707327745854855, "epoch": 23.723021582733814, "step": 26380 }, { "epoch": 23.73201438848921, "grad_norm": 0.2314041256904602, "learning_rate": 6.389926680264892e-05, "loss": 0.0092, "step": 26390 }, { "action_loss": 0.005274735391139984, "epoch": 23.73201438848921, "step": 26390 }, { "epoch": 23.741007194244606, "grad_norm": 0.19691155850887299, "learning_rate": 6.387279304570017e-05, "loss": 0.0117, "step": 26400 }, { "action_loss": 0.0037266502622514963, "epoch": 23.741007194244606, "step": 26400 }, { "epoch": 23.75, "grad_norm": 0.1531476080417633, "learning_rate": 6.384631507456319e-05, "loss": 0.0077, "step": 26410 }, { "action_loss": 0.005449966993182898, "epoch": 23.75, "step": 26410 }, { "epoch": 23.758992805755394, "grad_norm": 0.22754043340682983, "learning_rate": 6.381983289728126e-05, "loss": 0.0068, "step": 26420 }, { "action_loss": 0.0031229157466441393, "epoch": 23.758992805755394, "step": 26420 }, { "epoch": 23.76798561151079, "grad_norm": 0.16961069405078888, "learning_rate": 6.3793346521899e-05, "loss": 0.0078, "step": 26430 }, { "action_loss": 0.005889866966754198, "epoch": 23.76798561151079, "step": 26430 }, { "epoch": 23.776978417266186, "grad_norm": 0.2265051007270813, "learning_rate": 6.376685595646226e-05, "loss": 0.0078, "step": 26440 }, { "action_loss": 0.017656756564974785, "epoch": 23.776978417266186, "step": 26440 }, { "epoch": 23.785971223021583, "grad_norm": 0.1799442023038864, "learning_rate": 6.374036120901816e-05, "loss": 0.0096, "step": 26450 }, { "action_loss": 0.011066923849284649, "epoch": 23.785971223021583, "step": 26450 }, { "epoch": 23.794964028776977, "grad_norm": 0.2598797380924225, "learning_rate": 6.371386228761514e-05, "loss": 0.007, "step": 26460 }, { "action_loss": 0.0066643753089010715, "epoch": 23.794964028776977, "step": 26460 }, { "epoch": 23.803956834532375, "grad_norm": 0.29125502705574036, "learning_rate": 6.368735920030283e-05, "loss": 0.0103, "step": 26470 }, { "action_loss": 0.004755785223096609, "epoch": 23.803956834532375, "step": 26470 }, { "epoch": 23.81294964028777, "grad_norm": 0.15530356764793396, "learning_rate": 6.366085195513218e-05, "loss": 0.0096, "step": 26480 }, { "action_loss": 0.005403988528996706, "epoch": 23.81294964028777, "step": 26480 }, { "epoch": 23.821942446043167, "grad_norm": 0.1750209778547287, "learning_rate": 6.363434056015543e-05, "loss": 0.013, "step": 26490 }, { "action_loss": 0.007606126833707094, "epoch": 23.821942446043167, "step": 26490 }, { "epoch": 23.83093525179856, "grad_norm": 0.13678397238254547, "learning_rate": 6.360782502342599e-05, "loss": 0.0057, "step": 26500 }, { "action_loss": 0.003239157609641552, "epoch": 23.83093525179856, "step": 26500 }, { "epoch": 23.83992805755396, "grad_norm": 0.21588382124900818, "learning_rate": 6.358130535299862e-05, "loss": 0.0088, "step": 26510 }, { "action_loss": 0.003184481291100383, "epoch": 23.83992805755396, "step": 26510 }, { "epoch": 23.848920863309353, "grad_norm": 0.1835767924785614, "learning_rate": 6.355478155692926e-05, "loss": 0.0087, "step": 26520 }, { "action_loss": 0.00674862926825881, "epoch": 23.848920863309353, "step": 26520 }, { "epoch": 23.857913669064747, "grad_norm": 0.19586634635925293, "learning_rate": 6.352825364327517e-05, "loss": 0.0066, "step": 26530 }, { "action_loss": 0.01468248013406992, "epoch": 23.857913669064747, "step": 26530 }, { "epoch": 23.866906474820144, "grad_norm": 0.1551462709903717, "learning_rate": 6.350172162009482e-05, "loss": 0.0106, "step": 26540 }, { "action_loss": 0.004236435052007437, "epoch": 23.866906474820144, "step": 26540 }, { "epoch": 23.87589928057554, "grad_norm": 0.3288169801235199, "learning_rate": 6.347518549544793e-05, "loss": 0.0079, "step": 26550 }, { "action_loss": 0.0024800533428788185, "epoch": 23.87589928057554, "step": 26550 }, { "epoch": 23.884892086330936, "grad_norm": 0.2239164263010025, "learning_rate": 6.344864527739547e-05, "loss": 0.0054, "step": 26560 }, { "action_loss": 0.007363034877926111, "epoch": 23.884892086330936, "step": 26560 }, { "epoch": 23.89388489208633, "grad_norm": 0.15586252510547638, "learning_rate": 6.342210097399966e-05, "loss": 0.0073, "step": 26570 }, { "action_loss": 0.010196668095886707, "epoch": 23.89388489208633, "step": 26570 }, { "epoch": 23.902877697841728, "grad_norm": 0.17147360742092133, "learning_rate": 6.339555259332398e-05, "loss": 0.0125, "step": 26580 }, { "action_loss": 0.008406589739024639, "epoch": 23.902877697841728, "step": 26580 }, { "epoch": 23.91187050359712, "grad_norm": 0.2530252933502197, "learning_rate": 6.33690001434331e-05, "loss": 0.0132, "step": 26590 }, { "action_loss": 0.014356805942952633, "epoch": 23.91187050359712, "step": 26590 }, { "epoch": 23.92086330935252, "grad_norm": 0.2764173150062561, "learning_rate": 6.334244363239296e-05, "loss": 0.0086, "step": 26600 }, { "action_loss": 0.005398759618401527, "epoch": 23.92086330935252, "step": 26600 }, { "epoch": 23.929856115107913, "grad_norm": 0.18750743567943573, "learning_rate": 6.331588306827073e-05, "loss": 0.0061, "step": 26610 }, { "action_loss": 0.003869471838697791, "epoch": 23.929856115107913, "step": 26610 }, { "epoch": 23.93884892086331, "grad_norm": 0.10648311674594879, "learning_rate": 6.328931845913483e-05, "loss": 0.0058, "step": 26620 }, { "action_loss": 0.0036035235971212387, "epoch": 23.93884892086331, "step": 26620 }, { "epoch": 23.947841726618705, "grad_norm": 0.16321855783462524, "learning_rate": 6.326274981305484e-05, "loss": 0.0052, "step": 26630 }, { "action_loss": 0.007855335250496864, "epoch": 23.947841726618705, "step": 26630 }, { "epoch": 23.9568345323741, "grad_norm": 0.22009579837322235, "learning_rate": 6.323617713810166e-05, "loss": 0.0089, "step": 26640 }, { "action_loss": 0.002715133363381028, "epoch": 23.9568345323741, "step": 26640 }, { "epoch": 23.965827338129497, "grad_norm": 0.14568863809108734, "learning_rate": 6.320960044234734e-05, "loss": 0.0066, "step": 26650 }, { "action_loss": 0.005918632727116346, "epoch": 23.965827338129497, "step": 26650 }, { "epoch": 23.97482014388489, "grad_norm": 0.23680615425109863, "learning_rate": 6.318301973386518e-05, "loss": 0.012, "step": 26660 }, { "action_loss": 0.00967490952461958, "epoch": 23.97482014388489, "step": 26660 }, { "epoch": 23.98381294964029, "grad_norm": 0.25847887992858887, "learning_rate": 6.315643502072971e-05, "loss": 0.0082, "step": 26670 }, { "action_loss": 0.0032490305602550507, "epoch": 23.98381294964029, "step": 26670 }, { "epoch": 23.992805755395683, "grad_norm": 0.14884215593338013, "learning_rate": 6.312984631101667e-05, "loss": 0.0056, "step": 26680 }, { "action_loss": 0.0035201471764594316, "epoch": 23.992805755395683, "step": 26680 }, { "epoch": 24.00179856115108, "grad_norm": 0.1710968166589737, "learning_rate": 6.310325361280297e-05, "loss": 0.0053, "step": 26690 }, { "action_loss": 0.002570182317867875, "epoch": 24.00179856115108, "step": 26690 }, { "epoch": 24.010791366906474, "grad_norm": 0.1624165177345276, "learning_rate": 6.30766569341668e-05, "loss": 0.0049, "step": 26700 }, { "action_loss": 0.0030122266616672277, "epoch": 24.010791366906474, "step": 26700 }, { "epoch": 24.019784172661872, "grad_norm": 0.15059679746627808, "learning_rate": 6.305005628318753e-05, "loss": 0.0077, "step": 26710 }, { "action_loss": 0.004212601576000452, "epoch": 24.019784172661872, "step": 26710 }, { "epoch": 24.028776978417266, "grad_norm": 0.10761700570583344, "learning_rate": 6.302345166794572e-05, "loss": 0.0056, "step": 26720 }, { "action_loss": 0.009943044744431973, "epoch": 24.028776978417266, "step": 26720 }, { "epoch": 24.037769784172664, "grad_norm": 0.08567365258932114, "learning_rate": 6.299684309652316e-05, "loss": 0.0065, "step": 26730 }, { "action_loss": 0.002054210053756833, "epoch": 24.037769784172664, "step": 26730 }, { "epoch": 24.046762589928058, "grad_norm": 0.16927345097064972, "learning_rate": 6.297023057700283e-05, "loss": 0.0051, "step": 26740 }, { "action_loss": 0.02221936732530594, "epoch": 24.046762589928058, "step": 26740 }, { "epoch": 24.055755395683452, "grad_norm": 0.12161923944950104, "learning_rate": 6.294361411746891e-05, "loss": 0.0068, "step": 26750 }, { "action_loss": 0.005396475549787283, "epoch": 24.055755395683452, "step": 26750 }, { "epoch": 24.06474820143885, "grad_norm": 0.12005402147769928, "learning_rate": 6.291699372600677e-05, "loss": 0.0065, "step": 26760 }, { "action_loss": 0.005229733884334564, "epoch": 24.06474820143885, "step": 26760 }, { "epoch": 24.073741007194243, "grad_norm": 0.2288879007101059, "learning_rate": 6.2890369410703e-05, "loss": 0.0093, "step": 26770 }, { "action_loss": 0.003331375541165471, "epoch": 24.073741007194243, "step": 26770 }, { "epoch": 24.08273381294964, "grad_norm": 0.15614676475524902, "learning_rate": 6.286374117964534e-05, "loss": 0.0067, "step": 26780 }, { "action_loss": 0.004131868947297335, "epoch": 24.08273381294964, "step": 26780 }, { "epoch": 24.091726618705035, "grad_norm": 0.151165172457695, "learning_rate": 6.283710904092277e-05, "loss": 0.0059, "step": 26790 }, { "action_loss": 0.01206302922219038, "epoch": 24.091726618705035, "step": 26790 }, { "epoch": 24.100719424460433, "grad_norm": 0.20732292532920837, "learning_rate": 6.281047300262542e-05, "loss": 0.0062, "step": 26800 }, { "action_loss": 0.004182090517133474, "epoch": 24.100719424460433, "step": 26800 }, { "epoch": 24.109712230215827, "grad_norm": 0.3230375349521637, "learning_rate": 6.278383307284461e-05, "loss": 0.0074, "step": 26810 }, { "action_loss": 0.005773420911282301, "epoch": 24.109712230215827, "step": 26810 }, { "epoch": 24.118705035971225, "grad_norm": 0.11760028451681137, "learning_rate": 6.275718925967284e-05, "loss": 0.0062, "step": 26820 }, { "action_loss": 0.01016550324857235, "epoch": 24.118705035971225, "step": 26820 }, { "epoch": 24.12769784172662, "grad_norm": 0.2214633822441101, "learning_rate": 6.273054157120382e-05, "loss": 0.0075, "step": 26830 }, { "action_loss": 0.006248770747333765, "epoch": 24.12769784172662, "step": 26830 }, { "epoch": 24.136690647482013, "grad_norm": 0.199753075838089, "learning_rate": 6.270389001553238e-05, "loss": 0.0074, "step": 26840 }, { "action_loss": 0.004700257908552885, "epoch": 24.136690647482013, "step": 26840 }, { "epoch": 24.14568345323741, "grad_norm": 0.16346916556358337, "learning_rate": 6.26772346007546e-05, "loss": 0.0084, "step": 26850 }, { "action_loss": 0.010172552429139614, "epoch": 24.14568345323741, "step": 26850 }, { "epoch": 24.154676258992804, "grad_norm": 0.20086364448070526, "learning_rate": 6.265057533496767e-05, "loss": 0.0076, "step": 26860 }, { "action_loss": 0.005728188902139664, "epoch": 24.154676258992804, "step": 26860 }, { "epoch": 24.163669064748202, "grad_norm": 0.27376511693000793, "learning_rate": 6.262391222626997e-05, "loss": 0.0093, "step": 26870 }, { "action_loss": 0.0036973869428038597, "epoch": 24.163669064748202, "step": 26870 }, { "epoch": 24.172661870503596, "grad_norm": 0.14069390296936035, "learning_rate": 6.259724528276106e-05, "loss": 0.012, "step": 26880 }, { "action_loss": 0.007021488156169653, "epoch": 24.172661870503596, "step": 26880 }, { "epoch": 24.181654676258994, "grad_norm": 0.2366764396429062, "learning_rate": 6.257057451254162e-05, "loss": 0.0066, "step": 26890 }, { "action_loss": 0.005297382827848196, "epoch": 24.181654676258994, "step": 26890 }, { "epoch": 24.190647482014388, "grad_norm": 0.18841424584388733, "learning_rate": 6.254389992371357e-05, "loss": 0.0067, "step": 26900 }, { "action_loss": 0.019571542739868164, "epoch": 24.190647482014388, "step": 26900 }, { "epoch": 24.199640287769785, "grad_norm": 0.1735343188047409, "learning_rate": 6.25172215243799e-05, "loss": 0.0077, "step": 26910 }, { "action_loss": 0.005661568138748407, "epoch": 24.199640287769785, "step": 26910 }, { "epoch": 24.20863309352518, "grad_norm": 0.16007918119430542, "learning_rate": 6.249053932264486e-05, "loss": 0.009, "step": 26920 }, { "action_loss": 0.030323302373290062, "epoch": 24.20863309352518, "step": 26920 }, { "epoch": 24.217625899280577, "grad_norm": 0.15092985332012177, "learning_rate": 6.246385332661376e-05, "loss": 0.0073, "step": 26930 }, { "action_loss": 0.016815589740872383, "epoch": 24.217625899280577, "step": 26930 }, { "epoch": 24.22661870503597, "grad_norm": 0.15332980453968048, "learning_rate": 6.24371635443931e-05, "loss": 0.0083, "step": 26940 }, { "action_loss": 0.0035382185596972704, "epoch": 24.22661870503597, "step": 26940 }, { "epoch": 24.235611510791365, "grad_norm": 0.13876430690288544, "learning_rate": 6.241046998409054e-05, "loss": 0.004, "step": 26950 }, { "action_loss": 0.004919067490845919, "epoch": 24.235611510791365, "step": 26950 }, { "epoch": 24.244604316546763, "grad_norm": 0.2076234519481659, "learning_rate": 6.238377265381489e-05, "loss": 0.0063, "step": 26960 }, { "action_loss": 0.0054917712695896626, "epoch": 24.244604316546763, "step": 26960 }, { "epoch": 24.253597122302157, "grad_norm": 0.20901159942150116, "learning_rate": 6.235707156167607e-05, "loss": 0.0112, "step": 26970 }, { "action_loss": 0.008129957132041454, "epoch": 24.253597122302157, "step": 26970 }, { "epoch": 24.262589928057555, "grad_norm": 0.14947108924388885, "learning_rate": 6.233036671578519e-05, "loss": 0.0107, "step": 26980 }, { "action_loss": 0.034011539071798325, "epoch": 24.262589928057555, "step": 26980 }, { "epoch": 24.27158273381295, "grad_norm": 0.16297507286071777, "learning_rate": 6.230365812425445e-05, "loss": 0.0078, "step": 26990 }, { "action_loss": 0.014660525135695934, "epoch": 24.27158273381295, "step": 26990 }, { "epoch": 24.280575539568346, "grad_norm": 0.14921040832996368, "learning_rate": 6.227694579519724e-05, "loss": 0.0089, "step": 27000 }, { "action_loss": 0.013552593998610973, "epoch": 24.280575539568346, "step": 27000 }, { "epoch": 24.28956834532374, "grad_norm": 0.11742975562810898, "learning_rate": 6.225022973672805e-05, "loss": 0.0075, "step": 27010 }, { "action_loss": 0.03357116878032684, "epoch": 24.28956834532374, "step": 27010 }, { "epoch": 24.298561151079138, "grad_norm": 0.231123149394989, "learning_rate": 6.222350995696253e-05, "loss": 0.0117, "step": 27020 }, { "action_loss": 0.010401152074337006, "epoch": 24.298561151079138, "step": 27020 }, { "epoch": 24.307553956834532, "grad_norm": 0.21366530656814575, "learning_rate": 6.21967864640174e-05, "loss": 0.0057, "step": 27030 }, { "action_loss": 0.003209909424185753, "epoch": 24.307553956834532, "step": 27030 }, { "epoch": 24.31654676258993, "grad_norm": 0.20492470264434814, "learning_rate": 6.217005926601059e-05, "loss": 0.0048, "step": 27040 }, { "action_loss": 0.017653964459896088, "epoch": 24.31654676258993, "step": 27040 }, { "epoch": 24.325539568345324, "grad_norm": 0.19387950003147125, "learning_rate": 6.214332837106111e-05, "loss": 0.0054, "step": 27050 }, { "action_loss": 0.005160643719136715, "epoch": 24.325539568345324, "step": 27050 }, { "epoch": 24.334532374100718, "grad_norm": 0.1272025853395462, "learning_rate": 6.21165937872891e-05, "loss": 0.0061, "step": 27060 }, { "action_loss": 0.025145409628748894, "epoch": 24.334532374100718, "step": 27060 }, { "epoch": 24.343525179856115, "grad_norm": 0.2553445100784302, "learning_rate": 6.208985552281582e-05, "loss": 0.0087, "step": 27070 }, { "action_loss": 0.007860440760850906, "epoch": 24.343525179856115, "step": 27070 }, { "epoch": 24.35251798561151, "grad_norm": 0.22471553087234497, "learning_rate": 6.206311358576364e-05, "loss": 0.0129, "step": 27080 }, { "action_loss": 0.004864891059696674, "epoch": 24.35251798561151, "step": 27080 }, { "epoch": 24.361510791366907, "grad_norm": 0.14965270459651947, "learning_rate": 6.203636798425608e-05, "loss": 0.0071, "step": 27090 }, { "action_loss": 0.009400002658367157, "epoch": 24.361510791366907, "step": 27090 }, { "epoch": 24.3705035971223, "grad_norm": 0.2761859893798828, "learning_rate": 6.20096187264177e-05, "loss": 0.0093, "step": 27100 }, { "action_loss": 0.01018498558551073, "epoch": 24.3705035971223, "step": 27100 }, { "epoch": 24.3794964028777, "grad_norm": 0.17277827858924866, "learning_rate": 6.198286582037425e-05, "loss": 0.0075, "step": 27110 }, { "action_loss": 0.009521705098450184, "epoch": 24.3794964028777, "step": 27110 }, { "epoch": 24.388489208633093, "grad_norm": 0.2416364550590515, "learning_rate": 6.195610927425256e-05, "loss": 0.0101, "step": 27120 }, { "action_loss": 0.005032530054450035, "epoch": 24.388489208633093, "step": 27120 }, { "epoch": 24.39748201438849, "grad_norm": 0.20154063403606415, "learning_rate": 6.192934909618056e-05, "loss": 0.0045, "step": 27130 }, { "action_loss": 0.005304480437189341, "epoch": 24.39748201438849, "step": 27130 }, { "epoch": 24.406474820143885, "grad_norm": 0.1218758225440979, "learning_rate": 6.190258529428728e-05, "loss": 0.0064, "step": 27140 }, { "action_loss": 0.08168824017047882, "epoch": 24.406474820143885, "step": 27140 }, { "epoch": 24.415467625899282, "grad_norm": 0.1344750076532364, "learning_rate": 6.187581787670285e-05, "loss": 0.0148, "step": 27150 }, { "action_loss": 0.0043977792374789715, "epoch": 24.415467625899282, "step": 27150 }, { "epoch": 24.424460431654676, "grad_norm": 0.2504812777042389, "learning_rate": 6.184904685155852e-05, "loss": 0.0071, "step": 27160 }, { "action_loss": 0.011011180467903614, "epoch": 24.424460431654676, "step": 27160 }, { "epoch": 24.43345323741007, "grad_norm": 0.16069146990776062, "learning_rate": 6.18222722269866e-05, "loss": 0.0089, "step": 27170 }, { "action_loss": 0.005152558442205191, "epoch": 24.43345323741007, "step": 27170 }, { "epoch": 24.442446043165468, "grad_norm": 0.13116808235645294, "learning_rate": 6.179549401112053e-05, "loss": 0.0046, "step": 27180 }, { "action_loss": 0.008590475656092167, "epoch": 24.442446043165468, "step": 27180 }, { "epoch": 24.451438848920862, "grad_norm": 0.18226589262485504, "learning_rate": 6.176871221209482e-05, "loss": 0.0085, "step": 27190 }, { "action_loss": 0.010083479806780815, "epoch": 24.451438848920862, "step": 27190 }, { "epoch": 24.46043165467626, "grad_norm": 0.14459075033664703, "learning_rate": 6.174192683804508e-05, "loss": 0.0071, "step": 27200 }, { "action_loss": 0.006518195848912001, "epoch": 24.46043165467626, "step": 27200 }, { "epoch": 24.469424460431654, "grad_norm": 0.17436693608760834, "learning_rate": 6.1715137897108e-05, "loss": 0.0058, "step": 27210 }, { "action_loss": 0.0027146178763359785, "epoch": 24.469424460431654, "step": 27210 }, { "epoch": 24.47841726618705, "grad_norm": 0.17817947268486023, "learning_rate": 6.168834539742134e-05, "loss": 0.0098, "step": 27220 }, { "action_loss": 0.008606728166341782, "epoch": 24.47841726618705, "step": 27220 }, { "epoch": 24.487410071942445, "grad_norm": 0.26355212926864624, "learning_rate": 6.166154934712397e-05, "loss": 0.0088, "step": 27230 }, { "action_loss": 0.004204914439469576, "epoch": 24.487410071942445, "step": 27230 }, { "epoch": 24.496402877697843, "grad_norm": 0.21806193888187408, "learning_rate": 6.163474975435581e-05, "loss": 0.006, "step": 27240 }, { "action_loss": 0.012085716240108013, "epoch": 24.496402877697843, "step": 27240 }, { "epoch": 24.505395683453237, "grad_norm": 0.23302747309207916, "learning_rate": 6.160794662725787e-05, "loss": 0.0085, "step": 27250 }, { "action_loss": 0.0023602237924933434, "epoch": 24.505395683453237, "step": 27250 }, { "epoch": 24.514388489208635, "grad_norm": 0.16019442677497864, "learning_rate": 6.158113997397222e-05, "loss": 0.0069, "step": 27260 }, { "action_loss": 0.007646080106496811, "epoch": 24.514388489208635, "step": 27260 }, { "epoch": 24.52338129496403, "grad_norm": 0.1598278284072876, "learning_rate": 6.155432980264205e-05, "loss": 0.0063, "step": 27270 }, { "action_loss": 0.0028771348297595978, "epoch": 24.52338129496403, "step": 27270 }, { "epoch": 24.532374100719423, "grad_norm": 0.1145181804895401, "learning_rate": 6.152751612141156e-05, "loss": 0.0095, "step": 27280 }, { "action_loss": 0.0023641493171453476, "epoch": 24.532374100719423, "step": 27280 }, { "epoch": 24.54136690647482, "grad_norm": 0.12746277451515198, "learning_rate": 6.150069893842602e-05, "loss": 0.0063, "step": 27290 }, { "action_loss": 0.004416308831423521, "epoch": 24.54136690647482, "step": 27290 }, { "epoch": 24.550359712230215, "grad_norm": 0.18272386491298676, "learning_rate": 6.147387826183182e-05, "loss": 0.0058, "step": 27300 }, { "action_loss": 0.026332950219511986, "epoch": 24.550359712230215, "step": 27300 }, { "epoch": 24.559352517985612, "grad_norm": 0.26283812522888184, "learning_rate": 6.144705409977635e-05, "loss": 0.0114, "step": 27310 }, { "action_loss": 0.004050513729453087, "epoch": 24.559352517985612, "step": 27310 }, { "epoch": 24.568345323741006, "grad_norm": 0.09707880020141602, "learning_rate": 6.142022646040808e-05, "loss": 0.0062, "step": 27320 }, { "action_loss": 0.011593532748520374, "epoch": 24.568345323741006, "step": 27320 }, { "epoch": 24.577338129496404, "grad_norm": 0.13673649728298187, "learning_rate": 6.139339535187653e-05, "loss": 0.0062, "step": 27330 }, { "action_loss": 0.004941877909004688, "epoch": 24.577338129496404, "step": 27330 }, { "epoch": 24.586330935251798, "grad_norm": 0.13634683191776276, "learning_rate": 6.136656078233232e-05, "loss": 0.0042, "step": 27340 }, { "action_loss": 0.0017499903915449977, "epoch": 24.586330935251798, "step": 27340 }, { "epoch": 24.595323741007196, "grad_norm": 0.1634853482246399, "learning_rate": 6.133972275992707e-05, "loss": 0.0078, "step": 27350 }, { "action_loss": 0.002150443848222494, "epoch": 24.595323741007196, "step": 27350 }, { "epoch": 24.60431654676259, "grad_norm": 0.16691954433918, "learning_rate": 6.131288129281342e-05, "loss": 0.0081, "step": 27360 }, { "action_loss": 0.0131762670353055, "epoch": 24.60431654676259, "step": 27360 }, { "epoch": 24.613309352517987, "grad_norm": 0.20846104621887207, "learning_rate": 6.128603638914516e-05, "loss": 0.0082, "step": 27370 }, { "action_loss": 0.005263570696115494, "epoch": 24.613309352517987, "step": 27370 }, { "epoch": 24.62230215827338, "grad_norm": 0.20416833460330963, "learning_rate": 6.125918805707704e-05, "loss": 0.006, "step": 27380 }, { "action_loss": 0.009708833880722523, "epoch": 24.62230215827338, "step": 27380 }, { "epoch": 24.631294964028775, "grad_norm": 0.1524936705827713, "learning_rate": 6.123233630476485e-05, "loss": 0.0077, "step": 27390 }, { "action_loss": 0.0022190420422703028, "epoch": 24.631294964028775, "step": 27390 }, { "epoch": 24.640287769784173, "grad_norm": 0.14402730762958527, "learning_rate": 6.120548114036547e-05, "loss": 0.006, "step": 27400 }, { "action_loss": 0.0038791708648204803, "epoch": 24.640287769784173, "step": 27400 }, { "epoch": 24.649280575539567, "grad_norm": 0.3101344406604767, "learning_rate": 6.117862257203679e-05, "loss": 0.007, "step": 27410 }, { "action_loss": 0.0067169624380767345, "epoch": 24.649280575539567, "step": 27410 }, { "epoch": 24.658273381294965, "grad_norm": 0.2248082011938095, "learning_rate": 6.115176060793771e-05, "loss": 0.0091, "step": 27420 }, { "action_loss": 0.0027923285961151123, "epoch": 24.658273381294965, "step": 27420 }, { "epoch": 24.66726618705036, "grad_norm": 0.11181396245956421, "learning_rate": 6.112489525622822e-05, "loss": 0.0039, "step": 27430 }, { "action_loss": 0.004604620393365622, "epoch": 24.66726618705036, "step": 27430 }, { "epoch": 24.676258992805757, "grad_norm": 0.2763071656227112, "learning_rate": 6.109802652506928e-05, "loss": 0.0064, "step": 27440 }, { "action_loss": 0.00586102157831192, "epoch": 24.676258992805757, "step": 27440 }, { "epoch": 24.68525179856115, "grad_norm": 0.24866363406181335, "learning_rate": 6.107115442262291e-05, "loss": 0.0063, "step": 27450 }, { "action_loss": 0.003044355660676956, "epoch": 24.68525179856115, "step": 27450 }, { "epoch": 24.694244604316548, "grad_norm": 0.14758656919002533, "learning_rate": 6.104427895705214e-05, "loss": 0.007, "step": 27460 }, { "action_loss": 0.0028201586101204157, "epoch": 24.694244604316548, "step": 27460 }, { "epoch": 24.703237410071942, "grad_norm": 0.13637225329875946, "learning_rate": 6.101740013652103e-05, "loss": 0.0045, "step": 27470 }, { "action_loss": 0.015639206394553185, "epoch": 24.703237410071942, "step": 27470 }, { "epoch": 24.71223021582734, "grad_norm": 0.20006275177001953, "learning_rate": 6.099051796919465e-05, "loss": 0.0083, "step": 27480 }, { "action_loss": 0.06448420882225037, "epoch": 24.71223021582734, "step": 27480 }, { "epoch": 24.721223021582734, "grad_norm": 0.17440883815288544, "learning_rate": 6.096363246323911e-05, "loss": 0.0113, "step": 27490 }, { "action_loss": 0.004535691812634468, "epoch": 24.721223021582734, "step": 27490 }, { "epoch": 24.730215827338128, "grad_norm": 0.22064562141895294, "learning_rate": 6.0936743626821504e-05, "loss": 0.0095, "step": 27500 }, { "action_loss": 0.0025802862364798784, "epoch": 24.730215827338128, "step": 27500 }, { "epoch": 24.739208633093526, "grad_norm": 0.1327032446861267, "learning_rate": 6.090985146810996e-05, "loss": 0.0058, "step": 27510 }, { "action_loss": 0.0038005157839506865, "epoch": 24.739208633093526, "step": 27510 }, { "epoch": 24.74820143884892, "grad_norm": 0.23521243035793304, "learning_rate": 6.088295599527357e-05, "loss": 0.0073, "step": 27520 }, { "action_loss": 0.008443430997431278, "epoch": 24.74820143884892, "step": 27520 }, { "epoch": 24.757194244604317, "grad_norm": 0.14085084199905396, "learning_rate": 6.085605721648252e-05, "loss": 0.0051, "step": 27530 }, { "action_loss": 0.021256454288959503, "epoch": 24.757194244604317, "step": 27530 }, { "epoch": 24.76618705035971, "grad_norm": 0.2930562496185303, "learning_rate": 6.082915513990792e-05, "loss": 0.0067, "step": 27540 }, { "action_loss": 0.0061865863390266895, "epoch": 24.76618705035971, "step": 27540 }, { "epoch": 24.77517985611511, "grad_norm": 0.11843178421258926, "learning_rate": 6.080224977372192e-05, "loss": 0.0063, "step": 27550 }, { "action_loss": 0.012277175672352314, "epoch": 24.77517985611511, "step": 27550 }, { "epoch": 24.784172661870503, "grad_norm": 0.2154674082994461, "learning_rate": 6.0775341126097666e-05, "loss": 0.0059, "step": 27560 }, { "action_loss": 0.005234729498624802, "epoch": 24.784172661870503, "step": 27560 }, { "epoch": 24.7931654676259, "grad_norm": 0.1795222908258438, "learning_rate": 6.074842920520926e-05, "loss": 0.0071, "step": 27570 }, { "action_loss": 0.005352567881345749, "epoch": 24.7931654676259, "step": 27570 }, { "epoch": 24.802158273381295, "grad_norm": 0.26032188534736633, "learning_rate": 6.072151401923186e-05, "loss": 0.0059, "step": 27580 }, { "action_loss": 0.005448120180517435, "epoch": 24.802158273381295, "step": 27580 }, { "epoch": 24.81115107913669, "grad_norm": 0.20118975639343262, "learning_rate": 6.069459557634159e-05, "loss": 0.0082, "step": 27590 }, { "action_loss": 0.004090479575097561, "epoch": 24.81115107913669, "step": 27590 }, { "epoch": 24.820143884892087, "grad_norm": 0.24252991378307343, "learning_rate": 6.066767388471557e-05, "loss": 0.0082, "step": 27600 }, { "action_loss": 0.015682900324463844, "epoch": 24.820143884892087, "step": 27600 }, { "epoch": 24.82913669064748, "grad_norm": 0.2077145129442215, "learning_rate": 6.064074895253188e-05, "loss": 0.0084, "step": 27610 }, { "action_loss": 0.006579292938113213, "epoch": 24.82913669064748, "step": 27610 }, { "epoch": 24.83812949640288, "grad_norm": 0.16654358804225922, "learning_rate": 6.061382078796961e-05, "loss": 0.0108, "step": 27620 }, { "action_loss": 0.011950559914112091, "epoch": 24.83812949640288, "step": 27620 }, { "epoch": 24.847122302158272, "grad_norm": 0.16378742456436157, "learning_rate": 6.0586889399208814e-05, "loss": 0.0053, "step": 27630 }, { "action_loss": 0.004962754901498556, "epoch": 24.847122302158272, "step": 27630 }, { "epoch": 24.85611510791367, "grad_norm": 0.2736908793449402, "learning_rate": 6.0559954794430565e-05, "loss": 0.0079, "step": 27640 }, { "action_loss": 0.03301658853888512, "epoch": 24.85611510791367, "step": 27640 }, { "epoch": 24.865107913669064, "grad_norm": 0.19839423894882202, "learning_rate": 6.053301698181687e-05, "loss": 0.0098, "step": 27650 }, { "action_loss": 0.002046469831839204, "epoch": 24.865107913669064, "step": 27650 }, { "epoch": 24.87410071942446, "grad_norm": 0.1865941435098648, "learning_rate": 6.0506075969550725e-05, "loss": 0.007, "step": 27660 }, { "action_loss": 0.007677340414375067, "epoch": 24.87410071942446, "step": 27660 }, { "epoch": 24.883093525179856, "grad_norm": 0.13051538169384003, "learning_rate": 6.047913176581609e-05, "loss": 0.0068, "step": 27670 }, { "action_loss": 0.0038401552010327578, "epoch": 24.883093525179856, "step": 27670 }, { "epoch": 24.892086330935253, "grad_norm": 0.17967697978019714, "learning_rate": 6.0452184378797904e-05, "loss": 0.0074, "step": 27680 }, { "action_loss": 0.005953822284936905, "epoch": 24.892086330935253, "step": 27680 }, { "epoch": 24.901079136690647, "grad_norm": 0.15399278700351715, "learning_rate": 6.042523381668209e-05, "loss": 0.0074, "step": 27690 }, { "action_loss": 0.0048840828239917755, "epoch": 24.901079136690647, "step": 27690 }, { "epoch": 24.91007194244604, "grad_norm": 0.1573292315006256, "learning_rate": 6.03982800876555e-05, "loss": 0.0061, "step": 27700 }, { "action_loss": 0.02026214264333248, "epoch": 24.91007194244604, "step": 27700 }, { "epoch": 24.91906474820144, "grad_norm": 0.1857946813106537, "learning_rate": 6.0371323199905975e-05, "loss": 0.0116, "step": 27710 }, { "action_loss": 0.003499342128634453, "epoch": 24.91906474820144, "step": 27710 }, { "epoch": 24.928057553956833, "grad_norm": 0.15070584416389465, "learning_rate": 6.03443631616223e-05, "loss": 0.0083, "step": 27720 }, { "action_loss": 0.003845737548545003, "epoch": 24.928057553956833, "step": 27720 }, { "epoch": 24.93705035971223, "grad_norm": 0.21433156728744507, "learning_rate": 6.031739998099421e-05, "loss": 0.0067, "step": 27730 }, { "action_loss": 0.01126716285943985, "epoch": 24.93705035971223, "step": 27730 }, { "epoch": 24.946043165467625, "grad_norm": 0.19472399353981018, "learning_rate": 6.029043366621243e-05, "loss": 0.0069, "step": 27740 }, { "action_loss": 0.005048638675361872, "epoch": 24.946043165467625, "step": 27740 }, { "epoch": 24.955035971223023, "grad_norm": 0.1986308991909027, "learning_rate": 6.0263464225468615e-05, "loss": 0.0074, "step": 27750 }, { "action_loss": 0.0027843473944813013, "epoch": 24.955035971223023, "step": 27750 }, { "epoch": 24.964028776978417, "grad_norm": 0.23360227048397064, "learning_rate": 6.023649166695534e-05, "loss": 0.0051, "step": 27760 }, { "action_loss": 0.006834814790636301, "epoch": 24.964028776978417, "step": 27760 }, { "epoch": 24.973021582733814, "grad_norm": 0.17862099409103394, "learning_rate": 6.0209515998866186e-05, "loss": 0.0065, "step": 27770 }, { "action_loss": 0.002864471636712551, "epoch": 24.973021582733814, "step": 27770 }, { "epoch": 24.98201438848921, "grad_norm": 0.11080106347799301, "learning_rate": 6.018253722939563e-05, "loss": 0.0063, "step": 27780 }, { "action_loss": 0.005005735903978348, "epoch": 24.98201438848921, "step": 27780 }, { "epoch": 24.991007194244606, "grad_norm": 0.1929609477519989, "learning_rate": 6.015555536673914e-05, "loss": 0.0077, "step": 27790 }, { "action_loss": 0.003374137682840228, "epoch": 24.991007194244606, "step": 27790 }, { "epoch": 25.0, "grad_norm": 0.14846403896808624, "learning_rate": 6.0128570419093054e-05, "loss": 0.0076, "step": 27800 }, { "action_loss": 0.004306785296648741, "epoch": 25.0, "step": 27800 }, { "epoch": 25.008992805755394, "grad_norm": 0.15481281280517578, "learning_rate": 6.010158239465471e-05, "loss": 0.0047, "step": 27810 }, { "action_loss": 0.005644617602229118, "epoch": 25.008992805755394, "step": 27810 }, { "epoch": 25.01798561151079, "grad_norm": 0.17032292485237122, "learning_rate": 6.007459130162235e-05, "loss": 0.0074, "step": 27820 }, { "action_loss": 0.009892837144434452, "epoch": 25.01798561151079, "step": 27820 }, { "epoch": 25.026978417266186, "grad_norm": 0.18058200180530548, "learning_rate": 6.004759714819516e-05, "loss": 0.0073, "step": 27830 }, { "action_loss": 0.0021230627316981554, "epoch": 25.026978417266186, "step": 27830 }, { "epoch": 25.035971223021583, "grad_norm": 0.18757767975330353, "learning_rate": 6.002059994257323e-05, "loss": 0.0074, "step": 27840 }, { "action_loss": 0.006017230451107025, "epoch": 25.035971223021583, "step": 27840 }, { "epoch": 25.044964028776977, "grad_norm": 0.2169152945280075, "learning_rate": 5.999359969295764e-05, "loss": 0.0063, "step": 27850 }, { "action_loss": 0.006537595298141241, "epoch": 25.044964028776977, "step": 27850 }, { "epoch": 25.053956834532375, "grad_norm": 0.08961370587348938, "learning_rate": 5.9966596407550314e-05, "loss": 0.004, "step": 27860 }, { "action_loss": 0.01616012491285801, "epoch": 25.053956834532375, "step": 27860 }, { "epoch": 25.06294964028777, "grad_norm": 0.18219506740570068, "learning_rate": 5.993959009455416e-05, "loss": 0.0112, "step": 27870 }, { "action_loss": 0.0036740582436323166, "epoch": 25.06294964028777, "step": 27870 }, { "epoch": 25.071942446043167, "grad_norm": 0.22144325077533722, "learning_rate": 5.991258076217298e-05, "loss": 0.0054, "step": 27880 }, { "action_loss": 0.010311704128980637, "epoch": 25.071942446043167, "step": 27880 }, { "epoch": 25.08093525179856, "grad_norm": 0.1655147224664688, "learning_rate": 5.988556841861147e-05, "loss": 0.0076, "step": 27890 }, { "action_loss": 0.006519686430692673, "epoch": 25.08093525179856, "step": 27890 }, { "epoch": 25.08992805755396, "grad_norm": 0.18808554112911224, "learning_rate": 5.985855307207531e-05, "loss": 0.0092, "step": 27900 }, { "action_loss": 0.003805197076871991, "epoch": 25.08992805755396, "step": 27900 }, { "epoch": 25.098920863309353, "grad_norm": 0.10196071118116379, "learning_rate": 5.9831534730771e-05, "loss": 0.0057, "step": 27910 }, { "action_loss": 0.0042135934345424175, "epoch": 25.098920863309353, "step": 27910 }, { "epoch": 25.107913669064747, "grad_norm": 0.13657228648662567, "learning_rate": 5.980451340290605e-05, "loss": 0.0066, "step": 27920 }, { "action_loss": 0.003571780165657401, "epoch": 25.107913669064747, "step": 27920 }, { "epoch": 25.116906474820144, "grad_norm": 0.21234583854675293, "learning_rate": 5.97774890966888e-05, "loss": 0.0054, "step": 27930 }, { "action_loss": 0.0044363983906805515, "epoch": 25.116906474820144, "step": 27930 }, { "epoch": 25.12589928057554, "grad_norm": 0.12879769504070282, "learning_rate": 5.975046182032851e-05, "loss": 0.0067, "step": 27940 }, { "action_loss": 0.01155869010835886, "epoch": 25.12589928057554, "step": 27940 }, { "epoch": 25.134892086330936, "grad_norm": 0.21053099632263184, "learning_rate": 5.972343158203537e-05, "loss": 0.0068, "step": 27950 }, { "action_loss": 0.010801712982356548, "epoch": 25.134892086330936, "step": 27950 }, { "epoch": 25.14388489208633, "grad_norm": 0.11955180019140244, "learning_rate": 5.969639839002045e-05, "loss": 0.006, "step": 27960 }, { "action_loss": 0.00824193749576807, "epoch": 25.14388489208633, "step": 27960 }, { "epoch": 25.152877697841728, "grad_norm": 0.17218536138534546, "learning_rate": 5.966936225249572e-05, "loss": 0.0069, "step": 27970 }, { "action_loss": 0.0156702920794487, "epoch": 25.152877697841728, "step": 27970 }, { "epoch": 25.16187050359712, "grad_norm": 0.18227174878120422, "learning_rate": 5.9642323177674044e-05, "loss": 0.0058, "step": 27980 }, { "action_loss": 0.002300631022080779, "epoch": 25.16187050359712, "step": 27980 }, { "epoch": 25.17086330935252, "grad_norm": 0.21914421021938324, "learning_rate": 5.9615281173769154e-05, "loss": 0.0069, "step": 27990 }, { "action_loss": 0.015993839129805565, "epoch": 25.17086330935252, "step": 27990 }, { "epoch": 25.179856115107913, "grad_norm": 0.11779730767011642, "learning_rate": 5.958823624899574e-05, "loss": 0.0055, "step": 28000 }, { "action_loss": 0.004753395915031433, "epoch": 25.179856115107913, "step": 28000 }, { "epoch": 25.18884892086331, "grad_norm": 0.3196759819984436, "learning_rate": 5.956118841156933e-05, "loss": 0.0064, "step": 28010 }, { "action_loss": 0.004583323840051889, "epoch": 25.18884892086331, "step": 28010 }, { "epoch": 25.197841726618705, "grad_norm": 0.11544834822416306, "learning_rate": 5.953413766970631e-05, "loss": 0.0074, "step": 28020 }, { "action_loss": 0.005893411580473185, "epoch": 25.197841726618705, "step": 28020 }, { "epoch": 25.2068345323741, "grad_norm": 0.2322479784488678, "learning_rate": 5.9507084031624e-05, "loss": 0.0092, "step": 28030 }, { "action_loss": 0.01192386168986559, "epoch": 25.2068345323741, "step": 28030 }, { "epoch": 25.215827338129497, "grad_norm": 0.22988498210906982, "learning_rate": 5.948002750554058e-05, "loss": 0.0093, "step": 28040 }, { "action_loss": 0.010006865486502647, "epoch": 25.215827338129497, "step": 28040 }, { "epoch": 25.22482014388489, "grad_norm": 0.19923321902751923, "learning_rate": 5.9452968099675124e-05, "loss": 0.0056, "step": 28050 }, { "action_loss": 0.0051930309273302555, "epoch": 25.22482014388489, "step": 28050 }, { "epoch": 25.23381294964029, "grad_norm": 0.15037532150745392, "learning_rate": 5.9425905822247527e-05, "loss": 0.0062, "step": 28060 }, { "action_loss": 0.008793509565293789, "epoch": 25.23381294964029, "step": 28060 }, { "epoch": 25.242805755395683, "grad_norm": 0.21925437450408936, "learning_rate": 5.939884068147864e-05, "loss": 0.0065, "step": 28070 }, { "action_loss": 0.0023133226204663515, "epoch": 25.242805755395683, "step": 28070 }, { "epoch": 25.25179856115108, "grad_norm": 0.14594332873821259, "learning_rate": 5.937177268559011e-05, "loss": 0.0051, "step": 28080 }, { "action_loss": 0.003089766949415207, "epoch": 25.25179856115108, "step": 28080 }, { "epoch": 25.260791366906474, "grad_norm": 0.1798734813928604, "learning_rate": 5.934470184280448e-05, "loss": 0.0045, "step": 28090 }, { "action_loss": 0.0035325840581208467, "epoch": 25.260791366906474, "step": 28090 }, { "epoch": 25.269784172661872, "grad_norm": 0.24660290777683258, "learning_rate": 5.931762816134516e-05, "loss": 0.0054, "step": 28100 }, { "action_loss": 0.00598405534401536, "epoch": 25.269784172661872, "step": 28100 }, { "epoch": 25.278776978417266, "grad_norm": 0.18817031383514404, "learning_rate": 5.9290551649436434e-05, "loss": 0.0054, "step": 28110 }, { "action_loss": 0.0044563463889062405, "epoch": 25.278776978417266, "step": 28110 }, { "epoch": 25.28776978417266, "grad_norm": 0.25088930130004883, "learning_rate": 5.9263472315303416e-05, "loss": 0.0074, "step": 28120 }, { "action_loss": 0.006838271394371986, "epoch": 25.28776978417266, "step": 28120 }, { "epoch": 25.296762589928058, "grad_norm": 0.2606767416000366, "learning_rate": 5.9236390167172096e-05, "loss": 0.0051, "step": 28130 }, { "action_loss": 0.012503874488174915, "epoch": 25.296762589928058, "step": 28130 }, { "epoch": 25.305755395683452, "grad_norm": 0.23138189315795898, "learning_rate": 5.920930521326932e-05, "loss": 0.008, "step": 28140 }, { "action_loss": 0.021412665024399757, "epoch": 25.305755395683452, "step": 28140 }, { "epoch": 25.31474820143885, "grad_norm": 0.1768191009759903, "learning_rate": 5.918221746182276e-05, "loss": 0.0071, "step": 28150 }, { "action_loss": 0.005617212969809771, "epoch": 25.31474820143885, "step": 28150 }, { "epoch": 25.323741007194243, "grad_norm": 0.15306469798088074, "learning_rate": 5.9155126921061e-05, "loss": 0.0056, "step": 28160 }, { "action_loss": 0.0029997460078448057, "epoch": 25.323741007194243, "step": 28160 }, { "epoch": 25.33273381294964, "grad_norm": 0.1229260265827179, "learning_rate": 5.91280335992134e-05, "loss": 0.0046, "step": 28170 }, { "action_loss": 0.002983778715133667, "epoch": 25.33273381294964, "step": 28170 }, { "epoch": 25.341726618705035, "grad_norm": 0.11964299529790878, "learning_rate": 5.91009375045102e-05, "loss": 0.006, "step": 28180 }, { "action_loss": 0.003161915810778737, "epoch": 25.341726618705035, "step": 28180 }, { "epoch": 25.350719424460433, "grad_norm": 0.1635494977235794, "learning_rate": 5.9073838645182476e-05, "loss": 0.0052, "step": 28190 }, { "action_loss": 0.0026891983579844236, "epoch": 25.350719424460433, "step": 28190 }, { "epoch": 25.359712230215827, "grad_norm": 0.2365798056125641, "learning_rate": 5.904673702946217e-05, "loss": 0.0076, "step": 28200 }, { "action_loss": 0.005025256425142288, "epoch": 25.359712230215827, "step": 28200 }, { "epoch": 25.368705035971225, "grad_norm": 0.119349405169487, "learning_rate": 5.9019632665582004e-05, "loss": 0.0041, "step": 28210 }, { "action_loss": 0.0016589075094088912, "epoch": 25.368705035971225, "step": 28210 }, { "epoch": 25.37769784172662, "grad_norm": 0.1756434589624405, "learning_rate": 5.899252556177559e-05, "loss": 0.0046, "step": 28220 }, { "action_loss": 0.0052918740548193455, "epoch": 25.37769784172662, "step": 28220 }, { "epoch": 25.386690647482013, "grad_norm": 0.25125521421432495, "learning_rate": 5.896541572627735e-05, "loss": 0.006, "step": 28230 }, { "action_loss": 0.0025484394282102585, "epoch": 25.386690647482013, "step": 28230 }, { "epoch": 25.39568345323741, "grad_norm": 0.12734150886535645, "learning_rate": 5.893830316732253e-05, "loss": 0.0055, "step": 28240 }, { "action_loss": 0.005568737629801035, "epoch": 25.39568345323741, "step": 28240 }, { "epoch": 25.404676258992804, "grad_norm": 0.11822926253080368, "learning_rate": 5.8911187893147214e-05, "loss": 0.0075, "step": 28250 }, { "action_loss": 0.00404207082465291, "epoch": 25.404676258992804, "step": 28250 }, { "epoch": 25.413669064748202, "grad_norm": 0.23653657734394073, "learning_rate": 5.888406991198828e-05, "loss": 0.0074, "step": 28260 }, { "action_loss": 0.007630884647369385, "epoch": 25.413669064748202, "step": 28260 }, { "epoch": 25.422661870503596, "grad_norm": 0.18475057184696198, "learning_rate": 5.885694923208349e-05, "loss": 0.0107, "step": 28270 }, { "action_loss": 0.00565377390012145, "epoch": 25.422661870503596, "step": 28270 }, { "epoch": 25.431654676258994, "grad_norm": 0.1136021614074707, "learning_rate": 5.882982586167138e-05, "loss": 0.0073, "step": 28280 }, { "action_loss": 0.0033803973346948624, "epoch": 25.431654676258994, "step": 28280 }, { "epoch": 25.440647482014388, "grad_norm": 0.13992908596992493, "learning_rate": 5.880269980899131e-05, "loss": 0.0061, "step": 28290 }, { "action_loss": 0.0029714189004153013, "epoch": 25.440647482014388, "step": 28290 }, { "epoch": 25.449640287769785, "grad_norm": 0.12944157421588898, "learning_rate": 5.8775571082283465e-05, "loss": 0.0056, "step": 28300 }, { "action_loss": 0.005701074842363596, "epoch": 25.449640287769785, "step": 28300 }, { "epoch": 25.45863309352518, "grad_norm": 0.10780289769172668, "learning_rate": 5.8748439689788824e-05, "loss": 0.0084, "step": 28310 }, { "action_loss": 0.004093511030077934, "epoch": 25.45863309352518, "step": 28310 }, { "epoch": 25.467625899280577, "grad_norm": 0.12965820729732513, "learning_rate": 5.87213056397492e-05, "loss": 0.0059, "step": 28320 }, { "action_loss": 0.007943250238895416, "epoch": 25.467625899280577, "step": 28320 }, { "epoch": 25.47661870503597, "grad_norm": 0.20218412578105927, "learning_rate": 5.869416894040719e-05, "loss": 0.0097, "step": 28330 }, { "action_loss": 0.0050156912766397, "epoch": 25.47661870503597, "step": 28330 }, { "epoch": 25.485611510791365, "grad_norm": 0.12664973735809326, "learning_rate": 5.866702960000621e-05, "loss": 0.0054, "step": 28340 }, { "action_loss": 0.005457429680973291, "epoch": 25.485611510791365, "step": 28340 }, { "epoch": 25.494604316546763, "grad_norm": 0.2798655331134796, "learning_rate": 5.863988762679048e-05, "loss": 0.0082, "step": 28350 }, { "action_loss": 0.0027938615530729294, "epoch": 25.494604316546763, "step": 28350 }, { "epoch": 25.503597122302157, "grad_norm": 0.18157827854156494, "learning_rate": 5.8612743029005e-05, "loss": 0.0057, "step": 28360 }, { "action_loss": 0.0020625039469450712, "epoch": 25.503597122302157, "step": 28360 }, { "epoch": 25.512589928057555, "grad_norm": 0.17453689873218536, "learning_rate": 5.858559581489561e-05, "loss": 0.0054, "step": 28370 }, { "action_loss": 0.018276233226060867, "epoch": 25.512589928057555, "step": 28370 }, { "epoch": 25.52158273381295, "grad_norm": 0.20649147033691406, "learning_rate": 5.85584459927089e-05, "loss": 0.0094, "step": 28380 }, { "action_loss": 0.01156875491142273, "epoch": 25.52158273381295, "step": 28380 }, { "epoch": 25.530575539568346, "grad_norm": 0.16281577944755554, "learning_rate": 5.853129357069227e-05, "loss": 0.0057, "step": 28390 }, { "action_loss": 0.009040066972374916, "epoch": 25.530575539568346, "step": 28390 }, { "epoch": 25.53956834532374, "grad_norm": 0.17165523767471313, "learning_rate": 5.8504138557093913e-05, "loss": 0.0066, "step": 28400 }, { "action_loss": 0.017548656091094017, "epoch": 25.53956834532374, "step": 28400 }, { "epoch": 25.548561151079138, "grad_norm": 0.1468936800956726, "learning_rate": 5.8476980960162784e-05, "loss": 0.0078, "step": 28410 }, { "action_loss": 0.005928610917180777, "epoch": 25.548561151079138, "step": 28410 }, { "epoch": 25.557553956834532, "grad_norm": 0.21079005300998688, "learning_rate": 5.844982078814868e-05, "loss": 0.0061, "step": 28420 }, { "action_loss": 0.0070652742870152, "epoch": 25.557553956834532, "step": 28420 }, { "epoch": 25.56654676258993, "grad_norm": 0.18479257822036743, "learning_rate": 5.842265804930211e-05, "loss": 0.0104, "step": 28430 }, { "action_loss": 0.0034962992649525404, "epoch": 25.56654676258993, "step": 28430 }, { "epoch": 25.575539568345324, "grad_norm": 0.23713985085487366, "learning_rate": 5.839549275187444e-05, "loss": 0.0055, "step": 28440 }, { "action_loss": 0.011292710900306702, "epoch": 25.575539568345324, "step": 28440 }, { "epoch": 25.584532374100718, "grad_norm": 0.15468885004520416, "learning_rate": 5.836832490411771e-05, "loss": 0.007, "step": 28450 }, { "action_loss": 0.0030247438699007034, "epoch": 25.584532374100718, "step": 28450 }, { "epoch": 25.593525179856115, "grad_norm": 0.17588134109973907, "learning_rate": 5.834115451428485e-05, "loss": 0.0074, "step": 28460 }, { "action_loss": 0.010310311801731586, "epoch": 25.593525179856115, "step": 28460 }, { "epoch": 25.60251798561151, "grad_norm": 0.10854402184486389, "learning_rate": 5.831398159062946e-05, "loss": 0.0046, "step": 28470 }, { "action_loss": 0.0018888757331296802, "epoch": 25.60251798561151, "step": 28470 }, { "epoch": 25.611510791366907, "grad_norm": 0.23758195340633392, "learning_rate": 5.828680614140599e-05, "loss": 0.0052, "step": 28480 }, { "action_loss": 0.009583516046404839, "epoch": 25.611510791366907, "step": 28480 }, { "epoch": 25.6205035971223, "grad_norm": 0.23257790505886078, "learning_rate": 5.825962817486962e-05, "loss": 0.0073, "step": 28490 }, { "action_loss": 0.0024456840474158525, "epoch": 25.6205035971223, "step": 28490 }, { "epoch": 25.6294964028777, "grad_norm": 0.16681374609470367, "learning_rate": 5.823244769927629e-05, "loss": 0.0064, "step": 28500 }, { "action_loss": 0.009149775840342045, "epoch": 25.6294964028777, "step": 28500 }, { "epoch": 25.638489208633093, "grad_norm": 0.18367421627044678, "learning_rate": 5.8205264722882716e-05, "loss": 0.0097, "step": 28510 }, { "action_loss": 0.0021215612068772316, "epoch": 25.638489208633093, "step": 28510 }, { "epoch": 25.64748201438849, "grad_norm": 0.2020399272441864, "learning_rate": 5.817807925394636e-05, "loss": 0.0052, "step": 28520 }, { "action_loss": 0.005580571945756674, "epoch": 25.64748201438849, "step": 28520 }, { "epoch": 25.656474820143885, "grad_norm": 0.1338655799627304, "learning_rate": 5.815089130072546e-05, "loss": 0.0074, "step": 28530 }, { "action_loss": 0.006228750571608543, "epoch": 25.656474820143885, "step": 28530 }, { "epoch": 25.665467625899282, "grad_norm": 0.14783433079719543, "learning_rate": 5.8123700871479e-05, "loss": 0.0047, "step": 28540 }, { "action_loss": 0.003182983258739114, "epoch": 25.665467625899282, "step": 28540 }, { "epoch": 25.674460431654676, "grad_norm": 0.12519259750843048, "learning_rate": 5.809650797446671e-05, "loss": 0.0051, "step": 28550 }, { "action_loss": 0.0031836864072829485, "epoch": 25.674460431654676, "step": 28550 }, { "epoch": 25.68345323741007, "grad_norm": 0.147896870970726, "learning_rate": 5.806931261794907e-05, "loss": 0.0067, "step": 28560 }, { "action_loss": 0.0035088874865323305, "epoch": 25.68345323741007, "step": 28560 }, { "epoch": 25.692446043165468, "grad_norm": 0.15182504057884216, "learning_rate": 5.804211481018731e-05, "loss": 0.0038, "step": 28570 }, { "action_loss": 0.007058005779981613, "epoch": 25.692446043165468, "step": 28570 }, { "epoch": 25.701438848920862, "grad_norm": 0.11579908430576324, "learning_rate": 5.801491455944341e-05, "loss": 0.0039, "step": 28580 }, { "action_loss": 0.004732710774987936, "epoch": 25.701438848920862, "step": 28580 }, { "epoch": 25.71043165467626, "grad_norm": 0.11281891167163849, "learning_rate": 5.79877118739801e-05, "loss": 0.0049, "step": 28590 }, { "action_loss": 0.026647472754120827, "epoch": 25.71043165467626, "step": 28590 }, { "epoch": 25.719424460431654, "grad_norm": 0.12936291098594666, "learning_rate": 5.7960506762060816e-05, "loss": 0.0078, "step": 28600 }, { "action_loss": 0.004236731678247452, "epoch": 25.719424460431654, "step": 28600 }, { "epoch": 25.72841726618705, "grad_norm": 0.29227402806282043, "learning_rate": 5.793329923194977e-05, "loss": 0.0048, "step": 28610 }, { "action_loss": 0.004438381176441908, "epoch": 25.72841726618705, "step": 28610 }, { "epoch": 25.737410071942445, "grad_norm": 0.2389744222164154, "learning_rate": 5.790608929191187e-05, "loss": 0.0054, "step": 28620 }, { "action_loss": 0.0024681577924638987, "epoch": 25.737410071942445, "step": 28620 }, { "epoch": 25.746402877697843, "grad_norm": 0.14685216546058655, "learning_rate": 5.78788769502128e-05, "loss": 0.0097, "step": 28630 }, { "action_loss": 0.003958769608289003, "epoch": 25.746402877697843, "step": 28630 }, { "epoch": 25.755395683453237, "grad_norm": 0.20858871936798096, "learning_rate": 5.785166221511894e-05, "loss": 0.0071, "step": 28640 }, { "action_loss": 0.005610515829175711, "epoch": 25.755395683453237, "step": 28640 }, { "epoch": 25.764388489208635, "grad_norm": 0.16457344591617584, "learning_rate": 5.7824445094897415e-05, "loss": 0.0081, "step": 28650 }, { "action_loss": 0.007370718289166689, "epoch": 25.764388489208635, "step": 28650 }, { "epoch": 25.77338129496403, "grad_norm": 0.1570291817188263, "learning_rate": 5.7797225597816065e-05, "loss": 0.0067, "step": 28660 }, { "action_loss": 0.0031145282555371523, "epoch": 25.77338129496403, "step": 28660 }, { "epoch": 25.782374100719423, "grad_norm": 0.2035796344280243, "learning_rate": 5.777000373214345e-05, "loss": 0.0053, "step": 28670 }, { "action_loss": 0.0033676077146083117, "epoch": 25.782374100719423, "step": 28670 }, { "epoch": 25.79136690647482, "grad_norm": 0.15215113759040833, "learning_rate": 5.774277950614885e-05, "loss": 0.0083, "step": 28680 }, { "action_loss": 0.009355072863399982, "epoch": 25.79136690647482, "step": 28680 }, { "epoch": 25.800359712230215, "grad_norm": 0.14018797874450684, "learning_rate": 5.771555292810227e-05, "loss": 0.0056, "step": 28690 }, { "action_loss": 0.003299490548670292, "epoch": 25.800359712230215, "step": 28690 }, { "epoch": 25.809352517985612, "grad_norm": 0.12129317969083786, "learning_rate": 5.768832400627444e-05, "loss": 0.0049, "step": 28700 }, { "action_loss": 0.0072538829408586025, "epoch": 25.809352517985612, "step": 28700 }, { "epoch": 25.818345323741006, "grad_norm": 0.24561281502246857, "learning_rate": 5.7661092748936775e-05, "loss": 0.0072, "step": 28710 }, { "action_loss": 0.0072798654437065125, "epoch": 25.818345323741006, "step": 28710 }, { "epoch": 25.827338129496404, "grad_norm": 0.12062850594520569, "learning_rate": 5.76338591643614e-05, "loss": 0.0098, "step": 28720 }, { "action_loss": 0.009211038239300251, "epoch": 25.827338129496404, "step": 28720 }, { "epoch": 25.836330935251798, "grad_norm": 0.2223058044910431, "learning_rate": 5.760662326082118e-05, "loss": 0.0084, "step": 28730 }, { "action_loss": 0.003535911673679948, "epoch": 25.836330935251798, "step": 28730 }, { "epoch": 25.845323741007196, "grad_norm": 0.20590591430664062, "learning_rate": 5.757938504658965e-05, "loss": 0.0057, "step": 28740 }, { "action_loss": 0.006096918135881424, "epoch": 25.845323741007196, "step": 28740 }, { "epoch": 25.85431654676259, "grad_norm": 0.11980995535850525, "learning_rate": 5.755214452994107e-05, "loss": 0.0064, "step": 28750 }, { "action_loss": 0.005984318908303976, "epoch": 25.85431654676259, "step": 28750 }, { "epoch": 25.863309352517987, "grad_norm": 0.10484819859266281, "learning_rate": 5.752490171915039e-05, "loss": 0.0068, "step": 28760 }, { "action_loss": 0.0028816331177949905, "epoch": 25.863309352517987, "step": 28760 }, { "epoch": 25.87230215827338, "grad_norm": 0.191078782081604, "learning_rate": 5.749765662249324e-05, "loss": 0.0085, "step": 28770 }, { "action_loss": 0.0035460342187434435, "epoch": 25.87230215827338, "step": 28770 }, { "epoch": 25.881294964028775, "grad_norm": 0.136060431599617, "learning_rate": 5.747040924824596e-05, "loss": 0.0032, "step": 28780 }, { "action_loss": 0.003388959914445877, "epoch": 25.881294964028775, "step": 28780 }, { "epoch": 25.890287769784173, "grad_norm": 0.1416979432106018, "learning_rate": 5.7443159604685613e-05, "loss": 0.005, "step": 28790 }, { "action_loss": 0.015496904961764812, "epoch": 25.890287769784173, "step": 28790 }, { "epoch": 25.899280575539567, "grad_norm": 0.09681093692779541, "learning_rate": 5.74159077000899e-05, "loss": 0.0074, "step": 28800 }, { "action_loss": 0.0072349864058196545, "epoch": 25.899280575539567, "step": 28800 }, { "epoch": 25.908273381294965, "grad_norm": 0.12815621495246887, "learning_rate": 5.7388653542737235e-05, "loss": 0.0061, "step": 28810 }, { "action_loss": 0.006410546600818634, "epoch": 25.908273381294965, "step": 28810 }, { "epoch": 25.91726618705036, "grad_norm": 0.10985634475946426, "learning_rate": 5.736139714090672e-05, "loss": 0.0084, "step": 28820 }, { "action_loss": 0.021169232204556465, "epoch": 25.91726618705036, "step": 28820 }, { "epoch": 25.926258992805757, "grad_norm": 0.10379619896411896, "learning_rate": 5.73341385028781e-05, "loss": 0.0072, "step": 28830 }, { "action_loss": 0.008101525716483593, "epoch": 25.926258992805757, "step": 28830 }, { "epoch": 25.93525179856115, "grad_norm": 0.09288699179887772, "learning_rate": 5.7306877636931855e-05, "loss": 0.0047, "step": 28840 }, { "action_loss": 0.0029772419948130846, "epoch": 25.93525179856115, "step": 28840 }, { "epoch": 25.944244604316548, "grad_norm": 0.26125368475914, "learning_rate": 5.7279614551349125e-05, "loss": 0.0059, "step": 28850 }, { "action_loss": 0.0027737903874367476, "epoch": 25.944244604316548, "step": 28850 }, { "epoch": 25.953237410071942, "grad_norm": 0.18024586141109467, "learning_rate": 5.725234925441169e-05, "loss": 0.0042, "step": 28860 }, { "action_loss": 0.0014884299598634243, "epoch": 25.953237410071942, "step": 28860 }, { "epoch": 25.96223021582734, "grad_norm": 0.22687247395515442, "learning_rate": 5.7225081754402044e-05, "loss": 0.0053, "step": 28870 }, { "action_loss": 0.005952184554189444, "epoch": 25.96223021582734, "step": 28870 }, { "epoch": 25.971223021582734, "grad_norm": 0.20481924712657928, "learning_rate": 5.7197812059603326e-05, "loss": 0.0061, "step": 28880 }, { "action_loss": 0.011766999959945679, "epoch": 25.971223021582734, "step": 28880 }, { "epoch": 25.980215827338128, "grad_norm": 0.2235724925994873, "learning_rate": 5.717054017829934e-05, "loss": 0.0069, "step": 28890 }, { "action_loss": 0.0033545575570315123, "epoch": 25.980215827338128, "step": 28890 }, { "epoch": 25.989208633093526, "grad_norm": 0.16290545463562012, "learning_rate": 5.7143266118774584e-05, "loss": 0.0101, "step": 28900 }, { "action_loss": 0.003248353721573949, "epoch": 25.989208633093526, "step": 28900 }, { "epoch": 25.99820143884892, "grad_norm": 0.1910596489906311, "learning_rate": 5.711598988931418e-05, "loss": 0.0049, "step": 28910 }, { "action_loss": 0.0039381771348416805, "epoch": 25.99820143884892, "step": 28910 }, { "epoch": 26.007194244604317, "grad_norm": 0.17690208554267883, "learning_rate": 5.7088711498203954e-05, "loss": 0.005, "step": 28920 }, { "action_loss": 0.006380293518304825, "epoch": 26.007194244604317, "step": 28920 }, { "epoch": 26.01618705035971, "grad_norm": 0.1833426058292389, "learning_rate": 5.706143095373033e-05, "loss": 0.0109, "step": 28930 }, { "action_loss": 0.004411828238517046, "epoch": 26.01618705035971, "step": 28930 }, { "epoch": 26.02517985611511, "grad_norm": 0.189108207821846, "learning_rate": 5.703414826418042e-05, "loss": 0.0057, "step": 28940 }, { "action_loss": 0.014094457030296326, "epoch": 26.02517985611511, "step": 28940 }, { "epoch": 26.034172661870503, "grad_norm": 0.12990878522396088, "learning_rate": 5.7006863437842007e-05, "loss": 0.0059, "step": 28950 }, { "action_loss": 0.004184704273939133, "epoch": 26.034172661870503, "step": 28950 }, { "epoch": 26.0431654676259, "grad_norm": 0.17027916014194489, "learning_rate": 5.697957648300348e-05, "loss": 0.0118, "step": 28960 }, { "action_loss": 0.006324875634163618, "epoch": 26.0431654676259, "step": 28960 }, { "epoch": 26.052158273381295, "grad_norm": 0.17452143132686615, "learning_rate": 5.695228740795391e-05, "loss": 0.0059, "step": 28970 }, { "action_loss": 0.003864784725010395, "epoch": 26.052158273381295, "step": 28970 }, { "epoch": 26.06115107913669, "grad_norm": 0.22880470752716064, "learning_rate": 5.6924996220982985e-05, "loss": 0.0072, "step": 28980 }, { "action_loss": 0.004371912684291601, "epoch": 26.06115107913669, "step": 28980 }, { "epoch": 26.070143884892087, "grad_norm": 0.12940722703933716, "learning_rate": 5.6897702930381045e-05, "loss": 0.0046, "step": 28990 }, { "action_loss": 0.0018204035004600883, "epoch": 26.070143884892087, "step": 28990 }, { "epoch": 26.07913669064748, "grad_norm": 0.12389986962080002, "learning_rate": 5.687040754443908e-05, "loss": 0.004, "step": 29000 }, { "action_loss": 0.013779208064079285, "epoch": 26.07913669064748, "step": 29000 }, { "epoch": 26.08812949640288, "grad_norm": 0.12733253836631775, "learning_rate": 5.6843110071448725e-05, "loss": 0.0066, "step": 29010 }, { "action_loss": 0.010155808180570602, "epoch": 26.08812949640288, "step": 29010 }, { "epoch": 26.097122302158272, "grad_norm": 0.17569522559642792, "learning_rate": 5.6815810519702194e-05, "loss": 0.006, "step": 29020 }, { "action_loss": 0.0031358206178992987, "epoch": 26.097122302158272, "step": 29020 }, { "epoch": 26.10611510791367, "grad_norm": 0.1377001851797104, "learning_rate": 5.6788508897492396e-05, "loss": 0.0034, "step": 29030 }, { "action_loss": 0.0017807319527491927, "epoch": 26.10611510791367, "step": 29030 }, { "epoch": 26.115107913669064, "grad_norm": 0.1845407634973526, "learning_rate": 5.676120521311282e-05, "loss": 0.0048, "step": 29040 }, { "action_loss": 0.008604230359196663, "epoch": 26.115107913669064, "step": 29040 }, { "epoch": 26.12410071942446, "grad_norm": 0.14137132465839386, "learning_rate": 5.6733899474857634e-05, "loss": 0.0085, "step": 29050 }, { "action_loss": 0.0028466086369007826, "epoch": 26.12410071942446, "step": 29050 }, { "epoch": 26.133093525179856, "grad_norm": 0.1785772740840912, "learning_rate": 5.670659169102157e-05, "loss": 0.0057, "step": 29060 }, { "action_loss": 0.006592895369976759, "epoch": 26.133093525179856, "step": 29060 }, { "epoch": 26.142086330935253, "grad_norm": 0.22325469553470612, "learning_rate": 5.6679281869900044e-05, "loss": 0.0071, "step": 29070 }, { "action_loss": 0.0033557582646608353, "epoch": 26.142086330935253, "step": 29070 }, { "epoch": 26.151079136690647, "grad_norm": 0.16031157970428467, "learning_rate": 5.6651970019789045e-05, "loss": 0.0059, "step": 29080 }, { "action_loss": 0.007092930842190981, "epoch": 26.151079136690647, "step": 29080 }, { "epoch": 26.16007194244604, "grad_norm": 0.1525280773639679, "learning_rate": 5.662465614898519e-05, "loss": 0.0045, "step": 29090 }, { "action_loss": 0.004367548041045666, "epoch": 26.16007194244604, "step": 29090 }, { "epoch": 26.16906474820144, "grad_norm": 0.14224077761173248, "learning_rate": 5.6597340265785695e-05, "loss": 0.0104, "step": 29100 }, { "action_loss": 0.0031743415165692568, "epoch": 26.16906474820144, "step": 29100 }, { "epoch": 26.178057553956833, "grad_norm": 0.1385968029499054, "learning_rate": 5.657002237848843e-05, "loss": 0.0062, "step": 29110 }, { "action_loss": 0.0022625105921179056, "epoch": 26.178057553956833, "step": 29110 }, { "epoch": 26.18705035971223, "grad_norm": 0.14908543229103088, "learning_rate": 5.654270249539183e-05, "loss": 0.0051, "step": 29120 }, { "action_loss": 0.015101701021194458, "epoch": 26.18705035971223, "step": 29120 }, { "epoch": 26.196043165467625, "grad_norm": 0.1520550698041916, "learning_rate": 5.651538062479498e-05, "loss": 0.0081, "step": 29130 }, { "action_loss": 0.01457652822136879, "epoch": 26.196043165467625, "step": 29130 }, { "epoch": 26.205035971223023, "grad_norm": 0.25687021017074585, "learning_rate": 5.648805677499751e-05, "loss": 0.0062, "step": 29140 }, { "action_loss": 0.0021664102096110582, "epoch": 26.205035971223023, "step": 29140 }, { "epoch": 26.214028776978417, "grad_norm": 0.10996376723051071, "learning_rate": 5.646073095429969e-05, "loss": 0.0066, "step": 29150 }, { "action_loss": 0.010187872685492039, "epoch": 26.214028776978417, "step": 29150 }, { "epoch": 26.223021582733814, "grad_norm": 0.15455476939678192, "learning_rate": 5.643340317100241e-05, "loss": 0.0056, "step": 29160 }, { "action_loss": 0.004637738689780235, "epoch": 26.223021582733814, "step": 29160 }, { "epoch": 26.23201438848921, "grad_norm": 0.21796368062496185, "learning_rate": 5.64060734334071e-05, "loss": 0.0059, "step": 29170 }, { "action_loss": 0.005683042109012604, "epoch": 26.23201438848921, "step": 29170 }, { "epoch": 26.241007194244606, "grad_norm": 0.12521040439605713, "learning_rate": 5.637874174981583e-05, "loss": 0.0049, "step": 29180 }, { "action_loss": 0.005334462970495224, "epoch": 26.241007194244606, "step": 29180 }, { "epoch": 26.25, "grad_norm": 0.1189488098025322, "learning_rate": 5.635140812853124e-05, "loss": 0.005, "step": 29190 }, { "action_loss": 0.0028282671701163054, "epoch": 26.25, "step": 29190 }, { "epoch": 26.258992805755394, "grad_norm": 0.10900675505399704, "learning_rate": 5.6324072577856544e-05, "loss": 0.0055, "step": 29200 }, { "action_loss": 0.015506037510931492, "epoch": 26.258992805755394, "step": 29200 }, { "epoch": 26.26798561151079, "grad_norm": 0.22870725393295288, "learning_rate": 5.629673510609559e-05, "loss": 0.0064, "step": 29210 }, { "action_loss": 0.002641235711053014, "epoch": 26.26798561151079, "step": 29210 }, { "epoch": 26.276978417266186, "grad_norm": 0.13441689312458038, "learning_rate": 5.626939572155276e-05, "loss": 0.0078, "step": 29220 }, { "action_loss": 0.012049607932567596, "epoch": 26.276978417266186, "step": 29220 }, { "epoch": 26.285971223021583, "grad_norm": 0.18648380041122437, "learning_rate": 5.6242054432533054e-05, "loss": 0.0058, "step": 29230 }, { "action_loss": 0.004803711082786322, "epoch": 26.285971223021583, "step": 29230 }, { "epoch": 26.294964028776977, "grad_norm": 0.2803126871585846, "learning_rate": 5.621471124734201e-05, "loss": 0.0059, "step": 29240 }, { "action_loss": 0.006031669210642576, "epoch": 26.294964028776977, "step": 29240 }, { "epoch": 26.303956834532375, "grad_norm": 0.2833508849143982, "learning_rate": 5.6187366174285794e-05, "loss": 0.0119, "step": 29250 }, { "action_loss": 0.0056455605663359165, "epoch": 26.303956834532375, "step": 29250 }, { "epoch": 26.31294964028777, "grad_norm": 0.24361053109169006, "learning_rate": 5.616001922167109e-05, "loss": 0.0075, "step": 29260 }, { "action_loss": 0.004039957653731108, "epoch": 26.31294964028777, "step": 29260 }, { "epoch": 26.321942446043167, "grad_norm": 0.19960348308086395, "learning_rate": 5.61326703978052e-05, "loss": 0.0085, "step": 29270 }, { "action_loss": 0.002930249320343137, "epoch": 26.321942446043167, "step": 29270 }, { "epoch": 26.33093525179856, "grad_norm": 0.21072888374328613, "learning_rate": 5.6105319710995964e-05, "loss": 0.0063, "step": 29280 }, { "action_loss": 0.003945857286453247, "epoch": 26.33093525179856, "step": 29280 }, { "epoch": 26.33992805755396, "grad_norm": 0.20563143491744995, "learning_rate": 5.60779671695518e-05, "loss": 0.0067, "step": 29290 }, { "action_loss": 0.003530883463099599, "epoch": 26.33992805755396, "step": 29290 }, { "epoch": 26.348920863309353, "grad_norm": 0.20711858570575714, "learning_rate": 5.6050612781781684e-05, "loss": 0.005, "step": 29300 }, { "action_loss": 0.0018496810225769877, "epoch": 26.348920863309353, "step": 29300 }, { "epoch": 26.357913669064747, "grad_norm": 0.19321781396865845, "learning_rate": 5.602325655599516e-05, "loss": 0.0057, "step": 29310 }, { "action_loss": 0.006059248466044664, "epoch": 26.357913669064747, "step": 29310 }, { "epoch": 26.366906474820144, "grad_norm": 0.1890125572681427, "learning_rate": 5.599589850050234e-05, "loss": 0.0046, "step": 29320 }, { "action_loss": 0.0027813464403152466, "epoch": 26.366906474820144, "step": 29320 }, { "epoch": 26.37589928057554, "grad_norm": 0.12964874505996704, "learning_rate": 5.5968538623613874e-05, "loss": 0.0057, "step": 29330 }, { "action_loss": 0.0022246025037020445, "epoch": 26.37589928057554, "step": 29330 }, { "epoch": 26.384892086330936, "grad_norm": 0.10641132295131683, "learning_rate": 5.594117693364095e-05, "loss": 0.0047, "step": 29340 }, { "action_loss": 0.0064079780131578445, "epoch": 26.384892086330936, "step": 29340 }, { "epoch": 26.39388489208633, "grad_norm": 0.14126360416412354, "learning_rate": 5.591381343889535e-05, "loss": 0.0059, "step": 29350 }, { "action_loss": 0.01119255181401968, "epoch": 26.39388489208633, "step": 29350 }, { "epoch": 26.402877697841728, "grad_norm": 0.15001332759857178, "learning_rate": 5.5886448147689355e-05, "loss": 0.0064, "step": 29360 }, { "action_loss": 0.007201697677373886, "epoch": 26.402877697841728, "step": 29360 }, { "epoch": 26.41187050359712, "grad_norm": 0.15684054791927338, "learning_rate": 5.585908106833585e-05, "loss": 0.0042, "step": 29370 }, { "action_loss": 0.005432176869362593, "epoch": 26.41187050359712, "step": 29370 }, { "epoch": 26.42086330935252, "grad_norm": 0.1090569719672203, "learning_rate": 5.5831712209148226e-05, "loss": 0.0061, "step": 29380 }, { "action_loss": 0.017821887508034706, "epoch": 26.42086330935252, "step": 29380 }, { "epoch": 26.429856115107913, "grad_norm": 0.153082475066185, "learning_rate": 5.58043415784404e-05, "loss": 0.0072, "step": 29390 }, { "action_loss": 0.004043468274176121, "epoch": 26.429856115107913, "step": 29390 }, { "epoch": 26.43884892086331, "grad_norm": 0.13197173178195953, "learning_rate": 5.577696918452686e-05, "loss": 0.0052, "step": 29400 }, { "action_loss": 0.004531872924417257, "epoch": 26.43884892086331, "step": 29400 }, { "epoch": 26.447841726618705, "grad_norm": 0.1369091421365738, "learning_rate": 5.5749595035722604e-05, "loss": 0.0171, "step": 29410 }, { "action_loss": 0.0020371677819639444, "epoch": 26.447841726618705, "step": 29410 }, { "epoch": 26.4568345323741, "grad_norm": 0.14069567620754242, "learning_rate": 5.5722219140343193e-05, "loss": 0.0091, "step": 29420 }, { "action_loss": 0.003136542858555913, "epoch": 26.4568345323741, "step": 29420 }, { "epoch": 26.465827338129497, "grad_norm": 0.14808745682239532, "learning_rate": 5.56948415067047e-05, "loss": 0.0066, "step": 29430 }, { "action_loss": 0.012341509573161602, "epoch": 26.465827338129497, "step": 29430 }, { "epoch": 26.47482014388489, "grad_norm": 0.21259404718875885, "learning_rate": 5.5667462143123704e-05, "loss": 0.0082, "step": 29440 }, { "action_loss": 0.003449729410931468, "epoch": 26.47482014388489, "step": 29440 }, { "epoch": 26.48381294964029, "grad_norm": 0.1819027066230774, "learning_rate": 5.564008105791737e-05, "loss": 0.0054, "step": 29450 }, { "action_loss": 0.003943863324820995, "epoch": 26.48381294964029, "step": 29450 }, { "epoch": 26.492805755395683, "grad_norm": 0.18438483774662018, "learning_rate": 5.5612698259403316e-05, "loss": 0.0092, "step": 29460 }, { "action_loss": 0.004528260324150324, "epoch": 26.492805755395683, "step": 29460 }, { "epoch": 26.50179856115108, "grad_norm": 0.17730934917926788, "learning_rate": 5.5585313755899724e-05, "loss": 0.0086, "step": 29470 }, { "action_loss": 0.00836500246077776, "epoch": 26.50179856115108, "step": 29470 }, { "epoch": 26.510791366906474, "grad_norm": 0.28631624579429626, "learning_rate": 5.5557927555725285e-05, "loss": 0.0104, "step": 29480 }, { "action_loss": 0.002964884042739868, "epoch": 26.510791366906474, "step": 29480 }, { "epoch": 26.519784172661872, "grad_norm": 0.20461587607860565, "learning_rate": 5.55305396671992e-05, "loss": 0.0051, "step": 29490 }, { "action_loss": 0.0025882439222186804, "epoch": 26.519784172661872, "step": 29490 }, { "epoch": 26.528776978417266, "grad_norm": 0.1588565558195114, "learning_rate": 5.55031500986412e-05, "loss": 0.0097, "step": 29500 }, { "action_loss": 0.0032503341790288687, "epoch": 26.528776978417266, "step": 29500 }, { "epoch": 26.53776978417266, "grad_norm": 0.27184486389160156, "learning_rate": 5.547575885837149e-05, "loss": 0.0084, "step": 29510 }, { "action_loss": 0.00791573990136385, "epoch": 26.53776978417266, "step": 29510 }, { "epoch": 26.546762589928058, "grad_norm": 0.12515394389629364, "learning_rate": 5.5448365954710825e-05, "loss": 0.0065, "step": 29520 }, { "action_loss": 0.014257761649787426, "epoch": 26.546762589928058, "step": 29520 }, { "epoch": 26.555755395683452, "grad_norm": 0.13217288255691528, "learning_rate": 5.5420971395980446e-05, "loss": 0.009, "step": 29530 }, { "action_loss": 0.007677870336920023, "epoch": 26.555755395683452, "step": 29530 }, { "epoch": 26.56474820143885, "grad_norm": 0.19850727915763855, "learning_rate": 5.539357519050209e-05, "loss": 0.0085, "step": 29540 }, { "action_loss": 0.003468402661383152, "epoch": 26.56474820143885, "step": 29540 }, { "epoch": 26.573741007194243, "grad_norm": 0.1533202975988388, "learning_rate": 5.536617734659799e-05, "loss": 0.0076, "step": 29550 }, { "action_loss": 0.0022318202536553144, "epoch": 26.573741007194243, "step": 29550 }, { "epoch": 26.58273381294964, "grad_norm": 0.16868968307971954, "learning_rate": 5.533877787259091e-05, "loss": 0.0055, "step": 29560 }, { "action_loss": 0.004831458907574415, "epoch": 26.58273381294964, "step": 29560 }, { "epoch": 26.591726618705035, "grad_norm": 0.1179380863904953, "learning_rate": 5.5311376776804044e-05, "loss": 0.0058, "step": 29570 }, { "action_loss": 0.002689264714717865, "epoch": 26.591726618705035, "step": 29570 }, { "epoch": 26.600719424460433, "grad_norm": 0.16207854449748993, "learning_rate": 5.528397406756118e-05, "loss": 0.0075, "step": 29580 }, { "action_loss": 0.006823399569839239, "epoch": 26.600719424460433, "step": 29580 }, { "epoch": 26.609712230215827, "grad_norm": 0.12124978005886078, "learning_rate": 5.525656975318652e-05, "loss": 0.0049, "step": 29590 }, { "action_loss": 0.006302046123892069, "epoch": 26.609712230215827, "step": 29590 }, { "epoch": 26.618705035971225, "grad_norm": 0.07998238503932953, "learning_rate": 5.522916384200474e-05, "loss": 0.0034, "step": 29600 }, { "action_loss": 0.009437769651412964, "epoch": 26.618705035971225, "step": 29600 }, { "epoch": 26.62769784172662, "grad_norm": 0.12100249528884888, "learning_rate": 5.520175634234106e-05, "loss": 0.0069, "step": 29610 }, { "action_loss": 0.008808827959001064, "epoch": 26.62769784172662, "step": 29610 }, { "epoch": 26.636690647482013, "grad_norm": 0.22185374796390533, "learning_rate": 5.517434726252113e-05, "loss": 0.0059, "step": 29620 }, { "action_loss": 0.006635757628828287, "epoch": 26.636690647482013, "step": 29620 }, { "epoch": 26.64568345323741, "grad_norm": 0.18000765144824982, "learning_rate": 5.514693661087113e-05, "loss": 0.007, "step": 29630 }, { "action_loss": 0.005206082947552204, "epoch": 26.64568345323741, "step": 29630 }, { "epoch": 26.654676258992804, "grad_norm": 0.22345633804798126, "learning_rate": 5.511952439571769e-05, "loss": 0.0052, "step": 29640 }, { "action_loss": 0.008557816036045551, "epoch": 26.654676258992804, "step": 29640 }, { "epoch": 26.663669064748202, "grad_norm": 0.1394289880990982, "learning_rate": 5.509211062538791e-05, "loss": 0.0046, "step": 29650 }, { "action_loss": 0.010511676780879498, "epoch": 26.663669064748202, "step": 29650 }, { "epoch": 26.672661870503596, "grad_norm": 0.20514963567256927, "learning_rate": 5.506469530820939e-05, "loss": 0.0085, "step": 29660 }, { "action_loss": 0.003722533816471696, "epoch": 26.672661870503596, "step": 29660 }, { "epoch": 26.681654676258994, "grad_norm": 0.12907010316848755, "learning_rate": 5.503727845251014e-05, "loss": 0.004, "step": 29670 }, { "action_loss": 0.006029106676578522, "epoch": 26.681654676258994, "step": 29670 }, { "epoch": 26.690647482014388, "grad_norm": 0.16107186675071716, "learning_rate": 5.50098600666187e-05, "loss": 0.0064, "step": 29680 }, { "action_loss": 0.004330539144575596, "epoch": 26.690647482014388, "step": 29680 }, { "epoch": 26.699640287769785, "grad_norm": 0.18919013440608978, "learning_rate": 5.498244015886406e-05, "loss": 0.0058, "step": 29690 }, { "action_loss": 0.006505388300865889, "epoch": 26.699640287769785, "step": 29690 }, { "epoch": 26.70863309352518, "grad_norm": 0.10566605627536774, "learning_rate": 5.495501873757565e-05, "loss": 0.0047, "step": 29700 }, { "action_loss": 0.005350956693291664, "epoch": 26.70863309352518, "step": 29700 }, { "epoch": 26.717625899280577, "grad_norm": 0.17759937047958374, "learning_rate": 5.492759581108336e-05, "loss": 0.0052, "step": 29710 }, { "action_loss": 0.0060439333319664, "epoch": 26.717625899280577, "step": 29710 }, { "epoch": 26.72661870503597, "grad_norm": 0.1699092984199524, "learning_rate": 5.490017138771759e-05, "loss": 0.0066, "step": 29720 }, { "action_loss": 0.008447960950434208, "epoch": 26.72661870503597, "step": 29720 }, { "epoch": 26.735611510791365, "grad_norm": 0.1819498986005783, "learning_rate": 5.487274547580912e-05, "loss": 0.0064, "step": 29730 }, { "action_loss": 0.004678858909755945, "epoch": 26.735611510791365, "step": 29730 }, { "epoch": 26.744604316546763, "grad_norm": 0.21631106734275818, "learning_rate": 5.484531808368923e-05, "loss": 0.0048, "step": 29740 }, { "action_loss": 0.005619628354907036, "epoch": 26.744604316546763, "step": 29740 }, { "epoch": 26.753597122302157, "grad_norm": 0.14427877962589264, "learning_rate": 5.4817889219689656e-05, "loss": 0.0064, "step": 29750 }, { "action_loss": 0.0013939826749265194, "epoch": 26.753597122302157, "step": 29750 }, { "epoch": 26.762589928057555, "grad_norm": 0.20836201310157776, "learning_rate": 5.4790458892142536e-05, "loss": 0.0044, "step": 29760 }, { "action_loss": 0.002509573008865118, "epoch": 26.762589928057555, "step": 29760 }, { "epoch": 26.77158273381295, "grad_norm": 0.18250952661037445, "learning_rate": 5.476302710938048e-05, "loss": 0.0045, "step": 29770 }, { "action_loss": 0.004349156748503447, "epoch": 26.77158273381295, "step": 29770 }, { "epoch": 26.780575539568346, "grad_norm": 0.16498902440071106, "learning_rate": 5.473559387973657e-05, "loss": 0.0069, "step": 29780 }, { "action_loss": 0.008125096559524536, "epoch": 26.780575539568346, "step": 29780 }, { "epoch": 26.78956834532374, "grad_norm": 0.15162701904773712, "learning_rate": 5.470815921154425e-05, "loss": 0.0072, "step": 29790 }, { "action_loss": 0.002479015151038766, "epoch": 26.78956834532374, "step": 29790 }, { "epoch": 26.798561151079138, "grad_norm": 0.1481660157442093, "learning_rate": 5.468072311313749e-05, "loss": 0.004, "step": 29800 }, { "action_loss": 0.006190174724906683, "epoch": 26.798561151079138, "step": 29800 }, { "epoch": 26.807553956834532, "grad_norm": 0.106402687728405, "learning_rate": 5.465328559285063e-05, "loss": 0.0061, "step": 29810 }, { "action_loss": 0.0033867398742586374, "epoch": 26.807553956834532, "step": 29810 }, { "epoch": 26.81654676258993, "grad_norm": 0.1944720298051834, "learning_rate": 5.462584665901849e-05, "loss": 0.005, "step": 29820 }, { "action_loss": 0.0022679700050503016, "epoch": 26.81654676258993, "step": 29820 }, { "epoch": 26.825539568345324, "grad_norm": 0.2785419225692749, "learning_rate": 5.4598406319976235e-05, "loss": 0.008, "step": 29830 }, { "action_loss": 0.04277410730719566, "epoch": 26.825539568345324, "step": 29830 }, { "epoch": 26.834532374100718, "grad_norm": 0.2549719512462616, "learning_rate": 5.457096458405958e-05, "loss": 0.0115, "step": 29840 }, { "action_loss": 0.005109663587063551, "epoch": 26.834532374100718, "step": 29840 }, { "epoch": 26.843525179856115, "grad_norm": 0.16173681616783142, "learning_rate": 5.454352145960457e-05, "loss": 0.0069, "step": 29850 }, { "action_loss": 0.003534388029947877, "epoch": 26.843525179856115, "step": 29850 }, { "epoch": 26.85251798561151, "grad_norm": 0.10353996604681015, "learning_rate": 5.4516076954947715e-05, "loss": 0.0056, "step": 29860 }, { "action_loss": 0.005355047527700663, "epoch": 26.85251798561151, "step": 29860 }, { "epoch": 26.861510791366907, "grad_norm": 0.2164662629365921, "learning_rate": 5.448863107842591e-05, "loss": 0.0065, "step": 29870 }, { "action_loss": 0.008643033914268017, "epoch": 26.861510791366907, "step": 29870 }, { "epoch": 26.8705035971223, "grad_norm": 0.1790129393339157, "learning_rate": 5.446118383837651e-05, "loss": 0.0057, "step": 29880 }, { "action_loss": 0.03020597994327545, "epoch": 26.8705035971223, "step": 29880 }, { "epoch": 26.8794964028777, "grad_norm": 0.2342766970396042, "learning_rate": 5.443373524313722e-05, "loss": 0.0087, "step": 29890 }, { "action_loss": 0.002641193801537156, "epoch": 26.8794964028777, "step": 29890 }, { "epoch": 26.888489208633093, "grad_norm": 0.16787683963775635, "learning_rate": 5.440628530104626e-05, "loss": 0.0048, "step": 29900 }, { "action_loss": 0.0017597953556105494, "epoch": 26.888489208633093, "step": 29900 }, { "epoch": 26.89748201438849, "grad_norm": 0.13809111714363098, "learning_rate": 5.4378834020442146e-05, "loss": 0.0048, "step": 29910 }, { "action_loss": 0.008924613706767559, "epoch": 26.89748201438849, "step": 29910 }, { "epoch": 26.906474820143885, "grad_norm": 0.17308877408504486, "learning_rate": 5.4351381409663884e-05, "loss": 0.0047, "step": 29920 }, { "action_loss": 0.003633956192061305, "epoch": 26.906474820143885, "step": 29920 }, { "epoch": 26.915467625899282, "grad_norm": 0.21053944528102875, "learning_rate": 5.432392747705084e-05, "loss": 0.005, "step": 29930 }, { "action_loss": 0.006381102371960878, "epoch": 26.915467625899282, "step": 29930 }, { "epoch": 26.924460431654676, "grad_norm": 0.13842996954917908, "learning_rate": 5.429647223094278e-05, "loss": 0.0048, "step": 29940 }, { "action_loss": 0.006157532334327698, "epoch": 26.924460431654676, "step": 29940 }, { "epoch": 26.93345323741007, "grad_norm": 0.11074921488761902, "learning_rate": 5.4269015679679924e-05, "loss": 0.0059, "step": 29950 }, { "action_loss": 0.001359401154331863, "epoch": 26.93345323741007, "step": 29950 }, { "epoch": 26.942446043165468, "grad_norm": 0.10575352609157562, "learning_rate": 5.424155783160281e-05, "loss": 0.0035, "step": 29960 }, { "action_loss": 0.008758346550166607, "epoch": 26.942446043165468, "step": 29960 }, { "epoch": 26.951438848920862, "grad_norm": 0.245342418551445, "learning_rate": 5.4214098695052415e-05, "loss": 0.0079, "step": 29970 }, { "action_loss": 0.008713547140359879, "epoch": 26.951438848920862, "step": 29970 }, { "epoch": 26.96043165467626, "grad_norm": 0.18484589457511902, "learning_rate": 5.418663827837012e-05, "loss": 0.007, "step": 29980 }, { "action_loss": 0.004255614709109068, "epoch": 26.96043165467626, "step": 29980 }, { "epoch": 26.969424460431654, "grad_norm": 0.10118108242750168, "learning_rate": 5.415917658989763e-05, "loss": 0.0046, "step": 29990 }, { "action_loss": 0.0018853945657610893, "epoch": 26.969424460431654, "step": 29990 }, { "epoch": 26.97841726618705, "grad_norm": 0.10440142452716827, "learning_rate": 5.413171363797713e-05, "loss": 0.0074, "step": 30000 }, { "action_loss": 0.007809922099113464, "epoch": 26.97841726618705, "step": 30000 }, { "epoch": 26.987410071942445, "grad_norm": 0.0724162682890892, "learning_rate": 5.4104249430951116e-05, "loss": 0.0053, "step": 30010 }, { "action_loss": 0.003189631039276719, "epoch": 26.987410071942445, "step": 30010 }, { "epoch": 26.996402877697843, "grad_norm": 0.19644750654697418, "learning_rate": 5.4076783977162494e-05, "loss": 0.0049, "step": 30020 }, { "action_loss": 0.005914701148867607, "epoch": 26.996402877697843, "step": 30020 }, { "epoch": 27.005395683453237, "grad_norm": 0.19056132435798645, "learning_rate": 5.4049317284954525e-05, "loss": 0.004, "step": 30030 }, { "action_loss": 0.0068975486792624, "epoch": 27.005395683453237, "step": 30030 }, { "epoch": 27.014388489208635, "grad_norm": 0.1261182576417923, "learning_rate": 5.4021849362670884e-05, "loss": 0.0049, "step": 30040 }, { "action_loss": 0.0031920212786644697, "epoch": 27.014388489208635, "step": 30040 }, { "epoch": 27.02338129496403, "grad_norm": 0.1480836719274521, "learning_rate": 5.3994380218655604e-05, "loss": 0.0075, "step": 30050 }, { "action_loss": 0.0077644637785851955, "epoch": 27.02338129496403, "step": 30050 }, { "epoch": 27.032374100719423, "grad_norm": 0.16858524084091187, "learning_rate": 5.396690986125309e-05, "loss": 0.0054, "step": 30060 }, { "action_loss": 0.0023877713829278946, "epoch": 27.032374100719423, "step": 30060 }, { "epoch": 27.04136690647482, "grad_norm": 0.13185831904411316, "learning_rate": 5.3939438298808075e-05, "loss": 0.0054, "step": 30070 }, { "action_loss": 0.014563512988388538, "epoch": 27.04136690647482, "step": 30070 }, { "epoch": 27.050359712230215, "grad_norm": 0.19035901129245758, "learning_rate": 5.3911965539665744e-05, "loss": 0.0094, "step": 30080 }, { "action_loss": 0.004552046302706003, "epoch": 27.050359712230215, "step": 30080 }, { "epoch": 27.059352517985612, "grad_norm": 0.11976151168346405, "learning_rate": 5.388449159217156e-05, "loss": 0.0067, "step": 30090 }, { "action_loss": 0.0059364535845816135, "epoch": 27.059352517985612, "step": 30090 }, { "epoch": 27.068345323741006, "grad_norm": 0.1257142424583435, "learning_rate": 5.3857016464671385e-05, "loss": 0.0047, "step": 30100 }, { "action_loss": 0.0037567184772342443, "epoch": 27.068345323741006, "step": 30100 }, { "epoch": 27.077338129496404, "grad_norm": 0.11075224727392197, "learning_rate": 5.382954016551146e-05, "loss": 0.0041, "step": 30110 }, { "action_loss": 0.0023909711744636297, "epoch": 27.077338129496404, "step": 30110 }, { "epoch": 27.086330935251798, "grad_norm": 0.12492867559194565, "learning_rate": 5.380206270303835e-05, "loss": 0.0054, "step": 30120 }, { "action_loss": 0.007748603820800781, "epoch": 27.086330935251798, "step": 30120 }, { "epoch": 27.095323741007196, "grad_norm": 0.1985221952199936, "learning_rate": 5.377458408559897e-05, "loss": 0.0068, "step": 30130 }, { "action_loss": 0.002110666362568736, "epoch": 27.095323741007196, "step": 30130 }, { "epoch": 27.10431654676259, "grad_norm": 0.2069699466228485, "learning_rate": 5.374710432154061e-05, "loss": 0.0049, "step": 30140 }, { "action_loss": 0.003557831048965454, "epoch": 27.10431654676259, "step": 30140 }, { "epoch": 27.113309352517987, "grad_norm": 0.132942333817482, "learning_rate": 5.3719623419210886e-05, "loss": 0.004, "step": 30150 }, { "action_loss": 0.005059730727225542, "epoch": 27.113309352517987, "step": 30150 }, { "epoch": 27.12230215827338, "grad_norm": 0.2374311238527298, "learning_rate": 5.3692141386957786e-05, "loss": 0.0047, "step": 30160 }, { "action_loss": 0.003490593284368515, "epoch": 27.12230215827338, "step": 30160 }, { "epoch": 27.131294964028775, "grad_norm": 0.13508741557598114, "learning_rate": 5.3664658233129616e-05, "loss": 0.007, "step": 30170 }, { "action_loss": 0.003031250089406967, "epoch": 27.131294964028775, "step": 30170 }, { "epoch": 27.140287769784173, "grad_norm": 0.10933595150709152, "learning_rate": 5.363717396607504e-05, "loss": 0.0047, "step": 30180 }, { "action_loss": 0.0032481809612363577, "epoch": 27.140287769784173, "step": 30180 }, { "epoch": 27.149280575539567, "grad_norm": 0.23680686950683594, "learning_rate": 5.360968859414305e-05, "loss": 0.005, "step": 30190 }, { "action_loss": 0.002442854456603527, "epoch": 27.149280575539567, "step": 30190 }, { "epoch": 27.158273381294965, "grad_norm": 0.2680995762348175, "learning_rate": 5.358220212568295e-05, "loss": 0.0038, "step": 30200 }, { "action_loss": 0.020666714757680893, "epoch": 27.158273381294965, "step": 30200 }, { "epoch": 27.16726618705036, "grad_norm": 0.18587282299995422, "learning_rate": 5.355471456904444e-05, "loss": 0.0053, "step": 30210 }, { "action_loss": 0.015758486464619637, "epoch": 27.16726618705036, "step": 30210 }, { "epoch": 27.176258992805757, "grad_norm": 0.25749486684799194, "learning_rate": 5.3527225932577495e-05, "loss": 0.0109, "step": 30220 }, { "action_loss": 0.035682979971170425, "epoch": 27.176258992805757, "step": 30220 }, { "epoch": 27.18525179856115, "grad_norm": 0.1196666806936264, "learning_rate": 5.349973622463246e-05, "loss": 0.0114, "step": 30230 }, { "action_loss": 0.012760109268128872, "epoch": 27.18525179856115, "step": 30230 }, { "epoch": 27.194244604316548, "grad_norm": 0.12291476875543594, "learning_rate": 5.3472245453559956e-05, "loss": 0.005, "step": 30240 }, { "action_loss": 0.007502495776861906, "epoch": 27.194244604316548, "step": 30240 }, { "epoch": 27.203237410071942, "grad_norm": 0.1343008130788803, "learning_rate": 5.3444753627710955e-05, "loss": 0.0073, "step": 30250 }, { "action_loss": 0.006442483980208635, "epoch": 27.203237410071942, "step": 30250 }, { "epoch": 27.21223021582734, "grad_norm": 0.15649470686912537, "learning_rate": 5.341726075543676e-05, "loss": 0.0052, "step": 30260 }, { "action_loss": 0.002405178966000676, "epoch": 27.21223021582734, "step": 30260 }, { "epoch": 27.221223021582734, "grad_norm": 0.20029446482658386, "learning_rate": 5.338976684508898e-05, "loss": 0.0042, "step": 30270 }, { "action_loss": 0.007359000388532877, "epoch": 27.221223021582734, "step": 30270 }, { "epoch": 27.230215827338128, "grad_norm": 0.1930844932794571, "learning_rate": 5.336227190501953e-05, "loss": 0.0042, "step": 30280 }, { "action_loss": 0.0038813829887658358, "epoch": 27.230215827338128, "step": 30280 }, { "epoch": 27.239208633093526, "grad_norm": 0.12894800305366516, "learning_rate": 5.3334775943580664e-05, "loss": 0.0062, "step": 30290 }, { "action_loss": 0.004477975890040398, "epoch": 27.239208633093526, "step": 30290 }, { "epoch": 27.24820143884892, "grad_norm": 0.11655488610267639, "learning_rate": 5.330727896912491e-05, "loss": 0.005, "step": 30300 }, { "action_loss": 0.0019794630352407694, "epoch": 27.24820143884892, "step": 30300 }, { "epoch": 27.257194244604317, "grad_norm": 0.15181665122509003, "learning_rate": 5.327978099000511e-05, "loss": 0.0047, "step": 30310 }, { "action_loss": 0.0027792006731033325, "epoch": 27.257194244604317, "step": 30310 }, { "epoch": 27.26618705035971, "grad_norm": 0.20076483488082886, "learning_rate": 5.3252282014574465e-05, "loss": 0.005, "step": 30320 }, { "action_loss": 0.0035275917034596205, "epoch": 27.26618705035971, "step": 30320 }, { "epoch": 27.27517985611511, "grad_norm": 0.09707235544919968, "learning_rate": 5.322478205118641e-05, "loss": 0.0043, "step": 30330 }, { "action_loss": 0.008635195903480053, "epoch": 27.27517985611511, "step": 30330 }, { "epoch": 27.284172661870503, "grad_norm": 0.14524312317371368, "learning_rate": 5.3197281108194704e-05, "loss": 0.0079, "step": 30340 }, { "action_loss": 0.005228547845035791, "epoch": 27.284172661870503, "step": 30340 }, { "epoch": 27.2931654676259, "grad_norm": 0.12165479362010956, "learning_rate": 5.316977919395342e-05, "loss": 0.0039, "step": 30350 }, { "action_loss": 0.013916929252445698, "epoch": 27.2931654676259, "step": 30350 }, { "epoch": 27.302158273381295, "grad_norm": 0.11313320696353912, "learning_rate": 5.314227631681691e-05, "loss": 0.0062, "step": 30360 }, { "action_loss": 0.0060323067009449005, "epoch": 27.302158273381295, "step": 30360 }, { "epoch": 27.31115107913669, "grad_norm": 0.2904048264026642, "learning_rate": 5.311477248513982e-05, "loss": 0.0051, "step": 30370 }, { "action_loss": 0.004818856716156006, "epoch": 27.31115107913669, "step": 30370 }, { "epoch": 27.320143884892087, "grad_norm": 0.20573309063911438, "learning_rate": 5.30872677072771e-05, "loss": 0.0077, "step": 30380 }, { "action_loss": 0.0028482310008257627, "epoch": 27.320143884892087, "step": 30380 }, { "epoch": 27.32913669064748, "grad_norm": 0.22898751497268677, "learning_rate": 5.3059761991583954e-05, "loss": 0.0083, "step": 30390 }, { "action_loss": 0.009625975973904133, "epoch": 27.32913669064748, "step": 30390 }, { "epoch": 27.33812949640288, "grad_norm": 0.16427795588970184, "learning_rate": 5.303225534641592e-05, "loss": 0.0068, "step": 30400 }, { "action_loss": 0.006840739399194717, "epoch": 27.33812949640288, "step": 30400 }, { "epoch": 27.347122302158272, "grad_norm": 0.14706666767597198, "learning_rate": 5.300474778012875e-05, "loss": 0.0055, "step": 30410 }, { "action_loss": 0.003046702593564987, "epoch": 27.347122302158272, "step": 30410 }, { "epoch": 27.35611510791367, "grad_norm": 0.16635781526565552, "learning_rate": 5.297723930107855e-05, "loss": 0.0051, "step": 30420 }, { "action_loss": 0.009646881371736526, "epoch": 27.35611510791367, "step": 30420 }, { "epoch": 27.365107913669064, "grad_norm": 0.1377170830965042, "learning_rate": 5.294972991762167e-05, "loss": 0.0058, "step": 30430 }, { "action_loss": 0.005540148820728064, "epoch": 27.365107913669064, "step": 30430 }, { "epoch": 27.37410071942446, "grad_norm": 0.1931273490190506, "learning_rate": 5.292221963811472e-05, "loss": 0.005, "step": 30440 }, { "action_loss": 0.005470707546919584, "epoch": 27.37410071942446, "step": 30440 }, { "epoch": 27.383093525179856, "grad_norm": 0.10947663336992264, "learning_rate": 5.28947084709146e-05, "loss": 0.0046, "step": 30450 }, { "action_loss": 0.003662779927253723, "epoch": 27.383093525179856, "step": 30450 }, { "epoch": 27.392086330935253, "grad_norm": 0.17280641198158264, "learning_rate": 5.2867196424378465e-05, "loss": 0.0061, "step": 30460 }, { "action_loss": 0.003803956089541316, "epoch": 27.392086330935253, "step": 30460 }, { "epoch": 27.401079136690647, "grad_norm": 0.1693195253610611, "learning_rate": 5.2839683506863765e-05, "loss": 0.0075, "step": 30470 }, { "action_loss": 0.0023866144474595785, "epoch": 27.401079136690647, "step": 30470 }, { "epoch": 27.41007194244604, "grad_norm": 0.14455151557922363, "learning_rate": 5.281216972672821e-05, "loss": 0.0056, "step": 30480 }, { "action_loss": 0.005255203694105148, "epoch": 27.41007194244604, "step": 30480 }, { "epoch": 27.41906474820144, "grad_norm": 0.1371055692434311, "learning_rate": 5.278465509232973e-05, "loss": 0.0083, "step": 30490 }, { "action_loss": 0.0011210934026166797, "epoch": 27.41906474820144, "step": 30490 }, { "epoch": 27.428057553956833, "grad_norm": 0.13812537491321564, "learning_rate": 5.275713961202655e-05, "loss": 0.0055, "step": 30500 }, { "action_loss": 0.003508833469823003, "epoch": 27.428057553956833, "step": 30500 }, { "epoch": 27.43705035971223, "grad_norm": 0.18815000355243683, "learning_rate": 5.2729623294177165e-05, "loss": 0.0041, "step": 30510 }, { "action_loss": 0.0019753461237996817, "epoch": 27.43705035971223, "step": 30510 }, { "epoch": 27.446043165467625, "grad_norm": 0.18971005082130432, "learning_rate": 5.270210614714028e-05, "loss": 0.006, "step": 30520 }, { "action_loss": 0.0038363647181540728, "epoch": 27.446043165467625, "step": 30520 }, { "epoch": 27.455035971223023, "grad_norm": 0.25813764333724976, "learning_rate": 5.267458817927491e-05, "loss": 0.0087, "step": 30530 }, { "action_loss": 0.0020773587748408318, "epoch": 27.455035971223023, "step": 30530 }, { "epoch": 27.464028776978417, "grad_norm": 0.19224204123020172, "learning_rate": 5.264706939894026e-05, "loss": 0.0054, "step": 30540 }, { "action_loss": 0.003099639667198062, "epoch": 27.464028776978417, "step": 30540 }, { "epoch": 27.473021582733814, "grad_norm": 0.12517523765563965, "learning_rate": 5.261954981449584e-05, "loss": 0.0059, "step": 30550 }, { "action_loss": 0.013598092831671238, "epoch": 27.473021582733814, "step": 30550 }, { "epoch": 27.48201438848921, "grad_norm": 0.22127275168895721, "learning_rate": 5.2592029434301324e-05, "loss": 0.006, "step": 30560 }, { "action_loss": 0.00971341785043478, "epoch": 27.48201438848921, "step": 30560 }, { "epoch": 27.491007194244606, "grad_norm": 0.2567920982837677, "learning_rate": 5.256450826671672e-05, "loss": 0.009, "step": 30570 }, { "action_loss": 0.003890870837494731, "epoch": 27.491007194244606, "step": 30570 }, { "epoch": 27.5, "grad_norm": 0.14348575472831726, "learning_rate": 5.253698632010221e-05, "loss": 0.0077, "step": 30580 }, { "action_loss": 0.011513431556522846, "epoch": 27.5, "step": 30580 }, { "epoch": 27.508992805755394, "grad_norm": 0.14883993566036224, "learning_rate": 5.2509463602818246e-05, "loss": 0.0059, "step": 30590 }, { "action_loss": 0.004720291122794151, "epoch": 27.508992805755394, "step": 30590 }, { "epoch": 27.51798561151079, "grad_norm": 0.12029989063739777, "learning_rate": 5.248194012322549e-05, "loss": 0.0055, "step": 30600 }, { "action_loss": 0.013537108898162842, "epoch": 27.51798561151079, "step": 30600 }, { "epoch": 27.526978417266186, "grad_norm": 0.18253004550933838, "learning_rate": 5.245441588968486e-05, "loss": 0.0061, "step": 30610 }, { "action_loss": 0.002686257241293788, "epoch": 27.526978417266186, "step": 30610 }, { "epoch": 27.535971223021583, "grad_norm": 0.22093920409679413, "learning_rate": 5.242689091055748e-05, "loss": 0.0077, "step": 30620 }, { "action_loss": 0.0031191117595881224, "epoch": 27.535971223021583, "step": 30620 }, { "epoch": 27.544964028776977, "grad_norm": 0.2007552534341812, "learning_rate": 5.239936519420473e-05, "loss": 0.0107, "step": 30630 }, { "action_loss": 0.012141961604356766, "epoch": 27.544964028776977, "step": 30630 }, { "epoch": 27.553956834532375, "grad_norm": 0.1890653371810913, "learning_rate": 5.2371838748988175e-05, "loss": 0.0096, "step": 30640 }, { "action_loss": 0.011801425367593765, "epoch": 27.553956834532375, "step": 30640 }, { "epoch": 27.56294964028777, "grad_norm": 0.10726410895586014, "learning_rate": 5.234431158326965e-05, "loss": 0.0075, "step": 30650 }, { "action_loss": 0.013540484011173248, "epoch": 27.56294964028777, "step": 30650 }, { "epoch": 27.571942446043167, "grad_norm": 0.16465060412883759, "learning_rate": 5.231678370541115e-05, "loss": 0.0064, "step": 30660 }, { "action_loss": 0.02194678969681263, "epoch": 27.571942446043167, "step": 30660 }, { "epoch": 27.58093525179856, "grad_norm": 0.19106745719909668, "learning_rate": 5.228925512377495e-05, "loss": 0.0072, "step": 30670 }, { "action_loss": 0.005316261202096939, "epoch": 27.58093525179856, "step": 30670 }, { "epoch": 27.58992805755396, "grad_norm": 0.1698937714099884, "learning_rate": 5.2261725846723465e-05, "loss": 0.0062, "step": 30680 }, { "action_loss": 0.0034858928993344307, "epoch": 27.58992805755396, "step": 30680 }, { "epoch": 27.598920863309353, "grad_norm": 0.0969182699918747, "learning_rate": 5.22341958826194e-05, "loss": 0.0054, "step": 30690 }, { "action_loss": 0.012635838240385056, "epoch": 27.598920863309353, "step": 30690 }, { "epoch": 27.607913669064747, "grad_norm": 0.1483546942472458, "learning_rate": 5.22066652398256e-05, "loss": 0.0053, "step": 30700 }, { "action_loss": 0.006914839148521423, "epoch": 27.607913669064747, "step": 30700 }, { "epoch": 27.616906474820144, "grad_norm": 0.210397407412529, "learning_rate": 5.2179133926705185e-05, "loss": 0.0059, "step": 30710 }, { "action_loss": 0.003052501706406474, "epoch": 27.616906474820144, "step": 30710 }, { "epoch": 27.62589928057554, "grad_norm": 0.11005605012178421, "learning_rate": 5.215160195162141e-05, "loss": 0.0053, "step": 30720 }, { "action_loss": 0.0049638571217656136, "epoch": 27.62589928057554, "step": 30720 }, { "epoch": 27.634892086330936, "grad_norm": 0.1084856167435646, "learning_rate": 5.212406932293776e-05, "loss": 0.0043, "step": 30730 }, { "action_loss": 0.00388311012648046, "epoch": 27.634892086330936, "step": 30730 }, { "epoch": 27.64388489208633, "grad_norm": 0.10020298510789871, "learning_rate": 5.209653604901795e-05, "loss": 0.0051, "step": 30740 }, { "action_loss": 0.0037398890126496553, "epoch": 27.64388489208633, "step": 30740 }, { "epoch": 27.652877697841728, "grad_norm": 0.22406809031963348, "learning_rate": 5.206900213822584e-05, "loss": 0.0055, "step": 30750 }, { "action_loss": 0.002339221304282546, "epoch": 27.652877697841728, "step": 30750 }, { "epoch": 27.66187050359712, "grad_norm": 0.12047087401151657, "learning_rate": 5.204146759892551e-05, "loss": 0.0066, "step": 30760 }, { "action_loss": 0.0034212127793580294, "epoch": 27.66187050359712, "step": 30760 }, { "epoch": 27.67086330935252, "grad_norm": 0.16061556339263916, "learning_rate": 5.2013932439481216e-05, "loss": 0.0053, "step": 30770 }, { "action_loss": 0.021115193143486977, "epoch": 27.67086330935252, "step": 30770 }, { "epoch": 27.679856115107913, "grad_norm": 0.20737160742282867, "learning_rate": 5.198639666825743e-05, "loss": 0.0121, "step": 30780 }, { "action_loss": 0.008395876735448837, "epoch": 27.679856115107913, "step": 30780 }, { "epoch": 27.68884892086331, "grad_norm": 0.26145631074905396, "learning_rate": 5.195886029361877e-05, "loss": 0.0066, "step": 30790 }, { "action_loss": 0.005513227079063654, "epoch": 27.68884892086331, "step": 30790 }, { "epoch": 27.697841726618705, "grad_norm": 0.2470031976699829, "learning_rate": 5.193132332393009e-05, "loss": 0.009, "step": 30800 }, { "action_loss": 0.02709432877600193, "epoch": 27.697841726618705, "step": 30800 }, { "epoch": 27.7068345323741, "grad_norm": 0.1292891800403595, "learning_rate": 5.1903785767556376e-05, "loss": 0.007, "step": 30810 }, { "action_loss": 0.0024447550531476736, "epoch": 27.7068345323741, "step": 30810 }, { "epoch": 27.715827338129497, "grad_norm": 0.2288363128900528, "learning_rate": 5.187624763286282e-05, "loss": 0.0052, "step": 30820 }, { "action_loss": 0.010846992023289204, "epoch": 27.715827338129497, "step": 30820 }, { "epoch": 27.72482014388489, "grad_norm": 0.23318243026733398, "learning_rate": 5.184870892821475e-05, "loss": 0.013, "step": 30830 }, { "action_loss": 0.0029563114512711763, "epoch": 27.72482014388489, "step": 30830 }, { "epoch": 27.73381294964029, "grad_norm": 0.20645803213119507, "learning_rate": 5.182116966197773e-05, "loss": 0.0063, "step": 30840 }, { "action_loss": 0.0036227067466825247, "epoch": 27.73381294964029, "step": 30840 }, { "epoch": 27.742805755395683, "grad_norm": 0.12316912412643433, "learning_rate": 5.1793629842517466e-05, "loss": 0.0048, "step": 30850 }, { "action_loss": 0.005053875036537647, "epoch": 27.742805755395683, "step": 30850 }, { "epoch": 27.75179856115108, "grad_norm": 0.16131587326526642, "learning_rate": 5.17660894781998e-05, "loss": 0.0053, "step": 30860 }, { "action_loss": 0.0016919021727517247, "epoch": 27.75179856115108, "step": 30860 }, { "epoch": 27.760791366906474, "grad_norm": 0.18443840742111206, "learning_rate": 5.173854857739079e-05, "loss": 0.0062, "step": 30870 }, { "action_loss": 0.0024465376045554876, "epoch": 27.760791366906474, "step": 30870 }, { "epoch": 27.769784172661872, "grad_norm": 0.1172894760966301, "learning_rate": 5.171100714845661e-05, "loss": 0.0048, "step": 30880 }, { "action_loss": 0.011595609597861767, "epoch": 27.769784172661872, "step": 30880 }, { "epoch": 27.778776978417266, "grad_norm": 0.17095553874969482, "learning_rate": 5.1683465199763646e-05, "loss": 0.0085, "step": 30890 }, { "action_loss": 0.0028986476827412844, "epoch": 27.778776978417266, "step": 30890 }, { "epoch": 27.78776978417266, "grad_norm": 0.16310685873031616, "learning_rate": 5.16559227396784e-05, "loss": 0.0049, "step": 30900 }, { "action_loss": 0.005019988398998976, "epoch": 27.78776978417266, "step": 30900 }, { "epoch": 27.796762589928058, "grad_norm": 0.16318489611148834, "learning_rate": 5.1628379776567556e-05, "loss": 0.0058, "step": 30910 }, { "action_loss": 0.0026012046728283167, "epoch": 27.796762589928058, "step": 30910 }, { "epoch": 27.805755395683452, "grad_norm": 0.1219649612903595, "learning_rate": 5.160083631879792e-05, "loss": 0.0057, "step": 30920 }, { "action_loss": 0.0015961574390530586, "epoch": 27.805755395683452, "step": 30920 }, { "epoch": 27.81474820143885, "grad_norm": 0.17476755380630493, "learning_rate": 5.1573292374736484e-05, "loss": 0.0053, "step": 30930 }, { "action_loss": 0.0035662937443703413, "epoch": 27.81474820143885, "step": 30930 }, { "epoch": 27.823741007194243, "grad_norm": 0.21247680485248566, "learning_rate": 5.1545747952750356e-05, "loss": 0.0068, "step": 30940 }, { "action_loss": 0.007843057624995708, "epoch": 27.823741007194243, "step": 30940 }, { "epoch": 27.83273381294964, "grad_norm": 0.1200575977563858, "learning_rate": 5.151820306120682e-05, "loss": 0.005, "step": 30950 }, { "action_loss": 0.003701508976519108, "epoch": 27.83273381294964, "step": 30950 }, { "epoch": 27.841726618705035, "grad_norm": 0.15039066970348358, "learning_rate": 5.149065770847328e-05, "loss": 0.0042, "step": 30960 }, { "action_loss": 0.006230426486581564, "epoch": 27.841726618705035, "step": 30960 }, { "epoch": 27.850719424460433, "grad_norm": 0.21307159960269928, "learning_rate": 5.1463111902917297e-05, "loss": 0.0062, "step": 30970 }, { "action_loss": 0.002677223877981305, "epoch": 27.850719424460433, "step": 30970 }, { "epoch": 27.859712230215827, "grad_norm": 0.16696034371852875, "learning_rate": 5.143556565290654e-05, "loss": 0.0079, "step": 30980 }, { "action_loss": 0.0032088139560073614, "epoch": 27.859712230215827, "step": 30980 }, { "epoch": 27.868705035971225, "grad_norm": 0.24424612522125244, "learning_rate": 5.140801896680882e-05, "loss": 0.0057, "step": 30990 }, { "action_loss": 0.0062757874839007854, "epoch": 27.868705035971225, "step": 30990 }, { "epoch": 27.87769784172662, "grad_norm": 0.15866176784038544, "learning_rate": 5.1380471852992144e-05, "loss": 0.0058, "step": 31000 }, { "action_loss": 0.004781441763043404, "epoch": 27.87769784172662, "step": 31000 }, { "epoch": 27.886690647482013, "grad_norm": 0.12714384496212006, "learning_rate": 5.135292431982457e-05, "loss": 0.0054, "step": 31010 }, { "action_loss": 0.0037463281769305468, "epoch": 27.886690647482013, "step": 31010 }, { "epoch": 27.89568345323741, "grad_norm": 0.1693248748779297, "learning_rate": 5.1325376375674294e-05, "loss": 0.0057, "step": 31020 }, { "action_loss": 0.013146749697625637, "epoch": 27.89568345323741, "step": 31020 }, { "epoch": 27.904676258992804, "grad_norm": 0.12331540882587433, "learning_rate": 5.129782802890968e-05, "loss": 0.0068, "step": 31030 }, { "action_loss": 0.0021048153284937143, "epoch": 27.904676258992804, "step": 31030 }, { "epoch": 27.913669064748202, "grad_norm": 0.11795571446418762, "learning_rate": 5.127027928789916e-05, "loss": 0.0052, "step": 31040 }, { "action_loss": 0.007154427468776703, "epoch": 27.913669064748202, "step": 31040 }, { "epoch": 27.922661870503596, "grad_norm": 0.20892807841300964, "learning_rate": 5.124273016101135e-05, "loss": 0.0154, "step": 31050 }, { "action_loss": 0.0034760190173983574, "epoch": 27.922661870503596, "step": 31050 }, { "epoch": 27.931654676258994, "grad_norm": 0.18768876791000366, "learning_rate": 5.121518065661492e-05, "loss": 0.0051, "step": 31060 }, { "action_loss": 0.0017942688427865505, "epoch": 27.931654676258994, "step": 31060 }, { "epoch": 27.940647482014388, "grad_norm": 0.17380131781101227, "learning_rate": 5.11876307830787e-05, "loss": 0.0053, "step": 31070 }, { "action_loss": 0.008200188167393208, "epoch": 27.940647482014388, "step": 31070 }, { "epoch": 27.949640287769785, "grad_norm": 0.172417551279068, "learning_rate": 5.1160080548771596e-05, "loss": 0.0086, "step": 31080 }, { "action_loss": 0.0038679635617882013, "epoch": 27.949640287769785, "step": 31080 }, { "epoch": 27.95863309352518, "grad_norm": 0.2234964817762375, "learning_rate": 5.1132529962062656e-05, "loss": 0.0063, "step": 31090 }, { "action_loss": 0.0020237669814378023, "epoch": 27.95863309352518, "step": 31090 }, { "epoch": 27.967625899280577, "grad_norm": 0.2042674869298935, "learning_rate": 5.110497903132101e-05, "loss": 0.0059, "step": 31100 }, { "action_loss": 0.0013924529775977135, "epoch": 27.967625899280577, "step": 31100 }, { "epoch": 27.97661870503597, "grad_norm": 0.22257062792778015, "learning_rate": 5.107742776491592e-05, "loss": 0.0053, "step": 31110 }, { "action_loss": 0.002978762611746788, "epoch": 27.97661870503597, "step": 31110 }, { "epoch": 27.985611510791365, "grad_norm": 0.17257492244243622, "learning_rate": 5.104987617121673e-05, "loss": 0.007, "step": 31120 }, { "action_loss": 0.002136671682819724, "epoch": 27.985611510791365, "step": 31120 }, { "epoch": 27.994604316546763, "grad_norm": 0.18709908425807953, "learning_rate": 5.102232425859287e-05, "loss": 0.0053, "step": 31130 }, { "action_loss": 0.0024887584149837494, "epoch": 27.994604316546763, "step": 31130 }, { "epoch": 28.003597122302157, "grad_norm": 0.14217734336853027, "learning_rate": 5.09947720354139e-05, "loss": 0.0038, "step": 31140 }, { "action_loss": 0.009303737431764603, "epoch": 28.003597122302157, "step": 31140 }, { "epoch": 28.012589928057555, "grad_norm": 0.145476296544075, "learning_rate": 5.096721951004942e-05, "loss": 0.0068, "step": 31150 }, { "action_loss": 0.0023905730340629816, "epoch": 28.012589928057555, "step": 31150 }, { "epoch": 28.02158273381295, "grad_norm": 0.1358090043067932, "learning_rate": 5.0939666690869227e-05, "loss": 0.0043, "step": 31160 }, { "action_loss": 0.007369091268628836, "epoch": 28.02158273381295, "step": 31160 }, { "epoch": 28.030575539568346, "grad_norm": 0.18338538706302643, "learning_rate": 5.0912113586243096e-05, "loss": 0.0047, "step": 31170 }, { "action_loss": 0.0034713828936219215, "epoch": 28.030575539568346, "step": 31170 }, { "epoch": 28.03956834532374, "grad_norm": 0.16795064508914948, "learning_rate": 5.0884560204540935e-05, "loss": 0.0067, "step": 31180 }, { "action_loss": 0.01366813387721777, "epoch": 28.03956834532374, "step": 31180 }, { "epoch": 28.048561151079138, "grad_norm": 0.168686643242836, "learning_rate": 5.0857006554132736e-05, "loss": 0.0065, "step": 31190 }, { "action_loss": 0.00369562697596848, "epoch": 28.048561151079138, "step": 31190 }, { "epoch": 28.057553956834532, "grad_norm": 0.17628149688243866, "learning_rate": 5.0829452643388575e-05, "loss": 0.004, "step": 31200 }, { "action_loss": 0.004211730789393187, "epoch": 28.057553956834532, "step": 31200 }, { "epoch": 28.06654676258993, "grad_norm": 0.11525247991085052, "learning_rate": 5.08018984806786e-05, "loss": 0.004, "step": 31210 }, { "action_loss": 0.0027103822212666273, "epoch": 28.06654676258993, "step": 31210 }, { "epoch": 28.075539568345324, "grad_norm": 0.1018332690000534, "learning_rate": 5.0774344074373036e-05, "loss": 0.0072, "step": 31220 }, { "action_loss": 0.003663616953417659, "epoch": 28.075539568345324, "step": 31220 }, { "epoch": 28.084532374100718, "grad_norm": 0.2101675420999527, "learning_rate": 5.07467894328422e-05, "loss": 0.0052, "step": 31230 }, { "action_loss": 0.013068781234323978, "epoch": 28.084532374100718, "step": 31230 }, { "epoch": 28.093525179856115, "grad_norm": 0.18993732333183289, "learning_rate": 5.0719234564456454e-05, "loss": 0.0067, "step": 31240 }, { "action_loss": 0.0070052314549684525, "epoch": 28.093525179856115, "step": 31240 }, { "epoch": 28.10251798561151, "grad_norm": 0.14532685279846191, "learning_rate": 5.0691679477586216e-05, "loss": 0.0032, "step": 31250 }, { "action_loss": 0.0045542954467237, "epoch": 28.10251798561151, "step": 31250 }, { "epoch": 28.111510791366907, "grad_norm": 0.08664608001708984, "learning_rate": 5.0664124180602035e-05, "loss": 0.0043, "step": 31260 }, { "action_loss": 0.002132120542228222, "epoch": 28.111510791366907, "step": 31260 }, { "epoch": 28.1205035971223, "grad_norm": 0.19612471759319305, "learning_rate": 5.063656868187447e-05, "loss": 0.0049, "step": 31270 }, { "action_loss": 0.005671955645084381, "epoch": 28.1205035971223, "step": 31270 }, { "epoch": 28.1294964028777, "grad_norm": 0.16763876378536224, "learning_rate": 5.060901298977413e-05, "loss": 0.0058, "step": 31280 }, { "action_loss": 0.012257386930286884, "epoch": 28.1294964028777, "step": 31280 }, { "epoch": 28.138489208633093, "grad_norm": 0.25941774249076843, "learning_rate": 5.0581457112671725e-05, "loss": 0.0121, "step": 31290 }, { "action_loss": 0.0027184830978512764, "epoch": 28.138489208633093, "step": 31290 }, { "epoch": 28.14748201438849, "grad_norm": 0.15615473687648773, "learning_rate": 5.0553901058938016e-05, "loss": 0.0046, "step": 31300 }, { "action_loss": 0.002738617593422532, "epoch": 28.14748201438849, "step": 31300 }, { "epoch": 28.156474820143885, "grad_norm": 0.12458856403827667, "learning_rate": 5.052634483694377e-05, "loss": 0.0048, "step": 31310 }, { "action_loss": 0.002633674070239067, "epoch": 28.156474820143885, "step": 31310 }, { "epoch": 28.165467625899282, "grad_norm": 0.21670134365558624, "learning_rate": 5.049878845505988e-05, "loss": 0.0089, "step": 31320 }, { "action_loss": 0.0030130634550005198, "epoch": 28.165467625899282, "step": 31320 }, { "epoch": 28.174460431654676, "grad_norm": 0.15192362666130066, "learning_rate": 5.047123192165721e-05, "loss": 0.0052, "step": 31330 }, { "action_loss": 0.003287122817710042, "epoch": 28.174460431654676, "step": 31330 }, { "epoch": 28.18345323741007, "grad_norm": 0.19994474947452545, "learning_rate": 5.0443675245106735e-05, "loss": 0.0067, "step": 31340 }, { "action_loss": 0.0027849257458001375, "epoch": 28.18345323741007, "step": 31340 }, { "epoch": 28.192446043165468, "grad_norm": 0.22778278589248657, "learning_rate": 5.0416118433779426e-05, "loss": 0.0043, "step": 31350 }, { "action_loss": 0.016071785241365433, "epoch": 28.192446043165468, "step": 31350 }, { "epoch": 28.201438848920862, "grad_norm": 0.13868281245231628, "learning_rate": 5.038856149604633e-05, "loss": 0.0054, "step": 31360 }, { "action_loss": 0.006699769292026758, "epoch": 28.201438848920862, "step": 31360 }, { "epoch": 28.21043165467626, "grad_norm": 0.11464237421751022, "learning_rate": 5.03610044402785e-05, "loss": 0.0058, "step": 31370 }, { "action_loss": 0.0031100325286388397, "epoch": 28.21043165467626, "step": 31370 }, { "epoch": 28.219424460431654, "grad_norm": 0.19763076305389404, "learning_rate": 5.033344727484707e-05, "loss": 0.0075, "step": 31380 }, { "action_loss": 0.010348348878324032, "epoch": 28.219424460431654, "step": 31380 }, { "epoch": 28.22841726618705, "grad_norm": 0.1436319500207901, "learning_rate": 5.030589000812315e-05, "loss": 0.0056, "step": 31390 }, { "action_loss": 0.003416970372200012, "epoch": 28.22841726618705, "step": 31390 }, { "epoch": 28.237410071942445, "grad_norm": 0.16039323806762695, "learning_rate": 5.027833264847793e-05, "loss": 0.0044, "step": 31400 }, { "action_loss": 0.005698188673704863, "epoch": 28.237410071942445, "step": 31400 }, { "epoch": 28.246402877697843, "grad_norm": 0.15770062804222107, "learning_rate": 5.025077520428258e-05, "loss": 0.0085, "step": 31410 }, { "action_loss": 0.001610871753655374, "epoch": 28.246402877697843, "step": 31410 }, { "epoch": 28.255395683453237, "grad_norm": 0.11713063716888428, "learning_rate": 5.022321768390837e-05, "loss": 0.0043, "step": 31420 }, { "action_loss": 0.018334886059165, "epoch": 28.255395683453237, "step": 31420 }, { "epoch": 28.264388489208635, "grad_norm": 0.28556033968925476, "learning_rate": 5.0195660095726516e-05, "loss": 0.0075, "step": 31430 }, { "action_loss": 0.004205886274576187, "epoch": 28.264388489208635, "step": 31430 }, { "epoch": 28.27338129496403, "grad_norm": 0.14595866203308105, "learning_rate": 5.016810244810829e-05, "loss": 0.0061, "step": 31440 }, { "action_loss": 0.00992415752261877, "epoch": 28.27338129496403, "step": 31440 }, { "epoch": 28.282374100719423, "grad_norm": 0.23431847989559174, "learning_rate": 5.0140544749424976e-05, "loss": 0.0049, "step": 31450 }, { "action_loss": 0.003243759972974658, "epoch": 28.282374100719423, "step": 31450 }, { "epoch": 28.29136690647482, "grad_norm": 0.205271914601326, "learning_rate": 5.0112987008047874e-05, "loss": 0.0041, "step": 31460 }, { "action_loss": 0.0063445125706493855, "epoch": 28.29136690647482, "step": 31460 }, { "epoch": 28.300359712230215, "grad_norm": 0.18347784876823425, "learning_rate": 5.008542923234831e-05, "loss": 0.0064, "step": 31470 }, { "action_loss": 0.005662388633936644, "epoch": 28.300359712230215, "step": 31470 }, { "epoch": 28.309352517985612, "grad_norm": 0.165969118475914, "learning_rate": 5.00578714306976e-05, "loss": 0.0063, "step": 31480 }, { "action_loss": 0.0022168278228491545, "epoch": 28.309352517985612, "step": 31480 }, { "epoch": 28.318345323741006, "grad_norm": 0.15840695798397064, "learning_rate": 5.0030313611467084e-05, "loss": 0.0052, "step": 31490 }, { "action_loss": 0.0068636368960142136, "epoch": 28.318345323741006, "step": 31490 }, { "epoch": 28.327338129496404, "grad_norm": 0.17320366203784943, "learning_rate": 5.0002755783028074e-05, "loss": 0.0055, "step": 31500 }, { "action_loss": 0.016985232010483742, "epoch": 28.327338129496404, "step": 31500 }, { "epoch": 28.336330935251798, "grad_norm": 0.22699779272079468, "learning_rate": 4.997519795375194e-05, "loss": 0.0067, "step": 31510 }, { "action_loss": 0.0021436612587422132, "epoch": 28.336330935251798, "step": 31510 }, { "epoch": 28.345323741007196, "grad_norm": 0.16209180653095245, "learning_rate": 4.9947640132010016e-05, "loss": 0.0065, "step": 31520 }, { "action_loss": 0.014489325694739819, "epoch": 28.345323741007196, "step": 31520 }, { "epoch": 28.35431654676259, "grad_norm": 0.19610580801963806, "learning_rate": 4.9920082326173625e-05, "loss": 0.0065, "step": 31530 }, { "action_loss": 0.026059769093990326, "epoch": 28.35431654676259, "step": 31530 }, { "epoch": 28.363309352517987, "grad_norm": 0.23293562233448029, "learning_rate": 4.9892524544614114e-05, "loss": 0.007, "step": 31540 }, { "action_loss": 0.003466070629656315, "epoch": 28.363309352517987, "step": 31540 }, { "epoch": 28.37230215827338, "grad_norm": 0.21980948746204376, "learning_rate": 4.986496679570283e-05, "loss": 0.0062, "step": 31550 }, { "action_loss": 0.00454751355573535, "epoch": 28.37230215827338, "step": 31550 }, { "epoch": 28.381294964028775, "grad_norm": 0.22937938570976257, "learning_rate": 4.983740908781105e-05, "loss": 0.0074, "step": 31560 }, { "action_loss": 0.0028462449554353952, "epoch": 28.381294964028775, "step": 31560 }, { "epoch": 28.390287769784173, "grad_norm": 0.14397893846035004, "learning_rate": 4.9809851429310116e-05, "loss": 0.0043, "step": 31570 }, { "action_loss": 0.004955565091222525, "epoch": 28.390287769784173, "step": 31570 }, { "epoch": 28.399280575539567, "grad_norm": 0.1504996120929718, "learning_rate": 4.9782293828571275e-05, "loss": 0.0071, "step": 31580 }, { "action_loss": 0.0036843784619122744, "epoch": 28.399280575539567, "step": 31580 }, { "epoch": 28.408273381294965, "grad_norm": 0.1248478889465332, "learning_rate": 4.9754736293965846e-05, "loss": 0.0056, "step": 31590 }, { "action_loss": 0.005251800175756216, "epoch": 28.408273381294965, "step": 31590 }, { "epoch": 28.41726618705036, "grad_norm": 0.12368396669626236, "learning_rate": 4.972717883386502e-05, "loss": 0.0072, "step": 31600 }, { "action_loss": 0.0014718115562573075, "epoch": 28.41726618705036, "step": 31600 }, { "epoch": 28.426258992805757, "grad_norm": 0.1424037367105484, "learning_rate": 4.9699621456640075e-05, "loss": 0.0058, "step": 31610 }, { "action_loss": 0.0020361600909382105, "epoch": 28.426258992805757, "step": 31610 }, { "epoch": 28.43525179856115, "grad_norm": 0.14032918214797974, "learning_rate": 4.9672064170662214e-05, "loss": 0.004, "step": 31620 }, { "action_loss": 0.001975604332983494, "epoch": 28.43525179856115, "step": 31620 }, { "epoch": 28.444244604316548, "grad_norm": 0.22725433111190796, "learning_rate": 4.9644506984302583e-05, "loss": 0.0103, "step": 31630 }, { "action_loss": 0.003394604893401265, "epoch": 28.444244604316548, "step": 31630 }, { "epoch": 28.453237410071942, "grad_norm": 0.1900215595960617, "learning_rate": 4.9616949905932356e-05, "loss": 0.006, "step": 31640 }, { "action_loss": 0.008065402507781982, "epoch": 28.453237410071942, "step": 31640 }, { "epoch": 28.46223021582734, "grad_norm": 0.12904302775859833, "learning_rate": 4.9589392943922615e-05, "loss": 0.0062, "step": 31650 }, { "action_loss": 0.0033481514547020197, "epoch": 28.46223021582734, "step": 31650 }, { "epoch": 28.471223021582734, "grad_norm": 0.16254600882530212, "learning_rate": 4.956183610664447e-05, "loss": 0.0067, "step": 31660 }, { "action_loss": 0.0052274116314947605, "epoch": 28.471223021582734, "step": 31660 }, { "epoch": 28.480215827338128, "grad_norm": 0.11787531524896622, "learning_rate": 4.9534279402468945e-05, "loss": 0.0048, "step": 31670 }, { "action_loss": 0.012314923107624054, "epoch": 28.480215827338128, "step": 31670 }, { "epoch": 28.489208633093526, "grad_norm": 0.1623777598142624, "learning_rate": 4.9506722839767036e-05, "loss": 0.005, "step": 31680 }, { "action_loss": 0.007082457188516855, "epoch": 28.489208633093526, "step": 31680 }, { "epoch": 28.49820143884892, "grad_norm": 0.20687924325466156, "learning_rate": 4.947916642690972e-05, "loss": 0.0123, "step": 31690 }, { "action_loss": 0.004385590553283691, "epoch": 28.49820143884892, "step": 31690 }, { "epoch": 28.507194244604317, "grad_norm": 0.12048545479774475, "learning_rate": 4.9451610172267874e-05, "loss": 0.0035, "step": 31700 }, { "action_loss": 0.009135883301496506, "epoch": 28.507194244604317, "step": 31700 }, { "epoch": 28.51618705035971, "grad_norm": 0.23100003600120544, "learning_rate": 4.9424054084212376e-05, "loss": 0.0049, "step": 31710 }, { "action_loss": 0.005126160103827715, "epoch": 28.51618705035971, "step": 31710 }, { "epoch": 28.52517985611511, "grad_norm": 0.08990144729614258, "learning_rate": 4.939649817111407e-05, "loss": 0.0052, "step": 31720 }, { "action_loss": 0.00529879005625844, "epoch": 28.52517985611511, "step": 31720 }, { "epoch": 28.534172661870503, "grad_norm": 0.17509596049785614, "learning_rate": 4.936894244134365e-05, "loss": 0.0057, "step": 31730 }, { "action_loss": 0.0066828192211687565, "epoch": 28.534172661870503, "step": 31730 }, { "epoch": 28.5431654676259, "grad_norm": 0.16966210305690765, "learning_rate": 4.9341386903271886e-05, "loss": 0.0042, "step": 31740 }, { "action_loss": 0.01041239034384489, "epoch": 28.5431654676259, "step": 31740 }, { "epoch": 28.552158273381295, "grad_norm": 0.18303976953029633, "learning_rate": 4.931383156526936e-05, "loss": 0.0092, "step": 31750 }, { "action_loss": 0.002426518127322197, "epoch": 28.552158273381295, "step": 31750 }, { "epoch": 28.56115107913669, "grad_norm": 0.1842227578163147, "learning_rate": 4.92862764357067e-05, "loss": 0.0067, "step": 31760 }, { "action_loss": 0.0018286282429471612, "epoch": 28.56115107913669, "step": 31760 }, { "epoch": 28.570143884892087, "grad_norm": 0.10304879397153854, "learning_rate": 4.925872152295443e-05, "loss": 0.0038, "step": 31770 }, { "action_loss": 0.0025874627754092216, "epoch": 28.570143884892087, "step": 31770 }, { "epoch": 28.57913669064748, "grad_norm": 0.13716216385364532, "learning_rate": 4.923116683538296e-05, "loss": 0.0056, "step": 31780 }, { "action_loss": 0.005116531625390053, "epoch": 28.57913669064748, "step": 31780 }, { "epoch": 28.58812949640288, "grad_norm": 0.1269758939743042, "learning_rate": 4.920361238136273e-05, "loss": 0.0062, "step": 31790 }, { "action_loss": 0.002122503239661455, "epoch": 28.58812949640288, "step": 31790 }, { "epoch": 28.597122302158272, "grad_norm": 0.1636357605457306, "learning_rate": 4.9176058169264014e-05, "loss": 0.0069, "step": 31800 }, { "action_loss": 0.007228087168186903, "epoch": 28.597122302158272, "step": 31800 }, { "epoch": 28.60611510791367, "grad_norm": 0.16548170149326324, "learning_rate": 4.9148504207457074e-05, "loss": 0.0072, "step": 31810 }, { "action_loss": 0.016261501237750053, "epoch": 28.60611510791367, "step": 31810 }, { "epoch": 28.615107913669064, "grad_norm": 0.14362561702728271, "learning_rate": 4.912095050431208e-05, "loss": 0.007, "step": 31820 }, { "action_loss": 0.0023802309297025204, "epoch": 28.615107913669064, "step": 31820 }, { "epoch": 28.62410071942446, "grad_norm": 0.16250573098659515, "learning_rate": 4.909339706819911e-05, "loss": 0.0063, "step": 31830 }, { "action_loss": 0.004693806171417236, "epoch": 28.62410071942446, "step": 31830 }, { "epoch": 28.633093525179856, "grad_norm": 0.13649949431419373, "learning_rate": 4.906584390748819e-05, "loss": 0.0058, "step": 31840 }, { "action_loss": 0.009752485901117325, "epoch": 28.633093525179856, "step": 31840 }, { "epoch": 28.642086330935253, "grad_norm": 0.20415781438350677, "learning_rate": 4.9038291030549195e-05, "loss": 0.0129, "step": 31850 }, { "action_loss": 0.006244605872780085, "epoch": 28.642086330935253, "step": 31850 }, { "epoch": 28.651079136690647, "grad_norm": 0.16882699728012085, "learning_rate": 4.9010738445751995e-05, "loss": 0.0049, "step": 31860 }, { "action_loss": 0.009628755040466785, "epoch": 28.651079136690647, "step": 31860 }, { "epoch": 28.66007194244604, "grad_norm": 0.12706923484802246, "learning_rate": 4.8983186161466364e-05, "loss": 0.0038, "step": 31870 }, { "action_loss": 0.006188192870467901, "epoch": 28.66007194244604, "step": 31870 }, { "epoch": 28.66906474820144, "grad_norm": 0.16827301681041718, "learning_rate": 4.89556341860619e-05, "loss": 0.0052, "step": 31880 }, { "action_loss": 0.0023276382125914097, "epoch": 28.66906474820144, "step": 31880 }, { "epoch": 28.678057553956833, "grad_norm": 0.20643648505210876, "learning_rate": 4.892808252790822e-05, "loss": 0.0066, "step": 31890 }, { "action_loss": 0.0023188749328255653, "epoch": 28.678057553956833, "step": 31890 }, { "epoch": 28.68705035971223, "grad_norm": 0.14823424816131592, "learning_rate": 4.890053119537475e-05, "loss": 0.0046, "step": 31900 }, { "action_loss": 0.014143206179141998, "epoch": 28.68705035971223, "step": 31900 }, { "epoch": 28.696043165467625, "grad_norm": 0.11403631418943405, "learning_rate": 4.887298019683087e-05, "loss": 0.0073, "step": 31910 }, { "action_loss": 0.0430288203060627, "epoch": 28.696043165467625, "step": 31910 }, { "epoch": 28.705035971223023, "grad_norm": 0.16127648949623108, "learning_rate": 4.884542954064587e-05, "loss": 0.0086, "step": 31920 }, { "action_loss": 0.00933394767343998, "epoch": 28.705035971223023, "step": 31920 }, { "epoch": 28.714028776978417, "grad_norm": 0.19123728573322296, "learning_rate": 4.881787923518887e-05, "loss": 0.0067, "step": 31930 }, { "action_loss": 0.001920990995131433, "epoch": 28.714028776978417, "step": 31930 }, { "epoch": 28.723021582733814, "grad_norm": 0.19034357368946075, "learning_rate": 4.879032928882896e-05, "loss": 0.0045, "step": 31940 }, { "action_loss": 0.007877820171415806, "epoch": 28.723021582733814, "step": 31940 }, { "epoch": 28.73201438848921, "grad_norm": 0.11420286446809769, "learning_rate": 4.876277970993505e-05, "loss": 0.0046, "step": 31950 }, { "action_loss": 0.00718873692676425, "epoch": 28.73201438848921, "step": 31950 }, { "epoch": 28.741007194244606, "grad_norm": 0.13355891406536102, "learning_rate": 4.873523050687602e-05, "loss": 0.0057, "step": 31960 }, { "action_loss": 0.0028886247891932726, "epoch": 28.741007194244606, "step": 31960 }, { "epoch": 28.75, "grad_norm": 0.10466490685939789, "learning_rate": 4.870768168802056e-05, "loss": 0.0041, "step": 31970 }, { "action_loss": 0.005370657425373793, "epoch": 28.75, "step": 31970 }, { "epoch": 28.758992805755394, "grad_norm": 0.10963890701532364, "learning_rate": 4.868013326173728e-05, "loss": 0.0055, "step": 31980 }, { "action_loss": 0.0038489194121211767, "epoch": 28.758992805755394, "step": 31980 }, { "epoch": 28.76798561151079, "grad_norm": 0.1509462147951126, "learning_rate": 4.865258523639468e-05, "loss": 0.0057, "step": 31990 }, { "action_loss": 0.0067045134492218494, "epoch": 28.76798561151079, "step": 31990 }, { "epoch": 28.776978417266186, "grad_norm": 0.13335682451725006, "learning_rate": 4.862503762036109e-05, "loss": 0.0038, "step": 32000 }, { "action_loss": 0.00483557628467679, "epoch": 28.776978417266186, "step": 32000 }, { "epoch": 28.785971223021583, "grad_norm": 0.13828125596046448, "learning_rate": 4.859749042200478e-05, "loss": 0.0044, "step": 32010 }, { "action_loss": 0.0030418161768466234, "epoch": 28.785971223021583, "step": 32010 }, { "epoch": 28.794964028776977, "grad_norm": 0.08789143711328506, "learning_rate": 4.856994364969384e-05, "loss": 0.0044, "step": 32020 }, { "action_loss": 0.002371652750298381, "epoch": 28.794964028776977, "step": 32020 }, { "epoch": 28.803956834532375, "grad_norm": 0.08657830953598022, "learning_rate": 4.854239731179625e-05, "loss": 0.0025, "step": 32030 }, { "action_loss": 0.0024758458603173494, "epoch": 28.803956834532375, "step": 32030 }, { "epoch": 28.81294964028777, "grad_norm": 0.11449398100376129, "learning_rate": 4.85148514166799e-05, "loss": 0.0037, "step": 32040 }, { "action_loss": 0.0038709405343979597, "epoch": 28.81294964028777, "step": 32040 }, { "epoch": 28.821942446043167, "grad_norm": 0.10887183248996735, "learning_rate": 4.8487305972712456e-05, "loss": 0.0039, "step": 32050 }, { "action_loss": 0.005988209042698145, "epoch": 28.821942446043167, "step": 32050 }, { "epoch": 28.83093525179856, "grad_norm": 0.20213760435581207, "learning_rate": 4.8459760988261526e-05, "loss": 0.0037, "step": 32060 }, { "action_loss": 0.005463588982820511, "epoch": 28.83093525179856, "step": 32060 }, { "epoch": 28.83992805755396, "grad_norm": 0.15354520082473755, "learning_rate": 4.843221647169453e-05, "loss": 0.0062, "step": 32070 }, { "action_loss": 0.005617031827569008, "epoch": 28.83992805755396, "step": 32070 }, { "epoch": 28.848920863309353, "grad_norm": 0.11473777890205383, "learning_rate": 4.840467243137878e-05, "loss": 0.0061, "step": 32080 }, { "action_loss": 0.002520590089261532, "epoch": 28.848920863309353, "step": 32080 }, { "epoch": 28.857913669064747, "grad_norm": 0.20399178564548492, "learning_rate": 4.837712887568143e-05, "loss": 0.0056, "step": 32090 }, { "action_loss": 0.0031152989249676466, "epoch": 28.857913669064747, "step": 32090 }, { "epoch": 28.866906474820144, "grad_norm": 0.18000325560569763, "learning_rate": 4.8349585812969464e-05, "loss": 0.0056, "step": 32100 }, { "action_loss": 0.006461186800152063, "epoch": 28.866906474820144, "step": 32100 }, { "epoch": 28.87589928057554, "grad_norm": 0.12558236718177795, "learning_rate": 4.8322043251609775e-05, "loss": 0.0046, "step": 32110 }, { "action_loss": 0.008622649125754833, "epoch": 28.87589928057554, "step": 32110 }, { "epoch": 28.884892086330936, "grad_norm": 0.12384388595819473, "learning_rate": 4.8294501199969015e-05, "loss": 0.0055, "step": 32120 }, { "action_loss": 0.00234884861856699, "epoch": 28.884892086330936, "step": 32120 }, { "epoch": 28.89388489208633, "grad_norm": 0.2544441819190979, "learning_rate": 4.826695966641376e-05, "loss": 0.0042, "step": 32130 }, { "action_loss": 0.004866607487201691, "epoch": 28.89388489208633, "step": 32130 }, { "epoch": 28.902877697841728, "grad_norm": 0.2425723820924759, "learning_rate": 4.823941865931043e-05, "loss": 0.0072, "step": 32140 }, { "action_loss": 0.00776498531922698, "epoch": 28.902877697841728, "step": 32140 }, { "epoch": 28.91187050359712, "grad_norm": 0.25034013390541077, "learning_rate": 4.82118781870252e-05, "loss": 0.0078, "step": 32150 }, { "action_loss": 0.004914560820907354, "epoch": 28.91187050359712, "step": 32150 }, { "epoch": 28.92086330935252, "grad_norm": 0.19836606085300446, "learning_rate": 4.8184338257924185e-05, "loss": 0.0052, "step": 32160 }, { "action_loss": 0.008160057477653027, "epoch": 28.92086330935252, "step": 32160 }, { "epoch": 28.929856115107913, "grad_norm": 0.1985417604446411, "learning_rate": 4.815679888037324e-05, "loss": 0.0068, "step": 32170 }, { "action_loss": 0.002495685825124383, "epoch": 28.929856115107913, "step": 32170 }, { "epoch": 28.93884892086331, "grad_norm": 0.17399702966213226, "learning_rate": 4.8129260062738135e-05, "loss": 0.0047, "step": 32180 }, { "action_loss": 0.0011280480539426208, "epoch": 28.93884892086331, "step": 32180 }, { "epoch": 28.947841726618705, "grad_norm": 0.22931566834449768, "learning_rate": 4.810172181338445e-05, "loss": 0.0067, "step": 32190 }, { "action_loss": 0.0048710256814956665, "epoch": 28.947841726618705, "step": 32190 }, { "epoch": 28.9568345323741, "grad_norm": 0.11346275359392166, "learning_rate": 4.807418414067753e-05, "loss": 0.0032, "step": 32200 }, { "action_loss": 0.0035219071432948112, "epoch": 28.9568345323741, "step": 32200 }, { "epoch": 28.965827338129497, "grad_norm": 0.07679864019155502, "learning_rate": 4.804664705298264e-05, "loss": 0.0072, "step": 32210 }, { "action_loss": 0.002622833475470543, "epoch": 28.965827338129497, "step": 32210 }, { "epoch": 28.97482014388489, "grad_norm": 0.2086927741765976, "learning_rate": 4.80191105586648e-05, "loss": 0.0043, "step": 32220 }, { "action_loss": 0.0025844078045338392, "epoch": 28.97482014388489, "step": 32220 }, { "epoch": 28.98381294964029, "grad_norm": 0.2102992832660675, "learning_rate": 4.799157466608886e-05, "loss": 0.0039, "step": 32230 }, { "action_loss": 0.005667116492986679, "epoch": 28.98381294964029, "step": 32230 }, { "epoch": 28.992805755395683, "grad_norm": 0.11899196356534958, "learning_rate": 4.796403938361951e-05, "loss": 0.0058, "step": 32240 }, { "action_loss": 0.004360164515674114, "epoch": 28.992805755395683, "step": 32240 }, { "epoch": 29.00179856115108, "grad_norm": 0.14352302253246307, "learning_rate": 4.793650471962123e-05, "loss": 0.0036, "step": 32250 }, { "action_loss": 0.006091163959354162, "epoch": 29.00179856115108, "step": 32250 }, { "epoch": 29.010791366906474, "grad_norm": 0.13490796089172363, "learning_rate": 4.790897068245835e-05, "loss": 0.0049, "step": 32260 }, { "action_loss": 0.002703597769141197, "epoch": 29.010791366906474, "step": 32260 }, { "epoch": 29.019784172661872, "grad_norm": 0.1450120061635971, "learning_rate": 4.7881437280494954e-05, "loss": 0.0068, "step": 32270 }, { "action_loss": 0.003619243623688817, "epoch": 29.019784172661872, "step": 32270 }, { "epoch": 29.028776978417266, "grad_norm": 0.137419193983078, "learning_rate": 4.7853904522094965e-05, "loss": 0.005, "step": 32280 }, { "action_loss": 0.0033396256621927023, "epoch": 29.028776978417266, "step": 32280 }, { "epoch": 29.037769784172664, "grad_norm": 0.09546667337417603, "learning_rate": 4.782637241562215e-05, "loss": 0.0058, "step": 32290 }, { "action_loss": 0.001468741917051375, "epoch": 29.037769784172664, "step": 32290 }, { "epoch": 29.046762589928058, "grad_norm": 0.1665782630443573, "learning_rate": 4.779884096943997e-05, "loss": 0.0052, "step": 32300 }, { "action_loss": 0.003889539511874318, "epoch": 29.046762589928058, "step": 32300 }, { "epoch": 29.055755395683452, "grad_norm": 0.20494875311851501, "learning_rate": 4.777131019191182e-05, "loss": 0.0108, "step": 32310 }, { "action_loss": 0.0059480369091033936, "epoch": 29.055755395683452, "step": 32310 }, { "epoch": 29.06474820143885, "grad_norm": 0.23383566737174988, "learning_rate": 4.774378009140076e-05, "loss": 0.0054, "step": 32320 }, { "action_loss": 0.007574106100946665, "epoch": 29.06474820143885, "step": 32320 }, { "epoch": 29.073741007194243, "grad_norm": 0.17554450035095215, "learning_rate": 4.7716250676269735e-05, "loss": 0.0046, "step": 32330 }, { "action_loss": 0.0026915802154690027, "epoch": 29.073741007194243, "step": 32330 }, { "epoch": 29.08273381294964, "grad_norm": 0.14951825141906738, "learning_rate": 4.7688721954881485e-05, "loss": 0.0042, "step": 32340 }, { "action_loss": 0.002677908865734935, "epoch": 29.08273381294964, "step": 32340 }, { "epoch": 29.091726618705035, "grad_norm": 0.15005648136138916, "learning_rate": 4.7661193935598446e-05, "loss": 0.0099, "step": 32350 }, { "action_loss": 0.01248880848288536, "epoch": 29.091726618705035, "step": 32350 }, { "epoch": 29.100719424460433, "grad_norm": 0.11922179907560349, "learning_rate": 4.763366662678296e-05, "loss": 0.0068, "step": 32360 }, { "action_loss": 0.006066882982850075, "epoch": 29.100719424460433, "step": 32360 }, { "epoch": 29.109712230215827, "grad_norm": 0.24741266667842865, "learning_rate": 4.7606140036797064e-05, "loss": 0.0071, "step": 32370 }, { "action_loss": 0.0018846277380362153, "epoch": 29.109712230215827, "step": 32370 }, { "epoch": 29.118705035971225, "grad_norm": 0.1329672485589981, "learning_rate": 4.7578614174002614e-05, "loss": 0.0039, "step": 32380 }, { "action_loss": 0.005050967913120985, "epoch": 29.118705035971225, "step": 32380 }, { "epoch": 29.12769784172662, "grad_norm": 0.20193469524383545, "learning_rate": 4.755108904676125e-05, "loss": 0.0064, "step": 32390 }, { "action_loss": 0.0033319033682346344, "epoch": 29.12769784172662, "step": 32390 }, { "epoch": 29.136690647482013, "grad_norm": 0.18373920023441315, "learning_rate": 4.752356466343436e-05, "loss": 0.0089, "step": 32400 }, { "action_loss": 0.002939510392025113, "epoch": 29.136690647482013, "step": 32400 }, { "epoch": 29.14568345323741, "grad_norm": 0.17677603662014008, "learning_rate": 4.7496041032383174e-05, "loss": 0.004, "step": 32410 }, { "action_loss": 0.0024766046553850174, "epoch": 29.14568345323741, "step": 32410 }, { "epoch": 29.154676258992804, "grad_norm": 0.13584545254707336, "learning_rate": 4.746851816196858e-05, "loss": 0.0044, "step": 32420 }, { "action_loss": 0.002666155807673931, "epoch": 29.154676258992804, "step": 32420 }, { "epoch": 29.163669064748202, "grad_norm": 0.23270048201084137, "learning_rate": 4.744099606055135e-05, "loss": 0.005, "step": 32430 }, { "action_loss": 0.013638593256473541, "epoch": 29.163669064748202, "step": 32430 }, { "epoch": 29.172661870503596, "grad_norm": 0.1780572235584259, "learning_rate": 4.741347473649193e-05, "loss": 0.0064, "step": 32440 }, { "action_loss": 0.012159675359725952, "epoch": 29.172661870503596, "step": 32440 }, { "epoch": 29.181654676258994, "grad_norm": 0.0921807587146759, "learning_rate": 4.738595419815058e-05, "loss": 0.0041, "step": 32450 }, { "action_loss": 0.014057359658181667, "epoch": 29.181654676258994, "step": 32450 }, { "epoch": 29.190647482014388, "grad_norm": 0.21221671998500824, "learning_rate": 4.7358434453887365e-05, "loss": 0.0074, "step": 32460 }, { "action_loss": 0.004314637277275324, "epoch": 29.190647482014388, "step": 32460 }, { "epoch": 29.199640287769785, "grad_norm": 0.17270101606845856, "learning_rate": 4.7330915512061976e-05, "loss": 0.006, "step": 32470 }, { "action_loss": 0.002792035462334752, "epoch": 29.199640287769785, "step": 32470 }, { "epoch": 29.20863309352518, "grad_norm": 0.10254539549350739, "learning_rate": 4.730339738103402e-05, "loss": 0.0059, "step": 32480 }, { "action_loss": 0.003595242975279689, "epoch": 29.20863309352518, "step": 32480 }, { "epoch": 29.217625899280577, "grad_norm": 0.17853474617004395, "learning_rate": 4.727588006916271e-05, "loss": 0.0057, "step": 32490 }, { "action_loss": 0.0010226123267784715, "epoch": 29.217625899280577, "step": 32490 }, { "epoch": 29.22661870503597, "grad_norm": 0.13054420053958893, "learning_rate": 4.724836358480711e-05, "loss": 0.0037, "step": 32500 }, { "action_loss": 0.009098289534449577, "epoch": 29.22661870503597, "step": 32500 }, { "epoch": 29.235611510791365, "grad_norm": 0.20593656599521637, "learning_rate": 4.722084793632601e-05, "loss": 0.0065, "step": 32510 }, { "action_loss": 0.003791986731812358, "epoch": 29.235611510791365, "step": 32510 }, { "epoch": 29.244604316546763, "grad_norm": 0.1758202612400055, "learning_rate": 4.719333313207792e-05, "loss": 0.0072, "step": 32520 }, { "action_loss": 0.00425088033080101, "epoch": 29.244604316546763, "step": 32520 }, { "epoch": 29.253597122302157, "grad_norm": 0.24324503540992737, "learning_rate": 4.716581918042114e-05, "loss": 0.0062, "step": 32530 }, { "action_loss": 0.01667129434645176, "epoch": 29.253597122302157, "step": 32530 }, { "epoch": 29.262589928057555, "grad_norm": 0.10840892791748047, "learning_rate": 4.7138306089713636e-05, "loss": 0.0046, "step": 32540 }, { "action_loss": 0.0011408604914322495, "epoch": 29.262589928057555, "step": 32540 }, { "epoch": 29.27158273381295, "grad_norm": 0.125026673078537, "learning_rate": 4.7110793868313183e-05, "loss": 0.004, "step": 32550 }, { "action_loss": 0.005044683814048767, "epoch": 29.27158273381295, "step": 32550 }, { "epoch": 29.280575539568346, "grad_norm": 0.18036343157291412, "learning_rate": 4.708328252457729e-05, "loss": 0.0065, "step": 32560 }, { "action_loss": 0.008093138225376606, "epoch": 29.280575539568346, "step": 32560 }, { "epoch": 29.28956834532374, "grad_norm": 0.13513024151325226, "learning_rate": 4.7055772066863135e-05, "loss": 0.0096, "step": 32570 }, { "action_loss": 0.006257271859794855, "epoch": 29.28956834532374, "step": 32570 }, { "epoch": 29.298561151079138, "grad_norm": 0.19018171727657318, "learning_rate": 4.702826250352771e-05, "loss": 0.0035, "step": 32580 }, { "action_loss": 0.0033388163428753614, "epoch": 29.298561151079138, "step": 32580 }, { "epoch": 29.307553956834532, "grad_norm": 0.08861023932695389, "learning_rate": 4.7000753842927653e-05, "loss": 0.0044, "step": 32590 }, { "action_loss": 0.002465900033712387, "epoch": 29.307553956834532, "step": 32590 }, { "epoch": 29.31654676258993, "grad_norm": 0.14386162161827087, "learning_rate": 4.6973246093419384e-05, "loss": 0.0058, "step": 32600 }, { "action_loss": 0.0032784126233309507, "epoch": 29.31654676258993, "step": 32600 }, { "epoch": 29.325539568345324, "grad_norm": 0.12887997925281525, "learning_rate": 4.694573926335906e-05, "loss": 0.0073, "step": 32610 }, { "action_loss": 0.004360506311058998, "epoch": 29.325539568345324, "step": 32610 }, { "epoch": 29.334532374100718, "grad_norm": 0.16825072467327118, "learning_rate": 4.6918233361102476e-05, "loss": 0.0049, "step": 32620 }, { "action_loss": 0.004111437126994133, "epoch": 29.334532374100718, "step": 32620 }, { "epoch": 29.343525179856115, "grad_norm": 0.17656749486923218, "learning_rate": 4.689072839500525e-05, "loss": 0.0058, "step": 32630 }, { "action_loss": 0.0035258110146969557, "epoch": 29.343525179856115, "step": 32630 }, { "epoch": 29.35251798561151, "grad_norm": 0.15129591524600983, "learning_rate": 4.6863224373422635e-05, "loss": 0.0046, "step": 32640 }, { "action_loss": 0.009141707792878151, "epoch": 29.35251798561151, "step": 32640 }, { "epoch": 29.361510791366907, "grad_norm": 0.09821996092796326, "learning_rate": 4.683572130470962e-05, "loss": 0.0042, "step": 32650 }, { "action_loss": 0.00592675618827343, "epoch": 29.361510791366907, "step": 32650 }, { "epoch": 29.3705035971223, "grad_norm": 0.2196304053068161, "learning_rate": 4.680821919722094e-05, "loss": 0.009, "step": 32660 }, { "action_loss": 0.006477812770754099, "epoch": 29.3705035971223, "step": 32660 }, { "epoch": 29.3794964028777, "grad_norm": 0.18127068877220154, "learning_rate": 4.6780718059310975e-05, "loss": 0.0053, "step": 32670 }, { "action_loss": 0.0025407106149941683, "epoch": 29.3794964028777, "step": 32670 }, { "epoch": 29.388489208633093, "grad_norm": 0.20814579725265503, "learning_rate": 4.675321789933389e-05, "loss": 0.0044, "step": 32680 }, { "action_loss": 0.0030664626974612474, "epoch": 29.388489208633093, "step": 32680 }, { "epoch": 29.39748201438849, "grad_norm": 0.1488213986158371, "learning_rate": 4.6725718725643464e-05, "loss": 0.0038, "step": 32690 }, { "action_loss": 0.022811023518443108, "epoch": 29.39748201438849, "step": 32690 }, { "epoch": 29.406474820143885, "grad_norm": 0.14591176807880402, "learning_rate": 4.669822054659323e-05, "loss": 0.0078, "step": 32700 }, { "action_loss": 0.008157234638929367, "epoch": 29.406474820143885, "step": 32700 }, { "epoch": 29.415467625899282, "grad_norm": 0.15245233476161957, "learning_rate": 4.667072337053644e-05, "loss": 0.0061, "step": 32710 }, { "action_loss": 0.008450617082417011, "epoch": 29.415467625899282, "step": 32710 }, { "epoch": 29.424460431654676, "grad_norm": 0.14472149312496185, "learning_rate": 4.6643227205825965e-05, "loss": 0.0058, "step": 32720 }, { "action_loss": 0.0026622151490300894, "epoch": 29.424460431654676, "step": 32720 }, { "epoch": 29.43345323741007, "grad_norm": 0.175339937210083, "learning_rate": 4.6615732060814454e-05, "loss": 0.0048, "step": 32730 }, { "action_loss": 0.00251362775452435, "epoch": 29.43345323741007, "step": 32730 }, { "epoch": 29.442446043165468, "grad_norm": 0.13119733333587646, "learning_rate": 4.658823794385417e-05, "loss": 0.0051, "step": 32740 }, { "action_loss": 0.009929952211678028, "epoch": 29.442446043165468, "step": 32740 }, { "epoch": 29.451438848920862, "grad_norm": 0.12766233086585999, "learning_rate": 4.6560744863297115e-05, "loss": 0.0074, "step": 32750 }, { "action_loss": 0.0038688620552420616, "epoch": 29.451438848920862, "step": 32750 }, { "epoch": 29.46043165467626, "grad_norm": 0.20430266857147217, "learning_rate": 4.653325282749498e-05, "loss": 0.0067, "step": 32760 }, { "action_loss": 0.007418098393827677, "epoch": 29.46043165467626, "step": 32760 }, { "epoch": 29.469424460431654, "grad_norm": 0.25871381163597107, "learning_rate": 4.6505761844799075e-05, "loss": 0.0065, "step": 32770 }, { "action_loss": 0.0029265638440847397, "epoch": 29.469424460431654, "step": 32770 }, { "epoch": 29.47841726618705, "grad_norm": 0.24526555836200714, "learning_rate": 4.647827192356048e-05, "loss": 0.006, "step": 32780 }, { "action_loss": 0.009160465560853481, "epoch": 29.47841726618705, "step": 32780 }, { "epoch": 29.487410071942445, "grad_norm": 0.23500317335128784, "learning_rate": 4.645078307212989e-05, "loss": 0.0053, "step": 32790 }, { "action_loss": 0.003560426877811551, "epoch": 29.487410071942445, "step": 32790 }, { "epoch": 29.496402877697843, "grad_norm": 0.15407826006412506, "learning_rate": 4.642329529885768e-05, "loss": 0.0048, "step": 32800 }, { "action_loss": 0.00364125519990921, "epoch": 29.496402877697843, "step": 32800 }, { "epoch": 29.505395683453237, "grad_norm": 0.2830604612827301, "learning_rate": 4.639580861209393e-05, "loss": 0.006, "step": 32810 }, { "action_loss": 0.003535536816343665, "epoch": 29.505395683453237, "step": 32810 }, { "epoch": 29.514388489208635, "grad_norm": 0.20695599913597107, "learning_rate": 4.636832302018835e-05, "loss": 0.0075, "step": 32820 }, { "action_loss": 0.006181629840284586, "epoch": 29.514388489208635, "step": 32820 }, { "epoch": 29.52338129496403, "grad_norm": 0.15834563970565796, "learning_rate": 4.6340838531490365e-05, "loss": 0.0044, "step": 32830 }, { "action_loss": 0.0013466986129060388, "epoch": 29.52338129496403, "step": 32830 }, { "epoch": 29.532374100719423, "grad_norm": 0.13053330779075623, "learning_rate": 4.6313355154349e-05, "loss": 0.0038, "step": 32840 }, { "action_loss": 0.003684710944071412, "epoch": 29.532374100719423, "step": 32840 }, { "epoch": 29.54136690647482, "grad_norm": 0.1494014710187912, "learning_rate": 4.6285872897113025e-05, "loss": 0.0044, "step": 32850 }, { "action_loss": 0.020409563556313515, "epoch": 29.54136690647482, "step": 32850 }, { "epoch": 29.550359712230215, "grad_norm": 0.11919257789850235, "learning_rate": 4.625839176813077e-05, "loss": 0.004, "step": 32860 }, { "action_loss": 0.0038842912763357162, "epoch": 29.550359712230215, "step": 32860 }, { "epoch": 29.559352517985612, "grad_norm": 0.18896035850048065, "learning_rate": 4.623091177575031e-05, "loss": 0.0052, "step": 32870 }, { "action_loss": 0.0034875720739364624, "epoch": 29.559352517985612, "step": 32870 }, { "epoch": 29.568345323741006, "grad_norm": 0.20706243813037872, "learning_rate": 4.620343292831936e-05, "loss": 0.0043, "step": 32880 }, { "action_loss": 0.002823899732902646, "epoch": 29.568345323741006, "step": 32880 }, { "epoch": 29.577338129496404, "grad_norm": 0.15009726583957672, "learning_rate": 4.6175955234185206e-05, "loss": 0.0052, "step": 32890 }, { "action_loss": 0.002438480732962489, "epoch": 29.577338129496404, "step": 32890 }, { "epoch": 29.586330935251798, "grad_norm": 0.3469263017177582, "learning_rate": 4.614847870169492e-05, "loss": 0.0037, "step": 32900 }, { "action_loss": 0.006195411551743746, "epoch": 29.586330935251798, "step": 32900 }, { "epoch": 29.595323741007196, "grad_norm": 0.12735971808433533, "learning_rate": 4.612100333919509e-05, "loss": 0.0048, "step": 32910 }, { "action_loss": 0.0025785774923861027, "epoch": 29.595323741007196, "step": 32910 }, { "epoch": 29.60431654676259, "grad_norm": 0.15243011713027954, "learning_rate": 4.609352915503202e-05, "loss": 0.0065, "step": 32920 }, { "action_loss": 0.0026785435620695353, "epoch": 29.60431654676259, "step": 32920 }, { "epoch": 29.613309352517987, "grad_norm": 0.15500785410404205, "learning_rate": 4.606605615755166e-05, "loss": 0.0046, "step": 32930 }, { "action_loss": 0.01941402442753315, "epoch": 29.613309352517987, "step": 32930 }, { "epoch": 29.62230215827338, "grad_norm": 0.18453441560268402, "learning_rate": 4.6038584355099576e-05, "loss": 0.0076, "step": 32940 }, { "action_loss": 0.0044769481755793095, "epoch": 29.62230215827338, "step": 32940 }, { "epoch": 29.631294964028775, "grad_norm": 0.12859943509101868, "learning_rate": 4.6011113756020964e-05, "loss": 0.0048, "step": 32950 }, { "action_loss": 0.024794066324830055, "epoch": 29.631294964028775, "step": 32950 }, { "epoch": 29.640287769784173, "grad_norm": 0.11993979662656784, "learning_rate": 4.598364436866066e-05, "loss": 0.0075, "step": 32960 }, { "action_loss": 0.01890515349805355, "epoch": 29.640287769784173, "step": 32960 }, { "epoch": 29.649280575539567, "grad_norm": 0.18807344138622284, "learning_rate": 4.595617620136316e-05, "loss": 0.0074, "step": 32970 }, { "action_loss": 0.0045926631428301334, "epoch": 29.649280575539567, "step": 32970 }, { "epoch": 29.658273381294965, "grad_norm": 0.27023881673812866, "learning_rate": 4.592870926247257e-05, "loss": 0.0057, "step": 32980 }, { "action_loss": 0.004909850656986237, "epoch": 29.658273381294965, "step": 32980 }, { "epoch": 29.66726618705036, "grad_norm": 0.1719294786453247, "learning_rate": 4.5901243560332594e-05, "loss": 0.006, "step": 32990 }, { "action_loss": 0.00241340440697968, "epoch": 29.66726618705036, "step": 32990 }, { "epoch": 29.676258992805757, "grad_norm": 0.21460272371768951, "learning_rate": 4.587377910328662e-05, "loss": 0.0057, "step": 33000 }, { "action_loss": 0.0018977097934111953, "epoch": 29.676258992805757, "step": 33000 }, { "epoch": 29.68525179856115, "grad_norm": 0.16347485780715942, "learning_rate": 4.5846315899677586e-05, "loss": 0.0036, "step": 33010 }, { "action_loss": 0.0023391626309603453, "epoch": 29.68525179856115, "step": 33010 }, { "epoch": 29.694244604316548, "grad_norm": 0.19701649248600006, "learning_rate": 4.5818853957848114e-05, "loss": 0.0061, "step": 33020 }, { "action_loss": 0.005644577089697123, "epoch": 29.694244604316548, "step": 33020 }, { "epoch": 29.703237410071942, "grad_norm": 0.13295628130435944, "learning_rate": 4.579139328614043e-05, "loss": 0.0046, "step": 33030 }, { "action_loss": 0.011600449681282043, "epoch": 29.703237410071942, "step": 33030 }, { "epoch": 29.71223021582734, "grad_norm": 0.13275246322155, "learning_rate": 4.576393389289633e-05, "loss": 0.0051, "step": 33040 }, { "action_loss": 0.004648147150874138, "epoch": 29.71223021582734, "step": 33040 }, { "epoch": 29.721223021582734, "grad_norm": 0.1524185836315155, "learning_rate": 4.573647578645728e-05, "loss": 0.0074, "step": 33050 }, { "action_loss": 0.002649415284395218, "epoch": 29.721223021582734, "step": 33050 }, { "epoch": 29.730215827338128, "grad_norm": 0.12776760756969452, "learning_rate": 4.57090189751643e-05, "loss": 0.0034, "step": 33060 }, { "action_loss": 0.007255095988512039, "epoch": 29.730215827338128, "step": 33060 }, { "epoch": 29.739208633093526, "grad_norm": 0.18902458250522614, "learning_rate": 4.568156346735806e-05, "loss": 0.0059, "step": 33070 }, { "action_loss": 0.0025254145730286837, "epoch": 29.739208633093526, "step": 33070 }, { "epoch": 29.74820143884892, "grad_norm": 0.10829278081655502, "learning_rate": 4.565410927137882e-05, "loss": 0.0045, "step": 33080 }, { "action_loss": 0.0019942522048950195, "epoch": 29.74820143884892, "step": 33080 }, { "epoch": 29.757194244604317, "grad_norm": 0.14621339738368988, "learning_rate": 4.562665639556644e-05, "loss": 0.0063, "step": 33090 }, { "action_loss": 0.0032634595409035683, "epoch": 29.757194244604317, "step": 33090 }, { "epoch": 29.76618705035971, "grad_norm": 0.1862104833126068, "learning_rate": 4.559920484826037e-05, "loss": 0.0048, "step": 33100 }, { "action_loss": 0.0018550591776147485, "epoch": 29.76618705035971, "step": 33100 }, { "epoch": 29.77517985611511, "grad_norm": 0.18448631465435028, "learning_rate": 4.5571754637799665e-05, "loss": 0.0052, "step": 33110 }, { "action_loss": 0.009179041720926762, "epoch": 29.77517985611511, "step": 33110 }, { "epoch": 29.784172661870503, "grad_norm": 0.21595709025859833, "learning_rate": 4.554430577252298e-05, "loss": 0.0041, "step": 33120 }, { "action_loss": 0.0035345538053661585, "epoch": 29.784172661870503, "step": 33120 }, { "epoch": 29.7931654676259, "grad_norm": 0.1287144422531128, "learning_rate": 4.551685826076858e-05, "loss": 0.0042, "step": 33130 }, { "action_loss": 0.005958449095487595, "epoch": 29.7931654676259, "step": 33130 }, { "epoch": 29.802158273381295, "grad_norm": 0.11494258046150208, "learning_rate": 4.5489412110874246e-05, "loss": 0.0048, "step": 33140 }, { "action_loss": 0.004358955193310976, "epoch": 29.802158273381295, "step": 33140 }, { "epoch": 29.81115107913669, "grad_norm": 0.13211588561534882, "learning_rate": 4.5461967331177444e-05, "loss": 0.0065, "step": 33150 }, { "action_loss": 0.003987807314842939, "epoch": 29.81115107913669, "step": 33150 }, { "epoch": 29.820143884892087, "grad_norm": 0.2614014148712158, "learning_rate": 4.5434523930015115e-05, "loss": 0.0068, "step": 33160 }, { "action_loss": 0.004954623524099588, "epoch": 29.820143884892087, "step": 33160 }, { "epoch": 29.82913669064748, "grad_norm": 0.15281614661216736, "learning_rate": 4.540708191572388e-05, "loss": 0.0054, "step": 33170 }, { "action_loss": 0.002401924692094326, "epoch": 29.82913669064748, "step": 33170 }, { "epoch": 29.83812949640288, "grad_norm": 0.2093532681465149, "learning_rate": 4.537964129663991e-05, "loss": 0.0036, "step": 33180 }, { "action_loss": 0.003982599824666977, "epoch": 29.83812949640288, "step": 33180 }, { "epoch": 29.847122302158272, "grad_norm": 0.21370141208171844, "learning_rate": 4.535220208109889e-05, "loss": 0.0067, "step": 33190 }, { "action_loss": 0.0025283137802034616, "epoch": 29.847122302158272, "step": 33190 }, { "epoch": 29.85611510791367, "grad_norm": 0.09498254209756851, "learning_rate": 4.5324764277436194e-05, "loss": 0.0037, "step": 33200 }, { "action_loss": 0.004845042712986469, "epoch": 29.85611510791367, "step": 33200 }, { "epoch": 29.865107913669064, "grad_norm": 0.13250048458576202, "learning_rate": 4.529732789398664e-05, "loss": 0.0054, "step": 33210 }, { "action_loss": 0.0036603438202291727, "epoch": 29.865107913669064, "step": 33210 }, { "epoch": 29.87410071942446, "grad_norm": 0.12318070232868195, "learning_rate": 4.526989293908472e-05, "loss": 0.0065, "step": 33220 }, { "action_loss": 0.003267916152253747, "epoch": 29.87410071942446, "step": 33220 }, { "epoch": 29.883093525179856, "grad_norm": 0.19238661229610443, "learning_rate": 4.524245942106442e-05, "loss": 0.0047, "step": 33230 }, { "action_loss": 0.005768248345702887, "epoch": 29.883093525179856, "step": 33230 }, { "epoch": 29.892086330935253, "grad_norm": 0.13761518895626068, "learning_rate": 4.5215027348259345e-05, "loss": 0.0057, "step": 33240 }, { "action_loss": 0.005762409884482622, "epoch": 29.892086330935253, "step": 33240 }, { "epoch": 29.901079136690647, "grad_norm": 0.12296947091817856, "learning_rate": 4.5187596729002616e-05, "loss": 0.0061, "step": 33250 }, { "action_loss": 0.012683678418397903, "epoch": 29.901079136690647, "step": 33250 }, { "epoch": 29.91007194244604, "grad_norm": 0.15823955833911896, "learning_rate": 4.516016757162693e-05, "loss": 0.0047, "step": 33260 }, { "action_loss": 0.015611748211085796, "epoch": 29.91007194244604, "step": 33260 }, { "epoch": 29.91906474820144, "grad_norm": 0.08510354906320572, "learning_rate": 4.513273988446457e-05, "loss": 0.0053, "step": 33270 }, { "action_loss": 0.00596267357468605, "epoch": 29.91906474820144, "step": 33270 }, { "epoch": 29.928057553956833, "grad_norm": 0.13246724009513855, "learning_rate": 4.5105313675847296e-05, "loss": 0.0041, "step": 33280 }, { "action_loss": 0.004516930785030127, "epoch": 29.928057553956833, "step": 33280 }, { "epoch": 29.93705035971223, "grad_norm": 0.21747775375843048, "learning_rate": 4.5077888954106495e-05, "loss": 0.0054, "step": 33290 }, { "action_loss": 0.002679109573364258, "epoch": 29.93705035971223, "step": 33290 }, { "epoch": 29.946043165467625, "grad_norm": 0.08564575016498566, "learning_rate": 4.505046572757309e-05, "loss": 0.0067, "step": 33300 }, { "action_loss": 0.007316459435969591, "epoch": 29.946043165467625, "step": 33300 }, { "epoch": 29.955035971223023, "grad_norm": 0.13222002983093262, "learning_rate": 4.502304400457749e-05, "loss": 0.0063, "step": 33310 }, { "action_loss": 0.00230195838958025, "epoch": 29.955035971223023, "step": 33310 }, { "epoch": 29.964028776978417, "grad_norm": 0.14821858704090118, "learning_rate": 4.499562379344973e-05, "loss": 0.0031, "step": 33320 }, { "action_loss": 0.005080599803477526, "epoch": 29.964028776978417, "step": 33320 }, { "epoch": 29.973021582733814, "grad_norm": 0.2557485103607178, "learning_rate": 4.4968205102519306e-05, "loss": 0.0054, "step": 33330 }, { "action_loss": 0.003441083477810025, "epoch": 29.973021582733814, "step": 33330 }, { "epoch": 29.98201438848921, "grad_norm": 0.20884908735752106, "learning_rate": 4.494078794011532e-05, "loss": 0.0054, "step": 33340 }, { "action_loss": 0.003448090748861432, "epoch": 29.98201438848921, "step": 33340 }, { "epoch": 29.991007194244606, "grad_norm": 0.20754146575927734, "learning_rate": 4.491337231456639e-05, "loss": 0.0051, "step": 33350 }, { "action_loss": 0.003092937171459198, "epoch": 29.991007194244606, "step": 33350 }, { "epoch": 30.0, "grad_norm": 0.1522912234067917, "learning_rate": 4.4885958234200634e-05, "loss": 0.0036, "step": 33360 }, { "action_loss": 0.001971425721421838, "epoch": 30.0, "step": 33360 }, { "epoch": 30.008992805755394, "grad_norm": 0.12131766229867935, "learning_rate": 4.485854570734575e-05, "loss": 0.004, "step": 33370 }, { "action_loss": 0.015228454023599625, "epoch": 30.008992805755394, "step": 33370 }, { "epoch": 30.01798561151079, "grad_norm": 0.22017285227775574, "learning_rate": 4.483113474232891e-05, "loss": 0.0062, "step": 33380 }, { "action_loss": 0.0035521232057362795, "epoch": 30.01798561151079, "step": 33380 }, { "epoch": 30.026978417266186, "grad_norm": 0.16474512219429016, "learning_rate": 4.480372534747688e-05, "loss": 0.0062, "step": 33390 }, { "action_loss": 0.0017712466651573777, "epoch": 30.026978417266186, "step": 33390 }, { "epoch": 30.035971223021583, "grad_norm": 0.2649337947368622, "learning_rate": 4.477631753111588e-05, "loss": 0.0096, "step": 33400 }, { "action_loss": 0.005485096946358681, "epoch": 30.035971223021583, "step": 33400 }, { "epoch": 30.044964028776977, "grad_norm": 0.20248901844024658, "learning_rate": 4.4748911301571686e-05, "loss": 0.0042, "step": 33410 }, { "action_loss": 0.001991107128560543, "epoch": 30.044964028776977, "step": 33410 }, { "epoch": 30.053956834532375, "grad_norm": 0.10330434888601303, "learning_rate": 4.472150666716961e-05, "loss": 0.0042, "step": 33420 }, { "action_loss": 0.005761418491601944, "epoch": 30.053956834532375, "step": 33420 }, { "epoch": 30.06294964028777, "grad_norm": 0.12365066260099411, "learning_rate": 4.469410363623442e-05, "loss": 0.0039, "step": 33430 }, { "action_loss": 0.0036254061851650476, "epoch": 30.06294964028777, "step": 33430 }, { "epoch": 30.071942446043167, "grad_norm": 0.18610745668411255, "learning_rate": 4.466670221709044e-05, "loss": 0.0113, "step": 33440 }, { "action_loss": 0.00185974792111665, "epoch": 30.071942446043167, "step": 33440 }, { "epoch": 30.08093525179856, "grad_norm": 0.20970185101032257, "learning_rate": 4.463930241806154e-05, "loss": 0.0065, "step": 33450 }, { "action_loss": 0.002377572236582637, "epoch": 30.08093525179856, "step": 33450 }, { "epoch": 30.08992805755396, "grad_norm": 0.16321131587028503, "learning_rate": 4.4611904247471006e-05, "loss": 0.0037, "step": 33460 }, { "action_loss": 0.0035292517859488726, "epoch": 30.08992805755396, "step": 33460 }, { "epoch": 30.098920863309353, "grad_norm": 0.21486133337020874, "learning_rate": 4.458450771364171e-05, "loss": 0.0056, "step": 33470 }, { "action_loss": 0.002596915466710925, "epoch": 30.098920863309353, "step": 33470 }, { "epoch": 30.107913669064747, "grad_norm": 0.24833935499191284, "learning_rate": 4.4557112824895965e-05, "loss": 0.0052, "step": 33480 }, { "action_loss": 0.007045956794172525, "epoch": 30.107913669064747, "step": 33480 }, { "epoch": 30.116906474820144, "grad_norm": 0.4254510700702667, "learning_rate": 4.452971958955563e-05, "loss": 0.0062, "step": 33490 }, { "action_loss": 0.00572230527177453, "epoch": 30.116906474820144, "step": 33490 }, { "epoch": 30.12589928057554, "grad_norm": 0.20481573045253754, "learning_rate": 4.450232801594208e-05, "loss": 0.0054, "step": 33500 }, { "action_loss": 0.009453891776502132, "epoch": 30.12589928057554, "step": 33500 }, { "epoch": 30.134892086330936, "grad_norm": 0.15784533321857452, "learning_rate": 4.447493811237609e-05, "loss": 0.0053, "step": 33510 }, { "action_loss": 0.009418990463018417, "epoch": 30.134892086330936, "step": 33510 }, { "epoch": 30.14388489208633, "grad_norm": 0.22351272404193878, "learning_rate": 4.444754988717804e-05, "loss": 0.006, "step": 33520 }, { "action_loss": 0.005673277657479048, "epoch": 30.14388489208633, "step": 33520 }, { "epoch": 30.152877697841728, "grad_norm": 0.1711181402206421, "learning_rate": 4.442016334866771e-05, "loss": 0.0098, "step": 33530 }, { "action_loss": 0.00547895347699523, "epoch": 30.152877697841728, "step": 33530 }, { "epoch": 30.16187050359712, "grad_norm": 0.11946315318346024, "learning_rate": 4.4392778505164445e-05, "loss": 0.0041, "step": 33540 }, { "action_loss": 0.0030774485785514116, "epoch": 30.16187050359712, "step": 33540 }, { "epoch": 30.17086330935252, "grad_norm": 0.2365056425333023, "learning_rate": 4.436539536498702e-05, "loss": 0.0037, "step": 33550 }, { "action_loss": 0.002560322405770421, "epoch": 30.17086330935252, "step": 33550 }, { "epoch": 30.179856115107913, "grad_norm": 0.23455503582954407, "learning_rate": 4.433801393645369e-05, "loss": 0.0055, "step": 33560 }, { "action_loss": 0.008104500360786915, "epoch": 30.179856115107913, "step": 33560 }, { "epoch": 30.18884892086331, "grad_norm": 0.21176517009735107, "learning_rate": 4.431063422788226e-05, "loss": 0.0058, "step": 33570 }, { "action_loss": 0.0036712742876261473, "epoch": 30.18884892086331, "step": 33570 }, { "epoch": 30.197841726618705, "grad_norm": 0.21522949635982513, "learning_rate": 4.428325624758991e-05, "loss": 0.0046, "step": 33580 }, { "action_loss": 0.0015361262485384941, "epoch": 30.197841726618705, "step": 33580 }, { "epoch": 30.2068345323741, "grad_norm": 0.22906482219696045, "learning_rate": 4.4255880003893366e-05, "loss": 0.0071, "step": 33590 }, { "action_loss": 0.003347481368109584, "epoch": 30.2068345323741, "step": 33590 }, { "epoch": 30.215827338129497, "grad_norm": 0.10178156197071075, "learning_rate": 4.422850550510884e-05, "loss": 0.004, "step": 33600 }, { "action_loss": 0.0068983882665634155, "epoch": 30.215827338129497, "step": 33600 }, { "epoch": 30.22482014388489, "grad_norm": 0.11549681425094604, "learning_rate": 4.4201132759551934e-05, "loss": 0.0061, "step": 33610 }, { "action_loss": 0.0071049346588552, "epoch": 30.22482014388489, "step": 33610 }, { "epoch": 30.23381294964029, "grad_norm": 0.16417810320854187, "learning_rate": 4.4173761775537804e-05, "loss": 0.0082, "step": 33620 }, { "action_loss": 0.0023655567783862352, "epoch": 30.23381294964029, "step": 33620 }, { "epoch": 30.242805755395683, "grad_norm": 0.13523922860622406, "learning_rate": 4.414639256138099e-05, "loss": 0.0055, "step": 33630 }, { "action_loss": 0.0028149783611297607, "epoch": 30.242805755395683, "step": 33630 }, { "epoch": 30.25179856115108, "grad_norm": 0.15014898777008057, "learning_rate": 4.411902512539557e-05, "loss": 0.0045, "step": 33640 }, { "action_loss": 0.0038727542851120234, "epoch": 30.25179856115108, "step": 33640 }, { "epoch": 30.260791366906474, "grad_norm": 0.21028001606464386, "learning_rate": 4.4091659475895044e-05, "loss": 0.0042, "step": 33650 }, { "action_loss": 0.002013673773035407, "epoch": 30.260791366906474, "step": 33650 }, { "epoch": 30.269784172661872, "grad_norm": 0.1443653255701065, "learning_rate": 4.406429562119235e-05, "loss": 0.005, "step": 33660 }, { "action_loss": 0.007354082074016333, "epoch": 30.269784172661872, "step": 33660 }, { "epoch": 30.278776978417266, "grad_norm": 0.17520464956760406, "learning_rate": 4.4036933569599945e-05, "loss": 0.0052, "step": 33670 }, { "action_loss": 0.0032281558960676193, "epoch": 30.278776978417266, "step": 33670 }, { "epoch": 30.28776978417266, "grad_norm": 0.08235160261392593, "learning_rate": 4.400957332942965e-05, "loss": 0.0043, "step": 33680 }, { "action_loss": 0.004988124594092369, "epoch": 30.28776978417266, "step": 33680 }, { "epoch": 30.296762589928058, "grad_norm": 0.1336335837841034, "learning_rate": 4.3982214908992844e-05, "loss": 0.004, "step": 33690 }, { "action_loss": 0.006176716182380915, "epoch": 30.296762589928058, "step": 33690 }, { "epoch": 30.305755395683452, "grad_norm": 0.15420469641685486, "learning_rate": 4.3954858316600235e-05, "loss": 0.004, "step": 33700 }, { "action_loss": 0.004290448967367411, "epoch": 30.305755395683452, "step": 33700 }, { "epoch": 30.31474820143885, "grad_norm": 0.14919930696487427, "learning_rate": 4.392750356056205e-05, "loss": 0.0054, "step": 33710 }, { "action_loss": 0.0038185836747288704, "epoch": 30.31474820143885, "step": 33710 }, { "epoch": 30.323741007194243, "grad_norm": 0.08552253991365433, "learning_rate": 4.390015064918798e-05, "loss": 0.0032, "step": 33720 }, { "action_loss": 0.002763752592727542, "epoch": 30.323741007194243, "step": 33720 }, { "epoch": 30.33273381294964, "grad_norm": 0.1207551583647728, "learning_rate": 4.387279959078705e-05, "loss": 0.0035, "step": 33730 }, { "action_loss": 0.005785348359495401, "epoch": 30.33273381294964, "step": 33730 }, { "epoch": 30.341726618705035, "grad_norm": 0.11812115460634232, "learning_rate": 4.384545039366786e-05, "loss": 0.0046, "step": 33740 }, { "action_loss": 0.006600712891668081, "epoch": 30.341726618705035, "step": 33740 }, { "epoch": 30.350719424460433, "grad_norm": 0.23262548446655273, "learning_rate": 4.381810306613831e-05, "loss": 0.0063, "step": 33750 }, { "action_loss": 0.010675801895558834, "epoch": 30.350719424460433, "step": 33750 }, { "epoch": 30.359712230215827, "grad_norm": 0.19763484597206116, "learning_rate": 4.3790757616505826e-05, "loss": 0.0057, "step": 33760 }, { "action_loss": 0.0015873567899689078, "epoch": 30.359712230215827, "step": 33760 }, { "epoch": 30.368705035971225, "grad_norm": 0.23493097722530365, "learning_rate": 4.376341405307725e-05, "loss": 0.0068, "step": 33770 }, { "action_loss": 0.0019821098539978266, "epoch": 30.368705035971225, "step": 33770 }, { "epoch": 30.37769784172662, "grad_norm": 0.11948195844888687, "learning_rate": 4.37360723841588e-05, "loss": 0.0055, "step": 33780 }, { "action_loss": 0.007642044220119715, "epoch": 30.37769784172662, "step": 33780 }, { "epoch": 30.386690647482013, "grad_norm": 0.2784402370452881, "learning_rate": 4.370873261805619e-05, "loss": 0.0095, "step": 33790 }, { "action_loss": 0.014165527187287807, "epoch": 30.386690647482013, "step": 33790 }, { "epoch": 30.39568345323741, "grad_norm": 0.18909065425395966, "learning_rate": 4.368139476307449e-05, "loss": 0.008, "step": 33800 }, { "action_loss": 0.003314683912321925, "epoch": 30.39568345323741, "step": 33800 }, { "epoch": 30.404676258992804, "grad_norm": 0.22821593284606934, "learning_rate": 4.365405882751822e-05, "loss": 0.0063, "step": 33810 }, { "action_loss": 0.006398636847734451, "epoch": 30.404676258992804, "step": 33810 }, { "epoch": 30.413669064748202, "grad_norm": 0.24345062673091888, "learning_rate": 4.3626724819691326e-05, "loss": 0.0052, "step": 33820 }, { "action_loss": 0.0050448644906282425, "epoch": 30.413669064748202, "step": 33820 }, { "epoch": 30.422661870503596, "grad_norm": 0.14878836274147034, "learning_rate": 4.359939274789715e-05, "loss": 0.0038, "step": 33830 }, { "action_loss": 0.004001952707767487, "epoch": 30.422661870503596, "step": 33830 }, { "epoch": 30.431654676258994, "grad_norm": 0.14435173571109772, "learning_rate": 4.357206262043848e-05, "loss": 0.0056, "step": 33840 }, { "action_loss": 0.0022320987191051245, "epoch": 30.431654676258994, "step": 33840 }, { "epoch": 30.440647482014388, "grad_norm": 0.09631864726543427, "learning_rate": 4.354473444561745e-05, "loss": 0.0032, "step": 33850 }, { "action_loss": 0.0035373407881706953, "epoch": 30.440647482014388, "step": 33850 }, { "epoch": 30.449640287769785, "grad_norm": 0.18633437156677246, "learning_rate": 4.3517408231735644e-05, "loss": 0.0058, "step": 33860 }, { "action_loss": 0.004473485983908176, "epoch": 30.449640287769785, "step": 33860 }, { "epoch": 30.45863309352518, "grad_norm": 0.14517775177955627, "learning_rate": 4.3490083987094086e-05, "loss": 0.0047, "step": 33870 }, { "action_loss": 0.001785622793249786, "epoch": 30.45863309352518, "step": 33870 }, { "epoch": 30.467625899280577, "grad_norm": 0.1339036226272583, "learning_rate": 4.34627617199931e-05, "loss": 0.0074, "step": 33880 }, { "action_loss": 0.008782275021076202, "epoch": 30.467625899280577, "step": 33880 }, { "epoch": 30.47661870503597, "grad_norm": 0.20080140233039856, "learning_rate": 4.3435441438732526e-05, "loss": 0.0034, "step": 33890 }, { "action_loss": 0.002994253532961011, "epoch": 30.47661870503597, "step": 33890 }, { "epoch": 30.485611510791365, "grad_norm": 0.1541975885629654, "learning_rate": 4.340812315161149e-05, "loss": 0.0053, "step": 33900 }, { "action_loss": 0.010616853833198547, "epoch": 30.485611510791365, "step": 33900 }, { "epoch": 30.494604316546763, "grad_norm": 0.3125298321247101, "learning_rate": 4.338080686692859e-05, "loss": 0.0086, "step": 33910 }, { "action_loss": 0.0057723005302250385, "epoch": 30.494604316546763, "step": 33910 }, { "epoch": 30.503597122302157, "grad_norm": 0.16706907749176025, "learning_rate": 4.3353492592981816e-05, "loss": 0.0044, "step": 33920 }, { "action_loss": 0.0021576278377324343, "epoch": 30.503597122302157, "step": 33920 }, { "epoch": 30.512589928057555, "grad_norm": 0.188608318567276, "learning_rate": 4.3326180338068485e-05, "loss": 0.0089, "step": 33930 }, { "action_loss": 0.0022911466658115387, "epoch": 30.512589928057555, "step": 33930 }, { "epoch": 30.52158273381295, "grad_norm": 0.195052832365036, "learning_rate": 4.3298870110485356e-05, "loss": 0.005, "step": 33940 }, { "action_loss": 0.005837269127368927, "epoch": 30.52158273381295, "step": 33940 }, { "epoch": 30.530575539568346, "grad_norm": 0.09596973657608032, "learning_rate": 4.3271561918528567e-05, "loss": 0.0036, "step": 33950 }, { "action_loss": 0.0019873513374477625, "epoch": 30.530575539568346, "step": 33950 }, { "epoch": 30.53956834532374, "grad_norm": 0.18759694695472717, "learning_rate": 4.324425577049359e-05, "loss": 0.0046, "step": 33960 }, { "action_loss": 0.0043994830921292305, "epoch": 30.53956834532374, "step": 33960 }, { "epoch": 30.548561151079138, "grad_norm": 0.10250195115804672, "learning_rate": 4.321695167467535e-05, "loss": 0.0048, "step": 33970 }, { "action_loss": 0.0017363236984238029, "epoch": 30.548561151079138, "step": 33970 }, { "epoch": 30.557553956834532, "grad_norm": 0.12780025601387024, "learning_rate": 4.3189649639368093e-05, "loss": 0.0096, "step": 33980 }, { "action_loss": 0.0029282812029123306, "epoch": 30.557553956834532, "step": 33980 }, { "epoch": 30.56654676258993, "grad_norm": 0.10884343832731247, "learning_rate": 4.316234967286547e-05, "loss": 0.0043, "step": 33990 }, { "action_loss": 0.005230626557022333, "epoch": 30.56654676258993, "step": 33990 }, { "epoch": 30.575539568345324, "grad_norm": 0.21891994774341583, "learning_rate": 4.313505178346046e-05, "loss": 0.0102, "step": 34000 }, { "action_loss": 0.0030087605118751526, "epoch": 30.575539568345324, "step": 34000 }, { "epoch": 30.584532374100718, "grad_norm": 0.11044017970561981, "learning_rate": 4.3107755979445465e-05, "loss": 0.0057, "step": 34010 }, { "action_loss": 0.0034469973761588335, "epoch": 30.584532374100718, "step": 34010 }, { "epoch": 30.593525179856115, "grad_norm": 0.11012046784162521, "learning_rate": 4.308046226911224e-05, "loss": 0.0043, "step": 34020 }, { "action_loss": 0.006029101088643074, "epoch": 30.593525179856115, "step": 34020 }, { "epoch": 30.60251798561151, "grad_norm": 0.08989904820919037, "learning_rate": 4.305317066075185e-05, "loss": 0.0061, "step": 34030 }, { "action_loss": 0.0017119116382673383, "epoch": 30.60251798561151, "step": 34030 }, { "epoch": 30.611510791366907, "grad_norm": 0.1658906489610672, "learning_rate": 4.302588116265482e-05, "loss": 0.0045, "step": 34040 }, { "action_loss": 0.008406147360801697, "epoch": 30.611510791366907, "step": 34040 }, { "epoch": 30.6205035971223, "grad_norm": 0.14477434754371643, "learning_rate": 4.299859378311094e-05, "loss": 0.0069, "step": 34050 }, { "action_loss": 0.002452765591442585, "epoch": 30.6205035971223, "step": 34050 }, { "epoch": 30.6294964028777, "grad_norm": 0.16195295751094818, "learning_rate": 4.2971308530409424e-05, "loss": 0.0034, "step": 34060 }, { "action_loss": 0.01444871723651886, "epoch": 30.6294964028777, "step": 34060 }, { "epoch": 30.638489208633093, "grad_norm": 0.19555231928825378, "learning_rate": 4.2944025412838765e-05, "loss": 0.0053, "step": 34070 }, { "action_loss": 0.003536924021318555, "epoch": 30.638489208633093, "step": 34070 }, { "epoch": 30.64748201438849, "grad_norm": 0.13490867614746094, "learning_rate": 4.291674443868689e-05, "loss": 0.0047, "step": 34080 }, { "action_loss": 0.00295422226190567, "epoch": 30.64748201438849, "step": 34080 }, { "epoch": 30.656474820143885, "grad_norm": 0.1569691300392151, "learning_rate": 4.288946561624104e-05, "loss": 0.0049, "step": 34090 }, { "action_loss": 0.0029815935995429754, "epoch": 30.656474820143885, "step": 34090 }, { "epoch": 30.665467625899282, "grad_norm": 0.13093861937522888, "learning_rate": 4.2862188953787794e-05, "loss": 0.0058, "step": 34100 }, { "action_loss": 0.006546138320118189, "epoch": 30.665467625899282, "step": 34100 }, { "epoch": 30.674460431654676, "grad_norm": 0.09320028126239777, "learning_rate": 4.283491445961308e-05, "loss": 0.0045, "step": 34110 }, { "action_loss": 0.0063391984440386295, "epoch": 30.674460431654676, "step": 34110 }, { "epoch": 30.68345323741007, "grad_norm": 0.1848832666873932, "learning_rate": 4.2807642142002155e-05, "loss": 0.0048, "step": 34120 }, { "action_loss": 0.0030829396564513445, "epoch": 30.68345323741007, "step": 34120 }, { "epoch": 30.692446043165468, "grad_norm": 0.15624389052391052, "learning_rate": 4.278037200923966e-05, "loss": 0.0039, "step": 34130 }, { "action_loss": 0.0035897090565413237, "epoch": 30.692446043165468, "step": 34130 }, { "epoch": 30.701438848920862, "grad_norm": 0.22299523651599884, "learning_rate": 4.275310406960953e-05, "loss": 0.0079, "step": 34140 }, { "action_loss": 0.022603532299399376, "epoch": 30.701438848920862, "step": 34140 }, { "epoch": 30.71043165467626, "grad_norm": 0.1643507331609726, "learning_rate": 4.272583833139502e-05, "loss": 0.008, "step": 34150 }, { "action_loss": 0.0023946736473590136, "epoch": 30.71043165467626, "step": 34150 }, { "epoch": 30.719424460431654, "grad_norm": 0.144033744931221, "learning_rate": 4.2698574802878794e-05, "loss": 0.005, "step": 34160 }, { "action_loss": 0.00617136200889945, "epoch": 30.719424460431654, "step": 34160 }, { "epoch": 30.72841726618705, "grad_norm": 0.1062321662902832, "learning_rate": 4.2671313492342734e-05, "loss": 0.0037, "step": 34170 }, { "action_loss": 0.002633059397339821, "epoch": 30.72841726618705, "step": 34170 }, { "epoch": 30.737410071942445, "grad_norm": 0.09074166417121887, "learning_rate": 4.264405440806813e-05, "loss": 0.0037, "step": 34180 }, { "action_loss": 0.0012473124079406261, "epoch": 30.737410071942445, "step": 34180 }, { "epoch": 30.746402877697843, "grad_norm": 0.15133841335773468, "learning_rate": 4.26167975583356e-05, "loss": 0.0078, "step": 34190 }, { "action_loss": 0.002135113812983036, "epoch": 30.746402877697843, "step": 34190 }, { "epoch": 30.755395683453237, "grad_norm": 0.07489775866270065, "learning_rate": 4.2589542951425e-05, "loss": 0.0035, "step": 34200 }, { "action_loss": 0.005739381071180105, "epoch": 30.755395683453237, "step": 34200 }, { "epoch": 30.764388489208635, "grad_norm": 0.21084724366664886, "learning_rate": 4.2562290595615615e-05, "loss": 0.0054, "step": 34210 }, { "action_loss": 0.003369263606145978, "epoch": 30.764388489208635, "step": 34210 }, { "epoch": 30.77338129496403, "grad_norm": 0.16771656274795532, "learning_rate": 4.2535040499185946e-05, "loss": 0.0057, "step": 34220 }, { "action_loss": 0.0036571912933140993, "epoch": 30.77338129496403, "step": 34220 }, { "epoch": 30.782374100719423, "grad_norm": 0.18012458086013794, "learning_rate": 4.250779267041387e-05, "loss": 0.005, "step": 34230 }, { "action_loss": 0.0020440213847905397, "epoch": 30.782374100719423, "step": 34230 }, { "epoch": 30.79136690647482, "grad_norm": 0.12835460901260376, "learning_rate": 4.248054711757657e-05, "loss": 0.0051, "step": 34240 }, { "action_loss": 0.0029558923561125994, "epoch": 30.79136690647482, "step": 34240 }, { "epoch": 30.800359712230215, "grad_norm": 0.12089981883764267, "learning_rate": 4.245330384895052e-05, "loss": 0.0069, "step": 34250 }, { "action_loss": 0.021251915022730827, "epoch": 30.800359712230215, "step": 34250 }, { "epoch": 30.809352517985612, "grad_norm": 0.1252288818359375, "learning_rate": 4.242606287281151e-05, "loss": 0.0079, "step": 34260 }, { "action_loss": 0.010533715598285198, "epoch": 30.809352517985612, "step": 34260 }, { "epoch": 30.818345323741006, "grad_norm": 0.15829600393772125, "learning_rate": 4.2398824197434595e-05, "loss": 0.0127, "step": 34270 }, { "action_loss": 0.008603162132203579, "epoch": 30.818345323741006, "step": 34270 }, { "epoch": 30.827338129496404, "grad_norm": 0.12788702547550201, "learning_rate": 4.23715878310942e-05, "loss": 0.0054, "step": 34280 }, { "action_loss": 0.003924598917365074, "epoch": 30.827338129496404, "step": 34280 }, { "epoch": 30.836330935251798, "grad_norm": 0.11773747950792313, "learning_rate": 4.234435378206402e-05, "loss": 0.004, "step": 34290 }, { "action_loss": 0.004919914063066244, "epoch": 30.836330935251798, "step": 34290 }, { "epoch": 30.845323741007196, "grad_norm": 0.11831407248973846, "learning_rate": 4.2317122058617006e-05, "loss": 0.006, "step": 34300 }, { "action_loss": 0.003382251365110278, "epoch": 30.845323741007196, "step": 34300 }, { "epoch": 30.85431654676259, "grad_norm": 0.15062908828258514, "learning_rate": 4.2289892669025485e-05, "loss": 0.0052, "step": 34310 }, { "action_loss": 0.00176796515006572, "epoch": 30.85431654676259, "step": 34310 }, { "epoch": 30.863309352517987, "grad_norm": 0.23573046922683716, "learning_rate": 4.226266562156097e-05, "loss": 0.0064, "step": 34320 }, { "action_loss": 0.001253297203220427, "epoch": 30.863309352517987, "step": 34320 }, { "epoch": 30.87230215827338, "grad_norm": 0.1539004147052765, "learning_rate": 4.223544092449435e-05, "loss": 0.0047, "step": 34330 }, { "action_loss": 0.003175565740093589, "epoch": 30.87230215827338, "step": 34330 }, { "epoch": 30.881294964028775, "grad_norm": 0.1014247015118599, "learning_rate": 4.2208218586095784e-05, "loss": 0.0048, "step": 34340 }, { "action_loss": 0.0028287062887102365, "epoch": 30.881294964028775, "step": 34340 }, { "epoch": 30.890287769784173, "grad_norm": 0.13254842162132263, "learning_rate": 4.218099861463466e-05, "loss": 0.0044, "step": 34350 }, { "action_loss": 0.00797399040311575, "epoch": 30.890287769784173, "step": 34350 }, { "epoch": 30.899280575539567, "grad_norm": 0.09400807321071625, "learning_rate": 4.215378101837972e-05, "loss": 0.0064, "step": 34360 }, { "action_loss": 0.0024377519730478525, "epoch": 30.899280575539567, "step": 34360 }, { "epoch": 30.908273381294965, "grad_norm": 0.09806350618600845, "learning_rate": 4.2126565805598937e-05, "loss": 0.0053, "step": 34370 }, { "action_loss": 0.002903120592236519, "epoch": 30.908273381294965, "step": 34370 }, { "epoch": 30.91726618705036, "grad_norm": 0.1488187164068222, "learning_rate": 4.209935298455957e-05, "loss": 0.0044, "step": 34380 }, { "action_loss": 0.004471208434551954, "epoch": 30.91726618705036, "step": 34380 }, { "epoch": 30.926258992805757, "grad_norm": 0.3598272204399109, "learning_rate": 4.207214256352817e-05, "loss": 0.0049, "step": 34390 }, { "action_loss": 0.0025424640625715256, "epoch": 30.926258992805757, "step": 34390 }, { "epoch": 30.93525179856115, "grad_norm": 0.13942024111747742, "learning_rate": 4.2044934550770524e-05, "loss": 0.0065, "step": 34400 }, { "action_loss": 0.060661282390356064, "epoch": 30.93525179856115, "step": 34400 }, { "epoch": 30.944244604316548, "grad_norm": 0.10674799233675003, "learning_rate": 4.201772895455174e-05, "loss": 0.0091, "step": 34410 }, { "action_loss": 0.0020253295078873634, "epoch": 30.944244604316548, "step": 34410 }, { "epoch": 30.953237410071942, "grad_norm": 0.14035771787166595, "learning_rate": 4.199052578313613e-05, "loss": 0.0055, "step": 34420 }, { "action_loss": 0.0037792653311043978, "epoch": 30.953237410071942, "step": 34420 }, { "epoch": 30.96223021582734, "grad_norm": 0.14702747762203217, "learning_rate": 4.1963325044787294e-05, "loss": 0.0055, "step": 34430 }, { "action_loss": 0.0019831873942166567, "epoch": 30.96223021582734, "step": 34430 }, { "epoch": 30.971223021582734, "grad_norm": 0.18484815955162048, "learning_rate": 4.193612674776814e-05, "loss": 0.0037, "step": 34440 }, { "action_loss": 0.005246617365628481, "epoch": 30.971223021582734, "step": 34440 }, { "epoch": 30.980215827338128, "grad_norm": 0.13924585282802582, "learning_rate": 4.1908930900340745e-05, "loss": 0.0044, "step": 34450 }, { "action_loss": 0.007303924765437841, "epoch": 30.980215827338128, "step": 34450 }, { "epoch": 30.989208633093526, "grad_norm": 0.17199328541755676, "learning_rate": 4.1881737510766536e-05, "loss": 0.004, "step": 34460 }, { "action_loss": 0.003921290393918753, "epoch": 30.989208633093526, "step": 34460 }, { "epoch": 30.99820143884892, "grad_norm": 0.08444350957870483, "learning_rate": 4.185454658730609e-05, "loss": 0.0028, "step": 34470 }, { "action_loss": 0.0034119163174182177, "epoch": 30.99820143884892, "step": 34470 }, { "epoch": 31.007194244604317, "grad_norm": 0.11385879665613174, "learning_rate": 4.1827358138219355e-05, "loss": 0.0072, "step": 34480 }, { "action_loss": 0.0022980859503149986, "epoch": 31.007194244604317, "step": 34480 }, { "epoch": 31.01618705035971, "grad_norm": 0.16413062810897827, "learning_rate": 4.1800172171765404e-05, "loss": 0.0135, "step": 34490 }, { "action_loss": 0.004959933925420046, "epoch": 31.01618705035971, "step": 34490 }, { "epoch": 31.02517985611511, "grad_norm": 0.11657460033893585, "learning_rate": 4.177298869620264e-05, "loss": 0.0036, "step": 34500 }, { "action_loss": 0.0035823993384838104, "epoch": 31.02517985611511, "step": 34500 }, { "epoch": 31.034172661870503, "grad_norm": 0.1489063948392868, "learning_rate": 4.1745807719788705e-05, "loss": 0.0054, "step": 34510 }, { "action_loss": 0.0028051685076206923, "epoch": 31.034172661870503, "step": 34510 }, { "epoch": 31.0431654676259, "grad_norm": 0.20458023250102997, "learning_rate": 4.1718629250780445e-05, "loss": 0.0053, "step": 34520 }, { "action_loss": 0.006864000111818314, "epoch": 31.0431654676259, "step": 34520 }, { "epoch": 31.052158273381295, "grad_norm": 0.1185244768857956, "learning_rate": 4.1691453297433956e-05, "loss": 0.0053, "step": 34530 }, { "action_loss": 0.006023105699568987, "epoch": 31.052158273381295, "step": 34530 }, { "epoch": 31.06115107913669, "grad_norm": 0.2038608342409134, "learning_rate": 4.166427986800457e-05, "loss": 0.006, "step": 34540 }, { "action_loss": 0.004646243993192911, "epoch": 31.06115107913669, "step": 34540 }, { "epoch": 31.070143884892087, "grad_norm": 0.13742893934249878, "learning_rate": 4.163710897074688e-05, "loss": 0.0043, "step": 34550 }, { "action_loss": 0.004214787390083075, "epoch": 31.070143884892087, "step": 34550 }, { "epoch": 31.07913669064748, "grad_norm": 0.13249889016151428, "learning_rate": 4.1609940613914686e-05, "loss": 0.0067, "step": 34560 }, { "action_loss": 0.008083313703536987, "epoch": 31.07913669064748, "step": 34560 }, { "epoch": 31.08812949640288, "grad_norm": 0.10715728253126144, "learning_rate": 4.1582774805760996e-05, "loss": 0.0053, "step": 34570 }, { "action_loss": 0.002792024053633213, "epoch": 31.08812949640288, "step": 34570 }, { "epoch": 31.097122302158272, "grad_norm": 0.2608944773674011, "learning_rate": 4.155561155453809e-05, "loss": 0.005, "step": 34580 }, { "action_loss": 0.0019016456790268421, "epoch": 31.097122302158272, "step": 34580 }, { "epoch": 31.10611510791367, "grad_norm": 0.24754278361797333, "learning_rate": 4.15284508684974e-05, "loss": 0.0083, "step": 34590 }, { "action_loss": 0.0015758359804749489, "epoch": 31.10611510791367, "step": 34590 }, { "epoch": 31.115107913669064, "grad_norm": 0.1726047396659851, "learning_rate": 4.1501292755889675e-05, "loss": 0.0058, "step": 34600 }, { "action_loss": 0.005051734391599894, "epoch": 31.115107913669064, "step": 34600 }, { "epoch": 31.12410071942446, "grad_norm": 0.32521525025367737, "learning_rate": 4.1474137224964833e-05, "loss": 0.0068, "step": 34610 }, { "action_loss": 0.005740294232964516, "epoch": 31.12410071942446, "step": 34610 }, { "epoch": 31.133093525179856, "grad_norm": 0.19293367862701416, "learning_rate": 4.144698428397197e-05, "loss": 0.0079, "step": 34620 }, { "action_loss": 0.002343179890885949, "epoch": 31.133093525179856, "step": 34620 }, { "epoch": 31.142086330935253, "grad_norm": 0.10367549955844879, "learning_rate": 4.1419833941159466e-05, "loss": 0.0038, "step": 34630 }, { "action_loss": 0.0016228649765253067, "epoch": 31.142086330935253, "step": 34630 }, { "epoch": 31.151079136690647, "grad_norm": 0.12054863572120667, "learning_rate": 4.1392686204774846e-05, "loss": 0.0043, "step": 34640 }, { "action_loss": 0.002143118530511856, "epoch": 31.151079136690647, "step": 34640 }, { "epoch": 31.16007194244604, "grad_norm": 0.16522791981697083, "learning_rate": 4.13655410830649e-05, "loss": 0.0042, "step": 34650 }, { "action_loss": 0.005106286611407995, "epoch": 31.16007194244604, "step": 34650 }, { "epoch": 31.16906474820144, "grad_norm": 0.19353839755058289, "learning_rate": 4.1338398584275594e-05, "loss": 0.0082, "step": 34660 }, { "action_loss": 0.014089838601648808, "epoch": 31.16906474820144, "step": 34660 }, { "epoch": 31.178057553956833, "grad_norm": 0.19008830189704895, "learning_rate": 4.1311258716652104e-05, "loss": 0.0081, "step": 34670 }, { "action_loss": 0.006006311625242233, "epoch": 31.178057553956833, "step": 34670 }, { "epoch": 31.18705035971223, "grad_norm": 0.15566860139369965, "learning_rate": 4.128412148843881e-05, "loss": 0.0052, "step": 34680 }, { "action_loss": 0.002884663874283433, "epoch": 31.18705035971223, "step": 34680 }, { "epoch": 31.196043165467625, "grad_norm": 0.25571778416633606, "learning_rate": 4.125698690787926e-05, "loss": 0.0069, "step": 34690 }, { "action_loss": 0.0058122072368860245, "epoch": 31.196043165467625, "step": 34690 }, { "epoch": 31.205035971223023, "grad_norm": 0.18839196860790253, "learning_rate": 4.1229854983216245e-05, "loss": 0.0058, "step": 34700 }, { "action_loss": 0.0036778494250029325, "epoch": 31.205035971223023, "step": 34700 }, { "epoch": 31.214028776978417, "grad_norm": 0.20827674865722656, "learning_rate": 4.120272572269175e-05, "loss": 0.0061, "step": 34710 }, { "action_loss": 0.011010043323040009, "epoch": 31.214028776978417, "step": 34710 }, { "epoch": 31.223021582733814, "grad_norm": 0.1858265995979309, "learning_rate": 4.117559913454687e-05, "loss": 0.0054, "step": 34720 }, { "action_loss": 0.0032042749226093292, "epoch": 31.223021582733814, "step": 34720 }, { "epoch": 31.23201438848921, "grad_norm": 0.16466766595840454, "learning_rate": 4.114847522702201e-05, "loss": 0.0049, "step": 34730 }, { "action_loss": 0.004050715360790491, "epoch": 31.23201438848921, "step": 34730 }, { "epoch": 31.241007194244606, "grad_norm": 0.107801653444767, "learning_rate": 4.112135400835664e-05, "loss": 0.004, "step": 34740 }, { "action_loss": 0.00624746223911643, "epoch": 31.241007194244606, "step": 34740 }, { "epoch": 31.25, "grad_norm": 0.1461847871541977, "learning_rate": 4.109423548678949e-05, "loss": 0.0033, "step": 34750 }, { "action_loss": 0.005655401851981878, "epoch": 31.25, "step": 34750 }, { "epoch": 31.258992805755394, "grad_norm": 0.0769350528717041, "learning_rate": 4.106711967055848e-05, "loss": 0.0041, "step": 34760 }, { "action_loss": 0.003364645643159747, "epoch": 31.258992805755394, "step": 34760 }, { "epoch": 31.26798561151079, "grad_norm": 0.16702386736869812, "learning_rate": 4.1040006567900636e-05, "loss": 0.0048, "step": 34770 }, { "action_loss": 0.001995289698243141, "epoch": 31.26798561151079, "step": 34770 }, { "epoch": 31.276978417266186, "grad_norm": 0.17402684688568115, "learning_rate": 4.101289618705224e-05, "loss": 0.0036, "step": 34780 }, { "action_loss": 0.0024819441605359316, "epoch": 31.276978417266186, "step": 34780 }, { "epoch": 31.285971223021583, "grad_norm": 0.20120863616466522, "learning_rate": 4.0985788536248675e-05, "loss": 0.0042, "step": 34790 }, { "action_loss": 0.0034853878896683455, "epoch": 31.285971223021583, "step": 34790 }, { "epoch": 31.294964028776977, "grad_norm": 0.1474297046661377, "learning_rate": 4.095868362372454e-05, "loss": 0.0036, "step": 34800 }, { "action_loss": 0.006125102285295725, "epoch": 31.294964028776977, "step": 34800 }, { "epoch": 31.303956834532375, "grad_norm": 0.14934957027435303, "learning_rate": 4.0931581457713614e-05, "loss": 0.0044, "step": 34810 }, { "action_loss": 0.008958850055932999, "epoch": 31.303956834532375, "step": 34810 }, { "epoch": 31.31294964028777, "grad_norm": 0.11868130415678024, "learning_rate": 4.09044820464488e-05, "loss": 0.0054, "step": 34820 }, { "action_loss": 0.0014476323267444968, "epoch": 31.31294964028777, "step": 34820 }, { "epoch": 31.321942446043167, "grad_norm": 0.11461054533720016, "learning_rate": 4.087738539816219e-05, "loss": 0.0053, "step": 34830 }, { "action_loss": 0.011450896970927715, "epoch": 31.321942446043167, "step": 34830 }, { "epoch": 31.33093525179856, "grad_norm": 0.2700863182544708, "learning_rate": 4.085029152108501e-05, "loss": 0.0065, "step": 34840 }, { "action_loss": 0.0015552937984466553, "epoch": 31.33093525179856, "step": 34840 }, { "epoch": 31.33992805755396, "grad_norm": 0.2395898401737213, "learning_rate": 4.0823200423447714e-05, "loss": 0.0058, "step": 34850 }, { "action_loss": 0.0030165843199938536, "epoch": 31.33992805755396, "step": 34850 }, { "epoch": 31.348920863309353, "grad_norm": 0.1099943071603775, "learning_rate": 4.079611211347981e-05, "loss": 0.0091, "step": 34860 }, { "action_loss": 0.00454240757972002, "epoch": 31.348920863309353, "step": 34860 }, { "epoch": 31.357913669064747, "grad_norm": 0.1050034761428833, "learning_rate": 4.076902659941002e-05, "loss": 0.0032, "step": 34870 }, { "action_loss": 0.005175555590540171, "epoch": 31.357913669064747, "step": 34870 }, { "epoch": 31.366906474820144, "grad_norm": 0.16799330711364746, "learning_rate": 4.074194388946624e-05, "loss": 0.0038, "step": 34880 }, { "action_loss": 0.002177095040678978, "epoch": 31.366906474820144, "step": 34880 }, { "epoch": 31.37589928057554, "grad_norm": 0.22529001533985138, "learning_rate": 4.071486399187545e-05, "loss": 0.0045, "step": 34890 }, { "action_loss": 0.008152673952281475, "epoch": 31.37589928057554, "step": 34890 }, { "epoch": 31.384892086330936, "grad_norm": 0.10911943018436432, "learning_rate": 4.0687786914863836e-05, "loss": 0.0042, "step": 34900 }, { "action_loss": 0.001721517532132566, "epoch": 31.384892086330936, "step": 34900 }, { "epoch": 31.39388489208633, "grad_norm": 0.22097957134246826, "learning_rate": 4.0660712666656666e-05, "loss": 0.004, "step": 34910 }, { "action_loss": 0.0041173589415848255, "epoch": 31.39388489208633, "step": 34910 }, { "epoch": 31.402877697841728, "grad_norm": 0.13596265017986298, "learning_rate": 4.0633641255478394e-05, "loss": 0.0067, "step": 34920 }, { "action_loss": 0.0020475785713642836, "epoch": 31.402877697841728, "step": 34920 }, { "epoch": 31.41187050359712, "grad_norm": 0.15474171936511993, "learning_rate": 4.0606572689552624e-05, "loss": 0.0041, "step": 34930 }, { "action_loss": 0.0025455341674387455, "epoch": 31.41187050359712, "step": 34930 }, { "epoch": 31.42086330935252, "grad_norm": 0.15955011546611786, "learning_rate": 4.0579506977102036e-05, "loss": 0.0057, "step": 34940 }, { "action_loss": 0.003916407469660044, "epoch": 31.42086330935252, "step": 34940 }, { "epoch": 31.429856115107913, "grad_norm": 0.2229960411787033, "learning_rate": 4.055244412634849e-05, "loss": 0.0051, "step": 34950 }, { "action_loss": 0.0030526432674378157, "epoch": 31.429856115107913, "step": 34950 }, { "epoch": 31.43884892086331, "grad_norm": 0.12946493923664093, "learning_rate": 4.052538414551298e-05, "loss": 0.0051, "step": 34960 }, { "action_loss": 0.011598102748394012, "epoch": 31.43884892086331, "step": 34960 }, { "epoch": 31.447841726618705, "grad_norm": 0.11228328943252563, "learning_rate": 4.0498327042815596e-05, "loss": 0.0051, "step": 34970 }, { "action_loss": 0.0031879779417067766, "epoch": 31.447841726618705, "step": 34970 }, { "epoch": 31.4568345323741, "grad_norm": 0.11676496267318726, "learning_rate": 4.047127282647559e-05, "loss": 0.0043, "step": 34980 }, { "action_loss": 0.004150013905018568, "epoch": 31.4568345323741, "step": 34980 }, { "epoch": 31.465827338129497, "grad_norm": 0.13415931165218353, "learning_rate": 4.04442215047113e-05, "loss": 0.0033, "step": 34990 }, { "action_loss": 0.0032067715656012297, "epoch": 31.465827338129497, "step": 34990 }, { "epoch": 31.47482014388489, "grad_norm": 0.1581064909696579, "learning_rate": 4.041717308574023e-05, "loss": 0.003, "step": 35000 }, { "action_loss": 0.004065258894115686, "epoch": 31.47482014388489, "step": 35000 }, { "epoch": 31.48381294964029, "grad_norm": 0.16603761911392212, "learning_rate": 4.039012757777893e-05, "loss": 0.0044, "step": 35010 }, { "action_loss": 0.0024542303290218115, "epoch": 31.48381294964029, "step": 35010 }, { "epoch": 31.492805755395683, "grad_norm": 0.1329183131456375, "learning_rate": 4.036308498904314e-05, "loss": 0.0028, "step": 35020 }, { "action_loss": 0.004140548408031464, "epoch": 31.492805755395683, "step": 35020 }, { "epoch": 31.50179856115108, "grad_norm": 0.1628052145242691, "learning_rate": 4.033604532774771e-05, "loss": 0.0058, "step": 35030 }, { "action_loss": 0.0042115929536521435, "epoch": 31.50179856115108, "step": 35030 }, { "epoch": 31.510791366906474, "grad_norm": 0.09464331716299057, "learning_rate": 4.030900860210652e-05, "loss": 0.0034, "step": 35040 }, { "action_loss": 0.014816991053521633, "epoch": 31.510791366906474, "step": 35040 }, { "epoch": 31.519784172661872, "grad_norm": 0.125435933470726, "learning_rate": 4.028197482033266e-05, "loss": 0.0055, "step": 35050 }, { "action_loss": 0.0022263871505856514, "epoch": 31.519784172661872, "step": 35050 }, { "epoch": 31.528776978417266, "grad_norm": 0.14212153851985931, "learning_rate": 4.0254943990638246e-05, "loss": 0.0067, "step": 35060 }, { "action_loss": 0.0035998208913952112, "epoch": 31.528776978417266, "step": 35060 }, { "epoch": 31.53776978417266, "grad_norm": 0.19687849283218384, "learning_rate": 4.022791612123454e-05, "loss": 0.0091, "step": 35070 }, { "action_loss": 0.0028145399410277605, "epoch": 31.53776978417266, "step": 35070 }, { "epoch": 31.546762589928058, "grad_norm": 0.09747713059186935, "learning_rate": 4.020089122033192e-05, "loss": 0.0047, "step": 35080 }, { "action_loss": 0.00450294092297554, "epoch": 31.546762589928058, "step": 35080 }, { "epoch": 31.555755395683452, "grad_norm": 0.10412998497486115, "learning_rate": 4.01738692961398e-05, "loss": 0.0097, "step": 35090 }, { "action_loss": 0.0011160830035805702, "epoch": 31.555755395683452, "step": 35090 }, { "epoch": 31.56474820143885, "grad_norm": 0.27203646302223206, "learning_rate": 4.014685035686675e-05, "loss": 0.0035, "step": 35100 }, { "action_loss": 0.0034809578210115433, "epoch": 31.56474820143885, "step": 35100 }, { "epoch": 31.573741007194243, "grad_norm": 0.10082171112298965, "learning_rate": 4.011983441072039e-05, "loss": 0.0049, "step": 35110 }, { "action_loss": 0.0036816566716879606, "epoch": 31.573741007194243, "step": 35110 }, { "epoch": 31.58273381294964, "grad_norm": 0.3423762321472168, "learning_rate": 4.0092821465907485e-05, "loss": 0.0055, "step": 35120 }, { "action_loss": 0.002520256442949176, "epoch": 31.58273381294964, "step": 35120 }, { "epoch": 31.591726618705035, "grad_norm": 0.17778751254081726, "learning_rate": 4.006581153063383e-05, "loss": 0.0041, "step": 35130 }, { "action_loss": 0.003148705931380391, "epoch": 31.591726618705035, "step": 35130 }, { "epoch": 31.600719424460433, "grad_norm": 0.1397748440504074, "learning_rate": 4.003880461310432e-05, "loss": 0.0055, "step": 35140 }, { "action_loss": 0.002482358133420348, "epoch": 31.600719424460433, "step": 35140 }, { "epoch": 31.609712230215827, "grad_norm": 0.24455560743808746, "learning_rate": 4.001180072152298e-05, "loss": 0.0063, "step": 35150 }, { "action_loss": 0.0031293921638280153, "epoch": 31.609712230215827, "step": 35150 }, { "epoch": 31.618705035971225, "grad_norm": 0.20393270254135132, "learning_rate": 3.998479986409285e-05, "loss": 0.0067, "step": 35160 }, { "action_loss": 0.003104493720456958, "epoch": 31.618705035971225, "step": 35160 }, { "epoch": 31.62769784172662, "grad_norm": 0.11572887748479843, "learning_rate": 3.995780204901607e-05, "loss": 0.0042, "step": 35170 }, { "action_loss": 0.003640148788690567, "epoch": 31.62769784172662, "step": 35170 }, { "epoch": 31.636690647482013, "grad_norm": 0.10793471336364746, "learning_rate": 3.993080728449391e-05, "loss": 0.0033, "step": 35180 }, { "action_loss": 0.0025951280258595943, "epoch": 31.636690647482013, "step": 35180 }, { "epoch": 31.64568345323741, "grad_norm": 0.17629823088645935, "learning_rate": 3.990381557872661e-05, "loss": 0.0026, "step": 35190 }, { "action_loss": 0.008916824124753475, "epoch": 31.64568345323741, "step": 35190 }, { "epoch": 31.654676258992804, "grad_norm": 0.0787639170885086, "learning_rate": 3.987682693991359e-05, "loss": 0.0036, "step": 35200 }, { "action_loss": 0.004124096129089594, "epoch": 31.654676258992804, "step": 35200 }, { "epoch": 31.663669064748202, "grad_norm": 0.08910495787858963, "learning_rate": 3.9849841376253226e-05, "loss": 0.0031, "step": 35210 }, { "action_loss": 0.0014525674050673842, "epoch": 31.663669064748202, "step": 35210 }, { "epoch": 31.672661870503596, "grad_norm": 0.1114693358540535, "learning_rate": 3.982285889594306e-05, "loss": 0.0086, "step": 35220 }, { "action_loss": 0.0028390793595463037, "epoch": 31.672661870503596, "step": 35220 }, { "epoch": 31.681654676258994, "grad_norm": 0.13243654370307922, "learning_rate": 3.9795879507179665e-05, "loss": 0.004, "step": 35230 }, { "action_loss": 0.008446543477475643, "epoch": 31.681654676258994, "step": 35230 }, { "epoch": 31.690647482014388, "grad_norm": 0.13875900208950043, "learning_rate": 3.9768903218158634e-05, "loss": 0.0037, "step": 35240 }, { "action_loss": 0.002187282545492053, "epoch": 31.690647482014388, "step": 35240 }, { "epoch": 31.699640287769785, "grad_norm": 0.14259810745716095, "learning_rate": 3.974193003707468e-05, "loss": 0.0045, "step": 35250 }, { "action_loss": 0.0035620436538010836, "epoch": 31.699640287769785, "step": 35250 }, { "epoch": 31.70863309352518, "grad_norm": 0.13727720081806183, "learning_rate": 3.971495997212152e-05, "loss": 0.0047, "step": 35260 }, { "action_loss": 0.005143387708812952, "epoch": 31.70863309352518, "step": 35260 }, { "epoch": 31.717625899280577, "grad_norm": 0.19821108877658844, "learning_rate": 3.9687993031491985e-05, "loss": 0.0052, "step": 35270 }, { "action_loss": 0.007917840965092182, "epoch": 31.717625899280577, "step": 35270 }, { "epoch": 31.72661870503597, "grad_norm": 0.17092038691043854, "learning_rate": 3.966102922337787e-05, "loss": 0.0048, "step": 35280 }, { "action_loss": 0.0035103207919746637, "epoch": 31.72661870503597, "step": 35280 }, { "epoch": 31.735611510791365, "grad_norm": 0.1609119474887848, "learning_rate": 3.963406855597009e-05, "loss": 0.0035, "step": 35290 }, { "action_loss": 0.004565935116261244, "epoch": 31.735611510791365, "step": 35290 }, { "epoch": 31.744604316546763, "grad_norm": 0.11599146574735641, "learning_rate": 3.960711103745861e-05, "loss": 0.0043, "step": 35300 }, { "action_loss": 0.009545602835714817, "epoch": 31.744604316546763, "step": 35300 }, { "epoch": 31.753597122302157, "grad_norm": 0.16738255321979523, "learning_rate": 3.958015667603237e-05, "loss": 0.0061, "step": 35310 }, { "action_loss": 0.0041406722739338875, "epoch": 31.753597122302157, "step": 35310 }, { "epoch": 31.762589928057555, "grad_norm": 0.23429594933986664, "learning_rate": 3.955320547987943e-05, "loss": 0.0041, "step": 35320 }, { "action_loss": 0.0034993968438357115, "epoch": 31.762589928057555, "step": 35320 }, { "epoch": 31.77158273381295, "grad_norm": 0.2492520809173584, "learning_rate": 3.952625745718681e-05, "loss": 0.0082, "step": 35330 }, { "action_loss": 0.012195773422718048, "epoch": 31.77158273381295, "step": 35330 }, { "epoch": 31.780575539568346, "grad_norm": 0.12383107841014862, "learning_rate": 3.949931261614064e-05, "loss": 0.0062, "step": 35340 }, { "action_loss": 0.00294368714094162, "epoch": 31.780575539568346, "step": 35340 }, { "epoch": 31.78956834532374, "grad_norm": 0.1271774172782898, "learning_rate": 3.947237096492605e-05, "loss": 0.0032, "step": 35350 }, { "action_loss": 0.006187796592712402, "epoch": 31.78956834532374, "step": 35350 }, { "epoch": 31.798561151079138, "grad_norm": 0.17293092608451843, "learning_rate": 3.944543251172719e-05, "loss": 0.0058, "step": 35360 }, { "action_loss": 0.005260121542960405, "epoch": 31.798561151079138, "step": 35360 }, { "epoch": 31.807553956834532, "grad_norm": 0.1330040693283081, "learning_rate": 3.941849726472725e-05, "loss": 0.0034, "step": 35370 }, { "action_loss": 0.011233270168304443, "epoch": 31.807553956834532, "step": 35370 }, { "epoch": 31.81654676258993, "grad_norm": 0.14722062647342682, "learning_rate": 3.939156523210846e-05, "loss": 0.0041, "step": 35380 }, { "action_loss": 0.009030613116919994, "epoch": 31.81654676258993, "step": 35380 }, { "epoch": 31.825539568345324, "grad_norm": 0.14094553887844086, "learning_rate": 3.9364636422052046e-05, "loss": 0.0077, "step": 35390 }, { "action_loss": 0.002656827913597226, "epoch": 31.825539568345324, "step": 35390 }, { "epoch": 31.834532374100718, "grad_norm": 0.1679813712835312, "learning_rate": 3.933771084273828e-05, "loss": 0.0083, "step": 35400 }, { "action_loss": 0.0070950426161289215, "epoch": 31.834532374100718, "step": 35400 }, { "epoch": 31.843525179856115, "grad_norm": 0.2228822112083435, "learning_rate": 3.931078850234643e-05, "loss": 0.0065, "step": 35410 }, { "action_loss": 0.0019503766670823097, "epoch": 31.843525179856115, "step": 35410 }, { "epoch": 31.85251798561151, "grad_norm": 0.19722072780132294, "learning_rate": 3.928386940905483e-05, "loss": 0.0039, "step": 35420 }, { "action_loss": 0.00265890103764832, "epoch": 31.85251798561151, "step": 35420 }, { "epoch": 31.861510791366907, "grad_norm": 0.14523889124393463, "learning_rate": 3.925695357104073e-05, "loss": 0.0039, "step": 35430 }, { "action_loss": 0.01042137760668993, "epoch": 31.861510791366907, "step": 35430 }, { "epoch": 31.8705035971223, "grad_norm": 0.21476039290428162, "learning_rate": 3.923004099648049e-05, "loss": 0.0059, "step": 35440 }, { "action_loss": 0.0017739887116476893, "epoch": 31.8705035971223, "step": 35440 }, { "epoch": 31.8794964028777, "grad_norm": 0.16246788203716278, "learning_rate": 3.920313169354944e-05, "loss": 0.0035, "step": 35450 }, { "action_loss": 0.001603678218089044, "epoch": 31.8794964028777, "step": 35450 }, { "epoch": 31.888489208633093, "grad_norm": 0.16323387622833252, "learning_rate": 3.9176225670421897e-05, "loss": 0.0094, "step": 35460 }, { "action_loss": 0.002664597937837243, "epoch": 31.888489208633093, "step": 35460 }, { "epoch": 31.89748201438849, "grad_norm": 0.14873720705509186, "learning_rate": 3.9149322935271224e-05, "loss": 0.0042, "step": 35470 }, { "action_loss": 0.009439348243176937, "epoch": 31.89748201438849, "step": 35470 }, { "epoch": 31.906474820143885, "grad_norm": 0.2279013693332672, "learning_rate": 3.9122423496269725e-05, "loss": 0.0066, "step": 35480 }, { "action_loss": 0.0037518395110964775, "epoch": 31.906474820143885, "step": 35480 }, { "epoch": 31.915467625899282, "grad_norm": 0.18367178738117218, "learning_rate": 3.909552736158877e-05, "loss": 0.0051, "step": 35490 }, { "action_loss": 0.004890019539743662, "epoch": 31.915467625899282, "step": 35490 }, { "epoch": 31.924460431654676, "grad_norm": 0.12065714597702026, "learning_rate": 3.90686345393987e-05, "loss": 0.0049, "step": 35500 }, { "action_loss": 0.0024232545401901007, "epoch": 31.924460431654676, "step": 35500 }, { "epoch": 31.93345323741007, "grad_norm": 0.07614295184612274, "learning_rate": 3.9041745037868816e-05, "loss": 0.003, "step": 35510 }, { "action_loss": 0.001529183704406023, "epoch": 31.93345323741007, "step": 35510 }, { "epoch": 31.942446043165468, "grad_norm": 0.2064821422100067, "learning_rate": 3.9014858865167465e-05, "loss": 0.0058, "step": 35520 }, { "action_loss": 0.00553510757163167, "epoch": 31.942446043165468, "step": 35520 }, { "epoch": 31.951438848920862, "grad_norm": 0.1142628937959671, "learning_rate": 3.8987976029461935e-05, "loss": 0.0089, "step": 35530 }, { "action_loss": 0.002183425473049283, "epoch": 31.951438848920862, "step": 35530 }, { "epoch": 31.96043165467626, "grad_norm": 0.14771832525730133, "learning_rate": 3.896109653891853e-05, "loss": 0.0056, "step": 35540 }, { "action_loss": 0.005507452879101038, "epoch": 31.96043165467626, "step": 35540 }, { "epoch": 31.969424460431654, "grad_norm": 0.10257081687450409, "learning_rate": 3.893422040170254e-05, "loss": 0.0033, "step": 35550 }, { "action_loss": 0.008599292486906052, "epoch": 31.969424460431654, "step": 35550 }, { "epoch": 31.97841726618705, "grad_norm": 0.11069037765264511, "learning_rate": 3.8907347625978207e-05, "loss": 0.0088, "step": 35560 }, { "action_loss": 0.015501023270189762, "epoch": 31.97841726618705, "step": 35560 }, { "epoch": 31.987410071942445, "grad_norm": 0.17515869438648224, "learning_rate": 3.88804782199088e-05, "loss": 0.0049, "step": 35570 }, { "action_loss": 0.008967621251940727, "epoch": 31.987410071942445, "step": 35570 }, { "epoch": 31.996402877697843, "grad_norm": 0.1871839463710785, "learning_rate": 3.8853612191656495e-05, "loss": 0.0036, "step": 35580 }, { "action_loss": 0.0027378525119274855, "epoch": 31.996402877697843, "step": 35580 }, { "epoch": 32.00539568345324, "grad_norm": 0.12248332798480988, "learning_rate": 3.88267495493825e-05, "loss": 0.0052, "step": 35590 }, { "action_loss": 0.002789108082652092, "epoch": 32.00539568345324, "step": 35590 }, { "epoch": 32.014388489208635, "grad_norm": 0.15375716984272003, "learning_rate": 3.8799890301247004e-05, "loss": 0.0075, "step": 35600 }, { "action_loss": 0.01475448627024889, "epoch": 32.014388489208635, "step": 35600 }, { "epoch": 32.023381294964025, "grad_norm": 0.18714243173599243, "learning_rate": 3.8773034455409096e-05, "loss": 0.0056, "step": 35610 }, { "action_loss": 0.007723717484623194, "epoch": 32.023381294964025, "step": 35610 }, { "epoch": 32.03237410071942, "grad_norm": 0.1970641314983368, "learning_rate": 3.8746182020026904e-05, "loss": 0.0054, "step": 35620 }, { "action_loss": 0.005116863176226616, "epoch": 32.03237410071942, "step": 35620 }, { "epoch": 32.04136690647482, "grad_norm": 0.11908687651157379, "learning_rate": 3.871933300325745e-05, "loss": 0.0033, "step": 35630 }, { "action_loss": 0.0037733970675617456, "epoch": 32.04136690647482, "step": 35630 }, { "epoch": 32.05035971223022, "grad_norm": 0.1791248768568039, "learning_rate": 3.869248741325679e-05, "loss": 0.0047, "step": 35640 }, { "action_loss": 0.005445439368486404, "epoch": 32.05035971223022, "step": 35640 }, { "epoch": 32.05935251798561, "grad_norm": 0.09893146902322769, "learning_rate": 3.866564525817992e-05, "loss": 0.004, "step": 35650 }, { "action_loss": 0.00457647442817688, "epoch": 32.05935251798561, "step": 35650 }, { "epoch": 32.068345323741006, "grad_norm": 0.12138485163450241, "learning_rate": 3.8638806546180725e-05, "loss": 0.0044, "step": 35660 }, { "action_loss": 0.0013937827898189425, "epoch": 32.068345323741006, "step": 35660 }, { "epoch": 32.077338129496404, "grad_norm": 0.06486456096172333, "learning_rate": 3.861197128541213e-05, "loss": 0.0065, "step": 35670 }, { "action_loss": 0.00535038486123085, "epoch": 32.077338129496404, "step": 35670 }, { "epoch": 32.0863309352518, "grad_norm": 0.10925783216953278, "learning_rate": 3.858513948402599e-05, "loss": 0.0058, "step": 35680 }, { "action_loss": 0.0028356406837701797, "epoch": 32.0863309352518, "step": 35680 }, { "epoch": 32.09532374100719, "grad_norm": 0.20548856258392334, "learning_rate": 3.8558311150173077e-05, "loss": 0.0138, "step": 35690 }, { "action_loss": 0.0029771721456199884, "epoch": 32.09532374100719, "step": 35690 }, { "epoch": 32.10431654676259, "grad_norm": 0.21016855537891388, "learning_rate": 3.853148629200312e-05, "loss": 0.0083, "step": 35700 }, { "action_loss": 0.0017919702222570777, "epoch": 32.10431654676259, "step": 35700 }, { "epoch": 32.11330935251799, "grad_norm": 0.17470450699329376, "learning_rate": 3.850466491766482e-05, "loss": 0.0051, "step": 35710 }, { "action_loss": 0.0051908656023442745, "epoch": 32.11330935251799, "step": 35710 }, { "epoch": 32.12230215827338, "grad_norm": 0.16059213876724243, "learning_rate": 3.847784703530583e-05, "loss": 0.0041, "step": 35720 }, { "action_loss": 0.0027148674707859755, "epoch": 32.12230215827338, "step": 35720 }, { "epoch": 32.131294964028775, "grad_norm": 0.10480999201536179, "learning_rate": 3.845103265307266e-05, "loss": 0.005, "step": 35730 }, { "action_loss": 0.0018352946499362588, "epoch": 32.131294964028775, "step": 35730 }, { "epoch": 32.14028776978417, "grad_norm": 0.09906525164842606, "learning_rate": 3.842422177911086e-05, "loss": 0.0034, "step": 35740 }, { "action_loss": 0.004372775088995695, "epoch": 32.14028776978417, "step": 35740 }, { "epoch": 32.14928057553957, "grad_norm": 0.12119949609041214, "learning_rate": 3.8397414421564826e-05, "loss": 0.0052, "step": 35750 }, { "action_loss": 0.004442901350557804, "epoch": 32.14928057553957, "step": 35750 }, { "epoch": 32.15827338129496, "grad_norm": 0.1646588295698166, "learning_rate": 3.8370610588577935e-05, "loss": 0.0052, "step": 35760 }, { "action_loss": 0.00495883310213685, "epoch": 32.15827338129496, "step": 35760 }, { "epoch": 32.16726618705036, "grad_norm": 0.1478673219680786, "learning_rate": 3.834381028829251e-05, "loss": 0.0037, "step": 35770 }, { "action_loss": 0.005107708740979433, "epoch": 32.16726618705036, "step": 35770 }, { "epoch": 32.17625899280576, "grad_norm": 0.13037900626659393, "learning_rate": 3.8317013528849745e-05, "loss": 0.0037, "step": 35780 }, { "action_loss": 0.005719877313822508, "epoch": 32.17625899280576, "step": 35780 }, { "epoch": 32.185251798561154, "grad_norm": 0.11798251420259476, "learning_rate": 3.8290220318389815e-05, "loss": 0.0077, "step": 35790 }, { "action_loss": 0.0024848945904523134, "epoch": 32.185251798561154, "step": 35790 }, { "epoch": 32.194244604316545, "grad_norm": 0.11038168519735336, "learning_rate": 3.8263430665051746e-05, "loss": 0.0038, "step": 35800 }, { "action_loss": 0.002480671741068363, "epoch": 32.194244604316545, "step": 35800 }, { "epoch": 32.20323741007194, "grad_norm": 0.15423636138439178, "learning_rate": 3.8236644576973554e-05, "loss": 0.0036, "step": 35810 }, { "action_loss": 0.002978746546432376, "epoch": 32.20323741007194, "step": 35810 }, { "epoch": 32.21223021582734, "grad_norm": 0.10018958151340485, "learning_rate": 3.820986206229217e-05, "loss": 0.0038, "step": 35820 }, { "action_loss": 0.008195077069103718, "epoch": 32.21223021582734, "step": 35820 }, { "epoch": 32.22122302158273, "grad_norm": 0.15490444004535675, "learning_rate": 3.8183083129143384e-05, "loss": 0.0034, "step": 35830 }, { "action_loss": 0.002162885619327426, "epoch": 32.22122302158273, "step": 35830 }, { "epoch": 32.23021582733813, "grad_norm": 0.1524556577205658, "learning_rate": 3.815630778566193e-05, "loss": 0.0135, "step": 35840 }, { "action_loss": 0.00676553463563323, "epoch": 32.23021582733813, "step": 35840 }, { "epoch": 32.239208633093526, "grad_norm": 0.11484084278345108, "learning_rate": 3.812953603998145e-05, "loss": 0.0062, "step": 35850 }, { "action_loss": 0.002237051958218217, "epoch": 32.239208633093526, "step": 35850 }, { "epoch": 32.24820143884892, "grad_norm": 0.17441943287849426, "learning_rate": 3.8102767900234504e-05, "loss": 0.0046, "step": 35860 }, { "action_loss": 0.002285827649757266, "epoch": 32.24820143884892, "step": 35860 }, { "epoch": 32.257194244604314, "grad_norm": 0.10983028262853622, "learning_rate": 3.807600337455256e-05, "loss": 0.0053, "step": 35870 }, { "action_loss": 0.01167913619428873, "epoch": 32.257194244604314, "step": 35870 }, { "epoch": 32.26618705035971, "grad_norm": 0.23653216660022736, "learning_rate": 3.804924247106593e-05, "loss": 0.0071, "step": 35880 }, { "action_loss": 0.006222445052117109, "epoch": 32.26618705035971, "step": 35880 }, { "epoch": 32.27517985611511, "grad_norm": 0.15780960023403168, "learning_rate": 3.8022485197903925e-05, "loss": 0.0048, "step": 35890 }, { "action_loss": 0.0013848142698407173, "epoch": 32.27517985611511, "step": 35890 }, { "epoch": 32.28417266187051, "grad_norm": 0.13515807688236237, "learning_rate": 3.799573156319464e-05, "loss": 0.0044, "step": 35900 }, { "action_loss": 0.008508112281560898, "epoch": 32.28417266187051, "step": 35900 }, { "epoch": 32.2931654676259, "grad_norm": 0.172769695520401, "learning_rate": 3.796898157506515e-05, "loss": 0.0057, "step": 35910 }, { "action_loss": 0.0025609948206692934, "epoch": 32.2931654676259, "step": 35910 }, { "epoch": 32.302158273381295, "grad_norm": 0.13552594184875488, "learning_rate": 3.794223524164143e-05, "loss": 0.0046, "step": 35920 }, { "action_loss": 0.0015610050177201629, "epoch": 32.302158273381295, "step": 35920 }, { "epoch": 32.31115107913669, "grad_norm": 0.21155758202075958, "learning_rate": 3.7915492571048245e-05, "loss": 0.0072, "step": 35930 }, { "action_loss": 0.00534861208871007, "epoch": 32.31115107913669, "step": 35930 }, { "epoch": 32.32014388489208, "grad_norm": 0.13571663200855255, "learning_rate": 3.788875357140937e-05, "loss": 0.0092, "step": 35940 }, { "action_loss": 0.007796250283718109, "epoch": 32.32014388489208, "step": 35940 }, { "epoch": 32.32913669064748, "grad_norm": 0.14965246617794037, "learning_rate": 3.786201825084736e-05, "loss": 0.0046, "step": 35950 }, { "action_loss": 0.001644608681090176, "epoch": 32.32913669064748, "step": 35950 }, { "epoch": 32.33812949640288, "grad_norm": 0.20967863500118256, "learning_rate": 3.783528661748372e-05, "loss": 0.004, "step": 35960 }, { "action_loss": 0.0028259207028895617, "epoch": 32.33812949640288, "step": 35960 }, { "epoch": 32.347122302158276, "grad_norm": 0.14635886251926422, "learning_rate": 3.780855867943882e-05, "loss": 0.0072, "step": 35970 }, { "action_loss": 0.001792785245925188, "epoch": 32.347122302158276, "step": 35970 }, { "epoch": 32.356115107913666, "grad_norm": 0.1382027417421341, "learning_rate": 3.778183444483189e-05, "loss": 0.0044, "step": 35980 }, { "action_loss": 0.005838335957378149, "epoch": 32.356115107913666, "step": 35980 }, { "epoch": 32.365107913669064, "grad_norm": 0.2111128270626068, "learning_rate": 3.775511392178108e-05, "loss": 0.0045, "step": 35990 }, { "action_loss": 0.003439701162278652, "epoch": 32.365107913669064, "step": 35990 }, { "epoch": 32.37410071942446, "grad_norm": 0.168777197599411, "learning_rate": 3.772839711840332e-05, "loss": 0.0063, "step": 36000 }, { "action_loss": 0.00523735536262393, "epoch": 32.37410071942446, "step": 36000 }, { "epoch": 32.38309352517986, "grad_norm": 0.2073218822479248, "learning_rate": 3.7701684042814515e-05, "loss": 0.0062, "step": 36010 }, { "action_loss": 0.009375366382300854, "epoch": 32.38309352517986, "step": 36010 }, { "epoch": 32.39208633093525, "grad_norm": 0.1647033542394638, "learning_rate": 3.76749747031294e-05, "loss": 0.0039, "step": 36020 }, { "action_loss": 0.035419754683971405, "epoch": 32.39208633093525, "step": 36020 }, { "epoch": 32.40107913669065, "grad_norm": 0.18353940546512604, "learning_rate": 3.764826910746152e-05, "loss": 0.0121, "step": 36030 }, { "action_loss": 0.0022948274854570627, "epoch": 32.40107913669065, "step": 36030 }, { "epoch": 32.410071942446045, "grad_norm": 0.1160508319735527, "learning_rate": 3.762156726392338e-05, "loss": 0.0035, "step": 36040 }, { "action_loss": 0.0028590757865458727, "epoch": 32.410071942446045, "step": 36040 }, { "epoch": 32.419064748201436, "grad_norm": 0.15207412838935852, "learning_rate": 3.759486918062625e-05, "loss": 0.0094, "step": 36050 }, { "action_loss": 0.002898117760196328, "epoch": 32.419064748201436, "step": 36050 }, { "epoch": 32.42805755395683, "grad_norm": 0.12597250938415527, "learning_rate": 3.756817486568033e-05, "loss": 0.0046, "step": 36060 }, { "action_loss": 0.0017735747387632728, "epoch": 32.42805755395683, "step": 36060 }, { "epoch": 32.43705035971223, "grad_norm": 0.21195589005947113, "learning_rate": 3.7541484327194654e-05, "loss": 0.0056, "step": 36070 }, { "action_loss": 0.0021892404183745384, "epoch": 32.43705035971223, "step": 36070 }, { "epoch": 32.44604316546763, "grad_norm": 0.10742732882499695, "learning_rate": 3.751479757327707e-05, "loss": 0.0074, "step": 36080 }, { "action_loss": 0.0028156128246337175, "epoch": 32.44604316546763, "step": 36080 }, { "epoch": 32.45503597122302, "grad_norm": 0.18363559246063232, "learning_rate": 3.7488114612034345e-05, "loss": 0.0058, "step": 36090 }, { "action_loss": 0.0008951133931986988, "epoch": 32.45503597122302, "step": 36090 }, { "epoch": 32.46402877697842, "grad_norm": 0.11778587847948074, "learning_rate": 3.7461435451572044e-05, "loss": 0.0065, "step": 36100 }, { "action_loss": 0.007661186158657074, "epoch": 32.46402877697842, "step": 36100 }, { "epoch": 32.473021582733814, "grad_norm": 0.09038609266281128, "learning_rate": 3.743476009999459e-05, "loss": 0.0061, "step": 36110 }, { "action_loss": 0.003978230990469456, "epoch": 32.473021582733814, "step": 36110 }, { "epoch": 32.48201438848921, "grad_norm": 0.17673839628696442, "learning_rate": 3.7408088565405245e-05, "loss": 0.0066, "step": 36120 }, { "action_loss": 0.002247404307126999, "epoch": 32.48201438848921, "step": 36120 }, { "epoch": 32.4910071942446, "grad_norm": 0.14295615255832672, "learning_rate": 3.738142085590612e-05, "loss": 0.0045, "step": 36130 }, { "action_loss": 0.002134142443537712, "epoch": 32.4910071942446, "step": 36130 }, { "epoch": 32.5, "grad_norm": 0.1902933120727539, "learning_rate": 3.7354756979598194e-05, "loss": 0.0037, "step": 36140 }, { "action_loss": 0.003562573343515396, "epoch": 32.5, "step": 36140 }, { "epoch": 32.5089928057554, "grad_norm": 0.12282002717256546, "learning_rate": 3.7328096944581187e-05, "loss": 0.0038, "step": 36150 }, { "action_loss": 0.0013676360249519348, "epoch": 32.5089928057554, "step": 36150 }, { "epoch": 32.51798561151079, "grad_norm": 0.16909490525722504, "learning_rate": 3.730144075895377e-05, "loss": 0.0057, "step": 36160 }, { "action_loss": 0.015185728669166565, "epoch": 32.51798561151079, "step": 36160 }, { "epoch": 32.526978417266186, "grad_norm": 0.15637296438217163, "learning_rate": 3.727478843081335e-05, "loss": 0.0053, "step": 36170 }, { "action_loss": 0.0018989486852660775, "epoch": 32.526978417266186, "step": 36170 }, { "epoch": 32.53597122302158, "grad_norm": 0.15359264612197876, "learning_rate": 3.72481399682562e-05, "loss": 0.0037, "step": 36180 }, { "action_loss": 0.04258938506245613, "epoch": 32.53597122302158, "step": 36180 }, { "epoch": 32.54496402877698, "grad_norm": 0.1837637573480606, "learning_rate": 3.722149537937747e-05, "loss": 0.0081, "step": 36190 }, { "action_loss": 0.0019121180521324277, "epoch": 32.54496402877698, "step": 36190 }, { "epoch": 32.55395683453237, "grad_norm": 0.14792917668819427, "learning_rate": 3.7194854672271015e-05, "loss": 0.0037, "step": 36200 }, { "action_loss": 0.009261737577617168, "epoch": 32.55395683453237, "step": 36200 }, { "epoch": 32.56294964028777, "grad_norm": 0.1791452020406723, "learning_rate": 3.7168217855029644e-05, "loss": 0.0054, "step": 36210 }, { "action_loss": 0.005161740351468325, "epoch": 32.56294964028777, "step": 36210 }, { "epoch": 32.57194244604317, "grad_norm": 0.1558981090784073, "learning_rate": 3.7141584935744856e-05, "loss": 0.0056, "step": 36220 }, { "action_loss": 0.007074102759361267, "epoch": 32.57194244604317, "step": 36220 }, { "epoch": 32.580935251798564, "grad_norm": 0.18262238800525665, "learning_rate": 3.7114955922507055e-05, "loss": 0.0038, "step": 36230 }, { "action_loss": 0.0013915171148255467, "epoch": 32.580935251798564, "step": 36230 }, { "epoch": 32.589928057553955, "grad_norm": 0.14784222841262817, "learning_rate": 3.708833082340545e-05, "loss": 0.0059, "step": 36240 }, { "action_loss": 0.0013894852017983794, "epoch": 32.589928057553955, "step": 36240 }, { "epoch": 32.59892086330935, "grad_norm": 0.09960020333528519, "learning_rate": 3.7061709646528034e-05, "loss": 0.0028, "step": 36250 }, { "action_loss": 0.00825984962284565, "epoch": 32.59892086330935, "step": 36250 }, { "epoch": 32.60791366906475, "grad_norm": 0.14896702766418457, "learning_rate": 3.7035092399961604e-05, "loss": 0.0042, "step": 36260 }, { "action_loss": 0.003843436250463128, "epoch": 32.60791366906475, "step": 36260 }, { "epoch": 32.61690647482014, "grad_norm": 0.21305438876152039, "learning_rate": 3.700847909179177e-05, "loss": 0.0055, "step": 36270 }, { "action_loss": 0.005665413569658995, "epoch": 32.61690647482014, "step": 36270 }, { "epoch": 32.62589928057554, "grad_norm": 0.12852218747138977, "learning_rate": 3.698186973010297e-05, "loss": 0.0065, "step": 36280 }, { "action_loss": 0.009685132652521133, "epoch": 32.62589928057554, "step": 36280 }, { "epoch": 32.634892086330936, "grad_norm": 0.09194786101579666, "learning_rate": 3.695526432297844e-05, "loss": 0.0043, "step": 36290 }, { "action_loss": 0.003960093948990107, "epoch": 32.634892086330936, "step": 36290 }, { "epoch": 32.643884892086334, "grad_norm": 0.14825043082237244, "learning_rate": 3.692866287850017e-05, "loss": 0.003, "step": 36300 }, { "action_loss": 0.009583279490470886, "epoch": 32.643884892086334, "step": 36300 }, { "epoch": 32.652877697841724, "grad_norm": 0.13250939548015594, "learning_rate": 3.6902065404749006e-05, "loss": 0.0048, "step": 36310 }, { "action_loss": 0.008795687928795815, "epoch": 32.652877697841724, "step": 36310 }, { "epoch": 32.66187050359712, "grad_norm": 0.18737544119358063, "learning_rate": 3.6875471909804516e-05, "loss": 0.0078, "step": 36320 }, { "action_loss": 0.0010149338049814105, "epoch": 32.66187050359712, "step": 36320 }, { "epoch": 32.67086330935252, "grad_norm": 0.08594373613595963, "learning_rate": 3.6848882401745135e-05, "loss": 0.002, "step": 36330 }, { "action_loss": 0.010201162658631802, "epoch": 32.67086330935252, "step": 36330 }, { "epoch": 32.67985611510792, "grad_norm": 0.16290263831615448, "learning_rate": 3.682229688864806e-05, "loss": 0.0054, "step": 36340 }, { "action_loss": 0.002084905980154872, "epoch": 32.67985611510792, "step": 36340 }, { "epoch": 32.68884892086331, "grad_norm": 0.11106652021408081, "learning_rate": 3.6795715378589235e-05, "loss": 0.0035, "step": 36350 }, { "action_loss": 0.0013897611061111093, "epoch": 32.68884892086331, "step": 36350 }, { "epoch": 32.697841726618705, "grad_norm": 0.16298866271972656, "learning_rate": 3.676913787964345e-05, "loss": 0.0085, "step": 36360 }, { "action_loss": 0.0021430521737784147, "epoch": 32.697841726618705, "step": 36360 }, { "epoch": 32.7068345323741, "grad_norm": 0.15189366042613983, "learning_rate": 3.674256439988423e-05, "loss": 0.0056, "step": 36370 }, { "action_loss": 0.0027056941762566566, "epoch": 32.7068345323741, "step": 36370 }, { "epoch": 32.71582733812949, "grad_norm": 0.19806304574012756, "learning_rate": 3.6715994947383904e-05, "loss": 0.0056, "step": 36380 }, { "action_loss": 0.001834110007621348, "epoch": 32.71582733812949, "step": 36380 }, { "epoch": 32.72482014388489, "grad_norm": 0.15143874287605286, "learning_rate": 3.668942953021357e-05, "loss": 0.0037, "step": 36390 }, { "action_loss": 0.01136811077594757, "epoch": 32.72482014388489, "step": 36390 }, { "epoch": 32.73381294964029, "grad_norm": 0.17759355902671814, "learning_rate": 3.66628681564431e-05, "loss": 0.0042, "step": 36400 }, { "action_loss": 0.001782359555363655, "epoch": 32.73381294964029, "step": 36400 }, { "epoch": 32.742805755395686, "grad_norm": 0.08930978178977966, "learning_rate": 3.663631083414114e-05, "loss": 0.0025, "step": 36410 }, { "action_loss": 0.0023055800702422857, "epoch": 32.742805755395686, "step": 36410 }, { "epoch": 32.75179856115108, "grad_norm": 0.17222900688648224, "learning_rate": 3.660975757137509e-05, "loss": 0.0033, "step": 36420 }, { "action_loss": 0.001766116707585752, "epoch": 32.75179856115108, "step": 36420 }, { "epoch": 32.760791366906474, "grad_norm": 0.12543629109859467, "learning_rate": 3.658320837621114e-05, "loss": 0.0034, "step": 36430 }, { "action_loss": 0.0010773155372589827, "epoch": 32.760791366906474, "step": 36430 }, { "epoch": 32.76978417266187, "grad_norm": 0.1298225373029709, "learning_rate": 3.655666325671426e-05, "loss": 0.0053, "step": 36440 }, { "action_loss": 0.00219193403609097, "epoch": 32.76978417266187, "step": 36440 }, { "epoch": 32.77877697841727, "grad_norm": 0.09491792321205139, "learning_rate": 3.65301222209481e-05, "loss": 0.0039, "step": 36450 }, { "action_loss": 0.005489857401698828, "epoch": 32.77877697841727, "step": 36450 }, { "epoch": 32.78776978417266, "grad_norm": 0.14971600472927094, "learning_rate": 3.650358527697519e-05, "loss": 0.0083, "step": 36460 }, { "action_loss": 0.002350323600694537, "epoch": 32.78776978417266, "step": 36460 }, { "epoch": 32.79676258992806, "grad_norm": 0.08275145292282104, "learning_rate": 3.64770524328567e-05, "loss": 0.004, "step": 36470 }, { "action_loss": 0.009238597936928272, "epoch": 32.79676258992806, "step": 36470 }, { "epoch": 32.805755395683455, "grad_norm": 0.14038008451461792, "learning_rate": 3.645052369665265e-05, "loss": 0.004, "step": 36480 }, { "action_loss": 0.01400668453425169, "epoch": 32.805755395683455, "step": 36480 }, { "epoch": 32.814748201438846, "grad_norm": 0.13629639148712158, "learning_rate": 3.6423999076421724e-05, "loss": 0.0052, "step": 36490 }, { "action_loss": 0.002607000758871436, "epoch": 32.814748201438846, "step": 36490 }, { "epoch": 32.82374100719424, "grad_norm": 0.18333500623703003, "learning_rate": 3.639747858022142e-05, "loss": 0.0063, "step": 36500 }, { "action_loss": 0.003331100568175316, "epoch": 32.82374100719424, "step": 36500 }, { "epoch": 32.83273381294964, "grad_norm": 0.1255679726600647, "learning_rate": 3.637096221610799e-05, "loss": 0.0052, "step": 36510 }, { "action_loss": 0.001622429583221674, "epoch": 32.83273381294964, "step": 36510 }, { "epoch": 32.84172661870504, "grad_norm": 0.1310015469789505, "learning_rate": 3.634444999213638e-05, "loss": 0.0034, "step": 36520 }, { "action_loss": 0.002086644759401679, "epoch": 32.84172661870504, "step": 36520 }, { "epoch": 32.85071942446043, "grad_norm": 0.14866110682487488, "learning_rate": 3.6317941916360296e-05, "loss": 0.0042, "step": 36530 }, { "action_loss": 0.0023634375538676977, "epoch": 32.85071942446043, "step": 36530 }, { "epoch": 32.85971223021583, "grad_norm": 0.10803817212581635, "learning_rate": 3.629143799683221e-05, "loss": 0.003, "step": 36540 }, { "action_loss": 0.0027783371042460203, "epoch": 32.85971223021583, "step": 36540 }, { "epoch": 32.868705035971225, "grad_norm": 0.10786640644073486, "learning_rate": 3.626493824160331e-05, "loss": 0.0054, "step": 36550 }, { "action_loss": 0.002378048375248909, "epoch": 32.868705035971225, "step": 36550 }, { "epoch": 32.87769784172662, "grad_norm": 0.1408897340297699, "learning_rate": 3.623844265872352e-05, "loss": 0.0019, "step": 36560 }, { "action_loss": 0.001422750181518495, "epoch": 32.87769784172662, "step": 36560 }, { "epoch": 32.88669064748201, "grad_norm": 0.14884419739246368, "learning_rate": 3.621195125624149e-05, "loss": 0.0048, "step": 36570 }, { "action_loss": 0.005943919066339731, "epoch": 32.88669064748201, "step": 36570 }, { "epoch": 32.89568345323741, "grad_norm": 0.17689163982868195, "learning_rate": 3.618546404220463e-05, "loss": 0.0073, "step": 36580 }, { "action_loss": 0.006789979990571737, "epoch": 32.89568345323741, "step": 36580 }, { "epoch": 32.90467625899281, "grad_norm": 0.15210847556591034, "learning_rate": 3.615898102465903e-05, "loss": 0.0071, "step": 36590 }, { "action_loss": 0.004451907705515623, "epoch": 32.90467625899281, "step": 36590 }, { "epoch": 32.9136690647482, "grad_norm": 0.24122574925422668, "learning_rate": 3.6132502211649544e-05, "loss": 0.0049, "step": 36600 }, { "action_loss": 0.002304087160155177, "epoch": 32.9136690647482, "step": 36600 }, { "epoch": 32.922661870503596, "grad_norm": 0.1136304959654808, "learning_rate": 3.610602761121975e-05, "loss": 0.004, "step": 36610 }, { "action_loss": 0.0022370191290974617, "epoch": 32.922661870503596, "step": 36610 }, { "epoch": 32.931654676258994, "grad_norm": 0.06706001609563828, "learning_rate": 3.6079557231411897e-05, "loss": 0.0055, "step": 36620 }, { "action_loss": 0.002038882579654455, "epoch": 32.931654676258994, "step": 36620 }, { "epoch": 32.94064748201439, "grad_norm": 0.12345822900533676, "learning_rate": 3.6053091080267035e-05, "loss": 0.0023, "step": 36630 }, { "action_loss": 0.005372263956815004, "epoch": 32.94064748201439, "step": 36630 }, { "epoch": 32.94964028776978, "grad_norm": 0.14333775639533997, "learning_rate": 3.602662916582483e-05, "loss": 0.0032, "step": 36640 }, { "action_loss": 0.004008857998996973, "epoch": 32.94964028776978, "step": 36640 }, { "epoch": 32.95863309352518, "grad_norm": 0.17060357332229614, "learning_rate": 3.600017149612375e-05, "loss": 0.0048, "step": 36650 }, { "action_loss": 0.0013576545752584934, "epoch": 32.95863309352518, "step": 36650 }, { "epoch": 32.96762589928058, "grad_norm": 0.1793544739484787, "learning_rate": 3.5973718079200935e-05, "loss": 0.0064, "step": 36660 }, { "action_loss": 0.0011166060576215386, "epoch": 32.96762589928058, "step": 36660 }, { "epoch": 32.976618705035975, "grad_norm": 0.12958845496177673, "learning_rate": 3.5947268923092216e-05, "loss": 0.0034, "step": 36670 }, { "action_loss": 0.002267857315018773, "epoch": 32.976618705035975, "step": 36670 }, { "epoch": 32.985611510791365, "grad_norm": 0.16472014784812927, "learning_rate": 3.592082403583216e-05, "loss": 0.0056, "step": 36680 }, { "action_loss": 0.0010194521164521575, "epoch": 32.985611510791365, "step": 36680 }, { "epoch": 32.99460431654676, "grad_norm": 0.14972707629203796, "learning_rate": 3.5894383425454004e-05, "loss": 0.0042, "step": 36690 }, { "action_loss": 0.004349629394710064, "epoch": 32.99460431654676, "step": 36690 }, { "epoch": 33.00359712230216, "grad_norm": 0.09036625921726227, "learning_rate": 3.586794709998975e-05, "loss": 0.0022, "step": 36700 }, { "action_loss": 0.0024426737800240517, "epoch": 33.00359712230216, "step": 36700 }, { "epoch": 33.01258992805755, "grad_norm": 0.180084228515625, "learning_rate": 3.584151506747002e-05, "loss": 0.0053, "step": 36710 }, { "action_loss": 0.0013607010478153825, "epoch": 33.01258992805755, "step": 36710 }, { "epoch": 33.02158273381295, "grad_norm": 0.16329722106456757, "learning_rate": 3.581508733592418e-05, "loss": 0.0063, "step": 36720 }, { "action_loss": 0.008062114007771015, "epoch": 33.02158273381295, "step": 36720 }, { "epoch": 33.030575539568346, "grad_norm": 0.12397963553667068, "learning_rate": 3.5788663913380297e-05, "loss": 0.004, "step": 36730 }, { "action_loss": 0.005465340334922075, "epoch": 33.030575539568346, "step": 36730 }, { "epoch": 33.039568345323744, "grad_norm": 0.13095611333847046, "learning_rate": 3.576224480786506e-05, "loss": 0.0087, "step": 36740 }, { "action_loss": 0.004612915217876434, "epoch": 33.039568345323744, "step": 36740 }, { "epoch": 33.048561151079134, "grad_norm": 0.14372411370277405, "learning_rate": 3.573583002740393e-05, "loss": 0.004, "step": 36750 }, { "action_loss": 0.010056153871119022, "epoch": 33.048561151079134, "step": 36750 }, { "epoch": 33.05755395683453, "grad_norm": 0.2501183748245239, "learning_rate": 3.570941958002103e-05, "loss": 0.0058, "step": 36760 }, { "action_loss": 0.002321447478607297, "epoch": 33.05755395683453, "step": 36760 }, { "epoch": 33.06654676258993, "grad_norm": 0.19306717813014984, "learning_rate": 3.568301347373912e-05, "loss": 0.004, "step": 36770 }, { "action_loss": 0.001660838257521391, "epoch": 33.06654676258993, "step": 36770 }, { "epoch": 33.07553956834533, "grad_norm": 0.13782167434692383, "learning_rate": 3.5656611716579726e-05, "loss": 0.0052, "step": 36780 }, { "action_loss": 0.0027475927490741014, "epoch": 33.07553956834533, "step": 36780 }, { "epoch": 33.08453237410072, "grad_norm": 0.16437968611717224, "learning_rate": 3.5630214316562946e-05, "loss": 0.0073, "step": 36790 }, { "action_loss": 0.002455785172060132, "epoch": 33.08453237410072, "step": 36790 }, { "epoch": 33.093525179856115, "grad_norm": 0.12088681757450104, "learning_rate": 3.560382128170766e-05, "loss": 0.0033, "step": 36800 }, { "action_loss": 0.06047059968113899, "epoch": 33.093525179856115, "step": 36800 }, { "epoch": 33.10251798561151, "grad_norm": 0.09088429063558578, "learning_rate": 3.5577432620031374e-05, "loss": 0.0086, "step": 36810 }, { "action_loss": 0.0026090580504387617, "epoch": 33.10251798561151, "step": 36810 }, { "epoch": 33.111510791366904, "grad_norm": 0.22304750978946686, "learning_rate": 3.5551048339550216e-05, "loss": 0.0049, "step": 36820 }, { "action_loss": 0.0061658467166125774, "epoch": 33.111510791366904, "step": 36820 }, { "epoch": 33.1205035971223, "grad_norm": 0.2341524064540863, "learning_rate": 3.55246684482791e-05, "loss": 0.0056, "step": 36830 }, { "action_loss": 0.0045412820763885975, "epoch": 33.1205035971223, "step": 36830 }, { "epoch": 33.1294964028777, "grad_norm": 0.12522608041763306, "learning_rate": 3.5498292954231496e-05, "loss": 0.0043, "step": 36840 }, { "action_loss": 0.001916719600558281, "epoch": 33.1294964028777, "step": 36840 }, { "epoch": 33.138489208633096, "grad_norm": 0.16633889079093933, "learning_rate": 3.54719218654196e-05, "loss": 0.0046, "step": 36850 }, { "action_loss": 0.018834976479411125, "epoch": 33.138489208633096, "step": 36850 }, { "epoch": 33.14748201438849, "grad_norm": 0.10552429407835007, "learning_rate": 3.544555518985425e-05, "loss": 0.0048, "step": 36860 }, { "action_loss": 0.004288600757718086, "epoch": 33.14748201438849, "step": 36860 }, { "epoch": 33.156474820143885, "grad_norm": 0.07972817122936249, "learning_rate": 3.541919293554494e-05, "loss": 0.0036, "step": 36870 }, { "action_loss": 0.0018417881801724434, "epoch": 33.156474820143885, "step": 36870 }, { "epoch": 33.16546762589928, "grad_norm": 0.17944878339767456, "learning_rate": 3.539283511049985e-05, "loss": 0.0058, "step": 36880 }, { "action_loss": 0.00662559038028121, "epoch": 33.16546762589928, "step": 36880 }, { "epoch": 33.17446043165467, "grad_norm": 0.14153166115283966, "learning_rate": 3.5366481722725755e-05, "loss": 0.0042, "step": 36890 }, { "action_loss": 0.010256742127239704, "epoch": 33.17446043165467, "step": 36890 }, { "epoch": 33.18345323741007, "grad_norm": 0.13976939022541046, "learning_rate": 3.534013278022816e-05, "loss": 0.0039, "step": 36900 }, { "action_loss": 0.027766183018684387, "epoch": 33.18345323741007, "step": 36900 }, { "epoch": 33.19244604316547, "grad_norm": 0.15405650436878204, "learning_rate": 3.531378829101113e-05, "loss": 0.0075, "step": 36910 }, { "action_loss": 0.004410325083881617, "epoch": 33.19244604316547, "step": 36910 }, { "epoch": 33.201438848920866, "grad_norm": 0.16581718623638153, "learning_rate": 3.528744826307746e-05, "loss": 0.0044, "step": 36920 }, { "action_loss": 0.004465358331799507, "epoch": 33.201438848920866, "step": 36920 }, { "epoch": 33.210431654676256, "grad_norm": 0.1453496366739273, "learning_rate": 3.5261112704428554e-05, "loss": 0.0044, "step": 36930 }, { "action_loss": 0.0021388765890151262, "epoch": 33.210431654676256, "step": 36930 }, { "epoch": 33.219424460431654, "grad_norm": 0.10779599845409393, "learning_rate": 3.523478162306443e-05, "loss": 0.0028, "step": 36940 }, { "action_loss": 0.0018982663750648499, "epoch": 33.219424460431654, "step": 36940 }, { "epoch": 33.22841726618705, "grad_norm": 0.08151216804981232, "learning_rate": 3.520845502698381e-05, "loss": 0.0024, "step": 36950 }, { "action_loss": 0.0012465021573007107, "epoch": 33.22841726618705, "step": 36950 }, { "epoch": 33.23741007194245, "grad_norm": 0.17497387528419495, "learning_rate": 3.5182132924184005e-05, "loss": 0.0042, "step": 36960 }, { "action_loss": 0.005945806857198477, "epoch": 33.23741007194245, "step": 36960 }, { "epoch": 33.24640287769784, "grad_norm": 0.16311196982860565, "learning_rate": 3.5155815322660966e-05, "loss": 0.0052, "step": 36970 }, { "action_loss": 0.007794131059199572, "epoch": 33.24640287769784, "step": 36970 }, { "epoch": 33.25539568345324, "grad_norm": 0.16512396931648254, "learning_rate": 3.512950223040931e-05, "loss": 0.0033, "step": 36980 }, { "action_loss": 0.004554223734885454, "epoch": 33.25539568345324, "step": 36980 }, { "epoch": 33.264388489208635, "grad_norm": 0.1846589744091034, "learning_rate": 3.5103193655422216e-05, "loss": 0.0041, "step": 36990 }, { "action_loss": 0.004402628168463707, "epoch": 33.264388489208635, "step": 36990 }, { "epoch": 33.273381294964025, "grad_norm": 0.12657883763313293, "learning_rate": 3.5076889605691596e-05, "loss": 0.0052, "step": 37000 }, { "action_loss": 0.002625327790156007, "epoch": 33.273381294964025, "step": 37000 }, { "epoch": 33.28237410071942, "grad_norm": 0.13703830540180206, "learning_rate": 3.505059008920787e-05, "loss": 0.0054, "step": 37010 }, { "action_loss": 0.003340811235830188, "epoch": 33.28237410071942, "step": 37010 }, { "epoch": 33.29136690647482, "grad_norm": 0.21732477843761444, "learning_rate": 3.502429511396016e-05, "loss": 0.0081, "step": 37020 }, { "action_loss": 0.0024201786145567894, "epoch": 33.29136690647482, "step": 37020 }, { "epoch": 33.30035971223022, "grad_norm": 0.09910013526678085, "learning_rate": 3.4998004687936196e-05, "loss": 0.0052, "step": 37030 }, { "action_loss": 0.0015522902831435204, "epoch": 33.30035971223022, "step": 37030 }, { "epoch": 33.30935251798561, "grad_norm": 0.1988493949174881, "learning_rate": 3.497171881912229e-05, "loss": 0.004, "step": 37040 }, { "action_loss": 0.0033726098481565714, "epoch": 33.30935251798561, "step": 37040 }, { "epoch": 33.318345323741006, "grad_norm": 0.11694467812776566, "learning_rate": 3.494543751550342e-05, "loss": 0.0051, "step": 37050 }, { "action_loss": 0.006412144284695387, "epoch": 33.318345323741006, "step": 37050 }, { "epoch": 33.327338129496404, "grad_norm": 0.17632703483104706, "learning_rate": 3.491916078506313e-05, "loss": 0.0063, "step": 37060 }, { "action_loss": 0.002239445224404335, "epoch": 33.327338129496404, "step": 37060 }, { "epoch": 33.3363309352518, "grad_norm": 0.12045687437057495, "learning_rate": 3.489288863578361e-05, "loss": 0.0082, "step": 37070 }, { "action_loss": 0.006945777218788862, "epoch": 33.3363309352518, "step": 37070 }, { "epoch": 33.34532374100719, "grad_norm": 0.1691104918718338, "learning_rate": 3.4866621075645646e-05, "loss": 0.0069, "step": 37080 }, { "action_loss": 0.0017243102192878723, "epoch": 33.34532374100719, "step": 37080 }, { "epoch": 33.35431654676259, "grad_norm": 0.24121397733688354, "learning_rate": 3.4840358112628614e-05, "loss": 0.0051, "step": 37090 }, { "action_loss": 0.008640247397124767, "epoch": 33.35431654676259, "step": 37090 }, { "epoch": 33.36330935251799, "grad_norm": 0.20852553844451904, "learning_rate": 3.481409975471053e-05, "loss": 0.0031, "step": 37100 }, { "action_loss": 0.0014298880705609918, "epoch": 33.36330935251799, "step": 37100 }, { "epoch": 33.37230215827338, "grad_norm": 0.15880315005779266, "learning_rate": 3.4787846009867986e-05, "loss": 0.0071, "step": 37110 }, { "action_loss": 0.007055803667753935, "epoch": 33.37230215827338, "step": 37110 }, { "epoch": 33.381294964028775, "grad_norm": 0.25311505794525146, "learning_rate": 3.476159688607615e-05, "loss": 0.0058, "step": 37120 }, { "action_loss": 0.0022119374480098486, "epoch": 33.381294964028775, "step": 37120 }, { "epoch": 33.39028776978417, "grad_norm": 0.16583970189094543, "learning_rate": 3.4735352391308854e-05, "loss": 0.0037, "step": 37130 }, { "action_loss": 0.0026351253036409616, "epoch": 33.39028776978417, "step": 37130 }, { "epoch": 33.39928057553957, "grad_norm": 0.11779291927814484, "learning_rate": 3.4709112533538446e-05, "loss": 0.0057, "step": 37140 }, { "action_loss": 0.0021372903138399124, "epoch": 33.39928057553957, "step": 37140 }, { "epoch": 33.40827338129496, "grad_norm": 0.19781585037708282, "learning_rate": 3.4682877320735934e-05, "loss": 0.0022, "step": 37150 }, { "action_loss": 0.004106127191334963, "epoch": 33.40827338129496, "step": 37150 }, { "epoch": 33.41726618705036, "grad_norm": 0.1250293105840683, "learning_rate": 3.465664676087085e-05, "loss": 0.0075, "step": 37160 }, { "action_loss": 0.010277523659169674, "epoch": 33.41726618705036, "step": 37160 }, { "epoch": 33.42625899280576, "grad_norm": 0.15850865840911865, "learning_rate": 3.463042086191136e-05, "loss": 0.0054, "step": 37170 }, { "action_loss": 0.004027718678116798, "epoch": 33.42625899280576, "step": 37170 }, { "epoch": 33.435251798561154, "grad_norm": 0.1511644870042801, "learning_rate": 3.460419963182423e-05, "loss": 0.0049, "step": 37180 }, { "action_loss": 0.0038166812155395746, "epoch": 33.435251798561154, "step": 37180 }, { "epoch": 33.444244604316545, "grad_norm": 0.08963166922330856, "learning_rate": 3.457798307857473e-05, "loss": 0.0051, "step": 37190 }, { "action_loss": 0.0012326128780841827, "epoch": 33.444244604316545, "step": 37190 }, { "epoch": 33.45323741007194, "grad_norm": 0.24176621437072754, "learning_rate": 3.455177121012678e-05, "loss": 0.0052, "step": 37200 }, { "action_loss": 0.002620982239022851, "epoch": 33.45323741007194, "step": 37200 }, { "epoch": 33.46223021582734, "grad_norm": 0.24585069715976715, "learning_rate": 3.452556403444285e-05, "loss": 0.0052, "step": 37210 }, { "action_loss": 0.002202426316216588, "epoch": 33.46223021582734, "step": 37210 }, { "epoch": 33.47122302158273, "grad_norm": 0.18876096606254578, "learning_rate": 3.4499361559483975e-05, "loss": 0.0058, "step": 37220 }, { "action_loss": 0.0020082949195057154, "epoch": 33.47122302158273, "step": 37220 }, { "epoch": 33.48021582733813, "grad_norm": 0.11724703758955002, "learning_rate": 3.44731637932098e-05, "loss": 0.0031, "step": 37230 }, { "action_loss": 0.0021159742027521133, "epoch": 33.48021582733813, "step": 37230 }, { "epoch": 33.489208633093526, "grad_norm": 0.20339848101139069, "learning_rate": 3.44469707435785e-05, "loss": 0.004, "step": 37240 }, { "action_loss": 0.003412713063880801, "epoch": 33.489208633093526, "step": 37240 }, { "epoch": 33.49820143884892, "grad_norm": 0.1917910873889923, "learning_rate": 3.4420782418546835e-05, "loss": 0.0048, "step": 37250 }, { "action_loss": 0.005887916777282953, "epoch": 33.49820143884892, "step": 37250 }, { "epoch": 33.507194244604314, "grad_norm": 0.2196507602930069, "learning_rate": 3.439459882607012e-05, "loss": 0.0039, "step": 37260 }, { "action_loss": 0.002099328674376011, "epoch": 33.507194244604314, "step": 37260 }, { "epoch": 33.51618705035971, "grad_norm": 0.19542217254638672, "learning_rate": 3.436841997410225e-05, "loss": 0.0029, "step": 37270 }, { "action_loss": 0.0017621790757402778, "epoch": 33.51618705035971, "step": 37270 }, { "epoch": 33.52517985611511, "grad_norm": 0.09197212010622025, "learning_rate": 3.434224587059567e-05, "loss": 0.0031, "step": 37280 }, { "action_loss": 0.006747783627361059, "epoch": 33.52517985611511, "step": 37280 }, { "epoch": 33.53417266187051, "grad_norm": 0.13056546449661255, "learning_rate": 3.431607652350136e-05, "loss": 0.004, "step": 37290 }, { "action_loss": 0.004362528678029776, "epoch": 33.53417266187051, "step": 37290 }, { "epoch": 33.5431654676259, "grad_norm": 0.13114778697490692, "learning_rate": 3.428991194076891e-05, "loss": 0.0032, "step": 37300 }, { "action_loss": 0.0017991597997024655, "epoch": 33.5431654676259, "step": 37300 }, { "epoch": 33.552158273381295, "grad_norm": 0.15808314085006714, "learning_rate": 3.4263752130346394e-05, "loss": 0.004, "step": 37310 }, { "action_loss": 0.0040968311950564384, "epoch": 33.552158273381295, "step": 37310 }, { "epoch": 33.56115107913669, "grad_norm": 0.14518043398857117, "learning_rate": 3.4237597100180515e-05, "loss": 0.0041, "step": 37320 }, { "action_loss": 0.0010839792666956782, "epoch": 33.56115107913669, "step": 37320 }, { "epoch": 33.57014388489208, "grad_norm": 0.15568861365318298, "learning_rate": 3.4211446858216427e-05, "loss": 0.0067, "step": 37330 }, { "action_loss": 0.009069525636732578, "epoch": 33.57014388489208, "step": 37330 }, { "epoch": 33.57913669064748, "grad_norm": 0.2100234031677246, "learning_rate": 3.4185301412397915e-05, "loss": 0.0132, "step": 37340 }, { "action_loss": 0.004383255261927843, "epoch": 33.57913669064748, "step": 37340 }, { "epoch": 33.58812949640288, "grad_norm": 0.06923673301935196, "learning_rate": 3.415916077066729e-05, "loss": 0.0076, "step": 37350 }, { "action_loss": 0.0020985992159694433, "epoch": 33.58812949640288, "step": 37350 }, { "epoch": 33.597122302158276, "grad_norm": 0.09169404208660126, "learning_rate": 3.413302494096535e-05, "loss": 0.003, "step": 37360 }, { "action_loss": 0.005753522738814354, "epoch": 33.597122302158276, "step": 37360 }, { "epoch": 33.606115107913666, "grad_norm": 0.1303762048482895, "learning_rate": 3.410689393123151e-05, "loss": 0.0043, "step": 37370 }, { "action_loss": 0.003256847383454442, "epoch": 33.606115107913666, "step": 37370 }, { "epoch": 33.615107913669064, "grad_norm": 0.2273321896791458, "learning_rate": 3.408076774940364e-05, "loss": 0.0051, "step": 37380 }, { "action_loss": 0.004093047231435776, "epoch": 33.615107913669064, "step": 37380 }, { "epoch": 33.62410071942446, "grad_norm": 0.13068735599517822, "learning_rate": 3.40546464034182e-05, "loss": 0.0043, "step": 37390 }, { "action_loss": 0.004809471312910318, "epoch": 33.62410071942446, "step": 37390 }, { "epoch": 33.63309352517986, "grad_norm": 0.09691885858774185, "learning_rate": 3.4028529901210185e-05, "loss": 0.0083, "step": 37400 }, { "action_loss": 0.011140505783259869, "epoch": 33.63309352517986, "step": 37400 }, { "epoch": 33.64208633093525, "grad_norm": 0.08163347095251083, "learning_rate": 3.4002418250713086e-05, "loss": 0.0063, "step": 37410 }, { "action_loss": 0.003981772810220718, "epoch": 33.64208633093525, "step": 37410 }, { "epoch": 33.65107913669065, "grad_norm": 0.12319646775722504, "learning_rate": 3.3976311459858936e-05, "loss": 0.0031, "step": 37420 }, { "action_loss": 0.002861318178474903, "epoch": 33.65107913669065, "step": 37420 }, { "epoch": 33.660071942446045, "grad_norm": 0.14489059150218964, "learning_rate": 3.395020953657826e-05, "loss": 0.0037, "step": 37430 }, { "action_loss": 0.0032896753400564194, "epoch": 33.660071942446045, "step": 37430 }, { "epoch": 33.669064748201436, "grad_norm": 0.09024159610271454, "learning_rate": 3.3924112488800165e-05, "loss": 0.0034, "step": 37440 }, { "action_loss": 0.003048267913982272, "epoch": 33.669064748201436, "step": 37440 }, { "epoch": 33.67805755395683, "grad_norm": 0.10672909766435623, "learning_rate": 3.389802032445225e-05, "loss": 0.0035, "step": 37450 }, { "action_loss": 0.0032496193889528513, "epoch": 33.67805755395683, "step": 37450 }, { "epoch": 33.68705035971223, "grad_norm": 0.1065741702914238, "learning_rate": 3.38719330514606e-05, "loss": 0.0037, "step": 37460 }, { "action_loss": 0.0035584683064371347, "epoch": 33.68705035971223, "step": 37460 }, { "epoch": 33.69604316546763, "grad_norm": 0.11679323762655258, "learning_rate": 3.3845850677749866e-05, "loss": 0.0048, "step": 37470 }, { "action_loss": 0.012744273990392685, "epoch": 33.69604316546763, "step": 37470 }, { "epoch": 33.70503597122302, "grad_norm": 0.1827230304479599, "learning_rate": 3.3819773211243157e-05, "loss": 0.005, "step": 37480 }, { "action_loss": 0.0030790443997830153, "epoch": 33.70503597122302, "step": 37480 }, { "epoch": 33.71402877697842, "grad_norm": 0.14651326835155487, "learning_rate": 3.379370065986213e-05, "loss": 0.0032, "step": 37490 }, { "action_loss": 0.0026799235492944717, "epoch": 33.71402877697842, "step": 37490 }, { "epoch": 33.723021582733814, "grad_norm": 0.11101000010967255, "learning_rate": 3.3767633031526955e-05, "loss": 0.0037, "step": 37500 }, { "action_loss": 0.006659328937530518, "epoch": 33.723021582733814, "step": 37500 }, { "epoch": 33.73201438848921, "grad_norm": 0.09546113759279251, "learning_rate": 3.374157033415626e-05, "loss": 0.0037, "step": 37510 }, { "action_loss": 0.008834216743707657, "epoch": 33.73201438848921, "step": 37510 }, { "epoch": 33.7410071942446, "grad_norm": 0.150636687874794, "learning_rate": 3.371551257566723e-05, "loss": 0.0047, "step": 37520 }, { "action_loss": 0.007737825158983469, "epoch": 33.7410071942446, "step": 37520 }, { "epoch": 33.75, "grad_norm": 0.14296185970306396, "learning_rate": 3.36894597639755e-05, "loss": 0.0043, "step": 37530 }, { "action_loss": 0.008833090774714947, "epoch": 33.75, "step": 37530 }, { "epoch": 33.7589928057554, "grad_norm": 0.23239979147911072, "learning_rate": 3.366341190699523e-05, "loss": 0.0046, "step": 37540 }, { "action_loss": 0.0035228878259658813, "epoch": 33.7589928057554, "step": 37540 }, { "epoch": 33.76798561151079, "grad_norm": 0.24760062992572784, "learning_rate": 3.36373690126391e-05, "loss": 0.0044, "step": 37550 }, { "action_loss": 0.0013196715153753757, "epoch": 33.76798561151079, "step": 37550 }, { "epoch": 33.776978417266186, "grad_norm": 0.2413242757320404, "learning_rate": 3.3611331088818234e-05, "loss": 0.0047, "step": 37560 }, { "action_loss": 0.003271509660407901, "epoch": 33.776978417266186, "step": 37560 }, { "epoch": 33.78597122302158, "grad_norm": 0.11364180594682693, "learning_rate": 3.3585298143442265e-05, "loss": 0.0054, "step": 37570 }, { "action_loss": 0.0012269517173990607, "epoch": 33.78597122302158, "step": 37570 }, { "epoch": 33.79496402877698, "grad_norm": 0.17758166790008545, "learning_rate": 3.35592701844193e-05, "loss": 0.0052, "step": 37580 }, { "action_loss": 0.0018831604393199086, "epoch": 33.79496402877698, "step": 37580 }, { "epoch": 33.80395683453237, "grad_norm": 0.14813844859600067, "learning_rate": 3.353324721965596e-05, "loss": 0.0077, "step": 37590 }, { "action_loss": 0.0015347342705354095, "epoch": 33.80395683453237, "step": 37590 }, { "epoch": 33.81294964028777, "grad_norm": 0.19583801925182343, "learning_rate": 3.350722925705736e-05, "loss": 0.0038, "step": 37600 }, { "action_loss": 0.009990249760448933, "epoch": 33.81294964028777, "step": 37600 }, { "epoch": 33.82194244604317, "grad_norm": 0.18028078973293304, "learning_rate": 3.348121630452703e-05, "loss": 0.0072, "step": 37610 }, { "action_loss": 0.009727922268211842, "epoch": 33.82194244604317, "step": 37610 }, { "epoch": 33.830935251798564, "grad_norm": 0.13632576167583466, "learning_rate": 3.3455208369967044e-05, "loss": 0.0039, "step": 37620 }, { "action_loss": 0.0021213276777416468, "epoch": 33.830935251798564, "step": 37620 }, { "epoch": 33.839928057553955, "grad_norm": 0.15599501132965088, "learning_rate": 3.34292054612779e-05, "loss": 0.0047, "step": 37630 }, { "action_loss": 0.003912245389074087, "epoch": 33.839928057553955, "step": 37630 }, { "epoch": 33.84892086330935, "grad_norm": 0.15608526766300201, "learning_rate": 3.340320758635861e-05, "loss": 0.0049, "step": 37640 }, { "action_loss": 0.0015843672445043921, "epoch": 33.84892086330935, "step": 37640 }, { "epoch": 33.85791366906475, "grad_norm": 0.1960272490978241, "learning_rate": 3.337721475310666e-05, "loss": 0.0041, "step": 37650 }, { "action_loss": 0.0028179846704006195, "epoch": 33.85791366906475, "step": 37650 }, { "epoch": 33.86690647482014, "grad_norm": 0.1666017770767212, "learning_rate": 3.335122696941795e-05, "loss": 0.0037, "step": 37660 }, { "action_loss": 0.004240883979946375, "epoch": 33.86690647482014, "step": 37660 }, { "epoch": 33.87589928057554, "grad_norm": 0.20618365705013275, "learning_rate": 3.332524424318692e-05, "loss": 0.0036, "step": 37670 }, { "action_loss": 0.00099433574359864, "epoch": 33.87589928057554, "step": 37670 }, { "epoch": 33.884892086330936, "grad_norm": 0.17402127385139465, "learning_rate": 3.32992665823064e-05, "loss": 0.0031, "step": 37680 }, { "action_loss": 0.008156824856996536, "epoch": 33.884892086330936, "step": 37680 }, { "epoch": 33.893884892086334, "grad_norm": 0.1280670464038849, "learning_rate": 3.327329399466774e-05, "loss": 0.005, "step": 37690 }, { "action_loss": 0.0016316802939400077, "epoch": 33.893884892086334, "step": 37690 }, { "epoch": 33.902877697841724, "grad_norm": 0.14452175796031952, "learning_rate": 3.324732648816072e-05, "loss": 0.0042, "step": 37700 }, { "action_loss": 0.0025005675852298737, "epoch": 33.902877697841724, "step": 37700 }, { "epoch": 33.91187050359712, "grad_norm": 0.17625078558921814, "learning_rate": 3.322136407067358e-05, "loss": 0.0033, "step": 37710 }, { "action_loss": 0.008846105076372623, "epoch": 33.91187050359712, "step": 37710 }, { "epoch": 33.92086330935252, "grad_norm": 0.09962745010852814, "learning_rate": 3.3195406750093036e-05, "loss": 0.0037, "step": 37720 }, { "action_loss": 0.002149050822481513, "epoch": 33.92086330935252, "step": 37720 }, { "epoch": 33.92985611510792, "grad_norm": 0.13084225356578827, "learning_rate": 3.3169454534304205e-05, "loss": 0.0036, "step": 37730 }, { "action_loss": 0.001352498191408813, "epoch": 33.92985611510792, "step": 37730 }, { "epoch": 33.93884892086331, "grad_norm": 0.11372675001621246, "learning_rate": 3.3143507431190725e-05, "loss": 0.0046, "step": 37740 }, { "action_loss": 0.0046785990707576275, "epoch": 33.93884892086331, "step": 37740 }, { "epoch": 33.947841726618705, "grad_norm": 0.14343203604221344, "learning_rate": 3.311756544863459e-05, "loss": 0.0031, "step": 37750 }, { "action_loss": 0.0027220353949815035, "epoch": 33.947841726618705, "step": 37750 }, { "epoch": 33.9568345323741, "grad_norm": 0.09966231882572174, "learning_rate": 3.309162859451633e-05, "loss": 0.0056, "step": 37760 }, { "action_loss": 0.006025421898812056, "epoch": 33.9568345323741, "step": 37760 }, { "epoch": 33.96582733812949, "grad_norm": 0.09501839429140091, "learning_rate": 3.306569687671487e-05, "loss": 0.0032, "step": 37770 }, { "action_loss": 0.008518519811332226, "epoch": 33.96582733812949, "step": 37770 }, { "epoch": 33.97482014388489, "grad_norm": 0.19578248262405396, "learning_rate": 3.303977030310756e-05, "loss": 0.004, "step": 37780 }, { "action_loss": 0.005295850336551666, "epoch": 33.97482014388489, "step": 37780 }, { "epoch": 33.98381294964029, "grad_norm": 0.12415032088756561, "learning_rate": 3.3013848881570245e-05, "loss": 0.0028, "step": 37790 }, { "action_loss": 0.004129535052925348, "epoch": 33.98381294964029, "step": 37790 }, { "epoch": 33.992805755395686, "grad_norm": 0.0874973013997078, "learning_rate": 3.298793261997712e-05, "loss": 0.0084, "step": 37800 }, { "action_loss": 0.0009515060228295624, "epoch": 33.992805755395686, "step": 37800 }, { "epoch": 34.00179856115108, "grad_norm": 0.09488090127706528, "learning_rate": 3.2962021526200893e-05, "loss": 0.003, "step": 37810 }, { "action_loss": 0.00449381535872817, "epoch": 34.00179856115108, "step": 37810 }, { "epoch": 34.010791366906474, "grad_norm": 0.11916285753250122, "learning_rate": 3.293611560811268e-05, "loss": 0.0039, "step": 37820 }, { "action_loss": 0.004735084250569344, "epoch": 34.010791366906474, "step": 37820 }, { "epoch": 34.01978417266187, "grad_norm": 0.19051991403102875, "learning_rate": 3.291021487358199e-05, "loss": 0.0056, "step": 37830 }, { "action_loss": 0.010406303219497204, "epoch": 34.01978417266187, "step": 37830 }, { "epoch": 34.02877697841727, "grad_norm": 0.21519531309604645, "learning_rate": 3.28843193304768e-05, "loss": 0.0066, "step": 37840 }, { "action_loss": 0.009404932148754597, "epoch": 34.02877697841727, "step": 37840 }, { "epoch": 34.03776978417266, "grad_norm": 0.15874741971492767, "learning_rate": 3.2858428986663456e-05, "loss": 0.0059, "step": 37850 }, { "action_loss": 0.0029651799704879522, "epoch": 34.03776978417266, "step": 37850 }, { "epoch": 34.04676258992806, "grad_norm": 0.08924701064825058, "learning_rate": 3.283254385000681e-05, "loss": 0.0031, "step": 37860 }, { "action_loss": 0.005775185767561197, "epoch": 34.04676258992806, "step": 37860 }, { "epoch": 34.055755395683455, "grad_norm": 0.16303129494190216, "learning_rate": 3.2806663928370076e-05, "loss": 0.0041, "step": 37870 }, { "action_loss": 0.004080818500369787, "epoch": 34.055755395683455, "step": 37870 }, { "epoch": 34.064748201438846, "grad_norm": 0.08903481066226959, "learning_rate": 3.278078922961485e-05, "loss": 0.0031, "step": 37880 }, { "action_loss": 0.0015564517816528678, "epoch": 34.064748201438846, "step": 37880 }, { "epoch": 34.07374100719424, "grad_norm": 0.14344994723796844, "learning_rate": 3.275491976160123e-05, "loss": 0.0033, "step": 37890 }, { "action_loss": 0.003444003639742732, "epoch": 34.07374100719424, "step": 37890 }, { "epoch": 34.08273381294964, "grad_norm": 0.15195006132125854, "learning_rate": 3.2729055532187645e-05, "loss": 0.0031, "step": 37900 }, { "action_loss": 0.0036667052190750837, "epoch": 34.08273381294964, "step": 37900 }, { "epoch": 34.09172661870504, "grad_norm": 0.18024936318397522, "learning_rate": 3.270319654923097e-05, "loss": 0.0048, "step": 37910 }, { "action_loss": 0.010141037404537201, "epoch": 34.09172661870504, "step": 37910 }, { "epoch": 34.10071942446043, "grad_norm": 0.10056460648775101, "learning_rate": 3.2677342820586506e-05, "loss": 0.0039, "step": 37920 }, { "action_loss": 0.004314274061471224, "epoch": 34.10071942446043, "step": 37920 }, { "epoch": 34.10971223021583, "grad_norm": 0.24338406324386597, "learning_rate": 3.2651494354107905e-05, "loss": 0.0046, "step": 37930 }, { "action_loss": 0.0007416151347570121, "epoch": 34.10971223021583, "step": 37930 }, { "epoch": 34.118705035971225, "grad_norm": 0.10330739617347717, "learning_rate": 3.2625651157647266e-05, "loss": 0.0027, "step": 37940 }, { "action_loss": 0.0017102955607697368, "epoch": 34.118705035971225, "step": 37940 }, { "epoch": 34.12769784172662, "grad_norm": 0.13184623420238495, "learning_rate": 3.259981323905505e-05, "loss": 0.004, "step": 37950 }, { "action_loss": 0.0015409361803904176, "epoch": 34.12769784172662, "step": 37950 }, { "epoch": 34.13669064748201, "grad_norm": 0.11949178576469421, "learning_rate": 3.257398060618014e-05, "loss": 0.0032, "step": 37960 }, { "action_loss": 0.002259613247588277, "epoch": 34.13669064748201, "step": 37960 }, { "epoch": 34.14568345323741, "grad_norm": 0.1518007218837738, "learning_rate": 3.254815326686983e-05, "loss": 0.0026, "step": 37970 }, { "action_loss": 0.001606625854037702, "epoch": 34.14568345323741, "step": 37970 }, { "epoch": 34.15467625899281, "grad_norm": 0.08264458924531937, "learning_rate": 3.2522331228969774e-05, "loss": 0.0022, "step": 37980 }, { "action_loss": 0.0017343353247269988, "epoch": 34.15467625899281, "step": 37980 }, { "epoch": 34.1636690647482, "grad_norm": 0.18648193776607513, "learning_rate": 3.2496514500324006e-05, "loss": 0.0042, "step": 37990 }, { "action_loss": 0.0015511329984292388, "epoch": 34.1636690647482, "step": 37990 }, { "epoch": 34.172661870503596, "grad_norm": 0.09003141522407532, "learning_rate": 3.247070308877498e-05, "loss": 0.0024, "step": 38000 }, { "action_loss": 0.007520841900259256, "epoch": 34.172661870503596, "step": 38000 }, { "epoch": 34.181654676258994, "grad_norm": 0.10556302964687347, "learning_rate": 3.2444897002163515e-05, "loss": 0.0042, "step": 38010 }, { "action_loss": 0.0016129948198795319, "epoch": 34.181654676258994, "step": 38010 }, { "epoch": 34.19064748201439, "grad_norm": 0.11403976380825043, "learning_rate": 3.241909624832885e-05, "loss": 0.003, "step": 38020 }, { "action_loss": 0.002717286115512252, "epoch": 34.19064748201439, "step": 38020 }, { "epoch": 34.19964028776978, "grad_norm": 0.07524944096803665, "learning_rate": 3.239330083510852e-05, "loss": 0.006, "step": 38030 }, { "action_loss": 0.003673429833725095, "epoch": 34.19964028776978, "step": 38030 }, { "epoch": 34.20863309352518, "grad_norm": 0.08593634516000748, "learning_rate": 3.236751077033855e-05, "loss": 0.0037, "step": 38040 }, { "action_loss": 0.0015653070295229554, "epoch": 34.20863309352518, "step": 38040 }, { "epoch": 34.21762589928058, "grad_norm": 0.1567583680152893, "learning_rate": 3.234172606185322e-05, "loss": 0.0042, "step": 38050 }, { "action_loss": 0.002078992547467351, "epoch": 34.21762589928058, "step": 38050 }, { "epoch": 34.226618705035975, "grad_norm": 0.225105419754982, "learning_rate": 3.231594671748528e-05, "loss": 0.0044, "step": 38060 }, { "action_loss": 0.0038040727376937866, "epoch": 34.226618705035975, "step": 38060 }, { "epoch": 34.235611510791365, "grad_norm": 0.17999425530433655, "learning_rate": 3.2290172745065815e-05, "loss": 0.0029, "step": 38070 }, { "action_loss": 0.0026293143164366484, "epoch": 34.235611510791365, "step": 38070 }, { "epoch": 34.24460431654676, "grad_norm": 0.18597933650016785, "learning_rate": 3.226440415242426e-05, "loss": 0.0036, "step": 38080 }, { "action_loss": 0.001185842091217637, "epoch": 34.24460431654676, "step": 38080 }, { "epoch": 34.25359712230216, "grad_norm": 0.09400206804275513, "learning_rate": 3.223864094738846e-05, "loss": 0.0033, "step": 38090 }, { "action_loss": 0.0023869983851909637, "epoch": 34.25359712230216, "step": 38090 }, { "epoch": 34.26258992805755, "grad_norm": 0.10031960159540176, "learning_rate": 3.221288313778456e-05, "loss": 0.003, "step": 38100 }, { "action_loss": 0.0014056047657504678, "epoch": 34.26258992805755, "step": 38100 }, { "epoch": 34.27158273381295, "grad_norm": 0.07306437194347382, "learning_rate": 3.2187130731437125e-05, "loss": 0.0025, "step": 38110 }, { "action_loss": 0.007573132868856192, "epoch": 34.27158273381295, "step": 38110 }, { "epoch": 34.280575539568346, "grad_norm": 0.11531416326761246, "learning_rate": 3.216138373616905e-05, "loss": 0.0042, "step": 38120 }, { "action_loss": 0.001941557857207954, "epoch": 34.280575539568346, "step": 38120 }, { "epoch": 34.289568345323744, "grad_norm": 0.07580224424600601, "learning_rate": 3.21356421598016e-05, "loss": 0.0038, "step": 38130 }, { "action_loss": 0.002932961331680417, "epoch": 34.289568345323744, "step": 38130 }, { "epoch": 34.298561151079134, "grad_norm": 0.20440123975276947, "learning_rate": 3.210990601015438e-05, "loss": 0.0038, "step": 38140 }, { "action_loss": 0.0013537928462028503, "epoch": 34.298561151079134, "step": 38140 }, { "epoch": 34.30755395683453, "grad_norm": 0.21123267710208893, "learning_rate": 3.208417529504535e-05, "loss": 0.0032, "step": 38150 }, { "action_loss": 0.002728157676756382, "epoch": 34.30755395683453, "step": 38150 }, { "epoch": 34.31654676258993, "grad_norm": 0.22504034638404846, "learning_rate": 3.205845002229084e-05, "loss": 0.0044, "step": 38160 }, { "action_loss": 0.002121390076354146, "epoch": 34.31654676258993, "step": 38160 }, { "epoch": 34.32553956834532, "grad_norm": 0.25995996594429016, "learning_rate": 3.203273019970547e-05, "loss": 0.0036, "step": 38170 }, { "action_loss": 0.012681079097092152, "epoch": 34.32553956834532, "step": 38170 }, { "epoch": 34.33453237410072, "grad_norm": 0.21127218008041382, "learning_rate": 3.200701583510227e-05, "loss": 0.0049, "step": 38180 }, { "action_loss": 0.002762533025816083, "epoch": 34.33453237410072, "step": 38180 }, { "epoch": 34.343525179856115, "grad_norm": 0.08820629119873047, "learning_rate": 3.198130693629261e-05, "loss": 0.0038, "step": 38190 }, { "action_loss": 0.0010658729588612914, "epoch": 34.343525179856115, "step": 38190 }, { "epoch": 34.35251798561151, "grad_norm": 0.16260835528373718, "learning_rate": 3.195560351108612e-05, "loss": 0.004, "step": 38200 }, { "action_loss": 0.00204480136744678, "epoch": 34.35251798561151, "step": 38200 }, { "epoch": 34.361510791366904, "grad_norm": 0.07428569346666336, "learning_rate": 3.1929905567290865e-05, "loss": 0.0033, "step": 38210 }, { "action_loss": 0.003897123271599412, "epoch": 34.361510791366904, "step": 38210 }, { "epoch": 34.3705035971223, "grad_norm": 0.09929513186216354, "learning_rate": 3.1904213112713164e-05, "loss": 0.0028, "step": 38220 }, { "action_loss": 0.006211241241544485, "epoch": 34.3705035971223, "step": 38220 }, { "epoch": 34.3794964028777, "grad_norm": 0.09406351298093796, "learning_rate": 3.187852615515774e-05, "loss": 0.0051, "step": 38230 }, { "action_loss": 0.001588044106028974, "epoch": 34.3794964028777, "step": 38230 }, { "epoch": 34.388489208633096, "grad_norm": 0.12949422001838684, "learning_rate": 3.1852844702427606e-05, "loss": 0.0024, "step": 38240 }, { "action_loss": 0.0029395809397101402, "epoch": 34.388489208633096, "step": 38240 }, { "epoch": 34.39748201438849, "grad_norm": 0.10694783926010132, "learning_rate": 3.18271687623241e-05, "loss": 0.0039, "step": 38250 }, { "action_loss": 0.0020239604637026787, "epoch": 34.39748201438849, "step": 38250 }, { "epoch": 34.406474820143885, "grad_norm": 0.19266195595264435, "learning_rate": 3.1801498342646896e-05, "loss": 0.006, "step": 38260 }, { "action_loss": 0.0018095307750627398, "epoch": 34.406474820143885, "step": 38260 }, { "epoch": 34.41546762589928, "grad_norm": 0.14904853701591492, "learning_rate": 3.177583345119398e-05, "loss": 0.0044, "step": 38270 }, { "action_loss": 0.0019678587559610605, "epoch": 34.41546762589928, "step": 38270 }, { "epoch": 34.42446043165468, "grad_norm": 0.13651753962039948, "learning_rate": 3.17501740957617e-05, "loss": 0.0028, "step": 38280 }, { "action_loss": 0.0012511307140812278, "epoch": 34.42446043165468, "step": 38280 }, { "epoch": 34.43345323741007, "grad_norm": 0.1766027808189392, "learning_rate": 3.172452028414467e-05, "loss": 0.0046, "step": 38290 }, { "action_loss": 0.00265030306763947, "epoch": 34.43345323741007, "step": 38290 }, { "epoch": 34.44244604316547, "grad_norm": 0.19482049345970154, "learning_rate": 3.169887202413583e-05, "loss": 0.0045, "step": 38300 }, { "action_loss": 0.002063487423583865, "epoch": 34.44244604316547, "step": 38300 }, { "epoch": 34.451438848920866, "grad_norm": 0.15039898455142975, "learning_rate": 3.167322932352646e-05, "loss": 0.0043, "step": 38310 }, { "action_loss": 0.002091074828058481, "epoch": 34.451438848920866, "step": 38310 }, { "epoch": 34.460431654676256, "grad_norm": 0.07921699434518814, "learning_rate": 3.164759219010613e-05, "loss": 0.0047, "step": 38320 }, { "action_loss": 0.0032194918021559715, "epoch": 34.460431654676256, "step": 38320 }, { "epoch": 34.469424460431654, "grad_norm": 0.25245583057403564, "learning_rate": 3.1621960631662725e-05, "loss": 0.004, "step": 38330 }, { "action_loss": 0.005222521722316742, "epoch": 34.469424460431654, "step": 38330 }, { "epoch": 34.47841726618705, "grad_norm": 0.2151504009962082, "learning_rate": 3.159633465598245e-05, "loss": 0.0067, "step": 38340 }, { "action_loss": 0.0036478296387940645, "epoch": 34.47841726618705, "step": 38340 }, { "epoch": 34.48741007194245, "grad_norm": 0.14125744998455048, "learning_rate": 3.1570714270849767e-05, "loss": 0.0031, "step": 38350 }, { "action_loss": 0.00212804414331913, "epoch": 34.48741007194245, "step": 38350 }, { "epoch": 34.49640287769784, "grad_norm": 0.09279458969831467, "learning_rate": 3.1545099484047516e-05, "loss": 0.0035, "step": 38360 }, { "action_loss": 0.0023787328973412514, "epoch": 34.49640287769784, "step": 38360 }, { "epoch": 34.50539568345324, "grad_norm": 0.1309266835451126, "learning_rate": 3.151949030335674e-05, "loss": 0.0026, "step": 38370 }, { "action_loss": 0.0026376491878181696, "epoch": 34.50539568345324, "step": 38370 }, { "epoch": 34.514388489208635, "grad_norm": 0.1168433129787445, "learning_rate": 3.149388673655687e-05, "loss": 0.0059, "step": 38380 }, { "action_loss": 0.0038735277485102415, "epoch": 34.514388489208635, "step": 38380 }, { "epoch": 34.523381294964025, "grad_norm": 0.17838279902935028, "learning_rate": 3.146828879142559e-05, "loss": 0.005, "step": 38390 }, { "action_loss": 0.0014613271923735738, "epoch": 34.523381294964025, "step": 38390 }, { "epoch": 34.53237410071942, "grad_norm": 0.1010776087641716, "learning_rate": 3.1442696475738866e-05, "loss": 0.0032, "step": 38400 }, { "action_loss": 0.00325384852476418, "epoch": 34.53237410071942, "step": 38400 }, { "epoch": 34.54136690647482, "grad_norm": 0.13440456986427307, "learning_rate": 3.141710979727098e-05, "loss": 0.0033, "step": 38410 }, { "action_loss": 0.0013190648751333356, "epoch": 34.54136690647482, "step": 38410 }, { "epoch": 34.55035971223022, "grad_norm": 0.1535390019416809, "learning_rate": 3.139152876379447e-05, "loss": 0.0038, "step": 38420 }, { "action_loss": 0.0018150066025555134, "epoch": 34.55035971223022, "step": 38420 }, { "epoch": 34.55935251798561, "grad_norm": 0.14452728629112244, "learning_rate": 3.1365953383080214e-05, "loss": 0.0036, "step": 38430 }, { "action_loss": 0.00261475145816803, "epoch": 34.55935251798561, "step": 38430 }, { "epoch": 34.568345323741006, "grad_norm": 0.09154591709375381, "learning_rate": 3.134038366289731e-05, "loss": 0.0034, "step": 38440 }, { "action_loss": 0.002492495346814394, "epoch": 34.568345323741006, "step": 38440 }, { "epoch": 34.577338129496404, "grad_norm": 0.13873706758022308, "learning_rate": 3.131481961101317e-05, "loss": 0.0063, "step": 38450 }, { "action_loss": 0.0024798153899610043, "epoch": 34.577338129496404, "step": 38450 }, { "epoch": 34.5863309352518, "grad_norm": 0.08760226517915726, "learning_rate": 3.128926123519349e-05, "loss": 0.004, "step": 38460 }, { "action_loss": 0.001037202775478363, "epoch": 34.5863309352518, "step": 38460 }, { "epoch": 34.59532374100719, "grad_norm": 0.08143702149391174, "learning_rate": 3.1263708543202194e-05, "loss": 0.0021, "step": 38470 }, { "action_loss": 0.00391370477154851, "epoch": 34.59532374100719, "step": 38470 }, { "epoch": 34.60431654676259, "grad_norm": 0.18649707734584808, "learning_rate": 3.123816154280155e-05, "loss": 0.0052, "step": 38480 }, { "action_loss": 0.004016458988189697, "epoch": 34.60431654676259, "step": 38480 }, { "epoch": 34.61330935251799, "grad_norm": 0.138680100440979, "learning_rate": 3.121262024175207e-05, "loss": 0.0051, "step": 38490 }, { "action_loss": 0.005591526627540588, "epoch": 34.61330935251799, "step": 38490 }, { "epoch": 34.62230215827338, "grad_norm": 0.17217512428760529, "learning_rate": 3.118708464781248e-05, "loss": 0.0048, "step": 38500 }, { "action_loss": 0.0030374813359230757, "epoch": 34.62230215827338, "step": 38500 }, { "epoch": 34.631294964028775, "grad_norm": 0.15016843378543854, "learning_rate": 3.116155476873987e-05, "loss": 0.0038, "step": 38510 }, { "action_loss": 0.0020131950732320547, "epoch": 34.631294964028775, "step": 38510 }, { "epoch": 34.64028776978417, "grad_norm": 0.08936108648777008, "learning_rate": 3.11360306122895e-05, "loss": 0.0045, "step": 38520 }, { "action_loss": 0.006730742286890745, "epoch": 34.64028776978417, "step": 38520 }, { "epoch": 34.64928057553957, "grad_norm": 0.18324629962444305, "learning_rate": 3.1110512186214975e-05, "loss": 0.0036, "step": 38530 }, { "action_loss": 0.0019353698007762432, "epoch": 34.64928057553957, "step": 38530 }, { "epoch": 34.65827338129496, "grad_norm": 0.22481614351272583, "learning_rate": 3.1084999498268095e-05, "loss": 0.0034, "step": 38540 }, { "action_loss": 0.010696329176425934, "epoch": 34.65827338129496, "step": 38540 }, { "epoch": 34.66726618705036, "grad_norm": 0.14135214686393738, "learning_rate": 3.1059492556198934e-05, "loss": 0.0048, "step": 38550 }, { "action_loss": 0.0036443464923650026, "epoch": 34.66726618705036, "step": 38550 }, { "epoch": 34.67625899280576, "grad_norm": 0.2691732943058014, "learning_rate": 3.103399136775586e-05, "loss": 0.0057, "step": 38560 }, { "action_loss": 0.00559326633810997, "epoch": 34.67625899280576, "step": 38560 }, { "epoch": 34.685251798561154, "grad_norm": 0.14753949642181396, "learning_rate": 3.100849594068541e-05, "loss": 0.0045, "step": 38570 }, { "action_loss": 0.002846869407221675, "epoch": 34.685251798561154, "step": 38570 }, { "epoch": 34.694244604316545, "grad_norm": 0.2071831375360489, "learning_rate": 3.0983006282732484e-05, "loss": 0.0036, "step": 38580 }, { "action_loss": 0.0018440483836457133, "epoch": 34.694244604316545, "step": 38580 }, { "epoch": 34.70323741007194, "grad_norm": 0.16487710177898407, "learning_rate": 3.0957522401640116e-05, "loss": 0.0041, "step": 38590 }, { "action_loss": 0.002072876086458564, "epoch": 34.70323741007194, "step": 38590 }, { "epoch": 34.71223021582734, "grad_norm": 0.12599192559719086, "learning_rate": 3.0932044305149645e-05, "loss": 0.0029, "step": 38600 }, { "action_loss": 0.0021736181806772947, "epoch": 34.71223021582734, "step": 38600 }, { "epoch": 34.72122302158273, "grad_norm": 0.11604104191064835, "learning_rate": 3.090657200100068e-05, "loss": 0.0027, "step": 38610 }, { "action_loss": 0.0023358312901109457, "epoch": 34.72122302158273, "step": 38610 }, { "epoch": 34.73021582733813, "grad_norm": 0.11362652480602264, "learning_rate": 3.088110549693099e-05, "loss": 0.0038, "step": 38620 }, { "action_loss": 0.004103093408048153, "epoch": 34.73021582733813, "step": 38620 }, { "epoch": 34.739208633093526, "grad_norm": 0.09376366436481476, "learning_rate": 3.085564480067667e-05, "loss": 0.0036, "step": 38630 }, { "action_loss": 0.001043123658746481, "epoch": 34.739208633093526, "step": 38630 }, { "epoch": 34.74820143884892, "grad_norm": 0.07134327292442322, "learning_rate": 3.0830189919971955e-05, "loss": 0.003, "step": 38640 }, { "action_loss": 0.0018539760494604707, "epoch": 34.74820143884892, "step": 38640 }, { "epoch": 34.757194244604314, "grad_norm": 0.17400392889976501, "learning_rate": 3.080474086254939e-05, "loss": 0.0049, "step": 38650 }, { "action_loss": 0.0022982743103057146, "epoch": 34.757194244604314, "step": 38650 }, { "epoch": 34.76618705035971, "grad_norm": 0.1233682706952095, "learning_rate": 3.077929763613975e-05, "loss": 0.0046, "step": 38660 }, { "action_loss": 0.002870668424293399, "epoch": 34.76618705035971, "step": 38660 }, { "epoch": 34.77517985611511, "grad_norm": 0.11907074600458145, "learning_rate": 3.075386024847198e-05, "loss": 0.006, "step": 38670 }, { "action_loss": 0.005081409588456154, "epoch": 34.77517985611511, "step": 38670 }, { "epoch": 34.78417266187051, "grad_norm": 0.10275634378194809, "learning_rate": 3.072842870727331e-05, "loss": 0.0034, "step": 38680 }, { "action_loss": 0.0008126001339405775, "epoch": 34.78417266187051, "step": 38680 }, { "epoch": 34.7931654676259, "grad_norm": 0.12449565529823303, "learning_rate": 3.070300302026916e-05, "loss": 0.0031, "step": 38690 }, { "action_loss": 0.0018684616079553962, "epoch": 34.7931654676259, "step": 38690 }, { "epoch": 34.802158273381295, "grad_norm": 0.1932569146156311, "learning_rate": 3.067758319518318e-05, "loss": 0.0053, "step": 38700 }, { "action_loss": 0.014216075651347637, "epoch": 34.802158273381295, "step": 38700 }, { "epoch": 34.81115107913669, "grad_norm": 0.15475967526435852, "learning_rate": 3.065216923973725e-05, "loss": 0.0038, "step": 38710 }, { "action_loss": 0.002000020584091544, "epoch": 34.81115107913669, "step": 38710 }, { "epoch": 34.82014388489208, "grad_norm": 0.11115572601556778, "learning_rate": 3.062676116165145e-05, "loss": 0.0029, "step": 38720 }, { "action_loss": 0.00486650550737977, "epoch": 34.82014388489208, "step": 38720 }, { "epoch": 34.82913669064748, "grad_norm": 0.18098655343055725, "learning_rate": 3.06013589686441e-05, "loss": 0.0031, "step": 38730 }, { "action_loss": 0.001540353405289352, "epoch": 34.82913669064748, "step": 38730 }, { "epoch": 34.83812949640288, "grad_norm": 0.10392223298549652, "learning_rate": 3.05759626684317e-05, "loss": 0.0038, "step": 38740 }, { "action_loss": 0.004921941086649895, "epoch": 34.83812949640288, "step": 38740 }, { "epoch": 34.847122302158276, "grad_norm": 0.0959218293428421, "learning_rate": 3.055057226872896e-05, "loss": 0.0038, "step": 38750 }, { "action_loss": 0.013155599124729633, "epoch": 34.847122302158276, "step": 38750 }, { "epoch": 34.856115107913666, "grad_norm": 0.08018144220113754, "learning_rate": 3.052518777724887e-05, "loss": 0.0049, "step": 38760 }, { "action_loss": 0.0035738013684749603, "epoch": 34.856115107913666, "step": 38760 }, { "epoch": 34.865107913669064, "grad_norm": 0.16481263935565948, "learning_rate": 3.04998092017025e-05, "loss": 0.0029, "step": 38770 }, { "action_loss": 0.008117669261991978, "epoch": 34.865107913669064, "step": 38770 }, { "epoch": 34.87410071942446, "grad_norm": 0.14719004929065704, "learning_rate": 3.0474436549799246e-05, "loss": 0.0064, "step": 38780 }, { "action_loss": 0.001672096666879952, "epoch": 34.87410071942446, "step": 38780 }, { "epoch": 34.88309352517986, "grad_norm": 0.12306924909353256, "learning_rate": 3.044906982924661e-05, "loss": 0.0042, "step": 38790 }, { "action_loss": 0.004353325814008713, "epoch": 34.88309352517986, "step": 38790 }, { "epoch": 34.89208633093525, "grad_norm": 0.1523303985595703, "learning_rate": 3.0423709047750337e-05, "loss": 0.0036, "step": 38800 }, { "action_loss": 0.0020897153299301863, "epoch": 34.89208633093525, "step": 38800 }, { "epoch": 34.90107913669065, "grad_norm": 0.2570861279964447, "learning_rate": 3.03983542130144e-05, "loss": 0.0057, "step": 38810 }, { "action_loss": 0.005094016902148724, "epoch": 34.90107913669065, "step": 38810 }, { "epoch": 34.910071942446045, "grad_norm": 0.23128154873847961, "learning_rate": 3.0373005332740877e-05, "loss": 0.0067, "step": 38820 }, { "action_loss": 0.007649517152458429, "epoch": 34.910071942446045, "step": 38820 }, { "epoch": 34.919064748201436, "grad_norm": 0.11191292852163315, "learning_rate": 3.034766241463013e-05, "loss": 0.0036, "step": 38830 }, { "action_loss": 0.0011726135853677988, "epoch": 34.919064748201436, "step": 38830 }, { "epoch": 34.92805755395683, "grad_norm": 0.1720207929611206, "learning_rate": 3.032232546638064e-05, "loss": 0.0032, "step": 38840 }, { "action_loss": 0.00439508818089962, "epoch": 34.92805755395683, "step": 38840 }, { "epoch": 34.93705035971223, "grad_norm": 0.2224401980638504, "learning_rate": 3.0296994495689114e-05, "loss": 0.0036, "step": 38850 }, { "action_loss": 0.0063585820607841015, "epoch": 34.93705035971223, "step": 38850 }, { "epoch": 34.94604316546763, "grad_norm": 0.147141233086586, "learning_rate": 3.0271669510250444e-05, "loss": 0.0039, "step": 38860 }, { "action_loss": 0.0015339450910687447, "epoch": 34.94604316546763, "step": 38860 }, { "epoch": 34.95503597122302, "grad_norm": 0.24899636209011078, "learning_rate": 3.024635051775766e-05, "loss": 0.0038, "step": 38870 }, { "action_loss": 0.002501842798665166, "epoch": 34.95503597122302, "step": 38870 }, { "epoch": 34.96402877697842, "grad_norm": 0.19657891988754272, "learning_rate": 3.022103752590205e-05, "loss": 0.0057, "step": 38880 }, { "action_loss": 0.004128696396946907, "epoch": 34.96402877697842, "step": 38880 }, { "epoch": 34.973021582733814, "grad_norm": 0.26436540484428406, "learning_rate": 3.0195730542372992e-05, "loss": 0.005, "step": 38890 }, { "action_loss": 0.00480616046115756, "epoch": 34.973021582733814, "step": 38890 }, { "epoch": 34.98201438848921, "grad_norm": 0.11205265671014786, "learning_rate": 3.0170429574858084e-05, "loss": 0.0028, "step": 38900 }, { "action_loss": 0.009277667850255966, "epoch": 34.98201438848921, "step": 38900 }, { "epoch": 34.9910071942446, "grad_norm": 0.10727319121360779, "learning_rate": 3.0145134631043127e-05, "loss": 0.0041, "step": 38910 }, { "action_loss": 0.00292029045522213, "epoch": 34.9910071942446, "step": 38910 }, { "epoch": 35.0, "grad_norm": 0.10665540397167206, "learning_rate": 3.0119845718612018e-05, "loss": 0.0035, "step": 38920 }, { "action_loss": 0.0035877812188118696, "epoch": 35.0, "step": 38920 }, { "epoch": 35.0089928057554, "grad_norm": 0.3377535045146942, "learning_rate": 3.009456284524688e-05, "loss": 0.0053, "step": 38930 }, { "action_loss": 0.005970244761556387, "epoch": 35.0089928057554, "step": 38930 }, { "epoch": 35.01798561151079, "grad_norm": 0.09635712206363678, "learning_rate": 3.0069286018627967e-05, "loss": 0.0046, "step": 38940 }, { "action_loss": 0.0038659172132611275, "epoch": 35.01798561151079, "step": 38940 }, { "epoch": 35.026978417266186, "grad_norm": 0.10235170274972916, "learning_rate": 3.0044015246433743e-05, "loss": 0.0067, "step": 38950 }, { "action_loss": 0.011386769823729992, "epoch": 35.026978417266186, "step": 38950 }, { "epoch": 35.03597122302158, "grad_norm": 0.19558191299438477, "learning_rate": 3.0018750536340755e-05, "loss": 0.0058, "step": 38960 }, { "action_loss": 0.02015441469848156, "epoch": 35.03597122302158, "step": 38960 }, { "epoch": 35.04496402877698, "grad_norm": 0.1055746003985405, "learning_rate": 2.999349189602378e-05, "loss": 0.0098, "step": 38970 }, { "action_loss": 0.0013885889202356339, "epoch": 35.04496402877698, "step": 38970 }, { "epoch": 35.05395683453237, "grad_norm": 0.14020590484142303, "learning_rate": 2.9968239333155733e-05, "loss": 0.0034, "step": 38980 }, { "action_loss": 0.010025396943092346, "epoch": 35.05395683453237, "step": 38980 }, { "epoch": 35.06294964028777, "grad_norm": 0.07887320965528488, "learning_rate": 2.994299285540767e-05, "loss": 0.0043, "step": 38990 }, { "action_loss": 0.0013028234243392944, "epoch": 35.06294964028777, "step": 38990 }, { "epoch": 35.07194244604317, "grad_norm": 0.15232770144939423, "learning_rate": 2.9917752470448813e-05, "loss": 0.0044, "step": 39000 }, { "action_loss": 0.0014638664433732629, "epoch": 35.07194244604317, "step": 39000 }, { "epoch": 35.080935251798564, "grad_norm": 0.07135417312383652, "learning_rate": 2.9892518185946495e-05, "loss": 0.0033, "step": 39010 }, { "action_loss": 0.0023984818253666162, "epoch": 35.080935251798564, "step": 39010 }, { "epoch": 35.089928057553955, "grad_norm": 0.09434060007333755, "learning_rate": 2.986729000956624e-05, "loss": 0.006, "step": 39020 }, { "action_loss": 0.004258539993315935, "epoch": 35.089928057553955, "step": 39020 }, { "epoch": 35.09892086330935, "grad_norm": 0.07387889921665192, "learning_rate": 2.9842067948971736e-05, "loss": 0.0034, "step": 39030 }, { "action_loss": 0.0016552567249163985, "epoch": 35.09892086330935, "step": 39030 }, { "epoch": 35.10791366906475, "grad_norm": 0.08609019964933395, "learning_rate": 2.9816852011824727e-05, "loss": 0.0031, "step": 39040 }, { "action_loss": 0.0010405649663880467, "epoch": 35.10791366906475, "step": 39040 }, { "epoch": 35.11690647482014, "grad_norm": 0.10602539777755737, "learning_rate": 2.979164220578519e-05, "loss": 0.0044, "step": 39050 }, { "action_loss": 0.0011354846647009254, "epoch": 35.11690647482014, "step": 39050 }, { "epoch": 35.12589928057554, "grad_norm": 0.1517791748046875, "learning_rate": 2.9766438538511165e-05, "loss": 0.0049, "step": 39060 }, { "action_loss": 0.0022347895428538322, "epoch": 35.12589928057554, "step": 39060 }, { "epoch": 35.134892086330936, "grad_norm": 0.20155662298202515, "learning_rate": 2.9741241017658873e-05, "loss": 0.0035, "step": 39070 }, { "action_loss": 0.0025889866519719362, "epoch": 35.134892086330936, "step": 39070 }, { "epoch": 35.143884892086334, "grad_norm": 0.11352328956127167, "learning_rate": 2.971604965088267e-05, "loss": 0.0036, "step": 39080 }, { "action_loss": 0.003647106932476163, "epoch": 35.143884892086334, "step": 39080 }, { "epoch": 35.152877697841724, "grad_norm": 0.1834889054298401, "learning_rate": 2.9690864445835008e-05, "loss": 0.0041, "step": 39090 }, { "action_loss": 0.005811700131744146, "epoch": 35.152877697841724, "step": 39090 }, { "epoch": 35.16187050359712, "grad_norm": 0.12221163511276245, "learning_rate": 2.966568541016651e-05, "loss": 0.0113, "step": 39100 }, { "action_loss": 0.0010909244883805513, "epoch": 35.16187050359712, "step": 39100 }, { "epoch": 35.17086330935252, "grad_norm": 0.2287237048149109, "learning_rate": 2.9640512551525867e-05, "loss": 0.0077, "step": 39110 }, { "action_loss": 0.0018561995821073651, "epoch": 35.17086330935252, "step": 39110 }, { "epoch": 35.17985611510792, "grad_norm": 0.16588862240314484, "learning_rate": 2.961534587755995e-05, "loss": 0.0039, "step": 39120 }, { "action_loss": 0.0027364089619368315, "epoch": 35.17985611510792, "step": 39120 }, { "epoch": 35.18884892086331, "grad_norm": 0.20973773300647736, "learning_rate": 2.959018539591375e-05, "loss": 0.0054, "step": 39130 }, { "action_loss": 0.0034142460208386183, "epoch": 35.18884892086331, "step": 39130 }, { "epoch": 35.197841726618705, "grad_norm": 0.17682397365570068, "learning_rate": 2.9565031114230325e-05, "loss": 0.0068, "step": 39140 }, { "action_loss": 0.0029887023847550154, "epoch": 35.197841726618705, "step": 39140 }, { "epoch": 35.2068345323741, "grad_norm": 0.22823752462863922, "learning_rate": 2.9539883040150895e-05, "loss": 0.0063, "step": 39150 }, { "action_loss": 0.001972832949832082, "epoch": 35.2068345323741, "step": 39150 }, { "epoch": 35.21582733812949, "grad_norm": 0.12877827882766724, "learning_rate": 2.9514741181314774e-05, "loss": 0.004, "step": 39160 }, { "action_loss": 0.004149691667407751, "epoch": 35.21582733812949, "step": 39160 }, { "epoch": 35.22482014388489, "grad_norm": 0.10428760945796967, "learning_rate": 2.94896055453594e-05, "loss": 0.0021, "step": 39170 }, { "action_loss": 0.0018191123381257057, "epoch": 35.22482014388489, "step": 39170 }, { "epoch": 35.23381294964029, "grad_norm": 0.13729619979858398, "learning_rate": 2.9464476139920332e-05, "loss": 0.0046, "step": 39180 }, { "action_loss": 0.0035081636160612106, "epoch": 35.23381294964029, "step": 39180 }, { "epoch": 35.242805755395686, "grad_norm": 0.11252397298812866, "learning_rate": 2.9439352972631186e-05, "loss": 0.0024, "step": 39190 }, { "action_loss": 0.0033708615228533745, "epoch": 35.242805755395686, "step": 39190 }, { "epoch": 35.25179856115108, "grad_norm": 0.11672496050596237, "learning_rate": 2.9414236051123757e-05, "loss": 0.0052, "step": 39200 }, { "action_loss": 0.0019289361080154777, "epoch": 35.25179856115108, "step": 39200 }, { "epoch": 35.260791366906474, "grad_norm": 0.30770158767700195, "learning_rate": 2.938912538302785e-05, "loss": 0.0037, "step": 39210 }, { "action_loss": 0.003617968177422881, "epoch": 35.260791366906474, "step": 39210 }, { "epoch": 35.26978417266187, "grad_norm": 0.10530151426792145, "learning_rate": 2.9364020975971464e-05, "loss": 0.0027, "step": 39220 }, { "action_loss": 0.0020340033806860447, "epoch": 35.26978417266187, "step": 39220 }, { "epoch": 35.27877697841727, "grad_norm": 0.09684733301401138, "learning_rate": 2.9338922837580657e-05, "loss": 0.0035, "step": 39230 }, { "action_loss": 0.004051284398883581, "epoch": 35.27877697841727, "step": 39230 }, { "epoch": 35.28776978417266, "grad_norm": 0.09955726563930511, "learning_rate": 2.931383097547955e-05, "loss": 0.0039, "step": 39240 }, { "action_loss": 0.006340695079416037, "epoch": 35.28776978417266, "step": 39240 }, { "epoch": 35.29676258992806, "grad_norm": 0.1462031453847885, "learning_rate": 2.928874539729043e-05, "loss": 0.0047, "step": 39250 }, { "action_loss": 0.002559739165008068, "epoch": 35.29676258992806, "step": 39250 }, { "epoch": 35.305755395683455, "grad_norm": 0.11976153403520584, "learning_rate": 2.926366611063358e-05, "loss": 0.0025, "step": 39260 }, { "action_loss": 0.004467617720365524, "epoch": 35.305755395683455, "step": 39260 }, { "epoch": 35.314748201438846, "grad_norm": 0.09377238899469376, "learning_rate": 2.9238593123127463e-05, "loss": 0.0038, "step": 39270 }, { "action_loss": 0.0018317686626687646, "epoch": 35.314748201438846, "step": 39270 }, { "epoch": 35.32374100719424, "grad_norm": 0.08526521921157837, "learning_rate": 2.9213526442388583e-05, "loss": 0.0024, "step": 39280 }, { "action_loss": 0.0018328037112951279, "epoch": 35.32374100719424, "step": 39280 }, { "epoch": 35.33273381294964, "grad_norm": 0.15683239698410034, "learning_rate": 2.9188466076031545e-05, "loss": 0.0022, "step": 39290 }, { "action_loss": 0.001900858711451292, "epoch": 35.33273381294964, "step": 39290 }, { "epoch": 35.34172661870504, "grad_norm": 0.0885600745677948, "learning_rate": 2.9163412031669012e-05, "loss": 0.0031, "step": 39300 }, { "action_loss": 0.004403526894748211, "epoch": 35.34172661870504, "step": 39300 }, { "epoch": 35.35071942446043, "grad_norm": 0.0706351101398468, "learning_rate": 2.913836431691175e-05, "loss": 0.005, "step": 39310 }, { "action_loss": 0.0007000166806392372, "epoch": 35.35071942446043, "step": 39310 }, { "epoch": 35.35971223021583, "grad_norm": 0.1407081037759781, "learning_rate": 2.9113322939368583e-05, "loss": 0.0046, "step": 39320 }, { "action_loss": 0.0016359499422833323, "epoch": 35.35971223021583, "step": 39320 }, { "epoch": 35.368705035971225, "grad_norm": 0.1265326887369156, "learning_rate": 2.9088287906646427e-05, "loss": 0.0047, "step": 39330 }, { "action_loss": 0.0035106493160128593, "epoch": 35.368705035971225, "step": 39330 }, { "epoch": 35.37769784172662, "grad_norm": 0.33206140995025635, "learning_rate": 2.906325922635024e-05, "loss": 0.005, "step": 39340 }, { "action_loss": 0.0009677723865024745, "epoch": 35.37769784172662, "step": 39340 }, { "epoch": 35.38669064748201, "grad_norm": 0.1468777060508728, "learning_rate": 2.903823690608313e-05, "loss": 0.0025, "step": 39350 }, { "action_loss": 0.00310962344519794, "epoch": 35.38669064748201, "step": 39350 }, { "epoch": 35.39568345323741, "grad_norm": 0.12568789720535278, "learning_rate": 2.9013220953446174e-05, "loss": 0.0032, "step": 39360 }, { "action_loss": 0.0020125440787523985, "epoch": 35.39568345323741, "step": 39360 }, { "epoch": 35.40467625899281, "grad_norm": 0.09386993944644928, "learning_rate": 2.8988211376038564e-05, "loss": 0.0027, "step": 39370 }, { "action_loss": 0.0018559045856818557, "epoch": 35.40467625899281, "step": 39370 }, { "epoch": 35.4136690647482, "grad_norm": 0.1729799062013626, "learning_rate": 2.8963208181457564e-05, "loss": 0.0039, "step": 39380 }, { "action_loss": 0.028141608461737633, "epoch": 35.4136690647482, "step": 39380 }, { "epoch": 35.422661870503596, "grad_norm": 0.09994859993457794, "learning_rate": 2.8938211377298453e-05, "loss": 0.0102, "step": 39390 }, { "action_loss": 0.0037664007395505905, "epoch": 35.422661870503596, "step": 39390 }, { "epoch": 35.431654676258994, "grad_norm": 0.1777755618095398, "learning_rate": 2.8913220971154652e-05, "loss": 0.0096, "step": 39400 }, { "action_loss": 0.006789745297282934, "epoch": 35.431654676258994, "step": 39400 }, { "epoch": 35.44064748201439, "grad_norm": 0.0728956013917923, "learning_rate": 2.888823697061753e-05, "loss": 0.0026, "step": 39410 }, { "action_loss": 0.00252911145798862, "epoch": 35.44064748201439, "step": 39410 }, { "epoch": 35.44964028776978, "grad_norm": 0.11084414273500443, "learning_rate": 2.8863259383276618e-05, "loss": 0.0043, "step": 39420 }, { "action_loss": 0.003995383623987436, "epoch": 35.44964028776978, "step": 39420 }, { "epoch": 35.45863309352518, "grad_norm": 0.1640557497739792, "learning_rate": 2.8838288216719395e-05, "loss": 0.0053, "step": 39430 }, { "action_loss": 0.0040168617852032185, "epoch": 35.45863309352518, "step": 39430 }, { "epoch": 35.46762589928058, "grad_norm": 0.1048629879951477, "learning_rate": 2.8813323478531484e-05, "loss": 0.0033, "step": 39440 }, { "action_loss": 0.00202618888579309, "epoch": 35.46762589928058, "step": 39440 }, { "epoch": 35.476618705035975, "grad_norm": 0.262109637260437, "learning_rate": 2.8788365176296496e-05, "loss": 0.0078, "step": 39450 }, { "action_loss": 0.0024054106324911118, "epoch": 35.476618705035975, "step": 39450 }, { "epoch": 35.485611510791365, "grad_norm": 0.1270199418067932, "learning_rate": 2.876341331759611e-05, "loss": 0.0024, "step": 39460 }, { "action_loss": 0.005949532147496939, "epoch": 35.485611510791365, "step": 39460 }, { "epoch": 35.49460431654676, "grad_norm": 0.10560697317123413, "learning_rate": 2.8738467910010036e-05, "loss": 0.0083, "step": 39470 }, { "action_loss": 0.00512034073472023, "epoch": 35.49460431654676, "step": 39470 }, { "epoch": 35.50359712230216, "grad_norm": 0.20565854012966156, "learning_rate": 2.8713528961116032e-05, "loss": 0.0037, "step": 39480 }, { "action_loss": 0.01074303314089775, "epoch": 35.50359712230216, "step": 39480 }, { "epoch": 35.51258992805755, "grad_norm": 0.1503971517086029, "learning_rate": 2.8688596478489875e-05, "loss": 0.0065, "step": 39490 }, { "action_loss": 0.0036193279083818197, "epoch": 35.51258992805755, "step": 39490 }, { "epoch": 35.52158273381295, "grad_norm": 0.14190736413002014, "learning_rate": 2.8663670469705434e-05, "loss": 0.0026, "step": 39500 }, { "action_loss": 0.011734425090253353, "epoch": 35.52158273381295, "step": 39500 }, { "epoch": 35.530575539568346, "grad_norm": 0.11354619264602661, "learning_rate": 2.8638750942334546e-05, "loss": 0.0054, "step": 39510 }, { "action_loss": 0.001271480112336576, "epoch": 35.530575539568346, "step": 39510 }, { "epoch": 35.539568345323744, "grad_norm": 0.15771427750587463, "learning_rate": 2.8613837903947115e-05, "loss": 0.0027, "step": 39520 }, { "action_loss": 0.0066843293607234955, "epoch": 35.539568345323744, "step": 39520 }, { "epoch": 35.548561151079134, "grad_norm": 0.16062656044960022, "learning_rate": 2.858893136211106e-05, "loss": 0.0047, "step": 39530 }, { "action_loss": 0.010101192630827427, "epoch": 35.548561151079134, "step": 39530 }, { "epoch": 35.55755395683453, "grad_norm": 0.18025921285152435, "learning_rate": 2.8564031324392315e-05, "loss": 0.0044, "step": 39540 }, { "action_loss": 0.0025556462351232767, "epoch": 35.55755395683453, "step": 39540 }, { "epoch": 35.56654676258993, "grad_norm": 0.14044125378131866, "learning_rate": 2.85391377983549e-05, "loss": 0.005, "step": 39550 }, { "action_loss": 0.0018913469975814223, "epoch": 35.56654676258993, "step": 39550 }, { "epoch": 35.57553956834532, "grad_norm": 0.16810238361358643, "learning_rate": 2.851425079156075e-05, "loss": 0.0063, "step": 39560 }, { "action_loss": 0.003631760599091649, "epoch": 35.57553956834532, "step": 39560 }, { "epoch": 35.58453237410072, "grad_norm": 0.2628825306892395, "learning_rate": 2.848937031156994e-05, "loss": 0.0046, "step": 39570 }, { "action_loss": 0.0016509584384039044, "epoch": 35.58453237410072, "step": 39570 }, { "epoch": 35.593525179856115, "grad_norm": 0.12951777875423431, "learning_rate": 2.846449636594044e-05, "loss": 0.0074, "step": 39580 }, { "action_loss": 0.0052654012106359005, "epoch": 35.593525179856115, "step": 39580 }, { "epoch": 35.60251798561151, "grad_norm": 0.07602233439683914, "learning_rate": 2.843962896222836e-05, "loss": 0.0032, "step": 39590 }, { "action_loss": 0.0025410086382180452, "epoch": 35.60251798561151, "step": 39590 }, { "epoch": 35.611510791366904, "grad_norm": 0.14001613855361938, "learning_rate": 2.8414768107987722e-05, "loss": 0.0038, "step": 39600 }, { "action_loss": 0.004393273964524269, "epoch": 35.611510791366904, "step": 39600 }, { "epoch": 35.6205035971223, "grad_norm": 0.22546744346618652, "learning_rate": 2.838991381077061e-05, "loss": 0.0051, "step": 39610 }, { "action_loss": 0.005446434020996094, "epoch": 35.6205035971223, "step": 39610 }, { "epoch": 35.6294964028777, "grad_norm": 0.14155389368534088, "learning_rate": 2.83650660781271e-05, "loss": 0.0032, "step": 39620 }, { "action_loss": 0.001286769867874682, "epoch": 35.6294964028777, "step": 39620 }, { "epoch": 35.638489208633096, "grad_norm": 0.06748964637517929, "learning_rate": 2.8340224917605285e-05, "loss": 0.0035, "step": 39630 }, { "action_loss": 0.002274435944855213, "epoch": 35.638489208633096, "step": 39630 }, { "epoch": 35.64748201438849, "grad_norm": 0.13534246385097504, "learning_rate": 2.831539033675122e-05, "loss": 0.005, "step": 39640 }, { "action_loss": 0.0008914985810406506, "epoch": 35.64748201438849, "step": 39640 }, { "epoch": 35.656474820143885, "grad_norm": 0.1409360021352768, "learning_rate": 2.8290562343109038e-05, "loss": 0.0075, "step": 39650 }, { "action_loss": 0.008834038861095905, "epoch": 35.656474820143885, "step": 39650 }, { "epoch": 35.66546762589928, "grad_norm": 0.117301344871521, "learning_rate": 2.826574094422082e-05, "loss": 0.0033, "step": 39660 }, { "action_loss": 0.0029144634027034044, "epoch": 35.66546762589928, "step": 39660 }, { "epoch": 35.67446043165468, "grad_norm": 0.08565407246351242, "learning_rate": 2.8240926147626645e-05, "loss": 0.0065, "step": 39670 }, { "action_loss": 0.0016995420446619391, "epoch": 35.67446043165468, "step": 39670 }, { "epoch": 35.68345323741007, "grad_norm": 0.17564894258975983, "learning_rate": 2.8216117960864586e-05, "loss": 0.0055, "step": 39680 }, { "action_loss": 0.00847961287945509, "epoch": 35.68345323741007, "step": 39680 }, { "epoch": 35.69244604316547, "grad_norm": 0.15938904881477356, "learning_rate": 2.8191316391470703e-05, "loss": 0.0045, "step": 39690 }, { "action_loss": 0.004263973329216242, "epoch": 35.69244604316547, "step": 39690 }, { "epoch": 35.701438848920866, "grad_norm": 0.1488962471485138, "learning_rate": 2.816652144697911e-05, "loss": 0.0073, "step": 39700 }, { "action_loss": 0.001226026681251824, "epoch": 35.701438848920866, "step": 39700 }, { "epoch": 35.710431654676256, "grad_norm": 0.13241735100746155, "learning_rate": 2.8141733134921783e-05, "loss": 0.0066, "step": 39710 }, { "action_loss": 0.0029950591269880533, "epoch": 35.710431654676256, "step": 39710 }, { "epoch": 35.719424460431654, "grad_norm": 0.19568264484405518, "learning_rate": 2.811695146282884e-05, "loss": 0.005, "step": 39720 }, { "action_loss": 0.0014853201573714614, "epoch": 35.719424460431654, "step": 39720 }, { "epoch": 35.72841726618705, "grad_norm": 0.25234705209732056, "learning_rate": 2.8092176438228212e-05, "loss": 0.0045, "step": 39730 }, { "action_loss": 0.0029680654406547546, "epoch": 35.72841726618705, "step": 39730 }, { "epoch": 35.73741007194245, "grad_norm": 0.1321662962436676, "learning_rate": 2.806740806864598e-05, "loss": 0.0046, "step": 39740 }, { "action_loss": 0.0074003622867167, "epoch": 35.73741007194245, "step": 39740 }, { "epoch": 35.74640287769784, "grad_norm": 0.157197967171669, "learning_rate": 2.804264636160604e-05, "loss": 0.0037, "step": 39750 }, { "action_loss": 0.005381097551435232, "epoch": 35.74640287769784, "step": 39750 }, { "epoch": 35.75539568345324, "grad_norm": 0.07085539400577545, "learning_rate": 2.8017891324630402e-05, "loss": 0.0028, "step": 39760 }, { "action_loss": 0.003693800186738372, "epoch": 35.75539568345324, "step": 39760 }, { "epoch": 35.764388489208635, "grad_norm": 0.09597919136285782, "learning_rate": 2.7993142965238976e-05, "loss": 0.0062, "step": 39770 }, { "action_loss": 0.0017387730767950416, "epoch": 35.764388489208635, "step": 39770 }, { "epoch": 35.773381294964025, "grad_norm": 0.10536296665668488, "learning_rate": 2.7968401290949665e-05, "loss": 0.0037, "step": 39780 }, { "action_loss": 0.006596583407372236, "epoch": 35.773381294964025, "step": 39780 }, { "epoch": 35.78237410071942, "grad_norm": 0.14356780052185059, "learning_rate": 2.7943666309278328e-05, "loss": 0.0033, "step": 39790 }, { "action_loss": 0.002742405980825424, "epoch": 35.78237410071942, "step": 39790 }, { "epoch": 35.79136690647482, "grad_norm": 0.10158092528581619, "learning_rate": 2.7918938027738783e-05, "loss": 0.0051, "step": 39800 }, { "action_loss": 0.00149652233812958, "epoch": 35.79136690647482, "step": 39800 }, { "epoch": 35.80035971223022, "grad_norm": 0.10485301911830902, "learning_rate": 2.789421645384287e-05, "loss": 0.0022, "step": 39810 }, { "action_loss": 0.03776917606592178, "epoch": 35.80035971223022, "step": 39810 }, { "epoch": 35.80935251798561, "grad_norm": 0.19090178608894348, "learning_rate": 2.786950159510032e-05, "loss": 0.0057, "step": 39820 }, { "action_loss": 0.004984199535101652, "epoch": 35.80935251798561, "step": 39820 }, { "epoch": 35.818345323741006, "grad_norm": 0.13195718824863434, "learning_rate": 2.7844793459018876e-05, "loss": 0.0039, "step": 39830 }, { "action_loss": 0.0019025589572265744, "epoch": 35.818345323741006, "step": 39830 }, { "epoch": 35.827338129496404, "grad_norm": 0.11857205629348755, "learning_rate": 2.7820092053104195e-05, "loss": 0.0032, "step": 39840 }, { "action_loss": 0.0029382705688476562, "epoch": 35.827338129496404, "step": 39840 }, { "epoch": 35.8363309352518, "grad_norm": 0.06865575909614563, "learning_rate": 2.7795397384859933e-05, "loss": 0.0024, "step": 39850 }, { "action_loss": 0.006848613265901804, "epoch": 35.8363309352518, "step": 39850 }, { "epoch": 35.84532374100719, "grad_norm": 0.102997325360775, "learning_rate": 2.7770709461787638e-05, "loss": 0.0028, "step": 39860 }, { "action_loss": 0.001436841208487749, "epoch": 35.84532374100719, "step": 39860 }, { "epoch": 35.85431654676259, "grad_norm": 0.09619610011577606, "learning_rate": 2.7746028291386915e-05, "loss": 0.0023, "step": 39870 }, { "action_loss": 0.001260741613805294, "epoch": 35.85431654676259, "step": 39870 }, { "epoch": 35.86330935251799, "grad_norm": 0.16712455451488495, "learning_rate": 2.772135388115519e-05, "loss": 0.0041, "step": 39880 }, { "action_loss": 0.0018963301554322243, "epoch": 35.86330935251799, "step": 39880 }, { "epoch": 35.87230215827338, "grad_norm": 0.07415693253278732, "learning_rate": 2.7696686238587945e-05, "loss": 0.0039, "step": 39890 }, { "action_loss": 0.005628745537251234, "epoch": 35.87230215827338, "step": 39890 }, { "epoch": 35.881294964028775, "grad_norm": 0.11634203046560287, "learning_rate": 2.7672025371178505e-05, "loss": 0.0047, "step": 39900 }, { "action_loss": 0.0013394365087151527, "epoch": 35.881294964028775, "step": 39900 }, { "epoch": 35.89028776978417, "grad_norm": 0.19285719096660614, "learning_rate": 2.7647371286418238e-05, "loss": 0.0031, "step": 39910 }, { "action_loss": 0.005019925069063902, "epoch": 35.89028776978417, "step": 39910 }, { "epoch": 35.89928057553957, "grad_norm": 0.1287764012813568, "learning_rate": 2.762272399179639e-05, "loss": 0.0043, "step": 39920 }, { "action_loss": 0.005049928557127714, "epoch": 35.89928057553957, "step": 39920 }, { "epoch": 35.90827338129496, "grad_norm": 0.15538819134235382, "learning_rate": 2.7598083494800154e-05, "loss": 0.0042, "step": 39930 }, { "action_loss": 0.011720557697117329, "epoch": 35.90827338129496, "step": 39930 }, { "epoch": 35.91726618705036, "grad_norm": 0.14549997448921204, "learning_rate": 2.7573449802914664e-05, "loss": 0.005, "step": 39940 }, { "action_loss": 0.0031718153040856123, "epoch": 35.91726618705036, "step": 39940 }, { "epoch": 35.92625899280576, "grad_norm": 0.13550232350826263, "learning_rate": 2.7548822923622964e-05, "loss": 0.0027, "step": 39950 }, { "action_loss": 0.0022067795507609844, "epoch": 35.92625899280576, "step": 39950 }, { "epoch": 35.935251798561154, "grad_norm": 0.11993009597063065, "learning_rate": 2.752420286440609e-05, "loss": 0.004, "step": 39960 }, { "action_loss": 0.001111661666072905, "epoch": 35.935251798561154, "step": 39960 }, { "epoch": 35.944244604316545, "grad_norm": 0.1761402189731598, "learning_rate": 2.749958963274295e-05, "loss": 0.0045, "step": 39970 }, { "action_loss": 0.003941040951758623, "epoch": 35.944244604316545, "step": 39970 }, { "epoch": 35.95323741007194, "grad_norm": 0.17382799088954926, "learning_rate": 2.747498323611039e-05, "loss": 0.0047, "step": 39980 }, { "action_loss": 0.0009996541775763035, "epoch": 35.95323741007194, "step": 39980 }, { "epoch": 35.96223021582734, "grad_norm": 0.14125435054302216, "learning_rate": 2.7450383681983184e-05, "loss": 0.0028, "step": 39990 }, { "action_loss": 0.0039018522948026657, "epoch": 35.96223021582734, "step": 39990 }, { "epoch": 35.97122302158273, "grad_norm": 0.15252472460269928, "learning_rate": 2.742579097783403e-05, "loss": 0.0038, "step": 40000 } ], "logging_steps": 10, "max_steps": 60000, "num_input_tokens_seen": 0, "num_train_epochs": 54, "save_steps": 10000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }