{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9893190921228303,
  "eval_steps": 500,
  "global_step": 747,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.004005340453938585,
      "grad_norm": 12.274865344598787,
      "learning_rate": 0.0,
      "loss": 0.8646,
      "step": 1
    },
    {
      "epoch": 0.00801068090787717,
      "grad_norm": 12.535290073417938,
      "learning_rate": 1.3333333333333336e-07,
      "loss": 0.8676,
      "step": 2
    },
    {
      "epoch": 0.012016021361815754,
      "grad_norm": 13.07061199884287,
      "learning_rate": 2.666666666666667e-07,
      "loss": 0.8902,
      "step": 3
    },
    {
      "epoch": 0.01602136181575434,
      "grad_norm": 12.267049803439043,
      "learning_rate": 4.0000000000000003e-07,
      "loss": 0.8587,
      "step": 4
    },
    {
      "epoch": 0.020026702269692925,
      "grad_norm": 11.897699285802622,
      "learning_rate": 5.333333333333335e-07,
      "loss": 0.8435,
      "step": 5
    },
    {
      "epoch": 0.02403204272363151,
      "grad_norm": 13.076081077633422,
      "learning_rate": 6.666666666666667e-07,
      "loss": 0.848,
      "step": 6
    },
    {
      "epoch": 0.028037383177570093,
      "grad_norm": 12.354188617098337,
      "learning_rate": 8.000000000000001e-07,
      "loss": 0.8607,
      "step": 7
    },
    {
      "epoch": 0.03204272363150868,
      "grad_norm": 11.607524627207871,
      "learning_rate": 9.333333333333334e-07,
      "loss": 0.8296,
      "step": 8
    },
    {
      "epoch": 0.036048064085447265,
      "grad_norm": 11.787571525976068,
      "learning_rate": 1.066666666666667e-06,
      "loss": 0.8279,
      "step": 9
    },
    {
      "epoch": 0.04005340453938585,
      "grad_norm": 10.854034800032643,
      "learning_rate": 1.2000000000000002e-06,
      "loss": 0.8255,
      "step": 10
    },
    {
      "epoch": 0.044058744993324434,
      "grad_norm": 10.331302172366403,
      "learning_rate": 1.3333333333333334e-06,
      "loss": 0.8452,
      "step": 11
    },
    {
      "epoch": 0.04806408544726302,
      "grad_norm": 10.51455049392686,
      "learning_rate": 1.4666666666666669e-06,
      "loss": 0.8477,
      "step": 12
    },
    {
      "epoch": 0.0520694259012016,
      "grad_norm": 6.804734157169537,
      "learning_rate": 1.6000000000000001e-06,
      "loss": 0.702,
      "step": 13
    },
    {
      "epoch": 0.056074766355140186,
      "grad_norm": 6.630143554129149,
      "learning_rate": 1.7333333333333336e-06,
      "loss": 0.716,
      "step": 14
    },
    {
      "epoch": 0.06008010680907877,
      "grad_norm": 5.650387053647136,
      "learning_rate": 1.8666666666666669e-06,
      "loss": 0.6831,
      "step": 15
    },
    {
      "epoch": 0.06408544726301736,
      "grad_norm": 5.847220094115504,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 0.7188,
      "step": 16
    },
    {
      "epoch": 0.06809078771695594,
      "grad_norm": 2.7400023117365913,
      "learning_rate": 2.133333333333334e-06,
      "loss": 0.6409,
      "step": 17
    },
    {
      "epoch": 0.07209612817089453,
      "grad_norm": 2.2018255790223518,
      "learning_rate": 2.266666666666667e-06,
      "loss": 0.539,
      "step": 18
    },
    {
      "epoch": 0.07610146862483311,
      "grad_norm": 2.392641868923764,
      "learning_rate": 2.4000000000000003e-06,
      "loss": 0.6092,
      "step": 19
    },
    {
      "epoch": 0.0801068090787717,
      "grad_norm": 2.1787060198095847,
      "learning_rate": 2.5333333333333338e-06,
      "loss": 0.6169,
      "step": 20
    },
    {
      "epoch": 0.08411214953271028,
      "grad_norm": 1.8580036959151014,
      "learning_rate": 2.666666666666667e-06,
      "loss": 0.5581,
      "step": 21
    },
    {
      "epoch": 0.08811748998664887,
      "grad_norm": 1.8125640693516234,
      "learning_rate": 2.8000000000000003e-06,
      "loss": 0.5566,
      "step": 22
    },
    {
      "epoch": 0.09212283044058744,
      "grad_norm": 1.7280121022360342,
      "learning_rate": 2.9333333333333338e-06,
      "loss": 0.5672,
      "step": 23
    },
    {
      "epoch": 0.09612817089452604,
      "grad_norm": 1.8991117514168228,
      "learning_rate": 3.066666666666667e-06,
      "loss": 0.5354,
      "step": 24
    },
    {
      "epoch": 0.10013351134846461,
      "grad_norm": 1.6356492105183125,
      "learning_rate": 3.2000000000000003e-06,
      "loss": 0.5275,
      "step": 25
    },
    {
      "epoch": 0.1041388518024032,
      "grad_norm": 1.6267128477825465,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 0.5338,
      "step": 26
    },
    {
      "epoch": 0.1081441922563418,
      "grad_norm": 1.404686847432176,
      "learning_rate": 3.4666666666666672e-06,
      "loss": 0.5197,
      "step": 27
    },
    {
      "epoch": 0.11214953271028037,
      "grad_norm": 1.303663556401033,
      "learning_rate": 3.6000000000000003e-06,
      "loss": 0.5308,
      "step": 28
    },
    {
      "epoch": 0.11615487316421896,
      "grad_norm": 1.1046386263286005,
      "learning_rate": 3.7333333333333337e-06,
      "loss": 0.5012,
      "step": 29
    },
    {
      "epoch": 0.12016021361815754,
      "grad_norm": 1.0758105382558327,
      "learning_rate": 3.866666666666667e-06,
      "loss": 0.4804,
      "step": 30
    },
    {
      "epoch": 0.12416555407209613,
      "grad_norm": 0.889059378144954,
      "learning_rate": 4.000000000000001e-06,
      "loss": 0.4571,
      "step": 31
    },
    {
      "epoch": 0.12817089452603472,
      "grad_norm": 0.9541992345873649,
      "learning_rate": 4.133333333333333e-06,
      "loss": 0.4188,
      "step": 32
    },
    {
      "epoch": 0.1321762349799733,
      "grad_norm": 0.9939649352643045,
      "learning_rate": 4.266666666666668e-06,
      "loss": 0.4638,
      "step": 33
    },
    {
      "epoch": 0.13618157543391188,
      "grad_norm": 1.0368182385408335,
      "learning_rate": 4.4e-06,
      "loss": 0.4594,
      "step": 34
    },
    {
      "epoch": 0.14018691588785046,
      "grad_norm": 1.0252643282112182,
      "learning_rate": 4.533333333333334e-06,
      "loss": 0.4349,
      "step": 35
    },
    {
      "epoch": 0.14419225634178906,
      "grad_norm": 0.8930734240919034,
      "learning_rate": 4.666666666666667e-06,
      "loss": 0.4105,
      "step": 36
    },
    {
      "epoch": 0.14819759679572764,
      "grad_norm": 0.8638620093928763,
      "learning_rate": 4.800000000000001e-06,
      "loss": 0.444,
      "step": 37
    },
    {
      "epoch": 0.15220293724966621,
      "grad_norm": 0.8266472764867793,
      "learning_rate": 4.933333333333334e-06,
      "loss": 0.4264,
      "step": 38
    },
    {
      "epoch": 0.15620827770360482,
      "grad_norm": 0.7587973597324337,
      "learning_rate": 5.0666666666666676e-06,
      "loss": 0.4153,
      "step": 39
    },
    {
      "epoch": 0.1602136181575434,
      "grad_norm": 0.7046790303627571,
      "learning_rate": 5.2e-06,
      "loss": 0.3968,
      "step": 40
    },
    {
      "epoch": 0.16421895861148197,
      "grad_norm": 0.7828655737674856,
      "learning_rate": 5.333333333333334e-06,
      "loss": 0.425,
      "step": 41
    },
    {
      "epoch": 0.16822429906542055,
      "grad_norm": 0.7970939916520573,
      "learning_rate": 5.466666666666667e-06,
      "loss": 0.4055,
      "step": 42
    },
    {
      "epoch": 0.17222963951935916,
      "grad_norm": 0.6788619839771596,
      "learning_rate": 5.600000000000001e-06,
      "loss": 0.3942,
      "step": 43
    },
    {
      "epoch": 0.17623497997329773,
      "grad_norm": 0.6213101486173681,
      "learning_rate": 5.733333333333334e-06,
      "loss": 0.4008,
      "step": 44
    },
    {
      "epoch": 0.1802403204272363,
      "grad_norm": 0.5942610468735896,
      "learning_rate": 5.8666666666666675e-06,
      "loss": 0.3918,
      "step": 45
    },
    {
      "epoch": 0.1842456608811749,
      "grad_norm": 0.661249969118244,
      "learning_rate": 6e-06,
      "loss": 0.3713,
      "step": 46
    },
    {
      "epoch": 0.1882510013351135,
      "grad_norm": 0.6165605112645042,
      "learning_rate": 6.133333333333334e-06,
      "loss": 0.3695,
      "step": 47
    },
    {
      "epoch": 0.19225634178905207,
      "grad_norm": 0.6418004850122087,
      "learning_rate": 6.266666666666668e-06,
      "loss": 0.3855,
      "step": 48
    },
    {
      "epoch": 0.19626168224299065,
      "grad_norm": 0.671209019626683,
      "learning_rate": 6.4000000000000006e-06,
      "loss": 0.4228,
      "step": 49
    },
    {
      "epoch": 0.20026702269692923,
      "grad_norm": 0.6303030288370243,
      "learning_rate": 6.533333333333334e-06,
      "loss": 0.3711,
      "step": 50
    },
    {
      "epoch": 0.20427236315086783,
      "grad_norm": 0.6417652044922048,
      "learning_rate": 6.666666666666667e-06,
      "loss": 0.389,
      "step": 51
    },
    {
      "epoch": 0.2082777036048064,
      "grad_norm": 0.572170970965431,
      "learning_rate": 6.800000000000001e-06,
      "loss": 0.3795,
      "step": 52
    },
    {
      "epoch": 0.21228304405874499,
      "grad_norm": 0.5711694232400057,
      "learning_rate": 6.9333333333333344e-06,
      "loss": 0.3689,
      "step": 53
    },
    {
      "epoch": 0.2162883845126836,
      "grad_norm": 0.5910040436075836,
      "learning_rate": 7.066666666666667e-06,
      "loss": 0.365,
      "step": 54
    },
    {
      "epoch": 0.22029372496662217,
      "grad_norm": 0.6284207342849625,
      "learning_rate": 7.2000000000000005e-06,
      "loss": 0.4132,
      "step": 55
    },
    {
      "epoch": 0.22429906542056074,
      "grad_norm": 0.5849289722490485,
      "learning_rate": 7.333333333333333e-06,
      "loss": 0.373,
      "step": 56
    },
    {
      "epoch": 0.22830440587449932,
      "grad_norm": 0.6341921136746668,
      "learning_rate": 7.4666666666666675e-06,
      "loss": 0.3918,
      "step": 57
    },
    {
      "epoch": 0.23230974632843793,
      "grad_norm": 0.5938896188604564,
      "learning_rate": 7.600000000000001e-06,
      "loss": 0.3663,
      "step": 58
    },
    {
      "epoch": 0.2363150867823765,
      "grad_norm": 0.5821270563686713,
      "learning_rate": 7.733333333333334e-06,
      "loss": 0.3465,
      "step": 59
    },
    {
      "epoch": 0.24032042723631508,
      "grad_norm": 0.5958193467288128,
      "learning_rate": 7.866666666666667e-06,
      "loss": 0.3619,
      "step": 60
    },
    {
      "epoch": 0.24432576769025366,
      "grad_norm": 0.5778869298012563,
      "learning_rate": 8.000000000000001e-06,
      "loss": 0.3501,
      "step": 61
    },
    {
      "epoch": 0.24833110814419226,
      "grad_norm": 0.5809265935247063,
      "learning_rate": 8.133333333333334e-06,
      "loss": 0.3593,
      "step": 62
    },
    {
      "epoch": 0.2523364485981308,
      "grad_norm": 0.5301298964648872,
      "learning_rate": 8.266666666666667e-06,
      "loss": 0.3262,
      "step": 63
    },
    {
      "epoch": 0.25634178905206945,
      "grad_norm": 0.6073029142771318,
      "learning_rate": 8.400000000000001e-06,
      "loss": 0.3458,
      "step": 64
    },
    {
      "epoch": 0.260347129506008,
      "grad_norm": 0.5862505044336555,
      "learning_rate": 8.533333333333335e-06,
      "loss": 0.3444,
      "step": 65
    },
    {
      "epoch": 0.2643524699599466,
      "grad_norm": 0.5545311112728927,
      "learning_rate": 8.666666666666668e-06,
      "loss": 0.379,
      "step": 66
    },
    {
      "epoch": 0.2683578104138852,
      "grad_norm": 0.5979912535625811,
      "learning_rate": 8.8e-06,
      "loss": 0.3582,
      "step": 67
    },
    {
      "epoch": 0.27236315086782376,
      "grad_norm": 0.5727967554965969,
      "learning_rate": 8.933333333333333e-06,
      "loss": 0.3428,
      "step": 68
    },
    {
      "epoch": 0.27636849132176233,
      "grad_norm": 0.6017340630111007,
      "learning_rate": 9.066666666666667e-06,
      "loss": 0.3587,
      "step": 69
    },
    {
      "epoch": 0.2803738317757009,
      "grad_norm": 0.5498581806098397,
      "learning_rate": 9.200000000000002e-06,
      "loss": 0.3567,
      "step": 70
    },
    {
      "epoch": 0.28437917222963954,
      "grad_norm": 0.5526640416700183,
      "learning_rate": 9.333333333333334e-06,
      "loss": 0.3337,
      "step": 71
    },
    {
      "epoch": 0.2883845126835781,
      "grad_norm": 0.5492315682122837,
      "learning_rate": 9.466666666666667e-06,
      "loss": 0.3486,
      "step": 72
    },
    {
      "epoch": 0.2923898531375167,
      "grad_norm": 0.5494699596828775,
      "learning_rate": 9.600000000000001e-06,
      "loss": 0.3374,
      "step": 73
    },
    {
      "epoch": 0.2963951935914553,
      "grad_norm": 0.630131268447689,
      "learning_rate": 9.733333333333334e-06,
      "loss": 0.3568,
      "step": 74
    },
    {
      "epoch": 0.30040053404539385,
      "grad_norm": 0.6336383497338373,
      "learning_rate": 9.866666666666668e-06,
      "loss": 0.3616,
      "step": 75
    },
    {
      "epoch": 0.30440587449933243,
      "grad_norm": 0.5624776217319135,
      "learning_rate": 1e-05,
      "loss": 0.3505,
      "step": 76
    },
    {
      "epoch": 0.308411214953271,
      "grad_norm": 0.5899336003315098,
      "learning_rate": 9.999945361292553e-06,
      "loss": 0.3576,
      "step": 77
    },
    {
      "epoch": 0.31241655540720964,
      "grad_norm": 0.6756783302903452,
      "learning_rate": 9.999781446364366e-06,
      "loss": 0.3519,
      "step": 78
    },
    {
      "epoch": 0.3164218958611482,
      "grad_norm": 0.5644425121126243,
      "learning_rate": 9.999508258797876e-06,
      "loss": 0.3164,
      "step": 79
    },
    {
      "epoch": 0.3204272363150868,
      "grad_norm": 0.587830154954018,
      "learning_rate": 9.999125804563732e-06,
      "loss": 0.3268,
      "step": 80
    },
    {
      "epoch": 0.32443257676902537,
      "grad_norm": 0.5884963318825209,
      "learning_rate": 9.998634092020659e-06,
      "loss": 0.345,
      "step": 81
    },
    {
      "epoch": 0.32843791722296395,
      "grad_norm": 0.6887604600916913,
      "learning_rate": 9.998033131915266e-06,
      "loss": 0.3803,
      "step": 82
    },
    {
      "epoch": 0.3324432576769025,
      "grad_norm": 0.5384174093778301,
      "learning_rate": 9.997322937381829e-06,
      "loss": 0.3344,
      "step": 83
    },
    {
      "epoch": 0.3364485981308411,
      "grad_norm": 0.5281542891218585,
      "learning_rate": 9.996503523941994e-06,
      "loss": 0.32,
      "step": 84
    },
    {
      "epoch": 0.3404539385847797,
      "grad_norm": 0.5164890027100179,
      "learning_rate": 9.995574909504434e-06,
      "loss": 0.3204,
      "step": 85
    },
    {
      "epoch": 0.3444592790387183,
      "grad_norm": 0.6252952243404047,
      "learning_rate": 9.994537114364471e-06,
      "loss": 0.3335,
      "step": 86
    },
    {
      "epoch": 0.3484646194926569,
      "grad_norm": 0.5558865473599024,
      "learning_rate": 9.993390161203615e-06,
      "loss": 0.3311,
      "step": 87
    },
    {
      "epoch": 0.35246995994659547,
      "grad_norm": 0.5761999832501623,
      "learning_rate": 9.992134075089085e-06,
      "loss": 0.3429,
      "step": 88
    },
    {
      "epoch": 0.35647530040053405,
      "grad_norm": 0.5374104324302127,
      "learning_rate": 9.990768883473243e-06,
      "loss": 0.3302,
      "step": 89
    },
    {
      "epoch": 0.3604806408544726,
      "grad_norm": 0.5310051502544871,
      "learning_rate": 9.989294616193018e-06,
      "loss": 0.345,
      "step": 90
    },
    {
      "epoch": 0.3644859813084112,
      "grad_norm": 0.6006990772867254,
      "learning_rate": 9.987711305469232e-06,
      "loss": 0.3351,
      "step": 91
    },
    {
      "epoch": 0.3684913217623498,
      "grad_norm": 0.559674099597398,
      "learning_rate": 9.986018985905901e-06,
      "loss": 0.3423,
      "step": 92
    },
    {
      "epoch": 0.3724966622162884,
      "grad_norm": 0.5126707083736739,
      "learning_rate": 9.984217694489493e-06,
      "loss": 0.344,
      "step": 93
    },
    {
      "epoch": 0.376502002670227,
      "grad_norm": 0.6357067005494667,
      "learning_rate": 9.982307470588097e-06,
      "loss": 0.3356,
      "step": 94
    },
    {
      "epoch": 0.38050734312416556,
      "grad_norm": 0.5258363366242368,
      "learning_rate": 9.98028835595058e-06,
      "loss": 0.3405,
      "step": 95
    },
    {
      "epoch": 0.38451268357810414,
      "grad_norm": 0.6022837706168479,
      "learning_rate": 9.978160394705669e-06,
      "loss": 0.3451,
      "step": 96
    },
    {
      "epoch": 0.3885180240320427,
      "grad_norm": 0.5617885913949726,
      "learning_rate": 9.975923633360985e-06,
      "loss": 0.3141,
      "step": 97
    },
    {
      "epoch": 0.3925233644859813,
      "grad_norm": 0.6322761944732146,
      "learning_rate": 9.973578120802025e-06,
      "loss": 0.3225,
      "step": 98
    },
    {
      "epoch": 0.3965287049399199,
      "grad_norm": 0.5513939521450553,
      "learning_rate": 9.971123908291103e-06,
      "loss": 0.3269,
      "step": 99
    },
    {
      "epoch": 0.40053404539385845,
      "grad_norm": 0.5692799860975164,
      "learning_rate": 9.968561049466214e-06,
      "loss": 0.337,
      "step": 100
    },
    {
      "epoch": 0.4045393858477971,
      "grad_norm": 0.552465429973677,
      "learning_rate": 9.965889600339877e-06,
      "loss": 0.3256,
      "step": 101
    },
    {
      "epoch": 0.40854472630173566,
      "grad_norm": 0.5542585997979107,
      "learning_rate": 9.963109619297905e-06,
      "loss": 0.3147,
      "step": 102
    },
    {
      "epoch": 0.41255006675567424,
      "grad_norm": 0.5724996614177005,
      "learning_rate": 9.960221167098124e-06,
      "loss": 0.3034,
      "step": 103
    },
    {
      "epoch": 0.4165554072096128,
      "grad_norm": 0.5546269037589538,
      "learning_rate": 9.957224306869053e-06,
      "loss": 0.3283,
      "step": 104
    },
    {
      "epoch": 0.4205607476635514,
      "grad_norm": 0.5445864929651966,
      "learning_rate": 9.95411910410852e-06,
      "loss": 0.3161,
      "step": 105
    },
    {
      "epoch": 0.42456608811748997,
      "grad_norm": 0.5679124498352474,
      "learning_rate": 9.950905626682229e-06,
      "loss": 0.3205,
      "step": 106
    },
    {
      "epoch": 0.42857142857142855,
      "grad_norm": 0.5746860342884514,
      "learning_rate": 9.947583944822284e-06,
      "loss": 0.3087,
      "step": 107
    },
    {
      "epoch": 0.4325767690253672,
      "grad_norm": 0.6293503344651058,
      "learning_rate": 9.944154131125643e-06,
      "loss": 0.3481,
      "step": 108
    },
    {
      "epoch": 0.43658210947930576,
      "grad_norm": 0.5733682700314644,
      "learning_rate": 9.940616260552545e-06,
      "loss": 0.3292,
      "step": 109
    },
    {
      "epoch": 0.44058744993324434,
      "grad_norm": 0.5593414000264296,
      "learning_rate": 9.936970410424857e-06,
      "loss": 0.3282,
      "step": 110
    },
    {
      "epoch": 0.4445927903871829,
      "grad_norm": 0.5928528566284356,
      "learning_rate": 9.933216660424396e-06,
      "loss": 0.3305,
      "step": 111
    },
    {
      "epoch": 0.4485981308411215,
      "grad_norm": 0.5958761955618564,
      "learning_rate": 9.92935509259118e-06,
      "loss": 0.3372,
      "step": 112
    },
    {
      "epoch": 0.45260347129506007,
      "grad_norm": 0.5974132983016888,
      "learning_rate": 9.92538579132164e-06,
      "loss": 0.3258,
      "step": 113
    },
    {
      "epoch": 0.45660881174899864,
      "grad_norm": 0.576878904820484,
      "learning_rate": 9.921308843366773e-06,
      "loss": 0.3223,
      "step": 114
    },
    {
      "epoch": 0.4606141522029373,
      "grad_norm": 0.5705071294697854,
      "learning_rate": 9.917124337830242e-06,
      "loss": 0.3078,
      "step": 115
    },
    {
      "epoch": 0.46461949265687585,
      "grad_norm": 0.630779206880613,
      "learning_rate": 9.912832366166443e-06,
      "loss": 0.3405,
      "step": 116
    },
    {
      "epoch": 0.46862483311081443,
      "grad_norm": 0.612175246698219,
      "learning_rate": 9.908433022178484e-06,
      "loss": 0.3247,
      "step": 117
    },
    {
      "epoch": 0.472630173564753,
      "grad_norm": 0.5888384503018512,
      "learning_rate": 9.903926402016153e-06,
      "loss": 0.3237,
      "step": 118
    },
    {
      "epoch": 0.4766355140186916,
      "grad_norm": 0.584706307851654,
      "learning_rate": 9.899312604173814e-06,
      "loss": 0.3289,
      "step": 119
    },
    {
      "epoch": 0.48064085447263016,
      "grad_norm": 0.5850586724735871,
      "learning_rate": 9.894591729488243e-06,
      "loss": 0.3103,
      "step": 120
    },
    {
      "epoch": 0.48464619492656874,
      "grad_norm": 0.5676874241983956,
      "learning_rate": 9.889763881136439e-06,
      "loss": 0.3416,
      "step": 121
    },
    {
      "epoch": 0.4886515353805073,
      "grad_norm": 0.5507142647058878,
      "learning_rate": 9.884829164633359e-06,
      "loss": 0.332,
      "step": 122
    },
    {
      "epoch": 0.49265687583444595,
      "grad_norm": 0.5883422169944877,
      "learning_rate": 9.879787687829616e-06,
      "loss": 0.341,
      "step": 123
    },
    {
      "epoch": 0.49666221628838453,
      "grad_norm": 0.6052974016126247,
      "learning_rate": 9.874639560909118e-06,
      "loss": 0.3145,
      "step": 124
    },
    {
      "epoch": 0.5006675567423231,
      "grad_norm": 0.5623497695712161,
      "learning_rate": 9.869384896386669e-06,
      "loss": 0.324,
      "step": 125
    },
    {
      "epoch": 0.5046728971962616,
      "grad_norm": 0.637548494608964,
      "learning_rate": 9.864023809105497e-06,
      "loss": 0.3512,
      "step": 126
    },
    {
      "epoch": 0.5086782376502003,
      "grad_norm": 0.5948450995539613,
      "learning_rate": 9.858556416234755e-06,
      "loss": 0.3323,
      "step": 127
    },
    {
      "epoch": 0.5126835781041389,
      "grad_norm": 0.6071663583544622,
      "learning_rate": 9.852982837266955e-06,
      "loss": 0.3106,
      "step": 128
    },
    {
      "epoch": 0.5166889185580774,
      "grad_norm": 0.5702598727693834,
      "learning_rate": 9.847303194015358e-06,
      "loss": 0.2964,
      "step": 129
    },
    {
      "epoch": 0.520694259012016,
      "grad_norm": 0.5427287917310376,
      "learning_rate": 9.841517610611309e-06,
      "loss": 0.3146,
      "step": 130
    },
    {
      "epoch": 0.5246995994659546,
      "grad_norm": 0.5757835163942887,
      "learning_rate": 9.835626213501526e-06,
      "loss": 0.2962,
      "step": 131
    },
    {
      "epoch": 0.5287049399198932,
      "grad_norm": 0.5698018296606896,
      "learning_rate": 9.829629131445342e-06,
      "loss": 0.3193,
      "step": 132
    },
    {
      "epoch": 0.5327102803738317,
      "grad_norm": 0.5410707033561443,
      "learning_rate": 9.82352649551188e-06,
      "loss": 0.3106,
      "step": 133
    },
    {
      "epoch": 0.5367156208277704,
      "grad_norm": 0.5502177437943316,
      "learning_rate": 9.817318439077197e-06,
      "loss": 0.3085,
      "step": 134
    },
    {
      "epoch": 0.540720961281709,
      "grad_norm": 0.5582555787315889,
      "learning_rate": 9.811005097821362e-06,
      "loss": 0.3151,
      "step": 135
    },
    {
      "epoch": 0.5447263017356475,
      "grad_norm": 0.530130894795682,
      "learning_rate": 9.804586609725499e-06,
      "loss": 0.3144,
      "step": 136
    },
    {
      "epoch": 0.5487316421895861,
      "grad_norm": 0.5740015673727541,
      "learning_rate": 9.798063115068766e-06,
      "loss": 0.3306,
      "step": 137
    },
    {
      "epoch": 0.5527369826435247,
      "grad_norm": 0.5191944650205522,
      "learning_rate": 9.791434756425288e-06,
      "loss": 0.3084,
      "step": 138
    },
    {
      "epoch": 0.5567423230974633,
      "grad_norm": 0.5972892802378095,
      "learning_rate": 9.784701678661045e-06,
      "loss": 0.3163,
      "step": 139
    },
    {
      "epoch": 0.5607476635514018,
      "grad_norm": 0.5504117854519875,
      "learning_rate": 9.777864028930705e-06,
      "loss": 0.3167,
      "step": 140
    },
    {
      "epoch": 0.5647530040053405,
      "grad_norm": 0.537223650006093,
      "learning_rate": 9.770921956674402e-06,
      "loss": 0.3006,
      "step": 141
    },
    {
      "epoch": 0.5687583444592791,
      "grad_norm": 0.5781957843358095,
      "learning_rate": 9.763875613614482e-06,
      "loss": 0.3123,
      "step": 142
    },
    {
      "epoch": 0.5727636849132176,
      "grad_norm": 0.5957683062334633,
      "learning_rate": 9.756725153752173e-06,
      "loss": 0.3154,
      "step": 143
    },
    {
      "epoch": 0.5767690253671562,
      "grad_norm": 0.5368008525982312,
      "learning_rate": 9.749470733364231e-06,
      "loss": 0.3108,
      "step": 144
    },
    {
      "epoch": 0.5807743658210948,
      "grad_norm": 0.5390147399817238,
      "learning_rate": 9.742112510999516e-06,
      "loss": 0.3267,
      "step": 145
    },
    {
      "epoch": 0.5847797062750334,
      "grad_norm": 0.538592138527249,
      "learning_rate": 9.73465064747553e-06,
      "loss": 0.3034,
      "step": 146
    },
    {
      "epoch": 0.5887850467289719,
      "grad_norm": 0.5909802194217371,
      "learning_rate": 9.727085305874906e-06,
      "loss": 0.3273,
      "step": 147
    },
    {
      "epoch": 0.5927903871829105,
      "grad_norm": 0.5625936142604115,
      "learning_rate": 9.719416651541839e-06,
      "loss": 0.3229,
      "step": 148
    },
    {
      "epoch": 0.5967957276368492,
      "grad_norm": 0.5606718697695303,
      "learning_rate": 9.711644852078472e-06,
      "loss": 0.3107,
      "step": 149
    },
    {
      "epoch": 0.6008010680907877,
      "grad_norm": 0.6078858789603624,
      "learning_rate": 9.703770077341236e-06,
      "loss": 0.3229,
      "step": 150
    },
    {
      "epoch": 0.6048064085447263,
      "grad_norm": 0.5549984708342697,
      "learning_rate": 9.69579249943714e-06,
      "loss": 0.3129,
      "step": 151
    },
    {
      "epoch": 0.6088117489986649,
      "grad_norm": 0.507347145487873,
      "learning_rate": 9.687712292719997e-06,
      "loss": 0.3002,
      "step": 152
    },
    {
      "epoch": 0.6128170894526035,
      "grad_norm": 0.6833991975396279,
      "learning_rate": 9.67952963378663e-06,
      "loss": 0.3087,
      "step": 153
    },
    {
      "epoch": 0.616822429906542,
      "grad_norm": 0.5915486951914966,
      "learning_rate": 9.671244701472999e-06,
      "loss": 0.3393,
      "step": 154
    },
    {
      "epoch": 0.6208277703604806,
      "grad_norm": 0.5643268444514835,
      "learning_rate": 9.662857676850306e-06,
      "loss": 0.2944,
      "step": 155
    },
    {
      "epoch": 0.6248331108144193,
      "grad_norm": 0.6409184914823735,
      "learning_rate": 9.654368743221022e-06,
      "loss": 0.3247,
      "step": 156
    },
    {
      "epoch": 0.6288384512683578,
      "grad_norm": 0.625767486890822,
      "learning_rate": 9.645778086114892e-06,
      "loss": 0.3134,
      "step": 157
    },
    {
      "epoch": 0.6328437917222964,
      "grad_norm": 0.5700497840235004,
      "learning_rate": 9.637085893284875e-06,
      "loss": 0.3023,
      "step": 158
    },
    {
      "epoch": 0.636849132176235,
      "grad_norm": 0.5790034198291902,
      "learning_rate": 9.628292354703046e-06,
      "loss": 0.2933,
      "step": 159
    },
    {
      "epoch": 0.6408544726301736,
      "grad_norm": 0.5713100840362291,
      "learning_rate": 9.619397662556434e-06,
      "loss": 0.3042,
      "step": 160
    },
    {
      "epoch": 0.6448598130841121,
      "grad_norm": 0.5741995306695465,
      "learning_rate": 9.610402011242837e-06,
      "loss": 0.3196,
      "step": 161
    },
    {
      "epoch": 0.6488651535380507,
      "grad_norm": 0.5674889795972151,
      "learning_rate": 9.601305597366553e-06,
      "loss": 0.3071,
      "step": 162
    },
    {
      "epoch": 0.6528704939919893,
      "grad_norm": 0.5369636049566915,
      "learning_rate": 9.592108619734107e-06,
      "loss": 0.3247,
      "step": 163
    },
    {
      "epoch": 0.6568758344459279,
      "grad_norm": 0.5443809471875736,
      "learning_rate": 9.582811279349881e-06,
      "loss": 0.3072,
      "step": 164
    },
    {
      "epoch": 0.6608811748998665,
      "grad_norm": 0.5953124014685344,
      "learning_rate": 9.573413779411745e-06,
      "loss": 0.3085,
      "step": 165
    },
    {
      "epoch": 0.664886515353805,
      "grad_norm": 0.5401734564217464,
      "learning_rate": 9.563916325306595e-06,
      "loss": 0.29,
      "step": 166
    },
    {
      "epoch": 0.6688918558077437,
      "grad_norm": 0.5444349651712469,
      "learning_rate": 9.55431912460588e-06,
      "loss": 0.3054,
      "step": 167
    },
    {
      "epoch": 0.6728971962616822,
      "grad_norm": 0.510267722435052,
      "learning_rate": 9.544622387061055e-06,
      "loss": 0.28,
      "step": 168
    },
    {
      "epoch": 0.6769025367156208,
      "grad_norm": 0.5184264543864224,
      "learning_rate": 9.534826324599002e-06,
      "loss": 0.2955,
      "step": 169
    },
    {
      "epoch": 0.6809078771695594,
      "grad_norm": 0.5637636626391551,
      "learning_rate": 9.5249311513174e-06,
      "loss": 0.2792,
      "step": 170
    },
    {
      "epoch": 0.684913217623498,
      "grad_norm": 0.5428313722322577,
      "learning_rate": 9.514937083480037e-06,
      "loss": 0.2945,
      "step": 171
    },
    {
      "epoch": 0.6889185580774366,
      "grad_norm": 0.5561412606219924,
      "learning_rate": 9.504844339512096e-06,
      "loss": 0.315,
      "step": 172
    },
    {
      "epoch": 0.6929238985313751,
      "grad_norm": 0.5081631254602269,
      "learning_rate": 9.494653139995368e-06,
      "loss": 0.3066,
      "step": 173
    },
    {
      "epoch": 0.6969292389853138,
      "grad_norm": 0.5856758014884262,
      "learning_rate": 9.484363707663443e-06,
      "loss": 0.2801,
      "step": 174
    },
    {
      "epoch": 0.7009345794392523,
      "grad_norm": 0.5414939052665023,
      "learning_rate": 9.473976267396831e-06,
      "loss": 0.2894,
      "step": 175
    },
    {
      "epoch": 0.7049399198931909,
      "grad_norm": 0.5188788468344311,
      "learning_rate": 9.463491046218058e-06,
      "loss": 0.2917,
      "step": 176
    },
    {
      "epoch": 0.7089452603471295,
      "grad_norm": 0.6208165205427856,
      "learning_rate": 9.452908273286699e-06,
      "loss": 0.3124,
      "step": 177
    },
    {
      "epoch": 0.7129506008010681,
      "grad_norm": 0.4892884392964166,
      "learning_rate": 9.442228179894362e-06,
      "loss": 0.2937,
      "step": 178
    },
    {
      "epoch": 0.7169559412550067,
      "grad_norm": 0.5126422005865922,
      "learning_rate": 9.431450999459653e-06,
      "loss": 0.2902,
      "step": 179
    },
    {
      "epoch": 0.7209612817089452,
      "grad_norm": 0.5578472838688182,
      "learning_rate": 9.420576967523049e-06,
      "loss": 0.2886,
      "step": 180
    },
    {
      "epoch": 0.7249666221628839,
      "grad_norm": 0.5457635354712708,
      "learning_rate": 9.409606321741776e-06,
      "loss": 0.299,
      "step": 181
    },
    {
      "epoch": 0.7289719626168224,
      "grad_norm": 0.5665035398169768,
      "learning_rate": 9.398539301884592e-06,
      "loss": 0.2975,
      "step": 182
    },
    {
      "epoch": 0.732977303070761,
      "grad_norm": 0.5286590948141091,
      "learning_rate": 9.387376149826564e-06,
      "loss": 0.2767,
      "step": 183
    },
    {
      "epoch": 0.7369826435246996,
      "grad_norm": 0.5743317468381327,
      "learning_rate": 9.376117109543769e-06,
      "loss": 0.2909,
      "step": 184
    },
    {
      "epoch": 0.7409879839786382,
      "grad_norm": 0.6431953313269012,
      "learning_rate": 9.364762427107971e-06,
      "loss": 0.3004,
      "step": 185
    },
    {
      "epoch": 0.7449933244325768,
      "grad_norm": 0.6117784289038739,
      "learning_rate": 9.353312350681242e-06,
      "loss": 0.3062,
      "step": 186
    },
    {
      "epoch": 0.7489986648865153,
      "grad_norm": 0.5466166236913528,
      "learning_rate": 9.341767130510529e-06,
      "loss": 0.3047,
      "step": 187
    },
    {
      "epoch": 0.753004005340454,
      "grad_norm": 0.5672388678846847,
      "learning_rate": 9.330127018922195e-06,
      "loss": 0.3099,
      "step": 188
    },
    {
      "epoch": 0.7570093457943925,
      "grad_norm": 0.5854324070547063,
      "learning_rate": 9.318392270316501e-06,
      "loss": 0.3097,
      "step": 189
    },
    {
      "epoch": 0.7610146862483311,
      "grad_norm": 0.5582358269319914,
      "learning_rate": 9.306563141162046e-06,
      "loss": 0.3061,
      "step": 190
    },
    {
      "epoch": 0.7650200267022697,
      "grad_norm": 0.5807552655282949,
      "learning_rate": 9.29463988999016e-06,
      "loss": 0.3004,
      "step": 191
    },
    {
      "epoch": 0.7690253671562083,
      "grad_norm": 0.5445709450895333,
      "learning_rate": 9.282622777389258e-06,
      "loss": 0.2864,
      "step": 192
    },
    {
      "epoch": 0.7730307076101469,
      "grad_norm": 0.6479747482171502,
      "learning_rate": 9.270512065999139e-06,
      "loss": 0.2979,
      "step": 193
    },
    {
      "epoch": 0.7770360480640854,
      "grad_norm": 0.5604311181657256,
      "learning_rate": 9.258308020505247e-06,
      "loss": 0.2997,
      "step": 194
    },
    {
      "epoch": 0.7810413885180241,
      "grad_norm": 0.5829812198586952,
      "learning_rate": 9.246010907632894e-06,
      "loss": 0.3233,
      "step": 195
    },
    {
      "epoch": 0.7850467289719626,
      "grad_norm": 0.580492515245855,
      "learning_rate": 9.233620996141421e-06,
      "loss": 0.299,
      "step": 196
    },
    {
      "epoch": 0.7890520694259012,
      "grad_norm": 0.592467400441266,
      "learning_rate": 9.221138556818327e-06,
      "loss": 0.2967,
      "step": 197
    },
    {
      "epoch": 0.7930574098798397,
      "grad_norm": 0.5997985716991547,
      "learning_rate": 9.20856386247335e-06,
      "loss": 0.3123,
      "step": 198
    },
    {
      "epoch": 0.7970627503337784,
      "grad_norm": 0.5397407737667249,
      "learning_rate": 9.195897187932513e-06,
      "loss": 0.2953,
      "step": 199
    },
    {
      "epoch": 0.8010680907877169,
      "grad_norm": 0.5362871237537865,
      "learning_rate": 9.1831388100321e-06,
      "loss": 0.283,
      "step": 200
    },
    {
      "epoch": 0.8050734312416555,
      "grad_norm": 0.5737882959212091,
      "learning_rate": 9.170289007612625e-06,
      "loss": 0.2922,
      "step": 201
    },
    {
      "epoch": 0.8090787716955942,
      "grad_norm": 0.5932417559868806,
      "learning_rate": 9.157348061512728e-06,
      "loss": 0.2955,
      "step": 202
    },
    {
      "epoch": 0.8130841121495327,
      "grad_norm": 0.5072437104528961,
      "learning_rate": 9.144316254563032e-06,
      "loss": 0.2696,
      "step": 203
    },
    {
      "epoch": 0.8170894526034713,
      "grad_norm": 0.557818245925382,
      "learning_rate": 9.131193871579975e-06,
      "loss": 0.2994,
      "step": 204
    },
    {
      "epoch": 0.8210947930574098,
      "grad_norm": 0.5973919395531655,
      "learning_rate": 9.117981199359575e-06,
      "loss": 0.3008,
      "step": 205
    },
    {
      "epoch": 0.8251001335113485,
      "grad_norm": 0.574306055152991,
      "learning_rate": 9.104678526671162e-06,
      "loss": 0.3086,
      "step": 206
    },
    {
      "epoch": 0.829105473965287,
      "grad_norm": 0.6115940443338315,
      "learning_rate": 9.091286144251077e-06,
      "loss": 0.2893,
      "step": 207
    },
    {
      "epoch": 0.8331108144192256,
      "grad_norm": 0.4939307262823331,
      "learning_rate": 9.077804344796302e-06,
      "loss": 0.2758,
      "step": 208
    },
    {
      "epoch": 0.8371161548731643,
      "grad_norm": 0.5523854629181829,
      "learning_rate": 9.064233422958078e-06,
      "loss": 0.2761,
      "step": 209
    },
    {
      "epoch": 0.8411214953271028,
      "grad_norm": 0.5641559704847691,
      "learning_rate": 9.050573675335453e-06,
      "loss": 0.2702,
      "step": 210
    },
    {
      "epoch": 0.8451268357810414,
      "grad_norm": 0.538525400332704,
      "learning_rate": 9.036825400468814e-06,
      "loss": 0.2625,
      "step": 211
    },
    {
      "epoch": 0.8491321762349799,
      "grad_norm": 0.5571857402527918,
      "learning_rate": 9.022988898833342e-06,
      "loss": 0.2812,
      "step": 212
    },
    {
      "epoch": 0.8531375166889186,
      "grad_norm": 0.5875899405650873,
      "learning_rate": 9.009064472832468e-06,
      "loss": 0.3085,
      "step": 213
    },
    {
      "epoch": 0.8571428571428571,
      "grad_norm": 0.5955430126526817,
      "learning_rate": 8.995052426791247e-06,
      "loss": 0.2921,
      "step": 214
    },
    {
      "epoch": 0.8611481975967957,
      "grad_norm": 0.5826373250067038,
      "learning_rate": 8.980953066949708e-06,
      "loss": 0.2912,
      "step": 215
    },
    {
      "epoch": 0.8651535380507344,
      "grad_norm": 0.5537935549036811,
      "learning_rate": 8.966766701456177e-06,
      "loss": 0.2809,
      "step": 216
    },
    {
      "epoch": 0.8691588785046729,
      "grad_norm": 0.5838980864534432,
      "learning_rate": 8.952493640360518e-06,
      "loss": 0.2909,
      "step": 217
    },
    {
      "epoch": 0.8731642189586115,
      "grad_norm": 0.5875405818886061,
      "learning_rate": 8.938134195607378e-06,
      "loss": 0.2952,
      "step": 218
    },
    {
      "epoch": 0.87716955941255,
      "grad_norm": 0.6226962389915714,
      "learning_rate": 8.923688681029356e-06,
      "loss": 0.325,
      "step": 219
    },
    {
      "epoch": 0.8811748998664887,
      "grad_norm": 0.5984817454130974,
      "learning_rate": 8.90915741234015e-06,
      "loss": 0.3124,
      "step": 220
    },
    {
      "epoch": 0.8851802403204272,
      "grad_norm": 0.5487459145803628,
      "learning_rate": 8.894540707127655e-06,
      "loss": 0.2926,
      "step": 221
    },
    {
      "epoch": 0.8891855807743658,
      "grad_norm": 0.5437039065539668,
      "learning_rate": 8.879838884847025e-06,
      "loss": 0.2769,
      "step": 222
    },
    {
      "epoch": 0.8931909212283045,
      "grad_norm": 0.5361919135525014,
      "learning_rate": 8.865052266813686e-06,
      "loss": 0.2565,
      "step": 223
    },
    {
      "epoch": 0.897196261682243,
      "grad_norm": 0.6022317598018883,
      "learning_rate": 8.850181176196316e-06,
      "loss": 0.2904,
      "step": 224
    },
    {
      "epoch": 0.9012016021361816,
      "grad_norm": 0.5777383647207497,
      "learning_rate": 8.835225938009781e-06,
      "loss": 0.2942,
      "step": 225
    },
    {
      "epoch": 0.9052069425901201,
      "grad_norm": 0.5517455644071223,
      "learning_rate": 8.820186879108038e-06,
      "loss": 0.2827,
      "step": 226
    },
    {
      "epoch": 0.9092122830440588,
      "grad_norm": 0.5746060945697256,
      "learning_rate": 8.80506432817698e-06,
      "loss": 0.2901,
      "step": 227
    },
    {
      "epoch": 0.9132176234979973,
      "grad_norm": 0.5678185790220811,
      "learning_rate": 8.789858615727266e-06,
      "loss": 0.277,
      "step": 228
    },
    {
      "epoch": 0.9172229639519359,
      "grad_norm": 0.5818515518798564,
      "learning_rate": 8.77457007408708e-06,
      "loss": 0.2805,
      "step": 229
    },
    {
      "epoch": 0.9212283044058746,
      "grad_norm": 0.5828788812029032,
      "learning_rate": 8.759199037394888e-06,
      "loss": 0.3054,
      "step": 230
    },
    {
      "epoch": 0.9252336448598131,
      "grad_norm": 0.5578305381732657,
      "learning_rate": 8.743745841592118e-06,
      "loss": 0.279,
      "step": 231
    },
    {
      "epoch": 0.9292389853137517,
      "grad_norm": 0.5710661098406483,
      "learning_rate": 8.728210824415829e-06,
      "loss": 0.2734,
      "step": 232
    },
    {
      "epoch": 0.9332443257676902,
      "grad_norm": 0.5767939333864601,
      "learning_rate": 8.712594325391324e-06,
      "loss": 0.2699,
      "step": 233
    },
    {
      "epoch": 0.9372496662216289,
      "grad_norm": 0.574270861342953,
      "learning_rate": 8.69689668582473e-06,
      "loss": 0.2766,
      "step": 234
    },
    {
      "epoch": 0.9412550066755674,
      "grad_norm": 0.5757498082823792,
      "learning_rate": 8.681118248795548e-06,
      "loss": 0.2818,
      "step": 235
    },
    {
      "epoch": 0.945260347129506,
      "grad_norm": 0.6077724733956369,
      "learning_rate": 8.665259359149132e-06,
      "loss": 0.2969,
      "step": 236
    },
    {
      "epoch": 0.9492656875834445,
      "grad_norm": 0.549944516647202,
      "learning_rate": 8.649320363489178e-06,
      "loss": 0.2609,
      "step": 237
    },
    {
      "epoch": 0.9532710280373832,
      "grad_norm": 0.5456975844935816,
      "learning_rate": 8.633301610170136e-06,
      "loss": 0.287,
      "step": 238
    },
    {
      "epoch": 0.9572763684913218,
      "grad_norm": 0.5280058829694398,
      "learning_rate": 8.617203449289593e-06,
      "loss": 0.2644,
      "step": 239
    },
    {
      "epoch": 0.9612817089452603,
      "grad_norm": 0.5920277658059444,
      "learning_rate": 8.601026232680634e-06,
      "loss": 0.291,
      "step": 240
    },
    {
      "epoch": 0.965287049399199,
      "grad_norm": 0.6029720289705192,
      "learning_rate": 8.584770313904138e-06,
      "loss": 0.2883,
      "step": 241
    },
    {
      "epoch": 0.9692923898531375,
      "grad_norm": 0.5726602829217651,
      "learning_rate": 8.568436048241062e-06,
      "loss": 0.265,
      "step": 242
    },
    {
      "epoch": 0.9732977303070761,
      "grad_norm": 0.5825245425360942,
      "learning_rate": 8.552023792684672e-06,
      "loss": 0.2868,
      "step": 243
    },
    {
      "epoch": 0.9773030707610146,
      "grad_norm": 0.6673081291871541,
      "learning_rate": 8.535533905932739e-06,
      "loss": 0.3054,
      "step": 244
    },
    {
      "epoch": 0.9813084112149533,
      "grad_norm": 0.6031779392976561,
      "learning_rate": 8.518966748379702e-06,
      "loss": 0.2851,
      "step": 245
    },
    {
      "epoch": 0.9853137516688919,
      "grad_norm": 0.6325037575700174,
      "learning_rate": 8.502322682108792e-06,
      "loss": 0.269,
      "step": 246
    },
    {
      "epoch": 0.9893190921228304,
      "grad_norm": 0.630286272644598,
      "learning_rate": 8.485602070884118e-06,
      "loss": 0.2835,
      "step": 247
    },
    {
      "epoch": 0.9933244325767691,
      "grad_norm": 0.5550792941097276,
      "learning_rate": 8.46880528014271e-06,
      "loss": 0.2716,
      "step": 248
    },
    {
      "epoch": 0.9973297730307076,
      "grad_norm": 0.5955656163999381,
      "learning_rate": 8.451932676986543e-06,
      "loss": 0.2919,
      "step": 249
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.5955656163999381,
      "learning_rate": 8.43498463017451e-06,
      "loss": 0.2708,
      "step": 250
    },
    {
      "epoch": 1.0040053404539386,
      "grad_norm": 0.762897141846903,
      "learning_rate": 8.417961510114357e-06,
      "loss": 0.2589,
      "step": 251
    },
    {
      "epoch": 1.0080106809078773,
      "grad_norm": 0.5667256244239827,
      "learning_rate": 8.400863688854598e-06,
      "loss": 0.2347,
      "step": 252
    },
    {
      "epoch": 1.0120160213618157,
      "grad_norm": 0.5182073558824543,
      "learning_rate": 8.383691540076372e-06,
      "loss": 0.2473,
      "step": 253
    },
    {
      "epoch": 1.0160213618157543,
      "grad_norm": 0.5411427558297038,
      "learning_rate": 8.366445439085286e-06,
      "loss": 0.239,
      "step": 254
    },
    {
      "epoch": 1.020026702269693,
      "grad_norm": 0.5411386326348857,
      "learning_rate": 8.349125762803204e-06,
      "loss": 0.255,
      "step": 255
    },
    {
      "epoch": 1.0240320427236316,
      "grad_norm": 0.5768680583207191,
      "learning_rate": 8.331732889760021e-06,
      "loss": 0.2304,
      "step": 256
    },
    {
      "epoch": 1.02803738317757,
      "grad_norm": 0.5663192339105902,
      "learning_rate": 8.314267200085373e-06,
      "loss": 0.2364,
      "step": 257
    },
    {
      "epoch": 1.0320427236315086,
      "grad_norm": 0.5782681146396127,
      "learning_rate": 8.296729075500345e-06,
      "loss": 0.2554,
      "step": 258
    },
    {
      "epoch": 1.0360480640854473,
      "grad_norm": 0.5924117511990626,
      "learning_rate": 8.279118899309121e-06,
      "loss": 0.2381,
      "step": 259
    },
    {
      "epoch": 1.0400534045393859,
      "grad_norm": 0.5849704627036197,
      "learning_rate": 8.261437056390607e-06,
      "loss": 0.2592,
      "step": 260
    },
    {
      "epoch": 1.0440587449933245,
      "grad_norm": 0.5462578431369289,
      "learning_rate": 8.243683933190019e-06,
      "loss": 0.2481,
      "step": 261
    },
    {
      "epoch": 1.048064085447263,
      "grad_norm": 0.5687469203522241,
      "learning_rate": 8.22585991771044e-06,
      "loss": 0.2406,
      "step": 262
    },
    {
      "epoch": 1.0520694259012016,
      "grad_norm": 0.5764520043363477,
      "learning_rate": 8.207965399504334e-06,
      "loss": 0.2435,
      "step": 263
    },
    {
      "epoch": 1.0560747663551402,
      "grad_norm": 0.6130154606997985,
      "learning_rate": 8.190000769665044e-06,
      "loss": 0.2494,
      "step": 264
    },
    {
      "epoch": 1.0600801068090788,
      "grad_norm": 0.5421995984684055,
      "learning_rate": 8.171966420818227e-06,
      "loss": 0.2435,
      "step": 265
    },
    {
      "epoch": 1.0640854472630175,
      "grad_norm": 0.5828640036968468,
      "learning_rate": 8.153862747113293e-06,
      "loss": 0.2353,
      "step": 266
    },
    {
      "epoch": 1.0680907877169559,
      "grad_norm": 0.5148839059504708,
      "learning_rate": 8.135690144214767e-06,
      "loss": 0.2318,
      "step": 267
    },
    {
      "epoch": 1.0720961281708945,
      "grad_norm": 0.5486187246706559,
      "learning_rate": 8.117449009293668e-06,
      "loss": 0.2416,
      "step": 268
    },
    {
      "epoch": 1.0761014686248331,
      "grad_norm": 0.564502169912709,
      "learning_rate": 8.099139741018809e-06,
      "loss": 0.2364,
      "step": 269
    },
    {
      "epoch": 1.0801068090787718,
      "grad_norm": 0.6097314278041118,
      "learning_rate": 8.08076273954809e-06,
      "loss": 0.2598,
      "step": 270
    },
    {
      "epoch": 1.0841121495327102,
      "grad_norm": 0.6059107746858474,
      "learning_rate": 8.062318406519751e-06,
      "loss": 0.2507,
      "step": 271
    },
    {
      "epoch": 1.0881174899866488,
      "grad_norm": 0.6241821796107588,
      "learning_rate": 8.043807145043604e-06,
      "loss": 0.2479,
      "step": 272
    },
    {
      "epoch": 1.0921228304405874,
      "grad_norm": 0.5883002088770041,
      "learning_rate": 8.025229359692206e-06,
      "loss": 0.2504,
      "step": 273
    },
    {
      "epoch": 1.096128170894526,
      "grad_norm": 0.5888253144437603,
      "learning_rate": 8.00658545649203e-06,
      "loss": 0.2346,
      "step": 274
    },
    {
      "epoch": 1.1001335113484647,
      "grad_norm": 0.5409284658955128,
      "learning_rate": 7.987875842914583e-06,
      "loss": 0.2357,
      "step": 275
    },
    {
      "epoch": 1.1041388518024031,
      "grad_norm": 0.5648850017659398,
      "learning_rate": 7.969100927867508e-06,
      "loss": 0.2479,
      "step": 276
    },
    {
      "epoch": 1.1081441922563418,
      "grad_norm": 0.6139375755294754,
      "learning_rate": 7.950261121685642e-06,
      "loss": 0.2452,
      "step": 277
    },
    {
      "epoch": 1.1121495327102804,
      "grad_norm": 0.6246425570636841,
      "learning_rate": 7.931356836122046e-06,
      "loss": 0.2404,
      "step": 278
    },
    {
      "epoch": 1.116154873164219,
      "grad_norm": 0.5298624506016548,
      "learning_rate": 7.912388484339012e-06,
      "loss": 0.2318,
      "step": 279
    },
    {
      "epoch": 1.1201602136181577,
      "grad_norm": 0.5727259445791012,
      "learning_rate": 7.89335648089903e-06,
      "loss": 0.2444,
      "step": 280
    },
    {
      "epoch": 1.124165554072096,
      "grad_norm": 0.568496396477039,
      "learning_rate": 7.874261241755726e-06,
      "loss": 0.2361,
      "step": 281
    },
    {
      "epoch": 1.1281708945260347,
      "grad_norm": 0.5698858845026502,
      "learning_rate": 7.855103184244777e-06,
      "loss": 0.2475,
      "step": 282
    },
    {
      "epoch": 1.1321762349799733,
      "grad_norm": 0.6033437616235542,
      "learning_rate": 7.835882727074779e-06,
      "loss": 0.2483,
      "step": 283
    },
    {
      "epoch": 1.136181575433912,
      "grad_norm": 0.6137682287341324,
      "learning_rate": 7.81660029031811e-06,
      "loss": 0.2485,
      "step": 284
    },
    {
      "epoch": 1.1401869158878504,
      "grad_norm": 0.5389251730544439,
      "learning_rate": 7.797256295401738e-06,
      "loss": 0.2287,
      "step": 285
    },
    {
      "epoch": 1.144192256341789,
      "grad_norm": 0.582366745214894,
      "learning_rate": 7.777851165098012e-06,
      "loss": 0.247,
      "step": 286
    },
    {
      "epoch": 1.1481975967957276,
      "grad_norm": 0.5617439076162762,
      "learning_rate": 7.75838532351543e-06,
      "loss": 0.223,
      "step": 287
    },
    {
      "epoch": 1.1522029372496663,
      "grad_norm": 0.5457463297035726,
      "learning_rate": 7.738859196089358e-06,
      "loss": 0.2481,
      "step": 288
    },
    {
      "epoch": 1.156208277703605,
      "grad_norm": 0.6026062150338968,
      "learning_rate": 7.719273209572745e-06,
      "loss": 0.2602,
      "step": 289
    },
    {
      "epoch": 1.1602136181575433,
      "grad_norm": 0.595754963300469,
      "learning_rate": 7.699627792026784e-06,
      "loss": 0.2388,
      "step": 290
    },
    {
      "epoch": 1.164218958611482,
      "grad_norm": 0.5245236864467587,
      "learning_rate": 7.679923372811564e-06,
      "loss": 0.2353,
      "step": 291
    },
    {
      "epoch": 1.1682242990654206,
      "grad_norm": 0.6296844005130243,
      "learning_rate": 7.660160382576683e-06,
      "loss": 0.2342,
      "step": 292
    },
    {
      "epoch": 1.1722296395193592,
      "grad_norm": 0.5981183888141479,
      "learning_rate": 7.64033925325184e-06,
      "loss": 0.2416,
      "step": 293
    },
    {
      "epoch": 1.1762349799732976,
      "grad_norm": 0.584776287003421,
      "learning_rate": 7.620460418037388e-06,
      "loss": 0.228,
      "step": 294
    },
    {
      "epoch": 1.1802403204272363,
      "grad_norm": 0.5906417247227626,
      "learning_rate": 7.600524311394873e-06,
      "loss": 0.2323,
      "step": 295
    },
    {
      "epoch": 1.1842456608811749,
      "grad_norm": 0.5834730538207583,
      "learning_rate": 7.580531369037534e-06,
      "loss": 0.2428,
      "step": 296
    },
    {
      "epoch": 1.1882510013351135,
      "grad_norm": 0.592770510365303,
      "learning_rate": 7.5604820279207816e-06,
      "loss": 0.2311,
      "step": 297
    },
    {
      "epoch": 1.1922563417890522,
      "grad_norm": 0.5932974539859142,
      "learning_rate": 7.540376726232648e-06,
      "loss": 0.2456,
      "step": 298
    },
    {
      "epoch": 1.1962616822429906,
      "grad_norm": 0.599679016601175,
      "learning_rate": 7.520215903384215e-06,
      "loss": 0.2319,
      "step": 299
    },
    {
      "epoch": 1.2002670226969292,
      "grad_norm": 0.543968089573662,
      "learning_rate": 7.500000000000001e-06,
      "loss": 0.2451,
      "step": 300
    },
    {
      "epoch": 1.2042723631508678,
      "grad_norm": 0.5983969022372734,
      "learning_rate": 7.4797294579083405e-06,
      "loss": 0.2491,
      "step": 301
    },
    {
      "epoch": 1.2082777036048065,
      "grad_norm": 0.5538744153921799,
      "learning_rate": 7.459404720131717e-06,
      "loss": 0.233,
      "step": 302
    },
    {
      "epoch": 1.2122830440587449,
      "grad_norm": 0.5900495952853351,
      "learning_rate": 7.439026230877096e-06,
      "loss": 0.2297,
      "step": 303
    },
    {
      "epoch": 1.2162883845126835,
      "grad_norm": 0.5465667743441658,
      "learning_rate": 7.4185944355261996e-06,
      "loss": 0.2528,
      "step": 304
    },
    {
      "epoch": 1.2202937249666221,
      "grad_norm": 0.6269598978844211,
      "learning_rate": 7.398109780625784e-06,
      "loss": 0.2501,
      "step": 305
    },
    {
      "epoch": 1.2242990654205608,
      "grad_norm": 0.5806451562215877,
      "learning_rate": 7.3775727138778776e-06,
      "loss": 0.2391,
      "step": 306
    },
    {
      "epoch": 1.2283044058744994,
      "grad_norm": 0.6320385790504774,
      "learning_rate": 7.3569836841299905e-06,
      "loss": 0.2464,
      "step": 307
    },
    {
      "epoch": 1.232309746328438,
      "grad_norm": 0.5737559605551135,
      "learning_rate": 7.336343141365311e-06,
      "loss": 0.2441,
      "step": 308
    },
    {
      "epoch": 1.2363150867823764,
      "grad_norm": 0.5952972084149591,
      "learning_rate": 7.315651536692873e-06,
      "loss": 0.2564,
      "step": 309
    },
    {
      "epoch": 1.240320427236315,
      "grad_norm": 0.7146063936502873,
      "learning_rate": 7.294909322337689e-06,
      "loss": 0.2313,
      "step": 310
    },
    {
      "epoch": 1.2443257676902537,
      "grad_norm": 0.595320191460265,
      "learning_rate": 7.274116951630873e-06,
      "loss": 0.2368,
      "step": 311
    },
    {
      "epoch": 1.2483311081441923,
      "grad_norm": 0.5744358155705048,
      "learning_rate": 7.253274878999728e-06,
      "loss": 0.2282,
      "step": 312
    },
    {
      "epoch": 1.2523364485981308,
      "grad_norm": 0.6298803013176558,
      "learning_rate": 7.232383559957815e-06,
      "loss": 0.2418,
      "step": 313
    },
    {
      "epoch": 1.2563417890520694,
      "grad_norm": 0.6145313830867569,
      "learning_rate": 7.211443451095007e-06,
      "loss": 0.2365,
      "step": 314
    },
    {
      "epoch": 1.260347129506008,
      "grad_norm": 0.6285532186169481,
      "learning_rate": 7.190455010067494e-06,
      "loss": 0.2347,
      "step": 315
    },
    {
      "epoch": 1.2643524699599467,
      "grad_norm": 0.590793052150211,
      "learning_rate": 7.169418695587791e-06,
      "loss": 0.2303,
      "step": 316
    },
    {
      "epoch": 1.2683578104138853,
      "grad_norm": 0.5713241346393119,
      "learning_rate": 7.1483349674147125e-06,
      "loss": 0.2242,
      "step": 317
    },
    {
      "epoch": 1.2723631508678237,
      "grad_norm": 0.5490796477452554,
      "learning_rate": 7.127204286343321e-06,
      "loss": 0.2338,
      "step": 318
    },
    {
      "epoch": 1.2763684913217623,
      "grad_norm": 0.6071154182227954,
      "learning_rate": 7.106027114194856e-06,
      "loss": 0.225,
      "step": 319
    },
    {
      "epoch": 1.280373831775701,
      "grad_norm": 0.5963758796133684,
      "learning_rate": 7.084803913806642e-06,
      "loss": 0.2369,
      "step": 320
    },
    {
      "epoch": 1.2843791722296396,
      "grad_norm": 0.6339033842544861,
      "learning_rate": 7.063535149021974e-06,
      "loss": 0.2441,
      "step": 321
    },
    {
      "epoch": 1.288384512683578,
      "grad_norm": 0.6284003653179433,
      "learning_rate": 7.042221284679982e-06,
      "loss": 0.2402,
      "step": 322
    },
    {
      "epoch": 1.2923898531375166,
      "grad_norm": 0.6593560684745596,
      "learning_rate": 7.02086278660546e-06,
      "loss": 0.2535,
      "step": 323
    },
    {
      "epoch": 1.2963951935914553,
      "grad_norm": 0.6387070843334016,
      "learning_rate": 6.999460121598704e-06,
      "loss": 0.2297,
      "step": 324
    },
    {
      "epoch": 1.300400534045394,
      "grad_norm": 0.5750425275519615,
      "learning_rate": 6.978013757425295e-06,
      "loss": 0.2355,
      "step": 325
    },
    {
      "epoch": 1.3044058744993325,
      "grad_norm": 0.5586774593218413,
      "learning_rate": 6.956524162805875e-06,
      "loss": 0.2384,
      "step": 326
    },
    {
      "epoch": 1.308411214953271,
      "grad_norm": 0.5596782830604753,
      "learning_rate": 6.934991807405919e-06,
      "loss": 0.2305,
      "step": 327
    },
    {
      "epoch": 1.3124165554072096,
      "grad_norm": 0.5665505846964202,
      "learning_rate": 6.913417161825449e-06,
      "loss": 0.2239,
      "step": 328
    },
    {
      "epoch": 1.3164218958611482,
      "grad_norm": 0.5958541676468069,
      "learning_rate": 6.8918006975887685e-06,
      "loss": 0.2441,
      "step": 329
    },
    {
      "epoch": 1.3204272363150868,
      "grad_norm": 0.5843413853980698,
      "learning_rate": 6.870142887134141e-06,
      "loss": 0.2221,
      "step": 330
    },
    {
      "epoch": 1.3244325767690253,
      "grad_norm": 0.5533441366477334,
      "learning_rate": 6.848444203803476e-06,
      "loss": 0.224,
      "step": 331
    },
    {
      "epoch": 1.328437917222964,
      "grad_norm": 0.5944276005227449,
      "learning_rate": 6.8267051218319766e-06,
      "loss": 0.2333,
      "step": 332
    },
    {
      "epoch": 1.3324432576769025,
      "grad_norm": 0.5816039732327815,
      "learning_rate": 6.804926116337779e-06,
      "loss": 0.2332,
      "step": 333
    },
    {
      "epoch": 1.3364485981308412,
      "grad_norm": 0.5997442007990729,
      "learning_rate": 6.783107663311566e-06,
      "loss": 0.2288,
      "step": 334
    },
    {
      "epoch": 1.3404539385847798,
      "grad_norm": 0.5758289065119726,
      "learning_rate": 6.7612502396061685e-06,
      "loss": 0.238,
      "step": 335
    },
    {
      "epoch": 1.3444592790387184,
      "grad_norm": 0.578849426349599,
      "learning_rate": 6.739354322926136e-06,
      "loss": 0.2382,
      "step": 336
    },
    {
      "epoch": 1.3484646194926568,
      "grad_norm": 0.571090431459051,
      "learning_rate": 6.717420391817306e-06,
      "loss": 0.2663,
      "step": 337
    },
    {
      "epoch": 1.3524699599465955,
      "grad_norm": 0.6196058792645048,
      "learning_rate": 6.6954489256563334e-06,
      "loss": 0.2274,
      "step": 338
    },
    {
      "epoch": 1.356475300400534,
      "grad_norm": 0.5922325724177396,
      "learning_rate": 6.6734404046402256e-06,
      "loss": 0.2199,
      "step": 339
    },
    {
      "epoch": 1.3604806408544725,
      "grad_norm": 0.5523613961098914,
      "learning_rate": 6.651395309775837e-06,
      "loss": 0.2352,
      "step": 340
    },
    {
      "epoch": 1.3644859813084111,
      "grad_norm": 0.6615232115067652,
      "learning_rate": 6.629314122869363e-06,
      "loss": 0.2259,
      "step": 341
    },
    {
      "epoch": 1.3684913217623498,
| "grad_norm": 0.5749887582077661, | |
| "learning_rate": 6.607197326515808e-06, | |
| "loss": 0.2515, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 1.3724966622162884, | |
| "grad_norm": 0.6229806856360468, | |
| "learning_rate": 6.585045404088442e-06, | |
| "loss": 0.2446, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 1.376502002670227, | |
| "grad_norm": 0.6535943167246338, | |
| "learning_rate": 6.562858839728224e-06, | |
| "loss": 0.233, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 1.3805073431241657, | |
| "grad_norm": 0.564932235491322, | |
| "learning_rate": 6.540638118333235e-06, | |
| "loss": 0.2377, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 1.384512683578104, | |
| "grad_norm": 0.5864382063135621, | |
| "learning_rate": 6.518383725548074e-06, | |
| "loss": 0.2351, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.3885180240320427, | |
| "grad_norm": 0.5719846231002432, | |
| "learning_rate": 6.4960961477532444e-06, | |
| "loss": 0.2213, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 1.3925233644859814, | |
| "grad_norm": 0.5996971644613003, | |
| "learning_rate": 6.473775872054522e-06, | |
| "loss": 0.2315, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.3965287049399198, | |
| "grad_norm": 0.6164036127115975, | |
| "learning_rate": 6.451423386272312e-06, | |
| "loss": 0.233, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 1.4005340453938584, | |
| "grad_norm": 0.6375735882940162, | |
| "learning_rate": 6.429039178930989e-06, | |
| "loss": 0.2303, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.404539385847797, | |
| "grad_norm": 0.6098688935758428, | |
| "learning_rate": 6.406623739248214e-06, | |
| "loss": 0.2337, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 1.4085447263017357, | |
| "grad_norm": 0.5853193211453952, | |
| "learning_rate": 6.384177557124247e-06, | |
| "loss": 0.2317, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.4125500667556743, | |
| "grad_norm": 0.5660416243135848, | |
| "learning_rate": 6.361701123131242e-06, | |
| "loss": 0.2399, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 1.416555407209613, | |
| "grad_norm": 0.6522270889233022, | |
| "learning_rate": 6.339194928502516e-06, | |
| "loss": 0.2438, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.4205607476635513, | |
| "grad_norm": 0.6108617575895426, | |
| "learning_rate": 6.3166594651218235e-06, | |
| "loss": 0.2273, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 1.42456608811749, | |
| "grad_norm": 0.5025445202572053, | |
| "learning_rate": 6.294095225512604e-06, | |
| "loss": 0.2134, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.4285714285714286, | |
| "grad_norm": 0.5765280021465139, | |
| "learning_rate": 6.271502702827209e-06, | |
| "loss": 0.2249, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 1.4325767690253672, | |
| "grad_norm": 0.5862375523852222, | |
| "learning_rate": 6.248882390836135e-06, | |
| "loss": 0.2326, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.4365821094793056, | |
| "grad_norm": 0.6482330755264025, | |
| "learning_rate": 6.226234783917224e-06, | |
| "loss": 0.2264, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 1.4405874499332443, | |
| "grad_norm": 0.5899710814731541, | |
| "learning_rate": 6.2035603770448664e-06, | |
| "loss": 0.2261, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.444592790387183, | |
| "grad_norm": 0.5866295112067526, | |
| "learning_rate": 6.180859665779173e-06, | |
| "loss": 0.2523, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 1.4485981308411215, | |
| "grad_norm": 0.6837319847065889, | |
| "learning_rate": 6.158133146255153e-06, | |
| "loss": 0.2423, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.4526034712950602, | |
| "grad_norm": 0.6307432382267119, | |
| "learning_rate": 6.135381315171867e-06, | |
| "loss": 0.2425, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 1.4566088117489986, | |
| "grad_norm": 0.5774555129513689, | |
| "learning_rate": 6.112604669781572e-06, | |
| "loss": 0.244, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.4606141522029372, | |
| "grad_norm": 0.5910862302024886, | |
| "learning_rate": 6.089803707878855e-06, | |
| "loss": 0.2466, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 1.4646194926568759, | |
| "grad_norm": 0.5816261166399118, | |
| "learning_rate": 6.066978927789751e-06, | |
| "loss": 0.2166, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.4686248331108145, | |
| "grad_norm": 0.5583031010374657, | |
| "learning_rate": 6.04413082836085e-06, | |
| "loss": 0.2274, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 1.472630173564753, | |
| "grad_norm": 0.6020357565888714, | |
| "learning_rate": 6.0212599089484026e-06, | |
| "loss": 0.2423, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.4766355140186915, | |
| "grad_norm": 0.6086024579173414, | |
| "learning_rate": 5.998366669407398e-06, | |
| "loss": 0.2347, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 1.4806408544726302, | |
| "grad_norm": 0.6639055801702823, | |
| "learning_rate": 5.975451610080643e-06, | |
| "loss": 0.2257, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.4846461949265688, | |
| "grad_norm": 0.5765943328550973, | |
| "learning_rate": 5.952515231787825e-06, | |
| "loss": 0.2299, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 1.4886515353805074, | |
| "grad_norm": 0.6523998837222308, | |
| "learning_rate": 5.929558035814574e-06, | |
| "loss": 0.232, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.492656875834446, | |
| "grad_norm": 0.6525256233306673, | |
| "learning_rate": 5.906580523901493e-06, | |
| "loss": 0.2249, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 1.4966622162883845, | |
| "grad_norm": 0.6002500523113792, | |
| "learning_rate": 5.883583198233202e-06, | |
| "loss": 0.2296, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 1.500667556742323, | |
| "grad_norm": 0.5859819020795045, | |
| "learning_rate": 5.86056656142736e-06, | |
| "loss": 0.245, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 1.5046728971962615, | |
| "grad_norm": 0.6224565671534654, | |
| "learning_rate": 5.837531116523683e-06, | |
| "loss": 0.2144, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 1.5086782376502001, | |
| "grad_norm": 0.5645624857480281, | |
| "learning_rate": 5.814477366972945e-06, | |
| "loss": 0.2379, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 1.5126835781041388, | |
| "grad_norm": 0.6412786483722962, | |
| "learning_rate": 5.791405816625974e-06, | |
| "loss": 0.2307, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.5166889185580774, | |
| "grad_norm": 0.6304941089005965, | |
| "learning_rate": 5.768316969722651e-06, | |
| "loss": 0.2225, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 1.520694259012016, | |
| "grad_norm": 0.5988757758307962, | |
| "learning_rate": 5.745211330880872e-06, | |
| "loss": 0.2258, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.5246995994659547, | |
| "grad_norm": 0.5815105109581388, | |
| "learning_rate": 5.722089405085537e-06, | |
| "loss": 0.2414, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 1.5287049399198933, | |
| "grad_norm": 0.6249599659350047, | |
| "learning_rate": 5.698951697677498e-06, | |
| "loss": 0.2174, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 1.5327102803738317, | |
| "grad_norm": 0.6137726726584709, | |
| "learning_rate": 5.6757987143425276e-06, | |
| "loss": 0.2236, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 1.5367156208277704, | |
| "grad_norm": 0.5347431576806394, | |
| "learning_rate": 5.65263096110026e-06, | |
| "loss": 0.2269, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.540720961281709, | |
| "grad_norm": 0.5727634021907735, | |
| "learning_rate": 5.629448944293128e-06, | |
| "loss": 0.2005, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 1.5447263017356474, | |
| "grad_norm": 0.5965865418656213, | |
| "learning_rate": 5.6062531705753075e-06, | |
| "loss": 0.2424, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 1.548731642189586, | |
| "grad_norm": 0.6169279262529003, | |
| "learning_rate": 5.583044146901638e-06, | |
| "loss": 0.2232, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 1.5527369826435247, | |
| "grad_norm": 0.6333814417689827, | |
| "learning_rate": 5.559822380516539e-06, | |
| "loss": 0.2353, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.5567423230974633, | |
| "grad_norm": 0.5578859188906384, | |
| "learning_rate": 5.536588378942933e-06, | |
| "loss": 0.2279, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 1.560747663551402, | |
| "grad_norm": 0.5725028706674725, | |
| "learning_rate": 5.513342649971143e-06, | |
| "loss": 0.2166, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.5647530040053406, | |
| "grad_norm": 0.5718602933152237, | |
| "learning_rate": 5.490085701647805e-06, | |
| "loss": 0.2248, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 1.5687583444592792, | |
| "grad_norm": 0.5624960499027032, | |
| "learning_rate": 5.466818042264754e-06, | |
| "loss": 0.2218, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 1.5727636849132176, | |
| "grad_norm": 0.5783959440608054, | |
| "learning_rate": 5.443540180347927e-06, | |
| "loss": 0.2271, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 1.5767690253671562, | |
| "grad_norm": 0.6463973620860086, | |
| "learning_rate": 5.420252624646238e-06, | |
| "loss": 0.2297, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.5807743658210947, | |
| "grad_norm": 0.5796227960968064, | |
| "learning_rate": 5.396955884120465e-06, | |
| "loss": 0.2249, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 1.5847797062750333, | |
| "grad_norm": 0.6378570148256715, | |
| "learning_rate": 5.373650467932122e-06, | |
| "loss": 0.1968, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.588785046728972, | |
| "grad_norm": 0.537428645891956, | |
| "learning_rate": 5.350336885432337e-06, | |
| "loss": 0.2161, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 1.5927903871829105, | |
| "grad_norm": 0.6322802889879201, | |
| "learning_rate": 5.327015646150716e-06, | |
| "loss": 0.2342, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 1.5967957276368492, | |
| "grad_norm": 0.6773017918561144, | |
| "learning_rate": 5.303687259784206e-06, | |
| "loss": 0.2234, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 1.6008010680907878, | |
| "grad_norm": 0.5862814494934032, | |
| "learning_rate": 5.2803522361859596e-06, | |
| "loss": 0.2101, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.6048064085447264, | |
| "grad_norm": 0.5824962855214252, | |
| "learning_rate": 5.257011085354187e-06, | |
| "loss": 0.2432, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 1.6088117489986649, | |
| "grad_norm": 0.6031958185300317, | |
| "learning_rate": 5.233664317421012e-06, | |
| "loss": 0.232, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.6128170894526035, | |
| "grad_norm": 0.5862841084106785, | |
| "learning_rate": 5.210312442641327e-06, | |
| "loss": 0.2216, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 1.616822429906542, | |
| "grad_norm": 0.6566275421066564, | |
| "learning_rate": 5.18695597138163e-06, | |
| "loss": 0.2285, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 1.6208277703604805, | |
| "grad_norm": 0.6361225120156005, | |
| "learning_rate": 5.1635954141088815e-06, | |
| "loss": 0.2243, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 1.6248331108144192, | |
| "grad_norm": 0.6538742068020424, | |
| "learning_rate": 5.140231281379345e-06, | |
| "loss": 0.2301, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 1.6288384512683578, | |
| "grad_norm": 0.6553343367565488, | |
| "learning_rate": 5.116864083827425e-06, | |
| "loss": 0.2437, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 1.6328437917222964, | |
| "grad_norm": 0.5611348158619709, | |
| "learning_rate": 5.093494332154511e-06, | |
| "loss": 0.2146, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.636849132176235, | |
| "grad_norm": 0.549466753270084, | |
| "learning_rate": 5.070122537117812e-06, | |
| "loss": 0.2496, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 1.6408544726301737, | |
| "grad_norm": 0.636480400336947, | |
| "learning_rate": 5.046749209519197e-06, | |
| "loss": 0.2032, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.644859813084112, | |
| "grad_norm": 0.5984681247401569, | |
| "learning_rate": 5.023374860194028e-06, | |
| "loss": 0.204, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 1.6488651535380507, | |
| "grad_norm": 0.5956484635411822, | |
| "learning_rate": 5e-06, | |
| "loss": 0.2227, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 1.6528704939919892, | |
| "grad_norm": 0.5495043593104894, | |
| "learning_rate": 4.976625139805974e-06, | |
| "loss": 0.2201, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 1.6568758344459278, | |
| "grad_norm": 0.5546136194425144, | |
| "learning_rate": 4.953250790480805e-06, | |
| "loss": 0.2139, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 1.6608811748998664, | |
| "grad_norm": 0.5761321975039401, | |
| "learning_rate": 4.92987746288219e-06, | |
| "loss": 0.2164, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 1.664886515353805, | |
| "grad_norm": 0.5842428190534428, | |
| "learning_rate": 4.90650566784549e-06, | |
| "loss": 0.2367, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 1.6688918558077437, | |
| "grad_norm": 0.6113684961889835, | |
| "learning_rate": 4.883135916172576e-06, | |
| "loss": 0.2367, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 1.6728971962616823, | |
| "grad_norm": 0.6680898005899061, | |
| "learning_rate": 4.859768718620656e-06, | |
| "loss": 0.2132, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 1.676902536715621, | |
| "grad_norm": 0.5445125151437461, | |
| "learning_rate": 4.83640458589112e-06, | |
| "loss": 0.2138, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 1.6809078771695594, | |
| "grad_norm": 0.5559680738599577, | |
| "learning_rate": 4.8130440286183725e-06, | |
| "loss": 0.2267, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.684913217623498, | |
| "grad_norm": 0.6144551876120194, | |
| "learning_rate": 4.789687557358676e-06, | |
| "loss": 0.2182, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 1.6889185580774366, | |
| "grad_norm": 0.5897924033640597, | |
| "learning_rate": 4.7663356825789894e-06, | |
| "loss": 0.2122, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 1.692923898531375, | |
| "grad_norm": 0.5598416977353012, | |
| "learning_rate": 4.742988914645814e-06, | |
| "loss": 0.2216, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 1.6969292389853137, | |
| "grad_norm": 0.5601120553987341, | |
| "learning_rate": 4.719647763814041e-06, | |
| "loss": 0.2177, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 1.7009345794392523, | |
| "grad_norm": 0.5891529754303583, | |
| "learning_rate": 4.696312740215794e-06, | |
| "loss": 0.2005, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 1.704939919893191, | |
| "grad_norm": 0.5680866327716454, | |
| "learning_rate": 4.672984353849285e-06, | |
| "loss": 0.2326, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 1.7089452603471296, | |
| "grad_norm": 0.5657360719382496, | |
| "learning_rate": 4.649663114567663e-06, | |
| "loss": 0.2131, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 1.7129506008010682, | |
| "grad_norm": 0.5740029198869598, | |
| "learning_rate": 4.626349532067879e-06, | |
| "loss": 0.2138, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 1.7169559412550068, | |
| "grad_norm": 0.6388774843029362, | |
| "learning_rate": 4.603044115879536e-06, | |
| "loss": 0.2251, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 1.7209612817089452, | |
| "grad_norm": 0.6877647851263269, | |
| "learning_rate": 4.579747375353763e-06, | |
| "loss": 0.2212, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.7249666221628839, | |
| "grad_norm": 0.6414623377958295, | |
| "learning_rate": 4.556459819652074e-06, | |
| "loss": 0.2414, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 1.7289719626168223, | |
| "grad_norm": 0.6323839747789576, | |
| "learning_rate": 4.533181957735247e-06, | |
| "loss": 0.2339, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 1.732977303070761, | |
| "grad_norm": 0.639653221825577, | |
| "learning_rate": 4.509914298352197e-06, | |
| "loss": 0.2215, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 1.7369826435246996, | |
| "grad_norm": 0.6284764820090778, | |
| "learning_rate": 4.486657350028859e-06, | |
| "loss": 0.2119, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 1.7409879839786382, | |
| "grad_norm": 0.6067872660667347, | |
| "learning_rate": 4.463411621057068e-06, | |
| "loss": 0.2177, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 1.7449933244325768, | |
| "grad_norm": 0.6234859050465342, | |
| "learning_rate": 4.4401776194834615e-06, | |
| "loss": 0.207, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 1.7489986648865155, | |
| "grad_norm": 0.6562941478198795, | |
| "learning_rate": 4.4169558530983635e-06, | |
| "loss": 0.2043, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 1.753004005340454, | |
| "grad_norm": 0.5909532065510827, | |
| "learning_rate": 4.393746829424693e-06, | |
| "loss": 0.2124, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 1.7570093457943925, | |
| "grad_norm": 0.6266064150386942, | |
| "learning_rate": 4.3705510557068746e-06, | |
| "loss": 0.2156, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 1.7610146862483311, | |
| "grad_norm": 0.5569141516837799, | |
| "learning_rate": 4.347369038899744e-06, | |
| "loss": 0.1997, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.7650200267022695, | |
| "grad_norm": 0.6592779504723204, | |
| "learning_rate": 4.324201285657474e-06, | |
| "loss": 0.215, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 1.7690253671562082, | |
| "grad_norm": 0.6388494836347968, | |
| "learning_rate": 4.3010483023225045e-06, | |
| "loss": 0.2293, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 1.7730307076101468, | |
| "grad_norm": 0.691528320994546, | |
| "learning_rate": 4.277910594914466e-06, | |
| "loss": 0.1971, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 1.7770360480640854, | |
| "grad_norm": 0.6052938531570694, | |
| "learning_rate": 4.254788669119127e-06, | |
| "loss": 0.215, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 1.781041388518024, | |
| "grad_norm": 0.5915787822047736, | |
| "learning_rate": 4.231683030277349e-06, | |
| "loss": 0.22, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 1.7850467289719627, | |
| "grad_norm": 0.669779074015281, | |
| "learning_rate": 4.208594183374026e-06, | |
| "loss": 0.2139, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 1.7890520694259013, | |
| "grad_norm": 0.5637355181902335, | |
| "learning_rate": 4.185522633027057e-06, | |
| "loss": 0.2074, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 1.7930574098798397, | |
| "grad_norm": 0.6354293774014055, | |
| "learning_rate": 4.162468883476319e-06, | |
| "loss": 0.2029, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 1.7970627503337784, | |
| "grad_norm": 0.5585098291406209, | |
| "learning_rate": 4.139433438572641e-06, | |
| "loss": 0.2149, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 1.8010680907877168, | |
| "grad_norm": 0.5982104585865691, | |
| "learning_rate": 4.116416801766801e-06, | |
| "loss": 0.2272, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.8050734312416554, | |
| "grad_norm": 0.5958872678061956, | |
| "learning_rate": 4.0934194760985095e-06, | |
| "loss": 0.2033, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 1.809078771695594, | |
| "grad_norm": 0.6149274909600425, | |
| "learning_rate": 4.070441964185428e-06, | |
| "loss": 0.2047, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.8130841121495327, | |
| "grad_norm": 0.649920066288117, | |
| "learning_rate": 4.047484768212175e-06, | |
| "loss": 0.2127, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 1.8170894526034713, | |
| "grad_norm": 0.6231664353854522, | |
| "learning_rate": 4.02454838991936e-06, | |
| "loss": 0.214, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.82109479305741, | |
| "grad_norm": 0.6324772858655361, | |
| "learning_rate": 4.001633330592604e-06, | |
| "loss": 0.2279, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 1.8251001335113486, | |
| "grad_norm": 0.6387099548525226, | |
| "learning_rate": 3.978740091051599e-06, | |
| "loss": 0.231, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.829105473965287, | |
| "grad_norm": 0.5888035825906338, | |
| "learning_rate": 3.955869171639151e-06, | |
| "loss": 0.2043, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 1.8331108144192256, | |
| "grad_norm": 0.5900537278953912, | |
| "learning_rate": 3.933021072210251e-06, | |
| "loss": 0.2189, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.8371161548731643, | |
| "grad_norm": 0.6155751843123176, | |
| "learning_rate": 3.910196292121147e-06, | |
| "loss": 0.2172, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 1.8411214953271027, | |
| "grad_norm": 0.6185704225951081, | |
| "learning_rate": 3.887395330218429e-06, | |
| "loss": 0.2433, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.8451268357810413, | |
| "grad_norm": 0.5757793970809637, | |
| "learning_rate": 3.864618684828135e-06, | |
| "loss": 0.2086, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 1.84913217623498, | |
| "grad_norm": 0.5715122189230345, | |
| "learning_rate": 3.84186685374485e-06, | |
| "loss": 0.2174, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.8531375166889186, | |
| "grad_norm": 0.562934776821966, | |
| "learning_rate": 3.81914033422083e-06, | |
| "loss": 0.1969, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 1.8571428571428572, | |
| "grad_norm": 0.584422709822903, | |
| "learning_rate": 3.7964396229551365e-06, | |
| "loss": 0.2264, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.8611481975967958, | |
| "grad_norm": 0.5887703309285004, | |
| "learning_rate": 3.7737652160827752e-06, | |
| "loss": 0.2118, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 1.8651535380507345, | |
| "grad_norm": 0.5978955576130686, | |
| "learning_rate": 3.751117609163865e-06, | |
| "loss": 0.2162, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.8691588785046729, | |
| "grad_norm": 0.6119642861090717, | |
| "learning_rate": 3.7284972971727907e-06, | |
| "loss": 0.225, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 1.8731642189586115, | |
| "grad_norm": 0.6210379401883211, | |
| "learning_rate": 3.705904774487396e-06, | |
| "loss": 0.2056, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.87716955941255, | |
| "grad_norm": 0.6081726738147702, | |
| "learning_rate": 3.683340534878176e-06, | |
| "loss": 0.2046, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 1.8811748998664886, | |
| "grad_norm": 0.6467889875621615, | |
| "learning_rate": 3.6608050714974854e-06, | |
| "loss": 0.2503, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.8851802403204272, | |
| "grad_norm": 0.6021455582975629, | |
| "learning_rate": 3.63829887686876e-06, | |
| "loss": 0.2244, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 1.8891855807743658, | |
| "grad_norm": 0.634859275238841, | |
| "learning_rate": 3.6158224428757538e-06, | |
| "loss": 0.2208, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.8931909212283045, | |
| "grad_norm": 0.6211383819455257, | |
| "learning_rate": 3.5933762607517875e-06, | |
| "loss": 0.2201, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 1.897196261682243, | |
| "grad_norm": 0.5805284245558593, | |
| "learning_rate": 3.5709608210690127e-06, | |
| "loss": 0.2171, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.9012016021361817, | |
| "grad_norm": 0.5969539880951342, | |
| "learning_rate": 3.5485766137276894e-06, | |
| "loss": 0.1989, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 1.9052069425901201, | |
| "grad_norm": 0.5763877271910841, | |
| "learning_rate": 3.526224127945479e-06, | |
| "loss": 0.2051, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.9092122830440588, | |
| "grad_norm": 0.573639466833024, | |
| "learning_rate": 3.5039038522467572e-06, | |
| "loss": 0.2216, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 1.9132176234979972, | |
| "grad_norm": 0.6187640637863968, | |
| "learning_rate": 3.4816162744519266e-06, | |
| "loss": 0.2207, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.9172229639519358, | |
| "grad_norm": 0.6135054921454743, | |
| "learning_rate": 3.459361881666766e-06, | |
| "loss": 0.216, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 1.9212283044058744, | |
| "grad_norm": 0.5975760329649652, | |
| "learning_rate": 3.4371411602717785e-06, | |
| "loss": 0.1997, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.925233644859813, | |
| "grad_norm": 0.5864424343197752, | |
| "learning_rate": 3.4149545959115604e-06, | |
| "loss": 0.1997, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 1.9292389853137517, | |
| "grad_norm": 0.6206284942820284, | |
| "learning_rate": 3.3928026734841935e-06, | |
| "loss": 0.1958, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.9332443257676903, | |
| "grad_norm": 0.627723357086264, | |
| "learning_rate": 3.3706858771306393e-06, | |
| "loss": 0.2099, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 1.937249666221629, | |
| "grad_norm": 0.6391449489733926, | |
| "learning_rate": 3.3486046902241663e-06, | |
| "loss": 0.1946, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.9412550066755674, | |
| "grad_norm": 0.6232074962194952, | |
| "learning_rate": 3.3265595953597774e-06, | |
| "loss": 0.2167, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 1.945260347129506, | |
| "grad_norm": 0.7178387576084606, | |
| "learning_rate": 3.3045510743436665e-06, | |
| "loss": 0.2052, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.9492656875834444, | |
| "grad_norm": 0.66259069636869, | |
| "learning_rate": 3.2825796081826943e-06, | |
| "loss": 0.2103, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 1.953271028037383, | |
| "grad_norm": 0.6319630042493534, | |
| "learning_rate": 3.2606456770738636e-06, | |
| "loss": 0.1899, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.9572763684913217, | |
| "grad_norm": 0.6583941141635503, | |
| "learning_rate": 3.2387497603938327e-06, | |
| "loss": 0.2057, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 1.9612817089452603, | |
| "grad_norm": 0.6583371328399131, | |
| "learning_rate": 3.216892336688435e-06, | |
| "loss": 0.208, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.965287049399199, | |
| "grad_norm": 0.6626656334140933, | |
| "learning_rate": 3.1950738836622226e-06, | |
| "loss": 0.213, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 1.9692923898531376, | |
| "grad_norm": 0.57636570851929, | |
| "learning_rate": 3.173294878168025e-06, | |
| "loss": 0.2123, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.9732977303070762, | |
| "grad_norm": 0.628166413351637, | |
| "learning_rate": 3.1515557961965254e-06, | |
| "loss": 0.2082, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 1.9773030707610146, | |
| "grad_norm": 0.6161007175331755, | |
| "learning_rate": 3.1298571128658593e-06, | |
| "loss": 0.2071, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.9813084112149533, | |
| "grad_norm": 0.5578291746094638, | |
| "learning_rate": 3.1081993024112328e-06, | |
| "loss": 0.1953, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 1.985313751668892, | |
| "grad_norm": 0.5742324867913953, | |
| "learning_rate": 3.0865828381745515e-06, | |
| "loss": 0.2183, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.9893190921228303, | |
| "grad_norm": 0.5651971848567813, | |
| "learning_rate": 3.0650081925940834e-06, | |
| "loss": 0.1881, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 1.993324432576769, | |
| "grad_norm": 0.5805509174084333, | |
| "learning_rate": 3.043475837194126e-06, | |
| "loss": 0.2148, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.9973297730307076, | |
| "grad_norm": 0.753242305787025, | |
| "learning_rate": 3.021986242574707e-06, | |
| "loss": 0.2226, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.7484446599170808, | |
| "learning_rate": 3.000539878401296e-06, | |
| "loss": 0.2156, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.0040053404539386, | |
| "grad_norm": 0.7266923236604501, | |
| "learning_rate": 2.9791372133945405e-06, | |
| "loss": 0.1868, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 2.0080106809078773, | |
| "grad_norm": 0.6838990546286972, | |
| "learning_rate": 2.95777871532002e-06, | |
| "loss": 0.1693, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 2.012016021361816, | |
| "grad_norm": 0.6522038544426395, | |
| "learning_rate": 2.936464850978027e-06, | |
| "loss": 0.1648, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 2.0160213618157545, | |
| "grad_norm": 0.6932425119304833, | |
| "learning_rate": 2.9151960861933616e-06, | |
| "loss": 0.1781, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 2.0200267022696927, | |
| "grad_norm": 0.6187151564266777, | |
| "learning_rate": 2.893972885805148e-06, | |
| "loss": 0.1814, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 2.0240320427236314, | |
| "grad_norm": 0.5852591736999052, | |
| "learning_rate": 2.8727957136566825e-06, | |
| "loss": 0.1616, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 2.02803738317757, | |
| "grad_norm": 0.6216187678840704, | |
| "learning_rate": 2.8516650325852883e-06, | |
| "loss": 0.1776, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 2.0320427236315086, | |
| "grad_norm": 0.6805660651282351, | |
| "learning_rate": 2.83058130441221e-06, | |
| "loss": 0.1848, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 2.0360480640854473, | |
| "grad_norm": 0.8026841589589839, | |
| "learning_rate": 2.809544989932508e-06, | |
| "loss": 0.1905, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 2.040053404539386, | |
| "grad_norm": 0.661638883853578, | |
| "learning_rate": 2.7885565489049948e-06, | |
| "loss": 0.1568, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 2.0440587449933245, | |
| "grad_norm": 0.6472428396855733, | |
| "learning_rate": 2.7676164400421864e-06, | |
| "loss": 0.1767, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 2.048064085447263, | |
| "grad_norm": 0.6917047834821735, | |
| "learning_rate": 2.746725121000273e-06, | |
| "loss": 0.1871, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 2.052069425901202, | |
| "grad_norm": 0.6021301697708129, | |
| "learning_rate": 2.725883048369128e-06, | |
| "loss": 0.1695, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 2.05607476635514, | |
| "grad_norm": 0.6089561158393074, | |
| "learning_rate": 2.705090677662311e-06, | |
| "loss": 0.1743, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 2.0600801068090786, | |
| "grad_norm": 0.6433217615782736, | |
| "learning_rate": 2.684348463307128e-06, | |
| "loss": 0.1648, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 2.0640854472630172, | |
| "grad_norm": 0.6675245713273296, | |
| "learning_rate": 2.66365685863469e-06, | |
| "loss": 0.1695, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 2.068090787716956, | |
| "grad_norm": 0.5639272937571744, | |
| "learning_rate": 2.6430163158700116e-06, | |
| "loss": 0.1552, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 2.0720961281708945, | |
| "grad_norm": 0.6756211271225167, | |
| "learning_rate": 2.6224272861221245e-06, | |
| "loss": 0.1871, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 2.076101468624833, | |
| "grad_norm": 0.56951104964083, | |
| "learning_rate": 2.601890219374217e-06, | |
| "loss": 0.175, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 2.0801068090787718, | |
| "grad_norm": 0.5849605152037826, | |
| "learning_rate": 2.5814055644738013e-06, | |
| "loss": 0.1577, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 2.0841121495327104, | |
| "grad_norm": 0.6208978605303654, | |
| "learning_rate": 2.5609737691229055e-06, | |
| "loss": 0.1676, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 2.088117489986649, | |
| "grad_norm": 0.6300535483875338, | |
| "learning_rate": 2.5405952798682844e-06, | |
| "loss": 0.1767, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 2.0921228304405872, | |
| "grad_norm": 0.6515325771939616, | |
| "learning_rate": 2.520270542091663e-06, | |
| "loss": 0.1862, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 2.096128170894526, | |
| "grad_norm": 0.6573962419086741, | |
| "learning_rate": 2.5000000000000015e-06, | |
| "loss": 0.1707, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 2.1001335113484645, | |
| "grad_norm": 0.6008492264289961, | |
| "learning_rate": 2.4797840966157877e-06, | |
| "loss": 0.1562, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 2.104138851802403, | |
| "grad_norm": 0.5554906783739959, | |
| "learning_rate": 2.4596232737673544e-06, | |
| "loss": 0.1753, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 2.1081441922563418, | |
| "grad_norm": 0.5883215878515664, | |
| "learning_rate": 2.439517972079222e-06, | |
| "loss": 0.1739, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 2.1121495327102804, | |
| "grad_norm": 0.6225552692825768, | |
| "learning_rate": 2.4194686309624664e-06, | |
| "loss": 0.1736, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 2.116154873164219, | |
| "grad_norm": 0.6538525824437825, | |
| "learning_rate": 2.3994756886051267e-06, | |
| "loss": 0.1793, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 2.1201602136181577, | |
| "grad_norm": 0.6426343380918219, | |
| "learning_rate": 2.3795395819626116e-06, | |
| "loss": 0.1636, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 2.1241655540720963, | |
| "grad_norm": 0.6984876016877216, | |
| "learning_rate": 2.3596607467481602e-06, | |
| "loss": 0.1818, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 2.128170894526035, | |
| "grad_norm": 0.5969581027123277, | |
| "learning_rate": 2.339839617423318e-06, | |
| "loss": 0.1761, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 2.132176234979973, | |
| "grad_norm": 0.5965591945629233, | |
| "learning_rate": 2.320076627188438e-06, | |
| "loss": 0.1699, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 2.1361815754339117, | |
| "grad_norm": 0.6646391923991977, | |
| "learning_rate": 2.300372207973219e-06, | |
| "loss": 0.1642, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 2.1401869158878504, | |
| "grad_norm": 0.5996069077675478, | |
| "learning_rate": 2.280726790427258e-06, | |
| "loss": 0.1721, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 2.144192256341789, | |
| "grad_norm": 0.6741481126361855, | |
| "learning_rate": 2.261140803910644e-06, | |
| "loss": 0.1727, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 2.1481975967957276, | |
| "grad_norm": 0.6323694946147757, | |
| "learning_rate": 2.2416146764845733e-06, | |
| "loss": 0.1702, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 2.1522029372496663, | |
| "grad_norm": 0.6862267704077283, | |
| "learning_rate": 2.2221488349019903e-06, | |
| "loss": 0.1729, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 2.156208277703605, | |
| "grad_norm": 0.6070081128579359, | |
| "learning_rate": 2.202743704598263e-06, | |
| "loss": 0.1593, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 2.1602136181575435, | |
| "grad_norm": 0.7278096682292641, | |
| "learning_rate": 2.1833997096818897e-06, | |
| "loss": 0.1836, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 2.164218958611482, | |
| "grad_norm": 0.632259449291054, | |
| "learning_rate": 2.1641172729252206e-06, | |
| "loss": 0.1711, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 2.1682242990654204, | |
| "grad_norm": 0.6117574742799209, | |
| "learning_rate": 2.1448968157552243e-06, | |
| "loss": 0.1632, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 2.172229639519359, | |
| "grad_norm": 0.631961523767331, | |
| "learning_rate": 2.1257387582442746e-06, | |
| "loss": 0.1694, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 2.1762349799732976, | |
| "grad_norm": 0.6156725197172384, | |
| "learning_rate": 2.1066435191009717e-06, | |
| "loss": 0.1643, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 2.1802403204272363, | |
| "grad_norm": 0.6322664734036844, | |
| "learning_rate": 2.08761151566099e-06, | |
| "loss": 0.1798, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 2.184245660881175, | |
| "grad_norm": 0.6464718958812395, | |
| "learning_rate": 2.0686431638779564e-06, | |
| "loss": 0.1731, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 2.1882510013351135, | |
| "grad_norm": 0.6399447883731574, | |
| "learning_rate": 2.04973887831436e-06, | |
| "loss": 0.1835, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 2.192256341789052, | |
| "grad_norm": 0.5720412483866, | |
| "learning_rate": 2.030899072132493e-06, | |
| "loss": 0.1574, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 2.196261682242991, | |
| "grad_norm": 0.5860714566791593, | |
| "learning_rate": 2.0121241570854165e-06, | |
| "loss": 0.182, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 2.2002670226969294, | |
| "grad_norm": 0.6083557073323276, | |
| "learning_rate": 1.9934145435079705e-06, | |
| "loss": 0.1661, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 2.204272363150868, | |
| "grad_norm": 0.6104569659276108, | |
| "learning_rate": 1.9747706403077943e-06, | |
| "loss": 0.1979, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 2.2082777036048062, | |
| "grad_norm": 0.62244894576839, | |
| "learning_rate": 1.956192854956397e-06, | |
| "loss": 0.1774, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 2.212283044058745, | |
| "grad_norm": 0.630950626155233, | |
| "learning_rate": 1.9376815934802496e-06, | |
| "loss": 0.1758, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 2.2162883845126835, | |
| "grad_norm": 0.6625191361259387, | |
| "learning_rate": 1.9192372604519127e-06, | |
| "loss": 0.1928, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 2.220293724966622, | |
| "grad_norm": 0.5820926209462741, | |
| "learning_rate": 1.9008602589811931e-06, | |
| "loss": 0.1565, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 2.2242990654205608, | |
| "grad_norm": 0.5567632552609313, | |
| "learning_rate": 1.8825509907063328e-06, | |
| "loss": 0.1756, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 2.2283044058744994, | |
| "grad_norm": 0.5980702955281321, | |
| "learning_rate": 1.864309855785234e-06, | |
| "loss": 0.1594, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 2.232309746328438, | |
| "grad_norm": 0.5820363892907623, | |
| "learning_rate": 1.8461372528867095e-06, | |
| "loss": 0.1768, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 2.2363150867823767, | |
| "grad_norm": 0.5967644083406489, | |
| "learning_rate": 1.8280335791817733e-06, | |
| "loss": 0.1689, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 2.2403204272363153, | |
| "grad_norm": 0.6250075972601391, | |
| "learning_rate": 1.809999230334958e-06, | |
| "loss": 0.1748, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 2.2443257676902535, | |
| "grad_norm": 0.606205688498456, | |
| "learning_rate": 1.7920346004956673e-06, | |
| "loss": 0.1834, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 2.248331108144192, | |
| "grad_norm": 0.5940182864906453, | |
| "learning_rate": 1.7741400822895633e-06, | |
| "loss": 0.1691, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 2.2523364485981308, | |
| "grad_norm": 0.5683156268730282, | |
| "learning_rate": 1.7563160668099838e-06, | |
| "loss": 0.1726, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 2.2563417890520694, | |
| "grad_norm": 0.6031458823381572, | |
| "learning_rate": 1.7385629436093958e-06, | |
| "loss": 0.1618, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 2.260347129506008, | |
| "grad_norm": 0.6310913458129014, | |
| "learning_rate": 1.7208811006908798e-06, | |
| "loss": 0.18, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 2.2643524699599467, | |
| "grad_norm": 0.6148520423770238, | |
| "learning_rate": 1.7032709244996559e-06, | |
| "loss": 0.1699, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 2.2683578104138853, | |
| "grad_norm": 0.6101595073175935, | |
| "learning_rate": 1.6857327999146284e-06, | |
| "loss": 0.1623, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 2.272363150867824, | |
| "grad_norm": 0.5775767049567889, | |
| "learning_rate": 1.6682671102399806e-06, | |
| "loss": 0.1611, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 2.2763684913217626, | |
| "grad_norm": 0.6215298766632447, | |
| "learning_rate": 1.6508742371967962e-06, | |
| "loss": 0.1708, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 2.2803738317757007, | |
| "grad_norm": 0.6064011848210037, | |
| "learning_rate": 1.633554560914714e-06, | |
| "loss": 0.1793, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 2.2843791722296394, | |
| "grad_norm": 0.5913150625405588, | |
| "learning_rate": 1.6163084599236278e-06, | |
| "loss": 0.1734, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 2.288384512683578, | |
| "grad_norm": 0.6290184807128071, | |
| "learning_rate": 1.5991363111454023e-06, | |
| "loss": 0.1643, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 2.2923898531375166, | |
| "grad_norm": 0.6150638457127714, | |
| "learning_rate": 1.5820384898856433e-06, | |
| "loss": 0.1662, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 2.2963951935914553, | |
| "grad_norm": 0.6496821212159293, | |
| "learning_rate": 1.5650153698254916e-06, | |
| "loss": 0.1854, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 2.300400534045394, | |
| "grad_norm": 0.6678529895051883, | |
| "learning_rate": 1.5480673230134585e-06, | |
| "loss": 0.1618, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 2.3044058744993325, | |
| "grad_norm": 0.6202814553309792, | |
| "learning_rate": 1.5311947198572918e-06, | |
| "loss": 0.1669, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 2.308411214953271, | |
| "grad_norm": 0.5462233451328915, | |
| "learning_rate": 1.514397929115884e-06, | |
| "loss": 0.1578, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 2.31241655540721, | |
| "grad_norm": 0.6123799648363422, | |
| "learning_rate": 1.4976773178912085e-06, | |
| "loss": 0.1678, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 2.316421895861148, | |
| "grad_norm": 0.6365385572090578, | |
| "learning_rate": 1.481033251620299e-06, | |
| "loss": 0.1686, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 2.3204272363150866, | |
| "grad_norm": 0.6063441561728166, | |
| "learning_rate": 1.4644660940672628e-06, | |
| "loss": 0.1772, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 2.3244325767690253, | |
| "grad_norm": 0.658171365772341, | |
| "learning_rate": 1.4479762073153304e-06, | |
| "loss": 0.1726, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 2.328437917222964, | |
| "grad_norm": 0.6269724587094964, | |
| "learning_rate": 1.4315639517589398e-06, | |
| "loss": 0.1662, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 2.3324432576769025, | |
| "grad_norm": 0.6133548152558246, | |
| "learning_rate": 1.4152296860958641e-06, | |
| "loss": 0.1702, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 2.336448598130841, | |
| "grad_norm": 0.66729626203352, | |
| "learning_rate": 1.3989737673193682e-06, | |
| "loss": 0.1926, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 2.34045393858478, | |
| "grad_norm": 0.6087596853512452, | |
| "learning_rate": 1.382796550710408e-06, | |
| "loss": 0.1886, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 2.3444592790387184, | |
| "grad_norm": 0.5915578437600094, | |
| "learning_rate": 1.3666983898298659e-06, | |
| "loss": 0.1541, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 2.348464619492657, | |
| "grad_norm": 0.6098604276701993, | |
| "learning_rate": 1.3506796365108232e-06, | |
| "loss": 0.1739, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 2.3524699599465952, | |
| "grad_norm": 0.6102908288565849, | |
| "learning_rate": 1.3347406408508695e-06, | |
| "loss": 0.1715, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 2.356475300400534, | |
| "grad_norm": 0.6133775478993069, | |
| "learning_rate": 1.3188817512044544e-06, | |
| "loss": 0.1646, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 2.3604806408544725, | |
| "grad_norm": 0.6293540508319713, | |
| "learning_rate": 1.3031033141752702e-06, | |
| "loss": 0.1711, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 2.364485981308411, | |
| "grad_norm": 0.7443933199698777, | |
| "learning_rate": 1.2874056746086772e-06, | |
| "loss": 0.152, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 2.3684913217623498, | |
| "grad_norm": 0.5731726505456445, | |
| "learning_rate": 1.2717891755841722e-06, | |
| "loss": 0.1509, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 2.3724966622162884, | |
| "grad_norm": 0.5987015572774188, | |
| "learning_rate": 1.2562541584078835e-06, | |
| "loss": 0.1664, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 2.376502002670227, | |
| "grad_norm": 0.6300995737372567, | |
| "learning_rate": 1.2408009626051137e-06, | |
| "loss": 0.1744, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 2.3805073431241657, | |
| "grad_norm": 0.6142455245728455, | |
| "learning_rate": 1.225429925912921e-06, | |
| "loss": 0.1563, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 2.3845126835781043, | |
| "grad_norm": 0.6234917981255259, | |
| "learning_rate": 1.2101413842727345e-06, | |
| "loss": 0.1648, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 2.3885180240320425, | |
| "grad_norm": 0.5825435971591906, | |
| "learning_rate": 1.1949356718230188e-06, | |
| "loss": 0.1602, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 2.392523364485981, | |
| "grad_norm": 0.6402288566179174, | |
| "learning_rate": 1.1798131208919628e-06, | |
| "loss": 0.1851, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 2.3965287049399198, | |
| "grad_norm": 0.6037042606359994, | |
| "learning_rate": 1.1647740619902193e-06, | |
| "loss": 0.1747, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 2.4005340453938584, | |
| "grad_norm": 0.5948400476478892, | |
| "learning_rate": 1.1498188238036862e-06, | |
| "loss": 0.1554, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 2.404539385847797, | |
| "grad_norm": 0.6966147020255966, | |
| "learning_rate": 1.134947733186315e-06, | |
| "loss": 0.1779, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 2.4085447263017357, | |
| "grad_norm": 0.6005191719492351, | |
| "learning_rate": 1.1201611151529756e-06, | |
| "loss": 0.1607, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 2.4125500667556743, | |
| "grad_norm": 0.5820830342726288, | |
| "learning_rate": 1.105459292872345e-06, | |
| "loss": 0.165, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 2.416555407209613, | |
| "grad_norm": 0.5655842631268676, | |
| "learning_rate": 1.0908425876598512e-06, | |
| "loss": 0.1528, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 2.4205607476635516, | |
| "grad_norm": 0.5964382967668805, | |
| "learning_rate": 1.0763113189706453e-06, | |
| "loss": 0.1694, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 2.4245660881174897, | |
| "grad_norm": 0.6456074354041683, | |
| "learning_rate": 1.0618658043926233e-06, | |
| "loss": 0.1747, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 2.4285714285714284, | |
| "grad_norm": 0.7700257765536643, | |
| "learning_rate": 1.047506359639483e-06, | |
| "loss": 0.1804, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 2.432576769025367, | |
| "grad_norm": 0.6024820562838693, | |
| "learning_rate": 1.0332332985438248e-06, | |
| "loss": 0.1704, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 2.4365821094793056, | |
| "grad_norm": 0.6345644986717863, | |
| "learning_rate": 1.0190469330502928e-06, | |
| "loss": 0.1782, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 2.4405874499332443, | |
| "grad_norm": 0.5806972189010675, | |
| "learning_rate": 1.004947573208756e-06, | |
| "loss": 0.1803, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 2.444592790387183, | |
| "grad_norm": 0.6158669549354134, | |
| "learning_rate": 9.909355271675335e-07, | |
| "loss": 0.1531, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 2.4485981308411215, | |
| "grad_norm": 0.6579929468353193, | |
| "learning_rate": 9.770111011666582e-07, | |
| "loss": 0.1821, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 2.45260347129506, | |
| "grad_norm": 0.5991038573483655, | |
| "learning_rate": 9.631745995311881e-07, | |
| "loss": 0.1658, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 2.456608811748999, | |
| "grad_norm": 0.5782202975221776, | |
| "learning_rate": 9.494263246645474e-07, | |
| "loss": 0.1525, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 2.4606141522029374, | |
| "grad_norm": 0.6234695823453394, | |
| "learning_rate": 9.357665770419244e-07, | |
| "loss": 0.1676, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 2.464619492656876, | |
| "grad_norm": 0.6614765092596231, | |
| "learning_rate": 9.221956552036992e-07, | |
| "loss": 0.1809, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 2.4686248331108143, | |
| "grad_norm": 0.5631502943362868, | |
| "learning_rate": 9.08713855748925e-07, | |
| "loss": 0.1511, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 2.472630173564753, | |
| "grad_norm": 0.6001768275330218, | |
| "learning_rate": 8.953214733288384e-07, | |
| "loss": 0.1685, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 2.4766355140186915, | |
| "grad_norm": 0.6128594108692941, | |
| "learning_rate": 8.820188006404268e-07, | |
| "loss": 0.167, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 2.48064085447263, | |
| "grad_norm": 0.6003517878466088, | |
| "learning_rate": 8.688061284200266e-07, | |
| "loss": 0.16, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 2.484646194926569, | |
| "grad_norm": 0.6125927993221344, | |
| "learning_rate": 8.556837454369698e-07, | |
| "loss": 0.1548, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 2.4886515353805074, | |
| "grad_norm": 0.620566464502616, | |
| "learning_rate": 8.426519384872733e-07, | |
| "loss": 0.1753, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 2.492656875834446, | |
| "grad_norm": 0.6137925843567528, | |
| "learning_rate": 8.297109923873753e-07, | |
| "loss": 0.1552, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 2.4966622162883847, | |
| "grad_norm": 0.5783321494225999, | |
| "learning_rate": 8.168611899679013e-07, | |
| "loss": 0.1643, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 2.5006675567423233, | |
| "grad_norm": 0.565759829851567, | |
| "learning_rate": 8.041028120674894e-07, | |
| "loss": 0.1568, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 2.5046728971962615, | |
| "grad_norm": 0.6438983607349034, | |
| "learning_rate": 7.914361375266505e-07, | |
| "loss": 0.167, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 2.5086782376502, | |
| "grad_norm": 0.7266085897712414, | |
| "learning_rate": 7.788614431816743e-07, | |
| "loss": 0.1775, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 2.512683578104139, | |
| "grad_norm": 0.5951499553068635, | |
| "learning_rate": 7.663790038585794e-07, | |
| "loss": 0.1567, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 2.5166889185580774, | |
| "grad_norm": 0.6024813088753026, | |
| "learning_rate": 7.539890923671061e-07, | |
| "loss": 0.1654, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 2.520694259012016, | |
| "grad_norm": 0.5611519260516766, | |
| "learning_rate": 7.416919794947536e-07, | |
| "loss": 0.1666, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 2.5246995994659547, | |
| "grad_norm": 0.6940972612783466, | |
| "learning_rate": 7.294879340008632e-07, | |
| "loss": 0.1745, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 2.5287049399198933, | |
| "grad_norm": 0.5590592473739715, | |
| "learning_rate": 7.173772226107434e-07, | |
| "loss": 0.1698, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 2.5327102803738315, | |
| "grad_norm": 0.591058116554628, | |
| "learning_rate": 7.053601100098401e-07, | |
| "loss": 0.1671, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 2.5367156208277706, | |
| "grad_norm": 0.6077726003073021, | |
| "learning_rate": 6.934368588379553e-07, | |
| "loss": 0.1847, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 2.5407209612817088, | |
| "grad_norm": 0.5900365979656913, | |
| "learning_rate": 6.816077296835006e-07, | |
| "loss": 0.1632, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 2.5447263017356474, | |
| "grad_norm": 0.6291526388062625, | |
| "learning_rate": 6.698729810778065e-07, | |
| "loss": 0.1669, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 2.548731642189586, | |
| "grad_norm": 0.6405676228401413, | |
| "learning_rate": 6.582328694894729e-07, | |
| "loss": 0.1678, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 2.5527369826435247, | |
| "grad_norm": 0.5916007034338923, | |
| "learning_rate": 6.46687649318759e-07, | |
| "loss": 0.1687, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 2.5567423230974633, | |
| "grad_norm": 0.6197597194779472, | |
| "learning_rate": 6.352375728920285e-07, | |
| "loss": 0.159, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 2.560747663551402, | |
| "grad_norm": 0.6078888459723736, | |
| "learning_rate": 6.238828904562316e-07, | |
| "loss": 0.1818, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 2.5647530040053406, | |
| "grad_norm": 0.5953442248473361, | |
| "learning_rate": 6.126238501734372e-07, | |
| "loss": 0.1747, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 2.568758344459279, | |
| "grad_norm": 0.6197098462200006, | |
| "learning_rate": 6.014606981154086e-07, | |
| "loss": 0.1705, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 2.572763684913218, | |
| "grad_norm": 0.6171410856963269, | |
| "learning_rate": 5.903936782582253e-07, | |
| "loss": 0.1771, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 2.576769025367156, | |
| "grad_norm": 0.5991169552059425, | |
| "learning_rate": 5.794230324769518e-07, | |
| "loss": 0.1669, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 2.5807743658210947, | |
| "grad_norm": 0.6103321073889593, | |
| "learning_rate": 5.685490005403499e-07, | |
| "loss": 0.1726, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 2.5847797062750333, | |
| "grad_norm": 0.6256121075513487, | |
| "learning_rate": 5.577718201056392e-07, | |
| "loss": 0.1557, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 2.588785046728972, | |
| "grad_norm": 0.6204867013864792, | |
| "learning_rate": 5.470917267133041e-07, | |
| "loss": 0.1603, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 2.5927903871829105, | |
| "grad_norm": 0.5859779013723672, | |
| "learning_rate": 5.365089537819435e-07, | |
| "loss": 0.1717, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 2.596795727636849, | |
| "grad_norm": 0.5950814615589916, | |
| "learning_rate": 5.260237326031698e-07, | |
| "loss": 0.1684, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 2.600801068090788, | |
| "grad_norm": 0.5788284827989015, | |
| "learning_rate": 5.156362923365587e-07, | |
| "loss": 0.1748, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 2.6048064085447264, | |
| "grad_norm": 0.6500848201090982, | |
| "learning_rate": 5.053468600046324e-07, | |
| "loss": 0.1551, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 2.608811748998665, | |
| "grad_norm": 0.579677458355888, | |
| "learning_rate": 4.951556604879049e-07, | |
| "loss": 0.1561, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 2.6128170894526033, | |
| "grad_norm": 0.5938327964966817, | |
| "learning_rate": 4.850629165199627e-07, | |
| "loss": 0.1748, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 2.616822429906542, | |
| "grad_norm": 0.5844525078342523, | |
| "learning_rate": 4.7506884868259996e-07, | |
| "loss": 0.1565, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 2.6208277703604805, | |
| "grad_norm": 0.5834653222280398, | |
| "learning_rate": 4.651736754009972e-07, | |
| "loss": 0.1631, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 2.624833110814419, | |
| "grad_norm": 0.5977062556990842, | |
| "learning_rate": 4.5537761293894535e-07, | |
| "loss": 0.1555, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 2.628838451268358, | |
| "grad_norm": 0.595817231192985, | |
| "learning_rate": 4.456808753941205e-07, | |
| "loss": 0.1881, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 2.6328437917222964, | |
| "grad_norm": 0.6038863958838983, | |
| "learning_rate": 4.3608367469340553e-07, | |
| "loss": 0.1611, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 2.636849132176235, | |
| "grad_norm": 0.5765506637261613, | |
| "learning_rate": 4.265862205882559e-07, | |
| "loss": 0.1669, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 2.6408544726301737, | |
| "grad_norm": 0.6081323911406266, | |
| "learning_rate": 4.171887206501191e-07, | |
| "loss": 0.166, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 2.6448598130841123, | |
| "grad_norm": 0.7942617005475607, | |
| "learning_rate": 4.078913802658946e-07, | |
| "loss": 0.1526, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 2.6488651535380505, | |
| "grad_norm": 0.5745245099031036, | |
| "learning_rate": 3.9869440263344714e-07, | |
| "loss": 0.1865, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 2.652870493991989, | |
| "grad_norm": 0.6362782671167186, | |
| "learning_rate": 3.895979887571649e-07, | |
| "loss": 0.1702, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 2.656875834445928, | |
| "grad_norm": 0.6659260281530986, | |
| "learning_rate": 3.8060233744356634e-07, | |
| "loss": 0.1588, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 2.6608811748998664, | |
| "grad_norm": 0.5821725085601599, | |
| "learning_rate": 3.717076452969559e-07, | |
| "loss": 0.1585, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 2.664886515353805, | |
| "grad_norm": 0.5721973191552109, | |
| "learning_rate": 3.6291410671512597e-07, | |
| "loss": 0.1546, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 2.6688918558077437, | |
| "grad_norm": 0.6108240231082995, | |
| "learning_rate": 3.542219138851094e-07, | |
| "loss": 0.165, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 2.6728971962616823, | |
| "grad_norm": 0.5890590495394973, | |
| "learning_rate": 3.4563125677897936e-07, | |
| "loss": 0.1697, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 2.676902536715621, | |
| "grad_norm": 0.5687849267564792, | |
| "learning_rate": 3.371423231496951e-07, | |
| "loss": 0.1737, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 2.6809078771695596, | |
| "grad_norm": 0.6029561874285849, | |
| "learning_rate": 3.287552985270015e-07, | |
| "loss": 0.1618, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 2.6849132176234978, | |
| "grad_norm": 0.6317046685851485, | |
| "learning_rate": 3.204703662133724e-07, | |
| "loss": 0.1761, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 2.688918558077437, | |
| "grad_norm": 0.6617100618302314, | |
| "learning_rate": 3.122877072800046e-07, | |
| "loss": 0.1656, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 2.692923898531375, | |
| "grad_norm": 0.5447463259022025, | |
| "learning_rate": 3.0420750056286195e-07, | |
| "loss": 0.1611, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 2.6969292389853137, | |
| "grad_norm": 0.6232513934357247, | |
| "learning_rate": 2.962299226587639e-07, | |
| "loss": 0.1613, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 2.7009345794392523, | |
| "grad_norm": 0.620106407560748, | |
| "learning_rate": 2.8835514792152854e-07, | |
| "loss": 0.1525, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 2.704939919893191, | |
| "grad_norm": 0.6010185411049328, | |
| "learning_rate": 2.8058334845816214e-07, | |
| "loss": 0.1491, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 2.7089452603471296, | |
| "grad_norm": 0.5685858072848753, | |
| "learning_rate": 2.729146941250954e-07, | |
| "loss": 0.1558, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 2.712950600801068, | |
| "grad_norm": 0.605493206929418, | |
| "learning_rate": 2.653493525244721e-07, | |
| "loss": 0.167, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 2.716955941255007, | |
| "grad_norm": 0.5867378767850222, | |
| "learning_rate": 2.5788748900048676e-07, | |
| "loss": 0.1622, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 2.720961281708945, | |
| "grad_norm": 0.6167298470042248, | |
| "learning_rate": 2.5052926663577006e-07, | |
| "loss": 0.161, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 2.724966622162884, | |
| "grad_norm": 0.6730247619234008, | |
| "learning_rate": 2.4327484624782684e-07, | |
| "loss": 0.159, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 2.7289719626168223, | |
| "grad_norm": 0.5614764326727409, | |
| "learning_rate": 2.3612438638551837e-07, | |
| "loss": 0.173, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 2.732977303070761, | |
| "grad_norm": 0.6302046681068347, | |
| "learning_rate": 2.290780433255979e-07, | |
| "loss": 0.1823, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 2.7369826435246996, | |
| "grad_norm": 0.6339293472571709, | |
| "learning_rate": 2.2213597106929608e-07, | |
| "loss": 0.1653, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 2.740987983978638, | |
| "grad_norm": 0.6169502312976588, | |
| "learning_rate": 2.152983213389559e-07, | |
| "loss": 0.1738, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 2.744993324432577, | |
| "grad_norm": 0.59777733584903, | |
| "learning_rate": 2.085652435747132e-07, | |
| "loss": 0.1728, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 2.7489986648865155, | |
| "grad_norm": 0.6587479139210242, | |
| "learning_rate": 2.0193688493123588e-07, | |
| "loss": 0.1748, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 2.753004005340454, | |
| "grad_norm": 0.5962185804964122, | |
| "learning_rate": 1.9541339027450256e-07, | |
| "loss": 0.1617, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 2.7570093457943923, | |
| "grad_norm": 0.7505303172182166, | |
| "learning_rate": 1.889949021786397e-07, | |
| "loss": 0.1722, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 2.7610146862483314, | |
| "grad_norm": 0.5964493036331348, | |
| "learning_rate": 1.8268156092280498e-07, | |
| "loss": 0.1654, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 2.7650200267022695, | |
| "grad_norm": 0.5762622321084703, | |
| "learning_rate": 1.7647350448812105e-07, | |
| "loss": 0.1583, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 2.769025367156208, | |
| "grad_norm": 0.6801945089414425, | |
| "learning_rate": 1.7037086855465902e-07, | |
| "loss": 0.1768, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 2.773030707610147, | |
| "grad_norm": 0.6911899110033116, | |
| "learning_rate": 1.6437378649847458e-07, | |
| "loss": 0.1732, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 2.7770360480640854, | |
| "grad_norm": 0.6400292646247479, | |
| "learning_rate": 1.5848238938869332e-07, | |
| "loss": 0.1713, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 2.781041388518024, | |
| "grad_norm": 0.6293782671500883, | |
| "learning_rate": 1.5269680598464342e-07, | |
| "loss": 0.1698, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 2.7850467289719627, | |
| "grad_norm": 0.6326255144077011, | |
| "learning_rate": 1.4701716273304524e-07, | |
| "loss": 0.1617, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 2.7890520694259013, | |
| "grad_norm": 0.5877129222871145, | |
| "learning_rate": 1.4144358376524504e-07, | |
| "loss": 0.1736, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 2.7930574098798395, | |
| "grad_norm": 0.5768063266465919, | |
| "learning_rate": 1.3597619089450343e-07, | |
| "loss": 0.1678, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 2.7970627503337786, | |
| "grad_norm": 0.6141881975935009, | |
| "learning_rate": 1.3061510361333186e-07, | |
| "loss": 0.174, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 2.801068090787717, | |
| "grad_norm": 0.6135094476904235, | |
| "learning_rate": 1.253604390908819e-07, | |
| "loss": 0.1643, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 2.8050734312416554, | |
| "grad_norm": 0.5963941238541673, | |
| "learning_rate": 1.2021231217038522e-07, | |
| "loss": 0.1719, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 2.809078771695594, | |
| "grad_norm": 0.6188679804552468, | |
| "learning_rate": 1.1517083536664142e-07, | |
| "loss": 0.1732, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 2.8130841121495327, | |
| "grad_norm": 0.6378524366699763, | |
| "learning_rate": 1.10236118863562e-07, | |
| "loss": 0.1657, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 2.8170894526034713, | |
| "grad_norm": 0.6624926543396782, | |
| "learning_rate": 1.0540827051175817e-07, | |
| "loss": 0.163, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 2.82109479305741, | |
| "grad_norm": 0.6241183789468542, | |
| "learning_rate": 1.0068739582618781e-07, | |
| "loss": 0.1738, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 2.8251001335113486, | |
| "grad_norm": 0.5929380446422712, | |
| "learning_rate": 9.607359798384785e-08, | |
| "loss": 0.1698, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 2.8291054739652868, | |
| "grad_norm": 0.6379777759896461, | |
| "learning_rate": 9.15669778215178e-08, | |
| "loss": 0.178, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 2.833110814419226, | |
| "grad_norm": 0.5934788386620728, | |
| "learning_rate": 8.716763383355863e-08, | |
| "loss": 0.1781, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 2.837116154873164, | |
| "grad_norm": 0.67169486389099, | |
| "learning_rate": 8.287566216975795e-08, | |
| "loss": 0.1468, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 2.8411214953271027, | |
| "grad_norm": 0.5945998400168926, | |
| "learning_rate": 7.869115663322879e-08, | |
| "loss": 0.1677, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 2.8451268357810413, | |
| "grad_norm": 0.6379381928228921, | |
| "learning_rate": 7.461420867836078e-08, | |
| "loss": 0.1596, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 2.84913217623498, | |
| "grad_norm": 0.5694853594582754, | |
| "learning_rate": 7.064490740882057e-08, | |
| "loss": 0.1564, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 2.8531375166889186, | |
| "grad_norm": 0.6431564447838445, | |
| "learning_rate": 6.678333957560513e-08, | |
| "loss": 0.1779, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 2.857142857142857, | |
| "grad_norm": 0.603659494625915, | |
| "learning_rate": 6.302958957514372e-08, | |
| "loss": 0.1703, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 2.861148197596796, | |
| "grad_norm": 0.5731990501391916, | |
| "learning_rate": 5.938373944745612e-08, | |
| "loss": 0.1687, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 2.8651535380507345, | |
| "grad_norm": 0.6437255392307442, | |
| "learning_rate": 5.584586887435739e-08, | |
| "loss": 0.1727, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 2.869158878504673, | |
| "grad_norm": 0.5868868712177298, | |
| "learning_rate": 5.241605517771753e-08, | |
| "loss": 0.1621, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 2.8731642189586113, | |
| "grad_norm": 0.6333270306830352, | |
| "learning_rate": 4.909437331777178e-08, | |
| "loss": 0.1635, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 2.87716955941255, | |
| "grad_norm": 0.6290385820686785, | |
| "learning_rate": 4.588089589148192e-08, | |
| "loss": 0.1642, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 2.8811748998664886, | |
| "grad_norm": 0.6191579999515221, | |
| "learning_rate": 4.2775693130948094e-08, | |
| "loss": 0.1789, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 2.885180240320427, | |
| "grad_norm": 0.5476898057957497, | |
| "learning_rate": 3.977883290187667e-08, | |
| "loss": 0.1506, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 2.889185580774366, | |
| "grad_norm": 0.578059211687489, | |
| "learning_rate": 3.689038070209594e-08, | |
| "loss": 0.1592, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 2.8931909212283045, | |
| "grad_norm": 0.5958692191590165, | |
| "learning_rate": 3.4110399660123306e-08, | |
| "loss": 0.1733, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 2.897196261682243, | |
| "grad_norm": 0.5808682012621369, | |
| "learning_rate": 3.143895053378698e-08, | |
| "loss": 0.151, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 2.9012016021361817, | |
| "grad_norm": 0.5943310418284603, | |
| "learning_rate": 2.8876091708898714e-08, | |
| "loss": 0.1733, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 2.9052069425901204, | |
| "grad_norm": 0.5647474381435812, | |
| "learning_rate": 2.642187919797479e-08, | |
| "loss": 0.1621, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 2.9092122830440585, | |
| "grad_norm": 0.5948502812847656, | |
| "learning_rate": 2.4076366639015914e-08, | |
| "loss": 0.158, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 2.913217623497997, | |
| "grad_norm": 0.595088477171892, | |
| "learning_rate": 2.1839605294330935e-08, | |
| "loss": 0.1754, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 2.917222963951936, | |
| "grad_norm": 0.5612839333804125, | |
| "learning_rate": 1.97116440494205e-08, | |
| "loss": 0.1539, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 2.9212283044058744, | |
| "grad_norm": 0.6167465533535336, | |
| "learning_rate": 1.769252941190458e-08, | |
| "loss": 0.1708, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 2.925233644859813, | |
| "grad_norm": 0.6811415193373171, | |
| "learning_rate": 1.5782305510508855e-08, | |
| "loss": 0.1712, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 2.9292389853137517, | |
| "grad_norm": 0.6295676411628186, | |
| "learning_rate": 1.3981014094099354e-08, | |
| "loss": 0.1606, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 2.9332443257676903, | |
| "grad_norm": 0.5883747534045934, | |
| "learning_rate": 1.2288694530769862e-08, | |
| "loss": 0.1713, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 2.937249666221629, | |
| "grad_norm": 0.6467588103154122, | |
| "learning_rate": 1.0705383806982606e-08, | |
| "loss": 0.1882, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 2.9412550066755676, | |
| "grad_norm": 0.7011732558711389, | |
| "learning_rate": 9.231116526757234e-09, | |
| "loss": 0.1704, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 2.945260347129506, | |
| "grad_norm": 0.6116779172964563, | |
| "learning_rate": 7.865924910916977e-09, | |
| "loss": 0.1698, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 2.9492656875834444, | |
| "grad_norm": 0.6138331601067519, | |
| "learning_rate": 6.609838796385326e-09, | |
| "loss": 0.1621, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 2.953271028037383, | |
| "grad_norm": 0.5884709984985781, | |
| "learning_rate": 5.4628856355293245e-09, | |
| "loss": 0.1704, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 2.9572763684913217, | |
| "grad_norm": 0.6040205984929831, | |
| "learning_rate": 4.4250904955656095e-09, | |
| "loss": 0.1665, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 2.9612817089452603, | |
| "grad_norm": 0.5944128522570793, | |
| "learning_rate": 3.496476058006959e-09, | |
| "loss": 0.1696, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 2.965287049399199, | |
| "grad_norm": 0.5824311819553499, | |
| "learning_rate": 2.6770626181715776e-09, | |
| "loss": 0.1878, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 2.9692923898531376, | |
| "grad_norm": 0.69435894344096, | |
| "learning_rate": 1.9668680847356735e-09, | |
| "loss": 0.1689, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 2.9732977303070762, | |
| "grad_norm": 0.6630378694543846, | |
| "learning_rate": 1.3659079793432173e-09, | |
| "loss": 0.1518, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 2.977303070761015, | |
| "grad_norm": 0.5642162664981942, | |
| "learning_rate": 8.741954362678773e-10, | |
| "loss": 0.1691, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 2.981308411214953, | |
| "grad_norm": 0.5752610766888291, | |
| "learning_rate": 4.91741202124918e-10, | |
| "loss": 0.1637, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 2.985313751668892, | |
| "grad_norm": 0.6316838155198957, | |
| "learning_rate": 2.1855363563638708e-10, | |
| "loss": 0.188, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 2.9893190921228303, | |
| "grad_norm": 0.5868868040589227, | |
| "learning_rate": 5.4638707447929315e-11, | |
| "loss": 0.1496, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 2.9893190921228303, | |
| "step": 747, | |
| "total_flos": 194126978285568.0, | |
| "train_loss": 0.254960169514499, | |
| "train_runtime": 5438.7593, | |
| "train_samples_per_second": 13.217, | |
| "train_steps_per_second": 0.137 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 747, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": -747, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 194126978285568.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
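
For readers who want to inspect this state file rather than eyeball the raw JSON, below is a minimal sketch of one way to do it with the standard library plus matplotlib. It is not part of the original file: the path `trainer_state.json` and the output filename are assumptions, and the structure relied on (per-step records in `log_history` carrying `loss`, `learning_rate`, and `step`; a final summary record carrying aggregates such as `train_loss` instead) matches the records shown above.

```python
# Minimal sketch (assumption: the file above is saved as "trainer_state.json"
# next to this script, and matplotlib is installed).
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Per-step records contain a "loss" key; the trailing summary record does not
# (it has "train_loss"/"train_runtime" instead), so filtering on "loss"
# keeps exactly the step-level entries.
records = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in records]
losses = [e["loss"] for e in records]
lrs = [e["learning_rate"] for e in records]

print(f"steps logged: {len(records)} / max_steps: {state['max_steps']}")
print(f"final loss: {losses[-1]:.4f} at step {steps[-1]}")

# Two stacked panels: training loss on top, learning-rate schedule below.
fig, (ax_loss, ax_lr) = plt.subplots(2, 1, sharex=True)
ax_loss.plot(steps, losses)
ax_loss.set_ylabel("training loss")
ax_lr.plot(steps, lrs)
ax_lr.set_ylabel("learning rate")
ax_lr.set_xlabel("step")
fig.savefig("training_curves.png")  # hypothetical output name
```

Run against this file, the printout should report 747 logged steps out of `max_steps` 747, with the final per-step loss 0.1496 at step 747; the learning-rate panel should show the warmup over the first steps followed by the decay toward ~5.5e-11 visible in the records above.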