| { |
| "best_metric": 0.2714239654140476, |
| "best_model_checkpoint": "outputs3/checkpoint-2000", |
| "epoch": 10.9375, |
| "eval_steps": 100, |
| "global_step": 2100, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.005208333333333333, |
| "grad_norm": 5.397207736968994, |
| "learning_rate": 7.692307692307692e-06, |
| "loss": 1.8222, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.010416666666666666, |
| "grad_norm": 6.121150970458984, |
| "learning_rate": 1.5384615384615384e-05, |
| "loss": 1.7814, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.015625, |
| "grad_norm": 5.906391620635986, |
| "learning_rate": 2.3076923076923076e-05, |
| "loss": 1.7875, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.020833333333333332, |
| "grad_norm": 6.087553977966309, |
| "learning_rate": 3.076923076923077e-05, |
| "loss": 1.7581, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.026041666666666668, |
| "grad_norm": 6.655142307281494, |
| "learning_rate": 3.8461538461538456e-05, |
| "loss": 1.7142, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.03125, |
| "grad_norm": 6.431704521179199, |
| "learning_rate": 4.615384615384615e-05, |
| "loss": 1.7388, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.036458333333333336, |
| "grad_norm": 5.434669017791748, |
| "learning_rate": 5.384615384615384e-05, |
| "loss": 1.708, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.041666666666666664, |
| "grad_norm": 4.95908260345459, |
| "learning_rate": 6.153846153846154e-05, |
| "loss": 1.7009, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.046875, |
| "grad_norm": 4.0141096115112305, |
| "learning_rate": 6.923076923076922e-05, |
| "loss": 1.6786, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.052083333333333336, |
| "grad_norm": 4.6218438148498535, |
| "learning_rate": 7.692307692307691e-05, |
| "loss": 1.5881, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.057291666666666664, |
| "grad_norm": 4.7401123046875, |
| "learning_rate": 8.46153846153846e-05, |
| "loss": 1.5249, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.0625, |
| "grad_norm": 3.9866232872009277, |
| "learning_rate": 9.23076923076923e-05, |
| "loss": 1.5392, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.06770833333333333, |
| "grad_norm": 4.265102863311768, |
| "learning_rate": 9.999999999999999e-05, |
| "loss": 1.4752, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.07291666666666667, |
| "grad_norm": 3.678858518600464, |
| "learning_rate": 0.00010769230769230768, |
| "loss": 1.4193, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.078125, |
| "grad_norm": 2.892754316329956, |
| "learning_rate": 0.00011538461538461538, |
| "loss": 1.3897, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.08333333333333333, |
| "grad_norm": 2.3903520107269287, |
| "learning_rate": 0.00012307692307692307, |
| "loss": 1.3581, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.08854166666666667, |
| "grad_norm": 2.1593003273010254, |
| "learning_rate": 0.00013076923076923077, |
| "loss": 1.3176, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.09375, |
| "grad_norm": 2.443652391433716, |
| "learning_rate": 0.00013846153846153845, |
| "loss": 1.3814, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.09895833333333333, |
| "grad_norm": 1.588437795639038, |
| "learning_rate": 0.00014615384615384615, |
| "loss": 1.3447, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.10416666666666667, |
| "grad_norm": 1.6803085803985596, |
| "learning_rate": 0.00015384615384615382, |
| "loss": 1.1906, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.109375, |
| "grad_norm": 2.654311180114746, |
| "learning_rate": 0.00016153846153846153, |
| "loss": 1.3355, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.11458333333333333, |
| "grad_norm": 1.8692028522491455, |
| "learning_rate": 0.0001692307692307692, |
| "loss": 1.1053, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.11979166666666667, |
| "grad_norm": 1.5948660373687744, |
| "learning_rate": 0.0001769230769230769, |
| "loss": 1.1783, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.125, |
| "grad_norm": 1.2649933099746704, |
| "learning_rate": 0.0001846153846153846, |
| "loss": 1.1386, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.13020833333333334, |
| "grad_norm": 1.5177069902420044, |
| "learning_rate": 0.0001923076923076923, |
| "loss": 1.1199, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.13541666666666666, |
| "grad_norm": 2.4497432708740234, |
| "learning_rate": 0.00019999999999999998, |
| "loss": 1.1877, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.140625, |
| "grad_norm": 1.5048519372940063, |
| "learning_rate": 0.00020769230769230766, |
| "loss": 1.054, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.14583333333333334, |
| "grad_norm": 1.3417630195617676, |
| "learning_rate": 0.00021538461538461536, |
| "loss": 1.0691, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.15104166666666666, |
| "grad_norm": 1.0508840084075928, |
| "learning_rate": 0.00022307692307692306, |
| "loss": 1.047, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.15625, |
| "grad_norm": 3.145096778869629, |
| "learning_rate": 0.00023076923076923076, |
| "loss": 1.2932, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.16145833333333334, |
| "grad_norm": 0.9920203685760498, |
| "learning_rate": 0.00023846153846153844, |
| "loss": 1.0972, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.16666666666666666, |
| "grad_norm": 1.444179892539978, |
| "learning_rate": 0.00024615384615384614, |
| "loss": 1.2505, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.171875, |
| "grad_norm": 1.376099944114685, |
| "learning_rate": 0.0002538461538461538, |
| "loss": 1.1676, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.17708333333333334, |
| "grad_norm": 2.056878089904785, |
| "learning_rate": 0.00026153846153846154, |
| "loss": 1.2406, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.18229166666666666, |
| "grad_norm": 1.7314432859420776, |
| "learning_rate": 0.0002692307692307692, |
| "loss": 1.1003, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.1875, |
| "grad_norm": 1.4198064804077148, |
| "learning_rate": 0.0002769230769230769, |
| "loss": 1.0615, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.19270833333333334, |
| "grad_norm": 1.2226223945617676, |
| "learning_rate": 0.00028461538461538457, |
| "loss": 0.9502, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.19791666666666666, |
| "grad_norm": 1.234714150428772, |
| "learning_rate": 0.0002923076923076923, |
| "loss": 1.1702, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.203125, |
| "grad_norm": 2.4479353427886963, |
| "learning_rate": 0.0003, |
| "loss": 1.1457, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.20833333333333334, |
| "grad_norm": 2.8535573482513428, |
| "learning_rate": 0.0002999999487651764, |
| "loss": 1.1176, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.21354166666666666, |
| "grad_norm": 1.0224534273147583, |
| "learning_rate": 0.0002999997950607408, |
| "loss": 1.0677, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.21875, |
| "grad_norm": 1.8066833019256592, |
| "learning_rate": 0.00029999953888679804, |
| "loss": 1.0787, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.22395833333333334, |
| "grad_norm": 1.463275671005249, |
| "learning_rate": 0.0002999991802435232, |
| "loss": 1.0178, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.22916666666666666, |
| "grad_norm": 1.6874860525131226, |
| "learning_rate": 0.00029999871913116125, |
| "loss": 1.2014, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.234375, |
| "grad_norm": 1.8337984085083008, |
| "learning_rate": 0.00029999815555002723, |
| "loss": 1.0094, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.23958333333333334, |
| "grad_norm": 2.3896031379699707, |
| "learning_rate": 0.0002999974895005061, |
| "loss": 1.0034, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.24479166666666666, |
| "grad_norm": 1.2153241634368896, |
| "learning_rate": 0.00029999672098305285, |
| "loss": 1.1102, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 1.5140814781188965, |
| "learning_rate": 0.0002999958499981925, |
| "loss": 1.0591, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.2552083333333333, |
| "grad_norm": 1.827317476272583, |
| "learning_rate": 0.0002999948765465201, |
| "loss": 1.0227, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.2604166666666667, |
| "grad_norm": 2.928297281265259, |
| "learning_rate": 0.0002999938006287005, |
| "loss": 1.053, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.265625, |
| "grad_norm": 1.8400626182556152, |
| "learning_rate": 0.00029999262224546887, |
| "loss": 0.9833, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.2708333333333333, |
| "grad_norm": 1.3562742471694946, |
| "learning_rate": 0.0002999913413976301, |
| "loss": 1.1162, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.2760416666666667, |
| "grad_norm": 2.2726147174835205, |
| "learning_rate": 0.0002999899580860592, |
| "loss": 1.0374, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.28125, |
| "grad_norm": 1.2114999294281006, |
| "learning_rate": 0.0002999884723117011, |
| "loss": 1.0079, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.2864583333333333, |
| "grad_norm": 1.2094279527664185, |
| "learning_rate": 0.00029998688407557086, |
| "loss": 1.0718, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.2916666666666667, |
| "grad_norm": 2.686410903930664, |
| "learning_rate": 0.00029998519337875334, |
| "loss": 1.0395, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.296875, |
| "grad_norm": 1.2186232805252075, |
| "learning_rate": 0.0002999834002224037, |
| "loss": 1.0996, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.3020833333333333, |
| "grad_norm": 1.7384519577026367, |
| "learning_rate": 0.0002999815046077467, |
| "loss": 1.2145, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.3072916666666667, |
| "grad_norm": 1.577171802520752, |
| "learning_rate": 0.0002999795065360774, |
| "loss": 0.997, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.3125, |
| "grad_norm": 1.5297189950942993, |
| "learning_rate": 0.00029997740600876073, |
| "loss": 1.0212, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.3177083333333333, |
| "grad_norm": 1.384333848953247, |
| "learning_rate": 0.00029997520302723165, |
| "loss": 1.0294, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.3229166666666667, |
| "grad_norm": 1.4977476596832275, |
| "learning_rate": 0.000299972897592995, |
| "loss": 1.1351, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.328125, |
| "grad_norm": 2.163052558898926, |
| "learning_rate": 0.0002999704897076258, |
| "loss": 1.1299, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.3333333333333333, |
| "grad_norm": 1.1992921829223633, |
| "learning_rate": 0.0002999679793727688, |
| "loss": 0.9314, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.3385416666666667, |
| "grad_norm": 1.5068293809890747, |
| "learning_rate": 0.00029996536659013907, |
| "loss": 1.0567, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.34375, |
| "grad_norm": 1.186168909072876, |
| "learning_rate": 0.00029996265136152133, |
| "loss": 0.995, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.3489583333333333, |
| "grad_norm": 1.0788813829421997, |
| "learning_rate": 0.00029995983368877056, |
| "loss": 1.0576, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.3541666666666667, |
| "grad_norm": 2.363426923751831, |
| "learning_rate": 0.00029995691357381153, |
| "loss": 1.0428, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.359375, |
| "grad_norm": 1.3764511346817017, |
| "learning_rate": 0.00029995389101863907, |
| "loss": 0.9951, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.3645833333333333, |
| "grad_norm": 1.084170937538147, |
| "learning_rate": 0.00029995076602531794, |
| "loss": 1.0308, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.3697916666666667, |
| "grad_norm": 1.5358678102493286, |
| "learning_rate": 0.00029994753859598303, |
| "loss": 0.8918, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.375, |
| "grad_norm": 2.0857279300689697, |
| "learning_rate": 0.000299944208732839, |
| "loss": 1.184, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.3802083333333333, |
| "grad_norm": 1.613572359085083, |
| "learning_rate": 0.0002999407764381607, |
| "loss": 1.0715, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.3854166666666667, |
| "grad_norm": 0.9789800643920898, |
| "learning_rate": 0.00029993724171429264, |
| "loss": 1.0046, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.390625, |
| "grad_norm": 1.4175748825073242, |
| "learning_rate": 0.0002999336045636497, |
| "loss": 0.9586, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.3958333333333333, |
| "grad_norm": 1.1527223587036133, |
| "learning_rate": 0.00029992986498871634, |
| "loss": 1.0749, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.4010416666666667, |
| "grad_norm": 1.5099977254867554, |
| "learning_rate": 0.00029992602299204734, |
| "loss": 1.0535, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.40625, |
| "grad_norm": 2.7003188133239746, |
| "learning_rate": 0.00029992207857626723, |
| "loss": 1.0038, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.4114583333333333, |
| "grad_norm": 2.36690092086792, |
| "learning_rate": 0.0002999180317440705, |
| "loss": 1.1263, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.4166666666666667, |
| "grad_norm": 1.0674234628677368, |
| "learning_rate": 0.0002999138824982218, |
| "loss": 0.967, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.421875, |
| "grad_norm": 1.9884986877441406, |
| "learning_rate": 0.00029990963084155554, |
| "loss": 1.0438, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.4270833333333333, |
| "grad_norm": 2.576671838760376, |
| "learning_rate": 0.0002999052767769761, |
| "loss": 1.0245, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.4322916666666667, |
| "grad_norm": 1.039006233215332, |
| "learning_rate": 0.000299900820307458, |
| "loss": 1.0491, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.4375, |
| "grad_norm": 1.5377469062805176, |
| "learning_rate": 0.00029989626143604546, |
| "loss": 1.0123, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.4427083333333333, |
| "grad_norm": 1.6944743394851685, |
| "learning_rate": 0.0002998916001658529, |
| "loss": 1.0236, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.4479166666666667, |
| "grad_norm": 1.0246829986572266, |
| "learning_rate": 0.00029988683650006444, |
| "loss": 0.9965, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.453125, |
| "grad_norm": 1.4158473014831543, |
| "learning_rate": 0.0002998819704419345, |
| "loss": 0.9731, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.4583333333333333, |
| "grad_norm": 2.2339560985565186, |
| "learning_rate": 0.00029987700199478703, |
| "loss": 0.9871, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.4635416666666667, |
| "grad_norm": 1.7724788188934326, |
| "learning_rate": 0.0002998719311620162, |
| "loss": 1.0376, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.46875, |
| "grad_norm": 2.3797409534454346, |
| "learning_rate": 0.00029986675794708604, |
| "loss": 0.8429, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.4739583333333333, |
| "grad_norm": 2.8000009059906006, |
| "learning_rate": 0.00029986148235353066, |
| "loss": 1.0252, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.4791666666666667, |
| "grad_norm": 1.5784305334091187, |
| "learning_rate": 0.0002998561043849538, |
| "loss": 0.9626, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.484375, |
| "grad_norm": 1.0656737089157104, |
| "learning_rate": 0.00029985062404502935, |
| "loss": 0.9562, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.4895833333333333, |
| "grad_norm": 2.1506361961364746, |
| "learning_rate": 0.00029984504133750127, |
| "loss": 1.0785, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.4947916666666667, |
| "grad_norm": 1.3268401622772217, |
| "learning_rate": 0.0002998393562661831, |
| "loss": 0.9932, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.4154417514801025, |
| "learning_rate": 0.00029983356883495847, |
| "loss": 1.0169, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.5052083333333334, |
| "grad_norm": 0.81435227394104, |
| "learning_rate": 0.0002998276790477811, |
| "loss": 0.9403, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.5104166666666666, |
| "grad_norm": 3.3552396297454834, |
| "learning_rate": 0.0002998216869086744, |
| "loss": 1.0094, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.515625, |
| "grad_norm": 2.0080602169036865, |
| "learning_rate": 0.00029981559242173183, |
| "loss": 1.0351, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.5208333333333334, |
| "grad_norm": 2.168379068374634, |
| "learning_rate": 0.00029980939559111667, |
| "loss": 1.0543, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.5208333333333334, |
| "eval_f1_macro": 0.25250803933928845, |
| "eval_loss": 1.0556310415267944, |
| "eval_runtime": 4.9645, |
| "eval_samples_per_second": 616.981, |
| "eval_steps_per_second": 9.669, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.5260416666666666, |
| "grad_norm": 2.4171462059020996, |
| "learning_rate": 0.0002998030964210621, |
| "loss": 1.0142, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.53125, |
| "grad_norm": 0.9183530211448669, |
| "learning_rate": 0.0002997966949158715, |
| "loss": 1.0285, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.5364583333333334, |
| "grad_norm": 2.74279522895813, |
| "learning_rate": 0.00029979019107991776, |
| "loss": 0.9122, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.5416666666666666, |
| "grad_norm": 1.0783967971801758, |
| "learning_rate": 0.00029978358491764386, |
| "loss": 1.0548, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.546875, |
| "grad_norm": 2.4839041233062744, |
| "learning_rate": 0.00029977687643356274, |
| "loss": 1.0375, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.5520833333333334, |
| "grad_norm": 2.8750600814819336, |
| "learning_rate": 0.00029977006563225715, |
| "loss": 0.9208, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.5572916666666666, |
| "grad_norm": 2.0126476287841797, |
| "learning_rate": 0.00029976315251837974, |
| "loss": 1.0759, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.5625, |
| "grad_norm": 2.5347306728363037, |
| "learning_rate": 0.00029975613709665313, |
| "loss": 1.1292, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.5677083333333334, |
| "grad_norm": 1.521519422531128, |
| "learning_rate": 0.00029974901937186965, |
| "loss": 0.9461, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.5729166666666666, |
| "grad_norm": 1.184241771697998, |
| "learning_rate": 0.0002997417993488917, |
| "loss": 0.9042, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.578125, |
| "grad_norm": 1.6956247091293335, |
| "learning_rate": 0.0002997344770326516, |
| "loss": 0.8928, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.5833333333333334, |
| "grad_norm": 1.785886287689209, |
| "learning_rate": 0.00029972705242815135, |
| "loss": 0.9185, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.5885416666666666, |
| "grad_norm": 0.9504479765892029, |
| "learning_rate": 0.0002997195255404629, |
| "loss": 0.9856, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.59375, |
| "grad_norm": 1.5911412239074707, |
| "learning_rate": 0.00029971189637472816, |
| "loss": 1.0633, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.5989583333333334, |
| "grad_norm": 2.043400764465332, |
| "learning_rate": 0.00029970416493615884, |
| "loss": 1.1406, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.6041666666666666, |
| "grad_norm": 2.781583547592163, |
| "learning_rate": 0.0002996963312300365, |
| "loss": 0.9831, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.609375, |
| "grad_norm": 1.1262691020965576, |
| "learning_rate": 0.0002996883952617127, |
| "loss": 1.0073, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.6145833333333334, |
| "grad_norm": 1.5104336738586426, |
| "learning_rate": 0.00029968035703660853, |
| "loss": 1.1539, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.6197916666666666, |
| "grad_norm": 1.1106146574020386, |
| "learning_rate": 0.0002996722165602153, |
| "loss": 1.0731, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.625, |
| "grad_norm": 2.0642178058624268, |
| "learning_rate": 0.0002996639738380941, |
| "loss": 1.0645, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.6302083333333334, |
| "grad_norm": 1.9791700839996338, |
| "learning_rate": 0.00029965562887587555, |
| "loss": 0.9938, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.6354166666666666, |
| "grad_norm": 0.9058408737182617, |
| "learning_rate": 0.0002996471816792605, |
| "loss": 0.9668, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.640625, |
| "grad_norm": 0.9904219508171082, |
| "learning_rate": 0.00029963863225401956, |
| "loss": 1.026, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.6458333333333334, |
| "grad_norm": 1.514210820198059, |
| "learning_rate": 0.00029962998060599293, |
| "loss": 0.9911, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.6510416666666666, |
| "grad_norm": 2.3045387268066406, |
| "learning_rate": 0.000299621226741091, |
| "loss": 1.0377, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.65625, |
| "grad_norm": 2.3845651149749756, |
| "learning_rate": 0.00029961237066529364, |
| "loss": 1.0142, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.6614583333333334, |
| "grad_norm": 2.677121877670288, |
| "learning_rate": 0.00029960341238465085, |
| "loss": 1.1228, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 1.26005220413208, |
| "learning_rate": 0.00029959435190528224, |
| "loss": 1.1243, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.671875, |
| "grad_norm": 0.929267406463623, |
| "learning_rate": 0.0002995851892333773, |
| "loss": 1.0841, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.6770833333333334, |
| "grad_norm": 1.5696921348571777, |
| "learning_rate": 0.0002995759243751953, |
| "loss": 1.0964, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.6822916666666666, |
| "grad_norm": 0.9027417898178101, |
| "learning_rate": 0.00029956655733706547, |
| "loss": 1.022, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.6875, |
| "grad_norm": 0.9396100044250488, |
| "learning_rate": 0.00029955708812538665, |
| "loss": 1.0013, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.6927083333333334, |
| "grad_norm": 1.8517224788665771, |
| "learning_rate": 0.00029954751674662747, |
| "loss": 0.9586, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.6979166666666666, |
| "grad_norm": 1.3162637948989868, |
| "learning_rate": 0.0002995378432073266, |
| "loss": 1.0868, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.703125, |
| "grad_norm": 1.117018222808838, |
| "learning_rate": 0.00029952806751409227, |
| "loss": 0.9481, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.7083333333333334, |
| "grad_norm": 0.8276170492172241, |
| "learning_rate": 0.0002995181896736025, |
| "loss": 0.8816, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.7135416666666666, |
| "grad_norm": 1.1518480777740479, |
| "learning_rate": 0.0002995082096926052, |
| "loss": 0.8526, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.71875, |
| "grad_norm": 1.3258408308029175, |
| "learning_rate": 0.000299498127577918, |
| "loss": 0.8629, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.7239583333333334, |
| "grad_norm": 0.8253046274185181, |
| "learning_rate": 0.0002994879433364283, |
| "loss": 0.9769, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.7291666666666666, |
| "grad_norm": 1.9821393489837646, |
| "learning_rate": 0.0002994776569750933, |
| "loss": 1.0081, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.734375, |
| "grad_norm": 1.014163613319397, |
| "learning_rate": 0.0002994672685009399, |
| "loss": 0.9894, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.7395833333333334, |
| "grad_norm": 1.311524510383606, |
| "learning_rate": 0.0002994567779210648, |
| "loss": 0.9449, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.7447916666666666, |
| "grad_norm": 1.3515123128890991, |
| "learning_rate": 0.00029944618524263436, |
| "loss": 1.0882, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 0.9710060954093933, |
| "learning_rate": 0.00029943549047288493, |
| "loss": 0.9491, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.7552083333333334, |
| "grad_norm": 1.8086128234863281, |
| "learning_rate": 0.0002994246936191223, |
| "loss": 1.1328, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.7604166666666666, |
| "grad_norm": 1.4990899562835693, |
| "learning_rate": 0.00029941379468872216, |
| "loss": 1.0128, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.765625, |
| "grad_norm": 0.9715232849121094, |
| "learning_rate": 0.0002994027936891299, |
| "loss": 0.9558, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.7708333333333334, |
| "grad_norm": 1.0712926387786865, |
| "learning_rate": 0.0002993916906278608, |
| "loss": 1.0808, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.7760416666666666, |
| "grad_norm": 2.822744131088257, |
| "learning_rate": 0.00029938048551249953, |
| "loss": 0.9373, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.78125, |
| "grad_norm": 1.8546404838562012, |
| "learning_rate": 0.00029936917835070065, |
| "loss": 1.065, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.7864583333333334, |
| "grad_norm": 1.3934117555618286, |
| "learning_rate": 0.0002993577691501885, |
| "loss": 0.8912, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.7916666666666666, |
| "grad_norm": 1.9082303047180176, |
| "learning_rate": 0.00029934625791875713, |
| "loss": 0.9831, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.796875, |
| "grad_norm": 1.3178884983062744, |
| "learning_rate": 0.00029933464466427005, |
| "loss": 0.9402, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.8020833333333334, |
| "grad_norm": 1.6817083358764648, |
| "learning_rate": 0.0002993229293946607, |
| "loss": 1.2149, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.8072916666666666, |
| "grad_norm": 1.7874362468719482, |
| "learning_rate": 0.0002993111121179322, |
| "loss": 1.0213, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.8125, |
| "grad_norm": 1.8898367881774902, |
| "learning_rate": 0.0002992991928421572, |
| "loss": 0.9575, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.8177083333333334, |
| "grad_norm": 1.1247788667678833, |
| "learning_rate": 0.00029928717157547824, |
| "loss": 0.9937, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.8229166666666666, |
| "grad_norm": 0.9836966395378113, |
| "learning_rate": 0.0002992750483261074, |
| "loss": 0.9639, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.828125, |
| "grad_norm": 1.373844861984253, |
| "learning_rate": 0.0002992628231023263, |
| "loss": 1.0198, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.8333333333333334, |
| "grad_norm": 1.1692348718643188, |
| "learning_rate": 0.00029925049591248657, |
| "loss": 1.0126, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.8385416666666666, |
| "grad_norm": 1.5027955770492554, |
| "learning_rate": 0.00029923806676500917, |
| "loss": 1.0299, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.84375, |
| "grad_norm": 1.4094992876052856, |
| "learning_rate": 0.0002992255356683849, |
| "loss": 1.0137, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.8489583333333334, |
| "grad_norm": 2.50742769241333, |
| "learning_rate": 0.00029921290263117405, |
| "loss": 0.9327, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.8541666666666666, |
| "grad_norm": 2.115795850753784, |
| "learning_rate": 0.00029920016766200674, |
| "loss": 0.947, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.859375, |
| "grad_norm": 1.6246610879898071, |
| "learning_rate": 0.0002991873307695826, |
| "loss": 1.0292, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.8645833333333334, |
| "grad_norm": 1.1676617860794067, |
| "learning_rate": 0.0002991743919626708, |
| "loss": 1.0125, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.8697916666666666, |
| "grad_norm": 0.9825252890586853, |
| "learning_rate": 0.0002991613512501104, |
| "loss": 0.9619, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.875, |
| "grad_norm": 0.8229219913482666, |
| "learning_rate": 0.00029914820864080987, |
| "loss": 0.9046, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.8802083333333334, |
| "grad_norm": 1.481568455696106, |
| "learning_rate": 0.00029913496414374726, |
| "loss": 1.0084, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.8854166666666666, |
| "grad_norm": 1.985333800315857, |
| "learning_rate": 0.00029912161776797036, |
| "loss": 1.0, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.890625, |
| "grad_norm": 1.7560663223266602, |
| "learning_rate": 0.0002991081695225965, |
| "loss": 1.1472, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.8958333333333334, |
| "grad_norm": 1.0130590200424194, |
| "learning_rate": 0.00029909461941681257, |
| "loss": 0.9337, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.9010416666666666, |
| "grad_norm": 1.1458450555801392, |
| "learning_rate": 0.000299080967459875, |
| "loss": 0.9239, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.90625, |
| "grad_norm": 2.193901777267456, |
| "learning_rate": 0.00029906721366111, |
| "loss": 0.9941, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.9114583333333334, |
| "grad_norm": 1.0150309801101685, |
| "learning_rate": 0.0002990533580299131, |
| "loss": 0.9181, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.9166666666666666, |
| "grad_norm": 1.1026190519332886, |
| "learning_rate": 0.00029903940057574957, |
| "loss": 1.0907, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.921875, |
| "grad_norm": 1.014424443244934, |
| "learning_rate": 0.0002990253413081542, |
| "loss": 0.9963, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.9270833333333334, |
| "grad_norm": 2.157789945602417, |
| "learning_rate": 0.00029901118023673126, |
| "loss": 1.0478, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.9322916666666666, |
| "grad_norm": 1.51250422000885, |
| "learning_rate": 0.0002989969173711546, |
| "loss": 1.0496, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.9375, |
| "grad_norm": 2.3087375164031982, |
| "learning_rate": 0.0002989825527211677, |
| "loss": 0.9566, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.9427083333333334, |
| "grad_norm": 2.373321294784546, |
| "learning_rate": 0.0002989680862965835, |
| "loss": 0.9643, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.9479166666666666, |
| "grad_norm": 0.9937634468078613, |
| "learning_rate": 0.00029895351810728437, |
| "loss": 1.0283, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.953125, |
| "grad_norm": 1.3540972471237183, |
| "learning_rate": 0.0002989388481632224, |
| "loss": 1.0127, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.9583333333333334, |
| "grad_norm": 0.8663268685340881, |
| "learning_rate": 0.0002989240764744189, |
| "loss": 0.9014, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.9635416666666666, |
| "grad_norm": 1.247704029083252, |
| "learning_rate": 0.00029890920305096513, |
| "loss": 1.0444, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.96875, |
| "grad_norm": 1.2338781356811523, |
| "learning_rate": 0.0002988942279030214, |
| "loss": 0.9622, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.9739583333333334, |
| "grad_norm": 1.9096918106079102, |
| "learning_rate": 0.00029887915104081776, |
| "loss": 1.0125, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.9791666666666666, |
| "grad_norm": 1.1534987688064575, |
| "learning_rate": 0.0002988639724746536, |
| "loss": 0.9994, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.984375, |
| "grad_norm": 1.740694522857666, |
| "learning_rate": 0.00029884869221489803, |
| "loss": 1.0453, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.9895833333333334, |
| "grad_norm": 1.186547040939331, |
| "learning_rate": 0.00029883331027198934, |
| "loss": 1.0409, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.9947916666666666, |
| "grad_norm": 1.1369073390960693, |
| "learning_rate": 0.00029881782665643547, |
| "loss": 0.8663, |
| "step": 191 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.2923506498336792, |
| "learning_rate": 0.0002988022413788137, |
| "loss": 1.0748, |
| "step": 192 |
| }, |
| { |
| "epoch": 1.0052083333333333, |
| "grad_norm": 0.8358171582221985, |
| "learning_rate": 0.00029878655444977087, |
| "loss": 0.9383, |
| "step": 193 |
| }, |
| { |
| "epoch": 1.0104166666666667, |
| "grad_norm": 0.9785019159317017, |
| "learning_rate": 0.0002987707658800232, |
| "loss": 1.0312, |
| "step": 194 |
| }, |
| { |
| "epoch": 1.015625, |
| "grad_norm": 2.520554542541504, |
| "learning_rate": 0.0002987548756803564, |
| "loss": 0.9177, |
| "step": 195 |
| }, |
| { |
| "epoch": 1.0208333333333333, |
| "grad_norm": 0.9842451810836792, |
| "learning_rate": 0.0002987388838616254, |
| "loss": 0.9256, |
| "step": 196 |
| }, |
| { |
| "epoch": 1.0260416666666667, |
| "grad_norm": 1.3091936111450195, |
| "learning_rate": 0.00029872279043475483, |
| "loss": 0.95, |
| "step": 197 |
| }, |
| { |
| "epoch": 1.03125, |
| "grad_norm": 1.2800310850143433, |
| "learning_rate": 0.0002987065954107386, |
| "loss": 0.9659, |
| "step": 198 |
| }, |
| { |
| "epoch": 1.0364583333333333, |
| "grad_norm": 1.0626246929168701, |
| "learning_rate": 0.00029869029880064, |
| "loss": 1.0586, |
| "step": 199 |
| }, |
| { |
| "epoch": 1.0416666666666667, |
| "grad_norm": 1.0719517469406128, |
| "learning_rate": 0.00029867390061559177, |
| "loss": 1.0079, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.0416666666666667, |
| "eval_f1_macro": 0.25812201476359903, |
| "eval_loss": 1.0145524740219116, |
| "eval_runtime": 4.9751, |
| "eval_samples_per_second": 615.661, |
| "eval_steps_per_second": 9.648, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.046875, |
| "grad_norm": 1.501737117767334, |
| "learning_rate": 0.00029865740086679605, |
| "loss": 0.9748, |
| "step": 201 |
| }, |
| { |
| "epoch": 1.0520833333333333, |
| "grad_norm": 1.8237347602844238, |
| "learning_rate": 0.0002986407995655242, |
| "loss": 0.9762, |
| "step": 202 |
| }, |
| { |
| "epoch": 1.0572916666666667, |
| "grad_norm": 1.9297834634780884, |
| "learning_rate": 0.0002986240967231173, |
| "loss": 1.0243, |
| "step": 203 |
| }, |
| { |
| "epoch": 1.0625, |
| "grad_norm": 0.92362380027771, |
| "learning_rate": 0.0002986072923509854, |
| "loss": 1.0366, |
| "step": 204 |
| }, |
| { |
| "epoch": 1.0677083333333333, |
| "grad_norm": 1.632373332977295, |
| "learning_rate": 0.0002985903864606082, |
| "loss": 0.961, |
| "step": 205 |
| }, |
| { |
| "epoch": 1.0729166666666667, |
| "grad_norm": 0.9494464993476868, |
| "learning_rate": 0.00029857337906353447, |
| "loss": 1.0624, |
| "step": 206 |
| }, |
| { |
| "epoch": 1.078125, |
| "grad_norm": 1.1524139642715454, |
| "learning_rate": 0.00029855627017138267, |
| "loss": 1.0251, |
| "step": 207 |
| }, |
| { |
| "epoch": 1.0833333333333333, |
| "grad_norm": 2.085423231124878, |
| "learning_rate": 0.0002985390597958403, |
| "loss": 0.9033, |
| "step": 208 |
| }, |
| { |
| "epoch": 1.0885416666666667, |
| "grad_norm": 1.8887369632720947, |
| "learning_rate": 0.0002985217479486644, |
| "loss": 0.9907, |
| "step": 209 |
| }, |
| { |
| "epoch": 1.09375, |
| "grad_norm": 3.6027991771698, |
| "learning_rate": 0.0002985043346416812, |
| "loss": 1.1848, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.0989583333333333, |
| "grad_norm": 1.0362842082977295, |
| "learning_rate": 0.00029848681988678616, |
| "loss": 0.9025, |
| "step": 211 |
| }, |
| { |
| "epoch": 1.1041666666666667, |
| "grad_norm": 0.8222612738609314, |
| "learning_rate": 0.0002984692036959443, |
| "loss": 0.9347, |
| "step": 212 |
| }, |
| { |
| "epoch": 1.109375, |
| "grad_norm": 2.0442848205566406, |
| "learning_rate": 0.0002984514860811897, |
| "loss": 1.132, |
| "step": 213 |
| }, |
| { |
| "epoch": 1.1145833333333333, |
| "grad_norm": 1.191304087638855, |
| "learning_rate": 0.00029843366705462586, |
| "loss": 0.9808, |
| "step": 214 |
| }, |
| { |
| "epoch": 1.1197916666666667, |
| "grad_norm": 1.925605297088623, |
| "learning_rate": 0.00029841574662842543, |
| "loss": 0.8746, |
| "step": 215 |
| }, |
| { |
| "epoch": 1.125, |
| "grad_norm": 1.50774085521698, |
| "learning_rate": 0.0002983977248148305, |
| "loss": 0.9299, |
| "step": 216 |
| }, |
| { |
| "epoch": 1.1302083333333333, |
| "grad_norm": 2.7488646507263184, |
| "learning_rate": 0.00029837960162615225, |
| "loss": 0.9792, |
| "step": 217 |
| }, |
| { |
| "epoch": 1.1354166666666667, |
| "grad_norm": 0.8560806512832642, |
| "learning_rate": 0.0002983613770747713, |
| "loss": 0.9866, |
| "step": 218 |
| }, |
| { |
| "epoch": 1.140625, |
| "grad_norm": 0.8255361914634705, |
| "learning_rate": 0.0002983430511731373, |
| "loss": 0.988, |
| "step": 219 |
| }, |
| { |
| "epoch": 1.1458333333333333, |
| "grad_norm": 1.8752880096435547, |
| "learning_rate": 0.0002983246239337692, |
| "loss": 0.9489, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.1510416666666667, |
| "grad_norm": 1.3407621383666992, |
| "learning_rate": 0.00029830609536925543, |
| "loss": 1.0244, |
| "step": 221 |
| }, |
| { |
| "epoch": 1.15625, |
| "grad_norm": 1.4292068481445312, |
| "learning_rate": 0.0002982874654922532, |
| "loss": 0.995, |
| "step": 222 |
| }, |
| { |
| "epoch": 1.1614583333333333, |
| "grad_norm": 1.4803357124328613, |
| "learning_rate": 0.0002982687343154893, |
| "loss": 0.9455, |
| "step": 223 |
| }, |
| { |
| "epoch": 1.1666666666666667, |
| "grad_norm": 1.7722859382629395, |
| "learning_rate": 0.00029824990185175956, |
| "loss": 0.9612, |
| "step": 224 |
| }, |
| { |
| "epoch": 1.171875, |
| "grad_norm": 1.9795076847076416, |
| "learning_rate": 0.00029823096811392894, |
| "loss": 0.9507, |
| "step": 225 |
| }, |
| { |
| "epoch": 1.1770833333333333, |
| "grad_norm": 1.4444527626037598, |
| "learning_rate": 0.0002982119331149317, |
| "loss": 0.8944, |
| "step": 226 |
| }, |
| { |
| "epoch": 1.1822916666666667, |
| "grad_norm": 1.160965919494629, |
| "learning_rate": 0.0002981927968677712, |
| "loss": 0.973, |
| "step": 227 |
| }, |
| { |
| "epoch": 1.1875, |
| "grad_norm": 2.1548776626586914, |
| "learning_rate": 0.00029817355938552016, |
| "loss": 0.8621, |
| "step": 228 |
| }, |
| { |
| "epoch": 1.1927083333333333, |
| "grad_norm": 2.7744781970977783, |
| "learning_rate": 0.00029815422068132013, |
| "loss": 1.0788, |
| "step": 229 |
| }, |
| { |
| "epoch": 1.1979166666666667, |
| "grad_norm": 0.9514101147651672, |
| "learning_rate": 0.000298134780768382, |
| "loss": 0.9885, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.203125, |
| "grad_norm": 2.4913086891174316, |
| "learning_rate": 0.00029811523965998587, |
| "loss": 0.9986, |
| "step": 231 |
| }, |
| { |
| "epoch": 1.2083333333333333, |
| "grad_norm": 1.27024507522583, |
| "learning_rate": 0.0002980955973694808, |
| "loss": 0.9789, |
| "step": 232 |
| }, |
| { |
| "epoch": 1.2135416666666667, |
| "grad_norm": 2.916783332824707, |
| "learning_rate": 0.0002980758539102851, |
| "loss": 0.9847, |
| "step": 233 |
| }, |
| { |
| "epoch": 1.21875, |
| "grad_norm": 1.988237977027893, |
| "learning_rate": 0.0002980560092958861, |
| "loss": 0.981, |
| "step": 234 |
| }, |
| { |
| "epoch": 1.2239583333333333, |
| "grad_norm": 2.7537686824798584, |
| "learning_rate": 0.0002980360635398403, |
| "loss": 0.958, |
| "step": 235 |
| }, |
| { |
| "epoch": 1.2291666666666667, |
| "grad_norm": 1.0258450508117676, |
| "learning_rate": 0.00029801601665577313, |
| "loss": 0.9996, |
| "step": 236 |
| }, |
| { |
| "epoch": 1.234375, |
| "grad_norm": 0.9269573092460632, |
| "learning_rate": 0.00029799586865737943, |
| "loss": 0.9585, |
| "step": 237 |
| }, |
| { |
| "epoch": 1.2395833333333333, |
| "grad_norm": 1.3605316877365112, |
| "learning_rate": 0.00029797561955842287, |
| "loss": 1.0105, |
| "step": 238 |
| }, |
| { |
| "epoch": 1.2447916666666667, |
| "grad_norm": 1.7195180654525757, |
| "learning_rate": 0.00029795526937273617, |
| "loss": 0.944, |
| "step": 239 |
| }, |
| { |
| "epoch": 1.25, |
| "grad_norm": 1.744049072265625, |
| "learning_rate": 0.0002979348181142212, |
| "loss": 0.9515, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.2552083333333333, |
| "grad_norm": 1.1699268817901611, |
| "learning_rate": 0.00029791426579684887, |
| "loss": 0.9891, |
| "step": 241 |
| }, |
| { |
| "epoch": 1.2604166666666667, |
| "grad_norm": 1.360535979270935, |
| "learning_rate": 0.00029789361243465906, |
| "loss": 0.8403, |
| "step": 242 |
| }, |
| { |
| "epoch": 1.265625, |
| "grad_norm": 0.8625591993331909, |
| "learning_rate": 0.0002978728580417608, |
| "loss": 0.9455, |
| "step": 243 |
| }, |
| { |
| "epoch": 1.2708333333333333, |
| "grad_norm": 2.204169988632202, |
| "learning_rate": 0.000297852002632332, |
| "loss": 0.9276, |
| "step": 244 |
| }, |
| { |
| "epoch": 1.2760416666666667, |
| "grad_norm": 0.9667099118232727, |
| "learning_rate": 0.0002978310462206197, |
| "loss": 0.9072, |
| "step": 245 |
| }, |
| { |
| "epoch": 1.28125, |
| "grad_norm": 2.535550355911255, |
| "learning_rate": 0.00029780998882093974, |
| "loss": 0.996, |
| "step": 246 |
| }, |
| { |
| "epoch": 1.2864583333333333, |
| "grad_norm": 1.4619113206863403, |
| "learning_rate": 0.0002977888304476772, |
| "loss": 0.8931, |
| "step": 247 |
| }, |
| { |
| "epoch": 1.2916666666666667, |
| "grad_norm": 1.7474439144134521, |
| "learning_rate": 0.000297767571115286, |
| "loss": 1.0628, |
| "step": 248 |
| }, |
| { |
| "epoch": 1.296875, |
| "grad_norm": 1.0162748098373413, |
| "learning_rate": 0.00029774621083828904, |
| "loss": 0.8972, |
| "step": 249 |
| }, |
| { |
| "epoch": 1.3020833333333333, |
| "grad_norm": 1.7343775033950806, |
| "learning_rate": 0.0002977247496312782, |
| "loss": 0.9162, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.3072916666666667, |
| "grad_norm": 1.19266939163208, |
| "learning_rate": 0.0002977031875089142, |
| "loss": 1.0321, |
| "step": 251 |
| }, |
| { |
| "epoch": 1.3125, |
| "grad_norm": 1.0502628087997437, |
| "learning_rate": 0.0002976815244859269, |
| "loss": 0.961, |
| "step": 252 |
| }, |
| { |
| "epoch": 1.3177083333333333, |
| "grad_norm": 1.0067213773727417, |
| "learning_rate": 0.000297659760577115, |
| "loss": 0.9911, |
| "step": 253 |
| }, |
| { |
| "epoch": 1.3229166666666667, |
| "grad_norm": 1.1698657274246216, |
| "learning_rate": 0.000297637895797346, |
| "loss": 0.9686, |
| "step": 254 |
| }, |
| { |
| "epoch": 1.328125, |
| "grad_norm": 1.0224770307540894, |
| "learning_rate": 0.0002976159301615565, |
| "loss": 1.0666, |
| "step": 255 |
| }, |
| { |
| "epoch": 1.3333333333333333, |
| "grad_norm": 2.009638786315918, |
| "learning_rate": 0.00029759386368475183, |
| "loss": 0.9714, |
| "step": 256 |
| }, |
| { |
| "epoch": 1.3385416666666667, |
| "grad_norm": 1.8785254955291748, |
| "learning_rate": 0.00029757169638200637, |
| "loss": 0.9342, |
| "step": 257 |
| }, |
| { |
| "epoch": 1.34375, |
| "grad_norm": 1.7755099534988403, |
| "learning_rate": 0.0002975494282684632, |
| "loss": 1.029, |
| "step": 258 |
| }, |
| { |
| "epoch": 1.3489583333333333, |
| "grad_norm": 1.7360833883285522, |
| "learning_rate": 0.0002975270593593344, |
| "loss": 0.9465, |
| "step": 259 |
| }, |
| { |
| "epoch": 1.3541666666666667, |
| "grad_norm": 1.9437713623046875, |
| "learning_rate": 0.00029750458966990094, |
| "loss": 0.8847, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.359375, |
| "grad_norm": 0.8070132732391357, |
| "learning_rate": 0.00029748201921551244, |
| "loss": 0.9494, |
| "step": 261 |
| }, |
| { |
| "epoch": 1.3645833333333333, |
| "grad_norm": 1.0000035762786865, |
| "learning_rate": 0.0002974593480115875, |
| "loss": 1.1494, |
| "step": 262 |
| }, |
| { |
| "epoch": 1.3697916666666667, |
| "grad_norm": 1.0765668153762817, |
| "learning_rate": 0.00029743657607361355, |
| "loss": 0.9309, |
| "step": 263 |
| }, |
| { |
| "epoch": 1.375, |
| "grad_norm": 2.3590445518493652, |
| "learning_rate": 0.0002974137034171468, |
| "loss": 1.0352, |
| "step": 264 |
| }, |
| { |
| "epoch": 1.3802083333333333, |
| "grad_norm": 1.5572516918182373, |
| "learning_rate": 0.00029739073005781235, |
| "loss": 0.8954, |
| "step": 265 |
| }, |
| { |
| "epoch": 1.3854166666666667, |
| "grad_norm": 1.171444058418274, |
| "learning_rate": 0.00029736765601130386, |
| "loss": 0.9485, |
| "step": 266 |
| }, |
| { |
| "epoch": 1.390625, |
| "grad_norm": 2.3477118015289307, |
| "learning_rate": 0.000297344481293384, |
| "loss": 1.1708, |
| "step": 267 |
| }, |
| { |
| "epoch": 1.3958333333333333, |
| "grad_norm": 1.123369574546814, |
| "learning_rate": 0.00029732120591988414, |
| "loss": 0.9665, |
| "step": 268 |
| }, |
| { |
| "epoch": 1.4010416666666667, |
| "grad_norm": 1.778559923171997, |
| "learning_rate": 0.00029729782990670447, |
| "loss": 0.9576, |
| "step": 269 |
| }, |
| { |
| "epoch": 1.40625, |
| "grad_norm": 1.516681432723999, |
| "learning_rate": 0.0002972743532698138, |
| "loss": 0.9646, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.4114583333333333, |
| "grad_norm": 2.350296974182129, |
| "learning_rate": 0.0002972507760252497, |
| "loss": 0.9876, |
| "step": 271 |
| }, |
| { |
| "epoch": 1.4166666666666667, |
| "grad_norm": 1.2134095430374146, |
| "learning_rate": 0.00029722709818911857, |
| "loss": 0.9293, |
| "step": 272 |
| }, |
| { |
| "epoch": 1.421875, |
| "grad_norm": 0.9792975187301636, |
| "learning_rate": 0.0002972033197775955, |
| "loss": 0.8713, |
| "step": 273 |
| }, |
| { |
| "epoch": 1.4270833333333333, |
| "grad_norm": 2.348506212234497, |
| "learning_rate": 0.00029717944080692423, |
| "loss": 0.8795, |
| "step": 274 |
| }, |
| { |
| "epoch": 1.4322916666666667, |
| "grad_norm": 1.6565791368484497, |
| "learning_rate": 0.00029715546129341727, |
| "loss": 1.04, |
| "step": 275 |
| }, |
| { |
| "epoch": 1.4375, |
| "grad_norm": 1.3527671098709106, |
| "learning_rate": 0.00029713138125345563, |
| "loss": 0.9107, |
| "step": 276 |
| }, |
| { |
| "epoch": 1.4427083333333333, |
| "grad_norm": 1.1875962018966675, |
| "learning_rate": 0.0002971072007034893, |
| "loss": 0.8781, |
| "step": 277 |
| }, |
| { |
| "epoch": 1.4479166666666667, |
| "grad_norm": 0.9425337910652161, |
| "learning_rate": 0.0002970829196600367, |
| "loss": 0.9727, |
| "step": 278 |
| }, |
| { |
| "epoch": 1.453125, |
| "grad_norm": 1.5304170846939087, |
| "learning_rate": 0.00029705853813968494, |
| "loss": 0.966, |
| "step": 279 |
| }, |
| { |
| "epoch": 1.4583333333333333, |
| "grad_norm": 1.051808476448059, |
| "learning_rate": 0.00029703405615908975, |
| "loss": 1.0545, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.4635416666666667, |
| "grad_norm": 0.7866876721382141, |
| "learning_rate": 0.00029700947373497566, |
| "loss": 0.924, |
| "step": 281 |
| }, |
| { |
| "epoch": 1.46875, |
| "grad_norm": 0.7926193475723267, |
| "learning_rate": 0.0002969847908841356, |
| "loss": 0.8782, |
| "step": 282 |
| }, |
| { |
| "epoch": 1.4739583333333333, |
| "grad_norm": 2.334317684173584, |
| "learning_rate": 0.0002969600076234312, |
| "loss": 1.1138, |
| "step": 283 |
| }, |
| { |
| "epoch": 1.4791666666666667, |
| "grad_norm": 1.3602794408798218, |
| "learning_rate": 0.00029693512396979267, |
| "loss": 1.0573, |
| "step": 284 |
| }, |
| { |
| "epoch": 1.484375, |
| "grad_norm": 2.091019630432129, |
| "learning_rate": 0.0002969101399402188, |
| "loss": 1.0804, |
| "step": 285 |
| }, |
| { |
| "epoch": 1.4895833333333333, |
| "grad_norm": 2.2692272663116455, |
| "learning_rate": 0.000296885055551777, |
| "loss": 1.032, |
| "step": 286 |
| }, |
| { |
| "epoch": 1.4947916666666667, |
| "grad_norm": 1.062971830368042, |
| "learning_rate": 0.00029685987082160315, |
| "loss": 0.9472, |
| "step": 287 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 1.497585654258728, |
| "learning_rate": 0.00029683458576690167, |
| "loss": 0.9739, |
| "step": 288 |
| }, |
| { |
| "epoch": 1.5052083333333335, |
| "grad_norm": 1.1266616582870483, |
| "learning_rate": 0.0002968092004049457, |
| "loss": 0.894, |
| "step": 289 |
| }, |
| { |
| "epoch": 1.5104166666666665, |
| "grad_norm": 1.3249938488006592, |
| "learning_rate": 0.0002967837147530766, |
| "loss": 0.9985, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.515625, |
| "grad_norm": 1.330154299736023, |
| "learning_rate": 0.00029675812882870453, |
| "loss": 1.0062, |
| "step": 291 |
| }, |
| { |
| "epoch": 1.5208333333333335, |
| "grad_norm": 1.4275290966033936, |
| "learning_rate": 0.000296732442649308, |
| "loss": 1.0334, |
| "step": 292 |
| }, |
| { |
| "epoch": 1.5260416666666665, |
| "grad_norm": 1.7656426429748535, |
| "learning_rate": 0.0002967066562324341, |
| "loss": 0.8841, |
| "step": 293 |
| }, |
| { |
| "epoch": 1.53125, |
| "grad_norm": 1.3839107751846313, |
| "learning_rate": 0.00029668076959569816, |
| "loss": 0.9068, |
| "step": 294 |
| }, |
| { |
| "epoch": 1.5364583333333335, |
| "grad_norm": 1.6695129871368408, |
| "learning_rate": 0.0002966547827567843, |
| "loss": 1.0085, |
| "step": 295 |
| }, |
| { |
| "epoch": 1.5416666666666665, |
| "grad_norm": 1.7697908878326416, |
| "learning_rate": 0.0002966286957334449, |
| "loss": 1.0159, |
| "step": 296 |
| }, |
| { |
| "epoch": 1.546875, |
| "grad_norm": 1.9005810022354126, |
| "learning_rate": 0.0002966025085435007, |
| "loss": 1.1423, |
| "step": 297 |
| }, |
| { |
| "epoch": 1.5520833333333335, |
| "grad_norm": 1.1246470212936401, |
| "learning_rate": 0.0002965762212048411, |
| "loss": 1.0651, |
| "step": 298 |
| }, |
| { |
| "epoch": 1.5572916666666665, |
| "grad_norm": 1.324097752571106, |
| "learning_rate": 0.0002965498337354238, |
| "loss": 0.9872, |
| "step": 299 |
| }, |
| { |
| "epoch": 1.5625, |
| "grad_norm": 1.2919105291366577, |
| "learning_rate": 0.0002965233461532749, |
| "loss": 1.1084, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.5625, |
| "eval_f1_macro": 0.2502048667418363, |
| "eval_loss": 1.0074748992919922, |
| "eval_runtime": 4.9655, |
| "eval_samples_per_second": 616.861, |
| "eval_steps_per_second": 9.667, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.5677083333333335, |
| "grad_norm": 1.5591518878936768, |
| "learning_rate": 0.0002964967584764888, |
| "loss": 1.0566, |
| "step": 301 |
| }, |
| { |
| "epoch": 1.5729166666666665, |
| "grad_norm": 1.0953510999679565, |
| "learning_rate": 0.0002964700707232284, |
| "loss": 0.9199, |
| "step": 302 |
| }, |
| { |
| "epoch": 1.578125, |
| "grad_norm": 2.0232112407684326, |
| "learning_rate": 0.000296443282911725, |
| "loss": 0.9589, |
| "step": 303 |
| }, |
| { |
| "epoch": 1.5833333333333335, |
| "grad_norm": 1.7497212886810303, |
| "learning_rate": 0.0002964163950602781, |
| "loss": 0.9374, |
| "step": 304 |
| }, |
| { |
| "epoch": 1.5885416666666665, |
| "grad_norm": 1.2291638851165771, |
| "learning_rate": 0.0002963894071872557, |
| "loss": 0.9255, |
| "step": 305 |
| }, |
| { |
| "epoch": 1.59375, |
| "grad_norm": 1.224286675453186, |
| "learning_rate": 0.00029636231931109394, |
| "loss": 0.8855, |
| "step": 306 |
| }, |
| { |
| "epoch": 1.5989583333333335, |
| "grad_norm": 0.942564845085144, |
| "learning_rate": 0.00029633513145029753, |
| "loss": 0.9122, |
| "step": 307 |
| }, |
| { |
| "epoch": 1.6041666666666665, |
| "grad_norm": 1.294308066368103, |
| "learning_rate": 0.0002963078436234392, |
| "loss": 0.9245, |
| "step": 308 |
| }, |
| { |
| "epoch": 1.609375, |
| "grad_norm": 1.5455310344696045, |
| "learning_rate": 0.00029628045584916023, |
| "loss": 0.9942, |
| "step": 309 |
| }, |
| { |
| "epoch": 1.6145833333333335, |
| "grad_norm": 1.5446116924285889, |
| "learning_rate": 0.00029625296814616994, |
| "loss": 0.8775, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.6197916666666665, |
| "grad_norm": 1.1263961791992188, |
| "learning_rate": 0.0002962253805332462, |
| "loss": 0.9871, |
| "step": 311 |
| }, |
| { |
| "epoch": 1.625, |
| "grad_norm": 1.2371349334716797, |
| "learning_rate": 0.0002961976930292348, |
| "loss": 0.9276, |
| "step": 312 |
| }, |
| { |
| "epoch": 1.6302083333333335, |
| "grad_norm": 0.8237337470054626, |
| "learning_rate": 0.00029616990565305, |
| "loss": 0.946, |
| "step": 313 |
| }, |
| { |
| "epoch": 1.6354166666666665, |
| "grad_norm": 1.7117730379104614, |
| "learning_rate": 0.0002961420184236742, |
| "loss": 0.9775, |
| "step": 314 |
| }, |
| { |
| "epoch": 1.640625, |
| "grad_norm": 1.6812770366668701, |
| "learning_rate": 0.00029611403136015806, |
| "loss": 1.0382, |
| "step": 315 |
| }, |
| { |
| "epoch": 1.6458333333333335, |
| "grad_norm": 0.8278731107711792, |
| "learning_rate": 0.0002960859444816204, |
| "loss": 0.9287, |
| "step": 316 |
| }, |
| { |
| "epoch": 1.6510416666666665, |
| "grad_norm": 2.0879504680633545, |
| "learning_rate": 0.0002960577578072482, |
| "loss": 0.9333, |
| "step": 317 |
| }, |
| { |
| "epoch": 1.65625, |
| "grad_norm": 1.746490240097046, |
| "learning_rate": 0.00029602947135629667, |
| "loss": 0.9811, |
| "step": 318 |
| }, |
| { |
| "epoch": 1.6614583333333335, |
| "grad_norm": 1.3608087301254272, |
| "learning_rate": 0.00029600108514808916, |
| "loss": 0.9406, |
| "step": 319 |
| }, |
| { |
| "epoch": 1.6666666666666665, |
| "grad_norm": 1.5564864873886108, |
| "learning_rate": 0.0002959725992020172, |
| "loss": 0.9249, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.671875, |
| "grad_norm": 1.7467681169509888, |
| "learning_rate": 0.0002959440135375404, |
| "loss": 1.0743, |
| "step": 321 |
| }, |
| { |
| "epoch": 1.6770833333333335, |
| "grad_norm": 1.5353367328643799, |
| "learning_rate": 0.0002959153281741865, |
| "loss": 0.8625, |
| "step": 322 |
| }, |
| { |
| "epoch": 1.6822916666666665, |
| "grad_norm": 1.6767648458480835, |
| "learning_rate": 0.0002958865431315514, |
| "loss": 0.9652, |
| "step": 323 |
| }, |
| { |
| "epoch": 1.6875, |
| "grad_norm": 2.0083630084991455, |
| "learning_rate": 0.00029585765842929894, |
| "loss": 1.0019, |
| "step": 324 |
| }, |
| { |
| "epoch": 1.6927083333333335, |
| "grad_norm": 0.8608649373054504, |
| "learning_rate": 0.0002958286740871613, |
| "loss": 0.9335, |
| "step": 325 |
| }, |
| { |
| "epoch": 1.6979166666666665, |
| "grad_norm": 1.1160967350006104, |
| "learning_rate": 0.00029579959012493857, |
| "loss": 1.0132, |
| "step": 326 |
| }, |
| { |
| "epoch": 1.703125, |
| "grad_norm": 1.6392353773117065, |
| "learning_rate": 0.00029577040656249883, |
| "loss": 0.9826, |
| "step": 327 |
| }, |
| { |
| "epoch": 1.7083333333333335, |
| "grad_norm": 1.2190186977386475, |
| "learning_rate": 0.0002957411234197782, |
| "loss": 0.9809, |
| "step": 328 |
| }, |
| { |
| "epoch": 1.7135416666666665, |
| "grad_norm": 1.574937343597412, |
| "learning_rate": 0.00029571174071678114, |
| "loss": 1.0064, |
| "step": 329 |
| }, |
| { |
| "epoch": 1.71875, |
| "grad_norm": 1.1905817985534668, |
| "learning_rate": 0.00029568225847357974, |
| "loss": 0.9704, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.7239583333333335, |
| "grad_norm": 1.3929685354232788, |
| "learning_rate": 0.0002956526767103142, |
| "loss": 1.0101, |
| "step": 331 |
| }, |
| { |
| "epoch": 1.7291666666666665, |
| "grad_norm": 1.005627155303955, |
| "learning_rate": 0.0002956229954471928, |
| "loss": 1.0353, |
| "step": 332 |
| }, |
| { |
| "epoch": 1.734375, |
| "grad_norm": 0.924006462097168, |
| "learning_rate": 0.00029559321470449173, |
| "loss": 0.9333, |
| "step": 333 |
| }, |
| { |
| "epoch": 1.7395833333333335, |
| "grad_norm": 1.2514586448669434, |
| "learning_rate": 0.0002955633345025551, |
| "loss": 1.0038, |
| "step": 334 |
| }, |
| { |
| "epoch": 1.7447916666666665, |
| "grad_norm": 0.7907490730285645, |
| "learning_rate": 0.000295533354861795, |
| "loss": 0.8886, |
| "step": 335 |
| }, |
| { |
| "epoch": 1.75, |
| "grad_norm": 1.0092335939407349, |
| "learning_rate": 0.0002955032758026915, |
| "loss": 0.9163, |
| "step": 336 |
| }, |
| { |
| "epoch": 1.7552083333333335, |
| "grad_norm": 1.610192894935608, |
| "learning_rate": 0.00029547309734579254, |
| "loss": 0.978, |
| "step": 337 |
| }, |
| { |
| "epoch": 1.7604166666666665, |
| "grad_norm": 1.259249210357666, |
| "learning_rate": 0.0002954428195117139, |
| "loss": 0.972, |
| "step": 338 |
| }, |
| { |
| "epoch": 1.765625, |
| "grad_norm": 0.9588133096694946, |
| "learning_rate": 0.00029541244232113935, |
| "loss": 0.9098, |
| "step": 339 |
| }, |
| { |
| "epoch": 1.7708333333333335, |
| "grad_norm": 0.9365316033363342, |
| "learning_rate": 0.00029538196579482047, |
| "loss": 0.8464, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.7760416666666665, |
| "grad_norm": 1.1950703859329224, |
| "learning_rate": 0.0002953513899535767, |
| "loss": 1.022, |
| "step": 341 |
| }, |
| { |
| "epoch": 1.78125, |
| "grad_norm": 2.4652135372161865, |
| "learning_rate": 0.0002953207148182954, |
| "loss": 1.0181, |
| "step": 342 |
| }, |
| { |
| "epoch": 1.7864583333333335, |
| "grad_norm": 2.3812384605407715, |
| "learning_rate": 0.0002952899404099317, |
| "loss": 0.9667, |
| "step": 343 |
| }, |
| { |
| "epoch": 1.7916666666666665, |
| "grad_norm": 1.3816120624542236, |
| "learning_rate": 0.00029525906674950845, |
| "loss": 0.9298, |
| "step": 344 |
| }, |
| { |
| "epoch": 1.796875, |
| "grad_norm": 1.103812336921692, |
| "learning_rate": 0.00029522809385811656, |
| "loss": 0.9019, |
| "step": 345 |
| }, |
| { |
| "epoch": 1.8020833333333335, |
| "grad_norm": 1.0140377283096313, |
| "learning_rate": 0.0002951970217569145, |
| "loss": 0.9144, |
| "step": 346 |
| }, |
| { |
| "epoch": 1.8072916666666665, |
| "grad_norm": 1.3824679851531982, |
| "learning_rate": 0.00029516585046712854, |
| "loss": 0.918, |
| "step": 347 |
| }, |
| { |
| "epoch": 1.8125, |
| "grad_norm": 1.8375003337860107, |
| "learning_rate": 0.0002951345800100528, |
| "loss": 0.8817, |
| "step": 348 |
| }, |
| { |
| "epoch": 1.8177083333333335, |
| "grad_norm": 1.3797621726989746, |
| "learning_rate": 0.0002951032104070491, |
| "loss": 0.9289, |
| "step": 349 |
| }, |
| { |
| "epoch": 1.8229166666666665, |
| "grad_norm": 0.9739082455635071, |
| "learning_rate": 0.000295071741679547, |
| "loss": 0.9645, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.828125, |
| "grad_norm": 1.0602864027023315, |
| "learning_rate": 0.00029504017384904366, |
| "loss": 0.9788, |
| "step": 351 |
| }, |
| { |
| "epoch": 1.8333333333333335, |
| "grad_norm": 2.1750597953796387, |
| "learning_rate": 0.0002950085069371042, |
| "loss": 0.9688, |
| "step": 352 |
| }, |
| { |
| "epoch": 1.8385416666666665, |
| "grad_norm": 1.4753605127334595, |
| "learning_rate": 0.0002949767409653612, |
| "loss": 1.0414, |
| "step": 353 |
| }, |
| { |
| "epoch": 1.84375, |
| "grad_norm": 1.687851071357727, |
| "learning_rate": 0.00029494487595551494, |
| "loss": 0.9738, |
| "step": 354 |
| }, |
| { |
| "epoch": 1.8489583333333335, |
| "grad_norm": 2.179429054260254, |
| "learning_rate": 0.0002949129119293334, |
| "loss": 0.9774, |
| "step": 355 |
| }, |
| { |
| "epoch": 1.8541666666666665, |
| "grad_norm": 3.0312469005584717, |
| "learning_rate": 0.00029488084890865227, |
| "loss": 1.105, |
| "step": 356 |
| }, |
| { |
| "epoch": 1.859375, |
| "grad_norm": 0.8521849513053894, |
| "learning_rate": 0.0002948486869153747, |
| "loss": 0.9677, |
| "step": 357 |
| }, |
| { |
| "epoch": 1.8645833333333335, |
| "grad_norm": 1.4378811120986938, |
| "learning_rate": 0.00029481642597147163, |
| "loss": 1.0367, |
| "step": 358 |
| }, |
| { |
| "epoch": 1.8697916666666665, |
| "grad_norm": 2.9494237899780273, |
| "learning_rate": 0.0002947840660989815, |
| "loss": 1.1472, |
| "step": 359 |
| }, |
| { |
| "epoch": 1.875, |
| "grad_norm": 2.496962547302246, |
| "learning_rate": 0.00029475160732001025, |
| "loss": 1.0401, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.8802083333333335, |
| "grad_norm": 2.738942861557007, |
| "learning_rate": 0.00029471904965673156, |
| "loss": 0.9948, |
| "step": 361 |
| }, |
| { |
| "epoch": 1.8854166666666665, |
| "grad_norm": 2.0154058933258057, |
| "learning_rate": 0.00029468639313138655, |
| "loss": 0.9398, |
| "step": 362 |
| }, |
| { |
| "epoch": 1.890625, |
| "grad_norm": 2.7557668685913086, |
| "learning_rate": 0.000294653637766284, |
| "loss": 0.9944, |
| "step": 363 |
| }, |
| { |
| "epoch": 1.8958333333333335, |
| "grad_norm": 2.0527195930480957, |
| "learning_rate": 0.00029462078358379995, |
| "loss": 0.9773, |
| "step": 364 |
| }, |
| { |
| "epoch": 1.9010416666666665, |
| "grad_norm": 1.1843440532684326, |
| "learning_rate": 0.0002945878306063782, |
| "loss": 1.0659, |
| "step": 365 |
| }, |
| { |
| "epoch": 1.90625, |
| "grad_norm": 1.2736120223999023, |
| "learning_rate": 0.0002945547788565299, |
| "loss": 0.9566, |
| "step": 366 |
| }, |
| { |
| "epoch": 1.9114583333333335, |
| "grad_norm": 1.2100763320922852, |
| "learning_rate": 0.0002945216283568338, |
| "loss": 0.8795, |
| "step": 367 |
| }, |
| { |
| "epoch": 1.9166666666666665, |
| "grad_norm": 3.235872745513916, |
| "learning_rate": 0.00029448837912993597, |
| "loss": 0.9803, |
| "step": 368 |
| }, |
| { |
| "epoch": 1.921875, |
| "grad_norm": 1.9371110200881958, |
| "learning_rate": 0.00029445503119855004, |
| "loss": 0.9312, |
| "step": 369 |
| }, |
| { |
| "epoch": 1.9270833333333335, |
| "grad_norm": 1.730183720588684, |
| "learning_rate": 0.000294421584585457, |
| "loss": 0.9805, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.9322916666666665, |
| "grad_norm": 2.59682559967041, |
| "learning_rate": 0.00029438803931350517, |
| "loss": 0.9081, |
| "step": 371 |
| }, |
| { |
| "epoch": 1.9375, |
| "grad_norm": 2.336057662963867, |
| "learning_rate": 0.0002943543954056105, |
| "loss": 0.9667, |
| "step": 372 |
| }, |
| { |
| "epoch": 1.9427083333333335, |
| "grad_norm": 1.2367947101593018, |
| "learning_rate": 0.0002943206528847562, |
| "loss": 0.8969, |
| "step": 373 |
| }, |
| { |
| "epoch": 1.9479166666666665, |
| "grad_norm": 1.756821870803833, |
| "learning_rate": 0.0002942868117739927, |
| "loss": 1.0679, |
| "step": 374 |
| }, |
| { |
| "epoch": 1.953125, |
| "grad_norm": 2.854928970336914, |
| "learning_rate": 0.0002942528720964379, |
| "loss": 0.9556, |
| "step": 375 |
| }, |
| { |
| "epoch": 1.9583333333333335, |
| "grad_norm": 1.5611786842346191, |
| "learning_rate": 0.00029421883387527727, |
| "loss": 0.9275, |
| "step": 376 |
| }, |
| { |
| "epoch": 1.9635416666666665, |
| "grad_norm": 2.214402914047241, |
| "learning_rate": 0.00029418469713376317, |
| "loss": 0.8831, |
| "step": 377 |
| }, |
| { |
| "epoch": 1.96875, |
| "grad_norm": 1.249422311782837, |
| "learning_rate": 0.0002941504618952155, |
| "loss": 0.892, |
| "step": 378 |
| }, |
| { |
| "epoch": 1.9739583333333335, |
| "grad_norm": 2.153195381164551, |
| "learning_rate": 0.00029411612818302146, |
| "loss": 0.8771, |
| "step": 379 |
| }, |
| { |
| "epoch": 1.9791666666666665, |
| "grad_norm": 1.1903241872787476, |
| "learning_rate": 0.00029408169602063546, |
| "loss": 0.9691, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.984375, |
| "grad_norm": 1.2506940364837646, |
| "learning_rate": 0.0002940471654315791, |
| "loss": 0.9917, |
| "step": 381 |
| }, |
| { |
| "epoch": 1.9895833333333335, |
| "grad_norm": 1.5205382108688354, |
| "learning_rate": 0.00029401253643944144, |
| "loss": 0.9588, |
| "step": 382 |
| }, |
| { |
| "epoch": 1.9947916666666665, |
| "grad_norm": 0.8318743705749512, |
| "learning_rate": 0.0002939778090678785, |
| "loss": 0.9663, |
| "step": 383 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 1.4195784330368042, |
| "learning_rate": 0.00029394298334061363, |
| "loss": 0.9935, |
| "step": 384 |
| }, |
| { |
| "epoch": 2.0052083333333335, |
| "grad_norm": 1.631840705871582, |
| "learning_rate": 0.0002939080592814374, |
| "loss": 0.969, |
| "step": 385 |
| }, |
| { |
| "epoch": 2.0104166666666665, |
| "grad_norm": 1.9001116752624512, |
| "learning_rate": 0.0002938730369142076, |
| "loss": 0.9961, |
| "step": 386 |
| }, |
| { |
| "epoch": 2.015625, |
| "grad_norm": 1.2098877429962158, |
| "learning_rate": 0.0002938379162628489, |
| "loss": 0.8887, |
| "step": 387 |
| }, |
| { |
| "epoch": 2.0208333333333335, |
| "grad_norm": 0.856966495513916, |
| "learning_rate": 0.0002938026973513534, |
| "loss": 0.9572, |
| "step": 388 |
| }, |
| { |
| "epoch": 2.0260416666666665, |
| "grad_norm": 1.1199861764907837, |
| "learning_rate": 0.0002937673802037803, |
| "loss": 0.8854, |
| "step": 389 |
| }, |
| { |
| "epoch": 2.03125, |
| "grad_norm": 1.6614012718200684, |
| "learning_rate": 0.00029373196484425574, |
| "loss": 0.9032, |
| "step": 390 |
| }, |
| { |
| "epoch": 2.0364583333333335, |
| "grad_norm": 1.8955625295639038, |
| "learning_rate": 0.00029369645129697313, |
| "loss": 0.8382, |
| "step": 391 |
| }, |
| { |
| "epoch": 2.0416666666666665, |
| "grad_norm": 3.055558204650879, |
| "learning_rate": 0.00029366083958619273, |
| "loss": 1.0119, |
| "step": 392 |
| }, |
| { |
| "epoch": 2.046875, |
| "grad_norm": 1.1816325187683105, |
| "learning_rate": 0.0002936251297362422, |
| "loss": 0.9669, |
| "step": 393 |
| }, |
| { |
| "epoch": 2.0520833333333335, |
| "grad_norm": 1.4919461011886597, |
| "learning_rate": 0.0002935893217715159, |
| "loss": 0.9906, |
| "step": 394 |
| }, |
| { |
| "epoch": 2.0572916666666665, |
| "grad_norm": 1.6824826002120972, |
| "learning_rate": 0.0002935534157164754, |
| "loss": 0.9215, |
| "step": 395 |
| }, |
| { |
| "epoch": 2.0625, |
| "grad_norm": 1.937314510345459, |
| "learning_rate": 0.00029351741159564925, |
| "loss": 0.9264, |
| "step": 396 |
| }, |
| { |
| "epoch": 2.0677083333333335, |
| "grad_norm": 1.0067870616912842, |
| "learning_rate": 0.000293481309433633, |
| "loss": 0.9302, |
| "step": 397 |
| }, |
| { |
| "epoch": 2.0729166666666665, |
| "grad_norm": 1.1681950092315674, |
| "learning_rate": 0.0002934451092550891, |
| "loss": 0.9472, |
| "step": 398 |
| }, |
| { |
| "epoch": 2.078125, |
| "grad_norm": 1.580079197883606, |
| "learning_rate": 0.0002934088110847471, |
| "loss": 0.9831, |
| "step": 399 |
| }, |
| { |
| "epoch": 2.0833333333333335, |
| "grad_norm": 1.843764305114746, |
| "learning_rate": 0.0002933724149474033, |
| "loss": 0.9891, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.0833333333333335, |
| "eval_f1_macro": 0.2551037670149762, |
| "eval_loss": 1.0003770589828491, |
| "eval_runtime": 4.9747, |
| "eval_samples_per_second": 615.711, |
| "eval_steps_per_second": 9.649, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.0885416666666665, |
| "grad_norm": 1.5101807117462158, |
| "learning_rate": 0.00029333592086792107, |
| "loss": 0.9313, |
| "step": 401 |
| }, |
| { |
| "epoch": 2.09375, |
| "grad_norm": 1.4258942604064941, |
| "learning_rate": 0.00029329932887123066, |
| "loss": 0.9512, |
| "step": 402 |
| }, |
| { |
| "epoch": 2.0989583333333335, |
| "grad_norm": 1.745343565940857, |
| "learning_rate": 0.00029326263898232926, |
| "loss": 0.8936, |
| "step": 403 |
| }, |
| { |
| "epoch": 2.1041666666666665, |
| "grad_norm": 1.0738366842269897, |
| "learning_rate": 0.0002932258512262807, |
| "loss": 0.9914, |
| "step": 404 |
| }, |
| { |
| "epoch": 2.109375, |
| "grad_norm": 1.6219918727874756, |
| "learning_rate": 0.000293188965628216, |
| "loss": 0.8846, |
| "step": 405 |
| }, |
| { |
| "epoch": 2.1145833333333335, |
| "grad_norm": 1.3136440515518188, |
| "learning_rate": 0.0002931519822133328, |
| "loss": 0.8951, |
| "step": 406 |
| }, |
| { |
| "epoch": 2.1197916666666665, |
| "grad_norm": 1.1748030185699463, |
| "learning_rate": 0.0002931149010068956, |
| "loss": 0.8768, |
| "step": 407 |
| }, |
| { |
| "epoch": 2.125, |
| "grad_norm": 1.7310854196548462, |
| "learning_rate": 0.00029307772203423566, |
| "loss": 0.8647, |
| "step": 408 |
| }, |
| { |
| "epoch": 2.1302083333333335, |
| "grad_norm": 1.2103936672210693, |
| "learning_rate": 0.0002930404453207512, |
| "loss": 0.9474, |
| "step": 409 |
| }, |
| { |
| "epoch": 2.1354166666666665, |
| "grad_norm": 1.0020298957824707, |
| "learning_rate": 0.00029300307089190705, |
| "loss": 0.9751, |
| "step": 410 |
| }, |
| { |
| "epoch": 2.140625, |
| "grad_norm": 1.4418237209320068, |
| "learning_rate": 0.0002929655987732349, |
| "loss": 1.0099, |
| "step": 411 |
| }, |
| { |
| "epoch": 2.1458333333333335, |
| "grad_norm": 1.3294477462768555, |
| "learning_rate": 0.000292928028990333, |
| "loss": 0.9743, |
| "step": 412 |
| }, |
| { |
| "epoch": 2.1510416666666665, |
| "grad_norm": 1.4592185020446777, |
| "learning_rate": 0.00029289036156886656, |
| "loss": 0.8666, |
| "step": 413 |
| }, |
| { |
| "epoch": 2.15625, |
| "grad_norm": 1.8001863956451416, |
| "learning_rate": 0.0002928525965345672, |
| "loss": 0.9713, |
| "step": 414 |
| }, |
| { |
| "epoch": 2.1614583333333335, |
| "grad_norm": 1.4137237071990967, |
| "learning_rate": 0.00029281473391323354, |
| "loss": 0.9245, |
| "step": 415 |
| }, |
| { |
| "epoch": 2.1666666666666665, |
| "grad_norm": 1.6174209117889404, |
| "learning_rate": 0.00029277677373073065, |
| "loss": 0.9642, |
| "step": 416 |
| }, |
| { |
| "epoch": 2.171875, |
| "grad_norm": 1.048624038696289, |
| "learning_rate": 0.00029273871601299023, |
| "loss": 0.9405, |
| "step": 417 |
| }, |
| { |
| "epoch": 2.1770833333333335, |
| "grad_norm": 1.1521005630493164, |
| "learning_rate": 0.00029270056078601085, |
| "loss": 0.9587, |
| "step": 418 |
| }, |
| { |
| "epoch": 2.1822916666666665, |
| "grad_norm": 1.7807267904281616, |
| "learning_rate": 0.0002926623080758574, |
| "loss": 1.009, |
| "step": 419 |
| }, |
| { |
| "epoch": 2.1875, |
| "grad_norm": 0.8930591940879822, |
| "learning_rate": 0.0002926239579086616, |
| "loss": 0.8881, |
| "step": 420 |
| }, |
| { |
| "epoch": 2.1927083333333335, |
| "grad_norm": 1.6773489713668823, |
| "learning_rate": 0.0002925855103106215, |
| "loss": 0.8385, |
| "step": 421 |
| }, |
| { |
| "epoch": 2.1979166666666665, |
| "grad_norm": 1.0301157236099243, |
| "learning_rate": 0.0002925469653080019, |
| "loss": 0.9364, |
| "step": 422 |
| }, |
| { |
| "epoch": 2.203125, |
| "grad_norm": 1.5200835466384888, |
| "learning_rate": 0.00029250832292713417, |
| "loss": 0.9132, |
| "step": 423 |
| }, |
| { |
| "epoch": 2.2083333333333335, |
| "grad_norm": 1.290411353111267, |
| "learning_rate": 0.000292469583194416, |
| "loss": 0.8637, |
| "step": 424 |
| }, |
| { |
| "epoch": 2.2135416666666665, |
| "grad_norm": 1.1873688697814941, |
| "learning_rate": 0.00029243074613631174, |
| "loss": 0.94, |
| "step": 425 |
| }, |
| { |
| "epoch": 2.21875, |
| "grad_norm": 0.9899685382843018, |
| "learning_rate": 0.0002923918117793522, |
| "loss": 0.9439, |
| "step": 426 |
| }, |
| { |
| "epoch": 2.2239583333333335, |
| "grad_norm": 3.0960183143615723, |
| "learning_rate": 0.0002923527801501347, |
| "loss": 0.8845, |
| "step": 427 |
| }, |
| { |
| "epoch": 2.2291666666666665, |
| "grad_norm": 1.4349253177642822, |
| "learning_rate": 0.00029231365127532283, |
| "loss": 1.0517, |
| "step": 428 |
| }, |
| { |
| "epoch": 2.234375, |
| "grad_norm": 1.969152569770813, |
| "learning_rate": 0.00029227442518164686, |
| "loss": 1.0739, |
| "step": 429 |
| }, |
| { |
| "epoch": 2.2395833333333335, |
| "grad_norm": 0.9446045756340027, |
| "learning_rate": 0.00029223510189590324, |
| "loss": 1.0955, |
| "step": 430 |
| }, |
| { |
| "epoch": 2.2447916666666665, |
| "grad_norm": 1.1332173347473145, |
| "learning_rate": 0.000292195681444955, |
| "loss": 0.9101, |
| "step": 431 |
| }, |
| { |
| "epoch": 2.25, |
| "grad_norm": 1.0700052976608276, |
| "learning_rate": 0.00029215616385573146, |
| "loss": 0.9677, |
| "step": 432 |
| }, |
| { |
| "epoch": 2.2552083333333335, |
| "grad_norm": 1.5803247690200806, |
| "learning_rate": 0.00029211654915522827, |
| "loss": 0.9721, |
| "step": 433 |
| }, |
| { |
| "epoch": 2.2604166666666665, |
| "grad_norm": 1.3807910680770874, |
| "learning_rate": 0.0002920768373705075, |
| "loss": 0.9481, |
| "step": 434 |
| }, |
| { |
| "epoch": 2.265625, |
| "grad_norm": 2.07843017578125, |
| "learning_rate": 0.0002920370285286975, |
| "loss": 0.7729, |
| "step": 435 |
| }, |
| { |
| "epoch": 2.2708333333333335, |
| "grad_norm": 0.8631197214126587, |
| "learning_rate": 0.0002919971226569929, |
| "loss": 0.9007, |
| "step": 436 |
| }, |
| { |
| "epoch": 2.2760416666666665, |
| "grad_norm": 1.1435606479644775, |
| "learning_rate": 0.0002919571197826547, |
| "loss": 0.953, |
| "step": 437 |
| }, |
| { |
| "epoch": 2.28125, |
| "grad_norm": 1.6061850786209106, |
| "learning_rate": 0.00029191701993300993, |
| "loss": 0.9941, |
| "step": 438 |
| }, |
| { |
| "epoch": 2.2864583333333335, |
| "grad_norm": 1.3588578701019287, |
| "learning_rate": 0.00029187682313545226, |
| "loss": 0.9773, |
| "step": 439 |
| }, |
| { |
| "epoch": 2.2916666666666665, |
| "grad_norm": 1.1119353771209717, |
| "learning_rate": 0.00029183652941744127, |
| "loss": 0.9996, |
| "step": 440 |
| }, |
| { |
| "epoch": 2.296875, |
| "grad_norm": 2.0934276580810547, |
| "learning_rate": 0.0002917961388065028, |
| "loss": 0.8898, |
| "step": 441 |
| }, |
| { |
| "epoch": 2.3020833333333335, |
| "grad_norm": 1.0908467769622803, |
| "learning_rate": 0.000291755651330229, |
| "loss": 1.0339, |
| "step": 442 |
| }, |
| { |
| "epoch": 2.3072916666666665, |
| "grad_norm": 0.9023425579071045, |
| "learning_rate": 0.0002917150670162781, |
| "loss": 0.9236, |
| "step": 443 |
| }, |
| { |
| "epoch": 2.3125, |
| "grad_norm": 0.8267666697502136, |
| "learning_rate": 0.0002916743858923745, |
| "loss": 0.9271, |
| "step": 444 |
| }, |
| { |
| "epoch": 2.3177083333333335, |
| "grad_norm": 1.1516417264938354, |
| "learning_rate": 0.0002916336079863087, |
| "loss": 1.0042, |
| "step": 445 |
| }, |
| { |
| "epoch": 2.3229166666666665, |
| "grad_norm": 1.2533706426620483, |
| "learning_rate": 0.00029159273332593743, |
| "loss": 0.9098, |
| "step": 446 |
| }, |
| { |
| "epoch": 2.328125, |
| "grad_norm": 1.4551417827606201, |
| "learning_rate": 0.0002915517619391834, |
| "loss": 0.9798, |
| "step": 447 |
| }, |
| { |
| "epoch": 2.3333333333333335, |
| "grad_norm": 1.185194492340088, |
| "learning_rate": 0.00029151069385403535, |
| "loss": 0.9078, |
| "step": 448 |
| }, |
| { |
| "epoch": 2.3385416666666665, |
| "grad_norm": 1.6680675745010376, |
| "learning_rate": 0.00029146952909854827, |
| "loss": 0.9204, |
| "step": 449 |
| }, |
| { |
| "epoch": 2.34375, |
| "grad_norm": 1.152811050415039, |
| "learning_rate": 0.00029142826770084304, |
| "loss": 0.9494, |
| "step": 450 |
| }, |
| { |
| "epoch": 2.3489583333333335, |
| "grad_norm": 1.492680311203003, |
| "learning_rate": 0.00029138690968910663, |
| "loss": 0.8824, |
| "step": 451 |
| }, |
| { |
| "epoch": 2.3541666666666665, |
| "grad_norm": 0.7853989005088806, |
| "learning_rate": 0.0002913454550915919, |
| "loss": 0.8679, |
| "step": 452 |
| }, |
| { |
| "epoch": 2.359375, |
| "grad_norm": 1.0536161661148071, |
| "learning_rate": 0.00029130390393661786, |
| "loss": 0.8226, |
| "step": 453 |
| }, |
| { |
| "epoch": 2.3645833333333335, |
| "grad_norm": 0.9902849197387695, |
| "learning_rate": 0.0002912622562525693, |
| "loss": 0.9976, |
| "step": 454 |
| }, |
| { |
| "epoch": 2.3697916666666665, |
| "grad_norm": 1.3338510990142822, |
| "learning_rate": 0.00029122051206789717, |
| "loss": 1.0061, |
| "step": 455 |
| }, |
| { |
| "epoch": 2.375, |
| "grad_norm": 1.0235854387283325, |
| "learning_rate": 0.0002911786714111181, |
| "loss": 0.86, |
| "step": 456 |
| }, |
| { |
| "epoch": 2.3802083333333335, |
| "grad_norm": 0.9769745469093323, |
| "learning_rate": 0.0002911367343108148, |
| "loss": 0.963, |
| "step": 457 |
| }, |
| { |
| "epoch": 2.3854166666666665, |
| "grad_norm": 1.3756396770477295, |
| "learning_rate": 0.0002910947007956358, |
| "loss": 0.8991, |
| "step": 458 |
| }, |
| { |
| "epoch": 2.390625, |
| "grad_norm": 0.9927957057952881, |
| "learning_rate": 0.00029105257089429546, |
| "loss": 0.8729, |
| "step": 459 |
| }, |
| { |
| "epoch": 2.3958333333333335, |
| "grad_norm": 2.393449068069458, |
| "learning_rate": 0.00029101034463557406, |
| "loss": 1.0583, |
| "step": 460 |
| }, |
| { |
| "epoch": 2.4010416666666665, |
| "grad_norm": 1.2361811399459839, |
| "learning_rate": 0.0002909680220483176, |
| "loss": 0.8353, |
| "step": 461 |
| }, |
| { |
| "epoch": 2.40625, |
| "grad_norm": 1.641019344329834, |
| "learning_rate": 0.00029092560316143804, |
| "loss": 1.0196, |
| "step": 462 |
| }, |
| { |
| "epoch": 2.4114583333333335, |
| "grad_norm": 1.4226264953613281, |
| "learning_rate": 0.00029088308800391294, |
| "loss": 0.9383, |
| "step": 463 |
| }, |
| { |
| "epoch": 2.4166666666666665, |
| "grad_norm": 1.9286446571350098, |
| "learning_rate": 0.0002908404766047858, |
| "loss": 1.1102, |
| "step": 464 |
| }, |
| { |
| "epoch": 2.421875, |
| "grad_norm": 1.3239190578460693, |
| "learning_rate": 0.00029079776899316583, |
| "loss": 1.0138, |
| "step": 465 |
| }, |
| { |
| "epoch": 2.4270833333333335, |
| "grad_norm": 1.9432460069656372, |
| "learning_rate": 0.00029075496519822777, |
| "loss": 0.8855, |
| "step": 466 |
| }, |
| { |
| "epoch": 2.4322916666666665, |
| "grad_norm": 1.3702703714370728, |
| "learning_rate": 0.00029071206524921233, |
| "loss": 0.9374, |
| "step": 467 |
| }, |
| { |
| "epoch": 2.4375, |
| "grad_norm": 2.246791124343872, |
| "learning_rate": 0.00029066906917542577, |
| "loss": 0.9858, |
| "step": 468 |
| }, |
| { |
| "epoch": 2.4427083333333335, |
| "grad_norm": 1.1936606168746948, |
| "learning_rate": 0.00029062597700624, |
| "loss": 0.9695, |
| "step": 469 |
| }, |
| { |
| "epoch": 2.4479166666666665, |
| "grad_norm": 1.582943081855774, |
| "learning_rate": 0.0002905827887710927, |
| "loss": 0.9802, |
| "step": 470 |
| }, |
| { |
| "epoch": 2.453125, |
| "grad_norm": 3.034010648727417, |
| "learning_rate": 0.0002905395044994871, |
| "loss": 0.878, |
| "step": 471 |
| }, |
| { |
| "epoch": 2.4583333333333335, |
| "grad_norm": 2.005070447921753, |
| "learning_rate": 0.00029049612422099184, |
| "loss": 0.9637, |
| "step": 472 |
| }, |
| { |
| "epoch": 2.4635416666666665, |
| "grad_norm": 1.0272732973098755, |
| "learning_rate": 0.0002904526479652415, |
| "loss": 0.9584, |
| "step": 473 |
| }, |
| { |
| "epoch": 2.46875, |
| "grad_norm": 1.364323616027832, |
| "learning_rate": 0.0002904090757619361, |
| "loss": 0.8073, |
| "step": 474 |
| }, |
| { |
| "epoch": 2.4739583333333335, |
| "grad_norm": 1.1104812622070312, |
| "learning_rate": 0.000290365407640841, |
| "loss": 0.8382, |
| "step": 475 |
| }, |
| { |
| "epoch": 2.4791666666666665, |
| "grad_norm": 1.6935021877288818, |
| "learning_rate": 0.0002903216436317873, |
| "loss": 0.9611, |
| "step": 476 |
| }, |
| { |
| "epoch": 2.484375, |
| "grad_norm": 2.2480547428131104, |
| "learning_rate": 0.0002902777837646717, |
| "loss": 0.9391, |
| "step": 477 |
| }, |
| { |
| "epoch": 2.4895833333333335, |
| "grad_norm": 0.9665905237197876, |
| "learning_rate": 0.00029023382806945603, |
| "loss": 0.9344, |
| "step": 478 |
| }, |
| { |
| "epoch": 2.4947916666666665, |
| "grad_norm": 1.0064702033996582, |
| "learning_rate": 0.0002901897765761679, |
| "loss": 0.9095, |
| "step": 479 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 1.4491630792617798, |
| "learning_rate": 0.00029014562931490017, |
| "loss": 0.926, |
| "step": 480 |
| }, |
| { |
| "epoch": 2.5052083333333335, |
| "grad_norm": 2.897665023803711, |
| "learning_rate": 0.0002901013863158113, |
| "loss": 1.0588, |
| "step": 481 |
| }, |
| { |
| "epoch": 2.5104166666666665, |
| "grad_norm": 1.155824899673462, |
| "learning_rate": 0.00029005704760912495, |
| "loss": 0.8901, |
| "step": 482 |
| }, |
| { |
| "epoch": 2.515625, |
| "grad_norm": 2.7137579917907715, |
| "learning_rate": 0.00029001261322513037, |
| "loss": 1.1084, |
| "step": 483 |
| }, |
| { |
| "epoch": 2.5208333333333335, |
| "grad_norm": 1.8364861011505127, |
| "learning_rate": 0.000289968083194182, |
| "loss": 1.0474, |
| "step": 484 |
| }, |
| { |
| "epoch": 2.5260416666666665, |
| "grad_norm": 0.8526556491851807, |
| "learning_rate": 0.0002899234575466997, |
| "loss": 0.9259, |
| "step": 485 |
| }, |
| { |
| "epoch": 2.53125, |
| "grad_norm": 1.5991791486740112, |
| "learning_rate": 0.00028987873631316867, |
| "loss": 0.8589, |
| "step": 486 |
| }, |
| { |
| "epoch": 2.5364583333333335, |
| "grad_norm": 2.5090625286102295, |
| "learning_rate": 0.00028983391952413926, |
| "loss": 0.9364, |
| "step": 487 |
| }, |
| { |
| "epoch": 2.5416666666666665, |
| "grad_norm": 0.9283762574195862, |
| "learning_rate": 0.0002897890072102273, |
| "loss": 1.0054, |
| "step": 488 |
| }, |
| { |
| "epoch": 2.546875, |
| "grad_norm": 1.5692499876022339, |
| "learning_rate": 0.00028974399940211377, |
| "loss": 1.0345, |
| "step": 489 |
| }, |
| { |
| "epoch": 2.5520833333333335, |
| "grad_norm": 1.5684329271316528, |
| "learning_rate": 0.00028969889613054484, |
| "loss": 0.8892, |
| "step": 490 |
| }, |
| { |
| "epoch": 2.5572916666666665, |
| "grad_norm": 1.4285774230957031, |
| "learning_rate": 0.0002896536974263321, |
| "loss": 0.9587, |
| "step": 491 |
| }, |
| { |
| "epoch": 2.5625, |
| "grad_norm": 1.89910089969635, |
| "learning_rate": 0.000289608403320352, |
| "loss": 0.913, |
| "step": 492 |
| }, |
| { |
| "epoch": 2.5677083333333335, |
| "grad_norm": 1.9246125221252441, |
| "learning_rate": 0.00028956301384354643, |
| "loss": 0.9389, |
| "step": 493 |
| }, |
| { |
| "epoch": 2.5729166666666665, |
| "grad_norm": 2.062614917755127, |
| "learning_rate": 0.0002895175290269224, |
| "loss": 1.0251, |
| "step": 494 |
| }, |
| { |
| "epoch": 2.578125, |
| "grad_norm": 2.767091989517212, |
| "learning_rate": 0.0002894719489015519, |
| "loss": 1.0611, |
| "step": 495 |
| }, |
| { |
| "epoch": 2.5833333333333335, |
| "grad_norm": 2.488267660140991, |
| "learning_rate": 0.0002894262734985722, |
| "loss": 1.0715, |
| "step": 496 |
| }, |
| { |
| "epoch": 2.5885416666666665, |
| "grad_norm": 1.9916088581085205, |
| "learning_rate": 0.0002893805028491856, |
| "loss": 0.9411, |
| "step": 497 |
| }, |
| { |
| "epoch": 2.59375, |
| "grad_norm": 1.1801390647888184, |
| "learning_rate": 0.0002893346369846594, |
| "loss": 0.9961, |
| "step": 498 |
| }, |
| { |
| "epoch": 2.5989583333333335, |
| "grad_norm": 1.0057222843170166, |
| "learning_rate": 0.0002892886759363259, |
| "loss": 0.9383, |
| "step": 499 |
| }, |
| { |
| "epoch": 2.6041666666666665, |
| "grad_norm": 1.5073933601379395, |
| "learning_rate": 0.00028924261973558267, |
| "loss": 0.9136, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.6041666666666665, |
| "eval_f1_macro": 0.2605442264935877, |
| "eval_loss": 0.9971085786819458, |
| "eval_runtime": 4.9758, |
| "eval_samples_per_second": 615.578, |
| "eval_steps_per_second": 9.647, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.609375, |
| "grad_norm": 1.734981894493103, |
| "learning_rate": 0.00028919646841389207, |
| "loss": 1.119, |
| "step": 501 |
| }, |
| { |
| "epoch": 2.6145833333333335, |
| "grad_norm": 2.089855432510376, |
| "learning_rate": 0.0002891502220027815, |
| "loss": 1.009, |
| "step": 502 |
| }, |
| { |
| "epoch": 2.6197916666666665, |
| "grad_norm": 0.9486216306686401, |
| "learning_rate": 0.00028910388053384335, |
| "loss": 0.8526, |
| "step": 503 |
| }, |
| { |
| "epoch": 2.625, |
| "grad_norm": 1.1793452501296997, |
| "learning_rate": 0.0002890574440387348, |
| "loss": 1.0006, |
| "step": 504 |
| }, |
| { |
| "epoch": 2.6302083333333335, |
| "grad_norm": 1.0026938915252686, |
| "learning_rate": 0.00028901091254917823, |
| "loss": 0.9113, |
| "step": 505 |
| }, |
| { |
| "epoch": 2.6354166666666665, |
| "grad_norm": 2.241178512573242, |
| "learning_rate": 0.0002889642860969606, |
| "loss": 1.0816, |
| "step": 506 |
| }, |
| { |
| "epoch": 2.640625, |
| "grad_norm": 1.843820333480835, |
| "learning_rate": 0.0002889175647139339, |
| "loss": 1.1204, |
| "step": 507 |
| }, |
| { |
| "epoch": 2.6458333333333335, |
| "grad_norm": 1.7183661460876465, |
| "learning_rate": 0.00028887074843201503, |
| "loss": 0.9354, |
| "step": 508 |
| }, |
| { |
| "epoch": 2.6510416666666665, |
| "grad_norm": 1.190252661705017, |
| "learning_rate": 0.0002888238372831856, |
| "loss": 0.8393, |
| "step": 509 |
| }, |
| { |
| "epoch": 2.65625, |
| "grad_norm": 1.6804723739624023, |
| "learning_rate": 0.00028877683129949205, |
| "loss": 0.9788, |
| "step": 510 |
| }, |
| { |
| "epoch": 2.6614583333333335, |
| "grad_norm": 1.684126853942871, |
| "learning_rate": 0.0002887297305130456, |
| "loss": 0.8783, |
| "step": 511 |
| }, |
| { |
| "epoch": 2.6666666666666665, |
| "grad_norm": 1.523580551147461, |
| "learning_rate": 0.00028868253495602236, |
| "loss": 0.9464, |
| "step": 512 |
| }, |
| { |
| "epoch": 2.671875, |
| "grad_norm": 1.0638447999954224, |
| "learning_rate": 0.000288635244660663, |
| "loss": 1.0119, |
| "step": 513 |
| }, |
| { |
| "epoch": 2.6770833333333335, |
| "grad_norm": 1.5706989765167236, |
| "learning_rate": 0.00028858785965927305, |
| "loss": 0.9627, |
| "step": 514 |
| }, |
| { |
| "epoch": 2.6822916666666665, |
| "grad_norm": 1.5810585021972656, |
| "learning_rate": 0.0002885403799842226, |
| "loss": 0.9581, |
| "step": 515 |
| }, |
| { |
| "epoch": 2.6875, |
| "grad_norm": 1.5091859102249146, |
| "learning_rate": 0.0002884928056679464, |
| "loss": 0.9684, |
| "step": 516 |
| }, |
| { |
| "epoch": 2.6927083333333335, |
| "grad_norm": 1.259341835975647, |
| "learning_rate": 0.0002884451367429442, |
| "loss": 0.9869, |
| "step": 517 |
| }, |
| { |
| "epoch": 2.6979166666666665, |
| "grad_norm": 1.2241402864456177, |
| "learning_rate": 0.0002883973732417799, |
| "loss": 0.906, |
| "step": 518 |
| }, |
| { |
| "epoch": 2.703125, |
| "grad_norm": 0.7743521928787231, |
| "learning_rate": 0.0002883495151970823, |
| "loss": 0.932, |
| "step": 519 |
| }, |
| { |
| "epoch": 2.7083333333333335, |
| "grad_norm": 1.4225233793258667, |
| "learning_rate": 0.00028830156264154477, |
| "loss": 1.0068, |
| "step": 520 |
| }, |
| { |
| "epoch": 2.7135416666666665, |
| "grad_norm": 1.0250027179718018, |
| "learning_rate": 0.00028825351560792504, |
| "loss": 1.0447, |
| "step": 521 |
| }, |
| { |
| "epoch": 2.71875, |
| "grad_norm": 0.9756643176078796, |
| "learning_rate": 0.00028820537412904565, |
| "loss": 0.9044, |
| "step": 522 |
| }, |
| { |
| "epoch": 2.7239583333333335, |
| "grad_norm": 1.2294031381607056, |
| "learning_rate": 0.0002881571382377935, |
| "loss": 1.0361, |
| "step": 523 |
| }, |
| { |
| "epoch": 2.7291666666666665, |
| "grad_norm": 2.379302501678467, |
| "learning_rate": 0.00028810880796712, |
| "loss": 0.9285, |
| "step": 524 |
| }, |
| { |
| "epoch": 2.734375, |
| "grad_norm": 1.1162290573120117, |
| "learning_rate": 0.0002880603833500411, |
| "loss": 1.0513, |
| "step": 525 |
| }, |
| { |
| "epoch": 2.7395833333333335, |
| "grad_norm": 1.1067630052566528, |
| "learning_rate": 0.0002880118644196371, |
| "loss": 0.8996, |
| "step": 526 |
| }, |
| { |
| "epoch": 2.7447916666666665, |
| "grad_norm": 2.0322606563568115, |
| "learning_rate": 0.0002879632512090529, |
| "loss": 0.9124, |
| "step": 527 |
| }, |
| { |
| "epoch": 2.75, |
| "grad_norm": 0.9867918491363525, |
| "learning_rate": 0.0002879145437514975, |
| "loss": 0.9309, |
| "step": 528 |
| }, |
| { |
| "epoch": 2.7552083333333335, |
| "grad_norm": 1.442891001701355, |
| "learning_rate": 0.0002878657420802447, |
| "loss": 0.9185, |
| "step": 529 |
| }, |
| { |
| "epoch": 2.7604166666666665, |
| "grad_norm": 1.6756298542022705, |
| "learning_rate": 0.0002878168462286322, |
| "loss": 0.9334, |
| "step": 530 |
| }, |
| { |
| "epoch": 2.765625, |
| "grad_norm": 1.4435206651687622, |
| "learning_rate": 0.00028776785623006246, |
| "loss": 0.9028, |
| "step": 531 |
| }, |
| { |
| "epoch": 2.7708333333333335, |
| "grad_norm": 0.7525215148925781, |
| "learning_rate": 0.0002877187721180019, |
| "loss": 0.9615, |
| "step": 532 |
| }, |
| { |
| "epoch": 2.7760416666666665, |
| "grad_norm": 1.6122651100158691, |
| "learning_rate": 0.0002876695939259816, |
| "loss": 1.0145, |
| "step": 533 |
| }, |
| { |
| "epoch": 2.78125, |
| "grad_norm": 1.6951870918273926, |
| "learning_rate": 0.0002876203216875966, |
| "loss": 1.0339, |
| "step": 534 |
| }, |
| { |
| "epoch": 2.7864583333333335, |
| "grad_norm": 2.785329818725586, |
| "learning_rate": 0.0002875709554365062, |
| "loss": 1.149, |
| "step": 535 |
| }, |
| { |
| "epoch": 2.7916666666666665, |
| "grad_norm": 2.6187095642089844, |
| "learning_rate": 0.00028752149520643413, |
| "loss": 0.9165, |
| "step": 536 |
| }, |
| { |
| "epoch": 2.796875, |
| "grad_norm": 2.877760648727417, |
| "learning_rate": 0.00028747194103116816, |
| "loss": 0.995, |
| "step": 537 |
| }, |
| { |
| "epoch": 2.8020833333333335, |
| "grad_norm": 1.3189387321472168, |
| "learning_rate": 0.00028742229294456033, |
| "loss": 0.9512, |
| "step": 538 |
| }, |
| { |
| "epoch": 2.8072916666666665, |
| "grad_norm": 1.3698662519454956, |
| "learning_rate": 0.00028737255098052676, |
| "loss": 0.9817, |
| "step": 539 |
| }, |
| { |
| "epoch": 2.8125, |
| "grad_norm": 1.3579190969467163, |
| "learning_rate": 0.0002873227151730477, |
| "loss": 1.046, |
| "step": 540 |
| }, |
| { |
| "epoch": 2.8177083333333335, |
| "grad_norm": 1.497771143913269, |
| "learning_rate": 0.00028727278555616757, |
| "loss": 0.9259, |
| "step": 541 |
| }, |
| { |
| "epoch": 2.8229166666666665, |
| "grad_norm": 1.3495075702667236, |
| "learning_rate": 0.00028722276216399485, |
| "loss": 0.9521, |
| "step": 542 |
| }, |
| { |
| "epoch": 2.828125, |
| "grad_norm": 1.8813629150390625, |
| "learning_rate": 0.00028717264503070206, |
| "loss": 1.0093, |
| "step": 543 |
| }, |
| { |
| "epoch": 2.8333333333333335, |
| "grad_norm": 2.222083330154419, |
| "learning_rate": 0.00028712243419052566, |
| "loss": 0.8958, |
| "step": 544 |
| }, |
| { |
| "epoch": 2.8385416666666665, |
| "grad_norm": 1.6770775318145752, |
| "learning_rate": 0.0002870721296777664, |
| "loss": 0.9286, |
| "step": 545 |
| }, |
| { |
| "epoch": 2.84375, |
| "grad_norm": 1.3346805572509766, |
| "learning_rate": 0.00028702173152678873, |
| "loss": 1.0623, |
| "step": 546 |
| }, |
| { |
| "epoch": 2.8489583333333335, |
| "grad_norm": 1.0745223760604858, |
| "learning_rate": 0.00028697123977202126, |
| "loss": 0.9952, |
| "step": 547 |
| }, |
| { |
| "epoch": 2.8541666666666665, |
| "grad_norm": 1.4419219493865967, |
| "learning_rate": 0.00028692065444795646, |
| "loss": 1.037, |
| "step": 548 |
| }, |
| { |
| "epoch": 2.859375, |
| "grad_norm": 1.4035565853118896, |
| "learning_rate": 0.0002868699755891507, |
| "loss": 0.9748, |
| "step": 549 |
| }, |
| { |
| "epoch": 2.8645833333333335, |
| "grad_norm": 0.7591080069541931, |
| "learning_rate": 0.00028681920323022427, |
| "loss": 0.9058, |
| "step": 550 |
| }, |
| { |
| "epoch": 2.8697916666666665, |
| "grad_norm": 1.2616761922836304, |
| "learning_rate": 0.00028676833740586135, |
| "loss": 0.9464, |
| "step": 551 |
| }, |
| { |
| "epoch": 2.875, |
| "grad_norm": 1.9414821863174438, |
| "learning_rate": 0.00028671737815081, |
| "loss": 1.0319, |
| "step": 552 |
| }, |
| { |
| "epoch": 2.8802083333333335, |
| "grad_norm": 1.1402050256729126, |
| "learning_rate": 0.0002866663254998821, |
| "loss": 0.8569, |
| "step": 553 |
| }, |
| { |
| "epoch": 2.8854166666666665, |
| "grad_norm": 2.4764318466186523, |
| "learning_rate": 0.0002866151794879532, |
| "loss": 0.9408, |
| "step": 554 |
| }, |
| { |
| "epoch": 2.890625, |
| "grad_norm": 1.115903615951538, |
| "learning_rate": 0.0002865639401499627, |
| "loss": 0.9777, |
| "step": 555 |
| }, |
| { |
| "epoch": 2.8958333333333335, |
| "grad_norm": 1.0213887691497803, |
| "learning_rate": 0.00028651260752091394, |
| "loss": 0.8582, |
| "step": 556 |
| }, |
| { |
| "epoch": 2.9010416666666665, |
| "grad_norm": 1.2118194103240967, |
| "learning_rate": 0.00028646118163587367, |
| "loss": 0.9308, |
| "step": 557 |
| }, |
| { |
| "epoch": 2.90625, |
| "grad_norm": 1.1214053630828857, |
| "learning_rate": 0.00028640966252997256, |
| "loss": 0.8237, |
| "step": 558 |
| }, |
| { |
| "epoch": 2.9114583333333335, |
| "grad_norm": 1.061611294746399, |
| "learning_rate": 0.0002863580502384049, |
| "loss": 0.8886, |
| "step": 559 |
| }, |
| { |
| "epoch": 2.9166666666666665, |
| "grad_norm": 1.2852762937545776, |
| "learning_rate": 0.0002863063447964287, |
| "loss": 0.8909, |
| "step": 560 |
| }, |
| { |
| "epoch": 2.921875, |
| "grad_norm": 0.9640898704528809, |
| "learning_rate": 0.00028625454623936554, |
| "loss": 0.9961, |
| "step": 561 |
| }, |
| { |
| "epoch": 2.9270833333333335, |
| "grad_norm": 1.009779930114746, |
| "learning_rate": 0.0002862026546026006, |
| "loss": 0.9037, |
| "step": 562 |
| }, |
| { |
| "epoch": 2.9322916666666665, |
| "grad_norm": 0.9958177804946899, |
| "learning_rate": 0.00028615066992158255, |
| "loss": 1.0459, |
| "step": 563 |
| }, |
| { |
| "epoch": 2.9375, |
| "grad_norm": 1.6453534364700317, |
| "learning_rate": 0.00028609859223182395, |
| "loss": 0.955, |
| "step": 564 |
| }, |
| { |
| "epoch": 2.9427083333333335, |
| "grad_norm": 2.159295082092285, |
| "learning_rate": 0.0002860464215689005, |
| "loss": 1.1224, |
| "step": 565 |
| }, |
| { |
| "epoch": 2.9479166666666665, |
| "grad_norm": 2.021392583847046, |
| "learning_rate": 0.00028599415796845166, |
| "loss": 0.914, |
| "step": 566 |
| }, |
| { |
| "epoch": 2.953125, |
| "grad_norm": 1.290340781211853, |
| "learning_rate": 0.0002859418014661804, |
| "loss": 0.9497, |
| "step": 567 |
| }, |
| { |
| "epoch": 2.9583333333333335, |
| "grad_norm": 1.8013122081756592, |
| "learning_rate": 0.00028588935209785294, |
| "loss": 0.9642, |
| "step": 568 |
| }, |
| { |
| "epoch": 2.9635416666666665, |
| "grad_norm": 1.7033482789993286, |
| "learning_rate": 0.00028583680989929913, |
| "loss": 1.0164, |
| "step": 569 |
| }, |
| { |
| "epoch": 2.96875, |
| "grad_norm": 1.6898046731948853, |
| "learning_rate": 0.00028578417490641213, |
| "loss": 0.993, |
| "step": 570 |
| }, |
| { |
| "epoch": 2.9739583333333335, |
| "grad_norm": 1.05963933467865, |
| "learning_rate": 0.00028573144715514863, |
| "loss": 0.8816, |
| "step": 571 |
| }, |
| { |
| "epoch": 2.9791666666666665, |
| "grad_norm": 1.91165030002594, |
| "learning_rate": 0.0002856786266815285, |
| "loss": 0.9207, |
| "step": 572 |
| }, |
| { |
| "epoch": 2.984375, |
| "grad_norm": 2.9410109519958496, |
| "learning_rate": 0.0002856257135216351, |
| "loss": 0.9847, |
| "step": 573 |
| }, |
| { |
| "epoch": 2.9895833333333335, |
| "grad_norm": 2.4728829860687256, |
| "learning_rate": 0.00028557270771161495, |
| "loss": 0.9236, |
| "step": 574 |
| }, |
| { |
| "epoch": 2.9947916666666665, |
| "grad_norm": 2.6016228199005127, |
| "learning_rate": 0.00028551960928767807, |
| "loss": 0.948, |
| "step": 575 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 5.500241756439209, |
| "learning_rate": 0.00028546641828609766, |
| "loss": 1.1828, |
| "step": 576 |
| }, |
| { |
| "epoch": 3.0052083333333335, |
| "grad_norm": 0.8321835994720459, |
| "learning_rate": 0.00028541313474321, |
| "loss": 0.9071, |
| "step": 577 |
| }, |
| { |
| "epoch": 3.0104166666666665, |
| "grad_norm": 1.7026640176773071, |
| "learning_rate": 0.0002853597586954148, |
| "loss": 0.9021, |
| "step": 578 |
| }, |
| { |
| "epoch": 3.015625, |
| "grad_norm": 0.9146273732185364, |
| "learning_rate": 0.00028530629017917494, |
| "loss": 1.0115, |
| "step": 579 |
| }, |
| { |
| "epoch": 3.0208333333333335, |
| "grad_norm": 1.3471211194992065, |
| "learning_rate": 0.0002852527292310164, |
| "loss": 0.8783, |
| "step": 580 |
| }, |
| { |
| "epoch": 3.0260416666666665, |
| "grad_norm": 2.41924786567688, |
| "learning_rate": 0.0002851990758875283, |
| "loss": 0.9398, |
| "step": 581 |
| }, |
| { |
| "epoch": 3.03125, |
| "grad_norm": 2.409782648086548, |
| "learning_rate": 0.0002851453301853628, |
| "loss": 0.9135, |
| "step": 582 |
| }, |
| { |
| "epoch": 3.0364583333333335, |
| "grad_norm": 1.603161096572876, |
| "learning_rate": 0.0002850914921612355, |
| "loss": 0.946, |
| "step": 583 |
| }, |
| { |
| "epoch": 3.0416666666666665, |
| "grad_norm": 0.9972139000892639, |
| "learning_rate": 0.0002850375618519246, |
| "loss": 0.9355, |
| "step": 584 |
| }, |
| { |
| "epoch": 3.046875, |
| "grad_norm": 1.0554614067077637, |
| "learning_rate": 0.00028498353929427167, |
| "loss": 0.8818, |
| "step": 585 |
| }, |
| { |
| "epoch": 3.0520833333333335, |
| "grad_norm": 2.1641576290130615, |
| "learning_rate": 0.0002849294245251812, |
| "loss": 0.8491, |
| "step": 586 |
| }, |
| { |
| "epoch": 3.0572916666666665, |
| "grad_norm": 1.4487417936325073, |
| "learning_rate": 0.0002848752175816206, |
| "loss": 1.0006, |
| "step": 587 |
| }, |
| { |
| "epoch": 3.0625, |
| "grad_norm": 2.2291972637176514, |
| "learning_rate": 0.00028482091850062037, |
| "loss": 0.8693, |
| "step": 588 |
| }, |
| { |
| "epoch": 3.0677083333333335, |
| "grad_norm": 2.6936957836151123, |
| "learning_rate": 0.00028476652731927385, |
| "loss": 0.8435, |
| "step": 589 |
| }, |
| { |
| "epoch": 3.0729166666666665, |
| "grad_norm": 1.3946832418441772, |
| "learning_rate": 0.00028471204407473736, |
| "loss": 1.0212, |
| "step": 590 |
| }, |
| { |
| "epoch": 3.078125, |
| "grad_norm": 1.5958478450775146, |
| "learning_rate": 0.0002846574688042301, |
| "loss": 1.0184, |
| "step": 591 |
| }, |
| { |
| "epoch": 3.0833333333333335, |
| "grad_norm": 1.3046830892562866, |
| "learning_rate": 0.00028460280154503413, |
| "loss": 0.8143, |
| "step": 592 |
| }, |
| { |
| "epoch": 3.0885416666666665, |
| "grad_norm": 1.774622917175293, |
| "learning_rate": 0.00028454804233449434, |
| "loss": 1.0621, |
| "step": 593 |
| }, |
| { |
| "epoch": 3.09375, |
| "grad_norm": 1.0581188201904297, |
| "learning_rate": 0.00028449319121001844, |
| "loss": 0.9691, |
| "step": 594 |
| }, |
| { |
| "epoch": 3.0989583333333335, |
| "grad_norm": 1.7214605808258057, |
| "learning_rate": 0.00028443824820907697, |
| "loss": 0.9998, |
| "step": 595 |
| }, |
| { |
| "epoch": 3.1041666666666665, |
| "grad_norm": 1.7903075218200684, |
| "learning_rate": 0.0002843832133692031, |
| "loss": 0.8922, |
| "step": 596 |
| }, |
| { |
| "epoch": 3.109375, |
| "grad_norm": 1.0542113780975342, |
| "learning_rate": 0.00028432808672799293, |
| "loss": 1.0223, |
| "step": 597 |
| }, |
| { |
| "epoch": 3.1145833333333335, |
| "grad_norm": 1.6248779296875, |
| "learning_rate": 0.00028427286832310517, |
| "loss": 0.7709, |
| "step": 598 |
| }, |
| { |
| "epoch": 3.1197916666666665, |
| "grad_norm": 1.614200234413147, |
| "learning_rate": 0.00028421755819226115, |
| "loss": 0.8515, |
| "step": 599 |
| }, |
| { |
| "epoch": 3.125, |
| "grad_norm": 0.9364871382713318, |
| "learning_rate": 0.000284162156373245, |
| "loss": 0.8944, |
| "step": 600 |
| }, |
| { |
| "epoch": 3.125, |
| "eval_f1_macro": 0.2503698322849481, |
| "eval_loss": 1.00216543674469, |
| "eval_runtime": 4.972, |
| "eval_samples_per_second": 616.047, |
| "eval_steps_per_second": 9.654, |
| "step": 600 |
| }, |
| { |
| "epoch": 3.1302083333333335, |
| "grad_norm": 1.6614043712615967, |
| "learning_rate": 0.00028410666290390344, |
| "loss": 0.9782, |
| "step": 601 |
| }, |
| { |
| "epoch": 3.1354166666666665, |
| "grad_norm": 1.4950212240219116, |
| "learning_rate": 0.0002840510778221457, |
| "loss": 0.8551, |
| "step": 602 |
| }, |
| { |
| "epoch": 3.140625, |
| "grad_norm": 1.8259551525115967, |
| "learning_rate": 0.0002839954011659437, |
| "loss": 0.824, |
| "step": 603 |
| }, |
| { |
| "epoch": 3.1458333333333335, |
| "grad_norm": 0.9227896332740784, |
| "learning_rate": 0.00028393963297333193, |
| "loss": 0.9253, |
| "step": 604 |
| }, |
| { |
| "epoch": 3.1510416666666665, |
| "grad_norm": 1.4293307065963745, |
| "learning_rate": 0.0002838837732824073, |
| "loss": 0.9324, |
| "step": 605 |
| }, |
| { |
| "epoch": 3.15625, |
| "grad_norm": 1.0427287817001343, |
| "learning_rate": 0.00028382782213132933, |
| "loss": 0.9662, |
| "step": 606 |
| }, |
| { |
| "epoch": 3.1614583333333335, |
| "grad_norm": 1.3180409669876099, |
| "learning_rate": 0.00028377177955832007, |
| "loss": 1.0768, |
| "step": 607 |
| }, |
| { |
| "epoch": 3.1666666666666665, |
| "grad_norm": 1.0040141344070435, |
| "learning_rate": 0.0002837156456016638, |
| "loss": 0.9895, |
| "step": 608 |
| }, |
| { |
| "epoch": 3.171875, |
| "grad_norm": 1.3632732629776, |
| "learning_rate": 0.0002836594202997074, |
| "loss": 0.9819, |
| "step": 609 |
| }, |
| { |
| "epoch": 3.1770833333333335, |
| "grad_norm": 1.476967692375183, |
| "learning_rate": 0.00028360310369086015, |
| "loss": 0.887, |
| "step": 610 |
| }, |
| { |
| "epoch": 3.1822916666666665, |
| "grad_norm": 0.8526042699813843, |
| "learning_rate": 0.00028354669581359367, |
| "loss": 0.8996, |
| "step": 611 |
| }, |
| { |
| "epoch": 3.1875, |
| "grad_norm": 2.272521495819092, |
| "learning_rate": 0.0002834901967064419, |
| "loss": 0.8646, |
| "step": 612 |
| }, |
| { |
| "epoch": 3.1927083333333335, |
| "grad_norm": 1.122592806816101, |
| "learning_rate": 0.00028343360640800116, |
| "loss": 0.9387, |
| "step": 613 |
| }, |
| { |
| "epoch": 3.1979166666666665, |
| "grad_norm": 1.4360101222991943, |
| "learning_rate": 0.00028337692495693, |
| "loss": 1.151, |
| "step": 614 |
| }, |
| { |
| "epoch": 3.203125, |
| "grad_norm": 1.2686383724212646, |
| "learning_rate": 0.00028332015239194937, |
| "loss": 0.7848, |
| "step": 615 |
| }, |
| { |
| "epoch": 3.2083333333333335, |
| "grad_norm": 1.0647835731506348, |
| "learning_rate": 0.0002832632887518422, |
| "loss": 0.9603, |
| "step": 616 |
| }, |
| { |
| "epoch": 3.2135416666666665, |
| "grad_norm": 0.8335797786712646, |
| "learning_rate": 0.00028320633407545397, |
| "loss": 0.8683, |
| "step": 617 |
| }, |
| { |
| "epoch": 3.21875, |
| "grad_norm": 1.7472202777862549, |
| "learning_rate": 0.0002831492884016921, |
| "loss": 0.9218, |
| "step": 618 |
| }, |
| { |
| "epoch": 3.2239583333333335, |
| "grad_norm": 1.66518235206604, |
| "learning_rate": 0.0002830921517695262, |
| "loss": 0.9546, |
| "step": 619 |
| }, |
| { |
| "epoch": 3.2291666666666665, |
| "grad_norm": 2.8334012031555176, |
| "learning_rate": 0.0002830349242179882, |
| "loss": 0.9563, |
| "step": 620 |
| }, |
| { |
| "epoch": 3.234375, |
| "grad_norm": 0.968147873878479, |
| "learning_rate": 0.000282977605786172, |
| "loss": 0.9353, |
| "step": 621 |
| }, |
| { |
| "epoch": 3.2395833333333335, |
| "grad_norm": 1.046994686126709, |
| "learning_rate": 0.00028292019651323346, |
| "loss": 0.981, |
| "step": 622 |
| }, |
| { |
| "epoch": 3.2447916666666665, |
| "grad_norm": 0.9138261675834656, |
| "learning_rate": 0.0002828626964383908, |
| "loss": 0.9716, |
| "step": 623 |
| }, |
| { |
| "epoch": 3.25, |
| "grad_norm": 1.8486944437026978, |
| "learning_rate": 0.000282805105600924, |
| "loss": 0.9748, |
| "step": 624 |
| }, |
| { |
| "epoch": 3.2552083333333335, |
| "grad_norm": 1.6632397174835205, |
| "learning_rate": 0.0002827474240401752, |
| "loss": 0.9161, |
| "step": 625 |
| }, |
| { |
| "epoch": 3.2604166666666665, |
| "grad_norm": 1.149093508720398, |
| "learning_rate": 0.0002826896517955484, |
| "loss": 0.9905, |
| "step": 626 |
| }, |
| { |
| "epoch": 3.265625, |
| "grad_norm": 1.6981698274612427, |
| "learning_rate": 0.00028263178890650966, |
| "loss": 1.0029, |
| "step": 627 |
| }, |
| { |
| "epoch": 3.2708333333333335, |
| "grad_norm": 1.1890650987625122, |
| "learning_rate": 0.0002825738354125869, |
| "loss": 0.9793, |
| "step": 628 |
| }, |
| { |
| "epoch": 3.2760416666666665, |
| "grad_norm": 1.198307752609253, |
| "learning_rate": 0.00028251579135337, |
| "loss": 0.839, |
| "step": 629 |
| }, |
| { |
| "epoch": 3.28125, |
| "grad_norm": 0.785366952419281, |
| "learning_rate": 0.0002824576567685105, |
| "loss": 0.9717, |
| "step": 630 |
| }, |
| { |
| "epoch": 3.2864583333333335, |
| "grad_norm": 1.0166940689086914, |
| "learning_rate": 0.00028239943169772213, |
| "loss": 0.9931, |
| "step": 631 |
| }, |
| { |
| "epoch": 3.2916666666666665, |
| "grad_norm": 1.294536828994751, |
| "learning_rate": 0.0002823411161807801, |
| "loss": 0.8804, |
| "step": 632 |
| }, |
| { |
| "epoch": 3.296875, |
| "grad_norm": 0.9966655373573303, |
| "learning_rate": 0.0002822827102575216, |
| "loss": 1.0032, |
| "step": 633 |
| }, |
| { |
| "epoch": 3.3020833333333335, |
| "grad_norm": 0.8434579372406006, |
| "learning_rate": 0.0002822242139678455, |
| "loss": 0.8938, |
| "step": 634 |
| }, |
| { |
| "epoch": 3.3072916666666665, |
| "grad_norm": 1.6518864631652832, |
| "learning_rate": 0.00028216562735171243, |
| "loss": 0.9143, |
| "step": 635 |
| }, |
| { |
| "epoch": 3.3125, |
| "grad_norm": 1.2482690811157227, |
| "learning_rate": 0.0002821069504491448, |
| "loss": 0.9394, |
| "step": 636 |
| }, |
| { |
| "epoch": 3.3177083333333335, |
| "grad_norm": 1.466522216796875, |
| "learning_rate": 0.0002820481833002265, |
| "loss": 0.9672, |
| "step": 637 |
| }, |
| { |
| "epoch": 3.3229166666666665, |
| "grad_norm": 0.9955615997314453, |
| "learning_rate": 0.0002819893259451033, |
| "loss": 0.9704, |
| "step": 638 |
| }, |
| { |
| "epoch": 3.328125, |
| "grad_norm": 0.8381821513175964, |
| "learning_rate": 0.00028193037842398237, |
| "loss": 0.88, |
| "step": 639 |
| }, |
| { |
| "epoch": 3.3333333333333335, |
| "grad_norm": 1.2333645820617676, |
| "learning_rate": 0.0002818713407771327, |
| "loss": 0.83, |
| "step": 640 |
| }, |
| { |
| "epoch": 3.3385416666666665, |
| "grad_norm": 1.2303617000579834, |
| "learning_rate": 0.0002818122130448847, |
| "loss": 0.967, |
| "step": 641 |
| }, |
| { |
| "epoch": 3.34375, |
| "grad_norm": 1.8304386138916016, |
| "learning_rate": 0.00028175299526763027, |
| "loss": 0.9691, |
| "step": 642 |
| }, |
| { |
| "epoch": 3.3489583333333335, |
| "grad_norm": 1.3071438074111938, |
| "learning_rate": 0.000281693687485823, |
| "loss": 0.9675, |
| "step": 643 |
| }, |
| { |
| "epoch": 3.3541666666666665, |
| "grad_norm": 1.2603262662887573, |
| "learning_rate": 0.0002816342897399779, |
| "loss": 0.8512, |
| "step": 644 |
| }, |
| { |
| "epoch": 3.359375, |
| "grad_norm": 0.9570459127426147, |
| "learning_rate": 0.0002815748020706713, |
| "loss": 0.9964, |
| "step": 645 |
| }, |
| { |
| "epoch": 3.3645833333333335, |
| "grad_norm": 1.0817947387695312, |
| "learning_rate": 0.0002815152245185412, |
| "loss": 0.9366, |
| "step": 646 |
| }, |
| { |
| "epoch": 3.3697916666666665, |
| "grad_norm": 0.8419851064682007, |
| "learning_rate": 0.00028145555712428676, |
| "loss": 0.9615, |
| "step": 647 |
| }, |
| { |
| "epoch": 3.375, |
| "grad_norm": 1.1309850215911865, |
| "learning_rate": 0.00028139579992866865, |
| "loss": 0.9397, |
| "step": 648 |
| }, |
| { |
| "epoch": 3.3802083333333335, |
| "grad_norm": 1.234587550163269, |
| "learning_rate": 0.0002813359529725089, |
| "loss": 0.9031, |
| "step": 649 |
| }, |
| { |
| "epoch": 3.3854166666666665, |
| "grad_norm": 1.1707249879837036, |
| "learning_rate": 0.00028127601629669084, |
| "loss": 0.9106, |
| "step": 650 |
| }, |
| { |
| "epoch": 3.390625, |
| "grad_norm": 1.5023181438446045, |
| "learning_rate": 0.000281215989942159, |
| "loss": 0.9405, |
| "step": 651 |
| }, |
| { |
| "epoch": 3.3958333333333335, |
| "grad_norm": 2.19960355758667, |
| "learning_rate": 0.00028115587394991927, |
| "loss": 0.9894, |
| "step": 652 |
| }, |
| { |
| "epoch": 3.4010416666666665, |
| "grad_norm": 1.5934264659881592, |
| "learning_rate": 0.00028109566836103875, |
| "loss": 0.9093, |
| "step": 653 |
| }, |
| { |
| "epoch": 3.40625, |
| "grad_norm": 0.7969881296157837, |
| "learning_rate": 0.00028103537321664575, |
| "loss": 0.9113, |
| "step": 654 |
| }, |
| { |
| "epoch": 3.4114583333333335, |
| "grad_norm": 1.5218361616134644, |
| "learning_rate": 0.00028097498855792977, |
| "loss": 0.9546, |
| "step": 655 |
| }, |
| { |
| "epoch": 3.4166666666666665, |
| "grad_norm": 1.2241264581680298, |
| "learning_rate": 0.00028091451442614133, |
| "loss": 0.9638, |
| "step": 656 |
| }, |
| { |
| "epoch": 3.421875, |
| "grad_norm": 0.8899747729301453, |
| "learning_rate": 0.00028085395086259235, |
| "loss": 0.9935, |
| "step": 657 |
| }, |
| { |
| "epoch": 3.4270833333333335, |
| "grad_norm": 1.4845017194747925, |
| "learning_rate": 0.0002807932979086556, |
| "loss": 0.9234, |
| "step": 658 |
| }, |
| { |
| "epoch": 3.4322916666666665, |
| "grad_norm": 1.7527490854263306, |
| "learning_rate": 0.0002807325556057649, |
| "loss": 0.9699, |
| "step": 659 |
| }, |
| { |
| "epoch": 3.4375, |
| "grad_norm": 2.043527126312256, |
| "learning_rate": 0.00028067172399541535, |
| "loss": 1.0744, |
| "step": 660 |
| }, |
| { |
| "epoch": 3.4427083333333335, |
| "grad_norm": 3.593808174133301, |
| "learning_rate": 0.0002806108031191628, |
| "loss": 0.9648, |
| "step": 661 |
| }, |
| { |
| "epoch": 3.4479166666666665, |
| "grad_norm": 2.41843843460083, |
| "learning_rate": 0.0002805497930186243, |
| "loss": 0.8796, |
| "step": 662 |
| }, |
| { |
| "epoch": 3.453125, |
| "grad_norm": 2.0723562240600586, |
| "learning_rate": 0.00028048869373547766, |
| "loss": 0.9405, |
| "step": 663 |
| }, |
| { |
| "epoch": 3.4583333333333335, |
| "grad_norm": 1.064660906791687, |
| "learning_rate": 0.00028042750531146165, |
| "loss": 0.9939, |
| "step": 664 |
| }, |
| { |
| "epoch": 3.4635416666666665, |
| "grad_norm": 1.3294516801834106, |
| "learning_rate": 0.0002803662277883761, |
| "loss": 0.9431, |
| "step": 665 |
| }, |
| { |
| "epoch": 3.46875, |
| "grad_norm": 1.1866672039031982, |
| "learning_rate": 0.0002803048612080814, |
| "loss": 0.8869, |
| "step": 666 |
| }, |
| { |
| "epoch": 3.4739583333333335, |
| "grad_norm": 0.944036066532135, |
| "learning_rate": 0.0002802434056124992, |
| "loss": 0.835, |
| "step": 667 |
| }, |
| { |
| "epoch": 3.4791666666666665, |
| "grad_norm": 1.6011360883712769, |
| "learning_rate": 0.0002801818610436115, |
| "loss": 0.9772, |
| "step": 668 |
| }, |
| { |
| "epoch": 3.484375, |
| "grad_norm": 1.6003479957580566, |
| "learning_rate": 0.0002801202275434615, |
| "loss": 0.9671, |
| "step": 669 |
| }, |
| { |
| "epoch": 3.4895833333333335, |
| "grad_norm": 2.572004556655884, |
| "learning_rate": 0.00028005850515415285, |
| "loss": 1.1648, |
| "step": 670 |
| }, |
| { |
| "epoch": 3.4947916666666665, |
| "grad_norm": 2.088407039642334, |
| "learning_rate": 0.00027999669391784996, |
| "loss": 0.8078, |
| "step": 671 |
| }, |
| { |
| "epoch": 3.5, |
| "grad_norm": 2.1827175617218018, |
| "learning_rate": 0.00027993479387677814, |
| "loss": 0.9574, |
| "step": 672 |
| }, |
| { |
| "epoch": 3.5052083333333335, |
| "grad_norm": 1.3459587097167969, |
| "learning_rate": 0.0002798728050732231, |
| "loss": 0.9502, |
| "step": 673 |
| }, |
| { |
| "epoch": 3.5104166666666665, |
| "grad_norm": 0.7578115463256836, |
| "learning_rate": 0.00027981072754953144, |
| "loss": 0.8881, |
| "step": 674 |
| }, |
| { |
| "epoch": 3.515625, |
| "grad_norm": 1.406132698059082, |
| "learning_rate": 0.0002797485613481101, |
| "loss": 1.0825, |
| "step": 675 |
| }, |
| { |
| "epoch": 3.5208333333333335, |
| "grad_norm": 1.048601508140564, |
| "learning_rate": 0.0002796863065114268, |
| "loss": 0.9674, |
| "step": 676 |
| }, |
| { |
| "epoch": 3.5260416666666665, |
| "grad_norm": 1.2002074718475342, |
| "learning_rate": 0.00027962396308200976, |
| "loss": 0.8904, |
| "step": 677 |
| }, |
| { |
| "epoch": 3.53125, |
| "grad_norm": 1.6247832775115967, |
| "learning_rate": 0.0002795615311024478, |
| "loss": 0.8243, |
| "step": 678 |
| }, |
| { |
| "epoch": 3.5364583333333335, |
| "grad_norm": 1.2734575271606445, |
| "learning_rate": 0.00027949901061538997, |
| "loss": 1.0138, |
| "step": 679 |
| }, |
| { |
| "epoch": 3.5416666666666665, |
| "grad_norm": 2.3922007083892822, |
| "learning_rate": 0.000279436401663546, |
| "loss": 0.8893, |
| "step": 680 |
| }, |
| { |
| "epoch": 3.546875, |
| "grad_norm": 0.9886126518249512, |
| "learning_rate": 0.00027937370428968606, |
| "loss": 0.9621, |
| "step": 681 |
| }, |
| { |
| "epoch": 3.5520833333333335, |
| "grad_norm": 1.441712498664856, |
| "learning_rate": 0.0002793109185366407, |
| "loss": 0.8902, |
| "step": 682 |
| }, |
| { |
| "epoch": 3.5572916666666665, |
| "grad_norm": 2.3064510822296143, |
| "learning_rate": 0.0002792480444473006, |
| "loss": 0.8551, |
| "step": 683 |
| }, |
| { |
| "epoch": 3.5625, |
| "grad_norm": 0.8048340082168579, |
| "learning_rate": 0.00027918508206461725, |
| "loss": 0.9581, |
| "step": 684 |
| }, |
| { |
| "epoch": 3.5677083333333335, |
| "grad_norm": 1.1847056150436401, |
| "learning_rate": 0.0002791220314316021, |
| "loss": 0.862, |
| "step": 685 |
| }, |
| { |
| "epoch": 3.5729166666666665, |
| "grad_norm": 2.0729622840881348, |
| "learning_rate": 0.00027905889259132697, |
| "loss": 0.9487, |
| "step": 686 |
| }, |
| { |
| "epoch": 3.578125, |
| "grad_norm": 0.7976325750350952, |
| "learning_rate": 0.00027899566558692404, |
| "loss": 0.8762, |
| "step": 687 |
| }, |
| { |
| "epoch": 3.5833333333333335, |
| "grad_norm": 1.9107147455215454, |
| "learning_rate": 0.0002789323504615855, |
| "loss": 0.9233, |
| "step": 688 |
| }, |
| { |
| "epoch": 3.5885416666666665, |
| "grad_norm": 2.0347461700439453, |
| "learning_rate": 0.000278868947258564, |
| "loss": 1.0287, |
| "step": 689 |
| }, |
| { |
| "epoch": 3.59375, |
| "grad_norm": 1.259476661682129, |
| "learning_rate": 0.0002788054560211722, |
| "loss": 0.9371, |
| "step": 690 |
| }, |
| { |
| "epoch": 3.5989583333333335, |
| "grad_norm": 1.8213045597076416, |
| "learning_rate": 0.00027874187679278285, |
| "loss": 0.9021, |
| "step": 691 |
| }, |
| { |
| "epoch": 3.6041666666666665, |
| "grad_norm": 2.2518670558929443, |
| "learning_rate": 0.00027867820961682897, |
| "loss": 0.9444, |
| "step": 692 |
| }, |
| { |
| "epoch": 3.609375, |
| "grad_norm": 1.4648590087890625, |
| "learning_rate": 0.00027861445453680354, |
| "loss": 0.8878, |
| "step": 693 |
| }, |
| { |
| "epoch": 3.6145833333333335, |
| "grad_norm": 1.1237908601760864, |
| "learning_rate": 0.00027855061159625977, |
| "loss": 0.9383, |
| "step": 694 |
| }, |
| { |
| "epoch": 3.6197916666666665, |
| "grad_norm": 0.9109013080596924, |
| "learning_rate": 0.0002784866808388105, |
| "loss": 0.9008, |
| "step": 695 |
| }, |
| { |
| "epoch": 3.625, |
| "grad_norm": 1.4416403770446777, |
| "learning_rate": 0.00027842266230812903, |
| "loss": 0.906, |
| "step": 696 |
| }, |
| { |
| "epoch": 3.6302083333333335, |
| "grad_norm": 1.4434268474578857, |
| "learning_rate": 0.0002783585560479483, |
| "loss": 0.9113, |
| "step": 697 |
| }, |
| { |
| "epoch": 3.6354166666666665, |
| "grad_norm": 1.0922330617904663, |
| "learning_rate": 0.00027829436210206134, |
| "loss": 0.9122, |
| "step": 698 |
| }, |
| { |
| "epoch": 3.640625, |
| "grad_norm": 1.563151240348816, |
| "learning_rate": 0.00027823008051432093, |
| "loss": 1.0092, |
| "step": 699 |
| }, |
| { |
| "epoch": 3.6458333333333335, |
| "grad_norm": 1.0171072483062744, |
| "learning_rate": 0.00027816571132863985, |
| "loss": 0.9397, |
| "step": 700 |
| }, |
| { |
| "epoch": 3.6458333333333335, |
| "eval_f1_macro": 0.2649971284249309, |
| "eval_loss": 0.9973326921463013, |
| "eval_runtime": 4.9712, |
| "eval_samples_per_second": 616.144, |
| "eval_steps_per_second": 9.656, |
| "step": 700 |
| }, |
| { |
| "epoch": 3.6510416666666665, |
| "grad_norm": 0.8679147958755493, |
| "learning_rate": 0.00027810125458899073, |
| "loss": 0.9719, |
| "step": 701 |
| }, |
| { |
| "epoch": 3.65625, |
| "grad_norm": 1.4182761907577515, |
| "learning_rate": 0.000278036710339406, |
| "loss": 0.9601, |
| "step": 702 |
| }, |
| { |
| "epoch": 3.6614583333333335, |
| "grad_norm": 0.9437317252159119, |
| "learning_rate": 0.0002779720786239777, |
| "loss": 0.978, |
| "step": 703 |
| }, |
| { |
| "epoch": 3.6666666666666665, |
| "grad_norm": 1.6689434051513672, |
| "learning_rate": 0.00027790735948685785, |
| "loss": 0.9818, |
| "step": 704 |
| }, |
| { |
| "epoch": 3.671875, |
| "grad_norm": 2.154742956161499, |
| "learning_rate": 0.00027784255297225814, |
| "loss": 0.9204, |
| "step": 705 |
| }, |
| { |
| "epoch": 3.6770833333333335, |
| "grad_norm": 1.4212405681610107, |
| "learning_rate": 0.0002777776591244498, |
| "loss": 1.0286, |
| "step": 706 |
| }, |
| { |
| "epoch": 3.6822916666666665, |
| "grad_norm": 1.4557844400405884, |
| "learning_rate": 0.00027771267798776386, |
| "loss": 1.0006, |
| "step": 707 |
| }, |
| { |
| "epoch": 3.6875, |
| "grad_norm": 1.6718941926956177, |
| "learning_rate": 0.00027764760960659096, |
| "loss": 0.8912, |
| "step": 708 |
| }, |
| { |
| "epoch": 3.6927083333333335, |
| "grad_norm": 1.5094162225723267, |
| "learning_rate": 0.0002775824540253814, |
| "loss": 1.0125, |
| "step": 709 |
| }, |
| { |
| "epoch": 3.6979166666666665, |
| "grad_norm": 2.3852412700653076, |
| "learning_rate": 0.0002775172112886448, |
| "loss": 0.9906, |
| "step": 710 |
| }, |
| { |
| "epoch": 3.703125, |
| "grad_norm": 1.990700364112854, |
| "learning_rate": 0.0002774518814409507, |
| "loss": 0.9769, |
| "step": 711 |
| }, |
| { |
| "epoch": 3.7083333333333335, |
| "grad_norm": 2.1096384525299072, |
| "learning_rate": 0.0002773864645269278, |
| "loss": 0.9344, |
| "step": 712 |
| }, |
| { |
| "epoch": 3.7135416666666665, |
| "grad_norm": 0.714316725730896, |
| "learning_rate": 0.0002773209605912644, |
| "loss": 0.9779, |
| "step": 713 |
| }, |
| { |
| "epoch": 3.71875, |
| "grad_norm": 1.1978435516357422, |
| "learning_rate": 0.00027725536967870845, |
| "loss": 0.8985, |
| "step": 714 |
| }, |
| { |
| "epoch": 3.7239583333333335, |
| "grad_norm": 1.3425475358963013, |
| "learning_rate": 0.000277189691834067, |
| "loss": 0.8932, |
| "step": 715 |
| }, |
| { |
| "epoch": 3.7291666666666665, |
| "grad_norm": 1.166999340057373, |
| "learning_rate": 0.0002771239271022066, |
| "loss": 0.911, |
| "step": 716 |
| }, |
| { |
| "epoch": 3.734375, |
| "grad_norm": 0.8004557490348816, |
| "learning_rate": 0.0002770580755280532, |
| "loss": 0.9562, |
| "step": 717 |
| }, |
| { |
| "epoch": 3.7395833333333335, |
| "grad_norm": 1.3408677577972412, |
| "learning_rate": 0.00027699213715659207, |
| "loss": 0.9858, |
| "step": 718 |
| }, |
| { |
| "epoch": 3.7447916666666665, |
| "grad_norm": 0.9547677040100098, |
| "learning_rate": 0.0002769261120328678, |
| "loss": 0.9269, |
| "step": 719 |
| }, |
| { |
| "epoch": 3.75, |
| "grad_norm": 1.4744911193847656, |
| "learning_rate": 0.0002768600002019841, |
| "loss": 1.1146, |
| "step": 720 |
| }, |
| { |
| "epoch": 3.7552083333333335, |
| "grad_norm": 1.4306371212005615, |
| "learning_rate": 0.00027679380170910406, |
| "loss": 0.8572, |
| "step": 721 |
| }, |
| { |
| "epoch": 3.7604166666666665, |
| "grad_norm": 1.7378649711608887, |
| "learning_rate": 0.00027672751659944995, |
| "loss": 0.8961, |
| "step": 722 |
| }, |
| { |
| "epoch": 3.765625, |
| "grad_norm": 2.21596097946167, |
| "learning_rate": 0.0002766611449183031, |
| "loss": 0.8827, |
| "step": 723 |
| }, |
| { |
| "epoch": 3.7708333333333335, |
| "grad_norm": 1.228125810623169, |
| "learning_rate": 0.00027659468671100416, |
| "loss": 0.9782, |
| "step": 724 |
| }, |
| { |
| "epoch": 3.7760416666666665, |
| "grad_norm": 1.9838706254959106, |
| "learning_rate": 0.0002765281420229527, |
| "loss": 1.0777, |
| "step": 725 |
| }, |
| { |
| "epoch": 3.78125, |
| "grad_norm": 1.301526427268982, |
| "learning_rate": 0.00027646151089960754, |
| "loss": 0.9499, |
| "step": 726 |
| }, |
| { |
| "epoch": 3.7864583333333335, |
| "grad_norm": 2.4068593978881836, |
| "learning_rate": 0.0002763947933864864, |
| "loss": 0.9324, |
| "step": 727 |
| }, |
| { |
| "epoch": 3.7916666666666665, |
| "grad_norm": 0.9724470376968384, |
| "learning_rate": 0.0002763279895291661, |
| "loss": 0.8965, |
| "step": 728 |
| }, |
| { |
| "epoch": 3.796875, |
| "grad_norm": 1.34438955783844, |
| "learning_rate": 0.0002762610993732824, |
| "loss": 0.9183, |
| "step": 729 |
| }, |
| { |
| "epoch": 3.8020833333333335, |
| "grad_norm": 0.9990777969360352, |
| "learning_rate": 0.0002761941229645301, |
| "loss": 0.9567, |
| "step": 730 |
| }, |
| { |
| "epoch": 3.8072916666666665, |
| "grad_norm": 1.665177822113037, |
| "learning_rate": 0.00027612706034866284, |
| "loss": 1.0613, |
| "step": 731 |
| }, |
| { |
| "epoch": 3.8125, |
| "grad_norm": 1.087782859802246, |
| "learning_rate": 0.00027605991157149314, |
| "loss": 1.0609, |
| "step": 732 |
| }, |
| { |
| "epoch": 3.8177083333333335, |
| "grad_norm": 1.1333589553833008, |
| "learning_rate": 0.00027599267667889247, |
| "loss": 1.0084, |
| "step": 733 |
| }, |
| { |
| "epoch": 3.8229166666666665, |
| "grad_norm": 1.3271658420562744, |
| "learning_rate": 0.00027592535571679093, |
| "loss": 0.9724, |
| "step": 734 |
| }, |
| { |
| "epoch": 3.828125, |
| "grad_norm": 0.9437097311019897, |
| "learning_rate": 0.0002758579487311777, |
| "loss": 0.9165, |
| "step": 735 |
| }, |
| { |
| "epoch": 3.8333333333333335, |
| "grad_norm": 1.2586958408355713, |
| "learning_rate": 0.00027579045576810055, |
| "loss": 0.9751, |
| "step": 736 |
| }, |
| { |
| "epoch": 3.8385416666666665, |
| "grad_norm": 2.036743640899658, |
| "learning_rate": 0.000275722876873666, |
| "loss": 0.9273, |
| "step": 737 |
| }, |
| { |
| "epoch": 3.84375, |
| "grad_norm": 1.4216351509094238, |
| "learning_rate": 0.0002756552120940393, |
| "loss": 0.8592, |
| "step": 738 |
| }, |
| { |
| "epoch": 3.8489583333333335, |
| "grad_norm": 1.3019397258758545, |
| "learning_rate": 0.00027558746147544427, |
| "loss": 0.8948, |
| "step": 739 |
| }, |
| { |
| "epoch": 3.8541666666666665, |
| "grad_norm": 1.5534601211547852, |
| "learning_rate": 0.00027551962506416357, |
| "loss": 1.0827, |
| "step": 740 |
| }, |
| { |
| "epoch": 3.859375, |
| "grad_norm": 1.8556050062179565, |
| "learning_rate": 0.0002754517029065383, |
| "loss": 0.9017, |
| "step": 741 |
| }, |
| { |
| "epoch": 3.8645833333333335, |
| "grad_norm": 1.5270673036575317, |
| "learning_rate": 0.00027538369504896817, |
| "loss": 0.8828, |
| "step": 742 |
| }, |
| { |
| "epoch": 3.8697916666666665, |
| "grad_norm": 1.001419186592102, |
| "learning_rate": 0.00027531560153791154, |
| "loss": 1.0277, |
| "step": 743 |
| }, |
| { |
| "epoch": 3.875, |
| "grad_norm": 0.9761799573898315, |
| "learning_rate": 0.0002752474224198851, |
| "loss": 0.9256, |
| "step": 744 |
| }, |
| { |
| "epoch": 3.8802083333333335, |
| "grad_norm": 1.148443579673767, |
| "learning_rate": 0.0002751791577414641, |
| "loss": 0.9213, |
| "step": 745 |
| }, |
| { |
| "epoch": 3.8854166666666665, |
| "grad_norm": 2.223802328109741, |
| "learning_rate": 0.00027511080754928245, |
| "loss": 0.9062, |
| "step": 746 |
| }, |
| { |
| "epoch": 3.890625, |
| "grad_norm": 1.1964625120162964, |
| "learning_rate": 0.00027504237189003203, |
| "loss": 0.9185, |
| "step": 747 |
| }, |
| { |
| "epoch": 3.8958333333333335, |
| "grad_norm": 1.900980830192566, |
| "learning_rate": 0.0002749738508104635, |
| "loss": 1.0069, |
| "step": 748 |
| }, |
| { |
| "epoch": 3.9010416666666665, |
| "grad_norm": 1.1942070722579956, |
| "learning_rate": 0.00027490524435738576, |
| "loss": 1.0127, |
| "step": 749 |
| }, |
| { |
| "epoch": 3.90625, |
| "grad_norm": 1.3024336099624634, |
| "learning_rate": 0.00027483655257766595, |
| "loss": 0.7715, |
| "step": 750 |
| }, |
| { |
| "epoch": 3.9114583333333335, |
| "grad_norm": 1.0629496574401855, |
| "learning_rate": 0.00027476777551822953, |
| "loss": 1.0013, |
| "step": 751 |
| }, |
| { |
| "epoch": 3.9166666666666665, |
| "grad_norm": 0.8041543960571289, |
| "learning_rate": 0.00027469891322606023, |
| "loss": 0.8916, |
| "step": 752 |
| }, |
| { |
| "epoch": 3.921875, |
| "grad_norm": 1.2700127363204956, |
| "learning_rate": 0.00027462996574820014, |
| "loss": 0.8536, |
| "step": 753 |
| }, |
| { |
| "epoch": 3.9270833333333335, |
| "grad_norm": 0.901572048664093, |
| "learning_rate": 0.00027456093313174925, |
| "loss": 0.9031, |
| "step": 754 |
| }, |
| { |
| "epoch": 3.9322916666666665, |
| "grad_norm": 1.1074095964431763, |
| "learning_rate": 0.00027449181542386593, |
| "loss": 0.9519, |
| "step": 755 |
| }, |
| { |
| "epoch": 3.9375, |
| "grad_norm": 1.086336374282837, |
| "learning_rate": 0.00027442261267176675, |
| "loss": 0.9075, |
| "step": 756 |
| }, |
| { |
| "epoch": 3.9427083333333335, |
| "grad_norm": 1.2827433347702026, |
| "learning_rate": 0.00027435332492272617, |
| "loss": 1.0517, |
| "step": 757 |
| }, |
| { |
| "epoch": 3.9479166666666665, |
| "grad_norm": 1.2783414125442505, |
| "learning_rate": 0.00027428395222407666, |
| "loss": 1.0223, |
| "step": 758 |
| }, |
| { |
| "epoch": 3.953125, |
| "grad_norm": 1.4035155773162842, |
| "learning_rate": 0.0002742144946232091, |
| "loss": 0.8735, |
| "step": 759 |
| }, |
| { |
| "epoch": 3.9583333333333335, |
| "grad_norm": 1.7555954456329346, |
| "learning_rate": 0.000274144952167572, |
| "loss": 0.8724, |
| "step": 760 |
| }, |
| { |
| "epoch": 3.9635416666666665, |
| "grad_norm": 1.303883671760559, |
| "learning_rate": 0.00027407532490467194, |
| "loss": 0.8971, |
| "step": 761 |
| }, |
| { |
| "epoch": 3.96875, |
| "grad_norm": 0.9069563150405884, |
| "learning_rate": 0.00027400561288207356, |
| "loss": 0.8859, |
| "step": 762 |
| }, |
| { |
| "epoch": 3.9739583333333335, |
| "grad_norm": 1.987623929977417, |
| "learning_rate": 0.00027393581614739923, |
| "loss": 1.0913, |
| "step": 763 |
| }, |
| { |
| "epoch": 3.9791666666666665, |
| "grad_norm": 1.6624423265457153, |
| "learning_rate": 0.00027386593474832923, |
| "loss": 0.9434, |
| "step": 764 |
| }, |
| { |
| "epoch": 3.984375, |
| "grad_norm": 1.82593834400177, |
| "learning_rate": 0.0002737959687326018, |
| "loss": 1.0463, |
| "step": 765 |
| }, |
| { |
| "epoch": 3.9895833333333335, |
| "grad_norm": 1.6780967712402344, |
| "learning_rate": 0.00027372591814801286, |
| "loss": 1.0148, |
| "step": 766 |
| }, |
| { |
| "epoch": 3.9947916666666665, |
| "grad_norm": 1.834500789642334, |
| "learning_rate": 0.0002736557830424161, |
| "loss": 0.8677, |
| "step": 767 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 3.4374337196350098, |
| "learning_rate": 0.000273585563463723, |
| "loss": 1.1346, |
| "step": 768 |
| }, |
| { |
| "epoch": 4.005208333333333, |
| "grad_norm": 1.8154795169830322, |
| "learning_rate": 0.0002735152594599027, |
| "loss": 0.9483, |
| "step": 769 |
| }, |
| { |
| "epoch": 4.010416666666667, |
| "grad_norm": 1.0201916694641113, |
| "learning_rate": 0.00027344487107898213, |
| "loss": 0.9617, |
| "step": 770 |
| }, |
| { |
| "epoch": 4.015625, |
| "grad_norm": 0.9306284189224243, |
| "learning_rate": 0.0002733743983690457, |
| "loss": 0.9375, |
| "step": 771 |
| }, |
| { |
| "epoch": 4.020833333333333, |
| "grad_norm": 1.2412221431732178, |
| "learning_rate": 0.00027330384137823555, |
| "loss": 1.0222, |
| "step": 772 |
| }, |
| { |
| "epoch": 4.026041666666667, |
| "grad_norm": 2.1069960594177246, |
| "learning_rate": 0.00027323320015475134, |
| "loss": 0.9485, |
| "step": 773 |
| }, |
| { |
| "epoch": 4.03125, |
| "grad_norm": 1.5242425203323364, |
| "learning_rate": 0.0002731624747468502, |
| "loss": 0.8244, |
| "step": 774 |
| }, |
| { |
| "epoch": 4.036458333333333, |
| "grad_norm": 1.4789518117904663, |
| "learning_rate": 0.00027309166520284695, |
| "loss": 0.9445, |
| "step": 775 |
| }, |
| { |
| "epoch": 4.041666666666667, |
| "grad_norm": 1.1733978986740112, |
| "learning_rate": 0.00027302077157111376, |
| "loss": 0.8544, |
| "step": 776 |
| }, |
| { |
| "epoch": 4.046875, |
| "grad_norm": 0.7193760871887207, |
| "learning_rate": 0.00027294979390008015, |
| "loss": 0.8677, |
| "step": 777 |
| }, |
| { |
| "epoch": 4.052083333333333, |
| "grad_norm": 1.0690431594848633, |
| "learning_rate": 0.0002728787322382333, |
| "loss": 0.9182, |
| "step": 778 |
| }, |
| { |
| "epoch": 4.057291666666667, |
| "grad_norm": 1.777198076248169, |
| "learning_rate": 0.00027280758663411766, |
| "loss": 0.949, |
| "step": 779 |
| }, |
| { |
| "epoch": 4.0625, |
| "grad_norm": 1.3391640186309814, |
| "learning_rate": 0.00027273635713633487, |
| "loss": 0.9411, |
| "step": 780 |
| }, |
| { |
| "epoch": 4.067708333333333, |
| "grad_norm": 2.0043766498565674, |
| "learning_rate": 0.0002726650437935441, |
| "loss": 1.0062, |
| "step": 781 |
| }, |
| { |
| "epoch": 4.072916666666667, |
| "grad_norm": 0.938504695892334, |
| "learning_rate": 0.00027259364665446165, |
| "loss": 0.9222, |
| "step": 782 |
| }, |
| { |
| "epoch": 4.078125, |
| "grad_norm": 1.931041955947876, |
| "learning_rate": 0.00027252216576786117, |
| "loss": 1.0085, |
| "step": 783 |
| }, |
| { |
| "epoch": 4.083333333333333, |
| "grad_norm": 2.1131203174591064, |
| "learning_rate": 0.0002724506011825734, |
| "loss": 0.8492, |
| "step": 784 |
| }, |
| { |
| "epoch": 4.088541666666667, |
| "grad_norm": 1.3868041038513184, |
| "learning_rate": 0.00027237895294748637, |
| "loss": 0.8241, |
| "step": 785 |
| }, |
| { |
| "epoch": 4.09375, |
| "grad_norm": 1.596948504447937, |
| "learning_rate": 0.0002723072211115452, |
| "loss": 0.915, |
| "step": 786 |
| }, |
| { |
| "epoch": 4.098958333333333, |
| "grad_norm": 2.1440203189849854, |
| "learning_rate": 0.00027223540572375213, |
| "loss": 0.8533, |
| "step": 787 |
| }, |
| { |
| "epoch": 4.104166666666667, |
| "grad_norm": 2.0448787212371826, |
| "learning_rate": 0.0002721635068331665, |
| "loss": 0.9393, |
| "step": 788 |
| }, |
| { |
| "epoch": 4.109375, |
| "grad_norm": 1.9799960851669312, |
| "learning_rate": 0.00027209152448890464, |
| "loss": 1.0225, |
| "step": 789 |
| }, |
| { |
| "epoch": 4.114583333333333, |
| "grad_norm": 1.896451473236084, |
| "learning_rate": 0.0002720194587401399, |
| "loss": 1.0069, |
| "step": 790 |
| }, |
| { |
| "epoch": 4.119791666666667, |
| "grad_norm": 1.6112256050109863, |
| "learning_rate": 0.00027194730963610265, |
| "loss": 0.9561, |
| "step": 791 |
| }, |
| { |
| "epoch": 4.125, |
| "grad_norm": 1.6906410455703735, |
| "learning_rate": 0.0002718750772260802, |
| "loss": 0.9768, |
| "step": 792 |
| }, |
| { |
| "epoch": 4.130208333333333, |
| "grad_norm": 2.6631453037261963, |
| "learning_rate": 0.00027180276155941674, |
| "loss": 1.0498, |
| "step": 793 |
| }, |
| { |
| "epoch": 4.135416666666667, |
| "grad_norm": 1.336904525756836, |
| "learning_rate": 0.0002717303626855133, |
| "loss": 0.8904, |
| "step": 794 |
| }, |
| { |
| "epoch": 4.140625, |
| "grad_norm": 1.2258650064468384, |
| "learning_rate": 0.0002716578806538279, |
| "loss": 0.9772, |
| "step": 795 |
| }, |
| { |
| "epoch": 4.145833333333333, |
| "grad_norm": 1.779228925704956, |
| "learning_rate": 0.0002715853155138751, |
| "loss": 0.9333, |
| "step": 796 |
| }, |
| { |
| "epoch": 4.151041666666667, |
| "grad_norm": 0.8930888772010803, |
| "learning_rate": 0.0002715126673152265, |
| "loss": 0.8808, |
| "step": 797 |
| }, |
| { |
| "epoch": 4.15625, |
| "grad_norm": 0.8486023545265198, |
| "learning_rate": 0.00027143993610751036, |
| "loss": 0.898, |
| "step": 798 |
| }, |
| { |
| "epoch": 4.161458333333333, |
| "grad_norm": 1.5234107971191406, |
| "learning_rate": 0.0002713671219404115, |
| "loss": 0.8857, |
| "step": 799 |
| }, |
| { |
| "epoch": 4.166666666666667, |
| "grad_norm": 1.4347233772277832, |
| "learning_rate": 0.0002712942248636716, |
| "loss": 1.0577, |
| "step": 800 |
| }, |
| { |
| "epoch": 4.166666666666667, |
| "eval_f1_macro": 0.2669983502723603, |
| "eval_loss": 0.9996221661567688, |
| "eval_runtime": 5.0067, |
| "eval_samples_per_second": 611.784, |
| "eval_steps_per_second": 9.587, |
| "step": 800 |
| }, |
| { |
| "epoch": 4.171875, |
| "grad_norm": 1.6529940366744995, |
| "learning_rate": 0.000271221244927089, |
| "loss": 1.0549, |
| "step": 801 |
| }, |
| { |
| "epoch": 4.177083333333333, |
| "grad_norm": 1.2619229555130005, |
| "learning_rate": 0.0002711481821805185, |
| "loss": 1.0252, |
| "step": 802 |
| }, |
| { |
| "epoch": 4.182291666666667, |
| "grad_norm": 1.160146951675415, |
| "learning_rate": 0.0002710750366738714, |
| "loss": 0.9957, |
| "step": 803 |
| }, |
| { |
| "epoch": 4.1875, |
| "grad_norm": 2.281104326248169, |
| "learning_rate": 0.00027100180845711586, |
| "loss": 0.9434, |
| "step": 804 |
| }, |
| { |
| "epoch": 4.192708333333333, |
| "grad_norm": 1.6052273511886597, |
| "learning_rate": 0.00027092849758027617, |
| "loss": 0.9996, |
| "step": 805 |
| }, |
| { |
| "epoch": 4.197916666666667, |
| "grad_norm": 1.332768440246582, |
| "learning_rate": 0.0002708551040934334, |
| "loss": 0.9406, |
| "step": 806 |
| }, |
| { |
| "epoch": 4.203125, |
| "grad_norm": 1.463221788406372, |
| "learning_rate": 0.00027078162804672476, |
| "loss": 0.9408, |
| "step": 807 |
| }, |
| { |
| "epoch": 4.208333333333333, |
| "grad_norm": 1.3395721912384033, |
| "learning_rate": 0.0002707080694903442, |
| "loss": 0.921, |
| "step": 808 |
| }, |
| { |
| "epoch": 4.213541666666667, |
| "grad_norm": 1.0490790605545044, |
| "learning_rate": 0.00027063442847454167, |
| "loss": 0.9658, |
| "step": 809 |
| }, |
| { |
| "epoch": 4.21875, |
| "grad_norm": 1.112535834312439, |
| "learning_rate": 0.0002705607050496238, |
| "loss": 0.983, |
| "step": 810 |
| }, |
| { |
| "epoch": 4.223958333333333, |
| "grad_norm": 1.3234186172485352, |
| "learning_rate": 0.0002704868992659532, |
| "loss": 0.9853, |
| "step": 811 |
| }, |
| { |
| "epoch": 4.229166666666667, |
| "grad_norm": 2.1610472202301025, |
| "learning_rate": 0.00027041301117394903, |
| "loss": 0.8092, |
| "step": 812 |
| }, |
| { |
| "epoch": 4.234375, |
| "grad_norm": 1.5889304876327515, |
| "learning_rate": 0.0002703390408240864, |
| "loss": 0.9574, |
| "step": 813 |
| }, |
| { |
| "epoch": 4.239583333333333, |
| "grad_norm": 1.1762019395828247, |
| "learning_rate": 0.0002702649882668968, |
| "loss": 1.0407, |
| "step": 814 |
| }, |
| { |
| "epoch": 4.244791666666667, |
| "grad_norm": 2.3938939571380615, |
| "learning_rate": 0.00027019085355296785, |
| "loss": 0.8096, |
| "step": 815 |
| }, |
| { |
| "epoch": 4.25, |
| "grad_norm": 2.5290915966033936, |
| "learning_rate": 0.00027011663673294316, |
| "loss": 0.9597, |
| "step": 816 |
| }, |
| { |
| "epoch": 4.255208333333333, |
| "grad_norm": 2.2086853981018066, |
| "learning_rate": 0.00027004233785752273, |
| "loss": 0.9237, |
| "step": 817 |
| }, |
| { |
| "epoch": 4.260416666666667, |
| "grad_norm": 0.9818319082260132, |
| "learning_rate": 0.00026996795697746225, |
| "loss": 0.8899, |
| "step": 818 |
| }, |
| { |
| "epoch": 4.265625, |
| "grad_norm": 3.002997875213623, |
| "learning_rate": 0.0002698934941435737, |
| "loss": 0.9004, |
| "step": 819 |
| }, |
| { |
| "epoch": 4.270833333333333, |
| "grad_norm": 2.593953847885132, |
| "learning_rate": 0.0002698189494067249, |
| "loss": 0.8178, |
| "step": 820 |
| }, |
| { |
| "epoch": 4.276041666666667, |
| "grad_norm": 1.4551466703414917, |
| "learning_rate": 0.00026974432281783977, |
| "loss": 0.9492, |
| "step": 821 |
| }, |
| { |
| "epoch": 4.28125, |
| "grad_norm": 3.0733509063720703, |
| "learning_rate": 0.00026966961442789793, |
| "loss": 1.0938, |
| "step": 822 |
| }, |
| { |
| "epoch": 4.286458333333333, |
| "grad_norm": 2.7927916049957275, |
| "learning_rate": 0.000269594824287935, |
| "loss": 0.907, |
| "step": 823 |
| }, |
| { |
| "epoch": 4.291666666666667, |
| "grad_norm": 1.4562456607818604, |
| "learning_rate": 0.0002695199524490425, |
| "loss": 0.9512, |
| "step": 824 |
| }, |
| { |
| "epoch": 4.296875, |
| "grad_norm": 1.4765658378601074, |
| "learning_rate": 0.0002694449989623676, |
| "loss": 0.914, |
| "step": 825 |
| }, |
| { |
| "epoch": 4.302083333333333, |
| "grad_norm": 1.151477575302124, |
| "learning_rate": 0.00026936996387911356, |
| "loss": 0.8183, |
| "step": 826 |
| }, |
| { |
| "epoch": 4.307291666666667, |
| "grad_norm": 0.8285448551177979, |
| "learning_rate": 0.00026929484725053893, |
| "loss": 0.8717, |
| "step": 827 |
| }, |
| { |
| "epoch": 4.3125, |
| "grad_norm": 2.533783197402954, |
| "learning_rate": 0.0002692196491279584, |
| "loss": 0.986, |
| "step": 828 |
| }, |
| { |
| "epoch": 4.317708333333333, |
| "grad_norm": 1.4099982976913452, |
| "learning_rate": 0.0002691443695627419, |
| "loss": 0.9723, |
| "step": 829 |
| }, |
| { |
| "epoch": 4.322916666666667, |
| "grad_norm": 1.2374764680862427, |
| "learning_rate": 0.0002690690086063155, |
| "loss": 0.894, |
| "step": 830 |
| }, |
| { |
| "epoch": 4.328125, |
| "grad_norm": 0.974353551864624, |
| "learning_rate": 0.00026899356631016047, |
| "loss": 0.9183, |
| "step": 831 |
| }, |
| { |
| "epoch": 4.333333333333333, |
| "grad_norm": 1.0040624141693115, |
| "learning_rate": 0.00026891804272581376, |
| "loss": 0.9193, |
| "step": 832 |
| }, |
| { |
| "epoch": 4.338541666666667, |
| "grad_norm": 2.1607229709625244, |
| "learning_rate": 0.00026884243790486795, |
| "loss": 0.8577, |
| "step": 833 |
| }, |
| { |
| "epoch": 4.34375, |
| "grad_norm": 0.7694854736328125, |
| "learning_rate": 0.00026876675189897096, |
| "loss": 0.9513, |
| "step": 834 |
| }, |
| { |
| "epoch": 4.348958333333333, |
| "grad_norm": 0.9537512063980103, |
| "learning_rate": 0.00026869098475982623, |
| "loss": 0.8849, |
| "step": 835 |
| }, |
| { |
| "epoch": 4.354166666666667, |
| "grad_norm": 1.1201133728027344, |
| "learning_rate": 0.00026861513653919276, |
| "loss": 1.0231, |
| "step": 836 |
| }, |
| { |
| "epoch": 4.359375, |
| "grad_norm": 1.3850104808807373, |
| "learning_rate": 0.0002685392072888847, |
| "loss": 0.9652, |
| "step": 837 |
| }, |
| { |
| "epoch": 4.364583333333333, |
| "grad_norm": 1.7246252298355103, |
| "learning_rate": 0.00026846319706077176, |
| "loss": 0.9213, |
| "step": 838 |
| }, |
| { |
| "epoch": 4.369791666666667, |
| "grad_norm": 1.0635846853256226, |
| "learning_rate": 0.0002683871059067789, |
| "loss": 0.8771, |
| "step": 839 |
| }, |
| { |
| "epoch": 4.375, |
| "grad_norm": 1.0011677742004395, |
| "learning_rate": 0.0002683109338788862, |
| "loss": 0.9803, |
| "step": 840 |
| }, |
| { |
| "epoch": 4.380208333333333, |
| "grad_norm": 1.212459921836853, |
| "learning_rate": 0.0002682346810291292, |
| "loss": 0.9864, |
| "step": 841 |
| }, |
| { |
| "epoch": 4.385416666666667, |
| "grad_norm": 1.4357210397720337, |
| "learning_rate": 0.00026815834740959865, |
| "loss": 0.9357, |
| "step": 842 |
| }, |
| { |
| "epoch": 4.390625, |
| "grad_norm": 1.5059441328048706, |
| "learning_rate": 0.00026808193307244026, |
| "loss": 0.9397, |
| "step": 843 |
| }, |
| { |
| "epoch": 4.395833333333333, |
| "grad_norm": 1.5994515419006348, |
| "learning_rate": 0.00026800543806985527, |
| "loss": 0.8611, |
| "step": 844 |
| }, |
| { |
| "epoch": 4.401041666666667, |
| "grad_norm": 2.2354719638824463, |
| "learning_rate": 0.0002679288624540996, |
| "loss": 0.9675, |
| "step": 845 |
| }, |
| { |
| "epoch": 4.40625, |
| "grad_norm": 0.8673033714294434, |
| "learning_rate": 0.0002678522062774844, |
| "loss": 0.9029, |
| "step": 846 |
| }, |
| { |
| "epoch": 4.411458333333333, |
| "grad_norm": 1.1725605726242065, |
| "learning_rate": 0.00026777546959237604, |
| "loss": 0.8396, |
| "step": 847 |
| }, |
| { |
| "epoch": 4.416666666666667, |
| "grad_norm": 0.9631534218788147, |
| "learning_rate": 0.0002676986524511956, |
| "loss": 0.9129, |
| "step": 848 |
| }, |
| { |
| "epoch": 4.421875, |
| "grad_norm": 1.285298466682434, |
| "learning_rate": 0.00026762175490641934, |
| "loss": 0.9177, |
| "step": 849 |
| }, |
| { |
| "epoch": 4.427083333333333, |
| "grad_norm": 2.1914656162261963, |
| "learning_rate": 0.00026754477701057826, |
| "loss": 0.9149, |
| "step": 850 |
| }, |
| { |
| "epoch": 4.432291666666667, |
| "grad_norm": 2.5263001918792725, |
| "learning_rate": 0.0002674677188162584, |
| "loss": 0.9361, |
| "step": 851 |
| }, |
| { |
| "epoch": 4.4375, |
| "grad_norm": 1.41455078125, |
| "learning_rate": 0.0002673905803761006, |
| "loss": 1.0583, |
| "step": 852 |
| }, |
| { |
| "epoch": 4.442708333333333, |
| "grad_norm": 1.350877285003662, |
| "learning_rate": 0.0002673133617428005, |
| "loss": 0.9231, |
| "step": 853 |
| }, |
| { |
| "epoch": 4.447916666666667, |
| "grad_norm": 1.084396243095398, |
| "learning_rate": 0.00026723606296910857, |
| "loss": 0.8626, |
| "step": 854 |
| }, |
| { |
| "epoch": 4.453125, |
| "grad_norm": 1.2830311059951782, |
| "learning_rate": 0.0002671586841078299, |
| "loss": 0.9739, |
| "step": 855 |
| }, |
| { |
| "epoch": 4.458333333333333, |
| "grad_norm": 1.3219810724258423, |
| "learning_rate": 0.0002670812252118245, |
| "loss": 1.0191, |
| "step": 856 |
| }, |
| { |
| "epoch": 4.463541666666667, |
| "grad_norm": 1.0295839309692383, |
| "learning_rate": 0.0002670036863340069, |
| "loss": 0.7695, |
| "step": 857 |
| }, |
| { |
| "epoch": 4.46875, |
| "grad_norm": 1.6539872884750366, |
| "learning_rate": 0.00026692606752734636, |
| "loss": 0.8241, |
| "step": 858 |
| }, |
| { |
| "epoch": 4.473958333333333, |
| "grad_norm": 1.8296704292297363, |
| "learning_rate": 0.0002668483688448667, |
| "loss": 0.87, |
| "step": 859 |
| }, |
| { |
| "epoch": 4.479166666666667, |
| "grad_norm": 2.168663740158081, |
| "learning_rate": 0.00026677059033964614, |
| "loss": 1.0786, |
| "step": 860 |
| }, |
| { |
| "epoch": 4.484375, |
| "grad_norm": 1.566535472869873, |
| "learning_rate": 0.0002666927320648177, |
| "loss": 0.9108, |
| "step": 861 |
| }, |
| { |
| "epoch": 4.489583333333333, |
| "grad_norm": 1.1221470832824707, |
| "learning_rate": 0.00026661479407356876, |
| "loss": 0.8069, |
| "step": 862 |
| }, |
| { |
| "epoch": 4.494791666666667, |
| "grad_norm": 2.2228097915649414, |
| "learning_rate": 0.0002665367764191412, |
| "loss": 0.9773, |
| "step": 863 |
| }, |
| { |
| "epoch": 4.5, |
| "grad_norm": 1.2060190439224243, |
| "learning_rate": 0.0002664586791548313, |
| "loss": 0.905, |
| "step": 864 |
| }, |
| { |
| "epoch": 4.505208333333333, |
| "grad_norm": 1.2408952713012695, |
| "learning_rate": 0.0002663805023339897, |
| "loss": 1.0424, |
| "step": 865 |
| }, |
| { |
| "epoch": 4.510416666666667, |
| "grad_norm": 1.0498292446136475, |
| "learning_rate": 0.00026630224601002144, |
| "loss": 0.8705, |
| "step": 866 |
| }, |
| { |
| "epoch": 4.515625, |
| "grad_norm": 1.215868592262268, |
| "learning_rate": 0.0002662239102363857, |
| "loss": 0.909, |
| "step": 867 |
| }, |
| { |
| "epoch": 4.520833333333333, |
| "grad_norm": 1.3728846311569214, |
| "learning_rate": 0.0002661454950665963, |
| "loss": 0.8947, |
| "step": 868 |
| }, |
| { |
| "epoch": 4.526041666666667, |
| "grad_norm": 1.812875747680664, |
| "learning_rate": 0.00026606700055422093, |
| "loss": 1.0283, |
| "step": 869 |
| }, |
| { |
| "epoch": 4.53125, |
| "grad_norm": 3.14894437789917, |
| "learning_rate": 0.00026598842675288166, |
| "loss": 0.9897, |
| "step": 870 |
| }, |
| { |
| "epoch": 4.536458333333333, |
| "grad_norm": 2.237990617752075, |
| "learning_rate": 0.0002659097737162547, |
| "loss": 1.0439, |
| "step": 871 |
| }, |
| { |
| "epoch": 4.541666666666667, |
| "grad_norm": 1.8150421380996704, |
| "learning_rate": 0.0002658310414980703, |
| "loss": 0.8968, |
| "step": 872 |
| }, |
| { |
| "epoch": 4.546875, |
| "grad_norm": 0.990670382976532, |
| "learning_rate": 0.0002657522301521129, |
| "loss": 0.9498, |
| "step": 873 |
| }, |
| { |
| "epoch": 4.552083333333333, |
| "grad_norm": 1.3864818811416626, |
| "learning_rate": 0.00026567333973222105, |
| "loss": 0.9168, |
| "step": 874 |
| }, |
| { |
| "epoch": 4.557291666666667, |
| "grad_norm": 1.8043999671936035, |
| "learning_rate": 0.00026559437029228714, |
| "loss": 0.9755, |
| "step": 875 |
| }, |
| { |
| "epoch": 4.5625, |
| "grad_norm": 1.129779577255249, |
| "learning_rate": 0.0002655153218862578, |
| "loss": 0.9927, |
| "step": 876 |
| }, |
| { |
| "epoch": 4.567708333333333, |
| "grad_norm": 0.7324973940849304, |
| "learning_rate": 0.0002654361945681333, |
| "loss": 0.8962, |
| "step": 877 |
| }, |
| { |
| "epoch": 4.572916666666667, |
| "grad_norm": 1.420326590538025, |
| "learning_rate": 0.00026535698839196795, |
| "loss": 0.9236, |
| "step": 878 |
| }, |
| { |
| "epoch": 4.578125, |
| "grad_norm": 1.5459980964660645, |
| "learning_rate": 0.00026527770341187, |
| "loss": 0.9044, |
| "step": 879 |
| }, |
| { |
| "epoch": 4.583333333333333, |
| "grad_norm": 2.1882777214050293, |
| "learning_rate": 0.0002651983396820015, |
| "loss": 0.9218, |
| "step": 880 |
| }, |
| { |
| "epoch": 4.588541666666667, |
| "grad_norm": 2.0452213287353516, |
| "learning_rate": 0.00026511889725657823, |
| "loss": 0.9228, |
| "step": 881 |
| }, |
| { |
| "epoch": 4.59375, |
| "grad_norm": 3.040541410446167, |
| "learning_rate": 0.0002650393761898698, |
| "loss": 0.9779, |
| "step": 882 |
| }, |
| { |
| "epoch": 4.598958333333333, |
| "grad_norm": 1.1053589582443237, |
| "learning_rate": 0.0002649597765361995, |
| "loss": 0.9652, |
| "step": 883 |
| }, |
| { |
| "epoch": 4.604166666666667, |
| "grad_norm": 0.9193670153617859, |
| "learning_rate": 0.00026488009834994434, |
| "loss": 0.8622, |
| "step": 884 |
| }, |
| { |
| "epoch": 4.609375, |
| "grad_norm": 1.3254812955856323, |
| "learning_rate": 0.00026480034168553494, |
| "loss": 0.8447, |
| "step": 885 |
| }, |
| { |
| "epoch": 4.614583333333333, |
| "grad_norm": 1.0661706924438477, |
| "learning_rate": 0.00026472050659745547, |
| "loss": 0.9024, |
| "step": 886 |
| }, |
| { |
| "epoch": 4.619791666666667, |
| "grad_norm": 1.4004321098327637, |
| "learning_rate": 0.0002646405931402439, |
| "loss": 0.8798, |
| "step": 887 |
| }, |
| { |
| "epoch": 4.625, |
| "grad_norm": 1.630423665046692, |
| "learning_rate": 0.00026456060136849146, |
| "loss": 0.8991, |
| "step": 888 |
| }, |
| { |
| "epoch": 4.630208333333333, |
| "grad_norm": 1.3233263492584229, |
| "learning_rate": 0.0002644805313368431, |
| "loss": 0.8627, |
| "step": 889 |
| }, |
| { |
| "epoch": 4.635416666666667, |
| "grad_norm": 1.0006464719772339, |
| "learning_rate": 0.00026440038309999707, |
| "loss": 0.7917, |
| "step": 890 |
| }, |
| { |
| "epoch": 4.640625, |
| "grad_norm": 1.1303244829177856, |
| "learning_rate": 0.00026432015671270517, |
| "loss": 0.806, |
| "step": 891 |
| }, |
| { |
| "epoch": 4.645833333333333, |
| "grad_norm": 1.5840848684310913, |
| "learning_rate": 0.0002642398522297725, |
| "loss": 1.0341, |
| "step": 892 |
| }, |
| { |
| "epoch": 4.651041666666667, |
| "grad_norm": 1.892253041267395, |
| "learning_rate": 0.00026415946970605755, |
| "loss": 1.0208, |
| "step": 893 |
| }, |
| { |
| "epoch": 4.65625, |
| "grad_norm": 1.0454318523406982, |
| "learning_rate": 0.00026407900919647216, |
| "loss": 0.8649, |
| "step": 894 |
| }, |
| { |
| "epoch": 4.661458333333333, |
| "grad_norm": 2.5079212188720703, |
| "learning_rate": 0.0002639984707559813, |
| "loss": 0.9217, |
| "step": 895 |
| }, |
| { |
| "epoch": 4.666666666666667, |
| "grad_norm": 1.2754862308502197, |
| "learning_rate": 0.0002639178544396033, |
| "loss": 1.0768, |
| "step": 896 |
| }, |
| { |
| "epoch": 4.671875, |
| "grad_norm": 1.2195018529891968, |
| "learning_rate": 0.0002638371603024097, |
| "loss": 0.8718, |
| "step": 897 |
| }, |
| { |
| "epoch": 4.677083333333333, |
| "grad_norm": 1.4819666147232056, |
| "learning_rate": 0.00026375638839952513, |
| "loss": 0.975, |
| "step": 898 |
| }, |
| { |
| "epoch": 4.682291666666667, |
| "grad_norm": 1.0100791454315186, |
| "learning_rate": 0.0002636755387861274, |
| "loss": 0.9582, |
| "step": 899 |
| }, |
| { |
| "epoch": 4.6875, |
| "grad_norm": 1.5848075151443481, |
| "learning_rate": 0.00026359461151744744, |
| "loss": 0.9135, |
| "step": 900 |
| }, |
| { |
| "epoch": 4.6875, |
| "eval_f1_macro": 0.26475954424506526, |
| "eval_loss": 1.0087125301361084, |
| "eval_runtime": 4.937, |
| "eval_samples_per_second": 620.419, |
| "eval_steps_per_second": 9.723, |
| "step": 900 |
| }, |
| { |
| "epoch": 4.692708333333333, |
| "grad_norm": 1.1077685356140137, |
| "learning_rate": 0.00026351360664876903, |
| "loss": 0.9264, |
| "step": 901 |
| }, |
| { |
| "epoch": 4.697916666666667, |
| "grad_norm": 1.2814671993255615, |
| "learning_rate": 0.00026343252423542923, |
| "loss": 0.903, |
| "step": 902 |
| }, |
| { |
| "epoch": 4.703125, |
| "grad_norm": 1.8769515752792358, |
| "learning_rate": 0.0002633513643328179, |
| "loss": 0.9432, |
| "step": 903 |
| }, |
| { |
| "epoch": 4.708333333333333, |
| "grad_norm": 1.1710680723190308, |
| "learning_rate": 0.00026327012699637795, |
| "loss": 0.9289, |
| "step": 904 |
| }, |
| { |
| "epoch": 4.713541666666667, |
| "grad_norm": 1.5783774852752686, |
| "learning_rate": 0.000263188812281605, |
| "loss": 0.936, |
| "step": 905 |
| }, |
| { |
| "epoch": 4.71875, |
| "grad_norm": 1.2788571119308472, |
| "learning_rate": 0.00026310742024404773, |
| "loss": 0.9259, |
| "step": 906 |
| }, |
| { |
| "epoch": 4.723958333333333, |
| "grad_norm": 1.1684014797210693, |
| "learning_rate": 0.00026302595093930756, |
| "loss": 0.9219, |
| "step": 907 |
| }, |
| { |
| "epoch": 4.729166666666667, |
| "grad_norm": 1.3754348754882812, |
| "learning_rate": 0.00026294440442303877, |
| "loss": 0.8223, |
| "step": 908 |
| }, |
| { |
| "epoch": 4.734375, |
| "grad_norm": 1.397780418395996, |
| "learning_rate": 0.0002628627807509481, |
| "loss": 0.9289, |
| "step": 909 |
| }, |
| { |
| "epoch": 4.739583333333333, |
| "grad_norm": 1.4212968349456787, |
| "learning_rate": 0.00026278107997879536, |
| "loss": 0.8858, |
| "step": 910 |
| }, |
| { |
| "epoch": 4.744791666666667, |
| "grad_norm": 2.1302297115325928, |
| "learning_rate": 0.0002626993021623929, |
| "loss": 0.9329, |
| "step": 911 |
| }, |
| { |
| "epoch": 4.75, |
| "grad_norm": 3.523074150085449, |
| "learning_rate": 0.0002626174473576056, |
| "loss": 1.0717, |
| "step": 912 |
| }, |
| { |
| "epoch": 4.755208333333333, |
| "grad_norm": 2.0694878101348877, |
| "learning_rate": 0.00026253551562035104, |
| "loss": 1.0564, |
| "step": 913 |
| }, |
| { |
| "epoch": 4.760416666666667, |
| "grad_norm": 2.2898738384246826, |
| "learning_rate": 0.00026245350700659936, |
| "loss": 0.9263, |
| "step": 914 |
| }, |
| { |
| "epoch": 4.765625, |
| "grad_norm": 1.4025534391403198, |
| "learning_rate": 0.00026237142157237316, |
| "loss": 0.9038, |
| "step": 915 |
| }, |
| { |
| "epoch": 4.770833333333333, |
| "grad_norm": 1.2898749113082886, |
| "learning_rate": 0.00026228925937374755, |
| "loss": 0.9562, |
| "step": 916 |
| }, |
| { |
| "epoch": 4.776041666666667, |
| "grad_norm": 1.029670238494873, |
| "learning_rate": 0.0002622070204668501, |
| "loss": 0.9232, |
| "step": 917 |
| }, |
| { |
| "epoch": 4.78125, |
| "grad_norm": 2.1035585403442383, |
| "learning_rate": 0.0002621247049078606, |
| "loss": 0.8647, |
| "step": 918 |
| }, |
| { |
| "epoch": 4.786458333333333, |
| "grad_norm": 1.5297807455062866, |
| "learning_rate": 0.0002620423127530116, |
| "loss": 0.8616, |
| "step": 919 |
| }, |
| { |
| "epoch": 4.791666666666667, |
| "grad_norm": 1.1556665897369385, |
| "learning_rate": 0.0002619598440585876, |
| "loss": 0.8326, |
| "step": 920 |
| }, |
| { |
| "epoch": 4.796875, |
| "grad_norm": 1.4606956243515015, |
| "learning_rate": 0.0002618772988809255, |
| "loss": 0.8535, |
| "step": 921 |
| }, |
| { |
| "epoch": 4.802083333333333, |
| "grad_norm": 0.9597684741020203, |
| "learning_rate": 0.0002617946772764145, |
| "loss": 0.8545, |
| "step": 922 |
| }, |
| { |
| "epoch": 4.807291666666667, |
| "grad_norm": 0.9675697684288025, |
| "learning_rate": 0.000261711979301496, |
| "loss": 0.9406, |
| "step": 923 |
| }, |
| { |
| "epoch": 4.8125, |
| "grad_norm": 1.1574293375015259, |
| "learning_rate": 0.0002616292050126635, |
| "loss": 0.8523, |
| "step": 924 |
| }, |
| { |
| "epoch": 4.817708333333333, |
| "grad_norm": 1.1320936679840088, |
| "learning_rate": 0.0002615463544664627, |
| "loss": 1.015, |
| "step": 925 |
| }, |
| { |
| "epoch": 4.822916666666667, |
| "grad_norm": 1.63894522190094, |
| "learning_rate": 0.0002614634277194914, |
| "loss": 0.902, |
| "step": 926 |
| }, |
| { |
| "epoch": 4.828125, |
| "grad_norm": 0.9774540662765503, |
| "learning_rate": 0.0002613804248283995, |
| "loss": 0.9172, |
| "step": 927 |
| }, |
| { |
| "epoch": 4.833333333333333, |
| "grad_norm": 1.8441598415374756, |
| "learning_rate": 0.0002612973458498886, |
| "loss": 0.889, |
| "step": 928 |
| }, |
| { |
| "epoch": 4.838541666666667, |
| "grad_norm": 1.9038026332855225, |
| "learning_rate": 0.0002612141908407128, |
| "loss": 0.8876, |
| "step": 929 |
| }, |
| { |
| "epoch": 4.84375, |
| "grad_norm": 1.0927232503890991, |
| "learning_rate": 0.00026113095985767776, |
| "loss": 0.8974, |
| "step": 930 |
| }, |
| { |
| "epoch": 4.848958333333333, |
| "grad_norm": 0.8928850293159485, |
| "learning_rate": 0.00026104765295764106, |
| "loss": 0.9338, |
| "step": 931 |
| }, |
| { |
| "epoch": 4.854166666666667, |
| "grad_norm": 0.8173729181289673, |
| "learning_rate": 0.00026096427019751234, |
| "loss": 0.9222, |
| "step": 932 |
| }, |
| { |
| "epoch": 4.859375, |
| "grad_norm": 0.859594464302063, |
| "learning_rate": 0.00026088081163425287, |
| "loss": 0.8669, |
| "step": 933 |
| }, |
| { |
| "epoch": 4.864583333333333, |
| "grad_norm": 2.029209613800049, |
| "learning_rate": 0.00026079727732487585, |
| "loss": 0.9163, |
| "step": 934 |
| }, |
| { |
| "epoch": 4.869791666666667, |
| "grad_norm": 1.0107686519622803, |
| "learning_rate": 0.0002607136673264461, |
| "loss": 0.9548, |
| "step": 935 |
| }, |
| { |
| "epoch": 4.875, |
| "grad_norm": 1.430413007736206, |
| "learning_rate": 0.00026062998169608024, |
| "loss": 0.9465, |
| "step": 936 |
| }, |
| { |
| "epoch": 4.880208333333333, |
| "grad_norm": 0.7445321083068848, |
| "learning_rate": 0.0002605462204909465, |
| "loss": 0.7932, |
| "step": 937 |
| }, |
| { |
| "epoch": 4.885416666666667, |
| "grad_norm": 1.2740421295166016, |
| "learning_rate": 0.00026046238376826466, |
| "loss": 0.7722, |
| "step": 938 |
| }, |
| { |
| "epoch": 4.890625, |
| "grad_norm": 1.5232279300689697, |
| "learning_rate": 0.00026037847158530636, |
| "loss": 0.9532, |
| "step": 939 |
| }, |
| { |
| "epoch": 4.895833333333333, |
| "grad_norm": 1.110803484916687, |
| "learning_rate": 0.0002602944839993945, |
| "loss": 1.0394, |
| "step": 940 |
| }, |
| { |
| "epoch": 4.901041666666667, |
| "grad_norm": 1.2615315914154053, |
| "learning_rate": 0.0002602104210679036, |
| "loss": 0.9285, |
| "step": 941 |
| }, |
| { |
| "epoch": 4.90625, |
| "grad_norm": 0.9494742155075073, |
| "learning_rate": 0.0002601262828482597, |
| "loss": 0.9952, |
| "step": 942 |
| }, |
| { |
| "epoch": 4.911458333333333, |
| "grad_norm": 1.2447307109832764, |
| "learning_rate": 0.0002600420693979402, |
| "loss": 1.0099, |
| "step": 943 |
| }, |
| { |
| "epoch": 4.916666666666667, |
| "grad_norm": 1.4909299612045288, |
| "learning_rate": 0.0002599577807744739, |
| "loss": 0.8191, |
| "step": 944 |
| }, |
| { |
| "epoch": 4.921875, |
| "grad_norm": 1.1714874505996704, |
| "learning_rate": 0.00025987341703544105, |
| "loss": 0.9043, |
| "step": 945 |
| }, |
| { |
| "epoch": 4.927083333333333, |
| "grad_norm": 1.2767671346664429, |
| "learning_rate": 0.00025978897823847303, |
| "loss": 0.7895, |
| "step": 946 |
| }, |
| { |
| "epoch": 4.932291666666667, |
| "grad_norm": 0.9500859975814819, |
| "learning_rate": 0.00025970446444125263, |
| "loss": 0.9293, |
| "step": 947 |
| }, |
| { |
| "epoch": 4.9375, |
| "grad_norm": 1.855484127998352, |
| "learning_rate": 0.00025961987570151385, |
| "loss": 0.8971, |
| "step": 948 |
| }, |
| { |
| "epoch": 4.942708333333333, |
| "grad_norm": 1.549333930015564, |
| "learning_rate": 0.00025953521207704194, |
| "loss": 0.9118, |
| "step": 949 |
| }, |
| { |
| "epoch": 4.947916666666667, |
| "grad_norm": 0.9645236730575562, |
| "learning_rate": 0.0002594504736256731, |
| "loss": 0.958, |
| "step": 950 |
| }, |
| { |
| "epoch": 4.953125, |
| "grad_norm": 1.479516863822937, |
| "learning_rate": 0.00025936566040529497, |
| "loss": 0.9518, |
| "step": 951 |
| }, |
| { |
| "epoch": 4.958333333333333, |
| "grad_norm": 0.9501330852508545, |
| "learning_rate": 0.00025928077247384593, |
| "loss": 0.9265, |
| "step": 952 |
| }, |
| { |
| "epoch": 4.963541666666667, |
| "grad_norm": 1.8942593336105347, |
| "learning_rate": 0.00025919580988931565, |
| "loss": 1.0224, |
| "step": 953 |
| }, |
| { |
| "epoch": 4.96875, |
| "grad_norm": 1.112295150756836, |
| "learning_rate": 0.00025911077270974466, |
| "loss": 0.9095, |
| "step": 954 |
| }, |
| { |
| "epoch": 4.973958333333333, |
| "grad_norm": 1.9376155138015747, |
| "learning_rate": 0.00025902566099322453, |
| "loss": 0.9888, |
| "step": 955 |
| }, |
| { |
| "epoch": 4.979166666666667, |
| "grad_norm": 2.250056505203247, |
| "learning_rate": 0.00025894047479789765, |
| "loss": 0.9613, |
| "step": 956 |
| }, |
| { |
| "epoch": 4.984375, |
| "grad_norm": 1.2937275171279907, |
| "learning_rate": 0.00025885521418195744, |
| "loss": 0.8892, |
| "step": 957 |
| }, |
| { |
| "epoch": 4.989583333333333, |
| "grad_norm": 1.174055576324463, |
| "learning_rate": 0.000258769879203648, |
| "loss": 0.9636, |
| "step": 958 |
| }, |
| { |
| "epoch": 4.994791666666667, |
| "grad_norm": 1.508382797241211, |
| "learning_rate": 0.0002586844699212643, |
| "loss": 0.9901, |
| "step": 959 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 4.150700569152832, |
| "learning_rate": 0.0002585989863931521, |
| "loss": 0.9974, |
| "step": 960 |
| }, |
| { |
| "epoch": 5.005208333333333, |
| "grad_norm": 1.2563892602920532, |
| "learning_rate": 0.00025851342867770784, |
| "loss": 0.8135, |
| "step": 961 |
| }, |
| { |
| "epoch": 5.010416666666667, |
| "grad_norm": 1.886023998260498, |
| "learning_rate": 0.00025842779683337856, |
| "loss": 0.9077, |
| "step": 962 |
| }, |
| { |
| "epoch": 5.015625, |
| "grad_norm": 0.895328938961029, |
| "learning_rate": 0.0002583420909186622, |
| "loss": 0.9189, |
| "step": 963 |
| }, |
| { |
| "epoch": 5.020833333333333, |
| "grad_norm": 1.5237576961517334, |
| "learning_rate": 0.00025825631099210696, |
| "loss": 0.9261, |
| "step": 964 |
| }, |
| { |
| "epoch": 5.026041666666667, |
| "grad_norm": 1.2115793228149414, |
| "learning_rate": 0.0002581704571123118, |
| "loss": 0.9329, |
| "step": 965 |
| }, |
| { |
| "epoch": 5.03125, |
| "grad_norm": 2.261406660079956, |
| "learning_rate": 0.00025808452933792625, |
| "loss": 0.9505, |
| "step": 966 |
| }, |
| { |
| "epoch": 5.036458333333333, |
| "grad_norm": 0.9326643943786621, |
| "learning_rate": 0.00025799852772765016, |
| "loss": 0.8384, |
| "step": 967 |
| }, |
| { |
| "epoch": 5.041666666666667, |
| "grad_norm": 2.723942756652832, |
| "learning_rate": 0.00025791245234023396, |
| "loss": 0.9183, |
| "step": 968 |
| }, |
| { |
| "epoch": 5.046875, |
| "grad_norm": 2.4416637420654297, |
| "learning_rate": 0.00025782630323447833, |
| "loss": 0.9415, |
| "step": 969 |
| }, |
| { |
| "epoch": 5.052083333333333, |
| "grad_norm": 1.8465089797973633, |
| "learning_rate": 0.0002577400804692344, |
| "loss": 0.9497, |
| "step": 970 |
| }, |
| { |
| "epoch": 5.057291666666667, |
| "grad_norm": 3.513157606124878, |
| "learning_rate": 0.0002576537841034037, |
| "loss": 0.9849, |
| "step": 971 |
| }, |
| { |
| "epoch": 5.0625, |
| "grad_norm": 2.239727020263672, |
| "learning_rate": 0.00025756741419593785, |
| "loss": 0.9539, |
| "step": 972 |
| }, |
| { |
| "epoch": 5.067708333333333, |
| "grad_norm": 1.5752499103546143, |
| "learning_rate": 0.00025748097080583886, |
| "loss": 0.9912, |
| "step": 973 |
| }, |
| { |
| "epoch": 5.072916666666667, |
| "grad_norm": 1.9222862720489502, |
| "learning_rate": 0.0002573944539921589, |
| "loss": 0.9494, |
| "step": 974 |
| }, |
| { |
| "epoch": 5.078125, |
| "grad_norm": 2.7157843112945557, |
| "learning_rate": 0.0002573078638140003, |
| "loss": 0.9198, |
| "step": 975 |
| }, |
| { |
| "epoch": 5.083333333333333, |
| "grad_norm": 1.8254926204681396, |
| "learning_rate": 0.0002572212003305154, |
| "loss": 0.8879, |
| "step": 976 |
| }, |
| { |
| "epoch": 5.088541666666667, |
| "grad_norm": 1.0631786584854126, |
| "learning_rate": 0.0002571344636009068, |
| "loss": 0.8764, |
| "step": 977 |
| }, |
| { |
| "epoch": 5.09375, |
| "grad_norm": 1.9443947076797485, |
| "learning_rate": 0.000257047653684427, |
| "loss": 0.951, |
| "step": 978 |
| }, |
| { |
| "epoch": 5.098958333333333, |
| "grad_norm": 0.9997848868370056, |
| "learning_rate": 0.0002569607706403786, |
| "loss": 0.8579, |
| "step": 979 |
| }, |
| { |
| "epoch": 5.104166666666667, |
| "grad_norm": 1.6596300601959229, |
| "learning_rate": 0.00025687381452811403, |
| "loss": 0.9918, |
| "step": 980 |
| }, |
| { |
| "epoch": 5.109375, |
| "grad_norm": 1.5508747100830078, |
| "learning_rate": 0.00025678678540703574, |
| "loss": 0.9811, |
| "step": 981 |
| }, |
| { |
| "epoch": 5.114583333333333, |
| "grad_norm": 1.3492475748062134, |
| "learning_rate": 0.000256699683336596, |
| "loss": 0.9226, |
| "step": 982 |
| }, |
| { |
| "epoch": 5.119791666666667, |
| "grad_norm": 2.3414571285247803, |
| "learning_rate": 0.000256612508376297, |
| "loss": 0.8925, |
| "step": 983 |
| }, |
| { |
| "epoch": 5.125, |
| "grad_norm": 1.80460524559021, |
| "learning_rate": 0.0002565252605856905, |
| "loss": 0.9618, |
| "step": 984 |
| }, |
| { |
| "epoch": 5.130208333333333, |
| "grad_norm": 1.1025346517562866, |
| "learning_rate": 0.00025643794002437836, |
| "loss": 0.8263, |
| "step": 985 |
| }, |
| { |
| "epoch": 5.135416666666667, |
| "grad_norm": 2.011056423187256, |
| "learning_rate": 0.00025635054675201185, |
| "loss": 0.8202, |
| "step": 986 |
| }, |
| { |
| "epoch": 5.140625, |
| "grad_norm": 1.703884243965149, |
| "learning_rate": 0.000256263080828292, |
| "loss": 0.9576, |
| "step": 987 |
| }, |
| { |
| "epoch": 5.145833333333333, |
| "grad_norm": 1.3033201694488525, |
| "learning_rate": 0.00025617554231296963, |
| "loss": 0.9364, |
| "step": 988 |
| }, |
| { |
| "epoch": 5.151041666666667, |
| "grad_norm": 2.1112852096557617, |
| "learning_rate": 0.0002560879312658448, |
| "loss": 1.0086, |
| "step": 989 |
| }, |
| { |
| "epoch": 5.15625, |
| "grad_norm": 1.007615566253662, |
| "learning_rate": 0.0002560002477467676, |
| "loss": 0.8669, |
| "step": 990 |
| }, |
| { |
| "epoch": 5.161458333333333, |
| "grad_norm": 0.9610044360160828, |
| "learning_rate": 0.00025591249181563706, |
| "loss": 0.8689, |
| "step": 991 |
| }, |
| { |
| "epoch": 5.166666666666667, |
| "grad_norm": 2.0266001224517822, |
| "learning_rate": 0.0002558246635324022, |
| "loss": 0.9621, |
| "step": 992 |
| }, |
| { |
| "epoch": 5.171875, |
| "grad_norm": 1.2469562292099, |
| "learning_rate": 0.00025573676295706114, |
| "loss": 0.8902, |
| "step": 993 |
| }, |
| { |
| "epoch": 5.177083333333333, |
| "grad_norm": 0.8878538012504578, |
| "learning_rate": 0.00025564879014966155, |
| "loss": 0.8215, |
| "step": 994 |
| }, |
| { |
| "epoch": 5.182291666666667, |
| "grad_norm": 1.1323845386505127, |
| "learning_rate": 0.00025556074517030025, |
| "loss": 0.9481, |
| "step": 995 |
| }, |
| { |
| "epoch": 5.1875, |
| "grad_norm": 1.1482610702514648, |
| "learning_rate": 0.00025547262807912367, |
| "loss": 0.8794, |
| "step": 996 |
| }, |
| { |
| "epoch": 5.192708333333333, |
| "grad_norm": 1.84371817111969, |
| "learning_rate": 0.0002553844389363272, |
| "loss": 1.1111, |
| "step": 997 |
| }, |
| { |
| "epoch": 5.197916666666667, |
| "grad_norm": 2.745741367340088, |
| "learning_rate": 0.00025529617780215567, |
| "loss": 0.8846, |
| "step": 998 |
| }, |
| { |
| "epoch": 5.203125, |
| "grad_norm": 1.4134106636047363, |
| "learning_rate": 0.00025520784473690285, |
| "loss": 0.7982, |
| "step": 999 |
| }, |
| { |
| "epoch": 5.208333333333333, |
| "grad_norm": 1.1539026498794556, |
| "learning_rate": 0.00025511943980091194, |
| "loss": 0.9399, |
| "step": 1000 |
| }, |
| { |
| "epoch": 5.208333333333333, |
| "eval_f1_macro": 0.2672543694405039, |
| "eval_loss": 0.9960449934005737, |
| "eval_runtime": 4.9771, |
| "eval_samples_per_second": 615.418, |
| "eval_steps_per_second": 9.644, |
| "step": 1000 |
| }, |
| { |
| "epoch": 5.213541666666667, |
| "grad_norm": 1.2849938869476318, |
| "learning_rate": 0.000255030963054575, |
| "loss": 0.9698, |
| "step": 1001 |
| }, |
| { |
| "epoch": 5.21875, |
| "grad_norm": 1.4743467569351196, |
| "learning_rate": 0.0002549424145583333, |
| "loss": 0.9717, |
| "step": 1002 |
| }, |
| { |
| "epoch": 5.223958333333333, |
| "grad_norm": 1.3506722450256348, |
| "learning_rate": 0.000254853794372677, |
| "loss": 0.9177, |
| "step": 1003 |
| }, |
| { |
| "epoch": 5.229166666666667, |
| "grad_norm": 1.2336499691009521, |
| "learning_rate": 0.00025476510255814536, |
| "loss": 0.9103, |
| "step": 1004 |
| }, |
| { |
| "epoch": 5.234375, |
| "grad_norm": 1.1146384477615356, |
| "learning_rate": 0.00025467633917532646, |
| "loss": 0.9585, |
| "step": 1005 |
| }, |
| { |
| "epoch": 5.239583333333333, |
| "grad_norm": 1.0190068483352661, |
| "learning_rate": 0.00025458750428485733, |
| "loss": 0.8771, |
| "step": 1006 |
| }, |
| { |
| "epoch": 5.244791666666667, |
| "grad_norm": 1.3294785022735596, |
| "learning_rate": 0.0002544985979474238, |
| "loss": 0.9204, |
| "step": 1007 |
| }, |
| { |
| "epoch": 5.25, |
| "grad_norm": 0.9086825251579285, |
| "learning_rate": 0.0002544096202237606, |
| "loss": 0.9739, |
| "step": 1008 |
| }, |
| { |
| "epoch": 5.255208333333333, |
| "grad_norm": 0.9365246295928955, |
| "learning_rate": 0.0002543205711746512, |
| "loss": 0.8514, |
| "step": 1009 |
| }, |
| { |
| "epoch": 5.260416666666667, |
| "grad_norm": 0.9531471133232117, |
| "learning_rate": 0.00025423145086092764, |
| "loss": 0.8587, |
| "step": 1010 |
| }, |
| { |
| "epoch": 5.265625, |
| "grad_norm": 1.5030970573425293, |
| "learning_rate": 0.00025414225934347086, |
| "loss": 0.9598, |
| "step": 1011 |
| }, |
| { |
| "epoch": 5.270833333333333, |
| "grad_norm": 1.0795291662216187, |
| "learning_rate": 0.0002540529966832103, |
| "loss": 0.9194, |
| "step": 1012 |
| }, |
| { |
| "epoch": 5.276041666666667, |
| "grad_norm": 1.1357522010803223, |
| "learning_rate": 0.00025396366294112415, |
| "loss": 0.9268, |
| "step": 1013 |
| }, |
| { |
| "epoch": 5.28125, |
| "grad_norm": 1.2575525045394897, |
| "learning_rate": 0.0002538742581782389, |
| "loss": 0.8503, |
| "step": 1014 |
| }, |
| { |
| "epoch": 5.286458333333333, |
| "grad_norm": 1.264581561088562, |
| "learning_rate": 0.00025378478245562986, |
| "loss": 0.9483, |
| "step": 1015 |
| }, |
| { |
| "epoch": 5.291666666666667, |
| "grad_norm": 1.139939546585083, |
| "learning_rate": 0.0002536952358344206, |
| "loss": 0.9264, |
| "step": 1016 |
| }, |
| { |
| "epoch": 5.296875, |
| "grad_norm": 1.4487439393997192, |
| "learning_rate": 0.0002536056183757831, |
| "loss": 0.8099, |
| "step": 1017 |
| }, |
| { |
| "epoch": 5.302083333333333, |
| "grad_norm": 0.9167041778564453, |
| "learning_rate": 0.000253515930140938, |
| "loss": 0.9347, |
| "step": 1018 |
| }, |
| { |
| "epoch": 5.307291666666667, |
| "grad_norm": 1.6278972625732422, |
| "learning_rate": 0.000253426171191154, |
| "loss": 0.9785, |
| "step": 1019 |
| }, |
| { |
| "epoch": 5.3125, |
| "grad_norm": 1.0470623970031738, |
| "learning_rate": 0.00025333634158774827, |
| "loss": 0.824, |
| "step": 1020 |
| }, |
| { |
| "epoch": 5.317708333333333, |
| "grad_norm": 2.160714864730835, |
| "learning_rate": 0.0002532464413920862, |
| "loss": 1.0169, |
| "step": 1021 |
| }, |
| { |
| "epoch": 5.322916666666667, |
| "grad_norm": 1.2980526685714722, |
| "learning_rate": 0.0002531564706655813, |
| "loss": 0.9489, |
| "step": 1022 |
| }, |
| { |
| "epoch": 5.328125, |
| "grad_norm": 1.1453959941864014, |
| "learning_rate": 0.0002530664294696955, |
| "loss": 0.9824, |
| "step": 1023 |
| }, |
| { |
| "epoch": 5.333333333333333, |
| "grad_norm": 1.6749076843261719, |
| "learning_rate": 0.0002529763178659386, |
| "loss": 1.0469, |
| "step": 1024 |
| }, |
| { |
| "epoch": 5.338541666666667, |
| "grad_norm": 1.8919302225112915, |
| "learning_rate": 0.00025288613591586875, |
| "loss": 1.0054, |
| "step": 1025 |
| }, |
| { |
| "epoch": 5.34375, |
| "grad_norm": 1.1139763593673706, |
| "learning_rate": 0.000252795883681092, |
| "loss": 0.911, |
| "step": 1026 |
| }, |
| { |
| "epoch": 5.348958333333333, |
| "grad_norm": 1.5120837688446045, |
| "learning_rate": 0.00025270556122326235, |
| "loss": 0.8728, |
| "step": 1027 |
| }, |
| { |
| "epoch": 5.354166666666667, |
| "grad_norm": 1.2393710613250732, |
| "learning_rate": 0.00025261516860408196, |
| "loss": 0.885, |
| "step": 1028 |
| }, |
| { |
| "epoch": 5.359375, |
| "grad_norm": 0.8873183727264404, |
| "learning_rate": 0.00025252470588530085, |
| "loss": 0.8163, |
| "step": 1029 |
| }, |
| { |
| "epoch": 5.364583333333333, |
| "grad_norm": 0.9413037300109863, |
| "learning_rate": 0.0002524341731287168, |
| "loss": 0.9343, |
| "step": 1030 |
| }, |
| { |
| "epoch": 5.369791666666667, |
| "grad_norm": 1.7422375679016113, |
| "learning_rate": 0.0002523435703961757, |
| "loss": 0.8609, |
| "step": 1031 |
| }, |
| { |
| "epoch": 5.375, |
| "grad_norm": 1.1480616331100464, |
| "learning_rate": 0.00025225289774957093, |
| "loss": 0.8038, |
| "step": 1032 |
| }, |
| { |
| "epoch": 5.380208333333333, |
| "grad_norm": 1.4056084156036377, |
| "learning_rate": 0.00025216215525084387, |
| "loss": 0.849, |
| "step": 1033 |
| }, |
| { |
| "epoch": 5.385416666666667, |
| "grad_norm": 1.002510666847229, |
| "learning_rate": 0.0002520713429619835, |
| "loss": 0.8602, |
| "step": 1034 |
| }, |
| { |
| "epoch": 5.390625, |
| "grad_norm": 1.5287071466445923, |
| "learning_rate": 0.00025198046094502646, |
| "loss": 0.931, |
| "step": 1035 |
| }, |
| { |
| "epoch": 5.395833333333333, |
| "grad_norm": 1.3442922830581665, |
| "learning_rate": 0.0002518895092620572, |
| "loss": 0.9477, |
| "step": 1036 |
| }, |
| { |
| "epoch": 5.401041666666667, |
| "grad_norm": 1.4497796297073364, |
| "learning_rate": 0.0002517984879752075, |
| "loss": 0.9163, |
| "step": 1037 |
| }, |
| { |
| "epoch": 5.40625, |
| "grad_norm": 1.6785835027694702, |
| "learning_rate": 0.0002517073971466569, |
| "loss": 0.8461, |
| "step": 1038 |
| }, |
| { |
| "epoch": 5.411458333333333, |
| "grad_norm": 1.2001979351043701, |
| "learning_rate": 0.0002516162368386324, |
| "loss": 0.9837, |
| "step": 1039 |
| }, |
| { |
| "epoch": 5.416666666666667, |
| "grad_norm": 1.5367956161499023, |
| "learning_rate": 0.00025152500711340833, |
| "loss": 0.8495, |
| "step": 1040 |
| }, |
| { |
| "epoch": 5.421875, |
| "grad_norm": 0.8142486810684204, |
| "learning_rate": 0.0002514337080333066, |
| "loss": 0.9412, |
| "step": 1041 |
| }, |
| { |
| "epoch": 5.427083333333333, |
| "grad_norm": 1.201979637145996, |
| "learning_rate": 0.0002513423396606965, |
| "loss": 0.8429, |
| "step": 1042 |
| }, |
| { |
| "epoch": 5.432291666666667, |
| "grad_norm": 0.9371297359466553, |
| "learning_rate": 0.0002512509020579944, |
| "loss": 0.8239, |
| "step": 1043 |
| }, |
| { |
| "epoch": 5.4375, |
| "grad_norm": 1.1644885540008545, |
| "learning_rate": 0.00025115939528766435, |
| "loss": 0.8255, |
| "step": 1044 |
| }, |
| { |
| "epoch": 5.442708333333333, |
| "grad_norm": 1.019392728805542, |
| "learning_rate": 0.0002510678194122174, |
| "loss": 0.864, |
| "step": 1045 |
| }, |
| { |
| "epoch": 5.447916666666667, |
| "grad_norm": 1.9943132400512695, |
| "learning_rate": 0.0002509761744942118, |
| "loss": 0.9573, |
| "step": 1046 |
| }, |
| { |
| "epoch": 5.453125, |
| "grad_norm": 1.090933084487915, |
| "learning_rate": 0.0002508844605962532, |
| "loss": 0.8375, |
| "step": 1047 |
| }, |
| { |
| "epoch": 5.458333333333333, |
| "grad_norm": 1.4983948469161987, |
| "learning_rate": 0.00025079267778099404, |
| "loss": 0.9617, |
| "step": 1048 |
| }, |
| { |
| "epoch": 5.463541666666667, |
| "grad_norm": 1.049011468887329, |
| "learning_rate": 0.00025070082611113406, |
| "loss": 0.9365, |
| "step": 1049 |
| }, |
| { |
| "epoch": 5.46875, |
| "grad_norm": 2.579281806945801, |
| "learning_rate": 0.00025060890564942, |
| "loss": 0.9742, |
| "step": 1050 |
| }, |
| { |
| "epoch": 5.473958333333333, |
| "grad_norm": 0.9312069416046143, |
| "learning_rate": 0.00025051691645864557, |
| "loss": 0.8272, |
| "step": 1051 |
| }, |
| { |
| "epoch": 5.479166666666667, |
| "grad_norm": 1.3110783100128174, |
| "learning_rate": 0.0002504248586016514, |
| "loss": 0.8539, |
| "step": 1052 |
| }, |
| { |
| "epoch": 5.484375, |
| "grad_norm": 1.5285732746124268, |
| "learning_rate": 0.0002503327321413251, |
| "loss": 0.9685, |
| "step": 1053 |
| }, |
| { |
| "epoch": 5.489583333333333, |
| "grad_norm": 2.023607015609741, |
| "learning_rate": 0.00025024053714060114, |
| "loss": 0.96, |
| "step": 1054 |
| }, |
| { |
| "epoch": 5.494791666666667, |
| "grad_norm": 1.2621828317642212, |
| "learning_rate": 0.0002501482736624607, |
| "loss": 0.8754, |
| "step": 1055 |
| }, |
| { |
| "epoch": 5.5, |
| "grad_norm": 1.1877576112747192, |
| "learning_rate": 0.00025005594176993185, |
| "loss": 0.884, |
| "step": 1056 |
| }, |
| { |
| "epoch": 5.505208333333333, |
| "grad_norm": 1.0975958108901978, |
| "learning_rate": 0.0002499635415260894, |
| "loss": 0.891, |
| "step": 1057 |
| }, |
| { |
| "epoch": 5.510416666666667, |
| "grad_norm": 1.6969680786132812, |
| "learning_rate": 0.0002498710729940548, |
| "loss": 1.0012, |
| "step": 1058 |
| }, |
| { |
| "epoch": 5.515625, |
| "grad_norm": 0.9656524062156677, |
| "learning_rate": 0.00024977853623699614, |
| "loss": 1.0031, |
| "step": 1059 |
| }, |
| { |
| "epoch": 5.520833333333333, |
| "grad_norm": 1.2152016162872314, |
| "learning_rate": 0.00024968593131812817, |
| "loss": 0.8774, |
| "step": 1060 |
| }, |
| { |
| "epoch": 5.526041666666667, |
| "grad_norm": 1.943495512008667, |
| "learning_rate": 0.00024959325830071225, |
| "loss": 0.8232, |
| "step": 1061 |
| }, |
| { |
| "epoch": 5.53125, |
| "grad_norm": 1.1358082294464111, |
| "learning_rate": 0.00024950051724805607, |
| "loss": 0.9344, |
| "step": 1062 |
| }, |
| { |
| "epoch": 5.536458333333333, |
| "grad_norm": 0.9680805802345276, |
| "learning_rate": 0.00024940770822351395, |
| "loss": 1.049, |
| "step": 1063 |
| }, |
| { |
| "epoch": 5.541666666666667, |
| "grad_norm": 1.9633959531784058, |
| "learning_rate": 0.00024931483129048663, |
| "loss": 1.0505, |
| "step": 1064 |
| }, |
| { |
| "epoch": 5.546875, |
| "grad_norm": 3.208379030227661, |
| "learning_rate": 0.00024922188651242124, |
| "loss": 0.9401, |
| "step": 1065 |
| }, |
| { |
| "epoch": 5.552083333333333, |
| "grad_norm": 1.0340007543563843, |
| "learning_rate": 0.0002491288739528112, |
| "loss": 0.8382, |
| "step": 1066 |
| }, |
| { |
| "epoch": 5.557291666666667, |
| "grad_norm": 1.6783497333526611, |
| "learning_rate": 0.0002490357936751963, |
| "loss": 1.0073, |
| "step": 1067 |
| }, |
| { |
| "epoch": 5.5625, |
| "grad_norm": 2.2053964138031006, |
| "learning_rate": 0.00024894264574316254, |
| "loss": 0.9477, |
| "step": 1068 |
| }, |
| { |
| "epoch": 5.567708333333333, |
| "grad_norm": 2.396202802658081, |
| "learning_rate": 0.00024884943022034214, |
| "loss": 0.9524, |
| "step": 1069 |
| }, |
| { |
| "epoch": 5.572916666666667, |
| "grad_norm": 1.8267946243286133, |
| "learning_rate": 0.0002487561471704136, |
| "loss": 0.9304, |
| "step": 1070 |
| }, |
| { |
| "epoch": 5.578125, |
| "grad_norm": 0.7886064052581787, |
| "learning_rate": 0.00024866279665710137, |
| "loss": 0.8881, |
| "step": 1071 |
| }, |
| { |
| "epoch": 5.583333333333333, |
| "grad_norm": 1.4858075380325317, |
| "learning_rate": 0.00024856937874417613, |
| "loss": 1.035, |
| "step": 1072 |
| }, |
| { |
| "epoch": 5.588541666666667, |
| "grad_norm": 2.2485852241516113, |
| "learning_rate": 0.00024847589349545444, |
| "loss": 0.8561, |
| "step": 1073 |
| }, |
| { |
| "epoch": 5.59375, |
| "grad_norm": 1.272775650024414, |
| "learning_rate": 0.00024838234097479913, |
| "loss": 0.9152, |
| "step": 1074 |
| }, |
| { |
| "epoch": 5.598958333333333, |
| "grad_norm": 2.121262311935425, |
| "learning_rate": 0.0002482887212461187, |
| "loss": 0.9671, |
| "step": 1075 |
| }, |
| { |
| "epoch": 5.604166666666667, |
| "grad_norm": 2.268122434616089, |
| "learning_rate": 0.0002481950343733678, |
| "loss": 0.9554, |
| "step": 1076 |
| }, |
| { |
| "epoch": 5.609375, |
| "grad_norm": 1.6661125421524048, |
| "learning_rate": 0.00024810128042054673, |
| "loss": 0.905, |
| "step": 1077 |
| }, |
| { |
| "epoch": 5.614583333333333, |
| "grad_norm": 1.6562724113464355, |
| "learning_rate": 0.0002480074594517018, |
| "loss": 0.822, |
| "step": 1078 |
| }, |
| { |
| "epoch": 5.619791666666667, |
| "grad_norm": 1.1474725008010864, |
| "learning_rate": 0.0002479135715309249, |
| "loss": 0.857, |
| "step": 1079 |
| }, |
| { |
| "epoch": 5.625, |
| "grad_norm": 1.0111240148544312, |
| "learning_rate": 0.000247819616722354, |
| "loss": 0.8626, |
| "step": 1080 |
| }, |
| { |
| "epoch": 5.630208333333333, |
| "grad_norm": 1.436287760734558, |
| "learning_rate": 0.00024772559509017225, |
| "loss": 0.889, |
| "step": 1081 |
| }, |
| { |
| "epoch": 5.635416666666667, |
| "grad_norm": 1.559177041053772, |
| "learning_rate": 0.00024763150669860896, |
| "loss": 0.8913, |
| "step": 1082 |
| }, |
| { |
| "epoch": 5.640625, |
| "grad_norm": 1.0281094312667847, |
| "learning_rate": 0.00024753735161193877, |
| "loss": 0.9516, |
| "step": 1083 |
| }, |
| { |
| "epoch": 5.645833333333333, |
| "grad_norm": 0.971319854259491, |
| "learning_rate": 0.0002474431298944819, |
| "loss": 0.8675, |
| "step": 1084 |
| }, |
| { |
| "epoch": 5.651041666666667, |
| "grad_norm": 1.2818177938461304, |
| "learning_rate": 0.00024734884161060417, |
| "loss": 0.9956, |
| "step": 1085 |
| }, |
| { |
| "epoch": 5.65625, |
| "grad_norm": 1.0834110975265503, |
| "learning_rate": 0.0002472544868247168, |
| "loss": 0.9378, |
| "step": 1086 |
| }, |
| { |
| "epoch": 5.661458333333333, |
| "grad_norm": 1.6228058338165283, |
| "learning_rate": 0.0002471600656012765, |
| "loss": 0.8927, |
| "step": 1087 |
| }, |
| { |
| "epoch": 5.666666666666667, |
| "grad_norm": 1.5936682224273682, |
| "learning_rate": 0.00024706557800478524, |
| "loss": 0.8714, |
| "step": 1088 |
| }, |
| { |
| "epoch": 5.671875, |
| "grad_norm": 1.869787573814392, |
| "learning_rate": 0.00024697102409979054, |
| "loss": 0.998, |
| "step": 1089 |
| }, |
| { |
| "epoch": 5.677083333333333, |
| "grad_norm": 0.7709593772888184, |
| "learning_rate": 0.000246876403950885, |
| "loss": 0.8842, |
| "step": 1090 |
| }, |
| { |
| "epoch": 5.682291666666667, |
| "grad_norm": 1.2476056814193726, |
| "learning_rate": 0.00024678171762270665, |
| "loss": 0.9079, |
| "step": 1091 |
| }, |
| { |
| "epoch": 5.6875, |
| "grad_norm": 1.721409797668457, |
| "learning_rate": 0.00024668696517993866, |
| "loss": 1.0253, |
| "step": 1092 |
| }, |
| { |
| "epoch": 5.692708333333333, |
| "grad_norm": 2.1255621910095215, |
| "learning_rate": 0.0002465921466873093, |
| "loss": 0.8893, |
| "step": 1093 |
| }, |
| { |
| "epoch": 5.697916666666667, |
| "grad_norm": 1.2987499237060547, |
| "learning_rate": 0.0002464972622095921, |
| "loss": 0.9749, |
| "step": 1094 |
| }, |
| { |
| "epoch": 5.703125, |
| "grad_norm": 1.2073231935501099, |
| "learning_rate": 0.00024640231181160546, |
| "loss": 0.9406, |
| "step": 1095 |
| }, |
| { |
| "epoch": 5.708333333333333, |
| "grad_norm": 1.1670585870742798, |
| "learning_rate": 0.000246307295558213, |
| "loss": 0.861, |
| "step": 1096 |
| }, |
| { |
| "epoch": 5.713541666666667, |
| "grad_norm": 1.3458144664764404, |
| "learning_rate": 0.00024621221351432335, |
| "loss": 0.9064, |
| "step": 1097 |
| }, |
| { |
| "epoch": 5.71875, |
| "grad_norm": 1.1030082702636719, |
| "learning_rate": 0.0002461170657448899, |
| "loss": 0.8793, |
| "step": 1098 |
| }, |
| { |
| "epoch": 5.723958333333333, |
| "grad_norm": 2.0787160396575928, |
| "learning_rate": 0.0002460218523149111, |
| "loss": 0.9431, |
| "step": 1099 |
| }, |
| { |
| "epoch": 5.729166666666667, |
| "grad_norm": 0.9919264912605286, |
| "learning_rate": 0.0002459265732894302, |
| "loss": 0.9136, |
| "step": 1100 |
| }, |
| { |
| "epoch": 5.729166666666667, |
| "eval_f1_macro": 0.2548558332455323, |
| "eval_loss": 0.9981412291526794, |
| "eval_runtime": 5.0201, |
| "eval_samples_per_second": 610.144, |
| "eval_steps_per_second": 9.562, |
| "step": 1100 |
| }, |
| { |
| "epoch": 5.734375, |
| "grad_norm": 0.9366310238838196, |
| "learning_rate": 0.00024583122873353505, |
| "loss": 0.8214, |
| "step": 1101 |
| }, |
| { |
| "epoch": 5.739583333333333, |
| "grad_norm": 1.4609984159469604, |
| "learning_rate": 0.0002457358187123588, |
| "loss": 0.9957, |
| "step": 1102 |
| }, |
| { |
| "epoch": 5.744791666666667, |
| "grad_norm": 1.6161651611328125, |
| "learning_rate": 0.0002456403432910788, |
| "loss": 0.943, |
| "step": 1103 |
| }, |
| { |
| "epoch": 5.75, |
| "grad_norm": 2.7775988578796387, |
| "learning_rate": 0.00024554480253491726, |
| "loss": 0.907, |
| "step": 1104 |
| }, |
| { |
| "epoch": 5.755208333333333, |
| "grad_norm": 1.2367454767227173, |
| "learning_rate": 0.00024544919650914106, |
| "loss": 0.9871, |
| "step": 1105 |
| }, |
| { |
| "epoch": 5.760416666666667, |
| "grad_norm": 1.9151272773742676, |
| "learning_rate": 0.0002453535252790617, |
| "loss": 0.9976, |
| "step": 1106 |
| }, |
| { |
| "epoch": 5.765625, |
| "grad_norm": 1.1052898168563843, |
| "learning_rate": 0.0002452577889100351, |
| "loss": 0.9722, |
| "step": 1107 |
| }, |
| { |
| "epoch": 5.770833333333333, |
| "grad_norm": 0.9039381146430969, |
| "learning_rate": 0.0002451619874674617, |
| "loss": 0.9319, |
| "step": 1108 |
| }, |
| { |
| "epoch": 5.776041666666667, |
| "grad_norm": 1.1422407627105713, |
| "learning_rate": 0.0002450661210167865, |
| "loss": 0.9447, |
| "step": 1109 |
| }, |
| { |
| "epoch": 5.78125, |
| "grad_norm": 2.4576256275177, |
| "learning_rate": 0.0002449701896234988, |
| "loss": 0.9087, |
| "step": 1110 |
| }, |
| { |
| "epoch": 5.786458333333333, |
| "grad_norm": 1.7521029710769653, |
| "learning_rate": 0.0002448741933531324, |
| "loss": 0.9336, |
| "step": 1111 |
| }, |
| { |
| "epoch": 5.791666666666667, |
| "grad_norm": 1.4532365798950195, |
| "learning_rate": 0.0002447781322712652, |
| "loss": 0.9336, |
| "step": 1112 |
| }, |
| { |
| "epoch": 5.796875, |
| "grad_norm": 1.034534215927124, |
| "learning_rate": 0.00024468200644351957, |
| "loss": 0.986, |
| "step": 1113 |
| }, |
| { |
| "epoch": 5.802083333333333, |
| "grad_norm": 0.8038991689682007, |
| "learning_rate": 0.000244585815935562, |
| "loss": 0.7772, |
| "step": 1114 |
| }, |
| { |
| "epoch": 5.807291666666667, |
| "grad_norm": 1.114243984222412, |
| "learning_rate": 0.0002444895608131033, |
| "loss": 0.8602, |
| "step": 1115 |
| }, |
| { |
| "epoch": 5.8125, |
| "grad_norm": 1.8868643045425415, |
| "learning_rate": 0.0002443932411418982, |
| "loss": 0.9398, |
| "step": 1116 |
| }, |
| { |
| "epoch": 5.817708333333333, |
| "grad_norm": 1.280373454093933, |
| "learning_rate": 0.0002442968569877457, |
| "loss": 0.8921, |
| "step": 1117 |
| }, |
| { |
| "epoch": 5.822916666666667, |
| "grad_norm": 1.2436622381210327, |
| "learning_rate": 0.0002442004084164889, |
| "loss": 0.9616, |
| "step": 1118 |
| }, |
| { |
| "epoch": 5.828125, |
| "grad_norm": 1.0993311405181885, |
| "learning_rate": 0.00024410389549401468, |
| "loss": 0.9205, |
| "step": 1119 |
| }, |
| { |
| "epoch": 5.833333333333333, |
| "grad_norm": 1.6833208799362183, |
| "learning_rate": 0.000244007318286254, |
| "loss": 0.9274, |
| "step": 1120 |
| }, |
| { |
| "epoch": 5.838541666666667, |
| "grad_norm": 2.085017442703247, |
| "learning_rate": 0.00024391067685918186, |
| "loss": 0.9993, |
| "step": 1121 |
| }, |
| { |
| "epoch": 5.84375, |
| "grad_norm": 0.9897282719612122, |
| "learning_rate": 0.0002438139712788169, |
| "loss": 0.8508, |
| "step": 1122 |
| }, |
| { |
| "epoch": 5.848958333333333, |
| "grad_norm": 2.345332622528076, |
| "learning_rate": 0.00024371720161122173, |
| "loss": 0.9048, |
| "step": 1123 |
| }, |
| { |
| "epoch": 5.854166666666667, |
| "grad_norm": 2.143970489501953, |
| "learning_rate": 0.00024362036792250271, |
| "loss": 0.951, |
| "step": 1124 |
| }, |
| { |
| "epoch": 5.859375, |
| "grad_norm": 2.7110958099365234, |
| "learning_rate": 0.00024352347027881003, |
| "loss": 0.9409, |
| "step": 1125 |
| }, |
| { |
| "epoch": 5.864583333333333, |
| "grad_norm": 1.7701855897903442, |
| "learning_rate": 0.0002434265087463373, |
| "loss": 0.7844, |
| "step": 1126 |
| }, |
| { |
| "epoch": 5.869791666666667, |
| "grad_norm": 2.046635389328003, |
| "learning_rate": 0.00024332948339132206, |
| "loss": 0.883, |
| "step": 1127 |
| }, |
| { |
| "epoch": 5.875, |
| "grad_norm": 2.1631217002868652, |
| "learning_rate": 0.00024323239428004532, |
| "loss": 0.8695, |
| "step": 1128 |
| }, |
| { |
| "epoch": 5.880208333333333, |
| "grad_norm": 1.0865082740783691, |
| "learning_rate": 0.00024313524147883162, |
| "loss": 0.9873, |
| "step": 1129 |
| }, |
| { |
| "epoch": 5.885416666666667, |
| "grad_norm": 0.8010328412055969, |
| "learning_rate": 0.00024303802505404909, |
| "loss": 0.8472, |
| "step": 1130 |
| }, |
| { |
| "epoch": 5.890625, |
| "grad_norm": 1.2175791263580322, |
| "learning_rate": 0.0002429407450721093, |
| "loss": 0.9254, |
| "step": 1131 |
| }, |
| { |
| "epoch": 5.895833333333333, |
| "grad_norm": 1.2180486917495728, |
| "learning_rate": 0.0002428434015994671, |
| "loss": 0.961, |
| "step": 1132 |
| }, |
| { |
| "epoch": 5.901041666666667, |
| "grad_norm": 1.3575981855392456, |
| "learning_rate": 0.00024274599470262097, |
| "loss": 0.9724, |
| "step": 1133 |
| }, |
| { |
| "epoch": 5.90625, |
| "grad_norm": 2.1392955780029297, |
| "learning_rate": 0.0002426485244481125, |
| "loss": 0.9143, |
| "step": 1134 |
| }, |
| { |
| "epoch": 5.911458333333333, |
| "grad_norm": 1.9324779510498047, |
| "learning_rate": 0.0002425509909025267, |
| "loss": 0.8425, |
| "step": 1135 |
| }, |
| { |
| "epoch": 5.916666666666667, |
| "grad_norm": 2.003596067428589, |
| "learning_rate": 0.00024245339413249173, |
| "loss": 0.9038, |
| "step": 1136 |
| }, |
| { |
| "epoch": 5.921875, |
| "grad_norm": 0.7988989949226379, |
| "learning_rate": 0.00024235573420467894, |
| "loss": 0.8688, |
| "step": 1137 |
| }, |
| { |
| "epoch": 5.927083333333333, |
| "grad_norm": 1.4796421527862549, |
| "learning_rate": 0.00024225801118580288, |
| "loss": 0.9539, |
| "step": 1138 |
| }, |
| { |
| "epoch": 5.932291666666667, |
| "grad_norm": 2.703186273574829, |
| "learning_rate": 0.00024216022514262118, |
| "loss": 1.0689, |
| "step": 1139 |
| }, |
| { |
| "epoch": 5.9375, |
| "grad_norm": 1.6065925359725952, |
| "learning_rate": 0.00024206237614193444, |
| "loss": 0.9301, |
| "step": 1140 |
| }, |
| { |
| "epoch": 5.942708333333333, |
| "grad_norm": 1.3765863180160522, |
| "learning_rate": 0.00024196446425058646, |
| "loss": 0.9215, |
| "step": 1141 |
| }, |
| { |
| "epoch": 5.947916666666667, |
| "grad_norm": 1.8924000263214111, |
| "learning_rate": 0.00024186648953546385, |
| "loss": 0.8414, |
| "step": 1142 |
| }, |
| { |
| "epoch": 5.953125, |
| "grad_norm": 1.3952339887619019, |
| "learning_rate": 0.00024176845206349613, |
| "loss": 0.8382, |
| "step": 1143 |
| }, |
| { |
| "epoch": 5.958333333333333, |
| "grad_norm": 0.766994059085846, |
| "learning_rate": 0.00024167035190165575, |
| "loss": 0.8375, |
| "step": 1144 |
| }, |
| { |
| "epoch": 5.963541666666667, |
| "grad_norm": 1.4613276720046997, |
| "learning_rate": 0.000241572189116958, |
| "loss": 1.012, |
| "step": 1145 |
| }, |
| { |
| "epoch": 5.96875, |
| "grad_norm": 1.7389620542526245, |
| "learning_rate": 0.00024147396377646082, |
| "loss": 1.0275, |
| "step": 1146 |
| }, |
| { |
| "epoch": 5.973958333333333, |
| "grad_norm": 2.106982946395874, |
| "learning_rate": 0.00024137567594726517, |
| "loss": 0.9476, |
| "step": 1147 |
| }, |
| { |
| "epoch": 5.979166666666667, |
| "grad_norm": 0.9516728520393372, |
| "learning_rate": 0.00024127732569651433, |
| "loss": 0.9504, |
| "step": 1148 |
| }, |
| { |
| "epoch": 5.984375, |
| "grad_norm": 1.2063301801681519, |
| "learning_rate": 0.0002411789130913945, |
| "loss": 0.959, |
| "step": 1149 |
| }, |
| { |
| "epoch": 5.989583333333333, |
| "grad_norm": 1.2911386489868164, |
| "learning_rate": 0.00024108043819913438, |
| "loss": 0.9592, |
| "step": 1150 |
| }, |
| { |
| "epoch": 5.994791666666667, |
| "grad_norm": 0.9982579350471497, |
| "learning_rate": 0.00024098190108700517, |
| "loss": 0.8526, |
| "step": 1151 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 1.7528659105300903, |
| "learning_rate": 0.00024088330182232064, |
| "loss": 0.7813, |
| "step": 1152 |
| }, |
| { |
| "epoch": 6.005208333333333, |
| "grad_norm": 1.642162561416626, |
| "learning_rate": 0.000240784640472437, |
| "loss": 1.0182, |
| "step": 1153 |
| }, |
| { |
| "epoch": 6.010416666666667, |
| "grad_norm": 1.3873138427734375, |
| "learning_rate": 0.0002406859171047529, |
| "loss": 0.7857, |
| "step": 1154 |
| }, |
| { |
| "epoch": 6.015625, |
| "grad_norm": 2.5621554851531982, |
| "learning_rate": 0.00024058713178670932, |
| "loss": 0.9587, |
| "step": 1155 |
| }, |
| { |
| "epoch": 6.020833333333333, |
| "grad_norm": 0.7547006607055664, |
| "learning_rate": 0.0002404882845857896, |
| "loss": 0.8965, |
| "step": 1156 |
| }, |
| { |
| "epoch": 6.026041666666667, |
| "grad_norm": 0.9521669745445251, |
| "learning_rate": 0.00024038937556951926, |
| "loss": 0.8534, |
| "step": 1157 |
| }, |
| { |
| "epoch": 6.03125, |
| "grad_norm": 2.053400754928589, |
| "learning_rate": 0.00024029040480546613, |
| "loss": 0.7219, |
| "step": 1158 |
| }, |
| { |
| "epoch": 6.036458333333333, |
| "grad_norm": 1.0980910062789917, |
| "learning_rate": 0.00024019137236124026, |
| "loss": 0.9627, |
| "step": 1159 |
| }, |
| { |
| "epoch": 6.041666666666667, |
| "grad_norm": 0.9454628229141235, |
| "learning_rate": 0.0002400922783044937, |
| "loss": 0.8891, |
| "step": 1160 |
| }, |
| { |
| "epoch": 6.046875, |
| "grad_norm": 1.26895272731781, |
| "learning_rate": 0.00023999312270292077, |
| "loss": 0.8181, |
| "step": 1161 |
| }, |
| { |
| "epoch": 6.052083333333333, |
| "grad_norm": 1.211905837059021, |
| "learning_rate": 0.0002398939056242576, |
| "loss": 0.8144, |
| "step": 1162 |
| }, |
| { |
| "epoch": 6.057291666666667, |
| "grad_norm": 1.6527258157730103, |
| "learning_rate": 0.0002397946271362826, |
| "loss": 0.8133, |
| "step": 1163 |
| }, |
| { |
| "epoch": 6.0625, |
| "grad_norm": 1.2424415349960327, |
| "learning_rate": 0.00023969528730681587, |
| "loss": 0.8384, |
| "step": 1164 |
| }, |
| { |
| "epoch": 6.067708333333333, |
| "grad_norm": 1.9715627431869507, |
| "learning_rate": 0.00023959588620371954, |
| "loss": 0.9235, |
| "step": 1165 |
| }, |
| { |
| "epoch": 6.072916666666667, |
| "grad_norm": 1.286944031715393, |
| "learning_rate": 0.00023949642389489763, |
| "loss": 0.9116, |
| "step": 1166 |
| }, |
| { |
| "epoch": 6.078125, |
| "grad_norm": 1.0383280515670776, |
| "learning_rate": 0.00023939690044829588, |
| "loss": 0.9179, |
| "step": 1167 |
| }, |
| { |
| "epoch": 6.083333333333333, |
| "grad_norm": 1.4770768880844116, |
| "learning_rate": 0.00023929731593190192, |
| "loss": 1.0329, |
| "step": 1168 |
| }, |
| { |
| "epoch": 6.088541666666667, |
| "grad_norm": 2.1358981132507324, |
| "learning_rate": 0.0002391976704137449, |
| "loss": 0.9162, |
| "step": 1169 |
| }, |
| { |
| "epoch": 6.09375, |
| "grad_norm": 1.4205318689346313, |
| "learning_rate": 0.00023909796396189584, |
| "loss": 0.778, |
| "step": 1170 |
| }, |
| { |
| "epoch": 6.098958333333333, |
| "grad_norm": 2.2904787063598633, |
| "learning_rate": 0.0002389981966444673, |
| "loss": 0.8788, |
| "step": 1171 |
| }, |
| { |
| "epoch": 6.104166666666667, |
| "grad_norm": 1.2764476537704468, |
| "learning_rate": 0.0002388983685296134, |
| "loss": 0.8461, |
| "step": 1172 |
| }, |
| { |
| "epoch": 6.109375, |
| "grad_norm": 2.4351987838745117, |
| "learning_rate": 0.00023879847968552982, |
| "loss": 0.9385, |
| "step": 1173 |
| }, |
| { |
| "epoch": 6.114583333333333, |
| "grad_norm": 2.2215793132781982, |
| "learning_rate": 0.0002386985301804537, |
| "loss": 0.93, |
| "step": 1174 |
| }, |
| { |
| "epoch": 6.119791666666667, |
| "grad_norm": 1.5373567342758179, |
| "learning_rate": 0.00023859852008266372, |
| "loss": 0.8567, |
| "step": 1175 |
| }, |
| { |
| "epoch": 6.125, |
| "grad_norm": 1.2724008560180664, |
| "learning_rate": 0.0002384984494604798, |
| "loss": 0.8929, |
| "step": 1176 |
| }, |
| { |
| "epoch": 6.130208333333333, |
| "grad_norm": 1.0099635124206543, |
| "learning_rate": 0.00023839831838226333, |
| "loss": 0.9616, |
| "step": 1177 |
| }, |
| { |
| "epoch": 6.135416666666667, |
| "grad_norm": 0.8339307904243469, |
| "learning_rate": 0.00023829812691641695, |
| "loss": 0.8492, |
| "step": 1178 |
| }, |
| { |
| "epoch": 6.140625, |
| "grad_norm": 1.1188122034072876, |
| "learning_rate": 0.0002381978751313845, |
| "loss": 0.9308, |
| "step": 1179 |
| }, |
| { |
| "epoch": 6.145833333333333, |
| "grad_norm": 1.696159839630127, |
| "learning_rate": 0.00023809756309565116, |
| "loss": 0.8888, |
| "step": 1180 |
| }, |
| { |
| "epoch": 6.151041666666667, |
| "grad_norm": 1.982905626296997, |
| "learning_rate": 0.00023799719087774313, |
| "loss": 0.9193, |
| "step": 1181 |
| }, |
| { |
| "epoch": 6.15625, |
| "grad_norm": 1.5203526020050049, |
| "learning_rate": 0.00023789675854622784, |
| "loss": 0.9152, |
| "step": 1182 |
| }, |
| { |
| "epoch": 6.161458333333333, |
| "grad_norm": 2.2082104682922363, |
| "learning_rate": 0.0002377962661697136, |
| "loss": 0.9844, |
| "step": 1183 |
| }, |
| { |
| "epoch": 6.166666666666667, |
| "grad_norm": 1.4635909795761108, |
| "learning_rate": 0.00023769571381685004, |
| "loss": 0.9024, |
| "step": 1184 |
| }, |
| { |
| "epoch": 6.171875, |
| "grad_norm": 2.222090482711792, |
| "learning_rate": 0.00023759510155632743, |
| "loss": 0.9761, |
| "step": 1185 |
| }, |
| { |
| "epoch": 6.177083333333333, |
| "grad_norm": 2.24017333984375, |
| "learning_rate": 0.00023749442945687722, |
| "loss": 0.9897, |
| "step": 1186 |
| }, |
| { |
| "epoch": 6.182291666666667, |
| "grad_norm": 1.0959813594818115, |
| "learning_rate": 0.00023739369758727163, |
| "loss": 0.8902, |
| "step": 1187 |
| }, |
| { |
| "epoch": 6.1875, |
| "grad_norm": 1.6349151134490967, |
| "learning_rate": 0.00023729290601632374, |
| "loss": 0.8632, |
| "step": 1188 |
| }, |
| { |
| "epoch": 6.192708333333333, |
| "grad_norm": 1.5861648321151733, |
| "learning_rate": 0.00023719205481288728, |
| "loss": 0.9528, |
| "step": 1189 |
| }, |
| { |
| "epoch": 6.197916666666667, |
| "grad_norm": 2.0252082347869873, |
| "learning_rate": 0.00023709114404585696, |
| "loss": 0.8041, |
| "step": 1190 |
| }, |
| { |
| "epoch": 6.203125, |
| "grad_norm": 1.4111462831497192, |
| "learning_rate": 0.00023699017378416798, |
| "loss": 0.9422, |
| "step": 1191 |
| }, |
| { |
| "epoch": 6.208333333333333, |
| "grad_norm": 0.9264470934867859, |
| "learning_rate": 0.0002368891440967963, |
| "loss": 0.9044, |
| "step": 1192 |
| }, |
| { |
| "epoch": 6.213541666666667, |
| "grad_norm": 2.255669593811035, |
| "learning_rate": 0.00023678805505275834, |
| "loss": 0.9082, |
| "step": 1193 |
| }, |
| { |
| "epoch": 6.21875, |
| "grad_norm": 1.136857509613037, |
| "learning_rate": 0.00023668690672111134, |
| "loss": 0.9063, |
| "step": 1194 |
| }, |
| { |
| "epoch": 6.223958333333333, |
| "grad_norm": 1.0426369905471802, |
| "learning_rate": 0.00023658569917095267, |
| "loss": 0.8882, |
| "step": 1195 |
| }, |
| { |
| "epoch": 6.229166666666667, |
| "grad_norm": 1.6113125085830688, |
| "learning_rate": 0.0002364844324714204, |
| "loss": 0.8969, |
| "step": 1196 |
| }, |
| { |
| "epoch": 6.234375, |
| "grad_norm": 1.0691860914230347, |
| "learning_rate": 0.000236383106691693, |
| "loss": 0.8843, |
| "step": 1197 |
| }, |
| { |
| "epoch": 6.239583333333333, |
| "grad_norm": 1.4577548503875732, |
| "learning_rate": 0.00023628172190098915, |
| "loss": 0.9051, |
| "step": 1198 |
| }, |
| { |
| "epoch": 6.244791666666667, |
| "grad_norm": 0.9418482780456543, |
| "learning_rate": 0.00023618027816856807, |
| "loss": 0.8479, |
| "step": 1199 |
| }, |
| { |
| "epoch": 6.25, |
| "grad_norm": 1.2280293703079224, |
| "learning_rate": 0.00023607877556372904, |
| "loss": 0.8549, |
| "step": 1200 |
| }, |
| { |
| "epoch": 6.25, |
| "eval_f1_macro": 0.24912647359331333, |
| "eval_loss": 1.002465844154358, |
| "eval_runtime": 4.9836, |
| "eval_samples_per_second": 614.615, |
| "eval_steps_per_second": 9.632, |
| "step": 1200 |
| }, |
| { |
| "epoch": 6.255208333333333, |
| "grad_norm": 2.894786834716797, |
| "learning_rate": 0.00023597721415581164, |
| "loss": 0.966, |
| "step": 1201 |
| }, |
| { |
| "epoch": 6.260416666666667, |
| "grad_norm": 1.7258634567260742, |
| "learning_rate": 0.0002358755940141956, |
| "loss": 0.8944, |
| "step": 1202 |
| }, |
| { |
| "epoch": 6.265625, |
| "grad_norm": 0.958251953125, |
| "learning_rate": 0.00023577391520830082, |
| "loss": 0.8618, |
| "step": 1203 |
| }, |
| { |
| "epoch": 6.270833333333333, |
| "grad_norm": 1.7301183938980103, |
| "learning_rate": 0.00023567217780758726, |
| "loss": 0.9737, |
| "step": 1204 |
| }, |
| { |
| "epoch": 6.276041666666667, |
| "grad_norm": 1.1636407375335693, |
| "learning_rate": 0.00023557038188155483, |
| "loss": 0.8897, |
| "step": 1205 |
| }, |
| { |
| "epoch": 6.28125, |
| "grad_norm": 1.6016204357147217, |
| "learning_rate": 0.0002354685274997435, |
| "loss": 0.9737, |
| "step": 1206 |
| }, |
| { |
| "epoch": 6.286458333333333, |
| "grad_norm": 1.5181108713150024, |
| "learning_rate": 0.0002353666147317332, |
| "loss": 0.8442, |
| "step": 1207 |
| }, |
| { |
| "epoch": 6.291666666666667, |
| "grad_norm": 1.3113034963607788, |
| "learning_rate": 0.00023526464364714367, |
| "loss": 0.9882, |
| "step": 1208 |
| }, |
| { |
| "epoch": 6.296875, |
| "grad_norm": 2.258310556411743, |
| "learning_rate": 0.00023516261431563444, |
| "loss": 0.922, |
| "step": 1209 |
| }, |
| { |
| "epoch": 6.302083333333333, |
| "grad_norm": 1.9025388956069946, |
| "learning_rate": 0.00023506052680690503, |
| "loss": 0.8942, |
| "step": 1210 |
| }, |
| { |
| "epoch": 6.307291666666667, |
| "grad_norm": 0.9995399117469788, |
| "learning_rate": 0.00023495838119069447, |
| "loss": 0.9115, |
| "step": 1211 |
| }, |
| { |
| "epoch": 6.3125, |
| "grad_norm": 1.2945363521575928, |
| "learning_rate": 0.00023485617753678175, |
| "loss": 0.8938, |
| "step": 1212 |
| }, |
| { |
| "epoch": 6.317708333333333, |
| "grad_norm": 1.126760482788086, |
| "learning_rate": 0.0002347539159149852, |
| "loss": 0.7741, |
| "step": 1213 |
| }, |
| { |
| "epoch": 6.322916666666667, |
| "grad_norm": 2.2126784324645996, |
| "learning_rate": 0.00023465159639516294, |
| "loss": 0.8546, |
| "step": 1214 |
| }, |
| { |
| "epoch": 6.328125, |
| "grad_norm": 1.2938756942749023, |
| "learning_rate": 0.00023454921904721265, |
| "loss": 0.9432, |
| "step": 1215 |
| }, |
| { |
| "epoch": 6.333333333333333, |
| "grad_norm": 1.702868103981018, |
| "learning_rate": 0.00023444678394107143, |
| "loss": 0.9473, |
| "step": 1216 |
| }, |
| { |
| "epoch": 6.338541666666667, |
| "grad_norm": 1.3906638622283936, |
| "learning_rate": 0.00023434429114671586, |
| "loss": 1.0177, |
| "step": 1217 |
| }, |
| { |
| "epoch": 6.34375, |
| "grad_norm": 1.9192582368850708, |
| "learning_rate": 0.00023424174073416196, |
| "loss": 0.9689, |
| "step": 1218 |
| }, |
| { |
| "epoch": 6.348958333333333, |
| "grad_norm": 2.496931552886963, |
| "learning_rate": 0.0002341391327734652, |
| "loss": 0.8639, |
| "step": 1219 |
| }, |
| { |
| "epoch": 6.354166666666667, |
| "grad_norm": 1.560662865638733, |
| "learning_rate": 0.00023403646733472007, |
| "loss": 0.8556, |
| "step": 1220 |
| }, |
| { |
| "epoch": 6.359375, |
| "grad_norm": 0.9464545845985413, |
| "learning_rate": 0.0002339337444880606, |
| "loss": 0.9902, |
| "step": 1221 |
| }, |
| { |
| "epoch": 6.364583333333333, |
| "grad_norm": 1.3904879093170166, |
| "learning_rate": 0.00023383096430365998, |
| "loss": 0.9824, |
| "step": 1222 |
| }, |
| { |
| "epoch": 6.369791666666667, |
| "grad_norm": 0.9854976534843445, |
| "learning_rate": 0.00023372812685173048, |
| "loss": 0.7763, |
| "step": 1223 |
| }, |
| { |
| "epoch": 6.375, |
| "grad_norm": 0.973736584186554, |
| "learning_rate": 0.00023362523220252357, |
| "loss": 0.9856, |
| "step": 1224 |
| }, |
| { |
| "epoch": 6.380208333333333, |
| "grad_norm": 1.1423206329345703, |
| "learning_rate": 0.0002335222804263298, |
| "loss": 0.9085, |
| "step": 1225 |
| }, |
| { |
| "epoch": 6.385416666666667, |
| "grad_norm": 0.9111782908439636, |
| "learning_rate": 0.00023341927159347867, |
| "loss": 0.8163, |
| "step": 1226 |
| }, |
| { |
| "epoch": 6.390625, |
| "grad_norm": 0.9250236749649048, |
| "learning_rate": 0.00023331620577433877, |
| "loss": 0.9144, |
| "step": 1227 |
| }, |
| { |
| "epoch": 6.395833333333333, |
| "grad_norm": 0.8134245872497559, |
| "learning_rate": 0.00023321308303931752, |
| "loss": 0.8372, |
| "step": 1228 |
| }, |
| { |
| "epoch": 6.401041666666667, |
| "grad_norm": 1.4817832708358765, |
| "learning_rate": 0.0002331099034588612, |
| "loss": 0.9303, |
| "step": 1229 |
| }, |
| { |
| "epoch": 6.40625, |
| "grad_norm": 2.273141860961914, |
| "learning_rate": 0.000233006667103455, |
| "loss": 0.9515, |
| "step": 1230 |
| }, |
| { |
| "epoch": 6.411458333333333, |
| "grad_norm": 1.6524724960327148, |
| "learning_rate": 0.000232903374043623, |
| "loss": 0.951, |
| "step": 1231 |
| }, |
| { |
| "epoch": 6.416666666666667, |
| "grad_norm": 0.9412207007408142, |
| "learning_rate": 0.00023280002434992772, |
| "loss": 0.9582, |
| "step": 1232 |
| }, |
| { |
| "epoch": 6.421875, |
| "grad_norm": 0.7923340797424316, |
| "learning_rate": 0.00023269661809297064, |
| "loss": 0.8924, |
| "step": 1233 |
| }, |
| { |
| "epoch": 6.427083333333333, |
| "grad_norm": 0.8150245547294617, |
| "learning_rate": 0.00023259315534339176, |
| "loss": 0.9236, |
| "step": 1234 |
| }, |
| { |
| "epoch": 6.432291666666667, |
| "grad_norm": 1.4531840085983276, |
| "learning_rate": 0.00023248963617186966, |
| "loss": 0.895, |
| "step": 1235 |
| }, |
| { |
| "epoch": 6.4375, |
| "grad_norm": 1.4492069482803345, |
| "learning_rate": 0.0002323860606491215, |
| "loss": 0.8539, |
| "step": 1236 |
| }, |
| { |
| "epoch": 6.442708333333333, |
| "grad_norm": 1.4099410772323608, |
| "learning_rate": 0.0002322824288459029, |
| "loss": 0.9156, |
| "step": 1237 |
| }, |
| { |
| "epoch": 6.447916666666667, |
| "grad_norm": 1.0040732622146606, |
| "learning_rate": 0.00023217874083300804, |
| "loss": 0.8495, |
| "step": 1238 |
| }, |
| { |
| "epoch": 6.453125, |
| "grad_norm": 2.835129737854004, |
| "learning_rate": 0.00023207499668126934, |
| "loss": 0.9578, |
| "step": 1239 |
| }, |
| { |
| "epoch": 6.458333333333333, |
| "grad_norm": 1.726873755455017, |
| "learning_rate": 0.0002319711964615577, |
| "loss": 0.8416, |
| "step": 1240 |
| }, |
| { |
| "epoch": 6.463541666666667, |
| "grad_norm": 2.642096996307373, |
| "learning_rate": 0.0002318673402447822, |
| "loss": 1.0474, |
| "step": 1241 |
| }, |
| { |
| "epoch": 6.46875, |
| "grad_norm": 1.2253150939941406, |
| "learning_rate": 0.00023176342810189032, |
| "loss": 0.8999, |
| "step": 1242 |
| }, |
| { |
| "epoch": 6.473958333333333, |
| "grad_norm": 2.009229898452759, |
| "learning_rate": 0.00023165946010386758, |
| "loss": 0.8705, |
| "step": 1243 |
| }, |
| { |
| "epoch": 6.479166666666667, |
| "grad_norm": 1.980753779411316, |
| "learning_rate": 0.0002315554363217378, |
| "loss": 0.9756, |
| "step": 1244 |
| }, |
| { |
| "epoch": 6.484375, |
| "grad_norm": 1.4456915855407715, |
| "learning_rate": 0.00023145135682656285, |
| "loss": 0.846, |
| "step": 1245 |
| }, |
| { |
| "epoch": 6.489583333333333, |
| "grad_norm": 1.2889485359191895, |
| "learning_rate": 0.0002313472216894426, |
| "loss": 0.9794, |
| "step": 1246 |
| }, |
| { |
| "epoch": 6.494791666666667, |
| "grad_norm": 0.9595457315444946, |
| "learning_rate": 0.000231243030981515, |
| "loss": 0.8967, |
| "step": 1247 |
| }, |
| { |
| "epoch": 6.5, |
| "grad_norm": 1.03409743309021, |
| "learning_rate": 0.00023113878477395598, |
| "loss": 0.8603, |
| "step": 1248 |
| }, |
| { |
| "epoch": 6.505208333333333, |
| "grad_norm": 1.6965584754943848, |
| "learning_rate": 0.00023103448313797936, |
| "loss": 1.0145, |
| "step": 1249 |
| }, |
| { |
| "epoch": 6.510416666666667, |
| "grad_norm": 1.6744215488433838, |
| "learning_rate": 0.00023093012614483683, |
| "loss": 0.9163, |
| "step": 1250 |
| }, |
| { |
| "epoch": 6.515625, |
| "grad_norm": 3.1192703247070312, |
| "learning_rate": 0.00023082571386581787, |
| "loss": 1.1539, |
| "step": 1251 |
| }, |
| { |
| "epoch": 6.520833333333333, |
| "grad_norm": 1.0737314224243164, |
| "learning_rate": 0.0002307212463722497, |
| "loss": 0.8066, |
| "step": 1252 |
| }, |
| { |
| "epoch": 6.526041666666667, |
| "grad_norm": 2.1446847915649414, |
| "learning_rate": 0.00023061672373549737, |
| "loss": 0.9184, |
| "step": 1253 |
| }, |
| { |
| "epoch": 6.53125, |
| "grad_norm": 2.487567663192749, |
| "learning_rate": 0.00023051214602696346, |
| "loss": 0.9173, |
| "step": 1254 |
| }, |
| { |
| "epoch": 6.536458333333333, |
| "grad_norm": 1.4534178972244263, |
| "learning_rate": 0.00023040751331808833, |
| "loss": 0.898, |
| "step": 1255 |
| }, |
| { |
| "epoch": 6.541666666666667, |
| "grad_norm": 1.562321424484253, |
| "learning_rate": 0.0002303028256803497, |
| "loss": 1.0075, |
| "step": 1256 |
| }, |
| { |
| "epoch": 6.546875, |
| "grad_norm": 1.2452698945999146, |
| "learning_rate": 0.00023019808318526307, |
| "loss": 0.8887, |
| "step": 1257 |
| }, |
| { |
| "epoch": 6.552083333333333, |
| "grad_norm": 1.877057671546936, |
| "learning_rate": 0.0002300932859043812, |
| "loss": 0.8817, |
| "step": 1258 |
| }, |
| { |
| "epoch": 6.557291666666667, |
| "grad_norm": 1.0443779230117798, |
| "learning_rate": 0.00022998843390929443, |
| "loss": 0.9228, |
| "step": 1259 |
| }, |
| { |
| "epoch": 6.5625, |
| "grad_norm": 1.441368579864502, |
| "learning_rate": 0.00022988352727163036, |
| "loss": 0.8775, |
| "step": 1260 |
| }, |
| { |
| "epoch": 6.567708333333333, |
| "grad_norm": 1.2894471883773804, |
| "learning_rate": 0.00022977856606305394, |
| "loss": 0.9157, |
| "step": 1261 |
| }, |
| { |
| "epoch": 6.572916666666667, |
| "grad_norm": 2.3620381355285645, |
| "learning_rate": 0.00022967355035526744, |
| "loss": 0.8933, |
| "step": 1262 |
| }, |
| { |
| "epoch": 6.578125, |
| "grad_norm": 2.679354667663574, |
| "learning_rate": 0.00022956848022001036, |
| "loss": 0.9288, |
| "step": 1263 |
| }, |
| { |
| "epoch": 6.583333333333333, |
| "grad_norm": 2.029309034347534, |
| "learning_rate": 0.0002294633557290594, |
| "loss": 1.0417, |
| "step": 1264 |
| }, |
| { |
| "epoch": 6.588541666666667, |
| "grad_norm": 1.4134982824325562, |
| "learning_rate": 0.00022935817695422822, |
| "loss": 0.9054, |
| "step": 1265 |
| }, |
| { |
| "epoch": 6.59375, |
| "grad_norm": 1.879136085510254, |
| "learning_rate": 0.00022925294396736786, |
| "loss": 0.7745, |
| "step": 1266 |
| }, |
| { |
| "epoch": 6.598958333333333, |
| "grad_norm": 1.5110605955123901, |
| "learning_rate": 0.00022914765684036616, |
| "loss": 0.919, |
| "step": 1267 |
| }, |
| { |
| "epoch": 6.604166666666667, |
| "grad_norm": 1.2854112386703491, |
| "learning_rate": 0.00022904231564514803, |
| "loss": 0.7909, |
| "step": 1268 |
| }, |
| { |
| "epoch": 6.609375, |
| "grad_norm": 1.588845133781433, |
| "learning_rate": 0.00022893692045367525, |
| "loss": 0.8901, |
| "step": 1269 |
| }, |
| { |
| "epoch": 6.614583333333333, |
| "grad_norm": 1.6896146535873413, |
| "learning_rate": 0.00022883147133794668, |
| "loss": 1.0004, |
| "step": 1270 |
| }, |
| { |
| "epoch": 6.619791666666667, |
| "grad_norm": 2.22658109664917, |
| "learning_rate": 0.00022872596836999773, |
| "loss": 0.9141, |
| "step": 1271 |
| }, |
| { |
| "epoch": 6.625, |
| "grad_norm": 1.8191312551498413, |
| "learning_rate": 0.00022862041162190083, |
| "loss": 0.902, |
| "step": 1272 |
| }, |
| { |
| "epoch": 6.630208333333333, |
| "grad_norm": 0.8939937353134155, |
| "learning_rate": 0.00022851480116576502, |
| "loss": 0.899, |
| "step": 1273 |
| }, |
| { |
| "epoch": 6.635416666666667, |
| "grad_norm": 1.7369438409805298, |
| "learning_rate": 0.00022840913707373612, |
| "loss": 0.9919, |
| "step": 1274 |
| }, |
| { |
| "epoch": 6.640625, |
| "grad_norm": 2.6199216842651367, |
| "learning_rate": 0.00022830341941799647, |
| "loss": 0.8542, |
| "step": 1275 |
| }, |
| { |
| "epoch": 6.645833333333333, |
| "grad_norm": 0.8418968319892883, |
| "learning_rate": 0.00022819764827076516, |
| "loss": 0.8627, |
| "step": 1276 |
| }, |
| { |
| "epoch": 6.651041666666667, |
| "grad_norm": 1.614916443824768, |
| "learning_rate": 0.00022809182370429768, |
| "loss": 0.8589, |
| "step": 1277 |
| }, |
| { |
| "epoch": 6.65625, |
| "grad_norm": 1.3723595142364502, |
| "learning_rate": 0.00022798594579088617, |
| "loss": 0.9403, |
| "step": 1278 |
| }, |
| { |
| "epoch": 6.661458333333333, |
| "grad_norm": 1.3376940488815308, |
| "learning_rate": 0.00022788001460285893, |
| "loss": 0.9242, |
| "step": 1279 |
| }, |
| { |
| "epoch": 6.666666666666667, |
| "grad_norm": 1.0703601837158203, |
| "learning_rate": 0.000227774030212581, |
| "loss": 0.8797, |
| "step": 1280 |
| }, |
| { |
| "epoch": 6.671875, |
| "grad_norm": 1.7410355806350708, |
| "learning_rate": 0.0002276679926924535, |
| "loss": 0.9296, |
| "step": 1281 |
| }, |
| { |
| "epoch": 6.677083333333333, |
| "grad_norm": 1.2360808849334717, |
| "learning_rate": 0.000227561902114914, |
| "loss": 0.7961, |
| "step": 1282 |
| }, |
| { |
| "epoch": 6.682291666666667, |
| "grad_norm": 1.5201635360717773, |
| "learning_rate": 0.00022745575855243627, |
| "loss": 0.9047, |
| "step": 1283 |
| }, |
| { |
| "epoch": 6.6875, |
| "grad_norm": 1.1959121227264404, |
| "learning_rate": 0.00022734956207753023, |
| "loss": 0.8518, |
| "step": 1284 |
| }, |
| { |
| "epoch": 6.692708333333333, |
| "grad_norm": 1.0450208187103271, |
| "learning_rate": 0.000227243312762742, |
| "loss": 0.9856, |
| "step": 1285 |
| }, |
| { |
| "epoch": 6.697916666666667, |
| "grad_norm": 0.8541420102119446, |
| "learning_rate": 0.00022713701068065382, |
| "loss": 0.7927, |
| "step": 1286 |
| }, |
| { |
| "epoch": 6.703125, |
| "grad_norm": 1.195273518562317, |
| "learning_rate": 0.0002270306559038838, |
| "loss": 0.8574, |
| "step": 1287 |
| }, |
| { |
| "epoch": 6.708333333333333, |
| "grad_norm": 0.949259877204895, |
| "learning_rate": 0.0002269242485050863, |
| "loss": 0.8855, |
| "step": 1288 |
| }, |
| { |
| "epoch": 6.713541666666667, |
| "grad_norm": 2.2935752868652344, |
| "learning_rate": 0.0002268177885569515, |
| "loss": 0.8325, |
| "step": 1289 |
| }, |
| { |
| "epoch": 6.71875, |
| "grad_norm": 1.4497005939483643, |
| "learning_rate": 0.00022671127613220547, |
| "loss": 0.9967, |
| "step": 1290 |
| }, |
| { |
| "epoch": 6.723958333333333, |
| "grad_norm": 1.0545239448547363, |
| "learning_rate": 0.0002266047113036101, |
| "loss": 0.9352, |
| "step": 1291 |
| }, |
| { |
| "epoch": 6.729166666666667, |
| "grad_norm": 1.0327166318893433, |
| "learning_rate": 0.00022649809414396327, |
| "loss": 0.9867, |
| "step": 1292 |
| }, |
| { |
| "epoch": 6.734375, |
| "grad_norm": 3.331115245819092, |
| "learning_rate": 0.00022639142472609833, |
| "loss": 0.9158, |
| "step": 1293 |
| }, |
| { |
| "epoch": 6.739583333333333, |
| "grad_norm": 1.2026890516281128, |
| "learning_rate": 0.00022628470312288448, |
| "loss": 0.9959, |
| "step": 1294 |
| }, |
| { |
| "epoch": 6.744791666666667, |
| "grad_norm": 1.0518403053283691, |
| "learning_rate": 0.00022617792940722654, |
| "loss": 0.9538, |
| "step": 1295 |
| }, |
| { |
| "epoch": 6.75, |
| "grad_norm": 1.2331963777542114, |
| "learning_rate": 0.00022607110365206506, |
| "loss": 0.9843, |
| "step": 1296 |
| }, |
| { |
| "epoch": 6.755208333333333, |
| "grad_norm": 1.0371195077896118, |
| "learning_rate": 0.0002259642259303759, |
| "loss": 0.8765, |
| "step": 1297 |
| }, |
| { |
| "epoch": 6.760416666666667, |
| "grad_norm": 0.9036943316459656, |
| "learning_rate": 0.0002258572963151706, |
| "loss": 0.8847, |
| "step": 1298 |
| }, |
| { |
| "epoch": 6.765625, |
| "grad_norm": 0.9553194642066956, |
| "learning_rate": 0.00022575031487949605, |
| "loss": 0.9352, |
| "step": 1299 |
| }, |
| { |
| "epoch": 6.770833333333333, |
| "grad_norm": 1.568284273147583, |
| "learning_rate": 0.0002256432816964347, |
| "loss": 0.7972, |
| "step": 1300 |
| }, |
| { |
| "epoch": 6.770833333333333, |
| "eval_f1_macro": 0.261851865276454, |
| "eval_loss": 0.9946062564849854, |
| "eval_runtime": 4.9672, |
| "eval_samples_per_second": 616.649, |
| "eval_steps_per_second": 9.663, |
| "step": 1300 |
| }, |
| { |
| "epoch": 6.776041666666667, |
| "grad_norm": 2.2896854877471924, |
| "learning_rate": 0.00022553619683910405, |
| "loss": 0.8681, |
| "step": 1301 |
| }, |
| { |
| "epoch": 6.78125, |
| "grad_norm": 1.276363730430603, |
| "learning_rate": 0.0002254290603806572, |
| "loss": 0.9458, |
| "step": 1302 |
| }, |
| { |
| "epoch": 6.786458333333333, |
| "grad_norm": 1.0162060260772705, |
| "learning_rate": 0.00022532187239428236, |
| "loss": 0.9017, |
| "step": 1303 |
| }, |
| { |
| "epoch": 6.791666666666667, |
| "grad_norm": 1.7596327066421509, |
| "learning_rate": 0.00022521463295320295, |
| "loss": 0.9448, |
| "step": 1304 |
| }, |
| { |
| "epoch": 6.796875, |
| "grad_norm": 0.9443992376327515, |
| "learning_rate": 0.0002251073421306776, |
| "loss": 0.8985, |
| "step": 1305 |
| }, |
| { |
| "epoch": 6.802083333333333, |
| "grad_norm": 1.2825473546981812, |
| "learning_rate": 0.000225, |
| "loss": 0.9129, |
| "step": 1306 |
| }, |
| { |
| "epoch": 6.807291666666667, |
| "grad_norm": 1.728300929069519, |
| "learning_rate": 0.0002248926066344988, |
| "loss": 0.8998, |
| "step": 1307 |
| }, |
| { |
| "epoch": 6.8125, |
| "grad_norm": 2.1218109130859375, |
| "learning_rate": 0.00022478516210753779, |
| "loss": 0.8863, |
| "step": 1308 |
| }, |
| { |
| "epoch": 6.817708333333333, |
| "grad_norm": 1.3730586767196655, |
| "learning_rate": 0.00022467766649251567, |
| "loss": 0.8866, |
| "step": 1309 |
| }, |
| { |
| "epoch": 6.822916666666667, |
| "grad_norm": 1.2994970083236694, |
| "learning_rate": 0.000224570119862866, |
| "loss": 1.0219, |
| "step": 1310 |
| }, |
| { |
| "epoch": 6.828125, |
| "grad_norm": 1.8196054697036743, |
| "learning_rate": 0.0002244625222920572, |
| "loss": 0.9804, |
| "step": 1311 |
| }, |
| { |
| "epoch": 6.833333333333333, |
| "grad_norm": 0.9306721091270447, |
| "learning_rate": 0.00022435487385359256, |
| "loss": 0.9265, |
| "step": 1312 |
| }, |
| { |
| "epoch": 6.838541666666667, |
| "grad_norm": 1.5279303789138794, |
| "learning_rate": 0.00022424717462101, |
| "loss": 0.8847, |
| "step": 1313 |
| }, |
| { |
| "epoch": 6.84375, |
| "grad_norm": 1.3321701288223267, |
| "learning_rate": 0.00022413942466788223, |
| "loss": 0.9296, |
| "step": 1314 |
| }, |
| { |
| "epoch": 6.848958333333333, |
| "grad_norm": 1.8765895366668701, |
| "learning_rate": 0.00022403162406781663, |
| "loss": 0.8566, |
| "step": 1315 |
| }, |
| { |
| "epoch": 6.854166666666667, |
| "grad_norm": 2.4004135131835938, |
| "learning_rate": 0.000223923772894455, |
| "loss": 0.8258, |
| "step": 1316 |
| }, |
| { |
| "epoch": 6.859375, |
| "grad_norm": 1.4305599927902222, |
| "learning_rate": 0.00022381587122147396, |
| "loss": 1.0252, |
| "step": 1317 |
| }, |
| { |
| "epoch": 6.864583333333333, |
| "grad_norm": 1.0506645441055298, |
| "learning_rate": 0.0002237079191225844, |
| "loss": 0.9989, |
| "step": 1318 |
| }, |
| { |
| "epoch": 6.869791666666667, |
| "grad_norm": 1.85765540599823, |
| "learning_rate": 0.0002235999166715318, |
| "loss": 0.8339, |
| "step": 1319 |
| }, |
| { |
| "epoch": 6.875, |
| "grad_norm": 1.1768540143966675, |
| "learning_rate": 0.00022349186394209587, |
| "loss": 0.8324, |
| "step": 1320 |
| }, |
| { |
| "epoch": 6.880208333333333, |
| "grad_norm": 1.0473297834396362, |
| "learning_rate": 0.00022338376100809095, |
| "loss": 0.889, |
| "step": 1321 |
| }, |
| { |
| "epoch": 6.885416666666667, |
| "grad_norm": 0.9658707976341248, |
| "learning_rate": 0.00022327560794336535, |
| "loss": 0.943, |
| "step": 1322 |
| }, |
| { |
| "epoch": 6.890625, |
| "grad_norm": 0.8636470437049866, |
| "learning_rate": 0.00022316740482180187, |
| "loss": 0.8756, |
| "step": 1323 |
| }, |
| { |
| "epoch": 6.895833333333333, |
| "grad_norm": 1.1423020362854004, |
| "learning_rate": 0.00022305915171731737, |
| "loss": 0.9688, |
| "step": 1324 |
| }, |
| { |
| "epoch": 6.901041666666667, |
| "grad_norm": 1.6616876125335693, |
| "learning_rate": 0.00022295084870386294, |
| "loss": 0.9097, |
| "step": 1325 |
| }, |
| { |
| "epoch": 6.90625, |
| "grad_norm": 1.53605055809021, |
| "learning_rate": 0.00022284249585542362, |
| "loss": 0.9602, |
| "step": 1326 |
| }, |
| { |
| "epoch": 6.911458333333333, |
| "grad_norm": 0.9761777520179749, |
| "learning_rate": 0.0002227340932460187, |
| "loss": 0.8555, |
| "step": 1327 |
| }, |
| { |
| "epoch": 6.916666666666667, |
| "grad_norm": 1.5346243381500244, |
| "learning_rate": 0.00022262564094970136, |
| "loss": 0.8454, |
| "step": 1328 |
| }, |
| { |
| "epoch": 6.921875, |
| "grad_norm": 1.636579990386963, |
| "learning_rate": 0.0002225171390405587, |
| "loss": 0.915, |
| "step": 1329 |
| }, |
| { |
| "epoch": 6.927083333333333, |
| "grad_norm": 3.133974313735962, |
| "learning_rate": 0.0002224085875927117, |
| "loss": 0.9981, |
| "step": 1330 |
| }, |
| { |
| "epoch": 6.932291666666667, |
| "grad_norm": 2.651822090148926, |
| "learning_rate": 0.0002222999866803153, |
| "loss": 1.0721, |
| "step": 1331 |
| }, |
| { |
| "epoch": 6.9375, |
| "grad_norm": 1.6511924266815186, |
| "learning_rate": 0.00022219133637755802, |
| "loss": 0.8865, |
| "step": 1332 |
| }, |
| { |
| "epoch": 6.942708333333333, |
| "grad_norm": 2.065239191055298, |
| "learning_rate": 0.0002220826367586624, |
| "loss": 0.9462, |
| "step": 1333 |
| }, |
| { |
| "epoch": 6.947916666666667, |
| "grad_norm": 1.566549301147461, |
| "learning_rate": 0.0002219738878978844, |
| "loss": 0.9718, |
| "step": 1334 |
| }, |
| { |
| "epoch": 6.953125, |
| "grad_norm": 1.4814587831497192, |
| "learning_rate": 0.0002218650898695138, |
| "loss": 1.0242, |
| "step": 1335 |
| }, |
| { |
| "epoch": 6.958333333333333, |
| "grad_norm": 1.342617392539978, |
| "learning_rate": 0.00022175624274787387, |
| "loss": 0.9835, |
| "step": 1336 |
| }, |
| { |
| "epoch": 6.963541666666667, |
| "grad_norm": 2.262120485305786, |
| "learning_rate": 0.0002216473466073215, |
| "loss": 1.0762, |
| "step": 1337 |
| }, |
| { |
| "epoch": 6.96875, |
| "grad_norm": 1.890270709991455, |
| "learning_rate": 0.00022153840152224692, |
| "loss": 0.9028, |
| "step": 1338 |
| }, |
| { |
| "epoch": 6.973958333333333, |
| "grad_norm": 2.0230281352996826, |
| "learning_rate": 0.000221429407567074, |
| "loss": 0.9348, |
| "step": 1339 |
| }, |
| { |
| "epoch": 6.979166666666667, |
| "grad_norm": 1.681060552597046, |
| "learning_rate": 0.00022132036481625986, |
| "loss": 0.9381, |
| "step": 1340 |
| }, |
| { |
| "epoch": 6.984375, |
| "grad_norm": 1.1678459644317627, |
| "learning_rate": 0.0002212112733442949, |
| "loss": 0.8406, |
| "step": 1341 |
| }, |
| { |
| "epoch": 6.989583333333333, |
| "grad_norm": 0.9426416158676147, |
| "learning_rate": 0.00022110213322570297, |
| "loss": 0.9437, |
| "step": 1342 |
| }, |
| { |
| "epoch": 6.994791666666667, |
| "grad_norm": 2.0843846797943115, |
| "learning_rate": 0.00022099294453504104, |
| "loss": 0.8192, |
| "step": 1343 |
| }, |
| { |
| "epoch": 7.0, |
| "grad_norm": 1.7579600811004639, |
| "learning_rate": 0.00022088370734689932, |
| "loss": 0.882, |
| "step": 1344 |
| }, |
| { |
| "epoch": 7.005208333333333, |
| "grad_norm": 1.9478321075439453, |
| "learning_rate": 0.00022077442173590108, |
| "loss": 0.8843, |
| "step": 1345 |
| }, |
| { |
| "epoch": 7.010416666666667, |
| "grad_norm": 1.1952757835388184, |
| "learning_rate": 0.00022066508777670267, |
| "loss": 0.8734, |
| "step": 1346 |
| }, |
| { |
| "epoch": 7.015625, |
| "grad_norm": 0.9896311163902283, |
| "learning_rate": 0.0002205557055439936, |
| "loss": 0.8925, |
| "step": 1347 |
| }, |
| { |
| "epoch": 7.020833333333333, |
| "grad_norm": 2.1063690185546875, |
| "learning_rate": 0.00022044627511249616, |
| "loss": 0.8634, |
| "step": 1348 |
| }, |
| { |
| "epoch": 7.026041666666667, |
| "grad_norm": 1.7648112773895264, |
| "learning_rate": 0.0002203367965569658, |
| "loss": 0.906, |
| "step": 1349 |
| }, |
| { |
| "epoch": 7.03125, |
| "grad_norm": 1.2033640146255493, |
| "learning_rate": 0.00022022726995219054, |
| "loss": 0.872, |
| "step": 1350 |
| }, |
| { |
| "epoch": 7.036458333333333, |
| "grad_norm": 1.3703522682189941, |
| "learning_rate": 0.00022011769537299156, |
| "loss": 0.8764, |
| "step": 1351 |
| }, |
| { |
| "epoch": 7.041666666666667, |
| "grad_norm": 1.5515109300613403, |
| "learning_rate": 0.00022000807289422253, |
| "loss": 0.7829, |
| "step": 1352 |
| }, |
| { |
| "epoch": 7.046875, |
| "grad_norm": 1.8876760005950928, |
| "learning_rate": 0.00021989840259077002, |
| "loss": 0.8114, |
| "step": 1353 |
| }, |
| { |
| "epoch": 7.052083333333333, |
| "grad_norm": 1.9279142618179321, |
| "learning_rate": 0.00021978868453755325, |
| "loss": 0.847, |
| "step": 1354 |
| }, |
| { |
| "epoch": 7.057291666666667, |
| "grad_norm": 1.592970609664917, |
| "learning_rate": 0.00021967891880952389, |
| "loss": 1.0202, |
| "step": 1355 |
| }, |
| { |
| "epoch": 7.0625, |
| "grad_norm": 2.9997711181640625, |
| "learning_rate": 0.00021956910548166648, |
| "loss": 0.9628, |
| "step": 1356 |
| }, |
| { |
| "epoch": 7.067708333333333, |
| "grad_norm": 1.7588224411010742, |
| "learning_rate": 0.00021945924462899775, |
| "loss": 0.8018, |
| "step": 1357 |
| }, |
| { |
| "epoch": 7.072916666666667, |
| "grad_norm": 1.3819934129714966, |
| "learning_rate": 0.00021934933632656716, |
| "loss": 0.805, |
| "step": 1358 |
| }, |
| { |
| "epoch": 7.078125, |
| "grad_norm": 1.4061005115509033, |
| "learning_rate": 0.0002192393806494564, |
| "loss": 0.9661, |
| "step": 1359 |
| }, |
| { |
| "epoch": 7.083333333333333, |
| "grad_norm": 0.9433820247650146, |
| "learning_rate": 0.00021912937767277967, |
| "loss": 0.7328, |
| "step": 1360 |
| }, |
| { |
| "epoch": 7.088541666666667, |
| "grad_norm": 1.1612136363983154, |
| "learning_rate": 0.00021901932747168338, |
| "loss": 0.8716, |
| "step": 1361 |
| }, |
| { |
| "epoch": 7.09375, |
| "grad_norm": 2.7698450088500977, |
| "learning_rate": 0.00021890923012134623, |
| "loss": 0.8933, |
| "step": 1362 |
| }, |
| { |
| "epoch": 7.098958333333333, |
| "grad_norm": 1.7993488311767578, |
| "learning_rate": 0.0002187990856969791, |
| "loss": 0.9786, |
| "step": 1363 |
| }, |
| { |
| "epoch": 7.104166666666667, |
| "grad_norm": 1.948461651802063, |
| "learning_rate": 0.00021868889427382513, |
| "loss": 0.9171, |
| "step": 1364 |
| }, |
| { |
| "epoch": 7.109375, |
| "grad_norm": 1.851943850517273, |
| "learning_rate": 0.00021857865592715944, |
| "loss": 0.8633, |
| "step": 1365 |
| }, |
| { |
| "epoch": 7.114583333333333, |
| "grad_norm": 2.228773832321167, |
| "learning_rate": 0.00021846837073228925, |
| "loss": 0.8767, |
| "step": 1366 |
| }, |
| { |
| "epoch": 7.119791666666667, |
| "grad_norm": 1.7219014167785645, |
| "learning_rate": 0.00021835803876455384, |
| "loss": 0.8183, |
| "step": 1367 |
| }, |
| { |
| "epoch": 7.125, |
| "grad_norm": 1.131668210029602, |
| "learning_rate": 0.00021824766009932438, |
| "loss": 0.9531, |
| "step": 1368 |
| }, |
| { |
| "epoch": 7.130208333333333, |
| "grad_norm": 2.579493284225464, |
| "learning_rate": 0.00021813723481200394, |
| "loss": 0.9843, |
| "step": 1369 |
| }, |
| { |
| "epoch": 7.135416666666667, |
| "grad_norm": 1.175447940826416, |
| "learning_rate": 0.00021802676297802748, |
| "loss": 0.8555, |
| "step": 1370 |
| }, |
| { |
| "epoch": 7.140625, |
| "grad_norm": 0.9509913921356201, |
| "learning_rate": 0.00021791624467286166, |
| "loss": 0.9028, |
| "step": 1371 |
| }, |
| { |
| "epoch": 7.145833333333333, |
| "grad_norm": 1.8409545421600342, |
| "learning_rate": 0.00021780567997200503, |
| "loss": 0.8671, |
| "step": 1372 |
| }, |
| { |
| "epoch": 7.151041666666667, |
| "grad_norm": 1.7558434009552002, |
| "learning_rate": 0.0002176950689509878, |
| "loss": 0.8789, |
| "step": 1373 |
| }, |
| { |
| "epoch": 7.15625, |
| "grad_norm": 2.374663829803467, |
| "learning_rate": 0.00021758441168537172, |
| "loss": 0.9095, |
| "step": 1374 |
| }, |
| { |
| "epoch": 7.161458333333333, |
| "grad_norm": 0.9827964305877686, |
| "learning_rate": 0.00021747370825075019, |
| "loss": 0.8957, |
| "step": 1375 |
| }, |
| { |
| "epoch": 7.166666666666667, |
| "grad_norm": 1.4413766860961914, |
| "learning_rate": 0.00021736295872274816, |
| "loss": 0.8824, |
| "step": 1376 |
| }, |
| { |
| "epoch": 7.171875, |
| "grad_norm": 1.0796465873718262, |
| "learning_rate": 0.0002172521631770221, |
| "loss": 0.8143, |
| "step": 1377 |
| }, |
| { |
| "epoch": 7.177083333333333, |
| "grad_norm": 1.3801743984222412, |
| "learning_rate": 0.00021714132168925988, |
| "loss": 1.027, |
| "step": 1378 |
| }, |
| { |
| "epoch": 7.182291666666667, |
| "grad_norm": 1.203778624534607, |
| "learning_rate": 0.00021703043433518064, |
| "loss": 0.919, |
| "step": 1379 |
| }, |
| { |
| "epoch": 7.1875, |
| "grad_norm": 1.7386813163757324, |
| "learning_rate": 0.00021691950119053513, |
| "loss": 0.9522, |
| "step": 1380 |
| }, |
| { |
| "epoch": 7.192708333333333, |
| "grad_norm": 0.9413809180259705, |
| "learning_rate": 0.00021680852233110518, |
| "loss": 0.7456, |
| "step": 1381 |
| }, |
| { |
| "epoch": 7.197916666666667, |
| "grad_norm": 1.632208228111267, |
| "learning_rate": 0.00021669749783270381, |
| "loss": 0.9471, |
| "step": 1382 |
| }, |
| { |
| "epoch": 7.203125, |
| "grad_norm": 0.9503183960914612, |
| "learning_rate": 0.00021658642777117534, |
| "loss": 0.8709, |
| "step": 1383 |
| }, |
| { |
| "epoch": 7.208333333333333, |
| "grad_norm": 1.2300368547439575, |
| "learning_rate": 0.00021647531222239517, |
| "loss": 0.9519, |
| "step": 1384 |
| }, |
| { |
| "epoch": 7.213541666666667, |
| "grad_norm": 1.0146613121032715, |
| "learning_rate": 0.00021636415126226978, |
| "loss": 0.9492, |
| "step": 1385 |
| }, |
| { |
| "epoch": 7.21875, |
| "grad_norm": 0.8389624357223511, |
| "learning_rate": 0.00021625294496673662, |
| "loss": 0.8177, |
| "step": 1386 |
| }, |
| { |
| "epoch": 7.223958333333333, |
| "grad_norm": 1.6988322734832764, |
| "learning_rate": 0.00021614169341176425, |
| "loss": 0.9344, |
| "step": 1387 |
| }, |
| { |
| "epoch": 7.229166666666667, |
| "grad_norm": 1.625183343887329, |
| "learning_rate": 0.00021603039667335194, |
| "loss": 0.7924, |
| "step": 1388 |
| }, |
| { |
| "epoch": 7.234375, |
| "grad_norm": 1.6808733940124512, |
| "learning_rate": 0.00021591905482753, |
| "loss": 0.8591, |
| "step": 1389 |
| }, |
| { |
| "epoch": 7.239583333333333, |
| "grad_norm": 1.2159885168075562, |
| "learning_rate": 0.00021580766795035956, |
| "loss": 0.8422, |
| "step": 1390 |
| }, |
| { |
| "epoch": 7.244791666666667, |
| "grad_norm": 3.453206777572632, |
| "learning_rate": 0.0002156962361179323, |
| "loss": 0.8155, |
| "step": 1391 |
| }, |
| { |
| "epoch": 7.25, |
| "grad_norm": 1.6344776153564453, |
| "learning_rate": 0.0002155847594063709, |
| "loss": 0.9507, |
| "step": 1392 |
| }, |
| { |
| "epoch": 7.255208333333333, |
| "grad_norm": 0.9838268756866455, |
| "learning_rate": 0.00021547323789182848, |
| "loss": 0.9448, |
| "step": 1393 |
| }, |
| { |
| "epoch": 7.260416666666667, |
| "grad_norm": 1.496156930923462, |
| "learning_rate": 0.00021536167165048885, |
| "loss": 0.9448, |
| "step": 1394 |
| }, |
| { |
| "epoch": 7.265625, |
| "grad_norm": 1.4182825088500977, |
| "learning_rate": 0.00021525006075856636, |
| "loss": 0.9405, |
| "step": 1395 |
| }, |
| { |
| "epoch": 7.270833333333333, |
| "grad_norm": 2.261598587036133, |
| "learning_rate": 0.00021513840529230585, |
| "loss": 0.9184, |
| "step": 1396 |
| }, |
| { |
| "epoch": 7.276041666666667, |
| "grad_norm": 1.0471082925796509, |
| "learning_rate": 0.0002150267053279827, |
| "loss": 0.7773, |
| "step": 1397 |
| }, |
| { |
| "epoch": 7.28125, |
| "grad_norm": 0.962591290473938, |
| "learning_rate": 0.00021491496094190252, |
| "loss": 0.8784, |
| "step": 1398 |
| }, |
| { |
| "epoch": 7.286458333333333, |
| "grad_norm": 1.4438096284866333, |
| "learning_rate": 0.00021480317221040147, |
| "loss": 0.9101, |
| "step": 1399 |
| }, |
| { |
| "epoch": 7.291666666666667, |
| "grad_norm": 1.4270983934402466, |
| "learning_rate": 0.00021469133920984577, |
| "loss": 0.92, |
| "step": 1400 |
| }, |
| { |
| "epoch": 7.291666666666667, |
| "eval_f1_macro": 0.2588723846425682, |
| "eval_loss": 0.9983934164047241, |
| "eval_runtime": 5.0505, |
| "eval_samples_per_second": 606.477, |
| "eval_steps_per_second": 9.504, |
| "step": 1400 |
| }, |
| { |
| "epoch": 7.296875, |
| "grad_norm": 0.9379773139953613, |
| "learning_rate": 0.00021457946201663215, |
| "loss": 0.8304, |
| "step": 1401 |
| }, |
| { |
| "epoch": 7.302083333333333, |
| "grad_norm": 1.4283442497253418, |
| "learning_rate": 0.00021446754070718725, |
| "loss": 0.8968, |
| "step": 1402 |
| }, |
| { |
| "epoch": 7.307291666666667, |
| "grad_norm": 1.3309910297393799, |
| "learning_rate": 0.0002143555753579681, |
| "loss": 0.8451, |
| "step": 1403 |
| }, |
| { |
| "epoch": 7.3125, |
| "grad_norm": 1.259289264678955, |
| "learning_rate": 0.00021424356604546156, |
| "loss": 1.0271, |
| "step": 1404 |
| }, |
| { |
| "epoch": 7.317708333333333, |
| "grad_norm": 2.0744223594665527, |
| "learning_rate": 0.00021413151284618484, |
| "loss": 0.9044, |
| "step": 1405 |
| }, |
| { |
| "epoch": 7.322916666666667, |
| "grad_norm": 2.1598122119903564, |
| "learning_rate": 0.0002140194158366848, |
| "loss": 0.9422, |
| "step": 1406 |
| }, |
| { |
| "epoch": 7.328125, |
| "grad_norm": 1.4584722518920898, |
| "learning_rate": 0.00021390727509353847, |
| "loss": 0.9186, |
| "step": 1407 |
| }, |
| { |
| "epoch": 7.333333333333333, |
| "grad_norm": 1.4487046003341675, |
| "learning_rate": 0.00021379509069335262, |
| "loss": 0.8512, |
| "step": 1408 |
| }, |
| { |
| "epoch": 7.338541666666667, |
| "grad_norm": 2.3701748847961426, |
| "learning_rate": 0.00021368286271276394, |
| "loss": 1.0565, |
| "step": 1409 |
| }, |
| { |
| "epoch": 7.34375, |
| "grad_norm": 1.773170828819275, |
| "learning_rate": 0.00021357059122843874, |
| "loss": 0.872, |
| "step": 1410 |
| }, |
| { |
| "epoch": 7.348958333333333, |
| "grad_norm": 3.16743540763855, |
| "learning_rate": 0.00021345827631707325, |
| "loss": 0.9498, |
| "step": 1411 |
| }, |
| { |
| "epoch": 7.354166666666667, |
| "grad_norm": 1.0806071758270264, |
| "learning_rate": 0.0002133459180553932, |
| "loss": 0.9677, |
| "step": 1412 |
| }, |
| { |
| "epoch": 7.359375, |
| "grad_norm": 1.4526619911193848, |
| "learning_rate": 0.00021323351652015408, |
| "loss": 0.8933, |
| "step": 1413 |
| }, |
| { |
| "epoch": 7.364583333333333, |
| "grad_norm": 1.21781325340271, |
| "learning_rate": 0.0002131210717881408, |
| "loss": 0.9038, |
| "step": 1414 |
| }, |
| { |
| "epoch": 7.369791666666667, |
| "grad_norm": 1.533321499824524, |
| "learning_rate": 0.00021300858393616783, |
| "loss": 0.8856, |
| "step": 1415 |
| }, |
| { |
| "epoch": 7.375, |
| "grad_norm": 1.4183151721954346, |
| "learning_rate": 0.0002128960530410791, |
| "loss": 0.8539, |
| "step": 1416 |
| }, |
| { |
| "epoch": 7.380208333333333, |
| "grad_norm": 1.3590943813323975, |
| "learning_rate": 0.00021278347917974802, |
| "loss": 0.9027, |
| "step": 1417 |
| }, |
| { |
| "epoch": 7.385416666666667, |
| "grad_norm": 1.068263053894043, |
| "learning_rate": 0.0002126708624290772, |
| "loss": 0.8554, |
| "step": 1418 |
| }, |
| { |
| "epoch": 7.390625, |
| "grad_norm": 0.869499683380127, |
| "learning_rate": 0.00021255820286599872, |
| "loss": 0.9157, |
| "step": 1419 |
| }, |
| { |
| "epoch": 7.395833333333333, |
| "grad_norm": 1.8608933687210083, |
| "learning_rate": 0.00021244550056747375, |
| "loss": 0.877, |
| "step": 1420 |
| }, |
| { |
| "epoch": 7.401041666666667, |
| "grad_norm": 1.0794503688812256, |
| "learning_rate": 0.00021233275561049274, |
| "loss": 0.8363, |
| "step": 1421 |
| }, |
| { |
| "epoch": 7.40625, |
| "grad_norm": 2.041581869125366, |
| "learning_rate": 0.00021221996807207523, |
| "loss": 0.8033, |
| "step": 1422 |
| }, |
| { |
| "epoch": 7.411458333333333, |
| "grad_norm": 1.1566667556762695, |
| "learning_rate": 0.00021210713802926996, |
| "loss": 0.8149, |
| "step": 1423 |
| }, |
| { |
| "epoch": 7.416666666666667, |
| "grad_norm": 2.1345791816711426, |
| "learning_rate": 0.00021199426555915454, |
| "loss": 0.9429, |
| "step": 1424 |
| }, |
| { |
| "epoch": 7.421875, |
| "grad_norm": 1.5878188610076904, |
| "learning_rate": 0.0002118813507388357, |
| "loss": 1.066, |
| "step": 1425 |
| }, |
| { |
| "epoch": 7.427083333333333, |
| "grad_norm": 1.3753982782363892, |
| "learning_rate": 0.00021176839364544906, |
| "loss": 0.846, |
| "step": 1426 |
| }, |
| { |
| "epoch": 7.432291666666667, |
| "grad_norm": 1.4147439002990723, |
| "learning_rate": 0.0002116553943561591, |
| "loss": 0.967, |
| "step": 1427 |
| }, |
| { |
| "epoch": 7.4375, |
| "grad_norm": 0.8319414258003235, |
| "learning_rate": 0.00021154235294815914, |
| "loss": 0.9026, |
| "step": 1428 |
| }, |
| { |
| "epoch": 7.442708333333333, |
| "grad_norm": 1.9101743698120117, |
| "learning_rate": 0.00021142926949867125, |
| "loss": 0.8722, |
| "step": 1429 |
| }, |
| { |
| "epoch": 7.447916666666667, |
| "grad_norm": 2.2124507427215576, |
| "learning_rate": 0.00021131614408494623, |
| "loss": 0.8744, |
| "step": 1430 |
| }, |
| { |
| "epoch": 7.453125, |
| "grad_norm": 1.5508439540863037, |
| "learning_rate": 0.0002112029767842636, |
| "loss": 1.048, |
| "step": 1431 |
| }, |
| { |
| "epoch": 7.458333333333333, |
| "grad_norm": 1.0624767541885376, |
| "learning_rate": 0.0002110897676739314, |
| "loss": 0.8197, |
| "step": 1432 |
| }, |
| { |
| "epoch": 7.463541666666667, |
| "grad_norm": 1.1269443035125732, |
| "learning_rate": 0.0002109765168312863, |
| "loss": 0.9436, |
| "step": 1433 |
| }, |
| { |
| "epoch": 7.46875, |
| "grad_norm": 1.2486778497695923, |
| "learning_rate": 0.0002108632243336935, |
| "loss": 0.8676, |
| "step": 1434 |
| }, |
| { |
| "epoch": 7.473958333333333, |
| "grad_norm": 1.6063307523727417, |
| "learning_rate": 0.00021074989025854653, |
| "loss": 0.9524, |
| "step": 1435 |
| }, |
| { |
| "epoch": 7.479166666666667, |
| "grad_norm": 1.9341213703155518, |
| "learning_rate": 0.00021063651468326743, |
| "loss": 0.9611, |
| "step": 1436 |
| }, |
| { |
| "epoch": 7.484375, |
| "grad_norm": 1.0980159044265747, |
| "learning_rate": 0.00021052309768530665, |
| "loss": 0.8905, |
| "step": 1437 |
| }, |
| { |
| "epoch": 7.489583333333333, |
| "grad_norm": 2.0579183101654053, |
| "learning_rate": 0.00021040963934214278, |
| "loss": 0.867, |
| "step": 1438 |
| }, |
| { |
| "epoch": 7.494791666666667, |
| "grad_norm": 1.6749255657196045, |
| "learning_rate": 0.00021029613973128273, |
| "loss": 0.9197, |
| "step": 1439 |
| }, |
| { |
| "epoch": 7.5, |
| "grad_norm": 2.157895088195801, |
| "learning_rate": 0.00021018259893026162, |
| "loss": 0.9018, |
| "step": 1440 |
| }, |
| { |
| "epoch": 7.505208333333333, |
| "grad_norm": 1.799407720565796, |
| "learning_rate": 0.0002100690170166427, |
| "loss": 0.985, |
| "step": 1441 |
| }, |
| { |
| "epoch": 7.510416666666667, |
| "grad_norm": 1.7359036207199097, |
| "learning_rate": 0.00020995539406801726, |
| "loss": 0.9087, |
| "step": 1442 |
| }, |
| { |
| "epoch": 7.515625, |
| "grad_norm": 1.6354731321334839, |
| "learning_rate": 0.00020984173016200464, |
| "loss": 0.9997, |
| "step": 1443 |
| }, |
| { |
| "epoch": 7.520833333333333, |
| "grad_norm": 1.2198872566223145, |
| "learning_rate": 0.0002097280253762523, |
| "loss": 0.908, |
| "step": 1444 |
| }, |
| { |
| "epoch": 7.526041666666667, |
| "grad_norm": 0.9235466718673706, |
| "learning_rate": 0.00020961427978843532, |
| "loss": 0.8695, |
| "step": 1445 |
| }, |
| { |
| "epoch": 7.53125, |
| "grad_norm": 1.4569190740585327, |
| "learning_rate": 0.000209500493476257, |
| "loss": 0.8348, |
| "step": 1446 |
| }, |
| { |
| "epoch": 7.536458333333333, |
| "grad_norm": 1.0817468166351318, |
| "learning_rate": 0.00020938666651744818, |
| "loss": 0.8691, |
| "step": 1447 |
| }, |
| { |
| "epoch": 7.541666666666667, |
| "grad_norm": 1.3885513544082642, |
| "learning_rate": 0.00020927279898976764, |
| "loss": 0.7955, |
| "step": 1448 |
| }, |
| { |
| "epoch": 7.546875, |
| "grad_norm": 2.0213024616241455, |
| "learning_rate": 0.0002091588909710018, |
| "loss": 0.9483, |
| "step": 1449 |
| }, |
| { |
| "epoch": 7.552083333333333, |
| "grad_norm": 1.503325343132019, |
| "learning_rate": 0.00020904494253896474, |
| "loss": 0.9796, |
| "step": 1450 |
| }, |
| { |
| "epoch": 7.557291666666667, |
| "grad_norm": 1.7630645036697388, |
| "learning_rate": 0.00020893095377149816, |
| "loss": 0.8437, |
| "step": 1451 |
| }, |
| { |
| "epoch": 7.5625, |
| "grad_norm": 1.6904546022415161, |
| "learning_rate": 0.00020881692474647142, |
| "loss": 0.9283, |
| "step": 1452 |
| }, |
| { |
| "epoch": 7.567708333333333, |
| "grad_norm": 1.221543312072754, |
| "learning_rate": 0.00020870285554178115, |
| "loss": 0.7785, |
| "step": 1453 |
| }, |
| { |
| "epoch": 7.572916666666667, |
| "grad_norm": 1.9312421083450317, |
| "learning_rate": 0.00020858874623535162, |
| "loss": 0.8888, |
| "step": 1454 |
| }, |
| { |
| "epoch": 7.578125, |
| "grad_norm": 1.0212852954864502, |
| "learning_rate": 0.0002084745969051344, |
| "loss": 0.9068, |
| "step": 1455 |
| }, |
| { |
| "epoch": 7.583333333333333, |
| "grad_norm": 0.7210800051689148, |
| "learning_rate": 0.0002083604076291084, |
| "loss": 0.8836, |
| "step": 1456 |
| }, |
| { |
| "epoch": 7.588541666666667, |
| "grad_norm": 2.227121591567993, |
| "learning_rate": 0.00020824617848527995, |
| "loss": 0.9692, |
| "step": 1457 |
| }, |
| { |
| "epoch": 7.59375, |
| "grad_norm": 1.3703480958938599, |
| "learning_rate": 0.00020813190955168255, |
| "loss": 0.8895, |
| "step": 1458 |
| }, |
| { |
| "epoch": 7.598958333333333, |
| "grad_norm": 2.221235990524292, |
| "learning_rate": 0.0002080176009063767, |
| "loss": 0.8832, |
| "step": 1459 |
| }, |
| { |
| "epoch": 7.604166666666667, |
| "grad_norm": 1.8787826299667358, |
| "learning_rate": 0.00020790325262745024, |
| "loss": 0.9208, |
| "step": 1460 |
| }, |
| { |
| "epoch": 7.609375, |
| "grad_norm": 1.1941794157028198, |
| "learning_rate": 0.00020778886479301805, |
| "loss": 0.8694, |
| "step": 1461 |
| }, |
| { |
| "epoch": 7.614583333333333, |
| "grad_norm": 2.32602858543396, |
| "learning_rate": 0.00020767443748122196, |
| "loss": 0.872, |
| "step": 1462 |
| }, |
| { |
| "epoch": 7.619791666666667, |
| "grad_norm": 1.2279512882232666, |
| "learning_rate": 0.00020755997077023084, |
| "loss": 0.8653, |
| "step": 1463 |
| }, |
| { |
| "epoch": 7.625, |
| "grad_norm": 1.084899663925171, |
| "learning_rate": 0.00020744546473824046, |
| "loss": 0.8639, |
| "step": 1464 |
| }, |
| { |
| "epoch": 7.630208333333333, |
| "grad_norm": 1.2692832946777344, |
| "learning_rate": 0.0002073309194634734, |
| "loss": 0.9453, |
| "step": 1465 |
| }, |
| { |
| "epoch": 7.635416666666667, |
| "grad_norm": 1.310727596282959, |
| "learning_rate": 0.00020721633502417914, |
| "loss": 0.9123, |
| "step": 1466 |
| }, |
| { |
| "epoch": 7.640625, |
| "grad_norm": 1.2067586183547974, |
| "learning_rate": 0.0002071017114986338, |
| "loss": 0.7506, |
| "step": 1467 |
| }, |
| { |
| "epoch": 7.645833333333333, |
| "grad_norm": 2.0143823623657227, |
| "learning_rate": 0.0002069870489651403, |
| "loss": 0.9343, |
| "step": 1468 |
| }, |
| { |
| "epoch": 7.651041666666667, |
| "grad_norm": 1.2169349193572998, |
| "learning_rate": 0.00020687234750202814, |
| "loss": 0.8887, |
| "step": 1469 |
| }, |
| { |
| "epoch": 7.65625, |
| "grad_norm": 1.2079393863677979, |
| "learning_rate": 0.00020675760718765347, |
| "loss": 0.912, |
| "step": 1470 |
| }, |
| { |
| "epoch": 7.661458333333333, |
| "grad_norm": 1.108014464378357, |
| "learning_rate": 0.00020664282810039893, |
| "loss": 0.8833, |
| "step": 1471 |
| }, |
| { |
| "epoch": 7.666666666666667, |
| "grad_norm": 1.0924439430236816, |
| "learning_rate": 0.0002065280103186737, |
| "loss": 0.8812, |
| "step": 1472 |
| }, |
| { |
| "epoch": 7.671875, |
| "grad_norm": 2.0231122970581055, |
| "learning_rate": 0.0002064131539209133, |
| "loss": 0.897, |
| "step": 1473 |
| }, |
| { |
| "epoch": 7.677083333333333, |
| "grad_norm": 1.0176211595535278, |
| "learning_rate": 0.0002062982589855798, |
| "loss": 0.908, |
| "step": 1474 |
| }, |
| { |
| "epoch": 7.682291666666667, |
| "grad_norm": 1.0276072025299072, |
| "learning_rate": 0.0002061833255911614, |
| "loss": 0.9525, |
| "step": 1475 |
| }, |
| { |
| "epoch": 7.6875, |
| "grad_norm": 1.2676780223846436, |
| "learning_rate": 0.00020606835381617268, |
| "loss": 0.869, |
| "step": 1476 |
| }, |
| { |
| "epoch": 7.692708333333333, |
| "grad_norm": 1.111617088317871, |
| "learning_rate": 0.00020595334373915448, |
| "loss": 0.8313, |
| "step": 1477 |
| }, |
| { |
| "epoch": 7.697916666666667, |
| "grad_norm": 1.2492828369140625, |
| "learning_rate": 0.00020583829543867368, |
| "loss": 0.94, |
| "step": 1478 |
| }, |
| { |
| "epoch": 7.703125, |
| "grad_norm": 1.1704272031784058, |
| "learning_rate": 0.00020572320899332335, |
| "loss": 0.7923, |
| "step": 1479 |
| }, |
| { |
| "epoch": 7.708333333333333, |
| "grad_norm": 1.7118340730667114, |
| "learning_rate": 0.0002056080844817226, |
| "loss": 0.8256, |
| "step": 1480 |
| }, |
| { |
| "epoch": 7.713541666666667, |
| "grad_norm": 2.090040445327759, |
| "learning_rate": 0.0002054929219825166, |
| "loss": 0.9181, |
| "step": 1481 |
| }, |
| { |
| "epoch": 7.71875, |
| "grad_norm": 2.6840693950653076, |
| "learning_rate": 0.00020537772157437634, |
| "loss": 0.9581, |
| "step": 1482 |
| }, |
| { |
| "epoch": 7.723958333333333, |
| "grad_norm": 1.522383689880371, |
| "learning_rate": 0.00020526248333599886, |
| "loss": 0.9783, |
| "step": 1483 |
| }, |
| { |
| "epoch": 7.729166666666667, |
| "grad_norm": 1.7940455675125122, |
| "learning_rate": 0.00020514720734610698, |
| "loss": 0.9378, |
| "step": 1484 |
| }, |
| { |
| "epoch": 7.734375, |
| "grad_norm": 1.202690839767456, |
| "learning_rate": 0.00020503189368344923, |
| "loss": 0.8681, |
| "step": 1485 |
| }, |
| { |
| "epoch": 7.739583333333333, |
| "grad_norm": 0.8676372170448303, |
| "learning_rate": 0.00020491654242679998, |
| "loss": 0.8491, |
| "step": 1486 |
| }, |
| { |
| "epoch": 7.744791666666667, |
| "grad_norm": 1.774646520614624, |
| "learning_rate": 0.00020480115365495926, |
| "loss": 0.9647, |
| "step": 1487 |
| }, |
| { |
| "epoch": 7.75, |
| "grad_norm": 1.6225104331970215, |
| "learning_rate": 0.00020468572744675265, |
| "loss": 0.9287, |
| "step": 1488 |
| }, |
| { |
| "epoch": 7.755208333333333, |
| "grad_norm": 1.3308194875717163, |
| "learning_rate": 0.0002045702638810315, |
| "loss": 0.8665, |
| "step": 1489 |
| }, |
| { |
| "epoch": 7.760416666666667, |
| "grad_norm": 0.9340488314628601, |
| "learning_rate": 0.0002044547630366724, |
| "loss": 0.9712, |
| "step": 1490 |
| }, |
| { |
| "epoch": 7.765625, |
| "grad_norm": 1.1394914388656616, |
| "learning_rate": 0.00020433922499257768, |
| "loss": 0.902, |
| "step": 1491 |
| }, |
| { |
| "epoch": 7.770833333333333, |
| "grad_norm": 1.3907122611999512, |
| "learning_rate": 0.0002042236498276749, |
| "loss": 0.9029, |
| "step": 1492 |
| }, |
| { |
| "epoch": 7.776041666666667, |
| "grad_norm": 2.106663942337036, |
| "learning_rate": 0.00020410803762091706, |
| "loss": 0.9427, |
| "step": 1493 |
| }, |
| { |
| "epoch": 7.78125, |
| "grad_norm": 1.0980714559555054, |
| "learning_rate": 0.00020399238845128235, |
| "loss": 0.9589, |
| "step": 1494 |
| }, |
| { |
| "epoch": 7.786458333333333, |
| "grad_norm": 1.276893138885498, |
| "learning_rate": 0.00020387670239777438, |
| "loss": 0.8987, |
| "step": 1495 |
| }, |
| { |
| "epoch": 7.791666666666667, |
| "grad_norm": 1.5461565256118774, |
| "learning_rate": 0.0002037609795394219, |
| "loss": 0.8283, |
| "step": 1496 |
| }, |
| { |
| "epoch": 7.796875, |
| "grad_norm": 1.57701575756073, |
| "learning_rate": 0.00020364521995527876, |
| "loss": 0.8744, |
| "step": 1497 |
| }, |
| { |
| "epoch": 7.802083333333333, |
| "grad_norm": 0.9526175260543823, |
| "learning_rate": 0.00020352942372442386, |
| "loss": 0.9176, |
| "step": 1498 |
| }, |
| { |
| "epoch": 7.807291666666667, |
| "grad_norm": 1.047446608543396, |
| "learning_rate": 0.00020341359092596127, |
| "loss": 0.8593, |
| "step": 1499 |
| }, |
| { |
| "epoch": 7.8125, |
| "grad_norm": 0.8768670558929443, |
| "learning_rate": 0.0002032977216390199, |
| "loss": 0.8204, |
| "step": 1500 |
| }, |
| { |
| "epoch": 7.8125, |
| "eval_f1_macro": 0.2671045020220497, |
| "eval_loss": 1.0028856992721558, |
| "eval_runtime": 4.9843, |
| "eval_samples_per_second": 614.534, |
| "eval_steps_per_second": 9.63, |
| "step": 1500 |
| }, |
| { |
| "epoch": 7.817708333333333, |
| "grad_norm": 1.333977460861206, |
| "learning_rate": 0.0002031818159427537, |
| "loss": 0.9069, |
| "step": 1501 |
| }, |
| { |
| "epoch": 7.822916666666667, |
| "grad_norm": 1.5186134576797485, |
| "learning_rate": 0.0002030658739163414, |
| "loss": 0.9086, |
| "step": 1502 |
| }, |
| { |
| "epoch": 7.828125, |
| "grad_norm": 1.3570932149887085, |
| "learning_rate": 0.00020294989563898665, |
| "loss": 0.9918, |
| "step": 1503 |
| }, |
| { |
| "epoch": 7.833333333333333, |
| "grad_norm": 0.8266729712486267, |
| "learning_rate": 0.00020283388118991775, |
| "loss": 0.796, |
| "step": 1504 |
| }, |
| { |
| "epoch": 7.838541666666667, |
| "grad_norm": 1.4012207984924316, |
| "learning_rate": 0.0002027178306483878, |
| "loss": 0.8695, |
| "step": 1505 |
| }, |
| { |
| "epoch": 7.84375, |
| "grad_norm": 1.2174034118652344, |
| "learning_rate": 0.00020260174409367446, |
| "loss": 0.8112, |
| "step": 1506 |
| }, |
| { |
| "epoch": 7.848958333333333, |
| "grad_norm": 1.2859429121017456, |
| "learning_rate": 0.0002024856216050801, |
| "loss": 0.8525, |
| "step": 1507 |
| }, |
| { |
| "epoch": 7.854166666666667, |
| "grad_norm": 1.4697388410568237, |
| "learning_rate": 0.00020236946326193162, |
| "loss": 0.8898, |
| "step": 1508 |
| }, |
| { |
| "epoch": 7.859375, |
| "grad_norm": 2.531489610671997, |
| "learning_rate": 0.0002022532691435803, |
| "loss": 1.0225, |
| "step": 1509 |
| }, |
| { |
| "epoch": 7.864583333333333, |
| "grad_norm": 1.312649130821228, |
| "learning_rate": 0.000202137039329402, |
| "loss": 0.7977, |
| "step": 1510 |
| }, |
| { |
| "epoch": 7.869791666666667, |
| "grad_norm": 1.083827018737793, |
| "learning_rate": 0.00020202077389879693, |
| "loss": 0.8848, |
| "step": 1511 |
| }, |
| { |
| "epoch": 7.875, |
| "grad_norm": 0.8703586459159851, |
| "learning_rate": 0.00020190447293118953, |
| "loss": 0.8902, |
| "step": 1512 |
| }, |
| { |
| "epoch": 7.880208333333333, |
| "grad_norm": 0.9000101685523987, |
| "learning_rate": 0.00020178813650602868, |
| "loss": 0.9422, |
| "step": 1513 |
| }, |
| { |
| "epoch": 7.885416666666667, |
| "grad_norm": 1.0305346250534058, |
| "learning_rate": 0.00020167176470278729, |
| "loss": 0.8621, |
| "step": 1514 |
| }, |
| { |
| "epoch": 7.890625, |
| "grad_norm": 1.086289882659912, |
| "learning_rate": 0.00020155535760096264, |
| "loss": 0.9581, |
| "step": 1515 |
| }, |
| { |
| "epoch": 7.895833333333333, |
| "grad_norm": 0.9811847805976868, |
| "learning_rate": 0.000201438915280076, |
| "loss": 0.8656, |
| "step": 1516 |
| }, |
| { |
| "epoch": 7.901041666666667, |
| "grad_norm": 1.3435516357421875, |
| "learning_rate": 0.00020132243781967272, |
| "loss": 0.975, |
| "step": 1517 |
| }, |
| { |
| "epoch": 7.90625, |
| "grad_norm": 1.8136249780654907, |
| "learning_rate": 0.00020120592529932217, |
| "loss": 0.9194, |
| "step": 1518 |
| }, |
| { |
| "epoch": 7.911458333333333, |
| "grad_norm": 1.2509864568710327, |
| "learning_rate": 0.00020108937779861764, |
| "loss": 0.8556, |
| "step": 1519 |
| }, |
| { |
| "epoch": 7.916666666666667, |
| "grad_norm": 1.6279518604278564, |
| "learning_rate": 0.00020097279539717637, |
| "loss": 0.8857, |
| "step": 1520 |
| }, |
| { |
| "epoch": 7.921875, |
| "grad_norm": 1.670060157775879, |
| "learning_rate": 0.00020085617817463934, |
| "loss": 0.9232, |
| "step": 1521 |
| }, |
| { |
| "epoch": 7.927083333333333, |
| "grad_norm": 2.1818840503692627, |
| "learning_rate": 0.0002007395262106715, |
| "loss": 0.9238, |
| "step": 1522 |
| }, |
| { |
| "epoch": 7.932291666666667, |
| "grad_norm": 1.574806809425354, |
| "learning_rate": 0.00020062283958496133, |
| "loss": 0.8495, |
| "step": 1523 |
| }, |
| { |
| "epoch": 7.9375, |
| "grad_norm": 1.4423264265060425, |
| "learning_rate": 0.00020050611837722115, |
| "loss": 0.9907, |
| "step": 1524 |
| }, |
| { |
| "epoch": 7.942708333333333, |
| "grad_norm": 1.541987657546997, |
| "learning_rate": 0.00020038936266718674, |
| "loss": 0.8685, |
| "step": 1525 |
| }, |
| { |
| "epoch": 7.947916666666667, |
| "grad_norm": 1.876416802406311, |
| "learning_rate": 0.00020027257253461763, |
| "loss": 1.0203, |
| "step": 1526 |
| }, |
| { |
| "epoch": 7.953125, |
| "grad_norm": 0.9489941000938416, |
| "learning_rate": 0.00020015574805929673, |
| "loss": 0.9345, |
| "step": 1527 |
| }, |
| { |
| "epoch": 7.958333333333333, |
| "grad_norm": 0.7960556149482727, |
| "learning_rate": 0.00020003888932103048, |
| "loss": 0.8845, |
| "step": 1528 |
| }, |
| { |
| "epoch": 7.963541666666667, |
| "grad_norm": 1.0501737594604492, |
| "learning_rate": 0.00019992199639964867, |
| "loss": 0.8632, |
| "step": 1529 |
| }, |
| { |
| "epoch": 7.96875, |
| "grad_norm": 0.9765360355377197, |
| "learning_rate": 0.00019980506937500458, |
| "loss": 0.9713, |
| "step": 1530 |
| }, |
| { |
| "epoch": 7.973958333333333, |
| "grad_norm": 1.9877150058746338, |
| "learning_rate": 0.00019968810832697454, |
| "loss": 1.0352, |
| "step": 1531 |
| }, |
| { |
| "epoch": 7.979166666666667, |
| "grad_norm": 1.1884766817092896, |
| "learning_rate": 0.00019957111333545837, |
| "loss": 0.8546, |
| "step": 1532 |
| }, |
| { |
| "epoch": 7.984375, |
| "grad_norm": 0.9048582911491394, |
| "learning_rate": 0.0001994540844803789, |
| "loss": 0.8457, |
| "step": 1533 |
| }, |
| { |
| "epoch": 7.989583333333333, |
| "grad_norm": 1.4616566896438599, |
| "learning_rate": 0.00019933702184168222, |
| "loss": 0.8252, |
| "step": 1534 |
| }, |
| { |
| "epoch": 7.994791666666667, |
| "grad_norm": 1.6708563566207886, |
| "learning_rate": 0.0001992199254993374, |
| "loss": 0.8422, |
| "step": 1535 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 1.6911219358444214, |
| "learning_rate": 0.00019910279553333665, |
| "loss": 0.7669, |
| "step": 1536 |
| }, |
| { |
| "epoch": 8.005208333333334, |
| "grad_norm": 1.7075163125991821, |
| "learning_rate": 0.00019898563202369498, |
| "loss": 0.8986, |
| "step": 1537 |
| }, |
| { |
| "epoch": 8.010416666666666, |
| "grad_norm": 2.1228349208831787, |
| "learning_rate": 0.00019886843505045055, |
| "loss": 0.8975, |
| "step": 1538 |
| }, |
| { |
| "epoch": 8.015625, |
| "grad_norm": 2.0353777408599854, |
| "learning_rate": 0.0001987512046936641, |
| "loss": 1.0308, |
| "step": 1539 |
| }, |
| { |
| "epoch": 8.020833333333334, |
| "grad_norm": 1.5351810455322266, |
| "learning_rate": 0.00019863394103341935, |
| "loss": 0.9468, |
| "step": 1540 |
| }, |
| { |
| "epoch": 8.026041666666666, |
| "grad_norm": 1.0597927570343018, |
| "learning_rate": 0.00019851664414982282, |
| "loss": 0.7844, |
| "step": 1541 |
| }, |
| { |
| "epoch": 8.03125, |
| "grad_norm": 1.1958881616592407, |
| "learning_rate": 0.0001983993141230036, |
| "loss": 0.8147, |
| "step": 1542 |
| }, |
| { |
| "epoch": 8.036458333333334, |
| "grad_norm": 1.1445024013519287, |
| "learning_rate": 0.00019828195103311347, |
| "loss": 0.9448, |
| "step": 1543 |
| }, |
| { |
| "epoch": 8.041666666666666, |
| "grad_norm": 0.9409570693969727, |
| "learning_rate": 0.00019816455496032677, |
| "loss": 0.7713, |
| "step": 1544 |
| }, |
| { |
| "epoch": 8.046875, |
| "grad_norm": 1.4979612827301025, |
| "learning_rate": 0.00019804712598484037, |
| "loss": 0.8723, |
| "step": 1545 |
| }, |
| { |
| "epoch": 8.052083333333334, |
| "grad_norm": 1.4379620552062988, |
| "learning_rate": 0.0001979296641868737, |
| "loss": 0.8708, |
| "step": 1546 |
| }, |
| { |
| "epoch": 8.057291666666666, |
| "grad_norm": 1.1967267990112305, |
| "learning_rate": 0.00019781216964666855, |
| "loss": 0.8027, |
| "step": 1547 |
| }, |
| { |
| "epoch": 8.0625, |
| "grad_norm": 1.0533971786499023, |
| "learning_rate": 0.00019769464244448901, |
| "loss": 0.9116, |
| "step": 1548 |
| }, |
| { |
| "epoch": 8.067708333333334, |
| "grad_norm": 1.689709186553955, |
| "learning_rate": 0.00019757708266062167, |
| "loss": 1.0015, |
| "step": 1549 |
| }, |
| { |
| "epoch": 8.072916666666666, |
| "grad_norm": 1.1493604183197021, |
| "learning_rate": 0.00019745949037537516, |
| "loss": 0.8578, |
| "step": 1550 |
| }, |
| { |
| "epoch": 8.078125, |
| "grad_norm": 1.2346526384353638, |
| "learning_rate": 0.00019734186566908047, |
| "loss": 0.848, |
| "step": 1551 |
| }, |
| { |
| "epoch": 8.083333333333334, |
| "grad_norm": 1.5721527338027954, |
| "learning_rate": 0.00019722420862209065, |
| "loss": 0.8518, |
| "step": 1552 |
| }, |
| { |
| "epoch": 8.088541666666666, |
| "grad_norm": 1.1285545825958252, |
| "learning_rate": 0.00019710651931478088, |
| "loss": 0.7855, |
| "step": 1553 |
| }, |
| { |
| "epoch": 8.09375, |
| "grad_norm": 0.8635709881782532, |
| "learning_rate": 0.00019698879782754835, |
| "loss": 0.8361, |
| "step": 1554 |
| }, |
| { |
| "epoch": 8.098958333333334, |
| "grad_norm": 0.9391288161277771, |
| "learning_rate": 0.00019687104424081231, |
| "loss": 0.8432, |
| "step": 1555 |
| }, |
| { |
| "epoch": 8.104166666666666, |
| "grad_norm": 1.8131964206695557, |
| "learning_rate": 0.00019675325863501386, |
| "loss": 0.9466, |
| "step": 1556 |
| }, |
| { |
| "epoch": 8.109375, |
| "grad_norm": 1.376322627067566, |
| "learning_rate": 0.00019663544109061598, |
| "loss": 0.9165, |
| "step": 1557 |
| }, |
| { |
| "epoch": 8.114583333333334, |
| "grad_norm": 1.7109417915344238, |
| "learning_rate": 0.0001965175916881035, |
| "loss": 0.9216, |
| "step": 1558 |
| }, |
| { |
| "epoch": 8.119791666666666, |
| "grad_norm": 2.0198328495025635, |
| "learning_rate": 0.00019639971050798295, |
| "loss": 0.9223, |
| "step": 1559 |
| }, |
| { |
| "epoch": 8.125, |
| "grad_norm": 1.2279762029647827, |
| "learning_rate": 0.00019628179763078268, |
| "loss": 0.8897, |
| "step": 1560 |
| }, |
| { |
| "epoch": 8.130208333333334, |
| "grad_norm": 1.0138179063796997, |
| "learning_rate": 0.00019616385313705266, |
| "loss": 0.8973, |
| "step": 1561 |
| }, |
| { |
| "epoch": 8.135416666666666, |
| "grad_norm": 2.1141295433044434, |
| "learning_rate": 0.0001960458771073643, |
| "loss": 0.8248, |
| "step": 1562 |
| }, |
| { |
| "epoch": 8.140625, |
| "grad_norm": 0.8939463496208191, |
| "learning_rate": 0.00019592786962231083, |
| "loss": 0.9369, |
| "step": 1563 |
| }, |
| { |
| "epoch": 8.145833333333334, |
| "grad_norm": 1.401039958000183, |
| "learning_rate": 0.0001958098307625067, |
| "loss": 0.8646, |
| "step": 1564 |
| }, |
| { |
| "epoch": 8.151041666666666, |
| "grad_norm": 0.9621601104736328, |
| "learning_rate": 0.00019569176060858797, |
| "loss": 0.9407, |
| "step": 1565 |
| }, |
| { |
| "epoch": 8.15625, |
| "grad_norm": 2.1736552715301514, |
| "learning_rate": 0.00019557365924121196, |
| "loss": 0.8678, |
| "step": 1566 |
| }, |
| { |
| "epoch": 8.161458333333334, |
| "grad_norm": 1.1304179430007935, |
| "learning_rate": 0.0001954555267410575, |
| "loss": 0.8028, |
| "step": 1567 |
| }, |
| { |
| "epoch": 8.166666666666666, |
| "grad_norm": 1.3096349239349365, |
| "learning_rate": 0.0001953373631888244, |
| "loss": 0.9294, |
| "step": 1568 |
| }, |
| { |
| "epoch": 8.171875, |
| "grad_norm": 1.4034409523010254, |
| "learning_rate": 0.00019521916866523402, |
| "loss": 0.9736, |
| "step": 1569 |
| }, |
| { |
| "epoch": 8.177083333333334, |
| "grad_norm": 1.661106824874878, |
| "learning_rate": 0.00019510094325102853, |
| "loss": 0.7912, |
| "step": 1570 |
| }, |
| { |
| "epoch": 8.182291666666666, |
| "grad_norm": 2.2189064025878906, |
| "learning_rate": 0.00019498268702697148, |
| "loss": 0.8222, |
| "step": 1571 |
| }, |
| { |
| "epoch": 8.1875, |
| "grad_norm": 1.7093162536621094, |
| "learning_rate": 0.00019486440007384728, |
| "loss": 0.8903, |
| "step": 1572 |
| }, |
| { |
| "epoch": 8.192708333333334, |
| "grad_norm": 1.1843113899230957, |
| "learning_rate": 0.00019474608247246154, |
| "loss": 0.8921, |
| "step": 1573 |
| }, |
| { |
| "epoch": 8.197916666666666, |
| "grad_norm": 1.780982494354248, |
| "learning_rate": 0.00019462773430364055, |
| "loss": 0.852, |
| "step": 1574 |
| }, |
| { |
| "epoch": 8.203125, |
| "grad_norm": 1.6011700630187988, |
| "learning_rate": 0.0001945093556482317, |
| "loss": 0.7595, |
| "step": 1575 |
| }, |
| { |
| "epoch": 8.208333333333334, |
| "grad_norm": 1.6182771921157837, |
| "learning_rate": 0.000194390946587103, |
| "loss": 0.9999, |
| "step": 1576 |
| }, |
| { |
| "epoch": 8.213541666666666, |
| "grad_norm": 1.151809811592102, |
| "learning_rate": 0.00019427250720114352, |
| "loss": 0.8638, |
| "step": 1577 |
| }, |
| { |
| "epoch": 8.21875, |
| "grad_norm": 1.0961347818374634, |
| "learning_rate": 0.00019415403757126274, |
| "loss": 0.9307, |
| "step": 1578 |
| }, |
| { |
| "epoch": 8.223958333333334, |
| "grad_norm": 1.1597023010253906, |
| "learning_rate": 0.00019403553777839095, |
| "loss": 0.8457, |
| "step": 1579 |
| }, |
| { |
| "epoch": 8.229166666666666, |
| "grad_norm": 1.3062262535095215, |
| "learning_rate": 0.00019391700790347905, |
| "loss": 0.8448, |
| "step": 1580 |
| }, |
| { |
| "epoch": 8.234375, |
| "grad_norm": 1.092405080795288, |
| "learning_rate": 0.00019379844802749855, |
| "loss": 0.8544, |
| "step": 1581 |
| }, |
| { |
| "epoch": 8.239583333333334, |
| "grad_norm": 1.1081969738006592, |
| "learning_rate": 0.00019367985823144122, |
| "loss": 0.7947, |
| "step": 1582 |
| }, |
| { |
| "epoch": 8.244791666666666, |
| "grad_norm": 1.033085823059082, |
| "learning_rate": 0.00019356123859631957, |
| "loss": 0.851, |
| "step": 1583 |
| }, |
| { |
| "epoch": 8.25, |
| "grad_norm": 1.009354829788208, |
| "learning_rate": 0.0001934425892031662, |
| "loss": 0.8403, |
| "step": 1584 |
| }, |
| { |
| "epoch": 8.255208333333334, |
| "grad_norm": 1.08565354347229, |
| "learning_rate": 0.0001933239101330343, |
| "loss": 0.9419, |
| "step": 1585 |
| }, |
| { |
| "epoch": 8.260416666666666, |
| "grad_norm": 1.2444456815719604, |
| "learning_rate": 0.00019320520146699717, |
| "loss": 0.8448, |
| "step": 1586 |
| }, |
| { |
| "epoch": 8.265625, |
| "grad_norm": 1.000773310661316, |
| "learning_rate": 0.00019308646328614846, |
| "loss": 0.9441, |
| "step": 1587 |
| }, |
| { |
| "epoch": 8.270833333333334, |
| "grad_norm": 1.8746178150177002, |
| "learning_rate": 0.00019296769567160176, |
| "loss": 0.8003, |
| "step": 1588 |
| }, |
| { |
| "epoch": 8.276041666666666, |
| "grad_norm": 1.0205769538879395, |
| "learning_rate": 0.000192848898704491, |
| "loss": 0.8381, |
| "step": 1589 |
| }, |
| { |
| "epoch": 8.28125, |
| "grad_norm": 1.5520131587982178, |
| "learning_rate": 0.00019273007246597003, |
| "loss": 0.886, |
| "step": 1590 |
| }, |
| { |
| "epoch": 8.286458333333334, |
| "grad_norm": 1.3089935779571533, |
| "learning_rate": 0.00019261121703721283, |
| "loss": 0.8694, |
| "step": 1591 |
| }, |
| { |
| "epoch": 8.291666666666666, |
| "grad_norm": 1.7694345712661743, |
| "learning_rate": 0.00019249233249941304, |
| "loss": 0.865, |
| "step": 1592 |
| }, |
| { |
| "epoch": 8.296875, |
| "grad_norm": 0.9227246642112732, |
| "learning_rate": 0.00019237341893378452, |
| "loss": 0.8495, |
| "step": 1593 |
| }, |
| { |
| "epoch": 8.302083333333334, |
| "grad_norm": 1.5196970701217651, |
| "learning_rate": 0.00019225447642156083, |
| "loss": 0.8693, |
| "step": 1594 |
| }, |
| { |
| "epoch": 8.307291666666666, |
| "grad_norm": 1.0794928073883057, |
| "learning_rate": 0.00019213550504399517, |
| "loss": 0.7752, |
| "step": 1595 |
| }, |
| { |
| "epoch": 8.3125, |
| "grad_norm": 1.0978753566741943, |
| "learning_rate": 0.00019201650488236062, |
| "loss": 0.7135, |
| "step": 1596 |
| }, |
| { |
| "epoch": 8.317708333333334, |
| "grad_norm": 0.8486403226852417, |
| "learning_rate": 0.0001918974760179499, |
| "loss": 0.8411, |
| "step": 1597 |
| }, |
| { |
| "epoch": 8.322916666666666, |
| "grad_norm": 1.3490883111953735, |
| "learning_rate": 0.0001917784185320753, |
| "loss": 0.8847, |
| "step": 1598 |
| }, |
| { |
| "epoch": 8.328125, |
| "grad_norm": 1.1354326009750366, |
| "learning_rate": 0.00019165933250606866, |
| "loss": 0.8751, |
| "step": 1599 |
| }, |
| { |
| "epoch": 8.333333333333334, |
| "grad_norm": 1.4168822765350342, |
| "learning_rate": 0.0001915402180212814, |
| "loss": 0.85, |
| "step": 1600 |
| }, |
| { |
| "epoch": 8.333333333333334, |
| "eval_f1_macro": 0.2573556773696306, |
| "eval_loss": 1.0050162076950073, |
| "eval_runtime": 4.9523, |
| "eval_samples_per_second": 618.498, |
| "eval_steps_per_second": 9.692, |
| "step": 1600 |
| }, |
| { |
| "epoch": 8.338541666666666, |
| "grad_norm": 1.3757166862487793, |
| "learning_rate": 0.00019142107515908422, |
| "loss": 0.8905, |
| "step": 1601 |
| }, |
| { |
| "epoch": 8.34375, |
| "grad_norm": 1.3433445692062378, |
| "learning_rate": 0.0001913019040008674, |
| "loss": 0.8651, |
| "step": 1602 |
| }, |
| { |
| "epoch": 8.348958333333334, |
| "grad_norm": 1.0341925621032715, |
| "learning_rate": 0.00019118270462804035, |
| "loss": 0.862, |
| "step": 1603 |
| }, |
| { |
| "epoch": 8.354166666666666, |
| "grad_norm": 1.1494280099868774, |
| "learning_rate": 0.0001910634771220319, |
| "loss": 0.9027, |
| "step": 1604 |
| }, |
| { |
| "epoch": 8.359375, |
| "grad_norm": 1.7477883100509644, |
| "learning_rate": 0.00019094422156429005, |
| "loss": 0.8248, |
| "step": 1605 |
| }, |
| { |
| "epoch": 8.364583333333334, |
| "grad_norm": 1.6592377424240112, |
| "learning_rate": 0.00019082493803628203, |
| "loss": 0.8216, |
| "step": 1606 |
| }, |
| { |
| "epoch": 8.369791666666666, |
| "grad_norm": 1.910586953163147, |
| "learning_rate": 0.000190705626619494, |
| "loss": 0.8666, |
| "step": 1607 |
| }, |
| { |
| "epoch": 8.375, |
| "grad_norm": 1.2351220846176147, |
| "learning_rate": 0.00019058628739543134, |
| "loss": 0.9608, |
| "step": 1608 |
| }, |
| { |
| "epoch": 8.380208333333334, |
| "grad_norm": 1.4847471714019775, |
| "learning_rate": 0.00019046692044561838, |
| "loss": 0.9541, |
| "step": 1609 |
| }, |
| { |
| "epoch": 8.385416666666666, |
| "grad_norm": 1.34902822971344, |
| "learning_rate": 0.00019034752585159838, |
| "loss": 0.9831, |
| "step": 1610 |
| }, |
| { |
| "epoch": 8.390625, |
| "grad_norm": 1.1088131666183472, |
| "learning_rate": 0.00019022810369493344, |
| "loss": 0.9015, |
| "step": 1611 |
| }, |
| { |
| "epoch": 8.395833333333334, |
| "grad_norm": 1.0606215000152588, |
| "learning_rate": 0.0001901086540572046, |
| "loss": 0.8169, |
| "step": 1612 |
| }, |
| { |
| "epoch": 8.401041666666666, |
| "grad_norm": 2.081139087677002, |
| "learning_rate": 0.0001899891770200116, |
| "loss": 0.8263, |
| "step": 1613 |
| }, |
| { |
| "epoch": 8.40625, |
| "grad_norm": 1.2573264837265015, |
| "learning_rate": 0.0001898696726649729, |
| "loss": 0.8432, |
| "step": 1614 |
| }, |
| { |
| "epoch": 8.411458333333334, |
| "grad_norm": 1.4664214849472046, |
| "learning_rate": 0.00018975014107372557, |
| "loss": 0.8251, |
| "step": 1615 |
| }, |
| { |
| "epoch": 8.416666666666666, |
| "grad_norm": 1.2317558526992798, |
| "learning_rate": 0.00018963058232792542, |
| "loss": 0.8865, |
| "step": 1616 |
| }, |
| { |
| "epoch": 8.421875, |
| "grad_norm": 1.1491007804870605, |
| "learning_rate": 0.00018951099650924664, |
| "loss": 0.8923, |
| "step": 1617 |
| }, |
| { |
| "epoch": 8.427083333333334, |
| "grad_norm": 1.1793814897537231, |
| "learning_rate": 0.00018939138369938215, |
| "loss": 0.8741, |
| "step": 1618 |
| }, |
| { |
| "epoch": 8.432291666666666, |
| "grad_norm": 1.700282335281372, |
| "learning_rate": 0.00018927174398004304, |
| "loss": 0.7696, |
| "step": 1619 |
| }, |
| { |
| "epoch": 8.4375, |
| "grad_norm": 1.7441513538360596, |
| "learning_rate": 0.00018915207743295896, |
| "loss": 0.8602, |
| "step": 1620 |
| }, |
| { |
| "epoch": 8.442708333333334, |
| "grad_norm": 1.3243112564086914, |
| "learning_rate": 0.00018903238413987781, |
| "loss": 0.7544, |
| "step": 1621 |
| }, |
| { |
| "epoch": 8.447916666666666, |
| "grad_norm": 2.235153913497925, |
| "learning_rate": 0.00018891266418256586, |
| "loss": 0.9348, |
| "step": 1622 |
| }, |
| { |
| "epoch": 8.453125, |
| "grad_norm": 1.850517988204956, |
| "learning_rate": 0.0001887929176428074, |
| "loss": 0.8175, |
| "step": 1623 |
| }, |
| { |
| "epoch": 8.458333333333334, |
| "grad_norm": 1.178423523902893, |
| "learning_rate": 0.0001886731446024051, |
| "loss": 0.932, |
| "step": 1624 |
| }, |
| { |
| "epoch": 8.463541666666666, |
| "grad_norm": 1.3114804029464722, |
| "learning_rate": 0.0001885533451431796, |
| "loss": 0.7823, |
| "step": 1625 |
| }, |
| { |
| "epoch": 8.46875, |
| "grad_norm": 1.0685346126556396, |
| "learning_rate": 0.00018843351934696962, |
| "loss": 0.8454, |
| "step": 1626 |
| }, |
| { |
| "epoch": 8.473958333333334, |
| "grad_norm": 1.4516304731369019, |
| "learning_rate": 0.0001883136672956319, |
| "loss": 0.8853, |
| "step": 1627 |
| }, |
| { |
| "epoch": 8.479166666666666, |
| "grad_norm": 1.3181425333023071, |
| "learning_rate": 0.00018819378907104106, |
| "loss": 0.9031, |
| "step": 1628 |
| }, |
| { |
| "epoch": 8.484375, |
| "grad_norm": 2.178090810775757, |
| "learning_rate": 0.00018807388475508964, |
| "loss": 1.0161, |
| "step": 1629 |
| }, |
| { |
| "epoch": 8.489583333333334, |
| "grad_norm": 2.6315276622772217, |
| "learning_rate": 0.00018795395442968796, |
| "loss": 0.8516, |
| "step": 1630 |
| }, |
| { |
| "epoch": 8.494791666666666, |
| "grad_norm": 1.0097579956054688, |
| "learning_rate": 0.00018783399817676421, |
| "loss": 0.9679, |
| "step": 1631 |
| }, |
| { |
| "epoch": 8.5, |
| "grad_norm": 1.837315320968628, |
| "learning_rate": 0.00018771401607826414, |
| "loss": 0.933, |
| "step": 1632 |
| }, |
| { |
| "epoch": 8.505208333333334, |
| "grad_norm": 1.2252494096755981, |
| "learning_rate": 0.0001875940082161513, |
| "loss": 0.9469, |
| "step": 1633 |
| }, |
| { |
| "epoch": 8.510416666666666, |
| "grad_norm": 1.161903738975525, |
| "learning_rate": 0.0001874739746724068, |
| "loss": 0.8614, |
| "step": 1634 |
| }, |
| { |
| "epoch": 8.515625, |
| "grad_norm": 1.968441128730774, |
| "learning_rate": 0.00018735391552902918, |
| "loss": 0.8952, |
| "step": 1635 |
| }, |
| { |
| "epoch": 8.520833333333334, |
| "grad_norm": 1.0104209184646606, |
| "learning_rate": 0.00018723383086803464, |
| "loss": 0.9189, |
| "step": 1636 |
| }, |
| { |
| "epoch": 8.526041666666666, |
| "grad_norm": 1.2697433233261108, |
| "learning_rate": 0.0001871137207714567, |
| "loss": 1.001, |
| "step": 1637 |
| }, |
| { |
| "epoch": 8.53125, |
| "grad_norm": 1.207429051399231, |
| "learning_rate": 0.00018699358532134629, |
| "loss": 0.9628, |
| "step": 1638 |
| }, |
| { |
| "epoch": 8.536458333333334, |
| "grad_norm": 1.7252628803253174, |
| "learning_rate": 0.00018687342459977168, |
| "loss": 0.8368, |
| "step": 1639 |
| }, |
| { |
| "epoch": 8.541666666666666, |
| "grad_norm": 2.353644847869873, |
| "learning_rate": 0.00018675323868881834, |
| "loss": 0.8451, |
| "step": 1640 |
| }, |
| { |
| "epoch": 8.546875, |
| "grad_norm": 1.3986711502075195, |
| "learning_rate": 0.00018663302767058903, |
| "loss": 0.8657, |
| "step": 1641 |
| }, |
| { |
| "epoch": 8.552083333333334, |
| "grad_norm": 1.5077753067016602, |
| "learning_rate": 0.00018651279162720357, |
| "loss": 0.9385, |
| "step": 1642 |
| }, |
| { |
| "epoch": 8.557291666666666, |
| "grad_norm": 1.5071702003479004, |
| "learning_rate": 0.00018639253064079895, |
| "loss": 0.8233, |
| "step": 1643 |
| }, |
| { |
| "epoch": 8.5625, |
| "grad_norm": 1.439050555229187, |
| "learning_rate": 0.00018627224479352922, |
| "loss": 0.9344, |
| "step": 1644 |
| }, |
| { |
| "epoch": 8.567708333333334, |
| "grad_norm": 1.0051567554473877, |
| "learning_rate": 0.00018615193416756534, |
| "loss": 0.9101, |
| "step": 1645 |
| }, |
| { |
| "epoch": 8.572916666666666, |
| "grad_norm": 1.276497721672058, |
| "learning_rate": 0.0001860315988450952, |
| "loss": 0.8264, |
| "step": 1646 |
| }, |
| { |
| "epoch": 8.578125, |
| "grad_norm": 1.2876343727111816, |
| "learning_rate": 0.00018591123890832366, |
| "loss": 1.0334, |
| "step": 1647 |
| }, |
| { |
| "epoch": 8.583333333333334, |
| "grad_norm": 1.343522548675537, |
| "learning_rate": 0.00018579085443947221, |
| "loss": 0.8495, |
| "step": 1648 |
| }, |
| { |
| "epoch": 8.588541666666666, |
| "grad_norm": 1.131576418876648, |
| "learning_rate": 0.0001856704455207793, |
| "loss": 0.8003, |
| "step": 1649 |
| }, |
| { |
| "epoch": 8.59375, |
| "grad_norm": 1.6502807140350342, |
| "learning_rate": 0.00018555001223449988, |
| "loss": 0.7636, |
| "step": 1650 |
| }, |
| { |
| "epoch": 8.598958333333334, |
| "grad_norm": 2.0885956287384033, |
| "learning_rate": 0.00018542955466290583, |
| "loss": 0.9877, |
| "step": 1651 |
| }, |
| { |
| "epoch": 8.604166666666666, |
| "grad_norm": 1.4774620532989502, |
| "learning_rate": 0.00018530907288828532, |
| "loss": 0.7481, |
| "step": 1652 |
| }, |
| { |
| "epoch": 8.609375, |
| "grad_norm": 1.243220567703247, |
| "learning_rate": 0.00018518856699294327, |
| "loss": 0.8312, |
| "step": 1653 |
| }, |
| { |
| "epoch": 8.614583333333334, |
| "grad_norm": 1.2918946743011475, |
| "learning_rate": 0.00018506803705920088, |
| "loss": 0.8337, |
| "step": 1654 |
| }, |
| { |
| "epoch": 8.619791666666666, |
| "grad_norm": 1.2461190223693848, |
| "learning_rate": 0.00018494748316939602, |
| "loss": 0.9495, |
| "step": 1655 |
| }, |
| { |
| "epoch": 8.625, |
| "grad_norm": 1.1804251670837402, |
| "learning_rate": 0.0001848269054058826, |
| "loss": 0.8274, |
| "step": 1656 |
| }, |
| { |
| "epoch": 8.630208333333334, |
| "grad_norm": 1.7044153213500977, |
| "learning_rate": 0.0001847063038510312, |
| "loss": 0.9257, |
| "step": 1657 |
| }, |
| { |
| "epoch": 8.635416666666666, |
| "grad_norm": 1.3898588418960571, |
| "learning_rate": 0.00018458567858722842, |
| "loss": 0.8639, |
| "step": 1658 |
| }, |
| { |
| "epoch": 8.640625, |
| "grad_norm": 1.1398965120315552, |
| "learning_rate": 0.00018446502969687712, |
| "loss": 0.8427, |
| "step": 1659 |
| }, |
| { |
| "epoch": 8.645833333333334, |
| "grad_norm": 1.221265435218811, |
| "learning_rate": 0.00018434435726239626, |
| "loss": 0.8091, |
| "step": 1660 |
| }, |
| { |
| "epoch": 8.651041666666666, |
| "grad_norm": 1.1786211729049683, |
| "learning_rate": 0.00018422366136622095, |
| "loss": 0.865, |
| "step": 1661 |
| }, |
| { |
| "epoch": 8.65625, |
| "grad_norm": 1.2030609846115112, |
| "learning_rate": 0.00018410294209080227, |
| "loss": 0.9024, |
| "step": 1662 |
| }, |
| { |
| "epoch": 8.661458333333334, |
| "grad_norm": 1.934684157371521, |
| "learning_rate": 0.00018398219951860735, |
| "loss": 0.9429, |
| "step": 1663 |
| }, |
| { |
| "epoch": 8.666666666666666, |
| "grad_norm": 1.727907657623291, |
| "learning_rate": 0.0001838614337321191, |
| "loss": 0.8645, |
| "step": 1664 |
| }, |
| { |
| "epoch": 8.671875, |
| "grad_norm": 1.9125276803970337, |
| "learning_rate": 0.0001837406448138365, |
| "loss": 0.824, |
| "step": 1665 |
| }, |
| { |
| "epoch": 8.677083333333334, |
| "grad_norm": 1.4167234897613525, |
| "learning_rate": 0.00018361983284627406, |
| "loss": 0.8661, |
| "step": 1666 |
| }, |
| { |
| "epoch": 8.682291666666666, |
| "grad_norm": 1.499733567237854, |
| "learning_rate": 0.00018349899791196228, |
| "loss": 0.877, |
| "step": 1667 |
| }, |
| { |
| "epoch": 8.6875, |
| "grad_norm": 1.6418204307556152, |
| "learning_rate": 0.00018337814009344714, |
| "loss": 0.9638, |
| "step": 1668 |
| }, |
| { |
| "epoch": 8.692708333333334, |
| "grad_norm": 1.8527815341949463, |
| "learning_rate": 0.00018325725947329044, |
| "loss": 0.8483, |
| "step": 1669 |
| }, |
| { |
| "epoch": 8.697916666666666, |
| "grad_norm": 1.3495299816131592, |
| "learning_rate": 0.00018313635613406953, |
| "loss": 0.8521, |
| "step": 1670 |
| }, |
| { |
| "epoch": 8.703125, |
| "grad_norm": 1.5147218704223633, |
| "learning_rate": 0.0001830154301583771, |
| "loss": 0.9628, |
| "step": 1671 |
| }, |
| { |
| "epoch": 8.708333333333334, |
| "grad_norm": 0.8391832709312439, |
| "learning_rate": 0.00018289448162882147, |
| "loss": 0.9343, |
| "step": 1672 |
| }, |
| { |
| "epoch": 8.713541666666666, |
| "grad_norm": 1.4423229694366455, |
| "learning_rate": 0.00018277351062802641, |
| "loss": 0.9363, |
| "step": 1673 |
| }, |
| { |
| "epoch": 8.71875, |
| "grad_norm": 1.0093936920166016, |
| "learning_rate": 0.00018265251723863086, |
| "loss": 0.843, |
| "step": 1674 |
| }, |
| { |
| "epoch": 8.723958333333334, |
| "grad_norm": 1.3527741432189941, |
| "learning_rate": 0.00018253150154328918, |
| "loss": 0.9889, |
| "step": 1675 |
| }, |
| { |
| "epoch": 8.729166666666666, |
| "grad_norm": 1.2604354619979858, |
| "learning_rate": 0.000182410463624671, |
| "loss": 0.9267, |
| "step": 1676 |
| }, |
| { |
| "epoch": 8.734375, |
| "grad_norm": 1.2228010892868042, |
| "learning_rate": 0.00018228940356546097, |
| "loss": 0.8161, |
| "step": 1677 |
| }, |
| { |
| "epoch": 8.739583333333334, |
| "grad_norm": 1.4455665349960327, |
| "learning_rate": 0.00018216832144835905, |
| "loss": 0.9664, |
| "step": 1678 |
| }, |
| { |
| "epoch": 8.744791666666666, |
| "grad_norm": 1.0850530862808228, |
| "learning_rate": 0.0001820472173560801, |
| "loss": 0.8899, |
| "step": 1679 |
| }, |
| { |
| "epoch": 8.75, |
| "grad_norm": 1.7145651578903198, |
| "learning_rate": 0.0001819260913713542, |
| "loss": 0.9656, |
| "step": 1680 |
| }, |
| { |
| "epoch": 8.755208333333334, |
| "grad_norm": 1.3947546482086182, |
| "learning_rate": 0.00018180494357692613, |
| "loss": 0.9753, |
| "step": 1681 |
| }, |
| { |
| "epoch": 8.760416666666666, |
| "grad_norm": 2.7188243865966797, |
| "learning_rate": 0.0001816837740555558, |
| "loss": 0.8898, |
| "step": 1682 |
| }, |
| { |
| "epoch": 8.765625, |
| "grad_norm": 1.5973002910614014, |
| "learning_rate": 0.0001815625828900178, |
| "loss": 0.895, |
| "step": 1683 |
| }, |
| { |
| "epoch": 8.770833333333334, |
| "grad_norm": 1.4160178899765015, |
| "learning_rate": 0.00018144137016310163, |
| "loss": 0.9565, |
| "step": 1684 |
| }, |
| { |
| "epoch": 8.776041666666666, |
| "grad_norm": 1.7953330278396606, |
| "learning_rate": 0.0001813201359576114, |
| "loss": 0.9008, |
| "step": 1685 |
| }, |
| { |
| "epoch": 8.78125, |
| "grad_norm": 1.2504719495773315, |
| "learning_rate": 0.00018119888035636605, |
| "loss": 0.9783, |
| "step": 1686 |
| }, |
| { |
| "epoch": 8.786458333333334, |
| "grad_norm": 1.8528499603271484, |
| "learning_rate": 0.00018107760344219888, |
| "loss": 0.8897, |
| "step": 1687 |
| }, |
| { |
| "epoch": 8.791666666666666, |
| "grad_norm": 1.4942772388458252, |
| "learning_rate": 0.000180956305297958, |
| "loss": 0.9421, |
| "step": 1688 |
| }, |
| { |
| "epoch": 8.796875, |
| "grad_norm": 1.3330466747283936, |
| "learning_rate": 0.00018083498600650596, |
| "loss": 0.8779, |
| "step": 1689 |
| }, |
| { |
| "epoch": 8.802083333333334, |
| "grad_norm": 1.351995587348938, |
| "learning_rate": 0.00018071364565071967, |
| "loss": 0.8872, |
| "step": 1690 |
| }, |
| { |
| "epoch": 8.807291666666666, |
| "grad_norm": 0.9744947552680969, |
| "learning_rate": 0.0001805922843134905, |
| "loss": 0.8418, |
| "step": 1691 |
| }, |
| { |
| "epoch": 8.8125, |
| "grad_norm": 1.3473825454711914, |
| "learning_rate": 0.00018047090207772422, |
| "loss": 0.825, |
| "step": 1692 |
| }, |
| { |
| "epoch": 8.817708333333334, |
| "grad_norm": 1.2680867910385132, |
| "learning_rate": 0.00018034949902634065, |
| "loss": 0.7721, |
| "step": 1693 |
| }, |
| { |
| "epoch": 8.822916666666666, |
| "grad_norm": 1.686719536781311, |
| "learning_rate": 0.00018022807524227408, |
| "loss": 0.9616, |
| "step": 1694 |
| }, |
| { |
| "epoch": 8.828125, |
| "grad_norm": 2.3633172512054443, |
| "learning_rate": 0.00018010663080847276, |
| "loss": 0.9817, |
| "step": 1695 |
| }, |
| { |
| "epoch": 8.833333333333334, |
| "grad_norm": 1.2376353740692139, |
| "learning_rate": 0.00017998516580789928, |
| "loss": 0.8461, |
| "step": 1696 |
| }, |
| { |
| "epoch": 8.838541666666666, |
| "grad_norm": 1.4113950729370117, |
| "learning_rate": 0.00017986368032353006, |
| "loss": 0.864, |
| "step": 1697 |
| }, |
| { |
| "epoch": 8.84375, |
| "grad_norm": 1.1325446367263794, |
| "learning_rate": 0.00017974217443835562, |
| "loss": 0.9549, |
| "step": 1698 |
| }, |
| { |
| "epoch": 8.848958333333334, |
| "grad_norm": 1.4927231073379517, |
| "learning_rate": 0.00017962064823538035, |
| "loss": 1.0023, |
| "step": 1699 |
| }, |
| { |
| "epoch": 8.854166666666666, |
| "grad_norm": 2.522202491760254, |
| "learning_rate": 0.0001794991017976227, |
| "loss": 0.871, |
| "step": 1700 |
| }, |
| { |
| "epoch": 8.854166666666666, |
| "eval_f1_macro": 0.25328854896355385, |
| "eval_loss": 1.013994574546814, |
| "eval_runtime": 5.0037, |
| "eval_samples_per_second": 612.15, |
| "eval_steps_per_second": 9.593, |
| "step": 1700 |
| }, |
| { |
| "epoch": 8.859375, |
| "grad_norm": 1.055760145187378, |
| "learning_rate": 0.00017937753520811458, |
| "loss": 0.8, |
| "step": 1701 |
| }, |
| { |
| "epoch": 8.864583333333334, |
| "grad_norm": 1.2828651666641235, |
| "learning_rate": 0.0001792559485499021, |
| "loss": 0.8223, |
| "step": 1702 |
| }, |
| { |
| "epoch": 8.869791666666666, |
| "grad_norm": 1.3935633897781372, |
| "learning_rate": 0.0001791343419060448, |
| "loss": 0.8789, |
| "step": 1703 |
| }, |
| { |
| "epoch": 8.875, |
| "grad_norm": 2.1194071769714355, |
| "learning_rate": 0.00017901271535961593, |
| "loss": 0.9469, |
| "step": 1704 |
| }, |
| { |
| "epoch": 8.880208333333334, |
| "grad_norm": 1.864885926246643, |
| "learning_rate": 0.00017889106899370236, |
| "loss": 0.8724, |
| "step": 1705 |
| }, |
| { |
| "epoch": 8.885416666666666, |
| "grad_norm": 1.5563757419586182, |
| "learning_rate": 0.00017876940289140452, |
| "loss": 0.931, |
| "step": 1706 |
| }, |
| { |
| "epoch": 8.890625, |
| "grad_norm": 1.17184317111969, |
| "learning_rate": 0.00017864771713583624, |
| "loss": 0.9989, |
| "step": 1707 |
| }, |
| { |
| "epoch": 8.895833333333334, |
| "grad_norm": 2.1553313732147217, |
| "learning_rate": 0.00017852601181012485, |
| "loss": 0.9717, |
| "step": 1708 |
| }, |
| { |
| "epoch": 8.901041666666666, |
| "grad_norm": 1.2322522401809692, |
| "learning_rate": 0.00017840428699741106, |
| "loss": 0.9509, |
| "step": 1709 |
| }, |
| { |
| "epoch": 8.90625, |
| "grad_norm": 1.189863681793213, |
| "learning_rate": 0.00017828254278084887, |
| "loss": 0.8962, |
| "step": 1710 |
| }, |
| { |
| "epoch": 8.911458333333334, |
| "grad_norm": 1.2887623310089111, |
| "learning_rate": 0.00017816077924360545, |
| "loss": 0.917, |
| "step": 1711 |
| }, |
| { |
| "epoch": 8.916666666666666, |
| "grad_norm": 2.2879159450531006, |
| "learning_rate": 0.0001780389964688613, |
| "loss": 0.8585, |
| "step": 1712 |
| }, |
| { |
| "epoch": 8.921875, |
| "grad_norm": 1.058251142501831, |
| "learning_rate": 0.00017791719453981003, |
| "loss": 0.8189, |
| "step": 1713 |
| }, |
| { |
| "epoch": 8.927083333333334, |
| "grad_norm": 2.301994562149048, |
| "learning_rate": 0.0001777953735396582, |
| "loss": 0.8454, |
| "step": 1714 |
| }, |
| { |
| "epoch": 8.932291666666666, |
| "grad_norm": 1.592377781867981, |
| "learning_rate": 0.00017767353355162564, |
| "loss": 0.8744, |
| "step": 1715 |
| }, |
| { |
| "epoch": 8.9375, |
| "grad_norm": 1.311887264251709, |
| "learning_rate": 0.00017755167465894497, |
| "loss": 0.9902, |
| "step": 1716 |
| }, |
| { |
| "epoch": 8.942708333333334, |
| "grad_norm": 2.1754603385925293, |
| "learning_rate": 0.00017742979694486174, |
| "loss": 0.9609, |
| "step": 1717 |
| }, |
| { |
| "epoch": 8.947916666666666, |
| "grad_norm": 1.1010438203811646, |
| "learning_rate": 0.00017730790049263445, |
| "loss": 0.9336, |
| "step": 1718 |
| }, |
| { |
| "epoch": 8.953125, |
| "grad_norm": 1.9108693599700928, |
| "learning_rate": 0.00017718598538553432, |
| "loss": 0.8601, |
| "step": 1719 |
| }, |
| { |
| "epoch": 8.958333333333334, |
| "grad_norm": 1.5406994819641113, |
| "learning_rate": 0.00017706405170684535, |
| "loss": 0.8294, |
| "step": 1720 |
| }, |
| { |
| "epoch": 8.963541666666666, |
| "grad_norm": 1.6857560873031616, |
| "learning_rate": 0.00017694209953986415, |
| "loss": 0.9005, |
| "step": 1721 |
| }, |
| { |
| "epoch": 8.96875, |
| "grad_norm": 2.8644046783447266, |
| "learning_rate": 0.00017682012896790006, |
| "loss": 0.845, |
| "step": 1722 |
| }, |
| { |
| "epoch": 8.973958333333334, |
| "grad_norm": 2.3307111263275146, |
| "learning_rate": 0.000176698140074275, |
| "loss": 0.9471, |
| "step": 1723 |
| }, |
| { |
| "epoch": 8.979166666666666, |
| "grad_norm": 1.7393910884857178, |
| "learning_rate": 0.00017657613294232334, |
| "loss": 0.9705, |
| "step": 1724 |
| }, |
| { |
| "epoch": 8.984375, |
| "grad_norm": 1.2329258918762207, |
| "learning_rate": 0.00017645410765539194, |
| "loss": 0.8819, |
| "step": 1725 |
| }, |
| { |
| "epoch": 8.989583333333334, |
| "grad_norm": 1.442914366722107, |
| "learning_rate": 0.00017633206429683998, |
| "loss": 0.8937, |
| "step": 1726 |
| }, |
| { |
| "epoch": 8.994791666666666, |
| "grad_norm": 1.211586833000183, |
| "learning_rate": 0.0001762100029500391, |
| "loss": 1.0405, |
| "step": 1727 |
| }, |
| { |
| "epoch": 9.0, |
| "grad_norm": 1.484076976776123, |
| "learning_rate": 0.00017608792369837325, |
| "loss": 0.7441, |
| "step": 1728 |
| }, |
| { |
| "epoch": 9.005208333333334, |
| "grad_norm": 1.4238801002502441, |
| "learning_rate": 0.00017596582662523851, |
| "loss": 0.8863, |
| "step": 1729 |
| }, |
| { |
| "epoch": 9.010416666666666, |
| "grad_norm": 1.1415566205978394, |
| "learning_rate": 0.00017584371181404314, |
| "loss": 0.8313, |
| "step": 1730 |
| }, |
| { |
| "epoch": 9.015625, |
| "grad_norm": 0.9945783615112305, |
| "learning_rate": 0.00017572157934820758, |
| "loss": 0.8668, |
| "step": 1731 |
| }, |
| { |
| "epoch": 9.020833333333334, |
| "grad_norm": 1.2672406435012817, |
| "learning_rate": 0.0001755994293111643, |
| "loss": 0.9184, |
| "step": 1732 |
| }, |
| { |
| "epoch": 9.026041666666666, |
| "grad_norm": 1.0518169403076172, |
| "learning_rate": 0.00017547726178635776, |
| "loss": 0.9789, |
| "step": 1733 |
| }, |
| { |
| "epoch": 9.03125, |
| "grad_norm": 1.7924593687057495, |
| "learning_rate": 0.00017535507685724437, |
| "loss": 0.8418, |
| "step": 1734 |
| }, |
| { |
| "epoch": 9.036458333333334, |
| "grad_norm": 1.2205973863601685, |
| "learning_rate": 0.0001752328746072925, |
| "loss": 0.9068, |
| "step": 1735 |
| }, |
| { |
| "epoch": 9.041666666666666, |
| "grad_norm": 2.5946874618530273, |
| "learning_rate": 0.00017511065511998224, |
| "loss": 0.9082, |
| "step": 1736 |
| }, |
| { |
| "epoch": 9.046875, |
| "grad_norm": 1.8363667726516724, |
| "learning_rate": 0.00017498841847880556, |
| "loss": 0.8379, |
| "step": 1737 |
| }, |
| { |
| "epoch": 9.052083333333334, |
| "grad_norm": 2.259819746017456, |
| "learning_rate": 0.00017486616476726602, |
| "loss": 0.7696, |
| "step": 1738 |
| }, |
| { |
| "epoch": 9.057291666666666, |
| "grad_norm": 0.8072317838668823, |
| "learning_rate": 0.00017474389406887898, |
| "loss": 0.795, |
| "step": 1739 |
| }, |
| { |
| "epoch": 9.0625, |
| "grad_norm": 1.1685822010040283, |
| "learning_rate": 0.00017462160646717124, |
| "loss": 0.8804, |
| "step": 1740 |
| }, |
| { |
| "epoch": 9.067708333333334, |
| "grad_norm": 0.9135856032371521, |
| "learning_rate": 0.0001744993020456814, |
| "loss": 0.8776, |
| "step": 1741 |
| }, |
| { |
| "epoch": 9.072916666666666, |
| "grad_norm": 1.2591679096221924, |
| "learning_rate": 0.00017437698088795933, |
| "loss": 0.965, |
| "step": 1742 |
| }, |
| { |
| "epoch": 9.078125, |
| "grad_norm": 1.0872520208358765, |
| "learning_rate": 0.0001742546430775664, |
| "loss": 0.849, |
| "step": 1743 |
| }, |
| { |
| "epoch": 9.083333333333334, |
| "grad_norm": 0.9992260932922363, |
| "learning_rate": 0.00017413228869807534, |
| "loss": 0.901, |
| "step": 1744 |
| }, |
| { |
| "epoch": 9.088541666666666, |
| "grad_norm": 0.8910778164863586, |
| "learning_rate": 0.00017400991783307027, |
| "loss": 0.8458, |
| "step": 1745 |
| }, |
| { |
| "epoch": 9.09375, |
| "grad_norm": 1.5813016891479492, |
| "learning_rate": 0.00017388753056614642, |
| "loss": 1.0358, |
| "step": 1746 |
| }, |
| { |
| "epoch": 9.098958333333334, |
| "grad_norm": 1.0092264413833618, |
| "learning_rate": 0.00017376512698091045, |
| "loss": 0.8389, |
| "step": 1747 |
| }, |
| { |
| "epoch": 9.104166666666666, |
| "grad_norm": 1.9503432512283325, |
| "learning_rate": 0.00017364270716097992, |
| "loss": 0.8866, |
| "step": 1748 |
| }, |
| { |
| "epoch": 9.109375, |
| "grad_norm": 1.077916145324707, |
| "learning_rate": 0.00017352027118998372, |
| "loss": 0.9051, |
| "step": 1749 |
| }, |
| { |
| "epoch": 9.114583333333334, |
| "grad_norm": 0.9882513284683228, |
| "learning_rate": 0.00017339781915156157, |
| "loss": 0.8242, |
| "step": 1750 |
| }, |
| { |
| "epoch": 9.119791666666666, |
| "grad_norm": 0.7993467450141907, |
| "learning_rate": 0.00017327535112936432, |
| "loss": 0.7621, |
| "step": 1751 |
| }, |
| { |
| "epoch": 9.125, |
| "grad_norm": 0.8911805748939514, |
| "learning_rate": 0.00017315286720705358, |
| "loss": 0.8959, |
| "step": 1752 |
| }, |
| { |
| "epoch": 9.130208333333334, |
| "grad_norm": 1.3977391719818115, |
| "learning_rate": 0.00017303036746830198, |
| "loss": 0.8646, |
| "step": 1753 |
| }, |
| { |
| "epoch": 9.135416666666666, |
| "grad_norm": 1.3632495403289795, |
| "learning_rate": 0.00017290785199679286, |
| "loss": 0.9947, |
| "step": 1754 |
| }, |
| { |
| "epoch": 9.140625, |
| "grad_norm": 1.1790826320648193, |
| "learning_rate": 0.00017278532087622032, |
| "loss": 0.9303, |
| "step": 1755 |
| }, |
| { |
| "epoch": 9.145833333333334, |
| "grad_norm": 1.1971368789672852, |
| "learning_rate": 0.0001726627741902892, |
| "loss": 0.7645, |
| "step": 1756 |
| }, |
| { |
| "epoch": 9.151041666666666, |
| "grad_norm": 1.2460052967071533, |
| "learning_rate": 0.00017254021202271495, |
| "loss": 0.7615, |
| "step": 1757 |
| }, |
| { |
| "epoch": 9.15625, |
| "grad_norm": 0.8298244476318359, |
| "learning_rate": 0.0001724176344572235, |
| "loss": 0.9349, |
| "step": 1758 |
| }, |
| { |
| "epoch": 9.161458333333334, |
| "grad_norm": 0.9905279278755188, |
| "learning_rate": 0.0001722950415775515, |
| "loss": 0.8843, |
| "step": 1759 |
| }, |
| { |
| "epoch": 9.166666666666666, |
| "grad_norm": 1.3429685831069946, |
| "learning_rate": 0.0001721724334674459, |
| "loss": 0.9138, |
| "step": 1760 |
| }, |
| { |
| "epoch": 9.171875, |
| "grad_norm": 1.1189883947372437, |
| "learning_rate": 0.00017204981021066401, |
| "loss": 0.7665, |
| "step": 1761 |
| }, |
| { |
| "epoch": 9.177083333333334, |
| "grad_norm": 0.9278711676597595, |
| "learning_rate": 0.0001719271718909737, |
| "loss": 0.8787, |
| "step": 1762 |
| }, |
| { |
| "epoch": 9.182291666666666, |
| "grad_norm": 1.0220022201538086, |
| "learning_rate": 0.0001718045185921529, |
| "loss": 0.7685, |
| "step": 1763 |
| }, |
| { |
| "epoch": 9.1875, |
| "grad_norm": 1.8079322576522827, |
| "learning_rate": 0.00017168185039799, |
| "loss": 0.8952, |
| "step": 1764 |
| }, |
| { |
| "epoch": 9.192708333333334, |
| "grad_norm": 1.547468900680542, |
| "learning_rate": 0.00017155916739228333, |
| "loss": 0.9964, |
| "step": 1765 |
| }, |
| { |
| "epoch": 9.197916666666666, |
| "grad_norm": 1.2668606042861938, |
| "learning_rate": 0.00017143646965884148, |
| "loss": 1.028, |
| "step": 1766 |
| }, |
| { |
| "epoch": 9.203125, |
| "grad_norm": 1.464700698852539, |
| "learning_rate": 0.0001713137572814831, |
| "loss": 0.893, |
| "step": 1767 |
| }, |
| { |
| "epoch": 9.208333333333334, |
| "grad_norm": 2.200805187225342, |
| "learning_rate": 0.00017119103034403683, |
| "loss": 0.9534, |
| "step": 1768 |
| }, |
| { |
| "epoch": 9.213541666666666, |
| "grad_norm": 1.0847400426864624, |
| "learning_rate": 0.00017106828893034124, |
| "loss": 0.8129, |
| "step": 1769 |
| }, |
| { |
| "epoch": 9.21875, |
| "grad_norm": 1.123695731163025, |
| "learning_rate": 0.00017094553312424477, |
| "loss": 0.89, |
| "step": 1770 |
| }, |
| { |
| "epoch": 9.223958333333334, |
| "grad_norm": 2.078059434890747, |
| "learning_rate": 0.00017082276300960566, |
| "loss": 0.9177, |
| "step": 1771 |
| }, |
| { |
| "epoch": 9.229166666666666, |
| "grad_norm": 1.2454807758331299, |
| "learning_rate": 0.00017069997867029204, |
| "loss": 0.9148, |
| "step": 1772 |
| }, |
| { |
| "epoch": 9.234375, |
| "grad_norm": 2.7443206310272217, |
| "learning_rate": 0.0001705771801901817, |
| "loss": 1.0144, |
| "step": 1773 |
| }, |
| { |
| "epoch": 9.239583333333334, |
| "grad_norm": 1.376096248626709, |
| "learning_rate": 0.00017045436765316215, |
| "loss": 1.0428, |
| "step": 1774 |
| }, |
| { |
| "epoch": 9.244791666666666, |
| "grad_norm": 1.8344825506210327, |
| "learning_rate": 0.0001703315411431303, |
| "loss": 0.7785, |
| "step": 1775 |
| }, |
| { |
| "epoch": 9.25, |
| "grad_norm": 2.3690102100372314, |
| "learning_rate": 0.00017020870074399284, |
| "loss": 0.7994, |
| "step": 1776 |
| }, |
| { |
| "epoch": 9.255208333333334, |
| "grad_norm": 1.6408567428588867, |
| "learning_rate": 0.00017008584653966578, |
| "loss": 0.8534, |
| "step": 1777 |
| }, |
| { |
| "epoch": 9.260416666666666, |
| "grad_norm": 1.6116575002670288, |
| "learning_rate": 0.00016996297861407471, |
| "loss": 0.9231, |
| "step": 1778 |
| }, |
| { |
| "epoch": 9.265625, |
| "grad_norm": 1.0859854221343994, |
| "learning_rate": 0.00016984009705115445, |
| "loss": 0.8144, |
| "step": 1779 |
| }, |
| { |
| "epoch": 9.270833333333334, |
| "grad_norm": 1.2436550855636597, |
| "learning_rate": 0.00016971720193484927, |
| "loss": 0.8888, |
| "step": 1780 |
| }, |
| { |
| "epoch": 9.276041666666666, |
| "grad_norm": 1.1352794170379639, |
| "learning_rate": 0.0001695942933491126, |
| "loss": 0.8668, |
| "step": 1781 |
| }, |
| { |
| "epoch": 9.28125, |
| "grad_norm": 1.3962384462356567, |
| "learning_rate": 0.00016947137137790708, |
| "loss": 0.8356, |
| "step": 1782 |
| }, |
| { |
| "epoch": 9.286458333333334, |
| "grad_norm": 1.6355719566345215, |
| "learning_rate": 0.0001693484361052045, |
| "loss": 0.8422, |
| "step": 1783 |
| }, |
| { |
| "epoch": 9.291666666666666, |
| "grad_norm": 1.1523274183273315, |
| "learning_rate": 0.00016922548761498589, |
| "loss": 0.8213, |
| "step": 1784 |
| }, |
| { |
| "epoch": 9.296875, |
| "grad_norm": 1.0713026523590088, |
| "learning_rate": 0.00016910252599124095, |
| "loss": 0.8608, |
| "step": 1785 |
| }, |
| { |
| "epoch": 9.302083333333334, |
| "grad_norm": 0.9762910008430481, |
| "learning_rate": 0.0001689795513179688, |
| "loss": 0.8827, |
| "step": 1786 |
| }, |
| { |
| "epoch": 9.307291666666666, |
| "grad_norm": 1.3710393905639648, |
| "learning_rate": 0.0001688565636791771, |
| "loss": 0.8708, |
| "step": 1787 |
| }, |
| { |
| "epoch": 9.3125, |
| "grad_norm": 2.33774733543396, |
| "learning_rate": 0.00016873356315888258, |
| "loss": 0.8892, |
| "step": 1788 |
| }, |
| { |
| "epoch": 9.317708333333334, |
| "grad_norm": 1.1536335945129395, |
| "learning_rate": 0.00016861054984111063, |
| "loss": 0.7697, |
| "step": 1789 |
| }, |
| { |
| "epoch": 9.322916666666666, |
| "grad_norm": 1.9205913543701172, |
| "learning_rate": 0.0001684875238098956, |
| "loss": 0.9338, |
| "step": 1790 |
| }, |
| { |
| "epoch": 9.328125, |
| "grad_norm": 1.4130369424819946, |
| "learning_rate": 0.00016836448514928024, |
| "loss": 0.8153, |
| "step": 1791 |
| }, |
| { |
| "epoch": 9.333333333333334, |
| "grad_norm": 1.0258595943450928, |
| "learning_rate": 0.00016824143394331617, |
| "loss": 0.9344, |
| "step": 1792 |
| }, |
| { |
| "epoch": 9.338541666666666, |
| "grad_norm": 0.9560496807098389, |
| "learning_rate": 0.00016811837027606342, |
| "loss": 0.8894, |
| "step": 1793 |
| }, |
| { |
| "epoch": 9.34375, |
| "grad_norm": 1.4449131488800049, |
| "learning_rate": 0.00016799529423159058, |
| "loss": 0.9262, |
| "step": 1794 |
| }, |
| { |
| "epoch": 9.348958333333334, |
| "grad_norm": 1.5801047086715698, |
| "learning_rate": 0.0001678722058939748, |
| "loss": 0.9511, |
| "step": 1795 |
| }, |
| { |
| "epoch": 9.354166666666666, |
| "grad_norm": 1.2853633165359497, |
| "learning_rate": 0.00016774910534730146, |
| "loss": 0.9002, |
| "step": 1796 |
| }, |
| { |
| "epoch": 9.359375, |
| "grad_norm": 1.615765929222107, |
| "learning_rate": 0.0001676259926756644, |
| "loss": 0.925, |
| "step": 1797 |
| }, |
| { |
| "epoch": 9.364583333333334, |
| "grad_norm": 1.2230147123336792, |
| "learning_rate": 0.00016750286796316565, |
| "loss": 0.926, |
| "step": 1798 |
| }, |
| { |
| "epoch": 9.369791666666666, |
| "grad_norm": 1.0612201690673828, |
| "learning_rate": 0.00016737973129391559, |
| "loss": 0.8685, |
| "step": 1799 |
| }, |
| { |
| "epoch": 9.375, |
| "grad_norm": 1.9655784368515015, |
| "learning_rate": 0.0001672565827520326, |
| "loss": 0.8897, |
| "step": 1800 |
| }, |
| { |
| "epoch": 9.375, |
| "eval_f1_macro": 0.2694283841036222, |
| "eval_loss": 1.0087451934814453, |
| "eval_runtime": 4.9629, |
| "eval_samples_per_second": 617.176, |
| "eval_steps_per_second": 9.672, |
| "step": 1800 |
| }, |
| { |
| "epoch": 9.380208333333334, |
| "grad_norm": 2.1720430850982666, |
| "learning_rate": 0.00016713342242164332, |
| "loss": 0.7929, |
| "step": 1801 |
| }, |
| { |
| "epoch": 9.385416666666666, |
| "grad_norm": 1.6230685710906982, |
| "learning_rate": 0.00016701025038688236, |
| "loss": 0.9881, |
| "step": 1802 |
| }, |
| { |
| "epoch": 9.390625, |
| "grad_norm": 2.0377554893493652, |
| "learning_rate": 0.00016688706673189245, |
| "loss": 0.8387, |
| "step": 1803 |
| }, |
| { |
| "epoch": 9.395833333333334, |
| "grad_norm": 1.8742332458496094, |
| "learning_rate": 0.00016676387154082397, |
| "loss": 0.925, |
| "step": 1804 |
| }, |
| { |
| "epoch": 9.401041666666666, |
| "grad_norm": 1.164205551147461, |
| "learning_rate": 0.00016664066489783556, |
| "loss": 0.9351, |
| "step": 1805 |
| }, |
| { |
| "epoch": 9.40625, |
| "grad_norm": 0.8714650273323059, |
| "learning_rate": 0.00016651744688709342, |
| "loss": 0.8877, |
| "step": 1806 |
| }, |
| { |
| "epoch": 9.411458333333334, |
| "grad_norm": 0.9655963182449341, |
| "learning_rate": 0.00016639421759277157, |
| "loss": 0.8567, |
| "step": 1807 |
| }, |
| { |
| "epoch": 9.416666666666666, |
| "grad_norm": 1.2926838397979736, |
| "learning_rate": 0.00016627097709905175, |
| "loss": 0.9712, |
| "step": 1808 |
| }, |
| { |
| "epoch": 9.421875, |
| "grad_norm": 0.8868259787559509, |
| "learning_rate": 0.00016614772549012347, |
| "loss": 0.8556, |
| "step": 1809 |
| }, |
| { |
| "epoch": 9.427083333333334, |
| "grad_norm": 1.0273313522338867, |
| "learning_rate": 0.00016602446285018355, |
| "loss": 0.8703, |
| "step": 1810 |
| }, |
| { |
| "epoch": 9.432291666666666, |
| "grad_norm": 1.5909594297409058, |
| "learning_rate": 0.00016590118926343667, |
| "loss": 0.9383, |
| "step": 1811 |
| }, |
| { |
| "epoch": 9.4375, |
| "grad_norm": 0.8595499992370605, |
| "learning_rate": 0.00016577790481409475, |
| "loss": 0.8314, |
| "step": 1812 |
| }, |
| { |
| "epoch": 9.442708333333334, |
| "grad_norm": 0.9614971876144409, |
| "learning_rate": 0.00016565460958637725, |
| "loss": 0.9195, |
| "step": 1813 |
| }, |
| { |
| "epoch": 9.447916666666666, |
| "grad_norm": 1.5743353366851807, |
| "learning_rate": 0.00016553130366451095, |
| "loss": 0.7888, |
| "step": 1814 |
| }, |
| { |
| "epoch": 9.453125, |
| "grad_norm": 1.5163698196411133, |
| "learning_rate": 0.00016540798713272995, |
| "loss": 0.9288, |
| "step": 1815 |
| }, |
| { |
| "epoch": 9.458333333333334, |
| "grad_norm": 1.3740694522857666, |
| "learning_rate": 0.00016528466007527558, |
| "loss": 0.8954, |
| "step": 1816 |
| }, |
| { |
| "epoch": 9.463541666666666, |
| "grad_norm": 1.0571930408477783, |
| "learning_rate": 0.00016516132257639638, |
| "loss": 0.7689, |
| "step": 1817 |
| }, |
| { |
| "epoch": 9.46875, |
| "grad_norm": 1.4243452548980713, |
| "learning_rate": 0.000165037974720348, |
| "loss": 0.9052, |
| "step": 1818 |
| }, |
| { |
| "epoch": 9.473958333333334, |
| "grad_norm": 1.2999029159545898, |
| "learning_rate": 0.00016491461659139329, |
| "loss": 0.7824, |
| "step": 1819 |
| }, |
| { |
| "epoch": 9.479166666666666, |
| "grad_norm": 0.8955234885215759, |
| "learning_rate": 0.00016479124827380181, |
| "loss": 0.8954, |
| "step": 1820 |
| }, |
| { |
| "epoch": 9.484375, |
| "grad_norm": 1.1370500326156616, |
| "learning_rate": 0.0001646678698518505, |
| "loss": 0.7885, |
| "step": 1821 |
| }, |
| { |
| "epoch": 9.489583333333334, |
| "grad_norm": 0.9530293941497803, |
| "learning_rate": 0.0001645444814098228, |
| "loss": 0.9547, |
| "step": 1822 |
| }, |
| { |
| "epoch": 9.494791666666666, |
| "grad_norm": 1.20500910282135, |
| "learning_rate": 0.00016442108303200926, |
| "loss": 0.9167, |
| "step": 1823 |
| }, |
| { |
| "epoch": 9.5, |
| "grad_norm": 1.140815258026123, |
| "learning_rate": 0.0001642976748027071, |
| "loss": 0.7805, |
| "step": 1824 |
| }, |
| { |
| "epoch": 9.505208333333334, |
| "grad_norm": 1.3024696111679077, |
| "learning_rate": 0.00016417425680622042, |
| "loss": 0.7826, |
| "step": 1825 |
| }, |
| { |
| "epoch": 9.510416666666666, |
| "grad_norm": 1.1705375909805298, |
| "learning_rate": 0.0001640508291268597, |
| "loss": 0.8734, |
| "step": 1826 |
| }, |
| { |
| "epoch": 9.515625, |
| "grad_norm": 1.470380425453186, |
| "learning_rate": 0.00016392739184894236, |
| "loss": 0.8776, |
| "step": 1827 |
| }, |
| { |
| "epoch": 9.520833333333334, |
| "grad_norm": 1.6553527116775513, |
| "learning_rate": 0.00016380394505679214, |
| "loss": 0.9011, |
| "step": 1828 |
| }, |
| { |
| "epoch": 9.526041666666666, |
| "grad_norm": 0.9269207715988159, |
| "learning_rate": 0.0001636804888347394, |
| "loss": 0.8116, |
| "step": 1829 |
| }, |
| { |
| "epoch": 9.53125, |
| "grad_norm": 2.229746103286743, |
| "learning_rate": 0.0001635570232671209, |
| "loss": 0.8079, |
| "step": 1830 |
| }, |
| { |
| "epoch": 9.536458333333334, |
| "grad_norm": 1.5487534999847412, |
| "learning_rate": 0.00016343354843827978, |
| "loss": 0.9129, |
| "step": 1831 |
| }, |
| { |
| "epoch": 9.541666666666666, |
| "grad_norm": 1.0547854900360107, |
| "learning_rate": 0.00016331006443256557, |
| "loss": 0.7915, |
| "step": 1832 |
| }, |
| { |
| "epoch": 9.546875, |
| "grad_norm": 1.5278902053833008, |
| "learning_rate": 0.00016318657133433402, |
| "loss": 0.9259, |
| "step": 1833 |
| }, |
| { |
| "epoch": 9.552083333333334, |
| "grad_norm": 1.8977489471435547, |
| "learning_rate": 0.00016306306922794702, |
| "loss": 0.8188, |
| "step": 1834 |
| }, |
| { |
| "epoch": 9.557291666666666, |
| "grad_norm": 1.931062936782837, |
| "learning_rate": 0.00016293955819777275, |
| "loss": 0.963, |
| "step": 1835 |
| }, |
| { |
| "epoch": 9.5625, |
| "grad_norm": 1.39543616771698, |
| "learning_rate": 0.00016281603832818536, |
| "loss": 0.9377, |
| "step": 1836 |
| }, |
| { |
| "epoch": 9.567708333333334, |
| "grad_norm": 0.9857389330863953, |
| "learning_rate": 0.0001626925097035652, |
| "loss": 0.8202, |
| "step": 1837 |
| }, |
| { |
| "epoch": 9.572916666666666, |
| "grad_norm": 1.1530201435089111, |
| "learning_rate": 0.0001625689724082984, |
| "loss": 0.8923, |
| "step": 1838 |
| }, |
| { |
| "epoch": 9.578125, |
| "grad_norm": 0.8918301463127136, |
| "learning_rate": 0.00016244542652677713, |
| "loss": 0.9322, |
| "step": 1839 |
| }, |
| { |
| "epoch": 9.583333333333334, |
| "grad_norm": 1.5796302556991577, |
| "learning_rate": 0.0001623218721433995, |
| "loss": 0.9277, |
| "step": 1840 |
| }, |
| { |
| "epoch": 9.588541666666666, |
| "grad_norm": 1.9781290292739868, |
| "learning_rate": 0.00016219830934256923, |
| "loss": 0.8199, |
| "step": 1841 |
| }, |
| { |
| "epoch": 9.59375, |
| "grad_norm": 1.52654230594635, |
| "learning_rate": 0.00016207473820869592, |
| "loss": 0.9355, |
| "step": 1842 |
| }, |
| { |
| "epoch": 9.598958333333334, |
| "grad_norm": 1.7908263206481934, |
| "learning_rate": 0.00016195115882619487, |
| "loss": 0.9788, |
| "step": 1843 |
| }, |
| { |
| "epoch": 9.604166666666666, |
| "grad_norm": 1.3963121175765991, |
| "learning_rate": 0.000161827571279487, |
| "loss": 0.8503, |
| "step": 1844 |
| }, |
| { |
| "epoch": 9.609375, |
| "grad_norm": 1.0625687837600708, |
| "learning_rate": 0.00016170397565299873, |
| "loss": 0.9723, |
| "step": 1845 |
| }, |
| { |
| "epoch": 9.614583333333334, |
| "grad_norm": 0.7715263962745667, |
| "learning_rate": 0.00016158037203116212, |
| "loss": 0.7732, |
| "step": 1846 |
| }, |
| { |
| "epoch": 9.619791666666666, |
| "grad_norm": 1.531618595123291, |
| "learning_rate": 0.00016145676049841458, |
| "loss": 0.7831, |
| "step": 1847 |
| }, |
| { |
| "epoch": 9.625, |
| "grad_norm": 1.5678844451904297, |
| "learning_rate": 0.00016133314113919906, |
| "loss": 0.9661, |
| "step": 1848 |
| }, |
| { |
| "epoch": 9.630208333333334, |
| "grad_norm": 1.0000859498977661, |
| "learning_rate": 0.00016120951403796364, |
| "loss": 0.8216, |
| "step": 1849 |
| }, |
| { |
| "epoch": 9.635416666666666, |
| "grad_norm": 2.274866819381714, |
| "learning_rate": 0.00016108587927916195, |
| "loss": 0.9336, |
| "step": 1850 |
| }, |
| { |
| "epoch": 9.640625, |
| "grad_norm": 1.5799639225006104, |
| "learning_rate": 0.00016096223694725265, |
| "loss": 0.7913, |
| "step": 1851 |
| }, |
| { |
| "epoch": 9.645833333333334, |
| "grad_norm": 1.817676305770874, |
| "learning_rate": 0.0001608385871266997, |
| "loss": 0.8583, |
| "step": 1852 |
| }, |
| { |
| "epoch": 9.651041666666666, |
| "grad_norm": 0.9751538634300232, |
| "learning_rate": 0.00016071492990197205, |
| "loss": 0.8318, |
| "step": 1853 |
| }, |
| { |
| "epoch": 9.65625, |
| "grad_norm": 1.3606888055801392, |
| "learning_rate": 0.0001605912653575439, |
| "loss": 0.8908, |
| "step": 1854 |
| }, |
| { |
| "epoch": 9.661458333333334, |
| "grad_norm": 1.9638605117797852, |
| "learning_rate": 0.00016046759357789421, |
| "loss": 0.8026, |
| "step": 1855 |
| }, |
| { |
| "epoch": 9.666666666666666, |
| "grad_norm": 1.3917369842529297, |
| "learning_rate": 0.00016034391464750706, |
| "loss": 0.7918, |
| "step": 1856 |
| }, |
| { |
| "epoch": 9.671875, |
| "grad_norm": 1.197762131690979, |
| "learning_rate": 0.00016022022865087136, |
| "loss": 0.8672, |
| "step": 1857 |
| }, |
| { |
| "epoch": 9.677083333333334, |
| "grad_norm": 1.712149739265442, |
| "learning_rate": 0.00016009653567248085, |
| "loss": 0.9447, |
| "step": 1858 |
| }, |
| { |
| "epoch": 9.682291666666666, |
| "grad_norm": 1.7471809387207031, |
| "learning_rate": 0.000159972835796834, |
| "loss": 0.9283, |
| "step": 1859 |
| }, |
| { |
| "epoch": 9.6875, |
| "grad_norm": 0.9695741534233093, |
| "learning_rate": 0.00015984912910843412, |
| "loss": 0.8726, |
| "step": 1860 |
| }, |
| { |
| "epoch": 9.692708333333334, |
| "grad_norm": 1.349272608757019, |
| "learning_rate": 0.00015972541569178893, |
| "loss": 0.8112, |
| "step": 1861 |
| }, |
| { |
| "epoch": 9.697916666666666, |
| "grad_norm": 1.4048632383346558, |
| "learning_rate": 0.00015960169563141105, |
| "loss": 0.9015, |
| "step": 1862 |
| }, |
| { |
| "epoch": 9.703125, |
| "grad_norm": 1.3995472192764282, |
| "learning_rate": 0.00015947796901181734, |
| "loss": 0.7994, |
| "step": 1863 |
| }, |
| { |
| "epoch": 9.708333333333334, |
| "grad_norm": 1.1818926334381104, |
| "learning_rate": 0.00015935423591752941, |
| "loss": 0.8386, |
| "step": 1864 |
| }, |
| { |
| "epoch": 9.713541666666666, |
| "grad_norm": 1.57122004032135, |
| "learning_rate": 0.00015923049643307312, |
| "loss": 0.9707, |
| "step": 1865 |
| }, |
| { |
| "epoch": 9.71875, |
| "grad_norm": 0.8379884362220764, |
| "learning_rate": 0.00015910675064297877, |
| "loss": 0.8906, |
| "step": 1866 |
| }, |
| { |
| "epoch": 9.723958333333334, |
| "grad_norm": 1.659083604812622, |
| "learning_rate": 0.00015898299863178087, |
| "loss": 0.9528, |
| "step": 1867 |
| }, |
| { |
| "epoch": 9.729166666666666, |
| "grad_norm": 1.412984013557434, |
| "learning_rate": 0.00015885924048401838, |
| "loss": 0.806, |
| "step": 1868 |
| }, |
| { |
| "epoch": 9.734375, |
| "grad_norm": 1.822378396987915, |
| "learning_rate": 0.00015873547628423418, |
| "loss": 0.9493, |
| "step": 1869 |
| }, |
| { |
| "epoch": 9.739583333333334, |
| "grad_norm": 1.181948184967041, |
| "learning_rate": 0.00015861170611697554, |
| "loss": 0.8746, |
| "step": 1870 |
| }, |
| { |
| "epoch": 9.744791666666666, |
| "grad_norm": 1.8277508020401, |
| "learning_rate": 0.00015848793006679362, |
| "loss": 0.8869, |
| "step": 1871 |
| }, |
| { |
| "epoch": 9.75, |
| "grad_norm": 1.9489363431930542, |
| "learning_rate": 0.00015836414821824378, |
| "loss": 1.0035, |
| "step": 1872 |
| }, |
| { |
| "epoch": 9.755208333333334, |
| "grad_norm": 2.0474987030029297, |
| "learning_rate": 0.00015824036065588507, |
| "loss": 0.774, |
| "step": 1873 |
| }, |
| { |
| "epoch": 9.760416666666666, |
| "grad_norm": 1.153254508972168, |
| "learning_rate": 0.0001581165674642808, |
| "loss": 0.8212, |
| "step": 1874 |
| }, |
| { |
| "epoch": 9.765625, |
| "grad_norm": 1.8783382177352905, |
| "learning_rate": 0.00015799276872799776, |
| "loss": 0.8721, |
| "step": 1875 |
| }, |
| { |
| "epoch": 9.770833333333334, |
| "grad_norm": 0.9127784967422485, |
| "learning_rate": 0.0001578689645316068, |
| "loss": 0.8562, |
| "step": 1876 |
| }, |
| { |
| "epoch": 9.776041666666666, |
| "grad_norm": 1.114161729812622, |
| "learning_rate": 0.00015774515495968242, |
| "loss": 0.8548, |
| "step": 1877 |
| }, |
| { |
| "epoch": 9.78125, |
| "grad_norm": 2.2826757431030273, |
| "learning_rate": 0.0001576213400968027, |
| "loss": 0.8676, |
| "step": 1878 |
| }, |
| { |
| "epoch": 9.786458333333334, |
| "grad_norm": 1.47170889377594, |
| "learning_rate": 0.00015749752002754944, |
| "loss": 0.7281, |
| "step": 1879 |
| }, |
| { |
| "epoch": 9.791666666666666, |
| "grad_norm": 1.9411265850067139, |
| "learning_rate": 0.00015737369483650803, |
| "loss": 0.7685, |
| "step": 1880 |
| }, |
| { |
| "epoch": 9.796875, |
| "grad_norm": 0.9674518704414368, |
| "learning_rate": 0.00015724986460826715, |
| "loss": 0.9649, |
| "step": 1881 |
| }, |
| { |
| "epoch": 9.802083333333334, |
| "grad_norm": 1.499306321144104, |
| "learning_rate": 0.00015712602942741921, |
| "loss": 0.9059, |
| "step": 1882 |
| }, |
| { |
| "epoch": 9.807291666666666, |
| "grad_norm": 1.0943316221237183, |
| "learning_rate": 0.0001570021893785598, |
| "loss": 0.8616, |
| "step": 1883 |
| }, |
| { |
| "epoch": 9.8125, |
| "grad_norm": 1.5167124271392822, |
| "learning_rate": 0.00015687834454628785, |
| "loss": 0.9523, |
| "step": 1884 |
| }, |
| { |
| "epoch": 9.817708333333334, |
| "grad_norm": 1.4581629037857056, |
| "learning_rate": 0.0001567544950152057, |
| "loss": 0.9571, |
| "step": 1885 |
| }, |
| { |
| "epoch": 9.822916666666666, |
| "grad_norm": 1.1597574949264526, |
| "learning_rate": 0.00015663064086991868, |
| "loss": 0.8957, |
| "step": 1886 |
| }, |
| { |
| "epoch": 9.828125, |
| "grad_norm": 1.3003188371658325, |
| "learning_rate": 0.00015650678219503554, |
| "loss": 0.824, |
| "step": 1887 |
| }, |
| { |
| "epoch": 9.833333333333334, |
| "grad_norm": 0.8928467035293579, |
| "learning_rate": 0.0001563829190751678, |
| "loss": 0.8965, |
| "step": 1888 |
| }, |
| { |
| "epoch": 9.838541666666666, |
| "grad_norm": 1.4566998481750488, |
| "learning_rate": 0.00015625905159493038, |
| "loss": 0.9082, |
| "step": 1889 |
| }, |
| { |
| "epoch": 9.84375, |
| "grad_norm": 1.4332826137542725, |
| "learning_rate": 0.00015613517983894084, |
| "loss": 0.8004, |
| "step": 1890 |
| }, |
| { |
| "epoch": 9.848958333333334, |
| "grad_norm": 1.048706293106079, |
| "learning_rate": 0.00015601130389181995, |
| "loss": 0.8075, |
| "step": 1891 |
| }, |
| { |
| "epoch": 9.854166666666666, |
| "grad_norm": 1.4890114068984985, |
| "learning_rate": 0.00015588742383819109, |
| "loss": 0.9009, |
| "step": 1892 |
| }, |
| { |
| "epoch": 9.859375, |
| "grad_norm": 0.8497212529182434, |
| "learning_rate": 0.00015576353976268066, |
| "loss": 0.7994, |
| "step": 1893 |
| }, |
| { |
| "epoch": 9.864583333333334, |
| "grad_norm": 0.9586274027824402, |
| "learning_rate": 0.0001556396517499176, |
| "loss": 0.8193, |
| "step": 1894 |
| }, |
| { |
| "epoch": 9.869791666666666, |
| "grad_norm": 1.0411231517791748, |
| "learning_rate": 0.00015551575988453372, |
| "loss": 0.8459, |
| "step": 1895 |
| }, |
| { |
| "epoch": 9.875, |
| "grad_norm": 1.1388230323791504, |
| "learning_rate": 0.00015539186425116337, |
| "loss": 0.931, |
| "step": 1896 |
| }, |
| { |
| "epoch": 9.880208333333334, |
| "grad_norm": 1.2059820890426636, |
| "learning_rate": 0.00015526796493444358, |
| "loss": 0.8334, |
| "step": 1897 |
| }, |
| { |
| "epoch": 9.885416666666666, |
| "grad_norm": 1.657261848449707, |
| "learning_rate": 0.00015514406201901364, |
| "loss": 0.856, |
| "step": 1898 |
| }, |
| { |
| "epoch": 9.890625, |
| "grad_norm": 2.136936902999878, |
| "learning_rate": 0.00015502015558951563, |
| "loss": 0.7684, |
| "step": 1899 |
| }, |
| { |
| "epoch": 9.895833333333334, |
| "grad_norm": 1.7340823411941528, |
| "learning_rate": 0.0001548962457305938, |
| "loss": 0.8447, |
| "step": 1900 |
| }, |
| { |
| "epoch": 9.895833333333334, |
| "eval_f1_macro": 0.2564746953497497, |
| "eval_loss": 1.0024032592773438, |
| "eval_runtime": 4.976, |
| "eval_samples_per_second": 615.558, |
| "eval_steps_per_second": 9.646, |
| "step": 1900 |
| }, |
| { |
| "epoch": 9.901041666666666, |
| "grad_norm": 0.9543799161911011, |
| "learning_rate": 0.0001547723325268948, |
| "loss": 0.8188, |
| "step": 1901 |
| }, |
| { |
| "epoch": 9.90625, |
| "grad_norm": 1.7960683107376099, |
| "learning_rate": 0.00015464841606306764, |
| "loss": 0.8787, |
| "step": 1902 |
| }, |
| { |
| "epoch": 9.911458333333334, |
| "grad_norm": 1.5671051740646362, |
| "learning_rate": 0.0001545244964237634, |
| "loss": 0.8102, |
| "step": 1903 |
| }, |
| { |
| "epoch": 9.916666666666666, |
| "grad_norm": 1.0282342433929443, |
| "learning_rate": 0.00015440057369363552, |
| "loss": 0.8243, |
| "step": 1904 |
| }, |
| { |
| "epoch": 9.921875, |
| "grad_norm": 1.3564125299453735, |
| "learning_rate": 0.00015427664795733943, |
| "loss": 0.9381, |
| "step": 1905 |
| }, |
| { |
| "epoch": 9.927083333333334, |
| "grad_norm": 1.3567719459533691, |
| "learning_rate": 0.0001541527192995326, |
| "loss": 0.7725, |
| "step": 1906 |
| }, |
| { |
| "epoch": 9.932291666666666, |
| "grad_norm": 1.8143551349639893, |
| "learning_rate": 0.0001540287878048746, |
| "loss": 0.9584, |
| "step": 1907 |
| }, |
| { |
| "epoch": 9.9375, |
| "grad_norm": 1.2195340394973755, |
| "learning_rate": 0.00015390485355802679, |
| "loss": 0.8624, |
| "step": 1908 |
| }, |
| { |
| "epoch": 9.942708333333334, |
| "grad_norm": 1.2106612920761108, |
| "learning_rate": 0.00015378091664365256, |
| "loss": 0.8553, |
| "step": 1909 |
| }, |
| { |
| "epoch": 9.947916666666666, |
| "grad_norm": 1.081300973892212, |
| "learning_rate": 0.000153656977146417, |
| "loss": 0.8384, |
| "step": 1910 |
| }, |
| { |
| "epoch": 9.953125, |
| "grad_norm": 1.6551260948181152, |
| "learning_rate": 0.00015353303515098715, |
| "loss": 0.8964, |
| "step": 1911 |
| }, |
| { |
| "epoch": 9.958333333333334, |
| "grad_norm": 1.8574953079223633, |
| "learning_rate": 0.00015340909074203142, |
| "loss": 0.8653, |
| "step": 1912 |
| }, |
| { |
| "epoch": 9.963541666666666, |
| "grad_norm": 0.9766885638237, |
| "learning_rate": 0.00015328514400422027, |
| "loss": 0.9531, |
| "step": 1913 |
| }, |
| { |
| "epoch": 9.96875, |
| "grad_norm": 1.3088806867599487, |
| "learning_rate": 0.00015316119502222544, |
| "loss": 0.8889, |
| "step": 1914 |
| }, |
| { |
| "epoch": 9.973958333333334, |
| "grad_norm": 1.101117491722107, |
| "learning_rate": 0.00015303724388072035, |
| "loss": 0.7998, |
| "step": 1915 |
| }, |
| { |
| "epoch": 9.979166666666666, |
| "grad_norm": 1.1403522491455078, |
| "learning_rate": 0.00015291329066437984, |
| "loss": 0.8408, |
| "step": 1916 |
| }, |
| { |
| "epoch": 9.984375, |
| "grad_norm": 1.4599354267120361, |
| "learning_rate": 0.00015278933545788032, |
| "loss": 0.849, |
| "step": 1917 |
| }, |
| { |
| "epoch": 9.989583333333334, |
| "grad_norm": 1.4991867542266846, |
| "learning_rate": 0.00015266537834589922, |
| "loss": 0.9304, |
| "step": 1918 |
| }, |
| { |
| "epoch": 9.994791666666666, |
| "grad_norm": 1.0324890613555908, |
| "learning_rate": 0.0001525414194131157, |
| "loss": 0.826, |
| "step": 1919 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 2.3991434574127197, |
| "learning_rate": 0.0001524174587442098, |
| "loss": 0.93, |
| "step": 1920 |
| }, |
| { |
| "epoch": 10.005208333333334, |
| "grad_norm": 1.237000823020935, |
| "learning_rate": 0.00015229349642386291, |
| "loss": 0.8497, |
| "step": 1921 |
| }, |
| { |
| "epoch": 10.010416666666666, |
| "grad_norm": 1.2854629755020142, |
| "learning_rate": 0.0001521695325367576, |
| "loss": 0.8169, |
| "step": 1922 |
| }, |
| { |
| "epoch": 10.015625, |
| "grad_norm": 1.0376895666122437, |
| "learning_rate": 0.00015204556716757737, |
| "loss": 0.8878, |
| "step": 1923 |
| }, |
| { |
| "epoch": 10.020833333333334, |
| "grad_norm": 1.3712040185928345, |
| "learning_rate": 0.00015192160040100686, |
| "loss": 0.7967, |
| "step": 1924 |
| }, |
| { |
| "epoch": 10.026041666666666, |
| "grad_norm": 1.7033828496932983, |
| "learning_rate": 0.00015179763232173155, |
| "loss": 0.8292, |
| "step": 1925 |
| }, |
| { |
| "epoch": 10.03125, |
| "grad_norm": 2.0847558975219727, |
| "learning_rate": 0.0001516736630144379, |
| "loss": 0.8041, |
| "step": 1926 |
| }, |
| { |
| "epoch": 10.036458333333334, |
| "grad_norm": 1.0598844289779663, |
| "learning_rate": 0.00015154969256381325, |
| "loss": 0.9379, |
| "step": 1927 |
| }, |
| { |
| "epoch": 10.041666666666666, |
| "grad_norm": 1.4183096885681152, |
| "learning_rate": 0.00015142572105454556, |
| "loss": 0.9137, |
| "step": 1928 |
| }, |
| { |
| "epoch": 10.046875, |
| "grad_norm": 1.6121402978897095, |
| "learning_rate": 0.00015130174857132364, |
| "loss": 0.7236, |
| "step": 1929 |
| }, |
| { |
| "epoch": 10.052083333333334, |
| "grad_norm": 1.3973758220672607, |
| "learning_rate": 0.00015117777519883699, |
| "loss": 0.8415, |
| "step": 1930 |
| }, |
| { |
| "epoch": 10.057291666666666, |
| "grad_norm": 0.935997724533081, |
| "learning_rate": 0.00015105380102177555, |
| "loss": 0.8321, |
| "step": 1931 |
| }, |
| { |
| "epoch": 10.0625, |
| "grad_norm": 1.572262167930603, |
| "learning_rate": 0.0001509298261248301, |
| "loss": 0.8401, |
| "step": 1932 |
| }, |
| { |
| "epoch": 10.067708333333334, |
| "grad_norm": 0.83994460105896, |
| "learning_rate": 0.0001508058505926915, |
| "loss": 0.7732, |
| "step": 1933 |
| }, |
| { |
| "epoch": 10.072916666666666, |
| "grad_norm": 2.278294324874878, |
| "learning_rate": 0.00015068187451005143, |
| "loss": 0.8827, |
| "step": 1934 |
| }, |
| { |
| "epoch": 10.078125, |
| "grad_norm": 1.2654446363449097, |
| "learning_rate": 0.00015055789796160175, |
| "loss": 0.8877, |
| "step": 1935 |
| }, |
| { |
| "epoch": 10.083333333333334, |
| "grad_norm": 0.9135214686393738, |
| "learning_rate": 0.0001504339210320347, |
| "loss": 0.8957, |
| "step": 1936 |
| }, |
| { |
| "epoch": 10.088541666666666, |
| "grad_norm": 0.9637655019760132, |
| "learning_rate": 0.00015030994380604273, |
| "loss": 0.8845, |
| "step": 1937 |
| }, |
| { |
| "epoch": 10.09375, |
| "grad_norm": 1.5173457860946655, |
| "learning_rate": 0.0001501859663683186, |
| "loss": 0.8139, |
| "step": 1938 |
| }, |
| { |
| "epoch": 10.098958333333334, |
| "grad_norm": 1.2285293340682983, |
| "learning_rate": 0.000150061988803555, |
| "loss": 0.8277, |
| "step": 1939 |
| }, |
| { |
| "epoch": 10.104166666666666, |
| "grad_norm": 1.4348660707473755, |
| "learning_rate": 0.00014993801119644496, |
| "loss": 0.8914, |
| "step": 1940 |
| }, |
| { |
| "epoch": 10.109375, |
| "grad_norm": 2.5631632804870605, |
| "learning_rate": 0.00014981403363168144, |
| "loss": 1.0173, |
| "step": 1941 |
| }, |
| { |
| "epoch": 10.114583333333334, |
| "grad_norm": 1.6706727743148804, |
| "learning_rate": 0.00014969005619395727, |
| "loss": 0.7852, |
| "step": 1942 |
| }, |
| { |
| "epoch": 10.119791666666666, |
| "grad_norm": 1.785130262374878, |
| "learning_rate": 0.00014956607896796526, |
| "loss": 0.9289, |
| "step": 1943 |
| }, |
| { |
| "epoch": 10.125, |
| "grad_norm": 1.032949686050415, |
| "learning_rate": 0.00014944210203839825, |
| "loss": 0.8017, |
| "step": 1944 |
| }, |
| { |
| "epoch": 10.130208333333334, |
| "grad_norm": 1.3665375709533691, |
| "learning_rate": 0.00014931812548994855, |
| "loss": 0.7687, |
| "step": 1945 |
| }, |
| { |
| "epoch": 10.135416666666666, |
| "grad_norm": 1.6267966032028198, |
| "learning_rate": 0.00014919414940730853, |
| "loss": 1.0062, |
| "step": 1946 |
| }, |
| { |
| "epoch": 10.140625, |
| "grad_norm": 1.3555468320846558, |
| "learning_rate": 0.0001490701738751699, |
| "loss": 0.822, |
| "step": 1947 |
| }, |
| { |
| "epoch": 10.145833333333334, |
| "grad_norm": 1.7364243268966675, |
| "learning_rate": 0.00014894619897822445, |
| "loss": 0.9219, |
| "step": 1948 |
| }, |
| { |
| "epoch": 10.151041666666666, |
| "grad_norm": 1.4341026544570923, |
| "learning_rate": 0.000148822224801163, |
| "loss": 0.8949, |
| "step": 1949 |
| }, |
| { |
| "epoch": 10.15625, |
| "grad_norm": 1.0388355255126953, |
| "learning_rate": 0.0001486982514286763, |
| "loss": 0.9767, |
| "step": 1950 |
| }, |
| { |
| "epoch": 10.161458333333334, |
| "grad_norm": 1.0383387804031372, |
| "learning_rate": 0.0001485742789454544, |
| "loss": 0.8727, |
| "step": 1951 |
| }, |
| { |
| "epoch": 10.166666666666666, |
| "grad_norm": 1.3972128629684448, |
| "learning_rate": 0.00014845030743618673, |
| "loss": 0.8994, |
| "step": 1952 |
| }, |
| { |
| "epoch": 10.171875, |
| "grad_norm": 1.834364414215088, |
| "learning_rate": 0.00014832633698556207, |
| "loss": 0.8408, |
| "step": 1953 |
| }, |
| { |
| "epoch": 10.177083333333334, |
| "grad_norm": 1.3753862380981445, |
| "learning_rate": 0.00014820236767826845, |
| "loss": 0.8192, |
| "step": 1954 |
| }, |
| { |
| "epoch": 10.182291666666666, |
| "grad_norm": 0.9641554951667786, |
| "learning_rate": 0.00014807839959899316, |
| "loss": 0.9154, |
| "step": 1955 |
| }, |
| { |
| "epoch": 10.1875, |
| "grad_norm": 1.084476113319397, |
| "learning_rate": 0.00014795443283242263, |
| "loss": 0.9695, |
| "step": 1956 |
| }, |
| { |
| "epoch": 10.192708333333334, |
| "grad_norm": 1.6882784366607666, |
| "learning_rate": 0.0001478304674632424, |
| "loss": 0.8921, |
| "step": 1957 |
| }, |
| { |
| "epoch": 10.197916666666666, |
| "grad_norm": 1.6915993690490723, |
| "learning_rate": 0.0001477065035761371, |
| "loss": 0.8862, |
| "step": 1958 |
| }, |
| { |
| "epoch": 10.203125, |
| "grad_norm": 2.598761796951294, |
| "learning_rate": 0.0001475825412557902, |
| "loss": 0.8385, |
| "step": 1959 |
| }, |
| { |
| "epoch": 10.208333333333334, |
| "grad_norm": 1.5530712604522705, |
| "learning_rate": 0.0001474585805868843, |
| "loss": 0.7495, |
| "step": 1960 |
| }, |
| { |
| "epoch": 10.213541666666666, |
| "grad_norm": 1.3915677070617676, |
| "learning_rate": 0.00014733462165410075, |
| "loss": 0.9352, |
| "step": 1961 |
| }, |
| { |
| "epoch": 10.21875, |
| "grad_norm": 1.225396752357483, |
| "learning_rate": 0.00014721066454211973, |
| "loss": 0.84, |
| "step": 1962 |
| }, |
| { |
| "epoch": 10.223958333333334, |
| "grad_norm": 1.4181886911392212, |
| "learning_rate": 0.00014708670933562013, |
| "loss": 0.8562, |
| "step": 1963 |
| }, |
| { |
| "epoch": 10.229166666666666, |
| "grad_norm": 1.1548523902893066, |
| "learning_rate": 0.0001469627561192796, |
| "loss": 0.9367, |
| "step": 1964 |
| }, |
| { |
| "epoch": 10.234375, |
| "grad_norm": 1.4004180431365967, |
| "learning_rate": 0.00014683880497777453, |
| "loss": 0.8868, |
| "step": 1965 |
| }, |
| { |
| "epoch": 10.239583333333334, |
| "grad_norm": 1.1728037595748901, |
| "learning_rate": 0.00014671485599577973, |
| "loss": 0.9255, |
| "step": 1966 |
| }, |
| { |
| "epoch": 10.244791666666666, |
| "grad_norm": 1.174159288406372, |
| "learning_rate": 0.00014659090925796855, |
| "loss": 0.825, |
| "step": 1967 |
| }, |
| { |
| "epoch": 10.25, |
| "grad_norm": 1.097143292427063, |
| "learning_rate": 0.00014646696484901288, |
| "loss": 0.7424, |
| "step": 1968 |
| }, |
| { |
| "epoch": 10.255208333333334, |
| "grad_norm": 1.0802202224731445, |
| "learning_rate": 0.00014634302285358294, |
| "loss": 0.8132, |
| "step": 1969 |
| }, |
| { |
| "epoch": 10.260416666666666, |
| "grad_norm": 1.2086232900619507, |
| "learning_rate": 0.00014621908335634744, |
| "loss": 0.837, |
| "step": 1970 |
| }, |
| { |
| "epoch": 10.265625, |
| "grad_norm": 1.0211628675460815, |
| "learning_rate": 0.0001460951464419732, |
| "loss": 0.7846, |
| "step": 1971 |
| }, |
| { |
| "epoch": 10.270833333333334, |
| "grad_norm": 0.8610634207725525, |
| "learning_rate": 0.00014597121219512543, |
| "loss": 0.9135, |
| "step": 1972 |
| }, |
| { |
| "epoch": 10.276041666666666, |
| "grad_norm": 0.9612573981285095, |
| "learning_rate": 0.00014584728070046737, |
| "loss": 0.8763, |
| "step": 1973 |
| }, |
| { |
| "epoch": 10.28125, |
| "grad_norm": 1.3558603525161743, |
| "learning_rate": 0.0001457233520426606, |
| "loss": 0.8741, |
| "step": 1974 |
| }, |
| { |
| "epoch": 10.286458333333334, |
| "grad_norm": 1.286874771118164, |
| "learning_rate": 0.00014559942630636445, |
| "loss": 0.8111, |
| "step": 1975 |
| }, |
| { |
| "epoch": 10.291666666666666, |
| "grad_norm": 1.3767387866973877, |
| "learning_rate": 0.00014547550357623654, |
| "loss": 0.9272, |
| "step": 1976 |
| }, |
| { |
| "epoch": 10.296875, |
| "grad_norm": 1.2512094974517822, |
| "learning_rate": 0.0001453515839369324, |
| "loss": 0.8466, |
| "step": 1977 |
| }, |
| { |
| "epoch": 10.302083333333334, |
| "grad_norm": 1.204245924949646, |
| "learning_rate": 0.00014522766747310516, |
| "loss": 0.7177, |
| "step": 1978 |
| }, |
| { |
| "epoch": 10.307291666666666, |
| "grad_norm": 1.0785009860992432, |
| "learning_rate": 0.00014510375426940623, |
| "loss": 0.9094, |
| "step": 1979 |
| }, |
| { |
| "epoch": 10.3125, |
| "grad_norm": 1.0506519079208374, |
| "learning_rate": 0.00014497984441048435, |
| "loss": 0.8821, |
| "step": 1980 |
| }, |
| { |
| "epoch": 10.317708333333334, |
| "grad_norm": 2.4271035194396973, |
| "learning_rate": 0.00014485593798098636, |
| "loss": 0.8996, |
| "step": 1981 |
| }, |
| { |
| "epoch": 10.322916666666666, |
| "grad_norm": 0.9708760380744934, |
| "learning_rate": 0.00014473203506555645, |
| "loss": 0.8657, |
| "step": 1982 |
| }, |
| { |
| "epoch": 10.328125, |
| "grad_norm": 1.3734625577926636, |
| "learning_rate": 0.00014460813574883658, |
| "loss": 0.7621, |
| "step": 1983 |
| }, |
| { |
| "epoch": 10.333333333333334, |
| "grad_norm": 2.412170648574829, |
| "learning_rate": 0.00014448424011546628, |
| "loss": 0.905, |
| "step": 1984 |
| }, |
| { |
| "epoch": 10.338541666666666, |
| "grad_norm": 1.245095133781433, |
| "learning_rate": 0.0001443603482500824, |
| "loss": 0.8928, |
| "step": 1985 |
| }, |
| { |
| "epoch": 10.34375, |
| "grad_norm": 2.2401235103607178, |
| "learning_rate": 0.00014423646023731937, |
| "loss": 0.8682, |
| "step": 1986 |
| }, |
| { |
| "epoch": 10.348958333333334, |
| "grad_norm": 0.9285467267036438, |
| "learning_rate": 0.0001441125761618089, |
| "loss": 0.7574, |
| "step": 1987 |
| }, |
| { |
| "epoch": 10.354166666666666, |
| "grad_norm": 1.56451416015625, |
| "learning_rate": 0.00014398869610818003, |
| "loss": 0.8793, |
| "step": 1988 |
| }, |
| { |
| "epoch": 10.359375, |
| "grad_norm": 1.2093544006347656, |
| "learning_rate": 0.00014386482016105913, |
| "loss": 0.9222, |
| "step": 1989 |
| }, |
| { |
| "epoch": 10.364583333333334, |
| "grad_norm": 1.3364008665084839, |
| "learning_rate": 0.00014374094840506962, |
| "loss": 0.8528, |
| "step": 1990 |
| }, |
| { |
| "epoch": 10.369791666666666, |
| "grad_norm": 0.9096165895462036, |
| "learning_rate": 0.0001436170809248322, |
| "loss": 0.8732, |
| "step": 1991 |
| }, |
| { |
| "epoch": 10.375, |
| "grad_norm": 1.2033054828643799, |
| "learning_rate": 0.00014349321780496446, |
| "loss": 0.8493, |
| "step": 1992 |
| }, |
| { |
| "epoch": 10.380208333333334, |
| "grad_norm": 1.260677695274353, |
| "learning_rate": 0.00014336935913008135, |
| "loss": 0.8621, |
| "step": 1993 |
| }, |
| { |
| "epoch": 10.385416666666666, |
| "grad_norm": 2.4502477645874023, |
| "learning_rate": 0.00014324550498479428, |
| "loss": 0.8717, |
| "step": 1994 |
| }, |
| { |
| "epoch": 10.390625, |
| "grad_norm": 2.078524351119995, |
| "learning_rate": 0.0001431216554537121, |
| "loss": 0.8733, |
| "step": 1995 |
| }, |
| { |
| "epoch": 10.395833333333334, |
| "grad_norm": 2.769761800765991, |
| "learning_rate": 0.0001429978106214402, |
| "loss": 0.8508, |
| "step": 1996 |
| }, |
| { |
| "epoch": 10.401041666666666, |
| "grad_norm": 0.9554612636566162, |
| "learning_rate": 0.00014287397057258076, |
| "loss": 0.8725, |
| "step": 1997 |
| }, |
| { |
| "epoch": 10.40625, |
| "grad_norm": 0.9986212253570557, |
| "learning_rate": 0.00014275013539173282, |
| "loss": 0.9494, |
| "step": 1998 |
| }, |
| { |
| "epoch": 10.411458333333334, |
| "grad_norm": 1.8764184713363647, |
| "learning_rate": 0.000142626305163492, |
| "loss": 0.8284, |
| "step": 1999 |
| }, |
| { |
| "epoch": 10.416666666666666, |
| "grad_norm": 2.3540470600128174, |
| "learning_rate": 0.00014250247997245054, |
| "loss": 0.9608, |
| "step": 2000 |
| }, |
| { |
| "epoch": 10.416666666666666, |
| "eval_f1_macro": 0.2714239654140476, |
| "eval_loss": 1.0049389600753784, |
| "eval_runtime": 4.9951, |
| "eval_samples_per_second": 613.202, |
| "eval_steps_per_second": 9.609, |
| "step": 2000 |
| }, |
| { |
| "epoch": 10.421875, |
| "grad_norm": 1.033603549003601, |
| "learning_rate": 0.0001423786599031973, |
| "loss": 0.893, |
| "step": 2001 |
| }, |
| { |
| "epoch": 10.427083333333334, |
| "grad_norm": 1.1865248680114746, |
| "learning_rate": 0.00014225484504031758, |
| "loss": 0.9303, |
| "step": 2002 |
| }, |
| { |
| "epoch": 10.432291666666666, |
| "grad_norm": 1.2963870763778687, |
| "learning_rate": 0.00014213103546839318, |
| "loss": 0.8362, |
| "step": 2003 |
| }, |
| { |
| "epoch": 10.4375, |
| "grad_norm": 1.7028440237045288, |
| "learning_rate": 0.00014200723127200224, |
| "loss": 0.9008, |
| "step": 2004 |
| }, |
| { |
| "epoch": 10.442708333333334, |
| "grad_norm": 0.9437593221664429, |
| "learning_rate": 0.00014188343253571925, |
| "loss": 0.8596, |
| "step": 2005 |
| }, |
| { |
| "epoch": 10.447916666666666, |
| "grad_norm": 2.2818105220794678, |
| "learning_rate": 0.0001417596393441149, |
| "loss": 0.8832, |
| "step": 2006 |
| }, |
| { |
| "epoch": 10.453125, |
| "grad_norm": 0.9342473149299622, |
| "learning_rate": 0.00014163585178175627, |
| "loss": 0.8065, |
| "step": 2007 |
| }, |
| { |
| "epoch": 10.458333333333334, |
| "grad_norm": 0.998382031917572, |
| "learning_rate": 0.00014151206993320638, |
| "loss": 0.8174, |
| "step": 2008 |
| }, |
| { |
| "epoch": 10.463541666666666, |
| "grad_norm": 1.0820338726043701, |
| "learning_rate": 0.0001413882938830244, |
| "loss": 0.8128, |
| "step": 2009 |
| }, |
| { |
| "epoch": 10.46875, |
| "grad_norm": 0.9338914752006531, |
| "learning_rate": 0.00014126452371576584, |
| "loss": 0.8313, |
| "step": 2010 |
| }, |
| { |
| "epoch": 10.473958333333334, |
| "grad_norm": 1.1161863803863525, |
| "learning_rate": 0.00014114075951598162, |
| "loss": 0.8448, |
| "step": 2011 |
| }, |
| { |
| "epoch": 10.479166666666666, |
| "grad_norm": 1.6217401027679443, |
| "learning_rate": 0.0001410170013682191, |
| "loss": 0.8366, |
| "step": 2012 |
| }, |
| { |
| "epoch": 10.484375, |
| "grad_norm": 0.9196451902389526, |
| "learning_rate": 0.00014089324935702123, |
| "loss": 0.7964, |
| "step": 2013 |
| }, |
| { |
| "epoch": 10.489583333333334, |
| "grad_norm": 1.63260817527771, |
| "learning_rate": 0.00014076950356692685, |
| "loss": 0.8629, |
| "step": 2014 |
| }, |
| { |
| "epoch": 10.494791666666666, |
| "grad_norm": 1.3700957298278809, |
| "learning_rate": 0.00014064576408247059, |
| "loss": 0.8193, |
| "step": 2015 |
| }, |
| { |
| "epoch": 10.5, |
| "grad_norm": 0.9263189435005188, |
| "learning_rate": 0.00014052203098818264, |
| "loss": 0.9037, |
| "step": 2016 |
| }, |
| { |
| "epoch": 10.505208333333334, |
| "grad_norm": 0.9309031367301941, |
| "learning_rate": 0.00014039830436858897, |
| "loss": 0.8337, |
| "step": 2017 |
| }, |
| { |
| "epoch": 10.510416666666666, |
| "grad_norm": 1.7955965995788574, |
| "learning_rate": 0.00014027458430821105, |
| "loss": 0.9287, |
| "step": 2018 |
| }, |
| { |
| "epoch": 10.515625, |
| "grad_norm": 1.5209351778030396, |
| "learning_rate": 0.0001401508708915659, |
| "loss": 0.8972, |
| "step": 2019 |
| }, |
| { |
| "epoch": 10.520833333333334, |
| "grad_norm": 1.6187100410461426, |
| "learning_rate": 0.00014002716420316596, |
| "loss": 0.8524, |
| "step": 2020 |
| }, |
| { |
| "epoch": 10.526041666666666, |
| "grad_norm": 1.226399540901184, |
| "learning_rate": 0.0001399034643275191, |
| "loss": 0.9305, |
| "step": 2021 |
| }, |
| { |
| "epoch": 10.53125, |
| "grad_norm": 1.370202898979187, |
| "learning_rate": 0.00013977977134912862, |
| "loss": 0.8267, |
| "step": 2022 |
| }, |
| { |
| "epoch": 10.536458333333334, |
| "grad_norm": 1.2037887573242188, |
| "learning_rate": 0.0001396560853524929, |
| "loss": 0.908, |
| "step": 2023 |
| }, |
| { |
| "epoch": 10.541666666666666, |
| "grad_norm": 1.9648921489715576, |
| "learning_rate": 0.00013953240642210581, |
| "loss": 0.958, |
| "step": 2024 |
| }, |
| { |
| "epoch": 10.546875, |
| "grad_norm": 1.1781076192855835, |
| "learning_rate": 0.00013940873464245607, |
| "loss": 0.7814, |
| "step": 2025 |
| }, |
| { |
| "epoch": 10.552083333333334, |
| "grad_norm": 1.7728925943374634, |
| "learning_rate": 0.00013928507009802795, |
| "loss": 0.9434, |
| "step": 2026 |
| }, |
| { |
| "epoch": 10.557291666666666, |
| "grad_norm": 1.652596354484558, |
| "learning_rate": 0.0001391614128733003, |
| "loss": 0.7889, |
| "step": 2027 |
| }, |
| { |
| "epoch": 10.5625, |
| "grad_norm": 1.4260307550430298, |
| "learning_rate": 0.00013903776305274732, |
| "loss": 0.8581, |
| "step": 2028 |
| }, |
| { |
| "epoch": 10.567708333333334, |
| "grad_norm": 1.9916085004806519, |
| "learning_rate": 0.00013891412072083808, |
| "loss": 0.8063, |
| "step": 2029 |
| }, |
| { |
| "epoch": 10.572916666666666, |
| "grad_norm": 0.9925673007965088, |
| "learning_rate": 0.00013879048596203636, |
| "loss": 0.9284, |
| "step": 2030 |
| }, |
| { |
| "epoch": 10.578125, |
| "grad_norm": 1.2506064176559448, |
| "learning_rate": 0.000138666858860801, |
| "loss": 0.9697, |
| "step": 2031 |
| }, |
| { |
| "epoch": 10.583333333333334, |
| "grad_norm": 0.952881932258606, |
| "learning_rate": 0.00013854323950158543, |
| "loss": 0.7404, |
| "step": 2032 |
| }, |
| { |
| "epoch": 10.588541666666666, |
| "grad_norm": 1.0048246383666992, |
| "learning_rate": 0.0001384196279688379, |
| "loss": 0.8549, |
| "step": 2033 |
| }, |
| { |
| "epoch": 10.59375, |
| "grad_norm": 1.0715419054031372, |
| "learning_rate": 0.00013829602434700127, |
| "loss": 0.8503, |
| "step": 2034 |
| }, |
| { |
| "epoch": 10.598958333333334, |
| "grad_norm": 1.5163006782531738, |
| "learning_rate": 0.000138172428720513, |
| "loss": 0.8364, |
| "step": 2035 |
| }, |
| { |
| "epoch": 10.604166666666666, |
| "grad_norm": 1.0766113996505737, |
| "learning_rate": 0.0001380488411738051, |
| "loss": 0.9005, |
| "step": 2036 |
| }, |
| { |
| "epoch": 10.609375, |
| "grad_norm": 1.186450719833374, |
| "learning_rate": 0.00013792526179130408, |
| "loss": 0.755, |
| "step": 2037 |
| }, |
| { |
| "epoch": 10.614583333333334, |
| "grad_norm": 1.4846112728118896, |
| "learning_rate": 0.00013780169065743077, |
| "loss": 0.9291, |
| "step": 2038 |
| }, |
| { |
| "epoch": 10.619791666666666, |
| "grad_norm": 1.3092869520187378, |
| "learning_rate": 0.00013767812785660046, |
| "loss": 0.9049, |
| "step": 2039 |
| }, |
| { |
| "epoch": 10.625, |
| "grad_norm": 1.9071677923202515, |
| "learning_rate": 0.00013755457347322278, |
| "loss": 0.8367, |
| "step": 2040 |
| }, |
| { |
| "epoch": 10.630208333333334, |
| "grad_norm": 1.3589783906936646, |
| "learning_rate": 0.00013743102759170158, |
| "loss": 0.7833, |
| "step": 2041 |
| }, |
| { |
| "epoch": 10.635416666666666, |
| "grad_norm": 2.744535207748413, |
| "learning_rate": 0.00013730749029643478, |
| "loss": 0.8767, |
| "step": 2042 |
| }, |
| { |
| "epoch": 10.640625, |
| "grad_norm": 0.9242825508117676, |
| "learning_rate": 0.00013718396167181461, |
| "loss": 0.7582, |
| "step": 2043 |
| }, |
| { |
| "epoch": 10.645833333333334, |
| "grad_norm": 1.5504391193389893, |
| "learning_rate": 0.00013706044180222723, |
| "loss": 0.7691, |
| "step": 2044 |
| }, |
| { |
| "epoch": 10.651041666666666, |
| "grad_norm": 1.4087800979614258, |
| "learning_rate": 0.00013693693077205298, |
| "loss": 0.8833, |
| "step": 2045 |
| }, |
| { |
| "epoch": 10.65625, |
| "grad_norm": 1.8851345777511597, |
| "learning_rate": 0.00013681342866566599, |
| "loss": 0.9323, |
| "step": 2046 |
| }, |
| { |
| "epoch": 10.661458333333334, |
| "grad_norm": 1.9556982517242432, |
| "learning_rate": 0.0001366899355674344, |
| "loss": 0.942, |
| "step": 2047 |
| }, |
| { |
| "epoch": 10.666666666666666, |
| "grad_norm": 2.13218355178833, |
| "learning_rate": 0.0001365664515617202, |
| "loss": 0.8401, |
| "step": 2048 |
| }, |
| { |
| "epoch": 10.671875, |
| "grad_norm": 1.1995073556900024, |
| "learning_rate": 0.00013644297673287908, |
| "loss": 0.8426, |
| "step": 2049 |
| }, |
| { |
| "epoch": 10.677083333333334, |
| "grad_norm": 1.0810803174972534, |
| "learning_rate": 0.0001363195111652606, |
| "loss": 0.831, |
| "step": 2050 |
| }, |
| { |
| "epoch": 10.682291666666666, |
| "grad_norm": 1.2767646312713623, |
| "learning_rate": 0.00013619605494320786, |
| "loss": 0.8301, |
| "step": 2051 |
| }, |
| { |
| "epoch": 10.6875, |
| "grad_norm": 2.020418643951416, |
| "learning_rate": 0.00013607260815105766, |
| "loss": 0.8854, |
| "step": 2052 |
| }, |
| { |
| "epoch": 10.692708333333334, |
| "grad_norm": 1.2917944192886353, |
| "learning_rate": 0.0001359491708731403, |
| "loss": 0.8606, |
| "step": 2053 |
| }, |
| { |
| "epoch": 10.697916666666666, |
| "grad_norm": 1.2073050737380981, |
| "learning_rate": 0.00013582574319377956, |
| "loss": 0.8096, |
| "step": 2054 |
| }, |
| { |
| "epoch": 10.703125, |
| "grad_norm": 1.2684693336486816, |
| "learning_rate": 0.0001357023251972929, |
| "loss": 0.8642, |
| "step": 2055 |
| }, |
| { |
| "epoch": 10.708333333333334, |
| "grad_norm": 1.8962839841842651, |
| "learning_rate": 0.0001355789169679907, |
| "loss": 0.8318, |
| "step": 2056 |
| }, |
| { |
| "epoch": 10.713541666666666, |
| "grad_norm": 1.5715564489364624, |
| "learning_rate": 0.00013545551859017724, |
| "loss": 0.8054, |
| "step": 2057 |
| }, |
| { |
| "epoch": 10.71875, |
| "grad_norm": 1.1155095100402832, |
| "learning_rate": 0.0001353321301481495, |
| "loss": 0.8898, |
| "step": 2058 |
| }, |
| { |
| "epoch": 10.723958333333334, |
| "grad_norm": 1.864199161529541, |
| "learning_rate": 0.00013520875172619813, |
| "loss": 0.9033, |
| "step": 2059 |
| }, |
| { |
| "epoch": 10.729166666666666, |
| "grad_norm": 1.1912822723388672, |
| "learning_rate": 0.00013508538340860674, |
| "loss": 0.9958, |
| "step": 2060 |
| }, |
| { |
| "epoch": 10.734375, |
| "grad_norm": 1.12935471534729, |
| "learning_rate": 0.00013496202527965196, |
| "loss": 0.7489, |
| "step": 2061 |
| }, |
| { |
| "epoch": 10.739583333333334, |
| "grad_norm": 1.547351360321045, |
| "learning_rate": 0.00013483867742360362, |
| "loss": 0.8785, |
| "step": 2062 |
| }, |
| { |
| "epoch": 10.744791666666666, |
| "grad_norm": 1.0415828227996826, |
| "learning_rate": 0.0001347153399247244, |
| "loss": 0.8812, |
| "step": 2063 |
| }, |
| { |
| "epoch": 10.75, |
| "grad_norm": 0.9923093914985657, |
| "learning_rate": 0.00013459201286727008, |
| "loss": 0.8591, |
| "step": 2064 |
| }, |
| { |
| "epoch": 10.755208333333334, |
| "grad_norm": 1.4013783931732178, |
| "learning_rate": 0.00013446869633548905, |
| "loss": 0.954, |
| "step": 2065 |
| }, |
| { |
| "epoch": 10.760416666666666, |
| "grad_norm": 2.069014072418213, |
| "learning_rate": 0.00013434539041362272, |
| "loss": 0.8686, |
| "step": 2066 |
| }, |
| { |
| "epoch": 10.765625, |
| "grad_norm": 1.4210000038146973, |
| "learning_rate": 0.00013422209518590525, |
| "loss": 0.812, |
| "step": 2067 |
| }, |
| { |
| "epoch": 10.770833333333334, |
| "grad_norm": 1.614791750907898, |
| "learning_rate": 0.00013409881073656328, |
| "loss": 0.9769, |
| "step": 2068 |
| }, |
| { |
| "epoch": 10.776041666666666, |
| "grad_norm": 2.682544231414795, |
| "learning_rate": 0.00013397553714981645, |
| "loss": 0.8877, |
| "step": 2069 |
| }, |
| { |
| "epoch": 10.78125, |
| "grad_norm": 1.5227347612380981, |
| "learning_rate": 0.00013385227450987653, |
| "loss": 1.0006, |
| "step": 2070 |
| }, |
| { |
| "epoch": 10.786458333333334, |
| "grad_norm": 1.786467432975769, |
| "learning_rate": 0.00013372902290094825, |
| "loss": 0.8777, |
| "step": 2071 |
| }, |
| { |
| "epoch": 10.791666666666666, |
| "grad_norm": 1.035192847251892, |
| "learning_rate": 0.0001336057824072284, |
| "loss": 0.9824, |
| "step": 2072 |
| }, |
| { |
| "epoch": 10.796875, |
| "grad_norm": 1.5866483449935913, |
| "learning_rate": 0.00013348255311290656, |
| "loss": 0.9651, |
| "step": 2073 |
| }, |
| { |
| "epoch": 10.802083333333334, |
| "grad_norm": 1.332126259803772, |
| "learning_rate": 0.0001333593351021644, |
| "loss": 0.8414, |
| "step": 2074 |
| }, |
| { |
| "epoch": 10.807291666666666, |
| "grad_norm": 1.0151480436325073, |
| "learning_rate": 0.00013323612845917598, |
| "loss": 0.9014, |
| "step": 2075 |
| }, |
| { |
| "epoch": 10.8125, |
| "grad_norm": 1.9119325876235962, |
| "learning_rate": 0.00013311293326810758, |
| "loss": 0.8308, |
| "step": 2076 |
| }, |
| { |
| "epoch": 10.817708333333334, |
| "grad_norm": 1.6210479736328125, |
| "learning_rate": 0.00013298974961311762, |
| "loss": 0.9498, |
| "step": 2077 |
| }, |
| { |
| "epoch": 10.822916666666666, |
| "grad_norm": 1.8309333324432373, |
| "learning_rate": 0.00013286657757835668, |
| "loss": 0.9534, |
| "step": 2078 |
| }, |
| { |
| "epoch": 10.828125, |
| "grad_norm": 0.9787785410881042, |
| "learning_rate": 0.0001327434172479674, |
| "loss": 0.7928, |
| "step": 2079 |
| }, |
| { |
| "epoch": 10.833333333333334, |
| "grad_norm": 1.4746061563491821, |
| "learning_rate": 0.00013262026870608442, |
| "loss": 0.8549, |
| "step": 2080 |
| }, |
| { |
| "epoch": 10.838541666666666, |
| "grad_norm": 1.3585798740386963, |
| "learning_rate": 0.00013249713203683435, |
| "loss": 0.8287, |
| "step": 2081 |
| }, |
| { |
| "epoch": 10.84375, |
| "grad_norm": 1.2940641641616821, |
| "learning_rate": 0.0001323740073243356, |
| "loss": 0.8803, |
| "step": 2082 |
| }, |
| { |
| "epoch": 10.848958333333334, |
| "grad_norm": 1.4390153884887695, |
| "learning_rate": 0.00013225089465269854, |
| "loss": 0.8862, |
| "step": 2083 |
| }, |
| { |
| "epoch": 10.854166666666666, |
| "grad_norm": 0.9366956353187561, |
| "learning_rate": 0.0001321277941060252, |
| "loss": 0.8909, |
| "step": 2084 |
| }, |
| { |
| "epoch": 10.859375, |
| "grad_norm": 0.8678808808326721, |
| "learning_rate": 0.00013200470576840934, |
| "loss": 0.8809, |
| "step": 2085 |
| }, |
| { |
| "epoch": 10.864583333333334, |
| "grad_norm": 0.8327028751373291, |
| "learning_rate": 0.00013188162972393658, |
| "loss": 0.9052, |
| "step": 2086 |
| }, |
| { |
| "epoch": 10.869791666666666, |
| "grad_norm": 1.6686367988586426, |
| "learning_rate": 0.0001317585660566838, |
| "loss": 0.9416, |
| "step": 2087 |
| }, |
| { |
| "epoch": 10.875, |
| "grad_norm": 1.0158665180206299, |
| "learning_rate": 0.00013163551485071974, |
| "loss": 0.7953, |
| "step": 2088 |
| }, |
| { |
| "epoch": 10.880208333333334, |
| "grad_norm": 1.237837553024292, |
| "learning_rate": 0.00013151247619010438, |
| "loss": 0.8824, |
| "step": 2089 |
| }, |
| { |
| "epoch": 10.885416666666666, |
| "grad_norm": 1.2813950777053833, |
| "learning_rate": 0.00013138945015888934, |
| "loss": 0.7992, |
| "step": 2090 |
| }, |
| { |
| "epoch": 10.890625, |
| "grad_norm": 1.4439313411712646, |
| "learning_rate": 0.00013126643684111742, |
| "loss": 0.82, |
| "step": 2091 |
| }, |
| { |
| "epoch": 10.895833333333334, |
| "grad_norm": 1.4384384155273438, |
| "learning_rate": 0.00013114343632082288, |
| "loss": 0.7704, |
| "step": 2092 |
| }, |
| { |
| "epoch": 10.901041666666666, |
| "grad_norm": 1.2647889852523804, |
| "learning_rate": 0.0001310204486820312, |
| "loss": 0.8926, |
| "step": 2093 |
| }, |
| { |
| "epoch": 10.90625, |
| "grad_norm": 1.488816499710083, |
| "learning_rate": 0.000130897474008759, |
| "loss": 0.8269, |
| "step": 2094 |
| }, |
| { |
| "epoch": 10.911458333333334, |
| "grad_norm": 1.1310994625091553, |
| "learning_rate": 0.00013077451238501414, |
| "loss": 0.8042, |
| "step": 2095 |
| }, |
| { |
| "epoch": 10.916666666666666, |
| "grad_norm": 1.4160122871398926, |
| "learning_rate": 0.00013065156389479546, |
| "loss": 0.8825, |
| "step": 2096 |
| }, |
| { |
| "epoch": 10.921875, |
| "grad_norm": 0.938277542591095, |
| "learning_rate": 0.00013052862862209295, |
| "loss": 0.8808, |
| "step": 2097 |
| }, |
| { |
| "epoch": 10.927083333333334, |
| "grad_norm": 1.2421224117279053, |
| "learning_rate": 0.00013040570665088743, |
| "loss": 0.9252, |
| "step": 2098 |
| }, |
| { |
| "epoch": 10.932291666666666, |
| "grad_norm": 1.5219831466674805, |
| "learning_rate": 0.00013028279806515068, |
| "loss": 0.8435, |
| "step": 2099 |
| }, |
| { |
| "epoch": 10.9375, |
| "grad_norm": 0.8221004009246826, |
| "learning_rate": 0.00013015990294884557, |
| "loss": 0.8414, |
| "step": 2100 |
| }, |
| { |
| "epoch": 10.9375, |
| "eval_f1_macro": 0.2652247469889331, |
| "eval_loss": 1.001282811164856, |
| "eval_runtime": 4.9962, |
| "eval_samples_per_second": 613.071, |
| "eval_steps_per_second": 9.607, |
| "step": 2100 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 3840, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 20, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7.053734493536256e+16, |
| "train_batch_size": 128, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|