| { |
| "best_global_step": 545, |
| "best_metric": 0.5921985815602837, |
| "best_model_checkpoint": "./cysecbert-ttp-bert-base_data/checkpoint-545", |
| "epoch": 5.0, |
| "eval_steps": 500, |
| "global_step": 545, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.009174311926605505, |
| "grad_norm": 46530.15234375, |
| "learning_rate": 0.0, |
| "loss": 0.7023, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.01834862385321101, |
| "grad_norm": 49571.35546875, |
| "learning_rate": 5.000000000000001e-07, |
| "loss": 0.7006, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.027522935779816515, |
| "grad_norm": 45885.55078125, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 0.7033, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.03669724770642202, |
| "grad_norm": 43333.73046875, |
| "learning_rate": 1.5e-06, |
| "loss": 0.7014, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.045871559633027525, |
| "grad_norm": 44340.41796875, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 0.6992, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.05504587155963303, |
| "grad_norm": 48282.54296875, |
| "learning_rate": 2.5e-06, |
| "loss": 0.6924, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.06422018348623854, |
| "grad_norm": 48561.56640625, |
| "learning_rate": 3e-06, |
| "loss": 0.6948, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.07339449541284404, |
| "grad_norm": 43580.078125, |
| "learning_rate": 3.5000000000000004e-06, |
| "loss": 0.6916, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.08256880733944955, |
| "grad_norm": 46552.28515625, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 0.6854, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.09174311926605505, |
| "grad_norm": 49042.15234375, |
| "learning_rate": 4.5e-06, |
| "loss": 0.6822, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.10091743119266056, |
| "grad_norm": 97274.8515625, |
| "learning_rate": 5e-06, |
| "loss": 0.6788, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.11009174311926606, |
| "grad_norm": 55310.375, |
| "learning_rate": 5.500000000000001e-06, |
| "loss": 0.6781, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.11926605504587157, |
| "grad_norm": 70317.6328125, |
| "learning_rate": 6e-06, |
| "loss": 0.6754, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.12844036697247707, |
| "grad_norm": 46825.9765625, |
| "learning_rate": 6.5000000000000004e-06, |
| "loss": 0.6626, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.13761467889908258, |
| "grad_norm": 50745.15625, |
| "learning_rate": 7.000000000000001e-06, |
| "loss": 0.655, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.14678899082568808, |
| "grad_norm": 51342.453125, |
| "learning_rate": 7.5e-06, |
| "loss": 0.6491, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.1559633027522936, |
| "grad_norm": 51815.421875, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 0.6464, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.1651376146788991, |
| "grad_norm": 54610.65234375, |
| "learning_rate": 8.500000000000002e-06, |
| "loss": 0.6298, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.1743119266055046, |
| "grad_norm": 61936.31640625, |
| "learning_rate": 9e-06, |
| "loss": 0.623, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.1834862385321101, |
| "grad_norm": 56021.69921875, |
| "learning_rate": 9.5e-06, |
| "loss": 0.6156, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.1926605504587156, |
| "grad_norm": 60806.10546875, |
| "learning_rate": 1e-05, |
| "loss": 0.5981, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.2018348623853211, |
| "grad_norm": 58582.4140625, |
| "learning_rate": 1.05e-05, |
| "loss": 0.5869, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.21100917431192662, |
| "grad_norm": 59099.16015625, |
| "learning_rate": 1.1000000000000001e-05, |
| "loss": 0.5732, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.22018348623853212, |
| "grad_norm": 52971.81640625, |
| "learning_rate": 1.1500000000000002e-05, |
| "loss": 0.5587, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.22935779816513763, |
| "grad_norm": 51724.43359375, |
| "learning_rate": 1.2e-05, |
| "loss": 0.5477, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.23853211009174313, |
| "grad_norm": 58133.453125, |
| "learning_rate": 1.25e-05, |
| "loss": 0.5422, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.24770642201834864, |
| "grad_norm": 54134.3359375, |
| "learning_rate": 1.3000000000000001e-05, |
| "loss": 0.5319, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.25688073394495414, |
| "grad_norm": 78195.7890625, |
| "learning_rate": 1.3500000000000001e-05, |
| "loss": 0.5259, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.26605504587155965, |
| "grad_norm": 47713.20703125, |
| "learning_rate": 1.4000000000000001e-05, |
| "loss": 0.5105, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.27522935779816515, |
| "grad_norm": 45838.0703125, |
| "learning_rate": 1.45e-05, |
| "loss": 0.499, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.28440366972477066, |
| "grad_norm": 46735.58203125, |
| "learning_rate": 1.5e-05, |
| "loss": 0.4914, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.29357798165137616, |
| "grad_norm": 45432.8828125, |
| "learning_rate": 1.55e-05, |
| "loss": 0.4783, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.30275229357798167, |
| "grad_norm": 46758.58984375, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 0.4758, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.3119266055045872, |
| "grad_norm": 45245.82421875, |
| "learning_rate": 1.65e-05, |
| "loss": 0.4608, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.3211009174311927, |
| "grad_norm": 43336.75390625, |
| "learning_rate": 1.7000000000000003e-05, |
| "loss": 0.4567, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.3302752293577982, |
| "grad_norm": 41445.90625, |
| "learning_rate": 1.75e-05, |
| "loss": 0.4465, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.3394495412844037, |
| "grad_norm": 42278.80078125, |
| "learning_rate": 1.8e-05, |
| "loss": 0.4395, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.3486238532110092, |
| "grad_norm": 42039.28515625, |
| "learning_rate": 1.85e-05, |
| "loss": 0.4289, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.3577981651376147, |
| "grad_norm": 41411.61328125, |
| "learning_rate": 1.9e-05, |
| "loss": 0.423, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.3669724770642202, |
| "grad_norm": 40133.84375, |
| "learning_rate": 1.9500000000000003e-05, |
| "loss": 0.4191, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.3761467889908257, |
| "grad_norm": 39758.8828125, |
| "learning_rate": 2e-05, |
| "loss": 0.4071, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.3853211009174312, |
| "grad_norm": 38760.36328125, |
| "learning_rate": 2.05e-05, |
| "loss": 0.3996, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.3944954128440367, |
| "grad_norm": 38552.80078125, |
| "learning_rate": 2.1e-05, |
| "loss": 0.3918, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.4036697247706422, |
| "grad_norm": 38561.7578125, |
| "learning_rate": 2.15e-05, |
| "loss": 0.3865, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.41284403669724773, |
| "grad_norm": 37616.85546875, |
| "learning_rate": 2.2000000000000003e-05, |
| "loss": 0.3794, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.42201834862385323, |
| "grad_norm": 37784.78515625, |
| "learning_rate": 2.25e-05, |
| "loss": 0.3745, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.43119266055045874, |
| "grad_norm": 36332.9140625, |
| "learning_rate": 2.3000000000000003e-05, |
| "loss": 0.3711, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.44036697247706424, |
| "grad_norm": 38899.73046875, |
| "learning_rate": 2.35e-05, |
| "loss": 0.3643, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.44954128440366975, |
| "grad_norm": 36174.9765625, |
| "learning_rate": 2.4e-05, |
| "loss": 0.3575, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.45871559633027525, |
| "grad_norm": 35699.83203125, |
| "learning_rate": 2.45e-05, |
| "loss": 0.347, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.46788990825688076, |
| "grad_norm": 35562.3671875, |
| "learning_rate": 2.5e-05, |
| "loss": 0.3438, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.47706422018348627, |
| "grad_norm": 35428.0625, |
| "learning_rate": 2.5500000000000003e-05, |
| "loss": 0.3325, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.48623853211009177, |
| "grad_norm": 34396.80078125, |
| "learning_rate": 2.6000000000000002e-05, |
| "loss": 0.3302, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.4954128440366973, |
| "grad_norm": 38376.41796875, |
| "learning_rate": 2.6500000000000004e-05, |
| "loss": 0.3218, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.5045871559633027, |
| "grad_norm": 33996.23046875, |
| "learning_rate": 2.7000000000000002e-05, |
| "loss": 0.3131, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.5137614678899083, |
| "grad_norm": 33070.55859375, |
| "learning_rate": 2.7500000000000004e-05, |
| "loss": 0.3129, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.5229357798165137, |
| "grad_norm": 33200.44140625, |
| "learning_rate": 2.8000000000000003e-05, |
| "loss": 0.3003, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.5321100917431193, |
| "grad_norm": 33099.1015625, |
| "learning_rate": 2.8499999999999998e-05, |
| "loss": 0.2904, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.5412844036697247, |
| "grad_norm": 31352.298828125, |
| "learning_rate": 2.9e-05, |
| "loss": 0.2911, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.5504587155963303, |
| "grad_norm": 31965.087890625, |
| "learning_rate": 2.95e-05, |
| "loss": 0.2815, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.5596330275229358, |
| "grad_norm": 30810.849609375, |
| "learning_rate": 3e-05, |
| "loss": 0.2818, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.5688073394495413, |
| "grad_norm": 30034.779296875, |
| "learning_rate": 3.05e-05, |
| "loss": 0.2731, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.5779816513761468, |
| "grad_norm": 30009.12109375, |
| "learning_rate": 3.1e-05, |
| "loss": 0.266, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.5871559633027523, |
| "grad_norm": 29305.173828125, |
| "learning_rate": 3.15e-05, |
| "loss": 0.2609, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.5963302752293578, |
| "grad_norm": 29081.853515625, |
| "learning_rate": 3.2000000000000005e-05, |
| "loss": 0.2543, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.6055045871559633, |
| "grad_norm": 28217.021484375, |
| "learning_rate": 3.2500000000000004e-05, |
| "loss": 0.252, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.6146788990825688, |
| "grad_norm": 26448.12890625, |
| "learning_rate": 3.3e-05, |
| "loss": 0.2565, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.6238532110091743, |
| "grad_norm": 27198.80859375, |
| "learning_rate": 3.35e-05, |
| "loss": 0.2342, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.6330275229357798, |
| "grad_norm": 26946.30859375, |
| "learning_rate": 3.4000000000000007e-05, |
| "loss": 0.2273, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.6422018348623854, |
| "grad_norm": 26236.7265625, |
| "learning_rate": 3.45e-05, |
| "loss": 0.2219, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.6513761467889908, |
| "grad_norm": 25535.818359375, |
| "learning_rate": 3.5e-05, |
| "loss": 0.2197, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.6605504587155964, |
| "grad_norm": 24871.8515625, |
| "learning_rate": 3.55e-05, |
| "loss": 0.2144, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.6697247706422018, |
| "grad_norm": 24484.55078125, |
| "learning_rate": 3.6e-05, |
| "loss": 0.2066, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.6788990825688074, |
| "grad_norm": 23655.677734375, |
| "learning_rate": 3.65e-05, |
| "loss": 0.206, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.6880733944954128, |
| "grad_norm": 23129.076171875, |
| "learning_rate": 3.7e-05, |
| "loss": 0.1979, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.6972477064220184, |
| "grad_norm": 21871.90625, |
| "learning_rate": 3.7500000000000003e-05, |
| "loss": 0.1973, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.7064220183486238, |
| "grad_norm": 21561.58984375, |
| "learning_rate": 3.8e-05, |
| "loss": 0.1935, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.7155963302752294, |
| "grad_norm": 22210.29296875, |
| "learning_rate": 3.85e-05, |
| "loss": 0.1848, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.7247706422018348, |
| "grad_norm": 20555.775390625, |
| "learning_rate": 3.9000000000000006e-05, |
| "loss": 0.1801, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.7339449541284404, |
| "grad_norm": 20406.75, |
| "learning_rate": 3.9500000000000005e-05, |
| "loss": 0.1746, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.7431192660550459, |
| "grad_norm": 19150.931640625, |
| "learning_rate": 4e-05, |
| "loss": 0.1688, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.7522935779816514, |
| "grad_norm": 18422.65625, |
| "learning_rate": 4.05e-05, |
| "loss": 0.1661, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.7614678899082569, |
| "grad_norm": 16746.20703125, |
| "learning_rate": 4.1e-05, |
| "loss": 0.1711, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.7706422018348624, |
| "grad_norm": 18229.41015625, |
| "learning_rate": 4.15e-05, |
| "loss": 0.1646, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.7798165137614679, |
| "grad_norm": 17156.267578125, |
| "learning_rate": 4.2e-05, |
| "loss": 0.1588, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.7889908256880734, |
| "grad_norm": 15970.611328125, |
| "learning_rate": 4.25e-05, |
| "loss": 0.1547, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.7981651376146789, |
| "grad_norm": 16997.103515625, |
| "learning_rate": 4.3e-05, |
| "loss": 0.1493, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.8073394495412844, |
| "grad_norm": 15529.58984375, |
| "learning_rate": 4.35e-05, |
| "loss": 0.1439, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.8165137614678899, |
| "grad_norm": 15099.9052734375, |
| "learning_rate": 4.4000000000000006e-05, |
| "loss": 0.1432, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.8256880733944955, |
| "grad_norm": 14261.3232421875, |
| "learning_rate": 4.4500000000000004e-05, |
| "loss": 0.1396, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.8348623853211009, |
| "grad_norm": 13915.87890625, |
| "learning_rate": 4.5e-05, |
| "loss": 0.1317, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.8440366972477065, |
| "grad_norm": 13518.7568359375, |
| "learning_rate": 4.55e-05, |
| "loss": 0.1289, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.8532110091743119, |
| "grad_norm": 12170.1865234375, |
| "learning_rate": 4.600000000000001e-05, |
| "loss": 0.1371, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.8623853211009175, |
| "grad_norm": 11745.9775390625, |
| "learning_rate": 4.6500000000000005e-05, |
| "loss": 0.1434, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.8715596330275229, |
| "grad_norm": 12265.5556640625, |
| "learning_rate": 4.7e-05, |
| "loss": 0.1343, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.8807339449541285, |
| "grad_norm": 11386.3291015625, |
| "learning_rate": 4.75e-05, |
| "loss": 0.1317, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.8899082568807339, |
| "grad_norm": 10869.4599609375, |
| "learning_rate": 4.8e-05, |
| "loss": 0.1295, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.8990825688073395, |
| "grad_norm": 10752.25, |
| "learning_rate": 4.85e-05, |
| "loss": 0.124, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.908256880733945, |
| "grad_norm": 9687.6376953125, |
| "learning_rate": 4.9e-05, |
| "loss": 0.1271, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.9174311926605505, |
| "grad_norm": 9697.15234375, |
| "learning_rate": 4.9500000000000004e-05, |
| "loss": 0.1194, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.926605504587156, |
| "grad_norm": 9423.21875, |
| "learning_rate": 5e-05, |
| "loss": 0.1176, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.9357798165137615, |
| "grad_norm": 10253.8564453125, |
| "learning_rate": 4.994949494949495e-05, |
| "loss": 0.121, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.944954128440367, |
| "grad_norm": 9756.7646484375, |
| "learning_rate": 4.98989898989899e-05, |
| "loss": 0.1173, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.9541284403669725, |
| "grad_norm": 8482.177734375, |
| "learning_rate": 4.984848484848485e-05, |
| "loss": 0.1233, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.963302752293578, |
| "grad_norm": 7755.4892578125, |
| "learning_rate": 4.97979797979798e-05, |
| "loss": 0.1251, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.9724770642201835, |
| "grad_norm": 8553.6689453125, |
| "learning_rate": 4.974747474747475e-05, |
| "loss": 0.1287, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.981651376146789, |
| "grad_norm": 8794.58984375, |
| "learning_rate": 4.9696969696969694e-05, |
| "loss": 0.1178, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.9908256880733946, |
| "grad_norm": 8048.97509765625, |
| "learning_rate": 4.964646464646465e-05, |
| "loss": 0.1112, |
| "step": 108 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 24349.96484375, |
| "learning_rate": 4.9595959595959594e-05, |
| "loss": 0.112, |
| "step": 109 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_f1_macro": 0.0, |
| "eval_f1_micro": 0.0, |
| "eval_loss": 0.1133684441447258, |
| "eval_precision": 0.0, |
| "eval_recall": 0.0, |
| "eval_runtime": 0.8578, |
| "eval_samples_per_second": 336.892, |
| "eval_steps_per_second": 15.154, |
| "step": 109 |
| }, |
| { |
| "epoch": 1.0091743119266054, |
| "grad_norm": 7603.02587890625, |
| "learning_rate": 4.9545454545454553e-05, |
| "loss": 0.1133, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.018348623853211, |
| "grad_norm": 8602.8974609375, |
| "learning_rate": 4.94949494949495e-05, |
| "loss": 0.1192, |
| "step": 111 |
| }, |
| { |
| "epoch": 1.0275229357798166, |
| "grad_norm": 8388.9609375, |
| "learning_rate": 4.9444444444444446e-05, |
| "loss": 0.1246, |
| "step": 112 |
| }, |
| { |
| "epoch": 1.036697247706422, |
| "grad_norm": 9368.150390625, |
| "learning_rate": 4.93939393939394e-05, |
| "loss": 0.1089, |
| "step": 113 |
| }, |
| { |
| "epoch": 1.0458715596330275, |
| "grad_norm": 7053.083984375, |
| "learning_rate": 4.9343434343434346e-05, |
| "loss": 0.1042, |
| "step": 114 |
| }, |
| { |
| "epoch": 1.0550458715596331, |
| "grad_norm": 9539.36328125, |
| "learning_rate": 4.92929292929293e-05, |
| "loss": 0.1049, |
| "step": 115 |
| }, |
| { |
| "epoch": 1.0642201834862386, |
| "grad_norm": 6956.2763671875, |
| "learning_rate": 4.9242424242424245e-05, |
| "loss": 0.109, |
| "step": 116 |
| }, |
| { |
| "epoch": 1.073394495412844, |
| "grad_norm": 6597.9580078125, |
| "learning_rate": 4.919191919191919e-05, |
| "loss": 0.1124, |
| "step": 117 |
| }, |
| { |
| "epoch": 1.0825688073394495, |
| "grad_norm": 8124.76123046875, |
| "learning_rate": 4.9141414141414145e-05, |
| "loss": 0.1226, |
| "step": 118 |
| }, |
| { |
| "epoch": 1.091743119266055, |
| "grad_norm": 7030.619140625, |
| "learning_rate": 4.909090909090909e-05, |
| "loss": 0.0989, |
| "step": 119 |
| }, |
| { |
| "epoch": 1.1009174311926606, |
| "grad_norm": 9377.66796875, |
| "learning_rate": 4.9040404040404044e-05, |
| "loss": 0.1038, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.110091743119266, |
| "grad_norm": 9298.802734375, |
| "learning_rate": 4.898989898989899e-05, |
| "loss": 0.1216, |
| "step": 121 |
| }, |
| { |
| "epoch": 1.1192660550458715, |
| "grad_norm": 7350.28369140625, |
| "learning_rate": 4.8939393939393944e-05, |
| "loss": 0.0915, |
| "step": 122 |
| }, |
| { |
| "epoch": 1.1284403669724772, |
| "grad_norm": 8066.943359375, |
| "learning_rate": 4.888888888888889e-05, |
| "loss": 0.1291, |
| "step": 123 |
| }, |
| { |
| "epoch": 1.1376146788990826, |
| "grad_norm": 8210.3095703125, |
| "learning_rate": 4.8838383838383836e-05, |
| "loss": 0.1196, |
| "step": 124 |
| }, |
| { |
| "epoch": 1.146788990825688, |
| "grad_norm": 7568.0234375, |
| "learning_rate": 4.878787878787879e-05, |
| "loss": 0.1037, |
| "step": 125 |
| }, |
| { |
| "epoch": 1.1559633027522935, |
| "grad_norm": 6394.8896484375, |
| "learning_rate": 4.8737373737373736e-05, |
| "loss": 0.0961, |
| "step": 126 |
| }, |
| { |
| "epoch": 1.165137614678899, |
| "grad_norm": 7549.9951171875, |
| "learning_rate": 4.868686868686869e-05, |
| "loss": 0.1075, |
| "step": 127 |
| }, |
| { |
| "epoch": 1.1743119266055047, |
| "grad_norm": 7983.36865234375, |
| "learning_rate": 4.863636363636364e-05, |
| "loss": 0.1055, |
| "step": 128 |
| }, |
| { |
| "epoch": 1.18348623853211, |
| "grad_norm": 7613.1455078125, |
| "learning_rate": 4.858585858585859e-05, |
| "loss": 0.1097, |
| "step": 129 |
| }, |
| { |
| "epoch": 1.1926605504587156, |
| "grad_norm": 7754.9228515625, |
| "learning_rate": 4.853535353535354e-05, |
| "loss": 0.1157, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.2018348623853212, |
| "grad_norm": 8360.388671875, |
| "learning_rate": 4.848484848484849e-05, |
| "loss": 0.1019, |
| "step": 131 |
| }, |
| { |
| "epoch": 1.2110091743119267, |
| "grad_norm": 8300.9169921875, |
| "learning_rate": 4.843434343434344e-05, |
| "loss": 0.1098, |
| "step": 132 |
| }, |
| { |
| "epoch": 1.2201834862385321, |
| "grad_norm": 7554.8017578125, |
| "learning_rate": 4.838383838383839e-05, |
| "loss": 0.1075, |
| "step": 133 |
| }, |
| { |
| "epoch": 1.2293577981651376, |
| "grad_norm": 6510.427734375, |
| "learning_rate": 4.8333333333333334e-05, |
| "loss": 0.1041, |
| "step": 134 |
| }, |
| { |
| "epoch": 1.238532110091743, |
| "grad_norm": 5649.77978515625, |
| "learning_rate": 4.828282828282829e-05, |
| "loss": 0.1012, |
| "step": 135 |
| }, |
| { |
| "epoch": 1.2477064220183487, |
| "grad_norm": 7662.76513671875, |
| "learning_rate": 4.823232323232323e-05, |
| "loss": 0.1357, |
| "step": 136 |
| }, |
| { |
| "epoch": 1.2568807339449541, |
| "grad_norm": 7261.087890625, |
| "learning_rate": 4.8181818181818186e-05, |
| "loss": 0.1149, |
| "step": 137 |
| }, |
| { |
| "epoch": 1.2660550458715596, |
| "grad_norm": 6818.208984375, |
| "learning_rate": 4.813131313131313e-05, |
| "loss": 0.1067, |
| "step": 138 |
| }, |
| { |
| "epoch": 1.2752293577981653, |
| "grad_norm": 8839.0810546875, |
| "learning_rate": 4.808080808080808e-05, |
| "loss": 0.089, |
| "step": 139 |
| }, |
| { |
| "epoch": 1.2844036697247707, |
| "grad_norm": 8774.0615234375, |
| "learning_rate": 4.803030303030303e-05, |
| "loss": 0.1013, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.2935779816513762, |
| "grad_norm": 7869.935546875, |
| "learning_rate": 4.797979797979798e-05, |
| "loss": 0.1187, |
| "step": 141 |
| }, |
| { |
| "epoch": 1.3027522935779816, |
| "grad_norm": 9808.5986328125, |
| "learning_rate": 4.792929292929293e-05, |
| "loss": 0.13, |
| "step": 142 |
| }, |
| { |
| "epoch": 1.311926605504587, |
| "grad_norm": 7122.400390625, |
| "learning_rate": 4.787878787878788e-05, |
| "loss": 0.1089, |
| "step": 143 |
| }, |
| { |
| "epoch": 1.3211009174311927, |
| "grad_norm": 6575.39892578125, |
| "learning_rate": 4.782828282828283e-05, |
| "loss": 0.1011, |
| "step": 144 |
| }, |
| { |
| "epoch": 1.3302752293577982, |
| "grad_norm": 8656.4345703125, |
| "learning_rate": 4.7777777777777784e-05, |
| "loss": 0.0998, |
| "step": 145 |
| }, |
| { |
| "epoch": 1.3394495412844036, |
| "grad_norm": 7247.53466796875, |
| "learning_rate": 4.772727272727273e-05, |
| "loss": 0.1255, |
| "step": 146 |
| }, |
| { |
| "epoch": 1.3486238532110093, |
| "grad_norm": 7125.67822265625, |
| "learning_rate": 4.7676767676767684e-05, |
| "loss": 0.1123, |
| "step": 147 |
| }, |
| { |
| "epoch": 1.3577981651376148, |
| "grad_norm": 7758.84375, |
| "learning_rate": 4.762626262626263e-05, |
| "loss": 0.1138, |
| "step": 148 |
| }, |
| { |
| "epoch": 1.3669724770642202, |
| "grad_norm": 6958.53173828125, |
| "learning_rate": 4.7575757575757576e-05, |
| "loss": 0.0968, |
| "step": 149 |
| }, |
| { |
| "epoch": 1.3761467889908257, |
| "grad_norm": 7246.337890625, |
| "learning_rate": 4.752525252525253e-05, |
| "loss": 0.0924, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.385321100917431, |
| "grad_norm": 7635.5615234375, |
| "learning_rate": 4.7474747474747476e-05, |
| "loss": 0.1152, |
| "step": 151 |
| }, |
| { |
| "epoch": 1.3944954128440368, |
| "grad_norm": 7913.57080078125, |
| "learning_rate": 4.742424242424243e-05, |
| "loss": 0.0951, |
| "step": 152 |
| }, |
| { |
| "epoch": 1.4036697247706422, |
| "grad_norm": 8531.388671875, |
| "learning_rate": 4.7373737373737375e-05, |
| "loss": 0.1161, |
| "step": 153 |
| }, |
| { |
| "epoch": 1.4128440366972477, |
| "grad_norm": 9724.3291015625, |
| "learning_rate": 4.732323232323232e-05, |
| "loss": 0.1253, |
| "step": 154 |
| }, |
| { |
| "epoch": 1.4220183486238533, |
| "grad_norm": 6726.74365234375, |
| "learning_rate": 4.7272727272727275e-05, |
| "loss": 0.1037, |
| "step": 155 |
| }, |
| { |
| "epoch": 1.4311926605504588, |
| "grad_norm": 6435.69970703125, |
| "learning_rate": 4.722222222222222e-05, |
| "loss": 0.1036, |
| "step": 156 |
| }, |
| { |
| "epoch": 1.4403669724770642, |
| "grad_norm": 6851.59814453125, |
| "learning_rate": 4.7171717171717174e-05, |
| "loss": 0.0954, |
| "step": 157 |
| }, |
| { |
| "epoch": 1.4495412844036697, |
| "grad_norm": 7785.31640625, |
| "learning_rate": 4.712121212121212e-05, |
| "loss": 0.0996, |
| "step": 158 |
| }, |
| { |
| "epoch": 1.4587155963302751, |
| "grad_norm": 6317.5576171875, |
| "learning_rate": 4.7070707070707074e-05, |
| "loss": 0.0977, |
| "step": 159 |
| }, |
| { |
| "epoch": 1.4678899082568808, |
| "grad_norm": 8045.7197265625, |
| "learning_rate": 4.702020202020202e-05, |
| "loss": 0.1101, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.4770642201834863, |
| "grad_norm": 7386.67529296875, |
| "learning_rate": 4.696969696969697e-05, |
| "loss": 0.1137, |
| "step": 161 |
| }, |
| { |
| "epoch": 1.4862385321100917, |
| "grad_norm": 8017.5791015625, |
| "learning_rate": 4.6919191919191926e-05, |
| "loss": 0.0959, |
| "step": 162 |
| }, |
| { |
| "epoch": 1.4954128440366974, |
| "grad_norm": 7198.42138671875, |
| "learning_rate": 4.686868686868687e-05, |
| "loss": 0.1005, |
| "step": 163 |
| }, |
| { |
| "epoch": 1.5045871559633026, |
| "grad_norm": 9139.7900390625, |
| "learning_rate": 4.681818181818182e-05, |
| "loss": 0.0918, |
| "step": 164 |
| }, |
| { |
| "epoch": 1.5137614678899083, |
| "grad_norm": 6384.1640625, |
| "learning_rate": 4.676767676767677e-05, |
| "loss": 0.0948, |
| "step": 165 |
| }, |
| { |
| "epoch": 1.5229357798165137, |
| "grad_norm": 5053.224609375, |
| "learning_rate": 4.671717171717172e-05, |
| "loss": 0.1056, |
| "step": 166 |
| }, |
| { |
| "epoch": 1.5321100917431192, |
| "grad_norm": 9213.654296875, |
| "learning_rate": 4.666666666666667e-05, |
| "loss": 0.0931, |
| "step": 167 |
| }, |
| { |
| "epoch": 1.5412844036697249, |
| "grad_norm": 7414.76171875, |
| "learning_rate": 4.661616161616162e-05, |
| "loss": 0.1123, |
| "step": 168 |
| }, |
| { |
| "epoch": 1.5504587155963303, |
| "grad_norm": 6406.48583984375, |
| "learning_rate": 4.656565656565657e-05, |
| "loss": 0.0992, |
| "step": 169 |
| }, |
| { |
| "epoch": 1.5596330275229358, |
| "grad_norm": 7213.58837890625, |
| "learning_rate": 4.651515151515152e-05, |
| "loss": 0.0952, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.5688073394495414, |
| "grad_norm": 7537.673828125, |
| "learning_rate": 4.6464646464646464e-05, |
| "loss": 0.0961, |
| "step": 171 |
| }, |
| { |
| "epoch": 1.5779816513761467, |
| "grad_norm": 7876.294921875, |
| "learning_rate": 4.641414141414142e-05, |
| "loss": 0.1308, |
| "step": 172 |
| }, |
| { |
| "epoch": 1.5871559633027523, |
| "grad_norm": 9550.125, |
| "learning_rate": 4.636363636363636e-05, |
| "loss": 0.0941, |
| "step": 173 |
| }, |
| { |
| "epoch": 1.5963302752293578, |
| "grad_norm": 6364.3330078125, |
| "learning_rate": 4.6313131313131316e-05, |
| "loss": 0.0976, |
| "step": 174 |
| }, |
| { |
| "epoch": 1.6055045871559632, |
| "grad_norm": 6976.2138671875, |
| "learning_rate": 4.626262626262626e-05, |
| "loss": 0.091, |
| "step": 175 |
| }, |
| { |
| "epoch": 1.614678899082569, |
| "grad_norm": 7259.40234375, |
| "learning_rate": 4.621212121212121e-05, |
| "loss": 0.1096, |
| "step": 176 |
| }, |
| { |
| "epoch": 1.6238532110091743, |
| "grad_norm": 8176.20849609375, |
| "learning_rate": 4.616161616161616e-05, |
| "loss": 0.1029, |
| "step": 177 |
| }, |
| { |
| "epoch": 1.6330275229357798, |
| "grad_norm": 5628.04345703125, |
| "learning_rate": 4.6111111111111115e-05, |
| "loss": 0.1008, |
| "step": 178 |
| }, |
| { |
| "epoch": 1.6422018348623855, |
| "grad_norm": 6802.91064453125, |
| "learning_rate": 4.606060606060607e-05, |
| "loss": 0.1014, |
| "step": 179 |
| }, |
| { |
| "epoch": 1.6513761467889907, |
| "grad_norm": 14422.4482421875, |
| "learning_rate": 4.6010101010101015e-05, |
| "loss": 0.0912, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.6605504587155964, |
| "grad_norm": 8855.7744140625, |
| "learning_rate": 4.595959595959596e-05, |
| "loss": 0.0983, |
| "step": 181 |
| }, |
| { |
| "epoch": 1.6697247706422018, |
| "grad_norm": 7411.93603515625, |
| "learning_rate": 4.5909090909090914e-05, |
| "loss": 0.1099, |
| "step": 182 |
| }, |
| { |
| "epoch": 1.6788990825688073, |
| "grad_norm": 8138.41748046875, |
| "learning_rate": 4.585858585858586e-05, |
| "loss": 0.1054, |
| "step": 183 |
| }, |
| { |
| "epoch": 1.688073394495413, |
| "grad_norm": 6157.80908203125, |
| "learning_rate": 4.5808080808080814e-05, |
| "loss": 0.1054, |
| "step": 184 |
| }, |
| { |
| "epoch": 1.6972477064220184, |
| "grad_norm": 6791.07421875, |
| "learning_rate": 4.575757575757576e-05, |
| "loss": 0.1022, |
| "step": 185 |
| }, |
| { |
| "epoch": 1.7064220183486238, |
| "grad_norm": 9991.0537109375, |
| "learning_rate": 4.5707070707070706e-05, |
| "loss": 0.1131, |
| "step": 186 |
| }, |
| { |
| "epoch": 1.7155963302752295, |
| "grad_norm": 7240.28955078125, |
| "learning_rate": 4.565656565656566e-05, |
| "loss": 0.1005, |
| "step": 187 |
| }, |
| { |
| "epoch": 1.7247706422018347, |
| "grad_norm": 6442.6259765625, |
| "learning_rate": 4.5606060606060606e-05, |
| "loss": 0.1015, |
| "step": 188 |
| }, |
| { |
| "epoch": 1.7339449541284404, |
| "grad_norm": 8573.3955078125, |
| "learning_rate": 4.555555555555556e-05, |
| "loss": 0.1176, |
| "step": 189 |
| }, |
| { |
| "epoch": 1.7431192660550459, |
| "grad_norm": 7330.88525390625, |
| "learning_rate": 4.5505050505050505e-05, |
| "loss": 0.0923, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.7522935779816513, |
| "grad_norm": 7870.916015625, |
| "learning_rate": 4.545454545454546e-05, |
| "loss": 0.1113, |
| "step": 191 |
| }, |
| { |
| "epoch": 1.761467889908257, |
| "grad_norm": 7789.8564453125, |
| "learning_rate": 4.5404040404040405e-05, |
| "loss": 0.1062, |
| "step": 192 |
| }, |
| { |
| "epoch": 1.7706422018348624, |
| "grad_norm": 7983.59326171875, |
| "learning_rate": 4.535353535353535e-05, |
| "loss": 0.1078, |
| "step": 193 |
| }, |
| { |
| "epoch": 1.7798165137614679, |
| "grad_norm": 6533.43994140625, |
| "learning_rate": 4.5303030303030304e-05, |
| "loss": 0.1033, |
| "step": 194 |
| }, |
| { |
| "epoch": 1.7889908256880735, |
| "grad_norm": 7317.1318359375, |
| "learning_rate": 4.525252525252526e-05, |
| "loss": 0.1106, |
| "step": 195 |
| }, |
| { |
| "epoch": 1.7981651376146788, |
| "grad_norm": 7829.7880859375, |
| "learning_rate": 4.5202020202020204e-05, |
| "loss": 0.1163, |
| "step": 196 |
| }, |
| { |
| "epoch": 1.8073394495412844, |
| "grad_norm": 7524.41357421875, |
| "learning_rate": 4.515151515151516e-05, |
| "loss": 0.1043, |
| "step": 197 |
| }, |
| { |
| "epoch": 1.81651376146789, |
| "grad_norm": 7487.89892578125, |
| "learning_rate": 4.51010101010101e-05, |
| "loss": 0.1091, |
| "step": 198 |
| }, |
| { |
| "epoch": 1.8256880733944953, |
| "grad_norm": 6964.3154296875, |
| "learning_rate": 4.5050505050505056e-05, |
| "loss": 0.0986, |
| "step": 199 |
| }, |
| { |
| "epoch": 1.834862385321101, |
| "grad_norm": 7087.05029296875, |
| "learning_rate": 4.5e-05, |
| "loss": 0.0997, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.8440366972477065, |
| "grad_norm": 7148.7578125, |
| "learning_rate": 4.494949494949495e-05, |
| "loss": 0.0989, |
| "step": 201 |
| }, |
| { |
| "epoch": 1.853211009174312, |
| "grad_norm": 7850.2470703125, |
| "learning_rate": 4.48989898989899e-05, |
| "loss": 0.0957, |
| "step": 202 |
| }, |
| { |
| "epoch": 1.8623853211009176, |
| "grad_norm": 7752.841796875, |
| "learning_rate": 4.484848484848485e-05, |
| "loss": 0.1292, |
| "step": 203 |
| }, |
| { |
| "epoch": 1.8715596330275228, |
| "grad_norm": 6098.35595703125, |
| "learning_rate": 4.47979797979798e-05, |
| "loss": 0.1001, |
| "step": 204 |
| }, |
| { |
| "epoch": 1.8807339449541285, |
| "grad_norm": 7094.8173828125, |
| "learning_rate": 4.474747474747475e-05, |
| "loss": 0.1286, |
| "step": 205 |
| }, |
| { |
| "epoch": 1.889908256880734, |
| "grad_norm": 7456.2265625, |
| "learning_rate": 4.46969696969697e-05, |
| "loss": 0.1256, |
| "step": 206 |
| }, |
| { |
| "epoch": 1.8990825688073394, |
| "grad_norm": 8136.77197265625, |
| "learning_rate": 4.464646464646465e-05, |
| "loss": 0.1014, |
| "step": 207 |
| }, |
| { |
| "epoch": 1.908256880733945, |
| "grad_norm": 7520.1650390625, |
| "learning_rate": 4.4595959595959594e-05, |
| "loss": 0.0919, |
| "step": 208 |
| }, |
| { |
| "epoch": 1.9174311926605505, |
| "grad_norm": 8064.79296875, |
| "learning_rate": 4.454545454545455e-05, |
| "loss": 0.0985, |
| "step": 209 |
| }, |
| { |
| "epoch": 1.926605504587156, |
| "grad_norm": 6153.19677734375, |
| "learning_rate": 4.4494949494949493e-05, |
| "loss": 0.1101, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.9357798165137616, |
| "grad_norm": 8089.42431640625, |
| "learning_rate": 4.4444444444444447e-05, |
| "loss": 0.0988, |
| "step": 211 |
| }, |
| { |
| "epoch": 1.9449541284403669, |
| "grad_norm": 8096.140625, |
| "learning_rate": 4.43939393939394e-05, |
| "loss": 0.0992, |
| "step": 212 |
| }, |
| { |
| "epoch": 1.9541284403669725, |
| "grad_norm": 7865.43798828125, |
| "learning_rate": 4.4343434343434346e-05, |
| "loss": 0.0945, |
| "step": 213 |
| }, |
| { |
| "epoch": 1.963302752293578, |
| "grad_norm": 6380.67724609375, |
| "learning_rate": 4.42929292929293e-05, |
| "loss": 0.1073, |
| "step": 214 |
| }, |
| { |
| "epoch": 1.9724770642201834, |
| "grad_norm": 8548.4345703125, |
| "learning_rate": 4.4242424242424246e-05, |
| "loss": 0.1121, |
| "step": 215 |
| }, |
| { |
| "epoch": 1.981651376146789, |
| "grad_norm": 8465.8466796875, |
| "learning_rate": 4.41919191919192e-05, |
| "loss": 0.1078, |
| "step": 216 |
| }, |
| { |
| "epoch": 1.9908256880733946, |
| "grad_norm": 8595.419921875, |
| "learning_rate": 4.4141414141414145e-05, |
| "loss": 0.0924, |
| "step": 217 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 22229.833984375, |
| "learning_rate": 4.409090909090909e-05, |
| "loss": 0.0785, |
| "step": 218 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_f1_macro": 0.0, |
| "eval_f1_micro": 0.0, |
| "eval_loss": 0.10520372539758682, |
| "eval_precision": 0.0, |
| "eval_recall": 0.0, |
| "eval_runtime": 0.8127, |
| "eval_samples_per_second": 355.611, |
| "eval_steps_per_second": 15.996, |
| "step": 218 |
| }, |
| { |
| "epoch": 2.0091743119266057, |
| "grad_norm": 8562.740234375, |
| "learning_rate": 4.4040404040404044e-05, |
| "loss": 0.1152, |
| "step": 219 |
| }, |
| { |
| "epoch": 2.018348623853211, |
| "grad_norm": 7770.8720703125, |
| "learning_rate": 4.398989898989899e-05, |
| "loss": 0.0957, |
| "step": 220 |
| }, |
| { |
| "epoch": 2.0275229357798166, |
| "grad_norm": 6170.166015625, |
| "learning_rate": 4.3939393939393944e-05, |
| "loss": 0.1027, |
| "step": 221 |
| }, |
| { |
| "epoch": 2.036697247706422, |
| "grad_norm": 10314.91796875, |
| "learning_rate": 4.388888888888889e-05, |
| "loss": 0.1166, |
| "step": 222 |
| }, |
| { |
| "epoch": 2.0458715596330275, |
| "grad_norm": 7144.62109375, |
| "learning_rate": 4.383838383838384e-05, |
| "loss": 0.1065, |
| "step": 223 |
| }, |
| { |
| "epoch": 2.055045871559633, |
| "grad_norm": 7398.2109375, |
| "learning_rate": 4.378787878787879e-05, |
| "loss": 0.1165, |
| "step": 224 |
| }, |
| { |
| "epoch": 2.0642201834862384, |
| "grad_norm": 7307.375, |
| "learning_rate": 4.3737373737373736e-05, |
| "loss": 0.0941, |
| "step": 225 |
| }, |
| { |
| "epoch": 2.073394495412844, |
| "grad_norm": 8046.33837890625, |
| "learning_rate": 4.368686868686869e-05, |
| "loss": 0.1092, |
| "step": 226 |
| }, |
| { |
| "epoch": 2.0825688073394497, |
| "grad_norm": 8320.74609375, |
| "learning_rate": 4.3636363636363636e-05, |
| "loss": 0.0983, |
| "step": 227 |
| }, |
| { |
| "epoch": 2.091743119266055, |
| "grad_norm": 10485.9384765625, |
| "learning_rate": 4.358585858585859e-05, |
| "loss": 0.0981, |
| "step": 228 |
| }, |
| { |
| "epoch": 2.1009174311926606, |
| "grad_norm": 7908.02734375, |
| "learning_rate": 4.3535353535353535e-05, |
| "loss": 0.1021, |
| "step": 229 |
| }, |
| { |
| "epoch": 2.1100917431192663, |
| "grad_norm": 7627.345703125, |
| "learning_rate": 4.348484848484849e-05, |
| "loss": 0.106, |
| "step": 230 |
| }, |
| { |
| "epoch": 2.1192660550458715, |
| "grad_norm": 7474.5263671875, |
| "learning_rate": 4.343434343434344e-05, |
| "loss": 0.1156, |
| "step": 231 |
| }, |
| { |
| "epoch": 2.128440366972477, |
| "grad_norm": 6541.48828125, |
| "learning_rate": 4.338383838383839e-05, |
| "loss": 0.0974, |
| "step": 232 |
| }, |
| { |
| "epoch": 2.1376146788990824, |
| "grad_norm": 10176.2197265625, |
| "learning_rate": 4.3333333333333334e-05, |
| "loss": 0.1172, |
| "step": 233 |
| }, |
| { |
| "epoch": 2.146788990825688, |
| "grad_norm": 10230.0478515625, |
| "learning_rate": 4.328282828282829e-05, |
| "loss": 0.1089, |
| "step": 234 |
| }, |
| { |
| "epoch": 2.1559633027522938, |
| "grad_norm": 7409.98583984375, |
| "learning_rate": 4.3232323232323234e-05, |
| "loss": 0.095, |
| "step": 235 |
| }, |
| { |
| "epoch": 2.165137614678899, |
| "grad_norm": 9656.2216796875, |
| "learning_rate": 4.318181818181819e-05, |
| "loss": 0.0862, |
| "step": 236 |
| }, |
| { |
| "epoch": 2.1743119266055047, |
| "grad_norm": 10004.51171875, |
| "learning_rate": 4.313131313131313e-05, |
| "loss": 0.1019, |
| "step": 237 |
| }, |
| { |
| "epoch": 2.18348623853211, |
| "grad_norm": 10041.599609375, |
| "learning_rate": 4.308080808080808e-05, |
| "loss": 0.0828, |
| "step": 238 |
| }, |
| { |
| "epoch": 2.1926605504587156, |
| "grad_norm": 10838.517578125, |
| "learning_rate": 4.303030303030303e-05, |
| "loss": 0.1093, |
| "step": 239 |
| }, |
| { |
| "epoch": 2.2018348623853212, |
| "grad_norm": 7099.97265625, |
| "learning_rate": 4.297979797979798e-05, |
| "loss": 0.0962, |
| "step": 240 |
| }, |
| { |
| "epoch": 2.2110091743119265, |
| "grad_norm": 8438.68359375, |
| "learning_rate": 4.292929292929293e-05, |
| "loss": 0.1095, |
| "step": 241 |
| }, |
| { |
| "epoch": 2.220183486238532, |
| "grad_norm": 7405.43359375, |
| "learning_rate": 4.287878787878788e-05, |
| "loss": 0.1035, |
| "step": 242 |
| }, |
| { |
| "epoch": 2.229357798165138, |
| "grad_norm": 9133.9833984375, |
| "learning_rate": 4.282828282828283e-05, |
| "loss": 0.0925, |
| "step": 243 |
| }, |
| { |
| "epoch": 2.238532110091743, |
| "grad_norm": 7998.142578125, |
| "learning_rate": 4.277777777777778e-05, |
| "loss": 0.0998, |
| "step": 244 |
| }, |
| { |
| "epoch": 2.2477064220183487, |
| "grad_norm": 9984.765625, |
| "learning_rate": 4.2727272727272724e-05, |
| "loss": 0.1027, |
| "step": 245 |
| }, |
| { |
| "epoch": 2.2568807339449544, |
| "grad_norm": 8999.0556640625, |
| "learning_rate": 4.267676767676768e-05, |
| "loss": 0.1177, |
| "step": 246 |
| }, |
| { |
| "epoch": 2.2660550458715596, |
| "grad_norm": 7668.86962890625, |
| "learning_rate": 4.262626262626263e-05, |
| "loss": 0.0946, |
| "step": 247 |
| }, |
| { |
| "epoch": 2.2752293577981653, |
| "grad_norm": 7349.47998046875, |
| "learning_rate": 4.257575757575758e-05, |
| "loss": 0.1066, |
| "step": 248 |
| }, |
| { |
| "epoch": 2.2844036697247705, |
| "grad_norm": 8360.69921875, |
| "learning_rate": 4.252525252525253e-05, |
| "loss": 0.1062, |
| "step": 249 |
| }, |
| { |
| "epoch": 2.293577981651376, |
| "grad_norm": 8717.70703125, |
| "learning_rate": 4.2474747474747476e-05, |
| "loss": 0.0907, |
| "step": 250 |
| }, |
| { |
| "epoch": 2.302752293577982, |
| "grad_norm": 8706.5791015625, |
| "learning_rate": 4.242424242424243e-05, |
| "loss": 0.0973, |
| "step": 251 |
| }, |
| { |
| "epoch": 2.311926605504587, |
| "grad_norm": 8500.1220703125, |
| "learning_rate": 4.2373737373737376e-05, |
| "loss": 0.1022, |
| "step": 252 |
| }, |
| { |
| "epoch": 2.3211009174311927, |
| "grad_norm": 10551.7021484375, |
| "learning_rate": 4.232323232323233e-05, |
| "loss": 0.0979, |
| "step": 253 |
| }, |
| { |
| "epoch": 2.330275229357798, |
| "grad_norm": 8305.2802734375, |
| "learning_rate": 4.2272727272727275e-05, |
| "loss": 0.0926, |
| "step": 254 |
| }, |
| { |
| "epoch": 2.3394495412844036, |
| "grad_norm": 10128.9423828125, |
| "learning_rate": 4.222222222222222e-05, |
| "loss": 0.1018, |
| "step": 255 |
| }, |
| { |
| "epoch": 2.3486238532110093, |
| "grad_norm": 9000.6689453125, |
| "learning_rate": 4.2171717171717175e-05, |
| "loss": 0.091, |
| "step": 256 |
| }, |
| { |
| "epoch": 2.3577981651376145, |
| "grad_norm": 8849.099609375, |
| "learning_rate": 4.212121212121212e-05, |
| "loss": 0.0993, |
| "step": 257 |
| }, |
| { |
| "epoch": 2.36697247706422, |
| "grad_norm": 6667.78564453125, |
| "learning_rate": 4.2070707070707074e-05, |
| "loss": 0.1044, |
| "step": 258 |
| }, |
| { |
| "epoch": 2.376146788990826, |
| "grad_norm": 7288.4970703125, |
| "learning_rate": 4.202020202020202e-05, |
| "loss": 0.0966, |
| "step": 259 |
| }, |
| { |
| "epoch": 2.385321100917431, |
| "grad_norm": 7155.8310546875, |
| "learning_rate": 4.196969696969697e-05, |
| "loss": 0.1018, |
| "step": 260 |
| }, |
| { |
| "epoch": 2.3944954128440368, |
| "grad_norm": 6328.56396484375, |
| "learning_rate": 4.191919191919192e-05, |
| "loss": 0.0862, |
| "step": 261 |
| }, |
| { |
| "epoch": 2.4036697247706424, |
| "grad_norm": 8594.3603515625, |
| "learning_rate": 4.1868686868686866e-05, |
| "loss": 0.1028, |
| "step": 262 |
| }, |
| { |
| "epoch": 2.4128440366972477, |
| "grad_norm": 8077.544921875, |
| "learning_rate": 4.181818181818182e-05, |
| "loss": 0.1044, |
| "step": 263 |
| }, |
| { |
| "epoch": 2.4220183486238533, |
| "grad_norm": 6332.14453125, |
| "learning_rate": 4.176767676767677e-05, |
| "loss": 0.0902, |
| "step": 264 |
| }, |
| { |
| "epoch": 2.4311926605504586, |
| "grad_norm": 7677.5009765625, |
| "learning_rate": 4.171717171717172e-05, |
| "loss": 0.098, |
| "step": 265 |
| }, |
| { |
| "epoch": 2.4403669724770642, |
| "grad_norm": 7953.89501953125, |
| "learning_rate": 4.166666666666667e-05, |
| "loss": 0.0809, |
| "step": 266 |
| }, |
| { |
| "epoch": 2.44954128440367, |
| "grad_norm": 6055.19287109375, |
| "learning_rate": 4.161616161616162e-05, |
| "loss": 0.0814, |
| "step": 267 |
| }, |
| { |
| "epoch": 2.458715596330275, |
| "grad_norm": 6427.98046875, |
| "learning_rate": 4.156565656565657e-05, |
| "loss": 0.0994, |
| "step": 268 |
| }, |
| { |
| "epoch": 2.467889908256881, |
| "grad_norm": 8880.2197265625, |
| "learning_rate": 4.151515151515152e-05, |
| "loss": 0.0804, |
| "step": 269 |
| }, |
| { |
| "epoch": 2.477064220183486, |
| "grad_norm": 7500.67578125, |
| "learning_rate": 4.1464646464646464e-05, |
| "loss": 0.0997, |
| "step": 270 |
| }, |
| { |
| "epoch": 2.4862385321100917, |
| "grad_norm": 7541.759765625, |
| "learning_rate": 4.141414141414142e-05, |
| "loss": 0.0901, |
| "step": 271 |
| }, |
| { |
| "epoch": 2.4954128440366974, |
| "grad_norm": 6423.1025390625, |
| "learning_rate": 4.1363636363636364e-05, |
| "loss": 0.089, |
| "step": 272 |
| }, |
| { |
| "epoch": 2.5045871559633026, |
| "grad_norm": 6384.140625, |
| "learning_rate": 4.131313131313132e-05, |
| "loss": 0.0848, |
| "step": 273 |
| }, |
| { |
| "epoch": 2.5137614678899083, |
| "grad_norm": 6418.369140625, |
| "learning_rate": 4.126262626262626e-05, |
| "loss": 0.0845, |
| "step": 274 |
| }, |
| { |
| "epoch": 2.522935779816514, |
| "grad_norm": 6441.12060546875, |
| "learning_rate": 4.1212121212121216e-05, |
| "loss": 0.0855, |
| "step": 275 |
| }, |
| { |
| "epoch": 2.532110091743119, |
| "grad_norm": 6891.9501953125, |
| "learning_rate": 4.116161616161616e-05, |
| "loss": 0.0798, |
| "step": 276 |
| }, |
| { |
| "epoch": 2.541284403669725, |
| "grad_norm": 8036.39306640625, |
| "learning_rate": 4.111111111111111e-05, |
| "loss": 0.1083, |
| "step": 277 |
| }, |
| { |
| "epoch": 2.5504587155963305, |
| "grad_norm": 7460.015625, |
| "learning_rate": 4.106060606060606e-05, |
| "loss": 0.0934, |
| "step": 278 |
| }, |
| { |
| "epoch": 2.5596330275229358, |
| "grad_norm": 7343.33154296875, |
| "learning_rate": 4.101010101010101e-05, |
| "loss": 0.0951, |
| "step": 279 |
| }, |
| { |
| "epoch": 2.5688073394495414, |
| "grad_norm": 9859.599609375, |
| "learning_rate": 4.095959595959596e-05, |
| "loss": 0.0849, |
| "step": 280 |
| }, |
| { |
| "epoch": 2.5779816513761467, |
| "grad_norm": 8394.8212890625, |
| "learning_rate": 4.0909090909090915e-05, |
| "loss": 0.079, |
| "step": 281 |
| }, |
| { |
| "epoch": 2.5871559633027523, |
| "grad_norm": 8307.279296875, |
| "learning_rate": 4.085858585858586e-05, |
| "loss": 0.0985, |
| "step": 282 |
| }, |
| { |
| "epoch": 2.5963302752293576, |
| "grad_norm": 7505.14794921875, |
| "learning_rate": 4.0808080808080814e-05, |
| "loss": 0.0932, |
| "step": 283 |
| }, |
| { |
| "epoch": 2.6055045871559632, |
| "grad_norm": 6129.16552734375, |
| "learning_rate": 4.075757575757576e-05, |
| "loss": 0.0797, |
| "step": 284 |
| }, |
| { |
| "epoch": 2.614678899082569, |
| "grad_norm": 6896.5908203125, |
| "learning_rate": 4.070707070707071e-05, |
| "loss": 0.0891, |
| "step": 285 |
| }, |
| { |
| "epoch": 2.623853211009174, |
| "grad_norm": 8686.771484375, |
| "learning_rate": 4.065656565656566e-05, |
| "loss": 0.0838, |
| "step": 286 |
| }, |
| { |
| "epoch": 2.63302752293578, |
| "grad_norm": 6954.4072265625, |
| "learning_rate": 4.0606060606060606e-05, |
| "loss": 0.0976, |
| "step": 287 |
| }, |
| { |
| "epoch": 2.6422018348623855, |
| "grad_norm": 8336.8720703125, |
| "learning_rate": 4.055555555555556e-05, |
| "loss": 0.1339, |
| "step": 288 |
| }, |
| { |
| "epoch": 2.6513761467889907, |
| "grad_norm": 6729.21044921875, |
| "learning_rate": 4.0505050505050506e-05, |
| "loss": 0.0897, |
| "step": 289 |
| }, |
| { |
| "epoch": 2.6605504587155964, |
| "grad_norm": 8650.7490234375, |
| "learning_rate": 4.045454545454546e-05, |
| "loss": 0.1034, |
| "step": 290 |
| }, |
| { |
| "epoch": 2.669724770642202, |
| "grad_norm": 8123.34228515625, |
| "learning_rate": 4.0404040404040405e-05, |
| "loss": 0.0752, |
| "step": 291 |
| }, |
| { |
| "epoch": 2.6788990825688073, |
| "grad_norm": 7077.0859375, |
| "learning_rate": 4.035353535353535e-05, |
| "loss": 0.0889, |
| "step": 292 |
| }, |
| { |
| "epoch": 2.688073394495413, |
| "grad_norm": 8160.107421875, |
| "learning_rate": 4.0303030303030305e-05, |
| "loss": 0.1067, |
| "step": 293 |
| }, |
| { |
| "epoch": 2.6972477064220186, |
| "grad_norm": 11080.52734375, |
| "learning_rate": 4.025252525252525e-05, |
| "loss": 0.0984, |
| "step": 294 |
| }, |
| { |
| "epoch": 2.706422018348624, |
| "grad_norm": 8655.8173828125, |
| "learning_rate": 4.0202020202020204e-05, |
| "loss": 0.0977, |
| "step": 295 |
| }, |
| { |
| "epoch": 2.7155963302752295, |
| "grad_norm": 9417.9384765625, |
| "learning_rate": 4.015151515151515e-05, |
| "loss": 0.0863, |
| "step": 296 |
| }, |
| { |
| "epoch": 2.7247706422018347, |
| "grad_norm": 7852.45361328125, |
| "learning_rate": 4.01010101010101e-05, |
| "loss": 0.0865, |
| "step": 297 |
| }, |
| { |
| "epoch": 2.7339449541284404, |
| "grad_norm": 8051.74609375, |
| "learning_rate": 4.005050505050506e-05, |
| "loss": 0.11, |
| "step": 298 |
| }, |
| { |
| "epoch": 2.7431192660550456, |
| "grad_norm": 7042.64013671875, |
| "learning_rate": 4e-05, |
| "loss": 0.0805, |
| "step": 299 |
| }, |
| { |
| "epoch": 2.7522935779816513, |
| "grad_norm": 7486.0947265625, |
| "learning_rate": 3.9949494949494956e-05, |
| "loss": 0.0999, |
| "step": 300 |
| }, |
| { |
| "epoch": 2.761467889908257, |
| "grad_norm": 8937.4580078125, |
| "learning_rate": 3.98989898989899e-05, |
| "loss": 0.0936, |
| "step": 301 |
| }, |
| { |
| "epoch": 2.770642201834862, |
| "grad_norm": 7193.6708984375, |
| "learning_rate": 3.984848484848485e-05, |
| "loss": 0.0743, |
| "step": 302 |
| }, |
| { |
| "epoch": 2.779816513761468, |
| "grad_norm": 7543.75341796875, |
| "learning_rate": 3.97979797979798e-05, |
| "loss": 0.0994, |
| "step": 303 |
| }, |
| { |
| "epoch": 2.7889908256880735, |
| "grad_norm": 8849.693359375, |
| "learning_rate": 3.974747474747475e-05, |
| "loss": 0.0891, |
| "step": 304 |
| }, |
| { |
| "epoch": 2.7981651376146788, |
| "grad_norm": 6587.58837890625, |
| "learning_rate": 3.96969696969697e-05, |
| "loss": 0.0921, |
| "step": 305 |
| }, |
| { |
| "epoch": 2.8073394495412844, |
| "grad_norm": 7597.34521484375, |
| "learning_rate": 3.964646464646465e-05, |
| "loss": 0.079, |
| "step": 306 |
| }, |
| { |
| "epoch": 2.81651376146789, |
| "grad_norm": 8688.705078125, |
| "learning_rate": 3.9595959595959594e-05, |
| "loss": 0.1014, |
| "step": 307 |
| }, |
| { |
| "epoch": 2.8256880733944953, |
| "grad_norm": 14665.2021484375, |
| "learning_rate": 3.954545454545455e-05, |
| "loss": 0.1354, |
| "step": 308 |
| }, |
| { |
| "epoch": 2.834862385321101, |
| "grad_norm": 10845.7890625, |
| "learning_rate": 3.9494949494949494e-05, |
| "loss": 0.1076, |
| "step": 309 |
| }, |
| { |
| "epoch": 2.8440366972477067, |
| "grad_norm": 10935.6669921875, |
| "learning_rate": 3.944444444444445e-05, |
| "loss": 0.1054, |
| "step": 310 |
| }, |
| { |
| "epoch": 2.853211009174312, |
| "grad_norm": 11541.736328125, |
| "learning_rate": 3.939393939393939e-05, |
| "loss": 0.0943, |
| "step": 311 |
| }, |
| { |
| "epoch": 2.8623853211009176, |
| "grad_norm": 6934.3125, |
| "learning_rate": 3.9343434343434346e-05, |
| "loss": 0.1018, |
| "step": 312 |
| }, |
| { |
| "epoch": 2.871559633027523, |
| "grad_norm": 8040.87939453125, |
| "learning_rate": 3.929292929292929e-05, |
| "loss": 0.0926, |
| "step": 313 |
| }, |
| { |
| "epoch": 2.8807339449541285, |
| "grad_norm": 6876.23876953125, |
| "learning_rate": 3.924242424242424e-05, |
| "loss": 0.0806, |
| "step": 314 |
| }, |
| { |
| "epoch": 2.8899082568807337, |
| "grad_norm": 8227.6435546875, |
| "learning_rate": 3.91919191919192e-05, |
| "loss": 0.0888, |
| "step": 315 |
| }, |
| { |
| "epoch": 2.8990825688073394, |
| "grad_norm": 9191.8701171875, |
| "learning_rate": 3.9141414141414145e-05, |
| "loss": 0.0982, |
| "step": 316 |
| }, |
| { |
| "epoch": 2.908256880733945, |
| "grad_norm": 8231.9013671875, |
| "learning_rate": 3.909090909090909e-05, |
| "loss": 0.0851, |
| "step": 317 |
| }, |
| { |
| "epoch": 2.9174311926605503, |
| "grad_norm": 8839.322265625, |
| "learning_rate": 3.9040404040404045e-05, |
| "loss": 0.0757, |
| "step": 318 |
| }, |
| { |
| "epoch": 2.926605504587156, |
| "grad_norm": 8270.8525390625, |
| "learning_rate": 3.898989898989899e-05, |
| "loss": 0.1015, |
| "step": 319 |
| }, |
| { |
| "epoch": 2.9357798165137616, |
| "grad_norm": 12873.2529296875, |
| "learning_rate": 3.8939393939393944e-05, |
| "loss": 0.0871, |
| "step": 320 |
| }, |
| { |
| "epoch": 2.944954128440367, |
| "grad_norm": 7598.33837890625, |
| "learning_rate": 3.888888888888889e-05, |
| "loss": 0.0884, |
| "step": 321 |
| }, |
| { |
| "epoch": 2.9541284403669725, |
| "grad_norm": 8909.1279296875, |
| "learning_rate": 3.8838383838383844e-05, |
| "loss": 0.102, |
| "step": 322 |
| }, |
| { |
| "epoch": 2.963302752293578, |
| "grad_norm": 9611.2451171875, |
| "learning_rate": 3.878787878787879e-05, |
| "loss": 0.094, |
| "step": 323 |
| }, |
| { |
| "epoch": 2.9724770642201834, |
| "grad_norm": 8551.978515625, |
| "learning_rate": 3.8737373737373737e-05, |
| "loss": 0.0857, |
| "step": 324 |
| }, |
| { |
| "epoch": 2.981651376146789, |
| "grad_norm": 7810.45703125, |
| "learning_rate": 3.868686868686869e-05, |
| "loss": 0.0854, |
| "step": 325 |
| }, |
| { |
| "epoch": 2.9908256880733948, |
| "grad_norm": 8891.1826171875, |
| "learning_rate": 3.8636363636363636e-05, |
| "loss": 0.101, |
| "step": 326 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 16918.001953125, |
| "learning_rate": 3.858585858585859e-05, |
| "loss": 0.0625, |
| "step": 327 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_f1_macro": 0.05806976361668135, |
| "eval_f1_micro": 0.37575757575757573, |
| "eval_loss": 0.0906587690114975, |
| "eval_precision": 0.7380952380952381, |
| "eval_recall": 0.25203252032520324, |
| "eval_runtime": 0.8046, |
| "eval_samples_per_second": 359.182, |
| "eval_steps_per_second": 16.157, |
| "step": 327 |
| }, |
| { |
| "epoch": 3.0091743119266057, |
| "grad_norm": 8289.6953125, |
| "learning_rate": 3.8535353535353536e-05, |
| "loss": 0.0924, |
| "step": 328 |
| }, |
| { |
| "epoch": 3.018348623853211, |
| "grad_norm": 13682.880859375, |
| "learning_rate": 3.848484848484848e-05, |
| "loss": 0.0939, |
| "step": 329 |
| }, |
| { |
| "epoch": 3.0275229357798166, |
| "grad_norm": 8464.88671875, |
| "learning_rate": 3.8434343434343435e-05, |
| "loss": 0.0784, |
| "step": 330 |
| }, |
| { |
| "epoch": 3.036697247706422, |
| "grad_norm": 8201.7705078125, |
| "learning_rate": 3.838383838383838e-05, |
| "loss": 0.0749, |
| "step": 331 |
| }, |
| { |
| "epoch": 3.0458715596330275, |
| "grad_norm": 9458.033203125, |
| "learning_rate": 3.8333333333333334e-05, |
| "loss": 0.1053, |
| "step": 332 |
| }, |
| { |
| "epoch": 3.055045871559633, |
| "grad_norm": 10062.501953125, |
| "learning_rate": 3.828282828282829e-05, |
| "loss": 0.0665, |
| "step": 333 |
| }, |
| { |
| "epoch": 3.0642201834862384, |
| "grad_norm": 7707.68359375, |
| "learning_rate": 3.8232323232323234e-05, |
| "loss": 0.0785, |
| "step": 334 |
| }, |
| { |
| "epoch": 3.073394495412844, |
| "grad_norm": 12023.5595703125, |
| "learning_rate": 3.818181818181819e-05, |
| "loss": 0.0952, |
| "step": 335 |
| }, |
| { |
| "epoch": 3.0825688073394497, |
| "grad_norm": 6632.79931640625, |
| "learning_rate": 3.8131313131313133e-05, |
| "loss": 0.0861, |
| "step": 336 |
| }, |
| { |
| "epoch": 3.091743119266055, |
| "grad_norm": 7961.10400390625, |
| "learning_rate": 3.8080808080808087e-05, |
| "loss": 0.0942, |
| "step": 337 |
| }, |
| { |
| "epoch": 3.1009174311926606, |
| "grad_norm": 7544.1826171875, |
| "learning_rate": 3.803030303030303e-05, |
| "loss": 0.0829, |
| "step": 338 |
| }, |
| { |
| "epoch": 3.1100917431192663, |
| "grad_norm": 12524.0947265625, |
| "learning_rate": 3.797979797979798e-05, |
| "loss": 0.0895, |
| "step": 339 |
| }, |
| { |
| "epoch": 3.1192660550458715, |
| "grad_norm": 15579.77734375, |
| "learning_rate": 3.792929292929293e-05, |
| "loss": 0.1132, |
| "step": 340 |
| }, |
| { |
| "epoch": 3.128440366972477, |
| "grad_norm": 7564.0458984375, |
| "learning_rate": 3.787878787878788e-05, |
| "loss": 0.0909, |
| "step": 341 |
| }, |
| { |
| "epoch": 3.1376146788990824, |
| "grad_norm": 8664.2548828125, |
| "learning_rate": 3.782828282828283e-05, |
| "loss": 0.0784, |
| "step": 342 |
| }, |
| { |
| "epoch": 3.146788990825688, |
| "grad_norm": 8917.251953125, |
| "learning_rate": 3.777777777777778e-05, |
| "loss": 0.0817, |
| "step": 343 |
| }, |
| { |
| "epoch": 3.1559633027522938, |
| "grad_norm": 8527.2666015625, |
| "learning_rate": 3.7727272727272725e-05, |
| "loss": 0.0852, |
| "step": 344 |
| }, |
| { |
| "epoch": 3.165137614678899, |
| "grad_norm": 7334.591796875, |
| "learning_rate": 3.767676767676768e-05, |
| "loss": 0.079, |
| "step": 345 |
| }, |
| { |
| "epoch": 3.1743119266055047, |
| "grad_norm": 7469.1240234375, |
| "learning_rate": 3.7626262626262624e-05, |
| "loss": 0.0742, |
| "step": 346 |
| }, |
| { |
| "epoch": 3.18348623853211, |
| "grad_norm": 10801.150390625, |
| "learning_rate": 3.757575757575758e-05, |
| "loss": 0.0915, |
| "step": 347 |
| }, |
| { |
| "epoch": 3.1926605504587156, |
| "grad_norm": 7579.3603515625, |
| "learning_rate": 3.7525252525252524e-05, |
| "loss": 0.0881, |
| "step": 348 |
| }, |
| { |
| "epoch": 3.2018348623853212, |
| "grad_norm": 9901.044921875, |
| "learning_rate": 3.747474747474748e-05, |
| "loss": 0.0823, |
| "step": 349 |
| }, |
| { |
| "epoch": 3.2110091743119265, |
| "grad_norm": 10092.9248046875, |
| "learning_rate": 3.742424242424243e-05, |
| "loss": 0.0788, |
| "step": 350 |
| }, |
| { |
| "epoch": 3.220183486238532, |
| "grad_norm": 7686.80859375, |
| "learning_rate": 3.7373737373737376e-05, |
| "loss": 0.0828, |
| "step": 351 |
| }, |
| { |
| "epoch": 3.229357798165138, |
| "grad_norm": 7244.4970703125, |
| "learning_rate": 3.732323232323233e-05, |
| "loss": 0.0943, |
| "step": 352 |
| }, |
| { |
| "epoch": 3.238532110091743, |
| "grad_norm": 7361.87353515625, |
| "learning_rate": 3.7272727272727276e-05, |
| "loss": 0.0811, |
| "step": 353 |
| }, |
| { |
| "epoch": 3.2477064220183487, |
| "grad_norm": 9312.623046875, |
| "learning_rate": 3.722222222222222e-05, |
| "loss": 0.0843, |
| "step": 354 |
| }, |
| { |
| "epoch": 3.2568807339449544, |
| "grad_norm": 9429.6171875, |
| "learning_rate": 3.7171717171717175e-05, |
| "loss": 0.0856, |
| "step": 355 |
| }, |
| { |
| "epoch": 3.2660550458715596, |
| "grad_norm": 7771.47705078125, |
| "learning_rate": 3.712121212121212e-05, |
| "loss": 0.0815, |
| "step": 356 |
| }, |
| { |
| "epoch": 3.2752293577981653, |
| "grad_norm": 6975.99609375, |
| "learning_rate": 3.7070707070707075e-05, |
| "loss": 0.0842, |
| "step": 357 |
| }, |
| { |
| "epoch": 3.2844036697247705, |
| "grad_norm": 9537.3935546875, |
| "learning_rate": 3.702020202020202e-05, |
| "loss": 0.0913, |
| "step": 358 |
| }, |
| { |
| "epoch": 3.293577981651376, |
| "grad_norm": 8734.8681640625, |
| "learning_rate": 3.6969696969696974e-05, |
| "loss": 0.0777, |
| "step": 359 |
| }, |
| { |
| "epoch": 3.302752293577982, |
| "grad_norm": 9178.66015625, |
| "learning_rate": 3.691919191919192e-05, |
| "loss": 0.0833, |
| "step": 360 |
| }, |
| { |
| "epoch": 3.311926605504587, |
| "grad_norm": 10208.962890625, |
| "learning_rate": 3.686868686868687e-05, |
| "loss": 0.0715, |
| "step": 361 |
| }, |
| { |
| "epoch": 3.3211009174311927, |
| "grad_norm": 6356.3134765625, |
| "learning_rate": 3.681818181818182e-05, |
| "loss": 0.0803, |
| "step": 362 |
| }, |
| { |
| "epoch": 3.330275229357798, |
| "grad_norm": 11730.859375, |
| "learning_rate": 3.6767676767676766e-05, |
| "loss": 0.0782, |
| "step": 363 |
| }, |
| { |
| "epoch": 3.3394495412844036, |
| "grad_norm": 7810.03369140625, |
| "learning_rate": 3.671717171717172e-05, |
| "loss": 0.0733, |
| "step": 364 |
| }, |
| { |
| "epoch": 3.3486238532110093, |
| "grad_norm": 8370.4697265625, |
| "learning_rate": 3.6666666666666666e-05, |
| "loss": 0.0786, |
| "step": 365 |
| }, |
| { |
| "epoch": 3.3577981651376145, |
| "grad_norm": 8111.5361328125, |
| "learning_rate": 3.661616161616162e-05, |
| "loss": 0.0894, |
| "step": 366 |
| }, |
| { |
| "epoch": 3.36697247706422, |
| "grad_norm": 8765.7822265625, |
| "learning_rate": 3.656565656565657e-05, |
| "loss": 0.0904, |
| "step": 367 |
| }, |
| { |
| "epoch": 3.376146788990826, |
| "grad_norm": 18947.57421875, |
| "learning_rate": 3.651515151515152e-05, |
| "loss": 0.1098, |
| "step": 368 |
| }, |
| { |
| "epoch": 3.385321100917431, |
| "grad_norm": 7882.3369140625, |
| "learning_rate": 3.6464646464646465e-05, |
| "loss": 0.0892, |
| "step": 369 |
| }, |
| { |
| "epoch": 3.3944954128440368, |
| "grad_norm": 7774.26025390625, |
| "learning_rate": 3.641414141414142e-05, |
| "loss": 0.0775, |
| "step": 370 |
| }, |
| { |
| "epoch": 3.4036697247706424, |
| "grad_norm": 7653.77880859375, |
| "learning_rate": 3.6363636363636364e-05, |
| "loss": 0.0732, |
| "step": 371 |
| }, |
| { |
| "epoch": 3.4128440366972477, |
| "grad_norm": 7157.17529296875, |
| "learning_rate": 3.631313131313132e-05, |
| "loss": 0.0787, |
| "step": 372 |
| }, |
| { |
| "epoch": 3.4220183486238533, |
| "grad_norm": 8637.5361328125, |
| "learning_rate": 3.6262626262626264e-05, |
| "loss": 0.0811, |
| "step": 373 |
| }, |
| { |
| "epoch": 3.4311926605504586, |
| "grad_norm": 9387.2333984375, |
| "learning_rate": 3.621212121212122e-05, |
| "loss": 0.0914, |
| "step": 374 |
| }, |
| { |
| "epoch": 3.4403669724770642, |
| "grad_norm": 6949.630859375, |
| "learning_rate": 3.616161616161616e-05, |
| "loss": 0.0693, |
| "step": 375 |
| }, |
| { |
| "epoch": 3.44954128440367, |
| "grad_norm": 9442.3046875, |
| "learning_rate": 3.611111111111111e-05, |
| "loss": 0.0833, |
| "step": 376 |
| }, |
| { |
| "epoch": 3.458715596330275, |
| "grad_norm": 10019.27734375, |
| "learning_rate": 3.606060606060606e-05, |
| "loss": 0.0941, |
| "step": 377 |
| }, |
| { |
| "epoch": 3.467889908256881, |
| "grad_norm": 8413.140625, |
| "learning_rate": 3.601010101010101e-05, |
| "loss": 0.0788, |
| "step": 378 |
| }, |
| { |
| "epoch": 3.477064220183486, |
| "grad_norm": 8428.1181640625, |
| "learning_rate": 3.595959595959596e-05, |
| "loss": 0.0999, |
| "step": 379 |
| }, |
| { |
| "epoch": 3.4862385321100917, |
| "grad_norm": 8074.10400390625, |
| "learning_rate": 3.590909090909091e-05, |
| "loss": 0.0803, |
| "step": 380 |
| }, |
| { |
| "epoch": 3.4954128440366974, |
| "grad_norm": 9101.3134765625, |
| "learning_rate": 3.5858585858585855e-05, |
| "loss": 0.0679, |
| "step": 381 |
| }, |
| { |
| "epoch": 3.5045871559633026, |
| "grad_norm": 7441.87255859375, |
| "learning_rate": 3.580808080808081e-05, |
| "loss": 0.0721, |
| "step": 382 |
| }, |
| { |
| "epoch": 3.5137614678899083, |
| "grad_norm": 7897.45458984375, |
| "learning_rate": 3.575757575757576e-05, |
| "loss": 0.0905, |
| "step": 383 |
| }, |
| { |
| "epoch": 3.522935779816514, |
| "grad_norm": 8986.716796875, |
| "learning_rate": 3.5707070707070714e-05, |
| "loss": 0.0715, |
| "step": 384 |
| }, |
| { |
| "epoch": 3.532110091743119, |
| "grad_norm": 6766.3271484375, |
| "learning_rate": 3.565656565656566e-05, |
| "loss": 0.0647, |
| "step": 385 |
| }, |
| { |
| "epoch": 3.541284403669725, |
| "grad_norm": 9427.990234375, |
| "learning_rate": 3.560606060606061e-05, |
| "loss": 0.0822, |
| "step": 386 |
| }, |
| { |
| "epoch": 3.5504587155963305, |
| "grad_norm": 10683.732421875, |
| "learning_rate": 3.555555555555556e-05, |
| "loss": 0.0738, |
| "step": 387 |
| }, |
| { |
| "epoch": 3.5596330275229358, |
| "grad_norm": 14099.65625, |
| "learning_rate": 3.5505050505050506e-05, |
| "loss": 0.0832, |
| "step": 388 |
| }, |
| { |
| "epoch": 3.5688073394495414, |
| "grad_norm": 10605.873046875, |
| "learning_rate": 3.545454545454546e-05, |
| "loss": 0.0621, |
| "step": 389 |
| }, |
| { |
| "epoch": 3.5779816513761467, |
| "grad_norm": 7646.0703125, |
| "learning_rate": 3.5404040404040406e-05, |
| "loss": 0.0759, |
| "step": 390 |
| }, |
| { |
| "epoch": 3.5871559633027523, |
| "grad_norm": 12968.7607421875, |
| "learning_rate": 3.535353535353535e-05, |
| "loss": 0.0724, |
| "step": 391 |
| }, |
| { |
| "epoch": 3.5963302752293576, |
| "grad_norm": 11772.3857421875, |
| "learning_rate": 3.5303030303030305e-05, |
| "loss": 0.0881, |
| "step": 392 |
| }, |
| { |
| "epoch": 3.6055045871559632, |
| "grad_norm": 7031.99755859375, |
| "learning_rate": 3.525252525252525e-05, |
| "loss": 0.0636, |
| "step": 393 |
| }, |
| { |
| "epoch": 3.614678899082569, |
| "grad_norm": 8371.5205078125, |
| "learning_rate": 3.5202020202020205e-05, |
| "loss": 0.0686, |
| "step": 394 |
| }, |
| { |
| "epoch": 3.623853211009174, |
| "grad_norm": 7546.57666015625, |
| "learning_rate": 3.515151515151515e-05, |
| "loss": 0.0715, |
| "step": 395 |
| }, |
| { |
| "epoch": 3.63302752293578, |
| "grad_norm": 9094.708984375, |
| "learning_rate": 3.5101010101010104e-05, |
| "loss": 0.0778, |
| "step": 396 |
| }, |
| { |
| "epoch": 3.6422018348623855, |
| "grad_norm": 8982.2861328125, |
| "learning_rate": 3.505050505050505e-05, |
| "loss": 0.0724, |
| "step": 397 |
| }, |
| { |
| "epoch": 3.6513761467889907, |
| "grad_norm": 10044.265625, |
| "learning_rate": 3.5e-05, |
| "loss": 0.1019, |
| "step": 398 |
| }, |
| { |
| "epoch": 3.6605504587155964, |
| "grad_norm": 10931.083984375, |
| "learning_rate": 3.494949494949495e-05, |
| "loss": 0.0732, |
| "step": 399 |
| }, |
| { |
| "epoch": 3.669724770642202, |
| "grad_norm": 10163.0771484375, |
| "learning_rate": 3.48989898989899e-05, |
| "loss": 0.0899, |
| "step": 400 |
| }, |
| { |
| "epoch": 3.6788990825688073, |
| "grad_norm": 7464.64111328125, |
| "learning_rate": 3.484848484848485e-05, |
| "loss": 0.0802, |
| "step": 401 |
| }, |
| { |
| "epoch": 3.688073394495413, |
| "grad_norm": 12514.2763671875, |
| "learning_rate": 3.47979797979798e-05, |
| "loss": 0.0773, |
| "step": 402 |
| }, |
| { |
| "epoch": 3.6972477064220186, |
| "grad_norm": 9357.0361328125, |
| "learning_rate": 3.474747474747475e-05, |
| "loss": 0.0916, |
| "step": 403 |
| }, |
| { |
| "epoch": 3.706422018348624, |
| "grad_norm": 9900.4990234375, |
| "learning_rate": 3.46969696969697e-05, |
| "loss": 0.0679, |
| "step": 404 |
| }, |
| { |
| "epoch": 3.7155963302752295, |
| "grad_norm": 7504.73876953125, |
| "learning_rate": 3.464646464646465e-05, |
| "loss": 0.0646, |
| "step": 405 |
| }, |
| { |
| "epoch": 3.7247706422018347, |
| "grad_norm": 9507.84765625, |
| "learning_rate": 3.45959595959596e-05, |
| "loss": 0.0855, |
| "step": 406 |
| }, |
| { |
| "epoch": 3.7339449541284404, |
| "grad_norm": 7167.67041015625, |
| "learning_rate": 3.454545454545455e-05, |
| "loss": 0.0727, |
| "step": 407 |
| }, |
| { |
| "epoch": 3.7431192660550456, |
| "grad_norm": 8691.91796875, |
| "learning_rate": 3.4494949494949494e-05, |
| "loss": 0.0809, |
| "step": 408 |
| }, |
| { |
| "epoch": 3.7522935779816513, |
| "grad_norm": 7374.8330078125, |
| "learning_rate": 3.444444444444445e-05, |
| "loss": 0.0763, |
| "step": 409 |
| }, |
| { |
| "epoch": 3.761467889908257, |
| "grad_norm": 6899.1455078125, |
| "learning_rate": 3.4393939393939394e-05, |
| "loss": 0.0699, |
| "step": 410 |
| }, |
| { |
| "epoch": 3.770642201834862, |
| "grad_norm": 8236.75, |
| "learning_rate": 3.434343434343435e-05, |
| "loss": 0.0773, |
| "step": 411 |
| }, |
| { |
| "epoch": 3.779816513761468, |
| "grad_norm": 7490.56787109375, |
| "learning_rate": 3.429292929292929e-05, |
| "loss": 0.0705, |
| "step": 412 |
| }, |
| { |
| "epoch": 3.7889908256880735, |
| "grad_norm": 7241.3564453125, |
| "learning_rate": 3.424242424242424e-05, |
| "loss": 0.0636, |
| "step": 413 |
| }, |
| { |
| "epoch": 3.7981651376146788, |
| "grad_norm": 7502.1787109375, |
| "learning_rate": 3.419191919191919e-05, |
| "loss": 0.069, |
| "step": 414 |
| }, |
| { |
| "epoch": 3.8073394495412844, |
| "grad_norm": 10924.54296875, |
| "learning_rate": 3.414141414141414e-05, |
| "loss": 0.09, |
| "step": 415 |
| }, |
| { |
| "epoch": 3.81651376146789, |
| "grad_norm": 9266.40625, |
| "learning_rate": 3.409090909090909e-05, |
| "loss": 0.073, |
| "step": 416 |
| }, |
| { |
| "epoch": 3.8256880733944953, |
| "grad_norm": 10273.2900390625, |
| "learning_rate": 3.4040404040404045e-05, |
| "loss": 0.0951, |
| "step": 417 |
| }, |
| { |
| "epoch": 3.834862385321101, |
| "grad_norm": 6867.47802734375, |
| "learning_rate": 3.398989898989899e-05, |
| "loss": 0.0702, |
| "step": 418 |
| }, |
| { |
| "epoch": 3.8440366972477067, |
| "grad_norm": 8106.5517578125, |
| "learning_rate": 3.3939393939393945e-05, |
| "loss": 0.075, |
| "step": 419 |
| }, |
| { |
| "epoch": 3.853211009174312, |
| "grad_norm": 7965.34033203125, |
| "learning_rate": 3.388888888888889e-05, |
| "loss": 0.0695, |
| "step": 420 |
| }, |
| { |
| "epoch": 3.8623853211009176, |
| "grad_norm": 8276.9873046875, |
| "learning_rate": 3.3838383838383844e-05, |
| "loss": 0.0837, |
| "step": 421 |
| }, |
| { |
| "epoch": 3.871559633027523, |
| "grad_norm": 13724.3798828125, |
| "learning_rate": 3.378787878787879e-05, |
| "loss": 0.0807, |
| "step": 422 |
| }, |
| { |
| "epoch": 3.8807339449541285, |
| "grad_norm": 8130.3525390625, |
| "learning_rate": 3.373737373737374e-05, |
| "loss": 0.0793, |
| "step": 423 |
| }, |
| { |
| "epoch": 3.8899082568807337, |
| "grad_norm": 8378.513671875, |
| "learning_rate": 3.368686868686869e-05, |
| "loss": 0.0669, |
| "step": 424 |
| }, |
| { |
| "epoch": 3.8990825688073394, |
| "grad_norm": 7662.083984375, |
| "learning_rate": 3.3636363636363636e-05, |
| "loss": 0.0755, |
| "step": 425 |
| }, |
| { |
| "epoch": 3.908256880733945, |
| "grad_norm": 8806.6005859375, |
| "learning_rate": 3.358585858585859e-05, |
| "loss": 0.0735, |
| "step": 426 |
| }, |
| { |
| "epoch": 3.9174311926605503, |
| "grad_norm": 8842.380859375, |
| "learning_rate": 3.3535353535353536e-05, |
| "loss": 0.0666, |
| "step": 427 |
| }, |
| { |
| "epoch": 3.926605504587156, |
| "grad_norm": 8317.888671875, |
| "learning_rate": 3.348484848484848e-05, |
| "loss": 0.0848, |
| "step": 428 |
| }, |
| { |
| "epoch": 3.9357798165137616, |
| "grad_norm": 8649.2880859375, |
| "learning_rate": 3.3434343434343435e-05, |
| "loss": 0.0843, |
| "step": 429 |
| }, |
| { |
| "epoch": 3.944954128440367, |
| "grad_norm": 10749.7431640625, |
| "learning_rate": 3.338383838383838e-05, |
| "loss": 0.0915, |
| "step": 430 |
| }, |
| { |
| "epoch": 3.9541284403669725, |
| "grad_norm": 11217.1533203125, |
| "learning_rate": 3.3333333333333335e-05, |
| "loss": 0.0833, |
| "step": 431 |
| }, |
| { |
| "epoch": 3.963302752293578, |
| "grad_norm": 7443.095703125, |
| "learning_rate": 3.328282828282828e-05, |
| "loss": 0.0763, |
| "step": 432 |
| }, |
| { |
| "epoch": 3.9724770642201834, |
| "grad_norm": 7243.30859375, |
| "learning_rate": 3.3232323232323234e-05, |
| "loss": 0.068, |
| "step": 433 |
| }, |
| { |
| "epoch": 3.981651376146789, |
| "grad_norm": 7107.99658203125, |
| "learning_rate": 3.318181818181819e-05, |
| "loss": 0.0749, |
| "step": 434 |
| }, |
| { |
| "epoch": 3.9908256880733948, |
| "grad_norm": 12792.3837890625, |
| "learning_rate": 3.3131313131313134e-05, |
| "loss": 0.0799, |
| "step": 435 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 16352.5830078125, |
| "learning_rate": 3.308080808080809e-05, |
| "loss": 0.0644, |
| "step": 436 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_f1_macro": 0.12554329514843507, |
| "eval_f1_micro": 0.4918032786885246, |
| "eval_loss": 0.07779007405042648, |
| "eval_precision": 0.75, |
| "eval_recall": 0.36585365853658536, |
| "eval_runtime": 0.8052, |
| "eval_samples_per_second": 358.932, |
| "eval_steps_per_second": 16.146, |
| "step": 436 |
| }, |
| { |
| "epoch": 4.009174311926605, |
| "grad_norm": 8458.75, |
| "learning_rate": 3.303030303030303e-05, |
| "loss": 0.0692, |
| "step": 437 |
| }, |
| { |
| "epoch": 4.018348623853211, |
| "grad_norm": 13337.8359375, |
| "learning_rate": 3.297979797979798e-05, |
| "loss": 0.0755, |
| "step": 438 |
| }, |
| { |
| "epoch": 4.027522935779817, |
| "grad_norm": 8051.86865234375, |
| "learning_rate": 3.292929292929293e-05, |
| "loss": 0.0578, |
| "step": 439 |
| }, |
| { |
| "epoch": 4.036697247706422, |
| "grad_norm": 10501.3466796875, |
| "learning_rate": 3.287878787878788e-05, |
| "loss": 0.0765, |
| "step": 440 |
| }, |
| { |
| "epoch": 4.045871559633028, |
| "grad_norm": 8340.0751953125, |
| "learning_rate": 3.282828282828283e-05, |
| "loss": 0.0625, |
| "step": 441 |
| }, |
| { |
| "epoch": 4.055045871559633, |
| "grad_norm": 7503.3642578125, |
| "learning_rate": 3.277777777777778e-05, |
| "loss": 0.0677, |
| "step": 442 |
| }, |
| { |
| "epoch": 4.064220183486238, |
| "grad_norm": 6723.236328125, |
| "learning_rate": 3.272727272727273e-05, |
| "loss": 0.0635, |
| "step": 443 |
| }, |
| { |
| "epoch": 4.073394495412844, |
| "grad_norm": 7500.09033203125, |
| "learning_rate": 3.267676767676768e-05, |
| "loss": 0.0745, |
| "step": 444 |
| }, |
| { |
| "epoch": 4.08256880733945, |
| "grad_norm": 7736.67333984375, |
| "learning_rate": 3.2626262626262624e-05, |
| "loss": 0.0644, |
| "step": 445 |
| }, |
| { |
| "epoch": 4.091743119266055, |
| "grad_norm": 11797.1162109375, |
| "learning_rate": 3.257575757575758e-05, |
| "loss": 0.0781, |
| "step": 446 |
| }, |
| { |
| "epoch": 4.10091743119266, |
| "grad_norm": 6644.9296875, |
| "learning_rate": 3.2525252525252524e-05, |
| "loss": 0.0583, |
| "step": 447 |
| }, |
| { |
| "epoch": 4.110091743119266, |
| "grad_norm": 6017.3408203125, |
| "learning_rate": 3.247474747474748e-05, |
| "loss": 0.0584, |
| "step": 448 |
| }, |
| { |
| "epoch": 4.1192660550458715, |
| "grad_norm": 7825.36865234375, |
| "learning_rate": 3.2424242424242423e-05, |
| "loss": 0.0539, |
| "step": 449 |
| }, |
| { |
| "epoch": 4.128440366972477, |
| "grad_norm": 9209.607421875, |
| "learning_rate": 3.237373737373737e-05, |
| "loss": 0.0661, |
| "step": 450 |
| }, |
| { |
| "epoch": 4.137614678899083, |
| "grad_norm": 7857.53271484375, |
| "learning_rate": 3.232323232323233e-05, |
| "loss": 0.0696, |
| "step": 451 |
| }, |
| { |
| "epoch": 4.146788990825688, |
| "grad_norm": 7992.9599609375, |
| "learning_rate": 3.2272727272727276e-05, |
| "loss": 0.06, |
| "step": 452 |
| }, |
| { |
| "epoch": 4.155963302752293, |
| "grad_norm": 8909.8310546875, |
| "learning_rate": 3.222222222222223e-05, |
| "loss": 0.0661, |
| "step": 453 |
| }, |
| { |
| "epoch": 4.165137614678899, |
| "grad_norm": 6850.833984375, |
| "learning_rate": 3.2171717171717176e-05, |
| "loss": 0.0686, |
| "step": 454 |
| }, |
| { |
| "epoch": 4.174311926605505, |
| "grad_norm": 6964.1201171875, |
| "learning_rate": 3.212121212121212e-05, |
| "loss": 0.0625, |
| "step": 455 |
| }, |
| { |
| "epoch": 4.18348623853211, |
| "grad_norm": 7593.55615234375, |
| "learning_rate": 3.2070707070707075e-05, |
| "loss": 0.0694, |
| "step": 456 |
| }, |
| { |
| "epoch": 4.192660550458716, |
| "grad_norm": 9576.828125, |
| "learning_rate": 3.202020202020202e-05, |
| "loss": 0.075, |
| "step": 457 |
| }, |
| { |
| "epoch": 4.201834862385321, |
| "grad_norm": 8686.0986328125, |
| "learning_rate": 3.1969696969696974e-05, |
| "loss": 0.0758, |
| "step": 458 |
| }, |
| { |
| "epoch": 4.2110091743119265, |
| "grad_norm": 8695.9306640625, |
| "learning_rate": 3.191919191919192e-05, |
| "loss": 0.0447, |
| "step": 459 |
| }, |
| { |
| "epoch": 4.220183486238533, |
| "grad_norm": 9451.28125, |
| "learning_rate": 3.186868686868687e-05, |
| "loss": 0.0538, |
| "step": 460 |
| }, |
| { |
| "epoch": 4.229357798165138, |
| "grad_norm": 6842.013671875, |
| "learning_rate": 3.181818181818182e-05, |
| "loss": 0.0815, |
| "step": 461 |
| }, |
| { |
| "epoch": 4.238532110091743, |
| "grad_norm": 6817.18994140625, |
| "learning_rate": 3.176767676767677e-05, |
| "loss": 0.0576, |
| "step": 462 |
| }, |
| { |
| "epoch": 4.247706422018348, |
| "grad_norm": 7361.92578125, |
| "learning_rate": 3.171717171717172e-05, |
| "loss": 0.0629, |
| "step": 463 |
| }, |
| { |
| "epoch": 4.256880733944954, |
| "grad_norm": 9081.8603515625, |
| "learning_rate": 3.1666666666666666e-05, |
| "loss": 0.0671, |
| "step": 464 |
| }, |
| { |
| "epoch": 4.26605504587156, |
| "grad_norm": 9699.37109375, |
| "learning_rate": 3.161616161616161e-05, |
| "loss": 0.0743, |
| "step": 465 |
| }, |
| { |
| "epoch": 4.275229357798165, |
| "grad_norm": 9937.20703125, |
| "learning_rate": 3.1565656565656566e-05, |
| "loss": 0.076, |
| "step": 466 |
| }, |
| { |
| "epoch": 4.284403669724771, |
| "grad_norm": 12852.361328125, |
| "learning_rate": 3.151515151515151e-05, |
| "loss": 0.0838, |
| "step": 467 |
| }, |
| { |
| "epoch": 4.293577981651376, |
| "grad_norm": 10356.1337890625, |
| "learning_rate": 3.146464646464647e-05, |
| "loss": 0.0727, |
| "step": 468 |
| }, |
| { |
| "epoch": 4.302752293577981, |
| "grad_norm": 13259.0107421875, |
| "learning_rate": 3.141414141414142e-05, |
| "loss": 0.0609, |
| "step": 469 |
| }, |
| { |
| "epoch": 4.3119266055045875, |
| "grad_norm": 9083.2255859375, |
| "learning_rate": 3.1363636363636365e-05, |
| "loss": 0.0731, |
| "step": 470 |
| }, |
| { |
| "epoch": 4.321100917431193, |
| "grad_norm": 9072.19921875, |
| "learning_rate": 3.131313131313132e-05, |
| "loss": 0.0634, |
| "step": 471 |
| }, |
| { |
| "epoch": 4.330275229357798, |
| "grad_norm": 7686.09765625, |
| "learning_rate": 3.1262626262626264e-05, |
| "loss": 0.0671, |
| "step": 472 |
| }, |
| { |
| "epoch": 4.339449541284404, |
| "grad_norm": 6557.51953125, |
| "learning_rate": 3.121212121212122e-05, |
| "loss": 0.0575, |
| "step": 473 |
| }, |
| { |
| "epoch": 4.348623853211009, |
| "grad_norm": 7452.078125, |
| "learning_rate": 3.1161616161616164e-05, |
| "loss": 0.0616, |
| "step": 474 |
| }, |
| { |
| "epoch": 4.3577981651376145, |
| "grad_norm": 10433.482421875, |
| "learning_rate": 3.111111111111111e-05, |
| "loss": 0.0807, |
| "step": 475 |
| }, |
| { |
| "epoch": 4.36697247706422, |
| "grad_norm": 7201.25927734375, |
| "learning_rate": 3.106060606060606e-05, |
| "loss": 0.0814, |
| "step": 476 |
| }, |
| { |
| "epoch": 4.376146788990826, |
| "grad_norm": 8066.92333984375, |
| "learning_rate": 3.101010101010101e-05, |
| "loss": 0.0615, |
| "step": 477 |
| }, |
| { |
| "epoch": 4.385321100917431, |
| "grad_norm": 10633.607421875, |
| "learning_rate": 3.095959595959596e-05, |
| "loss": 0.0732, |
| "step": 478 |
| }, |
| { |
| "epoch": 4.394495412844036, |
| "grad_norm": 12350.8046875, |
| "learning_rate": 3.090909090909091e-05, |
| "loss": 0.0784, |
| "step": 479 |
| }, |
| { |
| "epoch": 4.4036697247706424, |
| "grad_norm": 10261.6748046875, |
| "learning_rate": 3.085858585858586e-05, |
| "loss": 0.0643, |
| "step": 480 |
| }, |
| { |
| "epoch": 4.412844036697248, |
| "grad_norm": 19839.958984375, |
| "learning_rate": 3.080808080808081e-05, |
| "loss": 0.0885, |
| "step": 481 |
| }, |
| { |
| "epoch": 4.422018348623853, |
| "grad_norm": 7739.05615234375, |
| "learning_rate": 3.0757575757575755e-05, |
| "loss": 0.0529, |
| "step": 482 |
| }, |
| { |
| "epoch": 4.431192660550459, |
| "grad_norm": 7615.5048828125, |
| "learning_rate": 3.070707070707071e-05, |
| "loss": 0.0663, |
| "step": 483 |
| }, |
| { |
| "epoch": 4.440366972477064, |
| "grad_norm": 11299.6220703125, |
| "learning_rate": 3.0656565656565654e-05, |
| "loss": 0.0475, |
| "step": 484 |
| }, |
| { |
| "epoch": 4.4495412844036695, |
| "grad_norm": 11667.0, |
| "learning_rate": 3.060606060606061e-05, |
| "loss": 0.0598, |
| "step": 485 |
| }, |
| { |
| "epoch": 4.458715596330276, |
| "grad_norm": 14411.435546875, |
| "learning_rate": 3.055555555555556e-05, |
| "loss": 0.0651, |
| "step": 486 |
| }, |
| { |
| "epoch": 4.467889908256881, |
| "grad_norm": 9918.951171875, |
| "learning_rate": 3.050505050505051e-05, |
| "loss": 0.087, |
| "step": 487 |
| }, |
| { |
| "epoch": 4.477064220183486, |
| "grad_norm": 19711.619140625, |
| "learning_rate": 3.0454545454545456e-05, |
| "loss": 0.0764, |
| "step": 488 |
| }, |
| { |
| "epoch": 4.486238532110092, |
| "grad_norm": 8031.31591796875, |
| "learning_rate": 3.0404040404040406e-05, |
| "loss": 0.0589, |
| "step": 489 |
| }, |
| { |
| "epoch": 4.495412844036697, |
| "grad_norm": 9987.06640625, |
| "learning_rate": 3.0353535353535356e-05, |
| "loss": 0.0717, |
| "step": 490 |
| }, |
| { |
| "epoch": 4.504587155963303, |
| "grad_norm": 8235.3388671875, |
| "learning_rate": 3.0303030303030306e-05, |
| "loss": 0.0604, |
| "step": 491 |
| }, |
| { |
| "epoch": 4.513761467889909, |
| "grad_norm": 8193.5595703125, |
| "learning_rate": 3.0252525252525255e-05, |
| "loss": 0.059, |
| "step": 492 |
| }, |
| { |
| "epoch": 4.522935779816514, |
| "grad_norm": 9168.8310546875, |
| "learning_rate": 3.0202020202020205e-05, |
| "loss": 0.0681, |
| "step": 493 |
| }, |
| { |
| "epoch": 4.532110091743119, |
| "grad_norm": 7615.90234375, |
| "learning_rate": 3.015151515151515e-05, |
| "loss": 0.0701, |
| "step": 494 |
| }, |
| { |
| "epoch": 4.541284403669724, |
| "grad_norm": 9336.4892578125, |
| "learning_rate": 3.01010101010101e-05, |
| "loss": 0.0774, |
| "step": 495 |
| }, |
| { |
| "epoch": 4.5504587155963305, |
| "grad_norm": 12258.80078125, |
| "learning_rate": 3.005050505050505e-05, |
| "loss": 0.0688, |
| "step": 496 |
| }, |
| { |
| "epoch": 4.559633027522936, |
| "grad_norm": 8392.525390625, |
| "learning_rate": 3e-05, |
| "loss": 0.0564, |
| "step": 497 |
| }, |
| { |
| "epoch": 4.568807339449541, |
| "grad_norm": 9604.12890625, |
| "learning_rate": 2.994949494949495e-05, |
| "loss": 0.064, |
| "step": 498 |
| }, |
| { |
| "epoch": 4.577981651376147, |
| "grad_norm": 9194.5859375, |
| "learning_rate": 2.98989898989899e-05, |
| "loss": 0.0636, |
| "step": 499 |
| }, |
| { |
| "epoch": 4.587155963302752, |
| "grad_norm": 9700.25390625, |
| "learning_rate": 2.9848484848484847e-05, |
| "loss": 0.0679, |
| "step": 500 |
| }, |
| { |
| "epoch": 4.5963302752293576, |
| "grad_norm": 8867.0146484375, |
| "learning_rate": 2.9797979797979796e-05, |
| "loss": 0.0701, |
| "step": 501 |
| }, |
| { |
| "epoch": 4.605504587155964, |
| "grad_norm": 10309.2646484375, |
| "learning_rate": 2.9747474747474746e-05, |
| "loss": 0.0691, |
| "step": 502 |
| }, |
| { |
| "epoch": 4.614678899082569, |
| "grad_norm": 8310.7587890625, |
| "learning_rate": 2.96969696969697e-05, |
| "loss": 0.0661, |
| "step": 503 |
| }, |
| { |
| "epoch": 4.623853211009174, |
| "grad_norm": 8526.0302734375, |
| "learning_rate": 2.964646464646465e-05, |
| "loss": 0.0629, |
| "step": 504 |
| }, |
| { |
| "epoch": 4.63302752293578, |
| "grad_norm": 7720.8388671875, |
| "learning_rate": 2.95959595959596e-05, |
| "loss": 0.068, |
| "step": 505 |
| }, |
| { |
| "epoch": 4.6422018348623855, |
| "grad_norm": 7792.38330078125, |
| "learning_rate": 2.954545454545455e-05, |
| "loss": 0.0655, |
| "step": 506 |
| }, |
| { |
| "epoch": 4.651376146788991, |
| "grad_norm": 9584.6845703125, |
| "learning_rate": 2.9494949494949498e-05, |
| "loss": 0.0703, |
| "step": 507 |
| }, |
| { |
| "epoch": 4.660550458715596, |
| "grad_norm": 9758.236328125, |
| "learning_rate": 2.9444444444444448e-05, |
| "loss": 0.0725, |
| "step": 508 |
| }, |
| { |
| "epoch": 4.669724770642202, |
| "grad_norm": 12375.326171875, |
| "learning_rate": 2.9393939393939394e-05, |
| "loss": 0.0644, |
| "step": 509 |
| }, |
| { |
| "epoch": 4.678899082568807, |
| "grad_norm": 10054.2705078125, |
| "learning_rate": 2.9343434343434344e-05, |
| "loss": 0.0462, |
| "step": 510 |
| }, |
| { |
| "epoch": 4.6880733944954125, |
| "grad_norm": 14444.8173828125, |
| "learning_rate": 2.9292929292929294e-05, |
| "loss": 0.0693, |
| "step": 511 |
| }, |
| { |
| "epoch": 4.697247706422019, |
| "grad_norm": 9948.7880859375, |
| "learning_rate": 2.9242424242424243e-05, |
| "loss": 0.0576, |
| "step": 512 |
| }, |
| { |
| "epoch": 4.706422018348624, |
| "grad_norm": 8357.00390625, |
| "learning_rate": 2.9191919191919193e-05, |
| "loss": 0.081, |
| "step": 513 |
| }, |
| { |
| "epoch": 4.715596330275229, |
| "grad_norm": 9313.916015625, |
| "learning_rate": 2.9141414141414143e-05, |
| "loss": 0.068, |
| "step": 514 |
| }, |
| { |
| "epoch": 4.724770642201835, |
| "grad_norm": 10014.9775390625, |
| "learning_rate": 2.909090909090909e-05, |
| "loss": 0.0672, |
| "step": 515 |
| }, |
| { |
| "epoch": 4.73394495412844, |
| "grad_norm": 10116.134765625, |
| "learning_rate": 2.904040404040404e-05, |
| "loss": 0.0668, |
| "step": 516 |
| }, |
| { |
| "epoch": 4.743119266055046, |
| "grad_norm": 7578.20703125, |
| "learning_rate": 2.898989898989899e-05, |
| "loss": 0.048, |
| "step": 517 |
| }, |
| { |
| "epoch": 4.752293577981652, |
| "grad_norm": 8988.373046875, |
| "learning_rate": 2.893939393939394e-05, |
| "loss": 0.0628, |
| "step": 518 |
| }, |
| { |
| "epoch": 4.761467889908257, |
| "grad_norm": 10147.2666015625, |
| "learning_rate": 2.8888888888888888e-05, |
| "loss": 0.0641, |
| "step": 519 |
| }, |
| { |
| "epoch": 4.770642201834862, |
| "grad_norm": 7943.02294921875, |
| "learning_rate": 2.883838383838384e-05, |
| "loss": 0.0557, |
| "step": 520 |
| }, |
| { |
| "epoch": 4.779816513761467, |
| "grad_norm": 8308.49609375, |
| "learning_rate": 2.878787878787879e-05, |
| "loss": 0.0617, |
| "step": 521 |
| }, |
| { |
| "epoch": 4.7889908256880735, |
| "grad_norm": 12425.205078125, |
| "learning_rate": 2.873737373737374e-05, |
| "loss": 0.0631, |
| "step": 522 |
| }, |
| { |
| "epoch": 4.798165137614679, |
| "grad_norm": 6772.55224609375, |
| "learning_rate": 2.868686868686869e-05, |
| "loss": 0.0535, |
| "step": 523 |
| }, |
| { |
| "epoch": 4.807339449541285, |
| "grad_norm": 10277.5205078125, |
| "learning_rate": 2.863636363636364e-05, |
| "loss": 0.0745, |
| "step": 524 |
| }, |
| { |
| "epoch": 4.81651376146789, |
| "grad_norm": 10549.9482421875, |
| "learning_rate": 2.8585858585858587e-05, |
| "loss": 0.0758, |
| "step": 525 |
| }, |
| { |
| "epoch": 4.825688073394495, |
| "grad_norm": 8067.88330078125, |
| "learning_rate": 2.8535353535353536e-05, |
| "loss": 0.0693, |
| "step": 526 |
| }, |
| { |
| "epoch": 4.834862385321101, |
| "grad_norm": 9647.41796875, |
| "learning_rate": 2.8484848484848486e-05, |
| "loss": 0.0523, |
| "step": 527 |
| }, |
| { |
| "epoch": 4.844036697247707, |
| "grad_norm": 7713.77587890625, |
| "learning_rate": 2.8434343434343436e-05, |
| "loss": 0.0561, |
| "step": 528 |
| }, |
| { |
| "epoch": 4.853211009174312, |
| "grad_norm": 7126.24658203125, |
| "learning_rate": 2.8383838383838386e-05, |
| "loss": 0.0602, |
| "step": 529 |
| }, |
| { |
| "epoch": 4.862385321100917, |
| "grad_norm": 9366.20703125, |
| "learning_rate": 2.8333333333333335e-05, |
| "loss": 0.0506, |
| "step": 530 |
| }, |
| { |
| "epoch": 4.871559633027523, |
| "grad_norm": 11138.2060546875, |
| "learning_rate": 2.8282828282828282e-05, |
| "loss": 0.0507, |
| "step": 531 |
| }, |
| { |
| "epoch": 4.8807339449541285, |
| "grad_norm": 8674.6513671875, |
| "learning_rate": 2.823232323232323e-05, |
| "loss": 0.0611, |
| "step": 532 |
| }, |
| { |
| "epoch": 4.889908256880734, |
| "grad_norm": 9256.6474609375, |
| "learning_rate": 2.818181818181818e-05, |
| "loss": 0.0641, |
| "step": 533 |
| }, |
| { |
| "epoch": 4.89908256880734, |
| "grad_norm": 11366.2158203125, |
| "learning_rate": 2.813131313131313e-05, |
| "loss": 0.0705, |
| "step": 534 |
| }, |
| { |
| "epoch": 4.908256880733945, |
| "grad_norm": 9709.6484375, |
| "learning_rate": 2.808080808080808e-05, |
| "loss": 0.0676, |
| "step": 535 |
| }, |
| { |
| "epoch": 4.91743119266055, |
| "grad_norm": 7580.935546875, |
| "learning_rate": 2.803030303030303e-05, |
| "loss": 0.0518, |
| "step": 536 |
| }, |
| { |
| "epoch": 4.926605504587156, |
| "grad_norm": 11798.3857421875, |
| "learning_rate": 2.7979797979797984e-05, |
| "loss": 0.0657, |
| "step": 537 |
| }, |
| { |
| "epoch": 4.935779816513762, |
| "grad_norm": 7368.62841796875, |
| "learning_rate": 2.7929292929292933e-05, |
| "loss": 0.0644, |
| "step": 538 |
| }, |
| { |
| "epoch": 4.944954128440367, |
| "grad_norm": 6701.8466796875, |
| "learning_rate": 2.7878787878787883e-05, |
| "loss": 0.0515, |
| "step": 539 |
| }, |
| { |
| "epoch": 4.954128440366972, |
| "grad_norm": 8849.685546875, |
| "learning_rate": 2.7828282828282833e-05, |
| "loss": 0.0547, |
| "step": 540 |
| }, |
| { |
| "epoch": 4.963302752293578, |
| "grad_norm": 9179.5751953125, |
| "learning_rate": 2.777777777777778e-05, |
| "loss": 0.0542, |
| "step": 541 |
| }, |
| { |
| "epoch": 4.972477064220183, |
| "grad_norm": 14458.0810546875, |
| "learning_rate": 2.772727272727273e-05, |
| "loss": 0.067, |
| "step": 542 |
| }, |
| { |
| "epoch": 4.981651376146789, |
| "grad_norm": 9508.0732421875, |
| "learning_rate": 2.767676767676768e-05, |
| "loss": 0.0666, |
| "step": 543 |
| }, |
| { |
| "epoch": 4.990825688073395, |
| "grad_norm": 9082.1591796875, |
| "learning_rate": 2.762626262626263e-05, |
| "loss": 0.0591, |
| "step": 544 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 38391.5390625, |
| "learning_rate": 2.7575757575757578e-05, |
| "loss": 0.0782, |
| "step": 545 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_f1_macro": 0.2588916080981378, |
| "eval_f1_micro": 0.5921985815602837, |
| "eval_loss": 0.06743249297142029, |
| "eval_precision": 0.8564102564102564, |
| "eval_recall": 0.45257452574525747, |
| "eval_runtime": 0.8048, |
| "eval_samples_per_second": 359.083, |
| "eval_steps_per_second": 16.153, |
| "step": 545 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1090, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3415337226086400.0, |
| "train_batch_size": 24, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|