| { |
| "best_global_step": 327, |
| "best_metric": 0.37575757575757573, |
| "best_model_checkpoint": "./cysecbert-ttp-bert-base_data/checkpoint-327", |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 327, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.009174311926605505, |
| "grad_norm": 46530.15234375, |
| "learning_rate": 0.0, |
| "loss": 0.7023, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.01834862385321101, |
| "grad_norm": 49571.35546875, |
| "learning_rate": 5.000000000000001e-07, |
| "loss": 0.7006, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.027522935779816515, |
| "grad_norm": 45885.55078125, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 0.7033, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.03669724770642202, |
| "grad_norm": 43333.73046875, |
| "learning_rate": 1.5e-06, |
| "loss": 0.7014, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.045871559633027525, |
| "grad_norm": 44340.41796875, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 0.6992, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.05504587155963303, |
| "grad_norm": 48282.54296875, |
| "learning_rate": 2.5e-06, |
| "loss": 0.6924, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.06422018348623854, |
| "grad_norm": 48561.56640625, |
| "learning_rate": 3e-06, |
| "loss": 0.6948, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.07339449541284404, |
| "grad_norm": 43580.078125, |
| "learning_rate": 3.5000000000000004e-06, |
| "loss": 0.6916, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.08256880733944955, |
| "grad_norm": 46552.28515625, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 0.6854, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.09174311926605505, |
| "grad_norm": 49042.15234375, |
| "learning_rate": 4.5e-06, |
| "loss": 0.6822, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.10091743119266056, |
| "grad_norm": 97274.8515625, |
| "learning_rate": 5e-06, |
| "loss": 0.6788, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.11009174311926606, |
| "grad_norm": 55310.375, |
| "learning_rate": 5.500000000000001e-06, |
| "loss": 0.6781, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.11926605504587157, |
| "grad_norm": 70317.6328125, |
| "learning_rate": 6e-06, |
| "loss": 0.6754, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.12844036697247707, |
| "grad_norm": 46825.9765625, |
| "learning_rate": 6.5000000000000004e-06, |
| "loss": 0.6626, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.13761467889908258, |
| "grad_norm": 50745.15625, |
| "learning_rate": 7.000000000000001e-06, |
| "loss": 0.655, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.14678899082568808, |
| "grad_norm": 51342.453125, |
| "learning_rate": 7.5e-06, |
| "loss": 0.6491, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.1559633027522936, |
| "grad_norm": 51815.421875, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 0.6464, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.1651376146788991, |
| "grad_norm": 54610.65234375, |
| "learning_rate": 8.500000000000002e-06, |
| "loss": 0.6298, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.1743119266055046, |
| "grad_norm": 61936.31640625, |
| "learning_rate": 9e-06, |
| "loss": 0.623, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.1834862385321101, |
| "grad_norm": 56021.69921875, |
| "learning_rate": 9.5e-06, |
| "loss": 0.6156, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.1926605504587156, |
| "grad_norm": 60806.10546875, |
| "learning_rate": 1e-05, |
| "loss": 0.5981, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.2018348623853211, |
| "grad_norm": 58582.4140625, |
| "learning_rate": 1.05e-05, |
| "loss": 0.5869, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.21100917431192662, |
| "grad_norm": 59099.16015625, |
| "learning_rate": 1.1000000000000001e-05, |
| "loss": 0.5732, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.22018348623853212, |
| "grad_norm": 52971.81640625, |
| "learning_rate": 1.1500000000000002e-05, |
| "loss": 0.5587, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.22935779816513763, |
| "grad_norm": 51724.43359375, |
| "learning_rate": 1.2e-05, |
| "loss": 0.5477, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.23853211009174313, |
| "grad_norm": 58133.453125, |
| "learning_rate": 1.25e-05, |
| "loss": 0.5422, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.24770642201834864, |
| "grad_norm": 54134.3359375, |
| "learning_rate": 1.3000000000000001e-05, |
| "loss": 0.5319, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.25688073394495414, |
| "grad_norm": 78195.7890625, |
| "learning_rate": 1.3500000000000001e-05, |
| "loss": 0.5259, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.26605504587155965, |
| "grad_norm": 47713.20703125, |
| "learning_rate": 1.4000000000000001e-05, |
| "loss": 0.5105, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.27522935779816515, |
| "grad_norm": 45838.0703125, |
| "learning_rate": 1.45e-05, |
| "loss": 0.499, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.28440366972477066, |
| "grad_norm": 46735.58203125, |
| "learning_rate": 1.5e-05, |
| "loss": 0.4914, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.29357798165137616, |
| "grad_norm": 45432.8828125, |
| "learning_rate": 1.55e-05, |
| "loss": 0.4783, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.30275229357798167, |
| "grad_norm": 46758.58984375, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 0.4758, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.3119266055045872, |
| "grad_norm": 45245.82421875, |
| "learning_rate": 1.65e-05, |
| "loss": 0.4608, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.3211009174311927, |
| "grad_norm": 43336.75390625, |
| "learning_rate": 1.7000000000000003e-05, |
| "loss": 0.4567, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.3302752293577982, |
| "grad_norm": 41445.90625, |
| "learning_rate": 1.75e-05, |
| "loss": 0.4465, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.3394495412844037, |
| "grad_norm": 42278.80078125, |
| "learning_rate": 1.8e-05, |
| "loss": 0.4395, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.3486238532110092, |
| "grad_norm": 42039.28515625, |
| "learning_rate": 1.85e-05, |
| "loss": 0.4289, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.3577981651376147, |
| "grad_norm": 41411.61328125, |
| "learning_rate": 1.9e-05, |
| "loss": 0.423, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.3669724770642202, |
| "grad_norm": 40133.84375, |
| "learning_rate": 1.9500000000000003e-05, |
| "loss": 0.4191, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.3761467889908257, |
| "grad_norm": 39758.8828125, |
| "learning_rate": 2e-05, |
| "loss": 0.4071, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.3853211009174312, |
| "grad_norm": 38760.36328125, |
| "learning_rate": 2.05e-05, |
| "loss": 0.3996, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.3944954128440367, |
| "grad_norm": 38552.80078125, |
| "learning_rate": 2.1e-05, |
| "loss": 0.3918, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.4036697247706422, |
| "grad_norm": 38561.7578125, |
| "learning_rate": 2.15e-05, |
| "loss": 0.3865, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.41284403669724773, |
| "grad_norm": 37616.85546875, |
| "learning_rate": 2.2000000000000003e-05, |
| "loss": 0.3794, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.42201834862385323, |
| "grad_norm": 37784.78515625, |
| "learning_rate": 2.25e-05, |
| "loss": 0.3745, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.43119266055045874, |
| "grad_norm": 36332.9140625, |
| "learning_rate": 2.3000000000000003e-05, |
| "loss": 0.3711, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.44036697247706424, |
| "grad_norm": 38899.73046875, |
| "learning_rate": 2.35e-05, |
| "loss": 0.3643, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.44954128440366975, |
| "grad_norm": 36174.9765625, |
| "learning_rate": 2.4e-05, |
| "loss": 0.3575, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.45871559633027525, |
| "grad_norm": 35699.83203125, |
| "learning_rate": 2.45e-05, |
| "loss": 0.347, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.46788990825688076, |
| "grad_norm": 35562.3671875, |
| "learning_rate": 2.5e-05, |
| "loss": 0.3438, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.47706422018348627, |
| "grad_norm": 35428.0625, |
| "learning_rate": 2.5500000000000003e-05, |
| "loss": 0.3325, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.48623853211009177, |
| "grad_norm": 34396.80078125, |
| "learning_rate": 2.6000000000000002e-05, |
| "loss": 0.3302, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.4954128440366973, |
| "grad_norm": 38376.41796875, |
| "learning_rate": 2.6500000000000004e-05, |
| "loss": 0.3218, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.5045871559633027, |
| "grad_norm": 33996.23046875, |
| "learning_rate": 2.7000000000000002e-05, |
| "loss": 0.3131, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.5137614678899083, |
| "grad_norm": 33070.55859375, |
| "learning_rate": 2.7500000000000004e-05, |
| "loss": 0.3129, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.5229357798165137, |
| "grad_norm": 33200.44140625, |
| "learning_rate": 2.8000000000000003e-05, |
| "loss": 0.3003, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.5321100917431193, |
| "grad_norm": 33099.1015625, |
| "learning_rate": 2.8499999999999998e-05, |
| "loss": 0.2904, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.5412844036697247, |
| "grad_norm": 31352.298828125, |
| "learning_rate": 2.9e-05, |
| "loss": 0.2911, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.5504587155963303, |
| "grad_norm": 31965.087890625, |
| "learning_rate": 2.95e-05, |
| "loss": 0.2815, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.5596330275229358, |
| "grad_norm": 30810.849609375, |
| "learning_rate": 3e-05, |
| "loss": 0.2818, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.5688073394495413, |
| "grad_norm": 30034.779296875, |
| "learning_rate": 3.05e-05, |
| "loss": 0.2731, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.5779816513761468, |
| "grad_norm": 30009.12109375, |
| "learning_rate": 3.1e-05, |
| "loss": 0.266, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.5871559633027523, |
| "grad_norm": 29305.173828125, |
| "learning_rate": 3.15e-05, |
| "loss": 0.2609, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.5963302752293578, |
| "grad_norm": 29081.853515625, |
| "learning_rate": 3.2000000000000005e-05, |
| "loss": 0.2543, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.6055045871559633, |
| "grad_norm": 28217.021484375, |
| "learning_rate": 3.2500000000000004e-05, |
| "loss": 0.252, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.6146788990825688, |
| "grad_norm": 26448.12890625, |
| "learning_rate": 3.3e-05, |
| "loss": 0.2565, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.6238532110091743, |
| "grad_norm": 27198.80859375, |
| "learning_rate": 3.35e-05, |
| "loss": 0.2342, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.6330275229357798, |
| "grad_norm": 26946.30859375, |
| "learning_rate": 3.4000000000000007e-05, |
| "loss": 0.2273, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.6422018348623854, |
| "grad_norm": 26236.7265625, |
| "learning_rate": 3.45e-05, |
| "loss": 0.2219, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.6513761467889908, |
| "grad_norm": 25535.818359375, |
| "learning_rate": 3.5e-05, |
| "loss": 0.2197, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.6605504587155964, |
| "grad_norm": 24871.8515625, |
| "learning_rate": 3.55e-05, |
| "loss": 0.2144, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.6697247706422018, |
| "grad_norm": 24484.55078125, |
| "learning_rate": 3.6e-05, |
| "loss": 0.2066, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.6788990825688074, |
| "grad_norm": 23655.677734375, |
| "learning_rate": 3.65e-05, |
| "loss": 0.206, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.6880733944954128, |
| "grad_norm": 23129.076171875, |
| "learning_rate": 3.7e-05, |
| "loss": 0.1979, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.6972477064220184, |
| "grad_norm": 21871.90625, |
| "learning_rate": 3.7500000000000003e-05, |
| "loss": 0.1973, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.7064220183486238, |
| "grad_norm": 21561.58984375, |
| "learning_rate": 3.8e-05, |
| "loss": 0.1935, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.7155963302752294, |
| "grad_norm": 22210.29296875, |
| "learning_rate": 3.85e-05, |
| "loss": 0.1848, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.7247706422018348, |
| "grad_norm": 20555.775390625, |
| "learning_rate": 3.9000000000000006e-05, |
| "loss": 0.1801, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.7339449541284404, |
| "grad_norm": 20406.75, |
| "learning_rate": 3.9500000000000005e-05, |
| "loss": 0.1746, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.7431192660550459, |
| "grad_norm": 19150.931640625, |
| "learning_rate": 4e-05, |
| "loss": 0.1688, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.7522935779816514, |
| "grad_norm": 18422.65625, |
| "learning_rate": 4.05e-05, |
| "loss": 0.1661, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.7614678899082569, |
| "grad_norm": 16746.20703125, |
| "learning_rate": 4.1e-05, |
| "loss": 0.1711, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.7706422018348624, |
| "grad_norm": 18229.41015625, |
| "learning_rate": 4.15e-05, |
| "loss": 0.1646, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.7798165137614679, |
| "grad_norm": 17156.267578125, |
| "learning_rate": 4.2e-05, |
| "loss": 0.1588, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.7889908256880734, |
| "grad_norm": 15970.611328125, |
| "learning_rate": 4.25e-05, |
| "loss": 0.1547, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.7981651376146789, |
| "grad_norm": 16997.103515625, |
| "learning_rate": 4.3e-05, |
| "loss": 0.1493, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.8073394495412844, |
| "grad_norm": 15529.58984375, |
| "learning_rate": 4.35e-05, |
| "loss": 0.1439, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.8165137614678899, |
| "grad_norm": 15099.9052734375, |
| "learning_rate": 4.4000000000000006e-05, |
| "loss": 0.1432, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.8256880733944955, |
| "grad_norm": 14261.3232421875, |
| "learning_rate": 4.4500000000000004e-05, |
| "loss": 0.1396, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.8348623853211009, |
| "grad_norm": 13915.87890625, |
| "learning_rate": 4.5e-05, |
| "loss": 0.1317, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.8440366972477065, |
| "grad_norm": 13518.7568359375, |
| "learning_rate": 4.55e-05, |
| "loss": 0.1289, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.8532110091743119, |
| "grad_norm": 12170.1865234375, |
| "learning_rate": 4.600000000000001e-05, |
| "loss": 0.1371, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.8623853211009175, |
| "grad_norm": 11745.9775390625, |
| "learning_rate": 4.6500000000000005e-05, |
| "loss": 0.1434, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.8715596330275229, |
| "grad_norm": 12265.5556640625, |
| "learning_rate": 4.7e-05, |
| "loss": 0.1343, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.8807339449541285, |
| "grad_norm": 11386.3291015625, |
| "learning_rate": 4.75e-05, |
| "loss": 0.1317, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.8899082568807339, |
| "grad_norm": 10869.4599609375, |
| "learning_rate": 4.8e-05, |
| "loss": 0.1295, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.8990825688073395, |
| "grad_norm": 10752.25, |
| "learning_rate": 4.85e-05, |
| "loss": 0.124, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.908256880733945, |
| "grad_norm": 9687.6376953125, |
| "learning_rate": 4.9e-05, |
| "loss": 0.1271, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.9174311926605505, |
| "grad_norm": 9697.15234375, |
| "learning_rate": 4.9500000000000004e-05, |
| "loss": 0.1194, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.926605504587156, |
| "grad_norm": 9423.21875, |
| "learning_rate": 5e-05, |
| "loss": 0.1176, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.9357798165137615, |
| "grad_norm": 10253.8564453125, |
| "learning_rate": 4.994949494949495e-05, |
| "loss": 0.121, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.944954128440367, |
| "grad_norm": 9756.7646484375, |
| "learning_rate": 4.98989898989899e-05, |
| "loss": 0.1173, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.9541284403669725, |
| "grad_norm": 8482.177734375, |
| "learning_rate": 4.984848484848485e-05, |
| "loss": 0.1233, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.963302752293578, |
| "grad_norm": 7755.4892578125, |
| "learning_rate": 4.97979797979798e-05, |
| "loss": 0.1251, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.9724770642201835, |
| "grad_norm": 8553.6689453125, |
| "learning_rate": 4.974747474747475e-05, |
| "loss": 0.1287, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.981651376146789, |
| "grad_norm": 8794.58984375, |
| "learning_rate": 4.9696969696969694e-05, |
| "loss": 0.1178, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.9908256880733946, |
| "grad_norm": 8048.97509765625, |
| "learning_rate": 4.964646464646465e-05, |
| "loss": 0.1112, |
| "step": 108 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 24349.96484375, |
| "learning_rate": 4.9595959595959594e-05, |
| "loss": 0.112, |
| "step": 109 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_f1_macro": 0.0, |
| "eval_f1_micro": 0.0, |
| "eval_loss": 0.1133684441447258, |
| "eval_precision": 0.0, |
| "eval_recall": 0.0, |
| "eval_runtime": 0.8578, |
| "eval_samples_per_second": 336.892, |
| "eval_steps_per_second": 15.154, |
| "step": 109 |
| }, |
| { |
| "epoch": 1.0091743119266054, |
| "grad_norm": 7603.02587890625, |
| "learning_rate": 4.9545454545454553e-05, |
| "loss": 0.1133, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.018348623853211, |
| "grad_norm": 8602.8974609375, |
| "learning_rate": 4.94949494949495e-05, |
| "loss": 0.1192, |
| "step": 111 |
| }, |
| { |
| "epoch": 1.0275229357798166, |
| "grad_norm": 8388.9609375, |
| "learning_rate": 4.9444444444444446e-05, |
| "loss": 0.1246, |
| "step": 112 |
| }, |
| { |
| "epoch": 1.036697247706422, |
| "grad_norm": 9368.150390625, |
| "learning_rate": 4.93939393939394e-05, |
| "loss": 0.1089, |
| "step": 113 |
| }, |
| { |
| "epoch": 1.0458715596330275, |
| "grad_norm": 7053.083984375, |
| "learning_rate": 4.9343434343434346e-05, |
| "loss": 0.1042, |
| "step": 114 |
| }, |
| { |
| "epoch": 1.0550458715596331, |
| "grad_norm": 9539.36328125, |
| "learning_rate": 4.92929292929293e-05, |
| "loss": 0.1049, |
| "step": 115 |
| }, |
| { |
| "epoch": 1.0642201834862386, |
| "grad_norm": 6956.2763671875, |
| "learning_rate": 4.9242424242424245e-05, |
| "loss": 0.109, |
| "step": 116 |
| }, |
| { |
| "epoch": 1.073394495412844, |
| "grad_norm": 6597.9580078125, |
| "learning_rate": 4.919191919191919e-05, |
| "loss": 0.1124, |
| "step": 117 |
| }, |
| { |
| "epoch": 1.0825688073394495, |
| "grad_norm": 8124.76123046875, |
| "learning_rate": 4.9141414141414145e-05, |
| "loss": 0.1226, |
| "step": 118 |
| }, |
| { |
| "epoch": 1.091743119266055, |
| "grad_norm": 7030.619140625, |
| "learning_rate": 4.909090909090909e-05, |
| "loss": 0.0989, |
| "step": 119 |
| }, |
| { |
| "epoch": 1.1009174311926606, |
| "grad_norm": 9377.66796875, |
| "learning_rate": 4.9040404040404044e-05, |
| "loss": 0.1038, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.110091743119266, |
| "grad_norm": 9298.802734375, |
| "learning_rate": 4.898989898989899e-05, |
| "loss": 0.1216, |
| "step": 121 |
| }, |
| { |
| "epoch": 1.1192660550458715, |
| "grad_norm": 7350.28369140625, |
| "learning_rate": 4.8939393939393944e-05, |
| "loss": 0.0915, |
| "step": 122 |
| }, |
| { |
| "epoch": 1.1284403669724772, |
| "grad_norm": 8066.943359375, |
| "learning_rate": 4.888888888888889e-05, |
| "loss": 0.1291, |
| "step": 123 |
| }, |
| { |
| "epoch": 1.1376146788990826, |
| "grad_norm": 8210.3095703125, |
| "learning_rate": 4.8838383838383836e-05, |
| "loss": 0.1196, |
| "step": 124 |
| }, |
| { |
| "epoch": 1.146788990825688, |
| "grad_norm": 7568.0234375, |
| "learning_rate": 4.878787878787879e-05, |
| "loss": 0.1037, |
| "step": 125 |
| }, |
| { |
| "epoch": 1.1559633027522935, |
| "grad_norm": 6394.8896484375, |
| "learning_rate": 4.8737373737373736e-05, |
| "loss": 0.0961, |
| "step": 126 |
| }, |
| { |
| "epoch": 1.165137614678899, |
| "grad_norm": 7549.9951171875, |
| "learning_rate": 4.868686868686869e-05, |
| "loss": 0.1075, |
| "step": 127 |
| }, |
| { |
| "epoch": 1.1743119266055047, |
| "grad_norm": 7983.36865234375, |
| "learning_rate": 4.863636363636364e-05, |
| "loss": 0.1055, |
| "step": 128 |
| }, |
| { |
| "epoch": 1.18348623853211, |
| "grad_norm": 7613.1455078125, |
| "learning_rate": 4.858585858585859e-05, |
| "loss": 0.1097, |
| "step": 129 |
| }, |
| { |
| "epoch": 1.1926605504587156, |
| "grad_norm": 7754.9228515625, |
| "learning_rate": 4.853535353535354e-05, |
| "loss": 0.1157, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.2018348623853212, |
| "grad_norm": 8360.388671875, |
| "learning_rate": 4.848484848484849e-05, |
| "loss": 0.1019, |
| "step": 131 |
| }, |
| { |
| "epoch": 1.2110091743119267, |
| "grad_norm": 8300.9169921875, |
| "learning_rate": 4.843434343434344e-05, |
| "loss": 0.1098, |
| "step": 132 |
| }, |
| { |
| "epoch": 1.2201834862385321, |
| "grad_norm": 7554.8017578125, |
| "learning_rate": 4.838383838383839e-05, |
| "loss": 0.1075, |
| "step": 133 |
| }, |
| { |
| "epoch": 1.2293577981651376, |
| "grad_norm": 6510.427734375, |
| "learning_rate": 4.8333333333333334e-05, |
| "loss": 0.1041, |
| "step": 134 |
| }, |
| { |
| "epoch": 1.238532110091743, |
| "grad_norm": 5649.77978515625, |
| "learning_rate": 4.828282828282829e-05, |
| "loss": 0.1012, |
| "step": 135 |
| }, |
| { |
| "epoch": 1.2477064220183487, |
| "grad_norm": 7662.76513671875, |
| "learning_rate": 4.823232323232323e-05, |
| "loss": 0.1357, |
| "step": 136 |
| }, |
| { |
| "epoch": 1.2568807339449541, |
| "grad_norm": 7261.087890625, |
| "learning_rate": 4.8181818181818186e-05, |
| "loss": 0.1149, |
| "step": 137 |
| }, |
| { |
| "epoch": 1.2660550458715596, |
| "grad_norm": 6818.208984375, |
| "learning_rate": 4.813131313131313e-05, |
| "loss": 0.1067, |
| "step": 138 |
| }, |
| { |
| "epoch": 1.2752293577981653, |
| "grad_norm": 8839.0810546875, |
| "learning_rate": 4.808080808080808e-05, |
| "loss": 0.089, |
| "step": 139 |
| }, |
| { |
| "epoch": 1.2844036697247707, |
| "grad_norm": 8774.0615234375, |
| "learning_rate": 4.803030303030303e-05, |
| "loss": 0.1013, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.2935779816513762, |
| "grad_norm": 7869.935546875, |
| "learning_rate": 4.797979797979798e-05, |
| "loss": 0.1187, |
| "step": 141 |
| }, |
| { |
| "epoch": 1.3027522935779816, |
| "grad_norm": 9808.5986328125, |
| "learning_rate": 4.792929292929293e-05, |
| "loss": 0.13, |
| "step": 142 |
| }, |
| { |
| "epoch": 1.311926605504587, |
| "grad_norm": 7122.400390625, |
| "learning_rate": 4.787878787878788e-05, |
| "loss": 0.1089, |
| "step": 143 |
| }, |
| { |
| "epoch": 1.3211009174311927, |
| "grad_norm": 6575.39892578125, |
| "learning_rate": 4.782828282828283e-05, |
| "loss": 0.1011, |
| "step": 144 |
| }, |
| { |
| "epoch": 1.3302752293577982, |
| "grad_norm": 8656.4345703125, |
| "learning_rate": 4.7777777777777784e-05, |
| "loss": 0.0998, |
| "step": 145 |
| }, |
| { |
| "epoch": 1.3394495412844036, |
| "grad_norm": 7247.53466796875, |
| "learning_rate": 4.772727272727273e-05, |
| "loss": 0.1255, |
| "step": 146 |
| }, |
| { |
| "epoch": 1.3486238532110093, |
| "grad_norm": 7125.67822265625, |
| "learning_rate": 4.7676767676767684e-05, |
| "loss": 0.1123, |
| "step": 147 |
| }, |
| { |
| "epoch": 1.3577981651376148, |
| "grad_norm": 7758.84375, |
| "learning_rate": 4.762626262626263e-05, |
| "loss": 0.1138, |
| "step": 148 |
| }, |
| { |
| "epoch": 1.3669724770642202, |
| "grad_norm": 6958.53173828125, |
| "learning_rate": 4.7575757575757576e-05, |
| "loss": 0.0968, |
| "step": 149 |
| }, |
| { |
| "epoch": 1.3761467889908257, |
| "grad_norm": 7246.337890625, |
| "learning_rate": 4.752525252525253e-05, |
| "loss": 0.0924, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.385321100917431, |
| "grad_norm": 7635.5615234375, |
| "learning_rate": 4.7474747474747476e-05, |
| "loss": 0.1152, |
| "step": 151 |
| }, |
| { |
| "epoch": 1.3944954128440368, |
| "grad_norm": 7913.57080078125, |
| "learning_rate": 4.742424242424243e-05, |
| "loss": 0.0951, |
| "step": 152 |
| }, |
| { |
| "epoch": 1.4036697247706422, |
| "grad_norm": 8531.388671875, |
| "learning_rate": 4.7373737373737375e-05, |
| "loss": 0.1161, |
| "step": 153 |
| }, |
| { |
| "epoch": 1.4128440366972477, |
| "grad_norm": 9724.3291015625, |
| "learning_rate": 4.732323232323232e-05, |
| "loss": 0.1253, |
| "step": 154 |
| }, |
| { |
| "epoch": 1.4220183486238533, |
| "grad_norm": 6726.74365234375, |
| "learning_rate": 4.7272727272727275e-05, |
| "loss": 0.1037, |
| "step": 155 |
| }, |
| { |
| "epoch": 1.4311926605504588, |
| "grad_norm": 6435.69970703125, |
| "learning_rate": 4.722222222222222e-05, |
| "loss": 0.1036, |
| "step": 156 |
| }, |
| { |
| "epoch": 1.4403669724770642, |
| "grad_norm": 6851.59814453125, |
| "learning_rate": 4.7171717171717174e-05, |
| "loss": 0.0954, |
| "step": 157 |
| }, |
| { |
| "epoch": 1.4495412844036697, |
| "grad_norm": 7785.31640625, |
| "learning_rate": 4.712121212121212e-05, |
| "loss": 0.0996, |
| "step": 158 |
| }, |
| { |
| "epoch": 1.4587155963302751, |
| "grad_norm": 6317.5576171875, |
| "learning_rate": 4.7070707070707074e-05, |
| "loss": 0.0977, |
| "step": 159 |
| }, |
| { |
| "epoch": 1.4678899082568808, |
| "grad_norm": 8045.7197265625, |
| "learning_rate": 4.702020202020202e-05, |
| "loss": 0.1101, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.4770642201834863, |
| "grad_norm": 7386.67529296875, |
| "learning_rate": 4.696969696969697e-05, |
| "loss": 0.1137, |
| "step": 161 |
| }, |
| { |
| "epoch": 1.4862385321100917, |
| "grad_norm": 8017.5791015625, |
| "learning_rate": 4.6919191919191926e-05, |
| "loss": 0.0959, |
| "step": 162 |
| }, |
| { |
| "epoch": 1.4954128440366974, |
| "grad_norm": 7198.42138671875, |
| "learning_rate": 4.686868686868687e-05, |
| "loss": 0.1005, |
| "step": 163 |
| }, |
| { |
| "epoch": 1.5045871559633026, |
| "grad_norm": 9139.7900390625, |
| "learning_rate": 4.681818181818182e-05, |
| "loss": 0.0918, |
| "step": 164 |
| }, |
| { |
| "epoch": 1.5137614678899083, |
| "grad_norm": 6384.1640625, |
| "learning_rate": 4.676767676767677e-05, |
| "loss": 0.0948, |
| "step": 165 |
| }, |
| { |
| "epoch": 1.5229357798165137, |
| "grad_norm": 5053.224609375, |
| "learning_rate": 4.671717171717172e-05, |
| "loss": 0.1056, |
| "step": 166 |
| }, |
| { |
| "epoch": 1.5321100917431192, |
| "grad_norm": 9213.654296875, |
| "learning_rate": 4.666666666666667e-05, |
| "loss": 0.0931, |
| "step": 167 |
| }, |
| { |
| "epoch": 1.5412844036697249, |
| "grad_norm": 7414.76171875, |
| "learning_rate": 4.661616161616162e-05, |
| "loss": 0.1123, |
| "step": 168 |
| }, |
| { |
| "epoch": 1.5504587155963303, |
| "grad_norm": 6406.48583984375, |
| "learning_rate": 4.656565656565657e-05, |
| "loss": 0.0992, |
| "step": 169 |
| }, |
| { |
| "epoch": 1.5596330275229358, |
| "grad_norm": 7213.58837890625, |
| "learning_rate": 4.651515151515152e-05, |
| "loss": 0.0952, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.5688073394495414, |
| "grad_norm": 7537.673828125, |
| "learning_rate": 4.6464646464646464e-05, |
| "loss": 0.0961, |
| "step": 171 |
| }, |
| { |
| "epoch": 1.5779816513761467, |
| "grad_norm": 7876.294921875, |
| "learning_rate": 4.641414141414142e-05, |
| "loss": 0.1308, |
| "step": 172 |
| }, |
| { |
| "epoch": 1.5871559633027523, |
| "grad_norm": 9550.125, |
| "learning_rate": 4.636363636363636e-05, |
| "loss": 0.0941, |
| "step": 173 |
| }, |
| { |
| "epoch": 1.5963302752293578, |
| "grad_norm": 6364.3330078125, |
| "learning_rate": 4.6313131313131316e-05, |
| "loss": 0.0976, |
| "step": 174 |
| }, |
| { |
| "epoch": 1.6055045871559632, |
| "grad_norm": 6976.2138671875, |
| "learning_rate": 4.626262626262626e-05, |
| "loss": 0.091, |
| "step": 175 |
| }, |
| { |
| "epoch": 1.614678899082569, |
| "grad_norm": 7259.40234375, |
| "learning_rate": 4.621212121212121e-05, |
| "loss": 0.1096, |
| "step": 176 |
| }, |
| { |
| "epoch": 1.6238532110091743, |
| "grad_norm": 8176.20849609375, |
| "learning_rate": 4.616161616161616e-05, |
| "loss": 0.1029, |
| "step": 177 |
| }, |
| { |
| "epoch": 1.6330275229357798, |
| "grad_norm": 5628.04345703125, |
| "learning_rate": 4.6111111111111115e-05, |
| "loss": 0.1008, |
| "step": 178 |
| }, |
| { |
| "epoch": 1.6422018348623855, |
| "grad_norm": 6802.91064453125, |
| "learning_rate": 4.606060606060607e-05, |
| "loss": 0.1014, |
| "step": 179 |
| }, |
| { |
| "epoch": 1.6513761467889907, |
| "grad_norm": 14422.4482421875, |
| "learning_rate": 4.6010101010101015e-05, |
| "loss": 0.0912, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.6605504587155964, |
| "grad_norm": 8855.7744140625, |
| "learning_rate": 4.595959595959596e-05, |
| "loss": 0.0983, |
| "step": 181 |
| }, |
| { |
| "epoch": 1.6697247706422018, |
| "grad_norm": 7411.93603515625, |
| "learning_rate": 4.5909090909090914e-05, |
| "loss": 0.1099, |
| "step": 182 |
| }, |
| { |
| "epoch": 1.6788990825688073, |
| "grad_norm": 8138.41748046875, |
| "learning_rate": 4.585858585858586e-05, |
| "loss": 0.1054, |
| "step": 183 |
| }, |
| { |
| "epoch": 1.688073394495413, |
| "grad_norm": 6157.80908203125, |
| "learning_rate": 4.5808080808080814e-05, |
| "loss": 0.1054, |
| "step": 184 |
| }, |
| { |
| "epoch": 1.6972477064220184, |
| "grad_norm": 6791.07421875, |
| "learning_rate": 4.575757575757576e-05, |
| "loss": 0.1022, |
| "step": 185 |
| }, |
| { |
| "epoch": 1.7064220183486238, |
| "grad_norm": 9991.0537109375, |
| "learning_rate": 4.5707070707070706e-05, |
| "loss": 0.1131, |
| "step": 186 |
| }, |
| { |
| "epoch": 1.7155963302752295, |
| "grad_norm": 7240.28955078125, |
| "learning_rate": 4.565656565656566e-05, |
| "loss": 0.1005, |
| "step": 187 |
| }, |
| { |
| "epoch": 1.7247706422018347, |
| "grad_norm": 6442.6259765625, |
| "learning_rate": 4.5606060606060606e-05, |
| "loss": 0.1015, |
| "step": 188 |
| }, |
| { |
| "epoch": 1.7339449541284404, |
| "grad_norm": 8573.3955078125, |
| "learning_rate": 4.555555555555556e-05, |
| "loss": 0.1176, |
| "step": 189 |
| }, |
| { |
| "epoch": 1.7431192660550459, |
| "grad_norm": 7330.88525390625, |
| "learning_rate": 4.5505050505050505e-05, |
| "loss": 0.0923, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.7522935779816513, |
| "grad_norm": 7870.916015625, |
| "learning_rate": 4.545454545454546e-05, |
| "loss": 0.1113, |
| "step": 191 |
| }, |
| { |
| "epoch": 1.761467889908257, |
| "grad_norm": 7789.8564453125, |
| "learning_rate": 4.5404040404040405e-05, |
| "loss": 0.1062, |
| "step": 192 |
| }, |
| { |
| "epoch": 1.7706422018348624, |
| "grad_norm": 7983.59326171875, |
| "learning_rate": 4.535353535353535e-05, |
| "loss": 0.1078, |
| "step": 193 |
| }, |
| { |
| "epoch": 1.7798165137614679, |
| "grad_norm": 6533.43994140625, |
| "learning_rate": 4.5303030303030304e-05, |
| "loss": 0.1033, |
| "step": 194 |
| }, |
| { |
| "epoch": 1.7889908256880735, |
| "grad_norm": 7317.1318359375, |
| "learning_rate": 4.525252525252526e-05, |
| "loss": 0.1106, |
| "step": 195 |
| }, |
| { |
| "epoch": 1.7981651376146788, |
| "grad_norm": 7829.7880859375, |
| "learning_rate": 4.5202020202020204e-05, |
| "loss": 0.1163, |
| "step": 196 |
| }, |
| { |
| "epoch": 1.8073394495412844, |
| "grad_norm": 7524.41357421875, |
| "learning_rate": 4.515151515151516e-05, |
| "loss": 0.1043, |
| "step": 197 |
| }, |
| { |
| "epoch": 1.81651376146789, |
| "grad_norm": 7487.89892578125, |
| "learning_rate": 4.51010101010101e-05, |
| "loss": 0.1091, |
| "step": 198 |
| }, |
| { |
| "epoch": 1.8256880733944953, |
| "grad_norm": 6964.3154296875, |
| "learning_rate": 4.5050505050505056e-05, |
| "loss": 0.0986, |
| "step": 199 |
| }, |
| { |
| "epoch": 1.834862385321101, |
| "grad_norm": 7087.05029296875, |
| "learning_rate": 4.5e-05, |
| "loss": 0.0997, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.8440366972477065, |
| "grad_norm": 7148.7578125, |
| "learning_rate": 4.494949494949495e-05, |
| "loss": 0.0989, |
| "step": 201 |
| }, |
| { |
| "epoch": 1.853211009174312, |
| "grad_norm": 7850.2470703125, |
| "learning_rate": 4.48989898989899e-05, |
| "loss": 0.0957, |
| "step": 202 |
| }, |
| { |
| "epoch": 1.8623853211009176, |
| "grad_norm": 7752.841796875, |
| "learning_rate": 4.484848484848485e-05, |
| "loss": 0.1292, |
| "step": 203 |
| }, |
| { |
| "epoch": 1.8715596330275228, |
| "grad_norm": 6098.35595703125, |
| "learning_rate": 4.47979797979798e-05, |
| "loss": 0.1001, |
| "step": 204 |
| }, |
| { |
| "epoch": 1.8807339449541285, |
| "grad_norm": 7094.8173828125, |
| "learning_rate": 4.474747474747475e-05, |
| "loss": 0.1286, |
| "step": 205 |
| }, |
| { |
| "epoch": 1.889908256880734, |
| "grad_norm": 7456.2265625, |
| "learning_rate": 4.46969696969697e-05, |
| "loss": 0.1256, |
| "step": 206 |
| }, |
| { |
| "epoch": 1.8990825688073394, |
| "grad_norm": 8136.77197265625, |
| "learning_rate": 4.464646464646465e-05, |
| "loss": 0.1014, |
| "step": 207 |
| }, |
| { |
| "epoch": 1.908256880733945, |
| "grad_norm": 7520.1650390625, |
| "learning_rate": 4.4595959595959594e-05, |
| "loss": 0.0919, |
| "step": 208 |
| }, |
| { |
| "epoch": 1.9174311926605505, |
| "grad_norm": 8064.79296875, |
| "learning_rate": 4.454545454545455e-05, |
| "loss": 0.0985, |
| "step": 209 |
| }, |
| { |
| "epoch": 1.926605504587156, |
| "grad_norm": 6153.19677734375, |
| "learning_rate": 4.4494949494949493e-05, |
| "loss": 0.1101, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.9357798165137616, |
| "grad_norm": 8089.42431640625, |
| "learning_rate": 4.4444444444444447e-05, |
| "loss": 0.0988, |
| "step": 211 |
| }, |
| { |
| "epoch": 1.9449541284403669, |
| "grad_norm": 8096.140625, |
| "learning_rate": 4.43939393939394e-05, |
| "loss": 0.0992, |
| "step": 212 |
| }, |
| { |
| "epoch": 1.9541284403669725, |
| "grad_norm": 7865.43798828125, |
| "learning_rate": 4.4343434343434346e-05, |
| "loss": 0.0945, |
| "step": 213 |
| }, |
| { |
| "epoch": 1.963302752293578, |
| "grad_norm": 6380.67724609375, |
| "learning_rate": 4.42929292929293e-05, |
| "loss": 0.1073, |
| "step": 214 |
| }, |
| { |
| "epoch": 1.9724770642201834, |
| "grad_norm": 8548.4345703125, |
| "learning_rate": 4.4242424242424246e-05, |
| "loss": 0.1121, |
| "step": 215 |
| }, |
| { |
| "epoch": 1.981651376146789, |
| "grad_norm": 8465.8466796875, |
| "learning_rate": 4.41919191919192e-05, |
| "loss": 0.1078, |
| "step": 216 |
| }, |
| { |
| "epoch": 1.9908256880733946, |
| "grad_norm": 8595.419921875, |
| "learning_rate": 4.4141414141414145e-05, |
| "loss": 0.0924, |
| "step": 217 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 22229.833984375, |
| "learning_rate": 4.409090909090909e-05, |
| "loss": 0.0785, |
| "step": 218 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_f1_macro": 0.0, |
| "eval_f1_micro": 0.0, |
| "eval_loss": 0.10520372539758682, |
| "eval_precision": 0.0, |
| "eval_recall": 0.0, |
| "eval_runtime": 0.8127, |
| "eval_samples_per_second": 355.611, |
| "eval_steps_per_second": 15.996, |
| "step": 218 |
| }, |
| { |
| "epoch": 2.0091743119266057, |
| "grad_norm": 8562.740234375, |
| "learning_rate": 4.4040404040404044e-05, |
| "loss": 0.1152, |
| "step": 219 |
| }, |
| { |
| "epoch": 2.018348623853211, |
| "grad_norm": 7770.8720703125, |
| "learning_rate": 4.398989898989899e-05, |
| "loss": 0.0957, |
| "step": 220 |
| }, |
| { |
| "epoch": 2.0275229357798166, |
| "grad_norm": 6170.166015625, |
| "learning_rate": 4.3939393939393944e-05, |
| "loss": 0.1027, |
| "step": 221 |
| }, |
| { |
| "epoch": 2.036697247706422, |
| "grad_norm": 10314.91796875, |
| "learning_rate": 4.388888888888889e-05, |
| "loss": 0.1166, |
| "step": 222 |
| }, |
| { |
| "epoch": 2.0458715596330275, |
| "grad_norm": 7144.62109375, |
| "learning_rate": 4.383838383838384e-05, |
| "loss": 0.1065, |
| "step": 223 |
| }, |
| { |
| "epoch": 2.055045871559633, |
| "grad_norm": 7398.2109375, |
| "learning_rate": 4.378787878787879e-05, |
| "loss": 0.1165, |
| "step": 224 |
| }, |
| { |
| "epoch": 2.0642201834862384, |
| "grad_norm": 7307.375, |
| "learning_rate": 4.3737373737373736e-05, |
| "loss": 0.0941, |
| "step": 225 |
| }, |
| { |
| "epoch": 2.073394495412844, |
| "grad_norm": 8046.33837890625, |
| "learning_rate": 4.368686868686869e-05, |
| "loss": 0.1092, |
| "step": 226 |
| }, |
| { |
| "epoch": 2.0825688073394497, |
| "grad_norm": 8320.74609375, |
| "learning_rate": 4.3636363636363636e-05, |
| "loss": 0.0983, |
| "step": 227 |
| }, |
| { |
| "epoch": 2.091743119266055, |
| "grad_norm": 10485.9384765625, |
| "learning_rate": 4.358585858585859e-05, |
| "loss": 0.0981, |
| "step": 228 |
| }, |
| { |
| "epoch": 2.1009174311926606, |
| "grad_norm": 7908.02734375, |
| "learning_rate": 4.3535353535353535e-05, |
| "loss": 0.1021, |
| "step": 229 |
| }, |
| { |
| "epoch": 2.1100917431192663, |
| "grad_norm": 7627.345703125, |
| "learning_rate": 4.348484848484849e-05, |
| "loss": 0.106, |
| "step": 230 |
| }, |
| { |
| "epoch": 2.1192660550458715, |
| "grad_norm": 7474.5263671875, |
| "learning_rate": 4.343434343434344e-05, |
| "loss": 0.1156, |
| "step": 231 |
| }, |
| { |
| "epoch": 2.128440366972477, |
| "grad_norm": 6541.48828125, |
| "learning_rate": 4.338383838383839e-05, |
| "loss": 0.0974, |
| "step": 232 |
| }, |
| { |
| "epoch": 2.1376146788990824, |
| "grad_norm": 10176.2197265625, |
| "learning_rate": 4.3333333333333334e-05, |
| "loss": 0.1172, |
| "step": 233 |
| }, |
| { |
| "epoch": 2.146788990825688, |
| "grad_norm": 10230.0478515625, |
| "learning_rate": 4.328282828282829e-05, |
| "loss": 0.1089, |
| "step": 234 |
| }, |
| { |
| "epoch": 2.1559633027522938, |
| "grad_norm": 7409.98583984375, |
| "learning_rate": 4.3232323232323234e-05, |
| "loss": 0.095, |
| "step": 235 |
| }, |
| { |
| "epoch": 2.165137614678899, |
| "grad_norm": 9656.2216796875, |
| "learning_rate": 4.318181818181819e-05, |
| "loss": 0.0862, |
| "step": 236 |
| }, |
| { |
| "epoch": 2.1743119266055047, |
| "grad_norm": 10004.51171875, |
| "learning_rate": 4.313131313131313e-05, |
| "loss": 0.1019, |
| "step": 237 |
| }, |
| { |
| "epoch": 2.18348623853211, |
| "grad_norm": 10041.599609375, |
| "learning_rate": 4.308080808080808e-05, |
| "loss": 0.0828, |
| "step": 238 |
| }, |
| { |
| "epoch": 2.1926605504587156, |
| "grad_norm": 10838.517578125, |
| "learning_rate": 4.303030303030303e-05, |
| "loss": 0.1093, |
| "step": 239 |
| }, |
| { |
| "epoch": 2.2018348623853212, |
| "grad_norm": 7099.97265625, |
| "learning_rate": 4.297979797979798e-05, |
| "loss": 0.0962, |
| "step": 240 |
| }, |
| { |
| "epoch": 2.2110091743119265, |
| "grad_norm": 8438.68359375, |
| "learning_rate": 4.292929292929293e-05, |
| "loss": 0.1095, |
| "step": 241 |
| }, |
| { |
| "epoch": 2.220183486238532, |
| "grad_norm": 7405.43359375, |
| "learning_rate": 4.287878787878788e-05, |
| "loss": 0.1035, |
| "step": 242 |
| }, |
| { |
| "epoch": 2.229357798165138, |
| "grad_norm": 9133.9833984375, |
| "learning_rate": 4.282828282828283e-05, |
| "loss": 0.0925, |
| "step": 243 |
| }, |
| { |
| "epoch": 2.238532110091743, |
| "grad_norm": 7998.142578125, |
| "learning_rate": 4.277777777777778e-05, |
| "loss": 0.0998, |
| "step": 244 |
| }, |
| { |
| "epoch": 2.2477064220183487, |
| "grad_norm": 9984.765625, |
| "learning_rate": 4.2727272727272724e-05, |
| "loss": 0.1027, |
| "step": 245 |
| }, |
| { |
| "epoch": 2.2568807339449544, |
| "grad_norm": 8999.0556640625, |
| "learning_rate": 4.267676767676768e-05, |
| "loss": 0.1177, |
| "step": 246 |
| }, |
| { |
| "epoch": 2.2660550458715596, |
| "grad_norm": 7668.86962890625, |
| "learning_rate": 4.262626262626263e-05, |
| "loss": 0.0946, |
| "step": 247 |
| }, |
| { |
| "epoch": 2.2752293577981653, |
| "grad_norm": 7349.47998046875, |
| "learning_rate": 4.257575757575758e-05, |
| "loss": 0.1066, |
| "step": 248 |
| }, |
| { |
| "epoch": 2.2844036697247705, |
| "grad_norm": 8360.69921875, |
| "learning_rate": 4.252525252525253e-05, |
| "loss": 0.1062, |
| "step": 249 |
| }, |
| { |
| "epoch": 2.293577981651376, |
| "grad_norm": 8717.70703125, |
| "learning_rate": 4.2474747474747476e-05, |
| "loss": 0.0907, |
| "step": 250 |
| }, |
| { |
| "epoch": 2.302752293577982, |
| "grad_norm": 8706.5791015625, |
| "learning_rate": 4.242424242424243e-05, |
| "loss": 0.0973, |
| "step": 251 |
| }, |
| { |
| "epoch": 2.311926605504587, |
| "grad_norm": 8500.1220703125, |
| "learning_rate": 4.2373737373737376e-05, |
| "loss": 0.1022, |
| "step": 252 |
| }, |
| { |
| "epoch": 2.3211009174311927, |
| "grad_norm": 10551.7021484375, |
| "learning_rate": 4.232323232323233e-05, |
| "loss": 0.0979, |
| "step": 253 |
| }, |
| { |
| "epoch": 2.330275229357798, |
| "grad_norm": 8305.2802734375, |
| "learning_rate": 4.2272727272727275e-05, |
| "loss": 0.0926, |
| "step": 254 |
| }, |
| { |
| "epoch": 2.3394495412844036, |
| "grad_norm": 10128.9423828125, |
| "learning_rate": 4.222222222222222e-05, |
| "loss": 0.1018, |
| "step": 255 |
| }, |
| { |
| "epoch": 2.3486238532110093, |
| "grad_norm": 9000.6689453125, |
| "learning_rate": 4.2171717171717175e-05, |
| "loss": 0.091, |
| "step": 256 |
| }, |
| { |
| "epoch": 2.3577981651376145, |
| "grad_norm": 8849.099609375, |
| "learning_rate": 4.212121212121212e-05, |
| "loss": 0.0993, |
| "step": 257 |
| }, |
| { |
| "epoch": 2.36697247706422, |
| "grad_norm": 6667.78564453125, |
| "learning_rate": 4.2070707070707074e-05, |
| "loss": 0.1044, |
| "step": 258 |
| }, |
| { |
| "epoch": 2.376146788990826, |
| "grad_norm": 7288.4970703125, |
| "learning_rate": 4.202020202020202e-05, |
| "loss": 0.0966, |
| "step": 259 |
| }, |
| { |
| "epoch": 2.385321100917431, |
| "grad_norm": 7155.8310546875, |
| "learning_rate": 4.196969696969697e-05, |
| "loss": 0.1018, |
| "step": 260 |
| }, |
| { |
| "epoch": 2.3944954128440368, |
| "grad_norm": 6328.56396484375, |
| "learning_rate": 4.191919191919192e-05, |
| "loss": 0.0862, |
| "step": 261 |
| }, |
| { |
| "epoch": 2.4036697247706424, |
| "grad_norm": 8594.3603515625, |
| "learning_rate": 4.1868686868686866e-05, |
| "loss": 0.1028, |
| "step": 262 |
| }, |
| { |
| "epoch": 2.4128440366972477, |
| "grad_norm": 8077.544921875, |
| "learning_rate": 4.181818181818182e-05, |
| "loss": 0.1044, |
| "step": 263 |
| }, |
| { |
| "epoch": 2.4220183486238533, |
| "grad_norm": 6332.14453125, |
| "learning_rate": 4.176767676767677e-05, |
| "loss": 0.0902, |
| "step": 264 |
| }, |
| { |
| "epoch": 2.4311926605504586, |
| "grad_norm": 7677.5009765625, |
| "learning_rate": 4.171717171717172e-05, |
| "loss": 0.098, |
| "step": 265 |
| }, |
| { |
| "epoch": 2.4403669724770642, |
| "grad_norm": 7953.89501953125, |
| "learning_rate": 4.166666666666667e-05, |
| "loss": 0.0809, |
| "step": 266 |
| }, |
| { |
| "epoch": 2.44954128440367, |
| "grad_norm": 6055.19287109375, |
| "learning_rate": 4.161616161616162e-05, |
| "loss": 0.0814, |
| "step": 267 |
| }, |
| { |
| "epoch": 2.458715596330275, |
| "grad_norm": 6427.98046875, |
| "learning_rate": 4.156565656565657e-05, |
| "loss": 0.0994, |
| "step": 268 |
| }, |
| { |
| "epoch": 2.467889908256881, |
| "grad_norm": 8880.2197265625, |
| "learning_rate": 4.151515151515152e-05, |
| "loss": 0.0804, |
| "step": 269 |
| }, |
| { |
| "epoch": 2.477064220183486, |
| "grad_norm": 7500.67578125, |
| "learning_rate": 4.1464646464646464e-05, |
| "loss": 0.0997, |
| "step": 270 |
| }, |
| { |
| "epoch": 2.4862385321100917, |
| "grad_norm": 7541.759765625, |
| "learning_rate": 4.141414141414142e-05, |
| "loss": 0.0901, |
| "step": 271 |
| }, |
| { |
| "epoch": 2.4954128440366974, |
| "grad_norm": 6423.1025390625, |
| "learning_rate": 4.1363636363636364e-05, |
| "loss": 0.089, |
| "step": 272 |
| }, |
| { |
| "epoch": 2.5045871559633026, |
| "grad_norm": 6384.140625, |
| "learning_rate": 4.131313131313132e-05, |
| "loss": 0.0848, |
| "step": 273 |
| }, |
| { |
| "epoch": 2.5137614678899083, |
| "grad_norm": 6418.369140625, |
| "learning_rate": 4.126262626262626e-05, |
| "loss": 0.0845, |
| "step": 274 |
| }, |
| { |
| "epoch": 2.522935779816514, |
| "grad_norm": 6441.12060546875, |
| "learning_rate": 4.1212121212121216e-05, |
| "loss": 0.0855, |
| "step": 275 |
| }, |
| { |
| "epoch": 2.532110091743119, |
| "grad_norm": 6891.9501953125, |
| "learning_rate": 4.116161616161616e-05, |
| "loss": 0.0798, |
| "step": 276 |
| }, |
| { |
| "epoch": 2.541284403669725, |
| "grad_norm": 8036.39306640625, |
| "learning_rate": 4.111111111111111e-05, |
| "loss": 0.1083, |
| "step": 277 |
| }, |
| { |
| "epoch": 2.5504587155963305, |
| "grad_norm": 7460.015625, |
| "learning_rate": 4.106060606060606e-05, |
| "loss": 0.0934, |
| "step": 278 |
| }, |
| { |
| "epoch": 2.5596330275229358, |
| "grad_norm": 7343.33154296875, |
| "learning_rate": 4.101010101010101e-05, |
| "loss": 0.0951, |
| "step": 279 |
| }, |
| { |
| "epoch": 2.5688073394495414, |
| "grad_norm": 9859.599609375, |
| "learning_rate": 4.095959595959596e-05, |
| "loss": 0.0849, |
| "step": 280 |
| }, |
| { |
| "epoch": 2.5779816513761467, |
| "grad_norm": 8394.8212890625, |
| "learning_rate": 4.0909090909090915e-05, |
| "loss": 0.079, |
| "step": 281 |
| }, |
| { |
| "epoch": 2.5871559633027523, |
| "grad_norm": 8307.279296875, |
| "learning_rate": 4.085858585858586e-05, |
| "loss": 0.0985, |
| "step": 282 |
| }, |
| { |
| "epoch": 2.5963302752293576, |
| "grad_norm": 7505.14794921875, |
| "learning_rate": 4.0808080808080814e-05, |
| "loss": 0.0932, |
| "step": 283 |
| }, |
| { |
| "epoch": 2.6055045871559632, |
| "grad_norm": 6129.16552734375, |
| "learning_rate": 4.075757575757576e-05, |
| "loss": 0.0797, |
| "step": 284 |
| }, |
| { |
| "epoch": 2.614678899082569, |
| "grad_norm": 6896.5908203125, |
| "learning_rate": 4.070707070707071e-05, |
| "loss": 0.0891, |
| "step": 285 |
| }, |
| { |
| "epoch": 2.623853211009174, |
| "grad_norm": 8686.771484375, |
| "learning_rate": 4.065656565656566e-05, |
| "loss": 0.0838, |
| "step": 286 |
| }, |
| { |
| "epoch": 2.63302752293578, |
| "grad_norm": 6954.4072265625, |
| "learning_rate": 4.0606060606060606e-05, |
| "loss": 0.0976, |
| "step": 287 |
| }, |
| { |
| "epoch": 2.6422018348623855, |
| "grad_norm": 8336.8720703125, |
| "learning_rate": 4.055555555555556e-05, |
| "loss": 0.1339, |
| "step": 288 |
| }, |
| { |
| "epoch": 2.6513761467889907, |
| "grad_norm": 6729.21044921875, |
| "learning_rate": 4.0505050505050506e-05, |
| "loss": 0.0897, |
| "step": 289 |
| }, |
| { |
| "epoch": 2.6605504587155964, |
| "grad_norm": 8650.7490234375, |
| "learning_rate": 4.045454545454546e-05, |
| "loss": 0.1034, |
| "step": 290 |
| }, |
| { |
| "epoch": 2.669724770642202, |
| "grad_norm": 8123.34228515625, |
| "learning_rate": 4.0404040404040405e-05, |
| "loss": 0.0752, |
| "step": 291 |
| }, |
| { |
| "epoch": 2.6788990825688073, |
| "grad_norm": 7077.0859375, |
| "learning_rate": 4.035353535353535e-05, |
| "loss": 0.0889, |
| "step": 292 |
| }, |
| { |
| "epoch": 2.688073394495413, |
| "grad_norm": 8160.107421875, |
| "learning_rate": 4.0303030303030305e-05, |
| "loss": 0.1067, |
| "step": 293 |
| }, |
| { |
| "epoch": 2.6972477064220186, |
| "grad_norm": 11080.52734375, |
| "learning_rate": 4.025252525252525e-05, |
| "loss": 0.0984, |
| "step": 294 |
| }, |
| { |
| "epoch": 2.706422018348624, |
| "grad_norm": 8655.8173828125, |
| "learning_rate": 4.0202020202020204e-05, |
| "loss": 0.0977, |
| "step": 295 |
| }, |
| { |
| "epoch": 2.7155963302752295, |
| "grad_norm": 9417.9384765625, |
| "learning_rate": 4.015151515151515e-05, |
| "loss": 0.0863, |
| "step": 296 |
| }, |
| { |
| "epoch": 2.7247706422018347, |
| "grad_norm": 7852.45361328125, |
| "learning_rate": 4.01010101010101e-05, |
| "loss": 0.0865, |
| "step": 297 |
| }, |
| { |
| "epoch": 2.7339449541284404, |
| "grad_norm": 8051.74609375, |
| "learning_rate": 4.005050505050506e-05, |
| "loss": 0.11, |
| "step": 298 |
| }, |
| { |
| "epoch": 2.7431192660550456, |
| "grad_norm": 7042.64013671875, |
| "learning_rate": 4e-05, |
| "loss": 0.0805, |
| "step": 299 |
| }, |
| { |
| "epoch": 2.7522935779816513, |
| "grad_norm": 7486.0947265625, |
| "learning_rate": 3.9949494949494956e-05, |
| "loss": 0.0999, |
| "step": 300 |
| }, |
| { |
| "epoch": 2.761467889908257, |
| "grad_norm": 8937.4580078125, |
| "learning_rate": 3.98989898989899e-05, |
| "loss": 0.0936, |
| "step": 301 |
| }, |
| { |
| "epoch": 2.770642201834862, |
| "grad_norm": 7193.6708984375, |
| "learning_rate": 3.984848484848485e-05, |
| "loss": 0.0743, |
| "step": 302 |
| }, |
| { |
| "epoch": 2.779816513761468, |
| "grad_norm": 7543.75341796875, |
| "learning_rate": 3.97979797979798e-05, |
| "loss": 0.0994, |
| "step": 303 |
| }, |
| { |
| "epoch": 2.7889908256880735, |
| "grad_norm": 8849.693359375, |
| "learning_rate": 3.974747474747475e-05, |
| "loss": 0.0891, |
| "step": 304 |
| }, |
| { |
| "epoch": 2.7981651376146788, |
| "grad_norm": 6587.58837890625, |
| "learning_rate": 3.96969696969697e-05, |
| "loss": 0.0921, |
| "step": 305 |
| }, |
| { |
| "epoch": 2.8073394495412844, |
| "grad_norm": 7597.34521484375, |
| "learning_rate": 3.964646464646465e-05, |
| "loss": 0.079, |
| "step": 306 |
| }, |
| { |
| "epoch": 2.81651376146789, |
| "grad_norm": 8688.705078125, |
| "learning_rate": 3.9595959595959594e-05, |
| "loss": 0.1014, |
| "step": 307 |
| }, |
| { |
| "epoch": 2.8256880733944953, |
| "grad_norm": 14665.2021484375, |
| "learning_rate": 3.954545454545455e-05, |
| "loss": 0.1354, |
| "step": 308 |
| }, |
| { |
| "epoch": 2.834862385321101, |
| "grad_norm": 10845.7890625, |
| "learning_rate": 3.9494949494949494e-05, |
| "loss": 0.1076, |
| "step": 309 |
| }, |
| { |
| "epoch": 2.8440366972477067, |
| "grad_norm": 10935.6669921875, |
| "learning_rate": 3.944444444444445e-05, |
| "loss": 0.1054, |
| "step": 310 |
| }, |
| { |
| "epoch": 2.853211009174312, |
| "grad_norm": 11541.736328125, |
| "learning_rate": 3.939393939393939e-05, |
| "loss": 0.0943, |
| "step": 311 |
| }, |
| { |
| "epoch": 2.8623853211009176, |
| "grad_norm": 6934.3125, |
| "learning_rate": 3.9343434343434346e-05, |
| "loss": 0.1018, |
| "step": 312 |
| }, |
| { |
| "epoch": 2.871559633027523, |
| "grad_norm": 8040.87939453125, |
| "learning_rate": 3.929292929292929e-05, |
| "loss": 0.0926, |
| "step": 313 |
| }, |
| { |
| "epoch": 2.8807339449541285, |
| "grad_norm": 6876.23876953125, |
| "learning_rate": 3.924242424242424e-05, |
| "loss": 0.0806, |
| "step": 314 |
| }, |
| { |
| "epoch": 2.8899082568807337, |
| "grad_norm": 8227.6435546875, |
| "learning_rate": 3.91919191919192e-05, |
| "loss": 0.0888, |
| "step": 315 |
| }, |
| { |
| "epoch": 2.8990825688073394, |
| "grad_norm": 9191.8701171875, |
| "learning_rate": 3.9141414141414145e-05, |
| "loss": 0.0982, |
| "step": 316 |
| }, |
| { |
| "epoch": 2.908256880733945, |
| "grad_norm": 8231.9013671875, |
| "learning_rate": 3.909090909090909e-05, |
| "loss": 0.0851, |
| "step": 317 |
| }, |
| { |
| "epoch": 2.9174311926605503, |
| "grad_norm": 8839.322265625, |
| "learning_rate": 3.9040404040404045e-05, |
| "loss": 0.0757, |
| "step": 318 |
| }, |
| { |
| "epoch": 2.926605504587156, |
| "grad_norm": 8270.8525390625, |
| "learning_rate": 3.898989898989899e-05, |
| "loss": 0.1015, |
| "step": 319 |
| }, |
| { |
| "epoch": 2.9357798165137616, |
| "grad_norm": 12873.2529296875, |
| "learning_rate": 3.8939393939393944e-05, |
| "loss": 0.0871, |
| "step": 320 |
| }, |
| { |
| "epoch": 2.944954128440367, |
| "grad_norm": 7598.33837890625, |
| "learning_rate": 3.888888888888889e-05, |
| "loss": 0.0884, |
| "step": 321 |
| }, |
| { |
| "epoch": 2.9541284403669725, |
| "grad_norm": 8909.1279296875, |
| "learning_rate": 3.8838383838383844e-05, |
| "loss": 0.102, |
| "step": 322 |
| }, |
| { |
| "epoch": 2.963302752293578, |
| "grad_norm": 9611.2451171875, |
| "learning_rate": 3.878787878787879e-05, |
| "loss": 0.094, |
| "step": 323 |
| }, |
| { |
| "epoch": 2.9724770642201834, |
| "grad_norm": 8551.978515625, |
| "learning_rate": 3.8737373737373737e-05, |
| "loss": 0.0857, |
| "step": 324 |
| }, |
| { |
| "epoch": 2.981651376146789, |
| "grad_norm": 7810.45703125, |
| "learning_rate": 3.868686868686869e-05, |
| "loss": 0.0854, |
| "step": 325 |
| }, |
| { |
| "epoch": 2.9908256880733948, |
| "grad_norm": 8891.1826171875, |
| "learning_rate": 3.8636363636363636e-05, |
| "loss": 0.101, |
| "step": 326 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 16918.001953125, |
| "learning_rate": 3.858585858585859e-05, |
| "loss": 0.0625, |
| "step": 327 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_f1_macro": 0.05806976361668135, |
| "eval_f1_micro": 0.37575757575757573, |
| "eval_loss": 0.0906587690114975, |
| "eval_precision": 0.7380952380952381, |
| "eval_recall": 0.25203252032520324, |
| "eval_runtime": 0.8046, |
| "eval_samples_per_second": 359.182, |
| "eval_steps_per_second": 16.157, |
| "step": 327 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1090, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2049202335651840.0, |
| "train_batch_size": 24, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|