| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 0, |
| "global_step": 452, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.004424778761061947, |
| "grad_norm": 0.057373046875, |
| "learning_rate": 0.00039911504424778763, |
| "loss": 1.3739, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.008849557522123894, |
| "grad_norm": 0.1201171875, |
| "learning_rate": 0.00039823008849557525, |
| "loss": 1.4091, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.01327433628318584, |
| "grad_norm": 0.0751953125, |
| "learning_rate": 0.00039734513274336286, |
| "loss": 1.2628, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.017699115044247787, |
| "grad_norm": 0.064453125, |
| "learning_rate": 0.0003964601769911505, |
| "loss": 1.1101, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.022123893805309734, |
| "grad_norm": 0.06396484375, |
| "learning_rate": 0.0003955752212389381, |
| "loss": 1.344, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.02654867256637168, |
| "grad_norm": 0.0634765625, |
| "learning_rate": 0.00039469026548672565, |
| "loss": 1.1884, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.030973451327433628, |
| "grad_norm": 0.0625, |
| "learning_rate": 0.0003938053097345133, |
| "loss": 1.1329, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.035398230088495575, |
| "grad_norm": 0.052490234375, |
| "learning_rate": 0.0003929203539823009, |
| "loss": 1.138, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.03982300884955752, |
| "grad_norm": 0.0625, |
| "learning_rate": 0.00039203539823008855, |
| "loss": 1.0113, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.04424778761061947, |
| "grad_norm": 0.041748046875, |
| "learning_rate": 0.0003911504424778761, |
| "loss": 1.087, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.048672566371681415, |
| "grad_norm": 0.046630859375, |
| "learning_rate": 0.0003902654867256637, |
| "loss": 1.1459, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.05309734513274336, |
| "grad_norm": 0.03662109375, |
| "learning_rate": 0.00038938053097345134, |
| "loss": 1.1421, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.05752212389380531, |
| "grad_norm": 0.035888671875, |
| "learning_rate": 0.00038849557522123895, |
| "loss": 1.175, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.061946902654867256, |
| "grad_norm": 0.037109375, |
| "learning_rate": 0.00038761061946902657, |
| "loss": 1.2099, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.06637168141592921, |
| "grad_norm": 0.038818359375, |
| "learning_rate": 0.0003867256637168142, |
| "loss": 1.1295, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.07079646017699115, |
| "grad_norm": 0.0419921875, |
| "learning_rate": 0.00038584070796460174, |
| "loss": 1.0737, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.0752212389380531, |
| "grad_norm": 0.037109375, |
| "learning_rate": 0.0003849557522123894, |
| "loss": 1.1563, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.07964601769911504, |
| "grad_norm": 0.039306640625, |
| "learning_rate": 0.000384070796460177, |
| "loss": 1.1061, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.084070796460177, |
| "grad_norm": 0.050048828125, |
| "learning_rate": 0.00038318584070796464, |
| "loss": 1.1052, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.08849557522123894, |
| "grad_norm": 0.036865234375, |
| "learning_rate": 0.00038230088495575226, |
| "loss": 1.0009, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.09292035398230089, |
| "grad_norm": 0.041015625, |
| "learning_rate": 0.0003814159292035398, |
| "loss": 0.9805, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.09734513274336283, |
| "grad_norm": 0.03173828125, |
| "learning_rate": 0.0003805309734513275, |
| "loss": 1.1098, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.10176991150442478, |
| "grad_norm": 0.0322265625, |
| "learning_rate": 0.00037964601769911505, |
| "loss": 1.0691, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.10619469026548672, |
| "grad_norm": 0.05029296875, |
| "learning_rate": 0.00037876106194690266, |
| "loss": 1.2944, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.11061946902654868, |
| "grad_norm": 0.0419921875, |
| "learning_rate": 0.0003778761061946903, |
| "loss": 1.0819, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.11504424778761062, |
| "grad_norm": 0.0341796875, |
| "learning_rate": 0.0003769911504424779, |
| "loss": 1.215, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.11946902654867257, |
| "grad_norm": 0.041015625, |
| "learning_rate": 0.0003761061946902655, |
| "loss": 1.0624, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.12389380530973451, |
| "grad_norm": 0.03271484375, |
| "learning_rate": 0.0003752212389380531, |
| "loss": 1.0258, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.12831858407079647, |
| "grad_norm": 0.038330078125, |
| "learning_rate": 0.00037433628318584073, |
| "loss": 1.0544, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.13274336283185842, |
| "grad_norm": 0.035400390625, |
| "learning_rate": 0.00037345132743362835, |
| "loss": 1.0203, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.13716814159292035, |
| "grad_norm": 0.05810546875, |
| "learning_rate": 0.0003725663716814159, |
| "loss": 1.1584, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.1415929203539823, |
| "grad_norm": 0.0341796875, |
| "learning_rate": 0.0003716814159292036, |
| "loss": 0.9215, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.14601769911504425, |
| "grad_norm": 0.03857421875, |
| "learning_rate": 0.0003707964601769912, |
| "loss": 1.1255, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.1504424778761062, |
| "grad_norm": 0.053466796875, |
| "learning_rate": 0.00036991150442477875, |
| "loss": 1.3504, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.15486725663716813, |
| "grad_norm": 0.0419921875, |
| "learning_rate": 0.0003690265486725664, |
| "loss": 1.0819, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.1592920353982301, |
| "grad_norm": 0.041259765625, |
| "learning_rate": 0.000368141592920354, |
| "loss": 1.2328, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.16371681415929204, |
| "grad_norm": 0.04345703125, |
| "learning_rate": 0.00036725663716814165, |
| "loss": 1.1783, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.168141592920354, |
| "grad_norm": 0.044189453125, |
| "learning_rate": 0.0003663716814159292, |
| "loss": 1.105, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.17256637168141592, |
| "grad_norm": 0.05224609375, |
| "learning_rate": 0.0003654867256637168, |
| "loss": 1.1757, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.17699115044247787, |
| "grad_norm": 0.042236328125, |
| "learning_rate": 0.00036460176991150444, |
| "loss": 1.1601, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.18141592920353983, |
| "grad_norm": 0.04296875, |
| "learning_rate": 0.00036371681415929205, |
| "loss": 0.9869, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.18584070796460178, |
| "grad_norm": 0.05419921875, |
| "learning_rate": 0.00036283185840707967, |
| "loss": 1.0769, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.1902654867256637, |
| "grad_norm": 0.0361328125, |
| "learning_rate": 0.0003619469026548673, |
| "loss": 1.015, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.19469026548672566, |
| "grad_norm": 0.03564453125, |
| "learning_rate": 0.00036106194690265484, |
| "loss": 0.9435, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.19911504424778761, |
| "grad_norm": 0.058837890625, |
| "learning_rate": 0.0003601769911504425, |
| "loss": 1.1832, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.20353982300884957, |
| "grad_norm": 0.052490234375, |
| "learning_rate": 0.00035929203539823007, |
| "loss": 1.1826, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.2079646017699115, |
| "grad_norm": 0.0625, |
| "learning_rate": 0.00035840707964601774, |
| "loss": 1.02, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.21238938053097345, |
| "grad_norm": 0.047607421875, |
| "learning_rate": 0.0003575221238938053, |
| "loss": 1.0803, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.2168141592920354, |
| "grad_norm": 0.041015625, |
| "learning_rate": 0.0003566371681415929, |
| "loss": 1.021, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.22123893805309736, |
| "grad_norm": 0.041015625, |
| "learning_rate": 0.0003557522123893806, |
| "loss": 1.0058, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.22566371681415928, |
| "grad_norm": 0.040771484375, |
| "learning_rate": 0.00035486725663716814, |
| "loss": 1.0489, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.23008849557522124, |
| "grad_norm": 0.040771484375, |
| "learning_rate": 0.0003539823008849558, |
| "loss": 0.986, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.2345132743362832, |
| "grad_norm": 0.039794921875, |
| "learning_rate": 0.00035309734513274337, |
| "loss": 1.0928, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.23893805309734514, |
| "grad_norm": 0.0419921875, |
| "learning_rate": 0.000352212389380531, |
| "loss": 1.0037, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.24336283185840707, |
| "grad_norm": 0.035888671875, |
| "learning_rate": 0.0003513274336283186, |
| "loss": 1.0165, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.24778761061946902, |
| "grad_norm": 0.046630859375, |
| "learning_rate": 0.0003504424778761062, |
| "loss": 0.9856, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.252212389380531, |
| "grad_norm": 0.0390625, |
| "learning_rate": 0.00034955752212389383, |
| "loss": 1.0988, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.25663716814159293, |
| "grad_norm": 0.035400390625, |
| "learning_rate": 0.00034867256637168145, |
| "loss": 0.9983, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.2610619469026549, |
| "grad_norm": 0.0390625, |
| "learning_rate": 0.000347787610619469, |
| "loss": 1.0727, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.26548672566371684, |
| "grad_norm": 0.0380859375, |
| "learning_rate": 0.0003469026548672567, |
| "loss": 0.9617, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.26991150442477874, |
| "grad_norm": 0.04638671875, |
| "learning_rate": 0.00034601769911504423, |
| "loss": 1.1435, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.2743362831858407, |
| "grad_norm": 0.0419921875, |
| "learning_rate": 0.0003451327433628319, |
| "loss": 1.0895, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.27876106194690264, |
| "grad_norm": 0.038330078125, |
| "learning_rate": 0.00034424778761061946, |
| "loss": 1.0823, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.2831858407079646, |
| "grad_norm": 0.042236328125, |
| "learning_rate": 0.0003433628318584071, |
| "loss": 1.1119, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.28761061946902655, |
| "grad_norm": 0.0576171875, |
| "learning_rate": 0.00034247787610619475, |
| "loss": 1.2428, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.2920353982300885, |
| "grad_norm": 0.04541015625, |
| "learning_rate": 0.0003415929203539823, |
| "loss": 0.9943, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.29646017699115046, |
| "grad_norm": 0.0439453125, |
| "learning_rate": 0.0003407079646017699, |
| "loss": 1.3215, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.3008849557522124, |
| "grad_norm": 0.03515625, |
| "learning_rate": 0.00033982300884955754, |
| "loss": 0.9997, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.3053097345132743, |
| "grad_norm": 0.039794921875, |
| "learning_rate": 0.00033893805309734515, |
| "loss": 0.9796, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.30973451327433627, |
| "grad_norm": 0.044677734375, |
| "learning_rate": 0.00033805309734513277, |
| "loss": 1.1079, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.3141592920353982, |
| "grad_norm": 0.041259765625, |
| "learning_rate": 0.0003371681415929204, |
| "loss": 1.0242, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.3185840707964602, |
| "grad_norm": 0.04638671875, |
| "learning_rate": 0.000336283185840708, |
| "loss": 1.0227, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.3230088495575221, |
| "grad_norm": 0.042236328125, |
| "learning_rate": 0.0003353982300884956, |
| "loss": 0.9375, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.3274336283185841, |
| "grad_norm": 0.03759765625, |
| "learning_rate": 0.00033451327433628317, |
| "loss": 1.0104, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.33185840707964603, |
| "grad_norm": 0.041748046875, |
| "learning_rate": 0.00033362831858407084, |
| "loss": 1.1685, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.336283185840708, |
| "grad_norm": 0.051513671875, |
| "learning_rate": 0.0003327433628318584, |
| "loss": 1.2954, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.3407079646017699, |
| "grad_norm": 0.0517578125, |
| "learning_rate": 0.000331858407079646, |
| "loss": 0.9816, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.34513274336283184, |
| "grad_norm": 0.04248046875, |
| "learning_rate": 0.00033097345132743363, |
| "loss": 1.0791, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.3495575221238938, |
| "grad_norm": 0.043701171875, |
| "learning_rate": 0.00033008849557522124, |
| "loss": 1.0989, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.35398230088495575, |
| "grad_norm": 0.05029296875, |
| "learning_rate": 0.00032920353982300886, |
| "loss": 1.1164, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.3584070796460177, |
| "grad_norm": 0.0400390625, |
| "learning_rate": 0.00032831858407079647, |
| "loss": 1.2053, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.36283185840707965, |
| "grad_norm": 0.041748046875, |
| "learning_rate": 0.0003274336283185841, |
| "loss": 1.0322, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.3672566371681416, |
| "grad_norm": 0.064453125, |
| "learning_rate": 0.0003265486725663717, |
| "loss": 0.9184, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.37168141592920356, |
| "grad_norm": 0.037353515625, |
| "learning_rate": 0.0003256637168141593, |
| "loss": 1.0874, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.37610619469026546, |
| "grad_norm": 0.04638671875, |
| "learning_rate": 0.00032477876106194693, |
| "loss": 1.0051, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.3805309734513274, |
| "grad_norm": 0.052001953125, |
| "learning_rate": 0.00032389380530973454, |
| "loss": 1.1232, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.38495575221238937, |
| "grad_norm": 0.036865234375, |
| "learning_rate": 0.0003230088495575221, |
| "loss": 0.9745, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.3893805309734513, |
| "grad_norm": 0.037353515625, |
| "learning_rate": 0.0003221238938053098, |
| "loss": 0.9092, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.3938053097345133, |
| "grad_norm": 0.04931640625, |
| "learning_rate": 0.00032123893805309733, |
| "loss": 1.0712, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.39823008849557523, |
| "grad_norm": 0.043701171875, |
| "learning_rate": 0.000320353982300885, |
| "loss": 1.0908, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.4026548672566372, |
| "grad_norm": 0.04150390625, |
| "learning_rate": 0.00031946902654867256, |
| "loss": 1.0897, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.40707964601769914, |
| "grad_norm": 0.03857421875, |
| "learning_rate": 0.0003185840707964602, |
| "loss": 0.8939, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.41150442477876104, |
| "grad_norm": 0.044677734375, |
| "learning_rate": 0.0003176991150442478, |
| "loss": 1.0992, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.415929203539823, |
| "grad_norm": 0.038818359375, |
| "learning_rate": 0.0003168141592920354, |
| "loss": 0.937, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.42035398230088494, |
| "grad_norm": 0.0634765625, |
| "learning_rate": 0.000315929203539823, |
| "loss": 1.1744, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.4247787610619469, |
| "grad_norm": 0.042236328125, |
| "learning_rate": 0.00031504424778761064, |
| "loss": 1.0227, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.42920353982300885, |
| "grad_norm": 0.041259765625, |
| "learning_rate": 0.00031415929203539825, |
| "loss": 1.112, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.4336283185840708, |
| "grad_norm": 0.047119140625, |
| "learning_rate": 0.00031327433628318586, |
| "loss": 0.9122, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.43805309734513276, |
| "grad_norm": 0.04931640625, |
| "learning_rate": 0.0003123893805309735, |
| "loss": 1.0073, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.4424778761061947, |
| "grad_norm": 0.040283203125, |
| "learning_rate": 0.0003115044247787611, |
| "loss": 1.0326, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.4469026548672566, |
| "grad_norm": 0.046142578125, |
| "learning_rate": 0.0003106194690265487, |
| "loss": 1.0014, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.45132743362831856, |
| "grad_norm": 0.041015625, |
| "learning_rate": 0.00030973451327433627, |
| "loss": 1.1081, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.4557522123893805, |
| "grad_norm": 0.041015625, |
| "learning_rate": 0.00030884955752212394, |
| "loss": 1.1268, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.46017699115044247, |
| "grad_norm": 0.05078125, |
| "learning_rate": 0.0003079646017699115, |
| "loss": 1.0382, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.4646017699115044, |
| "grad_norm": 0.0576171875, |
| "learning_rate": 0.00030707964601769917, |
| "loss": 0.9887, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.4690265486725664, |
| "grad_norm": 0.0390625, |
| "learning_rate": 0.0003061946902654867, |
| "loss": 1.0143, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.47345132743362833, |
| "grad_norm": 0.06982421875, |
| "learning_rate": 0.00030530973451327434, |
| "loss": 1.0332, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.4778761061946903, |
| "grad_norm": 0.044921875, |
| "learning_rate": 0.00030442477876106196, |
| "loss": 0.9422, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.4823008849557522, |
| "grad_norm": 0.06298828125, |
| "learning_rate": 0.00030353982300884957, |
| "loss": 1.0376, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.48672566371681414, |
| "grad_norm": 0.04833984375, |
| "learning_rate": 0.0003026548672566372, |
| "loss": 1.1175, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.4911504424778761, |
| "grad_norm": 0.044189453125, |
| "learning_rate": 0.0003017699115044248, |
| "loss": 0.9571, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.49557522123893805, |
| "grad_norm": 0.0478515625, |
| "learning_rate": 0.00030088495575221236, |
| "loss": 1.0857, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.059814453125, |
| "learning_rate": 0.00030000000000000003, |
| "loss": 0.9346, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.504424778761062, |
| "grad_norm": 0.054443359375, |
| "learning_rate": 0.00029911504424778764, |
| "loss": 1.0317, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.5088495575221239, |
| "grad_norm": 0.0625, |
| "learning_rate": 0.00029823008849557526, |
| "loss": 1.0535, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.5132743362831859, |
| "grad_norm": 0.04150390625, |
| "learning_rate": 0.00029734513274336287, |
| "loss": 1.0437, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.5176991150442478, |
| "grad_norm": 0.046142578125, |
| "learning_rate": 0.00029646017699115043, |
| "loss": 1.0253, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.5221238938053098, |
| "grad_norm": 0.07421875, |
| "learning_rate": 0.0002955752212389381, |
| "loss": 1.022, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.5265486725663717, |
| "grad_norm": 0.058837890625, |
| "learning_rate": 0.00029469026548672566, |
| "loss": 1.2344, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.5309734513274337, |
| "grad_norm": 0.0576171875, |
| "learning_rate": 0.0002938053097345133, |
| "loss": 0.9828, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.5353982300884956, |
| "grad_norm": 0.05078125, |
| "learning_rate": 0.0002929203539823009, |
| "loss": 0.9207, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.5398230088495575, |
| "grad_norm": 0.050537109375, |
| "learning_rate": 0.0002920353982300885, |
| "loss": 0.9794, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.5442477876106194, |
| "grad_norm": 0.05908203125, |
| "learning_rate": 0.0002911504424778761, |
| "loss": 1.0962, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.5486725663716814, |
| "grad_norm": 0.041748046875, |
| "learning_rate": 0.00029026548672566373, |
| "loss": 1.1614, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.5530973451327433, |
| "grad_norm": 0.038330078125, |
| "learning_rate": 0.00028938053097345135, |
| "loss": 0.9082, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.5575221238938053, |
| "grad_norm": 0.037353515625, |
| "learning_rate": 0.00028849557522123896, |
| "loss": 0.9406, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.5619469026548672, |
| "grad_norm": 0.039306640625, |
| "learning_rate": 0.0002876106194690265, |
| "loss": 1.1105, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.5663716814159292, |
| "grad_norm": 0.051025390625, |
| "learning_rate": 0.0002867256637168142, |
| "loss": 0.9679, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.5707964601769911, |
| "grad_norm": 0.037109375, |
| "learning_rate": 0.00028584070796460175, |
| "loss": 0.9529, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.5752212389380531, |
| "grad_norm": 0.056396484375, |
| "learning_rate": 0.00028495575221238937, |
| "loss": 1.0341, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.5796460176991151, |
| "grad_norm": 0.039306640625, |
| "learning_rate": 0.00028407079646017704, |
| "loss": 0.9493, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.584070796460177, |
| "grad_norm": 0.06591796875, |
| "learning_rate": 0.0002831858407079646, |
| "loss": 1.262, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.588495575221239, |
| "grad_norm": 0.038330078125, |
| "learning_rate": 0.00028230088495575226, |
| "loss": 0.9412, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.5929203539823009, |
| "grad_norm": 0.046875, |
| "learning_rate": 0.0002814159292035398, |
| "loss": 1.0563, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.5973451327433629, |
| "grad_norm": 0.05712890625, |
| "learning_rate": 0.00028053097345132744, |
| "loss": 1.0201, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.6017699115044248, |
| "grad_norm": 0.04052734375, |
| "learning_rate": 0.00027964601769911505, |
| "loss": 1.0401, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.6061946902654868, |
| "grad_norm": 0.05078125, |
| "learning_rate": 0.00027876106194690267, |
| "loss": 1.0241, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.6106194690265486, |
| "grad_norm": 0.05810546875, |
| "learning_rate": 0.0002778761061946903, |
| "loss": 1.1263, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.6150442477876106, |
| "grad_norm": 0.048095703125, |
| "learning_rate": 0.0002769911504424779, |
| "loss": 1.0869, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.6194690265486725, |
| "grad_norm": 0.447265625, |
| "learning_rate": 0.0002761061946902655, |
| "loss": 0.9944, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.6238938053097345, |
| "grad_norm": 0.038818359375, |
| "learning_rate": 0.0002752212389380531, |
| "loss": 0.9675, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.6283185840707964, |
| "grad_norm": 0.068359375, |
| "learning_rate": 0.0002743362831858407, |
| "loss": 1.0227, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.6327433628318584, |
| "grad_norm": 0.072265625, |
| "learning_rate": 0.00027345132743362836, |
| "loss": 1.0381, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.6371681415929203, |
| "grad_norm": 0.055908203125, |
| "learning_rate": 0.0002725663716814159, |
| "loss": 0.9385, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.6415929203539823, |
| "grad_norm": 0.04248046875, |
| "learning_rate": 0.00027168141592920353, |
| "loss": 1.001, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.6460176991150443, |
| "grad_norm": 0.06005859375, |
| "learning_rate": 0.0002707964601769912, |
| "loss": 1.04, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.6504424778761062, |
| "grad_norm": 0.049072265625, |
| "learning_rate": 0.00026991150442477876, |
| "loss": 0.9735, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.6548672566371682, |
| "grad_norm": 0.045654296875, |
| "learning_rate": 0.00026902654867256643, |
| "loss": 1.0873, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.6592920353982301, |
| "grad_norm": 0.04638671875, |
| "learning_rate": 0.000268141592920354, |
| "loss": 1.1032, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.6637168141592921, |
| "grad_norm": 0.051513671875, |
| "learning_rate": 0.0002672566371681416, |
| "loss": 1.0414, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.668141592920354, |
| "grad_norm": 0.0419921875, |
| "learning_rate": 0.0002663716814159292, |
| "loss": 0.892, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.672566371681416, |
| "grad_norm": 0.040771484375, |
| "learning_rate": 0.00026548672566371683, |
| "loss": 0.9048, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.6769911504424779, |
| "grad_norm": 0.06494140625, |
| "learning_rate": 0.00026460176991150445, |
| "loss": 1.0745, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.6814159292035398, |
| "grad_norm": 0.059814453125, |
| "learning_rate": 0.00026371681415929206, |
| "loss": 1.2796, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.6858407079646017, |
| "grad_norm": 0.050048828125, |
| "learning_rate": 0.0002628318584070796, |
| "loss": 0.9484, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.6902654867256637, |
| "grad_norm": 0.0458984375, |
| "learning_rate": 0.0002619469026548673, |
| "loss": 1.0571, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.6946902654867256, |
| "grad_norm": 0.0439453125, |
| "learning_rate": 0.00026106194690265485, |
| "loss": 1.1435, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.6991150442477876, |
| "grad_norm": 0.0458984375, |
| "learning_rate": 0.0002601769911504425, |
| "loss": 1.0, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.7035398230088495, |
| "grad_norm": 0.039794921875, |
| "learning_rate": 0.0002592920353982301, |
| "loss": 1.0044, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.7079646017699115, |
| "grad_norm": 0.049072265625, |
| "learning_rate": 0.0002584070796460177, |
| "loss": 1.001, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.7123893805309734, |
| "grad_norm": 0.04541015625, |
| "learning_rate": 0.0002575221238938053, |
| "loss": 1.0643, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.7168141592920354, |
| "grad_norm": 0.046630859375, |
| "learning_rate": 0.0002566371681415929, |
| "loss": 1.2461, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.7212389380530974, |
| "grad_norm": 0.0458984375, |
| "learning_rate": 0.00025575221238938054, |
| "loss": 1.297, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.7256637168141593, |
| "grad_norm": 0.349609375, |
| "learning_rate": 0.00025486725663716815, |
| "loss": 0.9718, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.7300884955752213, |
| "grad_norm": 0.039794921875, |
| "learning_rate": 0.00025398230088495577, |
| "loss": 0.9553, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.7345132743362832, |
| "grad_norm": 0.041748046875, |
| "learning_rate": 0.0002530973451327434, |
| "loss": 1.074, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.7389380530973452, |
| "grad_norm": 0.0615234375, |
| "learning_rate": 0.000252212389380531, |
| "loss": 1.0015, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.7433628318584071, |
| "grad_norm": 0.043212890625, |
| "learning_rate": 0.0002513274336283186, |
| "loss": 1.021, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.7477876106194691, |
| "grad_norm": 0.0556640625, |
| "learning_rate": 0.0002504424778761062, |
| "loss": 1.063, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.7522123893805309, |
| "grad_norm": 0.03759765625, |
| "learning_rate": 0.0002495575221238938, |
| "loss": 0.9415, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.7566371681415929, |
| "grad_norm": 0.0673828125, |
| "learning_rate": 0.00024867256637168145, |
| "loss": 1.0556, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.7610619469026548, |
| "grad_norm": 0.06298828125, |
| "learning_rate": 0.000247787610619469, |
| "loss": 1.1345, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.7654867256637168, |
| "grad_norm": 0.044189453125, |
| "learning_rate": 0.00024690265486725663, |
| "loss": 0.9686, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.7699115044247787, |
| "grad_norm": 0.18359375, |
| "learning_rate": 0.00024601769911504424, |
| "loss": 0.8729, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.7743362831858407, |
| "grad_norm": 0.04736328125, |
| "learning_rate": 0.00024513274336283186, |
| "loss": 1.0424, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.7787610619469026, |
| "grad_norm": 0.05322265625, |
| "learning_rate": 0.00024424778761061947, |
| "loss": 1.0317, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.7831858407079646, |
| "grad_norm": 0.043212890625, |
| "learning_rate": 0.0002433628318584071, |
| "loss": 1.1979, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.7876106194690266, |
| "grad_norm": 0.0615234375, |
| "learning_rate": 0.00024247787610619473, |
| "loss": 1.0134, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.7920353982300885, |
| "grad_norm": 0.0615234375, |
| "learning_rate": 0.00024159292035398232, |
| "loss": 1.1044, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.7964601769911505, |
| "grad_norm": 0.04443359375, |
| "learning_rate": 0.00024070796460176993, |
| "loss": 1.0293, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.8008849557522124, |
| "grad_norm": 0.04248046875, |
| "learning_rate": 0.00023982300884955752, |
| "loss": 0.9629, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.8053097345132744, |
| "grad_norm": 0.03857421875, |
| "learning_rate": 0.00023893805309734516, |
| "loss": 0.9511, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.8097345132743363, |
| "grad_norm": 0.046142578125, |
| "learning_rate": 0.00023805309734513275, |
| "loss": 1.0096, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.8141592920353983, |
| "grad_norm": 0.0498046875, |
| "learning_rate": 0.0002371681415929204, |
| "loss": 0.8986, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.8185840707964602, |
| "grad_norm": 0.050048828125, |
| "learning_rate": 0.00023628318584070798, |
| "loss": 0.9618, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.8230088495575221, |
| "grad_norm": 0.07177734375, |
| "learning_rate": 0.0002353982300884956, |
| "loss": 1.0183, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.827433628318584, |
| "grad_norm": 0.06982421875, |
| "learning_rate": 0.00023451327433628318, |
| "loss": 0.9824, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.831858407079646, |
| "grad_norm": 0.0439453125, |
| "learning_rate": 0.00023362831858407082, |
| "loss": 0.9304, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.8362831858407079, |
| "grad_norm": 0.04736328125, |
| "learning_rate": 0.0002327433628318584, |
| "loss": 0.9942, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.8407079646017699, |
| "grad_norm": 0.05029296875, |
| "learning_rate": 0.00023185840707964602, |
| "loss": 1.1299, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.8451327433628318, |
| "grad_norm": 0.046875, |
| "learning_rate": 0.0002309734513274336, |
| "loss": 1.0395, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.8495575221238938, |
| "grad_norm": 0.04296875, |
| "learning_rate": 0.00023008849557522125, |
| "loss": 0.9442, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.8539823008849557, |
| "grad_norm": 0.05078125, |
| "learning_rate": 0.00022920353982300884, |
| "loss": 1.0056, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.8584070796460177, |
| "grad_norm": 0.050537109375, |
| "learning_rate": 0.00022831858407079648, |
| "loss": 0.9217, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.8628318584070797, |
| "grad_norm": 0.040771484375, |
| "learning_rate": 0.0002274336283185841, |
| "loss": 0.9522, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.8672566371681416, |
| "grad_norm": 0.042236328125, |
| "learning_rate": 0.00022654867256637168, |
| "loss": 0.9525, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.8716814159292036, |
| "grad_norm": 0.048095703125, |
| "learning_rate": 0.00022566371681415932, |
| "loss": 1.0493, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.8761061946902655, |
| "grad_norm": 0.047607421875, |
| "learning_rate": 0.0002247787610619469, |
| "loss": 1.1643, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.8805309734513275, |
| "grad_norm": 0.041748046875, |
| "learning_rate": 0.00022389380530973453, |
| "loss": 0.8968, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.8849557522123894, |
| "grad_norm": 0.046875, |
| "learning_rate": 0.0002230088495575221, |
| "loss": 0.8145, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.8893805309734514, |
| "grad_norm": 0.0693359375, |
| "learning_rate": 0.00022212389380530975, |
| "loss": 1.1892, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.8938053097345132, |
| "grad_norm": 0.0673828125, |
| "learning_rate": 0.00022123893805309734, |
| "loss": 0.9646, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.8982300884955752, |
| "grad_norm": 0.046630859375, |
| "learning_rate": 0.00022035398230088498, |
| "loss": 1.0692, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.9026548672566371, |
| "grad_norm": 0.06396484375, |
| "learning_rate": 0.00021946902654867257, |
| "loss": 0.9034, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.9070796460176991, |
| "grad_norm": 0.04150390625, |
| "learning_rate": 0.00021858407079646019, |
| "loss": 1.1094, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.911504424778761, |
| "grad_norm": 0.064453125, |
| "learning_rate": 0.00021769911504424777, |
| "loss": 1.1966, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.915929203539823, |
| "grad_norm": 0.049560546875, |
| "learning_rate": 0.00021681415929203541, |
| "loss": 1.1902, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.9203539823008849, |
| "grad_norm": 0.06884765625, |
| "learning_rate": 0.000215929203539823, |
| "loss": 1.1077, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.9247787610619469, |
| "grad_norm": 0.042236328125, |
| "learning_rate": 0.00021504424778761064, |
| "loss": 0.9293, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.9292035398230089, |
| "grad_norm": 0.040283203125, |
| "learning_rate": 0.00021415929203539826, |
| "loss": 1.0238, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.9336283185840708, |
| "grad_norm": 0.046142578125, |
| "learning_rate": 0.00021327433628318585, |
| "loss": 0.9889, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.9380530973451328, |
| "grad_norm": 0.048583984375, |
| "learning_rate": 0.0002123893805309735, |
| "loss": 1.0614, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.9424778761061947, |
| "grad_norm": 0.048095703125, |
| "learning_rate": 0.00021150442477876107, |
| "loss": 1.0836, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.9469026548672567, |
| "grad_norm": 0.047607421875, |
| "learning_rate": 0.0002106194690265487, |
| "loss": 1.0815, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.9513274336283186, |
| "grad_norm": 0.039794921875, |
| "learning_rate": 0.00020973451327433628, |
| "loss": 1.0021, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.9557522123893806, |
| "grad_norm": 0.049072265625, |
| "learning_rate": 0.00020884955752212392, |
| "loss": 1.0002, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.9601769911504425, |
| "grad_norm": 0.04541015625, |
| "learning_rate": 0.0002079646017699115, |
| "loss": 1.2081, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.9646017699115044, |
| "grad_norm": 0.0439453125, |
| "learning_rate": 0.00020707964601769915, |
| "loss": 1.0711, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.9690265486725663, |
| "grad_norm": 0.049072265625, |
| "learning_rate": 0.00020619469026548673, |
| "loss": 1.0342, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.9734513274336283, |
| "grad_norm": 0.0556640625, |
| "learning_rate": 0.00020530973451327435, |
| "loss": 1.0103, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.9778761061946902, |
| "grad_norm": 0.04931640625, |
| "learning_rate": 0.00020442477876106194, |
| "loss": 0.9692, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.9823008849557522, |
| "grad_norm": 0.04296875, |
| "learning_rate": 0.00020353982300884958, |
| "loss": 0.9639, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.9867256637168141, |
| "grad_norm": 0.040771484375, |
| "learning_rate": 0.00020265486725663717, |
| "loss": 0.9039, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.9911504424778761, |
| "grad_norm": 0.049560546875, |
| "learning_rate": 0.00020176991150442478, |
| "loss": 0.9265, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.995575221238938, |
| "grad_norm": 0.04248046875, |
| "learning_rate": 0.00020088495575221237, |
| "loss": 0.8961, |
| "step": 225 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.0625, |
| "learning_rate": 0.0002, |
| "loss": 1.0299, |
| "step": 226 |
| }, |
| { |
| "epoch": 1.0044247787610618, |
| "grad_norm": 0.052978515625, |
| "learning_rate": 0.00019911504424778762, |
| "loss": 0.8533, |
| "step": 227 |
| }, |
| { |
| "epoch": 1.008849557522124, |
| "grad_norm": 0.042236328125, |
| "learning_rate": 0.00019823008849557524, |
| "loss": 0.937, |
| "step": 228 |
| }, |
| { |
| "epoch": 1.0132743362831858, |
| "grad_norm": 0.05029296875, |
| "learning_rate": 0.00019734513274336283, |
| "loss": 0.8202, |
| "step": 229 |
| }, |
| { |
| "epoch": 1.0176991150442478, |
| "grad_norm": 0.0517578125, |
| "learning_rate": 0.00019646017699115044, |
| "loss": 0.8976, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.0221238938053097, |
| "grad_norm": 0.048828125, |
| "learning_rate": 0.00019557522123893806, |
| "loss": 0.8791, |
| "step": 231 |
| }, |
| { |
| "epoch": 1.0265486725663717, |
| "grad_norm": 0.050537109375, |
| "learning_rate": 0.00019469026548672567, |
| "loss": 1.0753, |
| "step": 232 |
| }, |
| { |
| "epoch": 1.0309734513274336, |
| "grad_norm": 0.05615234375, |
| "learning_rate": 0.00019380530973451328, |
| "loss": 1.0464, |
| "step": 233 |
| }, |
| { |
| "epoch": 1.0353982300884956, |
| "grad_norm": 0.059326171875, |
| "learning_rate": 0.00019292035398230087, |
| "loss": 0.8115, |
| "step": 234 |
| }, |
| { |
| "epoch": 1.0398230088495575, |
| "grad_norm": 0.058349609375, |
| "learning_rate": 0.0001920353982300885, |
| "loss": 0.9851, |
| "step": 235 |
| }, |
| { |
| "epoch": 1.0442477876106195, |
| "grad_norm": 0.068359375, |
| "learning_rate": 0.00019115044247787613, |
| "loss": 0.8867, |
| "step": 236 |
| }, |
| { |
| "epoch": 1.0486725663716814, |
| "grad_norm": 0.059814453125, |
| "learning_rate": 0.00019026548672566374, |
| "loss": 0.7882, |
| "step": 237 |
| }, |
| { |
| "epoch": 1.0530973451327434, |
| "grad_norm": 0.06494140625, |
| "learning_rate": 0.00018938053097345133, |
| "loss": 1.0028, |
| "step": 238 |
| }, |
| { |
| "epoch": 1.0575221238938053, |
| "grad_norm": 0.06103515625, |
| "learning_rate": 0.00018849557522123894, |
| "loss": 0.9446, |
| "step": 239 |
| }, |
| { |
| "epoch": 1.0619469026548674, |
| "grad_norm": 0.059814453125, |
| "learning_rate": 0.00018761061946902656, |
| "loss": 1.0249, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.0663716814159292, |
| "grad_norm": 0.053955078125, |
| "learning_rate": 0.00018672566371681417, |
| "loss": 0.9277, |
| "step": 241 |
| }, |
| { |
| "epoch": 1.0707964601769913, |
| "grad_norm": 0.0751953125, |
| "learning_rate": 0.0001858407079646018, |
| "loss": 0.8228, |
| "step": 242 |
| }, |
| { |
| "epoch": 1.075221238938053, |
| "grad_norm": 0.058837890625, |
| "learning_rate": 0.00018495575221238938, |
| "loss": 0.8757, |
| "step": 243 |
| }, |
| { |
| "epoch": 1.079646017699115, |
| "grad_norm": 0.059326171875, |
| "learning_rate": 0.000184070796460177, |
| "loss": 0.7868, |
| "step": 244 |
| }, |
| { |
| "epoch": 1.084070796460177, |
| "grad_norm": 0.07275390625, |
| "learning_rate": 0.0001831858407079646, |
| "loss": 0.878, |
| "step": 245 |
| }, |
| { |
| "epoch": 1.0884955752212389, |
| "grad_norm": 0.05908203125, |
| "learning_rate": 0.00018230088495575222, |
| "loss": 0.8944, |
| "step": 246 |
| }, |
| { |
| "epoch": 1.092920353982301, |
| "grad_norm": 0.059326171875, |
| "learning_rate": 0.00018141592920353983, |
| "loss": 0.8831, |
| "step": 247 |
| }, |
| { |
| "epoch": 1.0973451327433628, |
| "grad_norm": 0.060302734375, |
| "learning_rate": 0.00018053097345132742, |
| "loss": 0.9312, |
| "step": 248 |
| }, |
| { |
| "epoch": 1.1017699115044248, |
| "grad_norm": 0.053955078125, |
| "learning_rate": 0.00017964601769911504, |
| "loss": 0.7488, |
| "step": 249 |
| }, |
| { |
| "epoch": 1.1061946902654867, |
| "grad_norm": 0.06298828125, |
| "learning_rate": 0.00017876106194690265, |
| "loss": 0.9677, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.1106194690265487, |
| "grad_norm": 0.06298828125, |
| "learning_rate": 0.0001778761061946903, |
| "loss": 0.8391, |
| "step": 251 |
| }, |
| { |
| "epoch": 1.1150442477876106, |
| "grad_norm": 0.061279296875, |
| "learning_rate": 0.0001769911504424779, |
| "loss": 0.9225, |
| "step": 252 |
| }, |
| { |
| "epoch": 1.1194690265486726, |
| "grad_norm": 0.080078125, |
| "learning_rate": 0.0001761061946902655, |
| "loss": 0.7969, |
| "step": 253 |
| }, |
| { |
| "epoch": 1.1238938053097345, |
| "grad_norm": 0.06494140625, |
| "learning_rate": 0.0001752212389380531, |
| "loss": 0.8957, |
| "step": 254 |
| }, |
| { |
| "epoch": 1.1283185840707965, |
| "grad_norm": 0.062255859375, |
| "learning_rate": 0.00017433628318584072, |
| "loss": 0.9192, |
| "step": 255 |
| }, |
| { |
| "epoch": 1.1327433628318584, |
| "grad_norm": 0.1005859375, |
| "learning_rate": 0.00017345132743362834, |
| "loss": 0.8669, |
| "step": 256 |
| }, |
| { |
| "epoch": 1.1371681415929205, |
| "grad_norm": 0.0810546875, |
| "learning_rate": 0.00017256637168141595, |
| "loss": 0.9332, |
| "step": 257 |
| }, |
| { |
| "epoch": 1.1415929203539823, |
| "grad_norm": 0.06689453125, |
| "learning_rate": 0.00017168141592920354, |
| "loss": 0.8392, |
| "step": 258 |
| }, |
| { |
| "epoch": 1.1460176991150441, |
| "grad_norm": 0.06494140625, |
| "learning_rate": 0.00017079646017699115, |
| "loss": 1.1159, |
| "step": 259 |
| }, |
| { |
| "epoch": 1.1504424778761062, |
| "grad_norm": 0.0625, |
| "learning_rate": 0.00016991150442477877, |
| "loss": 0.9649, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.154867256637168, |
| "grad_norm": 0.059326171875, |
| "learning_rate": 0.00016902654867256638, |
| "loss": 0.9653, |
| "step": 261 |
| }, |
| { |
| "epoch": 1.1592920353982301, |
| "grad_norm": 0.05322265625, |
| "learning_rate": 0.000168141592920354, |
| "loss": 0.8342, |
| "step": 262 |
| }, |
| { |
| "epoch": 1.163716814159292, |
| "grad_norm": 0.109375, |
| "learning_rate": 0.00016725663716814158, |
| "loss": 0.7385, |
| "step": 263 |
| }, |
| { |
| "epoch": 1.168141592920354, |
| "grad_norm": 0.076171875, |
| "learning_rate": 0.0001663716814159292, |
| "loss": 0.7605, |
| "step": 264 |
| }, |
| { |
| "epoch": 1.1725663716814159, |
| "grad_norm": 0.057373046875, |
| "learning_rate": 0.00016548672566371681, |
| "loss": 0.8457, |
| "step": 265 |
| }, |
| { |
| "epoch": 1.176991150442478, |
| "grad_norm": 0.08447265625, |
| "learning_rate": 0.00016460176991150443, |
| "loss": 0.872, |
| "step": 266 |
| }, |
| { |
| "epoch": 1.1814159292035398, |
| "grad_norm": 0.07470703125, |
| "learning_rate": 0.00016371681415929204, |
| "loss": 1.0322, |
| "step": 267 |
| }, |
| { |
| "epoch": 1.1858407079646018, |
| "grad_norm": 0.06640625, |
| "learning_rate": 0.00016283185840707966, |
| "loss": 1.0532, |
| "step": 268 |
| }, |
| { |
| "epoch": 1.1902654867256637, |
| "grad_norm": 0.059814453125, |
| "learning_rate": 0.00016194690265486727, |
| "loss": 0.9205, |
| "step": 269 |
| }, |
| { |
| "epoch": 1.1946902654867257, |
| "grad_norm": 0.060546875, |
| "learning_rate": 0.0001610619469026549, |
| "loss": 0.8789, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.1991150442477876, |
| "grad_norm": 0.0654296875, |
| "learning_rate": 0.0001601769911504425, |
| "loss": 1.0501, |
| "step": 271 |
| }, |
| { |
| "epoch": 1.2035398230088497, |
| "grad_norm": 0.0634765625, |
| "learning_rate": 0.0001592920353982301, |
| "loss": 0.8666, |
| "step": 272 |
| }, |
| { |
| "epoch": 1.2079646017699115, |
| "grad_norm": 0.0595703125, |
| "learning_rate": 0.0001584070796460177, |
| "loss": 0.8761, |
| "step": 273 |
| }, |
| { |
| "epoch": 1.2123893805309733, |
| "grad_norm": 0.057373046875, |
| "learning_rate": 0.00015752212389380532, |
| "loss": 0.8827, |
| "step": 274 |
| }, |
| { |
| "epoch": 1.2168141592920354, |
| "grad_norm": 0.07373046875, |
| "learning_rate": 0.00015663716814159293, |
| "loss": 0.8162, |
| "step": 275 |
| }, |
| { |
| "epoch": 1.2212389380530975, |
| "grad_norm": 0.06494140625, |
| "learning_rate": 0.00015575221238938055, |
| "loss": 0.7613, |
| "step": 276 |
| }, |
| { |
| "epoch": 1.2256637168141593, |
| "grad_norm": 0.06494140625, |
| "learning_rate": 0.00015486725663716813, |
| "loss": 0.825, |
| "step": 277 |
| }, |
| { |
| "epoch": 1.2300884955752212, |
| "grad_norm": 0.061767578125, |
| "learning_rate": 0.00015398230088495575, |
| "loss": 0.9633, |
| "step": 278 |
| }, |
| { |
| "epoch": 1.2345132743362832, |
| "grad_norm": 0.0595703125, |
| "learning_rate": 0.00015309734513274336, |
| "loss": 0.9036, |
| "step": 279 |
| }, |
| { |
| "epoch": 1.238938053097345, |
| "grad_norm": 0.076171875, |
| "learning_rate": 0.00015221238938053098, |
| "loss": 0.9527, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.2433628318584071, |
| "grad_norm": 0.06005859375, |
| "learning_rate": 0.0001513274336283186, |
| "loss": 0.9089, |
| "step": 281 |
| }, |
| { |
| "epoch": 1.247787610619469, |
| "grad_norm": 0.056884765625, |
| "learning_rate": 0.00015044247787610618, |
| "loss": 0.8911, |
| "step": 282 |
| }, |
| { |
| "epoch": 1.252212389380531, |
| "grad_norm": 0.0908203125, |
| "learning_rate": 0.00014955752212389382, |
| "loss": 0.7871, |
| "step": 283 |
| }, |
| { |
| "epoch": 1.2566371681415929, |
| "grad_norm": 0.0771484375, |
| "learning_rate": 0.00014867256637168144, |
| "loss": 0.8415, |
| "step": 284 |
| }, |
| { |
| "epoch": 1.261061946902655, |
| "grad_norm": 0.07177734375, |
| "learning_rate": 0.00014778761061946905, |
| "loss": 1.0105, |
| "step": 285 |
| }, |
| { |
| "epoch": 1.2654867256637168, |
| "grad_norm": 0.0986328125, |
| "learning_rate": 0.00014690265486725664, |
| "loss": 0.9677, |
| "step": 286 |
| }, |
| { |
| "epoch": 1.2699115044247788, |
| "grad_norm": 0.0888671875, |
| "learning_rate": 0.00014601769911504425, |
| "loss": 0.837, |
| "step": 287 |
| }, |
| { |
| "epoch": 1.2743362831858407, |
| "grad_norm": 0.126953125, |
| "learning_rate": 0.00014513274336283187, |
| "loss": 0.8605, |
| "step": 288 |
| }, |
| { |
| "epoch": 1.2787610619469025, |
| "grad_norm": 0.06298828125, |
| "learning_rate": 0.00014424778761061948, |
| "loss": 0.8717, |
| "step": 289 |
| }, |
| { |
| "epoch": 1.2831858407079646, |
| "grad_norm": 0.08740234375, |
| "learning_rate": 0.0001433628318584071, |
| "loss": 1.0469, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.2876106194690267, |
| "grad_norm": 0.061767578125, |
| "learning_rate": 0.00014247787610619468, |
| "loss": 0.9339, |
| "step": 291 |
| }, |
| { |
| "epoch": 1.2920353982300885, |
| "grad_norm": 0.072265625, |
| "learning_rate": 0.0001415929203539823, |
| "loss": 0.7235, |
| "step": 292 |
| }, |
| { |
| "epoch": 1.2964601769911503, |
| "grad_norm": 0.087890625, |
| "learning_rate": 0.0001407079646017699, |
| "loss": 0.8648, |
| "step": 293 |
| }, |
| { |
| "epoch": 1.3008849557522124, |
| "grad_norm": 0.062255859375, |
| "learning_rate": 0.00013982300884955753, |
| "loss": 0.8842, |
| "step": 294 |
| }, |
| { |
| "epoch": 1.3053097345132743, |
| "grad_norm": 0.08056640625, |
| "learning_rate": 0.00013893805309734514, |
| "loss": 0.9593, |
| "step": 295 |
| }, |
| { |
| "epoch": 1.3097345132743363, |
| "grad_norm": 0.0771484375, |
| "learning_rate": 0.00013805309734513276, |
| "loss": 0.9122, |
| "step": 296 |
| }, |
| { |
| "epoch": 1.3141592920353982, |
| "grad_norm": 0.06396484375, |
| "learning_rate": 0.00013716814159292034, |
| "loss": 1.0082, |
| "step": 297 |
| }, |
| { |
| "epoch": 1.3185840707964602, |
| "grad_norm": 0.06298828125, |
| "learning_rate": 0.00013628318584070796, |
| "loss": 0.884, |
| "step": 298 |
| }, |
| { |
| "epoch": 1.323008849557522, |
| "grad_norm": 0.08349609375, |
| "learning_rate": 0.0001353982300884956, |
| "loss": 0.8348, |
| "step": 299 |
| }, |
| { |
| "epoch": 1.3274336283185841, |
| "grad_norm": 0.0732421875, |
| "learning_rate": 0.00013451327433628321, |
| "loss": 0.747, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.331858407079646, |
| "grad_norm": 0.06396484375, |
| "learning_rate": 0.0001336283185840708, |
| "loss": 0.8841, |
| "step": 301 |
| }, |
| { |
| "epoch": 1.336283185840708, |
| "grad_norm": 0.06005859375, |
| "learning_rate": 0.00013274336283185842, |
| "loss": 0.8985, |
| "step": 302 |
| }, |
| { |
| "epoch": 1.3407079646017699, |
| "grad_norm": 0.068359375, |
| "learning_rate": 0.00013185840707964603, |
| "loss": 0.9008, |
| "step": 303 |
| }, |
| { |
| "epoch": 1.3451327433628317, |
| "grad_norm": 0.076171875, |
| "learning_rate": 0.00013097345132743365, |
| "loss": 0.8909, |
| "step": 304 |
| }, |
| { |
| "epoch": 1.3495575221238938, |
| "grad_norm": 0.09521484375, |
| "learning_rate": 0.00013008849557522126, |
| "loss": 0.8108, |
| "step": 305 |
| }, |
| { |
| "epoch": 1.3539823008849559, |
| "grad_norm": 0.08154296875, |
| "learning_rate": 0.00012920353982300885, |
| "loss": 0.8546, |
| "step": 306 |
| }, |
| { |
| "epoch": 1.3584070796460177, |
| "grad_norm": 0.0771484375, |
| "learning_rate": 0.00012831858407079646, |
| "loss": 1.0212, |
| "step": 307 |
| }, |
| { |
| "epoch": 1.3628318584070795, |
| "grad_norm": 0.06201171875, |
| "learning_rate": 0.00012743362831858408, |
| "loss": 0.974, |
| "step": 308 |
| }, |
| { |
| "epoch": 1.3672566371681416, |
| "grad_norm": 0.095703125, |
| "learning_rate": 0.0001265486725663717, |
| "loss": 0.7493, |
| "step": 309 |
| }, |
| { |
| "epoch": 1.3716814159292037, |
| "grad_norm": 0.09765625, |
| "learning_rate": 0.0001256637168141593, |
| "loss": 1.0118, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.3761061946902655, |
| "grad_norm": 0.08740234375, |
| "learning_rate": 0.0001247787610619469, |
| "loss": 0.8243, |
| "step": 311 |
| }, |
| { |
| "epoch": 1.3805309734513274, |
| "grad_norm": 0.06884765625, |
| "learning_rate": 0.0001238938053097345, |
| "loss": 0.9024, |
| "step": 312 |
| }, |
| { |
| "epoch": 1.3849557522123894, |
| "grad_norm": 0.08740234375, |
| "learning_rate": 0.00012300884955752212, |
| "loss": 0.9018, |
| "step": 313 |
| }, |
| { |
| "epoch": 1.3893805309734513, |
| "grad_norm": 0.09814453125, |
| "learning_rate": 0.00012212389380530974, |
| "loss": 1.1168, |
| "step": 314 |
| }, |
| { |
| "epoch": 1.3938053097345133, |
| "grad_norm": 0.07861328125, |
| "learning_rate": 0.00012123893805309736, |
| "loss": 0.9847, |
| "step": 315 |
| }, |
| { |
| "epoch": 1.3982300884955752, |
| "grad_norm": 0.07080078125, |
| "learning_rate": 0.00012035398230088497, |
| "loss": 0.9884, |
| "step": 316 |
| }, |
| { |
| "epoch": 1.4026548672566372, |
| "grad_norm": 0.07568359375, |
| "learning_rate": 0.00011946902654867258, |
| "loss": 0.9483, |
| "step": 317 |
| }, |
| { |
| "epoch": 1.407079646017699, |
| "grad_norm": 0.06787109375, |
| "learning_rate": 0.0001185840707964602, |
| "loss": 0.8768, |
| "step": 318 |
| }, |
| { |
| "epoch": 1.411504424778761, |
| "grad_norm": 0.0751953125, |
| "learning_rate": 0.0001176991150442478, |
| "loss": 0.9072, |
| "step": 319 |
| }, |
| { |
| "epoch": 1.415929203539823, |
| "grad_norm": 0.0810546875, |
| "learning_rate": 0.00011681415929203541, |
| "loss": 0.8627, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.420353982300885, |
| "grad_norm": 0.07275390625, |
| "learning_rate": 0.00011592920353982301, |
| "loss": 0.9518, |
| "step": 321 |
| }, |
| { |
| "epoch": 1.424778761061947, |
| "grad_norm": 0.0830078125, |
| "learning_rate": 0.00011504424778761063, |
| "loss": 0.8705, |
| "step": 322 |
| }, |
| { |
| "epoch": 1.4292035398230087, |
| "grad_norm": 0.061767578125, |
| "learning_rate": 0.00011415929203539824, |
| "loss": 0.8535, |
| "step": 323 |
| }, |
| { |
| "epoch": 1.4336283185840708, |
| "grad_norm": 0.06396484375, |
| "learning_rate": 0.00011327433628318584, |
| "loss": 0.8835, |
| "step": 324 |
| }, |
| { |
| "epoch": 1.4380530973451329, |
| "grad_norm": 0.09033203125, |
| "learning_rate": 0.00011238938053097346, |
| "loss": 1.1187, |
| "step": 325 |
| }, |
| { |
| "epoch": 1.4424778761061947, |
| "grad_norm": 0.08935546875, |
| "learning_rate": 0.00011150442477876106, |
| "loss": 0.6991, |
| "step": 326 |
| }, |
| { |
| "epoch": 1.4469026548672566, |
| "grad_norm": 0.10546875, |
| "learning_rate": 0.00011061946902654867, |
| "loss": 0.8172, |
| "step": 327 |
| }, |
| { |
| "epoch": 1.4513274336283186, |
| "grad_norm": 0.1015625, |
| "learning_rate": 0.00010973451327433629, |
| "loss": 0.8526, |
| "step": 328 |
| }, |
| { |
| "epoch": 1.4557522123893805, |
| "grad_norm": 0.06640625, |
| "learning_rate": 0.00010884955752212389, |
| "loss": 0.8048, |
| "step": 329 |
| }, |
| { |
| "epoch": 1.4601769911504425, |
| "grad_norm": 0.0693359375, |
| "learning_rate": 0.0001079646017699115, |
| "loss": 0.9438, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.4646017699115044, |
| "grad_norm": 0.08837890625, |
| "learning_rate": 0.00010707964601769913, |
| "loss": 0.9667, |
| "step": 331 |
| }, |
| { |
| "epoch": 1.4690265486725664, |
| "grad_norm": 0.0810546875, |
| "learning_rate": 0.00010619469026548674, |
| "loss": 1.0007, |
| "step": 332 |
| }, |
| { |
| "epoch": 1.4734513274336283, |
| "grad_norm": 0.07470703125, |
| "learning_rate": 0.00010530973451327434, |
| "loss": 0.971, |
| "step": 333 |
| }, |
| { |
| "epoch": 1.4778761061946903, |
| "grad_norm": 0.09033203125, |
| "learning_rate": 0.00010442477876106196, |
| "loss": 0.8334, |
| "step": 334 |
| }, |
| { |
| "epoch": 1.4823008849557522, |
| "grad_norm": 0.06640625, |
| "learning_rate": 0.00010353982300884957, |
| "loss": 0.7885, |
| "step": 335 |
| }, |
| { |
| "epoch": 1.4867256637168142, |
| "grad_norm": 0.0947265625, |
| "learning_rate": 0.00010265486725663717, |
| "loss": 0.825, |
| "step": 336 |
| }, |
| { |
| "epoch": 1.491150442477876, |
| "grad_norm": 0.08154296875, |
| "learning_rate": 0.00010176991150442479, |
| "loss": 0.9044, |
| "step": 337 |
| }, |
| { |
| "epoch": 1.495575221238938, |
| "grad_norm": 0.07763671875, |
| "learning_rate": 0.00010088495575221239, |
| "loss": 0.7607, |
| "step": 338 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 0.0693359375, |
| "learning_rate": 0.0001, |
| "loss": 0.966, |
| "step": 339 |
| }, |
| { |
| "epoch": 1.504424778761062, |
| "grad_norm": 0.1005859375, |
| "learning_rate": 9.911504424778762e-05, |
| "loss": 0.7745, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.508849557522124, |
| "grad_norm": 0.058837890625, |
| "learning_rate": 9.823008849557522e-05, |
| "loss": 0.8849, |
| "step": 341 |
| }, |
| { |
| "epoch": 1.5132743362831858, |
| "grad_norm": 0.0703125, |
| "learning_rate": 9.734513274336283e-05, |
| "loss": 0.9905, |
| "step": 342 |
| }, |
| { |
| "epoch": 1.5176991150442478, |
| "grad_norm": 0.1025390625, |
| "learning_rate": 9.646017699115044e-05, |
| "loss": 0.8459, |
| "step": 343 |
| }, |
| { |
| "epoch": 1.5221238938053099, |
| "grad_norm": 0.07275390625, |
| "learning_rate": 9.557522123893806e-05, |
| "loss": 0.8842, |
| "step": 344 |
| }, |
| { |
| "epoch": 1.5265486725663717, |
| "grad_norm": 0.083984375, |
| "learning_rate": 9.469026548672566e-05, |
| "loss": 1.0654, |
| "step": 345 |
| }, |
| { |
| "epoch": 1.5309734513274336, |
| "grad_norm": 0.0615234375, |
| "learning_rate": 9.380530973451328e-05, |
| "loss": 0.8734, |
| "step": 346 |
| }, |
| { |
| "epoch": 1.5353982300884956, |
| "grad_norm": 0.0791015625, |
| "learning_rate": 9.29203539823009e-05, |
| "loss": 0.9752, |
| "step": 347 |
| }, |
| { |
| "epoch": 1.5398230088495575, |
| "grad_norm": 0.0751953125, |
| "learning_rate": 9.20353982300885e-05, |
| "loss": 0.7664, |
| "step": 348 |
| }, |
| { |
| "epoch": 1.5442477876106193, |
| "grad_norm": 0.0888671875, |
| "learning_rate": 9.115044247787611e-05, |
| "loss": 0.8328, |
| "step": 349 |
| }, |
| { |
| "epoch": 1.5486725663716814, |
| "grad_norm": 0.0712890625, |
| "learning_rate": 9.026548672566371e-05, |
| "loss": 0.8581, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.5530973451327434, |
| "grad_norm": 0.0888671875, |
| "learning_rate": 8.938053097345133e-05, |
| "loss": 0.7521, |
| "step": 351 |
| }, |
| { |
| "epoch": 1.5575221238938053, |
| "grad_norm": 0.0810546875, |
| "learning_rate": 8.849557522123895e-05, |
| "loss": 1.1778, |
| "step": 352 |
| }, |
| { |
| "epoch": 1.5619469026548671, |
| "grad_norm": 0.08447265625, |
| "learning_rate": 8.761061946902655e-05, |
| "loss": 0.8007, |
| "step": 353 |
| }, |
| { |
| "epoch": 1.5663716814159292, |
| "grad_norm": 0.08544921875, |
| "learning_rate": 8.672566371681417e-05, |
| "loss": 1.1795, |
| "step": 354 |
| }, |
| { |
| "epoch": 1.5707964601769913, |
| "grad_norm": 0.08642578125, |
| "learning_rate": 8.584070796460177e-05, |
| "loss": 0.9632, |
| "step": 355 |
| }, |
| { |
| "epoch": 1.575221238938053, |
| "grad_norm": 0.11572265625, |
| "learning_rate": 8.495575221238938e-05, |
| "loss": 0.7671, |
| "step": 356 |
| }, |
| { |
| "epoch": 1.579646017699115, |
| "grad_norm": 0.1396484375, |
| "learning_rate": 8.4070796460177e-05, |
| "loss": 0.692, |
| "step": 357 |
| }, |
| { |
| "epoch": 1.584070796460177, |
| "grad_norm": 0.10791015625, |
| "learning_rate": 8.31858407079646e-05, |
| "loss": 0.6548, |
| "step": 358 |
| }, |
| { |
| "epoch": 1.588495575221239, |
| "grad_norm": 0.080078125, |
| "learning_rate": 8.230088495575221e-05, |
| "loss": 0.805, |
| "step": 359 |
| }, |
| { |
| "epoch": 1.592920353982301, |
| "grad_norm": 0.06005859375, |
| "learning_rate": 8.141592920353983e-05, |
| "loss": 0.7988, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.5973451327433628, |
| "grad_norm": 0.07861328125, |
| "learning_rate": 8.053097345132744e-05, |
| "loss": 0.9695, |
| "step": 361 |
| }, |
| { |
| "epoch": 1.6017699115044248, |
| "grad_norm": 0.07421875, |
| "learning_rate": 7.964601769911504e-05, |
| "loss": 1.0397, |
| "step": 362 |
| }, |
| { |
| "epoch": 1.606194690265487, |
| "grad_norm": 0.0830078125, |
| "learning_rate": 7.876106194690266e-05, |
| "loss": 0.9098, |
| "step": 363 |
| }, |
| { |
| "epoch": 1.6106194690265485, |
| "grad_norm": 0.07861328125, |
| "learning_rate": 7.787610619469027e-05, |
| "loss": 0.9249, |
| "step": 364 |
| }, |
| { |
| "epoch": 1.6150442477876106, |
| "grad_norm": 0.0615234375, |
| "learning_rate": 7.699115044247787e-05, |
| "loss": 0.7443, |
| "step": 365 |
| }, |
| { |
| "epoch": 1.6194690265486726, |
| "grad_norm": 0.08935546875, |
| "learning_rate": 7.610619469026549e-05, |
| "loss": 0.8042, |
| "step": 366 |
| }, |
| { |
| "epoch": 1.6238938053097345, |
| "grad_norm": 0.0810546875, |
| "learning_rate": 7.522123893805309e-05, |
| "loss": 0.8271, |
| "step": 367 |
| }, |
| { |
| "epoch": 1.6283185840707963, |
| "grad_norm": 0.06884765625, |
| "learning_rate": 7.433628318584072e-05, |
| "loss": 0.9711, |
| "step": 368 |
| }, |
| { |
| "epoch": 1.6327433628318584, |
| "grad_norm": 0.06689453125, |
| "learning_rate": 7.345132743362832e-05, |
| "loss": 0.8821, |
| "step": 369 |
| }, |
| { |
| "epoch": 1.6371681415929205, |
| "grad_norm": 0.0556640625, |
| "learning_rate": 7.256637168141593e-05, |
| "loss": 0.7417, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.6415929203539823, |
| "grad_norm": 0.06591796875, |
| "learning_rate": 7.168141592920355e-05, |
| "loss": 0.9247, |
| "step": 371 |
| }, |
| { |
| "epoch": 1.6460176991150441, |
| "grad_norm": 0.06396484375, |
| "learning_rate": 7.079646017699115e-05, |
| "loss": 0.9101, |
| "step": 372 |
| }, |
| { |
| "epoch": 1.6504424778761062, |
| "grad_norm": 0.091796875, |
| "learning_rate": 6.991150442477876e-05, |
| "loss": 1.0123, |
| "step": 373 |
| }, |
| { |
| "epoch": 1.6548672566371683, |
| "grad_norm": 0.103515625, |
| "learning_rate": 6.902654867256638e-05, |
| "loss": 0.7791, |
| "step": 374 |
| }, |
| { |
| "epoch": 1.6592920353982301, |
| "grad_norm": 0.07275390625, |
| "learning_rate": 6.814159292035398e-05, |
| "loss": 1.0589, |
| "step": 375 |
| }, |
| { |
| "epoch": 1.663716814159292, |
| "grad_norm": 0.058349609375, |
| "learning_rate": 6.725663716814161e-05, |
| "loss": 0.8401, |
| "step": 376 |
| }, |
| { |
| "epoch": 1.668141592920354, |
| "grad_norm": 0.059814453125, |
| "learning_rate": 6.637168141592921e-05, |
| "loss": 0.8201, |
| "step": 377 |
| }, |
| { |
| "epoch": 1.672566371681416, |
| "grad_norm": 0.0927734375, |
| "learning_rate": 6.548672566371682e-05, |
| "loss": 0.913, |
| "step": 378 |
| }, |
| { |
| "epoch": 1.676991150442478, |
| "grad_norm": 0.060302734375, |
| "learning_rate": 6.460176991150442e-05, |
| "loss": 0.8276, |
| "step": 379 |
| }, |
| { |
| "epoch": 1.6814159292035398, |
| "grad_norm": 0.08349609375, |
| "learning_rate": 6.371681415929204e-05, |
| "loss": 0.7729, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.6858407079646018, |
| "grad_norm": 0.0703125, |
| "learning_rate": 6.283185840707965e-05, |
| "loss": 1.0113, |
| "step": 381 |
| }, |
| { |
| "epoch": 1.6902654867256637, |
| "grad_norm": 0.0634765625, |
| "learning_rate": 6.194690265486725e-05, |
| "loss": 0.8446, |
| "step": 382 |
| }, |
| { |
| "epoch": 1.6946902654867255, |
| "grad_norm": 0.0673828125, |
| "learning_rate": 6.106194690265487e-05, |
| "loss": 0.8878, |
| "step": 383 |
| }, |
| { |
| "epoch": 1.6991150442477876, |
| "grad_norm": 0.1103515625, |
| "learning_rate": 6.017699115044248e-05, |
| "loss": 0.6718, |
| "step": 384 |
| }, |
| { |
| "epoch": 1.7035398230088497, |
| "grad_norm": 0.060302734375, |
| "learning_rate": 5.92920353982301e-05, |
| "loss": 0.8153, |
| "step": 385 |
| }, |
| { |
| "epoch": 1.7079646017699115, |
| "grad_norm": 0.0712890625, |
| "learning_rate": 5.8407079646017705e-05, |
| "loss": 0.9931, |
| "step": 386 |
| }, |
| { |
| "epoch": 1.7123893805309733, |
| "grad_norm": 0.0556640625, |
| "learning_rate": 5.752212389380531e-05, |
| "loss": 0.7466, |
| "step": 387 |
| }, |
| { |
| "epoch": 1.7168141592920354, |
| "grad_norm": 0.09033203125, |
| "learning_rate": 5.663716814159292e-05, |
| "loss": 0.9364, |
| "step": 388 |
| }, |
| { |
| "epoch": 1.7212389380530975, |
| "grad_norm": 0.068359375, |
| "learning_rate": 5.575221238938053e-05, |
| "loss": 0.8851, |
| "step": 389 |
| }, |
| { |
| "epoch": 1.7256637168141593, |
| "grad_norm": 0.061279296875, |
| "learning_rate": 5.486725663716814e-05, |
| "loss": 0.8714, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.7300884955752212, |
| "grad_norm": 0.06982421875, |
| "learning_rate": 5.398230088495575e-05, |
| "loss": 0.8885, |
| "step": 391 |
| }, |
| { |
| "epoch": 1.7345132743362832, |
| "grad_norm": 0.06298828125, |
| "learning_rate": 5.309734513274337e-05, |
| "loss": 0.8724, |
| "step": 392 |
| }, |
| { |
| "epoch": 1.7389380530973453, |
| "grad_norm": 0.08056640625, |
| "learning_rate": 5.221238938053098e-05, |
| "loss": 1.1328, |
| "step": 393 |
| }, |
| { |
| "epoch": 1.7433628318584071, |
| "grad_norm": 0.099609375, |
| "learning_rate": 5.132743362831859e-05, |
| "loss": 0.7735, |
| "step": 394 |
| }, |
| { |
| "epoch": 1.747787610619469, |
| "grad_norm": 0.06982421875, |
| "learning_rate": 5.0442477876106195e-05, |
| "loss": 0.9325, |
| "step": 395 |
| }, |
| { |
| "epoch": 1.752212389380531, |
| "grad_norm": 0.07080078125, |
| "learning_rate": 4.955752212389381e-05, |
| "loss": 0.9273, |
| "step": 396 |
| }, |
| { |
| "epoch": 1.7566371681415929, |
| "grad_norm": 0.10009765625, |
| "learning_rate": 4.867256637168142e-05, |
| "loss": 0.7756, |
| "step": 397 |
| }, |
| { |
| "epoch": 1.7610619469026547, |
| "grad_norm": 0.0908203125, |
| "learning_rate": 4.778761061946903e-05, |
| "loss": 1.0591, |
| "step": 398 |
| }, |
| { |
| "epoch": 1.7654867256637168, |
| "grad_norm": 0.09423828125, |
| "learning_rate": 4.690265486725664e-05, |
| "loss": 0.7867, |
| "step": 399 |
| }, |
| { |
| "epoch": 1.7699115044247788, |
| "grad_norm": 0.0888671875, |
| "learning_rate": 4.601769911504425e-05, |
| "loss": 0.8369, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.7743362831858407, |
| "grad_norm": 0.06396484375, |
| "learning_rate": 4.5132743362831855e-05, |
| "loss": 0.9999, |
| "step": 401 |
| }, |
| { |
| "epoch": 1.7787610619469025, |
| "grad_norm": 0.061767578125, |
| "learning_rate": 4.4247787610619477e-05, |
| "loss": 0.8612, |
| "step": 402 |
| }, |
| { |
| "epoch": 1.7831858407079646, |
| "grad_norm": 0.09716796875, |
| "learning_rate": 4.3362831858407084e-05, |
| "loss": 0.8529, |
| "step": 403 |
| }, |
| { |
| "epoch": 1.7876106194690267, |
| "grad_norm": 0.07763671875, |
| "learning_rate": 4.247787610619469e-05, |
| "loss": 0.8809, |
| "step": 404 |
| }, |
| { |
| "epoch": 1.7920353982300885, |
| "grad_norm": 0.07177734375, |
| "learning_rate": 4.15929203539823e-05, |
| "loss": 0.9739, |
| "step": 405 |
| }, |
| { |
| "epoch": 1.7964601769911503, |
| "grad_norm": 0.07568359375, |
| "learning_rate": 4.0707964601769914e-05, |
| "loss": 0.9416, |
| "step": 406 |
| }, |
| { |
| "epoch": 1.8008849557522124, |
| "grad_norm": 0.061767578125, |
| "learning_rate": 3.982300884955752e-05, |
| "loss": 0.8359, |
| "step": 407 |
| }, |
| { |
| "epoch": 1.8053097345132745, |
| "grad_norm": 0.0712890625, |
| "learning_rate": 3.893805309734514e-05, |
| "loss": 0.9323, |
| "step": 408 |
| }, |
| { |
| "epoch": 1.8097345132743363, |
| "grad_norm": 0.0810546875, |
| "learning_rate": 3.8053097345132744e-05, |
| "loss": 0.8084, |
| "step": 409 |
| }, |
| { |
| "epoch": 1.8141592920353982, |
| "grad_norm": 0.06298828125, |
| "learning_rate": 3.716814159292036e-05, |
| "loss": 0.9237, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.8185840707964602, |
| "grad_norm": 0.08447265625, |
| "learning_rate": 3.628318584070797e-05, |
| "loss": 1.0047, |
| "step": 411 |
| }, |
| { |
| "epoch": 1.823008849557522, |
| "grad_norm": 0.0654296875, |
| "learning_rate": 3.5398230088495574e-05, |
| "loss": 0.9763, |
| "step": 412 |
| }, |
| { |
| "epoch": 1.827433628318584, |
| "grad_norm": 0.06201171875, |
| "learning_rate": 3.451327433628319e-05, |
| "loss": 0.7498, |
| "step": 413 |
| }, |
| { |
| "epoch": 1.831858407079646, |
| "grad_norm": 0.087890625, |
| "learning_rate": 3.3628318584070804e-05, |
| "loss": 0.8973, |
| "step": 414 |
| }, |
| { |
| "epoch": 1.836283185840708, |
| "grad_norm": 0.0966796875, |
| "learning_rate": 3.274336283185841e-05, |
| "loss": 0.9526, |
| "step": 415 |
| }, |
| { |
| "epoch": 1.8407079646017699, |
| "grad_norm": 0.061767578125, |
| "learning_rate": 3.185840707964602e-05, |
| "loss": 0.9184, |
| "step": 416 |
| }, |
| { |
| "epoch": 1.8451327433628317, |
| "grad_norm": 0.0673828125, |
| "learning_rate": 3.097345132743363e-05, |
| "loss": 0.9124, |
| "step": 417 |
| }, |
| { |
| "epoch": 1.8495575221238938, |
| "grad_norm": 0.056884765625, |
| "learning_rate": 3.008849557522124e-05, |
| "loss": 0.8303, |
| "step": 418 |
| }, |
| { |
| "epoch": 1.8539823008849559, |
| "grad_norm": 0.0703125, |
| "learning_rate": 2.9203539823008852e-05, |
| "loss": 0.9533, |
| "step": 419 |
| }, |
| { |
| "epoch": 1.8584070796460177, |
| "grad_norm": 0.064453125, |
| "learning_rate": 2.831858407079646e-05, |
| "loss": 0.8822, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.8628318584070795, |
| "grad_norm": 0.072265625, |
| "learning_rate": 2.743362831858407e-05, |
| "loss": 0.911, |
| "step": 421 |
| }, |
| { |
| "epoch": 1.8672566371681416, |
| "grad_norm": 0.060546875, |
| "learning_rate": 2.6548672566371686e-05, |
| "loss": 0.8209, |
| "step": 422 |
| }, |
| { |
| "epoch": 1.8716814159292037, |
| "grad_norm": 0.072265625, |
| "learning_rate": 2.5663716814159294e-05, |
| "loss": 0.8294, |
| "step": 423 |
| }, |
| { |
| "epoch": 1.8761061946902655, |
| "grad_norm": 0.061279296875, |
| "learning_rate": 2.4778761061946905e-05, |
| "loss": 0.7602, |
| "step": 424 |
| }, |
| { |
| "epoch": 1.8805309734513274, |
| "grad_norm": 0.0810546875, |
| "learning_rate": 2.3893805309734516e-05, |
| "loss": 0.8862, |
| "step": 425 |
| }, |
| { |
| "epoch": 1.8849557522123894, |
| "grad_norm": 0.06494140625, |
| "learning_rate": 2.3008849557522124e-05, |
| "loss": 0.8715, |
| "step": 426 |
| }, |
| { |
| "epoch": 1.8893805309734515, |
| "grad_norm": 0.06982421875, |
| "learning_rate": 2.2123893805309738e-05, |
| "loss": 0.9235, |
| "step": 427 |
| }, |
| { |
| "epoch": 1.893805309734513, |
| "grad_norm": 0.07958984375, |
| "learning_rate": 2.1238938053097346e-05, |
| "loss": 0.8975, |
| "step": 428 |
| }, |
| { |
| "epoch": 1.8982300884955752, |
| "grad_norm": 0.08935546875, |
| "learning_rate": 2.0353982300884957e-05, |
| "loss": 1.0014, |
| "step": 429 |
| }, |
| { |
| "epoch": 1.9026548672566372, |
| "grad_norm": 0.05712890625, |
| "learning_rate": 1.946902654867257e-05, |
| "loss": 0.8397, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.907079646017699, |
| "grad_norm": 0.0859375, |
| "learning_rate": 1.858407079646018e-05, |
| "loss": 1.0832, |
| "step": 431 |
| }, |
| { |
| "epoch": 1.911504424778761, |
| "grad_norm": 0.09375, |
| "learning_rate": 1.7699115044247787e-05, |
| "loss": 0.7726, |
| "step": 432 |
| }, |
| { |
| "epoch": 1.915929203539823, |
| "grad_norm": 0.06884765625, |
| "learning_rate": 1.6814159292035402e-05, |
| "loss": 0.936, |
| "step": 433 |
| }, |
| { |
| "epoch": 1.920353982300885, |
| "grad_norm": 0.062255859375, |
| "learning_rate": 1.592920353982301e-05, |
| "loss": 1.0048, |
| "step": 434 |
| }, |
| { |
| "epoch": 1.924778761061947, |
| "grad_norm": 0.08349609375, |
| "learning_rate": 1.504424778761062e-05, |
| "loss": 0.864, |
| "step": 435 |
| }, |
| { |
| "epoch": 1.9292035398230087, |
| "grad_norm": 0.0869140625, |
| "learning_rate": 1.415929203539823e-05, |
| "loss": 0.9952, |
| "step": 436 |
| }, |
| { |
| "epoch": 1.9336283185840708, |
| "grad_norm": 0.06982421875, |
| "learning_rate": 1.3274336283185843e-05, |
| "loss": 0.8628, |
| "step": 437 |
| }, |
| { |
| "epoch": 1.9380530973451329, |
| "grad_norm": 0.060546875, |
| "learning_rate": 1.2389380530973452e-05, |
| "loss": 0.8487, |
| "step": 438 |
| }, |
| { |
| "epoch": 1.9424778761061947, |
| "grad_norm": 0.0634765625, |
| "learning_rate": 1.1504424778761062e-05, |
| "loss": 0.8495, |
| "step": 439 |
| }, |
| { |
| "epoch": 1.9469026548672566, |
| "grad_norm": 0.06689453125, |
| "learning_rate": 1.0619469026548673e-05, |
| "loss": 0.8815, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.9513274336283186, |
| "grad_norm": 0.0634765625, |
| "learning_rate": 9.734513274336284e-06, |
| "loss": 0.8667, |
| "step": 441 |
| }, |
| { |
| "epoch": 1.9557522123893807, |
| "grad_norm": 0.0869140625, |
| "learning_rate": 8.849557522123894e-06, |
| "loss": 0.7515, |
| "step": 442 |
| }, |
| { |
| "epoch": 1.9601769911504425, |
| "grad_norm": 0.07275390625, |
| "learning_rate": 7.964601769911505e-06, |
| "loss": 0.8048, |
| "step": 443 |
| }, |
| { |
| "epoch": 1.9646017699115044, |
| "grad_norm": 0.0625, |
| "learning_rate": 7.079646017699115e-06, |
| "loss": 0.9373, |
| "step": 444 |
| }, |
| { |
| "epoch": 1.9690265486725664, |
| "grad_norm": 0.0859375, |
| "learning_rate": 6.194690265486726e-06, |
| "loss": 0.7985, |
| "step": 445 |
| }, |
| { |
| "epoch": 1.9734513274336283, |
| "grad_norm": 0.1083984375, |
| "learning_rate": 5.3097345132743365e-06, |
| "loss": 0.9149, |
| "step": 446 |
| }, |
| { |
| "epoch": 1.9778761061946901, |
| "grad_norm": 0.05615234375, |
| "learning_rate": 4.424778761061947e-06, |
| "loss": 0.8296, |
| "step": 447 |
| }, |
| { |
| "epoch": 1.9823008849557522, |
| "grad_norm": 0.061767578125, |
| "learning_rate": 3.5398230088495575e-06, |
| "loss": 0.8539, |
| "step": 448 |
| }, |
| { |
| "epoch": 1.9867256637168142, |
| "grad_norm": 0.0625, |
| "learning_rate": 2.6548672566371683e-06, |
| "loss": 0.8847, |
| "step": 449 |
| }, |
| { |
| "epoch": 1.991150442477876, |
| "grad_norm": 0.09033203125, |
| "learning_rate": 1.7699115044247788e-06, |
| "loss": 0.8814, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.995575221238938, |
| "grad_norm": 0.0625, |
| "learning_rate": 8.849557522123894e-07, |
| "loss": 0.8299, |
| "step": 451 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.12451171875, |
| "learning_rate": 0.0, |
| "loss": 0.8232, |
| "step": 452 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 452, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 0, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.4086515032577802e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|