| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 0, |
| "global_step": 339, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0029498525073746312, |
| "grad_norm": 0.478515625, |
| "learning_rate": 1e-05, |
| "loss": 2.2188, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0058997050147492625, |
| "grad_norm": 0.44921875, |
| "learning_rate": 9.970501474926254e-06, |
| "loss": 2.1776, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.008849557522123894, |
| "grad_norm": 0.4765625, |
| "learning_rate": 9.941002949852509e-06, |
| "loss": 2.2057, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.011799410029498525, |
| "grad_norm": 0.447265625, |
| "learning_rate": 9.911504424778762e-06, |
| "loss": 2.2486, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.014749262536873156, |
| "grad_norm": 0.453125, |
| "learning_rate": 9.882005899705015e-06, |
| "loss": 2.2377, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.017699115044247787, |
| "grad_norm": 0.498046875, |
| "learning_rate": 9.85250737463127e-06, |
| "loss": 2.3902, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.02064896755162242, |
| "grad_norm": 0.447265625, |
| "learning_rate": 9.823008849557523e-06, |
| "loss": 2.1862, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.02359882005899705, |
| "grad_norm": 0.40234375, |
| "learning_rate": 9.793510324483776e-06, |
| "loss": 2.1879, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.02654867256637168, |
| "grad_norm": 0.408203125, |
| "learning_rate": 9.764011799410031e-06, |
| "loss": 2.1982, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.029498525073746312, |
| "grad_norm": 0.400390625, |
| "learning_rate": 9.734513274336284e-06, |
| "loss": 2.2228, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.032448377581120944, |
| "grad_norm": 0.337890625, |
| "learning_rate": 9.705014749262537e-06, |
| "loss": 2.0235, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.035398230088495575, |
| "grad_norm": 0.396484375, |
| "learning_rate": 9.67551622418879e-06, |
| "loss": 2.1517, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.038348082595870206, |
| "grad_norm": 0.33984375, |
| "learning_rate": 9.646017699115045e-06, |
| "loss": 2.015, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.04129793510324484, |
| "grad_norm": 0.36328125, |
| "learning_rate": 9.616519174041298e-06, |
| "loss": 2.0683, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.04424778761061947, |
| "grad_norm": 0.36328125, |
| "learning_rate": 9.587020648967552e-06, |
| "loss": 2.0281, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.0471976401179941, |
| "grad_norm": 0.384765625, |
| "learning_rate": 9.557522123893806e-06, |
| "loss": 2.0722, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.05014749262536873, |
| "grad_norm": 0.39453125, |
| "learning_rate": 9.52802359882006e-06, |
| "loss": 2.0929, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.05309734513274336, |
| "grad_norm": 0.333984375, |
| "learning_rate": 9.498525073746313e-06, |
| "loss": 1.9962, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.05604719764011799, |
| "grad_norm": 0.328125, |
| "learning_rate": 9.469026548672568e-06, |
| "loss": 1.9953, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.058997050147492625, |
| "grad_norm": 0.33984375, |
| "learning_rate": 9.43952802359882e-06, |
| "loss": 2.0052, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.061946902654867256, |
| "grad_norm": 0.34765625, |
| "learning_rate": 9.410029498525074e-06, |
| "loss": 1.9943, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.06489675516224189, |
| "grad_norm": 0.29296875, |
| "learning_rate": 9.380530973451329e-06, |
| "loss": 1.8896, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.06784660766961652, |
| "grad_norm": 0.296875, |
| "learning_rate": 9.351032448377582e-06, |
| "loss": 1.9064, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.07079646017699115, |
| "grad_norm": 0.2890625, |
| "learning_rate": 9.321533923303837e-06, |
| "loss": 1.8625, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.07374631268436578, |
| "grad_norm": 0.287109375, |
| "learning_rate": 9.29203539823009e-06, |
| "loss": 1.8633, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.07669616519174041, |
| "grad_norm": 0.318359375, |
| "learning_rate": 9.262536873156343e-06, |
| "loss": 1.8919, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.07964601769911504, |
| "grad_norm": 0.30078125, |
| "learning_rate": 9.233038348082598e-06, |
| "loss": 1.8936, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.08259587020648967, |
| "grad_norm": 0.29296875, |
| "learning_rate": 9.203539823008851e-06, |
| "loss": 1.869, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.0855457227138643, |
| "grad_norm": 0.275390625, |
| "learning_rate": 9.174041297935104e-06, |
| "loss": 1.8716, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.08849557522123894, |
| "grad_norm": 0.263671875, |
| "learning_rate": 9.144542772861357e-06, |
| "loss": 1.8047, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.09144542772861357, |
| "grad_norm": 0.2333984375, |
| "learning_rate": 9.11504424778761e-06, |
| "loss": 1.7172, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.0943952802359882, |
| "grad_norm": 0.2734375, |
| "learning_rate": 9.085545722713865e-06, |
| "loss": 1.7374, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.09734513274336283, |
| "grad_norm": 0.234375, |
| "learning_rate": 9.056047197640118e-06, |
| "loss": 1.7511, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.10029498525073746, |
| "grad_norm": 0.2578125, |
| "learning_rate": 9.026548672566371e-06, |
| "loss": 1.7406, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.10324483775811209, |
| "grad_norm": 0.265625, |
| "learning_rate": 8.997050147492626e-06, |
| "loss": 1.7516, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.10619469026548672, |
| "grad_norm": 0.234375, |
| "learning_rate": 8.96755162241888e-06, |
| "loss": 1.7358, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.10914454277286136, |
| "grad_norm": 0.24609375, |
| "learning_rate": 8.938053097345133e-06, |
| "loss": 1.748, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.11209439528023599, |
| "grad_norm": 0.5078125, |
| "learning_rate": 8.908554572271387e-06, |
| "loss": 1.7539, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.11504424778761062, |
| "grad_norm": 0.2353515625, |
| "learning_rate": 8.87905604719764e-06, |
| "loss": 1.7187, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.11799410029498525, |
| "grad_norm": 0.2265625, |
| "learning_rate": 8.849557522123895e-06, |
| "loss": 1.7174, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.12094395280235988, |
| "grad_norm": 0.2255859375, |
| "learning_rate": 8.820058997050148e-06, |
| "loss": 1.6919, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.12389380530973451, |
| "grad_norm": 0.2197265625, |
| "learning_rate": 8.790560471976402e-06, |
| "loss": 1.6938, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.12684365781710916, |
| "grad_norm": 0.232421875, |
| "learning_rate": 8.761061946902656e-06, |
| "loss": 1.7143, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.12979351032448377, |
| "grad_norm": 0.228515625, |
| "learning_rate": 8.73156342182891e-06, |
| "loss": 1.699, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.13274336283185842, |
| "grad_norm": 0.1943359375, |
| "learning_rate": 8.702064896755163e-06, |
| "loss": 1.6395, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.13569321533923304, |
| "grad_norm": 0.2265625, |
| "learning_rate": 8.672566371681418e-06, |
| "loss": 1.6338, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.13864306784660768, |
| "grad_norm": 0.216796875, |
| "learning_rate": 8.64306784660767e-06, |
| "loss": 1.6455, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.1415929203539823, |
| "grad_norm": 0.193359375, |
| "learning_rate": 8.613569321533924e-06, |
| "loss": 1.629, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.14454277286135694, |
| "grad_norm": 0.2158203125, |
| "learning_rate": 8.584070796460177e-06, |
| "loss": 1.5925, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.14749262536873156, |
| "grad_norm": 0.232421875, |
| "learning_rate": 8.554572271386432e-06, |
| "loss": 1.6362, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.1504424778761062, |
| "grad_norm": 0.20703125, |
| "learning_rate": 8.525073746312685e-06, |
| "loss": 1.6373, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.15339233038348082, |
| "grad_norm": 0.1796875, |
| "learning_rate": 8.495575221238938e-06, |
| "loss": 1.5635, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.15634218289085547, |
| "grad_norm": 0.21875, |
| "learning_rate": 8.466076696165191e-06, |
| "loss": 1.6385, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.1592920353982301, |
| "grad_norm": 0.2021484375, |
| "learning_rate": 8.436578171091446e-06, |
| "loss": 1.6139, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.16224188790560473, |
| "grad_norm": 0.201171875, |
| "learning_rate": 8.4070796460177e-06, |
| "loss": 1.5861, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.16519174041297935, |
| "grad_norm": 0.234375, |
| "learning_rate": 8.377581120943954e-06, |
| "loss": 1.6177, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.168141592920354, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 8.348082595870207e-06, |
| "loss": 1.581, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.1710914454277286, |
| "grad_norm": 0.197265625, |
| "learning_rate": 8.31858407079646e-06, |
| "loss": 1.5891, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.17404129793510326, |
| "grad_norm": 0.232421875, |
| "learning_rate": 8.289085545722715e-06, |
| "loss": 1.5919, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.17699115044247787, |
| "grad_norm": 0.224609375, |
| "learning_rate": 8.259587020648968e-06, |
| "loss": 1.5822, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.17994100294985252, |
| "grad_norm": 0.2255859375, |
| "learning_rate": 8.230088495575221e-06, |
| "loss": 1.5672, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.18289085545722714, |
| "grad_norm": 0.1943359375, |
| "learning_rate": 8.200589970501476e-06, |
| "loss": 1.5443, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.18584070796460178, |
| "grad_norm": 0.185546875, |
| "learning_rate": 8.17109144542773e-06, |
| "loss": 1.5654, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.1887905604719764, |
| "grad_norm": 0.185546875, |
| "learning_rate": 8.141592920353984e-06, |
| "loss": 1.5489, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.19174041297935104, |
| "grad_norm": 0.236328125, |
| "learning_rate": 8.112094395280237e-06, |
| "loss": 1.5444, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.19469026548672566, |
| "grad_norm": 0.1767578125, |
| "learning_rate": 8.08259587020649e-06, |
| "loss": 1.5211, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.1976401179941003, |
| "grad_norm": 0.1865234375, |
| "learning_rate": 8.053097345132744e-06, |
| "loss": 1.5071, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.20058997050147492, |
| "grad_norm": 0.19140625, |
| "learning_rate": 8.023598820058997e-06, |
| "loss": 1.5312, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.20353982300884957, |
| "grad_norm": 0.205078125, |
| "learning_rate": 7.994100294985252e-06, |
| "loss": 1.5499, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.20648967551622419, |
| "grad_norm": 0.220703125, |
| "learning_rate": 7.964601769911505e-06, |
| "loss": 1.5343, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.20943952802359883, |
| "grad_norm": 0.1982421875, |
| "learning_rate": 7.935103244837758e-06, |
| "loss": 1.5152, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.21238938053097345, |
| "grad_norm": 0.1787109375, |
| "learning_rate": 7.905604719764013e-06, |
| "loss": 1.5295, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.2153392330383481, |
| "grad_norm": 0.2021484375, |
| "learning_rate": 7.876106194690266e-06, |
| "loss": 1.5438, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.2182890855457227, |
| "grad_norm": 0.18359375, |
| "learning_rate": 7.846607669616519e-06, |
| "loss": 1.5377, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.22123893805309736, |
| "grad_norm": 0.169921875, |
| "learning_rate": 7.817109144542774e-06, |
| "loss": 1.4838, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.22418879056047197, |
| "grad_norm": 0.1728515625, |
| "learning_rate": 7.787610619469027e-06, |
| "loss": 1.5378, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.22713864306784662, |
| "grad_norm": 0.1884765625, |
| "learning_rate": 7.75811209439528e-06, |
| "loss": 1.4742, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.23008849557522124, |
| "grad_norm": 0.1904296875, |
| "learning_rate": 7.728613569321535e-06, |
| "loss": 1.4953, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.23303834808259588, |
| "grad_norm": 0.18359375, |
| "learning_rate": 7.699115044247788e-06, |
| "loss": 1.4887, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.2359882005899705, |
| "grad_norm": 0.1875, |
| "learning_rate": 7.669616519174043e-06, |
| "loss": 1.4973, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.23893805309734514, |
| "grad_norm": 0.1748046875, |
| "learning_rate": 7.640117994100296e-06, |
| "loss": 1.4472, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.24188790560471976, |
| "grad_norm": 0.1865234375, |
| "learning_rate": 7.610619469026549e-06, |
| "loss": 1.496, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.2448377581120944, |
| "grad_norm": 0.1826171875, |
| "learning_rate": 7.581120943952803e-06, |
| "loss": 1.4909, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.24778761061946902, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 7.551622418879056e-06, |
| "loss": 1.4299, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.25073746312684364, |
| "grad_norm": 0.1650390625, |
| "learning_rate": 7.5221238938053095e-06, |
| "loss": 1.443, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.2536873156342183, |
| "grad_norm": 0.19140625, |
| "learning_rate": 7.492625368731564e-06, |
| "loss": 1.4519, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.25663716814159293, |
| "grad_norm": 0.185546875, |
| "learning_rate": 7.4631268436578175e-06, |
| "loss": 1.4633, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.25958702064896755, |
| "grad_norm": 0.16796875, |
| "learning_rate": 7.4336283185840714e-06, |
| "loss": 1.4548, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.26253687315634217, |
| "grad_norm": 0.203125, |
| "learning_rate": 7.4041297935103254e-06, |
| "loss": 1.475, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.26548672566371684, |
| "grad_norm": 0.17578125, |
| "learning_rate": 7.374631268436579e-06, |
| "loss": 1.4179, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.26843657817109146, |
| "grad_norm": 0.220703125, |
| "learning_rate": 7.3451327433628326e-06, |
| "loss": 1.4634, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.2713864306784661, |
| "grad_norm": 0.162109375, |
| "learning_rate": 7.315634218289086e-06, |
| "loss": 1.4339, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.2743362831858407, |
| "grad_norm": 0.2109375, |
| "learning_rate": 7.28613569321534e-06, |
| "loss": 1.4648, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.27728613569321536, |
| "grad_norm": 0.1787109375, |
| "learning_rate": 7.256637168141594e-06, |
| "loss": 1.4224, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.28023598820059, |
| "grad_norm": 0.1728515625, |
| "learning_rate": 7.227138643067847e-06, |
| "loss": 1.4728, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.2831858407079646, |
| "grad_norm": 0.2041015625, |
| "learning_rate": 7.197640117994102e-06, |
| "loss": 1.4738, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.2861356932153392, |
| "grad_norm": 0.169921875, |
| "learning_rate": 7.168141592920355e-06, |
| "loss": 1.444, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.2890855457227139, |
| "grad_norm": 0.212890625, |
| "learning_rate": 7.138643067846608e-06, |
| "loss": 1.4273, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.2920353982300885, |
| "grad_norm": 0.2041015625, |
| "learning_rate": 7.109144542772862e-06, |
| "loss": 1.4132, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.2949852507374631, |
| "grad_norm": 0.17578125, |
| "learning_rate": 7.079646017699116e-06, |
| "loss": 1.4342, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.29793510324483774, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 7.050147492625369e-06, |
| "loss": 1.4515, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.3008849557522124, |
| "grad_norm": 0.1826171875, |
| "learning_rate": 7.020648967551623e-06, |
| "loss": 1.4043, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.30383480825958703, |
| "grad_norm": 0.19140625, |
| "learning_rate": 6.991150442477876e-06, |
| "loss": 1.4069, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.30678466076696165, |
| "grad_norm": 0.189453125, |
| "learning_rate": 6.961651917404131e-06, |
| "loss": 1.418, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.30973451327433627, |
| "grad_norm": 0.1640625, |
| "learning_rate": 6.932153392330384e-06, |
| "loss": 1.4085, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.31268436578171094, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 6.902654867256637e-06, |
| "loss": 1.4305, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.31563421828908556, |
| "grad_norm": 0.1708984375, |
| "learning_rate": 6.873156342182892e-06, |
| "loss": 1.3918, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.3185840707964602, |
| "grad_norm": 0.1728515625, |
| "learning_rate": 6.843657817109145e-06, |
| "loss": 1.4161, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.3215339233038348, |
| "grad_norm": 0.1669921875, |
| "learning_rate": 6.814159292035398e-06, |
| "loss": 1.4034, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.32448377581120946, |
| "grad_norm": 0.236328125, |
| "learning_rate": 6.784660766961652e-06, |
| "loss": 1.3563, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.3274336283185841, |
| "grad_norm": 0.20703125, |
| "learning_rate": 6.7551622418879055e-06, |
| "loss": 1.3911, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.3303834808259587, |
| "grad_norm": 0.1689453125, |
| "learning_rate": 6.72566371681416e-06, |
| "loss": 1.4097, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.3333333333333333, |
| "grad_norm": 0.2001953125, |
| "learning_rate": 6.6961651917404135e-06, |
| "loss": 1.4039, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.336283185840708, |
| "grad_norm": 0.1748046875, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 1.3954, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.3392330383480826, |
| "grad_norm": 0.193359375, |
| "learning_rate": 6.6371681415929215e-06, |
| "loss": 1.4159, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.3421828908554572, |
| "grad_norm": 0.1767578125, |
| "learning_rate": 6.607669616519175e-06, |
| "loss": 1.399, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.34513274336283184, |
| "grad_norm": 0.2001953125, |
| "learning_rate": 6.578171091445428e-06, |
| "loss": 1.3996, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.3480825958702065, |
| "grad_norm": 0.18359375, |
| "learning_rate": 6.548672566371682e-06, |
| "loss": 1.3744, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.35103244837758113, |
| "grad_norm": 0.173828125, |
| "learning_rate": 6.519174041297936e-06, |
| "loss": 1.368, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.35398230088495575, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 6.48967551622419e-06, |
| "loss": 1.365, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.35693215339233036, |
| "grad_norm": 0.185546875, |
| "learning_rate": 6.460176991150443e-06, |
| "loss": 1.3637, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.35988200589970504, |
| "grad_norm": 0.2421875, |
| "learning_rate": 6.430678466076696e-06, |
| "loss": 1.4489, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.36283185840707965, |
| "grad_norm": 0.193359375, |
| "learning_rate": 6.401179941002951e-06, |
| "loss": 1.411, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.36578171091445427, |
| "grad_norm": 0.234375, |
| "learning_rate": 6.371681415929204e-06, |
| "loss": 1.3798, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.3687315634218289, |
| "grad_norm": 0.25390625, |
| "learning_rate": 6.342182890855457e-06, |
| "loss": 1.3825, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.37168141592920356, |
| "grad_norm": 0.177734375, |
| "learning_rate": 6.312684365781712e-06, |
| "loss": 1.3849, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.3746312684365782, |
| "grad_norm": 0.1826171875, |
| "learning_rate": 6.283185840707965e-06, |
| "loss": 1.3233, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.3775811209439528, |
| "grad_norm": 0.171875, |
| "learning_rate": 6.253687315634219e-06, |
| "loss": 1.3421, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.3805309734513274, |
| "grad_norm": 0.2177734375, |
| "learning_rate": 6.224188790560472e-06, |
| "loss": 1.3716, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.3834808259587021, |
| "grad_norm": 0.234375, |
| "learning_rate": 6.194690265486726e-06, |
| "loss": 1.3768, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.3864306784660767, |
| "grad_norm": 0.203125, |
| "learning_rate": 6.16519174041298e-06, |
| "loss": 1.4111, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.3893805309734513, |
| "grad_norm": 0.1796875, |
| "learning_rate": 6.135693215339233e-06, |
| "loss": 1.3745, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.39233038348082594, |
| "grad_norm": 0.1728515625, |
| "learning_rate": 6.1061946902654865e-06, |
| "loss": 1.3745, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.3952802359882006, |
| "grad_norm": 0.193359375, |
| "learning_rate": 6.076696165191741e-06, |
| "loss": 1.3605, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.39823008849557523, |
| "grad_norm": 0.19921875, |
| "learning_rate": 6.0471976401179945e-06, |
| "loss": 1.4043, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.40117994100294985, |
| "grad_norm": 0.1767578125, |
| "learning_rate": 6.0176991150442484e-06, |
| "loss": 1.3669, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.40412979351032446, |
| "grad_norm": 0.1982421875, |
| "learning_rate": 5.9882005899705024e-06, |
| "loss": 1.3748, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.40707964601769914, |
| "grad_norm": 0.185546875, |
| "learning_rate": 5.9587020648967556e-06, |
| "loss": 1.3631, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.41002949852507375, |
| "grad_norm": 0.2099609375, |
| "learning_rate": 5.9292035398230096e-06, |
| "loss": 1.3478, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.41297935103244837, |
| "grad_norm": 0.2177734375, |
| "learning_rate": 5.899705014749263e-06, |
| "loss": 1.4095, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.415929203539823, |
| "grad_norm": 0.1884765625, |
| "learning_rate": 5.870206489675516e-06, |
| "loss": 1.3436, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.41887905604719766, |
| "grad_norm": 0.171875, |
| "learning_rate": 5.840707964601771e-06, |
| "loss": 1.3767, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.4218289085545723, |
| "grad_norm": 0.1708984375, |
| "learning_rate": 5.811209439528024e-06, |
| "loss": 1.3752, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.4247787610619469, |
| "grad_norm": 0.1796875, |
| "learning_rate": 5.781710914454279e-06, |
| "loss": 1.3371, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.4277286135693215, |
| "grad_norm": 0.220703125, |
| "learning_rate": 5.752212389380532e-06, |
| "loss": 1.4065, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.4306784660766962, |
| "grad_norm": 0.171875, |
| "learning_rate": 5.722713864306785e-06, |
| "loss": 1.3321, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.4336283185840708, |
| "grad_norm": 0.1787109375, |
| "learning_rate": 5.693215339233039e-06, |
| "loss": 1.3785, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.4365781710914454, |
| "grad_norm": 0.185546875, |
| "learning_rate": 5.663716814159292e-06, |
| "loss": 1.3556, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.43952802359882004, |
| "grad_norm": 0.1826171875, |
| "learning_rate": 5.634218289085546e-06, |
| "loss": 1.3477, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.4424778761061947, |
| "grad_norm": 0.1767578125, |
| "learning_rate": 5.6047197640118e-06, |
| "loss": 1.3571, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.44542772861356933, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 5.575221238938053e-06, |
| "loss": 1.3285, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.44837758112094395, |
| "grad_norm": 0.1962890625, |
| "learning_rate": 5.545722713864308e-06, |
| "loss": 1.3473, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.45132743362831856, |
| "grad_norm": 0.216796875, |
| "learning_rate": 5.516224188790561e-06, |
| "loss": 1.3607, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.45427728613569324, |
| "grad_norm": 0.251953125, |
| "learning_rate": 5.486725663716814e-06, |
| "loss": 1.3727, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.45722713864306785, |
| "grad_norm": 0.2265625, |
| "learning_rate": 5.457227138643068e-06, |
| "loss": 1.3534, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.46017699115044247, |
| "grad_norm": 0.2197265625, |
| "learning_rate": 5.427728613569322e-06, |
| "loss": 1.3656, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.4631268436578171, |
| "grad_norm": 0.2109375, |
| "learning_rate": 5.398230088495575e-06, |
| "loss": 1.3822, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.46607669616519176, |
| "grad_norm": 0.1865234375, |
| "learning_rate": 5.368731563421829e-06, |
| "loss": 1.3285, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.4690265486725664, |
| "grad_norm": 0.201171875, |
| "learning_rate": 5.3392330383480825e-06, |
| "loss": 1.3563, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.471976401179941, |
| "grad_norm": 0.265625, |
| "learning_rate": 5.309734513274337e-06, |
| "loss": 1.3323, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.4749262536873156, |
| "grad_norm": 0.1748046875, |
| "learning_rate": 5.2802359882005905e-06, |
| "loss": 1.3342, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.4778761061946903, |
| "grad_norm": 0.1630859375, |
| "learning_rate": 5.250737463126844e-06, |
| "loss": 1.3252, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.4808259587020649, |
| "grad_norm": 0.2216796875, |
| "learning_rate": 5.2212389380530985e-06, |
| "loss": 1.3639, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.4837758112094395, |
| "grad_norm": 0.185546875, |
| "learning_rate": 5.191740412979352e-06, |
| "loss": 1.3415, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.48672566371681414, |
| "grad_norm": 0.18359375, |
| "learning_rate": 5.162241887905605e-06, |
| "loss": 1.3212, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.4896755162241888, |
| "grad_norm": 0.203125, |
| "learning_rate": 5.132743362831859e-06, |
| "loss": 1.3474, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.49262536873156343, |
| "grad_norm": 0.208984375, |
| "learning_rate": 5.103244837758113e-06, |
| "loss": 1.361, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.49557522123893805, |
| "grad_norm": 0.181640625, |
| "learning_rate": 5.073746312684367e-06, |
| "loss": 1.3456, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.49852507374631266, |
| "grad_norm": 0.185546875, |
| "learning_rate": 5.04424778761062e-06, |
| "loss": 1.3438, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.5014749262536873, |
| "grad_norm": 0.1904296875, |
| "learning_rate": 5.014749262536873e-06, |
| "loss": 1.3618, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.504424778761062, |
| "grad_norm": 0.208984375, |
| "learning_rate": 4.985250737463127e-06, |
| "loss": 1.2994, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.5073746312684366, |
| "grad_norm": 0.212890625, |
| "learning_rate": 4.955752212389381e-06, |
| "loss": 1.3388, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.5103244837758112, |
| "grad_norm": 0.18359375, |
| "learning_rate": 4.926253687315635e-06, |
| "loss": 1.3331, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.5132743362831859, |
| "grad_norm": 0.19140625, |
| "learning_rate": 4.896755162241888e-06, |
| "loss": 1.3574, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.5162241887905604, |
| "grad_norm": 0.1796875, |
| "learning_rate": 4.867256637168142e-06, |
| "loss": 1.3254, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.5191740412979351, |
| "grad_norm": 0.203125, |
| "learning_rate": 4.837758112094395e-06, |
| "loss": 1.3377, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.5221238938053098, |
| "grad_norm": 0.1884765625, |
| "learning_rate": 4.808259587020649e-06, |
| "loss": 1.3351, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.5250737463126843, |
| "grad_norm": 0.2236328125, |
| "learning_rate": 4.778761061946903e-06, |
| "loss": 1.308, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.528023598820059, |
| "grad_norm": 0.2099609375, |
| "learning_rate": 4.749262536873156e-06, |
| "loss": 1.3153, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.5309734513274337, |
| "grad_norm": 0.189453125, |
| "learning_rate": 4.71976401179941e-06, |
| "loss": 1.3464, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.5339233038348082, |
| "grad_norm": 0.1962890625, |
| "learning_rate": 4.690265486725664e-06, |
| "loss": 1.3148, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.5368731563421829, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 4.660766961651918e-06, |
| "loss": 1.3276, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.5398230088495575, |
| "grad_norm": 0.1953125, |
| "learning_rate": 4.6312684365781714e-06, |
| "loss": 1.3576, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.5427728613569321, |
| "grad_norm": 0.181640625, |
| "learning_rate": 4.6017699115044254e-06, |
| "loss": 1.3077, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.5457227138643068, |
| "grad_norm": 0.18359375, |
| "learning_rate": 4.5722713864306786e-06, |
| "loss": 1.3316, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.5486725663716814, |
| "grad_norm": 0.2099609375, |
| "learning_rate": 4.5427728613569326e-06, |
| "loss": 1.3587, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.551622418879056, |
| "grad_norm": 0.2080078125, |
| "learning_rate": 4.513274336283186e-06, |
| "loss": 1.3164, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.5545722713864307, |
| "grad_norm": 0.1826171875, |
| "learning_rate": 4.48377581120944e-06, |
| "loss": 1.3251, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.5575221238938053, |
| "grad_norm": 0.189453125, |
| "learning_rate": 4.454277286135694e-06, |
| "loss": 1.2977, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.56047197640118, |
| "grad_norm": 0.2158203125, |
| "learning_rate": 4.424778761061948e-06, |
| "loss": 1.3375, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.5634218289085545, |
| "grad_norm": 0.19921875, |
| "learning_rate": 4.395280235988201e-06, |
| "loss": 1.3363, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.5663716814159292, |
| "grad_norm": 0.1787109375, |
| "learning_rate": 4.365781710914455e-06, |
| "loss": 1.3525, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.5693215339233039, |
| "grad_norm": 0.1787109375, |
| "learning_rate": 4.336283185840709e-06, |
| "loss": 1.3077, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.5722713864306784, |
| "grad_norm": 0.224609375, |
| "learning_rate": 4.306784660766962e-06, |
| "loss": 1.3048, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.5752212389380531, |
| "grad_norm": 0.1982421875, |
| "learning_rate": 4.277286135693216e-06, |
| "loss": 1.2968, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.5781710914454278, |
| "grad_norm": 0.2490234375, |
| "learning_rate": 4.247787610619469e-06, |
| "loss": 1.3044, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.5811209439528023, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 4.218289085545723e-06, |
| "loss": 1.3061, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.584070796460177, |
| "grad_norm": 0.22265625, |
| "learning_rate": 4.188790560471977e-06, |
| "loss": 1.3133, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.5870206489675516, |
| "grad_norm": 0.2001953125, |
| "learning_rate": 4.15929203539823e-06, |
| "loss": 1.314, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.5899705014749262, |
| "grad_norm": 0.2060546875, |
| "learning_rate": 4.129793510324484e-06, |
| "loss": 1.3169, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.5929203539823009, |
| "grad_norm": 0.177734375, |
| "learning_rate": 4.100294985250738e-06, |
| "loss": 1.3193, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.5958702064896755, |
| "grad_norm": 0.1865234375, |
| "learning_rate": 4.070796460176992e-06, |
| "loss": 1.3084, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.5988200589970502, |
| "grad_norm": 0.197265625, |
| "learning_rate": 4.041297935103245e-06, |
| "loss": 1.2994, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.6017699115044248, |
| "grad_norm": 0.2490234375, |
| "learning_rate": 4.011799410029498e-06, |
| "loss": 1.3244, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.6047197640117994, |
| "grad_norm": 0.19921875, |
| "learning_rate": 3.982300884955752e-06, |
| "loss": 1.3368, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.6076696165191741, |
| "grad_norm": 0.1884765625, |
| "learning_rate": 3.952802359882006e-06, |
| "loss": 1.2936, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.6106194690265486, |
| "grad_norm": 0.20703125, |
| "learning_rate": 3.9233038348082595e-06, |
| "loss": 1.2995, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.6135693215339233, |
| "grad_norm": 0.2294921875, |
| "learning_rate": 3.8938053097345135e-06, |
| "loss": 1.3311, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.616519174041298, |
| "grad_norm": 0.1943359375, |
| "learning_rate": 3.8643067846607675e-06, |
| "loss": 1.3202, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.6194690265486725, |
| "grad_norm": 0.1943359375, |
| "learning_rate": 3.8348082595870215e-06, |
| "loss": 1.3003, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.6224188790560472, |
| "grad_norm": 0.193359375, |
| "learning_rate": 3.8053097345132746e-06, |
| "loss": 1.3034, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.6253687315634219, |
| "grad_norm": 0.2119140625, |
| "learning_rate": 3.775811209439528e-06, |
| "loss": 1.3281, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.6283185840707964, |
| "grad_norm": 0.197265625, |
| "learning_rate": 3.746312684365782e-06, |
| "loss": 1.3316, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.6312684365781711, |
| "grad_norm": 0.205078125, |
| "learning_rate": 3.7168141592920357e-06, |
| "loss": 1.2954, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.6342182890855457, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 3.6873156342182893e-06, |
| "loss": 1.3105, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.6371681415929203, |
| "grad_norm": 0.22265625, |
| "learning_rate": 3.657817109144543e-06, |
| "loss": 1.3402, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.640117994100295, |
| "grad_norm": 0.21875, |
| "learning_rate": 3.628318584070797e-06, |
| "loss": 1.3048, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.6430678466076696, |
| "grad_norm": 0.1923828125, |
| "learning_rate": 3.598820058997051e-06, |
| "loss": 1.3021, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.6460176991150443, |
| "grad_norm": 0.193359375, |
| "learning_rate": 3.569321533923304e-06, |
| "loss": 1.2856, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.6489675516224189, |
| "grad_norm": 0.240234375, |
| "learning_rate": 3.539823008849558e-06, |
| "loss": 1.3281, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.6519174041297935, |
| "grad_norm": 0.212890625, |
| "learning_rate": 3.5103244837758115e-06, |
| "loss": 1.3158, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.6548672566371682, |
| "grad_norm": 0.18359375, |
| "learning_rate": 3.4808259587020655e-06, |
| "loss": 1.3437, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.6578171091445427, |
| "grad_norm": 0.263671875, |
| "learning_rate": 3.4513274336283186e-06, |
| "loss": 1.3024, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.6607669616519174, |
| "grad_norm": 0.21484375, |
| "learning_rate": 3.4218289085545726e-06, |
| "loss": 1.2664, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.6637168141592921, |
| "grad_norm": 0.21875, |
| "learning_rate": 3.392330383480826e-06, |
| "loss": 1.3116, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 0.1923828125, |
| "learning_rate": 3.36283185840708e-06, |
| "loss": 1.2882, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.6696165191740413, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 3.3333333333333333e-06, |
| "loss": 1.3006, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.672566371681416, |
| "grad_norm": 0.1767578125, |
| "learning_rate": 3.3038348082595873e-06, |
| "loss": 1.2978, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.6755162241887905, |
| "grad_norm": 0.1943359375, |
| "learning_rate": 3.274336283185841e-06, |
| "loss": 1.2895, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.6784660766961652, |
| "grad_norm": 0.171875, |
| "learning_rate": 3.244837758112095e-06, |
| "loss": 1.3211, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.6814159292035398, |
| "grad_norm": 0.19921875, |
| "learning_rate": 3.215339233038348e-06, |
| "loss": 1.3047, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.6843657817109144, |
| "grad_norm": 0.1748046875, |
| "learning_rate": 3.185840707964602e-06, |
| "loss": 1.2873, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.6873156342182891, |
| "grad_norm": 0.208984375, |
| "learning_rate": 3.156342182890856e-06, |
| "loss": 1.2933, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.6902654867256637, |
| "grad_norm": 0.1962890625, |
| "learning_rate": 3.1268436578171095e-06, |
| "loss": 1.2708, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.6932153392330384, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 3.097345132743363e-06, |
| "loss": 1.3345, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.696165191740413, |
| "grad_norm": 0.1923828125, |
| "learning_rate": 3.0678466076696167e-06, |
| "loss": 1.3056, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.6991150442477876, |
| "grad_norm": 0.17578125, |
| "learning_rate": 3.0383480825958707e-06, |
| "loss": 1.2851, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.7020648967551623, |
| "grad_norm": 0.177734375, |
| "learning_rate": 3.0088495575221242e-06, |
| "loss": 1.287, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.7050147492625368, |
| "grad_norm": 0.18359375, |
| "learning_rate": 2.9793510324483778e-06, |
| "loss": 1.3014, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.7079646017699115, |
| "grad_norm": 0.197265625, |
| "learning_rate": 2.9498525073746313e-06, |
| "loss": 1.2787, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.7109144542772862, |
| "grad_norm": 0.1787109375, |
| "learning_rate": 2.9203539823008853e-06, |
| "loss": 1.2925, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.7138643067846607, |
| "grad_norm": 0.2177734375, |
| "learning_rate": 2.8908554572271393e-06, |
| "loss": 1.2843, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.7168141592920354, |
| "grad_norm": 0.212890625, |
| "learning_rate": 2.8613569321533925e-06, |
| "loss": 1.3316, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.7197640117994101, |
| "grad_norm": 0.1796875, |
| "learning_rate": 2.831858407079646e-06, |
| "loss": 1.2912, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.7227138643067846, |
| "grad_norm": 0.2197265625, |
| "learning_rate": 2.8023598820059e-06, |
| "loss": 1.3214, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.7256637168141593, |
| "grad_norm": 0.1865234375, |
| "learning_rate": 2.772861356932154e-06, |
| "loss": 1.282, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.7286135693215339, |
| "grad_norm": 0.19921875, |
| "learning_rate": 2.743362831858407e-06, |
| "loss": 1.3183, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.7315634218289085, |
| "grad_norm": 0.2314453125, |
| "learning_rate": 2.713864306784661e-06, |
| "loss": 1.28, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.7345132743362832, |
| "grad_norm": 0.296875, |
| "learning_rate": 2.6843657817109147e-06, |
| "loss": 1.3048, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.7374631268436578, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 2.6548672566371687e-06, |
| "loss": 1.2855, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.7404129793510325, |
| "grad_norm": 0.185546875, |
| "learning_rate": 2.625368731563422e-06, |
| "loss": 1.2807, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.7433628318584071, |
| "grad_norm": 0.19140625, |
| "learning_rate": 2.595870206489676e-06, |
| "loss": 1.2861, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.7463126843657817, |
| "grad_norm": 0.1953125, |
| "learning_rate": 2.5663716814159294e-06, |
| "loss": 1.3174, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.7492625368731564, |
| "grad_norm": 0.275390625, |
| "learning_rate": 2.5368731563421834e-06, |
| "loss": 1.2691, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.7522123893805309, |
| "grad_norm": 0.248046875, |
| "learning_rate": 2.5073746312684365e-06, |
| "loss": 1.2973, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.7551622418879056, |
| "grad_norm": 0.2158203125, |
| "learning_rate": 2.4778761061946905e-06, |
| "loss": 1.3183, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.7581120943952803, |
| "grad_norm": 0.234375, |
| "learning_rate": 2.448377581120944e-06, |
| "loss": 1.2897, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.7610619469026548, |
| "grad_norm": 0.1796875, |
| "learning_rate": 2.4188790560471976e-06, |
| "loss": 1.2821, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.7640117994100295, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 2.3893805309734516e-06, |
| "loss": 1.2997, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.7669616519174042, |
| "grad_norm": 0.197265625, |
| "learning_rate": 2.359882005899705e-06, |
| "loss": 1.2973, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.7699115044247787, |
| "grad_norm": 0.181640625, |
| "learning_rate": 2.330383480825959e-06, |
| "loss": 1.2923, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.7728613569321534, |
| "grad_norm": 0.1982421875, |
| "learning_rate": 2.3008849557522127e-06, |
| "loss": 1.2672, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.775811209439528, |
| "grad_norm": 0.1982421875, |
| "learning_rate": 2.2713864306784663e-06, |
| "loss": 1.3076, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.7787610619469026, |
| "grad_norm": 0.2060546875, |
| "learning_rate": 2.24188790560472e-06, |
| "loss": 1.2684, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.7817109144542773, |
| "grad_norm": 0.1767578125, |
| "learning_rate": 2.212389380530974e-06, |
| "loss": 1.2789, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.7846607669616519, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 2.1828908554572274e-06, |
| "loss": 1.2669, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.7876106194690266, |
| "grad_norm": 0.193359375, |
| "learning_rate": 2.153392330383481e-06, |
| "loss": 1.2848, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.7905604719764012, |
| "grad_norm": 0.185546875, |
| "learning_rate": 2.1238938053097345e-06, |
| "loss": 1.2822, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.7935103244837758, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 2.0943952802359885e-06, |
| "loss": 1.2974, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.7964601769911505, |
| "grad_norm": 0.19140625, |
| "learning_rate": 2.064896755162242e-06, |
| "loss": 1.3001, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.799410029498525, |
| "grad_norm": 0.259765625, |
| "learning_rate": 2.035398230088496e-06, |
| "loss": 1.2888, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.8023598820058997, |
| "grad_norm": 0.20703125, |
| "learning_rate": 2.005899705014749e-06, |
| "loss": 1.2936, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.8053097345132744, |
| "grad_norm": 0.19140625, |
| "learning_rate": 1.976401179941003e-06, |
| "loss": 1.2981, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.8082595870206489, |
| "grad_norm": 0.2294921875, |
| "learning_rate": 1.9469026548672567e-06, |
| "loss": 1.2572, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.8112094395280236, |
| "grad_norm": 0.1904296875, |
| "learning_rate": 1.9174041297935107e-06, |
| "loss": 1.3272, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.8141592920353983, |
| "grad_norm": 0.205078125, |
| "learning_rate": 1.887905604719764e-06, |
| "loss": 1.2519, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.8171091445427728, |
| "grad_norm": 0.1884765625, |
| "learning_rate": 1.8584070796460179e-06, |
| "loss": 1.2849, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.8200589970501475, |
| "grad_norm": 0.201171875, |
| "learning_rate": 1.8289085545722714e-06, |
| "loss": 1.3335, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.8230088495575221, |
| "grad_norm": 0.1875, |
| "learning_rate": 1.7994100294985254e-06, |
| "loss": 1.2797, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.8259587020648967, |
| "grad_norm": 0.2099609375, |
| "learning_rate": 1.769911504424779e-06, |
| "loss": 1.2837, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.8289085545722714, |
| "grad_norm": 0.185546875, |
| "learning_rate": 1.7404129793510328e-06, |
| "loss": 1.2892, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.831858407079646, |
| "grad_norm": 0.1943359375, |
| "learning_rate": 1.7109144542772863e-06, |
| "loss": 1.2632, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.8348082595870207, |
| "grad_norm": 0.208984375, |
| "learning_rate": 1.68141592920354e-06, |
| "loss": 1.2891, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.8377581120943953, |
| "grad_norm": 0.2265625, |
| "learning_rate": 1.6519174041297937e-06, |
| "loss": 1.3033, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.8407079646017699, |
| "grad_norm": 0.216796875, |
| "learning_rate": 1.6224188790560474e-06, |
| "loss": 1.2748, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.8436578171091446, |
| "grad_norm": 0.2216796875, |
| "learning_rate": 1.592920353982301e-06, |
| "loss": 1.3068, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.8466076696165191, |
| "grad_norm": 0.185546875, |
| "learning_rate": 1.5634218289085548e-06, |
| "loss": 1.332, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.8495575221238938, |
| "grad_norm": 0.2099609375, |
| "learning_rate": 1.5339233038348083e-06, |
| "loss": 1.2736, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.8525073746312685, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 1.5044247787610621e-06, |
| "loss": 1.2714, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.855457227138643, |
| "grad_norm": 0.216796875, |
| "learning_rate": 1.4749262536873157e-06, |
| "loss": 1.2533, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.8584070796460177, |
| "grad_norm": 0.1962890625, |
| "learning_rate": 1.4454277286135697e-06, |
| "loss": 1.3096, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.8613569321533924, |
| "grad_norm": 0.1884765625, |
| "learning_rate": 1.415929203539823e-06, |
| "loss": 1.2754, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.8643067846607669, |
| "grad_norm": 0.27734375, |
| "learning_rate": 1.386430678466077e-06, |
| "loss": 1.2868, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.8672566371681416, |
| "grad_norm": 0.20703125, |
| "learning_rate": 1.3569321533923306e-06, |
| "loss": 1.2849, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.8702064896755162, |
| "grad_norm": 0.1923828125, |
| "learning_rate": 1.3274336283185843e-06, |
| "loss": 1.2968, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.8731563421828908, |
| "grad_norm": 0.189453125, |
| "learning_rate": 1.297935103244838e-06, |
| "loss": 1.2757, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.8761061946902655, |
| "grad_norm": 0.201171875, |
| "learning_rate": 1.2684365781710917e-06, |
| "loss": 1.286, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.8790560471976401, |
| "grad_norm": 0.1796875, |
| "learning_rate": 1.2389380530973452e-06, |
| "loss": 1.302, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.8820058997050148, |
| "grad_norm": 0.197265625, |
| "learning_rate": 1.2094395280235988e-06, |
| "loss": 1.2995, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.8849557522123894, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 1.1799410029498526e-06, |
| "loss": 1.2917, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.887905604719764, |
| "grad_norm": 0.1796875, |
| "learning_rate": 1.1504424778761064e-06, |
| "loss": 1.2757, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.8908554572271387, |
| "grad_norm": 0.205078125, |
| "learning_rate": 1.12094395280236e-06, |
| "loss": 1.2715, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.8938053097345132, |
| "grad_norm": 0.193359375, |
| "learning_rate": 1.0914454277286137e-06, |
| "loss": 1.3053, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.8967551622418879, |
| "grad_norm": 0.1796875, |
| "learning_rate": 1.0619469026548673e-06, |
| "loss": 1.2782, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.8997050147492626, |
| "grad_norm": 0.19140625, |
| "learning_rate": 1.032448377581121e-06, |
| "loss": 1.2747, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.9026548672566371, |
| "grad_norm": 0.193359375, |
| "learning_rate": 1.0029498525073746e-06, |
| "loss": 1.3179, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.9056047197640118, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 9.734513274336284e-07, |
| "loss": 1.2919, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.9085545722713865, |
| "grad_norm": 0.1865234375, |
| "learning_rate": 9.43952802359882e-07, |
| "loss": 1.271, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.911504424778761, |
| "grad_norm": 0.177734375, |
| "learning_rate": 9.144542772861357e-07, |
| "loss": 1.2845, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.9144542772861357, |
| "grad_norm": 0.1982421875, |
| "learning_rate": 8.849557522123895e-07, |
| "loss": 1.3025, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.9174041297935103, |
| "grad_norm": 0.203125, |
| "learning_rate": 8.554572271386432e-07, |
| "loss": 1.2876, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.9203539823008849, |
| "grad_norm": 0.1787109375, |
| "learning_rate": 8.259587020648968e-07, |
| "loss": 1.2806, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.9233038348082596, |
| "grad_norm": 0.1982421875, |
| "learning_rate": 7.964601769911505e-07, |
| "loss": 1.2914, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.9262536873156342, |
| "grad_norm": 0.1982421875, |
| "learning_rate": 7.669616519174042e-07, |
| "loss": 1.3053, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.9292035398230089, |
| "grad_norm": 0.1884765625, |
| "learning_rate": 7.374631268436578e-07, |
| "loss": 1.2478, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.9321533923303835, |
| "grad_norm": 0.189453125, |
| "learning_rate": 7.079646017699115e-07, |
| "loss": 1.3214, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.9351032448377581, |
| "grad_norm": 0.193359375, |
| "learning_rate": 6.784660766961653e-07, |
| "loss": 1.2822, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.9380530973451328, |
| "grad_norm": 0.2578125, |
| "learning_rate": 6.48967551622419e-07, |
| "loss": 1.2898, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.9410029498525073, |
| "grad_norm": 0.181640625, |
| "learning_rate": 6.194690265486726e-07, |
| "loss": 1.315, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.943952802359882, |
| "grad_norm": 0.2021484375, |
| "learning_rate": 5.899705014749263e-07, |
| "loss": 1.2938, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.9469026548672567, |
| "grad_norm": 0.2109375, |
| "learning_rate": 5.6047197640118e-07, |
| "loss": 1.2782, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.9498525073746312, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 5.309734513274336e-07, |
| "loss": 1.2715, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.9528023598820059, |
| "grad_norm": 0.1982421875, |
| "learning_rate": 5.014749262536873e-07, |
| "loss": 1.3221, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.9557522123893806, |
| "grad_norm": 0.201171875, |
| "learning_rate": 4.71976401179941e-07, |
| "loss": 1.2846, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.9587020648967551, |
| "grad_norm": 0.1923828125, |
| "learning_rate": 4.4247787610619474e-07, |
| "loss": 1.274, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.9616519174041298, |
| "grad_norm": 0.2060546875, |
| "learning_rate": 4.129793510324484e-07, |
| "loss": 1.2616, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.9646017699115044, |
| "grad_norm": 0.2255859375, |
| "learning_rate": 3.834808259587021e-07, |
| "loss": 1.3451, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.967551622418879, |
| "grad_norm": 0.185546875, |
| "learning_rate": 3.5398230088495575e-07, |
| "loss": 1.3024, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.9705014749262537, |
| "grad_norm": 0.1884765625, |
| "learning_rate": 3.244837758112095e-07, |
| "loss": 1.2823, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.9734513274336283, |
| "grad_norm": 0.1884765625, |
| "learning_rate": 2.9498525073746315e-07, |
| "loss": 1.2892, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.976401179941003, |
| "grad_norm": 0.181640625, |
| "learning_rate": 2.654867256637168e-07, |
| "loss": 1.3174, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.9793510324483776, |
| "grad_norm": 0.1796875, |
| "learning_rate": 2.359882005899705e-07, |
| "loss": 1.266, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.9823008849557522, |
| "grad_norm": 0.1953125, |
| "learning_rate": 2.064896755162242e-07, |
| "loss": 1.3157, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.9852507374631269, |
| "grad_norm": 0.2021484375, |
| "learning_rate": 1.7699115044247788e-07, |
| "loss": 1.2756, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.9882005899705014, |
| "grad_norm": 0.2041015625, |
| "learning_rate": 1.4749262536873157e-07, |
| "loss": 1.3074, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.9911504424778761, |
| "grad_norm": 0.19921875, |
| "learning_rate": 1.1799410029498526e-07, |
| "loss": 1.3235, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.9941002949852508, |
| "grad_norm": 0.1953125, |
| "learning_rate": 8.849557522123894e-08, |
| "loss": 1.2919, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.9970501474926253, |
| "grad_norm": 0.19140625, |
| "learning_rate": 5.899705014749263e-08, |
| "loss": 1.307, |
| "step": 338 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 2.9498525073746314e-08, |
| "loss": 1.2935, |
| "step": 339 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 339, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 0, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.0764679328898744e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|