{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 713,
  "global_step": 713,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.001402524544179523, "grad_norm": 0.89453125, "learning_rate": 1e-05, "loss": 2.048, "step": 1 },
    { "epoch": 0.002805049088359046, "grad_norm": 0.765625, "learning_rate": 9.985974754558206e-06, "loss": 2.0929, "step": 2 },
    { "epoch": 0.004207573632538569, "grad_norm": 0.84765625, "learning_rate": 9.97194950911641e-06, "loss": 1.8896, "step": 3 },
    { "epoch": 0.005610098176718092, "grad_norm": 0.7734375, "learning_rate": 9.957924263674615e-06, "loss": 2.0957, "step": 4 },
    { "epoch": 0.0070126227208976155, "grad_norm": 0.79296875, "learning_rate": 9.94389901823282e-06, "loss": 1.8981, "step": 5 },
    { "epoch": 0.008415147265077139, "grad_norm": 0.66015625, "learning_rate": 9.929873772791025e-06, "loss": 2.0162, "step": 6 },
    { "epoch": 0.009817671809256662, "grad_norm": 0.67578125, "learning_rate": 9.915848527349229e-06, "loss": 1.7155, "step": 7 },
    { "epoch": 0.011220196353436185, "grad_norm": 0.6875, "learning_rate": 9.901823281907434e-06, "loss": 2.0333, "step": 8 },
    { "epoch": 0.012622720897615708, "grad_norm": 0.462890625, "learning_rate": 9.887798036465639e-06, "loss": 1.6522, "step": 9 },
    { "epoch": 0.014025245441795231, "grad_norm": 1.765625, "learning_rate": 9.873772791023844e-06, "loss": 1.8498, "step": 10 },
    { "epoch": 0.015427769985974754, "grad_norm": 0.5546875, "learning_rate": 9.859747545582048e-06, "loss": 1.9811, "step": 11 },
    { "epoch": 0.016830294530154277, "grad_norm": 0.67578125, "learning_rate": 9.845722300140253e-06, "loss": 1.6495, "step": 12 },
    { "epoch": 0.0182328190743338, "grad_norm": 0.37890625, "learning_rate": 9.831697054698458e-06, "loss": 1.8677, "step": 13 },
    { "epoch": 0.019635343618513323, "grad_norm": 0.4765625, "learning_rate": 9.817671809256662e-06, "loss": 1.998, "step": 14 },
    { "epoch": 0.021037868162692847, "grad_norm": 0.380859375, "learning_rate": 9.803646563814867e-06, "loss": 1.8926, "step": 15 },
    { "epoch": 0.02244039270687237, "grad_norm": 0.36328125, "learning_rate": 9.789621318373072e-06, "loss": 1.7889, "step": 16 },
    { "epoch": 0.023842917251051893, "grad_norm": 0.30859375, "learning_rate": 9.775596072931277e-06, "loss": 1.7603, "step": 17 },
    { "epoch": 0.025245441795231416, "grad_norm": 0.4609375, "learning_rate": 9.761570827489481e-06, "loss": 2.0804, "step": 18 },
    { "epoch": 0.02664796633941094, "grad_norm": 0.265625, "learning_rate": 9.747545582047686e-06, "loss": 1.8, "step": 19 },
    { "epoch": 0.028050490883590462, "grad_norm": 0.2578125, "learning_rate": 9.733520336605891e-06, "loss": 1.5292, "step": 20 },
    { "epoch": 0.029453015427769985, "grad_norm": 0.2734375, "learning_rate": 9.719495091164096e-06, "loss": 1.5226, "step": 21 },
    { "epoch": 0.030855539971949508, "grad_norm": 0.28125, "learning_rate": 9.7054698457223e-06, "loss": 1.8072, "step": 22 },
    { "epoch": 0.03225806451612903, "grad_norm": 0.34765625, "learning_rate": 9.691444600280505e-06, "loss": 1.8934, "step": 23 },
    { "epoch": 0.033660589060308554, "grad_norm": 0.248046875, "learning_rate": 9.67741935483871e-06, "loss": 1.4903, "step": 24 },
    { "epoch": 0.03506311360448808, "grad_norm": 0.1728515625, "learning_rate": 9.663394109396916e-06, "loss": 1.4165, "step": 25 },
    { "epoch": 0.0364656381486676, "grad_norm": 0.19921875, "learning_rate": 9.649368863955119e-06, "loss": 1.5423, "step": 26 },
    { "epoch": 0.037868162692847124, "grad_norm": 0.287109375, "learning_rate": 9.635343618513324e-06, "loss": 1.6054, "step": 27 },
    { "epoch": 0.03927068723702665, "grad_norm": 0.328125, "learning_rate": 9.62131837307153e-06, "loss": 1.9075, "step": 28 },
    { "epoch": 0.04067321178120617, "grad_norm": 0.298828125, "learning_rate": 9.607293127629735e-06, "loss": 1.7197, "step": 29 },
    { "epoch": 0.04207573632538569, "grad_norm": 0.1884765625, "learning_rate": 9.593267882187938e-06, "loss": 1.52, "step": 30 },
    { "epoch": 0.043478260869565216, "grad_norm": 0.240234375, "learning_rate": 9.579242636746143e-06, "loss": 1.5502, "step": 31 },
    { "epoch": 0.04488078541374474, "grad_norm": 0.275390625, "learning_rate": 9.565217391304349e-06, "loss": 1.7388, "step": 32 },
    { "epoch": 0.04628330995792426, "grad_norm": 0.25390625, "learning_rate": 9.551192145862554e-06, "loss": 1.6555, "step": 33 },
    { "epoch": 0.047685834502103785, "grad_norm": 0.1904296875, "learning_rate": 9.537166900420757e-06, "loss": 1.5854, "step": 34 },
    { "epoch": 0.04908835904628331, "grad_norm": 0.19140625, "learning_rate": 9.523141654978963e-06, "loss": 1.6306, "step": 35 },
    { "epoch": 0.05049088359046283, "grad_norm": 0.1416015625, "learning_rate": 9.509116409537168e-06, "loss": 1.3931, "step": 36 },
    { "epoch": 0.051893408134642355, "grad_norm": 0.3046875, "learning_rate": 9.495091164095373e-06, "loss": 1.8825, "step": 37 },
    { "epoch": 0.05329593267882188, "grad_norm": 0.1748046875, "learning_rate": 9.481065918653577e-06, "loss": 1.1516, "step": 38 },
    { "epoch": 0.0546984572230014, "grad_norm": 0.203125, "learning_rate": 9.467040673211782e-06, "loss": 1.5705, "step": 39 },
    { "epoch": 0.056100981767180924, "grad_norm": 0.1591796875, "learning_rate": 9.453015427769987e-06, "loss": 1.4537, "step": 40 },
    { "epoch": 0.05750350631136045, "grad_norm": 0.197265625, "learning_rate": 9.438990182328192e-06, "loss": 1.3941, "step": 41 },
    { "epoch": 0.05890603085553997, "grad_norm": 0.2431640625, "learning_rate": 9.424964936886396e-06, "loss": 1.7377, "step": 42 },
    { "epoch": 0.06030855539971949, "grad_norm": 0.201171875, "learning_rate": 9.410939691444601e-06, "loss": 1.5313, "step": 43 },
    { "epoch": 0.061711079943899017, "grad_norm": 0.1962890625, "learning_rate": 9.396914446002806e-06, "loss": 1.451, "step": 44 },
    { "epoch": 0.06311360448807854, "grad_norm": 0.171875, "learning_rate": 9.382889200561011e-06, "loss": 1.5415, "step": 45 },
    { "epoch": 0.06451612903225806, "grad_norm": 0.294921875, "learning_rate": 9.368863955119215e-06, "loss": 1.6808, "step": 46 },
    { "epoch": 0.06591865357643759, "grad_norm": 0.1708984375, "learning_rate": 9.35483870967742e-06, "loss": 1.5434, "step": 47 },
    { "epoch": 0.06732117812061711, "grad_norm": 0.1728515625, "learning_rate": 9.340813464235625e-06, "loss": 1.5043, "step": 48 },
    { "epoch": 0.06872370266479663, "grad_norm": 0.203125, "learning_rate": 9.32678821879383e-06, "loss": 1.6039, "step": 49 },
    { "epoch": 0.07012622720897616, "grad_norm": 0.2314453125, "learning_rate": 9.312762973352034e-06, "loss": 1.5649, "step": 50 },
    { "epoch": 0.07152875175315568, "grad_norm": 0.2080078125, "learning_rate": 9.298737727910239e-06, "loss": 1.5116, "step": 51 },
    { "epoch": 0.0729312762973352, "grad_norm": 0.1650390625, "learning_rate": 9.284712482468444e-06, "loss": 1.469, "step": 52 },
    { "epoch": 0.07433380084151472, "grad_norm": 0.14453125, "learning_rate": 9.27068723702665e-06, "loss": 1.4499, "step": 53 },
    { "epoch": 0.07573632538569425, "grad_norm": 0.201171875, "learning_rate": 9.256661991584853e-06, "loss": 1.5955, "step": 54 },
    { "epoch": 0.07713884992987377, "grad_norm": 0.2314453125, "learning_rate": 9.242636746143058e-06, "loss": 1.5542, "step": 55 },
    { "epoch": 0.0785413744740533, "grad_norm": 0.322265625, "learning_rate": 9.228611500701263e-06, "loss": 1.3431, "step": 56 },
    { "epoch": 0.07994389901823282, "grad_norm": 0.1396484375, "learning_rate": 9.214586255259467e-06, "loss": 1.5128, "step": 57 },
    { "epoch": 0.08134642356241234, "grad_norm": 0.12060546875, "learning_rate": 9.200561009817672e-06, "loss": 1.3224, "step": 58 },
    { "epoch": 0.08274894810659186, "grad_norm": 0.1474609375, "learning_rate": 9.186535764375877e-06, "loss": 1.4161, "step": 59 },
    { "epoch": 0.08415147265077139, "grad_norm": 0.265625, "learning_rate": 9.172510518934083e-06, "loss": 1.4785, "step": 60 },
    { "epoch": 0.08555399719495091, "grad_norm": 0.298828125, "learning_rate": 9.158485273492286e-06, "loss": 1.555, "step": 61 },
    { "epoch": 0.08695652173913043, "grad_norm": 0.126953125, "learning_rate": 9.144460028050491e-06, "loss": 1.3373, "step": 62 },
    { "epoch": 0.08835904628330996, "grad_norm": 0.169921875, "learning_rate": 9.130434782608697e-06, "loss": 1.4302, "step": 63 },
    { "epoch": 0.08976157082748948, "grad_norm": 0.1904296875, "learning_rate": 9.116409537166902e-06, "loss": 1.4033, "step": 64 },
    { "epoch": 0.091164095371669, "grad_norm": 0.138671875, "learning_rate": 9.102384291725105e-06, "loss": 1.3426, "step": 65 },
    { "epoch": 0.09256661991584852, "grad_norm": 0.216796875, "learning_rate": 9.08835904628331e-06, "loss": 1.5167, "step": 66 },
    { "epoch": 0.09396914446002805, "grad_norm": 0.154296875, "learning_rate": 9.074333800841516e-06, "loss": 1.3468, "step": 67 },
    { "epoch": 0.09537166900420757, "grad_norm": 0.1748046875, "learning_rate": 9.060308555399721e-06, "loss": 1.4049, "step": 68 },
    { "epoch": 0.0967741935483871, "grad_norm": 0.1513671875, "learning_rate": 9.046283309957924e-06, "loss": 1.4426, "step": 69 },
    { "epoch": 0.09817671809256662, "grad_norm": 0.138671875, "learning_rate": 9.03225806451613e-06, "loss": 1.3865, "step": 70 },
    { "epoch": 0.09957924263674614, "grad_norm": 0.197265625, "learning_rate": 9.018232819074335e-06, "loss": 1.5355, "step": 71 },
    { "epoch": 0.10098176718092566, "grad_norm": 0.1875, "learning_rate": 9.00420757363254e-06, "loss": 1.757, "step": 72 },
    { "epoch": 0.10238429172510519, "grad_norm": 0.1484375, "learning_rate": 8.990182328190744e-06, "loss": 1.5123, "step": 73 },
    { "epoch": 0.10378681626928471, "grad_norm": 0.19921875, "learning_rate": 8.976157082748949e-06, "loss": 1.4946, "step": 74 },
    { "epoch": 0.10518934081346423, "grad_norm": 0.28125, "learning_rate": 8.962131837307152e-06, "loss": 1.3608, "step": 75 },
    { "epoch": 0.10659186535764376, "grad_norm": 0.1005859375, "learning_rate": 8.94810659186536e-06, "loss": 1.1957, "step": 76 },
    { "epoch": 0.10799438990182328, "grad_norm": 0.125, "learning_rate": 8.934081346423563e-06, "loss": 1.2561, "step": 77 },
    { "epoch": 0.1093969144460028, "grad_norm": 0.11865234375, "learning_rate": 8.920056100981768e-06, "loss": 1.2952, "step": 78 },
    { "epoch": 0.11079943899018233, "grad_norm": 0.1513671875, "learning_rate": 8.906030855539971e-06, "loss": 1.2421, "step": 79 },
    { "epoch": 0.11220196353436185, "grad_norm": 0.125, "learning_rate": 8.892005610098178e-06, "loss": 1.4074, "step": 80 },
    { "epoch": 0.11360448807854137, "grad_norm": 0.123046875, "learning_rate": 8.877980364656382e-06, "loss": 1.2973, "step": 81 },
    { "epoch": 0.1150070126227209, "grad_norm": 0.11083984375, "learning_rate": 8.863955119214587e-06, "loss": 1.3492, "step": 82 },
    { "epoch": 0.11640953716690042, "grad_norm": 0.1455078125, "learning_rate": 8.84992987377279e-06, "loss": 1.2249, "step": 83 },
    { "epoch": 0.11781206171107994, "grad_norm": 0.1220703125, "learning_rate": 8.835904628330997e-06, "loss": 1.4241, "step": 84 },
    { "epoch": 0.11921458625525946, "grad_norm": 0.12255859375, "learning_rate": 8.821879382889201e-06, "loss": 1.3361, "step": 85 },
    { "epoch": 0.12061711079943899, "grad_norm": 0.11474609375, "learning_rate": 8.807854137447406e-06, "loss": 1.4971, "step": 86 },
    { "epoch": 0.12201963534361851, "grad_norm": 0.109375, "learning_rate": 8.79382889200561e-06, "loss": 1.1059, "step": 87 },
    { "epoch": 0.12342215988779803, "grad_norm": 0.2099609375, "learning_rate": 8.779803646563817e-06, "loss": 1.5296, "step": 88 },
    { "epoch": 0.12482468443197756, "grad_norm": 0.18359375, "learning_rate": 8.76577840112202e-06, "loss": 1.5274, "step": 89 },
    { "epoch": 0.12622720897615708, "grad_norm": 0.162109375, "learning_rate": 8.751753155680225e-06, "loss": 1.3704, "step": 90 },
    { "epoch": 0.1276297335203366, "grad_norm": 0.15625, "learning_rate": 8.737727910238429e-06, "loss": 1.3388, "step": 91 },
    { "epoch": 0.12903225806451613, "grad_norm": 0.1298828125, "learning_rate": 8.723702664796636e-06, "loss": 1.3987, "step": 92 },
    { "epoch": 0.13043478260869565, "grad_norm": 0.1279296875, "learning_rate": 8.70967741935484e-06, "loss": 1.2487, "step": 93 },
    { "epoch": 0.13183730715287517, "grad_norm": 0.181640625, "learning_rate": 8.695652173913044e-06, "loss": 1.345, "step": 94 },
    { "epoch": 0.1332398316970547, "grad_norm": 0.125, "learning_rate": 8.681626928471248e-06, "loss": 1.3105, "step": 95 },
    { "epoch": 0.13464235624123422, "grad_norm": 0.1513671875, "learning_rate": 8.667601683029455e-06, "loss": 1.3739, "step": 96 },
    { "epoch": 0.13604488078541374, "grad_norm": 0.1123046875, "learning_rate": 8.653576437587658e-06, "loss": 1.2664, "step": 97 },
    { "epoch": 0.13744740532959326, "grad_norm": 0.185546875, "learning_rate": 8.639551192145864e-06, "loss": 1.2584, "step": 98 },
    { "epoch": 0.1388499298737728, "grad_norm": 0.1865234375, "learning_rate": 8.625525946704067e-06, "loss": 1.356, "step": 99 },
    { "epoch": 0.1402524544179523, "grad_norm": 0.14453125, "learning_rate": 8.611500701262272e-06, "loss": 1.3034, "step": 100 },
    { "epoch": 0.14165497896213183, "grad_norm": 0.201171875, "learning_rate": 8.597475455820477e-06, "loss": 1.3498, "step": 101 },
    { "epoch": 0.14305750350631136, "grad_norm": 0.1748046875, "learning_rate": 8.583450210378683e-06, "loss": 1.3575, "step": 102 },
    { "epoch": 0.14446002805049088, "grad_norm": 0.1259765625, "learning_rate": 8.569424964936886e-06, "loss": 1.3232, "step": 103 },
    { "epoch": 0.1458625525946704, "grad_norm": 0.1328125, "learning_rate": 8.555399719495091e-06, "loss": 1.3514, "step": 104 },
    { "epoch": 0.14726507713884993, "grad_norm": 0.142578125, "learning_rate": 8.541374474053297e-06, "loss": 1.2854, "step": 105 },
    { "epoch": 0.14866760168302945, "grad_norm": 0.134765625, "learning_rate": 8.527349228611502e-06, "loss": 1.365, "step": 106 },
    { "epoch": 0.15007012622720897, "grad_norm": 0.1474609375, "learning_rate": 8.513323983169705e-06, "loss": 1.2432, "step": 107 },
    { "epoch": 0.1514726507713885, "grad_norm": 0.1708984375, "learning_rate": 8.49929873772791e-06, "loss": 1.417, "step": 108 },
    { "epoch": 0.15287517531556802, "grad_norm": 0.1357421875, "learning_rate": 8.485273492286116e-06, "loss": 1.1604, "step": 109 },
    { "epoch": 0.15427769985974754, "grad_norm": 0.11474609375, "learning_rate": 8.471248246844321e-06, "loss": 1.4095, "step": 110 },
    { "epoch": 0.15568022440392706, "grad_norm": 0.1494140625, "learning_rate": 8.457223001402524e-06, "loss": 1.429, "step": 111 },
    { "epoch": 0.1570827489481066, "grad_norm": 0.1416015625, "learning_rate": 8.44319775596073e-06, "loss": 1.3476, "step": 112 },
    { "epoch": 0.1584852734922861, "grad_norm": 0.10546875, "learning_rate": 8.429172510518935e-06, "loss": 1.2188, "step": 113 },
    { "epoch": 0.15988779803646563, "grad_norm": 0.0888671875, "learning_rate": 8.41514726507714e-06, "loss": 1.1952, "step": 114 },
    { "epoch": 0.16129032258064516, "grad_norm": 0.11962890625, "learning_rate": 8.401122019635344e-06, "loss": 1.1998, "step": 115 },
    { "epoch": 0.16269284712482468, "grad_norm": 0.11572265625, "learning_rate": 8.387096774193549e-06, "loss": 1.2351, "step": 116 },
    { "epoch": 0.1640953716690042, "grad_norm": 0.166015625, "learning_rate": 8.373071528751754e-06, "loss": 1.2987, "step": 117 },
    { "epoch": 0.16549789621318373, "grad_norm": 0.138671875, "learning_rate": 8.35904628330996e-06, "loss": 1.5069, "step": 118 },
    { "epoch": 0.16690042075736325, "grad_norm": 0.12060546875, "learning_rate": 8.345021037868163e-06, "loss": 1.3131, "step": 119 },
    { "epoch": 0.16830294530154277, "grad_norm": 0.138671875, "learning_rate": 8.330995792426368e-06, "loss": 1.3799, "step": 120 },
    { "epoch": 0.1697054698457223, "grad_norm": 0.140625, "learning_rate": 8.316970546984573e-06, "loss": 1.3526, "step": 121 },
    { "epoch": 0.17110799438990182, "grad_norm": 0.1318359375, "learning_rate": 8.302945301542777e-06, "loss": 1.3056, "step": 122 },
    { "epoch": 0.17251051893408134, "grad_norm": 0.1259765625, "learning_rate": 8.288920056100982e-06, "loss": 1.244, "step": 123 },
    { "epoch": 0.17391304347826086, "grad_norm": 0.1494140625, "learning_rate": 8.274894810659187e-06, "loss": 1.3172, "step": 124 },
    { "epoch": 0.1753155680224404, "grad_norm": 0.1767578125, "learning_rate": 8.260869565217392e-06, "loss": 1.3831, "step": 125 },
    { "epoch": 0.1767180925666199, "grad_norm": 0.10302734375, "learning_rate": 8.246844319775596e-06, "loss": 1.1699, "step": 126 },
    { "epoch": 0.17812061711079943, "grad_norm": 0.10791015625, "learning_rate": 8.232819074333801e-06, "loss": 1.3282, "step": 127 },
    { "epoch": 0.17952314165497896, "grad_norm": 0.119140625, "learning_rate": 8.218793828892006e-06, "loss": 1.122, "step": 128 },
    { "epoch": 0.18092566619915848, "grad_norm": 0.158203125, "learning_rate": 8.204768583450211e-06, "loss": 1.1407, "step": 129 },
    { "epoch": 0.182328190743338, "grad_norm": 0.130859375, "learning_rate": 8.190743338008415e-06, "loss": 1.3206, "step": 130 },
    { "epoch": 0.18373071528751753, "grad_norm": 0.1533203125, "learning_rate": 8.17671809256662e-06, "loss": 1.5346, "step": 131 },
    { "epoch": 0.18513323983169705, "grad_norm": 0.19140625, "learning_rate": 8.162692847124825e-06, "loss": 1.4225, "step": 132 },
    { "epoch": 0.18653576437587657, "grad_norm": 0.11962890625, "learning_rate": 8.14866760168303e-06, "loss": 1.3242, "step": 133 },
    { "epoch": 0.1879382889200561, "grad_norm": 0.1533203125, "learning_rate": 8.134642356241234e-06, "loss": 1.3555, "step": 134 },
    { "epoch": 0.18934081346423562, "grad_norm": 0.130859375, "learning_rate": 8.12061711079944e-06, "loss": 1.1939, "step": 135 },
    { "epoch": 0.19074333800841514, "grad_norm": 0.1494140625, "learning_rate": 8.106591865357644e-06, "loss": 1.2565, "step": 136 },
    { "epoch": 0.19214586255259467, "grad_norm": 0.10595703125, "learning_rate": 8.09256661991585e-06, "loss": 1.3652, "step": 137 },
    { "epoch": 0.1935483870967742, "grad_norm": 0.142578125, "learning_rate": 8.078541374474053e-06, "loss": 1.1999, "step": 138 },
    { "epoch": 0.1949509116409537, "grad_norm": 0.1611328125, "learning_rate": 8.064516129032258e-06, "loss": 1.2727, "step": 139 },
    { "epoch": 0.19635343618513323, "grad_norm": 0.119140625, "learning_rate": 8.050490883590464e-06, "loss": 1.1687, "step": 140 },
    { "epoch": 0.19775596072931276, "grad_norm": 0.11767578125, "learning_rate": 8.036465638148669e-06, "loss": 1.3706, "step": 141 },
    { "epoch": 0.19915848527349228, "grad_norm": 0.12255859375, "learning_rate": 8.022440392706872e-06, "loss": 1.2393, "step": 142 },
    { "epoch": 0.2005610098176718, "grad_norm": 0.14453125, "learning_rate": 8.008415147265078e-06, "loss": 1.2912, "step": 143 },
    { "epoch": 0.20196353436185133, "grad_norm": 0.1318359375, "learning_rate": 7.994389901823283e-06, "loss": 1.2348, "step": 144 },
    { "epoch": 0.20336605890603085, "grad_norm": 0.181640625, "learning_rate": 7.980364656381488e-06, "loss": 1.4441, "step": 145 },
    { "epoch": 0.20476858345021037, "grad_norm": 0.10546875, "learning_rate": 7.966339410939691e-06, "loss": 1.2566, "step": 146 },
    { "epoch": 0.2061711079943899, "grad_norm": 0.12255859375, "learning_rate": 7.952314165497897e-06, "loss": 1.4199, "step": 147 },
    { "epoch": 0.20757363253856942, "grad_norm": 0.2265625, "learning_rate": 7.938288920056102e-06, "loss": 1.2723, "step": 148 },
    { "epoch": 0.20897615708274894, "grad_norm": 0.36328125, "learning_rate": 7.924263674614307e-06, "loss": 1.7504, "step": 149 },
    { "epoch": 0.21037868162692847, "grad_norm": 0.1611328125, "learning_rate": 7.91023842917251e-06, "loss": 1.2681, "step": 150 },
    { "epoch": 0.211781206171108, "grad_norm": 0.1572265625, "learning_rate": 7.896213183730716e-06, "loss": 1.3545, "step": 151 },
    { "epoch": 0.2131837307152875, "grad_norm": 0.12158203125, "learning_rate": 7.882187938288921e-06, "loss": 1.3314, "step": 152 },
    { "epoch": 0.21458625525946703, "grad_norm": 0.28125, "learning_rate": 7.868162692847126e-06, "loss": 1.2628, "step": 153 },
    { "epoch": 0.21598877980364656, "grad_norm": 0.12255859375, "learning_rate": 7.85413744740533e-06, "loss": 1.3766, "step": 154 },
    { "epoch": 0.21739130434782608, "grad_norm": 0.142578125, "learning_rate": 7.840112201963535e-06, "loss": 1.5394, "step": 155 },
    { "epoch": 0.2187938288920056, "grad_norm": 0.166015625, "learning_rate": 7.82608695652174e-06, "loss": 1.4721, "step": 156 },
    { "epoch": 0.22019635343618513, "grad_norm": 0.1630859375, "learning_rate": 7.812061711079945e-06, "loss": 1.4292, "step": 157 },
    { "epoch": 0.22159887798036465, "grad_norm": 0.126953125, "learning_rate": 7.798036465638149e-06, "loss": 1.2383, "step": 158 },
    { "epoch": 0.22300140252454417, "grad_norm": 0.1376953125, "learning_rate": 7.784011220196354e-06, "loss": 1.3895, "step": 159 },
    { "epoch": 0.2244039270687237, "grad_norm": 0.158203125, "learning_rate": 7.76998597475456e-06, "loss": 1.3189, "step": 160 },
    { "epoch": 0.22580645161290322, "grad_norm": 0.158203125, "learning_rate": 7.755960729312764e-06, "loss": 1.3514, "step": 161 },
    { "epoch": 0.22720897615708274, "grad_norm": 0.1435546875, "learning_rate": 7.741935483870968e-06, "loss": 1.2598, "step": 162 },
    { "epoch": 0.22861150070126227, "grad_norm": 0.12890625, "learning_rate": 7.727910238429173e-06, "loss": 1.2271, "step": 163 },
    { "epoch": 0.2300140252454418, "grad_norm": 0.107421875, "learning_rate": 7.713884992987378e-06, "loss": 1.1796, "step": 164 },
    { "epoch": 0.2314165497896213, "grad_norm": 0.212890625, "learning_rate": 7.699859747545582e-06, "loss": 1.3644, "step": 165 },
    { "epoch": 0.23281907433380084, "grad_norm": 0.1376953125, "learning_rate": 7.685834502103787e-06, "loss": 1.31, "step": 166 },
    { "epoch": 0.23422159887798036, "grad_norm": 0.12353515625, "learning_rate": 7.671809256661992e-06, "loss": 1.1865, "step": 167 },
    { "epoch": 0.23562412342215988, "grad_norm": 0.154296875, "learning_rate": 7.657784011220198e-06, "loss": 1.2996, "step": 168 },
    { "epoch": 0.2370266479663394, "grad_norm": 0.1337890625, "learning_rate": 7.643758765778401e-06, "loss": 1.3056, "step": 169 },
    { "epoch": 0.23842917251051893, "grad_norm": 0.1533203125, "learning_rate": 7.629733520336606e-06, "loss": 1.321, "step": 170 },
    { "epoch": 0.23983169705469845, "grad_norm": 0.205078125, "learning_rate": 7.615708274894811e-06, "loss": 1.2381, "step": 171 },
    { "epoch": 0.24123422159887797, "grad_norm": 0.1357421875, "learning_rate": 7.601683029453017e-06, "loss": 1.4478, "step": 172 },
    { "epoch": 0.2426367461430575, "grad_norm": 0.158203125, "learning_rate": 7.587657784011221e-06, "loss": 1.4235, "step": 173 },
    { "epoch": 0.24403927068723702, "grad_norm": 0.10400390625, "learning_rate": 7.573632538569425e-06, "loss": 1.1866, "step": 174 },
    { "epoch": 0.24544179523141654, "grad_norm": 0.2177734375, "learning_rate": 7.55960729312763e-06, "loss": 1.3728, "step": 175 },
    { "epoch": 0.24684431977559607, "grad_norm": 0.2138671875, "learning_rate": 7.545582047685836e-06, "loss": 1.2539, "step": 176 },
    { "epoch": 0.2482468443197756, "grad_norm": 0.1298828125, "learning_rate": 7.53155680224404e-06, "loss": 1.3318, "step": 177 },
    { "epoch": 0.2496493688639551, "grad_norm": 0.119140625, "learning_rate": 7.5175315568022445e-06, "loss": 1.4789, "step": 178 },
    { "epoch": 0.25105189340813466, "grad_norm": 0.10498046875, "learning_rate": 7.503506311360449e-06, "loss": 1.2135, "step": 179 },
    { "epoch": 0.25245441795231416, "grad_norm": 0.1787109375, "learning_rate": 7.489481065918655e-06, "loss": 1.2406, "step": 180 },
    { "epoch": 0.2538569424964937, "grad_norm": 0.2041015625, "learning_rate": 7.475455820476859e-06, "loss": 1.1215, "step": 181 },
    { "epoch": 0.2552594670406732, "grad_norm": 0.1875, "learning_rate": 7.461430575035064e-06, "loss": 1.1114, "step": 182 },
    { "epoch": 0.25666199158485276, "grad_norm": 0.1220703125, "learning_rate": 7.447405329593268e-06, "loss": 1.3997, "step": 183 },
    { "epoch": 0.25806451612903225, "grad_norm": 0.2080078125, "learning_rate": 7.433380084151473e-06, "loss": 1.2383, "step": 184 },
    { "epoch": 0.2594670406732118, "grad_norm": 0.1337890625, "learning_rate": 7.4193548387096784e-06, "loss": 1.2564, "step": 185 },
    { "epoch": 0.2608695652173913, "grad_norm": 0.1572265625, "learning_rate": 7.405329593267883e-06, "loss": 1.3568, "step": 186 },
    { "epoch": 0.26227208976157085, "grad_norm": 0.18359375, "learning_rate": 7.391304347826087e-06, "loss": 1.4266, "step": 187 },
    { "epoch": 0.26367461430575034, "grad_norm": 0.1533203125, "learning_rate": 7.377279102384292e-06, "loss": 1.3553, "step": 188 },
    { "epoch": 0.2650771388499299, "grad_norm": 0.1572265625, "learning_rate": 7.3632538569424976e-06, "loss": 1.4127, "step": 189 },
    { "epoch": 0.2664796633941094, "grad_norm": 0.125, "learning_rate": 7.349228611500702e-06, "loss": 1.3274, "step": 190 },
    { "epoch": 0.26788218793828894, "grad_norm": 0.1591796875, "learning_rate": 7.335203366058906e-06, "loss": 1.2343, "step": 191 },
    { "epoch": 0.26928471248246844, "grad_norm": 0.177734375, "learning_rate": 7.3211781206171115e-06, "loss": 1.1226, "step": 192 },
    { "epoch": 0.270687237026648, "grad_norm": 0.09521484375, "learning_rate": 7.307152875175316e-06, "loss": 1.2653, "step": 193 },
    { "epoch": 0.2720897615708275, "grad_norm": 0.1416015625, "learning_rate": 7.293127629733521e-06, "loss": 1.1759, "step": 194 },
    { "epoch": 0.27349228611500703, "grad_norm": 0.134765625, "learning_rate": 7.2791023842917254e-06, "loss": 1.3378, "step": 195 },
    { "epoch": 0.27489481065918653, "grad_norm": 0.12353515625, "learning_rate": 7.265077138849931e-06, "loss": 1.1542, "step": 196 },
    { "epoch": 0.2762973352033661, "grad_norm": 0.1650390625, "learning_rate": 7.251051893408135e-06, "loss": 1.3159, "step": 197 },
    { "epoch": 0.2776998597475456, "grad_norm": 0.1044921875, "learning_rate": 7.23702664796634e-06, "loss": 1.2158, "step": 198 },
    { "epoch": 0.2791023842917251, "grad_norm": 0.15234375, "learning_rate": 7.2230014025245446e-06, "loss": 1.2818, "step": 199 },
    { "epoch": 0.2805049088359046, "grad_norm": 0.212890625, "learning_rate": 7.20897615708275e-06, "loss": 1.1737, "step": 200 },
    { "epoch": 0.28190743338008417, "grad_norm": 0.185546875, "learning_rate": 7.194950911640954e-06, "loss": 1.2758, "step": 201 },
    { "epoch": 0.28330995792426367, "grad_norm": 0.1494140625, "learning_rate": 7.1809256661991585e-06, "loss": 1.5019, "step": 202 },
    { "epoch": 0.2847124824684432, "grad_norm": 0.140625, "learning_rate": 7.166900420757364e-06, "loss": 1.3002, "step": 203 },
    { "epoch": 0.2861150070126227, "grad_norm": 0.1669921875, "learning_rate": 7.152875175315569e-06, "loss": 1.1012, "step": 204 },
    { "epoch": 0.28751753155680226, "grad_norm": 0.1298828125, "learning_rate": 7.138849929873773e-06, "loss": 1.3503, "step": 205 },
    { "epoch": 0.28892005610098176, "grad_norm": 0.189453125, "learning_rate": 7.124824684431978e-06, "loss": 1.2954, "step": 206 },
    { "epoch": 0.2903225806451613, "grad_norm": 0.14453125, "learning_rate": 7.110799438990183e-06, "loss": 1.3856, "step": 207 },
    { "epoch": 0.2917251051893408, "grad_norm": 0.15625, "learning_rate": 7.096774193548388e-06, "loss": 1.1842, "step": 208 },
    { "epoch": 0.29312762973352036, "grad_norm": 0.1484375, "learning_rate": 7.082748948106592e-06, "loss": 1.3386, "step": 209 },
    { "epoch": 0.29453015427769985, "grad_norm": 0.1748046875, "learning_rate": 7.068723702664797e-06, "loss": 1.1526, "step": 210 },
    { "epoch": 0.2959326788218794, "grad_norm": 0.181640625, "learning_rate": 7.054698457223001e-06, "loss": 1.3657, "step": 211 },
    { "epoch": 0.2973352033660589, "grad_norm": 0.1259765625, "learning_rate": 7.040673211781207e-06, "loss": 1.2629, "step": 212 },
    { "epoch": 0.29873772791023845, "grad_norm": 0.1748046875, "learning_rate": 7.0266479663394115e-06, "loss": 1.2934, "step": 213 },
    { "epoch": 0.30014025245441794, "grad_norm": 0.130859375, "learning_rate": 7.012622720897616e-06, "loss": 1.3227, "step": 214 },
    { "epoch": 0.3015427769985975, "grad_norm": 0.2001953125, "learning_rate": 6.99859747545582e-06, "loss": 1.2734, "step": 215 },
    { "epoch": 0.302945301542777, "grad_norm": 0.09716796875, "learning_rate": 6.984572230014026e-06, "loss": 1.3514, "step": 216 },
    { "epoch": 0.30434782608695654, "grad_norm": 0.1787109375, "learning_rate": 6.970546984572231e-06, "loss": 1.1325, "step": 217 },
    { "epoch": 0.30575035063113604, "grad_norm": 0.134765625, "learning_rate": 6.956521739130435e-06, "loss": 1.3717, "step": 218 },
    { "epoch": 0.3071528751753156, "grad_norm": 0.1630859375, "learning_rate": 6.942496493688639e-06, "loss": 1.4719, "step": 219 },
    { "epoch": 0.3085553997194951, "grad_norm": 0.107421875, "learning_rate": 6.9284712482468454e-06, "loss": 1.3102, "step": 220 },
    { "epoch": 0.30995792426367463, "grad_norm": 0.125, "learning_rate": 6.91444600280505e-06, "loss": 1.1821, "step": 221 },
    { "epoch": 0.31136044880785413, "grad_norm": 0.12451171875, "learning_rate": 6.900420757363254e-06, "loss": 1.2056, "step": 222 },
    { "epoch": 0.3127629733520337, "grad_norm": 0.2275390625, "learning_rate": 6.8863955119214585e-06, "loss": 1.2066, "step": 223 },
    { "epoch": 0.3141654978962132, "grad_norm": 0.1328125, "learning_rate": 6.8723702664796646e-06, "loss": 1.4078, "step": 224 },
    { "epoch": 0.3155680224403927, "grad_norm": 0.1572265625, "learning_rate": 6.858345021037869e-06, "loss": 1.1525, "step": 225 },
    { "epoch": 0.3169705469845722, "grad_norm": 0.11767578125, "learning_rate": 6.844319775596073e-06, "loss": 1.2254, "step": 226 },
    { "epoch": 0.31837307152875177, "grad_norm": 0.185546875, "learning_rate": 6.830294530154278e-06, "loss": 1.2657, "step": 227 },
    { "epoch": 0.31977559607293127, "grad_norm": 0.10400390625, "learning_rate": 6.816269284712484e-06, "loss": 1.2751, "step": 228 },
    { "epoch": 0.3211781206171108, "grad_norm": 0.1220703125, "learning_rate": 6.802244039270688e-06, "loss": 1.3515, "step": 229 },
    { "epoch": 0.3225806451612903, "grad_norm": 0.134765625, "learning_rate": 6.788218793828892e-06, "loss": 1.5331, "step": 230 },
    { "epoch": 0.32398316970546986, "grad_norm": 0.1962890625, "learning_rate": 6.774193548387097e-06, "loss": 1.2843, "step": 231 },
    { "epoch": 0.32538569424964936, "grad_norm": 0.1435546875, "learning_rate": 6.760168302945303e-06, "loss": 1.2427, "step": 232 },
    { "epoch": 0.3267882187938289, "grad_norm": 0.1826171875, "learning_rate": 6.746143057503507e-06, "loss": 1.174, "step": 233 },
    { "epoch": 0.3281907433380084, "grad_norm": 0.12255859375, "learning_rate": 6.7321178120617116e-06, "loss": 1.2616, "step": 234 },
    { "epoch": 0.32959326788218796, "grad_norm": 0.2392578125, "learning_rate": 6.718092566619916e-06, "loss": 1.3096, "step": 235 },
    { "epoch": 0.33099579242636745, "grad_norm": 0.08447265625, "learning_rate": 6.704067321178121e-06, "loss": 1.1866, "step": 236 },
    { "epoch": 0.332398316970547, "grad_norm": 0.1298828125, "learning_rate": 6.690042075736326e-06, "loss": 1.1565, "step": 237 },
    { "epoch": 0.3338008415147265, "grad_norm": 0.14453125, "learning_rate": 6.676016830294531e-06, "loss": 1.442, "step": 238 },
    { "epoch": 0.33520336605890605, "grad_norm": 0.134765625, "learning_rate": 6.661991584852735e-06, "loss": 1.2511, "step": 239 },
    { "epoch": 0.33660589060308554, "grad_norm": 0.193359375, "learning_rate": 6.64796633941094e-06, "loss": 1.1966, "step": 240 },
    { "epoch": 0.3380084151472651, "grad_norm": 0.12158203125, "learning_rate": 6.6339410939691455e-06, "loss": 1.1705, "step": 241 },
    { "epoch": 0.3394109396914446, "grad_norm": 0.1123046875, "learning_rate": 6.61991584852735e-06, "loss": 1.3132, "step": 242 },
    { "epoch": 0.34081346423562414, "grad_norm": 0.1845703125, "learning_rate": 6.605890603085554e-06, "loss": 1.1844, "step": 243 },
    { "epoch": 0.34221598877980364, "grad_norm": 0.1943359375, "learning_rate": 6.591865357643759e-06, "loss": 1.4392, "step": 244 },
    { "epoch": 0.3436185133239832, "grad_norm": 0.1416015625, "learning_rate": 6.577840112201964e-06, "loss": 1.2875, "step": 245 },
    { "epoch": 0.3450210378681627, "grad_norm": 0.12890625, "learning_rate": 6.563814866760169e-06, "loss": 1.1475, "step": 246 },
    { "epoch": 0.34642356241234223, "grad_norm": 0.1845703125, "learning_rate": 6.549789621318373e-06, "loss": 1.4414, "step": 247 },
    { "epoch": 0.34782608695652173, "grad_norm": 0.1875, "learning_rate": 6.5357643758765785e-06, "loss": 1.2128, "step": 248 },
    { "epoch": 0.3492286115007013, "grad_norm": 0.10693359375, "learning_rate": 6.521739130434783e-06, "loss": 1.2818, "step": 249 },
    { "epoch": 0.3506311360448808, "grad_norm": 0.1533203125, "learning_rate": 6.507713884992988e-06, "loss": 1.3934, "step": 250 },
    { "epoch": 0.3520336605890603, "grad_norm": 0.1669921875, "learning_rate": 6.4936886395511925e-06, "loss": 1.2186, "step": 251 },
    { "epoch": 0.3534361851332398, "grad_norm": 0.162109375, "learning_rate": 6.479663394109398e-06, "loss": 1.2839, "step": 252 },
    { "epoch": 0.3548387096774194, "grad_norm": 0.1435546875, "learning_rate": 6.465638148667602e-06, "loss": 1.1664, "step": 253 },
    { "epoch": 0.35624123422159887, "grad_norm": 0.1064453125, "learning_rate": 6.451612903225806e-06, "loss": 1.2641, "step": 254 },
    { "epoch": 0.3576437587657784, "grad_norm": 0.1787109375, "learning_rate": 6.437587657784012e-06, "loss": 1.204, "step": 255 },
    { "epoch": 0.3590462833099579, "grad_norm": 0.11767578125, "learning_rate": 6.423562412342217e-06, "loss": 1.3466, "step": 256 },
    { "epoch": 0.36044880785413747, "grad_norm": 0.1552734375, "learning_rate": 6.409537166900421e-06, "loss": 1.5334, "step": 257 },
    { "epoch": 0.36185133239831696, "grad_norm": 0.1396484375, "learning_rate": 6.3955119214586255e-06, "loss": 1.2872, "step": 258 },
    { "epoch": 0.3632538569424965, "grad_norm": 0.1806640625, "learning_rate": 6.381486676016831e-06, "loss": 1.3031, "step": 259 },
    { "epoch": 0.364656381486676, "grad_norm": 0.1396484375, "learning_rate": 6.367461430575036e-06, "loss": 1.1709, "step": 260 },
    { "epoch": 0.36605890603085556, "grad_norm": 0.1650390625, "learning_rate": 6.35343618513324e-06, "loss": 1.2545, "step": 261 },
    { "epoch": 0.36746143057503505, "grad_norm": 0.1416015625, "learning_rate": 6.339410939691445e-06, "loss": 1.2764, "step": 262 },
    { "epoch": 0.3688639551192146, "grad_norm": 0.11572265625, "learning_rate": 6.32538569424965e-06, "loss": 1.3391, "step": 263 },
    { "epoch": 0.3702664796633941, "grad_norm": 0.1103515625, "learning_rate": 6.311360448807855e-06, "loss": 1.3695, "step": 264 },
    { "epoch": 0.37166900420757365, "grad_norm": 0.248046875, "learning_rate": 6.297335203366059e-06, "loss": 1.2392, "step": 265 },
    { "epoch": 0.37307152875175315, "grad_norm": 0.1630859375, "learning_rate": 6.283309957924264e-06, "loss": 1.1389, "step": 266 },
    { "epoch": 0.3744740532959327, "grad_norm": 0.134765625, "learning_rate": 6.269284712482468e-06, "loss": 1.2203, "step": 267 },
    { "epoch": 0.3758765778401122, "grad_norm": 0.130859375, "learning_rate": 6.255259467040674e-06, "loss": 1.2082, "step": 268 },
    { "epoch": 0.37727910238429174, "grad_norm": 0.126953125, "learning_rate": 6.2412342215988786e-06, "loss": 1.3453, "step": 269 },
    { "epoch": 0.37868162692847124, "grad_norm": 0.1396484375, "learning_rate": 6.227208976157083e-06, "loss": 1.1697, "step": 270 },
    { "epoch": 0.3800841514726508, "grad_norm": 0.11328125, "learning_rate": 6.213183730715287e-06, "loss": 1.2059, "step": 271 },
    { "epoch": 0.3814866760168303, "grad_norm": 0.14453125, "learning_rate": 6.199158485273493e-06, "loss": 1.5541, "step": 272 },
    { "epoch": 0.38288920056100983, "grad_norm": 0.2890625, "learning_rate": 6.185133239831698e-06, "loss": 1.2391, "step": 273 },
    { "epoch": 0.38429172510518933, "grad_norm": 0.138671875, "learning_rate": 6.171107994389902e-06, "loss": 1.4042, "step": 274 },
    { "epoch": 0.3856942496493689, "grad_norm": 0.1845703125, "learning_rate": 6.157082748948106e-06, "loss": 1.149, "step": 275 },
    { "epoch": 0.3870967741935484, "grad_norm": 0.205078125, "learning_rate": 6.1430575035063125e-06, "loss": 1.209, "step": 276 },
    { "epoch": 0.3884992987377279, "grad_norm": 0.11865234375, "learning_rate": 6.129032258064517e-06, "loss": 1.102, "step": 277 },
    { "epoch": 0.3899018232819074, "grad_norm": 0.19140625, "learning_rate": 6.115007012622721e-06, "loss": 1.2143, "step": 278 },
    { "epoch": 0.391304347826087, "grad_norm": 0.1484375, "learning_rate": 6.1009817671809255e-06, "loss": 1.1747, "step": 279 },
    { "epoch": 0.39270687237026647, "grad_norm": 0.1943359375, "learning_rate": 6.086956521739132e-06, "loss": 1.1295, "step": 280 },
    { "epoch": 0.394109396914446, "grad_norm": 0.150390625, "learning_rate": 6.072931276297336e-06, "loss": 1.1241, "step": 281 },
    { "epoch": 0.3955119214586255, "grad_norm": 0.1376953125, "learning_rate": 6.05890603085554e-06, "loss": 1.1516, "step": 282 },
    { "epoch": 0.39691444600280507, "grad_norm": 0.251953125, "learning_rate": 6.044880785413745e-06, "loss": 1.1896, "step": 283 },
    { "epoch": 0.39831697054698456, "grad_norm": 0.185546875, "learning_rate": 6.030855539971951e-06, "loss": 1.2702, "step": 284 },
    { "epoch": 0.3997194950911641, "grad_norm": 0.189453125, "learning_rate": 6.016830294530155e-06, "loss": 1.2896, "step": 285 },
    { "epoch": 0.4011220196353436, "grad_norm": 0.15625, "learning_rate": 6.0028050490883594e-06, "loss": 1.1695, "step": 286 },
    { "epoch": 0.40252454417952316, "grad_norm": 0.15234375, "learning_rate": 5.988779803646564e-06, "loss": 1.3022, "step": 287 },
    { "epoch": 0.40392706872370265, "grad_norm": 0.1640625, "learning_rate": 5.97475455820477e-06, "loss": 1.4232, "step": 288 },
    { "epoch": 0.4053295932678822, "grad_norm": 0.1650390625, "learning_rate": 5.960729312762974e-06, "loss": 1.1834, "step": 289 },
    { "epoch": 0.4067321178120617, "grad_norm": 0.2216796875, "learning_rate": 5.946704067321179e-06, "loss": 1.2576, "step": 290 },
    { "epoch": 0.40813464235624125, "grad_norm": 0.146484375, "learning_rate": 5.932678821879383e-06, "loss": 1.1872, "step": 291 },
    { "epoch": 0.40953716690042075, "grad_norm": 0.1767578125, "learning_rate": 5.918653576437588e-06, "loss": 1.3551, "step": 292 },
    { "epoch": 0.4109396914446003, "grad_norm": 0.1865234375, "learning_rate": 5.904628330995793e-06, "loss": 1.2281, "step": 293 },
    { "epoch": 0.4123422159887798, "grad_norm": 0.1865234375, "learning_rate": 5.890603085553998e-06, "loss": 1.225, "step": 294 },
    { "epoch": 0.41374474053295934, "grad_norm": 0.2138671875, "learning_rate": 5.876577840112202e-06, "loss": 1.2137, "step": 295 },
    { "epoch": 0.41514726507713884, "grad_norm": 0.1396484375, "learning_rate": 5.862552594670407e-06, "loss": 1.139, "step": 296 },
    { "epoch": 0.4165497896213184, "grad_norm": 0.1806640625, "learning_rate": 5.8485273492286125e-06, "loss": 1.1809, "step": 297 },
    { "epoch": 0.4179523141654979, "grad_norm": 0.181640625, "learning_rate": 5.834502103786817e-06, "loss": 1.4761, "step": 298 },
    { "epoch": 0.41935483870967744, "grad_norm": 0.140625, "learning_rate": 5.820476858345021e-06, "loss": 1.5597, "step": 299 },
    { "epoch": 0.42075736325385693, "grad_norm": 0.15234375, "learning_rate": 5.806451612903226e-06, "loss": 1.2376, "step": 300 },
    { "epoch": 0.4221598877980365, "grad_norm": 0.2119140625, "learning_rate": 5.792426367461431e-06, "loss": 1.3761, "step": 301 },
    { "epoch": 0.423562412342216, "grad_norm": 0.1767578125, "learning_rate": 5.778401122019636e-06, "loss": 1.0822, "step": 302 },
    { "epoch": 0.42496493688639553, "grad_norm": 0.1591796875, "learning_rate": 5.76437587657784e-06, "loss": 1.3255, "step": 303 },
    { "epoch": 0.426367461430575, "grad_norm": 0.185546875, "learning_rate": 5.7503506311360456e-06, "loss": 1.2672, "step": 304 },
    { "epoch": 0.4277699859747546, "grad_norm": 0.134765625, "learning_rate": 5.73632538569425e-06, "loss": 1.0457, "step": 305 },
    { "epoch": 0.42917251051893407, "grad_norm": 0.150390625, "learning_rate": 5.722300140252455e-06, "loss": 1.4726, "step": 306 },
    { "epoch": 0.4305750350631136, "grad_norm": 0.162109375, "learning_rate": 5.7082748948106595e-06, "loss": 1.2004, "step": 307 },
    { "epoch": 0.4319775596072931, "grad_norm": 0.15234375, "learning_rate": 5.694249649368865e-06, "loss": 1.2963, "step": 308 },
    { "epoch": 0.43338008415147267, "grad_norm": 0.1904296875, "learning_rate": 5.680224403927069e-06, "loss": 1.0556, "step": 309 },
    { "epoch": 0.43478260869565216, "grad_norm": 0.1640625, "learning_rate": 5.666199158485273e-06, "loss": 1.0575, "step": 310 },
    { "epoch": 0.4361851332398317, "grad_norm": 0.1572265625, "learning_rate": 5.652173913043479e-06, "loss": 1.2175, "step": 311 },
    { "epoch": 0.4375876577840112, "grad_norm": 0.1416015625, "learning_rate": 5.638148667601684e-06, "loss": 1.4628, "step": 312 },
    { "epoch": 0.43899018232819076, "grad_norm": 0.1328125, "learning_rate": 5.624123422159888e-06, "loss": 1.1847, "step": 313 },
    { "epoch": 0.44039270687237025, "grad_norm": 0.1796875, "learning_rate": 5.6100981767180925e-06, "loss": 1.1012, "step": 314 },
    { "epoch": 0.4417952314165498, "grad_norm": 0.12060546875, "learning_rate": 5.596072931276298e-06, "loss": 1.14, "step": 315 },
    { "epoch": 0.4431977559607293, "grad_norm": 0.1484375, "learning_rate": 5.582047685834503e-06, "loss": 1.2151, "step": 316 },
    { "epoch": 0.44460028050490885, "grad_norm": 0.1455078125, "learning_rate": 5.568022440392707e-06, "loss": 1.2165, "step": 317 },
    { "epoch": 0.44600280504908835, "grad_norm": 0.169921875, "learning_rate": 5.553997194950912e-06, "loss": 1.2602, "step": 318 },
    { "epoch": 0.4474053295932679, "grad_norm": 0.2177734375, "learning_rate": 5.539971949509116e-06, "loss": 1.2089, "step": 319 },
    { "epoch": 0.4488078541374474, "grad_norm": 0.232421875, "learning_rate": 5.525946704067322e-06, "loss": 1.0351, "step": 320 },
    { "epoch": 0.45021037868162694, "grad_norm": 0.21875, "learning_rate": 5.5119214586255264e-06, "loss": 1.3343, "step": 321 },
    { "epoch": 0.45161290322580644, "grad_norm": 0.1513671875, "learning_rate": 5.497896213183731e-06, "loss": 1.1131, "step": 322 },
    { "epoch": 0.453015427769986, "grad_norm": 0.1650390625, "learning_rate": 5.483870967741935e-06, "loss": 1.0596, "step": 323 },
    { "epoch": 0.4544179523141655, "grad_norm": 0.2041015625, "learning_rate": 5.469845722300141e-06, "loss": 1.2091, "step": 324 },
    { "epoch": 0.45582047685834504, "grad_norm": 0.1826171875, "learning_rate": 5.455820476858346e-06, "loss": 1.2544, "step": 325 },
    { "epoch": 0.45722300140252453, "grad_norm": 0.181640625, "learning_rate": 5.44179523141655e-06, "loss": 1.1604, "step": 326 },
    { "epoch": 0.4586255259467041, "grad_norm": 0.2333984375, "learning_rate": 5.427769985974754e-06, "loss": 1.0818, "step": 327 },
    { "epoch": 0.4600280504908836, "grad_norm": 0.203125, "learning_rate": 5.41374474053296e-06, "loss": 1.1955, "step": 328 },
    { "epoch": 0.46143057503506313, "grad_norm": 0.1953125, "learning_rate": 5.399719495091165e-06, "loss": 1.1787, "step": 329 },
    { "epoch": 0.4628330995792426, "grad_norm": 0.2099609375, "learning_rate": 5.385694249649369e-06, "loss": 1.187, "step": 330 },
    { "epoch": 0.4642356241234222, "grad_norm": 0.171875, "learning_rate": 5.3716690042075734e-06, "loss": 1.1248, "step": 331 },
    { "epoch": 0.46563814866760167, "grad_norm": 0.2275390625, "learning_rate": 5.3576437587657795e-06, "loss": 1.0849, "step": 332 },
    { "epoch": 0.4670406732117812, "grad_norm": 0.216796875, "learning_rate": 5.343618513323984e-06, "loss": 1.2235, "step": 333 },
    { "epoch": 0.4684431977559607, "grad_norm": 0.154296875, "learning_rate": 5.329593267882188e-06, "loss": 1.0148, "step": 334 },
    { "epoch": 0.46984572230014027, "grad_norm": 0.1435546875, "learning_rate": 5.3155680224403926e-06, "loss": 1.2177, "step": 335 },
    { "epoch": 0.47124824684431976, "grad_norm": 0.1669921875, "learning_rate": 5.301542776998599e-06, "loss": 1.3309, "step": 336 },
    { "epoch": 0.4726507713884993, "grad_norm": 0.2119140625, "learning_rate": 5.287517531556803e-06, "loss": 1.457, "step": 337 },
    { "epoch": 0.4740532959326788, "grad_norm": 0.173828125, "learning_rate": 5.273492286115007e-06, "loss": 1.0875, "step": 338 },
    { "epoch": 0.47545582047685836, "grad_norm": 0.2197265625, "learning_rate": 5.259467040673212e-06, "loss": 1.1302, "step": 339 },
    { "epoch": 0.47685834502103785, "grad_norm": 0.095703125, "learning_rate": 5.245441795231418e-06, "loss": 1.2536, "step": 340 },
    { "epoch": 0.4782608695652174, "grad_norm": 0.1650390625, "learning_rate": 5.231416549789622e-06, "loss": 1.3286, "step": 341 },
    { "epoch": 0.4796633941093969, "grad_norm": 0.2060546875, "learning_rate": 5.2173913043478265e-06, "loss": 1.2036, "step": 342 },
    { "epoch": 0.48106591865357645, "grad_norm": 0.2578125, "learning_rate": 5.203366058906031e-06, "loss": 1.2194, "step": 343 },
    { "epoch": 0.48246844319775595, "grad_norm": 0.2001953125, "learning_rate": 5.189340813464236e-06, "loss": 1.2176, "step": 344 },
    { "epoch": 0.4838709677419355, "grad_norm": 0.1982421875, "learning_rate": 5.175315568022441e-06, "loss": 1.1685, "step": 345 },
    { "epoch": 0.485273492286115, "grad_norm": 0.2041015625, "learning_rate": 5.161290322580646e-06, "loss": 1.1327, "step": 346 },
    { "epoch": 0.48667601683029454, "grad_norm": 0.1630859375, "learning_rate": 5.14726507713885e-06, "loss": 1.2903, "step": 347 },
    { "epoch": 0.48807854137447404, "grad_norm": 0.140625, "learning_rate": 5.133239831697055e-06, "loss": 1.2387, "step": 348 },
    { "epoch": 0.4894810659186536, "grad_norm": 0.177734375, "learning_rate": 5.11921458625526e-06, "loss": 1.1081, "step": 349 },
    { "epoch": 0.4908835904628331, "grad_norm": 0.1982421875, "learning_rate": 5.105189340813465e-06, "loss": 1.15, "step": 350 },
    { "epoch": 0.49228611500701264, "grad_norm": 0.1630859375, "learning_rate": 5.091164095371669e-06, "loss": 1.2073, "step": 351 },
    { "epoch": 0.49368863955119213, "grad_norm": 0.1484375, "learning_rate": 5.077138849929874e-06, "loss": 1.2808, "step": 352 },
    { "epoch": 0.4950911640953717, "grad_norm": 0.12255859375, "learning_rate": 5.063113604488079e-06, "loss": 1.2547, "step": 353 },
    { "epoch": 0.4964936886395512, "grad_norm": 0.142578125, "learning_rate": 5.049088359046284e-06, "loss": 1.1674, "step": 354 },
    { "epoch": 0.49789621318373073, "grad_norm": 0.173828125, "learning_rate": 5.035063113604488e-06, "loss": 1.26, "step": 355 },
    { "epoch": 0.4992987377279102, "grad_norm": 0.2451171875, "learning_rate": 5.0210378681626934e-06, "loss": 1.3142, "step": 356 },
    { "epoch": 0.5007012622720898, "grad_norm": 0.16015625, "learning_rate": 5.007012622720898e-06, "loss": 1.2545, "step": 357 },
    { "epoch": 0.5021037868162693, "grad_norm": 0.1826171875, "learning_rate": 4.992987377279103e-06, "loss": 1.2984, "step": 358 },
    { "epoch": 0.5035063113604488, "grad_norm": 0.2353515625, "learning_rate": 4.978962131837307e-06, "loss": 1.1791, "step": 359 },
    { "epoch": 0.5049088359046283, "grad_norm": 0.171875, "learning_rate": 4.964936886395513e-06, "loss": 1.2114, "step": 360 },
    { "epoch": 0.5063113604488079, "grad_norm": 0.2333984375, "learning_rate": 4.950911640953717e-06, "loss": 1.1979, "step": 361 },
    { "epoch": 0.5077138849929874, "grad_norm": 0.19921875, "learning_rate": 4.936886395511922e-06, "loss": 1.4286, "step": 362 },
    { "epoch": 0.5091164095371669, "grad_norm": 0.15234375, "learning_rate": 4.9228611500701265e-06, "loss": 1.1784, "step": 363 },
    { "epoch": 0.5105189340813464, "grad_norm": 0.17578125, "learning_rate": 4.908835904628331e-06, "loss": 1.2079, "step": 364 },
    { "epoch": 0.511921458625526, "grad_norm": 0.1640625, "learning_rate": 4.894810659186536e-06, "loss": 1.3151, "step": 365 },
    { "epoch": 0.5133239831697055, "grad_norm": 0.283203125, "learning_rate": 4.8807854137447404e-06, "loss": 1.1817, "step": 366 },
    { "epoch": 0.514726507713885, "grad_norm": 0.1650390625, "learning_rate": 4.866760168302946e-06, "loss": 1.0681, "step": 367 },
    { "epoch": 0.5161290322580645, "grad_norm": 0.25, "learning_rate": 4.85273492286115e-06, "loss": 1.2658, "step": 368 },
    { "epoch": 0.517531556802244, "grad_norm": 0.1455078125, "learning_rate": 4.838709677419355e-06, "loss": 1.2459, "step": 369 },
    { "epoch": 0.5189340813464236, "grad_norm": 0.1748046875, "learning_rate": 4.8246844319775596e-06, "loss": 1.265, "step": 370 },
    { "epoch": 0.520336605890603, "grad_norm": 0.2138671875, "learning_rate": 4.810659186535765e-06, "loss": 1.0783, "step": 371 },
    { "epoch": 0.5217391304347826, "grad_norm": 0.21875, "learning_rate": 4.796633941093969e-06, "loss": 1.2522, "step": 372 },
    { "epoch": 0.5231416549789621, "grad_norm": 0.1884765625, "learning_rate": 4.782608695652174e-06, "loss": 1.2196, "step": 373 },
    { "epoch": 0.5245441795231417, "grad_norm": 0.1591796875, "learning_rate": 4.768583450210379e-06, "loss": 1.1709, "step": 374 },
    { "epoch": 0.5259467040673211, "grad_norm": 0.2080078125, "learning_rate": 4.754558204768584e-06, "loss": 1.0803, "step": 375 },
    { "epoch": 0.5273492286115007, "grad_norm": 0.138671875, "learning_rate": 4.740532959326788e-06, "loss": 1.4023, "step": 376 },
    { "epoch": 0.5287517531556802, "grad_norm": 0.1875, "learning_rate": 4.7265077138849935e-06, "loss": 1.225, "step": 377 },
    { "epoch": 0.5301542776998598, "grad_norm": 0.2373046875, "learning_rate": 4.712482468443198e-06, "loss": 1.1343, "step": 378 },
    { "epoch": 0.5315568022440392, "grad_norm": 0.1669921875, "learning_rate": 4.698457223001403e-06, "loss": 1.1392, "step": 379 },
    { "epoch": 0.5329593267882188, "grad_norm": 0.1748046875, "learning_rate": 4.684431977559607e-06, "loss": 1.1653, "step": 380 },
    { "epoch": 0.5343618513323983, "grad_norm": 0.1748046875, "learning_rate": 4.670406732117813e-06, "loss": 1.3631, "step": 381 },
    { "epoch": 0.5357643758765779, "grad_norm": 0.21875, "learning_rate": 4.656381486676017e-06, "loss": 1.201, "step": 382 },
    { "epoch": 0.5371669004207573, "grad_norm": 0.1298828125, "learning_rate": 4.642356241234222e-06, "loss": 1.2728, "step": 383 },
    { "epoch": 0.5385694249649369, "grad_norm": 0.21875, "learning_rate": 4.6283309957924265e-06, "loss": 1.1816, "step": 384 },
    { "epoch": 0.5399719495091164, "grad_norm": 0.212890625, "learning_rate": 4.614305750350632e-06, "loss": 1.1313, "step": 385 },
    { "epoch": 0.541374474053296, "grad_norm": 0.166015625,
| "learning_rate": 4.600280504908836e-06, |
| "loss": 1.2561, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.5427769985974754, |
| "grad_norm": 0.193359375, |
| "learning_rate": 4.586255259467041e-06, |
| "loss": 1.324, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.544179523141655, |
| "grad_norm": 0.1259765625, |
| "learning_rate": 4.572230014025246e-06, |
| "loss": 1.2157, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.5455820476858345, |
| "grad_norm": 0.1943359375, |
| "learning_rate": 4.558204768583451e-06, |
| "loss": 1.1258, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.5469845722300141, |
| "grad_norm": 0.1513671875, |
| "learning_rate": 4.544179523141655e-06, |
| "loss": 1.0754, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.5483870967741935, |
| "grad_norm": 0.1787109375, |
| "learning_rate": 4.5301542776998604e-06, |
| "loss": 1.1802, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.5497896213183731, |
| "grad_norm": 0.21484375, |
| "learning_rate": 4.516129032258065e-06, |
| "loss": 1.194, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.5511921458625526, |
| "grad_norm": 0.13671875, |
| "learning_rate": 4.50210378681627e-06, |
| "loss": 1.2355, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.5525946704067322, |
| "grad_norm": 0.1455078125, |
| "learning_rate": 4.488078541374474e-06, |
| "loss": 1.1846, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.5539971949509116, |
| "grad_norm": 0.10693359375, |
| "learning_rate": 4.47405329593268e-06, |
| "loss": 1.3068, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.5553997194950911, |
| "grad_norm": 0.1357421875, |
| "learning_rate": 4.460028050490884e-06, |
| "loss": 1.455, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.5568022440392707, |
| "grad_norm": 0.1513671875, |
| "learning_rate": 4.446002805049089e-06, |
| "loss": 1.2467, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.5582047685834503, |
| "grad_norm": 0.1611328125, |
| "learning_rate": 4.4319775596072935e-06, |
| "loss": 1.1788, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.5596072931276297, |
| "grad_norm": 0.1865234375, |
| "learning_rate": 4.417952314165499e-06, |
| "loss": 1.21, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.5610098176718092, |
| "grad_norm": 0.1943359375, |
| "learning_rate": 4.403927068723703e-06, |
| "loss": 1.2269, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.5624123422159888, |
| "grad_norm": 0.2001953125, |
| "learning_rate": 4.389901823281908e-06, |
| "loss": 1.1959, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.5638148667601683, |
| "grad_norm": 0.234375, |
| "learning_rate": 4.375876577840113e-06, |
| "loss": 1.23, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.5652173913043478, |
| "grad_norm": 0.1376953125, |
| "learning_rate": 4.361851332398318e-06, |
| "loss": 1.2101, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.5666199158485273, |
| "grad_norm": 0.203125, |
| "learning_rate": 4.347826086956522e-06, |
| "loss": 1.1597, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.5680224403927069, |
| "grad_norm": 0.2451171875, |
| "learning_rate": 4.333800841514727e-06, |
| "loss": 1.1738, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.5694249649368864, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 4.319775596072932e-06, |
| "loss": 1.4462, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.5708274894810659, |
| "grad_norm": 0.1689453125, |
| "learning_rate": 4.305750350631136e-06, |
| "loss": 1.2194, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.5722300140252454, |
| "grad_norm": 0.166015625, |
| "learning_rate": 4.291725105189341e-06, |
| "loss": 1.1773, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.573632538569425, |
| "grad_norm": 0.263671875, |
| "learning_rate": 4.277699859747546e-06, |
| "loss": 1.1419, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.5750350631136045, |
| "grad_norm": 0.1708984375, |
| "learning_rate": 4.263674614305751e-06, |
| "loss": 1.3138, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.576437587657784, |
| "grad_norm": 0.146484375, |
| "learning_rate": 4.249649368863955e-06, |
| "loss": 1.2294, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.5778401122019635, |
| "grad_norm": 0.1728515625, |
| "learning_rate": 4.2356241234221605e-06, |
| "loss": 1.0948, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.5792426367461431, |
| "grad_norm": 0.2080078125, |
| "learning_rate": 4.221598877980365e-06, |
| "loss": 1.2114, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.5806451612903226, |
| "grad_norm": 0.205078125, |
| "learning_rate": 4.20757363253857e-06, |
| "loss": 1.1497, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.5820476858345021, |
| "grad_norm": 0.14453125, |
| "learning_rate": 4.193548387096774e-06, |
| "loss": 1.1986, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.5834502103786816, |
| "grad_norm": 0.197265625, |
| "learning_rate": 4.17952314165498e-06, |
| "loss": 1.0188, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.5848527349228612, |
| "grad_norm": 0.1513671875, |
| "learning_rate": 4.165497896213184e-06, |
| "loss": 1.1713, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.5862552594670407, |
| "grad_norm": 0.2373046875, |
| "learning_rate": 4.151472650771388e-06, |
| "loss": 1.1821, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.5876577840112202, |
| "grad_norm": 0.10546875, |
| "learning_rate": 4.1374474053295935e-06, |
| "loss": 1.2692, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.5890603085553997, |
| "grad_norm": 0.12890625, |
| "learning_rate": 4.123422159887798e-06, |
| "loss": 1.4345, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.5904628330995793, |
| "grad_norm": 0.1865234375, |
| "learning_rate": 4.109396914446003e-06, |
| "loss": 1.1865, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.5918653576437588, |
| "grad_norm": 0.10693359375, |
| "learning_rate": 4.0953716690042075e-06, |
| "loss": 1.2786, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.5932678821879382, |
| "grad_norm": 0.130859375, |
| "learning_rate": 4.081346423562413e-06, |
| "loss": 1.4287, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.5946704067321178, |
| "grad_norm": 0.2578125, |
| "learning_rate": 4.067321178120617e-06, |
| "loss": 1.3566, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.5960729312762973, |
| "grad_norm": 0.2060546875, |
| "learning_rate": 4.053295932678822e-06, |
| "loss": 1.0893, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.5974754558204769, |
| "grad_norm": 0.2109375, |
| "learning_rate": 4.039270687237027e-06, |
| "loss": 1.3028, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.5988779803646563, |
| "grad_norm": 0.21875, |
| "learning_rate": 4.025245441795232e-06, |
| "loss": 1.1084, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.6002805049088359, |
| "grad_norm": 0.1767578125, |
| "learning_rate": 4.011220196353436e-06, |
| "loss": 1.31, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.6016830294530154, |
| "grad_norm": 0.1650390625, |
| "learning_rate": 3.997194950911641e-06, |
| "loss": 1.2634, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.603085553997195, |
| "grad_norm": 0.2421875, |
| "learning_rate": 3.983169705469846e-06, |
| "loss": 1.1716, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.6044880785413744, |
| "grad_norm": 0.212890625, |
| "learning_rate": 3.969144460028051e-06, |
| "loss": 1.246, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.605890603085554, |
| "grad_norm": 0.1640625, |
| "learning_rate": 3.955119214586255e-06, |
| "loss": 1.2066, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.6072931276297335, |
| "grad_norm": 0.21875, |
| "learning_rate": 3.9410939691444605e-06, |
| "loss": 1.3996, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.6086956521739131, |
| "grad_norm": 0.1796875, |
| "learning_rate": 3.927068723702665e-06, |
| "loss": 1.2979, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.6100981767180925, |
| "grad_norm": 0.1728515625, |
| "learning_rate": 3.91304347826087e-06, |
| "loss": 1.2356, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.6115007012622721, |
| "grad_norm": 0.15625, |
| "learning_rate": 3.8990182328190744e-06, |
| "loss": 1.2458, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.6129032258064516, |
| "grad_norm": 0.177734375, |
| "learning_rate": 3.88499298737728e-06, |
| "loss": 1.399, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.6143057503506312, |
| "grad_norm": 0.208984375, |
| "learning_rate": 3.870967741935484e-06, |
| "loss": 1.2904, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.6157082748948106, |
| "grad_norm": 0.1064453125, |
| "learning_rate": 3.856942496493689e-06, |
| "loss": 1.1914, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.6171107994389902, |
| "grad_norm": 0.20703125, |
| "learning_rate": 3.8429172510518936e-06, |
| "loss": 1.3626, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.6185133239831697, |
| "grad_norm": 0.265625, |
| "learning_rate": 3.828892005610099e-06, |
| "loss": 1.1886, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.6199158485273493, |
| "grad_norm": 0.19140625, |
| "learning_rate": 3.814866760168303e-06, |
| "loss": 1.1162, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.6213183730715287, |
| "grad_norm": 0.134765625, |
| "learning_rate": 3.8008415147265083e-06, |
| "loss": 1.1404, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.6227208976157083, |
| "grad_norm": 0.1630859375, |
| "learning_rate": 3.7868162692847127e-06, |
| "loss": 1.1815, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.6241234221598878, |
| "grad_norm": 0.1572265625, |
| "learning_rate": 3.772791023842918e-06, |
| "loss": 1.2118, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.6255259467040674, |
| "grad_norm": 0.20703125, |
| "learning_rate": 3.7587657784011223e-06, |
| "loss": 1.2007, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.6269284712482468, |
| "grad_norm": 0.1796875, |
| "learning_rate": 3.7447405329593275e-06, |
| "loss": 1.1749, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.6283309957924264, |
| "grad_norm": 0.166015625, |
| "learning_rate": 3.730715287517532e-06, |
| "loss": 1.0124, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.6297335203366059, |
| "grad_norm": 0.296875, |
| "learning_rate": 3.7166900420757366e-06, |
| "loss": 1.1405, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.6311360448807855, |
| "grad_norm": 0.138671875, |
| "learning_rate": 3.7026647966339414e-06, |
| "loss": 1.2862, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.6325385694249649, |
| "grad_norm": 0.1650390625, |
| "learning_rate": 3.688639551192146e-06, |
| "loss": 1.2599, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.6339410939691444, |
| "grad_norm": 0.216796875, |
| "learning_rate": 3.674614305750351e-06, |
| "loss": 1.1407, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.635343618513324, |
| "grad_norm": 0.1474609375, |
| "learning_rate": 3.6605890603085557e-06, |
| "loss": 1.2165, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.6367461430575035, |
| "grad_norm": 0.263671875, |
| "learning_rate": 3.6465638148667605e-06, |
| "loss": 1.1954, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.638148667601683, |
| "grad_norm": 0.251953125, |
| "learning_rate": 3.6325385694249653e-06, |
| "loss": 1.2646, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.6395511921458625, |
| "grad_norm": 0.197265625, |
| "learning_rate": 3.61851332398317e-06, |
| "loss": 1.2184, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.6409537166900421, |
| "grad_norm": 0.22265625, |
| "learning_rate": 3.604488078541375e-06, |
| "loss": 1.121, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.6423562412342216, |
| "grad_norm": 0.2119140625, |
| "learning_rate": 3.5904628330995792e-06, |
| "loss": 1.2237, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.6437587657784011, |
| "grad_norm": 0.203125, |
| "learning_rate": 3.5764375876577844e-06, |
| "loss": 1.2567, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.6451612903225806, |
| "grad_norm": 0.1416015625, |
| "learning_rate": 3.562412342215989e-06, |
| "loss": 1.3652, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.6465638148667602, |
| "grad_norm": 0.1923828125, |
| "learning_rate": 3.548387096774194e-06, |
| "loss": 1.2201, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.6479663394109397, |
| "grad_norm": 0.177734375, |
| "learning_rate": 3.5343618513323984e-06, |
| "loss": 1.1577, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.6493688639551192, |
| "grad_norm": 0.173828125, |
| "learning_rate": 3.5203366058906036e-06, |
| "loss": 1.3246, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.6507713884992987, |
| "grad_norm": 0.15234375, |
| "learning_rate": 3.506311360448808e-06, |
| "loss": 1.1457, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.6521739130434783, |
| "grad_norm": 0.1708984375, |
| "learning_rate": 3.492286115007013e-06, |
| "loss": 1.2854, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.6535764375876578, |
| "grad_norm": 0.1728515625, |
| "learning_rate": 3.4782608695652175e-06, |
| "loss": 1.1192, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.6549789621318373, |
| "grad_norm": 0.171875, |
| "learning_rate": 3.4642356241234227e-06, |
| "loss": 1.3773, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.6563814866760168, |
| "grad_norm": 0.12255859375, |
| "learning_rate": 3.450210378681627e-06, |
| "loss": 1.1959, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.6577840112201964, |
| "grad_norm": 0.1630859375, |
| "learning_rate": 3.4361851332398323e-06, |
| "loss": 1.3236, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.6591865357643759, |
| "grad_norm": 0.1875, |
| "learning_rate": 3.4221598877980366e-06, |
| "loss": 1.2094, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.6605890603085554, |
| "grad_norm": 0.1669921875, |
| "learning_rate": 3.408134642356242e-06, |
| "loss": 1.2986, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.6619915848527349, |
| "grad_norm": 0.255859375, |
| "learning_rate": 3.394109396914446e-06, |
| "loss": 1.1477, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.6633941093969145, |
| "grad_norm": 0.158203125, |
| "learning_rate": 3.3800841514726514e-06, |
| "loss": 1.2415, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.664796633941094, |
| "grad_norm": 0.1650390625, |
| "learning_rate": 3.3660589060308558e-06, |
| "loss": 1.1927, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.6661991584852734, |
| "grad_norm": 0.1572265625, |
| "learning_rate": 3.3520336605890606e-06, |
| "loss": 1.1582, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.667601683029453, |
| "grad_norm": 0.1396484375, |
| "learning_rate": 3.3380084151472653e-06, |
| "loss": 1.0954, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.6690042075736325, |
| "grad_norm": 0.1552734375, |
| "learning_rate": 3.32398316970547e-06, |
| "loss": 1.1115, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.6704067321178121, |
| "grad_norm": 0.1630859375, |
| "learning_rate": 3.309957924263675e-06, |
| "loss": 1.1437, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.6718092566619915, |
| "grad_norm": 0.140625, |
| "learning_rate": 3.2959326788218797e-06, |
| "loss": 1.517, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.6732117812061711, |
| "grad_norm": 0.1376953125, |
| "learning_rate": 3.2819074333800845e-06, |
| "loss": 1.168, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.6746143057503506, |
| "grad_norm": 0.1962890625, |
| "learning_rate": 3.2678821879382893e-06, |
| "loss": 1.2514, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.6760168302945302, |
| "grad_norm": 0.1943359375, |
| "learning_rate": 3.253856942496494e-06, |
| "loss": 1.1855, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.6774193548387096, |
| "grad_norm": 0.2333984375, |
| "learning_rate": 3.239831697054699e-06, |
| "loss": 1.139, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.6788218793828892, |
| "grad_norm": 0.2021484375, |
| "learning_rate": 3.225806451612903e-06, |
| "loss": 1.2044, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.6802244039270687, |
| "grad_norm": 0.1650390625, |
| "learning_rate": 3.2117812061711084e-06, |
| "loss": 1.2088, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.6816269284712483, |
| "grad_norm": 0.201171875, |
| "learning_rate": 3.1977559607293128e-06, |
| "loss": 1.0997, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.6830294530154277, |
| "grad_norm": 0.234375, |
| "learning_rate": 3.183730715287518e-06, |
| "loss": 1.1307, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.6844319775596073, |
| "grad_norm": 0.1455078125, |
| "learning_rate": 3.1697054698457223e-06, |
| "loss": 1.2425, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.6858345021037868, |
| "grad_norm": 0.17578125, |
| "learning_rate": 3.1556802244039275e-06, |
| "loss": 1.1604, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.6872370266479664, |
| "grad_norm": 0.2470703125, |
| "learning_rate": 3.141654978962132e-06, |
| "loss": 1.1059, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.6886395511921458, |
| "grad_norm": 0.2197265625, |
| "learning_rate": 3.127629733520337e-06, |
| "loss": 1.169, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.6900420757363254, |
| "grad_norm": 0.19921875, |
| "learning_rate": 3.1136044880785415e-06, |
| "loss": 1.3918, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.6914446002805049, |
| "grad_norm": 0.26171875, |
| "learning_rate": 3.0995792426367467e-06, |
| "loss": 1.0892, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.6928471248246845, |
| "grad_norm": 0.19140625, |
| "learning_rate": 3.085553997194951e-06, |
| "loss": 1.2275, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.6942496493688639, |
| "grad_norm": 0.142578125, |
| "learning_rate": 3.0715287517531562e-06, |
| "loss": 1.497, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.6956521739130435, |
| "grad_norm": 0.251953125, |
| "learning_rate": 3.0575035063113606e-06, |
| "loss": 1.1735, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.697054698457223, |
| "grad_norm": 0.1591796875, |
| "learning_rate": 3.043478260869566e-06, |
| "loss": 1.1378, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.6984572230014026, |
| "grad_norm": 0.255859375, |
| "learning_rate": 3.02945301542777e-06, |
| "loss": 1.2152, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.699859747545582, |
| "grad_norm": 0.1748046875, |
| "learning_rate": 3.0154277699859754e-06, |
| "loss": 1.3393, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.7012622720897616, |
| "grad_norm": 0.2177734375, |
| "learning_rate": 3.0014025245441797e-06, |
| "loss": 1.1096, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.7026647966339411, |
| "grad_norm": 0.1669921875, |
| "learning_rate": 2.987377279102385e-06, |
| "loss": 1.2146, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.7040673211781207, |
| "grad_norm": 0.146484375, |
| "learning_rate": 2.9733520336605893e-06, |
| "loss": 1.3882, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.7054698457223001, |
| "grad_norm": 0.185546875, |
| "learning_rate": 2.959326788218794e-06, |
| "loss": 1.3228, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.7068723702664796, |
| "grad_norm": 0.2119140625, |
| "learning_rate": 2.945301542776999e-06, |
| "loss": 1.1921, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.7082748948106592, |
| "grad_norm": 0.16796875, |
| "learning_rate": 2.9312762973352036e-06, |
| "loss": 1.2217, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.7096774193548387, |
| "grad_norm": 0.1181640625, |
| "learning_rate": 2.9172510518934084e-06, |
| "loss": 1.4375, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.7110799438990182, |
| "grad_norm": 0.1796875, |
| "learning_rate": 2.903225806451613e-06, |
| "loss": 1.1508, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.7124824684431977, |
| "grad_norm": 0.25390625, |
| "learning_rate": 2.889200561009818e-06, |
| "loss": 1.1197, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.7138849929873773, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 2.8751753155680228e-06, |
| "loss": 1.391, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.7152875175315568, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 2.8611500701262276e-06, |
| "loss": 1.2597, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.7166900420757363, |
| "grad_norm": 0.1669921875, |
| "learning_rate": 2.8471248246844323e-06, |
| "loss": 1.3079, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.7180925666199158, |
| "grad_norm": 0.2490234375, |
| "learning_rate": 2.8330995792426367e-06, |
| "loss": 1.2022, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.7194950911640954, |
| "grad_norm": 0.205078125, |
| "learning_rate": 2.819074333800842e-06, |
| "loss": 1.094, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.7208976157082749, |
| "grad_norm": 0.21484375, |
| "learning_rate": 2.8050490883590463e-06, |
| "loss": 1.1829, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.7223001402524544, |
| "grad_norm": 0.1416015625, |
| "learning_rate": 2.7910238429172515e-06, |
| "loss": 1.2382, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.7237026647966339, |
| "grad_norm": 0.2578125, |
| "learning_rate": 2.776998597475456e-06, |
| "loss": 1.1327, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.7251051893408135, |
| "grad_norm": 0.18359375, |
| "learning_rate": 2.762973352033661e-06, |
| "loss": 1.3151, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.726507713884993, |
| "grad_norm": 0.1669921875, |
| "learning_rate": 2.7489481065918654e-06, |
| "loss": 1.3314, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.7279102384291725, |
| "grad_norm": 0.271484375, |
| "learning_rate": 2.7349228611500706e-06, |
| "loss": 1.2279, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.729312762973352, |
| "grad_norm": 0.2314453125, |
| "learning_rate": 2.720897615708275e-06, |
| "loss": 1.1031, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.7307152875175316, |
| "grad_norm": 0.142578125, |
| "learning_rate": 2.70687237026648e-06, |
| "loss": 1.3399, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.7321178120617111, |
| "grad_norm": 0.20703125, |
| "learning_rate": 2.6928471248246845e-06, |
| "loss": 1.0524, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.7335203366058906, |
| "grad_norm": 0.236328125, |
| "learning_rate": 2.6788218793828897e-06, |
| "loss": 1.1273, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.7349228611500701, |
| "grad_norm": 0.1875, |
| "learning_rate": 2.664796633941094e-06, |
| "loss": 1.4101, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.7363253856942497, |
| "grad_norm": 0.25, |
| "learning_rate": 2.6507713884992993e-06, |
| "loss": 0.9936, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.7377279102384292, |
| "grad_norm": 0.224609375, |
| "learning_rate": 2.6367461430575037e-06, |
| "loss": 1.1749, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.7391304347826086, |
| "grad_norm": 0.1669921875, |
| "learning_rate": 2.622720897615709e-06, |
| "loss": 1.344, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.7405329593267882, |
| "grad_norm": 0.169921875, |
| "learning_rate": 2.6086956521739132e-06, |
| "loss": 1.3, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.7419354838709677, |
| "grad_norm": 0.2197265625, |
| "learning_rate": 2.594670406732118e-06, |
| "loss": 1.1268, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.7433380084151473, |
| "grad_norm": 0.1611328125, |
| "learning_rate": 2.580645161290323e-06, |
| "loss": 1.3111, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.7447405329593267, |
| "grad_norm": 0.1357421875, |
| "learning_rate": 2.5666199158485276e-06, |
| "loss": 1.2274, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.7461430575035063, |
| "grad_norm": 0.25390625, |
| "learning_rate": 2.5525946704067324e-06, |
| "loss": 1.1316, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.7475455820476858, |
| "grad_norm": 0.2734375, |
| "learning_rate": 2.538569424964937e-06, |
| "loss": 1.2295, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.7489481065918654, |
| "grad_norm": 0.181640625, |
| "learning_rate": 2.524544179523142e-06, |
| "loss": 1.0851, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.7503506311360448, |
| "grad_norm": 0.1767578125, |
| "learning_rate": 2.5105189340813467e-06, |
| "loss": 1.1677, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.7517531556802244, |
| "grad_norm": 0.244140625, |
| "learning_rate": 2.4964936886395515e-06, |
| "loss": 1.2814, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.7531556802244039, |
| "grad_norm": 0.2158203125, |
| "learning_rate": 2.4824684431977563e-06, |
| "loss": 1.1817, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.7545582047685835, |
| "grad_norm": 0.2578125, |
| "learning_rate": 2.468443197755961e-06, |
| "loss": 1.2785, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.7559607293127629, |
| "grad_norm": 0.2080078125, |
| "learning_rate": 2.4544179523141654e-06, |
| "loss": 1.2524, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.7573632538569425, |
| "grad_norm": 0.1953125, |
| "learning_rate": 2.4403927068723702e-06, |
| "loss": 1.2224, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.758765778401122, |
| "grad_norm": 0.240234375, |
| "learning_rate": 2.426367461430575e-06, |
| "loss": 1.2381, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.7601683029453016, |
| "grad_norm": 0.205078125, |
| "learning_rate": 2.4123422159887798e-06, |
| "loss": 1.1549, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.761570827489481, |
| "grad_norm": 0.22265625, |
| "learning_rate": 2.3983169705469846e-06, |
| "loss": 0.9989, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.7629733520336606, |
| "grad_norm": 0.185546875, |
| "learning_rate": 2.3842917251051894e-06, |
| "loss": 1.3427, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.7643758765778401, |
| "grad_norm": 0.2294921875, |
| "learning_rate": 2.370266479663394e-06, |
| "loss": 1.2504, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.7657784011220197, |
| "grad_norm": 0.1767578125, |
| "learning_rate": 2.356241234221599e-06, |
| "loss": 1.1086, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.7671809256661991, |
| "grad_norm": 0.2158203125, |
| "learning_rate": 2.3422159887798037e-06, |
| "loss": 1.4594, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.7685834502103787, |
| "grad_norm": 0.251953125, |
| "learning_rate": 2.3281907433380085e-06, |
| "loss": 1.1633, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.7699859747545582, |
| "grad_norm": 0.2001953125, |
| "learning_rate": 2.3141654978962133e-06, |
| "loss": 1.1294, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.7713884992987378, |
| "grad_norm": 0.236328125, |
| "learning_rate": 2.300140252454418e-06, |
| "loss": 1.3159, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.7727910238429172, |
| "grad_norm": 0.1669921875, |
| "learning_rate": 2.286115007012623e-06, |
| "loss": 1.2132, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.7741935483870968, |
| "grad_norm": 0.177734375, |
| "learning_rate": 2.2720897615708276e-06, |
| "loss": 1.0724, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.7755960729312763, |
| "grad_norm": 0.18359375, |
| "learning_rate": 2.2580645161290324e-06, |
| "loss": 1.2799, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.7769985974754559, |
| "grad_norm": 0.236328125, |
| "learning_rate": 2.244039270687237e-06, |
| "loss": 1.2397, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.7784011220196353, |
| "grad_norm": 0.236328125, |
| "learning_rate": 2.230014025245442e-06, |
| "loss": 1.258, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.7798036465638148, |
| "grad_norm": 0.23046875, |
| "learning_rate": 2.2159887798036468e-06, |
| "loss": 1.0467, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.7812061711079944, |
| "grad_norm": 0.212890625, |
| "learning_rate": 2.2019635343618515e-06, |
| "loss": 1.2859, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.782608695652174, |
| "grad_norm": 0.291015625, |
| "learning_rate": 2.1879382889200563e-06, |
| "loss": 1.2433, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.7840112201963534, |
| "grad_norm": 0.1982421875, |
| "learning_rate": 2.173913043478261e-06, |
| "loss": 1.2297, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.7854137447405329, |
| "grad_norm": 0.169921875, |
| "learning_rate": 2.159887798036466e-06, |
| "loss": 1.3596, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.7868162692847125, |
| "grad_norm": 0.2431640625, |
| "learning_rate": 2.1458625525946707e-06, |
| "loss": 1.2091, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.788218793828892, |
| "grad_norm": 0.2099609375, |
| "learning_rate": 2.1318373071528755e-06, |
| "loss": 1.176, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.7896213183730715, |
| "grad_norm": 0.208984375, |
| "learning_rate": 2.1178120617110802e-06, |
| "loss": 1.211, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.791023842917251, |
| "grad_norm": 0.220703125, |
| "learning_rate": 2.103786816269285e-06, |
| "loss": 1.1723, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.7924263674614306, |
| "grad_norm": 0.1484375, |
| "learning_rate": 2.08976157082749e-06, |
| "loss": 1.1888, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.7938288920056101, |
| "grad_norm": 0.3359375, |
| "learning_rate": 2.075736325385694e-06, |
| "loss": 1.1476, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.7952314165497896, |
| "grad_norm": 0.2734375, |
| "learning_rate": 2.061711079943899e-06, |
| "loss": 1.1118, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.7966339410939691, |
| "grad_norm": 0.2001953125, |
| "learning_rate": 2.0476858345021037e-06, |
| "loss": 1.1394, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.7980364656381487, |
| "grad_norm": 0.2177734375, |
| "learning_rate": 2.0336605890603085e-06, |
| "loss": 1.2972, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.7994389901823282, |
| "grad_norm": 0.169921875, |
| "learning_rate": 2.0196353436185133e-06, |
| "loss": 1.3216, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.8008415147265077, |
| "grad_norm": 0.1630859375, |
| "learning_rate": 2.005610098176718e-06, |
| "loss": 1.2028, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.8022440392706872, |
| "grad_norm": 0.232421875, |
| "learning_rate": 1.991584852734923e-06, |
| "loss": 1.2873, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.8036465638148668, |
| "grad_norm": 0.2431640625, |
| "learning_rate": 1.9775596072931276e-06, |
| "loss": 1.0877, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.8050490883590463, |
| "grad_norm": 0.328125, |
| "learning_rate": 1.9635343618513324e-06, |
| "loss": 1.4381, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.8064516129032258, |
| "grad_norm": 0.1689453125, |
| "learning_rate": 1.9495091164095372e-06, |
| "loss": 1.1963, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.8078541374474053, |
| "grad_norm": 0.28125, |
| "learning_rate": 1.935483870967742e-06, |
| "loss": 1.1772, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.8092566619915849, |
| "grad_norm": 0.177734375, |
| "learning_rate": 1.9214586255259468e-06, |
| "loss": 1.3152, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.8106591865357644, |
| "grad_norm": 0.2265625, |
| "learning_rate": 1.9074333800841516e-06, |
| "loss": 1.2125, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.8120617110799438, |
| "grad_norm": 0.203125, |
| "learning_rate": 1.8934081346423563e-06, |
| "loss": 1.0921, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.8134642356241234, |
| "grad_norm": 0.205078125, |
| "learning_rate": 1.8793828892005611e-06, |
| "loss": 1.118, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.814866760168303, |
| "grad_norm": 0.208984375, |
| "learning_rate": 1.865357643758766e-06, |
| "loss": 1.1351, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.8162692847124825, |
| "grad_norm": 0.1630859375, |
| "learning_rate": 1.8513323983169707e-06, |
| "loss": 1.1744, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.8176718092566619, |
| "grad_norm": 0.11328125, |
| "learning_rate": 1.8373071528751755e-06, |
| "loss": 1.3168, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.8190743338008415, |
| "grad_norm": 0.2470703125, |
| "learning_rate": 1.8232819074333803e-06, |
| "loss": 1.1647, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.820476858345021, |
| "grad_norm": 0.25, |
| "learning_rate": 1.809256661991585e-06, |
| "loss": 1.0769, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.8218793828892006, |
| "grad_norm": 0.2197265625, |
| "learning_rate": 1.7952314165497896e-06, |
| "loss": 1.2422, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.82328190743338, |
| "grad_norm": 0.263671875, |
| "learning_rate": 1.7812061711079944e-06, |
| "loss": 1.1405, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.8246844319775596, |
| "grad_norm": 0.2236328125, |
| "learning_rate": 1.7671809256661992e-06, |
| "loss": 1.1655, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.8260869565217391, |
| "grad_norm": 0.1923828125, |
| "learning_rate": 1.753155680224404e-06, |
| "loss": 1.3527, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.8274894810659187, |
| "grad_norm": 0.1787109375, |
| "learning_rate": 1.7391304347826088e-06, |
| "loss": 1.2396, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.8288920056100981, |
| "grad_norm": 0.1357421875, |
| "learning_rate": 1.7251051893408135e-06, |
| "loss": 1.4179, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.8302945301542777, |
| "grad_norm": 0.2138671875, |
| "learning_rate": 1.7110799438990183e-06, |
| "loss": 1.1096, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.8316970546984572, |
| "grad_norm": 0.15234375, |
| "learning_rate": 1.697054698457223e-06, |
| "loss": 1.2698, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.8330995792426368, |
| "grad_norm": 0.1435546875, |
| "learning_rate": 1.6830294530154279e-06, |
| "loss": 1.3, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.8345021037868162, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 1.6690042075736327e-06, |
| "loss": 1.1469, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.8359046283309958, |
| "grad_norm": 0.21875, |
| "learning_rate": 1.6549789621318375e-06, |
| "loss": 1.1219, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.8373071528751753, |
| "grad_norm": 0.224609375, |
| "learning_rate": 1.6409537166900422e-06, |
| "loss": 1.1502, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.8387096774193549, |
| "grad_norm": 0.1376953125, |
| "learning_rate": 1.626928471248247e-06, |
| "loss": 1.3427, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.8401122019635343, |
| "grad_norm": 0.2333984375, |
| "learning_rate": 1.6129032258064516e-06, |
| "loss": 1.2831, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.8415147265077139, |
| "grad_norm": 0.197265625, |
| "learning_rate": 1.5988779803646564e-06, |
| "loss": 1.1839, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.8429172510518934, |
| "grad_norm": 0.17578125, |
| "learning_rate": 1.5848527349228612e-06, |
| "loss": 1.204, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.844319775596073, |
| "grad_norm": 0.244140625, |
| "learning_rate": 1.570827489481066e-06, |
| "loss": 1.0692, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.8457223001402524, |
| "grad_norm": 0.185546875, |
| "learning_rate": 1.5568022440392707e-06, |
| "loss": 1.2152, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.847124824684432, |
| "grad_norm": 0.22265625, |
| "learning_rate": 1.5427769985974755e-06, |
| "loss": 1.2749, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.8485273492286115, |
| "grad_norm": 0.1787109375, |
| "learning_rate": 1.5287517531556803e-06, |
| "loss": 1.4086, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.8499298737727911, |
| "grad_norm": 0.267578125, |
| "learning_rate": 1.514726507713885e-06, |
| "loss": 1.2094, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.8513323983169705, |
| "grad_norm": 0.2353515625, |
| "learning_rate": 1.5007012622720899e-06, |
| "loss": 1.1288, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.85273492286115, |
| "grad_norm": 0.1376953125, |
| "learning_rate": 1.4866760168302946e-06, |
| "loss": 1.307, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.8541374474053296, |
| "grad_norm": 0.19140625, |
| "learning_rate": 1.4726507713884994e-06, |
| "loss": 1.1327, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.8555399719495091, |
| "grad_norm": 0.208984375, |
| "learning_rate": 1.4586255259467042e-06, |
| "loss": 1.2754, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.8569424964936886, |
| "grad_norm": 0.216796875, |
| "learning_rate": 1.444600280504909e-06, |
| "loss": 1.2639, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.8583450210378681, |
| "grad_norm": 0.1826171875, |
| "learning_rate": 1.4305750350631138e-06, |
| "loss": 1.2885, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.8597475455820477, |
| "grad_norm": 0.19921875, |
| "learning_rate": 1.4165497896213184e-06, |
| "loss": 1.2376, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.8611500701262272, |
| "grad_norm": 0.2275390625, |
| "learning_rate": 1.4025245441795231e-06, |
| "loss": 1.1711, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.8625525946704067, |
| "grad_norm": 0.205078125, |
| "learning_rate": 1.388499298737728e-06, |
| "loss": 1.194, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.8639551192145862, |
| "grad_norm": 0.162109375, |
| "learning_rate": 1.3744740532959327e-06, |
| "loss": 1.1591, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.8653576437587658, |
| "grad_norm": 0.1796875, |
| "learning_rate": 1.3604488078541375e-06, |
| "loss": 1.1594, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.8667601683029453, |
| "grad_norm": 0.267578125, |
| "learning_rate": 1.3464235624123423e-06, |
| "loss": 1.3947, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.8681626928471248, |
| "grad_norm": 0.259765625, |
| "learning_rate": 1.332398316970547e-06, |
| "loss": 1.1734, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.8695652173913043, |
| "grad_norm": 0.119140625, |
| "learning_rate": 1.3183730715287518e-06, |
| "loss": 1.3298, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.8709677419354839, |
| "grad_norm": 0.1884765625, |
| "learning_rate": 1.3043478260869566e-06, |
| "loss": 1.1371, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.8723702664796634, |
| "grad_norm": 0.13671875, |
| "learning_rate": 1.2903225806451614e-06, |
| "loss": 1.1397, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.8737727910238429, |
| "grad_norm": 0.244140625, |
| "learning_rate": 1.2762973352033662e-06, |
| "loss": 1.0592, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.8751753155680224, |
| "grad_norm": 0.162109375, |
| "learning_rate": 1.262272089761571e-06, |
| "loss": 1.1607, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.876577840112202, |
| "grad_norm": 0.1689453125, |
| "learning_rate": 1.2482468443197758e-06, |
| "loss": 1.1171, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.8779803646563815, |
| "grad_norm": 0.310546875, |
| "learning_rate": 1.2342215988779805e-06, |
| "loss": 1.1599, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.879382889200561, |
| "grad_norm": 0.2109375, |
| "learning_rate": 1.2201963534361851e-06, |
| "loss": 1.1901, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.8807854137447405, |
| "grad_norm": 0.30859375, |
| "learning_rate": 1.2061711079943899e-06, |
| "loss": 1.5766, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.8821879382889201, |
| "grad_norm": 0.27734375, |
| "learning_rate": 1.1921458625525947e-06, |
| "loss": 1.1917, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.8835904628330996, |
| "grad_norm": 0.1494140625, |
| "learning_rate": 1.1781206171107995e-06, |
| "loss": 1.6177, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.884992987377279, |
| "grad_norm": 0.189453125, |
| "learning_rate": 1.1640953716690042e-06, |
| "loss": 1.3351, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.8863955119214586, |
| "grad_norm": 0.1376953125, |
| "learning_rate": 1.150070126227209e-06, |
| "loss": 1.1969, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.8877980364656382, |
| "grad_norm": 0.171875, |
| "learning_rate": 1.1360448807854138e-06, |
| "loss": 1.2267, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.8892005610098177, |
| "grad_norm": 0.1640625, |
| "learning_rate": 1.1220196353436186e-06, |
| "loss": 1.2762, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.8906030855539971, |
| "grad_norm": 0.193359375, |
| "learning_rate": 1.1079943899018234e-06, |
| "loss": 1.4256, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.8920056100981767, |
| "grad_norm": 0.1669921875, |
| "learning_rate": 1.0939691444600282e-06, |
| "loss": 1.1813, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.8934081346423562, |
| "grad_norm": 0.154296875, |
| "learning_rate": 1.079943899018233e-06, |
| "loss": 1.3887, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.8948106591865358, |
| "grad_norm": 0.19140625, |
| "learning_rate": 1.0659186535764377e-06, |
| "loss": 1.2293, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.8962131837307152, |
| "grad_norm": 0.14453125, |
| "learning_rate": 1.0518934081346425e-06, |
| "loss": 1.2268, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.8976157082748948, |
| "grad_norm": 0.234375, |
| "learning_rate": 1.037868162692847e-06, |
| "loss": 1.2701, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.8990182328190743, |
| "grad_norm": 0.1943359375, |
| "learning_rate": 1.0238429172510519e-06, |
| "loss": 1.2166, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.9004207573632539, |
| "grad_norm": 0.1748046875, |
| "learning_rate": 1.0098176718092566e-06, |
| "loss": 1.195, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.9018232819074333, |
| "grad_norm": 0.1298828125, |
| "learning_rate": 9.957924263674614e-07, |
| "loss": 1.2161, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.9032258064516129, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 9.817671809256662e-07, |
| "loss": 1.1612, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.9046283309957924, |
| "grad_norm": 0.2333984375, |
| "learning_rate": 9.67741935483871e-07, |
| "loss": 1.1929, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.906030855539972, |
| "grad_norm": 0.25390625, |
| "learning_rate": 9.537166900420758e-07, |
| "loss": 1.2325, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.9074333800841514, |
| "grad_norm": 0.189453125, |
| "learning_rate": 9.396914446002806e-07, |
| "loss": 1.2358, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.908835904628331, |
| "grad_norm": 0.1748046875, |
| "learning_rate": 9.256661991584853e-07, |
| "loss": 1.3516, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.9102384291725105, |
| "grad_norm": 0.203125, |
| "learning_rate": 9.116409537166901e-07, |
| "loss": 1.3763, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.9116409537166901, |
| "grad_norm": 0.1650390625, |
| "learning_rate": 8.976157082748948e-07, |
| "loss": 1.3796, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.9130434782608695, |
| "grad_norm": 0.2431640625, |
| "learning_rate": 8.835904628330996e-07, |
| "loss": 1.1251, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.9144460028050491, |
| "grad_norm": 0.1591796875, |
| "learning_rate": 8.695652173913044e-07, |
| "loss": 1.2083, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.9158485273492286, |
| "grad_norm": 0.162109375, |
| "learning_rate": 8.555399719495092e-07, |
| "loss": 1.2821, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.9172510518934082, |
| "grad_norm": 0.11669921875, |
| "learning_rate": 8.415147265077139e-07, |
| "loss": 1.2481, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.9186535764375876, |
| "grad_norm": 0.1572265625, |
| "learning_rate": 8.274894810659187e-07, |
| "loss": 1.1164, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.9200561009817672, |
| "grad_norm": 0.27734375, |
| "learning_rate": 8.134642356241235e-07, |
| "loss": 1.2441, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.9214586255259467, |
| "grad_norm": 0.2197265625, |
| "learning_rate": 7.994389901823282e-07, |
| "loss": 1.1084, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.9228611500701263, |
| "grad_norm": 0.22265625, |
| "learning_rate": 7.85413744740533e-07, |
| "loss": 1.1713, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.9242636746143057, |
| "grad_norm": 0.1591796875, |
| "learning_rate": 7.713884992987378e-07, |
| "loss": 1.377, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.9256661991584852, |
| "grad_norm": 0.162109375, |
| "learning_rate": 7.573632538569425e-07, |
| "loss": 1.2492, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.9270687237026648, |
| "grad_norm": 0.271484375, |
| "learning_rate": 7.433380084151473e-07, |
| "loss": 1.1586, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.9284712482468443, |
| "grad_norm": 0.1591796875, |
| "learning_rate": 7.293127629733521e-07, |
| "loss": 1.2767, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.9298737727910238, |
| "grad_norm": 0.2138671875, |
| "learning_rate": 7.152875175315569e-07, |
| "loss": 1.1998, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.9312762973352033, |
| "grad_norm": 0.251953125, |
| "learning_rate": 7.012622720897616e-07, |
| "loss": 1.1143, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.9326788218793829, |
| "grad_norm": 0.1865234375, |
| "learning_rate": 6.872370266479664e-07, |
| "loss": 1.2119, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.9340813464235624, |
| "grad_norm": 0.2490234375, |
| "learning_rate": 6.732117812061711e-07, |
| "loss": 1.1035, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.9354838709677419, |
| "grad_norm": 0.1728515625, |
| "learning_rate": 6.591865357643759e-07, |
| "loss": 1.3081, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.9368863955119214, |
| "grad_norm": 0.1923828125, |
| "learning_rate": 6.451612903225807e-07, |
| "loss": 1.2938, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.938288920056101, |
| "grad_norm": 0.25, |
| "learning_rate": 6.311360448807855e-07, |
| "loss": 1.2476, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.9396914446002805, |
| "grad_norm": 0.1328125, |
| "learning_rate": 6.171107994389903e-07, |
| "loss": 1.3178, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.94109396914446, |
| "grad_norm": 0.30078125, |
| "learning_rate": 6.030855539971949e-07, |
| "loss": 1.2007, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.9424964936886395, |
| "grad_norm": 0.212890625, |
| "learning_rate": 5.890603085553997e-07, |
| "loss": 1.1024, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.9438990182328191, |
| "grad_norm": 0.1630859375, |
| "learning_rate": 5.750350631136045e-07, |
| "loss": 1.2341, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.9453015427769986, |
| "grad_norm": 0.2041015625, |
| "learning_rate": 5.610098176718093e-07, |
| "loss": 1.1421, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.9467040673211781, |
| "grad_norm": 0.259765625, |
| "learning_rate": 5.469845722300141e-07, |
| "loss": 1.1908, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.9481065918653576, |
| "grad_norm": 0.193359375, |
| "learning_rate": 5.329593267882189e-07, |
| "loss": 1.0408, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.9495091164095372, |
| "grad_norm": 0.1474609375, |
| "learning_rate": 5.189340813464235e-07, |
| "loss": 1.1853, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.9509116409537167, |
| "grad_norm": 0.146484375, |
| "learning_rate": 5.049088359046283e-07, |
| "loss": 1.2057, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.9523141654978962, |
| "grad_norm": 0.1630859375, |
| "learning_rate": 4.908835904628331e-07, |
| "loss": 1.0716, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.9537166900420757, |
| "grad_norm": 0.1884765625, |
| "learning_rate": 4.768583450210379e-07, |
| "loss": 1.2057, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.9551192145862553, |
| "grad_norm": 0.208984375, |
| "learning_rate": 4.628330995792427e-07, |
| "loss": 1.3381, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.9565217391304348, |
| "grad_norm": 0.1923828125, |
| "learning_rate": 4.488078541374474e-07, |
| "loss": 1.209, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.9579242636746143, |
| "grad_norm": 0.1435546875, |
| "learning_rate": 4.347826086956522e-07, |
| "loss": 1.2604, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.9593267882187938, |
| "grad_norm": 0.232421875, |
| "learning_rate": 4.2075736325385697e-07, |
| "loss": 1.099, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.9607293127629734, |
| "grad_norm": 0.23828125, |
| "learning_rate": 4.0673211781206176e-07, |
| "loss": 1.1337, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.9621318373071529, |
| "grad_norm": 0.181640625, |
| "learning_rate": 3.927068723702665e-07, |
| "loss": 1.1812, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.9635343618513323, |
| "grad_norm": 0.1875, |
| "learning_rate": 3.7868162692847127e-07, |
| "loss": 1.2327, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.9649368863955119, |
| "grad_norm": 0.16015625, |
| "learning_rate": 3.6465638148667605e-07, |
| "loss": 1.1862, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.9663394109396914, |
| "grad_norm": 0.19140625, |
| "learning_rate": 3.506311360448808e-07, |
| "loss": 1.2904, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.967741935483871, |
| "grad_norm": 0.2333984375, |
| "learning_rate": 3.3660589060308557e-07, |
| "loss": 1.2837, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.9691444600280504, |
| "grad_norm": 0.1669921875, |
| "learning_rate": 3.2258064516129035e-07, |
| "loss": 1.0958, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.97054698457223, |
| "grad_norm": 0.1826171875, |
| "learning_rate": 3.0855539971949513e-07, |
| "loss": 1.3348, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.9719495091164095, |
| "grad_norm": 0.1708984375, |
| "learning_rate": 2.9453015427769986e-07, |
| "loss": 1.2333, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.9733520336605891, |
| "grad_norm": 0.1669921875, |
| "learning_rate": 2.8050490883590465e-07, |
| "loss": 1.1464, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.9747545582047685, |
| "grad_norm": 0.2275390625, |
| "learning_rate": 2.6647966339410943e-07, |
| "loss": 1.29, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.9761570827489481, |
| "grad_norm": 0.1533203125, |
| "learning_rate": 2.5245441795231416e-07, |
| "loss": 1.2551, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.9775596072931276, |
| "grad_norm": 0.1728515625, |
| "learning_rate": 2.3842917251051895e-07, |
| "loss": 1.206, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.9789621318373072, |
| "grad_norm": 0.2216796875, |
| "learning_rate": 2.244039270687237e-07, |
| "loss": 1.1592, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.9803646563814866, |
| "grad_norm": 0.19921875, |
| "learning_rate": 2.1037868162692849e-07, |
| "loss": 1.1878, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.9817671809256662, |
| "grad_norm": 0.259765625, |
| "learning_rate": 1.9635343618513324e-07, |
| "loss": 0.9784, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.9831697054698457, |
| "grad_norm": 0.18359375, |
| "learning_rate": 1.8232819074333803e-07, |
| "loss": 1.1988, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.9845722300140253, |
| "grad_norm": 0.2197265625, |
| "learning_rate": 1.6830294530154278e-07, |
| "loss": 1.0429, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.9859747545582047, |
| "grad_norm": 0.27734375, |
| "learning_rate": 1.5427769985974757e-07, |
| "loss": 1.4367, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.9873772791023843, |
| "grad_norm": 0.203125, |
| "learning_rate": 1.4025245441795232e-07, |
| "loss": 1.2093, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.9887798036465638, |
| "grad_norm": 0.193359375, |
| "learning_rate": 1.2622720897615708e-07, |
| "loss": 1.1985, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.9901823281907434, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 1.1220196353436185e-07, |
| "loss": 1.2731, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.9915848527349228, |
| "grad_norm": 0.2470703125, |
| "learning_rate": 9.817671809256662e-08, |
| "loss": 1.3363, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.9929873772791024, |
| "grad_norm": 0.2001953125, |
| "learning_rate": 8.415147265077139e-08, |
| "loss": 1.2392, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.9943899018232819, |
| "grad_norm": 0.2275390625, |
| "learning_rate": 7.012622720897616e-08, |
| "loss": 1.19, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.9957924263674615, |
| "grad_norm": 0.1337890625, |
| "learning_rate": 5.6100981767180926e-08, |
| "loss": 1.3113, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.9971949509116409, |
| "grad_norm": 0.2275390625, |
| "learning_rate": 4.2075736325385696e-08, |
| "loss": 1.0762, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.9985974754558204, |
| "grad_norm": 0.2041015625, |
| "learning_rate": 2.8050490883590463e-08, |
| "loss": 1.2402, |
| "step": 712 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.30859375, |
| "learning_rate": 1.4025245441795231e-08, |
| "loss": 1.3883, |
| "step": 713 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 1.2593402862548828, |
| "eval_runtime": 12.6935, |
| "eval_samples_per_second": 2.836, |
| "eval_steps_per_second": 0.394, |
| "step": 713 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 713, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 0, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.4643205711485993e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
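
The JSON above follows the `trainer_state.json` layout that Hugging Face `transformers`' `Trainer` writes into each checkpoint directory: per-step entries in `log_history` carry `loss`, `learning_rate`, and `grad_norm`, while the final entry carries the `eval_*` metrics instead. A minimal sketch for reading it back, assuming the file is saved as `trainer_state.json` in the working directory; the path and the smoothing window below are illustrative, not part of the original state file:

```python
import json
from pathlib import Path

# Hypothetical location; point this at the checkpoint directory's copy.
state = json.loads(Path("trainer_state.json").read_text())

# Training entries log "loss"; the end-of-epoch entry logs "eval_loss" instead.
train_log = [e for e in state["log_history"] if "loss" in e]
eval_log = [e for e in state["log_history"] if "eval_loss" in e]

# Moving average over the per-step loss to smooth logging noise
# (window size is an arbitrary choice for illustration).
window = 20
losses = [e["loss"] for e in train_log]
smoothed = [
    sum(losses[max(0, i - window + 1): i + 1])
    / (i - max(0, i - window + 1) + 1)
    for i in range(len(losses))
]

print(f"steps logged: {len(train_log)} of max_steps={state['max_steps']}")
print(f"raw loss: {losses[0]:.4f} (first) -> {losses[-1]:.4f} (last)")
print(f"smoothed loss at end of epoch: {smoothed[-1]:.4f}")
if eval_log:
    last_eval = eval_log[-1]
    print(f"eval_loss at step {last_eval['step']}: {last_eval['eval_loss']:.4f}")
```

On this file the script would report 713 logged steps and the step-713 `eval_loss` of about 1.2593; the smoothing step exists only because single-step losses from a batch size of 1 are noisy, as the raw values above show.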