{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.470623681831877, "eval_steps": 200000, "global_step": 8200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00015062509414068384, "eta": "00:00:00", "grad_norm": 49.0, "loss": 1.6921, "lr": "0.000e+00", "step": 1, "steps": "0s,1/33195" }, { "epoch": 0.0003012501882813677, "eta": "218:58:53", "grad_norm": 42.25, "loss": 1.2516, "lr": "2.500e-07", "step": 2, "steps": "23.75s,2/33195" }, { "epoch": 0.00045187528242205153, "eta": "227:32:58", "grad_norm": 37.75, "loss": 1.2419, "lr": "5.000e-07", "step": 3, "steps": "24.68s,3/33195" }, { "epoch": 0.0006025003765627354, "eta": "231:08:18", "grad_norm": 43.5, "loss": 1.2568, "lr": "7.500e-07", "step": 4, "steps": "25.07s,4/33195" }, { "epoch": 0.0007531254707034192, "eta": "220:31:44", "grad_norm": 46.5, "loss": 1.6534, "lr": "1.000e-06", "step": 5, "steps": "23.92s,5/33195" }, { "epoch": 0.0009037505648441031, "eta": "216:44:33", "grad_norm": 85.5, "loss": 1.6259, "lr": "1.250e-06", "step": 6, "steps": "23.51s,6/33195" }, { "epoch": 0.0010543756589847869, "eta": "219:02:26", "grad_norm": 58.75, "loss": 1.3652, "lr": "1.500e-06", "step": 7, "steps": "23.76s,7/33195" }, { "epoch": 0.0012050007531254707, "eta": "219:29:42", "grad_norm": 100.0, "loss": 1.2489, "lr": "1.750e-06", "step": 8, "steps": "23.81s,8/33195" }, { "epoch": 0.0013556258472661546, "eta": "227:52:37", "grad_norm": 7.375, "loss": 0.9945, "lr": "2.000e-06", "step": 9, "steps": "24.72s,9/33195" }, { "epoch": 0.0015062509414068383, "eta": "222:09:18", "grad_norm": 150.0, "loss": 1.2158, "lr": "2.250e-06", "step": 10, "steps": "24.1s,10/33195" }, { "epoch": 0.0016568760355475222, "eta": "220:07:13", "grad_norm": 140.0, "loss": 1.0047, "lr": "2.500e-06", "step": 11, "steps": "23.88s,11/33195" }, { "epoch": 0.0018075011296882061, "eta": "219:44:42", "grad_norm": 131.0, "loss": 0.6162, "lr": "2.750e-06", "step": 12, "steps": "23.84s,12/33195" }, { "epoch": 0.00195812622382889, "eta": "230:20:18", "grad_norm": 91.5, "loss": 0.8811, "lr": "3.000e-06", "step": 13, "steps": "24.99s,13/33195" }, { "epoch": 0.0021087513179695737, "eta": "219:27:19", "grad_norm": 98.5, "loss": 0.6211, "lr": "3.250e-06", "step": 14, "steps": "23.81s,14/33195" }, { "epoch": 0.0022593764121102574, "eta": "214:39:22", "grad_norm": 49.25, "loss": 0.4933, "lr": "3.500e-06", "step": 15, "steps": "23.29s,15/33195" }, { "epoch": 0.0024100015062509415, "eta": "219:09:56", "grad_norm": 45.0, "loss": 0.4818, "lr": "3.750e-06", "step": 16, "steps": "23.78s,16/33195" }, { "epoch": 0.002560626600391625, "eta": "215:39:25", "grad_norm": 29.25, "loss": 0.3859, "lr": "4.000e-06", "step": 17, "steps": "23.4s,17/33195" }, { "epoch": 0.0027112516945323093, "eta": "217:29:37", "grad_norm": 30.0, "loss": 0.85, "lr": "4.250e-06", "step": 18, "steps": "23.6s,18/33195" }, { "epoch": 0.002861876788672993, "eta": "220:59:20", "grad_norm": 14.5, "loss": 0.3087, "lr": "4.500e-06", "step": 19, "steps": "23.98s,19/33195" }, { "epoch": 0.0030125018828136767, "eta": "220:42:21", "grad_norm": 28.75, "loss": 0.2943, "lr": "4.750e-06", "step": 20, "steps": "23.95s,20/33195" }, { "epoch": 0.0031631269769543608, "eta": "217:17:22", "grad_norm": 50.25, "loss": 0.4143, "lr": "5.000e-06", "step": 21, "steps": "23.58s,21/33195" }, { "epoch": 0.0033137520710950445, "eta": "218:50:58", "grad_norm": 37.75, "loss": 0.251, "lr": "5.250e-06", "step": 22, "steps": "23.75s,22/33195" }, { "epoch": 0.003464377165235728, "eta": "215:20:29", "grad_norm": 21.25, "loss": 0.4319, "lr": "5.500e-06", "step": 23, "steps": "23.37s,23/33195" }, { "epoch": 0.0036150022593764122, "eta": "218:50:11", "grad_norm": 10.5, "loss": 0.6086, "lr": "5.750e-06", "step": 24, "steps": "23.75s,24/33195" }, { "epoch": 0.003765627353517096, "eta": "219:06:22", "grad_norm": 14.625, "loss": 0.3387, "lr": "6.000e-06", "step": 25, "steps": "23.78s,25/33195" }, { "epoch": 0.00391625244765778, "eta": "217:04:21", "grad_norm": 5.1875, "loss": 0.8982, "lr": "6.250e-06", "step": 26, "steps": "23.56s,26/33195" }, { "epoch": 0.004066877541798464, "eta": "217:15:01", "grad_norm": 12.8125, "loss": 0.1689, "lr": "6.500e-06", "step": 27, "steps": "23.58s,27/33195" }, { "epoch": 0.004217502635939147, "eta": "218:48:36", "grad_norm": 3.5, "loss": 0.5473, "lr": "6.750e-06", "step": 28, "steps": "23.75s,28/33195" }, { "epoch": 0.004368127730079831, "eta": "220:38:45", "grad_norm": 12.3125, "loss": 0.6251, "lr": "7.000e-06", "step": 29, "steps": "23.95s,29/33195" }, { "epoch": 0.004518752824220515, "eta": "220:43:53", "grad_norm": 1.7578, "loss": 1.0038, "lr": "7.250e-06", "step": 30, "steps": "23.96s,30/33195" }, { "epoch": 0.004669377918361199, "eta": "216:34:45", "grad_norm": 14.3125, "loss": 0.1236, "lr": "7.500e-06", "step": 31, "steps": "23.51s,31/33195" }, { "epoch": 0.004820003012501883, "eta": "221:10:43", "grad_norm": 13.625, "loss": 0.3094, "lr": "7.750e-06", "step": 32, "steps": "24.01s,32/33195" }, { "epoch": 0.004970628106642567, "eta": "217:40:18", "grad_norm": 10.3125, "loss": 0.1342, "lr": "8.000e-06", "step": 33, "steps": "23.63s,33/33195" }, { "epoch": 0.00512125320078325, "eta": "216:55:41", "grad_norm": 6.625, "loss": 0.3698, "lr": "8.250e-06", "step": 34, "steps": "23.55s,34/33195" }, { "epoch": 0.005271878294923934, "eta": "219:07:56", "grad_norm": 6.625, "loss": 0.2032, "lr": "8.500e-06", "step": 35, "steps": "23.79s,35/33195" }, { "epoch": 0.005422503389064619, "eta": "218:34:23", "grad_norm": 19.25, "loss": 0.2609, "lr": "8.750e-06", "step": 36, "steps": "23.73s,36/33195" }, { "epoch": 0.005573128483205302, "eta": "215:53:43", "grad_norm": 7.6875, "loss": 0.4572, "lr": "9.000e-06", "step": 37, "steps": "23.44s,37/33195" }, { "epoch": 0.005723753577345986, "eta": "219:01:13", "grad_norm": 12.3125, "loss": 0.1252, "lr": "9.250e-06", "step": 38, "steps": "23.78s,38/33195" }, { "epoch": 0.00587437867148667, "eta": "218:33:11", "grad_norm": 26.5, "loss": 0.1414, "lr": "9.500e-06", "step": 39, "steps": "23.73s,39/33195" }, { "epoch": 0.006025003765627353, "eta": "220:34:22", "grad_norm": 14.5, "loss": 0.3101, "lr": "9.750e-06", "step": 40, "steps": "23.95s,40/33195" }, { "epoch": 0.006175628859768037, "eta": "216:30:50", "grad_norm": 21.625, "loss": 0.1334, "lr": "1.000e-05", "step": 41, "steps": "23.51s,41/33195" }, { "epoch": 0.0063262539539087215, "eta": "220:44:37", "grad_norm": 2.2031, "loss": 0.3239, "lr": "1.025e-05", "step": 42, "steps": "23.97s,42/33195" }, { "epoch": 0.006476879048049405, "eta": "217:52:56", "grad_norm": 20.625, "loss": 0.4932, "lr": "1.050e-05", "step": 43, "steps": "23.66s,43/33195" }, { "epoch": 0.006627504142190089, "eta": "221:11:27", "grad_norm": 17.5, "loss": 0.525, "lr": "1.075e-05", "step": 44, "steps": "24.02s,44/33195" }, { "epoch": 0.006778129236330773, "eta": "215:34:01", "grad_norm": 25.125, "loss": 0.353, "lr": "1.100e-05", "step": 45, "steps": "23.41s,45/33195" }, { "epoch": 0.006928754330471456, "eta": "215:06:00", "grad_norm": 7.25, "loss": 0.1477, "lr": "1.125e-05", "step": 46, "steps": "23.36s,46/33195" }, { "epoch": 0.007079379424612141, "eta": "219:30:48", "grad_norm": 14.75, "loss": 0.236, "lr": "1.150e-05", "step": 47, "steps": "23.84s,47/33195" }, { "epoch": 0.0072300045187528245, "eta": "218:51:44", "grad_norm": 13.375, "loss": 0.2785, "lr": "1.175e-05", "step": 48, "steps": "23.77s,48/33195" }, { "epoch": 0.007380629612893508, "eta": "218:34:46", "grad_norm": 19.875, "loss": 0.1387, "lr": "1.200e-05", "step": 49, "steps": "23.74s,49/33195" }, { "epoch": 0.007531254707034192, "eta": "215:43:07", "grad_norm": 8.3125, "loss": 0.1126, "lr": "1.225e-05", "step": 50, "steps": "23.43s,50/33195" }, { "epoch": 0.0076818798011748755, "eta": "215:42:43", "grad_norm": 11.6875, "loss": 0.1074, "lr": "1.250e-05", "step": 51, "steps": "23.43s,51/33195" }, { "epoch": 0.00783250489531556, "eta": "220:07:29", "grad_norm": 14.9375, "loss": 0.1463, "lr": "1.275e-05", "step": 52, "steps": "23.91s,52/33195" }, { "epoch": 0.007983129989456243, "eta": "220:51:16", "grad_norm": 10.25, "loss": 0.1098, "lr": "1.300e-05", "step": 53, "steps": "23.99s,53/33195" }, { "epoch": 0.008133755083596927, "eta": "217:26:30", "grad_norm": 1.4297, "loss": 0.146, "lr": "1.325e-05", "step": 54, "steps": "23.62s,54/33195" }, { "epoch": 0.00828438017773761, "eta": "219:27:37", "grad_norm": 14.4375, "loss": 0.1743, "lr": "1.350e-05", "step": 55, "steps": "23.84s,55/33195" }, { "epoch": 0.008435005271878295, "eta": "219:27:13", "grad_norm": 21.0, "loss": 0.0931, "lr": "1.375e-05", "step": 56, "steps": "23.84s,56/33195" }, { "epoch": 0.00858563036601898, "eta": "218:59:13", "grad_norm": 8.5, "loss": 0.0902, "lr": "1.400e-05", "step": 57, "steps": "23.79s,57/33195" }, { "epoch": 0.008736255460159662, "eta": "218:53:17", "grad_norm": 7.2812, "loss": 0.412, "lr": "1.425e-05", "step": 58, "steps": "23.78s,58/33195" }, { "epoch": 0.008886880554300347, "eta": "220:48:52", "grad_norm": 1.8281, "loss": 0.4085, "lr": "1.450e-05", "step": 59, "steps": "23.99s,59/33195" }, { "epoch": 0.00903750564844103, "eta": "216:17:52", "grad_norm": 7.5, "loss": 0.2458, "lr": "1.475e-05", "step": 60, "steps": "23.5s,60/33195" }, { "epoch": 0.009188130742581714, "eta": "215:11:12", "grad_norm": 3.3906, "loss": 0.2086, "lr": "1.500e-05", "step": 61, "steps": "23.38s,61/33195" }, { "epoch": 0.009338755836722399, "eta": "221:15:17", "grad_norm": 7.6875, "loss": 0.2016, "lr": "1.525e-05", "step": 62, "steps": "24.04s,62/33195" }, { "epoch": 0.009489380930863081, "eta": "217:22:57", "grad_norm": 2.8281, "loss": 0.0769, "lr": "1.550e-05", "step": 63, "steps": "23.62s,63/33195" }, { "epoch": 0.009640006025003766, "eta": "214:31:23", "grad_norm": 1.8359, "loss": 0.2661, "lr": "1.575e-05", "step": 64, "steps": "23.31s,64/33195" }, { "epoch": 0.009790631119144449, "eta": "219:18:07", "grad_norm": 3.6719, "loss": 0.5058, "lr": "1.600e-05", "step": 65, "steps": "23.83s,65/33195" }, { "epoch": 0.009941256213285133, "eta": "216:32:05", "grad_norm": 7.8125, "loss": 0.1735, "lr": "1.625e-05", "step": 66, "steps": "23.53s,66/33195" }, { "epoch": 0.010091881307425818, "eta": "219:06:17", "grad_norm": 3.8594, "loss": 0.2579, "lr": "1.650e-05", "step": 67, "steps": "23.81s,67/33195" }, { "epoch": 0.0102425064015665, "eta": "217:48:36", "grad_norm": 2.6562, "loss": 0.3493, "lr": "1.675e-05", "step": 68, "steps": "23.67s,68/33195" }, { "epoch": 0.010393131495707185, "eta": "219:05:30", "grad_norm": 4.5312, "loss": 0.2017, "lr": "1.700e-05", "step": 69, "steps": "23.81s,69/33195" }, { "epoch": 0.010543756589847868, "eta": "215:40:50", "grad_norm": 1.7266, "loss": 0.2137, "lr": "1.725e-05", "step": 70, "steps": "23.44s,70/33195" }, { "epoch": 0.010694381683988553, "eta": "220:05:26", "grad_norm": 4.9375, "loss": 0.2462, "lr": "1.750e-05", "step": 71, "steps": "23.92s,71/33195" }, { "epoch": 0.010845006778129237, "eta": "216:46:17", "grad_norm": 1.9219, "loss": 0.1995, "lr": "1.775e-05", "step": 72, "steps": "23.56s,72/33195" }, { "epoch": 0.01099563187226992, "eta": "220:04:38", "grad_norm": 2.5781, "loss": 0.2049, "lr": "1.800e-05", "step": 73, "steps": "23.92s,73/33195" }, { "epoch": 0.011146256966410605, "eta": "220:20:47", "grad_norm": 2.375, "loss": 0.311, "lr": "1.825e-05", "step": 74, "steps": "23.95s,74/33195" }, { "epoch": 0.011296882060551287, "eta": "220:31:26", "grad_norm": 3.7344, "loss": 0.1143, "lr": "1.850e-05", "step": 75, "steps": "23.97s,75/33195" }, { "epoch": 0.011447507154691972, "eta": "217:45:26", "grad_norm": 3.9844, "loss": 0.0905, "lr": "1.875e-05", "step": 76, "steps": "23.67s,76/33195" }, { "epoch": 0.011598132248832655, "eta": "216:38:48", "grad_norm": 1.2422, "loss": 0.235, "lr": "1.900e-05", "step": 77, "steps": "23.55s,77/33195" }, { "epoch": 0.01174875734297334, "eta": "216:05:18", "grad_norm": 3.5, "loss": 0.1071, "lr": "1.925e-05", "step": 78, "steps": "23.49s,78/33195" }, { "epoch": 0.011899382437114024, "eta": "216:38:01", "grad_norm": 2.2969, "loss": 0.2072, "lr": "1.950e-05", "step": 79, "steps": "23.55s,79/33195" }, { "epoch": 0.012050007531254707, "eta": "214:47:15", "grad_norm": 3.9062, "loss": 0.1957, "lr": "1.975e-05", "step": 80, "steps": "23.35s,80/33195" }, { "epoch": 0.012200632625395391, "eta": "218:05:32", "grad_norm": 2.6406, "loss": 0.2408, "lr": "2.000e-05", "step": 81, "steps": "23.71s,81/33195" }, { "epoch": 0.012351257719536074, "eta": "216:25:48", "grad_norm": 4.375, "loss": 0.3332, "lr": "2.025e-05", "step": 82, "steps": "23.53s,82/33195" }, { "epoch": 0.012501882813676759, "eta": "216:14:23", "grad_norm": 1.5469, "loss": 0.0741, "lr": "2.050e-05", "step": 83, "steps": "23.51s,83/33195" }, { "epoch": 0.012652507907817443, "eta": "218:54:01", "grad_norm": 14.375, "loss": 0.1782, "lr": "2.075e-05", "step": 84, "steps": "23.8s,84/33195" }, { "epoch": 0.012803133001958126, "eta": "217:08:47", "grad_norm": 25.0, "loss": 0.1311, "lr": "2.100e-05", "step": 85, "steps": "23.61s,85/33195" }, { "epoch": 0.01295375809609881, "eta": "214:00:46", "grad_norm": 6.9375, "loss": 0.1471, "lr": "2.125e-05", "step": 86, "steps": "23.27s,86/33195" }, { "epoch": 0.013104383190239493, "eta": "217:13:30", "grad_norm": 8.5625, "loss": 0.1742, "lr": "2.150e-05", "step": 87, "steps": "23.62s,87/33195" }, { "epoch": 0.013255008284380178, "eta": "221:04:52", "grad_norm": 8.1875, "loss": 0.1598, "lr": "2.175e-05", "step": 88, "steps": "24.04s,88/33195" }, { "epoch": 0.013405633378520862, "eta": "218:24:27", "grad_norm": 3.0, "loss": 0.2966, "lr": "2.200e-05", "step": 89, "steps": "23.75s,89/33195" }, { "epoch": 0.013556258472661545, "eta": "216:22:40", "grad_norm": 1.0781, "loss": 0.2715, "lr": "2.225e-05", "step": 90, "steps": "23.53s,90/33195" }, { "epoch": 0.01370688356680223, "eta": "216:16:46", "grad_norm": 3.2656, "loss": 0.2677, "lr": "2.250e-05", "step": 91, "steps": "23.52s,91/33195" }, { "epoch": 0.013857508660942913, "eta": "217:28:05", "grad_norm": 1.0312, "loss": 0.0763, "lr": "2.275e-05", "step": 92, "steps": "23.65s,92/33195" }, { "epoch": 0.014008133755083597, "eta": "214:58:44", "grad_norm": 5.75, "loss": 0.3143, "lr": "2.300e-05", "step": 93, "steps": "23.38s,93/33195" }, { "epoch": 0.014158758849224282, "eta": "218:05:55", "grad_norm": 10.0625, "loss": 0.2065, "lr": "2.325e-05", "step": 94, "steps": "23.72s,94/33195" }, { "epoch": 0.014309383943364964, "eta": "216:15:12", "grad_norm": 1.8281, "loss": 0.2495, "lr": "2.350e-05", "step": 95, "steps": "23.52s,95/33195" }, { "epoch": 0.014460009037505649, "eta": "216:58:56", "grad_norm": 12.5625, "loss": 0.0916, "lr": "2.375e-05", "step": 96, "steps": "23.6s,96/33195" }, { "epoch": 0.014610634131646332, "eta": "222:46:04", "grad_norm": 5.7812, "loss": 0.3356, "lr": "2.400e-05", "step": 97, "steps": "24.23s,97/33195" }, { "epoch": 0.014761259225787016, "eta": "216:19:32", "grad_norm": 5.8125, "loss": 0.0854, "lr": "2.425e-05", "step": 98, "steps": "23.53s,98/33195" }, { "epoch": 0.0149118843199277, "eta": "216:13:37", "grad_norm": 9.25, "loss": 0.2616, "lr": "2.450e-05", "step": 99, "steps": "23.52s,99/33195" }, { "epoch": 0.015062509414068384, "eta": "219:59:23", "grad_norm": 12.0, "loss": 0.0782, "lr": "2.475e-05", "step": 100, "steps": "23.93s,100/33195" }, { "epoch": 0.015213134508209068, "eta": "219:42:26", "grad_norm": 1.3672, "loss": 0.271, "lr": "2.500e-05", "step": 101, "steps": "23.9s,101/33195" }, { "epoch": 0.015363759602349751, "eta": "219:31:00", "grad_norm": 12.875, "loss": 0.1595, "lr": "2.525e-05", "step": 102, "steps": "23.88s,102/33195" }, { "epoch": 0.015514384696490436, "eta": "216:12:03", "grad_norm": 9.6875, "loss": 0.2084, "lr": "2.550e-05", "step": 103, "steps": "23.52s,103/33195" }, { "epoch": 0.01566500979063112, "eta": "218:18:31", "grad_norm": 6.125, "loss": 0.0841, "lr": "2.575e-05", "step": 104, "steps": "23.75s,104/33195" }, { "epoch": 0.015815634884771803, "eta": "219:24:18", "grad_norm": 15.1875, "loss": 0.1496, "lr": "2.600e-05", "step": 105, "steps": "23.87s,105/33195" }, { "epoch": 0.015966259978912486, "eta": "215:48:49", "grad_norm": 9.5625, "loss": 0.1361, "lr": "2.625e-05", "step": 106, "steps": "23.48s,106/33195" }, { "epoch": 0.016116885073053172, "eta": "215:53:57", "grad_norm": 5.6562, "loss": 0.0795, "lr": "2.650e-05", "step": 107, "steps": "23.49s,107/33195" }, { "epoch": 0.016267510167193855, "eta": "217:43:51", "grad_norm": 0.1426, "loss": 0.3291, "lr": "2.675e-05", "step": 108, "steps": "23.69s,108/33195" }, { "epoch": 0.016418135261334538, "eta": "217:48:58", "grad_norm": 13.3125, "loss": 0.1932, "lr": "2.700e-05", "step": 109, "steps": "23.7s,109/33195" }, { "epoch": 0.01656876035547522, "eta": "216:14:50", "grad_norm": 12.25, "loss": 0.137, "lr": "2.725e-05", "step": 110, "steps": "23.53s,110/33195" }, { "epoch": 0.016719385449615907, "eta": "220:22:34", "grad_norm": 7.8438, "loss": 0.2879, "lr": "2.750e-05", "step": 111, "steps": "23.98s,111/33195" }, { "epoch": 0.01687001054375659, "eta": "220:00:07", "grad_norm": 0.6172, "loss": 0.0722, "lr": "2.775e-05", "step": 112, "steps": "23.94s,112/33195" }, { "epoch": 0.017020635637897272, "eta": "220:10:44", "grad_norm": 5.0938, "loss": 0.0971, "lr": "2.800e-05", "step": 113, "steps": "23.96s,113/33195" }, { "epoch": 0.01717126073203796, "eta": "218:20:04", "grad_norm": 0.3262, "loss": 0.121, "lr": "2.825e-05", "step": 114, "steps": "23.76s,114/33195" }, { "epoch": 0.01732188582617864, "eta": "215:28:46", "grad_norm": 1.1797, "loss": 0.1339, "lr": "2.850e-05", "step": 115, "steps": "23.45s,115/33195" }, { "epoch": 0.017472510920319324, "eta": "221:32:14", "grad_norm": 1.5625, "loss": 0.2736, "lr": "2.875e-05", "step": 116, "steps": "24.11s,116/33195" }, { "epoch": 0.01762313601446001, "eta": "215:50:02", "grad_norm": 2.3906, "loss": 0.1743, "lr": "2.900e-05", "step": 117, "steps": "23.49s,117/33195" }, { "epoch": 0.017773761108600693, "eta": "219:52:12", "grad_norm": 1.2422, "loss": 0.2062, "lr": "2.925e-05", "step": 118, "steps": "23.93s,118/33195" }, { "epoch": 0.017924386202741376, "eta": "220:13:51", "grad_norm": 4.0625, "loss": 0.1232, "lr": "2.950e-05", "step": 119, "steps": "23.97s,119/33195" }, { "epoch": 0.01807501129688206, "eta": "220:13:27", "grad_norm": 2.3906, "loss": 0.1457, "lr": "2.975e-05", "step": 120, "steps": "23.97s,120/33195" }, { "epoch": 0.018225636391022745, "eta": "219:17:56", "grad_norm": 2.9688, "loss": 0.1753, "lr": "3.000e-05", "step": 121, "steps": "23.87s,121/33195" }, { "epoch": 0.018376261485163428, "eta": "218:00:22", "grad_norm": 5.875, "loss": 0.1332, "lr": "3.025e-05", "step": 122, "steps": "23.73s,122/33195" }, { "epoch": 0.01852688657930411, "eta": "217:43:26", "grad_norm": 2.2656, "loss": 0.146, "lr": "3.050e-05", "step": 123, "steps": "23.7s,123/33195" }, { "epoch": 0.018677511673444797, "eta": "214:46:39", "grad_norm": 5.0938, "loss": 0.141, "lr": "3.075e-05", "step": 124, "steps": "23.38s,124/33195" }, { "epoch": 0.01882813676758548, "eta": "216:53:02", "grad_norm": 2.4375, "loss": 0.135, "lr": "3.100e-05", "step": 125, "steps": "23.61s,125/33195" }, { "epoch": 0.018978761861726163, "eta": "220:49:38", "grad_norm": 0.9102, "loss": 0.1886, "lr": "3.125e-05", "step": 126, "steps": "24.04s,126/33195" }, { "epoch": 0.019129386955866846, "eta": "219:54:07", "grad_norm": 0.7227, "loss": 0.1971, "lr": "3.150e-05", "step": 127, "steps": "23.94s,127/33195" }, { "epoch": 0.019280012050007532, "eta": "215:29:11", "grad_norm": 1.6328, "loss": 0.0719, "lr": "3.175e-05", "step": 128, "steps": "23.46s,128/33195" }, { "epoch": 0.019430637144148215, "eta": "219:36:48", "grad_norm": 1.7109, "loss": 0.1276, "lr": "3.200e-05", "step": 129, "steps": "23.91s,129/33195" }, { "epoch": 0.019581262238288898, "eta": "218:57:49", "grad_norm": 1.8828, "loss": 0.1225, "lr": "3.225e-05", "step": 130, "steps": "23.84s,130/33195" }, { "epoch": 0.019731887332429584, "eta": "219:47:01", "grad_norm": 0.707, "loss": 0.1896, "lr": "3.250e-05", "step": 131, "steps": "23.93s,131/33195" }, { "epoch": 0.019882512426570267, "eta": "214:54:34", "grad_norm": 0.8672, "loss": 0.1545, "lr": "3.275e-05", "step": 132, "steps": "23.4s,132/33195" }, { "epoch": 0.02003313752071095, "eta": "215:16:13", "grad_norm": 0.3848, "loss": 0.1479, "lr": "3.300e-05", "step": 133, "steps": "23.44s,133/33195" }, { "epoch": 0.020183762614851636, "eta": "219:29:17", "grad_norm": 0.3457, "loss": 0.0552, "lr": "3.325e-05", "step": 134, "steps": "23.9s,134/33195" }, { "epoch": 0.02033438770899232, "eta": "218:50:19", "grad_norm": 1.6641, "loss": 0.0707, "lr": "3.350e-05", "step": 135, "steps": "23.83s,135/33195" }, { "epoch": 0.020485012803133, "eta": "218:22:23", "grad_norm": 1.6719, "loss": 0.0858, "lr": "3.375e-05", "step": 136, "steps": "23.78s,136/33195" }, { "epoch": 0.020635637897273684, "eta": "216:15:15", "grad_norm": 1.9688, "loss": 0.0723, "lr": "3.400e-05", "step": 137, "steps": "23.55s,137/33195" }, { "epoch": 0.02078626299141437, "eta": "218:10:34", "grad_norm": 2.4062, "loss": 0.0896, "lr": "3.425e-05", "step": 138, "steps": "23.76s,138/33195" }, { "epoch": 0.020936888085555053, "eta": "217:59:09", "grad_norm": 9.6875, "loss": 0.083, "lr": "3.450e-05", "step": 139, "steps": "23.74s,139/33195" }, { "epoch": 0.021087513179695736, "eta": "216:08:34", "grad_norm": 1.6484, "loss": 0.0916, "lr": "3.475e-05", "step": 140, "steps": "23.54s,140/33195" }, { "epoch": 0.021238138273836422, "eta": "218:14:53", "grad_norm": 0.3828, "loss": 0.1348, "lr": "3.500e-05", "step": 141, "steps": "23.77s,141/33195" }, { "epoch": 0.021388763367977105, "eta": "215:56:46", "grad_norm": 1.4922, "loss": 0.1149, "lr": "3.525e-05", "step": 142, "steps": "23.52s,142/33195" }, { "epoch": 0.021539388462117788, "eta": "219:03:40", "grad_norm": 0.793, "loss": 0.1119, "lr": "3.550e-05", "step": 143, "steps": "23.86s,143/33195" }, { "epoch": 0.021690013556258474, "eta": "217:02:05", "grad_norm": 0.3418, "loss": 0.0924, "lr": "3.575e-05", "step": 144, "steps": "23.64s,144/33195" }, { "epoch": 0.021840638650399157, "eta": "219:13:54", "grad_norm": 1.2891, "loss": 0.0876, "lr": "3.600e-05", "step": 145, "steps": "23.88s,145/33195" }, { "epoch": 0.02199126374453984, "eta": "217:39:51", "grad_norm": 2.5469, "loss": 0.1287, "lr": "3.625e-05", "step": 146, "steps": "23.71s,146/33195" }, { "epoch": 0.022141888838680523, "eta": "221:08:46", "grad_norm": 1.6875, "loss": 0.0849, "lr": "3.650e-05", "step": 147, "steps": "24.09s,147/33195" }, { "epoch": 0.02229251393282121, "eta": "219:34:44", "grad_norm": 5.375, "loss": 0.1244, "lr": "3.675e-05", "step": 148, "steps": "23.92s,148/33195" }, { "epoch": 0.022443139026961892, "eta": "216:27:04", "grad_norm": 5.375, "loss": 0.099, "lr": "3.700e-05", "step": 149, "steps": "23.58s,149/33195" }, { "epoch": 0.022593764121102575, "eta": "215:42:37", "grad_norm": 0.6953, "loss": 0.092, "lr": "3.725e-05", "step": 150, "steps": "23.5s,150/33195" }, { "epoch": 0.02274438921524326, "eta": "219:06:00", "grad_norm": 3.9844, "loss": 0.1366, "lr": "3.750e-05", "step": 151, "steps": "23.87s,151/33195" }, { "epoch": 0.022895014309383944, "eta": "218:43:34", "grad_norm": 5.0, "loss": 0.1325, "lr": "3.775e-05", "step": 152, "steps": "23.83s,152/33195" }, { "epoch": 0.023045639403524627, "eta": "216:19:59", "grad_norm": 3.7812, "loss": 0.1038, "lr": "3.800e-05", "step": 153, "steps": "23.57s,153/33195" }, { "epoch": 0.02319626449766531, "eta": "216:14:05", "grad_norm": 4.5, "loss": 0.1118, "lr": "3.825e-05", "step": 154, "steps": "23.56s,154/33195" }, { "epoch": 0.023346889591805996, "eta": "220:49:02", "grad_norm": 3.4219, "loss": 0.1764, "lr": "3.850e-05", "step": 155, "steps": "24.06s,155/33195" }, { "epoch": 0.02349751468594668, "eta": "219:20:32", "grad_norm": 0.8203, "loss": 0.151, "lr": "3.875e-05", "step": 156, "steps": "23.9s,156/33195" }, { "epoch": 0.02364813978008736, "eta": "221:37:47", "grad_norm": 4.7188, "loss": 0.0773, "lr": "3.900e-05", "step": 157, "steps": "24.15s,157/33195" }, { "epoch": 0.023798764874228048, "eta": "216:45:33", "grad_norm": 3.6094, "loss": 0.0904, "lr": "3.925e-05", "step": 158, "steps": "23.62s,158/33195" }, { "epoch": 0.02394938996836873, "eta": "219:13:50", "grad_norm": 0.3418, "loss": 0.1457, "lr": "3.950e-05", "step": 159, "steps": "23.89s,159/33195" }, { "epoch": 0.024100015062509413, "eta": "217:34:19", "grad_norm": 2.4688, "loss": 0.0934, "lr": "3.975e-05", "step": 160, "steps": "23.71s,160/33195" }, { "epoch": 0.0242506401566501, "eta": "217:00:54", "grad_norm": 1.3516, "loss": 0.0605, "lr": "4.000e-05", "step": 161, "steps": "23.65s,161/33195" }, { "epoch": 0.024401265250790782, "eta": "220:24:12", "grad_norm": 1.0312, "loss": 0.1243, "lr": "4.025e-05", "step": 162, "steps": "24.02s,162/33195" }, { "epoch": 0.024551890344931465, "eta": "216:16:04", "grad_norm": 0.4219, "loss": 0.1104, "lr": "4.050e-05", "step": 163, "steps": "23.57s,163/33195" }, { "epoch": 0.024702515439072148, "eta": "219:22:51", "grad_norm": 0.8086, "loss": 0.0738, "lr": "4.075e-05", "step": 164, "steps": "23.91s,164/33195" }, { "epoch": 0.024853140533212834, "eta": "219:55:29", "grad_norm": 0.2393, "loss": 0.1045, "lr": "4.100e-05", "step": 165, "steps": "23.97s,165/33195" }, { "epoch": 0.025003765627353517, "eta": "215:52:52", "grad_norm": 1.0859, "loss": 0.0496, "lr": "4.125e-05", "step": 166, "steps": "23.53s,166/33195" }, { "epoch": 0.0251543907214942, "eta": "215:02:56", "grad_norm": 0.4668, "loss": 0.1038, "lr": "4.150e-05", "step": 167, "steps": "23.44s,167/33195" }, { "epoch": 0.025305015815634886, "eta": "219:15:45", "grad_norm": 1.0234, "loss": 0.103, "lr": "4.175e-05", "step": 168, "steps": "23.9s,168/33195" }, { "epoch": 0.02545564090977557, "eta": "217:52:47", "grad_norm": 0.4863, "loss": 0.1143, "lr": "4.200e-05", "step": 169, "steps": "23.75s,169/33195" }, { "epoch": 0.025606266003916252, "eta": "218:08:54", "grad_norm": 0.3281, "loss": 0.0965, "lr": "4.225e-05", "step": 170, "steps": "23.78s,170/33195" }, { "epoch": 0.025756891098056935, "eta": "216:12:55", "grad_norm": 1.0234, "loss": 0.0792, "lr": "4.250e-05", "step": 171, "steps": "23.57s,171/33195" }, { "epoch": 0.02590751619219762, "eta": "216:07:01", "grad_norm": 0.25, "loss": 0.0619, "lr": "4.275e-05", "step": 172, "steps": "23.56s,172/33195" }, { "epoch": 0.026058141286338304, "eta": "215:11:36", "grad_norm": 0.3672, "loss": 0.0607, "lr": "4.300e-05", "step": 173, "steps": "23.46s,173/33195" }, { "epoch": 0.026208766380478987, "eta": "215:16:42", "grad_norm": 0.2852, "loss": 0.1228, "lr": "4.325e-05", "step": 174, "steps": "23.47s,174/33195" }, { "epoch": 0.026359391474619673, "eta": "216:38:52", "grad_norm": 0.5859, "loss": 0.136, "lr": "4.350e-05", "step": 175, "steps": "23.62s,175/33195" }, { "epoch": 0.026510016568760356, "eta": "219:18:04", "grad_norm": 0.2412, "loss": 0.0915, "lr": "4.375e-05", "step": 176, "steps": "23.91s,176/33195" }, { "epoch": 0.02666064166290104, "eta": "215:15:32", "grad_norm": 0.707, "loss": 0.0895, "lr": "4.400e-05", "step": 177, "steps": "23.47s,177/33195" }, { "epoch": 0.026811266757041725, "eta": "218:22:14", "grad_norm": 0.1011, "loss": 0.117, "lr": "4.425e-05", "step": 178, "steps": "23.81s,178/33195" }, { "epoch": 0.026961891851182408, "eta": "219:00:22", "grad_norm": 0.0991, "loss": 0.0944, "lr": "4.450e-05", "step": 179, "steps": "23.88s,179/33195" }, { "epoch": 0.02711251694532309, "eta": "219:16:28", "grad_norm": 0.6992, "loss": 0.0635, "lr": "4.475e-05", "step": 180, "steps": "23.91s,180/33195" }, { "epoch": 0.027263142039463773, "eta": "217:09:31", "grad_norm": 0.2598, "loss": 0.08, "lr": "4.500e-05", "step": 181, "steps": "23.68s,181/33195" }, { "epoch": 0.02741376713360446, "eta": "217:25:38", "grad_norm": 0.582, "loss": 0.0714, "lr": "4.525e-05", "step": 182, "steps": "23.71s,182/33195" }, { "epoch": 0.027564392227745142, "eta": "219:09:46", "grad_norm": 0.3184, "loss": 0.1044, "lr": "4.550e-05", "step": 183, "steps": "23.9s,183/33195" }, { "epoch": 0.027715017321885825, "eta": "215:56:49", "grad_norm": 0.1494, "loss": 0.0659, "lr": "4.575e-05", "step": 184, "steps": "23.55s,184/33195" }, { "epoch": 0.02786564241602651, "eta": "214:22:53", "grad_norm": 0.2773, "loss": 0.1016, "lr": "4.600e-05", "step": 185, "steps": "23.38s,185/33195" }, { "epoch": 0.028016267510167194, "eta": "218:30:04", "grad_norm": 0.5664, "loss": 0.0753, "lr": "4.625e-05", "step": 186, "steps": "23.83s,186/33195" }, { "epoch": 0.028166892604307877, "eta": "219:02:41", "grad_norm": 0.1016, "loss": 0.0964, "lr": "4.650e-05", "step": 187, "steps": "23.89s,187/33195" }, { "epoch": 0.028317517698448563, "eta": "219:18:47", "grad_norm": 0.8867, "loss": 0.0784, "lr": "4.675e-05", "step": 188, "steps": "23.92s,188/33195" }, { "epoch": 0.028468142792589246, "eta": "219:12:53", "grad_norm": 0.498, "loss": 0.0714, "lr": "4.700e-05", "step": 189, "steps": "23.91s,189/33195" }, { "epoch": 0.02861876788672993, "eta": "215:59:57", "grad_norm": 0.957, "loss": 0.0926, "lr": "4.725e-05", "step": 190, "steps": "23.56s,190/33195" }, { "epoch": 0.02876939298087061, "eta": "216:32:34", "grad_norm": 0.3574, "loss": 0.1017, "lr": "4.750e-05", "step": 191, "steps": "23.62s,191/33195" }, { "epoch": 0.028920018075011298, "eta": "215:42:40", "grad_norm": 0.1719, "loss": 0.1065, "lr": "4.775e-05", "step": 192, "steps": "23.53s,192/33195" }, { "epoch": 0.02907064316915198, "eta": "215:58:47", "grad_norm": 0.3945, "loss": 0.0948, "lr": "4.800e-05", "step": 193, "steps": "23.56s,193/33195" }, { "epoch": 0.029221268263292664, "eta": "216:42:23", "grad_norm": 0.4004, "loss": 0.0844, "lr": "4.825e-05", "step": 194, "steps": "23.64s,194/33195" }, { "epoch": 0.02937189335743335, "eta": "217:59:00", "grad_norm": 0.377, "loss": 0.0611, "lr": "4.850e-05", "step": 195, "steps": "23.78s,195/33195" }, { "epoch": 0.029522518451574033, "eta": "215:30:06", "grad_norm": 0.1797, "loss": 0.0863, "lr": "4.875e-05", "step": 196, "steps": "23.51s,196/33195" }, { "epoch": 0.029673143545714716, "eta": "216:46:42", "grad_norm": 0.2949, "loss": 0.0711, "lr": "4.900e-05", "step": 197, "steps": "23.65s,197/33195" }, { "epoch": 0.0298237686398554, "eta": "219:47:48", "grad_norm": 0.1846, "loss": 0.0775, "lr": "4.925e-05", "step": 198, "steps": "23.98s,198/33195" }, { "epoch": 0.029974393733996085, "eta": "217:18:55", "grad_norm": 0.2314, "loss": 0.0606, "lr": "4.950e-05", "step": 199, "steps": "23.71s,199/33195" }, { "epoch": 0.030125018828136767, "eta": "216:45:31", "grad_norm": 0.1416, "loss": 0.081, "lr": "4.975e-05", "step": 200, "steps": "23.65s,200/33195" }, { "epoch": 0.03027564392227745, "eta": "421:57:23", "grad_norm": 0.5195, "loss": 0.0822, "lr": "5.000e-05", "step": 201, "steps": "46.04s,201/33195" }, { "epoch": 0.030426269016418137, "eta": "220:30:11", "grad_norm": 0.1924, "loss": 0.0746, "lr": "5.000e-05", "step": 202, "steps": "24.06s,202/33195" }, { "epoch": 0.03057689411055882, "eta": "219:12:48", "grad_norm": 0.3613, "loss": 0.0482, "lr": "5.000e-05", "step": 203, "steps": "23.92s,203/33195" }, { "epoch": 0.030727519204699502, "eta": "219:17:54", "grad_norm": 0.3691, "loss": 0.0558, "lr": "5.000e-05", "step": 204, "steps": "23.93s,204/33195" }, { "epoch": 0.03087814429884019, "eta": "215:10:05", "grad_norm": 0.2471, "loss": 0.0702, "lr": "5.000e-05", "step": 205, "steps": "23.48s,205/33195" }, { "epoch": 0.03102876939298087, "eta": "220:06:35", "grad_norm": 0.2012, "loss": 0.0658, "lr": "5.000e-05", "step": 206, "steps": "24.02s,206/33195" }, { "epoch": 0.031179394487121554, "eta": "218:49:13", "grad_norm": 0.2969, "loss": 0.0801, "lr": "5.000e-05", "step": 207, "steps": "23.88s,207/33195" }, { "epoch": 0.03133001958126224, "eta": "215:52:53", "grad_norm": 0.166, "loss": 0.07, "lr": "5.000e-05", "step": 208, "steps": "23.56s,208/33195" }, { "epoch": 0.03148064467540292, "eta": "217:03:58", "grad_norm": 0.3164, "loss": 0.082, "lr": "5.000e-05", "step": 209, "steps": "23.69s,209/33195" }, { "epoch": 0.031631269769543606, "eta": "214:18:39", "grad_norm": 0.1973, "loss": 0.0726, "lr": "5.000e-05", "step": 210, "steps": "23.39s,210/33195" }, { "epoch": 0.03178189486368429, "eta": "214:23:45", "grad_norm": 0.2598, "loss": 0.0689, "lr": "5.000e-05", "step": 211, "steps": "23.4s,211/33195" }, { "epoch": 0.03193251995782497, "eta": "219:20:13", "grad_norm": 0.1084, "loss": 0.0947, "lr": "5.000e-05", "step": 212, "steps": "23.94s,212/33195" }, { "epoch": 0.032083145051965654, "eta": "215:56:25", "grad_norm": 0.126, "loss": 0.0776, "lr": "5.000e-05", "step": 213, "steps": "23.57s,213/33195" }, { "epoch": 0.032233770146106344, "eta": "215:56:02", "grad_norm": 0.1079, "loss": 0.0864, "lr": "5.000e-05", "step": 214, "steps": "23.57s,214/33195" }, { "epoch": 0.03238439524024703, "eta": "217:45:34", "grad_norm": 0.0713, "loss": 0.0486, "lr": "5.000e-05", "step": 215, "steps": "23.77s,215/33195" }, { "epoch": 0.03253502033438771, "eta": "219:35:06", "grad_norm": 0.0884, "loss": 0.0561, "lr": "5.000e-05", "step": 216, "steps": "23.97s,216/33195" }, { "epoch": 0.03268564542852839, "eta": "219:56:41", "grad_norm": 0.126, "loss": 0.0534, "lr": "5.000e-05", "step": 217, "steps": "24.01s,217/33195" }, { "epoch": 0.032836270522669075, "eta": "218:11:52", "grad_norm": 0.2695, "loss": 0.0524, "lr": "5.000e-05", "step": 218, "steps": "23.82s,218/33195" }, { "epoch": 0.03298689561680976, "eta": "218:38:57", "grad_norm": 0.0625, "loss": 0.0737, "lr": "5.000e-05", "step": 219, "steps": "23.87s,219/33195" }, { "epoch": 0.03313752071095044, "eta": "218:22:04", "grad_norm": 0.1816, "loss": 0.0711, "lr": "5.000e-05", "step": 220, "steps": "23.84s,220/33195" }, { "epoch": 0.03328814580509113, "eta": "217:15:43", "grad_norm": 0.4512, "loss": 0.0815, "lr": "5.000e-05", "step": 221, "steps": "23.72s,221/33195" }, { "epoch": 0.033438770899231814, "eta": "218:26:46", "grad_norm": 0.1719, "loss": 0.0615, "lr": "5.000e-05", "step": 222, "steps": "23.85s,222/33195" }, { "epoch": 0.033589395993372496, "eta": "220:32:45", "grad_norm": 0.0815, "loss": 0.0762, "lr": "5.000e-05", "step": 223, "steps": "24.08s,223/33195" }, { "epoch": 0.03374002108751318, "eta": "215:19:08", "grad_norm": 0.0903, "loss": 0.071, "lr": "5.000e-05", "step": 224, "steps": "23.51s,224/33195" }, { "epoch": 0.03389064618165386, "eta": "217:14:08", "grad_norm": 0.0535, "loss": 0.0741, "lr": "5.000e-05", "step": 225, "steps": "23.72s,225/33195" }, { "epoch": 0.034041271275794545, "eta": "215:51:19", "grad_norm": 0.1367, "loss": 0.0829, "lr": "5.000e-05", "step": 226, "steps": "23.57s,226/33195" }, { "epoch": 0.034191896369935235, "eta": "215:28:57", "grad_norm": 0.125, "loss": 0.0504, "lr": "5.000e-05", "step": 227, "steps": "23.53s,227/33195" }, { "epoch": 0.03434252146407592, "eta": "215:28:33", "grad_norm": 0.0854, "loss": 0.0962, "lr": "5.000e-05", "step": 228, "steps": "23.53s,228/33195" }, { "epoch": 0.0344931465582166, "eta": "215:55:38", "grad_norm": 0.0713, "loss": 0.0782, "lr": "5.000e-05", "step": 229, "steps": "23.58s,229/33195" }, { "epoch": 0.03464377165235728, "eta": "218:56:33", "grad_norm": 0.1118, "loss": 0.0381, "lr": "5.000e-05", "step": 230, "steps": "23.91s,230/33195" }, { "epoch": 0.034794396746497966, "eta": "217:33:44", "grad_norm": 0.2734, "loss": 0.076, "lr": "5.000e-05", "step": 231, "steps": "23.76s,231/33195" }, { "epoch": 0.03494502184063865, "eta": "218:11:48", "grad_norm": 0.123, "loss": 0.0832, "lr": "5.000e-05", "step": 232, "steps": "23.83s,232/33195" }, { "epoch": 0.03509564693477933, "eta": "218:33:22", "grad_norm": 0.0364, "loss": 0.0663, "lr": "5.000e-05", "step": 233, "steps": "23.87s,233/33195" }, { "epoch": 0.03524627202892002, "eta": "218:27:29", "grad_norm": 0.1079, "loss": 0.0666, "lr": "5.000e-05", "step": 234, "steps": "23.86s,234/33195" }, { "epoch": 0.035396897123060704, "eta": "215:14:49", "grad_norm": 0.1069, "loss": 0.0579, "lr": "5.000e-05", "step": 235, "steps": "23.51s,235/33195" }, { "epoch": 0.03554752221720139, "eta": "214:41:28", "grad_norm": 0.0693, "loss": 0.0748, "lr": "5.000e-05", "step": 236, "steps": "23.45s,236/33195" }, { "epoch": 0.03569814731134207, "eta": "217:31:22", "grad_norm": 0.0505, "loss": 0.0518, "lr": "5.000e-05", "step": 237, "steps": "23.76s,237/33195" }, { "epoch": 0.03584877240548275, "eta": "215:41:06", "grad_norm": 0.0486, "loss": 0.0755, "lr": "5.000e-05", "step": 238, "steps": "23.56s,238/33195" }, { "epoch": 0.035999397499623435, "eta": "216:08:11", "grad_norm": 0.0588, "loss": 0.0745, "lr": "5.000e-05", "step": 239, "steps": "23.61s,239/33195" }, { "epoch": 0.03615002259376412, "eta": "219:20:01", "grad_norm": 0.0654, "loss": 0.0982, "lr": "5.000e-05", "step": 240, "steps": "23.96s,240/33195" }, { "epoch": 0.03630064768790481, "eta": "215:28:57", "grad_norm": 0.106, "loss": 0.0772, "lr": "5.000e-05", "step": 241, "steps": "23.54s,241/33195" }, { "epoch": 0.03645127278204549, "eta": "215:28:33", "grad_norm": 0.0469, "loss": 0.0558, "lr": "5.000e-05", "step": 242, "steps": "23.54s,242/33195" }, { "epoch": 0.036601897876186174, "eta": "218:40:23", "grad_norm": 0.0781, "loss": 0.0634, "lr": "5.000e-05", "step": 243, "steps": "23.89s,243/33195" }, { "epoch": 0.036752522970326856, "eta": "215:00:18", "grad_norm": 0.1631, "loss": 0.0471, "lr": "5.000e-05", "step": 244, "steps": "23.49s,244/33195" }, { "epoch": 0.03690314806446754, "eta": "218:01:09", "grad_norm": 0.1011, "loss": 0.0802, "lr": "5.000e-05", "step": 245, "steps": "23.82s,245/33195" }, { "epoch": 0.03705377315860822, "eta": "218:00:45", "grad_norm": 0.0476, "loss": 0.0749, "lr": "5.000e-05", "step": 246, "steps": "23.82s,246/33195" }, { "epoch": 0.037204398252748905, "eta": "216:10:31", "grad_norm": 0.0491, "loss": 0.0828, "lr": "5.000e-05", "step": 247, "steps": "23.62s,247/33195" }, { "epoch": 0.037355023346889595, "eta": "218:10:56", "grad_norm": 0.0491, "loss": 0.0777, "lr": "5.000e-05", "step": 248, "steps": "23.84s,248/33195" }, { "epoch": 0.03750564844103028, "eta": "217:43:05", "grad_norm": 0.025, "loss": 0.0635, "lr": "5.000e-05", "step": 249, "steps": "23.79s,249/33195" }, { "epoch": 0.03765627353517096, "eta": "219:10:32", "grad_norm": 0.0552, "loss": 0.0418, "lr": "5.000e-05", "step": 250, "steps": "23.95s,250/33195" }, { "epoch": 0.03780689862931164, "eta": "219:04:39", "grad_norm": 0.0752, "loss": 0.086, "lr": "5.000e-05", "step": 251, "steps": "23.94s,251/33195" }, { "epoch": 0.037957523723452326, "eta": "219:59:09", "grad_norm": 0.0347, "loss": 0.0743, "lr": "5.000e-05", "step": 252, "steps": "24.04s,252/33195" }, { "epoch": 0.03810814881759301, "eta": "215:07:46", "grad_norm": 0.0811, "loss": 0.045, "lr": "5.000e-05", "step": 253, "steps": "23.51s,253/33195" }, { "epoch": 0.03825877391173369, "eta": "216:46:12", "grad_norm": 0.0654, "loss": 0.0591, "lr": "5.000e-05", "step": 254, "steps": "23.69s,254/33195" }, { "epoch": 0.03840939900587438, "eta": "217:18:45", "grad_norm": 0.0435, "loss": 0.0674, "lr": "5.000e-05", "step": 255, "steps": "23.75s,255/33195" }, { "epoch": 0.038560024100015064, "eta": "215:01:06", "grad_norm": 0.0396, "loss": 0.0594, "lr": "5.000e-05", "step": 256, "steps": "23.5s,256/33195" }, { "epoch": 0.03871064919415575, "eta": "214:11:18", "grad_norm": 0.0535, "loss": 0.0559, "lr": "5.000e-05", "step": 257, "steps": "23.41s,257/33195" }, { "epoch": 0.03886127428829643, "eta": "214:21:53", "grad_norm": 0.0264, "loss": 0.0702, "lr": "5.000e-05", "step": 258, "steps": "23.43s,258/33195" }, { "epoch": 0.03901189938243711, "eta": "216:38:44", "grad_norm": 0.0349, "loss": 0.0562, "lr": "5.000e-05", "step": 259, "steps": "23.68s,259/33195" }, { "epoch": 0.039162524476577795, "eta": "214:48:33", "grad_norm": 0.0486, "loss": 0.0624, "lr": "5.000e-05", "step": 260, "steps": "23.48s,260/33195" }, { "epoch": 0.039313149570718485, "eta": "216:37:57", "grad_norm": 0.0518, "loss": 0.0714, "lr": "5.000e-05", "step": 261, "steps": "23.68s,261/33195" }, { "epoch": 0.03946377466485917, "eta": "214:31:18", "grad_norm": 0.0244, "loss": 0.0512, "lr": "5.000e-05", "step": 262, "steps": "23.45s,262/33195" }, { "epoch": 0.03961439975899985, "eta": "218:21:26", "grad_norm": 0.0305, "loss": 0.0399, "lr": "5.000e-05", "step": 263, "steps": "23.87s,263/33195" }, { "epoch": 0.03976502485314053, "eta": "214:30:31", "grad_norm": 0.031, "loss": 0.0889, "lr": "5.000e-05", "step": 264, "steps": "23.45s,264/33195" }, { "epoch": 0.039915649947281216, "eta": "214:24:39", "grad_norm": 0.0486, "loss": 0.0575, "lr": "5.000e-05", "step": 265, "steps": "23.44s,265/33195" }, { "epoch": 0.0400662750414219, "eta": "217:47:19", "grad_norm": 0.0425, "loss": 0.0705, "lr": "5.000e-05", "step": 266, "steps": "23.81s,266/33195" }, { "epoch": 0.04021690013556258, "eta": "218:08:52", "grad_norm": 0.0393, "loss": 0.0683, "lr": "5.000e-05", "step": 267, "steps": "23.85s,267/33195" }, { "epoch": 0.04036752522970327, "eta": "214:12:30", "grad_norm": 0.0342, "loss": 0.0634, "lr": "5.000e-05", "step": 268, "steps": "23.42s,268/33195" }, { "epoch": 0.040518150323843954, "eta": "216:51:15", "grad_norm": 0.0645, "loss": 0.0536, "lr": "5.000e-05", "step": 269, "steps": "23.71s,269/33195" }, { "epoch": 0.04066877541798464, "eta": "218:07:41", "grad_norm": 0.041, "loss": 0.0571, "lr": "5.000e-05", "step": 270, "steps": "23.85s,270/33195" }, { "epoch": 0.04081940051212532, "eta": "214:44:15", "grad_norm": 0.0757, "loss": 0.0502, "lr": "5.000e-05", "step": 271, "steps": "23.48s,271/33195" }, { "epoch": 0.040970025606266, "eta": "214:38:22", "grad_norm": 0.1289, "loss": 0.0499, "lr": "5.000e-05", "step": 272, "steps": "23.47s,272/33195" }, { "epoch": 0.041120650700406686, "eta": "218:11:58", "grad_norm": 0.0806, "loss": 0.0855, "lr": "5.000e-05", "step": 273, "steps": "23.86s,273/33195" }, { "epoch": 0.04127127579454737, "eta": "215:05:01", "grad_norm": 0.0481, "loss": 0.0761, "lr": "5.000e-05", "step": 274, "steps": "23.52s,274/33195" }, { "epoch": 0.04142190088868806, "eta": "216:43:24", "grad_norm": 0.0972, "loss": 0.0646, "lr": "5.000e-05", "step": 275, "steps": "23.7s,275/33195" }, { "epoch": 0.04157252598282874, "eta": "218:16:16", "grad_norm": 0.0388, "loss": 0.0952, "lr": "5.000e-05", "step": 276, "steps": "23.87s,276/33195" }, { "epoch": 0.041723151076969424, "eta": "219:10:44", "grad_norm": 0.0315, "loss": 0.0705, "lr": "5.000e-05", "step": 277, "steps": "23.97s,277/33195" }, { "epoch": 0.04187377617111011, "eta": "216:42:12", "grad_norm": 0.0361, "loss": 0.0734, "lr": "5.000e-05", "step": 278, "steps": "23.7s,278/33195" }, { "epoch": 0.04202440126525079, "eta": "215:57:55", "grad_norm": 0.0369, "loss": 0.0528, "lr": "5.000e-05", "step": 279, "steps": "23.62s,279/33195" }, { "epoch": 0.04217502635939147, "eta": "215:19:08", "grad_norm": 0.0292, "loss": 0.0548, "lr": "5.000e-05", "step": 280, "steps": "23.55s,280/33195" }, { "epoch": 0.042325651453532155, "eta": "213:56:27", "grad_norm": 0.0308, "loss": 0.0833, "lr": "5.000e-05", "step": 281, "steps": "23.4s,281/33195" }, { "epoch": 0.042476276547672845, "eta": "219:14:13", "grad_norm": 0.0381, "loss": 0.0753, "lr": "5.000e-05", "step": 282, "steps": "23.98s,282/33195" }, { "epoch": 0.04262690164181353, "eta": "218:40:55", "grad_norm": 0.0311, "loss": 0.0833, "lr": "5.000e-05", "step": 283, "steps": "23.92s,283/33195" }, { "epoch": 0.04277752673595421, "eta": "216:23:23", "grad_norm": 0.0486, "loss": 0.0524, "lr": "5.000e-05", "step": 284, "steps": "23.67s,284/33195" }, { "epoch": 0.04292815183009489, "eta": "218:01:43", "grad_norm": 0.0315, "loss": 0.0663, "lr": "5.000e-05", "step": 285, "steps": "23.85s,285/33195" }, { "epoch": 0.043078776924235576, "eta": "218:23:16", "grad_norm": 0.0532, "loss": 0.0567, "lr": "5.000e-05", "step": 286, "steps": "23.89s,286/33195" }, { "epoch": 0.04322940201837626, "eta": "215:32:50", "grad_norm": 0.0403, "loss": 0.0794, "lr": "5.000e-05", "step": 287, "steps": "23.58s,287/33195" }, { "epoch": 0.04338002711251695, "eta": "217:00:12", "grad_norm": 0.0376, "loss": 0.0904, "lr": "5.000e-05", "step": 288, "steps": "23.74s,288/33195" }, { "epoch": 0.04353065220665763, "eta": "218:00:08", "grad_norm": 0.0493, "loss": 0.07, "lr": "5.000e-05", "step": 289, "steps": "23.85s,289/33195" }, { "epoch": 0.043681277300798314, "eta": "218:16:11", "grad_norm": 0.0344, "loss": 0.0645, "lr": "5.000e-05", "step": 290, "steps": "23.88s,290/33195" }, { "epoch": 0.043831902394939, "eta": "217:04:30", "grad_norm": 0.0471, "loss": 0.0678, "lr": "5.000e-05", "step": 291, "steps": "23.75s,291/33195" }, { "epoch": 0.04398252748907968, "eta": "214:52:29", "grad_norm": 0.0256, "loss": 0.0633, "lr": "5.000e-05", "step": 292, "steps": "23.51s,292/33195" }, { "epoch": 0.04413315258322036, "eta": "217:42:05", "grad_norm": 0.0486, "loss": 0.041, "lr": "5.000e-05", "step": 293, "steps": "23.82s,293/33195" }, { "epoch": 0.044283777677361046, "eta": "215:35:34", "grad_norm": 0.0286, "loss": 0.0569, "lr": "5.000e-05", "step": 294, "steps": "23.59s,294/33195" }, { "epoch": 0.044434402771501735, "eta": "214:51:19", "grad_norm": 0.126, "loss": 0.0538, "lr": "5.000e-05", "step": 295, "steps": "23.51s,295/33195" }, { "epoch": 0.04458502786564242, "eta": "214:12:32", "grad_norm": 0.0408, "loss": 0.0727, "lr": "5.000e-05", "step": 296, "steps": "23.44s,296/33195" }, { "epoch": 0.0447356529597831, "eta": "215:06:58", "grad_norm": 0.0181, "loss": 0.0413, "lr": "5.000e-05", "step": 297, "steps": "23.54s,297/33195" }, { "epoch": 0.044886278053923784, "eta": "214:55:37", "grad_norm": 0.02, "loss": 0.0645, "lr": "5.000e-05", "step": 298, "steps": "23.52s,298/33195" }, { "epoch": 0.04503690314806447, "eta": "216:50:22", "grad_norm": 0.0262, "loss": 0.0559, "lr": "5.000e-05", "step": 299, "steps": "23.73s,299/33195" }, { "epoch": 0.04518752824220515, "eta": "218:28:39", "grad_norm": 0.0408, "loss": 0.0607, "lr": "5.000e-05", "step": 300, "steps": "23.91s,300/33195" }, { "epoch": 0.04533815333634583, "eta": "215:05:24", "grad_norm": 0.0192, "loss": 0.069, "lr": "5.000e-05", "step": 301, "steps": "23.54s,301/33195" }, { "epoch": 0.04548877843048652, "eta": "215:21:28", "grad_norm": 0.0327, "loss": 0.0754, "lr": "5.000e-05", "step": 302, "steps": "23.57s,302/33195" }, { "epoch": 0.045639403524627205, "eta": "218:43:54", "grad_norm": 0.0306, "loss": 0.058, "lr": "5.000e-05", "step": 303, "steps": "23.94s,303/33195" }, { "epoch": 0.04579002861876789, "eta": "218:10:37", "grad_norm": 0.0405, "loss": 0.0494, "lr": "5.000e-05", "step": 304, "steps": "23.88s,304/33195" }, { "epoch": 0.04594065371290857, "eta": "214:41:55", "grad_norm": 0.0476, "loss": 0.0724, "lr": "5.000e-05", "step": 305, "steps": "23.5s,305/33195" }, { "epoch": 0.04609127880704925, "eta": "214:14:07", "grad_norm": 0.1768, "loss": 0.0393, "lr": "5.000e-05", "step": 306, "steps": "23.45s,306/33195" }, { "epoch": 0.046241903901189936, "eta": "214:24:41", "grad_norm": 0.0598, "loss": 0.068, "lr": "5.000e-05", "step": 307, "steps": "23.47s,307/33195" }, { "epoch": 0.04639252899533062, "eta": "217:47:06", "grad_norm": 0.0189, "loss": 0.0645, "lr": "5.000e-05", "step": 308, "steps": "23.84s,308/33195" }, { "epoch": 0.04654315408947131, "eta": "218:08:37", "grad_norm": 0.0165, "loss": 0.0641, "lr": "5.000e-05", "step": 309, "steps": "23.88s,309/33195" }, { "epoch": 0.04669377918361199, "eta": "219:19:28", "grad_norm": 0.0242, "loss": 0.0523, "lr": "5.000e-05", "step": 310, "steps": "24.01s,310/33195" }, { "epoch": 0.046844404277752674, "eta": "214:34:05", "grad_norm": 0.0283, "loss": 0.087, "lr": "5.000e-05", "step": 311, "steps": "23.49s,311/33195" }, { "epoch": 0.04699502937189336, "eta": "217:01:40", "grad_norm": 0.0262, "loss": 0.0419, "lr": "5.000e-05", "step": 312, "steps": "23.76s,312/33195" }, { "epoch": 0.04714565446603404, "eta": "214:33:18", "grad_norm": 0.0388, "loss": 0.0617, "lr": "5.000e-05", "step": 313, "steps": "23.49s,313/33195" }, { "epoch": 0.04729627956017472, "eta": "214:10:59", "grad_norm": 0.0183, "loss": 0.076, "lr": "5.000e-05", "step": 314, "steps": "23.45s,314/33195" }, { "epoch": 0.047446904654315405, "eta": "214:32:31", "grad_norm": 0.0145, "loss": 0.0493, "lr": "5.000e-05", "step": 315, "steps": "23.49s,315/33195" }, { "epoch": 0.047597529748456095, "eta": "215:32:24", "grad_norm": 0.0315, "loss": 0.0437, "lr": "5.000e-05", "step": 316, "steps": "23.6s,316/33195" }, { "epoch": 0.04774815484259678, "eta": "218:05:26", "grad_norm": 0.0226, "loss": 0.0669, "lr": "5.000e-05", "step": 317, "steps": "23.88s,317/33195" }, { "epoch": 0.04789877993673746, "eta": "220:05:35", "grad_norm": 0.0256, "loss": 0.0531, "lr": "5.000e-05", "step": 318, "steps": "24.1s,318/33195" }, { "epoch": 0.048049405030878144, "eta": "214:52:52", "grad_norm": 0.0286, "loss": 0.0852, "lr": "5.000e-05", "step": 319, "steps": "23.53s,319/33195" }, { "epoch": 0.048200030125018826, "eta": "216:53:01", "grad_norm": 0.0356, "loss": 0.078, "lr": "5.000e-05", "step": 320, "steps": "23.75s,320/33195" }, { "epoch": 0.04835065521915951, "eta": "218:14:48", "grad_norm": 0.0315, "loss": 0.0579, "lr": "5.000e-05", "step": 321, "steps": "23.9s,321/33195" }, { "epoch": 0.0485012803133002, "eta": "214:18:49", "grad_norm": 0.0474, "loss": 0.045, "lr": "5.000e-05", "step": 322, "steps": "23.47s,322/33195" }, { "epoch": 0.04865190540744088, "eta": "214:51:18", "grad_norm": 0.0237, "loss": 0.0638, "lr": "5.000e-05", "step": 323, "steps": "23.53s,323/33195" }, { "epoch": 0.048802530501581565, "eta": "219:19:21", "grad_norm": 0.0391, "loss": 0.0729, "lr": "5.000e-05", "step": 324, "steps": "24.02s,324/33195" }, { "epoch": 0.04895315559572225, "eta": "219:29:54", "grad_norm": 0.0593, "loss": 0.0746, "lr": "5.000e-05", "step": 325, "steps": "24.04s,325/33195" }, { "epoch": 0.04910378068986293, "eta": "215:50:23", "grad_norm": 0.032, "loss": 0.0558, "lr": "5.000e-05", "step": 326, "steps": "23.64s,326/33195" }, { "epoch": 0.04925440578400361, "eta": "218:34:19", "grad_norm": 0.0352, "loss": 0.0807, "lr": "5.000e-05", "step": 327, "steps": "23.94s,327/33195" }, { "epoch": 0.049405030878144296, "eta": "215:11:15", "grad_norm": 0.017, "loss": 0.054, "lr": "5.000e-05", "step": 328, "steps": "23.57s,328/33195" }, { "epoch": 0.049555655972284986, "eta": "218:11:37", "grad_norm": 0.0193, "loss": 0.0571, "lr": "5.000e-05", "step": 329, "steps": "23.9s,329/33195" }, { "epoch": 0.04970628106642567, "eta": "215:21:25", "grad_norm": 0.0182, "loss": 0.0644, "lr": "5.000e-05", "step": 330, "steps": "23.59s,330/33195" }, { "epoch": 0.04985690616056635, "eta": "214:59:07", "grad_norm": 0.0199, "loss": 0.0679, "lr": "5.000e-05", "step": 331, "steps": "23.55s,331/33195" }, { "epoch": 0.050007531254707034, "eta": "218:32:20", "grad_norm": 0.0398, "loss": 0.0506, "lr": "5.000e-05", "step": 332, "steps": "23.94s,332/33195" }, { "epoch": 0.05015815634884772, "eta": "219:26:42", "grad_norm": 0.0157, "loss": 0.0654, "lr": "5.000e-05", "step": 333, "steps": "24.04s,333/33195" }, { "epoch": 0.0503087814429884, "eta": "215:47:14", "grad_norm": 0.0215, "loss": 0.0582, "lr": "5.000e-05", "step": 334, "steps": "23.64s,334/33195" }, { "epoch": 0.05045940653712908, "eta": "219:25:54", "grad_norm": 0.0282, "loss": 0.0484, "lr": "5.000e-05", "step": 335, "steps": "24.04s,335/33195" }, { "epoch": 0.05061003163126977, "eta": "215:19:03", "grad_norm": 0.0361, "loss": 0.0664, "lr": "5.000e-05", "step": 336, "steps": "23.59s,336/33195" }, { "epoch": 0.050760656725410455, "eta": "215:13:11", "grad_norm": 0.1338, "loss": 0.0653, "lr": "5.000e-05", "step": 337, "steps": "23.58s,337/33195" }, { "epoch": 0.05091128181955114, "eta": "215:34:42", "grad_norm": 0.0236, "loss": 0.0684, "lr": "5.000e-05", "step": 338, "steps": "23.62s,338/33195" }, { "epoch": 0.05106190691369182, "eta": "217:34:47", "grad_norm": 0.0197, "loss": 0.0486, "lr": "5.000e-05", "step": 339, "steps": "23.84s,339/33195" }, { "epoch": 0.051212532007832504, "eta": "218:34:37", "grad_norm": 0.0359, "loss": 0.0477, "lr": "5.000e-05", "step": 340, "steps": "23.95s,340/33195" }, { "epoch": 0.051363157101973186, "eta": "219:01:36", "grad_norm": 0.0371, "loss": 0.0785, "lr": "5.000e-05", "step": 341, "steps": "24.0s,341/33195" }, { "epoch": 0.05151378219611387, "eta": "213:10:46", "grad_norm": 0.0388, "loss": 0.0505, "lr": "5.000e-05", "step": 342, "steps": "23.36s,342/33195" }, { "epoch": 0.05166440729025456, "eta": "217:27:43", "grad_norm": 0.017, "loss": 0.052, "lr": "5.000e-05", "step": 343, "steps": "23.83s,343/33195" }, { "epoch": 0.05181503238439524, "eta": "214:54:01", "grad_norm": 0.0315, "loss": 0.0726, "lr": "5.000e-05", "step": 344, "steps": "23.55s,344/33195" }, { "epoch": 0.051965657478535925, "eta": "217:10:30", "grad_norm": 0.0256, "loss": 0.0498, "lr": "5.000e-05", "step": 345, "steps": "23.8s,345/33195" }, { "epoch": 0.05211628257267661, "eta": "216:42:43", "grad_norm": 0.0197, "loss": 0.0713, "lr": "5.000e-05", "step": 346, "steps": "23.75s,346/33195" }, { "epoch": 0.05226690766681729, "eta": "218:09:55", "grad_norm": 0.016, "loss": 0.0377, "lr": "5.000e-05", "step": 347, "steps": "23.91s,347/33195" }, { "epoch": 0.05241753276095797, "eta": "217:36:40", "grad_norm": 0.0933, "loss": 0.0638, "lr": "5.000e-05", "step": 348, "steps": "23.85s,348/33195" }, { "epoch": 0.05256815785509866, "eta": "217:52:42", "grad_norm": 0.0201, "loss": 0.0541, "lr": "5.000e-05", "step": 349, "steps": "23.88s,349/33195" }, { "epoch": 0.052718782949239346, "eta": "215:24:30", "grad_norm": 0.0155, "loss": 0.0528, "lr": "5.000e-05", "step": 350, "steps": "23.61s,350/33195" }, { "epoch": 0.05286940804338003, "eta": "219:14:01", "grad_norm": 0.0192, "loss": 0.06, "lr": "5.000e-05", "step": 351, "steps": "24.03s,351/33195" }, { "epoch": 0.05302003313752071, "eta": "214:23:30", "grad_norm": 0.0159, "loss": 0.0567, "lr": "5.000e-05", "step": 352, "steps": "23.5s,352/33195" }, { "epoch": 0.053170658231661394, "eta": "218:29:25", "grad_norm": 0.0137, "loss": 0.0381, "lr": "5.000e-05", "step": 353, "steps": "23.95s,353/33195" }, { "epoch": 0.05332128332580208, "eta": "214:28:11", "grad_norm": 0.0288, "loss": 0.0579, "lr": "5.000e-05", "step": 354, "steps": "23.51s,354/33195" }, { "epoch": 0.05347190841994276, "eta": "215:22:32", "grad_norm": 0.0147, "loss": 0.0492, "lr": "5.000e-05", "step": 355, "steps": "23.61s,355/33195" }, { "epoch": 0.05362253351408345, "eta": "219:06:32", "grad_norm": 0.0165, "loss": 0.0568, "lr": "5.000e-05", "step": 356, "steps": "24.02s,356/33195" }, { "epoch": 0.05377315860822413, "eta": "215:10:48", "grad_norm": 0.0208, "loss": 0.0613, "lr": "5.000e-05", "step": 357, "steps": "23.59s,357/33195" }, { "epoch": 0.053923783702364815, "eta": "213:48:19", "grad_norm": 0.0312, "loss": 0.0448, "lr": "5.000e-05", "step": 358, "steps": "23.44s,358/33195" }, { "epoch": 0.0540744087965055, "eta": "215:20:57", "grad_norm": 0.0131, "loss": 0.0535, "lr": "5.000e-05", "step": 359, "steps": "23.61s,359/33195" }, { "epoch": 0.05422503389064618, "eta": "217:59:16", "grad_norm": 0.042, "loss": 0.0551, "lr": "5.000e-05", "step": 360, "steps": "23.9s,360/33195" }, { "epoch": 0.054375658984786863, "eta": "218:20:45", "grad_norm": 0.0371, "loss": 0.0656, "lr": "5.000e-05", "step": 361, "steps": "23.94s,361/33195" }, { "epoch": 0.054526284078927546, "eta": "218:31:18", "grad_norm": 0.02, "loss": 0.083, "lr": "5.000e-05", "step": 362, "steps": "23.96s,362/33195" }, { "epoch": 0.054676909173068236, "eta": "213:51:50", "grad_norm": 0.0134, "loss": 0.0659, "lr": "5.000e-05", "step": 363, "steps": "23.45s,363/33195" }, { "epoch": 0.05482753426720892, "eta": "211:56:32", "grad_norm": 0.0177, "loss": 0.0717, "lr": "5.000e-05", "step": 364, "steps": "23.24s,364/33195" }, { "epoch": 0.0549781593613496, "eta": "214:51:14", "grad_norm": 0.0155, "loss": 0.0511, "lr": "5.000e-05", "step": 365, "steps": "23.56s,365/33195" }, { "epoch": 0.055128784455490284, "eta": "215:18:12", "grad_norm": 0.0208, "loss": 0.0662, "lr": "5.000e-05", "step": 366, "steps": "23.61s,366/33195" }, { "epoch": 0.05527940954963097, "eta": "219:07:36", "grad_norm": 0.0145, "loss": 0.0777, "lr": "5.000e-05", "step": 367, "steps": "24.03s,367/33195" }, { "epoch": 0.05543003464377165, "eta": "213:55:21", "grad_norm": 0.0228, "loss": 0.0494, "lr": "5.000e-05", "step": 368, "steps": "23.46s,368/33195" }, { "epoch": 0.05558065973791233, "eta": "215:33:26", "grad_norm": 0.0159, "loss": 0.0826, "lr": "5.000e-05", "step": 369, "steps": "23.64s,369/33195" }, { "epoch": 0.05573128483205302, "eta": "217:55:17", "grad_norm": 0.0143, "loss": 0.06, "lr": "5.000e-05", "step": 370, "steps": "23.9s,370/33195" }, { "epoch": 0.055881909926193706, "eta": "218:55:04", "grad_norm": 0.0148, "loss": 0.047, "lr": "5.000e-05", "step": 371, "steps": "24.01s,371/33195" }, { "epoch": 0.05603253502033439, "eta": "223:00:50", "grad_norm": 0.015, "loss": 0.0558, "lr": "5.000e-05", "step": 372, "steps": "24.46s,372/33195" }, { "epoch": 0.05618316011447507, "eta": "217:59:34", "grad_norm": 0.011, "loss": 0.0471, "lr": "5.000e-05", "step": 373, "steps": "23.91s,373/33195" }, { "epoch": 0.056333785208615754, "eta": "215:26:00", "grad_norm": 0.0167, "loss": 0.0422, "lr": "5.000e-05", "step": 374, "steps": "23.63s,374/33195" }, { "epoch": 0.05648441030275644, "eta": "218:04:14", "grad_norm": 0.0114, "loss": 0.0651, "lr": "5.000e-05", "step": 375, "steps": "23.92s,375/33195" }, { "epoch": 0.056635035396897127, "eta": "217:09:08", "grad_norm": 0.0138, "loss": 0.0537, "lr": "5.000e-05", "step": 376, "steps": "23.82s,376/33195" }, { "epoch": 0.05678566049103781, "eta": "218:25:19", "grad_norm": 0.0102, "loss": 0.0594, "lr": "5.000e-05", "step": 377, "steps": "23.96s,377/33195" }, { "epoch": 0.05693628558517849, "eta": "213:35:02", "grad_norm": 0.0145, "loss": 0.0447, "lr": "5.000e-05", "step": 378, "steps": "23.43s,378/33195" }, { "epoch": 0.057086910679319175, "eta": "217:46:14", "grad_norm": 0.0137, "loss": 0.0404, "lr": "5.000e-05", "step": 379, "steps": "23.89s,379/33195" }, { "epoch": 0.05723753577345986, "eta": "218:29:35", "grad_norm": 0.0162, "loss": 0.0404, "lr": "5.000e-05", "step": 380, "steps": "23.97s,380/33195" }, { "epoch": 0.05738816086760054, "eta": "213:22:55", "grad_norm": 0.015, "loss": 0.0677, "lr": "5.000e-05", "step": 381, "steps": "23.41s,381/33195" }, { "epoch": 0.05753878596174122, "eta": "221:12:51", "grad_norm": 0.0267, "loss": 0.077, "lr": "5.000e-05", "step": 382, "steps": "24.27s,382/33195" }, { "epoch": 0.05768941105588191, "eta": "217:55:34", "grad_norm": 0.0153, "loss": 0.0826, "lr": "5.000e-05", "step": 383, "steps": "23.91s,383/33195" }, { "epoch": 0.057840036150022596, "eta": "215:27:32", "grad_norm": 0.0126, "loss": 0.0382, "lr": "5.000e-05", "step": 384, "steps": "23.64s,384/33195" }, { "epoch": 0.05799066124416328, "eta": "215:21:40", "grad_norm": 0.0206, "loss": 0.0532, "lr": "5.000e-05", "step": 385, "steps": "23.63s,385/33195" }, { "epoch": 0.05814128633830396, "eta": "218:16:15", "grad_norm": 0.0195, "loss": 0.0801, "lr": "5.000e-05", "step": 386, "steps": "23.95s,386/33195" }, { "epoch": 0.058291911432444644, "eta": "218:21:19", "grad_norm": 0.0221, "loss": 0.049, "lr": "5.000e-05", "step": 387, "steps": "23.96s,387/33195" }, { "epoch": 0.05844253652658533, "eta": "213:20:11", "grad_norm": 0.0226, "loss": 0.0415, "lr": "5.000e-05", "step": 388, "steps": "23.41s,388/33195" }, { "epoch": 0.05859316162072601, "eta": "217:58:39", "grad_norm": 0.0142, "loss": 0.0567, "lr": "5.000e-05", "step": 389, "steps": "23.92s,389/33195" }, { "epoch": 0.0587437867148667, "eta": "215:08:46", "grad_norm": 0.0131, "loss": 0.0353, "lr": "5.000e-05", "step": 390, "steps": "23.61s,390/33195" }, { "epoch": 0.05889441180900738, "eta": "214:41:02", "grad_norm": 0.0135, "loss": 0.0447, "lr": "5.000e-05", "step": 391, "steps": "23.56s,391/33195" }, { "epoch": 0.059045036903148065, "eta": "219:19:28", "grad_norm": 0.0172, "loss": 0.0577, "lr": "5.000e-05", "step": 392, "steps": "24.07s,392/33195" }, { "epoch": 0.05919566199728875, "eta": "215:07:35", "grad_norm": 0.0344, "loss": 0.0436, "lr": "5.000e-05", "step": 393, "steps": "23.61s,393/33195" }, { "epoch": 0.05934628709142943, "eta": "214:01:35", "grad_norm": 0.0101, "loss": 0.0463, "lr": "5.000e-05", "step": 394, "steps": "23.49s,394/33195" }, { "epoch": 0.059496912185570114, "eta": "216:50:40", "grad_norm": 0.0142, "loss": 0.0631, "lr": "5.000e-05", "step": 395, "steps": "23.8s,395/33195" }, { "epoch": 0.0596475372797108, "eta": "214:39:04", "grad_norm": 0.0182, "loss": 0.0428, "lr": "5.000e-05", "step": 396, "steps": "23.56s,396/33195" }, { "epoch": 0.059798162373851486, "eta": "218:17:20", "grad_norm": 0.0145, "loss": 0.0569, "lr": "5.000e-05", "step": 397, "steps": "23.96s,397/33195" }, { "epoch": 0.05994878746799217, "eta": "218:27:52", "grad_norm": 0.0212, "loss": 0.044, "lr": "5.000e-05", "step": 398, "steps": "23.98s,398/33195" }, { "epoch": 0.06009941256213285, "eta": "215:05:13", "grad_norm": 0.0156, "loss": 0.0275, "lr": "5.000e-05", "step": 399, "steps": "23.61s,399/33195" }, { "epoch": 0.060250037656273535, "eta": "217:48:48", "grad_norm": 0.0129, "loss": 0.0708, "lr": "5.000e-05", "step": 400, "steps": "23.91s,400/33195" }, { "epoch": 0.06040066275041422, "eta": "00:00:00", "grad_norm": 0.0146, "loss": 0.07, "lr": "5.000e-05", "step": 401, "steps": "0s,401/33195" }, { "epoch": 0.0605512878445549, "eta": "224:37:55", "grad_norm": 0.03, "loss": 0.0534, "lr": "5.000e-05", "step": 402, "steps": "24.66s,402/33195" }, { "epoch": 0.06070191293869558, "eta": "232:11:08", "grad_norm": 0.0214, "loss": 0.0441, "lr": "5.000e-05", "step": 403, "steps": "25.49s,403/33195" }, { "epoch": 0.06085253803283627, "eta": "239:11:31", "grad_norm": 0.0234, "loss": 0.0543, "lr": "5.000e-05", "step": 404, "steps": "26.26s,404/33195" }, { "epoch": 0.061003163126976956, "eta": "222:30:59", "grad_norm": 0.0249, "loss": 0.0541, "lr": "5.000e-05", "step": 405, "steps": "24.43s,405/33195" }, { "epoch": 0.06115378822111764, "eta": "217:57:20", "grad_norm": 0.0173, "loss": 0.0474, "lr": "5.000e-05", "step": 406, "steps": "23.93s,406/33195" }, { "epoch": 0.06130441331525832, "eta": "223:46:41", "grad_norm": 0.032, "loss": 0.0618, "lr": "5.000e-05", "step": 407, "steps": "24.57s,407/33195" }, { "epoch": 0.061455038409399004, "eta": "224:46:23", "grad_norm": 0.015, "loss": 0.043, "lr": "5.000e-05", "step": 408, "steps": "24.68s,408/33195" }, { "epoch": 0.06160566350353969, "eta": "229:35:35", "grad_norm": 0.04, "loss": 0.0509, "lr": "5.000e-05", "step": 409, "steps": "25.21s,409/33195" }, { "epoch": 0.06175628859768038, "eta": "218:55:51", "grad_norm": 0.0166, "loss": 0.0771, "lr": "5.000e-05", "step": 410, "steps": "24.04s,410/33195" }, { "epoch": 0.06190691369182106, "eta": "216:06:04", "grad_norm": 0.0194, "loss": 0.0368, "lr": "5.000e-05", "step": 411, "steps": "23.73s,411/33195" }, { "epoch": 0.06205753878596174, "eta": "218:27:44", "grad_norm": 0.0188, "loss": 0.0619, "lr": "4.999e-05", "step": 412, "steps": "23.99s,412/33195" }, { "epoch": 0.062208163880102425, "eta": "231:34:06", "grad_norm": 0.0309, "loss": 0.0532, "lr": "4.999e-05", "step": 413, "steps": "25.43s,413/33195" }, { "epoch": 0.06235878897424311, "eta": "218:21:28", "grad_norm": 0.0105, "loss": 0.0441, "lr": "4.999e-05", "step": 414, "steps": "23.98s,414/33195" }, { "epoch": 0.0625094140683838, "eta": "214:26:09", "grad_norm": 0.0208, "loss": 0.0499, "lr": "4.999e-05", "step": 415, "steps": "23.55s,415/33195" }, { "epoch": 0.06266003916252448, "eta": "218:09:44", "grad_norm": 0.0205, "loss": 0.0693, "lr": "4.999e-05", "step": 416, "steps": "23.96s,416/33195" }, { "epoch": 0.06281066425666516, "eta": "213:47:07", "grad_norm": 0.0201, "loss": 0.0562, "lr": "4.999e-05", "step": 417, "steps": "23.48s,417/33195" }, { "epoch": 0.06296128935080585, "eta": "214:08:35", "grad_norm": 0.0189, "loss": 0.0458, "lr": "4.999e-05", "step": 418, "steps": "23.52s,418/33195" }, { "epoch": 0.06311191444494653, "eta": "218:24:56", "grad_norm": 0.0104, "loss": 0.0577, "lr": "4.999e-05", "step": 419, "steps": "23.99s,419/33195" }, { "epoch": 0.06326253953908721, "eta": "219:08:14", "grad_norm": 0.0082, "loss": 0.0569, "lr": "4.999e-05", "step": 420, "steps": "24.07s,420/33195" }, { "epoch": 0.0634131646332279, "eta": "214:29:15", "grad_norm": 0.0161, "loss": 0.0623, "lr": "4.999e-05", "step": 421, "steps": "23.56s,421/33195" }, { "epoch": 0.06356378972736858, "eta": "218:45:35", "grad_norm": 0.0237, "loss": 0.0548, "lr": "4.999e-05", "step": 422, "steps": "24.03s,422/33195" }, { "epoch": 0.06371441482150926, "eta": "214:01:09", "grad_norm": 0.0156, "loss": 0.0478, "lr": "4.999e-05", "step": 423, "steps": "23.51s,423/33195" }, { "epoch": 0.06386503991564994, "eta": "216:44:37", "grad_norm": 0.0118, "loss": 0.0753, "lr": "4.999e-05", "step": 424, "steps": "23.81s,424/33195" }, { "epoch": 0.06401566500979063, "eta": "218:38:55", "grad_norm": 0.0354, "loss": 0.0656, "lr": "4.999e-05", "step": 425, "steps": "24.02s,425/33195" }, { "epoch": 0.06416629010393131, "eta": "214:49:08", "grad_norm": 0.0101, "loss": 0.0444, "lr": "4.999e-05", "step": 426, "steps": "23.6s,426/33195" }, { "epoch": 0.064316915198072, "eta": "214:59:40", "grad_norm": 0.0132, "loss": 0.0655, "lr": "4.999e-05", "step": 427, "steps": "23.62s,427/33195" }, { "epoch": 0.06446754029221269, "eta": "217:37:38", "grad_norm": 0.0151, "loss": 0.0598, "lr": "4.999e-05", "step": 428, "steps": "23.91s,428/33195" }, { "epoch": 0.06461816538635337, "eta": "219:21:00", "grad_norm": 0.0117, "loss": 0.0584, "lr": "4.999e-05", "step": 429, "steps": "24.1s,429/33195" }, { "epoch": 0.06476879048049405, "eta": "215:36:42", "grad_norm": 0.016, "loss": 0.0683, "lr": "4.999e-05", "step": 430, "steps": "23.69s,430/33195" }, { "epoch": 0.06491941557463474, "eta": "215:14:28", "grad_norm": 0.0114, "loss": 0.0747, "lr": "4.999e-05", "step": 431, "steps": "23.65s,431/33195" }, { "epoch": 0.06507004066877542, "eta": "218:52:30", "grad_norm": 0.0138, "loss": 0.0604, "lr": "4.999e-05", "step": 432, "steps": "24.05s,432/33195" }, { "epoch": 0.0652206657629161, "eta": "218:57:33", "grad_norm": 0.0149, "loss": 0.0612, "lr": "4.999e-05", "step": 433, "steps": "24.06s,433/33195" }, { "epoch": 0.06537129085705679, "eta": "214:29:36", "grad_norm": 0.0137, "loss": 0.0651, "lr": "4.999e-05", "step": 434, "steps": "23.57s,434/33195" }, { "epoch": 0.06552191595119747, "eta": "217:56:42", "grad_norm": 0.0112, "loss": 0.0203, "lr": "4.999e-05", "step": 435, "steps": "23.95s,435/33195" }, { "epoch": 0.06567254104533815, "eta": "218:18:08", "grad_norm": 0.0109, "loss": 0.0386, "lr": "4.999e-05", "step": 436, "steps": "23.99s,436/33195" }, { "epoch": 0.06582316613947883, "eta": "214:33:53", "grad_norm": 0.0097, "loss": 0.0418, "lr": "4.999e-05", "step": 437, "steps": "23.58s,437/33195" }, { "epoch": 0.06597379123361952, "eta": "218:22:48", "grad_norm": 0.0092, "loss": 0.055, "lr": "4.999e-05", "step": 438, "steps": "24.0s,438/33195" }, { "epoch": 0.0661244163277602, "eta": "214:27:38", "grad_norm": 0.0106, "loss": 0.04, "lr": "4.999e-05", "step": 439, "steps": "23.57s,439/33195" }, { "epoch": 0.06627504142190088, "eta": "219:54:48", "grad_norm": 0.0143, "loss": 0.0537, "lr": "4.999e-05", "step": 440, "steps": "24.17s,440/33195" }, { "epoch": 0.06642566651604158, "eta": "214:05:01", "grad_norm": 0.014, "loss": 0.0359, "lr": "4.999e-05", "step": 441, "steps": "23.53s,441/33195" }, { "epoch": 0.06657629161018226, "eta": "218:59:24", "grad_norm": 0.0154, "loss": 0.065, "lr": "4.999e-05", "step": 442, "steps": "24.07s,442/33195" }, { "epoch": 0.06672691670432294, "eta": "216:42:32", "grad_norm": 0.012, "loss": 0.0748, "lr": "4.999e-05", "step": 443, "steps": "23.82s,443/33195" }, { "epoch": 0.06687754179846363, "eta": "218:36:46", "grad_norm": 0.0175, "loss": 0.0448, "lr": "4.999e-05", "step": 444, "steps": "24.03s,444/33195" }, { "epoch": 0.06702816689260431, "eta": "213:36:10", "grad_norm": 0.0142, "loss": 0.0366, "lr": "4.999e-05", "step": 445, "steps": "23.48s,445/33195" }, { "epoch": 0.06717879198674499, "eta": "213:41:14", "grad_norm": 0.0153, "loss": 0.0717, "lr": "4.999e-05", "step": 446, "steps": "23.49s,446/33195" }, { "epoch": 0.06732941708088568, "eta": "217:57:22", "grad_norm": 0.0103, "loss": 0.0339, "lr": "4.999e-05", "step": 447, "steps": "23.96s,447/33195" }, { "epoch": 0.06748004217502636, "eta": "221:18:54", "grad_norm": 0.009, "loss": 0.0317, "lr": "4.999e-05", "step": 448, "steps": "24.33s,448/33195" }, { "epoch": 0.06763066726916704, "eta": "215:12:50", "grad_norm": 0.0116, "loss": 0.055, "lr": "4.999e-05", "step": 449, "steps": "23.66s,449/33195" }, { "epoch": 0.06778129236330772, "eta": "213:23:17", "grad_norm": 0.0105, "loss": 0.0669, "lr": "4.999e-05", "step": 450, "steps": "23.46s,450/33195" }, { "epoch": 0.0679319174574484, "eta": "214:33:50", "grad_norm": 0.0119, "loss": 0.0698, "lr": "4.999e-05", "step": 451, "steps": "23.59s,451/33195" }, { "epoch": 0.06808254255158909, "eta": "216:55:20", "grad_norm": 0.0134, "loss": 0.0385, "lr": "4.999e-05", "step": 452, "steps": "23.85s,452/33195" }, { "epoch": 0.06823316764572977, "eta": "218:27:42", "grad_norm": 0.0137, "loss": 0.0419, "lr": "4.999e-05", "step": 453, "steps": "24.02s,453/33195" }, { "epoch": 0.06838379273987047, "eta": "213:27:11", "grad_norm": 0.0165, "loss": 0.0329, "lr": "4.999e-05", "step": 454, "steps": "23.47s,454/33195" }, { "epoch": 0.06853441783401115, "eta": "218:26:54", "grad_norm": 0.0121, "loss": 0.0542, "lr": "4.999e-05", "step": 455, "steps": "24.02s,455/33195" }, { "epoch": 0.06868504292815183, "eta": "216:37:22", "grad_norm": 0.0167, "loss": 0.0743, "lr": "4.999e-05", "step": 456, "steps": "23.82s,456/33195" }, { "epoch": 0.06883566802229252, "eta": "217:53:22", "grad_norm": 0.0134, "loss": 0.0719, "lr": "4.999e-05", "step": 457, "steps": "23.96s,457/33195" }, { "epoch": 0.0689862931164332, "eta": "217:58:25", "grad_norm": 0.0125, "loss": 0.0709, "lr": "4.999e-05", "step": 458, "steps": "23.97s,458/33195" }, { "epoch": 0.06913691821057388, "eta": "217:25:17", "grad_norm": 0.0172, "loss": 0.0664, "lr": "4.999e-05", "step": 459, "steps": "23.91s,459/33195" }, { "epoch": 0.06928754330471457, "eta": "213:24:50", "grad_norm": 0.0102, "loss": 0.0649, "lr": "4.999e-05", "step": 460, "steps": "23.47s,460/33195" }, { "epoch": 0.06943816839885525, "eta": "213:40:49", "grad_norm": 0.0189, "loss": 0.0698, "lr": "4.999e-05", "step": 461, "steps": "23.5s,461/33195" }, { "epoch": 0.06958879349299593, "eta": "218:02:17", "grad_norm": 0.0111, "loss": 0.0757, "lr": "4.999e-05", "step": 462, "steps": "23.98s,462/33195" }, { "epoch": 0.06973941858713661, "eta": "215:07:19", "grad_norm": 0.0126, "loss": 0.0558, "lr": "4.999e-05", "step": 463, "steps": "23.66s,463/33195" }, { "epoch": 0.0698900436812773, "eta": "212:34:10", "grad_norm": 0.0131, "loss": 0.0439, "lr": "4.999e-05", "step": 464, "steps": "23.38s,464/33195" }, { "epoch": 0.07004066877541798, "eta": "217:28:21", "grad_norm": 0.0096, "loss": 0.0582, "lr": "4.999e-05", "step": 465, "steps": "23.92s,465/33195" }, { "epoch": 0.07019129386955866, "eta": "213:27:56", "grad_norm": 0.0154, "loss": 0.0565, "lr": "4.999e-05", "step": 466, "steps": "23.48s,466/33195" }, { "epoch": 0.07034191896369935, "eta": "217:11:11", "grad_norm": 0.0161, "loss": 0.0435, "lr": "4.999e-05", "step": 467, "steps": "23.89s,467/33195" }, { "epoch": 0.07049254405784004, "eta": "215:16:15", "grad_norm": 0.0106, "loss": 0.0748, "lr": "4.999e-05", "step": 468, "steps": "23.68s,468/33195" }, { "epoch": 0.07064316915198073, "eta": "217:21:18", "grad_norm": 0.0128, "loss": 0.0487, "lr": "4.999e-05", "step": 469, "steps": "23.91s,469/33195" }, { "epoch": 0.07079379424612141, "eta": "213:59:06", "grad_norm": 0.0181, "loss": 0.0434, "lr": "4.999e-05", "step": 470, "steps": "23.54s,470/33195" }, { "epoch": 0.07094441934026209, "eta": "218:09:36", "grad_norm": 0.0142, "loss": 0.0652, "lr": "4.999e-05", "step": 471, "steps": "24.0s,471/33195" }, { "epoch": 0.07109504443440277, "eta": "214:36:30", "grad_norm": 0.0108, "loss": 0.0528, "lr": "4.999e-05", "step": 472, "steps": "23.61s,472/33195" }, { "epoch": 0.07124566952854346, "eta": "217:41:31", "grad_norm": 0.0137, "loss": 0.0462, "lr": "4.999e-05", "step": 473, "steps": "23.95s,473/33195" }, { "epoch": 0.07139629462268414, "eta": "217:52:02", "grad_norm": 0.0092, "loss": 0.059, "lr": "4.999e-05", "step": 474, "steps": "23.97s,474/33195" }, { "epoch": 0.07154691971682482, "eta": "219:13:26", "grad_norm": 0.0086, "loss": 0.0434, "lr": "4.999e-05", "step": 475, "steps": "24.12s,475/33195" }, { "epoch": 0.0716975448109655, "eta": "215:29:27", "grad_norm": 0.0139, "loss": 0.0538, "lr": "4.999e-05", "step": 476, "steps": "23.71s,476/33195" }, { "epoch": 0.07184816990510619, "eta": "214:23:37", "grad_norm": 0.0137, "loss": 0.049, "lr": "4.999e-05", "step": 477, "steps": "23.59s,477/33195" }, { "epoch": 0.07199879499924687, "eta": "218:01:20", "grad_norm": 0.0105, "loss": 0.0258, "lr": "4.999e-05", "step": 478, "steps": "23.99s,478/33195" }, { "epoch": 0.07214942009338755, "eta": "213:39:13", "grad_norm": 0.0098, "loss": 0.0598, "lr": "4.999e-05", "step": 479, "steps": "23.51s,479/33195" }, { "epoch": 0.07230004518752824, "eta": "213:11:33", "grad_norm": 0.0138, "loss": 0.0465, "lr": "4.999e-05", "step": 480, "steps": "23.46s,480/33195" }, { "epoch": 0.07245067028166893, "eta": "217:16:31", "grad_norm": 0.0112, "loss": 0.0508, "lr": "4.999e-05", "step": 481, "steps": "23.91s,481/33195" }, { "epoch": 0.07260129537580962, "eta": "214:27:06", "grad_norm": 0.1465, "loss": 0.0622, "lr": "4.999e-05", "step": 482, "steps": "23.6s,482/33195" }, { "epoch": 0.0727519204699503, "eta": "213:54:00", "grad_norm": 0.028, "loss": 0.0628, "lr": "4.999e-05", "step": 483, "steps": "23.54s,483/33195" }, { "epoch": 0.07290254556409098, "eta": "216:37:10", "grad_norm": 0.0121, "loss": 0.0588, "lr": "4.999e-05", "step": 484, "steps": "23.84s,484/33195" }, { "epoch": 0.07305317065823166, "eta": "214:20:28", "grad_norm": 0.0086, "loss": 0.0548, "lr": "4.999e-05", "step": 485, "steps": "23.59s,485/33195" }, { "epoch": 0.07320379575237235, "eta": "213:31:01", "grad_norm": 0.0104, "loss": 0.0428, "lr": "4.999e-05", "step": 486, "steps": "23.5s,486/33195" }, { "epoch": 0.07335442084651303, "eta": "217:03:14", "grad_norm": 0.0149, "loss": 0.0581, "lr": "4.999e-05", "step": 487, "steps": "23.89s,487/33195" }, { "epoch": 0.07350504594065371, "eta": "215:08:21", "grad_norm": 0.0125, "loss": 0.0604, "lr": "4.999e-05", "step": 488, "steps": "23.68s,488/33195" }, { "epoch": 0.0736556710347944, "eta": "216:46:05", "grad_norm": 0.0115, "loss": 0.0618, "lr": "4.999e-05", "step": 489, "steps": "23.86s,489/33195" }, { "epoch": 0.07380629612893508, "eta": "214:02:09", "grad_norm": 0.0104, "loss": 0.0481, "lr": "4.999e-05", "step": 490, "steps": "23.56s,490/33195" }, { "epoch": 0.07395692122307576, "eta": "214:34:28", "grad_norm": 0.0164, "loss": 0.032, "lr": "4.999e-05", "step": 491, "steps": "23.62s,491/33195" }, { "epoch": 0.07410754631721644, "eta": "214:44:58", "grad_norm": 0.0104, "loss": 0.0777, "lr": "4.999e-05", "step": 492, "steps": "23.64s,492/33195" }, { "epoch": 0.07425817141135713, "eta": "214:33:41", "grad_norm": 0.0159, "loss": 0.0813, "lr": "4.999e-05", "step": 493, "steps": "23.62s,493/33195" }, { "epoch": 0.07440879650549781, "eta": "215:11:26", "grad_norm": 0.0103, "loss": 0.075, "lr": "4.999e-05", "step": 494, "steps": "23.69s,494/33195" }, { "epoch": 0.0745594215996385, "eta": "214:43:48", "grad_norm": 0.01, "loss": 0.0488, "lr": "4.999e-05", "step": 495, "steps": "23.64s,495/33195" }, { "epoch": 0.07471004669377919, "eta": "215:10:39", "grad_norm": 0.017, "loss": 0.0549, "lr": "4.999e-05", "step": 496, "steps": "23.69s,496/33195" }, { "epoch": 0.07486067178791987, "eta": "219:10:02", "grad_norm": 0.0178, "loss": 0.0537, "lr": "4.999e-05", "step": 497, "steps": "24.13s,497/33195" }, { "epoch": 0.07501129688206055, "eta": "218:04:14", "grad_norm": 0.0151, "loss": 0.04, "lr": "4.999e-05", "step": 498, "steps": "24.01s,498/33195" }, { "epoch": 0.07516192197620124, "eta": "215:09:28", "grad_norm": 0.0203, "loss": 0.0509, "lr": "4.999e-05", "step": 499, "steps": "23.69s,499/33195" }, { "epoch": 0.07531254707034192, "eta": "218:41:35", "grad_norm": 0.0131, "loss": 0.0637, "lr": "4.999e-05", "step": 500, "steps": "24.08s,500/33195" }, { "epoch": 0.0754631721644826, "eta": "218:19:23", "grad_norm": 0.0118, "loss": 0.0626, "lr": "4.999e-05", "step": 501, "steps": "24.04s,501/33195" }, { "epoch": 0.07561379725862329, "eta": "218:18:59", "grad_norm": 0.0094, "loss": 0.0752, "lr": "4.999e-05", "step": 502, "steps": "24.04s,502/33195" }, { "epoch": 0.07576442235276397, "eta": "214:29:45", "grad_norm": 0.0073, "loss": 0.0351, "lr": "4.999e-05", "step": 503, "steps": "23.62s,503/33195" }, { "epoch": 0.07591504744690465, "eta": "217:45:30", "grad_norm": 0.0107, "loss": 0.0665, "lr": "4.999e-05", "step": 504, "steps": "23.98s,504/33195" }, { "epoch": 0.07606567254104533, "eta": "217:01:31", "grad_norm": 0.0123, "loss": 0.0752, "lr": "4.999e-05", "step": 505, "steps": "23.9s,505/33195" }, { "epoch": 0.07621629763518602, "eta": "214:01:19", "grad_norm": 0.0138, "loss": 0.0434, "lr": "4.999e-05", "step": 506, "steps": "23.57s,506/33195" }, { "epoch": 0.0763669227293267, "eta": "214:11:49", "grad_norm": 0.0134, "loss": 0.0639, "lr": "4.999e-05", "step": 507, "steps": "23.59s,507/33195" }, { "epoch": 0.07651754782346738, "eta": "214:22:20", "grad_norm": 0.0093, "loss": 0.0519, "lr": "4.999e-05", "step": 508, "steps": "23.61s,508/33195" }, { "epoch": 0.07666817291760808, "eta": "216:16:20", "grad_norm": 0.0079, "loss": 0.054, "lr": "4.999e-05", "step": 509, "steps": "23.82s,509/33195" }, { "epoch": 0.07681879801174876, "eta": "214:10:39", "grad_norm": 0.0129, "loss": 0.0562, "lr": "4.999e-05", "step": 510, "steps": "23.59s,510/33195" }, { "epoch": 0.07696942310588945, "eta": "218:09:56", "grad_norm": 0.0109, "loss": 0.0468, "lr": "4.999e-05", "step": 511, "steps": "24.03s,511/33195" }, { "epoch": 0.07712004820003013, "eta": "218:09:32", "grad_norm": 0.0167, "loss": 0.0359, "lr": "4.999e-05", "step": 512, "steps": "24.03s,512/33195" }, { "epoch": 0.07727067329417081, "eta": "217:52:48", "grad_norm": 0.011, "loss": 0.0598, "lr": "4.999e-05", "step": 513, "steps": "24.0s,513/33195" }, { "epoch": 0.0774212983883115, "eta": "215:52:34", "grad_norm": 0.0118, "loss": 0.0436, "lr": "4.999e-05", "step": 514, "steps": "23.78s,514/33195" }, { "epoch": 0.07757192348245218, "eta": "213:19:40", "grad_norm": 0.0084, "loss": 0.0804, "lr": "4.999e-05", "step": 515, "steps": "23.5s,515/33195" }, { "epoch": 0.07772254857659286, "eta": "218:24:16", "grad_norm": 0.0081, "loss": 0.0749, "lr": "4.999e-05", "step": 516, "steps": "24.06s,516/33195" }, { "epoch": 0.07787317367073354, "eta": "212:18:58", "grad_norm": 0.0099, "loss": 0.0753, "lr": "4.999e-05", "step": 517, "steps": "23.39s,517/33195" }, { "epoch": 0.07802379876487422, "eta": "221:01:24", "grad_norm": 0.0098, "loss": 0.055, "lr": "4.999e-05", "step": 518, "steps": "24.35s,518/33195" }, { "epoch": 0.07817442385901491, "eta": "218:06:44", "grad_norm": 0.014, "loss": 0.066, "lr": "4.999e-05", "step": 519, "steps": "24.03s,519/33195" }, { "epoch": 0.07832504895315559, "eta": "218:11:47", "grad_norm": 0.0076, "loss": 0.0702, "lr": "4.999e-05", "step": 520, "steps": "24.04s,520/33195" }, { "epoch": 0.07847567404729627, "eta": "214:55:20", "grad_norm": 0.0108, "loss": 0.0663, "lr": "4.999e-05", "step": 521, "steps": "23.68s,521/33195" }, { "epoch": 0.07862629914143697, "eta": "216:22:04", "grad_norm": 0.0142, "loss": 0.0523, "lr": "4.999e-05", "step": 522, "steps": "23.84s,522/33195" }, { "epoch": 0.07877692423557765, "eta": "214:05:32", "grad_norm": 0.0092, "loss": 0.0605, "lr": "4.999e-05", "step": 523, "steps": "23.59s,523/33195" }, { "epoch": 0.07892754932971834, "eta": "213:32:28", "grad_norm": 0.0075, "loss": 0.0516, "lr": "4.999e-05", "step": 524, "steps": "23.53s,524/33195" }, { "epoch": 0.07907817442385902, "eta": "217:04:26", "grad_norm": 0.0087, "loss": 0.0508, "lr": "4.999e-05", "step": 525, "steps": "23.92s,525/33195" }, { "epoch": 0.0792287995179997, "eta": "217:42:09", "grad_norm": 0.0098, "loss": 0.0678, "lr": "4.999e-05", "step": 526, "steps": "23.99s,526/33195" }, { "epoch": 0.07937942461214038, "eta": "214:20:18", "grad_norm": 0.0089, "loss": 0.0413, "lr": "4.999e-05", "step": 527, "steps": "23.62s,527/33195" }, { "epoch": 0.07953004970628107, "eta": "213:25:27", "grad_norm": 0.008, "loss": 0.0428, "lr": "4.999e-05", "step": 528, "steps": "23.52s,528/33195" }, { "epoch": 0.07968067480042175, "eta": "214:41:17", "grad_norm": 0.0168, "loss": 0.0651, "lr": "4.999e-05", "step": 529, "steps": "23.66s,529/33195" }, { "epoch": 0.07983129989456243, "eta": "217:07:53", "grad_norm": 0.0106, "loss": 0.0741, "lr": "4.999e-05", "step": 530, "steps": "23.93s,530/33195" }, { "epoch": 0.07998192498870312, "eta": "217:40:09", "grad_norm": 0.0112, "loss": 0.0479, "lr": "4.999e-05", "step": 531, "steps": "23.99s,531/33195" }, { "epoch": 0.0801325500828438, "eta": "213:13:00", "grad_norm": 0.0085, "loss": 0.0464, "lr": "4.999e-05", "step": 532, "steps": "23.5s,532/33195" }, { "epoch": 0.08028317517698448, "eta": "210:40:11", "grad_norm": 0.0094, "loss": 0.0495, "lr": "4.999e-05", "step": 533, "steps": "23.22s,533/33195" }, { "epoch": 0.08043380027112516, "eta": "215:01:05", "grad_norm": 0.0103, "loss": 0.0478, "lr": "4.999e-05", "step": 534, "steps": "23.7s,534/33195" }, { "epoch": 0.08058442536526585, "eta": "214:49:48", "grad_norm": 0.0107, "loss": 0.058, "lr": "4.999e-05", "step": 535, "steps": "23.68s,535/33195" }, { "epoch": 0.08073505045940654, "eta": "216:54:36", "grad_norm": 0.0096, "loss": 0.094, "lr": "4.999e-05", "step": 536, "steps": "23.91s,536/33195" }, { "epoch": 0.08088567555354723, "eta": "217:43:12", "grad_norm": 0.0125, "loss": 0.0599, "lr": "4.999e-05", "step": 537, "steps": "24.0s,537/33195" }, { "epoch": 0.08103630064768791, "eta": "216:48:22", "grad_norm": 0.0142, "loss": 0.0457, "lr": "4.999e-05", "step": 538, "steps": "23.9s,538/33195" }, { "epoch": 0.08118692574182859, "eta": "216:31:38", "grad_norm": 0.0104, "loss": 0.0785, "lr": "4.999e-05", "step": 539, "steps": "23.87s,539/33195" }, { "epoch": 0.08133755083596927, "eta": "214:36:57", "grad_norm": 0.0194, "loss": 0.0449, "lr": "4.999e-05", "step": 540, "steps": "23.66s,540/33195" }, { "epoch": 0.08148817593010996, "eta": "217:19:49", "grad_norm": 0.0099, "loss": 0.052, "lr": "4.999e-05", "step": 541, "steps": "23.96s,541/33195" }, { "epoch": 0.08163880102425064, "eta": "214:41:36", "grad_norm": 0.0103, "loss": 0.0512, "lr": "4.999e-05", "step": 542, "steps": "23.67s,542/33195" }, { "epoch": 0.08178942611839132, "eta": "218:02:34", "grad_norm": 0.0134, "loss": 0.0384, "lr": "4.999e-05", "step": 543, "steps": "24.04s,543/33195" }, { "epoch": 0.081940051212532, "eta": "214:24:29", "grad_norm": 0.0083, "loss": 0.0826, "lr": "4.999e-05", "step": 544, "steps": "23.64s,544/33195" }, { "epoch": 0.08209067630667269, "eta": "218:56:11", "grad_norm": 0.0114, "loss": 0.0677, "lr": "4.999e-05", "step": 545, "steps": "24.14s,545/33195" }, { "epoch": 0.08224130140081337, "eta": "212:40:19", "grad_norm": 0.009, "loss": 0.066, "lr": "4.999e-05", "step": 546, "steps": "23.45s,546/33195" }, { "epoch": 0.08239192649495405, "eta": "216:39:20", "grad_norm": 0.0084, "loss": 0.0463, "lr": "4.999e-05", "step": 547, "steps": "23.89s,547/33195" }, { "epoch": 0.08254255158909474, "eta": "216:33:30", "grad_norm": 0.0068, "loss": 0.0799, "lr": "4.999e-05", "step": 548, "steps": "23.88s,548/33195" }, { "epoch": 0.08269317668323543, "eta": "214:44:17", "grad_norm": 0.0092, "loss": 0.0555, "lr": "4.999e-05", "step": 549, "steps": "23.68s,549/33195" }, { "epoch": 0.08284380177737612, "eta": "213:49:29", "grad_norm": 0.0128, "loss": 0.0649, "lr": "4.999e-05", "step": 550, "steps": "23.58s,550/33195" }, { "epoch": 0.0829944268715168, "eta": "217:21:16", "grad_norm": 0.0093, "loss": 0.0616, "lr": "4.999e-05", "step": 551, "steps": "23.97s,551/33195" }, { "epoch": 0.08314505196565748, "eta": "217:48:04", "grad_norm": 0.0129, "loss": 0.0425, "lr": "4.999e-05", "step": 552, "steps": "24.02s,552/33195" }, { "epoch": 0.08329567705979816, "eta": "214:26:23", "grad_norm": 0.0093, "loss": 0.0434, "lr": "4.999e-05", "step": 553, "steps": "23.65s,553/33195" }, { "epoch": 0.08344630215393885, "eta": "213:58:47", "grad_norm": 0.011, "loss": 0.0656, "lr": "4.999e-05", "step": 554, "steps": "23.6s,554/33195" }, { "epoch": 0.08359692724807953, "eta": "217:41:26", "grad_norm": 0.0095, "loss": 0.0451, "lr": "4.999e-05", "step": 555, "steps": "24.01s,555/33195" }, { "epoch": 0.08374755234222021, "eta": "213:14:29", "grad_norm": 0.0077, "loss": 0.0476, "lr": "4.999e-05", "step": 556, "steps": "23.52s,556/33195" }, { "epoch": 0.0838981774363609, "eta": "217:08:00", "grad_norm": 0.01, "loss": 0.0558, "lr": "4.999e-05", "step": 557, "steps": "23.95s,557/33195" }, { "epoch": 0.08404880253050158, "eta": "213:19:08", "grad_norm": 0.0115, "loss": 0.0506, "lr": "4.999e-05", "step": 558, "steps": "23.53s,558/33195" }, { "epoch": 0.08419942762464226, "eta": "215:45:36", "grad_norm": 0.0082, "loss": 0.0521, "lr": "4.999e-05", "step": 559, "steps": "23.8s,559/33195" }, { "epoch": 0.08435005271878294, "eta": "215:23:27", "grad_norm": 0.0125, "loss": 0.0474, "lr": "4.999e-05", "step": 560, "steps": "23.76s,560/33195" }, { "epoch": 0.08450067781292363, "eta": "214:01:28", "grad_norm": 0.0089, "loss": 0.0339, "lr": "4.999e-05", "step": 561, "steps": "23.61s,561/33195" }, { "epoch": 0.08465130290706431, "eta": "214:17:24", "grad_norm": 0.0154, "loss": 0.0585, "lr": "4.999e-05", "step": 562, "steps": "23.64s,562/33195" }, { "epoch": 0.08480192800120501, "eta": "213:38:56", "grad_norm": 0.0075, "loss": 0.0382, "lr": "4.999e-05", "step": 563, "steps": "23.57s,563/33195" }, { "epoch": 0.08495255309534569, "eta": "215:27:18", "grad_norm": 0.0132, "loss": 0.0738, "lr": "4.999e-05", "step": 564, "steps": "23.77s,564/33195" }, { "epoch": 0.08510317818948637, "eta": "214:43:24", "grad_norm": 0.0117, "loss": 0.0509, "lr": "4.999e-05", "step": 565, "steps": "23.69s,565/33195" }, { "epoch": 0.08525380328362706, "eta": "213:32:19", "grad_norm": 0.0127, "loss": 0.0391, "lr": "4.998e-05", "step": 566, "steps": "23.56s,566/33195" }, { "epoch": 0.08540442837776774, "eta": "213:04:44", "grad_norm": 0.0089, "loss": 0.047, "lr": "4.998e-05", "step": 567, "steps": "23.51s,567/33195" }, { "epoch": 0.08555505347190842, "eta": "214:53:06", "grad_norm": 0.0078, "loss": 0.0527, "lr": "4.998e-05", "step": 568, "steps": "23.71s,568/33195" }, { "epoch": 0.0857056785660491, "eta": "216:41:27", "grad_norm": 0.0079, "loss": 0.0551, "lr": "4.998e-05", "step": 569, "steps": "23.91s,569/33195" }, { "epoch": 0.08585630366018979, "eta": "216:41:03", "grad_norm": 0.0114, "loss": 0.0409, "lr": "4.998e-05", "step": 570, "steps": "23.91s,570/33195" }, { "epoch": 0.08600692875433047, "eta": "213:19:28", "grad_norm": 0.0107, "loss": 0.0596, "lr": "4.998e-05", "step": 571, "steps": "23.54s,571/33195" }, { "epoch": 0.08615755384847115, "eta": "213:24:31", "grad_norm": 0.0111, "loss": 0.0715, "lr": "4.998e-05", "step": 572, "steps": "23.55s,572/33195" }, { "epoch": 0.08630817894261184, "eta": "213:18:41", "grad_norm": 0.0087, "loss": 0.0688, "lr": "4.998e-05", "step": 573, "steps": "23.54s,573/33195" }, { "epoch": 0.08645880403675252, "eta": "212:45:41", "grad_norm": 0.0084, "loss": 0.0568, "lr": "4.998e-05", "step": 574, "steps": "23.48s,574/33195" }, { "epoch": 0.0866094291308932, "eta": "216:01:00", "grad_norm": 0.0093, "loss": 0.065, "lr": "4.998e-05", "step": 575, "steps": "23.84s,575/33195" }, { "epoch": 0.0867600542250339, "eta": "217:00:25", "grad_norm": 0.0117, "loss": 0.0361, "lr": "4.998e-05", "step": 576, "steps": "23.95s,576/33195" }, { "epoch": 0.08691067931917458, "eta": "214:33:14", "grad_norm": 0.0103, "loss": 0.0581, "lr": "4.998e-05", "step": 577, "steps": "23.68s,577/33195" }, { "epoch": 0.08706130441331526, "eta": "218:32:02", "grad_norm": 0.0087, "loss": 0.0445, "lr": "4.998e-05", "step": 578, "steps": "24.12s,578/33195" }, { "epoch": 0.08721192950745595, "eta": "214:48:45", "grad_norm": 0.0096, "loss": 0.0844, "lr": "4.998e-05", "step": 579, "steps": "23.71s,579/33195" }, { "epoch": 0.08736255460159663, "eta": "216:58:49", "grad_norm": 0.0203, "loss": 0.0394, "lr": "4.998e-05", "step": 580, "steps": "23.95s,580/33195" }, { "epoch": 0.08751317969573731, "eta": "216:58:25", "grad_norm": 0.0107, "loss": 0.0549, "lr": "4.998e-05", "step": 581, "steps": "23.95s,581/33195" }, { "epoch": 0.087663804789878, "eta": "217:41:30", "grad_norm": 0.0097, "loss": 0.0437, "lr": "4.998e-05", "step": 582, "steps": "24.03s,582/33195" }, { "epoch": 0.08781442988401868, "eta": "216:52:11", "grad_norm": 0.0084, "loss": 0.0472, "lr": "4.998e-05", "step": 583, "steps": "23.94s,583/33195" }, { "epoch": 0.08796505497815936, "eta": "214:57:39", "grad_norm": 0.0092, "loss": 0.0629, "lr": "4.998e-05", "step": 584, "steps": "23.73s,584/33195" }, { "epoch": 0.08811568007230004, "eta": "218:45:31", "grad_norm": 0.0083, "loss": 0.0572, "lr": "4.998e-05", "step": 585, "steps": "24.15s,585/33195" }, { "epoch": 0.08826630516644073, "eta": "217:07:17", "grad_norm": 0.0107, "loss": 0.0419, "lr": "4.998e-05", "step": 586, "steps": "23.97s,586/33195" }, { "epoch": 0.08841693026058141, "eta": "216:56:01", "grad_norm": 0.0128, "loss": 0.0488, "lr": "4.998e-05", "step": 587, "steps": "23.95s,587/33195" }, { "epoch": 0.08856755535472209, "eta": "217:55:24", "grad_norm": 0.0101, "loss": 0.0347, "lr": "4.998e-05", "step": 588, "steps": "24.06s,588/33195" }, { "epoch": 0.08871818044886277, "eta": "217:11:31", "grad_norm": 0.0129, "loss": 0.0477, "lr": "4.998e-05", "step": 589, "steps": "23.98s,589/33195" }, { "epoch": 0.08886880554300347, "eta": "214:06:22", "grad_norm": 0.0117, "loss": 0.0543, "lr": "4.998e-05", "step": 590, "steps": "23.64s,590/33195" }, { "epoch": 0.08901943063714415, "eta": "217:37:54", "grad_norm": 0.0081, "loss": 0.0746, "lr": "4.998e-05", "step": 591, "steps": "24.03s,591/33195" }, { "epoch": 0.08917005573128484, "eta": "213:27:32", "grad_norm": 0.0095, "loss": 0.076, "lr": "4.998e-05", "step": 592, "steps": "23.57s,592/33195" }, { "epoch": 0.08932068082542552, "eta": "213:16:17", "grad_norm": 0.0101, "loss": 0.0489, "lr": "4.998e-05", "step": 593, "steps": "23.55s,593/33195" }, { "epoch": 0.0894713059195662, "eta": "216:47:47", "grad_norm": 0.0097, "loss": 0.0432, "lr": "4.998e-05", "step": 594, "steps": "23.94s,594/33195" }, { "epoch": 0.08962193101370688, "eta": "216:14:48", "grad_norm": 0.0088, "loss": 0.0419, "lr": "4.998e-05", "step": 595, "steps": "23.88s,595/33195" }, { "epoch": 0.08977255610784757, "eta": "215:41:48", "grad_norm": 0.009, "loss": 0.0681, "lr": "4.998e-05", "step": 596, "steps": "23.82s,596/33195" }, { "epoch": 0.08992318120198825, "eta": "216:46:36", "grad_norm": 0.0095, "loss": 0.054, "lr": "4.998e-05", "step": 597, "steps": "23.94s,597/33195" }, { "epoch": 0.09007380629612893, "eta": "216:29:54", "grad_norm": 0.0076, "loss": 0.081, "lr": "4.998e-05", "step": 598, "steps": "23.91s,598/33195" }, { "epoch": 0.09022443139026962, "eta": "215:46:02", "grad_norm": 0.0116, "loss": 0.0422, "lr": "4.998e-05", "step": 599, "steps": "23.83s,599/33195" }, { "epoch": 0.0903750564844103, "eta": "217:18:00", "grad_norm": 0.0139, "loss": 0.0622, "lr": "4.998e-05", "step": 600, "steps": "24.0s,600/33195" }, { "epoch": 0.09052568157855098, "eta": "434:29:46", "grad_norm": 0.0114, "loss": 0.0446, "lr": "4.998e-05", "step": 601, "steps": "47.99s,601/33195" }, { "epoch": 0.09067630667269166, "eta": "214:28:48", "grad_norm": 0.0074, "loss": 0.0304, "lr": "4.998e-05", "step": 602, "steps": "23.69s,602/33195" }, { "epoch": 0.09082693176683236, "eta": "214:01:14", "grad_norm": 0.0083, "loss": 0.0648, "lr": "4.998e-05", "step": 603, "steps": "23.64s,603/33195" }, { "epoch": 0.09097755686097304, "eta": "215:11:28", "grad_norm": 0.0065, "loss": 0.0441, "lr": "4.998e-05", "step": 604, "steps": "23.77s,604/33195" }, { "epoch": 0.09112818195511373, "eta": "212:55:16", "grad_norm": 0.0092, "loss": 0.0664, "lr": "4.998e-05", "step": 605, "steps": "23.52s,605/33195" }, { "epoch": 0.09127880704925441, "eta": "217:31:53", "grad_norm": 0.0095, "loss": 0.0559, "lr": "4.998e-05", "step": 606, "steps": "24.03s,606/33195" }, { "epoch": 0.09142943214339509, "eta": "217:53:13", "grad_norm": 0.0107, "loss": 0.0592, "lr": "4.998e-05", "step": 607, "steps": "24.07s,607/33195" }, { "epoch": 0.09158005723753578, "eta": "213:21:15", "grad_norm": 0.0092, "loss": 0.0518, "lr": "4.998e-05", "step": 608, "steps": "23.57s,608/33195" }, { "epoch": 0.09173068233167646, "eta": "212:37:25", "grad_norm": 0.0087, "loss": 0.0393, "lr": "4.998e-05", "step": 609, "steps": "23.49s,609/33195" }, { "epoch": 0.09188130742581714, "eta": "213:09:36", "grad_norm": 0.0081, "loss": 0.0798, "lr": "4.998e-05", "step": 610, "steps": "23.55s,610/33195" }, { "epoch": 0.09203193251995782, "eta": "213:30:56", "grad_norm": 0.0093, "loss": 0.0364, "lr": "4.998e-05", "step": 611, "steps": "23.59s,611/33195" }, { "epoch": 0.0921825576140985, "eta": "217:18:37", "grad_norm": 0.0075, "loss": 0.0509, "lr": "4.998e-05", "step": 612, "steps": "24.01s,612/33195" }, { "epoch": 0.09233318270823919, "eta": "213:51:52", "grad_norm": 0.0093, "loss": 0.0349, "lr": "4.998e-05", "step": 613, "steps": "23.63s,613/33195" }, { "epoch": 0.09248380780237987, "eta": "212:13:44", "grad_norm": 0.0078, "loss": 0.0612, "lr": "4.998e-05", "step": 614, "steps": "23.45s,614/33195" }, { "epoch": 0.09263443289652055, "eta": "216:39:25", "grad_norm": 0.009, "loss": 0.0506, "lr": "4.998e-05", "step": 615, "steps": "23.94s,615/33195" }, { "epoch": 0.09278505799066124, "eta": "217:11:36", "grad_norm": 0.0087, "loss": 0.0357, "lr": "4.998e-05", "step": 616, "steps": "24.0s,616/33195" }, { "epoch": 0.09293568308480193, "eta": "218:10:55", "grad_norm": 0.0087, "loss": 0.0732, "lr": "4.998e-05", "step": 617, "steps": "24.11s,617/33195" }, { "epoch": 0.09308630817894262, "eta": "216:11:04", "grad_norm": 0.009, "loss": 0.049, "lr": "4.998e-05", "step": 618, "steps": "23.89s,618/33195" }, { "epoch": 0.0932369332730833, "eta": "215:00:05", "grad_norm": 0.0088, "loss": 0.0646, "lr": "4.998e-05", "step": 619, "steps": "23.76s,619/33195" }, { "epoch": 0.09338755836722398, "eta": "214:59:42", "grad_norm": 0.0076, "loss": 0.0701, "lr": "4.998e-05", "step": 620, "steps": "23.76s,620/33195" }, { "epoch": 0.09353818346136467, "eta": "216:47:53", "grad_norm": 0.012, "loss": 0.0815, "lr": "4.998e-05", "step": 621, "steps": "23.96s,621/33195" }, { "epoch": 0.09368880855550535, "eta": "216:20:20", "grad_norm": 0.0131, "loss": 0.0725, "lr": "4.998e-05", "step": 622, "steps": "23.91s,622/33195" }, { "epoch": 0.09383943364964603, "eta": "215:09:22", "grad_norm": 0.0089, "loss": 0.0631, "lr": "4.998e-05", "step": 623, "steps": "23.78s,623/33195" }, { "epoch": 0.09399005874378671, "eta": "214:30:58", "grad_norm": 0.0086, "loss": 0.0958, "lr": "4.998e-05", "step": 624, "steps": "23.71s,624/33195" }, { "epoch": 0.0941406838379274, "eta": "215:57:25", "grad_norm": 0.0085, "loss": 0.0677, "lr": "4.998e-05", "step": 625, "steps": "23.87s,625/33195" }, { "epoch": 0.09429130893206808, "eta": "216:29:36", "grad_norm": 0.0094, "loss": 0.0734, "lr": "4.998e-05", "step": 626, "steps": "23.93s,626/33195" }, { "epoch": 0.09444193402620876, "eta": "213:02:56", "grad_norm": 0.007, "loss": 0.0446, "lr": "4.998e-05", "step": 627, "steps": "23.55s,627/33195" }, { "epoch": 0.09459255912034945, "eta": "212:29:58", "grad_norm": 0.0093, "loss": 0.0678, "lr": "4.998e-05", "step": 628, "steps": "23.49s,628/33195" }, { "epoch": 0.09474318421449013, "eta": "212:02:27", "grad_norm": 0.0109, "loss": 0.0495, "lr": "4.998e-05", "step": 629, "steps": "23.44s,629/33195" }, { "epoch": 0.09489380930863081, "eta": "216:38:51", "grad_norm": 0.0092, "loss": 0.061, "lr": "4.998e-05", "step": 630, "steps": "23.95s,630/33195" }, { "epoch": 0.09504443440277151, "eta": "216:05:53", "grad_norm": 0.0081, "loss": 0.0529, "lr": "4.998e-05", "step": 631, "steps": "23.89s,631/33195" }, { "epoch": 0.09519505949691219, "eta": "216:48:55", "grad_norm": 0.0112, "loss": 0.0533, "lr": "4.998e-05", "step": 632, "steps": "23.97s,632/33195" }, { "epoch": 0.09534568459105287, "eta": "216:05:06", "grad_norm": 0.0084, "loss": 0.0696, "lr": "4.998e-05", "step": 633, "steps": "23.89s,633/33195" }, { "epoch": 0.09549630968519356, "eta": "216:58:58", "grad_norm": 0.009, "loss": 0.0752, "lr": "4.998e-05", "step": 634, "steps": "23.99s,634/33195" }, { "epoch": 0.09564693477933424, "eta": "212:10:57", "grad_norm": 0.0094, "loss": 0.0383, "lr": "4.998e-05", "step": 635, "steps": "23.46s,635/33195" }, { "epoch": 0.09579755987347492, "eta": "211:27:09", "grad_norm": 0.0078, "loss": 0.0996, "lr": "4.998e-05", "step": 636, "steps": "23.38s,636/33195" }, { "epoch": 0.0959481849676156, "eta": "216:03:30", "grad_norm": 0.0069, "loss": 0.0527, "lr": "4.998e-05", "step": 637, "steps": "23.89s,637/33195" }, { "epoch": 0.09609881006175629, "eta": "212:09:47", "grad_norm": 0.0096, "loss": 0.0711, "lr": "4.998e-05", "step": 638, "steps": "23.46s,638/33195" }, { "epoch": 0.09624943515589697, "eta": "211:31:24", "grad_norm": 0.0114, "loss": 0.053, "lr": "4.998e-05", "step": 639, "steps": "23.39s,639/33195" }, { "epoch": 0.09640006025003765, "eta": "217:29:07", "grad_norm": 0.0091, "loss": 0.0408, "lr": "4.998e-05", "step": 640, "steps": "24.05s,640/33195" }, { "epoch": 0.09655068534417834, "eta": "212:08:36", "grad_norm": 0.0103, "loss": 0.0562, "lr": "4.998e-05", "step": 641, "steps": "23.46s,641/33195" }, { "epoch": 0.09670131043831902, "eta": "213:02:28", "grad_norm": 0.0088, "loss": 0.0632, "lr": "4.998e-05", "step": 642, "steps": "23.56s,642/33195" }, { "epoch": 0.0968519355324597, "eta": "216:28:14", "grad_norm": 0.0204, "loss": 0.0592, "lr": "4.998e-05", "step": 643, "steps": "23.94s,643/33195" }, { "epoch": 0.0970025606266004, "eta": "212:23:42", "grad_norm": 0.0101, "loss": 0.0875, "lr": "4.998e-05", "step": 644, "steps": "23.49s,644/33195" }, { "epoch": 0.09715318572074108, "eta": "215:27:46", "grad_norm": 0.0081, "loss": 0.0394, "lr": "4.998e-05", "step": 645, "steps": "23.83s,645/33195" }, { "epoch": 0.09730381081488176, "eta": "215:49:04", "grad_norm": 0.0068, "loss": 0.0449, "lr": "4.998e-05", "step": 646, "steps": "23.87s,646/33195" }, { "epoch": 0.09745443590902245, "eta": "212:44:14", "grad_norm": 0.0104, "loss": 0.0753, "lr": "4.998e-05", "step": 647, "steps": "23.53s,647/33195" }, { "epoch": 0.09760506100316313, "eta": "215:48:16", "grad_norm": 0.0093, "loss": 0.0496, "lr": "4.998e-05", "step": 648, "steps": "23.87s,648/33195" }, { "epoch": 0.09775568609730381, "eta": "215:26:11", "grad_norm": 0.0095, "loss": 0.0483, "lr": "4.998e-05", "step": 649, "steps": "23.83s,649/33195" }, { "epoch": 0.0979063111914445, "eta": "213:15:36", "grad_norm": 0.0099, "loss": 0.075, "lr": "4.998e-05", "step": 650, "steps": "23.59s,650/33195" }, { "epoch": 0.09805693628558518, "eta": "216:14:12", "grad_norm": 0.0093, "loss": 0.0463, "lr": "4.998e-05", "step": 651, "steps": "23.92s,651/33195" }, { "epoch": 0.09820756137972586, "eta": "215:14:08", "grad_norm": 0.0059, "loss": 0.0582, "lr": "4.998e-05", "step": 652, "steps": "23.81s,652/33195" }, { "epoch": 0.09835818647386654, "eta": "212:14:46", "grad_norm": 0.0081, "loss": 0.0625, "lr": "4.998e-05", "step": 653, "steps": "23.48s,653/33195" }, { "epoch": 0.09850881156800723, "eta": "213:03:11", "grad_norm": 0.0102, "loss": 0.0711, "lr": "4.998e-05", "step": 654, "steps": "23.57s,654/33195" }, { "epoch": 0.09865943666214791, "eta": "215:45:29", "grad_norm": 0.0092, "loss": 0.0598, "lr": "4.998e-05", "step": 655, "steps": "23.87s,655/33195" }, { "epoch": 0.09881006175628859, "eta": "216:01:22", "grad_norm": 0.0135, "loss": 0.0472, "lr": "4.998e-05", "step": 656, "steps": "23.9s,656/33195" }, { "epoch": 0.09896068685042927, "eta": "212:18:37", "grad_norm": 0.0086, "loss": 0.0597, "lr": "4.998e-05", "step": 657, "steps": "23.49s,657/33195" }, { "epoch": 0.09911131194456997, "eta": "212:45:20", "grad_norm": 0.0066, "loss": 0.0536, "lr": "4.998e-05", "step": 658, "steps": "23.54s,658/33195" }, { "epoch": 0.09926193703871065, "eta": "216:16:26", "grad_norm": 0.0094, "loss": 0.0653, "lr": "4.998e-05", "step": 659, "steps": "23.93s,659/33195" }, { "epoch": 0.09941256213285134, "eta": "212:39:08", "grad_norm": 0.0089, "loss": 0.0799, "lr": "4.998e-05", "step": 660, "steps": "23.53s,660/33195" }, { "epoch": 0.09956318722699202, "eta": "215:15:59", "grad_norm": 0.007, "loss": 0.0728, "lr": "4.998e-05", "step": 661, "steps": "23.82s,661/33195" }, { "epoch": 0.0997138123211327, "eta": "212:38:21", "grad_norm": 0.0082, "loss": 0.0463, "lr": "4.998e-05", "step": 662, "steps": "23.53s,662/33195" }, { "epoch": 0.09986443741527339, "eta": "215:58:34", "grad_norm": 0.0079, "loss": 0.0509, "lr": "4.998e-05", "step": 663, "steps": "23.9s,663/33195" }, { "epoch": 0.10001506250941407, "eta": "212:21:18", "grad_norm": 0.0112, "loss": 0.0567, "lr": "4.998e-05", "step": 664, "steps": "23.5s,664/33195" }, { "epoch": 0.10016568760355475, "eta": "211:59:13", "grad_norm": 0.0086, "loss": 0.0711, "lr": "4.998e-05", "step": 665, "steps": "23.46s,665/33195" }, { "epoch": 0.10031631269769543, "eta": "216:02:48", "grad_norm": 0.0083, "loss": 0.0705, "lr": "4.998e-05", "step": 666, "steps": "23.91s,666/33195" }, { "epoch": 0.10046693779183612, "eta": "215:51:33", "grad_norm": 0.0085, "loss": 0.0538, "lr": "4.998e-05", "step": 667, "steps": "23.89s,667/33195" }, { "epoch": 0.1006175628859768, "eta": "212:30:35", "grad_norm": 0.0074, "loss": 0.0349, "lr": "4.998e-05", "step": 668, "steps": "23.52s,668/33195" }, { "epoch": 0.10076818798011748, "eta": "215:45:20", "grad_norm": 0.0132, "loss": 0.0489, "lr": "4.998e-05", "step": 669, "steps": "23.88s,669/33195" }, { "epoch": 0.10091881307425817, "eta": "216:06:38", "grad_norm": 0.0109, "loss": 0.0409, "lr": "4.998e-05", "step": 670, "steps": "23.92s,670/33195" }, { "epoch": 0.10106943816839886, "eta": "212:18:34", "grad_norm": 0.007, "loss": 0.0441, "lr": "4.998e-05", "step": 671, "steps": "23.5s,671/33195" }, { "epoch": 0.10122006326253954, "eta": "212:29:00", "grad_norm": 0.0084, "loss": 0.0524, "lr": "4.997e-05", "step": 672, "steps": "23.52s,672/33195" }, { "epoch": 0.10137068835668023, "eta": "212:34:02", "grad_norm": 0.0091, "loss": 0.0423, "lr": "4.997e-05", "step": 673, "steps": "23.53s,673/33195" }, { "epoch": 0.10152131345082091, "eta": "212:49:54", "grad_norm": 0.0082, "loss": 0.0731, "lr": "4.997e-05", "step": 674, "steps": "23.56s,674/33195" }, { "epoch": 0.1016719385449616, "eta": "216:04:38", "grad_norm": 0.0078, "loss": 0.0607, "lr": "4.997e-05", "step": 675, "steps": "23.92s,675/33195" }, { "epoch": 0.10182256363910228, "eta": "216:15:04", "grad_norm": 0.0096, "loss": 0.0731, "lr": "4.997e-05", "step": 676, "steps": "23.94s,676/33195" }, { "epoch": 0.10197318873324296, "eta": "216:03:50", "grad_norm": 0.012, "loss": 0.0536, "lr": "4.997e-05", "step": 677, "steps": "23.92s,677/33195" }, { "epoch": 0.10212381382738364, "eta": "215:52:36", "grad_norm": 0.0085, "loss": 0.0426, "lr": "4.997e-05", "step": 678, "steps": "23.9s,678/33195" }, { "epoch": 0.10227443892152432, "eta": "212:20:51", "grad_norm": 0.0088, "loss": 0.052, "lr": "4.997e-05", "step": 679, "steps": "23.51s,679/33195" }, { "epoch": 0.10242506401566501, "eta": "212:25:52", "grad_norm": 0.0098, "loss": 0.0886, "lr": "4.997e-05", "step": 680, "steps": "23.52s,680/33195" }, { "epoch": 0.10257568910980569, "eta": "212:20:04", "grad_norm": 0.0098, "loss": 0.0507, "lr": "4.997e-05", "step": 681, "steps": "23.51s,681/33195" }, { "epoch": 0.10272631420394637, "eta": "213:08:26", "grad_norm": 0.0118, "loss": 0.0508, "lr": "4.997e-05", "step": 682, "steps": "23.6s,682/33195" }, { "epoch": 0.10287693929808706, "eta": "212:08:26", "grad_norm": 0.0114, "loss": 0.0935, "lr": "4.997e-05", "step": 683, "steps": "23.49s,683/33195" }, { "epoch": 0.10302756439222774, "eta": "215:06:52", "grad_norm": 0.0103, "loss": 0.0719, "lr": "4.997e-05", "step": 684, "steps": "23.82s,684/33195" }, { "epoch": 0.10317818948636844, "eta": "216:38:34", "grad_norm": 0.0073, "loss": 0.0738, "lr": "4.997e-05", "step": 685, "steps": "23.99s,685/33195" }, { "epoch": 0.10332881458050912, "eta": "214:28:08", "grad_norm": 0.0087, "loss": 0.0459, "lr": "4.997e-05", "step": 686, "steps": "23.75s,686/33195" }, { "epoch": 0.1034794396746498, "eta": "212:01:27", "grad_norm": 0.0069, "loss": 0.0729, "lr": "4.997e-05", "step": 687, "steps": "23.48s,687/33195" }, { "epoch": 0.10363006476879048, "eta": "213:22:20", "grad_norm": 0.0089, "loss": 0.065, "lr": "4.997e-05", "step": 688, "steps": "23.63s,688/33195" }, { "epoch": 0.10378068986293117, "eta": "216:09:53", "grad_norm": 0.0071, "loss": 0.0417, "lr": "4.997e-05", "step": 689, "steps": "23.94s,689/33195" }, { "epoch": 0.10393131495707185, "eta": "215:47:49", "grad_norm": 0.0112, "loss": 0.0397, "lr": "4.997e-05", "step": 690, "steps": "23.9s,690/33195" }, { "epoch": 0.10408194005121253, "eta": "215:42:00", "grad_norm": 0.0079, "loss": 0.0454, "lr": "4.997e-05", "step": 691, "steps": "23.89s,691/33195" }, { "epoch": 0.10423256514535321, "eta": "211:48:40", "grad_norm": 0.0069, "loss": 0.04, "lr": "4.997e-05", "step": 692, "steps": "23.46s,692/33195" }, { "epoch": 0.1043831902394939, "eta": "212:58:42", "grad_norm": 0.0086, "loss": 0.0373, "lr": "4.997e-05", "step": 693, "steps": "23.59s,693/33195" }, { "epoch": 0.10453381533363458, "eta": "212:25:48", "grad_norm": 0.0081, "loss": 0.0265, "lr": "4.997e-05", "step": 694, "steps": "23.53s,694/33195" }, { "epoch": 0.10468444042777526, "eta": "212:36:15", "grad_norm": 0.009, "loss": 0.0575, "lr": "4.997e-05", "step": 695, "steps": "23.55s,695/33195" }, { "epoch": 0.10483506552191595, "eta": "212:25:01", "grad_norm": 0.0072, "loss": 0.0632, "lr": "4.997e-05", "step": 696, "steps": "23.53s,696/33195" }, { "epoch": 0.10498569061605663, "eta": "212:19:12", "grad_norm": 0.0076, "loss": 0.0531, "lr": "4.997e-05", "step": 697, "steps": "23.52s,697/33195" }, { "epoch": 0.10513631571019733, "eta": "211:30:04", "grad_norm": 0.0082, "loss": 0.0372, "lr": "4.997e-05", "step": 698, "steps": "23.43s,698/33195" }, { "epoch": 0.10528694080433801, "eta": "212:50:55", "grad_norm": 0.0111, "loss": 0.031, "lr": "4.997e-05", "step": 699, "steps": "23.58s,699/33195" }, { "epoch": 0.10543756589847869, "eta": "213:06:46", "grad_norm": 0.0092, "loss": 0.0332, "lr": "4.997e-05", "step": 700, "steps": "23.61s,700/33195" }, { "epoch": 0.10558819099261937, "eta": "211:45:09", "grad_norm": 0.0084, "loss": 0.05, "lr": "4.997e-05", "step": 701, "steps": "23.46s,701/33195" }, { "epoch": 0.10573881608676006, "eta": "212:38:55", "grad_norm": 0.0087, "loss": 0.0478, "lr": "4.997e-05", "step": 702, "steps": "23.56s,702/33195" }, { "epoch": 0.10588944118090074, "eta": "213:48:55", "grad_norm": 0.0082, "loss": 0.0477, "lr": "4.997e-05", "step": 703, "steps": "23.69s,703/33195" }, { "epoch": 0.10604006627504142, "eta": "215:15:10", "grad_norm": 0.0107, "loss": 0.0558, "lr": "4.997e-05", "step": 704, "steps": "23.85s,704/33195" }, { "epoch": 0.1061906913691821, "eta": "212:43:09", "grad_norm": 0.0106, "loss": 0.0585, "lr": "4.997e-05", "step": 705, "steps": "23.57s,705/33195" }, { "epoch": 0.10634131646332279, "eta": "212:10:16", "grad_norm": 0.0101, "loss": 0.0365, "lr": "4.997e-05", "step": 706, "steps": "23.51s,706/33195" }, { "epoch": 0.10649194155746347, "eta": "211:59:03", "grad_norm": 0.0126, "loss": 0.057, "lr": "4.997e-05", "step": 707, "steps": "23.49s,707/33195" }, { "epoch": 0.10664256665160415, "eta": "215:13:34", "grad_norm": 0.009, "loss": 0.0477, "lr": "4.997e-05", "step": 708, "steps": "23.85s,708/33195" }, { "epoch": 0.10679319174574484, "eta": "214:02:47", "grad_norm": 0.0065, "loss": 0.0333, "lr": "4.997e-05", "step": 709, "steps": "23.72s,709/33195" }, { "epoch": 0.10694381683988552, "eta": "216:01:30", "grad_norm": 0.0072, "loss": 0.0394, "lr": "4.997e-05", "step": 710, "steps": "23.94s,710/33195" }, { "epoch": 0.1070944419340262, "eta": "212:19:08", "grad_norm": 0.0082, "loss": 0.049, "lr": "4.997e-05", "step": 711, "steps": "23.53s,711/33195" }, { "epoch": 0.1072450670281669, "eta": "216:54:51", "grad_norm": 0.0076, "loss": 0.0596, "lr": "4.997e-05", "step": 712, "steps": "24.04s,712/33195" }, { "epoch": 0.10739569212230758, "eta": "213:23:19", "grad_norm": 0.0071, "loss": 0.0358, "lr": "4.997e-05", "step": 713, "steps": "23.65s,713/33195" }, { "epoch": 0.10754631721644826, "eta": "212:39:37", "grad_norm": 0.0077, "loss": 0.0587, "lr": "4.997e-05", "step": 714, "steps": "23.57s,714/33195" }, { "epoch": 0.10769694231058895, "eta": "212:44:38", "grad_norm": 0.0084, "loss": 0.0796, "lr": "4.997e-05", "step": 715, "steps": "23.58s,715/33195" }, { "epoch": 0.10784756740472963, "eta": "217:47:23", "grad_norm": 0.0078, "loss": 0.0838, "lr": "4.997e-05", "step": 716, "steps": "24.14s,716/33195" }, { "epoch": 0.10799819249887031, "eta": "216:09:32", "grad_norm": 0.0081, "loss": 0.069, "lr": "4.997e-05", "step": 717, "steps": "23.96s,717/33195" }, { "epoch": 0.108148817593011, "eta": "217:19:30", "grad_norm": 0.0082, "loss": 0.0509, "lr": "4.997e-05", "step": 718, "steps": "24.09s,718/33195" }, { "epoch": 0.10829944268715168, "eta": "214:47:33", "grad_norm": 0.0081, "loss": 0.0612, "lr": "4.997e-05", "step": 719, "steps": "23.81s,719/33195" }, { "epoch": 0.10845006778129236, "eta": "215:46:42", "grad_norm": 0.0065, "loss": 0.0612, "lr": "4.997e-05", "step": 720, "steps": "23.92s,720/33195" }, { "epoch": 0.10860069287543304, "eta": "216:13:21", "grad_norm": 0.0078, "loss": 0.0633, "lr": "4.997e-05", "step": 721, "steps": "23.97s,721/33195" }, { "epoch": 0.10875131796957373, "eta": "212:41:53", "grad_norm": 0.0086, "loss": 0.076, "lr": "4.997e-05", "step": 722, "steps": "23.58s,722/33195" }, { "epoch": 0.10890194306371441, "eta": "212:25:15", "grad_norm": 0.0108, "loss": 0.0644, "lr": "4.997e-05", "step": 723, "steps": "23.55s,723/33195" }, { "epoch": 0.10905256815785509, "eta": "216:01:20", "grad_norm": 0.0092, "loss": 0.0784, "lr": "4.997e-05", "step": 724, "steps": "23.95s,724/33195" }, { "epoch": 0.10920319325199579, "eta": "216:44:14", "grad_norm": 0.0074, "loss": 0.0562, "lr": "4.997e-05", "step": 725, "steps": "24.03s,725/33195" }, { "epoch": 0.10935381834613647, "eta": "212:29:29", "grad_norm": 0.0092, "loss": 0.0517, "lr": "4.997e-05", "step": 726, "steps": "23.56s,726/33195" }, { "epoch": 0.10950444344027715, "eta": "216:16:22", "grad_norm": 0.0064, "loss": 0.0647, "lr": "4.997e-05", "step": 727, "steps": "23.98s,727/33195" }, { "epoch": 0.10965506853441784, "eta": "212:44:56", "grad_norm": 0.0076, "loss": 0.0511, "lr": "4.997e-05", "step": 728, "steps": "23.59s,728/33195" }, { "epoch": 0.10980569362855852, "eta": "216:04:45", "grad_norm": 0.0089, "loss": 0.0337, "lr": "4.997e-05", "step": 729, "steps": "23.96s,729/33195" }, { "epoch": 0.1099563187226992, "eta": "212:06:16", "grad_norm": 0.0085, "loss": 0.0517, "lr": "4.997e-05", "step": 730, "steps": "23.52s,730/33195" }, { "epoch": 0.11010694381683989, "eta": "212:43:45", "grad_norm": 0.0091, "loss": 0.0593, "lr": "4.997e-05", "step": 731, "steps": "23.59s,731/33195" }, { "epoch": 0.11025756891098057, "eta": "212:10:54", "grad_norm": 0.007, "loss": 0.0523, "lr": "4.997e-05", "step": 732, "steps": "23.53s,732/33195" }, { "epoch": 0.11040819400512125, "eta": "213:15:26", "grad_norm": 0.0082, "loss": 0.0537, "lr": "4.997e-05", "step": 733, "steps": "23.65s,733/33195" }, { "epoch": 0.11055881909926193, "eta": "212:20:56", "grad_norm": 0.0087, "loss": 0.0384, "lr": "4.997e-05", "step": 734, "steps": "23.55s,734/33195" }, { "epoch": 0.11070944419340262, "eta": "216:34:49", "grad_norm": 0.0064, "loss": 0.0559, "lr": "4.997e-05", "step": 735, "steps": "24.02s,735/33195" }, { "epoch": 0.1108600692875433, "eta": "211:58:31", "grad_norm": 0.0081, "loss": 0.0713, "lr": "4.997e-05", "step": 736, "steps": "23.51s,736/33195" }, { "epoch": 0.11101069438168398, "eta": "212:19:45", "grad_norm": 0.0069, "loss": 0.0502, "lr": "4.997e-05", "step": 737, "steps": "23.55s,737/33195" }, { "epoch": 0.11116131947582467, "eta": "212:08:33", "grad_norm": 0.0084, "loss": 0.0467, "lr": "4.997e-05", "step": 738, "steps": "23.53s,738/33195" }, { "epoch": 0.11131194456996536, "eta": "215:55:21", "grad_norm": 0.0082, "loss": 0.0422, "lr": "4.997e-05", "step": 739, "steps": "23.95s,739/33195" }, { "epoch": 0.11146256966410605, "eta": "216:00:21", "grad_norm": 0.0074, "loss": 0.058, "lr": "4.997e-05", "step": 740, "steps": "23.96s,740/33195" }, { "epoch": 0.11161319475824673, "eta": "212:45:14", "grad_norm": 0.0078, "loss": 0.068, "lr": "4.997e-05", "step": 741, "steps": "23.6s,741/33195" }, { "epoch": 0.11176381985238741, "eta": "212:28:37", "grad_norm": 0.007, "loss": 0.0662, "lr": "4.997e-05", "step": 742, "steps": "23.57s,742/33195" }, { "epoch": 0.1119144449465281, "eta": "216:15:23", "grad_norm": 0.0104, "loss": 0.037, "lr": "4.997e-05", "step": 743, "steps": "23.99s,743/33195" }, { "epoch": 0.11206507004066878, "eta": "212:22:25", "grad_norm": 0.0078, "loss": 0.0431, "lr": "4.997e-05", "step": 744, "steps": "23.56s,744/33195" }, { "epoch": 0.11221569513480946, "eta": "216:09:11", "grad_norm": 0.008, "loss": 0.0394, "lr": "4.997e-05", "step": 745, "steps": "23.98s,745/33195" }, { "epoch": 0.11236632022895014, "eta": "216:19:36", "grad_norm": 0.0083, "loss": 0.0393, "lr": "4.997e-05", "step": 746, "steps": "24.0s,746/33195" }, { "epoch": 0.11251694532309083, "eta": "215:57:34", "grad_norm": 0.0085, "loss": 0.0567, "lr": "4.997e-05", "step": 747, "steps": "23.96s,747/33195" }, { "epoch": 0.11266757041723151, "eta": "217:34:30", "grad_norm": 0.0072, "loss": 0.0436, "lr": "4.997e-05", "step": 748, "steps": "24.14s,748/33195" }, { "epoch": 0.11281819551137219, "eta": "214:08:36", "grad_norm": 0.0126, "loss": 0.0578, "lr": "4.997e-05", "step": 749, "steps": "23.76s,749/33195" }, { "epoch": 0.11296882060551287, "eta": "212:36:17", "grad_norm": 0.006, "loss": 0.0606, "lr": "4.997e-05", "step": 750, "steps": "23.59s,750/33195" }, { "epoch": 0.11311944569965356, "eta": "215:50:33", "grad_norm": 0.0088, "loss": 0.0638, "lr": "4.997e-05", "step": 751, "steps": "23.95s,751/33195" }, { "epoch": 0.11327007079379425, "eta": "212:03:03", "grad_norm": 0.0063, "loss": 0.0478, "lr": "4.997e-05", "step": 752, "steps": "23.53s,752/33195" }, { "epoch": 0.11342069588793494, "eta": "215:01:06", "grad_norm": 0.0082, "loss": 0.0587, "lr": "4.997e-05", "step": 753, "steps": "23.86s,753/33195" }, { "epoch": 0.11357132098207562, "eta": "211:13:37", "grad_norm": 0.0084, "loss": 0.06, "lr": "4.997e-05", "step": 754, "steps": "23.44s,754/33195" }, { "epoch": 0.1137219460762163, "eta": "213:44:36", "grad_norm": 0.009, "loss": 0.0469, "lr": "4.997e-05", "step": 755, "steps": "23.72s,755/33195" }, { "epoch": 0.11387257117035698, "eta": "215:37:45", "grad_norm": 0.0075, "loss": 0.0674, "lr": "4.997e-05", "step": 756, "steps": "23.93s,756/33195" }, { "epoch": 0.11402319626449767, "eta": "211:44:53", "grad_norm": 0.0085, "loss": 0.0418, "lr": "4.997e-05", "step": 757, "steps": "23.5s,757/33195" }, { "epoch": 0.11417382135863835, "eta": "211:22:52", "grad_norm": 0.009, "loss": 0.0535, "lr": "4.997e-05", "step": 758, "steps": "23.46s,758/33195" }, { "epoch": 0.11432444645277903, "eta": "211:27:52", "grad_norm": 0.0073, "loss": 0.0539, "lr": "4.996e-05", "step": 759, "steps": "23.47s,759/33195" }, { "epoch": 0.11447507154691972, "eta": "212:53:59", "grad_norm": 0.0096, "loss": 0.0472, "lr": "4.996e-05", "step": 760, "steps": "23.63s,760/33195" }, { "epoch": 0.1146256966410604, "eta": "211:48:43", "grad_norm": 0.0073, "loss": 0.0625, "lr": "4.996e-05", "step": 761, "steps": "23.51s,761/33195" }, { "epoch": 0.11477632173520108, "eta": "215:35:21", "grad_norm": 0.0084, "loss": 0.0465, "lr": "4.996e-05", "step": 762, "steps": "23.93s,762/33195" }, { "epoch": 0.11492694682934176, "eta": "211:37:07", "grad_norm": 0.0065, "loss": 0.0341, "lr": "4.996e-05", "step": 763, "steps": "23.49s,763/33195" }, { "epoch": 0.11507757192348245, "eta": "210:42:41", "grad_norm": 0.0074, "loss": 0.0571, "lr": "4.996e-05", "step": 764, "steps": "23.39s,764/33195" }, { "epoch": 0.11522819701762313, "eta": "212:35:48", "grad_norm": 0.0069, "loss": 0.052, "lr": "4.996e-05", "step": 765, "steps": "23.6s,765/33195" }, { "epoch": 0.11537882211176383, "eta": "212:35:24", "grad_norm": 0.0072, "loss": 0.0546, "lr": "4.996e-05", "step": 766, "steps": "23.6s,766/33195" }, { "epoch": 0.11552944720590451, "eta": "215:44:10", "grad_norm": 0.0068, "loss": 0.0442, "lr": "4.996e-05", "step": 767, "steps": "23.95s,767/33195" }, { "epoch": 0.11568007230004519, "eta": "214:28:06", "grad_norm": 0.0084, "loss": 0.0596, "lr": "4.996e-05", "step": 768, "steps": "23.81s,768/33195" }, { "epoch": 0.11583069739418587, "eta": "211:29:22", "grad_norm": 0.0074, "loss": 0.0482, "lr": "4.996e-05", "step": 769, "steps": "23.48s,769/33195" }, { "epoch": 0.11598132248832656, "eta": "213:38:41", "grad_norm": 0.0079, "loss": 0.0342, "lr": "4.996e-05", "step": 770, "steps": "23.72s,770/33195" }, { "epoch": 0.11613194758246724, "eta": "214:53:56", "grad_norm": 0.0086, "loss": 0.057, "lr": "4.996e-05", "step": 771, "steps": "23.86s,771/33195" }, { "epoch": 0.11628257267660792, "eta": "215:15:09", "grad_norm": 0.0081, "loss": 0.0649, "lr": "4.996e-05", "step": 772, "steps": "23.9s,772/33195" }, { "epoch": 0.1164331977707486, "eta": "213:05:04", "grad_norm": 0.0084, "loss": 0.0508, "lr": "4.996e-05", "step": 773, "steps": "23.66s,773/33195" }, { "epoch": 0.11658382286488929, "eta": "214:25:44", "grad_norm": 0.0099, "loss": 0.0587, "lr": "4.996e-05", "step": 774, "steps": "23.81s,774/33195" }, { "epoch": 0.11673444795902997, "eta": "216:13:24", "grad_norm": 0.0082, "loss": 0.0495, "lr": "4.996e-05", "step": 775, "steps": "24.01s,775/33195" }, { "epoch": 0.11688507305317065, "eta": "214:41:08", "grad_norm": 0.0074, "loss": 0.0513, "lr": "4.996e-05", "step": 776, "steps": "23.84s,776/33195" }, { "epoch": 0.11703569814731134, "eta": "213:03:29", "grad_norm": 0.0086, "loss": 0.0521, "lr": "4.996e-05", "step": 777, "steps": "23.66s,777/33195" }, { "epoch": 0.11718632324145202, "eta": "211:42:03", "grad_norm": 0.0114, "loss": 0.0399, "lr": "4.996e-05", "step": 778, "steps": "23.51s,778/33195" }, { "epoch": 0.1173369483355927, "eta": "215:06:58", "grad_norm": 0.0072, "loss": 0.0596, "lr": "4.996e-05", "step": 779, "steps": "23.89s,779/33195" }, { "epoch": 0.1174875734297334, "eta": "215:22:46", "grad_norm": 0.0095, "loss": 0.0589, "lr": "4.996e-05", "step": 780, "steps": "23.92s,780/33195" }, { "epoch": 0.11763819852387408, "eta": "211:30:04", "grad_norm": 0.0093, "loss": 0.0625, "lr": "4.996e-05", "step": 781, "steps": "23.49s,781/33195" }, { "epoch": 0.11778882361801477, "eta": "215:32:47", "grad_norm": 0.0072, "loss": 0.0596, "lr": "4.996e-05", "step": 782, "steps": "23.94s,782/33195" }, { "epoch": 0.11793944871215545, "eta": "215:32:23", "grad_norm": 0.0082, "loss": 0.0233, "lr": "4.996e-05", "step": 783, "steps": "23.94s,783/33195" }, { "epoch": 0.11809007380629613, "eta": "212:01:19", "grad_norm": 0.01, "loss": 0.0613, "lr": "4.996e-05", "step": 784, "steps": "23.55s,784/33195" }, { "epoch": 0.11824069890043681, "eta": "211:50:07", "grad_norm": 0.0065, "loss": 0.0507, "lr": "4.996e-05", "step": 785, "steps": "23.53s,785/33195" }, { "epoch": 0.1183913239945775, "eta": "215:52:47", "grad_norm": 0.0086, "loss": 0.0611, "lr": "4.996e-05", "step": 786, "steps": "23.98s,786/33195" }, { "epoch": 0.11854194908871818, "eta": "215:30:47", "grad_norm": 0.0079, "loss": 0.0442, "lr": "4.996e-05", "step": 787, "steps": "23.94s,787/33195" }, { "epoch": 0.11869257418285886, "eta": "211:32:44", "grad_norm": 0.0069, "loss": 0.066, "lr": "4.996e-05", "step": 788, "steps": "23.5s,788/33195" }, { "epoch": 0.11884319927699954, "eta": "212:53:21", "grad_norm": 0.008, "loss": 0.0521, "lr": "4.996e-05", "step": 789, "steps": "23.65s,789/33195" }, { "epoch": 0.11899382437114023, "eta": "211:21:09", "grad_norm": 0.0074, "loss": 0.0617, "lr": "4.996e-05", "step": 790, "steps": "23.48s,790/33195" }, { "epoch": 0.11914444946528091, "eta": "212:03:58", "grad_norm": 0.0142, "loss": 0.0416, "lr": "4.996e-05", "step": 791, "steps": "23.56s,791/33195" }, { "epoch": 0.1192950745594216, "eta": "215:12:35", "grad_norm": 0.0109, "loss": 0.051, "lr": "4.996e-05", "step": 792, "steps": "23.91s,792/33195" }, { "epoch": 0.11944569965356229, "eta": "211:41:35", "grad_norm": 0.0078, "loss": 0.043, "lr": "4.996e-05", "step": 793, "steps": "23.52s,793/33195" }, { "epoch": 0.11959632474770297, "eta": "211:14:11", "grad_norm": 0.0124, "loss": 0.0467, "lr": "4.996e-05", "step": 794, "steps": "23.47s,794/33195" }, { "epoch": 0.11974694984184366, "eta": "213:50:24", "grad_norm": 0.0084, "loss": 0.0582, "lr": "4.996e-05", "step": 795, "steps": "23.76s,795/33195" }, { "epoch": 0.11989757493598434, "eta": "210:14:00", "grad_norm": 0.0098, "loss": 0.0509, "lr": "4.996e-05", "step": 796, "steps": "23.36s,796/33195" }, { "epoch": 0.12004820003012502, "eta": "212:55:36", "grad_norm": 0.01, "loss": 0.0446, "lr": "4.996e-05", "step": 797, "steps": "23.66s,797/33195" }, { "epoch": 0.1201988251242657, "eta": "214:54:00", "grad_norm": 0.0101, "loss": 0.0366, "lr": "4.996e-05", "step": 798, "steps": "23.88s,798/33195" }, { "epoch": 0.12034945021840639, "eta": "211:01:26", "grad_norm": 0.0087, "loss": 0.0511, "lr": "4.996e-05", "step": 799, "steps": "23.45s,799/33195" }, { "epoch": 0.12050007531254707, "eta": "211:11:50", "grad_norm": 0.0089, "loss": 0.0603, "lr": "4.996e-05", "step": 800, "steps": "23.47s,800/33195" }, { "epoch": 0.12065070040668775, "eta": "00:00:00", "grad_norm": 0.008, "loss": 0.0424, "lr": "4.996e-05", "step": 801, "steps": "0s,801/33195" }, { "epoch": 0.12080132550082844, "eta": "211:00:15", "grad_norm": 0.0081, "loss": 0.0512, "lr": "4.996e-05", "step": 802, "steps": "23.45s,802/33195" }, { "epoch": 0.12095195059496912, "eta": "220:10:32", "grad_norm": 0.0062, "loss": 0.0573, "lr": "4.996e-05", "step": 803, "steps": "24.47s,803/33195" }, { "epoch": 0.1211025756891098, "eta": "217:11:58", "grad_norm": 0.0073, "loss": 0.044, "lr": "4.996e-05", "step": 804, "steps": "24.14s,804/33195" }, { "epoch": 0.12125320078325048, "eta": "212:41:39", "grad_norm": 0.0067, "loss": 0.0582, "lr": "4.996e-05", "step": 805, "steps": "23.64s,805/33195" }, { "epoch": 0.12140382587739117, "eta": "208:59:56", "grad_norm": 0.0087, "loss": 0.0552, "lr": "4.996e-05", "step": 806, "steps": "23.23s,806/33195" }, { "epoch": 0.12155445097153186, "eta": "213:56:26", "grad_norm": 0.0074, "loss": 0.0381, "lr": "4.996e-05", "step": 807, "steps": "23.78s,807/33195" }, { "epoch": 0.12170507606567255, "eta": "210:20:08", "grad_norm": 0.0073, "loss": 0.0492, "lr": "4.996e-05", "step": 808, "steps": "23.38s,808/33195" }, { "epoch": 0.12185570115981323, "eta": "217:15:21", "grad_norm": 0.0077, "loss": 0.0557, "lr": "4.996e-05", "step": 809, "steps": "24.15s,809/33195" }, { "epoch": 0.12200632625395391, "eta": "210:30:09", "grad_norm": 0.009, "loss": 0.0541, "lr": "4.996e-05", "step": 810, "steps": "23.4s,810/33195" }, { "epoch": 0.1221569513480946, "eta": "213:17:04", "grad_norm": 0.0076, "loss": 0.0588, "lr": "4.996e-05", "step": 811, "steps": "23.71s,811/33195" }, { "epoch": 0.12230757644223528, "eta": "213:00:29", "grad_norm": 0.0079, "loss": 0.0548, "lr": "4.996e-05", "step": 812, "steps": "23.68s,812/33195" }, { "epoch": 0.12245820153637596, "eta": "214:26:26", "grad_norm": 0.0077, "loss": 0.0633, "lr": "4.996e-05", "step": 813, "steps": "23.84s,813/33195" }, { "epoch": 0.12260882663051664, "eta": "209:29:13", "grad_norm": 0.0074, "loss": 0.0535, "lr": "4.996e-05", "step": 814, "steps": "23.29s,814/33195" }, { "epoch": 0.12275945172465733, "eta": "209:34:14", "grad_norm": 0.0092, "loss": 0.0688, "lr": "4.996e-05", "step": 815, "steps": "23.3s,815/33195" }, { "epoch": 0.12291007681879801, "eta": "209:01:27", "grad_norm": 0.008, "loss": 0.051, "lr": "4.996e-05", "step": 816, "steps": "23.24s,816/33195" }, { "epoch": 0.12306070191293869, "eta": "209:17:16", "grad_norm": 0.0095, "loss": 0.054, "lr": "4.996e-05", "step": 817, "steps": "23.27s,817/33195" }, { "epoch": 0.12321132700707937, "eta": "206:51:11", "grad_norm": 0.0084, "loss": 0.0547, "lr": "4.996e-05", "step": 818, "steps": "23.0s,818/33195" }, { "epoch": 0.12336195210122006, "eta": "212:46:56", "grad_norm": 0.0091, "loss": 0.0472, "lr": "4.996e-05", "step": 819, "steps": "23.66s,819/33195" }, { "epoch": 0.12351257719536075, "eta": "209:37:41", "grad_norm": 0.0081, "loss": 0.0543, "lr": "4.996e-05", "step": 820, "steps": "23.31s,820/33195" }, { "epoch": 0.12366320228950144, "eta": "206:33:50", "grad_norm": 0.0066, "loss": 0.0411, "lr": "4.996e-05", "step": 821, "steps": "22.97s,821/33195" }, { "epoch": 0.12381382738364212, "eta": "212:29:33", "grad_norm": 0.0073, "loss": 0.0364, "lr": "4.996e-05", "step": 822, "steps": "23.63s,822/33195" }, { "epoch": 0.1239644524777828, "eta": "206:49:16", "grad_norm": 0.0074, "loss": 0.0419, "lr": "4.996e-05", "step": 823, "steps": "23.0s,823/33195" }, { "epoch": 0.12411507757192349, "eta": "211:29:25", "grad_norm": 0.0084, "loss": 0.0783, "lr": "4.996e-05", "step": 824, "steps": "23.52s,824/33195" }, { "epoch": 0.12426570266606417, "eta": "209:51:55", "grad_norm": 0.0082, "loss": 0.034, "lr": "4.996e-05", "step": 825, "steps": "23.34s,825/33195" }, { "epoch": 0.12441632776020485, "eta": "205:32:35", "grad_norm": 0.0088, "loss": 0.0389, "lr": "4.996e-05", "step": 826, "steps": "22.86s,826/33195" }, { "epoch": 0.12456695285434553, "eta": "208:57:12", "grad_norm": 0.0103, "loss": 0.0574, "lr": "4.996e-05", "step": 827, "steps": "23.24s,827/33195" }, { "epoch": 0.12471757794848622, "eta": "213:15:45", "grad_norm": 0.0092, "loss": 0.0465, "lr": "4.996e-05", "step": 828, "steps": "23.72s,828/33195" }, { "epoch": 0.1248682030426269, "eta": "210:28:08", "grad_norm": 0.0067, "loss": 0.0508, "lr": "4.996e-05", "step": 829, "steps": "23.41s,829/33195" }, { "epoch": 0.1250188281367676, "eta": "208:34:28", "grad_norm": 0.0073, "loss": 0.0538, "lr": "4.996e-05", "step": 830, "steps": "23.2s,830/33195" }, { "epoch": 0.12516945323090828, "eta": "208:28:41", "grad_norm": 0.0116, "loss": 0.0551, "lr": "4.996e-05", "step": 831, "steps": "23.19s,831/33195" }, { "epoch": 0.12532007832504896, "eta": "209:33:01", "grad_norm": 0.0083, "loss": 0.0529, "lr": "4.996e-05", "step": 832, "steps": "23.31s,832/33195" }, { "epoch": 0.12547070341918964, "eta": "209:16:27", "grad_norm": 0.0076, "loss": 0.0594, "lr": "4.995e-05", "step": 833, "steps": "23.28s,833/33195" }, { "epoch": 0.12562132851333033, "eta": "209:21:27", "grad_norm": 0.0099, "loss": 0.0481, "lr": "4.995e-05", "step": 834, "steps": "23.29s,834/33195" }, { "epoch": 0.125771953607471, "eta": "210:58:09", "grad_norm": 0.0076, "loss": 0.0442, "lr": "4.995e-05", "step": 835, "steps": "23.47s,835/33195" }, { "epoch": 0.1259225787016117, "eta": "209:42:15", "grad_norm": 0.0082, "loss": 0.0502, "lr": "4.995e-05", "step": 836, "steps": "23.33s,836/33195" }, { "epoch": 0.12607320379575238, "eta": "208:15:34", "grad_norm": 0.008, "loss": 0.0625, "lr": "4.995e-05", "step": 837, "steps": "23.17s,837/33195" }, { "epoch": 0.12622382888989306, "eta": "209:14:30", "grad_norm": 0.0074, "loss": 0.0715, "lr": "4.995e-05", "step": 838, "steps": "23.28s,838/33195" }, { "epoch": 0.12637445398403374, "eta": "207:58:37", "grad_norm": 0.0078, "loss": 0.0679, "lr": "4.995e-05", "step": 839, "steps": "23.14s,839/33195" }, { "epoch": 0.12652507907817442, "eta": "207:36:40", "grad_norm": 0.006, "loss": 0.0592, "lr": "4.995e-05", "step": 840, "steps": "23.1s,840/33195" }, { "epoch": 0.1266757041723151, "eta": "205:43:03", "grad_norm": 0.0134, "loss": 0.027, "lr": "4.995e-05", "step": 841, "steps": "22.89s,841/33195" }, { "epoch": 0.1268263292664558, "eta": "208:13:39", "grad_norm": 0.0068, "loss": 0.0422, "lr": "4.995e-05", "step": 842, "steps": "23.17s,842/33195" }, { "epoch": 0.12697695436059647, "eta": "211:27:22", "grad_norm": 0.0103, "loss": 0.0708, "lr": "4.995e-05", "step": 843, "steps": "23.53s,843/33195" }, { "epoch": 0.12712757945473716, "eta": "209:17:34", "grad_norm": 0.0069, "loss": 0.0584, "lr": "4.995e-05", "step": 844, "steps": "23.29s,844/33195" }, { "epoch": 0.12727820454887784, "eta": "208:39:27", "grad_norm": 0.0081, "loss": 0.0611, "lr": "4.995e-05", "step": 845, "steps": "23.22s,845/33195" }, { "epoch": 0.12742882964301852, "eta": "208:33:40", "grad_norm": 0.0117, "loss": 0.0462, "lr": "4.995e-05", "step": 846, "steps": "23.21s,846/33195" }, { "epoch": 0.1275794547371592, "eta": "214:02:09", "grad_norm": 0.0072, "loss": 0.0511, "lr": "4.995e-05", "step": 847, "steps": "23.82s,847/33195" }, { "epoch": 0.1277300798312999, "eta": "212:03:09", "grad_norm": 0.0079, "loss": 0.0422, "lr": "4.995e-05", "step": 848, "steps": "23.6s,848/33195" }, { "epoch": 0.12788070492544057, "eta": "208:00:09", "grad_norm": 0.0089, "loss": 0.0415, "lr": "4.995e-05", "step": 849, "steps": "23.15s,849/33195" }, { "epoch": 0.12803133001958125, "eta": "208:37:30", "grad_norm": 0.0068, "loss": 0.055, "lr": "4.995e-05", "step": 850, "steps": "23.22s,850/33195" }, { "epoch": 0.12818195511372193, "eta": "209:20:15", "grad_norm": 0.0067, "loss": 0.0567, "lr": "4.995e-05", "step": 851, "steps": "23.3s,851/33195" }, { "epoch": 0.12833258020786262, "eta": "212:12:21", "grad_norm": 0.0107, "loss": 0.0744, "lr": "4.995e-05", "step": 852, "steps": "23.62s,852/33195" }, { "epoch": 0.1284832053020033, "eta": "208:47:08", "grad_norm": 0.0074, "loss": 0.0364, "lr": "4.995e-05", "step": 853, "steps": "23.24s,853/33195" }, { "epoch": 0.128633830396144, "eta": "208:46:44", "grad_norm": 0.0109, "loss": 0.0538, "lr": "4.995e-05", "step": 854, "steps": "23.24s,854/33195" }, { "epoch": 0.1287844554902847, "eta": "208:46:21", "grad_norm": 0.0076, "loss": 0.0617, "lr": "4.995e-05", "step": 855, "steps": "23.24s,855/33195" }, { "epoch": 0.12893508058442538, "eta": "212:21:33", "grad_norm": 0.0065, "loss": 0.037, "lr": "4.995e-05", "step": 856, "steps": "23.64s,856/33195" }, { "epoch": 0.12908570567856606, "eta": "209:12:32", "grad_norm": 0.0089, "loss": 0.0624, "lr": "4.995e-05", "step": 857, "steps": "23.29s,857/33195" }, { "epoch": 0.12923633077270674, "eta": "208:29:01", "grad_norm": 0.007, "loss": 0.0552, "lr": "4.995e-05", "step": 858, "steps": "23.21s,858/33195" }, { "epoch": 0.12938695586684743, "eta": "210:59:32", "grad_norm": 0.0094, "loss": 0.067, "lr": "4.995e-05", "step": 859, "steps": "23.49s,859/33195" }, { "epoch": 0.1295375809609881, "eta": "208:17:28", "grad_norm": 0.0081, "loss": 0.057, "lr": "4.995e-05", "step": 860, "steps": "23.19s,860/33195" }, { "epoch": 0.1296882060551288, "eta": "209:05:35", "grad_norm": 0.0095, "loss": 0.0292, "lr": "4.995e-05", "step": 861, "steps": "23.28s,861/33195" }, { "epoch": 0.12983883114926947, "eta": "209:05:12", "grad_norm": 0.0099, "loss": 0.0403, "lr": "4.995e-05", "step": 862, "steps": "23.28s,862/33195" }, { "epoch": 0.12998945624341016, "eta": "208:21:42", "grad_norm": 0.0079, "loss": 0.0444, "lr": "4.995e-05", "step": 863, "steps": "23.2s,863/33195" }, { "epoch": 0.13014008133755084, "eta": "206:38:56", "grad_norm": 0.0074, "loss": 0.0693, "lr": "4.995e-05", "step": 864, "steps": "23.01s,864/33195" }, { "epoch": 0.13029070643169152, "eta": "211:40:18", "grad_norm": 0.0069, "loss": 0.0742, "lr": "4.995e-05", "step": 865, "steps": "23.57s,865/33195" }, { "epoch": 0.1304413315258322, "eta": "207:10:30", "grad_norm": 0.0079, "loss": 0.0466, "lr": "4.995e-05", "step": 866, "steps": "23.07s,866/33195" }, { "epoch": 0.1305919566199729, "eta": "210:34:51", "grad_norm": 0.006, "loss": 0.0438, "lr": "4.995e-05", "step": 867, "steps": "23.45s,867/33195" }, { "epoch": 0.13074258171411357, "eta": "211:28:20", "grad_norm": 0.0084, "loss": 0.0535, "lr": "4.995e-05", "step": 868, "steps": "23.55s,868/33195" }, { "epoch": 0.13089320680825425, "eta": "210:12:31", "grad_norm": 0.0084, "loss": 0.0498, "lr": "4.995e-05", "step": 869, "steps": "23.41s,869/33195" }, { "epoch": 0.13104383190239494, "eta": "206:20:28", "grad_norm": 0.0076, "loss": 0.0896, "lr": "4.995e-05", "step": 870, "steps": "22.98s,870/33195" }, { "epoch": 0.13119445699653562, "eta": "209:12:29", "grad_norm": 0.0071, "loss": 0.0585, "lr": "4.995e-05", "step": 871, "steps": "23.3s,871/33195" }, { "epoch": 0.1313450820906763, "eta": "207:02:48", "grad_norm": 0.0103, "loss": 0.0641, "lr": "4.995e-05", "step": 872, "steps": "23.06s,872/33195" }, { "epoch": 0.13149570718481698, "eta": "208:07:03", "grad_norm": 0.0093, "loss": 0.0669, "lr": "4.995e-05", "step": 873, "steps": "23.18s,873/33195" }, { "epoch": 0.13164633227895767, "eta": "212:46:47", "grad_norm": 0.007, "loss": 0.0481, "lr": "4.995e-05", "step": 874, "steps": "23.7s,874/33195" }, { "epoch": 0.13179695737309835, "eta": "210:10:11", "grad_norm": 0.009, "loss": 0.0546, "lr": "4.995e-05", "step": 875, "steps": "23.41s,875/33195" }, { "epoch": 0.13194758246723903, "eta": "208:54:23", "grad_norm": 0.0069, "loss": 0.0395, "lr": "4.995e-05", "step": 876, "steps": "23.27s,876/33195" }, { "epoch": 0.13209820756137972, "eta": "206:39:20", "grad_norm": 0.0072, "loss": 0.0503, "lr": "4.995e-05", "step": 877, "steps": "23.02s,877/33195" }, { "epoch": 0.1322488326555204, "eta": "205:28:56", "grad_norm": 0.0074, "loss": 0.063, "lr": "4.995e-05", "step": 878, "steps": "22.89s,878/33195" }, { "epoch": 0.13239945774966108, "eta": "208:37:03", "grad_norm": 0.0081, "loss": 0.0518, "lr": "4.995e-05", "step": 879, "steps": "23.24s,879/33195" }, { "epoch": 0.13255008284380176, "eta": "207:58:58", "grad_norm": 0.0073, "loss": 0.0541, "lr": "4.995e-05", "step": 880, "steps": "23.17s,880/33195" }, { "epoch": 0.13270070793794247, "eta": "210:07:50", "grad_norm": 0.0075, "loss": 0.0398, "lr": "4.995e-05", "step": 881, "steps": "23.41s,881/33195" }, { "epoch": 0.13285133303208316, "eta": "208:08:58", "grad_norm": 0.0084, "loss": 0.0464, "lr": "4.995e-05", "step": 882, "steps": "23.19s,882/33195" }, { "epoch": 0.13300195812622384, "eta": "208:24:44", "grad_norm": 0.0121, "loss": 0.0488, "lr": "4.995e-05", "step": 883, "steps": "23.22s,883/33195" }, { "epoch": 0.13315258322036452, "eta": "209:45:08", "grad_norm": 0.0075, "loss": 0.0472, "lr": "4.995e-05", "step": 884, "steps": "23.37s,884/33195" }, { "epoch": 0.1333032083145052, "eta": "208:07:48", "grad_norm": 0.0066, "loss": 0.0389, "lr": "4.995e-05", "step": 885, "steps": "23.19s,885/33195" }, { "epoch": 0.1334538334086459, "eta": "206:08:57", "grad_norm": 0.0142, "loss": 0.0499, "lr": "4.995e-05", "step": 886, "steps": "22.97s,886/33195" }, { "epoch": 0.13360445850278657, "eta": "209:38:34", "grad_norm": 0.0076, "loss": 0.0585, "lr": "4.995e-05", "step": 887, "steps": "23.36s,887/33195" }, { "epoch": 0.13375508359692725, "eta": "211:52:48", "grad_norm": 0.0089, "loss": 0.0498, "lr": "4.995e-05", "step": 888, "steps": "23.61s,888/33195" }, { "epoch": 0.13390570869106794, "eta": "209:59:20", "grad_norm": 0.0069, "loss": 0.0897, "lr": "4.995e-05", "step": 889, "steps": "23.4s,889/33195" }, { "epoch": 0.13405633378520862, "eta": "208:38:11", "grad_norm": 0.0089, "loss": 0.0363, "lr": "4.995e-05", "step": 890, "steps": "23.25s,890/33195" }, { "epoch": 0.1342069588793493, "eta": "208:16:15", "grad_norm": 0.0103, "loss": 0.0517, "lr": "4.995e-05", "step": 891, "steps": "23.21s,891/33195" }, { "epoch": 0.13435758397348999, "eta": "207:54:20", "grad_norm": 0.0074, "loss": 0.0358, "lr": "4.995e-05", "step": 892, "steps": "23.17s,892/33195" }, { "epoch": 0.13450820906763067, "eta": "207:53:57", "grad_norm": 0.0065, "loss": 0.0404, "lr": "4.995e-05", "step": 893, "steps": "23.17s,893/33195" }, { "epoch": 0.13465883416177135, "eta": "212:01:12", "grad_norm": 0.0079, "loss": 0.0519, "lr": "4.995e-05", "step": 894, "steps": "23.63s,894/33195" }, { "epoch": 0.13480945925591203, "eta": "208:20:06", "grad_norm": 0.0071, "loss": 0.0467, "lr": "4.995e-05", "step": 895, "steps": "23.22s,895/33195" }, { "epoch": 0.13496008435005272, "eta": "207:52:47", "grad_norm": 0.0066, "loss": 0.0636, "lr": "4.995e-05", "step": 896, "steps": "23.17s,896/33195" }, { "epoch": 0.1351107094441934, "eta": "211:27:43", "grad_norm": 0.0067, "loss": 0.0388, "lr": "4.995e-05", "step": 897, "steps": "23.57s,897/33195" }, { "epoch": 0.13526133453833408, "eta": "208:51:14", "grad_norm": 0.0088, "loss": 0.0746, "lr": "4.995e-05", "step": 898, "steps": "23.28s,898/33195" }, { "epoch": 0.13541195963247477, "eta": "204:59:23", "grad_norm": 0.0082, "loss": 0.0649, "lr": "4.995e-05", "step": 899, "steps": "22.85s,899/33195" }, { "epoch": 0.13556258472661545, "eta": "211:21:10", "grad_norm": 0.0187, "loss": 0.0588, "lr": "4.994e-05", "step": 900, "steps": "23.56s,900/33195" }, { "epoch": 0.13571320982075613, "eta": "208:55:27", "grad_norm": 0.0072, "loss": 0.0287, "lr": "4.994e-05", "step": 901, "steps": "23.29s,901/33195" }, { "epoch": 0.1358638349148968, "eta": "207:28:57", "grad_norm": 0.0071, "loss": 0.0436, "lr": "4.994e-05", "step": 902, "steps": "23.13s,902/33195" }, { "epoch": 0.1360144600090375, "eta": "208:22:23", "grad_norm": 0.0081, "loss": 0.0479, "lr": "4.994e-05", "step": 903, "steps": "23.23s,903/33195" }, { "epoch": 0.13616508510317818, "eta": "208:16:37", "grad_norm": 0.009, "loss": 0.0465, "lr": "4.994e-05", "step": 904, "steps": "23.22s,904/33195" }, { "epoch": 0.13631571019731886, "eta": "212:13:01", "grad_norm": 0.0069, "loss": 0.0459, "lr": "4.994e-05", "step": 905, "steps": "23.66s,905/33195" }, { "epoch": 0.13646633529145955, "eta": "208:26:36", "grad_norm": 0.0081, "loss": 0.0439, "lr": "4.994e-05", "step": 906, "steps": "23.24s,906/33195" }, { "epoch": 0.13661696038560023, "eta": "208:31:36", "grad_norm": 0.0073, "loss": 0.0598, "lr": "4.994e-05", "step": 907, "steps": "23.25s,907/33195" }, { "epoch": 0.13676758547974094, "eta": "208:25:49", "grad_norm": 0.0093, "loss": 0.086, "lr": "4.994e-05", "step": 908, "steps": "23.24s,908/33195" }, { "epoch": 0.13691821057388162, "eta": "209:30:00", "grad_norm": 0.0068, "loss": 0.0511, "lr": "4.994e-05", "step": 909, "steps": "23.36s,909/33195" }, { "epoch": 0.1370688356680223, "eta": "205:05:57", "grad_norm": 0.0101, "loss": 0.0474, "lr": "4.994e-05", "step": 910, "steps": "22.87s,910/33195" }, { "epoch": 0.137219460762163, "eta": "208:51:34", "grad_norm": 0.0123, "loss": 0.0427, "lr": "4.994e-05", "step": 911, "steps": "23.29s,911/33195" }, { "epoch": 0.13737008585630367, "eta": "211:48:44", "grad_norm": 0.0087, "loss": 0.0401, "lr": "4.994e-05", "step": 912, "steps": "23.62s,912/33195" }, { "epoch": 0.13752071095044435, "eta": "209:17:41", "grad_norm": 0.0085, "loss": 0.0525, "lr": "4.994e-05", "step": 913, "steps": "23.34s,913/33195" }, { "epoch": 0.13767133604458504, "eta": "209:01:10", "grad_norm": 0.0084, "loss": 0.0542, "lr": "4.994e-05", "step": 914, "steps": "23.31s,914/33195" }, { "epoch": 0.13782196113872572, "eta": "208:44:38", "grad_norm": 0.0072, "loss": 0.0487, "lr": "4.994e-05", "step": 915, "steps": "23.28s,915/33195" }, { "epoch": 0.1379725862328664, "eta": "209:27:17", "grad_norm": 0.01, "loss": 0.0552, "lr": "4.994e-05", "step": 916, "steps": "23.36s,916/33195" }, { "epoch": 0.13812321132700708, "eta": "205:30:11", "grad_norm": 0.0086, "loss": 0.0688, "lr": "4.994e-05", "step": 917, "steps": "22.92s,917/33195" }, { "epoch": 0.13827383642114777, "eta": "207:33:32", "grad_norm": 0.0112, "loss": 0.0408, "lr": "4.994e-05", "step": 918, "steps": "23.15s,918/33195" }, { "epoch": 0.13842446151528845, "eta": "210:09:09", "grad_norm": 0.0067, "loss": 0.0533, "lr": "4.994e-05", "step": 919, "steps": "23.44s,919/33195" }, { "epoch": 0.13857508660942913, "eta": "212:12:29", "grad_norm": 0.0078, "loss": 0.0443, "lr": "4.994e-05", "step": 920, "steps": "23.67s,920/33195" }, { "epoch": 0.13872571170356982, "eta": "212:01:20", "grad_norm": 0.0073, "loss": 0.0758, "lr": "4.994e-05", "step": 921, "steps": "23.65s,921/33195" }, { "epoch": 0.1388763367977105, "eta": "212:38:35", "grad_norm": 0.006, "loss": 0.0357, "lr": "4.994e-05", "step": 922, "steps": "23.72s,922/33195" }, { "epoch": 0.13902696189185118, "eta": "211:55:10", "grad_norm": 0.0074, "loss": 0.0433, "lr": "4.994e-05", "step": 923, "steps": "23.64s,923/33195" }, { "epoch": 0.13917758698599186, "eta": "205:59:47", "grad_norm": 0.0095, "loss": 0.0706, "lr": "4.994e-05", "step": 924, "steps": "22.98s,924/33195" }, { "epoch": 0.13932821208013255, "eta": "209:02:16", "grad_norm": 0.0067, "loss": 0.0489, "lr": "4.994e-05", "step": 925, "steps": "23.32s,925/33195" }, { "epoch": 0.13947883717427323, "eta": "208:51:07", "grad_norm": 0.0087, "loss": 0.0589, "lr": "4.994e-05", "step": 926, "steps": "23.3s,926/33195" }, { "epoch": 0.1396294622684139, "eta": "210:06:01", "grad_norm": 0.0074, "loss": 0.0566, "lr": "4.994e-05", "step": 927, "steps": "23.44s,927/33195" }, { "epoch": 0.1397800873625546, "eta": "208:28:50", "grad_norm": 0.0084, "loss": 0.0547, "lr": "4.994e-05", "step": 928, "steps": "23.26s,928/33195" }, { "epoch": 0.13993071245669528, "eta": "211:31:17", "grad_norm": 0.0068, "loss": 0.0595, "lr": "4.994e-05", "step": 929, "steps": "23.6s,929/33195" }, { "epoch": 0.14008133755083596, "eta": "209:11:05", "grad_norm": 0.0064, "loss": 0.0494, "lr": "4.994e-05", "step": 930, "steps": "23.34s,930/33195" }, { "epoch": 0.14023196264497664, "eta": "208:54:33", "grad_norm": 0.0079, "loss": 0.0516, "lr": "4.994e-05", "step": 931, "steps": "23.31s,931/33195" }, { "epoch": 0.14038258773911733, "eta": "208:11:09", "grad_norm": 0.0084, "loss": 0.0852, "lr": "4.994e-05", "step": 932, "steps": "23.23s,932/33195" }, { "epoch": 0.140533212833258, "eta": "208:10:46", "grad_norm": 0.0076, "loss": 0.0499, "lr": "4.994e-05", "step": 933, "steps": "23.23s,933/33195" }, { "epoch": 0.1406838379273987, "eta": "211:50:50", "grad_norm": 0.0084, "loss": 0.0346, "lr": "4.994e-05", "step": 934, "steps": "23.64s,934/33195" }, { "epoch": 0.1408344630215394, "eta": "211:07:25", "grad_norm": 0.0073, "loss": 0.035, "lr": "4.994e-05", "step": 935, "steps": "23.56s,935/33195" }, { "epoch": 0.14098508811568009, "eta": "209:24:52", "grad_norm": 0.0087, "loss": 0.0469, "lr": "4.994e-05", "step": 936, "steps": "23.37s,936/33195" }, { "epoch": 0.14113571320982077, "eta": "209:24:29", "grad_norm": 0.0074, "loss": 0.0404, "lr": "4.994e-05", "step": 937, "steps": "23.37s,937/33195" }, { "epoch": 0.14128633830396145, "eta": "210:01:44", "grad_norm": 0.0069, "loss": 0.0483, "lr": "4.994e-05", "step": 938, "steps": "23.44s,938/33195" }, { "epoch": 0.14143696339810213, "eta": "207:41:34", "grad_norm": 0.0078, "loss": 0.0507, "lr": "4.994e-05", "step": 939, "steps": "23.18s,939/33195" }, { "epoch": 0.14158758849224282, "eta": "207:46:33", "grad_norm": 0.0078, "loss": 0.0613, "lr": "4.994e-05", "step": 940, "steps": "23.19s,940/33195" }, { "epoch": 0.1417382135863835, "eta": "210:27:26", "grad_norm": 0.0082, "loss": 0.07, "lr": "4.994e-05", "step": 941, "steps": "23.49s,941/33195" }, { "epoch": 0.14188883868052418, "eta": "206:52:01", "grad_norm": 0.0068, "loss": 0.0653, "lr": "4.994e-05", "step": 942, "steps": "23.09s,942/33195" }, { "epoch": 0.14203946377466486, "eta": "213:02:32", "grad_norm": 0.0091, "loss": 0.0407, "lr": "4.994e-05", "step": 943, "steps": "23.78s,943/33195" }, { "epoch": 0.14219008886880555, "eta": "208:54:53", "grad_norm": 0.0066, "loss": 0.0468, "lr": "4.994e-05", "step": 944, "steps": "23.32s,944/33195" }, { "epoch": 0.14234071396294623, "eta": "208:00:45", "grad_norm": 0.0349, "loss": 0.0501, "lr": "4.994e-05", "step": 945, "steps": "23.22s,945/33195" }, { "epoch": 0.1424913390570869, "eta": "207:54:59", "grad_norm": 0.0066, "loss": 0.0505, "lr": "4.994e-05", "step": 946, "steps": "23.21s,946/33195" }, { "epoch": 0.1426419641512276, "eta": "214:43:04", "grad_norm": 0.0068, "loss": 0.0752, "lr": "4.994e-05", "step": 947, "steps": "23.97s,947/33195" }, { "epoch": 0.14279258924536828, "eta": "210:24:42", "grad_norm": 0.0074, "loss": 0.0507, "lr": "4.994e-05", "step": 948, "steps": "23.49s,948/33195" }, { "epoch": 0.14294321433950896, "eta": "208:09:57", "grad_norm": 0.008, "loss": 0.0723, "lr": "4.994e-05", "step": 949, "steps": "23.24s,949/33195" }, { "epoch": 0.14309383943364964, "eta": "207:10:26", "grad_norm": 0.0082, "loss": 0.0416, "lr": "4.994e-05", "step": 950, "steps": "23.13s,950/33195" }, { "epoch": 0.14324446452779033, "eta": "210:07:24", "grad_norm": 0.0078, "loss": 0.0422, "lr": "4.994e-05", "step": 951, "steps": "23.46s,951/33195" }, { "epoch": 0.143395089621931, "eta": "210:23:08", "grad_norm": 0.0078, "loss": 0.0521, "lr": "4.994e-05", "step": 952, "steps": "23.49s,952/33195" }, { "epoch": 0.1435457147160717, "eta": "208:08:24", "grad_norm": 0.0084, "loss": 0.0545, "lr": "4.994e-05", "step": 953, "steps": "23.24s,953/33195" }, { "epoch": 0.14369633981021238, "eta": "204:43:49", "grad_norm": 0.0081, "loss": 0.044, "lr": "4.994e-05", "step": 954, "steps": "22.86s,954/33195" }, { "epoch": 0.14384696490435306, "eta": "211:47:56", "grad_norm": 0.0079, "loss": 0.0587, "lr": "4.994e-05", "step": 955, "steps": "23.65s,955/33195" }, { "epoch": 0.14399758999849374, "eta": "208:01:51", "grad_norm": 0.0075, "loss": 0.0444, "lr": "4.994e-05", "step": 956, "steps": "23.23s,956/33195" }, { "epoch": 0.14414821509263442, "eta": "211:52:31", "grad_norm": 0.0068, "loss": 0.0543, "lr": "4.994e-05", "step": 957, "steps": "23.66s,957/33195" }, { "epoch": 0.1442988401867751, "eta": "207:50:20", "grad_norm": 0.0083, "loss": 0.0701, "lr": "4.994e-05", "step": 958, "steps": "23.21s,958/33195" }, { "epoch": 0.1444494652809158, "eta": "210:47:15", "grad_norm": 0.0088, "loss": 0.0394, "lr": "4.994e-05", "step": 959, "steps": "23.54s,959/33195" }, { "epoch": 0.14460009037505647, "eta": "208:32:33", "grad_norm": 0.0149, "loss": 0.0415, "lr": "4.994e-05", "step": 960, "steps": "23.29s,960/33195" }, { "epoch": 0.14475071546919716, "eta": "207:49:11", "grad_norm": 0.0084, "loss": 0.0497, "lr": "4.993e-05", "step": 961, "steps": "23.21s,961/33195" }, { "epoch": 0.14490134056333787, "eta": "210:51:27", "grad_norm": 0.0076, "loss": 0.0573, "lr": "4.993e-05", "step": 962, "steps": "23.55s,962/33195" }, { "epoch": 0.14505196565747855, "eta": "207:53:47", "grad_norm": 0.009, "loss": 0.0301, "lr": "4.993e-05", "step": 963, "steps": "23.22s,963/33195" }, { "epoch": 0.14520259075161923, "eta": "210:56:02", "grad_norm": 0.0077, "loss": 0.0508, "lr": "4.993e-05", "step": 964, "steps": "23.56s,964/33195" }, { "epoch": 0.14535321584575991, "eta": "211:54:44", "grad_norm": 0.011, "loss": 0.0771, "lr": "4.993e-05", "step": 965, "steps": "23.67s,965/33195" }, { "epoch": 0.1455038409399006, "eta": "207:41:52", "grad_norm": 0.0106, "loss": 0.0448, "lr": "4.993e-05", "step": 966, "steps": "23.2s,966/33195" }, { "epoch": 0.14565446603404128, "eta": "205:54:04", "grad_norm": 0.0099, "loss": 0.0476, "lr": "4.993e-05", "step": 967, "steps": "23.0s,967/33195" }, { "epoch": 0.14580509112818196, "eta": "211:10:34", "grad_norm": 0.0095, "loss": 0.0537, "lr": "4.993e-05", "step": 968, "steps": "23.59s,968/33195" }, { "epoch": 0.14595571622232265, "eta": "209:33:30", "grad_norm": 0.011, "loss": 0.0613, "lr": "4.993e-05", "step": 969, "steps": "23.41s,969/33195" }, { "epoch": 0.14610634131646333, "eta": "209:27:45", "grad_norm": 0.0079, "loss": 0.0407, "lr": "4.993e-05", "step": 970, "steps": "23.4s,970/33195" }, { "epoch": 0.146256966410604, "eta": "208:17:32", "grad_norm": 0.009, "loss": 0.071, "lr": "4.993e-05", "step": 971, "steps": "23.27s,971/33195" }, { "epoch": 0.1464075915047447, "eta": "208:01:02", "grad_norm": 0.0105, "loss": 0.0455, "lr": "4.993e-05", "step": 972, "steps": "23.24s,972/33195" }, { "epoch": 0.14655821659888538, "eta": "208:06:01", "grad_norm": 0.0085, "loss": 0.0491, "lr": "4.993e-05", "step": 973, "steps": "23.25s,973/33195" }, { "epoch": 0.14670884169302606, "eta": "207:49:31", "grad_norm": 0.0072, "loss": 0.0708, "lr": "4.993e-05", "step": 974, "steps": "23.22s,974/33195" }, { "epoch": 0.14685946678716674, "eta": "207:00:48", "grad_norm": 0.0061, "loss": 0.0537, "lr": "4.993e-05", "step": 975, "steps": "23.13s,975/33195" }, { "epoch": 0.14701009188130743, "eta": "208:37:04", "grad_norm": 0.0069, "loss": 0.0427, "lr": "4.993e-05", "step": 976, "steps": "23.31s,976/33195" }, { "epoch": 0.1471607169754481, "eta": "207:53:44", "grad_norm": 0.0063, "loss": 0.0395, "lr": "4.993e-05", "step": 977, "steps": "23.23s,977/33195" }, { "epoch": 0.1473113420695888, "eta": "209:35:22", "grad_norm": 0.0089, "loss": 0.0456, "lr": "4.993e-05", "step": 978, "steps": "23.42s,978/33195" }, { "epoch": 0.14746196716372947, "eta": "211:11:37", "grad_norm": 0.009, "loss": 0.0532, "lr": "4.993e-05", "step": 979, "steps": "23.6s,979/33195" }, { "epoch": 0.14761259225787016, "eta": "208:19:25", "grad_norm": 0.0081, "loss": 0.0709, "lr": "4.993e-05", "step": 980, "steps": "23.28s,980/33195" }, { "epoch": 0.14776321735201084, "eta": "207:09:14", "grad_norm": 0.0082, "loss": 0.0471, "lr": "4.993e-05", "step": 981, "steps": "23.15s,981/33195" }, { "epoch": 0.14791384244615152, "eta": "206:58:06", "grad_norm": 0.0085, "loss": 0.0413, "lr": "4.993e-05", "step": 982, "steps": "23.13s,982/33195" }, { "epoch": 0.1480644675402922, "eta": "209:28:02", "grad_norm": 0.0085, "loss": 0.0591, "lr": "4.993e-05", "step": 983, "steps": "23.41s,983/33195" }, { "epoch": 0.1482150926344329, "eta": "207:51:01", "grad_norm": 0.0085, "loss": 0.0552, "lr": "4.993e-05", "step": 984, "steps": "23.23s,984/33195" }, { "epoch": 0.14836571772857357, "eta": "206:24:44", "grad_norm": 0.0075, "loss": 0.0571, "lr": "4.993e-05", "step": 985, "steps": "23.07s,985/33195" }, { "epoch": 0.14851634282271425, "eta": "209:37:36", "grad_norm": 0.0073, "loss": 0.0475, "lr": "4.993e-05", "step": 986, "steps": "23.43s,986/33195" }, { "epoch": 0.14866696791685494, "eta": "211:29:57", "grad_norm": 0.0074, "loss": 0.0637, "lr": "4.993e-05", "step": 987, "steps": "23.64s,987/33195" }, { "epoch": 0.14881759301099562, "eta": "207:54:50", "grad_norm": 0.0078, "loss": 0.0525, "lr": "4.993e-05", "step": 988, "steps": "23.24s,988/33195" }, { "epoch": 0.1489682181051363, "eta": "208:26:39", "grad_norm": 0.0071, "loss": 0.0605, "lr": "4.993e-05", "step": 989, "steps": "23.3s,989/33195" }, { "epoch": 0.149118843199277, "eta": "207:27:13", "grad_norm": 0.0075, "loss": 0.0665, "lr": "4.993e-05", "step": 990, "steps": "23.19s,990/33195" }, { "epoch": 0.1492694682934177, "eta": "208:47:21", "grad_norm": 0.0082, "loss": 0.06, "lr": "4.993e-05", "step": 991, "steps": "23.34s,991/33195" }, { "epoch": 0.14942009338755838, "eta": "207:42:33", "grad_norm": 0.0065, "loss": 0.0609, "lr": "4.993e-05", "step": 992, "steps": "23.22s,992/33195" }, { "epoch": 0.14957071848169906, "eta": "208:14:22", "grad_norm": 0.0066, "loss": 0.0533, "lr": "4.993e-05", "step": 993, "steps": "23.28s,993/33195" }, { "epoch": 0.14972134357583974, "eta": "208:08:37", "grad_norm": 0.0079, "loss": 0.0335, "lr": "4.993e-05", "step": 994, "steps": "23.27s,994/33195" }, { "epoch": 0.14987196866998043, "eta": "209:39:28", "grad_norm": 0.0105, "loss": 0.0483, "lr": "4.993e-05", "step": 995, "steps": "23.44s,995/33195" }, { "epoch": 0.1500225937641211, "eta": "209:01:30", "grad_norm": 0.0073, "loss": 0.0321, "lr": "4.993e-05", "step": 996, "steps": "23.37s,996/33195" }, { "epoch": 0.1501732188582618, "eta": "208:18:11", "grad_norm": 0.0079, "loss": 0.0509, "lr": "4.993e-05", "step": 997, "steps": "23.29s,997/33195" }, { "epoch": 0.15032384395240247, "eta": "211:09:31", "grad_norm": 0.0084, "loss": 0.064, "lr": "4.993e-05", "step": 998, "steps": "23.61s,998/33195" }, { "epoch": 0.15047446904654316, "eta": "207:34:29", "grad_norm": 0.0064, "loss": 0.0753, "lr": "4.993e-05", "step": 999, "steps": "23.21s,999/33195" }, { "epoch": 0.15062509414068384, "eta": "209:26:46", "grad_norm": 0.0061, "loss": 0.0405, "lr": "4.993e-05", "step": 1000, "steps": "23.42s,1000/33195" }, { "epoch": 0.15077571923482452, "eta": "347:20:14", "grad_norm": 0.0067, "loss": 0.0733, "lr": "4.993e-05", "step": 1001, "steps": "38.84s,1001/33195" }, { "epoch": 0.1509263443289652, "eta": "210:35:45", "grad_norm": 0.0073, "loss": 0.0663, "lr": "4.993e-05", "step": 1002, "steps": "23.55s,1002/33195" }, { "epoch": 0.1510769694231059, "eta": "211:18:16", "grad_norm": 0.0102, "loss": 0.0383, "lr": "4.993e-05", "step": 1003, "steps": "23.63s,1003/33195" }, { "epoch": 0.15122759451724657, "eta": "208:15:28", "grad_norm": 0.0075, "loss": 0.0689, "lr": "4.993e-05", "step": 1004, "steps": "23.29s,1004/33195" }, { "epoch": 0.15137821961138725, "eta": "204:35:07", "grad_norm": 0.0072, "loss": 0.0617, "lr": "4.993e-05", "step": 1005, "steps": "22.88s,1005/33195" }, { "epoch": 0.15152884470552794, "eta": "211:06:22", "grad_norm": 0.0091, "loss": 0.0468, "lr": "4.993e-05", "step": 1006, "steps": "23.61s,1006/33195" }, { "epoch": 0.15167946979966862, "eta": "208:03:34", "grad_norm": 0.009, "loss": 0.0794, "lr": "4.993e-05", "step": 1007, "steps": "23.27s,1007/33195" }, { "epoch": 0.1518300948938093, "eta": "207:47:05", "grad_norm": 0.0082, "loss": 0.0539, "lr": "4.993e-05", "step": 1008, "steps": "23.24s,1008/33195" }, { "epoch": 0.15198071998794999, "eta": "207:52:04", "grad_norm": 0.0076, "loss": 0.0774, "lr": "4.993e-05", "step": 1009, "steps": "23.25s,1009/33195" }, { "epoch": 0.15213134508209067, "eta": "206:36:35", "grad_norm": 0.0098, "loss": 0.0309, "lr": "4.993e-05", "step": 1010, "steps": "23.11s,1010/33195" }, { "epoch": 0.15228197017623135, "eta": "204:32:49", "grad_norm": 0.007, "loss": 0.061, "lr": "4.993e-05", "step": 1011, "steps": "22.88s,1011/33195" }, { "epoch": 0.15243259527037203, "eta": "212:08:22", "grad_norm": 0.0096, "loss": 0.0716, "lr": "4.993e-05", "step": 1012, "steps": "23.73s,1012/33195" }, { "epoch": 0.15258322036451272, "eta": "207:55:53", "grad_norm": 0.0084, "loss": 0.0629, "lr": "4.993e-05", "step": 1013, "steps": "23.26s,1013/33195" }, { "epoch": 0.1527338454586534, "eta": "207:39:24", "grad_norm": 0.0066, "loss": 0.0369, "lr": "4.993e-05", "step": 1014, "steps": "23.23s,1014/33195" }, { "epoch": 0.15288447055279408, "eta": "210:52:06", "grad_norm": 0.0082, "loss": 0.0476, "lr": "4.993e-05", "step": 1015, "steps": "23.59s,1015/33195" }, { "epoch": 0.15303509564693477, "eta": "208:42:59", "grad_norm": 0.0081, "loss": 0.0576, "lr": "4.993e-05", "step": 1016, "steps": "23.35s,1016/33195" }, { "epoch": 0.15318572074107548, "eta": "211:55:40", "grad_norm": 0.0094, "loss": 0.0467, "lr": "4.992e-05", "step": 1017, "steps": "23.71s,1017/33195" }, { "epoch": 0.15333634583521616, "eta": "207:37:51", "grad_norm": 0.0086, "loss": 0.0563, "lr": "4.992e-05", "step": 1018, "steps": "23.23s,1018/33195" }, { "epoch": 0.15348697092935684, "eta": "211:28:04", "grad_norm": 0.0087, "loss": 0.0657, "lr": "4.992e-05", "step": 1019, "steps": "23.66s,1019/33195" }, { "epoch": 0.15363759602349752, "eta": "211:00:51", "grad_norm": 0.0063, "loss": 0.0634, "lr": "4.992e-05", "step": 1020, "steps": "23.61s,1020/33195" }, { "epoch": 0.1537882211176382, "eta": "209:29:18", "grad_norm": 0.0094, "loss": 0.033, "lr": "4.992e-05", "step": 1021, "steps": "23.44s,1021/33195" }, { "epoch": 0.1539388462117789, "eta": "207:47:02", "grad_norm": 0.0077, "loss": 0.0799, "lr": "4.992e-05", "step": 1022, "steps": "23.25s,1022/33195" }, { "epoch": 0.15408947130591957, "eta": "208:29:32", "grad_norm": 0.0085, "loss": 0.0423, "lr": "4.992e-05", "step": 1023, "steps": "23.33s,1023/33195" }, { "epoch": 0.15424009640006026, "eta": "207:56:59", "grad_norm": 0.0096, "loss": 0.0519, "lr": "4.992e-05", "step": 1024, "steps": "23.27s,1024/33195" }, { "epoch": 0.15439072149420094, "eta": "207:02:58", "grad_norm": 0.0069, "loss": 0.0351, "lr": "4.992e-05", "step": 1025, "steps": "23.17s,1025/33195" }, { "epoch": 0.15454134658834162, "eta": "210:26:19", "grad_norm": 0.0099, "loss": 0.0601, "lr": "4.992e-05", "step": 1026, "steps": "23.55s,1026/33195" }, { "epoch": 0.1546919716824823, "eta": "207:18:17", "grad_norm": 0.0079, "loss": 0.0699, "lr": "4.992e-05", "step": 1027, "steps": "23.2s,1027/33195" }, { "epoch": 0.154842596776623, "eta": "208:06:09", "grad_norm": 0.0092, "loss": 0.0535, "lr": "4.992e-05", "step": 1028, "steps": "23.29s,1028/33195" }, { "epoch": 0.15499322187076367, "eta": "207:44:19", "grad_norm": 0.0084, "loss": 0.0652, "lr": "4.992e-05", "step": 1029, "steps": "23.25s,1029/33195" }, { "epoch": 0.15514384696490435, "eta": "207:33:12", "grad_norm": 0.008, "loss": 0.0633, "lr": "4.992e-05", "step": 1030, "steps": "23.23s,1030/33195" }, { "epoch": 0.15529447205904504, "eta": "207:16:44", "grad_norm": 0.008, "loss": 0.0654, "lr": "4.992e-05", "step": 1031, "steps": "23.2s,1031/33195" }, { "epoch": 0.15544509715318572, "eta": "210:29:20", "grad_norm": 0.0058, "loss": 0.0495, "lr": "4.992e-05", "step": 1032, "steps": "23.56s,1032/33195" }, { "epoch": 0.1555957222473264, "eta": "209:35:20", "grad_norm": 0.0069, "loss": 0.0258, "lr": "4.992e-05", "step": 1033, "steps": "23.46s,1033/33195" }, { "epoch": 0.15574634734146708, "eta": "207:53:06", "grad_norm": 0.0099, "loss": 0.0559, "lr": "4.992e-05", "step": 1034, "steps": "23.27s,1034/33195" }, { "epoch": 0.15589697243560777, "eta": "207:20:33", "grad_norm": 0.0078, "loss": 0.0608, "lr": "4.992e-05", "step": 1035, "steps": "23.21s,1035/33195" }, { "epoch": 0.15604759752974845, "eta": "205:54:24", "grad_norm": 0.007, "loss": 0.0544, "lr": "4.992e-05", "step": 1036, "steps": "23.05s,1036/33195" }, { "epoch": 0.15619822262388913, "eta": "207:46:35", "grad_norm": 0.0071, "loss": 0.0344, "lr": "4.992e-05", "step": 1037, "steps": "23.26s,1037/33195" }, { "epoch": 0.15634884771802982, "eta": "207:03:19", "grad_norm": 0.0092, "loss": 0.0523, "lr": "4.992e-05", "step": 1038, "steps": "23.18s,1038/33195" }, { "epoch": 0.1564994728121705, "eta": "204:11:26", "grad_norm": 0.007, "loss": 0.06, "lr": "4.992e-05", "step": 1039, "steps": "22.86s,1039/33195" }, { "epoch": 0.15665009790631118, "eta": "210:31:33", "grad_norm": 0.0082, "loss": 0.054, "lr": "4.992e-05", "step": 1040, "steps": "23.57s,1040/33195" }, { "epoch": 0.15680072300045186, "eta": "207:34:18", "grad_norm": 0.0078, "loss": 0.0389, "lr": "4.992e-05", "step": 1041, "steps": "23.24s,1041/33195" }, { "epoch": 0.15695134809459255, "eta": "206:51:03", "grad_norm": 0.0095, "loss": 0.0619, "lr": "4.992e-05", "step": 1042, "steps": "23.16s,1042/33195" }, { "epoch": 0.15710197318873323, "eta": "207:49:37", "grad_norm": 0.0065, "loss": 0.0392, "lr": "4.992e-05", "step": 1043, "steps": "23.27s,1043/33195" }, { "epoch": 0.15725259828287394, "eta": "205:24:33", "grad_norm": 0.0112, "loss": 0.0513, "lr": "4.992e-05", "step": 1044, "steps": "23.0s,1044/33195" }, { "epoch": 0.15740322337701462, "eta": "208:58:30", "grad_norm": 0.0082, "loss": 0.0689, "lr": "4.992e-05", "step": 1045, "steps": "23.4s,1045/33195" }, { "epoch": 0.1575538484711553, "eta": "209:08:49", "grad_norm": 0.0072, "loss": 0.0585, "lr": "4.992e-05", "step": 1046, "steps": "23.42s,1046/33195" }, { "epoch": 0.157704473565296, "eta": "207:26:38", "grad_norm": 0.0099, "loss": 0.0514, "lr": "4.992e-05", "step": 1047, "steps": "23.23s,1047/33195" }, { "epoch": 0.15785509865943667, "eta": "209:18:45", "grad_norm": 0.0151, "loss": 0.0493, "lr": "4.992e-05", "step": 1048, "steps": "23.44s,1048/33195" }, { "epoch": 0.15800572375357735, "eta": "206:53:42", "grad_norm": 0.0067, "loss": 0.0522, "lr": "4.992e-05", "step": 1049, "steps": "23.17s,1049/33195" }, { "epoch": 0.15815634884771804, "eta": "209:50:07", "grad_norm": 0.0078, "loss": 0.0595, "lr": "4.992e-05", "step": 1050, "steps": "23.5s,1050/33195" }, { "epoch": 0.15830697394185872, "eta": "207:46:30", "grad_norm": 0.0071, "loss": 0.076, "lr": "4.992e-05", "step": 1051, "steps": "23.27s,1051/33195" }, { "epoch": 0.1584575990359994, "eta": "210:21:29", "grad_norm": 0.0072, "loss": 0.0392, "lr": "4.992e-05", "step": 1052, "steps": "23.56s,1052/33195" }, { "epoch": 0.15860822413014009, "eta": "206:46:48", "grad_norm": 0.0079, "loss": 0.0504, "lr": "4.992e-05", "step": 1053, "steps": "23.16s,1053/33195" }, { "epoch": 0.15875884922428077, "eta": "206:35:42", "grad_norm": 0.0082, "loss": 0.0551, "lr": "4.992e-05", "step": 1054, "steps": "23.14s,1054/33195" }, { "epoch": 0.15890947431842145, "eta": "207:18:10", "grad_norm": 0.0092, "loss": 0.0411, "lr": "4.992e-05", "step": 1055, "steps": "23.22s,1055/33195" }, { "epoch": 0.15906009941256213, "eta": "206:08:09", "grad_norm": 0.0077, "loss": 0.0577, "lr": "4.992e-05", "step": 1056, "steps": "23.09s,1056/33195" }, { "epoch": 0.15921072450670282, "eta": "207:01:20", "grad_norm": 0.0073, "loss": 0.069, "lr": "4.992e-05", "step": 1057, "steps": "23.19s,1057/33195" }, { "epoch": 0.1593613496008435, "eta": "207:06:18", "grad_norm": 0.0087, "loss": 0.0388, "lr": "4.992e-05", "step": 1058, "steps": "23.2s,1058/33195" }, { "epoch": 0.15951197469498418, "eta": "207:27:20", "grad_norm": 0.0073, "loss": 0.0437, "lr": "4.992e-05", "step": 1059, "steps": "23.24s,1059/33195" }, { "epoch": 0.15966259978912486, "eta": "207:05:32", "grad_norm": 0.0072, "loss": 0.045, "lr": "4.992e-05", "step": 1060, "steps": "23.2s,1060/33195" }, { "epoch": 0.15981322488326555, "eta": "209:13:40", "grad_norm": 0.0068, "loss": 0.0599, "lr": "4.992e-05", "step": 1061, "steps": "23.44s,1061/33195" }, { "epoch": 0.15996384997740623, "eta": "207:04:45", "grad_norm": 0.0064, "loss": 0.0442, "lr": "4.992e-05", "step": 1062, "steps": "23.2s,1062/33195" }, { "epoch": 0.1601144750715469, "eta": "211:05:21", "grad_norm": 0.007, "loss": 0.0626, "lr": "4.992e-05", "step": 1063, "steps": "23.65s,1063/33195" }, { "epoch": 0.1602651001656876, "eta": "207:20:03", "grad_norm": 0.0072, "loss": 0.045, "lr": "4.992e-05", "step": 1064, "steps": "23.23s,1064/33195" }, { "epoch": 0.16041572525982828, "eta": "209:22:49", "grad_norm": 0.0074, "loss": 0.0725, "lr": "4.992e-05", "step": 1065, "steps": "23.46s,1065/33195" }, { "epoch": 0.16056635035396896, "eta": "209:22:26", "grad_norm": 0.0164, "loss": 0.0439, "lr": "4.992e-05", "step": 1066, "steps": "23.46s,1066/33195" }, { "epoch": 0.16071697544810964, "eta": "209:54:10", "grad_norm": 0.0078, "loss": 0.044, "lr": "4.992e-05", "step": 1067, "steps": "23.52s,1067/33195" }, { "epoch": 0.16086760054225033, "eta": "206:57:05", "grad_norm": 0.0066, "loss": 0.0374, "lr": "4.992e-05", "step": 1068, "steps": "23.19s,1068/33195" }, { "epoch": 0.161018225636391, "eta": "209:31:58", "grad_norm": 0.0071, "loss": 0.0571, "lr": "4.992e-05", "step": 1069, "steps": "23.48s,1069/33195" }, { "epoch": 0.1611688507305317, "eta": "207:39:08", "grad_norm": 0.0073, "loss": 0.04, "lr": "4.991e-05", "step": 1070, "steps": "23.27s,1070/33195" }, { "epoch": 0.1613194758246724, "eta": "207:28:03", "grad_norm": 0.0073, "loss": 0.0362, "lr": "4.991e-05", "step": 1071, "steps": "23.25s,1071/33195" }, { "epoch": 0.1614701009188131, "eta": "206:44:49", "grad_norm": 0.0121, "loss": 0.0472, "lr": "4.991e-05", "step": 1072, "steps": "23.17s,1072/33195" }, { "epoch": 0.16162072601295377, "eta": "206:55:09", "grad_norm": 0.0077, "loss": 0.049, "lr": "4.991e-05", "step": 1073, "steps": "23.19s,1073/33195" }, { "epoch": 0.16177135110709445, "eta": "207:26:53", "grad_norm": 0.0089, "loss": 0.0503, "lr": "4.991e-05", "step": 1074, "steps": "23.25s,1074/33195" }, { "epoch": 0.16192197620123513, "eta": "207:21:08", "grad_norm": 0.0091, "loss": 0.0454, "lr": "4.991e-05", "step": 1075, "steps": "23.24s,1075/33195" }, { "epoch": 0.16207260129537582, "eta": "207:36:49", "grad_norm": 0.0095, "loss": 0.0562, "lr": "4.991e-05", "step": 1076, "steps": "23.27s,1076/33195" }, { "epoch": 0.1622232263895165, "eta": "207:57:50", "grad_norm": 0.0112, "loss": 0.047, "lr": "4.991e-05", "step": 1077, "steps": "23.31s,1077/33195" }, { "epoch": 0.16237385148365718, "eta": "207:46:44", "grad_norm": 0.0073, "loss": 0.0812, "lr": "4.991e-05", "step": 1078, "steps": "23.29s,1078/33195" }, { "epoch": 0.16252447657779787, "eta": "207:14:14", "grad_norm": 0.0073, "loss": 0.0532, "lr": "4.991e-05", "step": 1079, "steps": "23.23s,1079/33195" }, { "epoch": 0.16267510167193855, "eta": "207:03:09", "grad_norm": 0.0062, "loss": 0.0483, "lr": "4.991e-05", "step": 1080, "steps": "23.21s,1080/33195" }, { "epoch": 0.16282572676607923, "eta": "204:22:11", "grad_norm": 0.0085, "loss": 0.0672, "lr": "4.991e-05", "step": 1081, "steps": "22.91s,1081/33195" }, { "epoch": 0.16297635186021991, "eta": "210:04:21", "grad_norm": 0.0081, "loss": 0.06, "lr": "4.991e-05", "step": 1082, "steps": "23.55s,1082/33195" }, { "epoch": 0.1631269769543606, "eta": "206:35:13", "grad_norm": 0.0068, "loss": 0.0402, "lr": "4.991e-05", "step": 1083, "steps": "23.16s,1083/33195" }, { "epoch": 0.16327760204850128, "eta": "206:45:32", "grad_norm": 0.0084, "loss": 0.0573, "lr": "4.991e-05", "step": 1084, "steps": "23.18s,1084/33195" }, { "epoch": 0.16342822714264196, "eta": "208:32:11", "grad_norm": 0.007, "loss": 0.0517, "lr": "4.991e-05", "step": 1085, "steps": "23.38s,1085/33195" }, { "epoch": 0.16357885223678265, "eta": "208:31:48", "grad_norm": 0.0107, "loss": 0.0365, "lr": "4.991e-05", "step": 1086, "steps": "23.38s,1086/33195" }, { "epoch": 0.16372947733092333, "eta": "206:49:44", "grad_norm": 0.0082, "loss": 0.0831, "lr": "4.991e-05", "step": 1087, "steps": "23.19s,1087/33195" }, { "epoch": 0.163880102425064, "eta": "210:28:45", "grad_norm": 0.0085, "loss": 0.0418, "lr": "4.991e-05", "step": 1088, "steps": "23.6s,1088/33195" }, { "epoch": 0.1640307275192047, "eta": "207:15:43", "grad_norm": 0.0085, "loss": 0.0412, "lr": "4.991e-05", "step": 1089, "steps": "23.24s,1089/33195" }, { "epoch": 0.16418135261334538, "eta": "207:42:05", "grad_norm": 0.0075, "loss": 0.0439, "lr": "4.991e-05", "step": 1090, "steps": "23.29s,1090/33195" }, { "epoch": 0.16433197770748606, "eta": "207:57:45", "grad_norm": 0.0114, "loss": 0.0613, "lr": "4.991e-05", "step": 1091, "steps": "23.32s,1091/33195" }, { "epoch": 0.16448260280162674, "eta": "206:37:06", "grad_norm": 0.0088, "loss": 0.068, "lr": "4.991e-05", "step": 1092, "steps": "23.17s,1092/33195" }, { "epoch": 0.16463322789576743, "eta": "207:46:16", "grad_norm": 0.0077, "loss": 0.036, "lr": "4.991e-05", "step": 1093, "steps": "23.3s,1093/33195" }, { "epoch": 0.1647838529899081, "eta": "206:09:35", "grad_norm": 0.0095, "loss": 0.0381, "lr": "4.991e-05", "step": 1094, "steps": "23.12s,1094/33195" }, { "epoch": 0.1649344780840488, "eta": "206:30:36", "grad_norm": 0.0056, "loss": 0.0556, "lr": "4.991e-05", "step": 1095, "steps": "23.16s,1095/33195" }, { "epoch": 0.16508510317818947, "eta": "206:56:57", "grad_norm": 0.0116, "loss": 0.0423, "lr": "4.991e-05", "step": 1096, "steps": "23.21s,1096/33195" }, { "epoch": 0.16523572827233016, "eta": "206:56:34", "grad_norm": 0.0073, "loss": 0.0554, "lr": "4.991e-05", "step": 1097, "steps": "23.21s,1097/33195" }, { "epoch": 0.16538635336647087, "eta": "206:56:11", "grad_norm": 0.0078, "loss": 0.0425, "lr": "4.991e-05", "step": 1098, "steps": "23.21s,1098/33195" }, { "epoch": 0.16553697846061155, "eta": "207:33:14", "grad_norm": 0.0073, "loss": 0.0406, "lr": "4.991e-05", "step": 1099, "steps": "23.28s,1099/33195" }, { "epoch": 0.16568760355475223, "eta": "210:02:38", "grad_norm": 0.0061, "loss": 0.0839, "lr": "4.991e-05", "step": 1100, "steps": "23.56s,1100/33195" }, { "epoch": 0.16583822864889292, "eta": "204:57:21", "grad_norm": 0.0082, "loss": 0.0517, "lr": "4.991e-05", "step": 1101, "steps": "22.99s,1101/33195" }, { "epoch": 0.1659888537430336, "eta": "206:43:56", "grad_norm": 0.009, "loss": 0.0535, "lr": "4.991e-05", "step": 1102, "steps": "23.19s,1102/33195" }, { "epoch": 0.16613947883717428, "eta": "210:33:33", "grad_norm": 0.0099, "loss": 0.0356, "lr": "4.991e-05", "step": 1103, "steps": "23.62s,1103/33195" }, { "epoch": 0.16629010393131496, "eta": "208:08:44", "grad_norm": 0.0081, "loss": 0.0618, "lr": "4.991e-05", "step": 1104, "steps": "23.35s,1104/33195" }, { "epoch": 0.16644072902545565, "eta": "206:21:23", "grad_norm": 0.0098, "loss": 0.032, "lr": "4.991e-05", "step": 1105, "steps": "23.15s,1105/33195" }, { "epoch": 0.16659135411959633, "eta": "207:41:13", "grad_norm": 0.0078, "loss": 0.0711, "lr": "4.991e-05", "step": 1106, "steps": "23.3s,1106/33195" }, { "epoch": 0.166741979213737, "eta": "206:58:03", "grad_norm": 0.0085, "loss": 0.0444, "lr": "4.991e-05", "step": 1107, "steps": "23.22s,1107/33195" }, { "epoch": 0.1668926043078777, "eta": "210:58:19", "grad_norm": 0.0074, "loss": 0.0648, "lr": "4.991e-05", "step": 1108, "steps": "23.67s,1108/33195" }, { "epoch": 0.16704322940201838, "eta": "210:15:08", "grad_norm": 0.0068, "loss": 0.0543, "lr": "4.991e-05", "step": 1109, "steps": "23.59s,1109/33195" }, { "epoch": 0.16719385449615906, "eta": "210:14:45", "grad_norm": 0.0092, "loss": 0.0613, "lr": "4.991e-05", "step": 1110, "steps": "23.59s,1110/33195" }, { "epoch": 0.16734447959029974, "eta": "206:13:43", "grad_norm": 0.0087, "loss": 0.0632, "lr": "4.991e-05", "step": 1111, "steps": "23.14s,1111/33195" }, { "epoch": 0.16749510468444043, "eta": "209:04:27", "grad_norm": 0.0071, "loss": 0.0609, "lr": "4.991e-05", "step": 1112, "steps": "23.46s,1112/33195" }, { "epoch": 0.1676457297785811, "eta": "205:46:13", "grad_norm": 0.0079, "loss": 0.0544, "lr": "4.991e-05", "step": 1113, "steps": "23.09s,1113/33195" }, { "epoch": 0.1677963548727218, "eta": "206:12:34", "grad_norm": 0.0091, "loss": 0.0404, "lr": "4.991e-05", "step": 1114, "steps": "23.14s,1114/33195" }, { "epoch": 0.16794697996686248, "eta": "206:54:57", "grad_norm": 0.0065, "loss": 0.0423, "lr": "4.991e-05", "step": 1115, "steps": "23.22s,1115/33195" }, { "epoch": 0.16809760506100316, "eta": "207:37:20", "grad_norm": 0.0079, "loss": 0.0561, "lr": "4.991e-05", "step": 1116, "steps": "23.3s,1116/33195" }, { "epoch": 0.16824823015514384, "eta": "207:10:13", "grad_norm": 0.0083, "loss": 0.0422, "lr": "4.991e-05", "step": 1117, "steps": "23.25s,1117/33195" }, { "epoch": 0.16839885524928452, "eta": "208:30:01", "grad_norm": 0.0099, "loss": 0.0754, "lr": "4.991e-05", "step": 1118, "steps": "23.4s,1118/33195" }, { "epoch": 0.1685494803434252, "eta": "206:26:40", "grad_norm": 0.0097, "loss": 0.0476, "lr": "4.991e-05", "step": 1119, "steps": "23.17s,1119/33195" }, { "epoch": 0.1687001054375659, "eta": "207:19:45", "grad_norm": 0.007, "loss": 0.0471, "lr": "4.990e-05", "step": 1120, "steps": "23.27s,1120/33195" }, { "epoch": 0.16885073053170657, "eta": "209:16:58", "grad_norm": 0.0069, "loss": 0.0405, "lr": "4.990e-05", "step": 1121, "steps": "23.49s,1121/33195" }, { "epoch": 0.16900135562584725, "eta": "205:26:43", "grad_norm": 0.0063, "loss": 0.0631, "lr": "4.990e-05", "step": 1122, "steps": "23.06s,1122/33195" }, { "epoch": 0.16915198071998794, "eta": "206:57:12", "grad_norm": 0.0093, "loss": 0.0473, "lr": "4.990e-05", "step": 1123, "steps": "23.23s,1123/33195" }, { "epoch": 0.16930260581412862, "eta": "209:42:31", "grad_norm": 0.0081, "loss": 0.0683, "lr": "4.990e-05", "step": 1124, "steps": "23.54s,1124/33195" }, { "epoch": 0.16945323090826933, "eta": "210:40:55", "grad_norm": 0.0098, "loss": 0.0481, "lr": "4.990e-05", "step": 1125, "steps": "23.65s,1125/33195" }, { "epoch": 0.16960385600241001, "eta": "206:02:35", "grad_norm": 0.0112, "loss": 0.041, "lr": "4.990e-05", "step": 1126, "steps": "23.13s,1126/33195" }, { "epoch": 0.1697544810965507, "eta": "206:07:33", "grad_norm": 0.0073, "loss": 0.0665, "lr": "4.990e-05", "step": 1127, "steps": "23.14s,1127/33195" }, { "epoch": 0.16990510619069138, "eta": "204:52:21", "grad_norm": 0.0134, "loss": 0.0392, "lr": "4.990e-05", "step": 1128, "steps": "23.0s,1128/33195" }, { "epoch": 0.17005573128483206, "eta": "208:41:46", "grad_norm": 0.0089, "loss": 0.0628, "lr": "4.990e-05", "step": 1129, "steps": "23.43s,1129/33195" }, { "epoch": 0.17020635637897275, "eta": "206:27:46", "grad_norm": 0.0087, "loss": 0.0658, "lr": "4.990e-05", "step": 1130, "steps": "23.18s,1130/33195" }, { "epoch": 0.17035698147311343, "eta": "206:22:02", "grad_norm": 0.0081, "loss": 0.0372, "lr": "4.990e-05", "step": 1131, "steps": "23.17s,1131/33195" }, { "epoch": 0.1705076065672541, "eta": "205:22:52", "grad_norm": 0.0078, "loss": 0.0506, "lr": "4.990e-05", "step": 1132, "steps": "23.06s,1132/33195" }, { "epoch": 0.1706582316613948, "eta": "209:44:20", "grad_norm": 0.0066, "loss": 0.0514, "lr": "4.990e-05", "step": 1133, "steps": "23.55s,1133/33195" }, { "epoch": 0.17080885675553548, "eta": "206:36:55", "grad_norm": 0.0067, "loss": 0.0387, "lr": "4.990e-05", "step": 1134, "steps": "23.2s,1134/33195" }, { "epoch": 0.17095948184967616, "eta": "206:52:33", "grad_norm": 0.0088, "loss": 0.0529, "lr": "4.990e-05", "step": 1135, "steps": "23.23s,1135/33195" }, { "epoch": 0.17111010694381684, "eta": "206:52:10", "grad_norm": 0.0076, "loss": 0.0529, "lr": "4.990e-05", "step": 1136, "steps": "23.23s,1136/33195" }, { "epoch": 0.17126073203795752, "eta": "204:59:35", "grad_norm": 0.0063, "loss": 0.0486, "lr": "4.990e-05", "step": 1137, "steps": "23.02s,1137/33195" }, { "epoch": 0.1714113571320982, "eta": "206:19:20", "grad_norm": 0.0079, "loss": 0.0466, "lr": "4.990e-05", "step": 1138, "steps": "23.17s,1138/33195" }, { "epoch": 0.1715619822262389, "eta": "207:07:02", "grad_norm": 0.0087, "loss": 0.0733, "lr": "4.990e-05", "step": 1139, "steps": "23.26s,1139/33195" }, { "epoch": 0.17171260732037957, "eta": "208:58:50", "grad_norm": 0.0101, "loss": 0.043, "lr": "4.990e-05", "step": 1140, "steps": "23.47s,1140/33195" }, { "epoch": 0.17186323241452026, "eta": "210:34:37", "grad_norm": 0.0079, "loss": 0.0616, "lr": "4.990e-05", "step": 1141, "steps": "23.65s,1141/33195" }, { "epoch": 0.17201385750866094, "eta": "206:17:48", "grad_norm": 0.0084, "loss": 0.0592, "lr": "4.990e-05", "step": 1142, "steps": "23.17s,1142/33195" }, { "epoch": 0.17216448260280162, "eta": "214:34:13", "grad_norm": 0.0091, "loss": 0.0471, "lr": "4.990e-05", "step": 1143, "steps": "24.1s,1143/33195" }, { "epoch": 0.1723151076969423, "eta": "206:06:20", "grad_norm": 0.0094, "loss": 0.0562, "lr": "4.990e-05", "step": 1144, "steps": "23.15s,1144/33195" }, { "epoch": 0.172465732791083, "eta": "209:02:14", "grad_norm": 0.0071, "loss": 0.0293, "lr": "4.990e-05", "step": 1145, "steps": "23.48s,1145/33195" }, { "epoch": 0.17261635788522367, "eta": "206:42:57", "grad_norm": 0.0064, "loss": 0.0483, "lr": "4.990e-05", "step": 1146, "steps": "23.22s,1146/33195" }, { "epoch": 0.17276698297936435, "eta": "207:19:57", "grad_norm": 0.008, "loss": 0.0339, "lr": "4.990e-05", "step": 1147, "steps": "23.29s,1147/33195" }, { "epoch": 0.17291760807350504, "eta": "210:10:29", "grad_norm": 0.0074, "loss": 0.0304, "lr": "4.990e-05", "step": 1148, "steps": "23.61s,1148/33195" }, { "epoch": 0.17306823316764572, "eta": "209:38:03", "grad_norm": 0.0061, "loss": 0.0555, "lr": "4.990e-05", "step": 1149, "steps": "23.55s,1149/33195" }, { "epoch": 0.1732188582617864, "eta": "205:37:19", "grad_norm": 0.009, "loss": 0.0462, "lr": "4.990e-05", "step": 1150, "steps": "23.1s,1150/33195" }, { "epoch": 0.17336948335592708, "eta": "210:03:58", "grad_norm": 0.0087, "loss": 0.0345, "lr": "4.990e-05", "step": 1151, "steps": "23.6s,1151/33195" }, { "epoch": 0.1735201084500678, "eta": "203:39:03", "grad_norm": 0.0077, "loss": 0.0925, "lr": "4.990e-05", "step": 1152, "steps": "22.88s,1152/33195" }, { "epoch": 0.17367073354420848, "eta": "210:29:53", "grad_norm": 0.0131, "loss": 0.0404, "lr": "4.990e-05", "step": 1153, "steps": "23.65s,1153/33195" }, { "epoch": 0.17382135863834916, "eta": "206:50:32", "grad_norm": 0.007, "loss": 0.0538, "lr": "4.990e-05", "step": 1154, "steps": "23.24s,1154/33195" }, { "epoch": 0.17397198373248984, "eta": "207:27:32", "grad_norm": 0.0074, "loss": 0.0579, "lr": "4.990e-05", "step": 1155, "steps": "23.31s,1155/33195" }, { "epoch": 0.17412260882663053, "eta": "209:13:56", "grad_norm": 0.008, "loss": 0.0695, "lr": "4.990e-05", "step": 1156, "steps": "23.51s,1156/33195" }, { "epoch": 0.1742732339207712, "eta": "206:38:42", "grad_norm": 0.0087, "loss": 0.0476, "lr": "4.990e-05", "step": 1157, "steps": "23.22s,1157/33195" }, { "epoch": 0.1744238590149119, "eta": "207:05:00", "grad_norm": 0.0089, "loss": 0.0459, "lr": "4.990e-05", "step": 1158, "steps": "23.27s,1158/33195" }, { "epoch": 0.17457448410905257, "eta": "205:33:51", "grad_norm": 0.0079, "loss": 0.0552, "lr": "4.990e-05", "step": 1159, "steps": "23.1s,1159/33195" }, { "epoch": 0.17472510920319326, "eta": "210:16:27", "grad_norm": 0.009, "loss": 0.0601, "lr": "4.990e-05", "step": 1160, "steps": "23.63s,1160/33195" }, { "epoch": 0.17487573429733394, "eta": "206:26:28", "grad_norm": 0.0087, "loss": 0.0414, "lr": "4.990e-05", "step": 1161, "steps": "23.2s,1161/33195" }, { "epoch": 0.17502635939147462, "eta": "206:58:07", "grad_norm": 0.0079, "loss": 0.0559, "lr": "4.990e-05", "step": 1162, "steps": "23.26s,1162/33195" }, { "epoch": 0.1751769844856153, "eta": "206:31:02", "grad_norm": 0.0075, "loss": 0.0689, "lr": "4.990e-05", "step": 1163, "steps": "23.21s,1163/33195" }, { "epoch": 0.175327609579756, "eta": "203:39:49", "grad_norm": 0.008, "loss": 0.074, "lr": "4.990e-05", "step": 1164, "steps": "22.89s,1164/33195" }, { "epoch": 0.17547823467389667, "eta": "206:51:37", "grad_norm": 0.0074, "loss": 0.0327, "lr": "4.990e-05", "step": 1165, "steps": "23.25s,1165/33195" }, { "epoch": 0.17562885976803735, "eta": "206:45:53", "grad_norm": 0.0079, "loss": 0.0468, "lr": "4.990e-05", "step": 1166, "steps": "23.24s,1166/33195" }, { "epoch": 0.17577948486217804, "eta": "206:29:29", "grad_norm": 0.0066, "loss": 0.0474, "lr": "4.989e-05", "step": 1167, "steps": "23.21s,1167/33195" }, { "epoch": 0.17593010995631872, "eta": "206:34:26", "grad_norm": 0.0074, "loss": 0.0695, "lr": "4.989e-05", "step": 1168, "steps": "23.22s,1168/33195" }, { "epoch": 0.1760807350504594, "eta": "205:08:39", "grad_norm": 0.0077, "loss": 0.058, "lr": "4.989e-05", "step": 1169, "steps": "23.06s,1169/33195" }, { "epoch": 0.17623136014460009, "eta": "208:57:47", "grad_norm": 0.0106, "loss": 0.0334, "lr": "4.989e-05", "step": 1170, "steps": "23.49s,1170/33195" }, { "epoch": 0.17638198523874077, "eta": "206:22:36", "grad_norm": 0.0071, "loss": 0.0526, "lr": "4.989e-05", "step": 1171, "steps": "23.2s,1171/33195" }, { "epoch": 0.17653261033288145, "eta": "208:51:40", "grad_norm": 0.0091, "loss": 0.0508, "lr": "4.989e-05", "step": 1172, "steps": "23.48s,1172/33195" }, { "epoch": 0.17668323542702213, "eta": "209:44:38", "grad_norm": 0.0085, "loss": 0.0513, "lr": "4.989e-05", "step": 1173, "steps": "23.58s,1173/33195" }, { "epoch": 0.17683386052116282, "eta": "205:01:24", "grad_norm": 0.0059, "loss": 0.0655, "lr": "4.989e-05", "step": 1174, "steps": "23.05s,1174/33195" }, { "epoch": 0.1769844856153035, "eta": "206:53:05", "grad_norm": 0.0107, "loss": 0.0317, "lr": "4.989e-05", "step": 1175, "steps": "23.26s,1175/33195" }, { "epoch": 0.17713511070944418, "eta": "207:08:42", "grad_norm": 0.0098, "loss": 0.0529, "lr": "4.989e-05", "step": 1176, "steps": "23.29s,1176/33195" }, { "epoch": 0.17728573580358487, "eta": "209:21:43", "grad_norm": 0.0104, "loss": 0.0497, "lr": "4.989e-05", "step": 1177, "steps": "23.54s,1177/33195" }, { "epoch": 0.17743636089772555, "eta": "204:38:31", "grad_norm": 0.0061, "loss": 0.0762, "lr": "4.989e-05", "step": 1178, "steps": "23.01s,1178/33195" }, { "epoch": 0.17758698599186626, "eta": "209:42:17", "grad_norm": 0.0067, "loss": 0.0592, "lr": "4.989e-05", "step": 1179, "steps": "23.58s,1179/33195" }, { "epoch": 0.17773761108600694, "eta": "210:03:14", "grad_norm": 0.0062, "loss": 0.0637, "lr": "4.989e-05", "step": 1180, "steps": "23.62s,1180/33195" }, { "epoch": 0.17788823618014762, "eta": "203:06:39", "grad_norm": 0.0077, "loss": 0.0367, "lr": "4.989e-05", "step": 1181, "steps": "22.84s,1181/33195" }, { "epoch": 0.1780388612742883, "eta": "208:37:04", "grad_norm": 0.0095, "loss": 0.0612, "lr": "4.989e-05", "step": 1182, "steps": "23.46s,1182/33195" }, { "epoch": 0.178189486368429, "eta": "208:36:41", "grad_norm": 0.0081, "loss": 0.0519, "lr": "4.989e-05", "step": 1183, "steps": "23.46s,1183/33195" }, { "epoch": 0.17834011146256967, "eta": "204:57:33", "grad_norm": 0.012, "loss": 0.0425, "lr": "4.989e-05", "step": 1184, "steps": "23.05s,1184/33195" }, { "epoch": 0.17849073655671036, "eta": "206:33:12", "grad_norm": 0.0086, "loss": 0.073, "lr": "4.989e-05", "step": 1185, "steps": "23.23s,1185/33195" }, { "epoch": 0.17864136165085104, "eta": "206:43:29", "grad_norm": 0.0067, "loss": 0.0587, "lr": "4.989e-05", "step": 1186, "steps": "23.25s,1186/33195" }, { "epoch": 0.17879198674499172, "eta": "208:29:47", "grad_norm": 0.0074, "loss": 0.0385, "lr": "4.989e-05", "step": 1187, "steps": "23.45s,1187/33195" }, { "epoch": 0.1789426118391324, "eta": "205:06:41", "grad_norm": 0.0069, "loss": 0.0458, "lr": "4.989e-05", "step": 1188, "steps": "23.07s,1188/33195" }, { "epoch": 0.1790932369332731, "eta": "209:59:41", "grad_norm": 0.0072, "loss": 0.0371, "lr": "4.989e-05", "step": 1189, "steps": "23.62s,1189/33195" }, { "epoch": 0.17924386202741377, "eta": "205:53:55", "grad_norm": 0.0101, "loss": 0.0414, "lr": "4.989e-05", "step": 1190, "steps": "23.16s,1190/33195" }, { "epoch": 0.17939448712155445, "eta": "206:20:12", "grad_norm": 0.0058, "loss": 0.0392, "lr": "4.989e-05", "step": 1191, "steps": "23.21s,1191/33195" }, { "epoch": 0.17954511221569514, "eta": "206:19:49", "grad_norm": 0.0086, "loss": 0.0426, "lr": "4.989e-05", "step": 1192, "steps": "23.21s,1192/33195" }, { "epoch": 0.17969573730983582, "eta": "204:48:46", "grad_norm": 0.008, "loss": 0.0383, "lr": "4.989e-05", "step": 1193, "steps": "23.04s,1193/33195" }, { "epoch": 0.1798463624039765, "eta": "205:57:43", "grad_norm": 0.0076, "loss": 0.0687, "lr": "4.989e-05", "step": 1194, "steps": "23.17s,1194/33195" }, { "epoch": 0.17999698749811718, "eta": "208:21:20", "grad_norm": 0.0085, "loss": 0.0619, "lr": "4.989e-05", "step": 1195, "steps": "23.44s,1195/33195" }, { "epoch": 0.18014761259225787, "eta": "206:07:36", "grad_norm": 0.0085, "loss": 0.041, "lr": "4.989e-05", "step": 1196, "steps": "23.19s,1196/33195" }, { "epoch": 0.18029823768639855, "eta": "208:47:13", "grad_norm": 0.0099, "loss": 0.0407, "lr": "4.989e-05", "step": 1197, "steps": "23.49s,1197/33195" }, { "epoch": 0.18044886278053923, "eta": "210:06:49", "grad_norm": 0.0065, "loss": 0.0585, "lr": "4.989e-05", "step": 1198, "steps": "23.64s,1198/33195" }, { "epoch": 0.18059948787467991, "eta": "216:51:42", "grad_norm": 0.0073, "loss": 0.045, "lr": "4.989e-05", "step": 1199, "steps": "24.4s,1199/33195" }, { "epoch": 0.1807501129688206, "eta": "206:54:03", "grad_norm": 0.0067, "loss": 0.0488, "lr": "4.989e-05", "step": 1200, "steps": "23.28s,1200/33195" }, { "epoch": 0.18090073806296128, "eta": "374:03:47", "grad_norm": 0.0069, "loss": 0.0435, "lr": "4.989e-05", "step": 1201, "steps": "42.09s,1201/33195" }, { "epoch": 0.18105136315710196, "eta": "209:11:55", "grad_norm": 0.0101, "loss": 0.0347, "lr": "4.989e-05", "step": 1202, "steps": "23.54s,1202/33195" }, { "epoch": 0.18120198825124265, "eta": "205:43:34", "grad_norm": 0.0081, "loss": 0.0655, "lr": "4.989e-05", "step": 1203, "steps": "23.15s,1203/33195" }, { "epoch": 0.18135261334538333, "eta": "206:09:51", "grad_norm": 0.0082, "loss": 0.0551, "lr": "4.989e-05", "step": 1204, "steps": "23.2s,1204/33195" }, { "epoch": 0.181503238439524, "eta": "206:52:07", "grad_norm": 0.0059, "loss": 0.0445, "lr": "4.989e-05", "step": 1205, "steps": "23.28s,1205/33195" }, { "epoch": 0.18165386353366472, "eta": "205:47:45", "grad_norm": 0.0069, "loss": 0.0596, "lr": "4.989e-05", "step": 1206, "steps": "23.16s,1206/33195" }, { "epoch": 0.1818044886278054, "eta": "205:36:42", "grad_norm": 0.0068, "loss": 0.0417, "lr": "4.989e-05", "step": 1207, "steps": "23.14s,1207/33195" }, { "epoch": 0.1819551137219461, "eta": "206:13:38", "grad_norm": 0.0081, "loss": 0.0476, "lr": "4.989e-05", "step": 1208, "steps": "23.21s,1208/33195" }, { "epoch": 0.18210573881608677, "eta": "209:35:49", "grad_norm": 0.0088, "loss": 0.0371, "lr": "4.989e-05", "step": 1209, "steps": "23.59s,1209/33195" }, { "epoch": 0.18225636391022745, "eta": "206:02:12", "grad_norm": 0.0078, "loss": 0.0479, "lr": "4.989e-05", "step": 1210, "steps": "23.19s,1210/33195" }, { "epoch": 0.18240698900436814, "eta": "208:15:04", "grad_norm": 0.0101, "loss": 0.0381, "lr": "4.989e-05", "step": 1211, "steps": "23.44s,1211/33195" }, { "epoch": 0.18255761409850882, "eta": "208:25:21", "grad_norm": 0.0077, "loss": 0.0796, "lr": "4.988e-05", "step": 1212, "steps": "23.46s,1212/33195" }, { "epoch": 0.1827082391926495, "eta": "206:54:20", "grad_norm": 0.0084, "loss": 0.0446, "lr": "4.988e-05", "step": 1213, "steps": "23.29s,1213/33195" }, { "epoch": 0.18285886428679018, "eta": "208:35:13", "grad_norm": 0.0086, "loss": 0.0678, "lr": "4.988e-05", "step": 1214, "steps": "23.48s,1214/33195" }, { "epoch": 0.18300948938093087, "eta": "206:00:16", "grad_norm": 0.0094, "loss": 0.0526, "lr": "4.988e-05", "step": 1215, "steps": "23.19s,1215/33195" }, { "epoch": 0.18316011447507155, "eta": "206:53:10", "grad_norm": 0.0075, "loss": 0.0247, "lr": "4.988e-05", "step": 1216, "steps": "23.29s,1216/33195" }, { "epoch": 0.18331073956921223, "eta": "205:54:10", "grad_norm": 0.0064, "loss": 0.0668, "lr": "4.988e-05", "step": 1217, "steps": "23.18s,1217/33195" }, { "epoch": 0.18346136466335292, "eta": "202:52:34", "grad_norm": 0.0088, "loss": 0.058, "lr": "4.988e-05", "step": 1218, "steps": "22.84s,1218/33195" }, { "epoch": 0.1836119897574936, "eta": "209:26:34", "grad_norm": 0.0089, "loss": 0.0463, "lr": "4.988e-05", "step": 1219, "steps": "23.58s,1219/33195" }, { "epoch": 0.18376261485163428, "eta": "205:47:40", "grad_norm": 0.0082, "loss": 0.0633, "lr": "4.988e-05", "step": 1220, "steps": "23.17s,1220/33195" }, { "epoch": 0.18391323994577496, "eta": "209:15:07", "grad_norm": 0.0074, "loss": 0.0733, "lr": "4.988e-05", "step": 1221, "steps": "23.56s,1221/33195" }, { "epoch": 0.18406386503991565, "eta": "208:48:05", "grad_norm": 0.0084, "loss": 0.0512, "lr": "4.988e-05", "step": 1222, "steps": "23.51s,1222/33195" }, { "epoch": 0.18421449013405633, "eta": "209:46:18", "grad_norm": 0.0075, "loss": 0.0694, "lr": "4.988e-05", "step": 1223, "steps": "23.62s,1223/33195" }, { "epoch": 0.184365115228197, "eta": "205:46:08", "grad_norm": 0.0074, "loss": 0.0549, "lr": "4.988e-05", "step": 1224, "steps": "23.17s,1224/33195" }, { "epoch": 0.1845157403223377, "eta": "207:00:20", "grad_norm": 0.0109, "loss": 0.0452, "lr": "4.988e-05", "step": 1225, "steps": "23.31s,1225/33195" }, { "epoch": 0.18466636541647838, "eta": "206:06:40", "grad_norm": 0.0079, "loss": 0.064, "lr": "4.988e-05", "step": 1226, "steps": "23.21s,1226/33195" }, { "epoch": 0.18481699051061906, "eta": "205:02:21", "grad_norm": 0.0098, "loss": 0.0541, "lr": "4.988e-05", "step": 1227, "steps": "23.09s,1227/33195" }, { "epoch": 0.18496761560475974, "eta": "208:08:26", "grad_norm": 0.0082, "loss": 0.0538, "lr": "4.988e-05", "step": 1228, "steps": "23.44s,1228/33195" }, { "epoch": 0.18511824069890043, "eta": "206:48:08", "grad_norm": 0.0072, "loss": 0.0488, "lr": "4.988e-05", "step": 1229, "steps": "23.29s,1229/33195" }, { "epoch": 0.1852688657930411, "eta": "207:51:40", "grad_norm": 0.0074, "loss": 0.0329, "lr": "4.988e-05", "step": 1230, "steps": "23.41s,1230/33195" }, { "epoch": 0.1854194908871818, "eta": "204:44:49", "grad_norm": 0.0065, "loss": 0.0377, "lr": "4.988e-05", "step": 1231, "steps": "23.06s,1231/33195" }, { "epoch": 0.18557011598132248, "eta": "206:57:37", "grad_norm": 0.0108, "loss": 0.0548, "lr": "4.988e-05", "step": 1232, "steps": "23.31s,1232/33195" }, { "epoch": 0.18572074107546319, "eta": "204:22:45", "grad_norm": 0.0078, "loss": 0.0673, "lr": "4.988e-05", "step": 1233, "steps": "23.02s,1233/33195" }, { "epoch": 0.18587136616960387, "eta": "205:58:15", "grad_norm": 0.0069, "loss": 0.0481, "lr": "4.988e-05", "step": 1234, "steps": "23.2s,1234/33195" }, { "epoch": 0.18602199126374455, "eta": "203:23:23", "grad_norm": 0.0072, "loss": 0.054, "lr": "4.988e-05", "step": 1235, "steps": "22.91s,1235/33195" }, { "epoch": 0.18617261635788523, "eta": "206:45:25", "grad_norm": 0.0077, "loss": 0.0507, "lr": "4.988e-05", "step": 1236, "steps": "23.29s,1236/33195" }, { "epoch": 0.18632324145202592, "eta": "209:03:30", "grad_norm": 0.0067, "loss": 0.0643, "lr": "4.988e-05", "step": 1237, "steps": "23.55s,1237/33195" }, { "epoch": 0.1864738665461666, "eta": "207:48:33", "grad_norm": 0.0093, "loss": 0.0495, "lr": "4.988e-05", "step": 1238, "steps": "23.41s,1238/33195" }, { "epoch": 0.18662449164030728, "eta": "205:56:19", "grad_norm": 0.0079, "loss": 0.0538, "lr": "4.988e-05", "step": 1239, "steps": "23.2s,1239/33195" }, { "epoch": 0.18677511673444797, "eta": "205:55:56", "grad_norm": 0.0064, "loss": 0.0509, "lr": "4.988e-05", "step": 1240, "steps": "23.2s,1240/33195" }, { "epoch": 0.18692574182858865, "eta": "207:36:44", "grad_norm": 0.0062, "loss": 0.055, "lr": "4.988e-05", "step": 1241, "steps": "23.39s,1241/33195" }, { "epoch": 0.18707636692272933, "eta": "207:25:41", "grad_norm": 0.0078, "loss": 0.0542, "lr": "4.988e-05", "step": 1242, "steps": "23.37s,1242/33195" }, { "epoch": 0.18722699201687001, "eta": "207:14:39", "grad_norm": 0.007, "loss": 0.0378, "lr": "4.988e-05", "step": 1243, "steps": "23.35s,1243/33195" }, { "epoch": 0.1873776171110107, "eta": "208:39:28", "grad_norm": 0.0071, "loss": 0.0315, "lr": "4.988e-05", "step": 1244, "steps": "23.51s,1244/33195" }, { "epoch": 0.18752824220515138, "eta": "204:28:48", "grad_norm": 0.0071, "loss": 0.0546, "lr": "4.988e-05", "step": 1245, "steps": "23.04s,1245/33195" }, { "epoch": 0.18767886729929206, "eta": "205:32:18", "grad_norm": 0.0073, "loss": 0.0643, "lr": "4.988e-05", "step": 1246, "steps": "23.16s,1246/33195" }, { "epoch": 0.18782949239343275, "eta": "206:03:52", "grad_norm": 0.0077, "loss": 0.0502, "lr": "4.988e-05", "step": 1247, "steps": "23.22s,1247/33195" }, { "epoch": 0.18798011748757343, "eta": "206:03:29", "grad_norm": 0.006, "loss": 0.0407, "lr": "4.988e-05", "step": 1248, "steps": "23.22s,1248/33195" }, { "epoch": 0.1881307425817141, "eta": "209:52:02", "grad_norm": 0.0064, "loss": 0.0554, "lr": "4.988e-05", "step": 1249, "steps": "23.65s,1249/33195" }, { "epoch": 0.1882813676758548, "eta": "208:53:05", "grad_norm": 0.0087, "loss": 0.0524, "lr": "4.988e-05", "step": 1250, "steps": "23.54s,1250/33195" }, { "epoch": 0.18843199276999548, "eta": "203:49:13", "grad_norm": 0.0092, "loss": 0.0389, "lr": "4.988e-05", "step": 1251, "steps": "22.97s,1251/33195" }, { "epoch": 0.18858261786413616, "eta": "206:23:14", "grad_norm": 0.0063, "loss": 0.0614, "lr": "4.988e-05", "step": 1252, "steps": "23.26s,1252/33195" }, { "epoch": 0.18873324295827684, "eta": "204:15:04", "grad_norm": 0.0063, "loss": 0.0361, "lr": "4.988e-05", "step": 1253, "steps": "23.02s,1253/33195" }, { "epoch": 0.18888386805241753, "eta": "205:50:31", "grad_norm": 0.0086, "loss": 0.0542, "lr": "4.988e-05", "step": 1254, "steps": "23.2s,1254/33195" }, { "epoch": 0.1890344931465582, "eta": "204:24:57", "grad_norm": 0.007, "loss": 0.0549, "lr": "4.987e-05", "step": 1255, "steps": "23.04s,1255/33195" }, { "epoch": 0.1891851182406989, "eta": "202:59:24", "grad_norm": 0.0089, "loss": 0.0604, "lr": "4.987e-05", "step": 1256, "steps": "22.88s,1256/33195" }, { "epoch": 0.18933574333483957, "eta": "207:30:29", "grad_norm": 0.0139, "loss": 0.0414, "lr": "4.987e-05", "step": 1257, "steps": "23.39s,1257/33195" }, { "epoch": 0.18948636842898026, "eta": "209:16:33", "grad_norm": 0.0079, "loss": 0.08, "lr": "4.987e-05", "step": 1258, "steps": "23.59s,1258/33195" }, { "epoch": 0.18963699352312094, "eta": "207:40:21", "grad_norm": 0.0088, "loss": 0.0463, "lr": "4.987e-05", "step": 1259, "steps": "23.41s,1259/33195" }, { "epoch": 0.18978761861726162, "eta": "207:39:58", "grad_norm": 0.0068, "loss": 0.0375, "lr": "4.987e-05", "step": 1260, "steps": "23.41s,1260/33195" }, { "epoch": 0.18993824371140233, "eta": "202:57:29", "grad_norm": 0.0074, "loss": 0.0592, "lr": "4.987e-05", "step": 1261, "steps": "22.88s,1261/33195" }, { "epoch": 0.19008886880554302, "eta": "210:13:32", "grad_norm": 0.0083, "loss": 0.0432, "lr": "4.987e-05", "step": 1262, "steps": "23.7s,1262/33195" }, { "epoch": 0.1902394938996837, "eta": "208:53:18", "grad_norm": 0.0092, "loss": 0.0535, "lr": "4.987e-05", "step": 1263, "steps": "23.55s,1263/33195" }, { "epoch": 0.19039011899382438, "eta": "208:58:14", "grad_norm": 0.0088, "loss": 0.0566, "lr": "4.987e-05", "step": 1264, "steps": "23.56s,1264/33195" }, { "epoch": 0.19054074408796506, "eta": "207:48:39", "grad_norm": 0.0085, "loss": 0.0736, "lr": "4.987e-05", "step": 1265, "steps": "23.43s,1265/33195" }, { "epoch": 0.19069136918210575, "eta": "202:39:37", "grad_norm": 0.0084, "loss": 0.0471, "lr": "4.987e-05", "step": 1266, "steps": "22.85s,1266/33195" }, { "epoch": 0.19084199427624643, "eta": "206:33:23", "grad_norm": 0.0084, "loss": 0.0723, "lr": "4.987e-05", "step": 1267, "steps": "23.29s,1267/33195" }, { "epoch": 0.1909926193703871, "eta": "206:22:21", "grad_norm": 0.0077, "loss": 0.0309, "lr": "4.987e-05", "step": 1268, "steps": "23.27s,1268/33195" }, { "epoch": 0.1911432444645278, "eta": "205:18:06", "grad_norm": 0.0084, "loss": 0.068, "lr": "4.987e-05", "step": 1269, "steps": "23.15s,1269/33195" }, { "epoch": 0.19129386955866848, "eta": "209:22:29", "grad_norm": 0.0069, "loss": 0.058, "lr": "4.987e-05", "step": 1270, "steps": "23.61s,1270/33195" }, { "epoch": 0.19144449465280916, "eta": "203:46:53", "grad_norm": 0.0067, "loss": 0.0562, "lr": "4.987e-05", "step": 1271, "steps": "22.98s,1271/33195" }, { "epoch": 0.19159511974694984, "eta": "205:59:31", "grad_norm": 0.0081, "loss": 0.0478, "lr": "4.987e-05", "step": 1272, "steps": "23.23s,1272/33195" }, { "epoch": 0.19174574484109053, "eta": "209:58:32", "grad_norm": 0.0085, "loss": 0.0321, "lr": "4.987e-05", "step": 1273, "steps": "23.68s,1273/33195" }, { "epoch": 0.1918963699352312, "eta": "204:28:18", "grad_norm": 0.0074, "loss": 0.0349, "lr": "4.987e-05", "step": 1274, "steps": "23.06s,1274/33195" }, { "epoch": 0.1920469950293719, "eta": "205:58:21", "grad_norm": 0.014, "loss": 0.0345, "lr": "4.987e-05", "step": 1275, "steps": "23.23s,1275/33195" }, { "epoch": 0.19219762012351257, "eta": "209:09:29", "grad_norm": 0.007, "loss": 0.054, "lr": "4.987e-05", "step": 1276, "steps": "23.59s,1276/33195" }, { "epoch": 0.19234824521765326, "eta": "208:47:48", "grad_norm": 0.0075, "loss": 0.0678, "lr": "4.987e-05", "step": 1277, "steps": "23.55s,1277/33195" }, { "epoch": 0.19249887031179394, "eta": "208:26:08", "grad_norm": 0.0152, "loss": 0.0769, "lr": "4.987e-05", "step": 1278, "steps": "23.51s,1278/33195" }, { "epoch": 0.19264949540593462, "eta": "206:02:07", "grad_norm": 0.0075, "loss": 0.0524, "lr": "4.987e-05", "step": 1279, "steps": "23.24s,1279/33195" }, { "epoch": 0.1928001205000753, "eta": "205:35:08", "grad_norm": 0.0114, "loss": 0.0303, "lr": "4.987e-05", "step": 1280, "steps": "23.19s,1280/33195" }, { "epoch": 0.192950745594216, "eta": "206:06:40", "grad_norm": 0.0085, "loss": 0.0809, "lr": "4.987e-05", "step": 1281, "steps": "23.25s,1281/33195" }, { "epoch": 0.19310137068835667, "eta": "205:02:27", "grad_norm": 0.0076, "loss": 0.0312, "lr": "4.987e-05", "step": 1282, "steps": "23.13s,1282/33195" }, { "epoch": 0.19325199578249735, "eta": "205:49:56", "grad_norm": 0.0086, "loss": 0.0455, "lr": "4.987e-05", "step": 1283, "steps": "23.22s,1283/33195" }, { "epoch": 0.19340262087663804, "eta": "205:12:19", "grad_norm": 0.01, "loss": 0.038, "lr": "4.987e-05", "step": 1284, "steps": "23.15s,1284/33195" }, { "epoch": 0.19355324597077872, "eta": "207:46:10", "grad_norm": 0.0111, "loss": 0.0509, "lr": "4.987e-05", "step": 1285, "steps": "23.44s,1285/33195" }, { "epoch": 0.1937038710649194, "eta": "209:26:49", "grad_norm": 0.0154, "loss": 0.0514, "lr": "4.987e-05", "step": 1286, "steps": "23.63s,1286/33195" }, { "epoch": 0.19385449615906009, "eta": "206:14:59", "grad_norm": 0.0079, "loss": 0.0323, "lr": "4.987e-05", "step": 1287, "steps": "23.27s,1287/33195" }, { "epoch": 0.1940051212532008, "eta": "208:16:54", "grad_norm": 0.0074, "loss": 0.0771, "lr": "4.987e-05", "step": 1288, "steps": "23.5s,1288/33195" }, { "epoch": 0.19415574634734148, "eta": "208:48:25", "grad_norm": 0.0104, "loss": 0.0461, "lr": "4.987e-05", "step": 1289, "steps": "23.56s,1289/33195" }, { "epoch": 0.19430637144148216, "eta": "205:52:33", "grad_norm": 0.0076, "loss": 0.0515, "lr": "4.987e-05", "step": 1290, "steps": "23.23s,1290/33195" }, { "epoch": 0.19445699653562284, "eta": "205:57:28", "grad_norm": 0.0075, "loss": 0.0404, "lr": "4.987e-05", "step": 1291, "steps": "23.24s,1291/33195" }, { "epoch": 0.19460762162976353, "eta": "208:04:42", "grad_norm": 0.0096, "loss": 0.0583, "lr": "4.987e-05", "step": 1292, "steps": "23.48s,1292/33195" }, { "epoch": 0.1947582467239042, "eta": "205:14:10", "grad_norm": 0.0075, "loss": 0.0555, "lr": "4.987e-05", "step": 1293, "steps": "23.16s,1293/33195" }, { "epoch": 0.1949088718180449, "eta": "205:40:22", "grad_norm": 0.0071, "loss": 0.0471, "lr": "4.987e-05", "step": 1294, "steps": "23.21s,1294/33195" }, { "epoch": 0.19505949691218558, "eta": "206:59:44", "grad_norm": 0.006, "loss": 0.0684, "lr": "4.987e-05", "step": 1295, "steps": "23.36s,1295/33195" }, { "epoch": 0.19521012200632626, "eta": "206:00:51", "grad_norm": 0.0092, "loss": 0.0589, "lr": "4.986e-05", "step": 1296, "steps": "23.25s,1296/33195" }, { "epoch": 0.19536074710046694, "eta": "207:46:48", "grad_norm": 0.0106, "loss": 0.0561, "lr": "4.986e-05", "step": 1297, "steps": "23.45s,1297/33195" }, { "epoch": 0.19551137219460762, "eta": "204:13:45", "grad_norm": 0.008, "loss": 0.0639, "lr": "4.986e-05", "step": 1298, "steps": "23.05s,1298/33195" }, { "epoch": 0.1956619972887483, "eta": "207:14:07", "grad_norm": 0.0082, "loss": 0.0668, "lr": "4.986e-05", "step": 1299, "steps": "23.39s,1299/33195" }, { "epoch": 0.195812622382889, "eta": "205:48:40", "grad_norm": 0.0086, "loss": 0.0428, "lr": "4.986e-05", "step": 1300, "steps": "23.23s,1300/33195" }, { "epoch": 0.19596324747702967, "eta": "208:11:49", "grad_norm": 0.0085, "loss": 0.064, "lr": "4.986e-05", "step": 1301, "steps": "23.5s,1301/33195" }, { "epoch": 0.19611387257117036, "eta": "205:21:19", "grad_norm": 0.0072, "loss": 0.0334, "lr": "4.986e-05", "step": 1302, "steps": "23.18s,1302/33195" }, { "epoch": 0.19626449766531104, "eta": "205:31:34", "grad_norm": 0.0069, "loss": 0.0512, "lr": "4.986e-05", "step": 1303, "steps": "23.2s,1303/33195" }, { "epoch": 0.19641512275945172, "eta": "206:24:20", "grad_norm": 0.009, "loss": 0.0242, "lr": "4.986e-05", "step": 1304, "steps": "23.3s,1304/33195" }, { "epoch": 0.1965657478535924, "eta": "206:13:19", "grad_norm": 0.0079, "loss": 0.0713, "lr": "4.986e-05", "step": 1305, "steps": "23.28s,1305/33195" }, { "epoch": 0.1967163729477331, "eta": "207:48:35", "grad_norm": 0.0095, "loss": 0.042, "lr": "4.986e-05", "step": 1306, "steps": "23.46s,1306/33195" }, { "epoch": 0.19686699804187377, "eta": "207:26:56", "grad_norm": 0.0104, "loss": 0.0426, "lr": "4.986e-05", "step": 1307, "steps": "23.42s,1307/33195" }, { "epoch": 0.19701762313601445, "eta": "206:33:24", "grad_norm": 0.0085, "loss": 0.0476, "lr": "4.986e-05", "step": 1308, "steps": "23.32s,1308/33195" }, { "epoch": 0.19716824823015514, "eta": "202:33:52", "grad_norm": 0.0082, "loss": 0.0543, "lr": "4.986e-05", "step": 1309, "steps": "22.87s,1309/33195" }, { "epoch": 0.19731887332429582, "eta": "205:34:10", "grad_norm": 0.0076, "loss": 0.0635, "lr": "4.986e-05", "step": 1310, "steps": "23.21s,1310/33195" }, { "epoch": 0.1974694984184365, "eta": "208:18:31", "grad_norm": 0.0085, "loss": 0.044, "lr": "4.986e-05", "step": 1311, "steps": "23.52s,1311/33195" }, { "epoch": 0.19762012351257718, "eta": "205:49:20", "grad_norm": 0.0101, "loss": 0.0649, "lr": "4.986e-05", "step": 1312, "steps": "23.24s,1312/33195" }, { "epoch": 0.19777074860671787, "eta": "207:40:32", "grad_norm": 0.0078, "loss": 0.0486, "lr": "4.986e-05", "step": 1313, "steps": "23.45s,1313/33195" }, { "epoch": 0.19792137370085855, "eta": "205:11:22", "grad_norm": 0.0078, "loss": 0.0618, "lr": "4.986e-05", "step": 1314, "steps": "23.17s,1314/33195" }, { "epoch": 0.19807199879499926, "eta": "205:16:18", "grad_norm": 0.0143, "loss": 0.0746, "lr": "4.986e-05", "step": 1315, "steps": "23.18s,1315/33195" }, { "epoch": 0.19822262388913994, "eta": "205:53:06", "grad_norm": 0.0062, "loss": 0.0457, "lr": "4.986e-05", "step": 1316, "steps": "23.25s,1316/33195" }, { "epoch": 0.19837324898328063, "eta": "203:02:42", "grad_norm": 0.0065, "loss": 0.0654, "lr": "4.986e-05", "step": 1317, "steps": "22.93s,1317/33195" }, { "epoch": 0.1985238740774213, "eta": "205:57:39", "grad_norm": 0.0094, "loss": 0.0669, "lr": "4.986e-05", "step": 1318, "steps": "23.26s,1318/33195" }, { "epoch": 0.198674499171562, "eta": "205:25:23", "grad_norm": 0.0073, "loss": 0.0502, "lr": "4.986e-05", "step": 1319, "steps": "23.2s,1319/33195" }, { "epoch": 0.19882512426570267, "eta": "205:46:15", "grad_norm": 0.0077, "loss": 0.0648, "lr": "4.986e-05", "step": 1320, "steps": "23.24s,1320/33195" }, { "epoch": 0.19897574935984336, "eta": "206:33:40", "grad_norm": 0.0095, "loss": 0.0551, "lr": "4.986e-05", "step": 1321, "steps": "23.33s,1321/33195" }, { "epoch": 0.19912637445398404, "eta": "205:24:13", "grad_norm": 0.0079, "loss": 0.0532, "lr": "4.986e-05", "step": 1322, "steps": "23.2s,1322/33195" }, { "epoch": 0.19927699954812472, "eta": "204:57:16", "grad_norm": 0.012, "loss": 0.0253, "lr": "4.986e-05", "step": 1323, "steps": "23.15s,1323/33195" }, { "epoch": 0.1994276246422654, "eta": "207:52:11", "grad_norm": 0.0085, "loss": 0.0762, "lr": "4.986e-05", "step": 1324, "steps": "23.48s,1324/33195" }, { "epoch": 0.1995782497364061, "eta": "208:44:54", "grad_norm": 0.0166, "loss": 0.046, "lr": "4.986e-05", "step": 1325, "steps": "23.58s,1325/33195" }, { "epoch": 0.19972887483054677, "eta": "207:56:42", "grad_norm": 0.0106, "loss": 0.0674, "lr": "4.986e-05", "step": 1326, "steps": "23.49s,1326/33195" }, { "epoch": 0.19987949992468745, "eta": "202:00:27", "grad_norm": 0.0097, "loss": 0.0452, "lr": "4.986e-05", "step": 1327, "steps": "22.82s,1327/33195" }, { "epoch": 0.20003012501882814, "eta": "205:48:27", "grad_norm": 0.0073, "loss": 0.0367, "lr": "4.986e-05", "step": 1328, "steps": "23.25s,1328/33195" }, { "epoch": 0.20018075011296882, "eta": "205:37:27", "grad_norm": 0.0089, "loss": 0.049, "lr": "4.986e-05", "step": 1329, "steps": "23.23s,1329/33195" }, { "epoch": 0.2003313752071095, "eta": "208:21:42", "grad_norm": 0.0089, "loss": 0.066, "lr": "4.986e-05", "step": 1330, "steps": "23.54s,1330/33195" }, { "epoch": 0.20048200030125019, "eta": "205:15:26", "grad_norm": 0.0095, "loss": 0.0504, "lr": "4.986e-05", "step": 1331, "steps": "23.19s,1331/33195" }, { "epoch": 0.20063262539539087, "eta": "205:41:36", "grad_norm": 0.0076, "loss": 0.0453, "lr": "4.986e-05", "step": 1332, "steps": "23.24s,1332/33195" }, { "epoch": 0.20078325048953155, "eta": "205:25:17", "grad_norm": 0.0074, "loss": 0.0525, "lr": "4.986e-05", "step": 1333, "steps": "23.21s,1333/33195" }, { "epoch": 0.20093387558367223, "eta": "205:19:35", "grad_norm": 0.0124, "loss": 0.0389, "lr": "4.986e-05", "step": 1334, "steps": "23.2s,1334/33195" }, { "epoch": 0.20108450067781292, "eta": "204:47:20", "grad_norm": 0.0092, "loss": 0.0493, "lr": "4.986e-05", "step": 1335, "steps": "23.14s,1335/33195" }, { "epoch": 0.2012351257719536, "eta": "201:46:25", "grad_norm": 0.0082, "loss": 0.034, "lr": "4.985e-05", "step": 1336, "steps": "22.8s,1336/33195" }, { "epoch": 0.20138575086609428, "eta": "207:20:32", "grad_norm": 0.014, "loss": 0.0391, "lr": "4.985e-05", "step": 1337, "steps": "23.43s,1337/33195" }, { "epoch": 0.20153637596023496, "eta": "205:07:25", "grad_norm": 0.009, "loss": 0.0503, "lr": "4.985e-05", "step": 1338, "steps": "23.18s,1338/33195" }, { "epoch": 0.20168700105437565, "eta": "205:17:39", "grad_norm": 0.0069, "loss": 0.0353, "lr": "4.985e-05", "step": 1339, "steps": "23.2s,1339/33195" }, { "epoch": 0.20183762614851633, "eta": "204:45:24", "grad_norm": 0.0074, "loss": 0.0503, "lr": "4.985e-05", "step": 1340, "steps": "23.14s,1340/33195" }, { "epoch": 0.201988251242657, "eta": "206:04:39", "grad_norm": 0.0072, "loss": 0.0463, "lr": "4.985e-05", "step": 1341, "steps": "23.29s,1341/33195" }, { "epoch": 0.20213887633679772, "eta": "206:46:44", "grad_norm": 0.0083, "loss": 0.0598, "lr": "4.985e-05", "step": 1342, "steps": "23.37s,1342/33195" }, { "epoch": 0.2022895014309384, "eta": "203:29:56", "grad_norm": 0.0076, "loss": 0.0676, "lr": "4.985e-05", "step": 1343, "steps": "23.0s,1343/33195" }, { "epoch": 0.2024401265250791, "eta": "204:43:52", "grad_norm": 0.0076, "loss": 0.0614, "lr": "4.985e-05", "step": 1344, "steps": "23.14s,1344/33195" }, { "epoch": 0.20259075161921977, "eta": "205:52:29", "grad_norm": 0.0081, "loss": 0.0386, "lr": "4.985e-05", "step": 1345, "steps": "23.27s,1345/33195" }, { "epoch": 0.20274137671336045, "eta": "204:43:05", "grad_norm": 0.0068, "loss": 0.0653, "lr": "4.985e-05", "step": 1346, "steps": "23.14s,1346/33195" }, { "epoch": 0.20289200180750114, "eta": "205:19:52", "grad_norm": 0.0089, "loss": 0.054, "lr": "4.985e-05", "step": 1347, "steps": "23.21s,1347/33195" }, { "epoch": 0.20304262690164182, "eta": "208:14:38", "grad_norm": 0.007, "loss": 0.0564, "lr": "4.985e-05", "step": 1348, "steps": "23.54s,1348/33195" }, { "epoch": 0.2031932519957825, "eta": "206:54:37", "grad_norm": 0.0091, "loss": 0.0447, "lr": "4.985e-05", "step": 1349, "steps": "23.39s,1349/33195" }, { "epoch": 0.2033438770899232, "eta": "205:34:37", "grad_norm": 0.0074, "loss": 0.0681, "lr": "4.985e-05", "step": 1350, "steps": "23.24s,1350/33195" }, { "epoch": 0.20349450218406387, "eta": "204:57:05", "grad_norm": 0.0067, "loss": 0.0589, "lr": "4.985e-05", "step": 1351, "steps": "23.17s,1351/33195" }, { "epoch": 0.20364512727820455, "eta": "208:07:45", "grad_norm": 0.0075, "loss": 0.0508, "lr": "4.985e-05", "step": 1352, "steps": "23.53s,1352/33195" }, { "epoch": 0.20379575237234523, "eta": "207:08:59", "grad_norm": 0.0082, "loss": 0.0508, "lr": "4.985e-05", "step": 1353, "steps": "23.42s,1353/33195" }, { "epoch": 0.20394637746648592, "eta": "205:01:14", "grad_norm": 0.0056, "loss": 0.0579, "lr": "4.985e-05", "step": 1354, "steps": "23.18s,1354/33195" }, { "epoch": 0.2040970025606266, "eta": "208:22:30", "grad_norm": 0.011, "loss": 0.0601, "lr": "4.985e-05", "step": 1355, "steps": "23.56s,1355/33195" }, { "epoch": 0.20424762765476728, "eta": "206:46:35", "grad_norm": 0.0067, "loss": 0.0605, "lr": "4.985e-05", "step": 1356, "steps": "23.38s,1356/33195" }, { "epoch": 0.20439825274890797, "eta": "205:10:41", "grad_norm": 0.0066, "loss": 0.0512, "lr": "4.985e-05", "step": 1357, "steps": "23.2s,1357/33195" }, { "epoch": 0.20454887784304865, "eta": "207:17:39", "grad_norm": 0.0077, "loss": 0.0574, "lr": "4.985e-05", "step": 1358, "steps": "23.44s,1358/33195" }, { "epoch": 0.20469950293718933, "eta": "206:13:35", "grad_norm": 0.0074, "loss": 0.0447, "lr": "4.985e-05", "step": 1359, "steps": "23.32s,1359/33195" }, { "epoch": 0.20485012803133001, "eta": "203:49:56", "grad_norm": 0.0078, "loss": 0.0438, "lr": "4.985e-05", "step": 1360, "steps": "23.05s,1360/33195" }, { "epoch": 0.2050007531254707, "eta": "205:25:03", "grad_norm": 0.0067, "loss": 0.0507, "lr": "4.985e-05", "step": 1361, "steps": "23.23s,1361/33195" }, { "epoch": 0.20515137821961138, "eta": "204:47:32", "grad_norm": 0.0082, "loss": 0.0727, "lr": "4.985e-05", "step": 1362, "steps": "23.16s,1362/33195" }, { "epoch": 0.20530200331375206, "eta": "203:43:29", "grad_norm": 0.0074, "loss": 0.0641, "lr": "4.985e-05", "step": 1363, "steps": "23.04s,1363/33195" }, { "epoch": 0.20545262840789275, "eta": "206:48:47", "grad_norm": 0.007, "loss": 0.0515, "lr": "4.985e-05", "step": 1364, "steps": "23.39s,1364/33195" }, { "epoch": 0.20560325350203343, "eta": "205:23:30", "grad_norm": 0.0073, "loss": 0.0449, "lr": "4.985e-05", "step": 1365, "steps": "23.23s,1365/33195" }, { "epoch": 0.2057538785961741, "eta": "205:28:25", "grad_norm": 0.0082, "loss": 0.0702, "lr": "4.985e-05", "step": 1366, "steps": "23.24s,1366/33195" }, { "epoch": 0.2059045036903148, "eta": "204:45:36", "grad_norm": 0.0082, "loss": 0.0509, "lr": "4.985e-05", "step": 1367, "steps": "23.16s,1367/33195" }, { "epoch": 0.20605512878445548, "eta": "208:49:13", "grad_norm": 0.0075, "loss": 0.055, "lr": "4.985e-05", "step": 1368, "steps": "23.62s,1368/33195" }, { "epoch": 0.2062057538785962, "eta": "207:45:11", "grad_norm": 0.0071, "loss": 0.0573, "lr": "4.985e-05", "step": 1369, "steps": "23.5s,1369/33195" }, { "epoch": 0.20635637897273687, "eta": "205:05:40", "grad_norm": 0.0074, "loss": 0.0399, "lr": "4.985e-05", "step": 1370, "steps": "23.2s,1370/33195" }, { "epoch": 0.20650700406687755, "eta": "208:05:36", "grad_norm": 0.0075, "loss": 0.0364, "lr": "4.985e-05", "step": 1371, "steps": "23.54s,1371/33195" }, { "epoch": 0.20665762916101824, "eta": "205:31:24", "grad_norm": 0.0082, "loss": 0.0562, "lr": "4.985e-05", "step": 1372, "steps": "23.25s,1372/33195" }, { "epoch": 0.20680825425515892, "eta": "203:39:38", "grad_norm": 0.0093, "loss": 0.0537, "lr": "4.985e-05", "step": 1373, "steps": "23.04s,1373/33195" }, { "epoch": 0.2069588793492996, "eta": "205:51:51", "grad_norm": 0.0095, "loss": 0.0548, "lr": "4.985e-05", "step": 1374, "steps": "23.29s,1374/33195" }, { "epoch": 0.20710950444344028, "eta": "207:42:50", "grad_norm": 0.0094, "loss": 0.0447, "lr": "4.984e-05", "step": 1375, "steps": "23.5s,1375/33195" }, { "epoch": 0.20726012953758097, "eta": "208:08:57", "grad_norm": 0.0084, "loss": 0.0512, "lr": "4.984e-05", "step": 1376, "steps": "23.55s,1376/33195" }, { "epoch": 0.20741075463172165, "eta": "205:24:10", "grad_norm": 0.0068, "loss": 0.0657, "lr": "4.984e-05", "step": 1377, "steps": "23.24s,1377/33195" }, { "epoch": 0.20756137972586233, "eta": "203:27:07", "grad_norm": 0.0079, "loss": 0.0495, "lr": "4.984e-05", "step": 1378, "steps": "23.02s,1378/33195" }, { "epoch": 0.20771200482000302, "eta": "207:04:08", "grad_norm": 0.0128, "loss": 0.0429, "lr": "4.984e-05", "step": 1379, "steps": "23.43s,1379/33195" }, { "epoch": 0.2078626299141437, "eta": "204:35:17", "grad_norm": 0.0072, "loss": 0.0517, "lr": "4.984e-05", "step": 1380, "steps": "23.15s,1380/33195" }, { "epoch": 0.20801325500828438, "eta": "204:24:17", "grad_norm": 0.0093, "loss": 0.068, "lr": "4.984e-05", "step": 1381, "steps": "23.13s,1381/33195" }, { "epoch": 0.20816388010242506, "eta": "208:33:06", "grad_norm": 0.0079, "loss": 0.0418, "lr": "4.984e-05", "step": 1382, "steps": "23.6s,1382/33195" }, { "epoch": 0.20831450519656575, "eta": "205:43:03", "grad_norm": 0.0079, "loss": 0.0754, "lr": "4.984e-05", "step": 1383, "steps": "23.28s,1383/33195" }, { "epoch": 0.20846513029070643, "eta": "208:21:43", "grad_norm": 0.0079, "loss": 0.0498, "lr": "4.984e-05", "step": 1384, "steps": "23.58s,1384/33195" }, { "epoch": 0.2086157553848471, "eta": "204:49:15", "grad_norm": 0.0076, "loss": 0.0495, "lr": "4.984e-05", "step": 1385, "steps": "23.18s,1385/33195" }, { "epoch": 0.2087663804789878, "eta": "207:54:25", "grad_norm": 0.0068, "loss": 0.047, "lr": "4.984e-05", "step": 1386, "steps": "23.53s,1386/33195" }, { "epoch": 0.20891700557312848, "eta": "204:43:11", "grad_norm": 0.0079, "loss": 0.0361, "lr": "4.984e-05", "step": 1387, "steps": "23.17s,1387/33195" }, { "epoch": 0.20906763066726916, "eta": "204:53:24", "grad_norm": 0.007, "loss": 0.066, "lr": "4.984e-05", "step": 1388, "steps": "23.19s,1388/33195" }, { "epoch": 0.20921825576140984, "eta": "204:58:19", "grad_norm": 0.0071, "loss": 0.0489, "lr": "4.984e-05", "step": 1389, "steps": "23.2s,1389/33195" }, { "epoch": 0.20936888085555053, "eta": "207:05:09", "grad_norm": 0.0084, "loss": 0.0448, "lr": "4.984e-05", "step": 1390, "steps": "23.44s,1390/33195" }, { "epoch": 0.2095195059496912, "eta": "204:46:56", "grad_norm": 0.0093, "loss": 0.0634, "lr": "4.984e-05", "step": 1391, "steps": "23.18s,1391/33195" }, { "epoch": 0.2096701310438319, "eta": "208:23:52", "grad_norm": 0.0072, "loss": 0.0507, "lr": "4.984e-05", "step": 1392, "steps": "23.59s,1392/33195" }, { "epoch": 0.20982075613797257, "eta": "205:12:40", "grad_norm": 0.0074, "loss": 0.0665, "lr": "4.984e-05", "step": 1393, "steps": "23.23s,1393/33195" }, { "epoch": 0.20997138123211326, "eta": "206:58:17", "grad_norm": 0.0079, "loss": 0.0492, "lr": "4.984e-05", "step": 1394, "steps": "23.43s,1394/33195" }, { "epoch": 0.21012200632625394, "eta": "208:43:54", "grad_norm": 0.0087, "loss": 0.0547, "lr": "4.984e-05", "step": 1395, "steps": "23.63s,1395/33195" }, { "epoch": 0.21027263142039465, "eta": "204:55:36", "grad_norm": 0.0078, "loss": 0.0541, "lr": "4.984e-05", "step": 1396, "steps": "23.2s,1396/33195" }, { "epoch": 0.21042325651453533, "eta": "205:00:31", "grad_norm": 0.0066, "loss": 0.041, "lr": "4.984e-05", "step": 1397, "steps": "23.21s,1397/33195" }, { "epoch": 0.21057388160867602, "eta": "205:05:26", "grad_norm": 0.0081, "loss": 0.0537, "lr": "4.984e-05", "step": 1398, "steps": "23.22s,1398/33195" }, { "epoch": 0.2107245067028167, "eta": "202:52:34", "grad_norm": 0.0098, "loss": 0.0323, "lr": "4.984e-05", "step": 1399, "steps": "22.97s,1399/33195" }, { "epoch": 0.21087513179695738, "eta": "205:20:33", "grad_norm": 0.0071, "loss": 0.0564, "lr": "4.984e-05", "step": 1400, "steps": "23.25s,1400/33195" }, { "epoch": 0.21102575689109807, "eta": "419:09:03", "grad_norm": 0.0071, "loss": 0.0559, "lr": "4.984e-05", "step": 1401, "steps": "47.46s,1401/33195" }, { "epoch": 0.21117638198523875, "eta": "204:05:36", "grad_norm": 0.0081, "loss": 0.051, "lr": "4.984e-05", "step": 1402, "steps": "23.11s,1402/33195" }, { "epoch": 0.21132700707937943, "eta": "204:31:42", "grad_norm": 0.0075, "loss": 0.0764, "lr": "4.984e-05", "step": 1403, "steps": "23.16s,1403/33195" }, { "epoch": 0.2114776321735201, "eta": "208:13:51", "grad_norm": 0.0079, "loss": 0.0534, "lr": "4.984e-05", "step": 1404, "steps": "23.58s,1404/33195" }, { "epoch": 0.2116282572676608, "eta": "205:02:43", "grad_norm": 0.0065, "loss": 0.0337, "lr": "4.984e-05", "step": 1405, "steps": "23.22s,1405/33195" }, { "epoch": 0.21177888236180148, "eta": "204:30:33", "grad_norm": 0.0078, "loss": 0.0637, "lr": "4.984e-05", "step": 1406, "steps": "23.16s,1406/33195" }, { "epoch": 0.21192950745594216, "eta": "205:07:15", "grad_norm": 0.0073, "loss": 0.0544, "lr": "4.984e-05", "step": 1407, "steps": "23.23s,1407/33195" }, { "epoch": 0.21208013255008284, "eta": "206:05:08", "grad_norm": 0.0092, "loss": 0.037, "lr": "4.984e-05", "step": 1408, "steps": "23.34s,1408/33195" }, { "epoch": 0.21223075764422353, "eta": "205:43:33", "grad_norm": 0.0064, "loss": 0.0582, "lr": "4.984e-05", "step": 1409, "steps": "23.3s,1409/33195" }, { "epoch": 0.2123813827383642, "eta": "204:55:29", "grad_norm": 0.0081, "loss": 0.0395, "lr": "4.984e-05", "step": 1410, "steps": "23.21s,1410/33195" }, { "epoch": 0.2125320078325049, "eta": "205:26:53", "grad_norm": 0.0066, "loss": 0.0538, "lr": "4.984e-05", "step": 1411, "steps": "23.27s,1411/33195" }, { "epoch": 0.21268263292664558, "eta": "204:49:25", "grad_norm": 0.0069, "loss": 0.0357, "lr": "4.983e-05", "step": 1412, "steps": "23.2s,1412/33195" }, { "epoch": 0.21283325802078626, "eta": "204:49:02", "grad_norm": 0.0079, "loss": 0.0706, "lr": "4.983e-05", "step": 1413, "steps": "23.2s,1413/33195" }, { "epoch": 0.21298388311492694, "eta": "204:38:03", "grad_norm": 0.0074, "loss": 0.0639, "lr": "4.983e-05", "step": 1414, "steps": "23.18s,1414/33195" }, { "epoch": 0.21313450820906762, "eta": "205:14:45", "grad_norm": 0.011, "loss": 0.0714, "lr": "4.983e-05", "step": 1415, "steps": "23.25s,1415/33195" }, { "epoch": 0.2132851333032083, "eta": "206:07:19", "grad_norm": 0.0088, "loss": 0.0524, "lr": "4.983e-05", "step": 1416, "steps": "23.35s,1416/33195" }, { "epoch": 0.213435758397349, "eta": "204:58:05", "grad_norm": 0.0065, "loss": 0.046, "lr": "4.983e-05", "step": 1417, "steps": "23.22s,1417/33195" }, { "epoch": 0.21358638349148967, "eta": "204:47:06", "grad_norm": 0.0061, "loss": 0.0707, "lr": "4.983e-05", "step": 1418, "steps": "23.2s,1418/33195" }, { "epoch": 0.21373700858563036, "eta": "205:23:47", "grad_norm": 0.0067, "loss": 0.0583, "lr": "4.983e-05", "step": 1419, "steps": "23.27s,1419/33195" }, { "epoch": 0.21388763367977104, "eta": "202:55:07", "grad_norm": 0.0098, "loss": 0.0417, "lr": "4.983e-05", "step": 1420, "steps": "22.99s,1420/33195" }, { "epoch": 0.21403825877391172, "eta": "206:47:44", "grad_norm": 0.0077, "loss": 0.0491, "lr": "4.983e-05", "step": 1421, "steps": "23.43s,1421/33195" }, { "epoch": 0.2141888838680524, "eta": "204:34:58", "grad_norm": 0.0076, "loss": 0.0434, "lr": "4.983e-05", "step": 1422, "steps": "23.18s,1422/33195" }, { "epoch": 0.21433950896219311, "eta": "202:38:05", "grad_norm": 0.0074, "loss": 0.0506, "lr": "4.983e-05", "step": 1423, "steps": "22.96s,1423/33195" }, { "epoch": 0.2144901340563338, "eta": "204:02:25", "grad_norm": 0.0094, "loss": 0.0469, "lr": "4.983e-05", "step": 1424, "steps": "23.12s,1424/33195" }, { "epoch": 0.21464075915047448, "eta": "204:49:41", "grad_norm": 0.009, "loss": 0.039, "lr": "4.983e-05", "step": 1425, "steps": "23.21s,1425/33195" }, { "epoch": 0.21479138424461516, "eta": "205:36:57", "grad_norm": 0.0075, "loss": 0.0628, "lr": "4.983e-05", "step": 1426, "steps": "23.3s,1426/33195" }, { "epoch": 0.21494200933875585, "eta": "206:24:13", "grad_norm": 0.0105, "loss": 0.0624, "lr": "4.983e-05", "step": 1427, "steps": "23.39s,1427/33195" }, { "epoch": 0.21509263443289653, "eta": "204:48:32", "grad_norm": 0.014, "loss": 0.0481, "lr": "4.983e-05", "step": 1428, "steps": "23.21s,1428/33195" }, { "epoch": 0.2152432595270372, "eta": "204:21:40", "grad_norm": 0.0078, "loss": 0.0686, "lr": "4.983e-05", "step": 1429, "steps": "23.16s,1429/33195" }, { "epoch": 0.2153938846211779, "eta": "205:40:42", "grad_norm": 0.0063, "loss": 0.0501, "lr": "4.983e-05", "step": 1430, "steps": "23.31s,1430/33195" }, { "epoch": 0.21554450971531858, "eta": "210:47:21", "grad_norm": 0.0078, "loss": 0.0487, "lr": "4.983e-05", "step": 1431, "steps": "23.89s,1431/33195" }, { "epoch": 0.21569513480945926, "eta": "204:04:38", "grad_norm": 0.0129, "loss": 0.0562, "lr": "4.983e-05", "step": 1432, "steps": "23.13s,1432/33195" }, { "epoch": 0.21584575990359994, "eta": "202:28:57", "grad_norm": 0.006, "loss": 0.0562, "lr": "4.983e-05", "step": 1433, "steps": "22.95s,1433/33195" }, { "epoch": 0.21599638499774063, "eta": "207:03:50", "grad_norm": 0.0087, "loss": 0.0704, "lr": "4.983e-05", "step": 1434, "steps": "23.47s,1434/33195" }, { "epoch": 0.2161470100918813, "eta": "205:07:00", "grad_norm": 0.0058, "loss": 0.0443, "lr": "4.983e-05", "step": 1435, "steps": "23.25s,1435/33195" }, { "epoch": 0.216297635186022, "eta": "205:59:32", "grad_norm": 0.0072, "loss": 0.0499, "lr": "4.983e-05", "step": 1436, "steps": "23.35s,1436/33195" }, { "epoch": 0.21644826028016267, "eta": "202:38:01", "grad_norm": 0.0072, "loss": 0.0695, "lr": "4.983e-05", "step": 1437, "steps": "22.97s,1437/33195" }, { "epoch": 0.21659888537430336, "eta": "204:49:57", "grad_norm": 0.0097, "loss": 0.0707, "lr": "4.983e-05", "step": 1438, "steps": "23.22s,1438/33195" }, { "epoch": 0.21674951046844404, "eta": "204:12:31", "grad_norm": 0.0101, "loss": 0.0394, "lr": "4.983e-05", "step": 1439, "steps": "23.15s,1439/33195" }, { "epoch": 0.21690013556258472, "eta": "204:22:43", "grad_norm": 0.0088, "loss": 0.0611, "lr": "4.983e-05", "step": 1440, "steps": "23.17s,1440/33195" }, { "epoch": 0.2170507606567254, "eta": "205:57:35", "grad_norm": 0.0081, "loss": 0.048, "lr": "4.983e-05", "step": 1441, "steps": "23.35s,1441/33195" }, { "epoch": 0.2172013857508661, "eta": "204:58:59", "grad_norm": 0.0065, "loss": 0.0511, "lr": "4.983e-05", "step": 1442, "steps": "23.24s,1442/33195" }, { "epoch": 0.21735201084500677, "eta": "204:53:18", "grad_norm": 0.0064, "loss": 0.054, "lr": "4.983e-05", "step": 1443, "steps": "23.23s,1443/33195" }, { "epoch": 0.21750263593914745, "eta": "205:08:48", "grad_norm": 0.008, "loss": 0.0571, "lr": "4.983e-05", "step": 1444, "steps": "23.26s,1444/33195" }, { "epoch": 0.21765326103328814, "eta": "207:57:45", "grad_norm": 0.0073, "loss": 0.0741, "lr": "4.983e-05", "step": 1445, "steps": "23.58s,1445/33195" }, { "epoch": 0.21780388612742882, "eta": "207:57:21", "grad_norm": 0.0083, "loss": 0.0416, "lr": "4.983e-05", "step": 1446, "steps": "23.58s,1446/33195" }, { "epoch": 0.2179545112215695, "eta": "207:09:20", "grad_norm": 0.0112, "loss": 0.0628, "lr": "4.983e-05", "step": 1447, "steps": "23.49s,1447/33195" }, { "epoch": 0.21810513631571019, "eta": "206:37:12", "grad_norm": 0.0071, "loss": 0.0555, "lr": "4.982e-05", "step": 1448, "steps": "23.43s,1448/33195" }, { "epoch": 0.21825576140985087, "eta": "204:13:57", "grad_norm": 0.0098, "loss": 0.0443, "lr": "4.982e-05", "step": 1449, "steps": "23.16s,1449/33195" }, { "epoch": 0.21840638650399158, "eta": "204:18:51", "grad_norm": 0.0068, "loss": 0.0546, "lr": "4.982e-05", "step": 1450, "steps": "23.17s,1450/33195" }, { "epoch": 0.21855701159813226, "eta": "206:20:09", "grad_norm": 0.0081, "loss": 0.0413, "lr": "4.982e-05", "step": 1451, "steps": "23.4s,1451/33195" }, { "epoch": 0.21870763669227294, "eta": "205:05:42", "grad_norm": 0.0085, "loss": 0.0619, "lr": "4.982e-05", "step": 1452, "steps": "23.26s,1452/33195" }, { "epoch": 0.21885826178641363, "eta": "205:21:11", "grad_norm": 0.0066, "loss": 0.0633, "lr": "4.982e-05", "step": 1453, "steps": "23.29s,1453/33195" }, { "epoch": 0.2190088868805543, "eta": "205:57:49", "grad_norm": 0.0068, "loss": 0.0536, "lr": "4.982e-05", "step": 1454, "steps": "23.36s,1454/33195" }, { "epoch": 0.219159511974695, "eta": "203:39:54", "grad_norm": 0.0077, "loss": 0.0548, "lr": "4.982e-05", "step": 1455, "steps": "23.1s,1455/33195" }, { "epoch": 0.21931013706883568, "eta": "206:18:12", "grad_norm": 0.0097, "loss": 0.0549, "lr": "4.982e-05", "step": 1456, "steps": "23.4s,1456/33195" }, { "epoch": 0.21946076216297636, "eta": "204:32:01", "grad_norm": 0.0071, "loss": 0.0446, "lr": "4.982e-05", "step": 1457, "steps": "23.2s,1457/33195" }, { "epoch": 0.21961138725711704, "eta": "203:44:02", "grad_norm": 0.007, "loss": 0.0475, "lr": "4.982e-05", "step": 1458, "steps": "23.11s,1458/33195" }, { "epoch": 0.21976201235125772, "eta": "205:08:16", "grad_norm": 0.007, "loss": 0.0515, "lr": "4.982e-05", "step": 1459, "steps": "23.27s,1459/33195" }, { "epoch": 0.2199126374453984, "eta": "207:09:32", "grad_norm": 0.0072, "loss": 0.0427, "lr": "4.982e-05", "step": 1460, "steps": "23.5s,1460/33195" }, { "epoch": 0.2200632625395391, "eta": "204:04:02", "grad_norm": 0.0081, "loss": 0.0676, "lr": "4.982e-05", "step": 1461, "steps": "23.15s,1461/33195" }, { "epoch": 0.22021388763367977, "eta": "206:42:18", "grad_norm": 0.007, "loss": 0.0511, "lr": "4.982e-05", "step": 1462, "steps": "23.45s,1462/33195" }, { "epoch": 0.22036451272782046, "eta": "202:12:12", "grad_norm": 0.0062, "loss": 0.0485, "lr": "4.982e-05", "step": 1463, "steps": "22.94s,1463/33195" }, { "epoch": 0.22051513782196114, "eta": "203:52:18", "grad_norm": 0.0061, "loss": 0.06, "lr": "4.982e-05", "step": 1464, "steps": "23.13s,1464/33195" }, { "epoch": 0.22066576291610182, "eta": "207:07:35", "grad_norm": 0.0064, "loss": 0.0395, "lr": "4.982e-05", "step": 1465, "steps": "23.5s,1465/33195" }, { "epoch": 0.2208163880102425, "eta": "202:32:12", "grad_norm": 0.009, "loss": 0.0431, "lr": "4.982e-05", "step": 1466, "steps": "22.98s,1466/33195" }, { "epoch": 0.2209670131043832, "eta": "204:12:17", "grad_norm": 0.0062, "loss": 0.0394, "lr": "4.982e-05", "step": 1467, "steps": "23.17s,1467/33195" }, { "epoch": 0.22111763819852387, "eta": "207:32:50", "grad_norm": 0.0069, "loss": 0.0436, "lr": "4.982e-05", "step": 1468, "steps": "23.55s,1468/33195" }, { "epoch": 0.22126826329266455, "eta": "202:25:46", "grad_norm": 0.0072, "loss": 0.0322, "lr": "4.982e-05", "step": 1469, "steps": "22.97s,1469/33195" }, { "epoch": 0.22141888838680523, "eta": "204:42:51", "grad_norm": 0.0072, "loss": 0.0711, "lr": "4.982e-05", "step": 1470, "steps": "23.23s,1470/33195" }, { "epoch": 0.22156951348094592, "eta": "201:21:33", "grad_norm": 0.007, "loss": 0.0845, "lr": "4.982e-05", "step": 1471, "steps": "22.85s,1471/33195" }, { "epoch": 0.2217201385750866, "eta": "203:43:55", "grad_norm": 0.0074, "loss": 0.0446, "lr": "4.982e-05", "step": 1472, "steps": "23.12s,1472/33195" }, { "epoch": 0.22187076366922728, "eta": "204:52:16", "grad_norm": 0.0148, "loss": 0.0396, "lr": "4.982e-05", "step": 1473, "steps": "23.25s,1473/33195" }, { "epoch": 0.22202138876336797, "eta": "207:14:37", "grad_norm": 0.0095, "loss": 0.0631, "lr": "4.982e-05", "step": 1474, "steps": "23.52s,1474/33195" }, { "epoch": 0.22217201385750865, "eta": "204:25:04", "grad_norm": 0.0109, "loss": 0.0464, "lr": "4.982e-05", "step": 1475, "steps": "23.2s,1475/33195" }, { "epoch": 0.22232263895164933, "eta": "203:52:57", "grad_norm": 0.0092, "loss": 0.0509, "lr": "4.982e-05", "step": 1476, "steps": "23.14s,1476/33195" }, { "epoch": 0.22247326404579004, "eta": "206:57:35", "grad_norm": 0.0068, "loss": 0.0627, "lr": "4.982e-05", "step": 1477, "steps": "23.49s,1477/33195" }, { "epoch": 0.22262388913993073, "eta": "202:17:02", "grad_norm": 0.0085, "loss": 0.0473, "lr": "4.982e-05", "step": 1478, "steps": "22.96s,1478/33195" }, { "epoch": 0.2227745142340714, "eta": "204:02:22", "grad_norm": 0.0121, "loss": 0.0547, "lr": "4.982e-05", "step": 1479, "steps": "23.16s,1479/33195" }, { "epoch": 0.2229251393282121, "eta": "202:26:50", "grad_norm": 0.0067, "loss": 0.0587, "lr": "4.982e-05", "step": 1480, "steps": "22.98s,1480/33195" }, { "epoch": 0.22307576442235277, "eta": "201:38:53", "grad_norm": 0.0081, "loss": 0.0407, "lr": "4.982e-05", "step": 1481, "steps": "22.89s,1481/33195" }, { "epoch": 0.22322638951649346, "eta": "203:34:47", "grad_norm": 0.0072, "loss": 0.0272, "lr": "4.982e-05", "step": 1482, "steps": "23.11s,1482/33195" }, { "epoch": 0.22337701461063414, "eta": "207:37:31", "grad_norm": 0.0098, "loss": 0.0474, "lr": "4.981e-05", "step": 1483, "steps": "23.57s,1483/33195" }, { "epoch": 0.22352763970477482, "eta": "204:53:17", "grad_norm": 0.0093, "loss": 0.0472, "lr": "4.981e-05", "step": 1484, "steps": "23.26s,1484/33195" }, { "epoch": 0.2236782647989155, "eta": "203:28:21", "grad_norm": 0.0099, "loss": 0.0518, "lr": "4.981e-05", "step": 1485, "steps": "23.1s,1485/33195" }, { "epoch": 0.2238288898930562, "eta": "203:17:23", "grad_norm": 0.0082, "loss": 0.037, "lr": "4.981e-05", "step": 1486, "steps": "23.08s,1486/33195" }, { "epoch": 0.22397951498719687, "eta": "207:04:15", "grad_norm": 0.0064, "loss": 0.045, "lr": "4.981e-05", "step": 1487, "steps": "23.51s,1487/33195" }, { "epoch": 0.22413014008133755, "eta": "203:53:37", "grad_norm": 0.0084, "loss": 0.0319, "lr": "4.981e-05", "step": 1488, "steps": "23.15s,1488/33195" }, { "epoch": 0.22428076517547824, "eta": "202:33:58", "grad_norm": 0.0079, "loss": 0.0561, "lr": "4.981e-05", "step": 1489, "steps": "23.0s,1489/33195" }, { "epoch": 0.22443139026961892, "eta": "204:56:15", "grad_norm": 0.0106, "loss": 0.0585, "lr": "4.981e-05", "step": 1490, "steps": "23.27s,1490/33195" }, { "epoch": 0.2245820153637596, "eta": "204:50:35", "grad_norm": 0.0062, "loss": 0.0633, "lr": "4.981e-05", "step": 1491, "steps": "23.26s,1491/33195" }, { "epoch": 0.22473264045790028, "eta": "203:41:30", "grad_norm": 0.0091, "loss": 0.0518, "lr": "4.981e-05", "step": 1492, "steps": "23.13s,1492/33195" }, { "epoch": 0.22488326555204097, "eta": "204:18:06", "grad_norm": 0.0081, "loss": 0.0463, "lr": "4.981e-05", "step": 1493, "steps": "23.2s,1493/33195" }, { "epoch": 0.22503389064618165, "eta": "203:35:27", "grad_norm": 0.0085, "loss": 0.0713, "lr": "4.981e-05", "step": 1494, "steps": "23.12s,1494/33195" }, { "epoch": 0.22518451574032233, "eta": "204:59:36", "grad_norm": 0.0084, "loss": 0.0598, "lr": "4.981e-05", "step": 1495, "steps": "23.28s,1495/33195" }, { "epoch": 0.22533514083446302, "eta": "207:00:43", "grad_norm": 0.0072, "loss": 0.0499, "lr": "4.981e-05", "step": 1496, "steps": "23.51s,1496/33195" }, { "epoch": 0.2254857659286037, "eta": "204:32:24", "grad_norm": 0.0079, "loss": 0.0522, "lr": "4.981e-05", "step": 1497, "steps": "23.23s,1497/33195" }, { "epoch": 0.22563639102274438, "eta": "206:59:56", "grad_norm": 0.0105, "loss": 0.0719, "lr": "4.981e-05", "step": 1498, "steps": "23.51s,1498/33195" }, { "epoch": 0.22578701611688506, "eta": "206:22:34", "grad_norm": 0.0077, "loss": 0.0529, "lr": "4.981e-05", "step": 1499, "steps": "23.44s,1499/33195" }, { "epoch": 0.22593764121102575, "eta": "203:43:42", "grad_norm": 0.0121, "loss": 0.0359, "lr": "4.981e-05", "step": 1500, "steps": "23.14s,1500/33195" }, { "epoch": 0.22608826630516643, "eta": "206:27:04", "grad_norm": 0.0087, "loss": 0.0656, "lr": "4.981e-05", "step": 1501, "steps": "23.45s,1501/33195" }, { "epoch": 0.2262388913993071, "eta": "205:23:17", "grad_norm": 0.0084, "loss": 0.0697, "lr": "4.981e-05", "step": 1502, "steps": "23.33s,1502/33195" }, { "epoch": 0.2263895164934478, "eta": "205:22:54", "grad_norm": 0.0081, "loss": 0.0608, "lr": "4.981e-05", "step": 1503, "steps": "23.33s,1503/33195" }, { "epoch": 0.2265401415875885, "eta": "204:50:49", "grad_norm": 0.0073, "loss": 0.0532, "lr": "4.981e-05", "step": 1504, "steps": "23.27s,1504/33195" }, { "epoch": 0.2266907666817292, "eta": "207:28:53", "grad_norm": 0.0142, "loss": 0.0644, "lr": "4.981e-05", "step": 1505, "steps": "23.57s,1505/33195" }, { "epoch": 0.22684139177586987, "eta": "204:39:29", "grad_norm": 0.0082, "loss": 0.0799, "lr": "4.981e-05", "step": 1506, "steps": "23.25s,1506/33195" }, { "epoch": 0.22699201687001055, "eta": "201:34:15", "grad_norm": 0.0091, "loss": 0.0487, "lr": "4.981e-05", "step": 1507, "steps": "22.9s,1507/33195" }, { "epoch": 0.22714264196415124, "eta": "203:51:10", "grad_norm": 0.0067, "loss": 0.0535, "lr": "4.981e-05", "step": 1508, "steps": "23.16s,1508/33195" }, { "epoch": 0.22729326705829192, "eta": "202:31:34", "grad_norm": 0.0068, "loss": 0.0555, "lr": "4.981e-05", "step": 1509, "steps": "23.01s,1509/33195" }, { "epoch": 0.2274438921524326, "eta": "204:53:46", "grad_norm": 0.0078, "loss": 0.0276, "lr": "4.981e-05", "step": 1510, "steps": "23.28s,1510/33195" }, { "epoch": 0.22759451724657329, "eta": "205:03:57", "grad_norm": 0.016, "loss": 0.047, "lr": "4.981e-05", "step": 1511, "steps": "23.3s,1511/33195" }, { "epoch": 0.22774514234071397, "eta": "206:54:27", "grad_norm": 0.0078, "loss": 0.0365, "lr": "4.981e-05", "step": 1512, "steps": "23.51s,1512/33195" }, { "epoch": 0.22789576743485465, "eta": "203:59:48", "grad_norm": 0.0083, "loss": 0.0592, "lr": "4.981e-05", "step": 1513, "steps": "23.18s,1513/33195" }, { "epoch": 0.22804639252899533, "eta": "203:27:44", "grad_norm": 0.0084, "loss": 0.0456, "lr": "4.981e-05", "step": 1514, "steps": "23.12s,1514/33195" }, { "epoch": 0.22819701762313602, "eta": "207:19:40", "grad_norm": 0.0094, "loss": 0.0678, "lr": "4.981e-05", "step": 1515, "steps": "23.56s,1515/33195" }, { "epoch": 0.2283476427172767, "eta": "203:16:24", "grad_norm": 0.0081, "loss": 0.047, "lr": "4.981e-05", "step": 1516, "steps": "23.1s,1516/33195" }, { "epoch": 0.22849826781141738, "eta": "203:52:59", "grad_norm": 0.0069, "loss": 0.0706, "lr": "4.980e-05", "step": 1517, "steps": "23.17s,1517/33195" }, { "epoch": 0.22864889290555807, "eta": "205:48:45", "grad_norm": 0.0077, "loss": 0.0674, "lr": "4.980e-05", "step": 1518, "steps": "23.39s,1518/33195" }, { "epoch": 0.22879951799969875, "eta": "203:57:29", "grad_norm": 0.0066, "loss": 0.0609, "lr": "4.980e-05", "step": 1519, "steps": "23.18s,1519/33195" }, { "epoch": 0.22895014309383943, "eta": "203:14:52", "grad_norm": 0.0086, "loss": 0.0759, "lr": "4.980e-05", "step": 1520, "steps": "23.1s,1520/33195" }, { "epoch": 0.22910076818798011, "eta": "202:11:08", "grad_norm": 0.0071, "loss": 0.0783, "lr": "4.980e-05", "step": 1521, "steps": "22.98s,1521/33195" }, { "epoch": 0.2292513932821208, "eta": "203:51:03", "grad_norm": 0.0069, "loss": 0.0573, "lr": "4.980e-05", "step": 1522, "steps": "23.17s,1522/33195" }, { "epoch": 0.22940201837626148, "eta": "207:21:49", "grad_norm": 0.01, "loss": 0.0604, "lr": "4.980e-05", "step": 1523, "steps": "23.57s,1523/33195" }, { "epoch": 0.22955264347040216, "eta": "203:34:26", "grad_norm": 0.0091, "loss": 0.0427, "lr": "4.980e-05", "step": 1524, "steps": "23.14s,1524/33195" }, { "epoch": 0.22970326856454285, "eta": "204:11:00", "grad_norm": 0.0069, "loss": 0.0468, "lr": "4.980e-05", "step": 1525, "steps": "23.21s,1525/33195" }, { "epoch": 0.22985389365868353, "eta": "201:53:23", "grad_norm": 0.0084, "loss": 0.052, "lr": "4.980e-05", "step": 1526, "steps": "22.95s,1526/33195" }, { "epoch": 0.2300045187528242, "eta": "205:18:51", "grad_norm": 0.0082, "loss": 0.071, "lr": "4.980e-05", "step": 1527, "steps": "23.34s,1527/33195" }, { "epoch": 0.2301551438469649, "eta": "207:30:24", "grad_norm": 0.0099, "loss": 0.0553, "lr": "4.980e-05", "step": 1528, "steps": "23.59s,1528/33195" }, { "epoch": 0.23030576894110558, "eta": "201:57:31", "grad_norm": 0.0092, "loss": 0.0619, "lr": "4.980e-05", "step": 1529, "steps": "22.96s,1529/33195" }, { "epoch": 0.23045639403524626, "eta": "205:17:41", "grad_norm": 0.0071, "loss": 0.0547, "lr": "4.980e-05", "step": 1530, "steps": "23.34s,1530/33195" }, { "epoch": 0.23060701912938694, "eta": "201:46:12", "grad_norm": 0.0072, "loss": 0.0552, "lr": "4.980e-05", "step": 1531, "steps": "22.94s,1531/33195" }, { "epoch": 0.23075764422352765, "eta": "203:31:21", "grad_norm": 0.0081, "loss": 0.043, "lr": "4.980e-05", "step": 1532, "steps": "23.14s,1532/33195" }, { "epoch": 0.23090826931766834, "eta": "204:50:07", "grad_norm": 0.008, "loss": 0.068, "lr": "4.980e-05", "step": 1533, "steps": "23.29s,1533/33195" }, { "epoch": 0.23105889441180902, "eta": "205:10:51", "grad_norm": 0.0069, "loss": 0.0637, "lr": "4.980e-05", "step": 1534, "steps": "23.33s,1534/33195" }, { "epoch": 0.2312095195059497, "eta": "203:46:02", "grad_norm": 0.0075, "loss": 0.0392, "lr": "4.980e-05", "step": 1535, "steps": "23.17s,1535/33195" }, { "epoch": 0.23136014460009038, "eta": "201:39:00", "grad_norm": 0.0071, "loss": 0.0624, "lr": "4.980e-05", "step": 1536, "steps": "22.93s,1536/33195" }, { "epoch": 0.23151076969423107, "eta": "203:08:19", "grad_norm": 0.0068, "loss": 0.0548, "lr": "4.980e-05", "step": 1537, "steps": "23.1s,1537/33195" }, { "epoch": 0.23166139478837175, "eta": "206:38:59", "grad_norm": 0.0077, "loss": 0.0656, "lr": "4.980e-05", "step": 1538, "steps": "23.5s,1538/33195" }, { "epoch": 0.23181201988251243, "eta": "206:01:40", "grad_norm": 0.0124, "loss": 0.039, "lr": "4.980e-05", "step": 1539, "steps": "23.43s,1539/33195" }, { "epoch": 0.23196264497665312, "eta": "204:57:58", "grad_norm": 0.0079, "loss": 0.0648, "lr": "4.980e-05", "step": 1540, "steps": "23.31s,1540/33195" }, { "epoch": 0.2321132700707938, "eta": "203:12:03", "grad_norm": 0.0062, "loss": 0.0491, "lr": "4.980e-05", "step": 1541, "steps": "23.11s,1541/33195" }, { "epoch": 0.23226389516493448, "eta": "201:20:53", "grad_norm": 0.0073, "loss": 0.044, "lr": "4.980e-05", "step": 1542, "steps": "22.9s,1542/33195" }, { "epoch": 0.23241452025907516, "eta": "205:49:33", "grad_norm": 0.0088, "loss": 0.0414, "lr": "4.980e-05", "step": 1543, "steps": "23.41s,1543/33195" }, { "epoch": 0.23256514535321585, "eta": "206:04:59", "grad_norm": 0.0057, "loss": 0.0542, "lr": "4.980e-05", "step": 1544, "steps": "23.44s,1544/33195" }, { "epoch": 0.23271577044735653, "eta": "203:21:04", "grad_norm": 0.0065, "loss": 0.0415, "lr": "4.980e-05", "step": 1545, "steps": "23.13s,1545/33195" }, { "epoch": 0.2328663955414972, "eta": "206:20:02", "grad_norm": 0.0082, "loss": 0.0682, "lr": "4.980e-05", "step": 1546, "steps": "23.47s,1546/33195" }, { "epoch": 0.2330170206356379, "eta": "204:39:25", "grad_norm": 0.0085, "loss": 0.066, "lr": "4.980e-05", "step": 1547, "steps": "23.28s,1547/33195" }, { "epoch": 0.23316764572977858, "eta": "201:44:58", "grad_norm": 0.0066, "loss": 0.0452, "lr": "4.980e-05", "step": 1548, "steps": "22.95s,1548/33195" }, { "epoch": 0.23331827082391926, "eta": "209:07:38", "grad_norm": 0.0067, "loss": 0.054, "lr": "4.980e-05", "step": 1549, "steps": "23.79s,1549/33195" }, { "epoch": 0.23346889591805994, "eta": "203:08:35", "grad_norm": 0.0081, "loss": 0.0764, "lr": "4.980e-05", "step": 1550, "steps": "23.11s,1550/33195" }, { "epoch": 0.23361952101220063, "eta": "203:39:51", "grad_norm": 0.0086, "loss": 0.0528, "lr": "4.979e-05", "step": 1551, "steps": "23.17s,1551/33195" }, { "epoch": 0.2337701461063413, "eta": "204:00:34", "grad_norm": 0.0093, "loss": 0.0537, "lr": "4.979e-05", "step": 1552, "steps": "23.21s,1552/33195" }, { "epoch": 0.233920771200482, "eta": "206:01:28", "grad_norm": 0.0085, "loss": 0.0415, "lr": "4.979e-05", "step": 1553, "steps": "23.44s,1553/33195" }, { "epoch": 0.23407139629462267, "eta": "206:06:21", "grad_norm": 0.006, "loss": 0.0553, "lr": "4.979e-05", "step": 1554, "steps": "23.45s,1554/33195" }, { "epoch": 0.23422202138876336, "eta": "203:43:35", "grad_norm": 0.0057, "loss": 0.0764, "lr": "4.979e-05", "step": 1555, "steps": "23.18s,1555/33195" }, { "epoch": 0.23437264648290404, "eta": "207:08:51", "grad_norm": 0.0067, "loss": 0.0536, "lr": "4.979e-05", "step": 1556, "steps": "23.57s,1556/33195" }, { "epoch": 0.23452327157704472, "eta": "204:51:21", "grad_norm": 0.0085, "loss": 0.0535, "lr": "4.979e-05", "step": 1557, "steps": "23.31s,1557/33195" }, { "epoch": 0.2346738966711854, "eta": "206:57:31", "grad_norm": 0.0082, "loss": 0.0275, "lr": "4.979e-05", "step": 1558, "steps": "23.55s,1558/33195" }, { "epoch": 0.23482452176532612, "eta": "204:03:07", "grad_norm": 0.0084, "loss": 0.0525, "lr": "4.979e-05", "step": 1559, "steps": "23.22s,1559/33195" }, { "epoch": 0.2349751468594668, "eta": "202:59:28", "grad_norm": 0.011, "loss": 0.062, "lr": "4.979e-05", "step": 1560, "steps": "23.1s,1560/33195" }, { "epoch": 0.23512577195360748, "eta": "203:25:27", "grad_norm": 0.0076, "loss": 0.0506, "lr": "4.979e-05", "step": 1561, "steps": "23.15s,1561/33195" }, { "epoch": 0.23527639704774816, "eta": "203:30:20", "grad_norm": 0.0085, "loss": 0.0627, "lr": "4.979e-05", "step": 1562, "steps": "23.16s,1562/33195" }, { "epoch": 0.23542702214188885, "eta": "202:58:19", "grad_norm": 0.0068, "loss": 0.0477, "lr": "4.979e-05", "step": 1563, "steps": "23.1s,1563/33195" }, { "epoch": 0.23557764723602953, "eta": "203:34:50", "grad_norm": 0.006, "loss": 0.0495, "lr": "4.979e-05", "step": 1564, "steps": "23.17s,1564/33195" }, { "epoch": 0.2357282723301702, "eta": "202:47:00", "grad_norm": 0.0075, "loss": 0.0575, "lr": "4.979e-05", "step": 1565, "steps": "23.08s,1565/33195" }, { "epoch": 0.2358788974243109, "eta": "202:36:04", "grad_norm": 0.0071, "loss": 0.0405, "lr": "4.979e-05", "step": 1566, "steps": "23.06s,1566/33195" }, { "epoch": 0.23602952251845158, "eta": "204:52:44", "grad_norm": 0.0083, "loss": 0.0494, "lr": "4.979e-05", "step": 1567, "steps": "23.32s,1567/33195" }, { "epoch": 0.23618014761259226, "eta": "203:12:12", "grad_norm": 0.0082, "loss": 0.0696, "lr": "4.979e-05", "step": 1568, "steps": "23.13s,1568/33195" }, { "epoch": 0.23633077270673294, "eta": "203:22:21", "grad_norm": 0.006, "loss": 0.034, "lr": "4.979e-05", "step": 1569, "steps": "23.15s,1569/33195" }, { "epoch": 0.23648139780087363, "eta": "203:37:47", "grad_norm": 0.0067, "loss": 0.0408, "lr": "4.979e-05", "step": 1570, "steps": "23.18s,1570/33195" }, { "epoch": 0.2366320228950143, "eta": "203:58:29", "grad_norm": 0.0068, "loss": 0.0469, "lr": "4.979e-05", "step": 1571, "steps": "23.22s,1571/33195" }, { "epoch": 0.236782647989155, "eta": "202:39:02", "grad_norm": 0.0096, "loss": 0.0427, "lr": "4.979e-05", "step": 1572, "steps": "23.07s,1572/33195" }, { "epoch": 0.23693327308329568, "eta": "203:52:26", "grad_norm": 0.007, "loss": 0.0506, "lr": "4.979e-05", "step": 1573, "steps": "23.21s,1573/33195" }, { "epoch": 0.23708389817743636, "eta": "202:54:05", "grad_norm": 0.0081, "loss": 0.0666, "lr": "4.979e-05", "step": 1574, "steps": "23.1s,1574/33195" }, { "epoch": 0.23723452327157704, "eta": "203:46:24", "grad_norm": 0.0054, "loss": 0.0358, "lr": "4.979e-05", "step": 1575, "steps": "23.2s,1575/33195" }, { "epoch": 0.23738514836571772, "eta": "205:57:45", "grad_norm": 0.006, "loss": 0.0438, "lr": "4.979e-05", "step": 1576, "steps": "23.45s,1576/33195" }, { "epoch": 0.2375357734598584, "eta": "203:03:28", "grad_norm": 0.0067, "loss": 0.0444, "lr": "4.979e-05", "step": 1577, "steps": "23.12s,1577/33195" }, { "epoch": 0.2376863985539991, "eta": "202:52:32", "grad_norm": 0.0094, "loss": 0.0509, "lr": "4.979e-05", "step": 1578, "steps": "23.1s,1578/33195" }, { "epoch": 0.23783702364813977, "eta": "203:29:02", "grad_norm": 0.0125, "loss": 0.0416, "lr": "4.979e-05", "step": 1579, "steps": "23.17s,1579/33195" }, { "epoch": 0.23798764874228046, "eta": "203:33:55", "grad_norm": 0.0074, "loss": 0.0602, "lr": "4.979e-05", "step": 1580, "steps": "23.18s,1580/33195" }, { "epoch": 0.23813827383642114, "eta": "202:51:23", "grad_norm": 0.0063, "loss": 0.0308, "lr": "4.979e-05", "step": 1581, "steps": "23.1s,1581/33195" }, { "epoch": 0.23828889893056182, "eta": "202:40:28", "grad_norm": 0.0079, "loss": 0.0508, "lr": "4.979e-05", "step": 1582, "steps": "23.08s,1582/33195" }, { "epoch": 0.2384395240247025, "eta": "202:29:32", "grad_norm": 0.0085, "loss": 0.054, "lr": "4.978e-05", "step": 1583, "steps": "23.06s,1583/33195" }, { "epoch": 0.2385901491188432, "eta": "203:21:50", "grad_norm": 0.0085, "loss": 0.0405, "lr": "4.978e-05", "step": 1584, "steps": "23.16s,1584/33195" }, { "epoch": 0.23874077421298387, "eta": "202:55:07", "grad_norm": 0.0071, "loss": 0.0627, "lr": "4.978e-05", "step": 1585, "steps": "23.11s,1585/33195" }, { "epoch": 0.23889139930712458, "eta": "202:54:43", "grad_norm": 0.0071, "loss": 0.0302, "lr": "4.978e-05", "step": 1586, "steps": "23.11s,1586/33195" }, { "epoch": 0.23904202440126526, "eta": "203:31:13", "grad_norm": 0.0073, "loss": 0.0786, "lr": "4.978e-05", "step": 1587, "steps": "23.18s,1587/33195" }, { "epoch": 0.23919264949540595, "eta": "202:43:25", "grad_norm": 0.0084, "loss": 0.0553, "lr": "4.978e-05", "step": 1588, "steps": "23.09s,1588/33195" }, { "epoch": 0.23934327458954663, "eta": "205:52:40", "grad_norm": 0.0068, "loss": 0.0338, "lr": "4.978e-05", "step": 1589, "steps": "23.45s,1589/33195" }, { "epoch": 0.2394938996836873, "eta": "204:01:40", "grad_norm": 0.0083, "loss": 0.0589, "lr": "4.978e-05", "step": 1590, "steps": "23.24s,1590/33195" }, { "epoch": 0.239644524777828, "eta": "202:58:04", "grad_norm": 0.0088, "loss": 0.0613, "lr": "4.978e-05", "step": 1591, "steps": "23.12s,1591/33195" }, { "epoch": 0.23979514987196868, "eta": "200:09:08", "grad_norm": 0.0084, "loss": 0.0689, "lr": "4.978e-05", "step": 1592, "steps": "22.8s,1592/33195" }, { "epoch": 0.23994577496610936, "eta": "209:58:39", "grad_norm": 0.0085, "loss": 0.0444, "lr": "4.978e-05", "step": 1593, "steps": "23.92s,1593/33195" }, { "epoch": 0.24009640006025004, "eta": "203:54:51", "grad_norm": 0.007, "loss": 0.0532, "lr": "4.978e-05", "step": 1594, "steps": "23.23s,1594/33195" }, { "epoch": 0.24024702515439073, "eta": "203:12:20", "grad_norm": 0.0068, "loss": 0.0484, "lr": "4.978e-05", "step": 1595, "steps": "23.15s,1595/33195" }, { "epoch": 0.2403976502485314, "eta": "202:19:16", "grad_norm": 0.0078, "loss": 0.0637, "lr": "4.978e-05", "step": 1596, "steps": "23.05s,1596/33195" }, { "epoch": 0.2405482753426721, "eta": "202:45:13", "grad_norm": 0.0078, "loss": 0.0627, "lr": "4.978e-05", "step": 1597, "steps": "23.1s,1597/33195" }, { "epoch": 0.24069890043681277, "eta": "204:19:38", "grad_norm": 0.0061, "loss": 0.037, "lr": "4.978e-05", "step": 1598, "steps": "23.28s,1598/33195" }, { "epoch": 0.24084952553095346, "eta": "202:49:43", "grad_norm": 0.0099, "loss": 0.0616, "lr": "4.978e-05", "step": 1599, "steps": "23.11s,1599/33195" }, { "epoch": 0.24100015062509414, "eta": "202:49:20", "grad_norm": 0.02, "loss": 0.0412, "lr": "4.978e-05", "step": 1600, "steps": "23.11s,1600/33195" }, { "epoch": 0.24115077571923482, "eta": "411:09:38", "grad_norm": 0.0089, "loss": 0.06, "lr": "4.978e-05", "step": 1601, "steps": "46.85s,1601/33195" }, { "epoch": 0.2413014008133755, "eta": "202:53:50", "grad_norm": 0.0084, "loss": 0.0608, "lr": "4.978e-05", "step": 1602, "steps": "23.12s,1602/33195" }, { "epoch": 0.2414520259075162, "eta": "205:41:56", "grad_norm": 0.0067, "loss": 0.0443, "lr": "4.978e-05", "step": 1603, "steps": "23.44s,1603/33195" }, { "epoch": 0.24160265100165687, "eta": "205:52:04", "grad_norm": 0.0065, "loss": 0.0559, "lr": "4.978e-05", "step": 1604, "steps": "23.46s,1604/33195" }, { "epoch": 0.24175327609579755, "eta": "202:36:53", "grad_norm": 0.0092, "loss": 0.0457, "lr": "4.978e-05", "step": 1605, "steps": "23.09s,1605/33195" }, { "epoch": 0.24190390118993824, "eta": "201:28:03", "grad_norm": 0.0069, "loss": 0.0556, "lr": "4.978e-05", "step": 1606, "steps": "22.96s,1606/33195" }, { "epoch": 0.24205452628407892, "eta": "205:45:38", "grad_norm": 0.0065, "loss": 0.0639, "lr": "4.978e-05", "step": 1607, "steps": "23.45s,1607/33195" }, { "epoch": 0.2422051513782196, "eta": "201:43:05", "grad_norm": 0.0062, "loss": 0.0393, "lr": "4.978e-05", "step": 1608, "steps": "22.99s,1608/33195" }, { "epoch": 0.24235577647236028, "eta": "203:01:40", "grad_norm": 0.007, "loss": 0.0554, "lr": "4.978e-05", "step": 1609, "steps": "23.14s,1609/33195" }, { "epoch": 0.24250640156650097, "eta": "202:24:25", "grad_norm": 0.0079, "loss": 0.0632, "lr": "4.978e-05", "step": 1610, "steps": "23.07s,1610/33195" }, { "epoch": 0.24265702666064165, "eta": "206:05:08", "grad_norm": 0.0079, "loss": 0.05, "lr": "4.978e-05", "step": 1611, "steps": "23.49s,1611/33195" }, { "epoch": 0.24280765175478233, "eta": "202:34:11", "grad_norm": 0.0104, "loss": 0.0444, "lr": "4.978e-05", "step": 1612, "steps": "23.09s,1612/33195" }, { "epoch": 0.24295827684892304, "eta": "205:38:02", "grad_norm": 0.0072, "loss": 0.0339, "lr": "4.978e-05", "step": 1613, "steps": "23.44s,1613/33195" }, { "epoch": 0.24310890194306373, "eta": "205:58:41", "grad_norm": 0.0066, "loss": 0.0565, "lr": "4.978e-05", "step": 1614, "steps": "23.48s,1614/33195" }, { "epoch": 0.2432595270372044, "eta": "205:00:24", "grad_norm": 0.0159, "loss": 0.0661, "lr": "4.977e-05", "step": 1615, "steps": "23.37s,1615/33195" }, { "epoch": 0.2434101521313451, "eta": "205:57:54", "grad_norm": 0.0095, "loss": 0.0578, "lr": "4.977e-05", "step": 1616, "steps": "23.48s,1616/33195" }, { "epoch": 0.24356077722548578, "eta": "206:18:34", "grad_norm": 0.0092, "loss": 0.0522, "lr": "4.977e-05", "step": 1617, "steps": "23.52s,1617/33195" }, { "epoch": 0.24371140231962646, "eta": "204:43:27", "grad_norm": 0.0061, "loss": 0.0295, "lr": "4.977e-05", "step": 1618, "steps": "23.34s,1618/33195" }, { "epoch": 0.24386202741376714, "eta": "203:34:38", "grad_norm": 0.0081, "loss": 0.0255, "lr": "4.977e-05", "step": 1619, "steps": "23.21s,1619/33195" }, { "epoch": 0.24401265250790782, "eta": "202:15:19", "grad_norm": 0.0076, "loss": 0.0341, "lr": "4.977e-05", "step": 1620, "steps": "23.06s,1620/33195" }, { "epoch": 0.2441632776020485, "eta": "205:34:54", "grad_norm": 0.0069, "loss": 0.0439, "lr": "4.977e-05", "step": 1621, "steps": "23.44s,1621/33195" }, { "epoch": 0.2443139026961892, "eta": "205:13:28", "grad_norm": 0.0073, "loss": 0.0548, "lr": "4.977e-05", "step": 1622, "steps": "23.4s,1622/33195" }, { "epoch": 0.24446452779032987, "eta": "203:22:34", "grad_norm": 0.0069, "loss": 0.0393, "lr": "4.977e-05", "step": 1623, "steps": "23.19s,1623/33195" }, { "epoch": 0.24461515288447055, "eta": "202:08:31", "grad_norm": 0.0076, "loss": 0.0718, "lr": "4.977e-05", "step": 1624, "steps": "23.05s,1624/33195" }, { "epoch": 0.24476577797861124, "eta": "203:48:06", "grad_norm": 0.0063, "loss": 0.0366, "lr": "4.977e-05", "step": 1625, "steps": "23.24s,1625/33195" }, { "epoch": 0.24491640307275192, "eta": "205:43:28", "grad_norm": 0.0092, "loss": 0.0963, "lr": "4.977e-05", "step": 1626, "steps": "23.46s,1626/33195" }, { "epoch": 0.2450670281668926, "eta": "202:33:40", "grad_norm": 0.0061, "loss": 0.0625, "lr": "4.977e-05", "step": 1627, "steps": "23.1s,1627/33195" }, { "epoch": 0.24521765326103329, "eta": "202:38:33", "grad_norm": 0.0091, "loss": 0.0612, "lr": "4.977e-05", "step": 1628, "steps": "23.11s,1628/33195" }, { "epoch": 0.24536827835517397, "eta": "202:27:38", "grad_norm": 0.0068, "loss": 0.046, "lr": "4.977e-05", "step": 1629, "steps": "23.09s,1629/33195" }, { "epoch": 0.24551890344931465, "eta": "206:02:57", "grad_norm": 0.0071, "loss": 0.0387, "lr": "4.977e-05", "step": 1630, "steps": "23.5s,1630/33195" }, { "epoch": 0.24566952854345533, "eta": "203:45:47", "grad_norm": 0.0079, "loss": 0.0589, "lr": "4.977e-05", "step": 1631, "steps": "23.24s,1631/33195" }, { "epoch": 0.24582015363759602, "eta": "205:25:21", "grad_norm": 0.0069, "loss": 0.041, "lr": "4.977e-05", "step": 1632, "steps": "23.43s,1632/33195" }, { "epoch": 0.2459707787317367, "eta": "202:47:09", "grad_norm": 0.0067, "loss": 0.0551, "lr": "4.977e-05", "step": 1633, "steps": "23.13s,1633/33195" }, { "epoch": 0.24612140382587738, "eta": "201:59:25", "grad_norm": 0.0083, "loss": 0.0243, "lr": "4.977e-05", "step": 1634, "steps": "23.04s,1634/33195" }, { "epoch": 0.24627202892001807, "eta": "205:29:26", "grad_norm": 0.0065, "loss": 0.0554, "lr": "4.977e-05", "step": 1635, "steps": "23.44s,1635/33195" }, { "epoch": 0.24642265401415875, "eta": "202:14:26", "grad_norm": 0.0058, "loss": 0.0366, "lr": "4.977e-05", "step": 1636, "steps": "23.07s,1636/33195" }, { "epoch": 0.24657327910829943, "eta": "202:24:34", "grad_norm": 0.0098, "loss": 0.0501, "lr": "4.977e-05", "step": 1637, "steps": "23.09s,1637/33195" }, { "epoch": 0.24672390420244011, "eta": "203:16:46", "grad_norm": 0.0075, "loss": 0.056, "lr": "4.977e-05", "step": 1638, "steps": "23.19s,1638/33195" }, { "epoch": 0.2468745292965808, "eta": "203:42:41", "grad_norm": 0.0077, "loss": 0.0458, "lr": "4.977e-05", "step": 1639, "steps": "23.24s,1639/33195" }, { "epoch": 0.2470251543907215, "eta": "203:00:13", "grad_norm": 0.0076, "loss": 0.0548, "lr": "4.977e-05", "step": 1640, "steps": "23.16s,1640/33195" }, { "epoch": 0.2471757794848622, "eta": "202:38:48", "grad_norm": 0.0092, "loss": 0.0316, "lr": "4.977e-05", "step": 1641, "steps": "23.12s,1641/33195" }, { "epoch": 0.24732640457900287, "eta": "202:06:52", "grad_norm": 0.0081, "loss": 0.0444, "lr": "4.977e-05", "step": 1642, "steps": "23.06s,1642/33195" }, { "epoch": 0.24747702967314356, "eta": "202:48:33", "grad_norm": 0.0078, "loss": 0.0409, "lr": "4.977e-05", "step": 1643, "steps": "23.14s,1643/33195" }, { "epoch": 0.24762765476728424, "eta": "203:19:43", "grad_norm": 0.0075, "loss": 0.0586, "lr": "4.977e-05", "step": 1644, "steps": "23.2s,1644/33195" }, { "epoch": 0.24777827986142492, "eta": "202:53:02", "grad_norm": 0.007, "loss": 0.057, "lr": "4.977e-05", "step": 1645, "steps": "23.15s,1645/33195" }, { "epoch": 0.2479289049555656, "eta": "205:46:10", "grad_norm": 0.009, "loss": 0.065, "lr": "4.976e-05", "step": 1646, "steps": "23.48s,1646/33195" }, { "epoch": 0.2480795300497063, "eta": "206:12:04", "grad_norm": 0.0095, "loss": 0.0366, "lr": "4.976e-05", "step": 1647, "steps": "23.53s,1647/33195" }, { "epoch": 0.24823015514384697, "eta": "202:57:08", "grad_norm": 0.0063, "loss": 0.0466, "lr": "4.976e-05", "step": 1648, "steps": "23.16s,1648/33195" }, { "epoch": 0.24838078023798765, "eta": "200:24:17", "grad_norm": 0.0071, "loss": 0.0406, "lr": "4.976e-05", "step": 1649, "steps": "22.87s,1649/33195" }, { "epoch": 0.24853140533212834, "eta": "203:12:08", "grad_norm": 0.0071, "loss": 0.0634, "lr": "4.976e-05", "step": 1650, "steps": "23.19s,1650/33195" }, { "epoch": 0.24868203042626902, "eta": "201:58:09", "grad_norm": 0.0067, "loss": 0.056, "lr": "4.976e-05", "step": 1651, "steps": "23.05s,1651/33195" }, { "epoch": 0.2488326555204097, "eta": "203:11:22", "grad_norm": 0.0101, "loss": 0.0403, "lr": "4.976e-05", "step": 1652, "steps": "23.19s,1652/33195" }, { "epoch": 0.24898328061455038, "eta": "203:32:00", "grad_norm": 0.0065, "loss": 0.0463, "lr": "4.976e-05", "step": 1653, "steps": "23.23s,1653/33195" }, { "epoch": 0.24913390570869107, "eta": "205:58:48", "grad_norm": 0.0071, "loss": 0.0398, "lr": "4.976e-05", "step": 1654, "steps": "23.51s,1654/33195" }, { "epoch": 0.24928453080283175, "eta": "203:10:12", "grad_norm": 0.0069, "loss": 0.0403, "lr": "4.976e-05", "step": 1655, "steps": "23.19s,1655/33195" }, { "epoch": 0.24943515589697243, "eta": "206:40:05", "grad_norm": 0.0092, "loss": 0.0568, "lr": "4.976e-05", "step": 1656, "steps": "23.59s,1656/33195" }, { "epoch": 0.24958578099111312, "eta": "206:02:53", "grad_norm": 0.0101, "loss": 0.0662, "lr": "4.976e-05", "step": 1657, "steps": "23.52s,1657/33195" }, { "epoch": 0.2497364060852538, "eta": "206:07:45", "grad_norm": 0.0085, "loss": 0.0377, "lr": "4.976e-05", "step": 1658, "steps": "23.53s,1658/33195" }, { "epoch": 0.24988703117939448, "eta": "203:29:41", "grad_norm": 0.0082, "loss": 0.0572, "lr": "4.976e-05", "step": 1659, "steps": "23.23s,1659/33195" }, { "epoch": 0.2500376562735352, "eta": "205:51:12", "grad_norm": 0.0069, "loss": 0.0506, "lr": "4.976e-05", "step": 1660, "steps": "23.5s,1660/33195" }, { "epoch": 0.25018828136767585, "eta": "204:42:29", "grad_norm": 0.0082, "loss": 0.0682, "lr": "4.976e-05", "step": 1661, "steps": "23.37s,1661/33195" }, { "epoch": 0.25033890646181656, "eta": "201:17:08", "grad_norm": 0.0092, "loss": 0.0539, "lr": "4.976e-05", "step": 1662, "steps": "22.98s,1662/33195" }, { "epoch": 0.2504895315559572, "eta": "205:55:17", "grad_norm": 0.0061, "loss": 0.0628, "lr": "4.976e-05", "step": 1663, "steps": "23.51s,1663/33195" }, { "epoch": 0.2506401566500979, "eta": "202:24:41", "grad_norm": 0.0079, "loss": 0.0649, "lr": "4.976e-05", "step": 1664, "steps": "23.11s,1664/33195" }, { "epoch": 0.2507907817442386, "eta": "203:01:05", "grad_norm": 0.0078, "loss": 0.052, "lr": "4.976e-05", "step": 1665, "steps": "23.18s,1665/33195" }, { "epoch": 0.2509414068383793, "eta": "209:24:18", "grad_norm": 0.0076, "loss": 0.0487, "lr": "4.976e-05", "step": 1666, "steps": "23.91s,1666/33195" }, { "epoch": 0.25109203193251994, "eta": "206:09:29", "grad_norm": 0.0076, "loss": 0.0331, "lr": "4.976e-05", "step": 1667, "steps": "23.54s,1667/33195" }, { "epoch": 0.25124265702666065, "eta": "204:39:45", "grad_norm": 0.0106, "loss": 0.0459, "lr": "4.976e-05", "step": 1668, "steps": "23.37s,1668/33195" }, { "epoch": 0.2513932821208013, "eta": "205:31:55", "grad_norm": 0.0074, "loss": 0.07, "lr": "4.976e-05", "step": 1669, "steps": "23.47s,1669/33195" }, { "epoch": 0.251543907214942, "eta": "200:53:03", "grad_norm": 0.0086, "loss": 0.046, "lr": "4.976e-05", "step": 1670, "steps": "22.94s,1670/33195" }, { "epoch": 0.2516945323090827, "eta": "202:58:46", "grad_norm": 0.0181, "loss": 0.0442, "lr": "4.976e-05", "step": 1671, "steps": "23.18s,1671/33195" }, { "epoch": 0.2518451574032234, "eta": "204:32:57", "grad_norm": 0.0096, "loss": 0.064, "lr": "4.976e-05", "step": 1672, "steps": "23.36s,1672/33195" }, { "epoch": 0.25199578249736404, "eta": "202:15:58", "grad_norm": 0.0087, "loss": 0.026, "lr": "4.976e-05", "step": 1673, "steps": "23.1s,1673/33195" }, { "epoch": 0.25214640759150475, "eta": "205:08:57", "grad_norm": 0.0086, "loss": 0.0415, "lr": "4.976e-05", "step": 1674, "steps": "23.43s,1674/33195" }, { "epoch": 0.2522970326856454, "eta": "204:26:32", "grad_norm": 0.0085, "loss": 0.0598, "lr": "4.976e-05", "step": 1675, "steps": "23.35s,1675/33195" }, { "epoch": 0.2524476577797861, "eta": "203:07:20", "grad_norm": 0.0072, "loss": 0.0393, "lr": "4.976e-05", "step": 1676, "steps": "23.2s,1676/33195" }, { "epoch": 0.25259828287392677, "eta": "204:52:01", "grad_norm": 0.01, "loss": 0.05, "lr": "4.975e-05", "step": 1677, "steps": "23.4s,1677/33195" }, { "epoch": 0.2527489079680675, "eta": "203:11:49", "grad_norm": 0.0098, "loss": 0.0558, "lr": "4.975e-05", "step": 1678, "steps": "23.21s,1678/33195" }, { "epoch": 0.25289953306220814, "eta": "203:27:11", "grad_norm": 0.0079, "loss": 0.0774, "lr": "4.975e-05", "step": 1679, "steps": "23.24s,1679/33195" }, { "epoch": 0.25305015815634885, "eta": "205:17:06", "grad_norm": 0.0085, "loss": 0.0481, "lr": "4.975e-05", "step": 1680, "steps": "23.45s,1680/33195" }, { "epoch": 0.25320078325048956, "eta": "202:54:54", "grad_norm": 0.0068, "loss": 0.0523, "lr": "4.975e-05", "step": 1681, "steps": "23.18s,1681/33195" }, { "epoch": 0.2533514083446302, "eta": "203:31:17", "grad_norm": 0.0084, "loss": 0.0698, "lr": "4.975e-05", "step": 1682, "steps": "23.25s,1682/33195" }, { "epoch": 0.2535020334387709, "eta": "202:33:07", "grad_norm": 0.0079, "loss": 0.0355, "lr": "4.975e-05", "step": 1683, "steps": "23.14s,1683/33195" }, { "epoch": 0.2536526585329116, "eta": "204:49:17", "grad_norm": 0.007, "loss": 0.0569, "lr": "4.975e-05", "step": 1684, "steps": "23.4s,1684/33195" }, { "epoch": 0.2538032836270523, "eta": "201:50:20", "grad_norm": 0.0079, "loss": 0.049, "lr": "4.975e-05", "step": 1685, "steps": "23.06s,1685/33195" }, { "epoch": 0.25395390872119294, "eta": "202:26:43", "grad_norm": 0.0064, "loss": 0.0665, "lr": "4.975e-05", "step": 1686, "steps": "23.13s,1686/33195" }, { "epoch": 0.25410453381533366, "eta": "204:16:36", "grad_norm": 0.0064, "loss": 0.0556, "lr": "4.975e-05", "step": 1687, "steps": "23.34s,1687/33195" }, { "epoch": 0.2542551589094743, "eta": "201:59:41", "grad_norm": 0.0077, "loss": 0.0494, "lr": "4.975e-05", "step": 1688, "steps": "23.08s,1688/33195" }, { "epoch": 0.254405784003615, "eta": "204:52:35", "grad_norm": 0.0069, "loss": 0.0519, "lr": "4.975e-05", "step": 1689, "steps": "23.41s,1689/33195" }, { "epoch": 0.2545564090977557, "eta": "205:44:42", "grad_norm": 0.006, "loss": 0.0496, "lr": "4.975e-05", "step": 1690, "steps": "23.51s,1690/33195" }, { "epoch": 0.2547070341918964, "eta": "203:12:02", "grad_norm": 0.0072, "loss": 0.0642, "lr": "4.975e-05", "step": 1691, "steps": "23.22s,1691/33195" }, { "epoch": 0.25485765928603704, "eta": "202:50:39", "grad_norm": 0.0084, "loss": 0.0458, "lr": "4.975e-05", "step": 1692, "steps": "23.18s,1692/33195" }, { "epoch": 0.25500828438017775, "eta": "204:24:46", "grad_norm": 0.0056, "loss": 0.0486, "lr": "4.975e-05", "step": 1693, "steps": "23.36s,1693/33195" }, { "epoch": 0.2551589094743184, "eta": "205:22:08", "grad_norm": 0.0072, "loss": 0.0689, "lr": "4.975e-05", "step": 1694, "steps": "23.47s,1694/33195" }, { "epoch": 0.2553095345684591, "eta": "203:10:30", "grad_norm": 0.007, "loss": 0.0555, "lr": "4.975e-05", "step": 1695, "steps": "23.22s,1695/33195" }, { "epoch": 0.2554601596625998, "eta": "207:06:21", "grad_norm": 0.006, "loss": 0.0622, "lr": "4.975e-05", "step": 1696, "steps": "23.67s,1696/33195" }, { "epoch": 0.2556107847567405, "eta": "202:22:28", "grad_norm": 0.0099, "loss": 0.059, "lr": "4.975e-05", "step": 1697, "steps": "23.13s,1697/33195" }, { "epoch": 0.25576140985088114, "eta": "205:31:04", "grad_norm": 0.0073, "loss": 0.0727, "lr": "4.975e-05", "step": 1698, "steps": "23.49s,1698/33195" }, { "epoch": 0.25591203494502185, "eta": "202:53:12", "grad_norm": 0.0073, "loss": 0.0666, "lr": "4.975e-05", "step": 1699, "steps": "23.19s,1699/33195" }, { "epoch": 0.2560626600391625, "eta": "201:49:49", "grad_norm": 0.0085, "loss": 0.0375, "lr": "4.975e-05", "step": 1700, "steps": "23.07s,1700/33195" }, { "epoch": 0.2562132851333032, "eta": "202:47:10", "grad_norm": 0.0092, "loss": 0.0378, "lr": "4.975e-05", "step": 1701, "steps": "23.18s,1701/33195" }, { "epoch": 0.25636391022744387, "eta": "201:43:48", "grad_norm": 0.0081, "loss": 0.0576, "lr": "4.975e-05", "step": 1702, "steps": "23.06s,1702/33195" }, { "epoch": 0.2565145353215846, "eta": "205:39:36", "grad_norm": 0.0072, "loss": 0.0517, "lr": "4.975e-05", "step": 1703, "steps": "23.51s,1703/33195" }, { "epoch": 0.25666516041572524, "eta": "202:40:46", "grad_norm": 0.0062, "loss": 0.046, "lr": "4.975e-05", "step": 1704, "steps": "23.17s,1704/33195" }, { "epoch": 0.25681578550986595, "eta": "201:26:54", "grad_norm": 0.0067, "loss": 0.0586, "lr": "4.975e-05", "step": 1705, "steps": "23.03s,1705/33195" }, { "epoch": 0.2569664106040066, "eta": "204:45:57", "grad_norm": 0.0051, "loss": 0.0548, "lr": "4.975e-05", "step": 1706, "steps": "23.41s,1706/33195" }, { "epoch": 0.2571170356981473, "eta": "202:02:52", "grad_norm": 0.0072, "loss": 0.0405, "lr": "4.974e-05", "step": 1707, "steps": "23.1s,1707/33195" }, { "epoch": 0.257267660792288, "eta": "203:26:27", "grad_norm": 0.0093, "loss": 0.0502, "lr": "4.974e-05", "step": 1708, "steps": "23.26s,1708/33195" }, { "epoch": 0.2574182858864287, "eta": "205:37:15", "grad_norm": 0.0076, "loss": 0.0698, "lr": "4.974e-05", "step": 1709, "steps": "23.51s,1709/33195" }, { "epoch": 0.2575689109805694, "eta": "199:40:02", "grad_norm": 0.0067, "loss": 0.0458, "lr": "4.974e-05", "step": 1710, "steps": "22.83s,1710/33195" }, { "epoch": 0.25771953607471004, "eta": "204:54:30", "grad_norm": 0.009, "loss": 0.0442, "lr": "4.974e-05", "step": 1711, "steps": "23.43s,1711/33195" }, { "epoch": 0.25787016116885075, "eta": "202:32:26", "grad_norm": 0.0066, "loss": 0.0391, "lr": "4.974e-05", "step": 1712, "steps": "23.16s,1712/33195" }, { "epoch": 0.2580207862629914, "eta": "202:26:48", "grad_norm": 0.0093, "loss": 0.0462, "lr": "4.974e-05", "step": 1713, "steps": "23.15s,1713/33195" }, { "epoch": 0.2581714113571321, "eta": "202:31:39", "grad_norm": 0.007, "loss": 0.0372, "lr": "4.974e-05", "step": 1714, "steps": "23.16s,1714/33195" }, { "epoch": 0.2583220364512728, "eta": "202:41:46", "grad_norm": 0.0082, "loss": 0.063, "lr": "4.974e-05", "step": 1715, "steps": "23.18s,1715/33195" }, { "epoch": 0.2584726615454135, "eta": "204:05:19", "grad_norm": 0.007, "loss": 0.0354, "lr": "4.974e-05", "step": 1716, "steps": "23.34s,1716/33195" }, { "epoch": 0.25862328663955414, "eta": "202:09:31", "grad_norm": 0.008, "loss": 0.05, "lr": "4.974e-05", "step": 1717, "steps": "23.12s,1717/33195" }, { "epoch": 0.25877391173369485, "eta": "202:19:37", "grad_norm": 0.0074, "loss": 0.0689, "lr": "4.974e-05", "step": 1718, "steps": "23.14s,1718/33195" }, { "epoch": 0.2589245368278355, "eta": "201:32:01", "grad_norm": 0.0068, "loss": 0.0527, "lr": "4.974e-05", "step": 1719, "steps": "23.05s,1719/33195" }, { "epoch": 0.2590751619219762, "eta": "202:24:06", "grad_norm": 0.0066, "loss": 0.0466, "lr": "4.974e-05", "step": 1720, "steps": "23.15s,1720/33195" }, { "epoch": 0.25922578701611687, "eta": "205:37:48", "grad_norm": 0.009, "loss": 0.0529, "lr": "4.974e-05", "step": 1721, "steps": "23.52s,1721/33195" }, { "epoch": 0.2593764121102576, "eta": "202:28:34", "grad_norm": 0.0078, "loss": 0.0248, "lr": "4.974e-05", "step": 1722, "steps": "23.16s,1722/33195" }, { "epoch": 0.25952703720439824, "eta": "202:59:39", "grad_norm": 0.0058, "loss": 0.0348, "lr": "4.974e-05", "step": 1723, "steps": "23.22s,1723/33195" }, { "epoch": 0.25967766229853895, "eta": "204:44:10", "grad_norm": 0.0063, "loss": 0.0494, "lr": "4.974e-05", "step": 1724, "steps": "23.42s,1724/33195" }, { "epoch": 0.2598282873926796, "eta": "201:50:42", "grad_norm": 0.007, "loss": 0.0776, "lr": "4.974e-05", "step": 1725, "steps": "23.09s,1725/33195" }, { "epoch": 0.2599789124868203, "eta": "205:14:52", "grad_norm": 0.0111, "loss": 0.0548, "lr": "4.974e-05", "step": 1726, "steps": "23.48s,1726/33195" }, { "epoch": 0.26012953758096097, "eta": "203:08:36", "grad_norm": 0.0118, "loss": 0.0379, "lr": "4.974e-05", "step": 1727, "steps": "23.24s,1727/33195" }, { "epoch": 0.2602801626751017, "eta": "202:57:43", "grad_norm": 0.0082, "loss": 0.0355, "lr": "4.974e-05", "step": 1728, "steps": "23.22s,1728/33195" }, { "epoch": 0.26043078776924233, "eta": "204:52:43", "grad_norm": 0.0079, "loss": 0.0405, "lr": "4.974e-05", "step": 1729, "steps": "23.44s,1729/33195" }, { "epoch": 0.26058141286338304, "eta": "203:12:41", "grad_norm": 0.0183, "loss": 0.076, "lr": "4.974e-05", "step": 1730, "steps": "23.25s,1730/33195" }, { "epoch": 0.2607320379575237, "eta": "202:04:07", "grad_norm": 0.0082, "loss": 0.0562, "lr": "4.974e-05", "step": 1731, "steps": "23.12s,1731/33195" }, { "epoch": 0.2608826630516644, "eta": "203:22:24", "grad_norm": 0.0082, "loss": 0.0346, "lr": "4.974e-05", "step": 1732, "steps": "23.27s,1732/33195" }, { "epoch": 0.26103328814580506, "eta": "258:46:29", "grad_norm": 0.0113, "loss": 0.0366, "lr": "4.974e-05", "step": 1733, "steps": "29.61s,1733/33195" }, { "epoch": 0.2611839132399458, "eta": "202:44:55", "grad_norm": 0.0103, "loss": 0.067, "lr": "4.974e-05", "step": 1734, "steps": "23.2s,1734/33195" }, { "epoch": 0.2613345383340865, "eta": "208:09:37", "grad_norm": 0.0081, "loss": 0.0478, "lr": "4.974e-05", "step": 1735, "steps": "23.82s,1735/33195" }, { "epoch": 0.26148516342822714, "eta": "202:54:37", "grad_norm": 0.0069, "loss": 0.0443, "lr": "4.973e-05", "step": 1736, "steps": "23.22s,1736/33195" }, { "epoch": 0.26163578852236785, "eta": "202:38:31", "grad_norm": 0.0069, "loss": 0.0561, "lr": "4.973e-05", "step": 1737, "steps": "23.19s,1737/33195" }, { "epoch": 0.2617864136165085, "eta": "202:22:24", "grad_norm": 0.0105, "loss": 0.0562, "lr": "4.973e-05", "step": 1738, "steps": "23.16s,1738/33195" }, { "epoch": 0.2619370387106492, "eta": "201:08:37", "grad_norm": 0.0067, "loss": 0.0633, "lr": "4.973e-05", "step": 1739, "steps": "23.02s,1739/33195" }, { "epoch": 0.26208766380478987, "eta": "206:07:03", "grad_norm": 0.0095, "loss": 0.0709, "lr": "4.973e-05", "step": 1740, "steps": "23.59s,1740/33195" }, { "epoch": 0.2622382888989306, "eta": "202:42:12", "grad_norm": 0.009, "loss": 0.0449, "lr": "4.973e-05", "step": 1741, "steps": "23.2s,1741/33195" }, { "epoch": 0.26238891399307124, "eta": "201:54:38", "grad_norm": 0.0085, "loss": 0.0549, "lr": "4.973e-05", "step": 1742, "steps": "23.11s,1742/33195" }, { "epoch": 0.26253953908721195, "eta": "203:23:22", "grad_norm": 0.0071, "loss": 0.0449, "lr": "4.973e-05", "step": 1743, "steps": "23.28s,1743/33195" }, { "epoch": 0.2626901641813526, "eta": "203:38:42", "grad_norm": 0.007, "loss": 0.0388, "lr": "4.973e-05", "step": 1744, "steps": "23.31s,1744/33195" }, { "epoch": 0.2628407892754933, "eta": "202:51:09", "grad_norm": 0.0071, "loss": 0.0695, "lr": "4.973e-05", "step": 1745, "steps": "23.22s,1745/33195" }, { "epoch": 0.26299141436963397, "eta": "202:14:04", "grad_norm": 0.0074, "loss": 0.0472, "lr": "4.973e-05", "step": 1746, "steps": "23.15s,1746/33195" }, { "epoch": 0.2631420394637747, "eta": "205:38:05", "grad_norm": 0.0092, "loss": 0.0441, "lr": "4.973e-05", "step": 1747, "steps": "23.54s,1747/33195" }, { "epoch": 0.26329266455791533, "eta": "205:53:25", "grad_norm": 0.0062, "loss": 0.0344, "lr": "4.973e-05", "step": 1748, "steps": "23.57s,1748/33195" }, { "epoch": 0.26344328965205605, "eta": "203:10:33", "grad_norm": 0.0063, "loss": 0.0526, "lr": "4.973e-05", "step": 1749, "steps": "23.26s,1749/33195" }, { "epoch": 0.2635939147461967, "eta": "202:17:46", "grad_norm": 0.0074, "loss": 0.0478, "lr": "4.973e-05", "step": 1750, "steps": "23.16s,1750/33195" }, { "epoch": 0.2637445398403374, "eta": "203:04:33", "grad_norm": 0.0077, "loss": 0.0654, "lr": "4.973e-05", "step": 1751, "steps": "23.25s,1751/33195" }, { "epoch": 0.26389516493447807, "eta": "203:19:53", "grad_norm": 0.0068, "loss": 0.0463, "lr": "4.973e-05", "step": 1752, "steps": "23.28s,1752/33195" }, { "epoch": 0.2640457900286188, "eta": "204:32:51", "grad_norm": 0.0087, "loss": 0.0427, "lr": "4.973e-05", "step": 1753, "steps": "23.42s,1753/33195" }, { "epoch": 0.26419641512275943, "eta": "205:56:18", "grad_norm": 0.0075, "loss": 0.036, "lr": "4.973e-05", "step": 1754, "steps": "23.58s,1754/33195" }, { "epoch": 0.26434704021690014, "eta": "207:04:02", "grad_norm": 0.0074, "loss": 0.0633, "lr": "4.973e-05", "step": 1755, "steps": "23.71s,1755/33195" }, { "epoch": 0.2644976653110408, "eta": "202:25:56", "grad_norm": 0.007, "loss": 0.0503, "lr": "4.973e-05", "step": 1756, "steps": "23.18s,1756/33195" }, { "epoch": 0.2646482904051815, "eta": "206:31:48", "grad_norm": 0.008, "loss": 0.048, "lr": "4.973e-05", "step": 1757, "steps": "23.65s,1757/33195" }, { "epoch": 0.26479891549932216, "eta": "204:51:52", "grad_norm": 0.008, "loss": 0.0452, "lr": "4.973e-05", "step": 1758, "steps": "23.46s,1758/33195" }, { "epoch": 0.2649495405934629, "eta": "204:20:02", "grad_norm": 0.0064, "loss": 0.0596, "lr": "4.973e-05", "step": 1759, "steps": "23.4s,1759/33195" }, { "epoch": 0.26510016568760353, "eta": "202:03:25", "grad_norm": 0.0073, "loss": 0.0459, "lr": "4.973e-05", "step": 1760, "steps": "23.14s,1760/33195" }, { "epoch": 0.26525079078174424, "eta": "202:50:11", "grad_norm": 0.0067, "loss": 0.069, "lr": "4.973e-05", "step": 1761, "steps": "23.23s,1761/33195" }, { "epoch": 0.26540141587588495, "eta": "202:55:02", "grad_norm": 0.0071, "loss": 0.0599, "lr": "4.973e-05", "step": 1762, "steps": "23.24s,1762/33195" }, { "epoch": 0.2655520409700256, "eta": "202:17:59", "grad_norm": 0.0088, "loss": 0.0454, "lr": "4.973e-05", "step": 1763, "steps": "23.17s,1763/33195" }, { "epoch": 0.2657026660641663, "eta": "205:15:42", "grad_norm": 0.0092, "loss": 0.0424, "lr": "4.973e-05", "step": 1764, "steps": "23.51s,1764/33195" }, { "epoch": 0.26585329115830697, "eta": "202:01:30", "grad_norm": 0.0079, "loss": 0.0608, "lr": "4.972e-05", "step": 1765, "steps": "23.14s,1765/33195" }, { "epoch": 0.2660039162524477, "eta": "203:24:55", "grad_norm": 0.0054, "loss": 0.0441, "lr": "4.972e-05", "step": 1766, "steps": "23.3s,1766/33195" }, { "epoch": 0.26615454134658834, "eta": "204:27:23", "grad_norm": 0.0079, "loss": 0.0443, "lr": "4.972e-05", "step": 1767, "steps": "23.42s,1767/33195" }, { "epoch": 0.26630516644072905, "eta": "202:05:35", "grad_norm": 0.0089, "loss": 0.0616, "lr": "4.972e-05", "step": 1768, "steps": "23.15s,1768/33195" }, { "epoch": 0.2664557915348697, "eta": "202:41:51", "grad_norm": 0.0079, "loss": 0.0554, "lr": "4.972e-05", "step": 1769, "steps": "23.22s,1769/33195" }, { "epoch": 0.2666064166290104, "eta": "205:02:53", "grad_norm": 0.0076, "loss": 0.0562, "lr": "4.972e-05", "step": 1770, "steps": "23.49s,1770/33195" }, { "epoch": 0.26675704172315107, "eta": "204:31:04", "grad_norm": 0.0068, "loss": 0.0373, "lr": "4.972e-05", "step": 1771, "steps": "23.43s,1771/33195" }, { "epoch": 0.2669076668172918, "eta": "202:19:45", "grad_norm": 0.0065, "loss": 0.0472, "lr": "4.972e-05", "step": 1772, "steps": "23.18s,1772/33195" }, { "epoch": 0.26705829191143243, "eta": "202:56:01", "grad_norm": 0.0076, "loss": 0.0439, "lr": "4.972e-05", "step": 1773, "steps": "23.25s,1773/33195" }, { "epoch": 0.26720891700557314, "eta": "202:55:38", "grad_norm": 0.0087, "loss": 0.0351, "lr": "4.972e-05", "step": 1774, "steps": "23.25s,1774/33195" }, { "epoch": 0.2673595420997138, "eta": "204:19:02", "grad_norm": 0.0106, "loss": 0.064, "lr": "4.972e-05", "step": 1775, "steps": "23.41s,1775/33195" }, { "epoch": 0.2675101671938545, "eta": "202:49:37", "grad_norm": 0.0126, "loss": 0.056, "lr": "4.972e-05", "step": 1776, "steps": "23.24s,1776/33195" }, { "epoch": 0.26766079228799516, "eta": "204:54:54", "grad_norm": 0.0091, "loss": 0.0524, "lr": "4.972e-05", "step": 1777, "steps": "23.48s,1777/33195" }, { "epoch": 0.2678114173821359, "eta": "201:51:15", "grad_norm": 0.0077, "loss": 0.0546, "lr": "4.972e-05", "step": 1778, "steps": "23.13s,1778/33195" }, { "epoch": 0.26796204247627653, "eta": "202:32:45", "grad_norm": 0.0059, "loss": 0.0433, "lr": "4.972e-05", "step": 1779, "steps": "23.21s,1779/33195" }, { "epoch": 0.26811266757041724, "eta": "198:52:27", "grad_norm": 0.0066, "loss": 0.0587, "lr": "4.972e-05", "step": 1780, "steps": "22.79s,1780/33195" }, { "epoch": 0.2682632926645579, "eta": "205:03:49", "grad_norm": 0.0066, "loss": 0.0704, "lr": "4.972e-05", "step": 1781, "steps": "23.5s,1781/33195" }, { "epoch": 0.2684139177586986, "eta": "204:00:35", "grad_norm": 0.0089, "loss": 0.038, "lr": "4.972e-05", "step": 1782, "steps": "23.38s,1782/33195" }, { "epoch": 0.26856454285283926, "eta": "202:46:54", "grad_norm": 0.0086, "loss": 0.0526, "lr": "4.972e-05", "step": 1783, "steps": "23.24s,1783/33195" }, { "epoch": 0.26871516794697997, "eta": "208:32:02", "grad_norm": 0.0084, "loss": 0.0576, "lr": "4.972e-05", "step": 1784, "steps": "23.9s,1784/33195" }, { "epoch": 0.2688657930411206, "eta": "204:46:32", "grad_norm": 0.0064, "loss": 0.0457, "lr": "4.972e-05", "step": 1785, "steps": "23.47s,1785/33195" }, { "epoch": 0.26901641813526134, "eta": "204:46:09", "grad_norm": 0.0056, "loss": 0.0256, "lr": "4.972e-05", "step": 1786, "steps": "23.47s,1786/33195" }, { "epoch": 0.269167043229402, "eta": "199:10:44", "grad_norm": 0.0078, "loss": 0.0529, "lr": "4.972e-05", "step": 1787, "steps": "22.83s,1787/33195" }, { "epoch": 0.2693176683235427, "eta": "202:34:30", "grad_norm": 0.0074, "loss": 0.0475, "lr": "4.972e-05", "step": 1788, "steps": "23.22s,1788/33195" }, { "epoch": 0.2694682934176834, "eta": "206:03:29", "grad_norm": 0.0077, "loss": 0.0532, "lr": "4.972e-05", "step": 1789, "steps": "23.62s,1789/33195" }, { "epoch": 0.26961891851182407, "eta": "201:30:55", "grad_norm": 0.0071, "loss": 0.0472, "lr": "4.972e-05", "step": 1790, "steps": "23.1s,1790/33195" }, { "epoch": 0.2697695436059648, "eta": "202:38:34", "grad_norm": 0.0121, "loss": 0.0549, "lr": "4.972e-05", "step": 1791, "steps": "23.23s,1791/33195" }, { "epoch": 0.26992016870010543, "eta": "205:09:58", "grad_norm": 0.0073, "loss": 0.0532, "lr": "4.972e-05", "step": 1792, "steps": "23.52s,1792/33195" }, { "epoch": 0.27007079379424614, "eta": "205:35:45", "grad_norm": 0.0066, "loss": 0.0671, "lr": "4.971e-05", "step": 1793, "steps": "23.57s,1793/33195" }, { "epoch": 0.2702214188883868, "eta": "205:35:21", "grad_norm": 0.0076, "loss": 0.0643, "lr": "4.971e-05", "step": 1794, "steps": "23.57s,1794/33195" }, { "epoch": 0.2703720439825275, "eta": "202:26:34", "grad_norm": 0.0066, "loss": 0.0572, "lr": "4.971e-05", "step": 1795, "steps": "23.21s,1795/33195" }, { "epoch": 0.27052266907666817, "eta": "203:39:26", "grad_norm": 0.0066, "loss": 0.0552, "lr": "4.971e-05", "step": 1796, "steps": "23.35s,1796/33195" }, { "epoch": 0.2706732941708089, "eta": "203:44:17", "grad_norm": 0.0064, "loss": 0.0472, "lr": "4.971e-05", "step": 1797, "steps": "23.36s,1797/33195" }, { "epoch": 0.27082391926494953, "eta": "205:07:37", "grad_norm": 0.0057, "loss": 0.0639, "lr": "4.971e-05", "step": 1798, "steps": "23.52s,1798/33195" }, { "epoch": 0.27097454435909024, "eta": "202:40:43", "grad_norm": 0.0077, "loss": 0.0414, "lr": "4.971e-05", "step": 1799, "steps": "23.24s,1799/33195" }, { "epoch": 0.2711251694532309, "eta": "203:16:57", "grad_norm": 0.0074, "loss": 0.0637, "lr": "4.971e-05", "step": 1800, "steps": "23.31s,1800/33195" }, { "epoch": 0.2712757945473716, "eta": "392:04:34", "grad_norm": 0.0093, "loss": 0.0695, "lr": "4.971e-05", "step": 1801, "steps": "44.96s,1801/33195" }, { "epoch": 0.27142641964151226, "eta": "201:52:27", "grad_norm": 0.0089, "loss": 0.0521, "lr": "4.971e-05", "step": 1802, "steps": "23.15s,1802/33195" }, { "epoch": 0.271577044735653, "eta": "202:28:42", "grad_norm": 0.0082, "loss": 0.03, "lr": "4.971e-05", "step": 1803, "steps": "23.22s,1803/33195" }, { "epoch": 0.2717276698297936, "eta": "205:20:58", "grad_norm": 0.0079, "loss": 0.033, "lr": "4.971e-05", "step": 1804, "steps": "23.55s,1804/33195" }, { "epoch": 0.27187829492393434, "eta": "202:27:55", "grad_norm": 0.0071, "loss": 0.0388, "lr": "4.971e-05", "step": 1805, "steps": "23.22s,1805/33195" }, { "epoch": 0.272028920018075, "eta": "202:58:55", "grad_norm": 0.0088, "loss": 0.0393, "lr": "4.971e-05", "step": 1806, "steps": "23.28s,1806/33195" }, { "epoch": 0.2721795451122157, "eta": "200:21:36", "grad_norm": 0.008, "loss": 0.0425, "lr": "4.971e-05", "step": 1807, "steps": "22.98s,1807/33195" }, { "epoch": 0.27233017020635636, "eta": "205:29:51", "grad_norm": 0.0098, "loss": 0.0267, "lr": "4.971e-05", "step": 1808, "steps": "23.57s,1808/33195" }, { "epoch": 0.27248079530049707, "eta": "202:10:41", "grad_norm": 0.0072, "loss": 0.0474, "lr": "4.971e-05", "step": 1809, "steps": "23.19s,1809/33195" }, { "epoch": 0.2726314203946377, "eta": "201:44:08", "grad_norm": 0.0089, "loss": 0.0574, "lr": "4.971e-05", "step": 1810, "steps": "23.14s,1810/33195" }, { "epoch": 0.27278204548877844, "eta": "205:23:27", "grad_norm": 0.0081, "loss": 0.0477, "lr": "4.971e-05", "step": 1811, "steps": "23.56s,1811/33195" }, { "epoch": 0.2729326705829191, "eta": "201:43:22", "grad_norm": 0.0099, "loss": 0.0547, "lr": "4.971e-05", "step": 1812, "steps": "23.14s,1812/33195" }, { "epoch": 0.2730832956770598, "eta": "202:09:08", "grad_norm": 0.0081, "loss": 0.0542, "lr": "4.971e-05", "step": 1813, "steps": "23.19s,1813/33195" }, { "epoch": 0.27323392077120046, "eta": "201:58:17", "grad_norm": 0.0092, "loss": 0.0569, "lr": "4.971e-05", "step": 1814, "steps": "23.17s,1814/33195" }, { "epoch": 0.27338454586534117, "eta": "201:47:27", "grad_norm": 0.0094, "loss": 0.0584, "lr": "4.971e-05", "step": 1815, "steps": "23.15s,1815/33195" }, { "epoch": 0.2735351709594819, "eta": "202:44:35", "grad_norm": 0.0088, "loss": 0.0566, "lr": "4.971e-05", "step": 1816, "steps": "23.26s,1816/33195" }, { "epoch": 0.27368579605362253, "eta": "201:36:13", "grad_norm": 0.011, "loss": 0.0767, "lr": "4.971e-05", "step": 1817, "steps": "23.13s,1817/33195" }, { "epoch": 0.27383642114776324, "eta": "205:10:14", "grad_norm": 0.0064, "loss": 0.0564, "lr": "4.971e-05", "step": 1818, "steps": "23.54s,1818/33195" }, { "epoch": 0.2739870462419039, "eta": "202:06:49", "grad_norm": 0.0096, "loss": 0.0539, "lr": "4.971e-05", "step": 1819, "steps": "23.19s,1819/33195" }, { "epoch": 0.2741376713360446, "eta": "205:25:08", "grad_norm": 0.0077, "loss": 0.0603, "lr": "4.971e-05", "step": 1820, "steps": "23.57s,1820/33195" }, { "epoch": 0.27428829643018526, "eta": "203:55:51", "grad_norm": 0.0077, "loss": 0.0642, "lr": "4.970e-05", "step": 1821, "steps": "23.4s,1821/33195" }, { "epoch": 0.274438921524326, "eta": "202:31:48", "grad_norm": 0.0082, "loss": 0.0322, "lr": "4.970e-05", "step": 1822, "steps": "23.24s,1822/33195" }, { "epoch": 0.27458954661846663, "eta": "205:44:52", "grad_norm": 0.0094, "loss": 0.0475, "lr": "4.970e-05", "step": 1823, "steps": "23.61s,1823/33195" }, { "epoch": 0.27474017171260734, "eta": "205:18:20", "grad_norm": 0.0058, "loss": 0.0363, "lr": "4.970e-05", "step": 1824, "steps": "23.56s,1824/33195" }, { "epoch": 0.274890796806748, "eta": "201:43:35", "grad_norm": 0.0078, "loss": 0.064, "lr": "4.970e-05", "step": 1825, "steps": "23.15s,1825/33195" }, { "epoch": 0.2750414219008887, "eta": "201:32:44", "grad_norm": 0.0088, "loss": 0.0336, "lr": "4.970e-05", "step": 1826, "steps": "23.13s,1826/33195" }, { "epoch": 0.27519204699502936, "eta": "200:24:24", "grad_norm": 0.0073, "loss": 0.039, "lr": "4.970e-05", "step": 1827, "steps": "23.0s,1827/33195" }, { "epoch": 0.27534267208917007, "eta": "202:50:23", "grad_norm": 0.0067, "loss": 0.0617, "lr": "4.970e-05", "step": 1828, "steps": "23.28s,1828/33195" }, { "epoch": 0.2754932971833107, "eta": "204:08:25", "grad_norm": 0.0065, "loss": 0.0545, "lr": "4.970e-05", "step": 1829, "steps": "23.43s,1829/33195" }, { "epoch": 0.27564392227745144, "eta": "201:41:39", "grad_norm": 0.0074, "loss": 0.0584, "lr": "4.970e-05", "step": 1830, "steps": "23.15s,1830/33195" }, { "epoch": 0.2757945473715921, "eta": "205:36:30", "grad_norm": 0.0067, "loss": 0.0461, "lr": "4.970e-05", "step": 1831, "steps": "23.6s,1831/33195" }, { "epoch": 0.2759451724657328, "eta": "204:59:31", "grad_norm": 0.01, "loss": 0.0602, "lr": "4.970e-05", "step": 1832, "steps": "23.53s,1832/33195" }, { "epoch": 0.27609579755987346, "eta": "205:14:48", "grad_norm": 0.0066, "loss": 0.0538, "lr": "4.970e-05", "step": 1833, "steps": "23.56s,1833/33195" }, { "epoch": 0.27624642265401417, "eta": "205:14:25", "grad_norm": 0.0069, "loss": 0.041, "lr": "4.970e-05", "step": 1834, "steps": "23.56s,1834/33195" }, { "epoch": 0.2763970477481548, "eta": "202:16:19", "grad_norm": 0.0076, "loss": 0.0562, "lr": "4.970e-05", "step": 1835, "steps": "23.22s,1835/33195" }, { "epoch": 0.27654767284229553, "eta": "205:08:24", "grad_norm": 0.0081, "loss": 0.0749, "lr": "4.970e-05", "step": 1836, "steps": "23.55s,1836/33195" }, { "epoch": 0.2766982979364362, "eta": "201:59:52", "grad_norm": 0.0064, "loss": 0.0429, "lr": "4.970e-05", "step": 1837, "steps": "23.19s,1837/33195" }, { "epoch": 0.2768489230305769, "eta": "205:07:37", "grad_norm": 0.0063, "loss": 0.0615, "lr": "4.970e-05", "step": 1838, "steps": "23.55s,1838/33195" }, { "epoch": 0.27699954812471755, "eta": "204:46:19", "grad_norm": 0.0089, "loss": 0.0354, "lr": "4.970e-05", "step": 1839, "steps": "23.51s,1839/33195" }, { "epoch": 0.27715017321885826, "eta": "205:01:36", "grad_norm": 0.0123, "loss": 0.05, "lr": "4.970e-05", "step": 1840, "steps": "23.54s,1840/33195" }, { "epoch": 0.2773007983129989, "eta": "205:06:26", "grad_norm": 0.0084, "loss": 0.0546, "lr": "4.970e-05", "step": 1841, "steps": "23.55s,1841/33195" }, { "epoch": 0.27745142340713963, "eta": "205:11:16", "grad_norm": 0.0111, "loss": 0.0542, "lr": "4.970e-05", "step": 1842, "steps": "23.56s,1842/33195" }, { "epoch": 0.27760204850128034, "eta": "204:18:37", "grad_norm": 0.0074, "loss": 0.0319, "lr": "4.970e-05", "step": 1843, "steps": "23.46s,1843/33195" }, { "epoch": 0.277752673595421, "eta": "201:51:56", "grad_norm": 0.0075, "loss": 0.0433, "lr": "4.970e-05", "step": 1844, "steps": "23.18s,1844/33195" }, { "epoch": 0.2779032986895617, "eta": "205:31:00", "grad_norm": 0.006, "loss": 0.0483, "lr": "4.970e-05", "step": 1845, "steps": "23.6s,1845/33195" }, { "epoch": 0.27805392378370236, "eta": "203:40:53", "grad_norm": 0.0068, "loss": 0.0587, "lr": "4.970e-05", "step": 1846, "steps": "23.39s,1846/33195" }, { "epoch": 0.27820454887784307, "eta": "205:04:05", "grad_norm": 0.0074, "loss": 0.0508, "lr": "4.970e-05", "step": 1847, "steps": "23.55s,1847/33195" }, { "epoch": 0.2783551739719837, "eta": "205:03:41", "grad_norm": 0.0087, "loss": 0.044, "lr": "4.969e-05", "step": 1848, "steps": "23.55s,1848/33195" }, { "epoch": 0.27850579906612444, "eta": "202:00:27", "grad_norm": 0.007, "loss": 0.0467, "lr": "4.969e-05", "step": 1849, "steps": "23.2s,1849/33195" }, { "epoch": 0.2786564241602651, "eta": "201:23:29", "grad_norm": 0.0087, "loss": 0.0497, "lr": "4.969e-05", "step": 1850, "steps": "23.13s,1850/33195" }, { "epoch": 0.2788070492544058, "eta": "204:36:24", "grad_norm": 0.0067, "loss": 0.0573, "lr": "4.969e-05", "step": 1851, "steps": "23.5s,1851/33195" }, { "epoch": 0.27895767434854646, "eta": "202:09:44", "grad_norm": 0.0073, "loss": 0.0573, "lr": "4.969e-05", "step": 1852, "steps": "23.22s,1852/33195" }, { "epoch": 0.27910829944268717, "eta": "204:19:56", "grad_norm": 0.0076, "loss": 0.0276, "lr": "4.969e-05", "step": 1853, "steps": "23.47s,1853/33195" }, { "epoch": 0.2792589245368278, "eta": "205:17:00", "grad_norm": 0.0083, "loss": 0.0507, "lr": "4.969e-05", "step": 1854, "steps": "23.58s,1854/33195" }, { "epoch": 0.27940954963096853, "eta": "202:24:15", "grad_norm": 0.0087, "loss": 0.0607, "lr": "4.969e-05", "step": 1855, "steps": "23.25s,1855/33195" }, { "epoch": 0.2795601747251092, "eta": "205:00:33", "grad_norm": 0.0101, "loss": 0.0555, "lr": "4.969e-05", "step": 1856, "steps": "23.55s,1856/33195" }, { "epoch": 0.2797107998192499, "eta": "204:54:56", "grad_norm": 0.0061, "loss": 0.0385, "lr": "4.969e-05", "step": 1857, "steps": "23.54s,1857/33195" }, { "epoch": 0.27986142491339056, "eta": "205:04:59", "grad_norm": 0.0061, "loss": 0.0488, "lr": "4.969e-05", "step": 1858, "steps": "23.56s,1858/33195" }, { "epoch": 0.28001205000753127, "eta": "202:17:28", "grad_norm": 0.0074, "loss": 0.0455, "lr": "4.969e-05", "step": 1859, "steps": "23.24s,1859/33195" }, { "epoch": 0.2801626751016719, "eta": "204:43:19", "grad_norm": 0.0063, "loss": 0.0542, "lr": "4.969e-05", "step": 1860, "steps": "23.52s,1860/33195" }, { "epoch": 0.28031330019581263, "eta": "202:16:42", "grad_norm": 0.0069, "loss": 0.0559, "lr": "4.969e-05", "step": 1861, "steps": "23.24s,1861/33195" }, { "epoch": 0.2804639252899533, "eta": "202:05:52", "grad_norm": 0.0073, "loss": 0.0674, "lr": "4.969e-05", "step": 1862, "steps": "23.22s,1862/33195" }, { "epoch": 0.280614550384094, "eta": "203:49:55", "grad_norm": 0.0065, "loss": 0.0535, "lr": "4.969e-05", "step": 1863, "steps": "23.42s,1863/33195" }, { "epoch": 0.28076517547823465, "eta": "202:52:05", "grad_norm": 0.0104, "loss": 0.0551, "lr": "4.969e-05", "step": 1864, "steps": "23.31s,1864/33195" }, { "epoch": 0.28091580057237536, "eta": "200:35:56", "grad_norm": 0.0086, "loss": 0.025, "lr": "4.969e-05", "step": 1865, "steps": "23.05s,1865/33195" }, { "epoch": 0.281066425666516, "eta": "201:01:39", "grad_norm": 0.0092, "loss": 0.0397, "lr": "4.969e-05", "step": 1866, "steps": "23.1s,1866/33195" }, { "epoch": 0.28121705076065673, "eta": "204:14:28", "grad_norm": 0.007, "loss": 0.0599, "lr": "4.969e-05", "step": 1867, "steps": "23.47s,1867/33195" }, { "epoch": 0.2813676758547974, "eta": "202:08:46", "grad_norm": 0.0115, "loss": 0.0608, "lr": "4.969e-05", "step": 1868, "steps": "23.23s,1868/33195" }, { "epoch": 0.2815183009489381, "eta": "198:55:12", "grad_norm": 0.0104, "loss": 0.034, "lr": "4.969e-05", "step": 1869, "steps": "22.86s,1869/33195" }, { "epoch": 0.2816689260430788, "eta": "204:44:37", "grad_norm": 0.0079, "loss": 0.0433, "lr": "4.969e-05", "step": 1870, "steps": "23.53s,1870/33195" }, { "epoch": 0.28181955113721946, "eta": "204:54:40", "grad_norm": 0.0085, "loss": 0.0465, "lr": "4.969e-05", "step": 1871, "steps": "23.55s,1871/33195" }, { "epoch": 0.28197017623136017, "eta": "202:54:12", "grad_norm": 0.0068, "loss": 0.046, "lr": "4.969e-05", "step": 1872, "steps": "23.32s,1872/33195" }, { "epoch": 0.2821208013255008, "eta": "203:51:14", "grad_norm": 0.0065, "loss": 0.0555, "lr": "4.969e-05", "step": 1873, "steps": "23.43s,1873/33195" }, { "epoch": 0.28227142641964154, "eta": "202:27:19", "grad_norm": 0.0069, "loss": 0.0447, "lr": "4.969e-05", "step": 1874, "steps": "23.27s,1874/33195" }, { "epoch": 0.2824220515137822, "eta": "202:00:50", "grad_norm": 0.0067, "loss": 0.0627, "lr": "4.968e-05", "step": 1875, "steps": "23.22s,1875/33195" }, { "epoch": 0.2825726766079229, "eta": "202:10:53", "grad_norm": 0.0078, "loss": 0.0465, "lr": "4.968e-05", "step": 1876, "steps": "23.24s,1876/33195" }, { "epoch": 0.28272330170206356, "eta": "203:39:14", "grad_norm": 0.0097, "loss": 0.0488, "lr": "4.968e-05", "step": 1877, "steps": "23.41s,1877/33195" }, { "epoch": 0.28287392679620427, "eta": "200:36:10", "grad_norm": 0.0072, "loss": 0.0585, "lr": "4.968e-05", "step": 1878, "steps": "23.06s,1878/33195" }, { "epoch": 0.2830245518903449, "eta": "204:20:12", "grad_norm": 0.0066, "loss": 0.0577, "lr": "4.968e-05", "step": 1879, "steps": "23.49s,1879/33195" }, { "epoch": 0.28317517698448563, "eta": "201:48:28", "grad_norm": 0.0063, "loss": 0.0657, "lr": "4.968e-05", "step": 1880, "steps": "23.2s,1880/33195" }, { "epoch": 0.2833258020786263, "eta": "201:37:38", "grad_norm": 0.0063, "loss": 0.0331, "lr": "4.968e-05", "step": 1881, "steps": "23.18s,1881/33195" }, { "epoch": 0.283476427172767, "eta": "203:26:51", "grad_norm": 0.0094, "loss": 0.0531, "lr": "4.968e-05", "step": 1882, "steps": "23.39s,1882/33195" }, { "epoch": 0.28362705226690765, "eta": "200:13:22", "grad_norm": 0.0063, "loss": 0.0519, "lr": "4.968e-05", "step": 1883, "steps": "23.02s,1883/33195" }, { "epoch": 0.28377767736104836, "eta": "203:31:17", "grad_norm": 0.0064, "loss": 0.0441, "lr": "4.968e-05", "step": 1884, "steps": "23.4s,1884/33195" }, { "epoch": 0.283928302455189, "eta": "200:43:54", "grad_norm": 0.0065, "loss": 0.0409, "lr": "4.968e-05", "step": 1885, "steps": "23.08s,1885/33195" }, { "epoch": 0.28407892754932973, "eta": "203:46:09", "grad_norm": 0.0115, "loss": 0.0424, "lr": "4.968e-05", "step": 1886, "steps": "23.43s,1886/33195" }, { "epoch": 0.2842295526434704, "eta": "203:56:12", "grad_norm": 0.0096, "loss": 0.0378, "lr": "4.968e-05", "step": 1887, "steps": "23.45s,1887/33195" }, { "epoch": 0.2843801777376111, "eta": "203:40:09", "grad_norm": 0.0064, "loss": 0.0737, "lr": "4.968e-05", "step": 1888, "steps": "23.42s,1888/33195" }, { "epoch": 0.28453080283175175, "eta": "198:21:29", "grad_norm": 0.009, "loss": 0.05, "lr": "4.968e-05", "step": 1889, "steps": "22.81s,1889/33195" }, { "epoch": 0.28468142792589246, "eta": "203:34:10", "grad_norm": 0.0134, "loss": 0.037, "lr": "4.968e-05", "step": 1890, "steps": "23.41s,1890/33195" }, { "epoch": 0.2848320530200331, "eta": "200:15:31", "grad_norm": 0.0066, "loss": 0.0484, "lr": "4.968e-05", "step": 1891, "steps": "23.03s,1891/33195" }, { "epoch": 0.2849826781141738, "eta": "202:25:33", "grad_norm": 0.0076, "loss": 0.0507, "lr": "4.968e-05", "step": 1892, "steps": "23.28s,1892/33195" }, { "epoch": 0.2851333032083145, "eta": "205:48:38", "grad_norm": 0.0079, "loss": 0.051, "lr": "4.968e-05", "step": 1893, "steps": "23.67s,1893/33195" }, { "epoch": 0.2852839283024552, "eta": "203:53:28", "grad_norm": 0.0062, "loss": 0.0529, "lr": "4.968e-05", "step": 1894, "steps": "23.45s,1894/33195" }, { "epoch": 0.28543455339659585, "eta": "201:00:56", "grad_norm": 0.0064, "loss": 0.053, "lr": "4.968e-05", "step": 1895, "steps": "23.12s,1895/33195" }, { "epoch": 0.28558517849073656, "eta": "201:16:11", "grad_norm": 0.0087, "loss": 0.0515, "lr": "4.968e-05", "step": 1896, "steps": "23.15s,1896/33195" }, { "epoch": 0.28573580358487727, "eta": "200:28:51", "grad_norm": 0.0073, "loss": 0.0451, "lr": "4.968e-05", "step": 1897, "steps": "23.06s,1897/33195" }, { "epoch": 0.2858864286790179, "eta": "202:38:53", "grad_norm": 0.0085, "loss": 0.0457, "lr": "4.968e-05", "step": 1898, "steps": "23.31s,1898/33195" }, { "epoch": 0.28603705377315863, "eta": "200:54:10", "grad_norm": 0.0081, "loss": 0.0364, "lr": "4.968e-05", "step": 1899, "steps": "23.11s,1899/33195" }, { "epoch": 0.2861876788672993, "eta": "198:12:06", "grad_norm": 0.007, "loss": 0.0803, "lr": "4.968e-05", "step": 1900, "steps": "22.8s,1900/33195" }, { "epoch": 0.28633830396144, "eta": "200:16:53", "grad_norm": 0.0075, "loss": 0.0643, "lr": "4.967e-05", "step": 1901, "steps": "23.04s,1901/33195" }, { "epoch": 0.28648892905558065, "eta": "202:16:28", "grad_norm": 0.0077, "loss": 0.0425, "lr": "4.967e-05", "step": 1902, "steps": "23.27s,1902/33195" }, { "epoch": 0.28663955414972137, "eta": "204:10:49", "grad_norm": 0.0071, "loss": 0.065, "lr": "4.967e-05", "step": 1903, "steps": "23.49s,1903/33195" }, { "epoch": 0.286790179243862, "eta": "200:26:10", "grad_norm": 0.0078, "loss": 0.0558, "lr": "4.967e-05", "step": 1904, "steps": "23.06s,1904/33195" }, { "epoch": 0.28694080433800273, "eta": "201:59:39", "grad_norm": 0.0106, "loss": 0.0659, "lr": "4.967e-05", "step": 1905, "steps": "23.24s,1905/33195" }, { "epoch": 0.2870914294321434, "eta": "199:54:07", "grad_norm": 0.0058, "loss": 0.0568, "lr": "4.967e-05", "step": 1906, "steps": "23.0s,1906/33195" }, { "epoch": 0.2872420545262841, "eta": "201:06:44", "grad_norm": 0.0072, "loss": 0.0585, "lr": "4.967e-05", "step": 1907, "steps": "23.14s,1907/33195" }, { "epoch": 0.28739267962042475, "eta": "204:08:51", "grad_norm": 0.0099, "loss": 0.062, "lr": "4.967e-05", "step": 1908, "steps": "23.49s,1908/33195" }, { "epoch": 0.28754330471456546, "eta": "202:39:49", "grad_norm": 0.0068, "loss": 0.0502, "lr": "4.967e-05", "step": 1909, "steps": "23.32s,1909/33195" }, { "epoch": 0.2876939298087061, "eta": "200:13:26", "grad_norm": 0.0062, "loss": 0.095, "lr": "4.967e-05", "step": 1910, "steps": "23.04s,1910/33195" }, { "epoch": 0.2878445549028468, "eta": "200:28:41", "grad_norm": 0.0067, "loss": 0.0579, "lr": "4.967e-05", "step": 1911, "steps": "23.07s,1911/33195" }, { "epoch": 0.2879951799969875, "eta": "204:49:00", "grad_norm": 0.0072, "loss": 0.051, "lr": "4.967e-05", "step": 1912, "steps": "23.57s,1912/33195" }, { "epoch": 0.2881458050911282, "eta": "202:22:37", "grad_norm": 0.0084, "loss": 0.0482, "lr": "4.967e-05", "step": 1913, "steps": "23.29s,1913/33195" }, { "epoch": 0.28829643018526885, "eta": "202:01:23", "grad_norm": 0.0057, "loss": 0.0577, "lr": "4.967e-05", "step": 1914, "steps": "23.25s,1914/33195" }, { "epoch": 0.28844705527940956, "eta": "200:58:26", "grad_norm": 0.0084, "loss": 0.0493, "lr": "4.967e-05", "step": 1915, "steps": "23.13s,1915/33195" }, { "epoch": 0.2885976803735502, "eta": "202:52:44", "grad_norm": 0.0075, "loss": 0.0349, "lr": "4.967e-05", "step": 1916, "steps": "23.35s,1916/33195" }, { "epoch": 0.2887483054676909, "eta": "201:13:18", "grad_norm": 0.0079, "loss": 0.0367, "lr": "4.967e-05", "step": 1917, "steps": "23.16s,1917/33195" }, { "epoch": 0.2888989305618316, "eta": "203:38:52", "grad_norm": 0.0072, "loss": 0.0555, "lr": "4.967e-05", "step": 1918, "steps": "23.44s,1918/33195" }, { "epoch": 0.2890495556559723, "eta": "207:17:25", "grad_norm": 0.0072, "loss": 0.0358, "lr": "4.967e-05", "step": 1919, "steps": "23.86s,1919/33195" }, { "epoch": 0.28920018075011295, "eta": "200:09:36", "grad_norm": 0.0066, "loss": 0.0362, "lr": "4.967e-05", "step": 1920, "steps": "23.04s,1920/33195" }, { "epoch": 0.28935080584425366, "eta": "201:58:40", "grad_norm": 0.0072, "loss": 0.0501, "lr": "4.967e-05", "step": 1921, "steps": "23.25s,1921/33195" }, { "epoch": 0.2895014309383943, "eta": "203:47:44", "grad_norm": 0.0069, "loss": 0.067, "lr": "4.967e-05", "step": 1922, "steps": "23.46s,1922/33195" }, { "epoch": 0.289652056032535, "eta": "201:00:34", "grad_norm": 0.0081, "loss": 0.0468, "lr": "4.967e-05", "step": 1923, "steps": "23.14s,1923/33195" }, { "epoch": 0.28980268112667573, "eta": "204:28:39", "grad_norm": 0.0073, "loss": 0.0559, "lr": "4.967e-05", "step": 1924, "steps": "23.54s,1924/33195" }, { "epoch": 0.2899533062208164, "eta": "204:07:25", "grad_norm": 0.0072, "loss": 0.0574, "lr": "4.967e-05", "step": 1925, "steps": "23.5s,1925/33195" }, { "epoch": 0.2901039313149571, "eta": "204:01:48", "grad_norm": 0.0081, "loss": 0.0503, "lr": "4.967e-05", "step": 1926, "steps": "23.49s,1926/33195" }, { "epoch": 0.29025455640909775, "eta": "204:01:25", "grad_norm": 0.0085, "loss": 0.0265, "lr": "4.966e-05", "step": 1927, "steps": "23.49s,1927/33195" }, { "epoch": 0.29040518150323846, "eta": "197:56:14", "grad_norm": 0.0064, "loss": 0.0487, "lr": "4.966e-05", "step": 1928, "steps": "22.79s,1928/33195" }, { "epoch": 0.2905558065973791, "eta": "204:16:16", "grad_norm": 0.0083, "loss": 0.0503, "lr": "4.966e-05", "step": 1929, "steps": "23.52s,1929/33195" }, { "epoch": 0.29070643169151983, "eta": "204:15:52", "grad_norm": 0.0068, "loss": 0.0503, "lr": "4.966e-05", "step": 1930, "steps": "23.52s,1930/33195" }, { "epoch": 0.2908570567856605, "eta": "201:44:22", "grad_norm": 0.0078, "loss": 0.0431, "lr": "4.966e-05", "step": 1931, "steps": "23.23s,1931/33195" }, { "epoch": 0.2910076818798012, "eta": "200:31:02", "grad_norm": 0.007, "loss": 0.0469, "lr": "4.966e-05", "step": 1932, "steps": "23.09s,1932/33195" }, { "epoch": 0.29115830697394185, "eta": "201:27:58", "grad_norm": 0.0063, "loss": 0.0364, "lr": "4.966e-05", "step": 1933, "steps": "23.2s,1933/33195" }, { "epoch": 0.29130893206808256, "eta": "203:37:50", "grad_norm": 0.0085, "loss": 0.039, "lr": "4.966e-05", "step": 1934, "steps": "23.45s,1934/33195" }, { "epoch": 0.2914595571622232, "eta": "201:21:59", "grad_norm": 0.007, "loss": 0.0515, "lr": "4.966e-05", "step": 1935, "steps": "23.19s,1935/33195" }, { "epoch": 0.2916101822563639, "eta": "203:26:38", "grad_norm": 0.007, "loss": 0.0468, "lr": "4.966e-05", "step": 1936, "steps": "23.43s,1936/33195" }, { "epoch": 0.2917608073505046, "eta": "203:47:05", "grad_norm": 0.0063, "loss": 0.0687, "lr": "4.966e-05", "step": 1937, "steps": "23.47s,1937/33195" }, { "epoch": 0.2919114324446453, "eta": "200:02:41", "grad_norm": 0.0084, "loss": 0.0435, "lr": "4.966e-05", "step": 1938, "steps": "23.04s,1938/33195" }, { "epoch": 0.29206205753878595, "eta": "203:30:40", "grad_norm": 0.0067, "loss": 0.0467, "lr": "4.966e-05", "step": 1939, "steps": "23.44s,1939/33195" }, { "epoch": 0.29221268263292666, "eta": "201:14:50", "grad_norm": 0.0099, "loss": 0.0507, "lr": "4.966e-05", "step": 1940, "steps": "23.18s,1940/33195" }, { "epoch": 0.2923633077270673, "eta": "200:53:37", "grad_norm": 0.0065, "loss": 0.0613, "lr": "4.966e-05", "step": 1941, "steps": "23.14s,1941/33195" }, { "epoch": 0.292513932821208, "eta": "198:11:45", "grad_norm": 0.0063, "loss": 0.037, "lr": "4.966e-05", "step": 1942, "steps": "22.83s,1942/33195" }, { "epoch": 0.2926645579153487, "eta": "201:39:43", "grad_norm": 0.0072, "loss": 0.047, "lr": "4.966e-05", "step": 1943, "steps": "23.23s,1943/33195" }, { "epoch": 0.2928151830094894, "eta": "203:59:58", "grad_norm": 0.0067, "loss": 0.0413, "lr": "4.966e-05", "step": 1944, "steps": "23.5s,1944/33195" }, { "epoch": 0.29296580810363004, "eta": "201:44:10", "grad_norm": 0.0083, "loss": 0.0562, "lr": "4.966e-05", "step": 1945, "steps": "23.24s,1945/33195" }, { "epoch": 0.29311643319777075, "eta": "203:53:59", "grad_norm": 0.0072, "loss": 0.07, "lr": "4.966e-05", "step": 1946, "steps": "23.49s,1946/33195" }, { "epoch": 0.2932670582919114, "eta": "200:20:03", "grad_norm": 0.0073, "loss": 0.0525, "lr": "4.966e-05", "step": 1947, "steps": "23.08s,1947/33195" }, { "epoch": 0.2934176833860521, "eta": "206:39:51", "grad_norm": 0.0067, "loss": 0.0794, "lr": "4.966e-05", "step": 1948, "steps": "23.81s,1948/33195" }, { "epoch": 0.2935683084801928, "eta": "203:47:36", "grad_norm": 0.0068, "loss": 0.0513, "lr": "4.966e-05", "step": 1949, "steps": "23.48s,1949/33195" }, { "epoch": 0.2937189335743335, "eta": "199:47:39", "grad_norm": 0.0065, "loss": 0.0261, "lr": "4.966e-05", "step": 1950, "steps": "23.02s,1950/33195" }, { "epoch": 0.29386955866847414, "eta": "201:00:11", "grad_norm": 0.0078, "loss": 0.0633, "lr": "4.966e-05", "step": 1951, "steps": "23.16s,1951/33195" }, { "epoch": 0.29402018376261485, "eta": "197:41:55", "grad_norm": 0.0077, "loss": 0.0761, "lr": "4.966e-05", "step": 1952, "steps": "22.78s,1952/33195" }, { "epoch": 0.29417080885675556, "eta": "203:46:02", "grad_norm": 0.0084, "loss": 0.059, "lr": "4.965e-05", "step": 1953, "steps": "23.48s,1953/33195" }, { "epoch": 0.2943214339508962, "eta": "203:24:49", "grad_norm": 0.0084, "loss": 0.043, "lr": "4.965e-05", "step": 1954, "steps": "23.44s,1954/33195" }, { "epoch": 0.2944720590450369, "eta": "200:48:13", "grad_norm": 0.009, "loss": 0.043, "lr": "4.965e-05", "step": 1955, "steps": "23.14s,1955/33195" }, { "epoch": 0.2946226841391776, "eta": "200:47:50", "grad_norm": 0.0059, "loss": 0.0451, "lr": "4.965e-05", "step": 1956, "steps": "23.14s,1956/33195" }, { "epoch": 0.2947733092333183, "eta": "200:52:39", "grad_norm": 0.0079, "loss": 0.0473, "lr": "4.965e-05", "step": 1957, "steps": "23.15s,1957/33195" }, { "epoch": 0.29492393432745895, "eta": "201:07:53", "grad_norm": 0.0086, "loss": 0.0584, "lr": "4.965e-05", "step": 1958, "steps": "23.18s,1958/33195" }, { "epoch": 0.29507455942159966, "eta": "203:28:04", "grad_norm": 0.0088, "loss": 0.0552, "lr": "4.965e-05", "step": 1959, "steps": "23.45s,1959/33195" }, { "epoch": 0.2952251845157403, "eta": "200:41:05", "grad_norm": 0.0064, "loss": 0.0834, "lr": "4.965e-05", "step": 1960, "steps": "23.13s,1960/33195" }, { "epoch": 0.295375809609881, "eta": "203:58:31", "grad_norm": 0.0082, "loss": 0.0591, "lr": "4.965e-05", "step": 1961, "steps": "23.51s,1961/33195" }, { "epoch": 0.2955264347040217, "eta": "204:13:44", "grad_norm": 0.0079, "loss": 0.0602, "lr": "4.965e-05", "step": 1962, "steps": "23.54s,1962/33195" }, { "epoch": 0.2956770597981624, "eta": "206:39:06", "grad_norm": 0.0073, "loss": 0.0753, "lr": "4.965e-05", "step": 1963, "steps": "23.82s,1963/33195" }, { "epoch": 0.29582768489230304, "eta": "201:21:11", "grad_norm": 0.0088, "loss": 0.0397, "lr": "4.965e-05", "step": 1964, "steps": "23.21s,1964/33195" }, { "epoch": 0.29597830998644375, "eta": "204:12:34", "grad_norm": 0.0085, "loss": 0.0584, "lr": "4.965e-05", "step": 1965, "steps": "23.54s,1965/33195" }, { "epoch": 0.2961289350805844, "eta": "204:12:10", "grad_norm": 0.0063, "loss": 0.0439, "lr": "4.965e-05", "step": 1966, "steps": "23.54s,1966/33195" }, { "epoch": 0.2962795601747251, "eta": "204:27:23", "grad_norm": 0.0074, "loss": 0.0593, "lr": "4.965e-05", "step": 1967, "steps": "23.57s,1967/33195" }, { "epoch": 0.2964301852688658, "eta": "201:30:03", "grad_norm": 0.0072, "loss": 0.0709, "lr": "4.965e-05", "step": 1968, "steps": "23.23s,1968/33195" }, { "epoch": 0.2965808103630065, "eta": "200:58:26", "grad_norm": 0.0084, "loss": 0.0597, "lr": "4.965e-05", "step": 1969, "steps": "23.17s,1969/33195" }, { "epoch": 0.29673143545714714, "eta": "202:10:54", "grad_norm": 0.009, "loss": 0.0595, "lr": "4.965e-05", "step": 1970, "steps": "23.31s,1970/33195" }, { "epoch": 0.29688206055128785, "eta": "203:49:24", "grad_norm": 0.0069, "loss": 0.0508, "lr": "4.965e-05", "step": 1971, "steps": "23.5s,1971/33195" }, { "epoch": 0.2970326856454285, "eta": "204:46:15", "grad_norm": 0.0067, "loss": 0.0476, "lr": "4.965e-05", "step": 1972, "steps": "23.61s,1972/33195" }, { "epoch": 0.2971833107395692, "eta": "200:56:53", "grad_norm": 0.0079, "loss": 0.0438, "lr": "4.965e-05", "step": 1973, "steps": "23.17s,1973/33195" }, { "epoch": 0.2973339358337099, "eta": "203:53:25", "grad_norm": 0.0087, "loss": 0.0619, "lr": "4.965e-05", "step": 1974, "steps": "23.51s,1974/33195" }, { "epoch": 0.2974845609278506, "eta": "204:34:39", "grad_norm": 0.0073, "loss": 0.0566, "lr": "4.965e-05", "step": 1975, "steps": "23.59s,1975/33195" }, { "epoch": 0.29763518602199124, "eta": "203:47:26", "grad_norm": 0.0084, "loss": 0.039, "lr": "4.965e-05", "step": 1976, "steps": "23.5s,1976/33195" }, { "epoch": 0.29778581111613195, "eta": "203:52:15", "grad_norm": 0.0064, "loss": 0.0497, "lr": "4.965e-05", "step": 1977, "steps": "23.51s,1977/33195" }, { "epoch": 0.2979364362102726, "eta": "203:51:51", "grad_norm": 0.0085, "loss": 0.0635, "lr": "4.964e-05", "step": 1978, "steps": "23.51s,1978/33195" }, { "epoch": 0.2980870613044133, "eta": "200:07:45", "grad_norm": 0.0072, "loss": 0.0638, "lr": "4.964e-05", "step": 1979, "steps": "23.08s,1979/33195" }, { "epoch": 0.298237686398554, "eta": "201:04:35", "grad_norm": 0.007, "loss": 0.0509, "lr": "4.964e-05", "step": 1980, "steps": "23.19s,1980/33195" }, { "epoch": 0.2983883114926947, "eta": "203:29:52", "grad_norm": 0.0064, "loss": 0.059, "lr": "4.964e-05", "step": 1981, "steps": "23.47s,1981/33195" }, { "epoch": 0.2985389365868354, "eta": "202:58:16", "grad_norm": 0.0085, "loss": 0.0615, "lr": "4.964e-05", "step": 1982, "steps": "23.41s,1982/33195" }, { "epoch": 0.29868956168097605, "eta": "200:11:25", "grad_norm": 0.011, "loss": 0.0644, "lr": "4.964e-05", "step": 1983, "steps": "23.09s,1983/33195" }, { "epoch": 0.29884018677511676, "eta": "199:55:25", "grad_norm": 0.009, "loss": 0.0753, "lr": "4.964e-05", "step": 1984, "steps": "23.06s,1984/33195" }, { "epoch": 0.2989908118692574, "eta": "200:36:39", "grad_norm": 0.0066, "loss": 0.0528, "lr": "4.964e-05", "step": 1985, "steps": "23.14s,1985/33195" }, { "epoch": 0.2991414369633981, "eta": "203:48:43", "grad_norm": 0.0059, "loss": 0.0499, "lr": "4.964e-05", "step": 1986, "steps": "23.51s,1986/33195" }, { "epoch": 0.2992920620575388, "eta": "200:25:28", "grad_norm": 0.0072, "loss": 0.0474, "lr": "4.964e-05", "step": 1987, "steps": "23.12s,1987/33195" }, { "epoch": 0.2994426871516795, "eta": "201:22:18", "grad_norm": 0.0076, "loss": 0.0463, "lr": "4.964e-05", "step": 1988, "steps": "23.23s,1988/33195" }, { "epoch": 0.29959331224582014, "eta": "200:24:42", "grad_norm": 0.0062, "loss": 0.0611, "lr": "4.964e-05", "step": 1989, "steps": "23.12s,1989/33195" }, { "epoch": 0.29974393733996085, "eta": "200:19:07", "grad_norm": 0.0063, "loss": 0.0569, "lr": "4.964e-05", "step": 1990, "steps": "23.11s,1990/33195" }, { "epoch": 0.2998945624341015, "eta": "203:46:46", "grad_norm": 0.0082, "loss": 0.0529, "lr": "4.964e-05", "step": 1991, "steps": "23.51s,1991/33195" }, { "epoch": 0.3000451875282422, "eta": "201:15:33", "grad_norm": 0.0093, "loss": 0.0609, "lr": "4.964e-05", "step": 1992, "steps": "23.22s,1992/33195" }, { "epoch": 0.3001958126223829, "eta": "199:57:10", "grad_norm": 0.0069, "loss": 0.0585, "lr": "4.964e-05", "step": 1993, "steps": "23.07s,1993/33195" }, { "epoch": 0.3003464377165236, "eta": "200:27:59", "grad_norm": 0.0064, "loss": 0.0411, "lr": "4.964e-05", "step": 1994, "steps": "23.13s,1994/33195" }, { "epoch": 0.30049706281066424, "eta": "200:53:36", "grad_norm": 0.0077, "loss": 0.0552, "lr": "4.964e-05", "step": 1995, "steps": "23.18s,1995/33195" }, { "epoch": 0.30064768790480495, "eta": "203:13:36", "grad_norm": 0.0061, "loss": 0.0561, "lr": "4.964e-05", "step": 1996, "steps": "23.45s,1996/33195" }, { "epoch": 0.3007983129989456, "eta": "200:58:01", "grad_norm": 0.0053, "loss": 0.0434, "lr": "4.964e-05", "step": 1997, "steps": "23.19s,1997/33195" }, { "epoch": 0.3009489380930863, "eta": "203:07:37", "grad_norm": 0.0061, "loss": 0.0565, "lr": "4.964e-05", "step": 1998, "steps": "23.44s,1998/33195" }, { "epoch": 0.30109956318722697, "eta": "200:10:27", "grad_norm": 0.008, "loss": 0.0412, "lr": "4.964e-05", "step": 1999, "steps": "23.1s,1999/33195" }, { "epoch": 0.3012501882813677, "eta": "200:10:04", "grad_norm": 0.0067, "loss": 0.0533, "lr": "4.964e-05", "step": 2000, "steps": "23.1s,2000/33195" }, { "epoch": 0.30140081337550834, "eta": "351:47:56", "grad_norm": 0.009, "loss": 0.0615, "lr": "4.964e-05", "step": 2001, "steps": "40.6s,2001/33195" }, { "epoch": 0.30155143846964905, "eta": "200:35:17", "grad_norm": 0.0072, "loss": 0.0481, "lr": "4.964e-05", "step": 2002, "steps": "23.15s,2002/33195" }, { "epoch": 0.3017020635637897, "eta": "200:45:18", "grad_norm": 0.0069, "loss": 0.0487, "lr": "4.963e-05", "step": 2003, "steps": "23.17s,2003/33195" }, { "epoch": 0.3018526886579304, "eta": "200:34:31", "grad_norm": 0.007, "loss": 0.0434, "lr": "4.963e-05", "step": 2004, "steps": "23.15s,2004/33195" }, { "epoch": 0.30200331375207107, "eta": "202:49:17", "grad_norm": 0.0087, "loss": 0.0569, "lr": "4.963e-05", "step": 2005, "steps": "23.41s,2005/33195" }, { "epoch": 0.3021539388462118, "eta": "200:23:21", "grad_norm": 0.0066, "loss": 0.0671, "lr": "4.963e-05", "step": 2006, "steps": "23.13s,2006/33195" }, { "epoch": 0.3023045639403525, "eta": "200:43:45", "grad_norm": 0.0066, "loss": 0.042, "lr": "4.963e-05", "step": 2007, "steps": "23.17s,2007/33195" }, { "epoch": 0.30245518903449314, "eta": "200:38:10", "grad_norm": 0.0068, "loss": 0.0587, "lr": "4.963e-05", "step": 2008, "steps": "23.16s,2008/33195" }, { "epoch": 0.30260581412863385, "eta": "200:27:24", "grad_norm": 0.0057, "loss": 0.0633, "lr": "4.963e-05", "step": 2009, "steps": "23.14s,2009/33195" }, { "epoch": 0.3027564392227745, "eta": "200:27:00", "grad_norm": 0.0061, "loss": 0.0558, "lr": "4.963e-05", "step": 2010, "steps": "23.14s,2010/33195" }, { "epoch": 0.3029070643169152, "eta": "203:38:55", "grad_norm": 0.0093, "loss": 0.0446, "lr": "4.963e-05", "step": 2011, "steps": "23.51s,2011/33195" }, { "epoch": 0.3030576894110559, "eta": "203:28:08", "grad_norm": 0.0066, "loss": 0.0461, "lr": "4.963e-05", "step": 2012, "steps": "23.49s,2012/33195" }, { "epoch": 0.3032083145051966, "eta": "207:16:25", "grad_norm": 0.0075, "loss": 0.0502, "lr": "4.963e-05", "step": 2013, "steps": "23.93s,2013/33195" }, { "epoch": 0.30335893959933724, "eta": "201:33:01", "grad_norm": 0.0096, "loss": 0.0359, "lr": "4.963e-05", "step": 2014, "steps": "23.27s,2014/33195" }, { "epoch": 0.30350956469347795, "eta": "203:26:58", "grad_norm": 0.0062, "loss": 0.0451, "lr": "4.963e-05", "step": 2015, "steps": "23.49s,2015/33195" }, { "epoch": 0.3036601897876186, "eta": "204:13:20", "grad_norm": 0.0077, "loss": 0.0514, "lr": "4.963e-05", "step": 2016, "steps": "23.58s,2016/33195" }, { "epoch": 0.3038108148817593, "eta": "203:15:47", "grad_norm": 0.006, "loss": 0.0741, "lr": "4.963e-05", "step": 2017, "steps": "23.47s,2017/33195" }, { "epoch": 0.30396143997589997, "eta": "200:55:06", "grad_norm": 0.0054, "loss": 0.0596, "lr": "4.963e-05", "step": 2018, "steps": "23.2s,2018/33195" }, { "epoch": 0.3041120650700407, "eta": "200:33:56", "grad_norm": 0.0073, "loss": 0.0588, "lr": "4.963e-05", "step": 2019, "steps": "23.16s,2019/33195" }, { "epoch": 0.30426269016418134, "eta": "203:14:37", "grad_norm": 0.0073, "loss": 0.0676, "lr": "4.963e-05", "step": 2020, "steps": "23.47s,2020/33195" }, { "epoch": 0.30441331525832205, "eta": "200:59:08", "grad_norm": 0.0143, "loss": 0.0605, "lr": "4.963e-05", "step": 2021, "steps": "23.21s,2021/33195" }, { "epoch": 0.3045639403524627, "eta": "201:19:32", "grad_norm": 0.007, "loss": 0.0667, "lr": "4.963e-05", "step": 2022, "steps": "23.25s,2022/33195" }, { "epoch": 0.3047145654466034, "eta": "200:16:48", "grad_norm": 0.0069, "loss": 0.0501, "lr": "4.963e-05", "step": 2023, "steps": "23.13s,2023/33195" }, { "epoch": 0.30486519054074407, "eta": "199:55:38", "grad_norm": 0.0093, "loss": 0.0505, "lr": "4.963e-05", "step": 2024, "steps": "23.09s,2024/33195" }, { "epoch": 0.3050158156348848, "eta": "200:52:24", "grad_norm": 0.0068, "loss": 0.0572, "lr": "4.963e-05", "step": 2025, "steps": "23.2s,2025/33195" }, { "epoch": 0.30516644072902543, "eta": "202:46:17", "grad_norm": 0.0071, "loss": 0.0794, "lr": "4.963e-05", "step": 2026, "steps": "23.42s,2026/33195" }, { "epoch": 0.30531706582316614, "eta": "204:03:49", "grad_norm": 0.0066, "loss": 0.089, "lr": "4.962e-05", "step": 2027, "steps": "23.57s,2027/33195" }, { "epoch": 0.3054676909173068, "eta": "200:09:41", "grad_norm": 0.0086, "loss": 0.0666, "lr": "4.962e-05", "step": 2028, "steps": "23.12s,2028/33195" }, { "epoch": 0.3056183160114475, "eta": "203:05:54", "grad_norm": 0.0076, "loss": 0.0459, "lr": "4.962e-05", "step": 2029, "steps": "23.46s,2029/33195" }, { "epoch": 0.30576894110558817, "eta": "201:16:26", "grad_norm": 0.0106, "loss": 0.0671, "lr": "4.962e-05", "step": 2030, "steps": "23.25s,2030/33195" }, { "epoch": 0.3059195661997289, "eta": "200:50:04", "grad_norm": 0.0067, "loss": 0.0502, "lr": "4.962e-05", "step": 2031, "steps": "23.2s,2031/33195" }, { "epoch": 0.30607019129386953, "eta": "201:05:16", "grad_norm": 0.0076, "loss": 0.0794, "lr": "4.962e-05", "step": 2032, "steps": "23.23s,2032/33195" }, { "epoch": 0.30622081638801024, "eta": "204:01:28", "grad_norm": 0.0094, "loss": 0.0385, "lr": "4.962e-05", "step": 2033, "steps": "23.57s,2033/33195" }, { "epoch": 0.30637144148215095, "eta": "199:56:59", "grad_norm": 0.0073, "loss": 0.0738, "lr": "4.962e-05", "step": 2034, "steps": "23.1s,2034/33195" }, { "epoch": 0.3065220665762916, "eta": "200:48:32", "grad_norm": 0.0091, "loss": 0.0697, "lr": "4.962e-05", "step": 2035, "steps": "23.2s,2035/33195" }, { "epoch": 0.3066726916704323, "eta": "200:53:20", "grad_norm": 0.0069, "loss": 0.0475, "lr": "4.962e-05", "step": 2036, "steps": "23.21s,2036/33195" }, { "epoch": 0.306823316764573, "eta": "200:37:22", "grad_norm": 0.0063, "loss": 0.038, "lr": "4.962e-05", "step": 2037, "steps": "23.18s,2037/33195" }, { "epoch": 0.3069739418587137, "eta": "200:05:49", "grad_norm": 0.0073, "loss": 0.0709, "lr": "4.962e-05", "step": 2038, "steps": "23.12s,2038/33195" }, { "epoch": 0.30712456695285434, "eta": "205:48:09", "grad_norm": 0.0143, "loss": 0.0651, "lr": "4.962e-05", "step": 2039, "steps": "23.78s,2039/33195" }, { "epoch": 0.30727519204699505, "eta": "200:46:36", "grad_norm": 0.0074, "loss": 0.0339, "lr": "4.962e-05", "step": 2040, "steps": "23.2s,2040/33195" }, { "epoch": 0.3074258171411357, "eta": "199:38:42", "grad_norm": 0.0056, "loss": 0.0455, "lr": "4.962e-05", "step": 2041, "steps": "23.07s,2041/33195" }, { "epoch": 0.3075764422352764, "eta": "199:59:05", "grad_norm": 0.0081, "loss": 0.0394, "lr": "4.962e-05", "step": 2042, "steps": "23.11s,2042/33195" }, { "epoch": 0.30772706732941707, "eta": "202:08:30", "grad_norm": 0.0066, "loss": 0.0784, "lr": "4.962e-05", "step": 2043, "steps": "23.36s,2043/33195" }, { "epoch": 0.3078776924235578, "eta": "200:24:17", "grad_norm": 0.0064, "loss": 0.0692, "lr": "4.962e-05", "step": 2044, "steps": "23.16s,2044/33195" }, { "epoch": 0.30802831751769844, "eta": "200:44:40", "grad_norm": 0.0097, "loss": 0.0628, "lr": "4.962e-05", "step": 2045, "steps": "23.2s,2045/33195" }, { "epoch": 0.30817894261183915, "eta": "200:23:30", "grad_norm": 0.0084, "loss": 0.048, "lr": "4.962e-05", "step": 2046, "steps": "23.16s,2046/33195" }, { "epoch": 0.3083295677059798, "eta": "202:58:52", "grad_norm": 0.0083, "loss": 0.0478, "lr": "4.962e-05", "step": 2047, "steps": "23.46s,2047/33195" }, { "epoch": 0.3084801928001205, "eta": "202:42:54", "grad_norm": 0.0074, "loss": 0.0412, "lr": "4.962e-05", "step": 2048, "steps": "23.43s,2048/33195" }, { "epoch": 0.30863081789426117, "eta": "202:21:44", "grad_norm": 0.0079, "loss": 0.0352, "lr": "4.962e-05", "step": 2049, "steps": "23.39s,2049/33195" }, { "epoch": 0.3087814429884019, "eta": "202:47:18", "grad_norm": 0.008, "loss": 0.0515, "lr": "4.962e-05", "step": 2050, "steps": "23.44s,2050/33195" }, { "epoch": 0.30893206808254253, "eta": "197:30:17", "grad_norm": 0.0056, "loss": 0.0643, "lr": "4.962e-05", "step": 2051, "steps": "22.83s,2051/33195" }, { "epoch": 0.30908269317668324, "eta": "202:56:54", "grad_norm": 0.008, "loss": 0.0591, "lr": "4.961e-05", "step": 2052, "steps": "23.46s,2052/33195" }, { "epoch": 0.3092333182708239, "eta": "199:34:05", "grad_norm": 0.0061, "loss": 0.0479, "lr": "4.961e-05", "step": 2053, "steps": "23.07s,2053/33195" }, { "epoch": 0.3093839433649646, "eta": "199:02:34", "grad_norm": 0.0085, "loss": 0.0563, "lr": "4.961e-05", "step": 2054, "steps": "23.01s,2054/33195" }, { "epoch": 0.30953456845910526, "eta": "202:09:01", "grad_norm": 0.0076, "loss": 0.0565, "lr": "4.961e-05", "step": 2055, "steps": "23.37s,2055/33195" }, { "epoch": 0.309685193553246, "eta": "203:21:17", "grad_norm": 0.0062, "loss": 0.0364, "lr": "4.961e-05", "step": 2056, "steps": "23.51s,2056/33195" }, { "epoch": 0.30983581864738663, "eta": "197:17:37", "grad_norm": 0.0078, "loss": 0.049, "lr": "4.961e-05", "step": 2057, "steps": "22.81s,2057/33195" }, { "epoch": 0.30998644374152734, "eta": "203:15:19", "grad_norm": 0.0064, "loss": 0.0521, "lr": "4.961e-05", "step": 2058, "steps": "23.5s,2058/33195" }, { "epoch": 0.310137068835668, "eta": "201:00:00", "grad_norm": 0.007, "loss": 0.0486, "lr": "4.961e-05", "step": 2059, "steps": "23.24s,2059/33195" }, { "epoch": 0.3102876939298087, "eta": "203:30:06", "grad_norm": 0.0074, "loss": 0.0465, "lr": "4.961e-05", "step": 2060, "steps": "23.53s,2060/33195" }, { "epoch": 0.3104383190239494, "eta": "200:33:17", "grad_norm": 0.0124, "loss": 0.0601, "lr": "4.961e-05", "step": 2061, "steps": "23.19s,2061/33195" }, { "epoch": 0.31058894411809007, "eta": "200:22:31", "grad_norm": 0.0113, "loss": 0.0643, "lr": "4.961e-05", "step": 2062, "steps": "23.17s,2062/33195" }, { "epoch": 0.3107395692122308, "eta": "203:39:18", "grad_norm": 0.0073, "loss": 0.03, "lr": "4.961e-05", "step": 2063, "steps": "23.55s,2063/33195" }, { "epoch": 0.31089019430637144, "eta": "202:47:01", "grad_norm": 0.0073, "loss": 0.0429, "lr": "4.961e-05", "step": 2064, "steps": "23.45s,2064/33195" }, { "epoch": 0.31104081940051215, "eta": "197:50:54", "grad_norm": 0.0074, "loss": 0.0694, "lr": "4.961e-05", "step": 2065, "steps": "22.88s,2065/33195" }, { "epoch": 0.3111914444946528, "eta": "200:00:13", "grad_norm": 0.009, "loss": 0.0675, "lr": "4.961e-05", "step": 2066, "steps": "23.13s,2066/33195" }, { "epoch": 0.3113420695887935, "eta": "203:32:33", "grad_norm": 0.0068, "loss": 0.0507, "lr": "4.961e-05", "step": 2067, "steps": "23.54s,2067/33195" }, { "epoch": 0.31149269468293417, "eta": "200:04:38", "grad_norm": 0.006, "loss": 0.0616, "lr": "4.961e-05", "step": 2068, "steps": "23.14s,2068/33195" }, { "epoch": 0.3116433197770749, "eta": "199:43:30", "grad_norm": 0.0067, "loss": 0.077, "lr": "4.961e-05", "step": 2069, "steps": "23.1s,2069/33195" }, { "epoch": 0.31179394487121553, "eta": "202:49:52", "grad_norm": 0.0079, "loss": 0.039, "lr": "4.961e-05", "step": 2070, "steps": "23.46s,2070/33195" }, { "epoch": 0.31194456996535624, "eta": "203:51:43", "grad_norm": 0.0091, "loss": 0.0482, "lr": "4.961e-05", "step": 2071, "steps": "23.58s,2071/33195" }, { "epoch": 0.3120951950594969, "eta": "204:22:27", "grad_norm": 0.0078, "loss": 0.0295, "lr": "4.961e-05", "step": 2072, "steps": "23.64s,2072/33195" }, { "epoch": 0.3122458201536376, "eta": "199:47:09", "grad_norm": 0.006, "loss": 0.0588, "lr": "4.961e-05", "step": 2073, "steps": "23.11s,2073/33195" }, { "epoch": 0.31239644524777826, "eta": "199:36:23", "grad_norm": 0.0066, "loss": 0.0564, "lr": "4.961e-05", "step": 2074, "steps": "23.09s,2074/33195" }, { "epoch": 0.312547070341919, "eta": "200:17:30", "grad_norm": 0.0058, "loss": 0.0609, "lr": "4.961e-05", "step": 2075, "steps": "23.17s,2075/33195" }, { "epoch": 0.31269769543605963, "eta": "199:30:26", "grad_norm": 0.0083, "loss": 0.0699, "lr": "4.960e-05", "step": 2076, "steps": "23.08s,2076/33195" }, { "epoch": 0.31284832053020034, "eta": "201:13:47", "grad_norm": 0.0071, "loss": 0.07, "lr": "4.960e-05", "step": 2077, "steps": "23.28s,2077/33195" }, { "epoch": 0.312998945624341, "eta": "201:08:12", "grad_norm": 0.0072, "loss": 0.0746, "lr": "4.960e-05", "step": 2078, "steps": "23.27s,2078/33195" }, { "epoch": 0.3131495707184817, "eta": "199:50:01", "grad_norm": 0.0064, "loss": 0.0456, "lr": "4.960e-05", "step": 2079, "steps": "23.12s,2079/33195" }, { "epoch": 0.31330019581262236, "eta": "200:46:41", "grad_norm": 0.008, "loss": 0.0512, "lr": "4.960e-05", "step": 2080, "steps": "23.23s,2080/33195" }, { "epoch": 0.3134508209067631, "eta": "200:25:33", "grad_norm": 0.0093, "loss": 0.0417, "lr": "4.960e-05", "step": 2081, "steps": "23.19s,2081/33195" }, { "epoch": 0.3136014460009037, "eta": "199:28:08", "grad_norm": 0.007, "loss": 0.0485, "lr": "4.960e-05", "step": 2082, "steps": "23.08s,2082/33195" }, { "epoch": 0.31375207109504444, "eta": "199:53:40", "grad_norm": 0.0067, "loss": 0.0679, "lr": "4.960e-05", "step": 2083, "steps": "23.13s,2083/33195" }, { "epoch": 0.3139026961891851, "eta": "199:37:44", "grad_norm": 0.0079, "loss": 0.0429, "lr": "4.960e-05", "step": 2084, "steps": "23.1s,2084/33195" }, { "epoch": 0.3140533212833258, "eta": "204:06:58", "grad_norm": 0.0065, "loss": 0.0646, "lr": "4.960e-05", "step": 2085, "steps": "23.62s,2085/33195" }, { "epoch": 0.31420394637746646, "eta": "200:13:15", "grad_norm": 0.0061, "loss": 0.0465, "lr": "4.960e-05", "step": 2086, "steps": "23.17s,2086/33195" }, { "epoch": 0.31435457147160717, "eta": "203:24:42", "grad_norm": 0.0105, "loss": 0.0532, "lr": "4.960e-05", "step": 2087, "steps": "23.54s,2087/33195" }, { "epoch": 0.3145051965657479, "eta": "200:02:07", "grad_norm": 0.0074, "loss": 0.0614, "lr": "4.960e-05", "step": 2088, "steps": "23.15s,2088/33195" }, { "epoch": 0.31465582165988853, "eta": "199:40:59", "grad_norm": 0.0149, "loss": 0.0215, "lr": "4.960e-05", "step": 2089, "steps": "23.11s,2089/33195" }, { "epoch": 0.31480644675402925, "eta": "200:06:31", "grad_norm": 0.0064, "loss": 0.0623, "lr": "4.960e-05", "step": 2090, "steps": "23.16s,2090/33195" }, { "epoch": 0.3149570718481699, "eta": "200:11:19", "grad_norm": 0.0084, "loss": 0.0515, "lr": "4.960e-05", "step": 2091, "steps": "23.17s,2091/33195" }, { "epoch": 0.3151076969423106, "eta": "199:24:17", "grad_norm": 0.0098, "loss": 0.0322, "lr": "4.960e-05", "step": 2092, "steps": "23.08s,2092/33195" }, { "epoch": 0.31525832203645127, "eta": "202:56:25", "grad_norm": 0.0074, "loss": 0.0748, "lr": "4.960e-05", "step": 2093, "steps": "23.49s,2093/33195" }, { "epoch": 0.315408947130592, "eta": "200:51:38", "grad_norm": 0.0066, "loss": 0.0545, "lr": "4.960e-05", "step": 2094, "steps": "23.25s,2094/33195" }, { "epoch": 0.31555957222473263, "eta": "202:45:17", "grad_norm": 0.0078, "loss": 0.0589, "lr": "4.960e-05", "step": 2095, "steps": "23.47s,2095/33195" }, { "epoch": 0.31571019731887334, "eta": "199:22:44", "grad_norm": 0.0067, "loss": 0.0574, "lr": "4.960e-05", "step": 2096, "steps": "23.08s,2096/33195" }, { "epoch": 0.315860822413014, "eta": "199:06:48", "grad_norm": 0.0082, "loss": 0.04, "lr": "4.960e-05", "step": 2097, "steps": "23.05s,2097/33195" }, { "epoch": 0.3160114475071547, "eta": "199:47:53", "grad_norm": 0.0086, "loss": 0.0365, "lr": "4.960e-05", "step": 2098, "steps": "23.13s,2098/33195" }, { "epoch": 0.31616207260129536, "eta": "199:42:19", "grad_norm": 0.0062, "loss": 0.0521, "lr": "4.959e-05", "step": 2099, "steps": "23.12s,2099/33195" }, { "epoch": 0.3163126976954361, "eta": "201:20:24", "grad_norm": 0.0082, "loss": 0.032, "lr": "4.959e-05", "step": 2100, "steps": "23.31s,2100/33195" }, { "epoch": 0.31646332278957673, "eta": "202:58:29", "grad_norm": 0.0077, "loss": 0.0644, "lr": "4.959e-05", "step": 2101, "steps": "23.5s,2101/33195" }, { "epoch": 0.31661394788371744, "eta": "198:13:04", "grad_norm": 0.0062, "loss": 0.0518, "lr": "4.959e-05", "step": 2102, "steps": "22.95s,2102/33195" }, { "epoch": 0.3167645729778581, "eta": "202:57:42", "grad_norm": 0.0081, "loss": 0.047, "lr": "4.959e-05", "step": 2103, "steps": "23.5s,2103/33195" }, { "epoch": 0.3169151980719988, "eta": "200:52:56", "grad_norm": 0.0088, "loss": 0.0314, "lr": "4.959e-05", "step": 2104, "steps": "23.26s,2104/33195" }, { "epoch": 0.31706582316613946, "eta": "200:00:44", "grad_norm": 0.0089, "loss": 0.0627, "lr": "4.959e-05", "step": 2105, "steps": "23.16s,2105/33195" }, { "epoch": 0.31721644826028017, "eta": "203:17:15", "grad_norm": 0.0064, "loss": 0.0546, "lr": "4.959e-05", "step": 2106, "steps": "23.54s,2106/33195" }, { "epoch": 0.3173670733544208, "eta": "199:59:58", "grad_norm": 0.0077, "loss": 0.0444, "lr": "4.959e-05", "step": 2107, "steps": "23.16s,2107/33195" }, { "epoch": 0.31751769844856154, "eta": "199:44:02", "grad_norm": 0.0074, "loss": 0.0466, "lr": "4.959e-05", "step": 2108, "steps": "23.13s,2108/33195" }, { "epoch": 0.3176683235427022, "eta": "203:10:53", "grad_norm": 0.0067, "loss": 0.0487, "lr": "4.959e-05", "step": 2109, "steps": "23.53s,2109/33195" }, { "epoch": 0.3178189486368429, "eta": "196:10:51", "grad_norm": 0.0079, "loss": 0.0588, "lr": "4.959e-05", "step": 2110, "steps": "22.72s,2110/33195" }, { "epoch": 0.31796957373098356, "eta": "199:37:42", "grad_norm": 0.0074, "loss": 0.0728, "lr": "4.959e-05", "step": 2111, "steps": "23.12s,2111/33195" }, { "epoch": 0.31812019882512427, "eta": "199:58:02", "grad_norm": 0.0082, "loss": 0.0479, "lr": "4.959e-05", "step": 2112, "steps": "23.16s,2112/33195" }, { "epoch": 0.3182708239192649, "eta": "200:33:54", "grad_norm": 0.0076, "loss": 0.0738, "lr": "4.959e-05", "step": 2113, "steps": "23.23s,2113/33195" }, { "epoch": 0.31842144901340563, "eta": "196:24:52", "grad_norm": 0.0079, "loss": 0.0544, "lr": "4.959e-05", "step": 2114, "steps": "22.75s,2114/33195" }, { "epoch": 0.31857207410754634, "eta": "202:58:10", "grad_norm": 0.0073, "loss": 0.083, "lr": "4.959e-05", "step": 2115, "steps": "23.51s,2115/33195" }, { "epoch": 0.318722699201687, "eta": "200:32:45", "grad_norm": 0.009, "loss": 0.0487, "lr": "4.959e-05", "step": 2116, "steps": "23.23s,2116/33195" }, { "epoch": 0.3188733242958277, "eta": "196:18:33", "grad_norm": 0.0073, "loss": 0.0479, "lr": "4.959e-05", "step": 2117, "steps": "22.74s,2117/33195" }, { "epoch": 0.31902394938996836, "eta": "199:14:17", "grad_norm": 0.0063, "loss": 0.0494, "lr": "4.959e-05", "step": 2118, "steps": "23.08s,2118/33195" }, { "epoch": 0.3191745744841091, "eta": "196:43:42", "grad_norm": 0.0075, "loss": 0.0464, "lr": "4.959e-05", "step": 2119, "steps": "22.79s,2119/33195" }, { "epoch": 0.31932519957824973, "eta": "199:49:46", "grad_norm": 0.0082, "loss": 0.06, "lr": "4.959e-05", "step": 2120, "steps": "23.15s,2120/33195" }, { "epoch": 0.31947582467239044, "eta": "199:54:33", "grad_norm": 0.0085, "loss": 0.0415, "lr": "4.959e-05", "step": 2121, "steps": "23.16s,2121/33195" }, { "epoch": 0.3196264497665311, "eta": "198:41:40", "grad_norm": 0.0067, "loss": 0.0353, "lr": "4.959e-05", "step": 2122, "steps": "23.02s,2122/33195" }, { "epoch": 0.3197770748606718, "eta": "202:03:15", "grad_norm": 0.0065, "loss": 0.0269, "lr": "4.958e-05", "step": 2123, "steps": "23.41s,2123/33195" }, { "epoch": 0.31992769995481246, "eta": "202:18:24", "grad_norm": 0.0076, "loss": 0.049, "lr": "4.958e-05", "step": 2124, "steps": "23.44s,2124/33195" }, { "epoch": 0.32007832504895317, "eta": "199:37:29", "grad_norm": 0.0075, "loss": 0.0553, "lr": "4.958e-05", "step": 2125, "steps": "23.13s,2125/33195" }, { "epoch": 0.3202289501430938, "eta": "199:31:55", "grad_norm": 0.0081, "loss": 0.0404, "lr": "4.958e-05", "step": 2126, "steps": "23.12s,2126/33195" }, { "epoch": 0.32037957523723454, "eta": "202:32:45", "grad_norm": 0.0072, "loss": 0.0604, "lr": "4.958e-05", "step": 2127, "steps": "23.47s,2127/33195" }, { "epoch": 0.3205302003313752, "eta": "199:31:09", "grad_norm": 0.0084, "loss": 0.0509, "lr": "4.958e-05", "step": 2128, "steps": "23.12s,2128/33195" }, { "epoch": 0.3206808254255159, "eta": "199:51:28", "grad_norm": 0.0096, "loss": 0.052, "lr": "4.958e-05", "step": 2129, "steps": "23.16s,2129/33195" }, { "epoch": 0.32083145051965656, "eta": "199:56:16", "grad_norm": 0.0088, "loss": 0.0388, "lr": "4.958e-05", "step": 2130, "steps": "23.17s,2130/33195" }, { "epoch": 0.32098207561379727, "eta": "199:45:31", "grad_norm": 0.0089, "loss": 0.0398, "lr": "4.958e-05", "step": 2131, "steps": "23.15s,2131/33195" }, { "epoch": 0.3211327007079379, "eta": "199:29:36", "grad_norm": 0.007, "loss": 0.0397, "lr": "4.958e-05", "step": 2132, "steps": "23.12s,2132/33195" }, { "epoch": 0.32128332580207863, "eta": "199:39:34", "grad_norm": 0.0078, "loss": 0.045, "lr": "4.958e-05", "step": 2133, "steps": "23.14s,2133/33195" }, { "epoch": 0.3214339508962193, "eta": "199:54:43", "grad_norm": 0.0081, "loss": 0.0535, "lr": "4.958e-05", "step": 2134, "steps": "23.17s,2134/33195" }, { "epoch": 0.32158457599036, "eta": "199:07:44", "grad_norm": 0.0074, "loss": 0.0612, "lr": "4.958e-05", "step": 2135, "steps": "23.08s,2135/33195" }, { "epoch": 0.32173520108450065, "eta": "198:57:00", "grad_norm": 0.0081, "loss": 0.049, "lr": "4.958e-05", "step": 2136, "steps": "23.06s,2136/33195" }, { "epoch": 0.32188582617864137, "eta": "201:06:01", "grad_norm": 0.0066, "loss": 0.0521, "lr": "4.958e-05", "step": 2137, "steps": "23.31s,2137/33195" }, { "epoch": 0.322036451272782, "eta": "199:32:28", "grad_norm": 0.0069, "loss": 0.0421, "lr": "4.958e-05", "step": 2138, "steps": "23.13s,2138/33195" }, { "epoch": 0.32218707636692273, "eta": "202:28:04", "grad_norm": 0.0066, "loss": 0.0494, "lr": "4.958e-05", "step": 2139, "steps": "23.47s,2139/33195" }, { "epoch": 0.3223377014610634, "eta": "199:26:31", "grad_norm": 0.0067, "loss": 0.0404, "lr": "4.958e-05", "step": 2140, "steps": "23.12s,2140/33195" }, { "epoch": 0.3224883265552041, "eta": "198:08:30", "grad_norm": 0.0084, "loss": 0.0436, "lr": "4.958e-05", "step": 2141, "steps": "22.97s,2141/33195" }, { "epoch": 0.3226389516493448, "eta": "202:57:57", "grad_norm": 0.0064, "loss": 0.0506, "lr": "4.958e-05", "step": 2142, "steps": "23.53s,2142/33195" }, { "epoch": 0.32278957674348546, "eta": "199:40:53", "grad_norm": 0.0065, "loss": 0.0543, "lr": "4.958e-05", "step": 2143, "steps": "23.15s,2143/33195" }, { "epoch": 0.3229402018376262, "eta": "202:10:35", "grad_norm": 0.0067, "loss": 0.0668, "lr": "4.958e-05", "step": 2144, "steps": "23.44s,2144/33195" }, { "epoch": 0.32309082693176683, "eta": "201:54:40", "grad_norm": 0.0073, "loss": 0.0359, "lr": "4.958e-05", "step": 2145, "steps": "23.41s,2145/33195" }, { "epoch": 0.32324145202590754, "eta": "202:46:01", "grad_norm": 0.006, "loss": 0.0524, "lr": "4.957e-05", "step": 2146, "steps": "23.51s,2146/33195" }, { "epoch": 0.3233920771200482, "eta": "199:34:10", "grad_norm": 0.009, "loss": 0.0658, "lr": "4.957e-05", "step": 2147, "steps": "23.14s,2147/33195" }, { "epoch": 0.3235427022141889, "eta": "203:00:46", "grad_norm": 0.0083, "loss": 0.0683, "lr": "4.957e-05", "step": 2148, "steps": "23.54s,2148/33195" }, { "epoch": 0.32369332730832956, "eta": "199:28:13", "grad_norm": 0.0072, "loss": 0.0546, "lr": "4.957e-05", "step": 2149, "steps": "23.13s,2149/33195" }, { "epoch": 0.32384395240247027, "eta": "202:23:46", "grad_norm": 0.0088, "loss": 0.054, "lr": "4.957e-05", "step": 2150, "steps": "23.47s,2150/33195" }, { "epoch": 0.3239945774966109, "eta": "199:32:38", "grad_norm": 0.0084, "loss": 0.0755, "lr": "4.957e-05", "step": 2151, "steps": "23.14s,2151/33195" }, { "epoch": 0.32414520259075164, "eta": "199:42:35", "grad_norm": 0.0075, "loss": 0.0546, "lr": "4.957e-05", "step": 2152, "steps": "23.16s,2152/33195" }, { "epoch": 0.3242958276848923, "eta": "202:43:17", "grad_norm": 0.0067, "loss": 0.062, "lr": "4.957e-05", "step": 2153, "steps": "23.51s,2153/33195" }, { "epoch": 0.324446452779033, "eta": "203:24:17", "grad_norm": 0.007, "loss": 0.0445, "lr": "4.957e-05", "step": 2154, "steps": "23.59s,2154/33195" }, { "epoch": 0.32459707787317366, "eta": "200:22:49", "grad_norm": 0.0086, "loss": 0.0498, "lr": "4.957e-05", "step": 2155, "steps": "23.24s,2155/33195" }, { "epoch": 0.32474770296731437, "eta": "199:41:03", "grad_norm": 0.0059, "loss": 0.0516, "lr": "4.957e-05", "step": 2156, "steps": "23.16s,2156/33195" }, { "epoch": 0.324898328061455, "eta": "197:15:49", "grad_norm": 0.0081, "loss": 0.0785, "lr": "4.957e-05", "step": 2157, "steps": "22.88s,2157/33195" }, { "epoch": 0.32504895315559573, "eta": "199:50:37", "grad_norm": 0.0095, "loss": 0.0571, "lr": "4.957e-05", "step": 2158, "steps": "23.18s,2158/33195" }, { "epoch": 0.3251995782497364, "eta": "199:39:53", "grad_norm": 0.0074, "loss": 0.0628, "lr": "4.957e-05", "step": 2159, "steps": "23.16s,2159/33195" }, { "epoch": 0.3253502033438771, "eta": "200:26:03", "grad_norm": 0.0067, "loss": 0.0607, "lr": "4.957e-05", "step": 2160, "steps": "23.25s,2160/33195" }, { "epoch": 0.32550082843801775, "eta": "200:30:50", "grad_norm": 0.0081, "loss": 0.0329, "lr": "4.957e-05", "step": 2161, "steps": "23.26s,2161/33195" }, { "epoch": 0.32565145353215846, "eta": "203:15:58", "grad_norm": 0.0063, "loss": 0.0547, "lr": "4.957e-05", "step": 2162, "steps": "23.58s,2162/33195" }, { "epoch": 0.3258020786262991, "eta": "199:48:41", "grad_norm": 0.0074, "loss": 0.0367, "lr": "4.957e-05", "step": 2163, "steps": "23.18s,2163/33195" }, { "epoch": 0.32595270372043983, "eta": "199:53:28", "grad_norm": 0.008, "loss": 0.0518, "lr": "4.957e-05", "step": 2164, "steps": "23.19s,2164/33195" }, { "epoch": 0.3261033288145805, "eta": "202:48:55", "grad_norm": 0.0079, "loss": 0.0451, "lr": "4.957e-05", "step": 2165, "steps": "23.53s,2165/33195" }, { "epoch": 0.3262539539087212, "eta": "199:42:21", "grad_norm": 0.0063, "loss": 0.057, "lr": "4.957e-05", "step": 2166, "steps": "23.17s,2166/33195" }, { "epoch": 0.32640457900286185, "eta": "202:42:58", "grad_norm": 0.007, "loss": 0.0481, "lr": "4.957e-05", "step": 2167, "steps": "23.52s,2167/33195" }, { "epoch": 0.32655520409700256, "eta": "199:05:23", "grad_norm": 0.0064, "loss": 0.0667, "lr": "4.957e-05", "step": 2168, "steps": "23.1s,2168/33195" }, { "epoch": 0.32670582919114327, "eta": "202:21:30", "grad_norm": 0.0057, "loss": 0.075, "lr": "4.956e-05", "step": 2169, "steps": "23.48s,2169/33195" }, { "epoch": 0.3268564542852839, "eta": "199:45:59", "grad_norm": 0.0072, "loss": 0.049, "lr": "4.956e-05", "step": 2170, "steps": "23.18s,2170/33195" }, { "epoch": 0.32700707937942464, "eta": "199:04:14", "grad_norm": 0.0081, "loss": 0.0577, "lr": "4.956e-05", "step": 2171, "steps": "23.1s,2171/33195" }, { "epoch": 0.3271577044735653, "eta": "202:35:50", "grad_norm": 0.0063, "loss": 0.0459, "lr": "4.956e-05", "step": 2172, "steps": "23.51s,2172/33195" }, { "epoch": 0.327308329567706, "eta": "202:56:08", "grad_norm": 0.0082, "loss": 0.0494, "lr": "4.956e-05", "step": 2173, "steps": "23.55s,2173/33195" }, { "epoch": 0.32745895466184666, "eta": "199:39:16", "grad_norm": 0.0067, "loss": 0.0426, "lr": "4.956e-05", "step": 2174, "steps": "23.17s,2174/33195" }, { "epoch": 0.32760957975598737, "eta": "199:28:33", "grad_norm": 0.0079, "loss": 0.0476, "lr": "4.956e-05", "step": 2175, "steps": "23.15s,2175/33195" }, { "epoch": 0.327760204850128, "eta": "199:07:29", "grad_norm": 0.007, "loss": 0.0457, "lr": "4.956e-05", "step": 2176, "steps": "23.11s,2176/33195" }, { "epoch": 0.32791082994426873, "eta": "198:56:45", "grad_norm": 0.007, "loss": 0.0527, "lr": "4.956e-05", "step": 2177, "steps": "23.09s,2177/33195" }, { "epoch": 0.3280614550384094, "eta": "199:06:42", "grad_norm": 0.0073, "loss": 0.0473, "lr": "4.956e-05", "step": 2178, "steps": "23.11s,2178/33195" }, { "epoch": 0.3282120801325501, "eta": "199:42:30", "grad_norm": 0.007, "loss": 0.0538, "lr": "4.956e-05", "step": 2179, "steps": "23.18s,2179/33195" }, { "epoch": 0.32836270522669075, "eta": "200:02:48", "grad_norm": 0.0056, "loss": 0.0719, "lr": "4.956e-05", "step": 2180, "steps": "23.22s,2180/33195" }, { "epoch": 0.32851333032083146, "eta": "202:47:49", "grad_norm": 0.007, "loss": 0.0417, "lr": "4.956e-05", "step": 2181, "steps": "23.54s,2181/33195" }, { "epoch": 0.3286639554149721, "eta": "199:20:40", "grad_norm": 0.0069, "loss": 0.0418, "lr": "4.956e-05", "step": 2182, "steps": "23.14s,2182/33195" }, { "epoch": 0.32881458050911283, "eta": "202:41:52", "grad_norm": 0.0064, "loss": 0.0487, "lr": "4.956e-05", "step": 2183, "steps": "23.53s,2183/33195" }, { "epoch": 0.3289652056032535, "eta": "199:04:24", "grad_norm": 0.0073, "loss": 0.0572, "lr": "4.956e-05", "step": 2184, "steps": "23.11s,2184/33195" }, { "epoch": 0.3291158306973942, "eta": "203:53:26", "grad_norm": 0.0093, "loss": 0.0544, "lr": "4.956e-05", "step": 2185, "steps": "23.67s,2185/33195" }, { "epoch": 0.32926645579153485, "eta": "199:24:18", "grad_norm": 0.006, "loss": 0.0518, "lr": "4.956e-05", "step": 2186, "steps": "23.15s,2186/33195" }, { "epoch": 0.32941708088567556, "eta": "201:33:07", "grad_norm": 0.0068, "loss": 0.0604, "lr": "4.956e-05", "step": 2187, "steps": "23.4s,2187/33195" }, { "epoch": 0.3295677059798162, "eta": "202:19:14", "grad_norm": 0.0082, "loss": 0.0518, "lr": "4.956e-05", "step": 2188, "steps": "23.49s,2188/33195" }, { "epoch": 0.3297183310739569, "eta": "198:52:08", "grad_norm": 0.007, "loss": 0.0686, "lr": "4.956e-05", "step": 2189, "steps": "23.09s,2189/33195" }, { "epoch": 0.3298689561680976, "eta": "199:22:45", "grad_norm": 0.0082, "loss": 0.0394, "lr": "4.956e-05", "step": 2190, "steps": "23.15s,2190/33195" }, { "epoch": 0.3300195812622383, "eta": "198:51:22", "grad_norm": 0.0087, "loss": 0.0622, "lr": "4.955e-05", "step": 2191, "steps": "23.09s,2191/33195" }, { "epoch": 0.33017020635637895, "eta": "199:42:39", "grad_norm": 0.0064, "loss": 0.0594, "lr": "4.955e-05", "step": 2192, "steps": "23.19s,2192/33195" }, { "epoch": 0.33032083145051966, "eta": "202:22:27", "grad_norm": 0.0098, "loss": 0.0725, "lr": "4.955e-05", "step": 2193, "steps": "23.5s,2193/33195" }, { "epoch": 0.3304714565446603, "eta": "200:07:43", "grad_norm": 0.0089, "loss": 0.0256, "lr": "4.955e-05", "step": 2194, "steps": "23.24s,2194/33195" }, { "epoch": 0.330622081638801, "eta": "199:15:40", "grad_norm": 0.0066, "loss": 0.0564, "lr": "4.955e-05", "step": 2195, "steps": "23.14s,2195/33195" }, { "epoch": 0.33077270673294173, "eta": "198:49:26", "grad_norm": 0.0069, "loss": 0.0366, "lr": "4.955e-05", "step": 2196, "steps": "23.09s,2196/33195" }, { "epoch": 0.3309233318270824, "eta": "196:29:34", "grad_norm": 0.0081, "loss": 0.0499, "lr": "4.955e-05", "step": 2197, "steps": "22.82s,2197/33195" }, { "epoch": 0.3310739569212231, "eta": "198:12:30", "grad_norm": 0.0099, "loss": 0.056, "lr": "4.955e-05", "step": 2198, "steps": "23.02s,2198/33195" }, { "epoch": 0.33122458201536376, "eta": "199:19:17", "grad_norm": 0.0074, "loss": 0.0506, "lr": "4.955e-05", "step": 2199, "steps": "23.15s,2199/33195" }, { "epoch": 0.33137520710950447, "eta": "198:22:04", "grad_norm": 0.0063, "loss": 0.0587, "lr": "4.955e-05", "step": 2200, "steps": "23.04s,2200/33195" }, { "epoch": 0.3315258322036451, "eta": "449:40:16", "grad_norm": 0.0067, "loss": 0.0456, "lr": "4.955e-05", "step": 2201, "steps": "52.23s,2201/33195" }, { "epoch": 0.33167645729778583, "eta": "198:52:18", "grad_norm": 0.0076, "loss": 0.0429, "lr": "4.955e-05", "step": 2202, "steps": "23.1s,2202/33195" }, { "epoch": 0.3318270823919265, "eta": "200:55:53", "grad_norm": 0.0065, "loss": 0.066, "lr": "4.955e-05", "step": 2203, "steps": "23.34s,2203/33195" }, { "epoch": 0.3319777074860672, "eta": "199:01:51", "grad_norm": 0.0065, "loss": 0.0474, "lr": "4.955e-05", "step": 2204, "steps": "23.12s,2204/33195" }, { "epoch": 0.33212833258020785, "eta": "198:51:09", "grad_norm": 0.0084, "loss": 0.0549, "lr": "4.955e-05", "step": 2205, "steps": "23.1s,2205/33195" }, { "epoch": 0.33227895767434856, "eta": "199:52:44", "grad_norm": 0.0102, "loss": 0.0497, "lr": "4.955e-05", "step": 2206, "steps": "23.22s,2206/33195" }, { "epoch": 0.3324295827684892, "eta": "199:21:22", "grad_norm": 0.0065, "loss": 0.044, "lr": "4.955e-05", "step": 2207, "steps": "23.16s,2207/33195" }, { "epoch": 0.33258020786262993, "eta": "199:10:39", "grad_norm": 0.0072, "loss": 0.0435, "lr": "4.955e-05", "step": 2208, "steps": "23.14s,2208/33195" }, { "epoch": 0.3327308329567706, "eta": "199:36:05", "grad_norm": 0.0072, "loss": 0.0364, "lr": "4.955e-05", "step": 2209, "steps": "23.19s,2209/33195" }, { "epoch": 0.3328814580509113, "eta": "202:26:07", "grad_norm": 0.0151, "loss": 0.0617, "lr": "4.955e-05", "step": 2210, "steps": "23.52s,2210/33195" }, { "epoch": 0.33303208314505195, "eta": "198:07:31", "grad_norm": 0.0063, "loss": 0.0729, "lr": "4.955e-05", "step": 2211, "steps": "23.02s,2211/33195" }, { "epoch": 0.33318270823919266, "eta": "199:45:15", "grad_norm": 0.0078, "loss": 0.0428, "lr": "4.955e-05", "step": 2212, "steps": "23.21s,2212/33195" }, { "epoch": 0.3333333333333333, "eta": "199:24:12", "grad_norm": 0.0065, "loss": 0.0635, "lr": "4.954e-05", "step": 2213, "steps": "23.17s,2213/33195" }, { "epoch": 0.333483958427474, "eta": "202:45:12", "grad_norm": 0.0079, "loss": 0.0454, "lr": "4.954e-05", "step": 2214, "steps": "23.56s,2214/33195" }, { "epoch": 0.3336345835216147, "eta": "198:42:08", "grad_norm": 0.0067, "loss": 0.064, "lr": "4.954e-05", "step": 2215, "steps": "23.09s,2215/33195" }, { "epoch": 0.3337852086157554, "eta": "199:48:52", "grad_norm": 0.0071, "loss": 0.0473, "lr": "4.954e-05", "step": 2216, "steps": "23.22s,2216/33195" }, { "epoch": 0.33393583370989605, "eta": "202:44:01", "grad_norm": 0.0073, "loss": 0.071, "lr": "4.954e-05", "step": 2217, "steps": "23.56s,2217/33195" }, { "epoch": 0.33408645880403676, "eta": "199:32:36", "grad_norm": 0.0078, "loss": 0.0538, "lr": "4.954e-05", "step": 2218, "steps": "23.19s,2218/33195" }, { "epoch": 0.3342370838981774, "eta": "202:43:14", "grad_norm": 0.0062, "loss": 0.0479, "lr": "4.954e-05", "step": 2219, "steps": "23.56s,2219/33195" }, { "epoch": 0.3343877089923181, "eta": "198:40:12", "grad_norm": 0.0065, "loss": 0.0497, "lr": "4.954e-05", "step": 2220, "steps": "23.09s,2220/33195" }, { "epoch": 0.3345383340864588, "eta": "198:24:20", "grad_norm": 0.0071, "loss": 0.0547, "lr": "4.954e-05", "step": 2221, "steps": "23.06s,2221/33195" }, { "epoch": 0.3346889591805995, "eta": "198:23:57", "grad_norm": 0.0078, "loss": 0.0637, "lr": "4.954e-05", "step": 2222, "steps": "23.06s,2222/33195" }, { "epoch": 0.3348395842747402, "eta": "199:41:00", "grad_norm": 0.0082, "loss": 0.0594, "lr": "4.954e-05", "step": 2223, "steps": "23.21s,2223/33195" }, { "epoch": 0.33499020936888085, "eta": "198:43:50", "grad_norm": 0.0083, "loss": 0.0372, "lr": "4.954e-05", "step": 2224, "steps": "23.1s,2224/33195" }, { "epoch": 0.33514083446302156, "eta": "199:24:44", "grad_norm": 0.0072, "loss": 0.0553, "lr": "4.954e-05", "step": 2225, "steps": "23.18s,2225/33195" }, { "epoch": 0.3352914595571622, "eta": "203:32:06", "grad_norm": 0.0099, "loss": 0.0559, "lr": "4.954e-05", "step": 2226, "steps": "23.66s,2226/33195" }, { "epoch": 0.33544208465130293, "eta": "199:34:17", "grad_norm": 0.0102, "loss": 0.0644, "lr": "4.954e-05", "step": 2227, "steps": "23.2s,2227/33195" }, { "epoch": 0.3355927097454436, "eta": "197:50:41", "grad_norm": 0.0071, "loss": 0.0507, "lr": "4.954e-05", "step": 2228, "steps": "23.0s,2228/33195" }, { "epoch": 0.3357433348395843, "eta": "200:04:29", "grad_norm": 0.0055, "loss": 0.0538, "lr": "4.954e-05", "step": 2229, "steps": "23.26s,2229/33195" }, { "epoch": 0.33589395993372495, "eta": "202:33:45", "grad_norm": 0.0066, "loss": 0.0411, "lr": "4.954e-05", "step": 2230, "steps": "23.55s,2230/33195" }, { "epoch": 0.33604458502786566, "eta": "199:06:56", "grad_norm": 0.0053, "loss": 0.0557, "lr": "4.954e-05", "step": 2231, "steps": "23.15s,2231/33195" }, { "epoch": 0.3361952101220063, "eta": "199:42:40", "grad_norm": 0.007, "loss": 0.043, "lr": "4.954e-05", "step": 2232, "steps": "23.22s,2232/33195" }, { "epoch": 0.336345835216147, "eta": "202:22:15", "grad_norm": 0.0063, "loss": 0.0568, "lr": "4.954e-05", "step": 2233, "steps": "23.53s,2233/33195" }, { "epoch": 0.3364964603102877, "eta": "202:16:42", "grad_norm": 0.0077, "loss": 0.0774, "lr": "4.954e-05", "step": 2234, "steps": "23.52s,2234/33195" }, { "epoch": 0.3366470854044284, "eta": "198:29:16", "grad_norm": 0.0079, "loss": 0.0536, "lr": "4.953e-05", "step": 2235, "steps": "23.08s,2235/33195" }, { "epoch": 0.33679771049856905, "eta": "198:39:12", "grad_norm": 0.007, "loss": 0.0444, "lr": "4.953e-05", "step": 2236, "steps": "23.1s,2236/33195" }, { "epoch": 0.33694833559270976, "eta": "199:20:06", "grad_norm": 0.0069, "loss": 0.0633, "lr": "4.953e-05", "step": 2237, "steps": "23.18s,2237/33195" }, { "epoch": 0.3370989606868504, "eta": "199:14:33", "grad_norm": 0.0065, "loss": 0.048, "lr": "4.953e-05", "step": 2238, "steps": "23.17s,2238/33195" }, { "epoch": 0.3372495857809911, "eta": "199:09:00", "grad_norm": 0.011, "loss": 0.0315, "lr": "4.953e-05", "step": 2239, "steps": "23.16s,2239/33195" }, { "epoch": 0.3374002108751318, "eta": "199:13:47", "grad_norm": 0.0073, "loss": 0.0482, "lr": "4.953e-05", "step": 2240, "steps": "23.17s,2240/33195" }, { "epoch": 0.3375508359692725, "eta": "201:01:44", "grad_norm": 0.0083, "loss": 0.0388, "lr": "4.953e-05", "step": 2241, "steps": "23.38s,2241/33195" }, { "epoch": 0.33770146106341314, "eta": "198:42:03", "grad_norm": 0.0066, "loss": 0.0524, "lr": "4.953e-05", "step": 2242, "steps": "23.11s,2242/33195" }, { "epoch": 0.33785208615755385, "eta": "201:16:26", "grad_norm": 0.0107, "loss": 0.0442, "lr": "4.953e-05", "step": 2243, "steps": "23.41s,2243/33195" }, { "epoch": 0.3380027112516945, "eta": "201:16:02", "grad_norm": 0.0091, "loss": 0.0381, "lr": "4.953e-05", "step": 2244, "steps": "23.41s,2244/33195" }, { "epoch": 0.3381533363458352, "eta": "201:15:39", "grad_norm": 0.0155, "loss": 0.0563, "lr": "4.953e-05", "step": 2245, "steps": "23.41s,2245/33195" }, { "epoch": 0.3383039614399759, "eta": "198:35:21", "grad_norm": 0.0079, "loss": 0.0443, "lr": "4.953e-05", "step": 2246, "steps": "23.1s,2246/33195" }, { "epoch": 0.3384545865341166, "eta": "198:45:17", "grad_norm": 0.0069, "loss": 0.0629, "lr": "4.953e-05", "step": 2247, "steps": "23.12s,2247/33195" }, { "epoch": 0.33860521162825724, "eta": "196:35:57", "grad_norm": 0.0062, "loss": 0.0775, "lr": "4.953e-05", "step": 2248, "steps": "22.87s,2248/33195" }, { "epoch": 0.33875583672239795, "eta": "199:05:09", "grad_norm": 0.0067, "loss": 0.0452, "lr": "4.953e-05", "step": 2249, "steps": "23.16s,2249/33195" }, { "epoch": 0.33890646181653866, "eta": "199:15:05", "grad_norm": 0.0066, "loss": 0.0749, "lr": "4.953e-05", "step": 2250, "steps": "23.18s,2250/33195" }, { "epoch": 0.3390570869106793, "eta": "202:20:21", "grad_norm": 0.0066, "loss": 0.0563, "lr": "4.953e-05", "step": 2251, "steps": "23.54s,2251/33195" }, { "epoch": 0.33920771200482003, "eta": "198:48:31", "grad_norm": 0.0054, "loss": 0.0601, "lr": "4.953e-05", "step": 2252, "steps": "23.13s,2252/33195" }, { "epoch": 0.3393583370989607, "eta": "201:38:19", "grad_norm": 0.0068, "loss": 0.0575, "lr": "4.953e-05", "step": 2253, "steps": "23.46s,2253/33195" }, { "epoch": 0.3395089621931014, "eta": "196:33:40", "grad_norm": 0.0072, "loss": 0.051, "lr": "4.953e-05", "step": 2254, "steps": "22.87s,2254/33195" }, { "epoch": 0.33965958728724205, "eta": "199:13:09", "grad_norm": 0.0069, "loss": 0.072, "lr": "4.953e-05", "step": 2255, "steps": "23.18s,2255/33195" }, { "epoch": 0.33981021238138276, "eta": "201:37:08", "grad_norm": 0.0064, "loss": 0.0395, "lr": "4.953e-05", "step": 2256, "steps": "23.46s,2256/33195" }, { "epoch": 0.3399608374755234, "eta": "201:52:13", "grad_norm": 0.0092, "loss": 0.0444, "lr": "4.952e-05", "step": 2257, "steps": "23.49s,2257/33195" }, { "epoch": 0.3401114625696641, "eta": "205:23:14", "grad_norm": 0.0075, "loss": 0.0908, "lr": "4.952e-05", "step": 2258, "steps": "23.9s,2258/33195" }, { "epoch": 0.3402620876638048, "eta": "201:25:39", "grad_norm": 0.0068, "loss": 0.036, "lr": "4.952e-05", "step": 2259, "steps": "23.44s,2259/33195" }, { "epoch": 0.3404127127579455, "eta": "198:19:39", "grad_norm": 0.0111, "loss": 0.0313, "lr": "4.952e-05", "step": 2260, "steps": "23.08s,2260/33195" }, { "epoch": 0.34056333785208615, "eta": "201:40:20", "grad_norm": 0.0092, "loss": 0.0317, "lr": "4.952e-05", "step": 2261, "steps": "23.47s,2261/33195" }, { "epoch": 0.34071396294622686, "eta": "201:39:57", "grad_norm": 0.006, "loss": 0.0472, "lr": "4.952e-05", "step": 2262, "steps": "23.47s,2262/33195" }, { "epoch": 0.3408645880403675, "eta": "198:39:07", "grad_norm": 0.0092, "loss": 0.0361, "lr": "4.952e-05", "step": 2263, "steps": "23.12s,2263/33195" }, { "epoch": 0.3410152131345082, "eta": "201:59:47", "grad_norm": 0.009, "loss": 0.0469, "lr": "4.952e-05", "step": 2264, "steps": "23.51s,2264/33195" }, { "epoch": 0.3411658382286489, "eta": "197:46:48", "grad_norm": 0.007, "loss": 0.0803, "lr": "4.952e-05", "step": 2265, "steps": "23.02s,2265/33195" }, { "epoch": 0.3413164633227896, "eta": "201:22:55", "grad_norm": 0.0056, "loss": 0.0537, "lr": "4.952e-05", "step": 2266, "steps": "23.44s,2266/33195" }, { "epoch": 0.34146708841693024, "eta": "199:13:40", "grad_norm": 0.0067, "loss": 0.0474, "lr": "4.952e-05", "step": 2267, "steps": "23.19s,2267/33195" }, { "epoch": 0.34161771351107095, "eta": "198:16:35", "grad_norm": 0.006, "loss": 0.0321, "lr": "4.952e-05", "step": 2268, "steps": "23.08s,2268/33195" }, { "epoch": 0.3417683386052116, "eta": "202:39:04", "grad_norm": 0.0075, "loss": 0.0463, "lr": "4.952e-05", "step": 2269, "steps": "23.59s,2269/33195" }, { "epoch": 0.3419189636993523, "eta": "201:41:59", "grad_norm": 0.0067, "loss": 0.0497, "lr": "4.952e-05", "step": 2270, "steps": "23.48s,2270/33195" }, { "epoch": 0.342069588793493, "eta": "201:57:03", "grad_norm": 0.0093, "loss": 0.0325, "lr": "4.952e-05", "step": 2271, "steps": "23.51s,2271/33195" }, { "epoch": 0.3422202138876337, "eta": "199:27:12", "grad_norm": 0.0114, "loss": 0.0328, "lr": "4.952e-05", "step": 2272, "steps": "23.22s,2272/33195" }, { "epoch": 0.34237083898177434, "eta": "198:09:30", "grad_norm": 0.0071, "loss": 0.0639, "lr": "4.952e-05", "step": 2273, "steps": "23.07s,2273/33195" }, { "epoch": 0.34252146407591505, "eta": "201:19:48", "grad_norm": 0.0076, "loss": 0.0391, "lr": "4.952e-05", "step": 2274, "steps": "23.44s,2274/33195" }, { "epoch": 0.3426720891700557, "eta": "199:26:02", "grad_norm": 0.0061, "loss": 0.0771, "lr": "4.952e-05", "step": 2275, "steps": "23.22s,2275/33195" }, { "epoch": 0.3428227142641964, "eta": "200:48:06", "grad_norm": 0.0063, "loss": 0.0579, "lr": "4.952e-05", "step": 2276, "steps": "23.38s,2276/33195" }, { "epoch": 0.3429733393583371, "eta": "201:54:42", "grad_norm": 0.0082, "loss": 0.0518, "lr": "4.952e-05", "step": 2277, "steps": "23.51s,2277/33195" }, { "epoch": 0.3431239644524778, "eta": "200:42:10", "grad_norm": 0.0069, "loss": 0.0644, "lr": "4.952e-05", "step": 2278, "steps": "23.37s,2278/33195" }, { "epoch": 0.3432745895466185, "eta": "201:38:27", "grad_norm": 0.0092, "loss": 0.0464, "lr": "4.951e-05", "step": 2279, "steps": "23.48s,2279/33195" }, { "epoch": 0.34342521464075915, "eta": "199:44:42", "grad_norm": 0.009, "loss": 0.0439, "lr": "4.951e-05", "step": 2280, "steps": "23.26s,2280/33195" }, { "epoch": 0.34357583973489986, "eta": "198:21:53", "grad_norm": 0.0066, "loss": 0.04, "lr": "4.951e-05", "step": 2281, "steps": "23.1s,2281/33195" }, { "epoch": 0.3437264648290405, "eta": "198:47:15", "grad_norm": 0.0087, "loss": 0.0597, "lr": "4.951e-05", "step": 2282, "steps": "23.15s,2282/33195" }, { "epoch": 0.3438770899231812, "eta": "201:21:26", "grad_norm": 0.0071, "loss": 0.061, "lr": "4.951e-05", "step": 2283, "steps": "23.45s,2283/33195" }, { "epoch": 0.3440277150173219, "eta": "198:05:16", "grad_norm": 0.0062, "loss": 0.036, "lr": "4.951e-05", "step": 2284, "steps": "23.07s,2284/33195" }, { "epoch": 0.3441783401114626, "eta": "198:51:15", "grad_norm": 0.0092, "loss": 0.0594, "lr": "4.951e-05", "step": 2285, "steps": "23.16s,2285/33195" }, { "epoch": 0.34432896520560324, "eta": "198:35:25", "grad_norm": 0.0131, "loss": 0.0511, "lr": "4.951e-05", "step": 2286, "steps": "23.13s,2286/33195" }, { "epoch": 0.34447959029974395, "eta": "198:35:02", "grad_norm": 0.0081, "loss": 0.0545, "lr": "4.951e-05", "step": 2287, "steps": "23.13s,2287/33195" }, { "epoch": 0.3446302153938846, "eta": "200:53:43", "grad_norm": 0.0065, "loss": 0.0559, "lr": "4.951e-05", "step": 2288, "steps": "23.4s,2288/33195" }, { "epoch": 0.3447808404880253, "eta": "198:08:30", "grad_norm": 0.0073, "loss": 0.0418, "lr": "4.951e-05", "step": 2289, "steps": "23.08s,2289/33195" }, { "epoch": 0.344931465582166, "eta": "198:44:10", "grad_norm": 0.0078, "loss": 0.0391, "lr": "4.951e-05", "step": 2290, "steps": "23.15s,2290/33195" }, { "epoch": 0.3450820906763067, "eta": "199:14:41", "grad_norm": 0.0064, "loss": 0.0372, "lr": "4.951e-05", "step": 2291, "steps": "23.21s,2291/33195" }, { "epoch": 0.34523271577044734, "eta": "197:15:50", "grad_norm": 0.0095, "loss": 0.0639, "lr": "4.951e-05", "step": 2292, "steps": "22.98s,2292/33195" }, { "epoch": 0.34538334086458805, "eta": "200:26:01", "grad_norm": 0.0078, "loss": 0.0564, "lr": "4.951e-05", "step": 2293, "steps": "23.35s,2293/33195" }, { "epoch": 0.3455339659587287, "eta": "212:26:39", "grad_norm": 0.0058, "loss": 0.0513, "lr": "4.951e-05", "step": 2294, "steps": "24.75s,2294/33195" }, { "epoch": 0.3456845910528694, "eta": "430:11:48", "grad_norm": 0.0099, "loss": 0.0408, "lr": "4.951e-05", "step": 2295, "steps": "50.12s,2295/33195" }, { "epoch": 0.34583521614701007, "eta": "225:23:27", "grad_norm": 0.0089, "loss": 0.061, "lr": "4.951e-05", "step": 2296, "steps": "26.26s,2296/33195" }, { "epoch": 0.3459858412411508, "eta": "202:28:03", "grad_norm": 0.0087, "loss": 0.037, "lr": "4.951e-05", "step": 2297, "steps": "23.59s,2297/33195" }, { "epoch": 0.34613646633529144, "eta": "205:58:48", "grad_norm": 0.0069, "loss": 0.0796, "lr": "4.951e-05", "step": 2298, "steps": "24.0s,2298/33195" }, { "epoch": 0.34628709142943215, "eta": "198:25:15", "grad_norm": 0.0085, "loss": 0.0442, "lr": "4.951e-05", "step": 2299, "steps": "23.12s,2299/33195" }, { "epoch": 0.3464377165235728, "eta": "204:20:09", "grad_norm": 0.0079, "loss": 0.0412, "lr": "4.950e-05", "step": 2300, "steps": "23.81s,2300/33195" }, { "epoch": 0.3465883416177135, "eta": "200:58:57", "grad_norm": 0.009, "loss": 0.0606, "lr": "4.950e-05", "step": 2301, "steps": "23.42s,2301/33195" }, { "epoch": 0.34673896671185417, "eta": "201:08:51", "grad_norm": 0.0065, "loss": 0.0614, "lr": "4.950e-05", "step": 2302, "steps": "23.44s,2302/33195" }, { "epoch": 0.3468895918059949, "eta": "200:06:41", "grad_norm": 0.0064, "loss": 0.0449, "lr": "4.950e-05", "step": 2303, "steps": "23.32s,2303/33195" }, { "epoch": 0.3470402169001356, "eta": "200:06:18", "grad_norm": 0.0077, "loss": 0.0757, "lr": "4.950e-05", "step": 2304, "steps": "23.32s,2304/33195" }, { "epoch": 0.34719084199427624, "eta": "198:12:39", "grad_norm": 0.0064, "loss": 0.0503, "lr": "4.950e-05", "step": 2305, "steps": "23.1s,2305/33195" }, { "epoch": 0.34734146708841696, "eta": "198:38:00", "grad_norm": 0.0073, "loss": 0.0438, "lr": "4.950e-05", "step": 2306, "steps": "23.15s,2306/33195" }, { "epoch": 0.3474920921825576, "eta": "198:01:35", "grad_norm": 0.0058, "loss": 0.0514, "lr": "4.950e-05", "step": 2307, "steps": "23.08s,2307/33195" }, { "epoch": 0.3476427172766983, "eta": "198:37:14", "grad_norm": 0.0068, "loss": 0.0568, "lr": "4.950e-05", "step": 2308, "steps": "23.15s,2308/33195" }, { "epoch": 0.347793342370839, "eta": "198:52:17", "grad_norm": 0.0066, "loss": 0.0559, "lr": "4.950e-05", "step": 2309, "steps": "23.18s,2309/33195" }, { "epoch": 0.3479439674649797, "eta": "198:36:27", "grad_norm": 0.0068, "loss": 0.0528, "lr": "4.950e-05", "step": 2310, "steps": "23.15s,2310/33195" }, { "epoch": 0.34809459255912034, "eta": "195:25:37", "grad_norm": 0.0067, "loss": 0.049, "lr": "4.950e-05", "step": 2311, "steps": "22.78s,2311/33195" }, { "epoch": 0.34824521765326105, "eta": "200:34:04", "grad_norm": 0.0066, "loss": 0.0436, "lr": "4.950e-05", "step": 2312, "steps": "23.38s,2312/33195" }, { "epoch": 0.3483958427474017, "eta": "197:48:58", "grad_norm": 0.0066, "loss": 0.0408, "lr": "4.950e-05", "step": 2313, "steps": "23.06s,2313/33195" }, { "epoch": 0.3485464678415424, "eta": "200:43:35", "grad_norm": 0.007, "loss": 0.0662, "lr": "4.950e-05", "step": 2314, "steps": "23.4s,2314/33195" }, { "epoch": 0.3486970929356831, "eta": "197:58:30", "grad_norm": 0.0059, "loss": 0.084, "lr": "4.950e-05", "step": 2315, "steps": "23.08s,2315/33195" }, { "epoch": 0.3488477180298238, "eta": "201:08:32", "grad_norm": 0.0076, "loss": 0.0504, "lr": "4.950e-05", "step": 2316, "steps": "23.45s,2316/33195" }, { "epoch": 0.34899834312396444, "eta": "201:08:09", "grad_norm": 0.0082, "loss": 0.0439, "lr": "4.950e-05", "step": 2317, "steps": "23.45s,2317/33195" }, { "epoch": 0.34914896821810515, "eta": "200:52:19", "grad_norm": 0.0077, "loss": 0.0467, "lr": "4.950e-05", "step": 2318, "steps": "23.42s,2318/33195" }, { "epoch": 0.3492995933122458, "eta": "200:41:38", "grad_norm": 0.0068, "loss": 0.0507, "lr": "4.950e-05", "step": 2319, "steps": "23.4s,2319/33195" }, { "epoch": 0.3494502184063865, "eta": "195:32:30", "grad_norm": 0.0062, "loss": 0.0642, "lr": "4.950e-05", "step": 2320, "steps": "22.8s,2320/33195" }, { "epoch": 0.34960084350052717, "eta": "201:16:52", "grad_norm": 0.0073, "loss": 0.0642, "lr": "4.949e-05", "step": 2321, "steps": "23.47s,2321/33195" }, { "epoch": 0.3497514685946679, "eta": "198:26:41", "grad_norm": 0.0092, "loss": 0.0452, "lr": "4.949e-05", "step": 2322, "steps": "23.14s,2322/33195" }, { "epoch": 0.34990209368880854, "eta": "201:57:15", "grad_norm": 0.0072, "loss": 0.0604, "lr": "4.949e-05", "step": 2323, "steps": "23.55s,2323/33195" }, { "epoch": 0.35005271878294925, "eta": "197:44:45", "grad_norm": 0.0084, "loss": 0.037, "lr": "4.949e-05", "step": 2324, "steps": "23.06s,2324/33195" }, { "epoch": 0.3502033438770899, "eta": "198:20:23", "grad_norm": 0.0069, "loss": 0.0625, "lr": "4.949e-05", "step": 2325, "steps": "23.13s,2325/33195" }, { "epoch": 0.3503539689712306, "eta": "195:35:21", "grad_norm": 0.0066, "loss": 0.0459, "lr": "4.949e-05", "step": 2326, "steps": "22.81s,2326/33195" }, { "epoch": 0.35050459406537127, "eta": "199:11:03", "grad_norm": 0.0085, "loss": 0.0639, "lr": "4.949e-05", "step": 2327, "steps": "23.23s,2327/33195" }, { "epoch": 0.350655219159512, "eta": "194:58:35", "grad_norm": 0.0063, "loss": 0.0872, "lr": "4.949e-05", "step": 2328, "steps": "22.74s,2328/33195" }, { "epoch": 0.35080584425365263, "eta": "198:49:42", "grad_norm": 0.0071, "loss": 0.0574, "lr": "4.949e-05", "step": 2329, "steps": "23.19s,2329/33195" }, { "epoch": 0.35095646934779334, "eta": "200:32:12", "grad_norm": 0.0088, "loss": 0.0494, "lr": "4.949e-05", "step": 2330, "steps": "23.39s,2330/33195" }, { "epoch": 0.35110709444193405, "eta": "195:38:36", "grad_norm": 0.0058, "loss": 0.066, "lr": "4.949e-05", "step": 2331, "steps": "22.82s,2331/33195" }, { "epoch": 0.3512577195360747, "eta": "197:15:57", "grad_norm": 0.0078, "loss": 0.0438, "lr": "4.949e-05", "step": 2332, "steps": "23.01s,2332/33195" }, { "epoch": 0.3514083446302154, "eta": "199:13:52", "grad_norm": 0.0061, "loss": 0.0704, "lr": "4.949e-05", "step": 2333, "steps": "23.24s,2333/33195" }, { "epoch": 0.3515589697243561, "eta": "198:32:20", "grad_norm": 0.0071, "loss": 0.0489, "lr": "4.949e-05", "step": 2334, "steps": "23.16s,2334/33195" }, { "epoch": 0.3517095948184968, "eta": "198:11:23", "grad_norm": 0.0072, "loss": 0.0322, "lr": "4.949e-05", "step": 2335, "steps": "23.12s,2335/33195" }, { "epoch": 0.35186021991263744, "eta": "203:50:27", "grad_norm": 0.0077, "loss": 0.0784, "lr": "4.949e-05", "step": 2336, "steps": "23.78s,2336/33195" }, { "epoch": 0.35201084500677815, "eta": "197:44:54", "grad_norm": 0.0067, "loss": 0.0513, "lr": "4.949e-05", "step": 2337, "steps": "23.07s,2337/33195" }, { "epoch": 0.3521614701009188, "eta": "200:13:39", "grad_norm": 0.006, "loss": 0.0734, "lr": "4.949e-05", "step": 2338, "steps": "23.36s,2338/33195" }, { "epoch": 0.3523120951950595, "eta": "195:40:42", "grad_norm": 0.006, "loss": 0.0633, "lr": "4.949e-05", "step": 2339, "steps": "22.83s,2339/33195" }, { "epoch": 0.35246272028920017, "eta": "198:09:27", "grad_norm": 0.0063, "loss": 0.0606, "lr": "4.949e-05", "step": 2340, "steps": "23.12s,2340/33195" }, { "epoch": 0.3526133453833409, "eta": "200:53:37", "grad_norm": 0.0075, "loss": 0.0548, "lr": "4.949e-05", "step": 2341, "steps": "23.44s,2341/33195" }, { "epoch": 0.35276397047748154, "eta": "204:29:12", "grad_norm": 0.0053, "loss": 0.0697, "lr": "4.948e-05", "step": 2342, "steps": "23.86s,2342/33195" }, { "epoch": 0.35291459557162225, "eta": "198:49:26", "grad_norm": 0.0078, "loss": 0.0744, "lr": "4.948e-05", "step": 2343, "steps": "23.2s,2343/33195" }, { "epoch": 0.3530652206657629, "eta": "197:42:12", "grad_norm": 0.0076, "loss": 0.0403, "lr": "4.948e-05", "step": 2344, "steps": "23.07s,2344/33195" }, { "epoch": 0.3532158457599036, "eta": "201:58:54", "grad_norm": 0.0078, "loss": 0.0581, "lr": "4.948e-05", "step": 2345, "steps": "23.57s,2345/33195" }, { "epoch": 0.35336647085404427, "eta": "197:56:51", "grad_norm": 0.006, "loss": 0.0438, "lr": "4.948e-05", "step": 2346, "steps": "23.1s,2346/33195" }, { "epoch": 0.353517095948185, "eta": "195:53:05", "grad_norm": 0.0062, "loss": 0.0765, "lr": "4.948e-05", "step": 2347, "steps": "22.86s,2347/33195" }, { "epoch": 0.35366772104232563, "eta": "198:42:21", "grad_norm": 0.012, "loss": 0.0542, "lr": "4.948e-05", "step": 2348, "steps": "23.19s,2348/33195" }, { "epoch": 0.35381834613646634, "eta": "198:05:59", "grad_norm": 0.007, "loss": 0.0614, "lr": "4.948e-05", "step": 2349, "steps": "23.12s,2349/33195" }, { "epoch": 0.353968971230607, "eta": "198:46:44", "grad_norm": 0.0065, "loss": 0.0488, "lr": "4.948e-05", "step": 2350, "steps": "23.2s,2350/33195" }, { "epoch": 0.3541195963247477, "eta": "195:20:43", "grad_norm": 0.0073, "loss": 0.0611, "lr": "4.948e-05", "step": 2351, "steps": "22.8s,2351/33195" }, { "epoch": 0.35427022141888836, "eta": "200:03:04", "grad_norm": 0.0074, "loss": 0.055, "lr": "4.948e-05", "step": 2352, "steps": "23.35s,2352/33195" }, { "epoch": 0.3544208465130291, "eta": "201:09:30", "grad_norm": 0.0065, "loss": 0.0405, "lr": "4.948e-05", "step": 2353, "steps": "23.48s,2353/33195" }, { "epoch": 0.35457147160716973, "eta": "195:40:08", "grad_norm": 0.0067, "loss": 0.0394, "lr": "4.948e-05", "step": 2354, "steps": "22.84s,2354/33195" }, { "epoch": 0.35472209670131044, "eta": "200:17:19", "grad_norm": 0.0087, "loss": 0.0339, "lr": "4.948e-05", "step": 2355, "steps": "23.38s,2355/33195" }, { "epoch": 0.3548727217954511, "eta": "198:59:49", "grad_norm": 0.0065, "loss": 0.0479, "lr": "4.948e-05", "step": 2356, "steps": "23.23s,2356/33195" }, { "epoch": 0.3550233468895918, "eta": "198:02:54", "grad_norm": 0.0066, "loss": 0.0707, "lr": "4.948e-05", "step": 2357, "steps": "23.12s,2357/33195" }, { "epoch": 0.3551739719837325, "eta": "198:23:04", "grad_norm": 0.0096, "loss": 0.0573, "lr": "4.948e-05", "step": 2358, "steps": "23.16s,2358/33195" }, { "epoch": 0.35532459707787317, "eta": "200:05:28", "grad_norm": 0.0071, "loss": 0.0555, "lr": "4.948e-05", "step": 2359, "steps": "23.36s,2359/33195" }, { "epoch": 0.3554752221720139, "eta": "201:37:35", "grad_norm": 0.0115, "loss": 0.0692, "lr": "4.948e-05", "step": 2360, "steps": "23.54s,2360/33195" }, { "epoch": 0.35562584726615454, "eta": "200:04:42", "grad_norm": 0.0068, "loss": 0.0648, "lr": "4.948e-05", "step": 2361, "steps": "23.36s,2361/33195" }, { "epoch": 0.35577647236029525, "eta": "198:26:40", "grad_norm": 0.0066, "loss": 0.0556, "lr": "4.948e-05", "step": 2362, "steps": "23.17s,2362/33195" }, { "epoch": 0.3559270974544359, "eta": "199:53:38", "grad_norm": 0.0056, "loss": 0.0467, "lr": "4.947e-05", "step": 2363, "steps": "23.34s,2363/33195" }, { "epoch": 0.3560777225485766, "eta": "198:20:45", "grad_norm": 0.0056, "loss": 0.0562, "lr": "4.947e-05", "step": 2364, "steps": "23.16s,2364/33195" }, { "epoch": 0.35622834764271727, "eta": "198:35:47", "grad_norm": 0.0085, "loss": 0.0594, "lr": "4.947e-05", "step": 2365, "steps": "23.19s,2365/33195" }, { "epoch": 0.356378972736858, "eta": "198:35:24", "grad_norm": 0.0077, "loss": 0.0686, "lr": "4.947e-05", "step": 2366, "steps": "23.19s,2366/33195" }, { "epoch": 0.35652959783099863, "eta": "200:48:36", "grad_norm": 0.0092, "loss": 0.0448, "lr": "4.947e-05", "step": 2367, "steps": "23.45s,2367/33195" }, { "epoch": 0.35668022292513935, "eta": "197:43:15", "grad_norm": 0.0114, "loss": 0.0472, "lr": "4.947e-05", "step": 2368, "steps": "23.09s,2368/33195" }, { "epoch": 0.35683084801928, "eta": "196:51:29", "grad_norm": 0.0069, "loss": 0.0676, "lr": "4.947e-05", "step": 2369, "steps": "22.99s,2369/33195" }, { "epoch": 0.3569814731134207, "eta": "197:32:12", "grad_norm": 0.0078, "loss": 0.0584, "lr": "4.947e-05", "step": 2370, "steps": "23.07s,2370/33195" }, { "epoch": 0.35713209820756137, "eta": "194:42:17", "grad_norm": 0.0181, "loss": 0.0923, "lr": "4.947e-05", "step": 2371, "steps": "22.74s,2371/33195" }, { "epoch": 0.3572827233017021, "eta": "196:29:47", "grad_norm": 0.0093, "loss": 0.0518, "lr": "4.947e-05", "step": 2372, "steps": "22.95s,2372/33195" }, { "epoch": 0.35743334839584273, "eta": "197:46:28", "grad_norm": 0.0064, "loss": 0.0318, "lr": "4.947e-05", "step": 2373, "steps": "23.1s,2373/33195" }, { "epoch": 0.35758397348998344, "eta": "204:11:20", "grad_norm": 0.0074, "loss": 0.0545, "lr": "4.947e-05", "step": 2374, "steps": "23.85s,2374/33195" }, { "epoch": 0.3577345985841241, "eta": "196:49:11", "grad_norm": 0.007, "loss": 0.053, "lr": "4.947e-05", "step": 2375, "steps": "22.99s,2375/33195" }, { "epoch": 0.3578852236782648, "eta": "197:55:35", "grad_norm": 0.0121, "loss": 0.0507, "lr": "4.947e-05", "step": 2376, "steps": "23.12s,2376/33195" }, { "epoch": 0.35803584877240546, "eta": "197:34:39", "grad_norm": 0.0062, "loss": 0.0521, "lr": "4.947e-05", "step": 2377, "steps": "23.08s,2377/33195" }, { "epoch": 0.3581864738665462, "eta": "198:10:13", "grad_norm": 0.0079, "loss": 0.0546, "lr": "4.947e-05", "step": 2378, "steps": "23.15s,2378/33195" }, { "epoch": 0.35833709896068683, "eta": "198:14:58", "grad_norm": 0.0092, "loss": 0.0269, "lr": "4.947e-05", "step": 2379, "steps": "23.16s,2379/33195" }, { "epoch": 0.35848772405482754, "eta": "196:42:08", "grad_norm": 0.0074, "loss": 0.0408, "lr": "4.947e-05", "step": 2380, "steps": "22.98s,2380/33195" }, { "epoch": 0.3586383491489682, "eta": "199:51:46", "grad_norm": 0.0064, "loss": 0.0669, "lr": "4.947e-05", "step": 2381, "steps": "23.35s,2381/33195" }, { "epoch": 0.3587889742431089, "eta": "201:18:41", "grad_norm": 0.0063, "loss": 0.0413, "lr": "4.947e-05", "step": 2382, "steps": "23.52s,2382/33195" }, { "epoch": 0.35893959933724956, "eta": "194:53:09", "grad_norm": 0.0085, "loss": 0.0422, "lr": "4.947e-05", "step": 2383, "steps": "22.77s,2383/33195" }, { "epoch": 0.35909022443139027, "eta": "201:23:02", "grad_norm": 0.0073, "loss": 0.0433, "lr": "4.946e-05", "step": 2384, "steps": "23.53s,2384/33195" }, { "epoch": 0.359240849525531, "eta": "197:00:46", "grad_norm": 0.0059, "loss": 0.0545, "lr": "4.946e-05", "step": 2385, "steps": "23.02s,2385/33195" }, { "epoch": 0.35939147461967164, "eta": "201:17:07", "grad_norm": 0.0106, "loss": 0.0454, "lr": "4.946e-05", "step": 2386, "steps": "23.52s,2386/33195" }, { "epoch": 0.35954209971381235, "eta": "199:39:10", "grad_norm": 0.0054, "loss": 0.0678, "lr": "4.946e-05", "step": 2387, "steps": "23.33s,2387/33195" }, { "epoch": 0.359692724807953, "eta": "198:01:13", "grad_norm": 0.0077, "loss": 0.0538, "lr": "4.946e-05", "step": 2388, "steps": "23.14s,2388/33195" }, { "epoch": 0.3598433499020937, "eta": "201:10:49", "grad_norm": 0.007, "loss": 0.038, "lr": "4.946e-05", "step": 2389, "steps": "23.51s,2389/33195" }, { "epoch": 0.35999397499623437, "eta": "198:00:27", "grad_norm": 0.0064, "loss": 0.059, "lr": "4.946e-05", "step": 2390, "steps": "23.14s,2390/33195" }, { "epoch": 0.3601446000903751, "eta": "197:44:40", "grad_norm": 0.0081, "loss": 0.0618, "lr": "4.946e-05", "step": 2391, "steps": "23.11s,2391/33195" }, { "epoch": 0.36029522518451573, "eta": "194:49:44", "grad_norm": 0.0062, "loss": 0.0357, "lr": "4.946e-05", "step": 2392, "steps": "22.77s,2392/33195" }, { "epoch": 0.36044585027865644, "eta": "201:04:07", "grad_norm": 0.0071, "loss": 0.0727, "lr": "4.946e-05", "step": 2393, "steps": "23.5s,2393/33195" }, { "epoch": 0.3605964753727971, "eta": "198:39:59", "grad_norm": 0.0272, "loss": 0.043, "lr": "4.946e-05", "step": 2394, "steps": "23.22s,2394/33195" }, { "epoch": 0.3607471004669378, "eta": "199:56:36", "grad_norm": 0.0067, "loss": 0.0566, "lr": "4.946e-05", "step": 2395, "steps": "23.37s,2395/33195" }, { "epoch": 0.36089772556107846, "eta": "196:46:17", "grad_norm": 0.0086, "loss": 0.0574, "lr": "4.946e-05", "step": 2396, "steps": "23.0s,2396/33195" }, { "epoch": 0.3610483506552192, "eta": "201:07:40", "grad_norm": 0.007, "loss": 0.0587, "lr": "4.946e-05", "step": 2397, "steps": "23.51s,2397/33195" }, { "epoch": 0.36119897574935983, "eta": "200:05:41", "grad_norm": 0.0101, "loss": 0.0453, "lr": "4.946e-05", "step": 2398, "steps": "23.39s,2398/33195" }, { "epoch": 0.36134960084350054, "eta": "200:15:34", "grad_norm": 0.0068, "loss": 0.0355, "lr": "4.946e-05", "step": 2399, "steps": "23.41s,2399/33195" }, { "epoch": 0.3615002259376412, "eta": "198:01:44", "grad_norm": 0.0112, "loss": 0.083, "lr": "4.946e-05", "step": 2400, "steps": "23.15s,2400/33195" }, { "epoch": 0.3616508510317819, "eta": "377:34:07", "grad_norm": 0.0089, "loss": 0.0582, "lr": "4.946e-05", "step": 2401, "steps": "44.14s,2401/33195" }, { "epoch": 0.36180147612592256, "eta": "197:55:50", "grad_norm": 0.0067, "loss": 0.0691, "lr": "4.946e-05", "step": 2402, "steps": "23.14s,2402/33195" }, { "epoch": 0.36195210122006327, "eta": "194:55:49", "grad_norm": 0.0074, "loss": 0.0689, "lr": "4.946e-05", "step": 2403, "steps": "22.79s,2403/33195" }, { "epoch": 0.3621027263142039, "eta": "197:44:47", "grad_norm": 0.0067, "loss": 0.0737, "lr": "4.945e-05", "step": 2404, "steps": "23.12s,2404/33195" }, { "epoch": 0.36225335140834464, "eta": "198:30:35", "grad_norm": 0.0081, "loss": 0.0625, "lr": "4.945e-05", "step": 2405, "steps": "23.21s,2405/33195" }, { "epoch": 0.3624039765024853, "eta": "198:09:41", "grad_norm": 0.0058, "loss": 0.0379, "lr": "4.945e-05", "step": 2406, "steps": "23.17s,2406/33195" }, { "epoch": 0.362554601596626, "eta": "197:48:46", "grad_norm": 0.007, "loss": 0.0796, "lr": "4.945e-05", "step": 2407, "steps": "23.13s,2407/33195" }, { "epoch": 0.36270522669076666, "eta": "200:53:06", "grad_norm": 0.0069, "loss": 0.0682, "lr": "4.945e-05", "step": 2408, "steps": "23.49s,2408/33195" }, { "epoch": 0.36285585178490737, "eta": "200:42:27", "grad_norm": 0.0076, "loss": 0.0684, "lr": "4.945e-05", "step": 2409, "steps": "23.47s,2409/33195" }, { "epoch": 0.363006476879048, "eta": "200:57:27", "grad_norm": 0.0071, "loss": 0.0688, "lr": "4.945e-05", "step": 2410, "steps": "23.5s,2410/33195" }, { "epoch": 0.36315710197318873, "eta": "200:26:16", "grad_norm": 0.0068, "loss": 0.0701, "lr": "4.945e-05", "step": 2411, "steps": "23.44s,2411/33195" }, { "epoch": 0.36330772706732944, "eta": "201:27:27", "grad_norm": 0.0081, "loss": 0.0539, "lr": "4.945e-05", "step": 2412, "steps": "23.56s,2412/33195" }, { "epoch": 0.3634583521614701, "eta": "201:11:40", "grad_norm": 0.0078, "loss": 0.0531, "lr": "4.945e-05", "step": 2413, "steps": "23.53s,2413/33195" }, { "epoch": 0.3636089772556108, "eta": "197:30:41", "grad_norm": 0.0073, "loss": 0.0443, "lr": "4.945e-05", "step": 2414, "steps": "23.1s,2414/33195" }, { "epoch": 0.36375960234975147, "eta": "199:12:54", "grad_norm": 0.0072, "loss": 0.0367, "lr": "4.945e-05", "step": 2415, "steps": "23.3s,2415/33195" }, { "epoch": 0.3639102274438922, "eta": "201:05:22", "grad_norm": 0.0072, "loss": 0.0429, "lr": "4.945e-05", "step": 2416, "steps": "23.52s,2416/33195" }, { "epoch": 0.36406085253803283, "eta": "201:10:06", "grad_norm": 0.0072, "loss": 0.0456, "lr": "4.945e-05", "step": 2417, "steps": "23.53s,2417/33195" }, { "epoch": 0.36421147763217354, "eta": "198:05:03", "grad_norm": 0.008, "loss": 0.0522, "lr": "4.945e-05", "step": 2418, "steps": "23.17s,2418/33195" }, { "epoch": 0.3643621027263142, "eta": "198:20:03", "grad_norm": 0.0135, "loss": 0.0489, "lr": "4.945e-05", "step": 2419, "steps": "23.2s,2419/33195" }, { "epoch": 0.3645127278204549, "eta": "198:24:47", "grad_norm": 0.0056, "loss": 0.0401, "lr": "4.945e-05", "step": 2420, "steps": "23.21s,2420/33195" }, { "epoch": 0.36466335291459556, "eta": "195:14:38", "grad_norm": 0.0068, "loss": 0.0445, "lr": "4.945e-05", "step": 2421, "steps": "22.84s,2421/33195" }, { "epoch": 0.3648139780087363, "eta": "201:13:16", "grad_norm": 0.0072, "loss": 0.0453, "lr": "4.945e-05", "step": 2422, "steps": "23.54s,2422/33195" }, { "epoch": 0.3649646031028769, "eta": "197:32:20", "grad_norm": 0.0072, "loss": 0.0598, "lr": "4.945e-05", "step": 2423, "steps": "23.11s,2423/33195" }, { "epoch": 0.36511522819701764, "eta": "200:10:56", "grad_norm": 0.0061, "loss": 0.0408, "lr": "4.944e-05", "step": 2424, "steps": "23.42s,2424/33195" }, { "epoch": 0.3652658532911583, "eta": "197:31:34", "grad_norm": 0.0061, "loss": 0.0742, "lr": "4.944e-05", "step": 2425, "steps": "23.11s,2425/33195" }, { "epoch": 0.365416478385299, "eta": "198:17:20", "grad_norm": 0.0084, "loss": 0.0563, "lr": "4.944e-05", "step": 2426, "steps": "23.2s,2426/33195" }, { "epoch": 0.36556710347943966, "eta": "197:15:25", "grad_norm": 0.0076, "loss": 0.047, "lr": "4.944e-05", "step": 2427, "steps": "23.08s,2427/33195" }, { "epoch": 0.36571772857358037, "eta": "200:14:30", "grad_norm": 0.0176, "loss": 0.0519, "lr": "4.944e-05", "step": 2428, "steps": "23.43s,2428/33195" }, { "epoch": 0.365868353667721, "eta": "197:50:32", "grad_norm": 0.0089, "loss": 0.0405, "lr": "4.944e-05", "step": 2429, "steps": "23.15s,2429/33195" }, { "epoch": 0.36601897876186174, "eta": "197:04:00", "grad_norm": 0.0068, "loss": 0.0642, "lr": "4.944e-05", "step": 2430, "steps": "23.06s,2430/33195" }, { "epoch": 0.3661696038560024, "eta": "197:34:23", "grad_norm": 0.0065, "loss": 0.0577, "lr": "4.944e-05", "step": 2431, "steps": "23.12s,2431/33195" }, { "epoch": 0.3663202289501431, "eta": "198:30:24", "grad_norm": 0.0079, "loss": 0.0609, "lr": "4.944e-05", "step": 2432, "steps": "23.23s,2432/33195" }, { "epoch": 0.36647085404428376, "eta": "201:19:12", "grad_norm": 0.0129, "loss": 0.0595, "lr": "4.944e-05", "step": 2433, "steps": "23.56s,2433/33195" }, { "epoch": 0.36662147913842447, "eta": "199:56:47", "grad_norm": 0.0081, "loss": 0.0482, "lr": "4.944e-05", "step": 2434, "steps": "23.4s,2434/33195" }, { "epoch": 0.3667721042325651, "eta": "200:42:32", "grad_norm": 0.007, "loss": 0.0577, "lr": "4.944e-05", "step": 2435, "steps": "23.49s,2435/33195" }, { "epoch": 0.36692272932670583, "eta": "199:50:53", "grad_norm": 0.0064, "loss": 0.0746, "lr": "4.944e-05", "step": 2436, "steps": "23.39s,2436/33195" }, { "epoch": 0.3670733544208465, "eta": "197:47:27", "grad_norm": 0.0063, "loss": 0.0545, "lr": "4.944e-05", "step": 2437, "steps": "23.15s,2437/33195" }, { "epoch": 0.3672239795149872, "eta": "199:29:35", "grad_norm": 0.0068, "loss": 0.0573, "lr": "4.944e-05", "step": 2438, "steps": "23.35s,2438/33195" }, { "epoch": 0.3673746046091279, "eta": "200:51:13", "grad_norm": 0.0077, "loss": 0.0621, "lr": "4.944e-05", "step": 2439, "steps": "23.51s,2439/33195" }, { "epoch": 0.36752522970326856, "eta": "202:33:21", "grad_norm": 0.0091, "loss": 0.0494, "lr": "4.944e-05", "step": 2440, "steps": "23.71s,2440/33195" }, { "epoch": 0.3676758547974093, "eta": "197:25:24", "grad_norm": 0.0068, "loss": 0.0486, "lr": "4.944e-05", "step": 2441, "steps": "23.11s,2441/33195" }, { "epoch": 0.36782647989154993, "eta": "197:04:31", "grad_norm": 0.0072, "loss": 0.0475, "lr": "4.944e-05", "step": 2442, "steps": "23.07s,2442/33195" }, { "epoch": 0.36797710498569064, "eta": "197:19:31", "grad_norm": 0.008, "loss": 0.0496, "lr": "4.944e-05", "step": 2443, "steps": "23.1s,2443/33195" }, { "epoch": 0.3681277300798313, "eta": "198:30:53", "grad_norm": 0.0077, "loss": 0.0431, "lr": "4.943e-05", "step": 2444, "steps": "23.24s,2444/33195" }, { "epoch": 0.368278355173972, "eta": "199:47:22", "grad_norm": 0.0084, "loss": 0.0739, "lr": "4.943e-05", "step": 2445, "steps": "23.39s,2445/33195" }, { "epoch": 0.36842898026811266, "eta": "200:02:21", "grad_norm": 0.0082, "loss": 0.0363, "lr": "4.943e-05", "step": 2446, "steps": "23.42s,2446/33195" }, { "epoch": 0.36857960536225337, "eta": "199:51:43", "grad_norm": 0.0063, "loss": 0.0704, "lr": "4.943e-05", "step": 2447, "steps": "23.4s,2447/33195" }, { "epoch": 0.368730230456394, "eta": "197:17:35", "grad_norm": 0.0118, "loss": 0.0494, "lr": "4.943e-05", "step": 2448, "steps": "23.1s,2448/33195" }, { "epoch": 0.36888085555053474, "eta": "198:13:34", "grad_norm": 0.0073, "loss": 0.0546, "lr": "4.943e-05", "step": 2449, "steps": "23.21s,2449/33195" }, { "epoch": 0.3690314806446754, "eta": "196:46:04", "grad_norm": 0.0069, "loss": 0.0585, "lr": "4.943e-05", "step": 2450, "steps": "23.04s,2450/33195" }, { "epoch": 0.3691821057388161, "eta": "197:36:56", "grad_norm": 0.0081, "loss": 0.0353, "lr": "4.943e-05", "step": 2451, "steps": "23.14s,2451/33195" }, { "epoch": 0.36933273083295676, "eta": "197:51:55", "grad_norm": 0.019, "loss": 0.0611, "lr": "4.943e-05", "step": 2452, "steps": "23.17s,2452/33195" }, { "epoch": 0.36948335592709747, "eta": "198:06:54", "grad_norm": 0.0099, "loss": 0.0255, "lr": "4.943e-05", "step": 2453, "steps": "23.2s,2453/33195" }, { "epoch": 0.3696339810212381, "eta": "200:24:51", "grad_norm": 0.007, "loss": 0.0533, "lr": "4.943e-05", "step": 2454, "steps": "23.47s,2454/33195" }, { "epoch": 0.36978460611537883, "eta": "197:35:23", "grad_norm": 0.0069, "loss": 0.0574, "lr": "4.943e-05", "step": 2455, "steps": "23.14s,2455/33195" }, { "epoch": 0.3699352312095195, "eta": "197:19:38", "grad_norm": 0.0074, "loss": 0.0765, "lr": "4.943e-05", "step": 2456, "steps": "23.11s,2456/33195" }, { "epoch": 0.3700858563036602, "eta": "197:34:37", "grad_norm": 0.007, "loss": 0.0606, "lr": "4.943e-05", "step": 2457, "steps": "23.14s,2457/33195" }, { "epoch": 0.37023648139780085, "eta": "199:47:25", "grad_norm": 0.0093, "loss": 0.0562, "lr": "4.943e-05", "step": 2458, "steps": "23.4s,2458/33195" }, { "epoch": 0.37038710649194156, "eta": "196:52:52", "grad_norm": 0.012, "loss": 0.0397, "lr": "4.943e-05", "step": 2459, "steps": "23.06s,2459/33195" }, { "epoch": 0.3705377315860822, "eta": "196:52:29", "grad_norm": 0.0086, "loss": 0.0418, "lr": "4.943e-05", "step": 2460, "steps": "23.06s,2460/33195" }, { "epoch": 0.37068835668022293, "eta": "199:51:22", "grad_norm": 0.0081, "loss": 0.0613, "lr": "4.943e-05", "step": 2461, "steps": "23.41s,2461/33195" }, { "epoch": 0.3708389817743636, "eta": "197:37:48", "grad_norm": 0.0072, "loss": 0.0673, "lr": "4.943e-05", "step": 2462, "steps": "23.15s,2462/33195" }, { "epoch": 0.3709896068685043, "eta": "197:27:11", "grad_norm": 0.0075, "loss": 0.0899, "lr": "4.943e-05", "step": 2463, "steps": "23.13s,2463/33195" }, { "epoch": 0.37114023196264495, "eta": "197:42:09", "grad_norm": 0.0098, "loss": 0.0606, "lr": "4.942e-05", "step": 2464, "steps": "23.16s,2464/33195" }, { "epoch": 0.37129085705678566, "eta": "199:13:58", "grad_norm": 0.0107, "loss": 0.048, "lr": "4.942e-05", "step": 2465, "steps": "23.34s,2465/33195" }, { "epoch": 0.37144148215092637, "eta": "197:20:54", "grad_norm": 0.0091, "loss": 0.043, "lr": "4.942e-05", "step": 2466, "steps": "23.12s,2466/33195" }, { "epoch": 0.371592107245067, "eta": "200:09:31", "grad_norm": 0.007, "loss": 0.0454, "lr": "4.942e-05", "step": 2467, "steps": "23.45s,2467/33195" }, { "epoch": 0.37174273233920774, "eta": "200:09:08", "grad_norm": 0.0073, "loss": 0.0629, "lr": "4.942e-05", "step": 2468, "steps": "23.45s,2468/33195" }, { "epoch": 0.3718933574333484, "eta": "196:49:01", "grad_norm": 0.0065, "loss": 0.0392, "lr": "4.942e-05", "step": 2469, "steps": "23.06s,2469/33195" }, { "epoch": 0.3720439825274891, "eta": "196:48:38", "grad_norm": 0.0114, "loss": 0.0628, "lr": "4.942e-05", "step": 2470, "steps": "23.06s,2470/33195" }, { "epoch": 0.37219460762162976, "eta": "200:28:26", "grad_norm": 0.0057, "loss": 0.0249, "lr": "4.942e-05", "step": 2471, "steps": "23.49s,2471/33195" }, { "epoch": 0.37234523271577047, "eta": "196:27:23", "grad_norm": 0.0101, "loss": 0.0478, "lr": "4.942e-05", "step": 2472, "steps": "23.02s,2472/33195" }, { "epoch": 0.3724958578099111, "eta": "199:15:58", "grad_norm": 0.0068, "loss": 0.062, "lr": "4.942e-05", "step": 2473, "steps": "23.35s,2473/33195" }, { "epoch": 0.37264648290405183, "eta": "197:22:56", "grad_norm": 0.0069, "loss": 0.0492, "lr": "4.942e-05", "step": 2474, "steps": "23.13s,2474/33195" }, { "epoch": 0.3727971079981925, "eta": "197:27:40", "grad_norm": 0.0067, "loss": 0.0567, "lr": "4.942e-05", "step": 2475, "steps": "23.14s,2475/33195" }, { "epoch": 0.3729477330923332, "eta": "199:30:10", "grad_norm": 0.0081, "loss": 0.0649, "lr": "4.942e-05", "step": 2476, "steps": "23.38s,2476/33195" }, { "epoch": 0.37309835818647386, "eta": "200:26:05", "grad_norm": 0.0059, "loss": 0.0661, "lr": "4.942e-05", "step": 2477, "steps": "23.49s,2477/33195" }, { "epoch": 0.37324898328061457, "eta": "197:41:52", "grad_norm": 0.0077, "loss": 0.0582, "lr": "4.942e-05", "step": 2478, "steps": "23.17s,2478/33195" }, { "epoch": 0.3733996083747552, "eta": "197:21:01", "grad_norm": 0.0068, "loss": 0.0668, "lr": "4.942e-05", "step": 2479, "steps": "23.13s,2479/33195" }, { "epoch": 0.37355023346889593, "eta": "197:10:23", "grad_norm": 0.0072, "loss": 0.0453, "lr": "4.942e-05", "step": 2480, "steps": "23.11s,2480/33195" }, { "epoch": 0.3737008585630366, "eta": "201:25:57", "grad_norm": 0.0084, "loss": 0.0399, "lr": "4.942e-05", "step": 2481, "steps": "23.61s,2481/33195" }, { "epoch": 0.3738514836571773, "eta": "197:50:34", "grad_norm": 0.0063, "loss": 0.0437, "lr": "4.942e-05", "step": 2482, "steps": "23.19s,2482/33195" }, { "epoch": 0.37400210875131795, "eta": "197:50:11", "grad_norm": 0.0087, "loss": 0.0488, "lr": "4.942e-05", "step": 2483, "steps": "23.19s,2483/33195" }, { "epoch": 0.37415273384545866, "eta": "200:33:35", "grad_norm": 0.0062, "loss": 0.0524, "lr": "4.941e-05", "step": 2484, "steps": "23.51s,2484/33195" }, { "epoch": 0.3743033589395993, "eta": "197:18:42", "grad_norm": 0.0062, "loss": 0.0653, "lr": "4.941e-05", "step": 2485, "steps": "23.13s,2485/33195" }, { "epoch": 0.37445398403374003, "eta": "196:06:39", "grad_norm": 0.0068, "loss": 0.0609, "lr": "4.941e-05", "step": 2486, "steps": "22.99s,2486/33195" }, { "epoch": 0.3746046091278807, "eta": "196:26:45", "grad_norm": 0.0075, "loss": 0.0436, "lr": "4.941e-05", "step": 2487, "steps": "23.03s,2487/33195" }, { "epoch": 0.3747552342220214, "eta": "200:47:22", "grad_norm": 0.0079, "loss": 0.0337, "lr": "4.941e-05", "step": 2488, "steps": "23.54s,2488/33195" }, { "epoch": 0.37490585931616205, "eta": "197:01:48", "grad_norm": 0.0055, "loss": 0.0455, "lr": "4.941e-05", "step": 2489, "steps": "23.1s,2489/33195" }, { "epoch": 0.37505648441030276, "eta": "200:26:07", "grad_norm": 0.007, "loss": 0.0365, "lr": "4.941e-05", "step": 2490, "steps": "23.5s,2490/33195" }, { "epoch": 0.3752071095044434, "eta": "197:01:02", "grad_norm": 0.0067, "loss": 0.069, "lr": "4.941e-05", "step": 2491, "steps": "23.1s,2491/33195" }, { "epoch": 0.3753577345985841, "eta": "197:26:14", "grad_norm": 0.0081, "loss": 0.0435, "lr": "4.941e-05", "step": 2492, "steps": "23.15s,2492/33195" }, { "epoch": 0.3755083596927248, "eta": "197:30:58", "grad_norm": 0.0063, "loss": 0.0487, "lr": "4.941e-05", "step": 2493, "steps": "23.16s,2493/33195" }, { "epoch": 0.3756589847868655, "eta": "200:45:01", "grad_norm": 0.0073, "loss": 0.0408, "lr": "4.941e-05", "step": 2494, "steps": "23.54s,2494/33195" }, { "epoch": 0.3758096098810062, "eta": "196:13:27", "grad_norm": 0.0085, "loss": 0.0311, "lr": "4.941e-05", "step": 2495, "steps": "23.01s,2495/33195" }, { "epoch": 0.37596023497514686, "eta": "197:40:02", "grad_norm": 0.0076, "loss": 0.0369, "lr": "4.941e-05", "step": 2496, "steps": "23.18s,2496/33195" }, { "epoch": 0.37611086006928757, "eta": "200:33:36", "grad_norm": 0.0078, "loss": 0.0258, "lr": "4.941e-05", "step": 2497, "steps": "23.52s,2497/33195" }, { "epoch": 0.3762614851634282, "eta": "199:42:03", "grad_norm": 0.0052, "loss": 0.0588, "lr": "4.941e-05", "step": 2498, "steps": "23.42s,2498/33195" }, { "epoch": 0.37641211025756893, "eta": "197:08:11", "grad_norm": 0.0076, "loss": 0.067, "lr": "4.941e-05", "step": 2499, "steps": "23.12s,2499/33195" }, { "epoch": 0.3765627353517096, "eta": "196:52:27", "grad_norm": 0.0066, "loss": 0.0577, "lr": "4.941e-05", "step": 2500, "steps": "23.09s,2500/33195" }, { "epoch": 0.3767133604458503, "eta": "196:57:11", "grad_norm": 0.0054, "loss": 0.0679, "lr": "4.941e-05", "step": 2501, "steps": "23.1s,2501/33195" }, { "epoch": 0.37686398553999095, "eta": "202:34:25", "grad_norm": 0.006, "loss": 0.0555, "lr": "4.941e-05", "step": 2502, "steps": "23.76s,2502/33195" }, { "epoch": 0.37701461063413166, "eta": "198:23:22", "grad_norm": 0.0068, "loss": 0.0453, "lr": "4.940e-05", "step": 2503, "steps": "23.27s,2503/33195" }, { "epoch": 0.3771652357282723, "eta": "197:21:36", "grad_norm": 0.0092, "loss": 0.0452, "lr": "4.940e-05", "step": 2504, "steps": "23.15s,2504/33195" }, { "epoch": 0.37731586082241303, "eta": "197:26:20", "grad_norm": 0.0074, "loss": 0.0686, "lr": "4.940e-05", "step": 2505, "steps": "23.16s,2505/33195" }, { "epoch": 0.3774664859165537, "eta": "200:04:30", "grad_norm": 0.0065, "loss": 0.0559, "lr": "4.940e-05", "step": 2506, "steps": "23.47s,2506/33195" }, { "epoch": 0.3776171110106944, "eta": "197:40:54", "grad_norm": 0.0064, "loss": 0.0707, "lr": "4.940e-05", "step": 2507, "steps": "23.19s,2507/33195" }, { "epoch": 0.37776773610483505, "eta": "200:49:45", "grad_norm": 0.0056, "loss": 0.0474, "lr": "4.940e-05", "step": 2508, "steps": "23.56s,2508/33195" }, { "epoch": 0.37791836119897576, "eta": "200:28:54", "grad_norm": 0.0065, "loss": 0.0733, "lr": "4.940e-05", "step": 2509, "steps": "23.52s,2509/33195" }, { "epoch": 0.3780689862931164, "eta": "200:18:17", "grad_norm": 0.0068, "loss": 0.0585, "lr": "4.940e-05", "step": 2510, "steps": "23.5s,2510/33195" }, { "epoch": 0.3782196113872571, "eta": "197:29:08", "grad_norm": 0.0065, "loss": 0.0249, "lr": "4.940e-05", "step": 2511, "steps": "23.17s,2511/33195" }, { "epoch": 0.3783702364813978, "eta": "198:14:46", "grad_norm": 0.0066, "loss": 0.0512, "lr": "4.940e-05", "step": 2512, "steps": "23.26s,2512/33195" }, { "epoch": 0.3785208615755385, "eta": "196:57:41", "grad_norm": 0.0089, "loss": 0.0481, "lr": "4.940e-05", "step": 2513, "steps": "23.11s,2513/33195" }, { "epoch": 0.37867148666967915, "eta": "201:23:12", "grad_norm": 0.0056, "loss": 0.0668, "lr": "4.940e-05", "step": 2514, "steps": "23.63s,2514/33195" }, { "epoch": 0.37882211176381986, "eta": "200:57:14", "grad_norm": 0.0072, "loss": 0.0392, "lr": "4.940e-05", "step": 2515, "steps": "23.58s,2515/33195" }, { "epoch": 0.3789727368579605, "eta": "200:31:16", "grad_norm": 0.0067, "loss": 0.0581, "lr": "4.940e-05", "step": 2516, "steps": "23.53s,2516/33195" }, { "epoch": 0.3791233619521012, "eta": "200:51:20", "grad_norm": 0.007, "loss": 0.0326, "lr": "4.940e-05", "step": 2517, "steps": "23.57s,2517/33195" }, { "epoch": 0.3792739870462419, "eta": "193:56:48", "grad_norm": 0.0106, "loss": 0.0654, "lr": "4.940e-05", "step": 2518, "steps": "22.76s,2518/33195" }, { "epoch": 0.3794246121403826, "eta": "199:54:18", "grad_norm": 0.0075, "loss": 0.0445, "lr": "4.940e-05", "step": 2519, "steps": "23.46s,2519/33195" }, { "epoch": 0.37957523723452324, "eta": "197:25:39", "grad_norm": 0.0076, "loss": 0.0549, "lr": "4.940e-05", "step": 2520, "steps": "23.17s,2520/33195" }, { "epoch": 0.37972586232866395, "eta": "200:29:19", "grad_norm": 0.0066, "loss": 0.0641, "lr": "4.940e-05", "step": 2521, "steps": "23.53s,2521/33195" }, { "epoch": 0.37987648742280467, "eta": "197:35:06", "grad_norm": 0.0063, "loss": 0.0399, "lr": "4.940e-05", "step": 2522, "steps": "23.19s,2522/33195" }, { "epoch": 0.3800271125169453, "eta": "199:52:45", "grad_norm": 0.0065, "loss": 0.0541, "lr": "4.939e-05", "step": 2523, "steps": "23.46s,2523/33195" }, { "epoch": 0.38017773761108603, "eta": "200:33:15", "grad_norm": 0.0154, "loss": 0.0381, "lr": "4.939e-05", "step": 2524, "steps": "23.54s,2524/33195" }, { "epoch": 0.3803283627052267, "eta": "200:32:51", "grad_norm": 0.0075, "loss": 0.0404, "lr": "4.939e-05", "step": 2525, "steps": "23.54s,2525/33195" }, { "epoch": 0.3804789877993674, "eta": "200:12:01", "grad_norm": 0.0059, "loss": 0.0676, "lr": "4.939e-05", "step": 2526, "steps": "23.5s,2526/33195" }, { "epoch": 0.38062961289350805, "eta": "196:47:10", "grad_norm": 0.0066, "loss": 0.0612, "lr": "4.939e-05", "step": 2527, "steps": "23.1s,2527/33195" }, { "epoch": 0.38078023798764876, "eta": "196:41:41", "grad_norm": 0.0072, "loss": 0.046, "lr": "4.939e-05", "step": 2528, "steps": "23.09s,2528/33195" }, { "epoch": 0.3809308630817894, "eta": "194:53:58", "grad_norm": 0.0065, "loss": 0.0574, "lr": "4.939e-05", "step": 2529, "steps": "22.88s,2529/33195" }, { "epoch": 0.3810814881759301, "eta": "196:35:48", "grad_norm": 0.0081, "loss": 0.0466, "lr": "4.939e-05", "step": 2530, "steps": "23.08s,2530/33195" }, { "epoch": 0.3812321132700708, "eta": "200:10:04", "grad_norm": 0.0075, "loss": 0.0616, "lr": "4.939e-05", "step": 2531, "steps": "23.5s,2531/33195" }, { "epoch": 0.3813827383642115, "eta": "197:10:48", "grad_norm": 0.0074, "loss": 0.0539, "lr": "4.939e-05", "step": 2532, "steps": "23.15s,2532/33195" }, { "epoch": 0.38153336345835215, "eta": "200:14:23", "grad_norm": 0.0067, "loss": 0.0421, "lr": "4.939e-05", "step": 2533, "steps": "23.51s,2533/33195" }, { "epoch": 0.38168398855249286, "eta": "198:42:01", "grad_norm": 0.0082, "loss": 0.0455, "lr": "4.939e-05", "step": 2534, "steps": "23.33s,2534/33195" }, { "epoch": 0.3818346136466335, "eta": "197:04:32", "grad_norm": 0.0103, "loss": 0.0798, "lr": "4.939e-05", "step": 2535, "steps": "23.14s,2535/33195" }, { "epoch": 0.3819852387407742, "eta": "196:59:02", "grad_norm": 0.0058, "loss": 0.0475, "lr": "4.939e-05", "step": 2536, "steps": "23.13s,2536/33195" }, { "epoch": 0.3821358638349149, "eta": "197:54:51", "grad_norm": 0.0074, "loss": 0.0439, "lr": "4.939e-05", "step": 2537, "steps": "23.24s,2537/33195" }, { "epoch": 0.3822864889290556, "eta": "200:02:12", "grad_norm": 0.0087, "loss": 0.0557, "lr": "4.939e-05", "step": 2538, "steps": "23.49s,2538/33195" }, { "epoch": 0.38243711402319625, "eta": "196:17:00", "grad_norm": 0.0078, "loss": 0.0564, "lr": "4.939e-05", "step": 2539, "steps": "23.05s,2539/33195" }, { "epoch": 0.38258773911733696, "eta": "197:07:43", "grad_norm": 0.0097, "loss": 0.05, "lr": "4.939e-05", "step": 2540, "steps": "23.15s,2540/33195" }, { "epoch": 0.3827383642114776, "eta": "200:21:28", "grad_norm": 0.0073, "loss": 0.0503, "lr": "4.939e-05", "step": 2541, "steps": "23.53s,2541/33195" }, { "epoch": 0.3828889893056183, "eta": "197:12:03", "grad_norm": 0.007, "loss": 0.0412, "lr": "4.938e-05", "step": 2542, "steps": "23.16s,2542/33195" }, { "epoch": 0.383039614399759, "eta": "197:32:06", "grad_norm": 0.0071, "loss": 0.0587, "lr": "4.938e-05", "step": 2543, "steps": "23.2s,2543/33195" }, { "epoch": 0.3831902394938997, "eta": "196:40:38", "grad_norm": 0.0084, "loss": 0.0547, "lr": "4.938e-05", "step": 2544, "steps": "23.1s,2544/33195" }, { "epoch": 0.38334086458804034, "eta": "196:24:55", "grad_norm": 0.0075, "loss": 0.0554, "lr": "4.938e-05", "step": 2545, "steps": "23.07s,2545/33195" }, { "epoch": 0.38349148968218105, "eta": "197:41:09", "grad_norm": 0.0077, "loss": 0.0763, "lr": "4.938e-05", "step": 2546, "steps": "23.22s,2546/33195" }, { "epoch": 0.3836421147763217, "eta": "200:29:20", "grad_norm": 0.0073, "loss": 0.055, "lr": "4.938e-05", "step": 2547, "steps": "23.55s,2547/33195" }, { "epoch": 0.3837927398704624, "eta": "200:28:56", "grad_norm": 0.0071, "loss": 0.0436, "lr": "4.938e-05", "step": 2548, "steps": "23.55s,2548/33195" }, { "epoch": 0.38394336496460313, "eta": "199:52:48", "grad_norm": 0.0059, "loss": 0.0414, "lr": "4.938e-05", "step": 2549, "steps": "23.48s,2549/33195" }, { "epoch": 0.3840939900587438, "eta": "193:59:59", "grad_norm": 0.0079, "loss": 0.0302, "lr": "4.938e-05", "step": 2550, "steps": "22.79s,2550/33195" }, { "epoch": 0.3842446151528845, "eta": "200:12:26", "grad_norm": 0.0077, "loss": 0.0545, "lr": "4.938e-05", "step": 2551, "steps": "23.52s,2551/33195" }, { "epoch": 0.38439524024702515, "eta": "197:03:05", "grad_norm": 0.0089, "loss": 0.0506, "lr": "4.938e-05", "step": 2552, "steps": "23.15s,2552/33195" }, { "epoch": 0.38454586534116586, "eta": "194:34:36", "grad_norm": 0.007, "loss": 0.0478, "lr": "4.938e-05", "step": 2553, "steps": "22.86s,2553/33195" }, { "epoch": 0.3846964904353065, "eta": "200:01:03", "grad_norm": 0.0087, "loss": 0.0466, "lr": "4.938e-05", "step": 2554, "steps": "23.5s,2554/33195" }, { "epoch": 0.3848471155294472, "eta": "200:15:59", "grad_norm": 0.0069, "loss": 0.0461, "lr": "4.938e-05", "step": 2555, "steps": "23.53s,2555/33195" }, { "epoch": 0.3849977406235879, "eta": "200:36:01", "grad_norm": 0.0072, "loss": 0.0615, "lr": "4.938e-05", "step": 2556, "steps": "23.57s,2556/33195" }, { "epoch": 0.3851483657177286, "eta": "197:31:47", "grad_norm": 0.0074, "loss": 0.0571, "lr": "4.938e-05", "step": 2557, "steps": "23.21s,2557/33195" }, { "epoch": 0.38529899081186925, "eta": "194:32:41", "grad_norm": 0.0107, "loss": 0.0258, "lr": "4.938e-05", "step": 2558, "steps": "22.86s,2558/33195" }, { "epoch": 0.38544961590600996, "eta": "197:36:07", "grad_norm": 0.0064, "loss": 0.0541, "lr": "4.938e-05", "step": 2559, "steps": "23.22s,2559/33195" }, { "epoch": 0.3856002410001506, "eta": "197:10:12", "grad_norm": 0.0071, "loss": 0.0642, "lr": "4.938e-05", "step": 2560, "steps": "23.17s,2560/33195" }, { "epoch": 0.3857508660942913, "eta": "199:32:47", "grad_norm": 0.0092, "loss": 0.0493, "lr": "4.937e-05", "step": 2561, "steps": "23.45s,2561/33195" }, { "epoch": 0.385901491188432, "eta": "197:04:20", "grad_norm": 0.0079, "loss": 0.0516, "lr": "4.937e-05", "step": 2562, "steps": "23.16s,2562/33195" }, { "epoch": 0.3860521162825727, "eta": "200:53:41", "grad_norm": 0.0065, "loss": 0.0551, "lr": "4.937e-05", "step": 2563, "steps": "23.61s,2563/33195" }, { "epoch": 0.38620274137671334, "eta": "199:41:49", "grad_norm": 0.0074, "loss": 0.0502, "lr": "4.937e-05", "step": 2564, "steps": "23.47s,2564/33195" }, { "epoch": 0.38635336647085405, "eta": "197:18:29", "grad_norm": 0.0069, "loss": 0.0693, "lr": "4.937e-05", "step": 2565, "steps": "23.19s,2565/33195" }, { "epoch": 0.3865039915649947, "eta": "197:13:00", "grad_norm": 0.0076, "loss": 0.0537, "lr": "4.937e-05", "step": 2566, "steps": "23.18s,2566/33195" }, { "epoch": 0.3866546166591354, "eta": "197:17:43", "grad_norm": 0.007, "loss": 0.056, "lr": "4.937e-05", "step": 2567, "steps": "23.19s,2567/33195" }, { "epoch": 0.3868052417532761, "eta": "197:32:38", "grad_norm": 0.0081, "loss": 0.0519, "lr": "4.937e-05", "step": 2568, "steps": "23.22s,2568/33195" }, { "epoch": 0.3869558668474168, "eta": "196:25:54", "grad_norm": 0.0063, "loss": 0.0572, "lr": "4.937e-05", "step": 2569, "steps": "23.09s,2569/33195" }, { "epoch": 0.38710649194155744, "eta": "200:25:25", "grad_norm": 0.0084, "loss": 0.0519, "lr": "4.937e-05", "step": 2570, "steps": "23.56s,2570/33195" }, { "epoch": 0.38725711703569815, "eta": "199:13:34", "grad_norm": 0.0065, "loss": 0.0471, "lr": "4.937e-05", "step": 2571, "steps": "23.42s,2571/33195" }, { "epoch": 0.3874077421298388, "eta": "196:19:38", "grad_norm": 0.0091, "loss": 0.0414, "lr": "4.937e-05", "step": 2572, "steps": "23.08s,2572/33195" }, { "epoch": 0.3875583672239795, "eta": "199:28:05", "grad_norm": 0.0078, "loss": 0.0536, "lr": "4.937e-05", "step": 2573, "steps": "23.45s,2573/33195" }, { "epoch": 0.38770899231812017, "eta": "194:06:11", "grad_norm": 0.007, "loss": 0.0554, "lr": "4.937e-05", "step": 2574, "steps": "22.82s,2574/33195" }, { "epoch": 0.3878596174122609, "eta": "222:50:44", "grad_norm": 0.0062, "loss": 0.039, "lr": "4.937e-05", "step": 2575, "steps": "26.2s,2575/33195" }, { "epoch": 0.3880102425064016, "eta": "215:31:25", "grad_norm": 0.0108, "loss": 0.0426, "lr": "4.937e-05", "step": 2576, "steps": "25.34s,2576/33195" }, { "epoch": 0.38816086760054225, "eta": "214:50:10", "grad_norm": 0.0098, "loss": 0.0543, "lr": "4.937e-05", "step": 2577, "steps": "25.26s,2577/33195" }, { "epoch": 0.38831149269468296, "eta": "200:22:16", "grad_norm": 0.0084, "loss": 0.0529, "lr": "4.937e-05", "step": 2578, "steps": "23.56s,2578/33195" }, { "epoch": 0.3884621177888236, "eta": "199:30:51", "grad_norm": 0.0106, "loss": 0.0472, "lr": "4.937e-05", "step": 2579, "steps": "23.46s,2579/33195" }, { "epoch": 0.3886127428829643, "eta": "200:21:29", "grad_norm": 0.0056, "loss": 0.0528, "lr": "4.936e-05", "step": 2580, "steps": "23.56s,2580/33195" }, { "epoch": 0.388763367977105, "eta": "199:04:33", "grad_norm": 0.0129, "loss": 0.0522, "lr": "4.936e-05", "step": 2581, "steps": "23.41s,2581/33195" }, { "epoch": 0.3889139930712457, "eta": "197:42:32", "grad_norm": 0.0072, "loss": 0.0576, "lr": "4.936e-05", "step": 2582, "steps": "23.25s,2582/33195" }, { "epoch": 0.38906461816538634, "eta": "197:47:15", "grad_norm": 0.0065, "loss": 0.0641, "lr": "4.936e-05", "step": 2583, "steps": "23.26s,2583/33195" }, { "epoch": 0.38921524325952706, "eta": "196:25:14", "grad_norm": 0.0093, "loss": 0.0513, "lr": "4.936e-05", "step": 2584, "steps": "23.1s,2584/33195" }, { "epoch": 0.3893658683536677, "eta": "197:10:45", "grad_norm": 0.007, "loss": 0.077, "lr": "4.936e-05", "step": 2585, "steps": "23.19s,2585/33195" }, { "epoch": 0.3895164934478084, "eta": "196:04:03", "grad_norm": 0.0094, "loss": 0.0478, "lr": "4.936e-05", "step": 2586, "steps": "23.06s,2586/33195" }, { "epoch": 0.3896671185419491, "eta": "194:21:38", "grad_norm": 0.0067, "loss": 0.0469, "lr": "4.936e-05", "step": 2587, "steps": "22.86s,2587/33195" }, { "epoch": 0.3898177436360898, "eta": "199:17:08", "grad_norm": 0.0106, "loss": 0.0367, "lr": "4.936e-05", "step": 2588, "steps": "23.44s,2588/33195" }, { "epoch": 0.38996836873023044, "eta": "197:09:13", "grad_norm": 0.0097, "loss": 0.0478, "lr": "4.936e-05", "step": 2589, "steps": "23.19s,2589/33195" }, { "epoch": 0.39011899382437115, "eta": "196:02:31", "grad_norm": 0.0102, "loss": 0.0684, "lr": "4.936e-05", "step": 2590, "steps": "23.06s,2590/33195" }, { "epoch": 0.3902696189185118, "eta": "199:36:21", "grad_norm": 0.0093, "loss": 0.052, "lr": "4.936e-05", "step": 2591, "steps": "23.48s,2591/33195" }, { "epoch": 0.3904202440126525, "eta": "197:23:21", "grad_norm": 0.0055, "loss": 0.0524, "lr": "4.936e-05", "step": 2592, "steps": "23.22s,2592/33195" }, { "epoch": 0.3905708691067932, "eta": "201:02:17", "grad_norm": 0.0165, "loss": 0.0633, "lr": "4.936e-05", "step": 2593, "steps": "23.65s,2593/33195" }, { "epoch": 0.3907214942009339, "eta": "201:42:41", "grad_norm": 0.0071, "loss": 0.0447, "lr": "4.936e-05", "step": 2594, "steps": "23.73s,2594/33195" }, { "epoch": 0.39087211929507454, "eta": "197:37:30", "grad_norm": 0.0081, "loss": 0.0396, "lr": "4.936e-05", "step": 2595, "steps": "23.25s,2595/33195" }, { "epoch": 0.39102274438921525, "eta": "199:14:00", "grad_norm": 0.0087, "loss": 0.0482, "lr": "4.936e-05", "step": 2596, "steps": "23.44s,2596/33195" }, { "epoch": 0.3911733694833559, "eta": "196:15:07", "grad_norm": 0.0049, "loss": 0.0606, "lr": "4.936e-05", "step": 2597, "steps": "23.09s,2597/33195" }, { "epoch": 0.3913239945774966, "eta": "196:09:38", "grad_norm": 0.0098, "loss": 0.0379, "lr": "4.935e-05", "step": 2598, "steps": "23.08s,2598/33195" }, { "epoch": 0.39147461967163727, "eta": "196:04:09", "grad_norm": 0.0103, "loss": 0.0424, "lr": "4.935e-05", "step": 2599, "steps": "23.07s,2599/33195" }, { "epoch": 0.391625244765778, "eta": "200:03:26", "grad_norm": 0.007, "loss": 0.048, "lr": "4.935e-05", "step": 2600, "steps": "23.54s,2600/33195" }, { "epoch": 0.39177586985991864, "eta": "469:37:04", "grad_norm": 0.0066, "loss": 0.0353, "lr": "4.935e-05", "step": 2601, "steps": "55.26s,2601/33195" }, { "epoch": 0.39192649495405935, "eta": "203:06:12", "grad_norm": 0.0082, "loss": 0.0651, "lr": "4.935e-05", "step": 2602, "steps": "23.9s,2602/33195" }, { "epoch": 0.39207712004820006, "eta": "196:43:24", "grad_norm": 0.0084, "loss": 0.0526, "lr": "4.935e-05", "step": 2603, "steps": "23.15s,2603/33195" }, { "epoch": 0.3922277451423407, "eta": "200:01:52", "grad_norm": 0.0075, "loss": 0.0745, "lr": "4.935e-05", "step": 2604, "steps": "23.54s,2604/33195" }, { "epoch": 0.3923783702364814, "eta": "200:32:04", "grad_norm": 0.006, "loss": 0.037, "lr": "4.935e-05", "step": 2605, "steps": "23.6s,2605/33195" }, { "epoch": 0.3925289953306221, "eta": "198:14:01", "grad_norm": 0.0072, "loss": 0.0659, "lr": "4.935e-05", "step": 2606, "steps": "23.33s,2606/33195" }, { "epoch": 0.3926796204247628, "eta": "197:12:27", "grad_norm": 0.0094, "loss": 0.0515, "lr": "4.935e-05", "step": 2607, "steps": "23.21s,2607/33195" }, { "epoch": 0.39283024551890344, "eta": "200:00:17", "grad_norm": 0.0074, "loss": 0.0393, "lr": "4.935e-05", "step": 2608, "steps": "23.54s,2608/33195" }, { "epoch": 0.39298087061304415, "eta": "198:33:14", "grad_norm": 0.0087, "loss": 0.0558, "lr": "4.935e-05", "step": 2609, "steps": "23.37s,2609/33195" }, { "epoch": 0.3931314957071848, "eta": "198:27:45", "grad_norm": 0.0066, "loss": 0.0844, "lr": "4.935e-05", "step": 2610, "steps": "23.36s,2610/33195" }, { "epoch": 0.3932821208013255, "eta": "197:21:06", "grad_norm": 0.0111, "loss": 0.0394, "lr": "4.935e-05", "step": 2611, "steps": "23.23s,2611/33195" }, { "epoch": 0.3934327458954662, "eta": "200:19:07", "grad_norm": 0.0075, "loss": 0.0481, "lr": "4.935e-05", "step": 2612, "steps": "23.58s,2612/33195" }, { "epoch": 0.3935833709896069, "eta": "196:44:39", "grad_norm": 0.0063, "loss": 0.0524, "lr": "4.935e-05", "step": 2613, "steps": "23.16s,2613/33195" }, { "epoch": 0.39373399608374754, "eta": "194:31:44", "grad_norm": 0.0082, "loss": 0.0463, "lr": "4.935e-05", "step": 2614, "steps": "22.9s,2614/33195" }, { "epoch": 0.39388462117788825, "eta": "197:19:33", "grad_norm": 0.007, "loss": 0.0311, "lr": "4.935e-05", "step": 2615, "steps": "23.23s,2615/33195" }, { "epoch": 0.3940352462720289, "eta": "196:53:41", "grad_norm": 0.0068, "loss": 0.045, "lr": "4.935e-05", "step": 2616, "steps": "23.18s,2616/33195" }, { "epoch": 0.3941858713661696, "eta": "194:15:18", "grad_norm": 0.0078, "loss": 0.0579, "lr": "4.934e-05", "step": 2617, "steps": "22.87s,2617/33195" }, { "epoch": 0.39433649646031027, "eta": "198:34:50", "grad_norm": 0.0084, "loss": 0.0571, "lr": "4.934e-05", "step": 2618, "steps": "23.38s,2618/33195" }, { "epoch": 0.394487121554451, "eta": "198:34:26", "grad_norm": 0.0063, "loss": 0.0418, "lr": "4.934e-05", "step": 2619, "steps": "23.38s,2619/33195" }, { "epoch": 0.39463774664859164, "eta": "199:30:06", "grad_norm": 0.0073, "loss": 0.0624, "lr": "4.934e-05", "step": 2620, "steps": "23.49s,2620/33195" }, { "epoch": 0.39478837174273235, "eta": "199:24:37", "grad_norm": 0.0069, "loss": 0.0761, "lr": "4.934e-05", "step": 2621, "steps": "23.48s,2621/33195" }, { "epoch": 0.394938996836873, "eta": "200:15:11", "grad_norm": 0.0124, "loss": 0.0417, "lr": "4.934e-05", "step": 2622, "steps": "23.58s,2622/33195" }, { "epoch": 0.3950896219310137, "eta": "199:18:44", "grad_norm": 0.0062, "loss": 0.064, "lr": "4.934e-05", "step": 2623, "steps": "23.47s,2623/33195" }, { "epoch": 0.39524024702515437, "eta": "200:09:18", "grad_norm": 0.0089, "loss": 0.0531, "lr": "4.934e-05", "step": 2624, "steps": "23.57s,2624/33195" }, { "epoch": 0.3953908721192951, "eta": "196:04:21", "grad_norm": 0.0076, "loss": 0.0646, "lr": "4.934e-05", "step": 2625, "steps": "23.09s,2625/33195" }, { "epoch": 0.39554149721343573, "eta": "197:30:34", "grad_norm": 0.0081, "loss": 0.0618, "lr": "4.934e-05", "step": 2626, "steps": "23.26s,2626/33195" }, { "epoch": 0.39569212230757644, "eta": "197:14:54", "grad_norm": 0.0053, "loss": 0.0512, "lr": "4.934e-05", "step": 2627, "steps": "23.23s,2627/33195" }, { "epoch": 0.3958427474017171, "eta": "200:07:44", "grad_norm": 0.0132, "loss": 0.0464, "lr": "4.934e-05", "step": 2628, "steps": "23.57s,2628/33195" }, { "epoch": 0.3959933724958578, "eta": "194:26:01", "grad_norm": 0.0055, "loss": 0.0579, "lr": "4.934e-05", "step": 2629, "steps": "22.9s,2629/33195" }, { "epoch": 0.3961439975899985, "eta": "198:04:41", "grad_norm": 0.0068, "loss": 0.0764, "lr": "4.934e-05", "step": 2630, "steps": "23.33s,2630/33195" }, { "epoch": 0.3962946226841392, "eta": "197:49:01", "grad_norm": 0.0067, "loss": 0.0474, "lr": "4.934e-05", "step": 2631, "steps": "23.3s,2631/33195" }, { "epoch": 0.3964452477782799, "eta": "197:28:15", "grad_norm": 0.0122, "loss": 0.0638, "lr": "4.934e-05", "step": 2632, "steps": "23.26s,2632/33195" }, { "epoch": 0.39659587287242054, "eta": "197:38:03", "grad_norm": 0.0062, "loss": 0.0667, "lr": "4.934e-05", "step": 2633, "steps": "23.28s,2633/33195" }, { "epoch": 0.39674649796656125, "eta": "199:45:00", "grad_norm": 0.0081, "loss": 0.0451, "lr": "4.934e-05", "step": 2634, "steps": "23.53s,2634/33195" }, { "epoch": 0.3968971230607019, "eta": "196:41:15", "grad_norm": 0.0079, "loss": 0.0719, "lr": "4.933e-05", "step": 2635, "steps": "23.17s,2635/33195" }, { "epoch": 0.3970477481548426, "eta": "196:25:35", "grad_norm": 0.0089, "loss": 0.0418, "lr": "4.933e-05", "step": 2636, "steps": "23.14s,2636/33195" }, { "epoch": 0.39719837324898327, "eta": "198:42:42", "grad_norm": 0.0086, "loss": 0.0439, "lr": "4.933e-05", "step": 2637, "steps": "23.41s,2637/33195" }, { "epoch": 0.397348998343124, "eta": "198:47:24", "grad_norm": 0.0103, "loss": 0.0666, "lr": "4.933e-05", "step": 2638, "steps": "23.42s,2638/33195" }, { "epoch": 0.39749962343726464, "eta": "195:33:30", "grad_norm": 0.0064, "loss": 0.0486, "lr": "4.933e-05", "step": 2639, "steps": "23.04s,2639/33195" }, { "epoch": 0.39765024853140535, "eta": "196:13:51", "grad_norm": 0.0059, "loss": 0.0494, "lr": "4.933e-05", "step": 2640, "steps": "23.12s,2640/33195" }, { "epoch": 0.397800873625546, "eta": "197:14:34", "grad_norm": 0.0063, "loss": 0.0489, "lr": "4.933e-05", "step": 2641, "steps": "23.24s,2641/33195" }, { "epoch": 0.3979514987196867, "eta": "197:09:06", "grad_norm": 0.0068, "loss": 0.049, "lr": "4.933e-05", "step": 2642, "steps": "23.23s,2642/33195" }, { "epoch": 0.39810212381382737, "eta": "197:03:37", "grad_norm": 0.0071, "loss": 0.0584, "lr": "4.933e-05", "step": 2643, "steps": "23.22s,2643/33195" }, { "epoch": 0.3982527489079681, "eta": "193:29:22", "grad_norm": 0.0079, "loss": 0.0628, "lr": "4.933e-05", "step": 2644, "steps": "22.8s,2644/33195" }, { "epoch": 0.39840337400210873, "eta": "201:22:31", "grad_norm": 0.0072, "loss": 0.0507, "lr": "4.933e-05", "step": 2645, "steps": "23.73s,2645/33195" }, { "epoch": 0.39855399909624944, "eta": "200:21:01", "grad_norm": 0.0081, "loss": 0.053, "lr": "4.933e-05", "step": 2646, "steps": "23.61s,2646/33195" }, { "epoch": 0.3987046241903901, "eta": "197:58:04", "grad_norm": 0.0071, "loss": 0.054, "lr": "4.933e-05", "step": 2647, "steps": "23.33s,2647/33195" }, { "epoch": 0.3988552492845308, "eta": "198:12:57", "grad_norm": 0.006, "loss": 0.0677, "lr": "4.933e-05", "step": 2648, "steps": "23.36s,2648/33195" }, { "epoch": 0.39900587437867147, "eta": "196:35:50", "grad_norm": 0.0065, "loss": 0.0417, "lr": "4.933e-05", "step": 2649, "steps": "23.17s,2649/33195" }, { "epoch": 0.3991564994728122, "eta": "195:54:44", "grad_norm": 0.0067, "loss": 0.071, "lr": "4.933e-05", "step": 2650, "steps": "23.09s,2650/33195" }, { "epoch": 0.39930712456695283, "eta": "194:43:04", "grad_norm": 0.0087, "loss": 0.0448, "lr": "4.933e-05", "step": 2651, "steps": "22.95s,2651/33195" }, { "epoch": 0.39945774966109354, "eta": "196:34:41", "grad_norm": 0.0081, "loss": 0.0645, "lr": "4.933e-05", "step": 2652, "steps": "23.17s,2652/33195" }, { "epoch": 0.3996083747552342, "eta": "195:12:51", "grad_norm": 0.0081, "loss": 0.0683, "lr": "4.933e-05", "step": 2653, "steps": "23.01s,2653/33195" }, { "epoch": 0.3997589998493749, "eta": "197:45:10", "grad_norm": 0.0087, "loss": 0.0413, "lr": "4.932e-05", "step": 2654, "steps": "23.31s,2654/33195" }, { "epoch": 0.39990962494351556, "eta": "196:23:21", "grad_norm": 0.0082, "loss": 0.0588, "lr": "4.932e-05", "step": 2655, "steps": "23.15s,2655/33195" }, { "epoch": 0.4000602500376563, "eta": "194:51:20", "grad_norm": 0.0089, "loss": 0.0588, "lr": "4.932e-05", "step": 2656, "steps": "22.97s,2656/33195" }, { "epoch": 0.400210875131797, "eta": "196:12:23", "grad_norm": 0.0074, "loss": 0.0481, "lr": "4.932e-05", "step": 2657, "steps": "23.13s,2657/33195" }, { "epoch": 0.40036150022593764, "eta": "196:22:11", "grad_norm": 0.0085, "loss": 0.0363, "lr": "4.932e-05", "step": 2658, "steps": "23.15s,2658/33195" }, { "epoch": 0.40051212532007835, "eta": "194:55:17", "grad_norm": 0.0085, "loss": 0.0654, "lr": "4.932e-05", "step": 2659, "steps": "22.98s,2659/33195" }, { "epoch": 0.400662750414219, "eta": "193:23:18", "grad_norm": 0.0059, "loss": 0.0418, "lr": "4.932e-05", "step": 2660, "steps": "22.8s,2660/33195" }, { "epoch": 0.4008133755083597, "eta": "194:44:20", "grad_norm": 0.007, "loss": 0.0409, "lr": "4.932e-05", "step": 2661, "steps": "22.96s,2661/33195" }, { "epoch": 0.40096400060250037, "eta": "195:14:29", "grad_norm": 0.007, "loss": 0.0288, "lr": "4.932e-05", "step": 2662, "steps": "23.02s,2662/33195" }, { "epoch": 0.4011146256966411, "eta": "197:11:09", "grad_norm": 0.0078, "loss": 0.0414, "lr": "4.932e-05", "step": 2663, "steps": "23.25s,2663/33195" }, { "epoch": 0.40126525079078174, "eta": "196:40:13", "grad_norm": 0.0068, "loss": 0.0367, "lr": "4.932e-05", "step": 2664, "steps": "23.19s,2664/33195" }, { "epoch": 0.40141587588492245, "eta": "196:34:45", "grad_norm": 0.0078, "loss": 0.0427, "lr": "4.932e-05", "step": 2665, "steps": "23.18s,2665/33195" }, { "epoch": 0.4015665009790631, "eta": "198:46:39", "grad_norm": 0.0083, "loss": 0.0392, "lr": "4.932e-05", "step": 2666, "steps": "23.44s,2666/33195" }, { "epoch": 0.4017171260732038, "eta": "194:52:13", "grad_norm": 0.0077, "loss": 0.0507, "lr": "4.932e-05", "step": 2667, "steps": "22.98s,2667/33195" }, { "epoch": 0.40186775116734447, "eta": "199:41:50", "grad_norm": 0.0068, "loss": 0.0381, "lr": "4.932e-05", "step": 2668, "steps": "23.55s,2668/33195" }, { "epoch": 0.4020183762614852, "eta": "193:04:37", "grad_norm": 0.0083, "loss": 0.0571, "lr": "4.932e-05", "step": 2669, "steps": "22.77s,2669/33195" }, { "epoch": 0.40216900135562583, "eta": "197:33:52", "grad_norm": 0.0068, "loss": 0.0564, "lr": "4.932e-05", "step": 2670, "steps": "23.3s,2670/33195" }, { "epoch": 0.40231962644976654, "eta": "194:45:36", "grad_norm": 0.0072, "loss": 0.0269, "lr": "4.932e-05", "step": 2671, "steps": "22.97s,2671/33195" }, { "epoch": 0.4024702515439072, "eta": "196:42:13", "grad_norm": 0.0066, "loss": 0.0689, "lr": "4.931e-05", "step": 2672, "steps": "23.2s,2672/33195" }, { "epoch": 0.4026208766380479, "eta": "198:23:34", "grad_norm": 0.0062, "loss": 0.0497, "lr": "4.931e-05", "step": 2673, "steps": "23.4s,2673/33195" }, { "epoch": 0.40277150173218856, "eta": "199:34:24", "grad_norm": 0.008, "loss": 0.0584, "lr": "4.931e-05", "step": 2674, "steps": "23.54s,2674/33195" }, { "epoch": 0.4029221268263293, "eta": "196:00:22", "grad_norm": 0.008, "loss": 0.0571, "lr": "4.931e-05", "step": 2675, "steps": "23.12s,2675/33195" }, { "epoch": 0.40307275192046993, "eta": "197:26:27", "grad_norm": 0.0066, "loss": 0.0602, "lr": "4.931e-05", "step": 2676, "steps": "23.29s,2676/33195" }, { "epoch": 0.40322337701461064, "eta": "195:39:15", "grad_norm": 0.0074, "loss": 0.0456, "lr": "4.931e-05", "step": 2677, "steps": "23.08s,2677/33195" }, { "epoch": 0.4033740021087513, "eta": "197:00:15", "grad_norm": 0.0073, "loss": 0.0459, "lr": "4.931e-05", "step": 2678, "steps": "23.24s,2678/33195" }, { "epoch": 0.403524627202892, "eta": "196:49:41", "grad_norm": 0.007, "loss": 0.0684, "lr": "4.931e-05", "step": 2679, "steps": "23.22s,2679/33195" }, { "epoch": 0.40367525229703266, "eta": "195:33:01", "grad_norm": 0.0104, "loss": 0.0501, "lr": "4.931e-05", "step": 2680, "steps": "23.07s,2680/33195" }, { "epoch": 0.40382587739117337, "eta": "196:38:44", "grad_norm": 0.0068, "loss": 0.049, "lr": "4.931e-05", "step": 2681, "steps": "23.2s,2681/33195" }, { "epoch": 0.403976502485314, "eta": "196:48:31", "grad_norm": 0.007, "loss": 0.0686, "lr": "4.931e-05", "step": 2682, "steps": "23.22s,2682/33195" }, { "epoch": 0.40412712757945474, "eta": "199:56:18", "grad_norm": 0.0101, "loss": 0.05, "lr": "4.931e-05", "step": 2683, "steps": "23.59s,2683/33195" }, { "epoch": 0.40427775267359545, "eta": "199:05:03", "grad_norm": 0.015, "loss": 0.0433, "lr": "4.931e-05", "step": 2684, "steps": "23.49s,2684/33195" }, { "epoch": 0.4044283777677361, "eta": "194:55:30", "grad_norm": 0.0099, "loss": 0.0424, "lr": "4.931e-05", "step": 2685, "steps": "23.0s,2685/33195" }, { "epoch": 0.4045790028618768, "eta": "196:46:58", "grad_norm": 0.0076, "loss": 0.0742, "lr": "4.931e-05", "step": 2686, "steps": "23.22s,2686/33195" }, { "epoch": 0.40472962795601747, "eta": "199:44:33", "grad_norm": 0.0146, "loss": 0.0469, "lr": "4.931e-05", "step": 2687, "steps": "23.57s,2687/33195" }, { "epoch": 0.4048802530501582, "eta": "195:04:31", "grad_norm": 0.0074, "loss": 0.0408, "lr": "4.931e-05", "step": 2688, "steps": "23.02s,2688/33195" }, { "epoch": 0.40503087814429883, "eta": "196:50:54", "grad_norm": 0.007, "loss": 0.0538, "lr": "4.931e-05", "step": 2689, "steps": "23.23s,2689/33195" }, { "epoch": 0.40518150323843954, "eta": "196:35:16", "grad_norm": 0.0071, "loss": 0.0404, "lr": "4.930e-05", "step": 2690, "steps": "23.2s,2690/33195" }, { "epoch": 0.4053321283325802, "eta": "196:29:47", "grad_norm": 0.0092, "loss": 0.0431, "lr": "4.930e-05", "step": 2691, "steps": "23.19s,2691/33195" }, { "epoch": 0.4054827534267209, "eta": "202:35:26", "grad_norm": 0.0082, "loss": 0.0496, "lr": "4.930e-05", "step": 2692, "steps": "23.91s,2692/33195" }, { "epoch": 0.40563337852086156, "eta": "195:33:06", "grad_norm": 0.0077, "loss": 0.0586, "lr": "4.930e-05", "step": 2693, "steps": "23.08s,2693/33195" }, { "epoch": 0.4057840036150023, "eta": "196:28:38", "grad_norm": 0.0084, "loss": 0.0412, "lr": "4.930e-05", "step": 2694, "steps": "23.19s,2694/33195" }, { "epoch": 0.40593462870914293, "eta": "196:38:25", "grad_norm": 0.0078, "loss": 0.0389, "lr": "4.930e-05", "step": 2695, "steps": "23.21s,2695/33195" }, { "epoch": 0.40608525380328364, "eta": "199:00:21", "grad_norm": 0.0089, "loss": 0.0488, "lr": "4.930e-05", "step": 2696, "steps": "23.49s,2696/33195" }, { "epoch": 0.4062358788974243, "eta": "198:29:28", "grad_norm": 0.0078, "loss": 0.0426, "lr": "4.930e-05", "step": 2697, "steps": "23.43s,2697/33195" }, { "epoch": 0.406386503991565, "eta": "196:27:05", "grad_norm": 0.0087, "loss": 0.0471, "lr": "4.930e-05", "step": 2698, "steps": "23.19s,2698/33195" }, { "epoch": 0.40653712908570566, "eta": "199:34:45", "grad_norm": 0.0072, "loss": 0.0589, "lr": "4.930e-05", "step": 2699, "steps": "23.56s,2699/33195" }, { "epoch": 0.4066877541798464, "eta": "197:01:53", "grad_norm": 0.007, "loss": 0.0488, "lr": "4.930e-05", "step": 2700, "steps": "23.26s,2700/33195" }, { "epoch": 0.406838379273987, "eta": "199:44:08", "grad_norm": 0.009, "loss": 0.0474, "lr": "4.930e-05", "step": 2701, "steps": "23.58s,2701/33195" }, { "epoch": 0.40698900436812774, "eta": "196:56:02", "grad_norm": 0.0064, "loss": 0.0628, "lr": "4.930e-05", "step": 2702, "steps": "23.25s,2702/33195" }, { "epoch": 0.4071396294622684, "eta": "196:40:24", "grad_norm": 0.0078, "loss": 0.041, "lr": "4.930e-05", "step": 2703, "steps": "23.22s,2703/33195" }, { "epoch": 0.4072902545564091, "eta": "195:39:02", "grad_norm": 0.0078, "loss": 0.0368, "lr": "4.930e-05", "step": 2704, "steps": "23.1s,2704/33195" }, { "epoch": 0.40744087965054976, "eta": "196:59:57", "grad_norm": 0.0085, "loss": 0.0531, "lr": "4.930e-05", "step": 2705, "steps": "23.26s,2705/33195" }, { "epoch": 0.40759150474469047, "eta": "199:37:05", "grad_norm": 0.0073, "loss": 0.0511, "lr": "4.930e-05", "step": 2706, "steps": "23.57s,2706/33195" }, { "epoch": 0.4077421298388311, "eta": "198:40:48", "grad_norm": 0.0072, "loss": 0.0641, "lr": "4.930e-05", "step": 2707, "steps": "23.46s,2707/33195" }, { "epoch": 0.40789275493297183, "eta": "196:18:08", "grad_norm": 0.006, "loss": 0.0449, "lr": "4.929e-05", "step": 2708, "steps": "23.18s,2708/33195" }, { "epoch": 0.4080433800271125, "eta": "197:08:34", "grad_norm": 0.008, "loss": 0.0493, "lr": "4.929e-05", "step": 2709, "steps": "23.28s,2709/33195" }, { "epoch": 0.4081940051212532, "eta": "195:16:24", "grad_norm": 0.0079, "loss": 0.037, "lr": "4.929e-05", "step": 2710, "steps": "23.06s,2710/33195" }, { "epoch": 0.4083446302153939, "eta": "198:59:34", "grad_norm": 0.0089, "loss": 0.0458, "lr": "4.929e-05", "step": 2711, "steps": "23.5s,2711/33195" }, { "epoch": 0.40849525530953457, "eta": "198:33:46", "grad_norm": 0.0082, "loss": 0.0697, "lr": "4.929e-05", "step": 2712, "steps": "23.45s,2712/33195" }, { "epoch": 0.4086458804036753, "eta": "199:14:01", "grad_norm": 0.0109, "loss": 0.0516, "lr": "4.929e-05", "step": 2713, "steps": "23.53s,2713/33195" }, { "epoch": 0.40879650549781593, "eta": "198:38:04", "grad_norm": 0.0064, "loss": 0.0501, "lr": "4.929e-05", "step": 2714, "steps": "23.46s,2714/33195" }, { "epoch": 0.40894713059195664, "eta": "196:20:31", "grad_norm": 0.0084, "loss": 0.0479, "lr": "4.929e-05", "step": 2715, "steps": "23.19s,2715/33195" }, { "epoch": 0.4090977556860973, "eta": "196:04:53", "grad_norm": 0.0068, "loss": 0.0369, "lr": "4.929e-05", "step": 2716, "steps": "23.16s,2716/33195" }, { "epoch": 0.409248380780238, "eta": "199:27:41", "grad_norm": 0.0077, "loss": 0.0328, "lr": "4.929e-05", "step": 2717, "steps": "23.56s,2717/33195" }, { "epoch": 0.40939900587437866, "eta": "196:09:12", "grad_norm": 0.0108, "loss": 0.0774, "lr": "4.929e-05", "step": 2718, "steps": "23.17s,2718/33195" }, { "epoch": 0.4095496309685194, "eta": "195:53:34", "grad_norm": 0.0063, "loss": 0.0467, "lr": "4.929e-05", "step": 2719, "steps": "23.14s,2719/33195" }, { "epoch": 0.40970025606266003, "eta": "196:28:44", "grad_norm": 0.0078, "loss": 0.0631, "lr": "4.929e-05", "step": 2720, "steps": "23.21s,2720/33195" }, { "epoch": 0.40985088115680074, "eta": "199:36:16", "grad_norm": 0.0178, "loss": 0.0428, "lr": "4.929e-05", "step": 2721, "steps": "23.58s,2721/33195" }, { "epoch": 0.4100015062509414, "eta": "199:25:43", "grad_norm": 0.0083, "loss": 0.0514, "lr": "4.929e-05", "step": 2722, "steps": "23.56s,2722/33195" }, { "epoch": 0.4101521313450821, "eta": "198:04:04", "grad_norm": 0.0083, "loss": 0.0354, "lr": "4.929e-05", "step": 2723, "steps": "23.4s,2723/33195" }, { "epoch": 0.41030275643922276, "eta": "198:29:04", "grad_norm": 0.0075, "loss": 0.0593, "lr": "4.929e-05", "step": 2724, "steps": "23.45s,2724/33195" }, { "epoch": 0.41045338153336347, "eta": "198:49:00", "grad_norm": 0.0071, "loss": 0.1045, "lr": "4.929e-05", "step": 2725, "steps": "23.49s,2725/33195" }, { "epoch": 0.4106040066275041, "eta": "195:10:15", "grad_norm": 0.0074, "loss": 0.0573, "lr": "4.928e-05", "step": 2726, "steps": "23.06s,2726/33195" }, { "epoch": 0.41075463172164484, "eta": "195:40:20", "grad_norm": 0.0096, "loss": 0.0432, "lr": "4.928e-05", "step": 2727, "steps": "23.12s,2727/33195" }, { "epoch": 0.4109052568157855, "eta": "199:23:22", "grad_norm": 0.0065, "loss": 0.0397, "lr": "4.928e-05", "step": 2728, "steps": "23.56s,2728/33195" }, { "epoch": 0.4110558819099262, "eta": "196:25:15", "grad_norm": 0.0069, "loss": 0.0529, "lr": "4.928e-05", "step": 2729, "steps": "23.21s,2729/33195" }, { "epoch": 0.41120650700406686, "eta": "198:11:30", "grad_norm": 0.0074, "loss": 0.0687, "lr": "4.928e-05", "step": 2730, "steps": "23.42s,2730/33195" }, { "epoch": 0.41135713209820757, "eta": "199:27:16", "grad_norm": 0.0102, "loss": 0.0982, "lr": "4.928e-05", "step": 2731, "steps": "23.57s,2731/33195" }, { "epoch": 0.4115077571923482, "eta": "199:52:16", "grad_norm": 0.0092, "loss": 0.0532, "lr": "4.928e-05", "step": 2732, "steps": "23.62s,2732/33195" }, { "epoch": 0.41165838228648893, "eta": "197:39:52", "grad_norm": 0.0071, "loss": 0.0408, "lr": "4.928e-05", "step": 2733, "steps": "23.36s,2733/33195" }, { "epoch": 0.4118090073806296, "eta": "195:17:19", "grad_norm": 0.0067, "loss": 0.051, "lr": "4.928e-05", "step": 2734, "steps": "23.08s,2734/33195" }, { "epoch": 0.4119596324747703, "eta": "197:23:51", "grad_norm": 0.0074, "loss": 0.0481, "lr": "4.928e-05", "step": 2735, "steps": "23.33s,2735/33195" }, { "epoch": 0.41211025756891095, "eta": "198:09:09", "grad_norm": 0.0117, "loss": 0.0464, "lr": "4.928e-05", "step": 2736, "steps": "23.42s,2736/33195" }, { "epoch": 0.41226088266305166, "eta": "195:26:19", "grad_norm": 0.0259, "loss": 0.0409, "lr": "4.928e-05", "step": 2737, "steps": "23.1s,2737/33195" }, { "epoch": 0.4124115077571924, "eta": "193:03:48", "grad_norm": 0.0076, "loss": 0.0611, "lr": "4.928e-05", "step": 2738, "steps": "22.82s,2738/33195" }, { "epoch": 0.41256213285133303, "eta": "198:18:08", "grad_norm": 0.0079, "loss": 0.0768, "lr": "4.928e-05", "step": 2739, "steps": "23.44s,2739/33195" }, { "epoch": 0.41271275794547374, "eta": "197:47:17", "grad_norm": 0.0069, "loss": 0.0436, "lr": "4.928e-05", "step": 2740, "steps": "23.38s,2740/33195" }, { "epoch": 0.4128633830396144, "eta": "195:55:14", "grad_norm": 0.0072, "loss": 0.0386, "lr": "4.928e-05", "step": 2741, "steps": "23.16s,2741/33195" }, { "epoch": 0.4130140081337551, "eta": "196:45:36", "grad_norm": 0.0074, "loss": 0.0544, "lr": "4.928e-05", "step": 2742, "steps": "23.26s,2742/33195" }, { "epoch": 0.41316463322789576, "eta": "196:24:55", "grad_norm": 0.0085, "loss": 0.0589, "lr": "4.927e-05", "step": 2743, "steps": "23.22s,2743/33195" }, { "epoch": 0.41331525832203647, "eta": "198:11:06", "grad_norm": 0.0071, "loss": 0.0715, "lr": "4.927e-05", "step": 2744, "steps": "23.43s,2744/33195" }, { "epoch": 0.4134658834161771, "eta": "196:03:51", "grad_norm": 0.0146, "loss": 0.0354, "lr": "4.927e-05", "step": 2745, "steps": "23.18s,2745/33195" }, { "epoch": 0.41361650851031784, "eta": "198:10:20", "grad_norm": 0.0088, "loss": 0.0474, "lr": "4.927e-05", "step": 2746, "steps": "23.43s,2746/33195" }, { "epoch": 0.4137671336044585, "eta": "195:58:00", "grad_norm": 0.0081, "loss": 0.0569, "lr": "4.927e-05", "step": 2747, "steps": "23.17s,2747/33195" }, { "epoch": 0.4139177586985992, "eta": "196:43:17", "grad_norm": 0.009, "loss": 0.0605, "lr": "4.927e-05", "step": 2748, "steps": "23.26s,2748/33195" }, { "epoch": 0.41406838379273986, "eta": "198:54:49", "grad_norm": 0.0073, "loss": 0.0692, "lr": "4.927e-05", "step": 2749, "steps": "23.52s,2749/33195" }, { "epoch": 0.41421900888688057, "eta": "194:45:48", "grad_norm": 0.0071, "loss": 0.0595, "lr": "4.927e-05", "step": 2750, "steps": "23.03s,2750/33195" }, { "epoch": 0.4143696339810212, "eta": "198:43:54", "grad_norm": 0.0083, "loss": 0.0645, "lr": "4.927e-05", "step": 2751, "steps": "23.5s,2751/33195" }, { "epoch": 0.41452025907516193, "eta": "196:51:53", "grad_norm": 0.0071, "loss": 0.0575, "lr": "4.927e-05", "step": 2752, "steps": "23.28s,2752/33195" }, { "epoch": 0.4146708841693026, "eta": "195:35:23", "grad_norm": 0.0079, "loss": 0.0322, "lr": "4.927e-05", "step": 2753, "steps": "23.13s,2753/33195" }, { "epoch": 0.4148215092634433, "eta": "195:24:51", "grad_norm": 0.0067, "loss": 0.0643, "lr": "4.927e-05", "step": 2754, "steps": "23.11s,2754/33195" }, { "epoch": 0.41497213435758395, "eta": "198:11:53", "grad_norm": 0.0062, "loss": 0.0423, "lr": "4.927e-05", "step": 2755, "steps": "23.44s,2755/33195" }, { "epoch": 0.41512275945172467, "eta": "195:54:31", "grad_norm": 0.0106, "loss": 0.0447, "lr": "4.927e-05", "step": 2756, "steps": "23.17s,2756/33195" }, { "epoch": 0.4152733845458653, "eta": "196:44:52", "grad_norm": 0.0086, "loss": 0.0718, "lr": "4.927e-05", "step": 2757, "steps": "23.27s,2757/33195" }, { "epoch": 0.41542400964000603, "eta": "196:29:15", "grad_norm": 0.0074, "loss": 0.0497, "lr": "4.927e-05", "step": 2758, "steps": "23.24s,2758/33195" }, { "epoch": 0.4155746347341467, "eta": "196:03:30", "grad_norm": 0.0093, "loss": 0.0353, "lr": "4.927e-05", "step": 2759, "steps": "23.19s,2759/33195" }, { "epoch": 0.4157252598282874, "eta": "199:26:01", "grad_norm": 0.007, "loss": 0.0385, "lr": "4.927e-05", "step": 2760, "steps": "23.59s,2760/33195" }, { "epoch": 0.41587588492242805, "eta": "198:09:32", "grad_norm": 0.0101, "loss": 0.0505, "lr": "4.926e-05", "step": 2761, "steps": "23.44s,2761/33195" }, { "epoch": 0.41602651001656876, "eta": "196:22:38", "grad_norm": 0.0067, "loss": 0.0519, "lr": "4.926e-05", "step": 2762, "steps": "23.23s,2762/33195" }, { "epoch": 0.4161771351107094, "eta": "197:38:20", "grad_norm": 0.0074, "loss": 0.0577, "lr": "4.926e-05", "step": 2763, "steps": "23.38s,2763/33195" }, { "epoch": 0.41632776020485013, "eta": "198:48:57", "grad_norm": 0.0076, "loss": 0.0657, "lr": "4.926e-05", "step": 2764, "steps": "23.52s,2764/33195" }, { "epoch": 0.41647838529899084, "eta": "198:18:07", "grad_norm": 0.0075, "loss": 0.0724, "lr": "4.926e-05", "step": 2765, "steps": "23.46s,2765/33195" }, { "epoch": 0.4166290103931315, "eta": "195:35:27", "grad_norm": 0.0064, "loss": 0.0411, "lr": "4.926e-05", "step": 2766, "steps": "23.14s,2766/33195" }, { "epoch": 0.4167796354872722, "eta": "196:00:25", "grad_norm": 0.0068, "loss": 0.0397, "lr": "4.926e-05", "step": 2767, "steps": "23.19s,2767/33195" }, { "epoch": 0.41693026058141286, "eta": "196:25:23", "grad_norm": 0.0067, "loss": 0.0401, "lr": "4.926e-05", "step": 2768, "steps": "23.24s,2768/33195" }, { "epoch": 0.41708088567555357, "eta": "194:58:47", "grad_norm": 0.0069, "loss": 0.0546, "lr": "4.926e-05", "step": 2769, "steps": "23.07s,2769/33195" }, { "epoch": 0.4172315107696942, "eta": "196:09:24", "grad_norm": 0.0081, "loss": 0.06, "lr": "4.926e-05", "step": 2770, "steps": "23.21s,2770/33195" }, { "epoch": 0.41738213586383494, "eta": "194:12:23", "grad_norm": 0.0082, "loss": 0.0325, "lr": "4.926e-05", "step": 2771, "steps": "22.98s,2771/33195" }, { "epoch": 0.4175327609579756, "eta": "197:55:06", "grad_norm": 0.007, "loss": 0.0484, "lr": "4.926e-05", "step": 2772, "steps": "23.42s,2772/33195" }, { "epoch": 0.4176833860521163, "eta": "195:32:45", "grad_norm": 0.0076, "loss": 0.0441, "lr": "4.926e-05", "step": 2773, "steps": "23.14s,2773/33195" }, { "epoch": 0.41783401114625696, "eta": "197:59:24", "grad_norm": 0.007, "loss": 0.077, "lr": "4.926e-05", "step": 2774, "steps": "23.43s,2774/33195" }, { "epoch": 0.41798463624039767, "eta": "198:09:09", "grad_norm": 0.0098, "loss": 0.0573, "lr": "4.926e-05", "step": 2775, "steps": "23.45s,2775/33195" }, { "epoch": 0.4181352613345383, "eta": "196:22:17", "grad_norm": 0.0072, "loss": 0.0582, "lr": "4.926e-05", "step": 2776, "steps": "23.24s,2776/33195" }, { "epoch": 0.41828588642867903, "eta": "198:13:26", "grad_norm": 0.0066, "loss": 0.0417, "lr": "4.926e-05", "step": 2777, "steps": "23.46s,2777/33195" }, { "epoch": 0.4184365115228197, "eta": "195:40:57", "grad_norm": 0.0067, "loss": 0.0635, "lr": "4.925e-05", "step": 2778, "steps": "23.16s,2778/33195" }, { "epoch": 0.4185871366169604, "eta": "197:32:06", "grad_norm": 0.0099, "loss": 0.0591, "lr": "4.925e-05", "step": 2779, "steps": "23.38s,2779/33195" }, { "epoch": 0.41873776171110105, "eta": "196:15:40", "grad_norm": 0.0074, "loss": 0.0492, "lr": "4.925e-05", "step": 2780, "steps": "23.23s,2780/33195" }, { "epoch": 0.41888838680524176, "eta": "196:35:33", "grad_norm": 0.014, "loss": 0.0589, "lr": "4.925e-05", "step": 2781, "steps": "23.27s,2781/33195" }, { "epoch": 0.4190390118993824, "eta": "195:29:16", "grad_norm": 0.0099, "loss": 0.0405, "lr": "4.925e-05", "step": 2782, "steps": "23.14s,2782/33195" }, { "epoch": 0.41918963699352313, "eta": "194:43:16", "grad_norm": 0.0065, "loss": 0.0342, "lr": "4.925e-05", "step": 2783, "steps": "23.05s,2783/33195" }, { "epoch": 0.4193402620876638, "eta": "194:58:05", "grad_norm": 0.0078, "loss": 0.0379, "lr": "4.925e-05", "step": 2784, "steps": "23.08s,2784/33195" }, { "epoch": 0.4194908871818045, "eta": "195:48:23", "grad_norm": 0.0082, "loss": 0.0562, "lr": "4.925e-05", "step": 2785, "steps": "23.18s,2785/33195" }, { "epoch": 0.41964151227594515, "eta": "195:58:08", "grad_norm": 0.006, "loss": 0.052, "lr": "4.925e-05", "step": 2786, "steps": "23.2s,2786/33195" }, { "epoch": 0.41979213737008586, "eta": "194:46:48", "grad_norm": 0.0083, "loss": 0.0456, "lr": "4.925e-05", "step": 2787, "steps": "23.06s,2787/33195" }, { "epoch": 0.4199427624642265, "eta": "197:28:35", "grad_norm": 0.0079, "loss": 0.0372, "lr": "4.925e-05", "step": 2788, "steps": "23.38s,2788/33195" }, { "epoch": 0.4200933875583672, "eta": "196:12:11", "grad_norm": 0.0072, "loss": 0.044, "lr": "4.925e-05", "step": 2789, "steps": "23.23s,2789/33195" }, { "epoch": 0.4202440126525079, "eta": "194:50:43", "grad_norm": 0.0078, "loss": 0.0385, "lr": "4.925e-05", "step": 2790, "steps": "23.07s,2790/33195" }, { "epoch": 0.4203946377466486, "eta": "194:30:04", "grad_norm": 0.0091, "loss": 0.0477, "lr": "4.925e-05", "step": 2791, "steps": "23.03s,2791/33195" }, { "epoch": 0.4205452628407893, "eta": "195:30:29", "grad_norm": 0.0085, "loss": 0.0739, "lr": "4.925e-05", "step": 2792, "steps": "23.15s,2792/33195" }, { "epoch": 0.42069588793492996, "eta": "196:00:30", "grad_norm": 0.012, "loss": 0.054, "lr": "4.925e-05", "step": 2793, "steps": "23.21s,2793/33195" }, { "epoch": 0.42084651302907067, "eta": "197:31:19", "grad_norm": 0.0075, "loss": 0.053, "lr": "4.925e-05", "step": 2794, "steps": "23.39s,2794/33195" }, { "epoch": 0.4209971381232113, "eta": "198:41:52", "grad_norm": 0.0067, "loss": 0.0826, "lr": "4.925e-05", "step": 2795, "steps": "23.53s,2795/33195" }, { "epoch": 0.42114776321735203, "eta": "197:45:44", "grad_norm": 0.0099, "loss": 0.0662, "lr": "4.924e-05", "step": 2796, "steps": "23.42s,2796/33195" }, { "epoch": 0.4212983883114927, "eta": "195:38:41", "grad_norm": 0.0083, "loss": 0.0688, "lr": "4.924e-05", "step": 2797, "steps": "23.17s,2797/33195" }, { "epoch": 0.4214490134056334, "eta": "195:28:10", "grad_norm": 0.0095, "loss": 0.0495, "lr": "4.924e-05", "step": 2798, "steps": "23.15s,2798/33195" }, { "epoch": 0.42159963849977405, "eta": "198:40:17", "grad_norm": 0.0071, "loss": 0.0517, "lr": "4.924e-05", "step": 2799, "steps": "23.53s,2799/33195" }, { "epoch": 0.42175026359391476, "eta": "198:50:02", "grad_norm": 0.0118, "loss": 0.0703, "lr": "4.924e-05", "step": 2800, "steps": "23.55s,2800/33195" }, { "epoch": 0.4219008886880554, "eta": "436:39:37", "grad_norm": 0.0085, "loss": 0.0344, "lr": "4.924e-05", "step": 2801, "steps": "51.72s,2801/33195" }, { "epoch": 0.42205151378219613, "eta": "193:19:59", "grad_norm": 0.0067, "loss": 0.0365, "lr": "4.924e-05", "step": 2802, "steps": "22.9s,2802/33195" }, { "epoch": 0.4222021388763368, "eta": "195:05:59", "grad_norm": 0.0078, "loss": 0.0471, "lr": "4.924e-05", "step": 2803, "steps": "23.11s,2803/33195" }, { "epoch": 0.4223527639704775, "eta": "195:46:07", "grad_norm": 0.0108, "loss": 0.0563, "lr": "4.924e-05", "step": 2804, "steps": "23.19s,2804/33195" }, { "epoch": 0.42250338906461815, "eta": "197:32:06", "grad_norm": 0.0067, "loss": 0.0358, "lr": "4.924e-05", "step": 2805, "steps": "23.4s,2805/33195" }, { "epoch": 0.42265401415875886, "eta": "195:04:49", "grad_norm": 0.0089, "loss": 0.0316, "lr": "4.924e-05", "step": 2806, "steps": "23.11s,2806/33195" }, { "epoch": 0.4228046392528995, "eta": "198:42:13", "grad_norm": 0.0058, "loss": 0.0686, "lr": "4.924e-05", "step": 2807, "steps": "23.54s,2807/33195" }, { "epoch": 0.4229552643470402, "eta": "195:59:46", "grad_norm": 0.0064, "loss": 0.0759, "lr": "4.924e-05", "step": 2808, "steps": "23.22s,2808/33195" }, { "epoch": 0.4231058894411809, "eta": "195:18:52", "grad_norm": 0.0054, "loss": 0.0515, "lr": "4.924e-05", "step": 2809, "steps": "23.14s,2809/33195" }, { "epoch": 0.4232565145353216, "eta": "195:53:55", "grad_norm": 0.0079, "loss": 0.0362, "lr": "4.924e-05", "step": 2810, "steps": "23.21s,2810/33195" }, { "epoch": 0.42340713962946225, "eta": "194:07:12", "grad_norm": 0.0078, "loss": 0.0617, "lr": "4.924e-05", "step": 2811, "steps": "23.0s,2811/33195" }, { "epoch": 0.42355776472360296, "eta": "194:42:15", "grad_norm": 0.009, "loss": 0.0455, "lr": "4.924e-05", "step": 2812, "steps": "23.07s,2812/33195" }, { "epoch": 0.4237083898177436, "eta": "194:31:45", "grad_norm": 0.0067, "loss": 0.0598, "lr": "4.923e-05", "step": 2813, "steps": "23.05s,2813/33195" }, { "epoch": 0.4238590149118843, "eta": "194:16:10", "grad_norm": 0.006, "loss": 0.0731, "lr": "4.923e-05", "step": 2814, "steps": "23.02s,2814/33195" }, { "epoch": 0.424009640006025, "eta": "194:36:02", "grad_norm": 0.008, "loss": 0.0677, "lr": "4.923e-05", "step": 2815, "steps": "23.06s,2815/33195" }, { "epoch": 0.4241602651001657, "eta": "194:55:54", "grad_norm": 0.0105, "loss": 0.0647, "lr": "4.923e-05", "step": 2816, "steps": "23.1s,2816/33195" }, { "epoch": 0.42431089019430634, "eta": "194:15:01", "grad_norm": 0.0073, "loss": 0.0689, "lr": "4.923e-05", "step": 2817, "steps": "23.02s,2817/33195" }, { "epoch": 0.42446151528844706, "eta": "198:22:43", "grad_norm": 0.0091, "loss": 0.0549, "lr": "4.923e-05", "step": 2818, "steps": "23.51s,2818/33195" }, { "epoch": 0.42461214038258777, "eta": "197:57:00", "grad_norm": 0.0117, "loss": 0.0729, "lr": "4.923e-05", "step": 2819, "steps": "23.46s,2819/33195" }, { "epoch": 0.4247627654767284, "eta": "195:24:45", "grad_norm": 0.0081, "loss": 0.0671, "lr": "4.923e-05", "step": 2820, "steps": "23.16s,2820/33195" }, { "epoch": 0.42491339057086913, "eta": "195:34:29", "grad_norm": 0.0101, "loss": 0.0605, "lr": "4.923e-05", "step": 2821, "steps": "23.18s,2821/33195" }, { "epoch": 0.4250640156650098, "eta": "195:49:17", "grad_norm": 0.006, "loss": 0.052, "lr": "4.923e-05", "step": 2822, "steps": "23.21s,2822/33195" }, { "epoch": 0.4252146407591505, "eta": "198:00:30", "grad_norm": 0.0063, "loss": 0.0505, "lr": "4.923e-05", "step": 2823, "steps": "23.47s,2823/33195" }, { "epoch": 0.42536526585329115, "eta": "196:03:42", "grad_norm": 0.0064, "loss": 0.0441, "lr": "4.923e-05", "step": 2824, "steps": "23.24s,2824/33195" }, { "epoch": 0.42551589094743186, "eta": "197:59:43", "grad_norm": 0.0078, "loss": 0.0326, "lr": "4.923e-05", "step": 2825, "steps": "23.47s,2825/33195" }, { "epoch": 0.4256665160415725, "eta": "196:02:55", "grad_norm": 0.007, "loss": 0.0566, "lr": "4.923e-05", "step": 2826, "steps": "23.24s,2826/33195" }, { "epoch": 0.42581714113571323, "eta": "197:43:45", "grad_norm": 0.0084, "loss": 0.0506, "lr": "4.923e-05", "step": 2827, "steps": "23.44s,2827/33195" }, { "epoch": 0.4259677662298539, "eta": "192:14:23", "grad_norm": 0.0082, "loss": 0.0488, "lr": "4.923e-05", "step": 2828, "steps": "22.79s,2828/33195" }, { "epoch": 0.4261183913239946, "eta": "197:32:51", "grad_norm": 0.0074, "loss": 0.0671, "lr": "4.923e-05", "step": 2829, "steps": "23.42s,2829/33195" }, { "epoch": 0.42626901641813525, "eta": "195:00:38", "grad_norm": 0.007, "loss": 0.0486, "lr": "4.922e-05", "step": 2830, "steps": "23.12s,2830/33195" }, { "epoch": 0.42641964151227596, "eta": "195:55:55", "grad_norm": 0.0099, "loss": 0.0527, "lr": "4.922e-05", "step": 2831, "steps": "23.23s,2831/33195" }, { "epoch": 0.4265702666064166, "eta": "198:22:17", "grad_norm": 0.0099, "loss": 0.0471, "lr": "4.922e-05", "step": 2832, "steps": "23.52s,2832/33195" }, { "epoch": 0.4267208917005573, "eta": "195:14:40", "grad_norm": 0.01, "loss": 0.0292, "lr": "4.922e-05", "step": 2833, "steps": "23.15s,2833/33195" }, { "epoch": 0.426871516794698, "eta": "195:29:27", "grad_norm": 0.0078, "loss": 0.0619, "lr": "4.922e-05", "step": 2834, "steps": "23.18s,2834/33195" }, { "epoch": 0.4270221418888387, "eta": "195:03:46", "grad_norm": 0.0079, "loss": 0.0397, "lr": "4.922e-05", "step": 2835, "steps": "23.13s,2835/33195" }, { "epoch": 0.42717276698297935, "eta": "197:30:07", "grad_norm": 0.0106, "loss": 0.0401, "lr": "4.922e-05", "step": 2836, "steps": "23.42s,2836/33195" }, { "epoch": 0.42732339207712006, "eta": "195:28:18", "grad_norm": 0.0081, "loss": 0.0435, "lr": "4.922e-05", "step": 2837, "steps": "23.18s,2837/33195" }, { "epoch": 0.4274740171712607, "eta": "194:57:33", "grad_norm": 0.0077, "loss": 0.0796, "lr": "4.922e-05", "step": 2838, "steps": "23.12s,2838/33195" }, { "epoch": 0.4276246422654014, "eta": "194:31:52", "grad_norm": 0.011, "loss": 0.0445, "lr": "4.922e-05", "step": 2839, "steps": "23.07s,2839/33195" }, { "epoch": 0.4277752673595421, "eta": "197:48:48", "grad_norm": 0.0139, "loss": 0.0565, "lr": "4.922e-05", "step": 2840, "steps": "23.46s,2840/33195" }, { "epoch": 0.4279258924536828, "eta": "198:03:35", "grad_norm": 0.0085, "loss": 0.052, "lr": "4.922e-05", "step": 2841, "steps": "23.49s,2841/33195" }, { "epoch": 0.42807651754782344, "eta": "198:28:29", "grad_norm": 0.0143, "loss": 0.0806, "lr": "4.922e-05", "step": 2842, "steps": "23.54s,2842/33195" }, { "epoch": 0.42822714264196415, "eta": "198:48:20", "grad_norm": 0.0076, "loss": 0.0758, "lr": "4.922e-05", "step": 2843, "steps": "23.58s,2843/33195" }, { "epoch": 0.4283777677361048, "eta": "195:05:22", "grad_norm": 0.0117, "loss": 0.0396, "lr": "4.922e-05", "step": 2844, "steps": "23.14s,2844/33195" }, { "epoch": 0.4285283928302455, "eta": "195:35:20", "grad_norm": 0.0089, "loss": 0.0713, "lr": "4.922e-05", "step": 2845, "steps": "23.2s,2845/33195" }, { "epoch": 0.42867901792438623, "eta": "198:52:12", "grad_norm": 0.0066, "loss": 0.0488, "lr": "4.922e-05", "step": 2846, "steps": "23.59s,2846/33195" }, { "epoch": 0.4288296430185269, "eta": "198:51:49", "grad_norm": 0.0103, "loss": 0.0378, "lr": "4.921e-05", "step": 2847, "steps": "23.59s,2847/33195" }, { "epoch": 0.4289802681126676, "eta": "192:47:15", "grad_norm": 0.008, "loss": 0.0693, "lr": "4.921e-05", "step": 2848, "steps": "22.87s,2848/33195" }, { "epoch": 0.42913089320680825, "eta": "195:23:40", "grad_norm": 0.0078, "loss": 0.0496, "lr": "4.921e-05", "step": 2849, "steps": "23.18s,2849/33195" }, { "epoch": 0.42928151830094896, "eta": "191:45:48", "grad_norm": 0.0084, "loss": 0.0271, "lr": "4.921e-05", "step": 2850, "steps": "22.75s,2850/33195" }, { "epoch": 0.4294321433950896, "eta": "194:27:16", "grad_norm": 0.0057, "loss": 0.0357, "lr": "4.921e-05", "step": 2851, "steps": "23.07s,2851/33195" }, { "epoch": 0.4295827684892303, "eta": "197:49:10", "grad_norm": 0.0093, "loss": 0.0597, "lr": "4.921e-05", "step": 2852, "steps": "23.47s,2852/33195" }, { "epoch": 0.429733393583371, "eta": "199:34:58", "grad_norm": 0.0093, "loss": 0.0403, "lr": "4.921e-05", "step": 2853, "steps": "23.68s,2853/33195" }, { "epoch": 0.4298840186775117, "eta": "226:47:56", "grad_norm": 0.0085, "loss": 0.0554, "lr": "4.921e-05", "step": 2854, "steps": "26.91s,2854/33195" }, { "epoch": 0.43003464377165235, "eta": "225:56:55", "grad_norm": 0.0061, "loss": 0.0625, "lr": "4.921e-05", "step": 2855, "steps": "26.81s,2855/33195" }, { "epoch": 0.43018526886579306, "eta": "214:03:30", "grad_norm": 0.0049, "loss": 0.0787, "lr": "4.921e-05", "step": 2856, "steps": "25.4s,2856/33195" }, { "epoch": 0.4303358939599337, "eta": "224:35:07", "grad_norm": 0.0071, "loss": 0.0604, "lr": "4.921e-05", "step": 2857, "steps": "26.65s,2857/33195" }, { "epoch": 0.4304865190540744, "eta": "209:24:34", "grad_norm": 0.0077, "loss": 0.0482, "lr": "4.921e-05", "step": 2858, "steps": "24.85s,2858/33195" }, { "epoch": 0.4306371441482151, "eta": "227:46:22", "grad_norm": 0.0081, "loss": 0.0648, "lr": "4.921e-05", "step": 2859, "steps": "27.03s,2859/33195" }, { "epoch": 0.4307877692423558, "eta": "212:30:48", "grad_norm": 0.0076, "loss": 0.052, "lr": "4.921e-05", "step": 2860, "steps": "25.22s,2860/33195" }, { "epoch": 0.43093839433649644, "eta": "196:14:38", "grad_norm": 0.0055, "loss": 0.0596, "lr": "4.921e-05", "step": 2861, "steps": "23.29s,2861/33195" }, { "epoch": 0.43108901943063715, "eta": "198:05:28", "grad_norm": 0.0091, "loss": 0.0423, "lr": "4.921e-05", "step": 2862, "steps": "23.51s,2862/33195" }, { "epoch": 0.4312396445247778, "eta": "197:29:42", "grad_norm": 0.0082, "loss": 0.0476, "lr": "4.921e-05", "step": 2863, "steps": "23.44s,2863/33195" }, { "epoch": 0.4313902696189185, "eta": "198:14:48", "grad_norm": 0.0074, "loss": 0.0288, "lr": "4.920e-05", "step": 2864, "steps": "23.53s,2864/33195" }, { "epoch": 0.4315408947130592, "eta": "194:52:12", "grad_norm": 0.0067, "loss": 0.0429, "lr": "4.920e-05", "step": 2865, "steps": "23.13s,2865/33195" }, { "epoch": 0.4316915198071999, "eta": "194:36:39", "grad_norm": 0.0095, "loss": 0.057, "lr": "4.920e-05", "step": 2866, "steps": "23.1s,2866/33195" }, { "epoch": 0.43184214490134054, "eta": "194:51:26", "grad_norm": 0.0092, "loss": 0.0696, "lr": "4.920e-05", "step": 2867, "steps": "23.13s,2867/33195" }, { "epoch": 0.43199276999548125, "eta": "192:29:31", "grad_norm": 0.007, "loss": 0.0607, "lr": "4.920e-05", "step": 2868, "steps": "22.85s,2868/33195" }, { "epoch": 0.4321433950896219, "eta": "194:25:24", "grad_norm": 0.0074, "loss": 0.0481, "lr": "4.920e-05", "step": 2869, "steps": "23.08s,2869/33195" }, { "epoch": 0.4322940201837626, "eta": "194:35:07", "grad_norm": 0.0068, "loss": 0.0503, "lr": "4.920e-05", "step": 2870, "steps": "23.1s,2870/33195" }, { "epoch": 0.4324446452779033, "eta": "197:11:24", "grad_norm": 0.0061, "loss": 0.0496, "lr": "4.920e-05", "step": 2871, "steps": "23.41s,2871/33195" }, { "epoch": 0.432595270372044, "eta": "197:31:14", "grad_norm": 0.0067, "loss": 0.0587, "lr": "4.920e-05", "step": 2872, "steps": "23.45s,2872/33195" }, { "epoch": 0.4327458954661847, "eta": "194:33:58", "grad_norm": 0.0076, "loss": 0.03, "lr": "4.920e-05", "step": 2873, "steps": "23.1s,2873/33195" }, { "epoch": 0.43289652056032535, "eta": "197:20:21", "grad_norm": 0.007, "loss": 0.0396, "lr": "4.920e-05", "step": 2874, "steps": "23.43s,2874/33195" }, { "epoch": 0.43304714565446606, "eta": "195:08:34", "grad_norm": 0.0081, "loss": 0.0569, "lr": "4.920e-05", "step": 2875, "steps": "23.17s,2875/33195" }, { "epoch": 0.4331977707486067, "eta": "193:37:13", "grad_norm": 0.0085, "loss": 0.0504, "lr": "4.920e-05", "step": 2876, "steps": "22.99s,2876/33195" }, { "epoch": 0.4333483958427474, "eta": "195:12:51", "grad_norm": 0.0064, "loss": 0.0479, "lr": "4.920e-05", "step": 2877, "steps": "23.18s,2877/33195" }, { "epoch": 0.4334990209368881, "eta": "194:57:18", "grad_norm": 0.0068, "loss": 0.0397, "lr": "4.920e-05", "step": 2878, "steps": "23.15s,2878/33195" }, { "epoch": 0.4336496460310288, "eta": "194:36:42", "grad_norm": 0.0082, "loss": 0.0309, "lr": "4.920e-05", "step": 2879, "steps": "23.11s,2879/33195" }, { "epoch": 0.43380027112516945, "eta": "194:31:16", "grad_norm": 0.0075, "loss": 0.0553, "lr": "4.920e-05", "step": 2880, "steps": "23.1s,2880/33195" }, { "epoch": 0.43395089621931016, "eta": "196:42:15", "grad_norm": 0.0057, "loss": 0.0577, "lr": "4.919e-05", "step": 2881, "steps": "23.36s,2881/33195" }, { "epoch": 0.4341015213134508, "eta": "195:21:01", "grad_norm": 0.0071, "loss": 0.0666, "lr": "4.919e-05", "step": 2882, "steps": "23.2s,2882/33195" }, { "epoch": 0.4342521464075915, "eta": "195:20:38", "grad_norm": 0.007, "loss": 0.0366, "lr": "4.919e-05", "step": 2883, "steps": "23.2s,2883/33195" }, { "epoch": 0.4344027715017322, "eta": "194:49:56", "grad_norm": 0.0076, "loss": 0.06, "lr": "4.919e-05", "step": 2884, "steps": "23.14s,2884/33195" }, { "epoch": 0.4345533965958729, "eta": "194:59:39", "grad_norm": 0.0076, "loss": 0.0584, "lr": "4.919e-05", "step": 2885, "steps": "23.16s,2885/33195" }, { "epoch": 0.43470402169001354, "eta": "194:59:16", "grad_norm": 0.0081, "loss": 0.0615, "lr": "4.919e-05", "step": 2886, "steps": "23.16s,2886/33195" }, { "epoch": 0.43485464678415425, "eta": "191:51:59", "grad_norm": 0.0077, "loss": 0.049, "lr": "4.919e-05", "step": 2887, "steps": "22.79s,2887/33195" }, { "epoch": 0.4350052718782949, "eta": "194:02:56", "grad_norm": 0.0063, "loss": 0.0534, "lr": "4.919e-05", "step": 2888, "steps": "23.05s,2888/33195" }, { "epoch": 0.4351558969724356, "eta": "198:05:00", "grad_norm": 0.0077, "loss": 0.0786, "lr": "4.919e-05", "step": 2889, "steps": "23.53s,2889/33195" }, { "epoch": 0.4353065220665763, "eta": "194:42:34", "grad_norm": 0.0066, "loss": 0.052, "lr": "4.919e-05", "step": 2890, "steps": "23.13s,2890/33195" }, { "epoch": 0.435457147160717, "eta": "194:47:14", "grad_norm": 0.0073, "loss": 0.066, "lr": "4.919e-05", "step": 2891, "steps": "23.14s,2891/33195" }, { "epoch": 0.43560777225485764, "eta": "194:51:54", "grad_norm": 0.0077, "loss": 0.0661, "lr": "4.919e-05", "step": 2892, "steps": "23.15s,2892/33195" }, { "epoch": 0.43575839734899835, "eta": "194:16:10", "grad_norm": 0.0075, "loss": 0.0635, "lr": "4.919e-05", "step": 2893, "steps": "23.08s,2893/33195" }, { "epoch": 0.435909022443139, "eta": "192:14:34", "grad_norm": 0.0077, "loss": 0.0228, "lr": "4.919e-05", "step": 2894, "steps": "22.84s,2894/33195" }, { "epoch": 0.4360596475372797, "eta": "197:17:12", "grad_norm": 0.0081, "loss": 0.0434, "lr": "4.919e-05", "step": 2895, "steps": "23.44s,2895/33195" }, { "epoch": 0.43621027263142037, "eta": "194:20:03", "grad_norm": 0.0077, "loss": 0.0518, "lr": "4.919e-05", "step": 2896, "steps": "23.09s,2896/33195" }, { "epoch": 0.4363608977255611, "eta": "195:10:10", "grad_norm": 0.0063, "loss": 0.0656, "lr": "4.918e-05", "step": 2897, "steps": "23.19s,2897/33195" }, { "epoch": 0.43651152281970174, "eta": "196:55:49", "grad_norm": 0.0073, "loss": 0.0505, "lr": "4.918e-05", "step": 2898, "steps": "23.4s,2898/33195" }, { "epoch": 0.43666214791384245, "eta": "195:09:24", "grad_norm": 0.0074, "loss": 0.0484, "lr": "4.918e-05", "step": 2899, "steps": "23.19s,2899/33195" }, { "epoch": 0.43681277300798316, "eta": "193:58:19", "grad_norm": 0.0078, "loss": 0.0382, "lr": "4.918e-05", "step": 2900, "steps": "23.05s,2900/33195" }, { "epoch": 0.4369633981021238, "eta": "195:03:34", "grad_norm": 0.0069, "loss": 0.0487, "lr": "4.918e-05", "step": 2901, "steps": "23.18s,2901/33195" }, { "epoch": 0.4371140231962645, "eta": "194:32:54", "grad_norm": 0.0074, "loss": 0.053, "lr": "4.918e-05", "step": 2902, "steps": "23.12s,2902/33195" }, { "epoch": 0.4372646482904052, "eta": "194:42:36", "grad_norm": 0.0094, "loss": 0.0464, "lr": "4.918e-05", "step": 2903, "steps": "23.14s,2903/33195" }, { "epoch": 0.4374152733845459, "eta": "198:04:10", "grad_norm": 0.0079, "loss": 0.0439, "lr": "4.918e-05", "step": 2904, "steps": "23.54s,2904/33195" }, { "epoch": 0.43756589847868654, "eta": "195:22:13", "grad_norm": 0.0076, "loss": 0.0682, "lr": "4.918e-05", "step": 2905, "steps": "23.22s,2905/33195" }, { "epoch": 0.43771652357282725, "eta": "194:51:33", "grad_norm": 0.0064, "loss": 0.0526, "lr": "4.918e-05", "step": 2906, "steps": "23.16s,2906/33195" }, { "epoch": 0.4378671486669679, "eta": "195:56:47", "grad_norm": 0.0064, "loss": 0.0429, "lr": "4.918e-05", "step": 2907, "steps": "23.29s,2907/33195" }, { "epoch": 0.4380177737611086, "eta": "197:52:30", "grad_norm": 0.0098, "loss": 0.0695, "lr": "4.918e-05", "step": 2908, "steps": "23.52s,2908/33195" }, { "epoch": 0.4381683988552493, "eta": "198:17:21", "grad_norm": 0.0066, "loss": 0.0591, "lr": "4.918e-05", "step": 2909, "steps": "23.57s,2909/33195" }, { "epoch": 0.43831902394939, "eta": "195:40:29", "grad_norm": 0.0076, "loss": 0.042, "lr": "4.918e-05", "step": 2910, "steps": "23.26s,2910/33195" }, { "epoch": 0.43846964904353064, "eta": "195:24:57", "grad_norm": 0.0106, "loss": 0.0651, "lr": "4.918e-05", "step": 2911, "steps": "23.23s,2911/33195" }, { "epoch": 0.43862027413767135, "eta": "197:35:47", "grad_norm": 0.0076, "loss": 0.0686, "lr": "4.918e-05", "step": 2912, "steps": "23.49s,2912/33195" }, { "epoch": 0.438770899231812, "eta": "194:28:39", "grad_norm": 0.0153, "loss": 0.0437, "lr": "4.918e-05", "step": 2913, "steps": "23.12s,2913/33195" }, { "epoch": 0.4389215243259527, "eta": "197:29:57", "grad_norm": 0.0065, "loss": 0.0559, "lr": "4.917e-05", "step": 2914, "steps": "23.48s,2914/33195" }, { "epoch": 0.43907214942009337, "eta": "194:32:56", "grad_norm": 0.0084, "loss": 0.0522, "lr": "4.917e-05", "step": 2915, "steps": "23.13s,2915/33195" }, { "epoch": 0.4392227745142341, "eta": "195:02:50", "grad_norm": 0.0072, "loss": 0.0486, "lr": "4.917e-05", "step": 2916, "steps": "23.19s,2916/33195" }, { "epoch": 0.43937339960837474, "eta": "194:01:53", "grad_norm": 0.007, "loss": 0.0827, "lr": "4.917e-05", "step": 2917, "steps": "23.07s,2917/33195" }, { "epoch": 0.43952402470251545, "eta": "194:26:44", "grad_norm": 0.0081, "loss": 0.0391, "lr": "4.917e-05", "step": 2918, "steps": "23.12s,2918/33195" }, { "epoch": 0.4396746497966561, "eta": "197:22:57", "grad_norm": 0.007, "loss": 0.0487, "lr": "4.917e-05", "step": 2919, "steps": "23.47s,2919/33195" }, { "epoch": 0.4398252748907968, "eta": "197:27:37", "grad_norm": 0.0076, "loss": 0.0537, "lr": "4.917e-05", "step": 2920, "steps": "23.48s,2920/33195" }, { "epoch": 0.43997589998493747, "eta": "194:25:34", "grad_norm": 0.0092, "loss": 0.053, "lr": "4.917e-05", "step": 2921, "steps": "23.12s,2921/33195" }, { "epoch": 0.4401265250790782, "eta": "197:06:39", "grad_norm": 0.0063, "loss": 0.0669, "lr": "4.917e-05", "step": 2922, "steps": "23.44s,2922/33195" }, { "epoch": 0.44027715017321883, "eta": "197:56:42", "grad_norm": 0.0074, "loss": 0.0434, "lr": "4.917e-05", "step": 2923, "steps": "23.54s,2923/33195" }, { "epoch": 0.44042777526735954, "eta": "197:26:03", "grad_norm": 0.0082, "loss": 0.0592, "lr": "4.917e-05", "step": 2924, "steps": "23.48s,2924/33195" }, { "epoch": 0.4405784003615002, "eta": "191:42:36", "grad_norm": 0.0081, "loss": 0.0433, "lr": "4.917e-05", "step": 2925, "steps": "22.8s,2925/33195" }, { "epoch": 0.4407290254556409, "eta": "195:29:14", "grad_norm": 0.0114, "loss": 0.0544, "lr": "4.917e-05", "step": 2926, "steps": "23.25s,2926/33195" }, { "epoch": 0.4408796505497816, "eta": "193:32:49", "grad_norm": 0.0076, "loss": 0.0536, "lr": "4.917e-05", "step": 2927, "steps": "23.02s,2927/33195" }, { "epoch": 0.4410302756439223, "eta": "197:19:26", "grad_norm": 0.0063, "loss": 0.0676, "lr": "4.917e-05", "step": 2928, "steps": "23.47s,2928/33195" }, { "epoch": 0.441180900738063, "eta": "197:08:57", "grad_norm": 0.0072, "loss": 0.0416, "lr": "4.917e-05", "step": 2929, "steps": "23.45s,2929/33195" }, { "epoch": 0.44133152583220364, "eta": "199:09:37", "grad_norm": 0.0052, "loss": 0.0428, "lr": "4.916e-05", "step": 2930, "steps": "23.69s,2930/33195" }, { "epoch": 0.44148215092634435, "eta": "193:31:17", "grad_norm": 0.0098, "loss": 0.0705, "lr": "4.916e-05", "step": 2931, "steps": "23.02s,2931/33195" }, { "epoch": 0.441632776020485, "eta": "194:16:17", "grad_norm": 0.0071, "loss": 0.0537, "lr": "4.916e-05", "step": 2932, "steps": "23.11s,2932/33195" }, { "epoch": 0.4417834011146257, "eta": "194:26:00", "grad_norm": 0.0072, "loss": 0.0481, "lr": "4.916e-05", "step": 2933, "steps": "23.13s,2933/33195" }, { "epoch": 0.4419340262087664, "eta": "196:56:55", "grad_norm": 0.0081, "loss": 0.0679, "lr": "4.916e-05", "step": 2934, "steps": "23.43s,2934/33195" }, { "epoch": 0.4420846513029071, "eta": "194:40:21", "grad_norm": 0.0104, "loss": 0.0455, "lr": "4.916e-05", "step": 2935, "steps": "23.16s,2935/33195" }, { "epoch": 0.44223527639704774, "eta": "194:45:01", "grad_norm": 0.0058, "loss": 0.0465, "lr": "4.916e-05", "step": 2936, "steps": "23.17s,2936/33195" }, { "epoch": 0.44238590149118845, "eta": "196:45:39", "grad_norm": 0.0059, "loss": 0.0425, "lr": "4.916e-05", "step": 2937, "steps": "23.41s,2937/33195" }, { "epoch": 0.4425365265853291, "eta": "197:20:34", "grad_norm": 0.0069, "loss": 0.0583, "lr": "4.916e-05", "step": 2938, "steps": "23.48s,2938/33195" }, { "epoch": 0.4426871516794698, "eta": "197:30:16", "grad_norm": 0.007, "loss": 0.0433, "lr": "4.916e-05", "step": 2939, "steps": "23.5s,2939/33195" }, { "epoch": 0.44283777677361047, "eta": "196:49:32", "grad_norm": 0.0069, "loss": 0.051, "lr": "4.916e-05", "step": 2940, "steps": "23.42s,2940/33195" }, { "epoch": 0.4429884018677512, "eta": "194:27:57", "grad_norm": 0.0075, "loss": 0.0692, "lr": "4.916e-05", "step": 2941, "steps": "23.14s,2941/33195" }, { "epoch": 0.44313902696189184, "eta": "197:24:02", "grad_norm": 0.0079, "loss": 0.0577, "lr": "4.916e-05", "step": 2942, "steps": "23.49s,2942/33195" }, { "epoch": 0.44328965205603255, "eta": "194:22:08", "grad_norm": 0.0078, "loss": 0.0585, "lr": "4.916e-05", "step": 2943, "steps": "23.13s,2943/33195" }, { "epoch": 0.4434402771501732, "eta": "197:03:05", "grad_norm": 0.0061, "loss": 0.0539, "lr": "4.916e-05", "step": 2944, "steps": "23.45s,2944/33195" }, { "epoch": 0.4435909022443139, "eta": "193:51:07", "grad_norm": 0.0079, "loss": 0.0403, "lr": "4.916e-05", "step": 2945, "steps": "23.07s,2945/33195" }, { "epoch": 0.44374152733845457, "eta": "197:27:31", "grad_norm": 0.0055, "loss": 0.0423, "lr": "4.916e-05", "step": 2946, "steps": "23.5s,2946/33195" }, { "epoch": 0.4438921524325953, "eta": "193:30:11", "grad_norm": 0.0085, "loss": 0.042, "lr": "4.915e-05", "step": 2947, "steps": "23.03s,2947/33195" }, { "epoch": 0.44404277752673593, "eta": "193:49:58", "grad_norm": 0.0075, "loss": 0.0542, "lr": "4.915e-05", "step": 2948, "steps": "23.07s,2948/33195" }, { "epoch": 0.44419340262087664, "eta": "194:19:49", "grad_norm": 0.0067, "loss": 0.0578, "lr": "4.915e-05", "step": 2949, "steps": "23.13s,2949/33195" }, { "epoch": 0.4443440277150173, "eta": "198:26:26", "grad_norm": 0.0232, "loss": 0.0312, "lr": "4.915e-05", "step": 2950, "steps": "23.62s,2950/33195" }, { "epoch": 0.444494652809158, "eta": "194:59:23", "grad_norm": 0.007, "loss": 0.084, "lr": "4.915e-05", "step": 2951, "steps": "23.21s,2951/33195" }, { "epoch": 0.44464527790329866, "eta": "195:04:02", "grad_norm": 0.0073, "loss": 0.066, "lr": "4.915e-05", "step": 2952, "steps": "23.22s,2952/33195" }, { "epoch": 0.4447959029974394, "eta": "191:26:55", "grad_norm": 0.0061, "loss": 0.0519, "lr": "4.915e-05", "step": 2953, "steps": "22.79s,2953/33195" }, { "epoch": 0.4449465280915801, "eta": "197:14:18", "grad_norm": 0.0067, "loss": 0.0375, "lr": "4.915e-05", "step": 2954, "steps": "23.48s,2954/33195" }, { "epoch": 0.44509715318572074, "eta": "197:18:57", "grad_norm": 0.007, "loss": 0.0585, "lr": "4.915e-05", "step": 2955, "steps": "23.49s,2955/33195" }, { "epoch": 0.44524777827986145, "eta": "197:13:31", "grad_norm": 0.0073, "loss": 0.0526, "lr": "4.915e-05", "step": 2956, "steps": "23.48s,2956/33195" }, { "epoch": 0.4453984033740021, "eta": "193:31:23", "grad_norm": 0.0067, "loss": 0.0513, "lr": "4.915e-05", "step": 2957, "steps": "23.04s,2957/33195" }, { "epoch": 0.4455490284681428, "eta": "194:31:28", "grad_norm": 0.0066, "loss": 0.0543, "lr": "4.915e-05", "step": 2958, "steps": "23.16s,2958/33195" }, { "epoch": 0.44569965356228347, "eta": "197:02:16", "grad_norm": 0.009, "loss": 0.051, "lr": "4.915e-05", "step": 2959, "steps": "23.46s,2959/33195" }, { "epoch": 0.4458502786564242, "eta": "197:01:53", "grad_norm": 0.007, "loss": 0.0663, "lr": "4.915e-05", "step": 2960, "steps": "23.46s,2960/33195" }, { "epoch": 0.44600090375056484, "eta": "201:38:38", "grad_norm": 0.007, "loss": 0.0465, "lr": "4.915e-05", "step": 2961, "steps": "24.01s,2961/33195" }, { "epoch": 0.44615152884470555, "eta": "196:35:54", "grad_norm": 0.007, "loss": 0.0262, "lr": "4.915e-05", "step": 2962, "steps": "23.41s,2962/33195" }, { "epoch": 0.4463021539388462, "eta": "194:49:42", "grad_norm": 0.0076, "loss": 0.0627, "lr": "4.914e-05", "step": 2963, "steps": "23.2s,2963/33195" }, { "epoch": 0.4464527790329869, "eta": "194:29:09", "grad_norm": 0.0087, "loss": 0.0542, "lr": "4.914e-05", "step": 2964, "steps": "23.16s,2964/33195" }, { "epoch": 0.44660340412712757, "eta": "194:59:00", "grad_norm": 0.0087, "loss": 0.0397, "lr": "4.914e-05", "step": 2965, "steps": "23.22s,2965/33195" }, { "epoch": 0.4467540292212683, "eta": "196:59:32", "grad_norm": 0.0067, "loss": 0.0542, "lr": "4.914e-05", "step": 2966, "steps": "23.46s,2966/33195" }, { "epoch": 0.44690465431540893, "eta": "197:39:27", "grad_norm": 0.0069, "loss": 0.0674, "lr": "4.914e-05", "step": 2967, "steps": "23.54s,2967/33195" }, { "epoch": 0.44705527940954964, "eta": "197:39:03", "grad_norm": 0.0057, "loss": 0.0394, "lr": "4.914e-05", "step": 2968, "steps": "23.54s,2968/33195" }, { "epoch": 0.4472059045036903, "eta": "193:36:51", "grad_norm": 0.0077, "loss": 0.0353, "lr": "4.914e-05", "step": 2969, "steps": "23.06s,2969/33195" }, { "epoch": 0.447356529597831, "eta": "198:13:32", "grad_norm": 0.0066, "loss": 0.0382, "lr": "4.914e-05", "step": 2970, "steps": "23.61s,2970/33195" }, { "epoch": 0.44750715469197166, "eta": "194:21:25", "grad_norm": 0.008, "loss": 0.0532, "lr": "4.914e-05", "step": 2971, "steps": "23.15s,2971/33195" }, { "epoch": 0.4476577797861124, "eta": "195:31:33", "grad_norm": 0.0081, "loss": 0.0359, "lr": "4.914e-05", "step": 2972, "steps": "23.29s,2972/33195" }, { "epoch": 0.44780840488025303, "eta": "191:29:23", "grad_norm": 0.0066, "loss": 0.048, "lr": "4.914e-05", "step": 2973, "steps": "22.81s,2973/33195" }, { "epoch": 0.44795902997439374, "eta": "194:15:13", "grad_norm": 0.0085, "loss": 0.0627, "lr": "4.914e-05", "step": 2974, "steps": "23.14s,2974/33195" }, { "epoch": 0.4481096550685344, "eta": "197:16:10", "grad_norm": 0.0081, "loss": 0.0389, "lr": "4.914e-05", "step": 2975, "steps": "23.5s,2975/33195" }, { "epoch": 0.4482602801626751, "eta": "193:49:16", "grad_norm": 0.0067, "loss": 0.0538, "lr": "4.914e-05", "step": 2976, "steps": "23.09s,2976/33195" }, { "epoch": 0.44841090525681576, "eta": "196:50:12", "grad_norm": 0.0072, "loss": 0.0499, "lr": "4.914e-05", "step": 2977, "steps": "23.45s,2977/33195" }, { "epoch": 0.44856153035095647, "eta": "197:04:55", "grad_norm": 0.0058, "loss": 0.0682, "lr": "4.914e-05", "step": 2978, "steps": "23.48s,2978/33195" }, { "epoch": 0.4487121554450971, "eta": "197:09:33", "grad_norm": 0.0067, "loss": 0.0442, "lr": "4.913e-05", "step": 2979, "steps": "23.49s,2979/33195" }, { "epoch": 0.44886278053923784, "eta": "193:27:35", "grad_norm": 0.0067, "loss": 0.0692, "lr": "4.913e-05", "step": 2980, "steps": "23.05s,2980/33195" }, { "epoch": 0.44901340563337855, "eta": "197:33:57", "grad_norm": 0.0055, "loss": 0.044, "lr": "4.913e-05", "step": 2981, "steps": "23.54s,2981/33195" }, { "epoch": 0.4491640307275192, "eta": "194:07:06", "grad_norm": 0.008, "loss": 0.0388, "lr": "4.913e-05", "step": 2982, "steps": "23.13s,2982/33195" }, { "epoch": 0.4493146558216599, "eta": "193:06:18", "grad_norm": 0.0091, "loss": 0.0472, "lr": "4.913e-05", "step": 2983, "steps": "23.01s,2983/33195" }, { "epoch": 0.44946528091580057, "eta": "193:51:14", "grad_norm": 0.0064, "loss": 0.0406, "lr": "4.913e-05", "step": 2984, "steps": "23.1s,2984/33195" }, { "epoch": 0.4496159060099413, "eta": "194:16:01", "grad_norm": 0.0064, "loss": 0.0396, "lr": "4.913e-05", "step": 2985, "steps": "23.15s,2985/33195" }, { "epoch": 0.44976653110408193, "eta": "193:55:29", "grad_norm": 0.0082, "loss": 0.0724, "lr": "4.913e-05", "step": 2986, "steps": "23.11s,2986/33195" }, { "epoch": 0.44991715619822265, "eta": "196:56:21", "grad_norm": 0.0084, "loss": 0.053, "lr": "4.913e-05", "step": 2987, "steps": "23.47s,2987/33195" }, { "epoch": 0.4500677812923633, "eta": "191:28:43", "grad_norm": 0.0068, "loss": 0.0691, "lr": "4.913e-05", "step": 2988, "steps": "22.82s,2988/33195" }, { "epoch": 0.450218406386504, "eta": "193:34:12", "grad_norm": 0.0072, "loss": 0.0708, "lr": "4.913e-05", "step": 2989, "steps": "23.07s,2989/33195" }, { "epoch": 0.45036903148064467, "eta": "193:33:49", "grad_norm": 0.0079, "loss": 0.0197, "lr": "4.913e-05", "step": 2990, "steps": "23.07s,2990/33195" }, { "epoch": 0.4505196565747854, "eta": "194:48:56", "grad_norm": 0.0165, "loss": 0.0512, "lr": "4.913e-05", "step": 2991, "steps": "23.22s,2991/33195" }, { "epoch": 0.45067028166892603, "eta": "193:43:07", "grad_norm": 0.0077, "loss": 0.0663, "lr": "4.913e-05", "step": 2992, "steps": "23.09s,2992/33195" }, { "epoch": 0.45082090676306674, "eta": "193:42:44", "grad_norm": 0.0063, "loss": 0.0415, "lr": "4.913e-05", "step": 2993, "steps": "23.09s,2993/33195" }, { "epoch": 0.4509715318572074, "eta": "192:57:03", "grad_norm": 0.0101, "loss": 0.0518, "lr": "4.913e-05", "step": 2994, "steps": "23.0s,2994/33195" }, { "epoch": 0.4511221569513481, "eta": "194:12:10", "grad_norm": 0.0096, "loss": 0.048, "lr": "4.912e-05", "step": 2995, "steps": "23.15s,2995/33195" }, { "epoch": 0.45127278204548876, "eta": "196:57:52", "grad_norm": 0.0075, "loss": 0.0772, "lr": "4.912e-05", "step": 2996, "steps": "23.48s,2996/33195" }, { "epoch": 0.4514234071396295, "eta": "194:11:23", "grad_norm": 0.0066, "loss": 0.0543, "lr": "4.912e-05", "step": 2997, "steps": "23.15s,2997/33195" }, { "epoch": 0.45157403223377013, "eta": "191:34:59", "grad_norm": 0.0068, "loss": 0.0544, "lr": "4.912e-05", "step": 2998, "steps": "22.84s,2998/33195" }, { "epoch": 0.45172465732791084, "eta": "194:10:37", "grad_norm": 0.0064, "loss": 0.0408, "lr": "4.912e-05", "step": 2999, "steps": "23.15s,2999/33195" }, { "epoch": 0.4518752824220515, "eta": "194:40:25", "grad_norm": 0.0066, "loss": 0.0494, "lr": "4.912e-05", "step": 3000, "steps": "23.21s,3000/33195" }, { "epoch": 0.4520259075161922, "eta": "427:59:59", "grad_norm": 0.0074, "loss": 0.0289, "lr": "4.912e-05", "step": 3001, "steps": "51.03s,3001/33195" }, { "epoch": 0.45217653261033286, "eta": "192:59:00", "grad_norm": 0.0067, "loss": 0.0482, "lr": "4.912e-05", "step": 3002, "steps": "23.01s,3002/33195" }, { "epoch": 0.45232715770447357, "eta": "193:43:55", "grad_norm": 0.0063, "loss": 0.0729, "lr": "4.912e-05", "step": 3003, "steps": "23.1s,3003/33195" }, { "epoch": 0.4524777827986142, "eta": "193:43:32", "grad_norm": 0.0073, "loss": 0.0541, "lr": "4.912e-05", "step": 3004, "steps": "23.1s,3004/33195" }, { "epoch": 0.45262840789275494, "eta": "192:17:36", "grad_norm": 0.0066, "loss": 0.073, "lr": "4.912e-05", "step": 3005, "steps": "22.93s,3005/33195" }, { "epoch": 0.4527790329868956, "eta": "197:04:01", "grad_norm": 0.0072, "loss": 0.0255, "lr": "4.912e-05", "step": 3006, "steps": "23.5s,3006/33195" }, { "epoch": 0.4529296580810363, "eta": "192:21:52", "grad_norm": 0.0074, "loss": 0.0651, "lr": "4.912e-05", "step": 3007, "steps": "22.94s,3007/33195" }, { "epoch": 0.453080283175177, "eta": "196:38:05", "grad_norm": 0.0062, "loss": 0.0641, "lr": "4.912e-05", "step": 3008, "steps": "23.45s,3008/33195" }, { "epoch": 0.45323090826931767, "eta": "193:41:36", "grad_norm": 0.0069, "loss": 0.0965, "lr": "4.912e-05", "step": 3009, "steps": "23.1s,3009/33195" }, { "epoch": 0.4533815333634584, "eta": "195:26:52", "grad_norm": 0.0094, "loss": 0.06, "lr": "4.912e-05", "step": 3010, "steps": "23.31s,3010/33195" }, { "epoch": 0.45353215845759903, "eta": "197:02:04", "grad_norm": 0.0079, "loss": 0.0585, "lr": "4.911e-05", "step": 3011, "steps": "23.5s,3011/33195" }, { "epoch": 0.45368278355173974, "eta": "197:16:45", "grad_norm": 0.0073, "loss": 0.0709, "lr": "4.911e-05", "step": 3012, "steps": "23.53s,3012/33195" }, { "epoch": 0.4538334086458804, "eta": "196:05:56", "grad_norm": 0.0107, "loss": 0.0505, "lr": "4.911e-05", "step": 3013, "steps": "23.39s,3013/33195" }, { "epoch": 0.4539840337400211, "eta": "192:54:24", "grad_norm": 0.0068, "loss": 0.0394, "lr": "4.911e-05", "step": 3014, "steps": "23.01s,3014/33195" }, { "epoch": 0.45413465883416176, "eta": "191:18:27", "grad_norm": 0.008, "loss": 0.0786, "lr": "4.911e-05", "step": 3015, "steps": "22.82s,3015/33195" }, { "epoch": 0.4542852839283025, "eta": "192:58:40", "grad_norm": 0.0093, "loss": 0.047, "lr": "4.911e-05", "step": 3016, "steps": "23.02s,3016/33195" }, { "epoch": 0.45443590902244313, "eta": "193:08:21", "grad_norm": 0.0062, "loss": 0.0363, "lr": "4.911e-05", "step": 3017, "steps": "23.04s,3017/33195" }, { "epoch": 0.45458653411658384, "eta": "197:04:21", "grad_norm": 0.0066, "loss": 0.0595, "lr": "4.911e-05", "step": 3018, "steps": "23.51s,3018/33195" }, { "epoch": 0.4547371592107245, "eta": "194:58:13", "grad_norm": 0.0078, "loss": 0.0487, "lr": "4.911e-05", "step": 3019, "steps": "23.26s,3019/33195" }, { "epoch": 0.4548877843048652, "eta": "193:12:13", "grad_norm": 0.0068, "loss": 0.0516, "lr": "4.911e-05", "step": 3020, "steps": "23.05s,3020/33195" }, { "epoch": 0.45503840939900586, "eta": "194:17:13", "grad_norm": 0.0118, "loss": 0.0337, "lr": "4.911e-05", "step": 3021, "steps": "23.18s,3021/33195" }, { "epoch": 0.45518903449314657, "eta": "193:41:38", "grad_norm": 0.0073, "loss": 0.047, "lr": "4.911e-05", "step": 3022, "steps": "23.11s,3022/33195" }, { "epoch": 0.4553396595872872, "eta": "197:52:40", "grad_norm": 0.0075, "loss": 0.0376, "lr": "4.911e-05", "step": 3023, "steps": "23.61s,3023/33195" }, { "epoch": 0.45549028468142794, "eta": "193:15:43", "grad_norm": 0.0078, "loss": 0.0433, "lr": "4.911e-05", "step": 3024, "steps": "23.06s,3024/33195" }, { "epoch": 0.4556409097755686, "eta": "194:00:35", "grad_norm": 0.0078, "loss": 0.0474, "lr": "4.911e-05", "step": 3025, "steps": "23.15s,3025/33195" }, { "epoch": 0.4557915348697093, "eta": "193:30:02", "grad_norm": 0.0074, "loss": 0.0478, "lr": "4.911e-05", "step": 3026, "steps": "23.09s,3026/33195" }, { "epoch": 0.45594215996384996, "eta": "193:39:42", "grad_norm": 0.0073, "loss": 0.0612, "lr": "4.910e-05", "step": 3027, "steps": "23.11s,3027/33195" }, { "epoch": 0.45609278505799067, "eta": "193:39:19", "grad_norm": 0.007, "loss": 0.0777, "lr": "4.910e-05", "step": 3028, "steps": "23.11s,3028/33195" }, { "epoch": 0.4562434101521313, "eta": "193:33:54", "grad_norm": 0.0108, "loss": 0.0506, "lr": "4.910e-05", "step": 3029, "steps": "23.1s,3029/33195" }, { "epoch": 0.45639403524627203, "eta": "196:09:22", "grad_norm": 0.0148, "loss": 0.0501, "lr": "4.910e-05", "step": 3030, "steps": "23.41s,3030/33195" }, { "epoch": 0.4565446603404127, "eta": "194:18:23", "grad_norm": 0.0064, "loss": 0.0669, "lr": "4.910e-05", "step": 3031, "steps": "23.19s,3031/33195" }, { "epoch": 0.4566952854345534, "eta": "197:18:58", "grad_norm": 0.0068, "loss": 0.0427, "lr": "4.910e-05", "step": 3032, "steps": "23.55s,3032/33195" }, { "epoch": 0.45684591052869405, "eta": "194:07:33", "grad_norm": 0.0067, "loss": 0.0462, "lr": "4.910e-05", "step": 3033, "steps": "23.17s,3033/33195" }, { "epoch": 0.45699653562283477, "eta": "195:57:45", "grad_norm": 0.0076, "loss": 0.0522, "lr": "4.910e-05", "step": 3034, "steps": "23.39s,3034/33195" }, { "epoch": 0.4571471607169755, "eta": "196:12:27", "grad_norm": 0.0082, "loss": 0.0378, "lr": "4.910e-05", "step": 3035, "steps": "23.42s,3035/33195" }, { "epoch": 0.45729778581111613, "eta": "193:21:09", "grad_norm": 0.0078, "loss": 0.0551, "lr": "4.910e-05", "step": 3036, "steps": "23.08s,3036/33195" }, { "epoch": 0.45744841090525684, "eta": "196:26:45", "grad_norm": 0.0074, "loss": 0.0618, "lr": "4.910e-05", "step": 3037, "steps": "23.45s,3037/33195" }, { "epoch": 0.4575990359993975, "eta": "193:50:32", "grad_norm": 0.0079, "loss": 0.0479, "lr": "4.910e-05", "step": 3038, "steps": "23.14s,3038/33195" }, { "epoch": 0.4577496610935382, "eta": "193:30:03", "grad_norm": 0.006, "loss": 0.0515, "lr": "4.910e-05", "step": 3039, "steps": "23.1s,3039/33195" }, { "epoch": 0.45790028618767886, "eta": "193:59:49", "grad_norm": 0.0111, "loss": 0.0807, "lr": "4.910e-05", "step": 3040, "steps": "23.16s,3040/33195" }, { "epoch": 0.4580509112818196, "eta": "194:44:40", "grad_norm": 0.0069, "loss": 0.0522, "lr": "4.910e-05", "step": 3041, "steps": "23.25s,3041/33195" }, { "epoch": 0.45820153637596023, "eta": "196:04:41", "grad_norm": 0.0089, "loss": 0.0575, "lr": "4.910e-05", "step": 3042, "steps": "23.41s,3042/33195" }, { "epoch": 0.45835216147010094, "eta": "194:48:55", "grad_norm": 0.0085, "loss": 0.0459, "lr": "4.909e-05", "step": 3043, "steps": "23.26s,3043/33195" }, { "epoch": 0.4585027865642416, "eta": "196:44:06", "grad_norm": 0.007, "loss": 0.044, "lr": "4.909e-05", "step": 3044, "steps": "23.49s,3044/33195" }, { "epoch": 0.4586534116583823, "eta": "196:58:48", "grad_norm": 0.0064, "loss": 0.0569, "lr": "4.909e-05", "step": 3045, "steps": "23.52s,3045/33195" }, { "epoch": 0.45880403675252296, "eta": "196:08:09", "grad_norm": 0.0061, "loss": 0.0485, "lr": "4.909e-05", "step": 3046, "steps": "23.42s,3046/33195" }, { "epoch": 0.45895466184666367, "eta": "196:32:53", "grad_norm": 0.0098, "loss": 0.0476, "lr": "4.909e-05", "step": 3047, "steps": "23.47s,3047/33195" }, { "epoch": 0.4591052869408043, "eta": "192:26:18", "grad_norm": 0.0078, "loss": 0.0694, "lr": "4.909e-05", "step": 3048, "steps": "22.98s,3048/33195" }, { "epoch": 0.45925591203494504, "eta": "195:51:54", "grad_norm": 0.0074, "loss": 0.0461, "lr": "4.909e-05", "step": 3049, "steps": "23.39s,3049/33195" }, { "epoch": 0.4594065371290857, "eta": "193:05:43", "grad_norm": 0.0073, "loss": 0.0437, "lr": "4.909e-05", "step": 3050, "steps": "23.06s,3050/33195" }, { "epoch": 0.4595571622232264, "eta": "195:46:06", "grad_norm": 0.0125, "loss": 0.0378, "lr": "4.909e-05", "step": 3051, "steps": "23.38s,3051/33195" }, { "epoch": 0.45970778731736706, "eta": "196:30:56", "grad_norm": 0.0075, "loss": 0.0865, "lr": "4.909e-05", "step": 3052, "steps": "23.47s,3052/33195" }, { "epoch": 0.45985841241150777, "eta": "194:04:51", "grad_norm": 0.007, "loss": 0.0659, "lr": "4.909e-05", "step": 3053, "steps": "23.18s,3053/33195" }, { "epoch": 0.4600090375056484, "eta": "193:44:22", "grad_norm": 0.0112, "loss": 0.0502, "lr": "4.909e-05", "step": 3054, "steps": "23.14s,3054/33195" }, { "epoch": 0.46015966259978913, "eta": "193:08:49", "grad_norm": 0.007, "loss": 0.0619, "lr": "4.909e-05", "step": 3055, "steps": "23.07s,3055/33195" }, { "epoch": 0.4603102876939298, "eta": "196:09:16", "grad_norm": 0.0078, "loss": 0.0468, "lr": "4.909e-05", "step": 3056, "steps": "23.43s,3056/33195" }, { "epoch": 0.4604609127880705, "eta": "193:38:11", "grad_norm": 0.0059, "loss": 0.0383, "lr": "4.909e-05", "step": 3057, "steps": "23.13s,3057/33195" }, { "epoch": 0.46061153788221115, "eta": "195:43:23", "grad_norm": 0.0055, "loss": 0.0502, "lr": "4.909e-05", "step": 3058, "steps": "23.38s,3058/33195" }, { "epoch": 0.46076216297635186, "eta": "195:53:02", "grad_norm": 0.0064, "loss": 0.0548, "lr": "4.908e-05", "step": 3059, "steps": "23.4s,3059/33195" }, { "epoch": 0.4609127880704925, "eta": "192:51:50", "grad_norm": 0.0072, "loss": 0.0706, "lr": "4.908e-05", "step": 3060, "steps": "23.04s,3060/33195" }, { "epoch": 0.46106341316463323, "eta": "193:01:30", "grad_norm": 0.0068, "loss": 0.0686, "lr": "4.908e-05", "step": 3061, "steps": "23.06s,3061/33195" }, { "epoch": 0.4612140382587739, "eta": "193:51:20", "grad_norm": 0.0063, "loss": 0.0649, "lr": "4.908e-05", "step": 3062, "steps": "23.16s,3062/33195" }, { "epoch": 0.4613646633529146, "eta": "193:25:50", "grad_norm": 0.0096, "loss": 0.0683, "lr": "4.908e-05", "step": 3063, "steps": "23.11s,3063/33195" }, { "epoch": 0.4615152884470553, "eta": "196:11:10", "grad_norm": 0.0081, "loss": 0.0567, "lr": "4.908e-05", "step": 3064, "steps": "23.44s,3064/33195" }, { "epoch": 0.46166591354119596, "eta": "194:45:25", "grad_norm": 0.008, "loss": 0.0492, "lr": "4.908e-05", "step": 3065, "steps": "23.27s,3065/33195" }, { "epoch": 0.46181653863533667, "eta": "193:24:41", "grad_norm": 0.0074, "loss": 0.0693, "lr": "4.908e-05", "step": 3066, "steps": "23.11s,3066/33195" }, { "epoch": 0.4619671637294773, "eta": "196:15:01", "grad_norm": 0.008, "loss": 0.0415, "lr": "4.908e-05", "step": 3067, "steps": "23.45s,3067/33195" }, { "epoch": 0.46211778882361804, "eta": "193:18:53", "grad_norm": 0.0087, "loss": 0.0612, "lr": "4.908e-05", "step": 3068, "steps": "23.1s,3068/33195" }, { "epoch": 0.4622684139177587, "eta": "192:33:19", "grad_norm": 0.0083, "loss": 0.0394, "lr": "4.908e-05", "step": 3069, "steps": "23.01s,3069/33195" }, { "epoch": 0.4624190390118994, "eta": "192:58:02", "grad_norm": 0.0085, "loss": 0.0427, "lr": "4.908e-05", "step": 3070, "steps": "23.06s,3070/33195" }, { "epoch": 0.46256966410604006, "eta": "196:13:27", "grad_norm": 0.0072, "loss": 0.0382, "lr": "4.908e-05", "step": 3071, "steps": "23.45s,3071/33195" }, { "epoch": 0.46272028920018077, "eta": "193:07:18", "grad_norm": 0.0065, "loss": 0.0211, "lr": "4.908e-05", "step": 3072, "steps": "23.08s,3072/33195" }, { "epoch": 0.4628709142943214, "eta": "196:27:44", "grad_norm": 0.007, "loss": 0.0401, "lr": "4.908e-05", "step": 3073, "steps": "23.48s,3073/33195" }, { "epoch": 0.46302153938846213, "eta": "196:17:18", "grad_norm": 0.0082, "loss": 0.057, "lr": "4.908e-05", "step": 3074, "steps": "23.46s,3074/33195" }, { "epoch": 0.4631721644826028, "eta": "196:31:58", "grad_norm": 0.007, "loss": 0.0559, "lr": "4.907e-05", "step": 3075, "steps": "23.49s,3075/33195" }, { "epoch": 0.4633227895767435, "eta": "193:00:45", "grad_norm": 0.0073, "loss": 0.0412, "lr": "4.907e-05", "step": 3076, "steps": "23.07s,3076/33195" }, { "epoch": 0.46347341467088415, "eta": "195:35:58", "grad_norm": 0.0073, "loss": 0.0407, "lr": "4.907e-05", "step": 3077, "steps": "23.38s,3077/33195" }, { "epoch": 0.46362403976502486, "eta": "195:30:34", "grad_norm": 0.0074, "loss": 0.0504, "lr": "4.907e-05", "step": 3078, "steps": "23.37s,3078/33195" }, { "epoch": 0.4637746648591655, "eta": "193:09:38", "grad_norm": 0.009, "loss": 0.0703, "lr": "4.907e-05", "step": 3079, "steps": "23.09s,3079/33195" }, { "epoch": 0.46392528995330623, "eta": "193:09:15", "grad_norm": 0.0098, "loss": 0.0365, "lr": "4.907e-05", "step": 3080, "steps": "23.09s,3080/33195" }, { "epoch": 0.4640759150474469, "eta": "195:19:21", "grad_norm": 0.0067, "loss": 0.0655, "lr": "4.907e-05", "step": 3081, "steps": "23.35s,3081/33195" }, { "epoch": 0.4642265401415876, "eta": "193:18:31", "grad_norm": 0.0063, "loss": 0.0413, "lr": "4.907e-05", "step": 3082, "steps": "23.11s,3082/33195" }, { "epoch": 0.46437716523572825, "eta": "192:48:01", "grad_norm": 0.0068, "loss": 0.0505, "lr": "4.907e-05", "step": 3083, "steps": "23.05s,3083/33195" }, { "epoch": 0.46452779032986896, "eta": "195:43:17", "grad_norm": 0.0077, "loss": 0.0556, "lr": "4.907e-05", "step": 3084, "steps": "23.4s,3084/33195" }, { "epoch": 0.4646784154240096, "eta": "195:52:56", "grad_norm": 0.0084, "loss": 0.0555, "lr": "4.907e-05", "step": 3085, "steps": "23.42s,3085/33195" }, { "epoch": 0.4648290405181503, "eta": "192:51:53", "grad_norm": 0.0071, "loss": 0.0618, "lr": "4.907e-05", "step": 3086, "steps": "23.06s,3086/33195" }, { "epoch": 0.464979665612291, "eta": "195:17:01", "grad_norm": 0.008, "loss": 0.0488, "lr": "4.907e-05", "step": 3087, "steps": "23.35s,3087/33195" }, { "epoch": 0.4651302907064317, "eta": "194:51:33", "grad_norm": 0.0056, "loss": 0.0625, "lr": "4.907e-05", "step": 3088, "steps": "23.3s,3088/33195" }, { "epoch": 0.46528091580057235, "eta": "193:55:58", "grad_norm": 0.0076, "loss": 0.0462, "lr": "4.907e-05", "step": 3089, "steps": "23.19s,3089/33195" }, { "epoch": 0.46543154089471306, "eta": "193:35:30", "grad_norm": 0.0083, "loss": 0.0548, "lr": "4.906e-05", "step": 3090, "steps": "23.15s,3090/33195" }, { "epoch": 0.46558216598885377, "eta": "193:00:00", "grad_norm": 0.0064, "loss": 0.0537, "lr": "4.906e-05", "step": 3091, "steps": "23.08s,3091/33195" }, { "epoch": 0.4657327910829944, "eta": "193:19:41", "grad_norm": 0.0063, "loss": 0.0574, "lr": "4.906e-05", "step": 3092, "steps": "23.12s,3092/33195" }, { "epoch": 0.46588341617713513, "eta": "193:34:21", "grad_norm": 0.0082, "loss": 0.0744, "lr": "4.906e-05", "step": 3093, "steps": "23.15s,3093/33195" }, { "epoch": 0.4660340412712758, "eta": "192:58:51", "grad_norm": 0.0071, "loss": 0.0448, "lr": "4.906e-05", "step": 3094, "steps": "23.08s,3094/33195" }, { "epoch": 0.4661846663654165, "eta": "193:48:38", "grad_norm": 0.0077, "loss": 0.0406, "lr": "4.906e-05", "step": 3095, "steps": "23.18s,3095/33195" }, { "epoch": 0.46633529145955716, "eta": "192:48:02", "grad_norm": 0.0096, "loss": 0.0579, "lr": "4.906e-05", "step": 3096, "steps": "23.06s,3096/33195" }, { "epoch": 0.46648591655369787, "eta": "192:07:32", "grad_norm": 0.0076, "loss": 0.0786, "lr": "4.906e-05", "step": 3097, "steps": "22.98s,3097/33195" }, { "epoch": 0.4666365416478385, "eta": "192:57:18", "grad_norm": 0.0077, "loss": 0.0667, "lr": "4.906e-05", "step": 3098, "steps": "23.08s,3098/33195" }, { "epoch": 0.46678716674197923, "eta": "195:32:25", "grad_norm": 0.0062, "loss": 0.0366, "lr": "4.906e-05", "step": 3099, "steps": "23.39s,3099/33195" }, { "epoch": 0.4669377918361199, "eta": "195:32:02", "grad_norm": 0.0074, "loss": 0.0731, "lr": "4.906e-05", "step": 3100, "steps": "23.39s,3100/33195" }, { "epoch": 0.4670884169302606, "eta": "192:16:02", "grad_norm": 0.013, "loss": 0.0297, "lr": "4.906e-05", "step": 3101, "steps": "23.0s,3101/33195" }, { "epoch": 0.46723904202440125, "eta": "191:55:35", "grad_norm": 0.0064, "loss": 0.05, "lr": "4.906e-05", "step": 3102, "steps": "22.96s,3102/33195" }, { "epoch": 0.46738966711854196, "eta": "193:40:31", "grad_norm": 0.0065, "loss": 0.0816, "lr": "4.906e-05", "step": 3103, "steps": "23.17s,3103/33195" }, { "epoch": 0.4675402922126826, "eta": "190:19:32", "grad_norm": 0.0058, "loss": 0.0911, "lr": "4.906e-05", "step": 3104, "steps": "22.77s,3104/33195" }, { "epoch": 0.46769091730682333, "eta": "193:09:39", "grad_norm": 0.0063, "loss": 0.0507, "lr": "4.906e-05", "step": 3105, "steps": "23.11s,3105/33195" }, { "epoch": 0.467841542400964, "eta": "193:49:23", "grad_norm": 0.0064, "loss": 0.0378, "lr": "4.905e-05", "step": 3106, "steps": "23.19s,3106/33195" }, { "epoch": 0.4679921674951047, "eta": "192:43:49", "grad_norm": 0.0075, "loss": 0.0468, "lr": "4.905e-05", "step": 3107, "steps": "23.06s,3107/33195" }, { "epoch": 0.46814279258924535, "eta": "193:33:34", "grad_norm": 0.0092, "loss": 0.0539, "lr": "4.905e-05", "step": 3108, "steps": "23.16s,3108/33195" }, { "epoch": 0.46829341768338606, "eta": "195:43:34", "grad_norm": 0.0065, "loss": 0.0569, "lr": "4.905e-05", "step": 3109, "steps": "23.42s,3109/33195" }, { "epoch": 0.4684440427775267, "eta": "190:32:18", "grad_norm": 0.0072, "loss": 0.0713, "lr": "4.905e-05", "step": 3110, "steps": "22.8s,3110/33195" }, { "epoch": 0.4685946678716674, "eta": "195:37:46", "grad_norm": 0.0071, "loss": 0.0592, "lr": "4.905e-05", "step": 3111, "steps": "23.41s,3111/33195" }, { "epoch": 0.4687452929658081, "eta": "193:16:59", "grad_norm": 0.0074, "loss": 0.0557, "lr": "4.905e-05", "step": 3112, "steps": "23.13s,3112/33195" }, { "epoch": 0.4688959180599488, "eta": "195:31:58", "grad_norm": 0.0077, "loss": 0.0544, "lr": "4.905e-05", "step": 3113, "steps": "23.4s,3113/33195" }, { "epoch": 0.46904654315408945, "eta": "193:01:11", "grad_norm": 0.0067, "loss": 0.0522, "lr": "4.905e-05", "step": 3114, "steps": "23.1s,3114/33195" }, { "epoch": 0.46919716824823016, "eta": "190:35:24", "grad_norm": 0.0101, "loss": 0.0442, "lr": "4.905e-05", "step": 3115, "steps": "22.81s,3115/33195" }, { "epoch": 0.4693477933423708, "eta": "193:15:27", "grad_norm": 0.0082, "loss": 0.0612, "lr": "4.905e-05", "step": 3116, "steps": "23.13s,3116/33195" }, { "epoch": 0.4694984184365115, "eta": "190:19:36", "grad_norm": 0.0078, "loss": 0.0269, "lr": "4.905e-05", "step": 3117, "steps": "22.78s,3117/33195" }, { "epoch": 0.46964904353065223, "eta": "195:25:01", "grad_norm": 0.0075, "loss": 0.0299, "lr": "4.905e-05", "step": 3118, "steps": "23.39s,3118/33195" }, { "epoch": 0.4697996686247929, "eta": "193:04:16", "grad_norm": 0.0096, "loss": 0.0503, "lr": "4.905e-05", "step": 3119, "steps": "23.11s,3119/33195" }, { "epoch": 0.4699502937189336, "eta": "195:29:15", "grad_norm": 0.0084, "loss": 0.0524, "lr": "4.905e-05", "step": 3120, "steps": "23.4s,3120/33195" }, { "epoch": 0.47010091881307425, "eta": "190:18:05", "grad_norm": 0.0064, "loss": 0.0382, "lr": "4.904e-05", "step": 3121, "steps": "22.78s,3121/33195" }, { "epoch": 0.47025154390721496, "eta": "195:53:31", "grad_norm": 0.0079, "loss": 0.0511, "lr": "4.904e-05", "step": 3122, "steps": "23.45s,3122/33195" }, { "epoch": 0.4704021690013556, "eta": "196:38:14", "grad_norm": 0.007, "loss": 0.0427, "lr": "4.904e-05", "step": 3123, "steps": "23.54s,3123/33195" }, { "epoch": 0.47055279409549633, "eta": "192:52:19", "grad_norm": 0.0064, "loss": 0.0285, "lr": "4.904e-05", "step": 3124, "steps": "23.09s,3124/33195" }, { "epoch": 0.470703419189637, "eta": "190:41:38", "grad_norm": 0.0066, "loss": 0.0539, "lr": "4.904e-05", "step": 3125, "steps": "22.83s,3125/33195" }, { "epoch": 0.4708540442837777, "eta": "192:46:32", "grad_norm": 0.0069, "loss": 0.0396, "lr": "4.904e-05", "step": 3126, "steps": "23.08s,3126/33195" }, { "epoch": 0.47100466937791835, "eta": "192:31:07", "grad_norm": 0.006, "loss": 0.0318, "lr": "4.904e-05", "step": 3127, "steps": "23.05s,3127/33195" }, { "epoch": 0.47115529447205906, "eta": "192:15:42", "grad_norm": 0.0085, "loss": 0.0562, "lr": "4.904e-05", "step": 3128, "steps": "23.02s,3128/33195" }, { "epoch": 0.4713059195661997, "eta": "195:40:46", "grad_norm": 0.0098, "loss": 0.0582, "lr": "4.904e-05", "step": 3129, "steps": "23.43s,3129/33195" }, { "epoch": 0.4714565446603404, "eta": "195:45:23", "grad_norm": 0.0075, "loss": 0.0687, "lr": "4.904e-05", "step": 3130, "steps": "23.44s,3130/33195" }, { "epoch": 0.4716071697544811, "eta": "192:44:37", "grad_norm": 0.0064, "loss": 0.0567, "lr": "4.904e-05", "step": 3131, "steps": "23.08s,3131/33195" }, { "epoch": 0.4717577948486218, "eta": "198:50:00", "grad_norm": 0.0075, "loss": 0.0245, "lr": "4.904e-05", "step": 3132, "steps": "23.81s,3132/33195" }, { "epoch": 0.47190841994276245, "eta": "195:14:09", "grad_norm": 0.0081, "loss": 0.0633, "lr": "4.904e-05", "step": 3133, "steps": "23.38s,3133/33195" }, { "epoch": 0.47205904503690316, "eta": "193:03:30", "grad_norm": 0.0096, "loss": 0.0594, "lr": "4.904e-05", "step": 3134, "steps": "23.12s,3134/33195" }, { "epoch": 0.4722096701310438, "eta": "202:59:18", "grad_norm": 0.0066, "loss": 0.0346, "lr": "4.904e-05", "step": 3135, "steps": "24.31s,3135/33195" }, { "epoch": 0.4723602952251845, "eta": "246:24:01", "grad_norm": 0.0087, "loss": 0.0327, "lr": "4.904e-05", "step": 3136, "steps": "29.51s,3136/33195" }, { "epoch": 0.4725109203193252, "eta": "197:27:51", "grad_norm": 0.008, "loss": 0.0653, "lr": "4.903e-05", "step": 3137, "steps": "23.65s,3137/33195" }, { "epoch": 0.4726615454134659, "eta": "195:37:15", "grad_norm": 0.0078, "loss": 0.0517, "lr": "4.903e-05", "step": 3138, "steps": "23.43s,3138/33195" }, { "epoch": 0.47281217050760654, "eta": "197:17:03", "grad_norm": 0.0098, "loss": 0.0348, "lr": "4.903e-05", "step": 3139, "steps": "23.63s,3139/33195" }, { "epoch": 0.47296279560174725, "eta": "194:41:22", "grad_norm": 0.0081, "loss": 0.0493, "lr": "4.903e-05", "step": 3140, "steps": "23.32s,3140/33195" }, { "epoch": 0.4731134206958879, "eta": "196:06:08", "grad_norm": 0.0062, "loss": 0.0631, "lr": "4.903e-05", "step": 3141, "steps": "23.49s,3141/33195" }, { "epoch": 0.4732640457900286, "eta": "192:25:21", "grad_norm": 0.0079, "loss": 0.0473, "lr": "4.903e-05", "step": 3142, "steps": "23.05s,3142/33195" }, { "epoch": 0.4734146708841693, "eta": "194:50:13", "grad_norm": 0.0067, "loss": 0.0503, "lr": "4.903e-05", "step": 3143, "steps": "23.34s,3143/33195" }, { "epoch": 0.47356529597831, "eta": "193:59:45", "grad_norm": 0.0056, "loss": 0.0466, "lr": "4.903e-05", "step": 3144, "steps": "23.24s,3144/33195" }, { "epoch": 0.4737159210724507, "eta": "192:59:16", "grad_norm": 0.0085, "loss": 0.0449, "lr": "4.903e-05", "step": 3145, "steps": "23.12s,3145/33195" }, { "epoch": 0.47386654616659135, "eta": "195:14:06", "grad_norm": 0.0075, "loss": 0.0581, "lr": "4.903e-05", "step": 3146, "steps": "23.39s,3146/33195" }, { "epoch": 0.47401717126073206, "eta": "194:43:39", "grad_norm": 0.0065, "loss": 0.0633, "lr": "4.903e-05", "step": 3147, "steps": "23.33s,3147/33195" }, { "epoch": 0.4741677963548727, "eta": "194:48:16", "grad_norm": 0.0064, "loss": 0.0769, "lr": "4.903e-05", "step": 3148, "steps": "23.34s,3148/33195" }, { "epoch": 0.4743184214490134, "eta": "195:52:59", "grad_norm": 0.0094, "loss": 0.0371, "lr": "4.903e-05", "step": 3149, "steps": "23.47s,3149/33195" }, { "epoch": 0.4744690465431541, "eta": "192:37:18", "grad_norm": 0.0092, "loss": 0.0889, "lr": "4.903e-05", "step": 3150, "steps": "23.08s,3150/33195" }, { "epoch": 0.4746196716372948, "eta": "195:27:10", "grad_norm": 0.0068, "loss": 0.0414, "lr": "4.903e-05", "step": 3151, "steps": "23.42s,3151/33195" }, { "epoch": 0.47477029673143545, "eta": "197:41:58", "grad_norm": 0.0059, "loss": 0.0677, "lr": "4.902e-05", "step": 3152, "steps": "23.69s,3152/33195" }, { "epoch": 0.47492092182557616, "eta": "193:21:13", "grad_norm": 0.0077, "loss": 0.0399, "lr": "4.902e-05", "step": 3153, "steps": "23.17s,3153/33195" }, { "epoch": 0.4750715469197168, "eta": "193:30:50", "grad_norm": 0.0072, "loss": 0.0628, "lr": "4.902e-05", "step": 3154, "steps": "23.19s,3154/33195" }, { "epoch": 0.4752221720138575, "eta": "193:45:28", "grad_norm": 0.0077, "loss": 0.0566, "lr": "4.902e-05", "step": 3155, "steps": "23.22s,3155/33195" }, { "epoch": 0.4753727971079982, "eta": "195:30:13", "grad_norm": 0.0081, "loss": 0.0728, "lr": "4.902e-05", "step": 3156, "steps": "23.43s,3156/33195" }, { "epoch": 0.4755234222021389, "eta": "193:24:40", "grad_norm": 0.0079, "loss": 0.06, "lr": "4.902e-05", "step": 3157, "steps": "23.18s,3157/33195" }, { "epoch": 0.47567404729627955, "eta": "193:04:16", "grad_norm": 0.0061, "loss": 0.0374, "lr": "4.902e-05", "step": 3158, "steps": "23.14s,3158/33195" }, { "epoch": 0.47582467239042026, "eta": "192:23:50", "grad_norm": 0.0075, "loss": 0.0501, "lr": "4.902e-05", "step": 3159, "steps": "23.06s,3159/33195" }, { "epoch": 0.4759752974845609, "eta": "192:13:26", "grad_norm": 0.0074, "loss": 0.0786, "lr": "4.902e-05", "step": 3160, "steps": "23.04s,3160/33195" }, { "epoch": 0.4761259225787016, "eta": "193:33:08", "grad_norm": 0.0074, "loss": 0.0809, "lr": "4.902e-05", "step": 3161, "steps": "23.2s,3161/33195" }, { "epoch": 0.4762765476728423, "eta": "195:22:52", "grad_norm": 0.0066, "loss": 0.0494, "lr": "4.902e-05", "step": 3162, "steps": "23.42s,3162/33195" }, { "epoch": 0.476427172766983, "eta": "192:42:19", "grad_norm": 0.0072, "loss": 0.0519, "lr": "4.902e-05", "step": 3163, "steps": "23.1s,3163/33195" }, { "epoch": 0.47657779786112364, "eta": "194:17:01", "grad_norm": 0.0085, "loss": 0.0453, "lr": "4.902e-05", "step": 3164, "steps": "23.29s,3164/33195" }, { "epoch": 0.47672842295526435, "eta": "196:01:45", "grad_norm": 0.006, "loss": 0.05, "lr": "4.902e-05", "step": 3165, "steps": "23.5s,3165/33195" }, { "epoch": 0.476879048049405, "eta": "192:26:09", "grad_norm": 0.0061, "loss": 0.0554, "lr": "4.902e-05", "step": 3166, "steps": "23.07s,3166/33195" }, { "epoch": 0.4770296731435457, "eta": "192:35:46", "grad_norm": 0.0057, "loss": 0.0617, "lr": "4.901e-05", "step": 3167, "steps": "23.09s,3167/33195" }, { "epoch": 0.4771802982376864, "eta": "192:40:23", "grad_norm": 0.0132, "loss": 0.0621, "lr": "4.901e-05", "step": 3168, "steps": "23.1s,3168/33195" }, { "epoch": 0.4773309233318271, "eta": "192:40:00", "grad_norm": 0.006, "loss": 0.0554, "lr": "4.901e-05", "step": 3169, "steps": "23.1s,3169/33195" }, { "epoch": 0.47748154842596774, "eta": "192:39:37", "grad_norm": 0.0078, "loss": 0.0606, "lr": "4.901e-05", "step": 3170, "steps": "23.1s,3170/33195" }, { "epoch": 0.47763217352010845, "eta": "195:04:21", "grad_norm": 0.0063, "loss": 0.0323, "lr": "4.901e-05", "step": 3171, "steps": "23.39s,3171/33195" }, { "epoch": 0.47778279861424916, "eta": "195:28:59", "grad_norm": 0.006, "loss": 0.0696, "lr": "4.901e-05", "step": 3172, "steps": "23.44s,3172/33195" }, { "epoch": 0.4779334237083898, "eta": "192:43:28", "grad_norm": 0.0059, "loss": 0.0411, "lr": "4.901e-05", "step": 3173, "steps": "23.11s,3173/33195" }, { "epoch": 0.4780840488025305, "eta": "195:23:12", "grad_norm": 0.0082, "loss": 0.043, "lr": "4.901e-05", "step": 3174, "steps": "23.43s,3174/33195" }, { "epoch": 0.4782346738966712, "eta": "195:32:49", "grad_norm": 0.0078, "loss": 0.049, "lr": "4.901e-05", "step": 3175, "steps": "23.45s,3175/33195" }, { "epoch": 0.4783852989908119, "eta": "192:42:19", "grad_norm": 0.0067, "loss": 0.0455, "lr": "4.901e-05", "step": 3176, "steps": "23.11s,3176/33195" }, { "epoch": 0.47853592408495255, "eta": "190:26:51", "grad_norm": 0.0058, "loss": 0.0689, "lr": "4.901e-05", "step": 3177, "steps": "22.84s,3177/33195" }, { "epoch": 0.47868654917909326, "eta": "195:31:38", "grad_norm": 0.0059, "loss": 0.0466, "lr": "4.901e-05", "step": 3178, "steps": "23.45s,3178/33195" }, { "epoch": 0.4788371742732339, "eta": "190:51:06", "grad_norm": 0.0081, "loss": 0.0359, "lr": "4.901e-05", "step": 3179, "steps": "22.89s,3179/33195" }, { "epoch": 0.4789877993673746, "eta": "192:35:46", "grad_norm": 0.0061, "loss": 0.0534, "lr": "4.901e-05", "step": 3180, "steps": "23.1s,3180/33195" }, { "epoch": 0.4791384244615153, "eta": "192:35:23", "grad_norm": 0.0072, "loss": 0.0506, "lr": "4.901e-05", "step": 3181, "steps": "23.1s,3181/33195" }, { "epoch": 0.479289049555656, "eta": "190:04:56", "grad_norm": 0.0073, "loss": 0.0514, "lr": "4.900e-05", "step": 3182, "steps": "22.8s,3182/33195" }, { "epoch": 0.47943967464979664, "eta": "193:59:39", "grad_norm": 0.0071, "loss": 0.0407, "lr": "4.900e-05", "step": 3183, "steps": "23.27s,3183/33195" }, { "epoch": 0.47959029974393735, "eta": "195:24:17", "grad_norm": 0.0057, "loss": 0.0416, "lr": "4.900e-05", "step": 3184, "steps": "23.44s,3184/33195" }, { "epoch": 0.479740924838078, "eta": "190:48:48", "grad_norm": 0.007, "loss": 0.0492, "lr": "4.900e-05", "step": 3185, "steps": "22.89s,3185/33195" }, { "epoch": 0.4798915499322187, "eta": "193:18:28", "grad_norm": 0.0247, "loss": 0.0754, "lr": "4.900e-05", "step": 3186, "steps": "23.19s,3186/33195" }, { "epoch": 0.4800421750263594, "eta": "191:58:04", "grad_norm": 0.006, "loss": 0.0584, "lr": "4.900e-05", "step": 3187, "steps": "23.03s,3187/33195" }, { "epoch": 0.4801928001205001, "eta": "193:07:42", "grad_norm": 0.0065, "loss": 0.0435, "lr": "4.900e-05", "step": 3188, "steps": "23.17s,3188/33195" }, { "epoch": 0.48034342521464074, "eta": "193:27:19", "grad_norm": 0.0076, "loss": 0.0515, "lr": "4.900e-05", "step": 3189, "steps": "23.21s,3189/33195" }, { "epoch": 0.48049405030878145, "eta": "193:21:56", "grad_norm": 0.0083, "loss": 0.0502, "lr": "4.900e-05", "step": 3190, "steps": "23.2s,3190/33195" }, { "epoch": 0.4806446754029221, "eta": "195:51:34", "grad_norm": 0.0058, "loss": 0.0487, "lr": "4.900e-05", "step": 3191, "steps": "23.5s,3191/33195" }, { "epoch": 0.4807953004970628, "eta": "194:16:09", "grad_norm": 0.0102, "loss": 0.0581, "lr": "4.900e-05", "step": 3192, "steps": "23.31s,3192/33195" }, { "epoch": 0.48094592559120347, "eta": "192:15:46", "grad_norm": 0.0064, "loss": 0.0584, "lr": "4.900e-05", "step": 3193, "steps": "23.07s,3193/33195" }, { "epoch": 0.4810965506853442, "eta": "193:20:23", "grad_norm": 0.0084, "loss": 0.0474, "lr": "4.900e-05", "step": 3194, "steps": "23.2s,3194/33195" }, { "epoch": 0.48124717577948484, "eta": "194:15:00", "grad_norm": 0.0065, "loss": 0.0618, "lr": "4.900e-05", "step": 3195, "steps": "23.31s,3195/33195" }, { "epoch": 0.48139780087362555, "eta": "192:24:36", "grad_norm": 0.0106, "loss": 0.0827, "lr": "4.900e-05", "step": 3196, "steps": "23.09s,3196/33195" }, { "epoch": 0.4815484259677662, "eta": "194:24:13", "grad_norm": 0.0082, "loss": 0.0405, "lr": "4.899e-05", "step": 3197, "steps": "23.33s,3197/33195" }, { "epoch": 0.4816990510619069, "eta": "192:38:50", "grad_norm": 0.0077, "loss": 0.0355, "lr": "4.899e-05", "step": 3198, "steps": "23.12s,3198/33195" }, { "epoch": 0.4818496761560476, "eta": "193:13:27", "grad_norm": 0.0077, "loss": 0.0668, "lr": "4.899e-05", "step": 3199, "steps": "23.19s,3199/33195" }, { "epoch": 0.4820003012501883, "eta": "192:58:04", "grad_norm": 0.0087, "loss": 0.0627, "lr": "4.899e-05", "step": 3200, "steps": "23.16s,3200/33195" }, { "epoch": 0.482150926344329, "eta": "411:45:03", "grad_norm": 0.0063, "loss": 0.0396, "lr": "4.899e-05", "step": 3201, "steps": "49.42s,3201/33195" }, { "epoch": 0.48230155143846964, "eta": "193:37:17", "grad_norm": 0.0065, "loss": 0.0426, "lr": "4.899e-05", "step": 3202, "steps": "23.24s,3202/33195" }, { "epoch": 0.48245217653261036, "eta": "192:31:55", "grad_norm": 0.0095, "loss": 0.0618, "lr": "4.899e-05", "step": 3203, "steps": "23.11s,3203/33195" }, { "epoch": 0.482602801626751, "eta": "192:51:31", "grad_norm": 0.0065, "loss": 0.0449, "lr": "4.899e-05", "step": 3204, "steps": "23.15s,3204/33195" }, { "epoch": 0.4827534267208917, "eta": "196:16:04", "grad_norm": 0.008, "loss": 0.0555, "lr": "4.899e-05", "step": 3205, "steps": "23.56s,3205/33195" }, { "epoch": 0.4829040518150324, "eta": "195:20:42", "grad_norm": 0.0079, "loss": 0.0705, "lr": "4.899e-05", "step": 3206, "steps": "23.45s,3206/33195" }, { "epoch": 0.4830546769091731, "eta": "195:10:18", "grad_norm": 0.0082, "loss": 0.0408, "lr": "4.899e-05", "step": 3207, "steps": "23.43s,3207/33195" }, { "epoch": 0.48320530200331374, "eta": "193:04:58", "grad_norm": 0.0073, "loss": 0.0565, "lr": "4.899e-05", "step": 3208, "steps": "23.18s,3208/33195" }, { "epoch": 0.48335592709745445, "eta": "193:39:34", "grad_norm": 0.0091, "loss": 0.0456, "lr": "4.899e-05", "step": 3209, "steps": "23.25s,3209/33195" }, { "epoch": 0.4835065521915951, "eta": "193:24:11", "grad_norm": 0.0084, "loss": 0.059, "lr": "4.899e-05", "step": 3210, "steps": "23.22s,3210/33195" }, { "epoch": 0.4836571772857358, "eta": "192:18:50", "grad_norm": 0.0089, "loss": 0.0264, "lr": "4.899e-05", "step": 3211, "steps": "23.09s,3211/33195" }, { "epoch": 0.4838078023798765, "eta": "191:53:28", "grad_norm": 0.0061, "loss": 0.0568, "lr": "4.898e-05", "step": 3212, "steps": "23.04s,3212/33195" }, { "epoch": 0.4839584274740172, "eta": "192:28:04", "grad_norm": 0.0081, "loss": 0.0696, "lr": "4.898e-05", "step": 3213, "steps": "23.11s,3213/33195" }, { "epoch": 0.48410905256815784, "eta": "191:37:42", "grad_norm": 0.0066, "loss": 0.05, "lr": "4.898e-05", "step": 3214, "steps": "23.01s,3214/33195" }, { "epoch": 0.48425967766229855, "eta": "192:12:18", "grad_norm": 0.0068, "loss": 0.0324, "lr": "4.898e-05", "step": 3215, "steps": "23.08s,3215/33195" }, { "epoch": 0.4844103027564392, "eta": "194:11:50", "grad_norm": 0.0069, "loss": 0.0498, "lr": "4.898e-05", "step": 3216, "steps": "23.32s,3216/33195" }, { "epoch": 0.4845609278505799, "eta": "192:46:30", "grad_norm": 0.0095, "loss": 0.0298, "lr": "4.898e-05", "step": 3217, "steps": "23.15s,3217/33195" }, { "epoch": 0.48471155294472057, "eta": "193:01:06", "grad_norm": 0.0064, "loss": 0.079, "lr": "4.898e-05", "step": 3218, "steps": "23.18s,3218/33195" }, { "epoch": 0.4848621780388613, "eta": "192:15:45", "grad_norm": 0.0097, "loss": 0.0652, "lr": "4.898e-05", "step": 3219, "steps": "23.09s,3219/33195" }, { "epoch": 0.48501280313300194, "eta": "192:20:22", "grad_norm": 0.007, "loss": 0.0629, "lr": "4.898e-05", "step": 3220, "steps": "23.1s,3220/33195" }, { "epoch": 0.48516342822714265, "eta": "195:34:49", "grad_norm": 0.0071, "loss": 0.0314, "lr": "4.898e-05", "step": 3221, "steps": "23.49s,3221/33195" }, { "epoch": 0.4853140533212833, "eta": "192:19:36", "grad_norm": 0.0086, "loss": 0.0468, "lr": "4.898e-05", "step": 3222, "steps": "23.1s,3222/33195" }, { "epoch": 0.485464678415424, "eta": "195:19:03", "grad_norm": 0.009, "loss": 0.0571, "lr": "4.898e-05", "step": 3223, "steps": "23.46s,3223/33195" }, { "epoch": 0.48561530350956467, "eta": "192:18:50", "grad_norm": 0.0084, "loss": 0.0348, "lr": "4.898e-05", "step": 3224, "steps": "23.1s,3224/33195" }, { "epoch": 0.4857659286037054, "eta": "191:28:30", "grad_norm": 0.0089, "loss": 0.0562, "lr": "4.898e-05", "step": 3225, "steps": "23.0s,3225/33195" }, { "epoch": 0.4859165536978461, "eta": "194:47:54", "grad_norm": 0.0061, "loss": 0.056, "lr": "4.898e-05", "step": 3226, "steps": "23.4s,3226/33195" }, { "epoch": 0.48606717879198674, "eta": "194:42:31", "grad_norm": 0.007, "loss": 0.0698, "lr": "4.897e-05", "step": 3227, "steps": "23.39s,3227/33195" }, { "epoch": 0.48621780388612745, "eta": "194:47:07", "grad_norm": 0.009, "loss": 0.0756, "lr": "4.897e-05", "step": 3228, "steps": "23.4s,3228/33195" }, { "epoch": 0.4863684289802681, "eta": "194:21:46", "grad_norm": 0.0065, "loss": 0.0435, "lr": "4.897e-05", "step": 3229, "steps": "23.35s,3229/33195" }, { "epoch": 0.4865190540744088, "eta": "192:41:29", "grad_norm": 0.0072, "loss": 0.0609, "lr": "4.897e-05", "step": 3230, "steps": "23.15s,3230/33195" }, { "epoch": 0.4866696791685495, "eta": "192:46:06", "grad_norm": 0.007, "loss": 0.0373, "lr": "4.897e-05", "step": 3231, "steps": "23.16s,3231/33195" }, { "epoch": 0.4868203042626902, "eta": "189:25:57", "grad_norm": 0.0126, "loss": 0.0624, "lr": "4.897e-05", "step": 3232, "steps": "22.76s,3232/33195" }, { "epoch": 0.48697092935683084, "eta": "194:20:12", "grad_norm": 0.0079, "loss": 0.0733, "lr": "4.897e-05", "step": 3233, "steps": "23.35s,3233/33195" }, { "epoch": 0.48712155445097155, "eta": "194:34:48", "grad_norm": 0.0106, "loss": 0.0486, "lr": "4.897e-05", "step": 3234, "steps": "23.38s,3234/33195" }, { "epoch": 0.4872721795451122, "eta": "192:24:35", "grad_norm": 0.0057, "loss": 0.048, "lr": "4.897e-05", "step": 3235, "steps": "23.12s,3235/33195" }, { "epoch": 0.4874228046392529, "eta": "191:24:17", "grad_norm": 0.0061, "loss": 0.0484, "lr": "4.897e-05", "step": 3236, "steps": "23.0s,3236/33195" }, { "epoch": 0.48757342973339357, "eta": "195:03:35", "grad_norm": 0.0074, "loss": 0.051, "lr": "4.897e-05", "step": 3237, "steps": "23.44s,3237/33195" }, { "epoch": 0.4877240548275343, "eta": "192:08:27", "grad_norm": 0.0071, "loss": 0.0428, "lr": "4.897e-05", "step": 3238, "steps": "23.09s,3238/33195" }, { "epoch": 0.48787467992167494, "eta": "195:27:46", "grad_norm": 0.0073, "loss": 0.0711, "lr": "4.897e-05", "step": 3239, "steps": "23.49s,3239/33195" }, { "epoch": 0.48802530501581565, "eta": "195:37:22", "grad_norm": 0.0063, "loss": 0.0552, "lr": "4.897e-05", "step": 3240, "steps": "23.51s,3240/33195" }, { "epoch": 0.4881759301099563, "eta": "190:07:28", "grad_norm": 0.009, "loss": 0.043, "lr": "4.897e-05", "step": 3241, "steps": "22.85s,3241/33195" }, { "epoch": 0.488326555204097, "eta": "195:01:38", "grad_norm": 0.006, "loss": 0.059, "lr": "4.896e-05", "step": 3242, "steps": "23.44s,3242/33195" }, { "epoch": 0.48847718029823767, "eta": "195:01:14", "grad_norm": 0.007, "loss": 0.0554, "lr": "4.896e-05", "step": 3243, "steps": "23.44s,3243/33195" }, { "epoch": 0.4886278053923784, "eta": "195:15:49", "grad_norm": 0.0064, "loss": 0.0463, "lr": "4.896e-05", "step": 3244, "steps": "23.47s,3244/33195" }, { "epoch": 0.48877843048651903, "eta": "192:00:46", "grad_norm": 0.007, "loss": 0.0649, "lr": "4.896e-05", "step": 3245, "steps": "23.08s,3245/33195" }, { "epoch": 0.48892905558065974, "eta": "192:40:18", "grad_norm": 0.0079, "loss": 0.0378, "lr": "4.896e-05", "step": 3246, "steps": "23.16s,3246/33195" }, { "epoch": 0.4890796806748004, "eta": "191:50:00", "grad_norm": 0.0086, "loss": 0.051, "lr": "4.896e-05", "step": 3247, "steps": "23.06s,3247/33195" }, { "epoch": 0.4892303057689411, "eta": "193:29:27", "grad_norm": 0.0068, "loss": 0.0662, "lr": "4.896e-05", "step": 3248, "steps": "23.26s,3248/33195" }, { "epoch": 0.48938093086308176, "eta": "189:59:26", "grad_norm": 0.0236, "loss": 0.0481, "lr": "4.896e-05", "step": 3249, "steps": "22.84s,3249/33195" }, { "epoch": 0.4895315559572225, "eta": "192:18:48", "grad_norm": 0.0074, "loss": 0.0331, "lr": "4.896e-05", "step": 3250, "steps": "23.12s,3250/33195" }, { "epoch": 0.48968218105136313, "eta": "192:33:23", "grad_norm": 0.0076, "loss": 0.0412, "lr": "4.896e-05", "step": 3251, "steps": "23.15s,3251/33195" }, { "epoch": 0.48983280614550384, "eta": "192:33:00", "grad_norm": 0.007, "loss": 0.065, "lr": "4.896e-05", "step": 3252, "steps": "23.15s,3252/33195" }, { "epoch": 0.48998343123964455, "eta": "192:52:34", "grad_norm": 0.0067, "loss": 0.0586, "lr": "4.896e-05", "step": 3253, "steps": "23.19s,3253/33195" }, { "epoch": 0.4901340563337852, "eta": "195:41:51", "grad_norm": 0.0166, "loss": 0.0677, "lr": "4.896e-05", "step": 3254, "steps": "23.53s,3254/33195" }, { "epoch": 0.4902846814279259, "eta": "192:46:49", "grad_norm": 0.0064, "loss": 0.0332, "lr": "4.896e-05", "step": 3255, "steps": "23.18s,3255/33195" }, { "epoch": 0.49043530652206657, "eta": "192:21:29", "grad_norm": 0.0079, "loss": 0.0677, "lr": "4.896e-05", "step": 3256, "steps": "23.13s,3256/33195" }, { "epoch": 0.4905859316162073, "eta": "195:20:43", "grad_norm": 0.0067, "loss": 0.0486, "lr": "4.895e-05", "step": 3257, "steps": "23.49s,3257/33195" }, { "epoch": 0.49073655671034794, "eta": "195:30:18", "grad_norm": 0.0085, "loss": 0.0483, "lr": "4.895e-05", "step": 3258, "steps": "23.51s,3258/33195" }, { "epoch": 0.49088718180448865, "eta": "195:44:53", "grad_norm": 0.0068, "loss": 0.0528, "lr": "4.895e-05", "step": 3259, "steps": "23.54s,3259/33195" }, { "epoch": 0.4910378068986293, "eta": "195:19:33", "grad_norm": 0.0087, "loss": 0.0684, "lr": "4.895e-05", "step": 3260, "steps": "23.49s,3260/33195" }, { "epoch": 0.49118843199277, "eta": "189:34:55", "grad_norm": 0.0068, "loss": 0.0397, "lr": "4.895e-05", "step": 3261, "steps": "22.8s,3261/33195" }, { "epoch": 0.49133905708691067, "eta": "195:08:47", "grad_norm": 0.0101, "loss": 0.0377, "lr": "4.895e-05", "step": 3262, "steps": "23.47s,3262/33195" }, { "epoch": 0.4914896821810514, "eta": "192:28:45", "grad_norm": 0.0082, "loss": 0.0468, "lr": "4.895e-05", "step": 3263, "steps": "23.15s,3263/33195" }, { "epoch": 0.49164030727519203, "eta": "194:58:01", "grad_norm": 0.0067, "loss": 0.071, "lr": "4.895e-05", "step": 3264, "steps": "23.45s,3264/33195" }, { "epoch": 0.49179093236933275, "eta": "193:07:53", "grad_norm": 0.0065, "loss": 0.052, "lr": "4.895e-05", "step": 3265, "steps": "23.23s,3265/33195" }, { "epoch": 0.4919415574634734, "eta": "197:36:52", "grad_norm": 0.0062, "loss": 0.0562, "lr": "4.895e-05", "step": 3266, "steps": "23.77s,3266/33195" }, { "epoch": 0.4920921825576141, "eta": "194:16:57", "grad_norm": 0.0063, "loss": 0.0552, "lr": "4.895e-05", "step": 3267, "steps": "23.37s,3267/33195" }, { "epoch": 0.49224280765175477, "eta": "194:56:28", "grad_norm": 0.0104, "loss": 0.0609, "lr": "4.895e-05", "step": 3268, "steps": "23.45s,3268/33195" }, { "epoch": 0.4923934327458955, "eta": "192:06:29", "grad_norm": 0.0073, "loss": 0.0429, "lr": "4.895e-05", "step": 3269, "steps": "23.11s,3269/33195" }, { "epoch": 0.49254405784003613, "eta": "189:31:30", "grad_norm": 0.0075, "loss": 0.0404, "lr": "4.895e-05", "step": 3270, "steps": "22.8s,3270/33195" }, { "epoch": 0.49269468293417684, "eta": "192:25:40", "grad_norm": 0.0067, "loss": 0.0479, "lr": "4.894e-05", "step": 3271, "steps": "23.15s,3271/33195" }, { "epoch": 0.4928453080283175, "eta": "194:39:56", "grad_norm": 0.0061, "loss": 0.0465, "lr": "4.894e-05", "step": 3272, "steps": "23.42s,3272/33195" }, { "epoch": 0.4929959331224582, "eta": "192:14:55", "grad_norm": 0.0085, "loss": 0.0652, "lr": "4.894e-05", "step": 3273, "steps": "23.13s,3273/33195" }, { "epoch": 0.49314655821659886, "eta": "190:49:46", "grad_norm": 0.0064, "loss": 0.0422, "lr": "4.894e-05", "step": 3274, "steps": "22.96s,3274/33195" }, { "epoch": 0.4932971833107396, "eta": "191:09:20", "grad_norm": 0.007, "loss": 0.0553, "lr": "4.894e-05", "step": 3275, "steps": "23.0s,3275/33195" }, { "epoch": 0.49344780840488023, "eta": "192:53:39", "grad_norm": 0.0071, "loss": 0.0515, "lr": "4.894e-05", "step": 3276, "steps": "23.21s,3276/33195" }, { "epoch": 0.49359843349902094, "eta": "191:43:28", "grad_norm": 0.0059, "loss": 0.0497, "lr": "4.894e-05", "step": 3277, "steps": "23.07s,3277/33195" }, { "epoch": 0.4937490585931616, "eta": "192:22:58", "grad_norm": 0.0085, "loss": 0.0502, "lr": "4.894e-05", "step": 3278, "steps": "23.15s,3278/33195" }, { "epoch": 0.4938996836873023, "eta": "191:37:42", "grad_norm": 0.0076, "loss": 0.0438, "lr": "4.894e-05", "step": 3279, "steps": "23.06s,3279/33195" }, { "epoch": 0.494050308781443, "eta": "191:47:18", "grad_norm": 0.006, "loss": 0.0456, "lr": "4.894e-05", "step": 3280, "steps": "23.08s,3280/33195" }, { "epoch": 0.49420093387558367, "eta": "192:56:43", "grad_norm": 0.0072, "loss": 0.0406, "lr": "4.894e-05", "step": 3281, "steps": "23.22s,3281/33195" }, { "epoch": 0.4943515589697244, "eta": "191:36:33", "grad_norm": 0.0063, "loss": 0.0392, "lr": "4.894e-05", "step": 3282, "steps": "23.06s,3282/33195" }, { "epoch": 0.49450218406386504, "eta": "191:56:07", "grad_norm": 0.0074, "loss": 0.0531, "lr": "4.894e-05", "step": 3283, "steps": "23.1s,3283/33195" }, { "epoch": 0.49465280915800575, "eta": "194:50:12", "grad_norm": 0.0096, "loss": 0.0556, "lr": "4.894e-05", "step": 3284, "steps": "23.45s,3284/33195" }, { "epoch": 0.4948034342521464, "eta": "189:50:43", "grad_norm": 0.0098, "loss": 0.0448, "lr": "4.894e-05", "step": 3285, "steps": "22.85s,3285/33195" }, { "epoch": 0.4949540593462871, "eta": "192:09:55", "grad_norm": 0.0071, "loss": 0.0515, "lr": "4.893e-05", "step": 3286, "steps": "23.13s,3286/33195" }, { "epoch": 0.49510468444042777, "eta": "191:24:40", "grad_norm": 0.0057, "loss": 0.0567, "lr": "4.893e-05", "step": 3287, "steps": "23.04s,3287/33195" }, { "epoch": 0.4952553095345685, "eta": "194:33:41", "grad_norm": 0.006, "loss": 0.047, "lr": "4.893e-05", "step": 3288, "steps": "23.42s,3288/33195" }, { "epoch": 0.49540593462870913, "eta": "194:43:16", "grad_norm": 0.0068, "loss": 0.046, "lr": "4.893e-05", "step": 3289, "steps": "23.44s,3289/33195" }, { "epoch": 0.49555655972284984, "eta": "191:58:24", "grad_norm": 0.01, "loss": 0.0691, "lr": "4.893e-05", "step": 3290, "steps": "23.11s,3290/33195" }, { "epoch": 0.4957071848169905, "eta": "192:47:51", "grad_norm": 0.0101, "loss": 0.0445, "lr": "4.893e-05", "step": 3291, "steps": "23.21s,3291/33195" }, { "epoch": 0.4958578099111312, "eta": "192:07:36", "grad_norm": 0.0062, "loss": 0.0616, "lr": "4.893e-05", "step": 3292, "steps": "23.13s,3292/33195" }, { "epoch": 0.49600843500527186, "eta": "194:51:40", "grad_norm": 0.0072, "loss": 0.0732, "lr": "4.893e-05", "step": 3293, "steps": "23.46s,3293/33195" }, { "epoch": 0.4961590600994126, "eta": "192:21:47", "grad_norm": 0.0062, "loss": 0.0513, "lr": "4.893e-05", "step": 3294, "steps": "23.16s,3294/33195" }, { "epoch": 0.49630968519355323, "eta": "194:11:02", "grad_norm": 0.0068, "loss": 0.0463, "lr": "4.893e-05", "step": 3295, "steps": "23.38s,3295/33195" }, { "epoch": 0.49646031028769394, "eta": "191:56:05", "grad_norm": 0.0084, "loss": 0.0388, "lr": "4.893e-05", "step": 3296, "steps": "23.11s,3296/33195" }, { "epoch": 0.4966109353818346, "eta": "194:30:11", "grad_norm": 0.0065, "loss": 0.0499, "lr": "4.893e-05", "step": 3297, "steps": "23.42s,3297/33195" }, { "epoch": 0.4967615604759753, "eta": "192:15:15", "grad_norm": 0.0077, "loss": 0.075, "lr": "4.893e-05", "step": 3298, "steps": "23.15s,3298/33195" }, { "epoch": 0.49691218557011596, "eta": "194:44:21", "grad_norm": 0.0066, "loss": 0.0689, "lr": "4.893e-05", "step": 3299, "steps": "23.45s,3299/33195" }, { "epoch": 0.49706281066425667, "eta": "192:49:21", "grad_norm": 0.0093, "loss": 0.0309, "lr": "4.892e-05", "step": 3300, "steps": "23.22s,3300/33195" }, { "epoch": 0.4972134357583973, "eta": "191:54:10", "grad_norm": 0.0067, "loss": 0.0631, "lr": "4.892e-05", "step": 3301, "steps": "23.11s,3301/33195" }, { "epoch": 0.49736406085253804, "eta": "193:53:21", "grad_norm": 0.0071, "loss": 0.0366, "lr": "4.892e-05", "step": 3302, "steps": "23.35s,3302/33195" }, { "epoch": 0.4975146859466787, "eta": "194:12:53", "grad_norm": 0.0087, "loss": 0.0676, "lr": "4.892e-05", "step": 3303, "steps": "23.39s,3303/33195" }, { "epoch": 0.4976653110408194, "eta": "194:32:26", "grad_norm": 0.0065, "loss": 0.0687, "lr": "4.892e-05", "step": 3304, "steps": "23.43s,3304/33195" }, { "epoch": 0.49781593613496006, "eta": "189:28:09", "grad_norm": 0.007, "loss": 0.0437, "lr": "4.892e-05", "step": 3305, "steps": "22.82s,3305/33195" }, { "epoch": 0.49796656122910077, "eta": "192:02:12", "grad_norm": 0.0063, "loss": 0.057, "lr": "4.892e-05", "step": 3306, "steps": "23.13s,3306/33195" }, { "epoch": 0.4981171863232415, "eta": "195:01:09", "grad_norm": 0.0067, "loss": 0.0701, "lr": "4.892e-05", "step": 3307, "steps": "23.49s,3307/33195" }, { "epoch": 0.49826781141738213, "eta": "192:36:18", "grad_norm": 0.0067, "loss": 0.0428, "lr": "4.892e-05", "step": 3308, "steps": "23.2s,3308/33195" }, { "epoch": 0.49841843651152284, "eta": "192:01:03", "grad_norm": 0.0067, "loss": 0.0471, "lr": "4.892e-05", "step": 3309, "steps": "23.13s,3309/33195" }, { "epoch": 0.4985690616056635, "eta": "191:50:42", "grad_norm": 0.0079, "loss": 0.0414, "lr": "4.892e-05", "step": 3310, "steps": "23.11s,3310/33195" }, { "epoch": 0.4987196866998042, "eta": "194:09:46", "grad_norm": 0.0082, "loss": 0.0381, "lr": "4.892e-05", "step": 3311, "steps": "23.39s,3311/33195" }, { "epoch": 0.49887031179394487, "eta": "190:45:11", "grad_norm": 0.0081, "loss": 0.0559, "lr": "4.892e-05", "step": 3312, "steps": "22.98s,3312/33195" }, { "epoch": 0.4990209368880856, "eta": "194:23:56", "grad_norm": 0.0071, "loss": 0.0376, "lr": "4.892e-05", "step": 3313, "steps": "23.42s,3313/33195" }, { "epoch": 0.49917156198222623, "eta": "194:13:35", "grad_norm": 0.009, "loss": 0.0509, "lr": "4.892e-05", "step": 3314, "steps": "23.4s,3314/33195" }, { "epoch": 0.49932218707636694, "eta": "194:33:07", "grad_norm": 0.0099, "loss": 0.0458, "lr": "4.891e-05", "step": 3315, "steps": "23.44s,3315/33195" }, { "epoch": 0.4994728121705076, "eta": "191:43:24", "grad_norm": 0.0085, "loss": 0.0606, "lr": "4.891e-05", "step": 3316, "steps": "23.1s,3316/33195" }, { "epoch": 0.4996234372646483, "eta": "191:38:03", "grad_norm": 0.0064, "loss": 0.0562, "lr": "4.891e-05", "step": 3317, "steps": "23.09s,3317/33195" }, { "epoch": 0.49977406235878896, "eta": "191:47:37", "grad_norm": 0.0071, "loss": 0.0709, "lr": "4.891e-05", "step": 3318, "steps": "23.11s,3318/33195" }, { "epoch": 0.4999246874529297, "eta": "191:22:20", "grad_norm": 0.0069, "loss": 0.0389, "lr": "4.891e-05", "step": 3319, "steps": "23.06s,3319/33195" }, { "epoch": 0.5000753125470704, "eta": "192:46:36", "grad_norm": 0.0116, "loss": 0.0525, "lr": "4.891e-05", "step": 3320, "steps": "23.23s,3320/33195" }, { "epoch": 0.500225937641211, "eta": "192:31:16", "grad_norm": 0.0074, "loss": 0.0603, "lr": "4.891e-05", "step": 3321, "steps": "23.2s,3321/33195" }, { "epoch": 0.5003765627353517, "eta": "191:41:06", "grad_norm": 0.0105, "loss": 0.0679, "lr": "4.891e-05", "step": 3322, "steps": "23.1s,3322/33195" }, { "epoch": 0.5005271878294923, "eta": "195:19:46", "grad_norm": 0.008, "loss": 0.0467, "lr": "4.891e-05", "step": 3323, "steps": "23.54s,3323/33195" }, { "epoch": 0.5006778129236331, "eta": "193:54:45", "grad_norm": 0.0081, "loss": 0.052, "lr": "4.891e-05", "step": 3324, "steps": "23.37s,3324/33195" }, { "epoch": 0.5008284380177738, "eta": "195:04:03", "grad_norm": 0.0073, "loss": 0.0479, "lr": "4.891e-05", "step": 3325, "steps": "23.51s,3325/33195" }, { "epoch": 0.5009790631119144, "eta": "191:09:41", "grad_norm": 0.007, "loss": 0.052, "lr": "4.891e-05", "step": 3326, "steps": "23.04s,3326/33195" }, { "epoch": 0.5011296882060551, "eta": "191:34:12", "grad_norm": 0.0062, "loss": 0.0331, "lr": "4.891e-05", "step": 3327, "steps": "23.09s,3327/33195" }, { "epoch": 0.5012803133001958, "eta": "191:23:51", "grad_norm": 0.0065, "loss": 0.0661, "lr": "4.891e-05", "step": 3328, "steps": "23.07s,3328/33195" }, { "epoch": 0.5014309383943365, "eta": "193:57:47", "grad_norm": 0.0067, "loss": 0.0327, "lr": "4.890e-05", "step": 3329, "steps": "23.38s,3329/33195" }, { "epoch": 0.5015815634884772, "eta": "192:12:52", "grad_norm": 0.0067, "loss": 0.06, "lr": "4.890e-05", "step": 3330, "steps": "23.17s,3330/33195" }, { "epoch": 0.5017321885826178, "eta": "191:52:34", "grad_norm": 0.0068, "loss": 0.0714, "lr": "4.890e-05", "step": 3331, "steps": "23.13s,3331/33195" }, { "epoch": 0.5018828136767586, "eta": "191:22:19", "grad_norm": 0.0068, "loss": 0.0413, "lr": "4.890e-05", "step": 3332, "steps": "23.07s,3332/33195" }, { "epoch": 0.5020334387708992, "eta": "192:11:42", "grad_norm": 0.0073, "loss": 0.0587, "lr": "4.890e-05", "step": 3333, "steps": "23.17s,3333/33195" }, { "epoch": 0.5021840638650399, "eta": "191:16:34", "grad_norm": 0.0073, "loss": 0.0407, "lr": "4.890e-05", "step": 3334, "steps": "23.06s,3334/33195" }, { "epoch": 0.5023346889591807, "eta": "194:00:25", "grad_norm": 0.0073, "loss": 0.0703, "lr": "4.890e-05", "step": 3335, "steps": "23.39s,3335/33195" }, { "epoch": 0.5024853140533213, "eta": "191:00:52", "grad_norm": 0.0061, "loss": 0.0565, "lr": "4.890e-05", "step": 3336, "steps": "23.03s,3336/33195" }, { "epoch": 0.502635939147462, "eta": "190:50:32", "grad_norm": 0.0056, "loss": 0.0572, "lr": "4.890e-05", "step": 3337, "steps": "23.01s,3337/33195" }, { "epoch": 0.5027865642416026, "eta": "191:49:52", "grad_norm": 0.0078, "loss": 0.0439, "lr": "4.890e-05", "step": 3338, "steps": "23.13s,3338/33195" }, { "epoch": 0.5029371893357434, "eta": "193:48:54", "grad_norm": 0.0071, "loss": 0.062, "lr": "4.890e-05", "step": 3339, "steps": "23.37s,3339/33195" }, { "epoch": 0.503087814429884, "eta": "195:08:08", "grad_norm": 0.006, "loss": 0.0572, "lr": "4.890e-05", "step": 3340, "steps": "23.53s,3340/33195" }, { "epoch": 0.5032384395240247, "eta": "194:32:54", "grad_norm": 0.0075, "loss": 0.0331, "lr": "4.890e-05", "step": 3341, "steps": "23.46s,3341/33195" }, { "epoch": 0.5033890646181653, "eta": "194:37:29", "grad_norm": 0.0076, "loss": 0.0476, "lr": "4.890e-05", "step": 3342, "steps": "23.47s,3342/33195" }, { "epoch": 0.5035396897123061, "eta": "191:33:01", "grad_norm": 0.0087, "loss": 0.0306, "lr": "4.890e-05", "step": 3343, "steps": "23.1s,3343/33195" }, { "epoch": 0.5036903148064468, "eta": "195:01:35", "grad_norm": 0.0072, "loss": 0.0497, "lr": "4.889e-05", "step": 3344, "steps": "23.52s,3344/33195" }, { "epoch": 0.5038409399005874, "eta": "193:36:37", "grad_norm": 0.0066, "loss": 0.0336, "lr": "4.889e-05", "step": 3345, "steps": "23.35s,3345/33195" }, { "epoch": 0.5039915649947281, "eta": "191:56:44", "grad_norm": 0.0072, "loss": 0.0691, "lr": "4.889e-05", "step": 3346, "steps": "23.15s,3346/33195" }, { "epoch": 0.5041421900888688, "eta": "191:41:25", "grad_norm": 0.0072, "loss": 0.0548, "lr": "4.889e-05", "step": 3347, "steps": "23.12s,3347/33195" }, { "epoch": 0.5042928151830095, "eta": "194:10:16", "grad_norm": 0.0091, "loss": 0.0654, "lr": "4.889e-05", "step": 3348, "steps": "23.42s,3348/33195" }, { "epoch": 0.5044434402771502, "eta": "195:19:31", "grad_norm": 0.0078, "loss": 0.0671, "lr": "4.889e-05", "step": 3349, "steps": "23.56s,3349/33195" }, { "epoch": 0.5045940653712908, "eta": "193:59:33", "grad_norm": 0.0062, "loss": 0.0479, "lr": "4.889e-05", "step": 3350, "steps": "23.4s,3350/33195" }, { "epoch": 0.5047446904654316, "eta": "191:15:01", "grad_norm": 0.0063, "loss": 0.0516, "lr": "4.889e-05", "step": 3351, "steps": "23.07s,3351/33195" }, { "epoch": 0.5048953155595722, "eta": "191:49:27", "grad_norm": 0.0079, "loss": 0.0381, "lr": "4.889e-05", "step": 3352, "steps": "23.14s,3352/33195" }, { "epoch": 0.5050459406537129, "eta": "190:24:30", "grad_norm": 0.0057, "loss": 0.0452, "lr": "4.889e-05", "step": 3353, "steps": "22.97s,3353/33195" }, { "epoch": 0.5051965657478535, "eta": "194:02:57", "grad_norm": 0.0095, "loss": 0.0421, "lr": "4.889e-05", "step": 3354, "steps": "23.41s,3354/33195" }, { "epoch": 0.5053471908419943, "eta": "191:58:14", "grad_norm": 0.0072, "loss": 0.0538, "lr": "4.889e-05", "step": 3355, "steps": "23.16s,3355/33195" }, { "epoch": 0.505497815936135, "eta": "193:32:20", "grad_norm": 0.0077, "loss": 0.0556, "lr": "4.889e-05", "step": 3356, "steps": "23.35s,3356/33195" }, { "epoch": 0.5056484410302756, "eta": "192:22:19", "grad_norm": 0.0062, "loss": 0.0703, "lr": "4.889e-05", "step": 3357, "steps": "23.21s,3357/33195" }, { "epoch": 0.5057990661244163, "eta": "191:32:13", "grad_norm": 0.0064, "loss": 0.0446, "lr": "4.888e-05", "step": 3358, "steps": "23.11s,3358/33195" }, { "epoch": 0.505949691218557, "eta": "193:26:12", "grad_norm": 0.0067, "loss": 0.0399, "lr": "4.888e-05", "step": 3359, "steps": "23.34s,3359/33195" }, { "epoch": 0.5061003163126977, "eta": "193:35:45", "grad_norm": 0.0089, "loss": 0.0754, "lr": "4.888e-05", "step": 3360, "steps": "23.36s,3360/33195" }, { "epoch": 0.5062509414068384, "eta": "191:41:00", "grad_norm": 0.0085, "loss": 0.0515, "lr": "4.888e-05", "step": 3361, "steps": "23.13s,3361/33195" }, { "epoch": 0.5064015665009791, "eta": "194:39:37", "grad_norm": 0.0066, "loss": 0.0621, "lr": "4.888e-05", "step": 3362, "steps": "23.49s,3362/33195" }, { "epoch": 0.5065521915951198, "eta": "194:34:15", "grad_norm": 0.0072, "loss": 0.0219, "lr": "4.888e-05", "step": 3363, "steps": "23.48s,3363/33195" }, { "epoch": 0.5067028166892604, "eta": "193:44:08", "grad_norm": 0.0072, "loss": 0.0484, "lr": "4.888e-05", "step": 3364, "steps": "23.38s,3364/33195" }, { "epoch": 0.5068534417834011, "eta": "192:14:16", "grad_norm": 0.0071, "loss": 0.0497, "lr": "4.888e-05", "step": 3365, "steps": "23.2s,3365/33195" }, { "epoch": 0.5070040668775418, "eta": "191:29:08", "grad_norm": 0.0058, "loss": 0.0431, "lr": "4.888e-05", "step": 3366, "steps": "23.11s,3366/33195" }, { "epoch": 0.5071546919716825, "eta": "191:03:53", "grad_norm": 0.0077, "loss": 0.0446, "lr": "4.888e-05", "step": 3367, "steps": "23.06s,3367/33195" }, { "epoch": 0.5073053170658232, "eta": "191:08:28", "grad_norm": 0.0067, "loss": 0.0503, "lr": "4.888e-05", "step": 3368, "steps": "23.07s,3368/33195" }, { "epoch": 0.5074559421599638, "eta": "193:47:10", "grad_norm": 0.0068, "loss": 0.0303, "lr": "4.888e-05", "step": 3369, "steps": "23.39s,3369/33195" }, { "epoch": 0.5076065672541046, "eta": "192:02:23", "grad_norm": 0.0113, "loss": 0.0724, "lr": "4.888e-05", "step": 3370, "steps": "23.18s,3370/33195" }, { "epoch": 0.5077571923482452, "eta": "191:32:10", "grad_norm": 0.0093, "loss": 0.059, "lr": "4.888e-05", "step": 3371, "steps": "23.12s,3371/33195" }, { "epoch": 0.5079078174423859, "eta": "190:37:07", "grad_norm": 0.0084, "loss": 0.0448, "lr": "4.887e-05", "step": 3372, "steps": "23.01s,3372/33195" }, { "epoch": 0.5080584425365265, "eta": "193:25:43", "grad_norm": 0.009, "loss": 0.0379, "lr": "4.887e-05", "step": 3373, "steps": "23.35s,3373/33195" }, { "epoch": 0.5082090676306673, "eta": "191:55:52", "grad_norm": 0.009, "loss": 0.0412, "lr": "4.887e-05", "step": 3374, "steps": "23.17s,3374/33195" }, { "epoch": 0.508359692724808, "eta": "195:44:06", "grad_norm": 0.0068, "loss": 0.0432, "lr": "4.887e-05", "step": 3375, "steps": "23.63s,3375/33195" }, { "epoch": 0.5085103178189486, "eta": "190:30:37", "grad_norm": 0.0057, "loss": 0.0587, "lr": "4.887e-05", "step": 3376, "steps": "23.0s,3376/33195" }, { "epoch": 0.5086609429130893, "eta": "194:23:48", "grad_norm": 0.0087, "loss": 0.0463, "lr": "4.887e-05", "step": 3377, "steps": "23.47s,3377/33195" }, { "epoch": 0.50881156800723, "eta": "191:24:30", "grad_norm": 0.0085, "loss": 0.0439, "lr": "4.887e-05", "step": 3378, "steps": "23.11s,3378/33195" }, { "epoch": 0.5089621931013707, "eta": "191:09:13", "grad_norm": 0.0084, "loss": 0.0382, "lr": "4.887e-05", "step": 3379, "steps": "23.08s,3379/33195" }, { "epoch": 0.5091128181955114, "eta": "191:28:42", "grad_norm": 0.0073, "loss": 0.0551, "lr": "4.887e-05", "step": 3380, "steps": "23.12s,3380/33195" }, { "epoch": 0.509263443289652, "eta": "191:33:17", "grad_norm": 0.0075, "loss": 0.0306, "lr": "4.887e-05", "step": 3381, "steps": "23.13s,3381/33195" }, { "epoch": 0.5094140683837928, "eta": "194:46:41", "grad_norm": 0.0087, "loss": 0.0636, "lr": "4.887e-05", "step": 3382, "steps": "23.52s,3382/33195" }, { "epoch": 0.5095646934779334, "eta": "191:27:33", "grad_norm": 0.0075, "loss": 0.0504, "lr": "4.887e-05", "step": 3383, "steps": "23.12s,3383/33195" }, { "epoch": 0.5097153185720741, "eta": "193:41:19", "grad_norm": 0.0065, "loss": 0.0452, "lr": "4.887e-05", "step": 3384, "steps": "23.39s,3384/33195" }, { "epoch": 0.5098659436662147, "eta": "193:35:57", "grad_norm": 0.0094, "loss": 0.0705, "lr": "4.887e-05", "step": 3385, "steps": "23.38s,3385/33195" }, { "epoch": 0.5100165687603555, "eta": "190:41:41", "grad_norm": 0.0068, "loss": 0.0741, "lr": "4.886e-05", "step": 3386, "steps": "23.03s,3386/33195" }, { "epoch": 0.5101671938544962, "eta": "191:16:04", "grad_norm": 0.0082, "loss": 0.0421, "lr": "4.886e-05", "step": 3387, "steps": "23.1s,3387/33195" }, { "epoch": 0.5103178189486368, "eta": "191:30:35", "grad_norm": 0.0056, "loss": 0.0452, "lr": "4.886e-05", "step": 3388, "steps": "23.13s,3388/33195" }, { "epoch": 0.5104684440427776, "eta": "194:19:06", "grad_norm": 0.0055, "loss": 0.0559, "lr": "4.886e-05", "step": 3389, "steps": "23.47s,3389/33195" }, { "epoch": 0.5106190691369182, "eta": "191:54:39", "grad_norm": 0.0068, "loss": 0.0558, "lr": "4.886e-05", "step": 3390, "steps": "23.18s,3390/33195" }, { "epoch": 0.5107696942310589, "eta": "191:44:20", "grad_norm": 0.0098, "loss": 0.0314, "lr": "4.886e-05", "step": 3391, "steps": "23.16s,3391/33195" }, { "epoch": 0.5109203193251995, "eta": "188:35:12", "grad_norm": 0.009, "loss": 0.0541, "lr": "4.886e-05", "step": 3392, "steps": "22.78s,3392/33195" }, { "epoch": 0.5110709444193403, "eta": "190:39:00", "grad_norm": 0.0067, "loss": 0.043, "lr": "4.886e-05", "step": 3393, "steps": "23.03s,3393/33195" }, { "epoch": 0.511221569513481, "eta": "191:33:15", "grad_norm": 0.0078, "loss": 0.0317, "lr": "4.886e-05", "step": 3394, "steps": "23.14s,3394/33195" }, { "epoch": 0.5113721946076216, "eta": "191:03:04", "grad_norm": 0.0076, "loss": 0.0817, "lr": "4.886e-05", "step": 3395, "steps": "23.08s,3395/33195" }, { "epoch": 0.5115228197017623, "eta": "190:57:42", "grad_norm": 0.0063, "loss": 0.0649, "lr": "4.886e-05", "step": 3396, "steps": "23.07s,3396/33195" }, { "epoch": 0.511673444795903, "eta": "191:07:15", "grad_norm": 0.0072, "loss": 0.0627, "lr": "4.886e-05", "step": 3397, "steps": "23.09s,3397/33195" }, { "epoch": 0.5118240698900437, "eta": "191:01:54", "grad_norm": 0.0078, "loss": 0.0781, "lr": "4.886e-05", "step": 3398, "steps": "23.08s,3398/33195" }, { "epoch": 0.5119746949841844, "eta": "190:46:37", "grad_norm": 0.009, "loss": 0.0612, "lr": "4.886e-05", "step": 3399, "steps": "23.05s,3399/33195" }, { "epoch": 0.512125320078325, "eta": "191:35:54", "grad_norm": 0.0085, "loss": 0.0568, "lr": "4.886e-05", "step": 3400, "steps": "23.15s,3400/33195" }, { "epoch": 0.5122759451724658, "eta": "404:57:00", "grad_norm": 0.0065, "loss": 0.0546, "lr": "4.885e-05", "step": 3401, "steps": "48.93s,3401/33195" }, { "epoch": 0.5124265702666064, "eta": "191:10:18", "grad_norm": 0.0067, "loss": 0.0699, "lr": "4.885e-05", "step": 3402, "steps": "23.1s,3402/33195" }, { "epoch": 0.5125771953607471, "eta": "193:04:07", "grad_norm": 0.0071, "loss": 0.0348, "lr": "4.885e-05", "step": 3403, "steps": "23.33s,3403/33195" }, { "epoch": 0.5127278204548877, "eta": "191:24:25", "grad_norm": 0.0124, "loss": 0.0704, "lr": "4.885e-05", "step": 3404, "steps": "23.13s,3404/33195" }, { "epoch": 0.5128784455490285, "eta": "191:29:00", "grad_norm": 0.0093, "loss": 0.0266, "lr": "4.885e-05", "step": 3405, "steps": "23.14s,3405/33195" }, { "epoch": 0.5130290706431692, "eta": "191:18:41", "grad_norm": 0.007, "loss": 0.0474, "lr": "4.885e-05", "step": 3406, "steps": "23.12s,3406/33195" }, { "epoch": 0.5131796957373098, "eta": "190:58:27", "grad_norm": 0.0071, "loss": 0.0484, "lr": "4.885e-05", "step": 3407, "steps": "23.08s,3407/33195" }, { "epoch": 0.5133303208314505, "eta": "191:52:40", "grad_norm": 0.0079, "loss": 0.0505, "lr": "4.885e-05", "step": 3408, "steps": "23.19s,3408/33195" }, { "epoch": 0.5134809459255912, "eta": "190:37:49", "grad_norm": 0.0065, "loss": 0.061, "lr": "4.885e-05", "step": 3409, "steps": "23.04s,3409/33195" }, { "epoch": 0.5136315710197319, "eta": "191:56:52", "grad_norm": 0.0062, "loss": 0.0713, "lr": "4.885e-05", "step": 3410, "steps": "23.2s,3410/33195" }, { "epoch": 0.5137821961138725, "eta": "191:21:43", "grad_norm": 0.0089, "loss": 0.0361, "lr": "4.885e-05", "step": 3411, "steps": "23.13s,3411/33195" }, { "epoch": 0.5139328212080132, "eta": "192:45:43", "grad_norm": 0.007, "loss": 0.0591, "lr": "4.885e-05", "step": 3412, "steps": "23.3s,3412/33195" }, { "epoch": 0.514083446302154, "eta": "193:44:54", "grad_norm": 0.0066, "loss": 0.046, "lr": "4.885e-05", "step": 3413, "steps": "23.42s,3413/33195" }, { "epoch": 0.5142340713962946, "eta": "191:45:23", "grad_norm": 0.0068, "loss": 0.0654, "lr": "4.885e-05", "step": 3414, "steps": "23.18s,3414/33195" }, { "epoch": 0.5143846964904353, "eta": "201:15:47", "grad_norm": 0.0066, "loss": 0.0827, "lr": "4.884e-05", "step": 3415, "steps": "24.33s,3415/33195" }, { "epoch": 0.514535321584576, "eta": "330:32:48", "grad_norm": 0.0102, "loss": 0.0513, "lr": "4.884e-05", "step": 3416, "steps": "39.96s,3416/33195" }, { "epoch": 0.5146859466787167, "eta": "205:42:58", "grad_norm": 0.0087, "loss": 0.0398, "lr": "4.884e-05", "step": 3417, "steps": "24.87s,3417/33195" }, { "epoch": 0.5148365717728574, "eta": "194:32:35", "grad_norm": 0.0073, "loss": 0.0604, "lr": "4.884e-05", "step": 3418, "steps": "23.52s,3418/33195" }, { "epoch": 0.514987196866998, "eta": "196:01:31", "grad_norm": 0.0089, "loss": 0.0709, "lr": "4.884e-05", "step": 3419, "steps": "23.7s,3419/33195" }, { "epoch": 0.5151378219611388, "eta": "189:04:16", "grad_norm": 0.0081, "loss": 0.0328, "lr": "4.884e-05", "step": 3420, "steps": "22.86s,3420/33195" }, { "epoch": 0.5152884470552794, "eta": "194:21:29", "grad_norm": 0.009, "loss": 0.0379, "lr": "4.884e-05", "step": 3421, "steps": "23.5s,3421/33195" }, { "epoch": 0.5154390721494201, "eta": "191:27:24", "grad_norm": 0.009, "loss": 0.0451, "lr": "4.884e-05", "step": 3422, "steps": "23.15s,3422/33195" }, { "epoch": 0.5155896972435607, "eta": "194:35:35", "grad_norm": 0.0075, "loss": 0.0516, "lr": "4.884e-05", "step": 3423, "steps": "23.53s,3423/33195" }, { "epoch": 0.5157403223377015, "eta": "191:31:36", "grad_norm": 0.0081, "loss": 0.0378, "lr": "4.884e-05", "step": 3424, "steps": "23.16s,3424/33195" }, { "epoch": 0.5158909474318422, "eta": "193:40:13", "grad_norm": 0.0082, "loss": 0.0589, "lr": "4.884e-05", "step": 3425, "steps": "23.42s,3425/33195" }, { "epoch": 0.5160415725259828, "eta": "191:20:54", "grad_norm": 0.0072, "loss": 0.0454, "lr": "4.884e-05", "step": 3426, "steps": "23.14s,3426/33195" }, { "epoch": 0.5161921976201235, "eta": "190:01:08", "grad_norm": 0.0074, "loss": 0.0766, "lr": "4.884e-05", "step": 3427, "steps": "22.98s,3427/33195" }, { "epoch": 0.5163428227142642, "eta": "193:58:53", "grad_norm": 0.0052, "loss": 0.0643, "lr": "4.884e-05", "step": 3428, "steps": "23.46s,3428/33195" }, { "epoch": 0.5164934478084049, "eta": "191:04:52", "grad_norm": 0.006, "loss": 0.0417, "lr": "4.883e-05", "step": 3429, "steps": "23.11s,3429/33195" }, { "epoch": 0.5166440729025455, "eta": "194:22:55", "grad_norm": 0.0075, "loss": 0.0459, "lr": "4.883e-05", "step": 3430, "steps": "23.51s,3430/33195" }, { "epoch": 0.5167946979966862, "eta": "191:48:44", "grad_norm": 0.0064, "loss": 0.0412, "lr": "4.883e-05", "step": 3431, "steps": "23.2s,3431/33195" }, { "epoch": 0.516945323090827, "eta": "190:33:57", "grad_norm": 0.0066, "loss": 0.0727, "lr": "4.883e-05", "step": 3432, "steps": "23.05s,3432/33195" }, { "epoch": 0.5170959481849676, "eta": "194:26:42", "grad_norm": 0.006, "loss": 0.0547, "lr": "4.883e-05", "step": 3433, "steps": "23.52s,3433/33195" }, { "epoch": 0.5172465732791083, "eta": "191:47:35", "grad_norm": 0.0061, "loss": 0.0392, "lr": "4.883e-05", "step": 3434, "steps": "23.2s,3434/33195" }, { "epoch": 0.5173971983732489, "eta": "188:48:38", "grad_norm": 0.0056, "loss": 0.076, "lr": "4.883e-05", "step": 3435, "steps": "22.84s,3435/33195" }, { "epoch": 0.5175478234673897, "eta": "189:42:49", "grad_norm": 0.0073, "loss": 0.074, "lr": "4.883e-05", "step": 3436, "steps": "22.95s,3436/33195" }, { "epoch": 0.5176984485615304, "eta": "194:20:10", "grad_norm": 0.006, "loss": 0.0598, "lr": "4.883e-05", "step": 3437, "steps": "23.51s,3437/33195" }, { "epoch": 0.517849073655671, "eta": "190:46:31", "grad_norm": 0.0062, "loss": 0.0458, "lr": "4.883e-05", "step": 3438, "steps": "23.08s,3438/33195" }, { "epoch": 0.5179996987498117, "eta": "191:30:46", "grad_norm": 0.0072, "loss": 0.0543, "lr": "4.883e-05", "step": 3439, "steps": "23.17s,3439/33195" }, { "epoch": 0.5181503238439524, "eta": "191:05:35", "grad_norm": 0.0079, "loss": 0.0465, "lr": "4.883e-05", "step": 3440, "steps": "23.12s,3440/33195" }, { "epoch": 0.5183009489380931, "eta": "189:45:51", "grad_norm": 0.0062, "loss": 0.0459, "lr": "4.883e-05", "step": 3441, "steps": "22.96s,3441/33195" }, { "epoch": 0.5184515740322337, "eta": "191:49:27", "grad_norm": 0.0065, "loss": 0.0645, "lr": "4.882e-05", "step": 3442, "steps": "23.21s,3442/33195" }, { "epoch": 0.5186021991263745, "eta": "194:17:49", "grad_norm": 0.0065, "loss": 0.0282, "lr": "4.882e-05", "step": 3443, "steps": "23.51s,3443/33195" }, { "epoch": 0.5187528242205152, "eta": "190:44:13", "grad_norm": 0.0069, "loss": 0.0648, "lr": "4.882e-05", "step": 3444, "steps": "23.08s,3444/33195" }, { "epoch": 0.5189034493146558, "eta": "194:02:10", "grad_norm": 0.008, "loss": 0.0702, "lr": "4.882e-05", "step": 3445, "steps": "23.48s,3445/33195" }, { "epoch": 0.5190540744087965, "eta": "194:06:44", "grad_norm": 0.0078, "loss": 0.0499, "lr": "4.882e-05", "step": 3446, "steps": "23.49s,3446/33195" }, { "epoch": 0.5192046995029372, "eta": "191:37:36", "grad_norm": 0.0078, "loss": 0.0562, "lr": "4.882e-05", "step": 3447, "steps": "23.19s,3447/33195" }, { "epoch": 0.5193553245970779, "eta": "191:17:23", "grad_norm": 0.0058, "loss": 0.0809, "lr": "4.882e-05", "step": 3448, "steps": "23.15s,3448/33195" }, { "epoch": 0.5195059496912185, "eta": "194:20:25", "grad_norm": 0.0075, "loss": 0.0606, "lr": "4.882e-05", "step": 3449, "steps": "23.52s,3449/33195" }, { "epoch": 0.5196565747853592, "eta": "191:21:34", "grad_norm": 0.0067, "loss": 0.0462, "lr": "4.882e-05", "step": 3450, "steps": "23.16s,3450/33195" }, { "epoch": 0.5198071998795, "eta": "189:51:57", "grad_norm": 0.0063, "loss": 0.0547, "lr": "4.882e-05", "step": 3451, "steps": "22.98s,3451/33195" }, { "epoch": 0.5199578249736406, "eta": "193:34:38", "grad_norm": 0.0049, "loss": 0.0672, "lr": "4.882e-05", "step": 3452, "steps": "23.43s,3452/33195" }, { "epoch": 0.5201084500677813, "eta": "194:08:57", "grad_norm": 0.0078, "loss": 0.0361, "lr": "4.882e-05", "step": 3453, "steps": "23.5s,3453/33195" }, { "epoch": 0.5202590751619219, "eta": "191:29:56", "grad_norm": 0.0062, "loss": 0.042, "lr": "4.882e-05", "step": 3454, "steps": "23.18s,3454/33195" }, { "epoch": 0.5204097002560627, "eta": "194:32:57", "grad_norm": 0.0062, "loss": 0.0675, "lr": "4.882e-05", "step": 3455, "steps": "23.55s,3455/33195" }, { "epoch": 0.5205603253502034, "eta": "191:39:04", "grad_norm": 0.0081, "loss": 0.0543, "lr": "4.881e-05", "step": 3456, "steps": "23.2s,3456/33195" }, { "epoch": 0.520710950444344, "eta": "191:53:33", "grad_norm": 0.0088, "loss": 0.0754, "lr": "4.881e-05", "step": 3457, "steps": "23.23s,3457/33195" }, { "epoch": 0.5208615755384847, "eta": "190:28:55", "grad_norm": 0.0087, "loss": 0.0542, "lr": "4.881e-05", "step": 3458, "steps": "23.06s,3458/33195" }, { "epoch": 0.5210122006326254, "eta": "192:27:28", "grad_norm": 0.0071, "loss": 0.0533, "lr": "4.881e-05", "step": 3459, "steps": "23.3s,3459/33195" }, { "epoch": 0.5211628257267661, "eta": "192:51:52", "grad_norm": 0.0074, "loss": 0.0521, "lr": "4.881e-05", "step": 3460, "steps": "23.35s,3460/33195" }, { "epoch": 0.5213134508209067, "eta": "194:20:41", "grad_norm": 0.0096, "loss": 0.0678, "lr": "4.881e-05", "step": 3461, "steps": "23.53s,3461/33195" }, { "epoch": 0.5214640759150474, "eta": "192:56:02", "grad_norm": 0.0055, "loss": 0.0617, "lr": "4.881e-05", "step": 3462, "steps": "23.36s,3462/33195" }, { "epoch": 0.5216147010091882, "eta": "194:39:43", "grad_norm": 0.0068, "loss": 0.0366, "lr": "4.881e-05", "step": 3463, "steps": "23.57s,3463/33195" }, { "epoch": 0.5217653261033288, "eta": "192:50:18", "grad_norm": 0.0074, "loss": 0.0721, "lr": "4.881e-05", "step": 3464, "steps": "23.35s,3464/33195" }, { "epoch": 0.5219159511974695, "eta": "190:51:00", "grad_norm": 0.0065, "loss": 0.0641, "lr": "4.881e-05", "step": 3465, "steps": "23.11s,3465/33195" }, { "epoch": 0.5220665762916101, "eta": "191:15:23", "grad_norm": 0.0056, "loss": 0.0516, "lr": "4.881e-05", "step": 3466, "steps": "23.16s,3466/33195" }, { "epoch": 0.5222172013857509, "eta": "193:28:47", "grad_norm": 0.0071, "loss": 0.0416, "lr": "4.881e-05", "step": 3467, "steps": "23.43s,3467/33195" }, { "epoch": 0.5223678264798916, "eta": "192:43:48", "grad_norm": 0.0071, "loss": 0.0464, "lr": "4.881e-05", "step": 3468, "steps": "23.34s,3468/33195" }, { "epoch": 0.5225184515740322, "eta": "190:39:33", "grad_norm": 0.0077, "loss": 0.0459, "lr": "4.881e-05", "step": 3469, "steps": "23.09s,3469/33195" }, { "epoch": 0.522669076668173, "eta": "189:49:37", "grad_norm": 0.0069, "loss": 0.06, "lr": "4.880e-05", "step": 3470, "steps": "22.99s,3470/33195" }, { "epoch": 0.5228197017623136, "eta": "193:37:07", "grad_norm": 0.006, "loss": 0.057, "lr": "4.880e-05", "step": 3471, "steps": "23.45s,3471/33195" }, { "epoch": 0.5229703268564543, "eta": "191:08:07", "grad_norm": 0.0066, "loss": 0.0595, "lr": "4.880e-05", "step": 3472, "steps": "23.15s,3472/33195" }, { "epoch": 0.5231209519505949, "eta": "194:01:07", "grad_norm": 0.0062, "loss": 0.0506, "lr": "4.880e-05", "step": 3473, "steps": "23.5s,3473/33195" }, { "epoch": 0.5232715770447357, "eta": "191:22:12", "grad_norm": 0.0066, "loss": 0.0613, "lr": "4.880e-05", "step": 3474, "steps": "23.18s,3474/33195" }, { "epoch": 0.5234222021388764, "eta": "192:50:59", "grad_norm": 0.014, "loss": 0.0299, "lr": "4.880e-05", "step": 3475, "steps": "23.36s,3475/33195" }, { "epoch": 0.523572827233017, "eta": "191:11:32", "grad_norm": 0.0065, "loss": 0.0587, "lr": "4.880e-05", "step": 3476, "steps": "23.16s,3476/33195" }, { "epoch": 0.5237234523271577, "eta": "193:54:35", "grad_norm": 0.008, "loss": 0.085, "lr": "4.880e-05", "step": 3477, "steps": "23.49s,3477/33195" }, { "epoch": 0.5238740774212984, "eta": "191:30:34", "grad_norm": 0.0058, "loss": 0.0495, "lr": "4.880e-05", "step": 3478, "steps": "23.2s,3478/33195" }, { "epoch": 0.5240247025154391, "eta": "192:44:28", "grad_norm": 0.0068, "loss": 0.086, "lr": "4.880e-05", "step": 3479, "steps": "23.35s,3479/33195" }, { "epoch": 0.5241753276095797, "eta": "191:19:53", "grad_norm": 0.0066, "loss": 0.0275, "lr": "4.880e-05", "step": 3480, "steps": "23.18s,3480/33195" }, { "epoch": 0.5243259527037204, "eta": "191:39:19", "grad_norm": 0.0078, "loss": 0.0331, "lr": "4.880e-05", "step": 3481, "steps": "23.22s,3481/33195" }, { "epoch": 0.5244765777978612, "eta": "193:27:52", "grad_norm": 0.0086, "loss": 0.0551, "lr": "4.880e-05", "step": 3482, "steps": "23.44s,3482/33195" }, { "epoch": 0.5246272028920018, "eta": "190:53:58", "grad_norm": 0.0096, "loss": 0.0504, "lr": "4.880e-05", "step": 3483, "steps": "23.13s,3483/33195" }, { "epoch": 0.5247778279861425, "eta": "193:51:51", "grad_norm": 0.0071, "loss": 0.0449, "lr": "4.879e-05", "step": 3484, "steps": "23.49s,3484/33195" }, { "epoch": 0.5249284530802831, "eta": "191:03:06", "grad_norm": 0.0078, "loss": 0.0256, "lr": "4.879e-05", "step": 3485, "steps": "23.15s,3485/33195" }, { "epoch": 0.5250790781744239, "eta": "191:02:43", "grad_norm": 0.0087, "loss": 0.0518, "lr": "4.879e-05", "step": 3486, "steps": "23.15s,3486/33195" }, { "epoch": 0.5252297032685646, "eta": "194:10:29", "grad_norm": 0.0073, "loss": 0.0447, "lr": "4.879e-05", "step": 3487, "steps": "23.53s,3487/33195" }, { "epoch": 0.5253803283627052, "eta": "190:17:23", "grad_norm": 0.0101, "loss": 0.0498, "lr": "4.879e-05", "step": 3488, "steps": "23.06s,3488/33195" }, { "epoch": 0.5255309534568459, "eta": "188:37:59", "grad_norm": 0.0094, "loss": 0.0438, "lr": "4.879e-05", "step": 3489, "steps": "22.86s,3489/33195" }, { "epoch": 0.5256815785509866, "eta": "191:30:53", "grad_norm": 0.0075, "loss": 0.0549, "lr": "4.879e-05", "step": 3490, "steps": "23.21s,3490/33195" }, { "epoch": 0.5258322036451273, "eta": "190:06:20", "grad_norm": 0.007, "loss": 0.0475, "lr": "4.879e-05", "step": 3491, "steps": "23.04s,3491/33195" }, { "epoch": 0.5259828287392679, "eta": "192:54:16", "grad_norm": 0.0072, "loss": 0.0296, "lr": "4.879e-05", "step": 3492, "steps": "23.38s,3492/33195" }, { "epoch": 0.5261334538334086, "eta": "193:38:25", "grad_norm": 0.0101, "loss": 0.0599, "lr": "4.879e-05", "step": 3493, "steps": "23.47s,3493/33195" }, { "epoch": 0.5262840789275494, "eta": "189:55:17", "grad_norm": 0.0083, "loss": 0.0558, "lr": "4.879e-05", "step": 3494, "steps": "23.02s,3494/33195" }, { "epoch": 0.52643470402169, "eta": "190:09:45", "grad_norm": 0.006, "loss": 0.0405, "lr": "4.879e-05", "step": 3495, "steps": "23.05s,3495/33195" }, { "epoch": 0.5265853291158307, "eta": "191:18:39", "grad_norm": 0.011, "loss": 0.0432, "lr": "4.879e-05", "step": 3496, "steps": "23.19s,3496/33195" }, { "epoch": 0.5267359542099714, "eta": "191:03:25", "grad_norm": 0.0072, "loss": 0.0447, "lr": "4.879e-05", "step": 3497, "steps": "23.16s,3497/33195" }, { "epoch": 0.5268865793041121, "eta": "191:42:38", "grad_norm": 0.0089, "loss": 0.0489, "lr": "4.878e-05", "step": 3498, "steps": "23.24s,3498/33195" }, { "epoch": 0.5270372043982527, "eta": "191:42:15", "grad_norm": 0.0078, "loss": 0.0333, "lr": "4.878e-05", "step": 3499, "steps": "23.24s,3499/33195" }, { "epoch": 0.5271878294923934, "eta": "190:17:43", "grad_norm": 0.0088, "loss": 0.0361, "lr": "4.878e-05", "step": 3500, "steps": "23.07s,3500/33195" }, { "epoch": 0.5273384545865342, "eta": "191:26:37", "grad_norm": 0.0068, "loss": 0.0403, "lr": "4.878e-05", "step": 3501, "steps": "23.21s,3501/33195" }, { "epoch": 0.5274890796806748, "eta": "194:44:11", "grad_norm": 0.0082, "loss": 0.0349, "lr": "4.878e-05", "step": 3502, "steps": "23.61s,3502/33195" }, { "epoch": 0.5276397047748155, "eta": "192:59:52", "grad_norm": 0.0078, "loss": 0.0438, "lr": "4.878e-05", "step": 3503, "steps": "23.4s,3503/33195" }, { "epoch": 0.5277903298689561, "eta": "191:00:43", "grad_norm": 0.0067, "loss": 0.0682, "lr": "4.878e-05", "step": 3504, "steps": "23.16s,3504/33195" }, { "epoch": 0.5279409549630969, "eta": "192:09:37", "grad_norm": 0.0073, "loss": 0.0638, "lr": "4.878e-05", "step": 3505, "steps": "23.3s,3505/33195" }, { "epoch": 0.5280915800572376, "eta": "190:45:06", "grad_norm": 0.0062, "loss": 0.0717, "lr": "4.878e-05", "step": 3506, "steps": "23.13s,3506/33195" }, { "epoch": 0.5282422051513782, "eta": "187:46:35", "grad_norm": 0.0106, "loss": 0.056, "lr": "4.878e-05", "step": 3507, "steps": "22.77s,3507/33195" }, { "epoch": 0.5283928302455189, "eta": "192:03:30", "grad_norm": 0.0084, "loss": 0.0493, "lr": "4.878e-05", "step": 3508, "steps": "23.29s,3508/33195" }, { "epoch": 0.5285434553396596, "eta": "190:09:19", "grad_norm": 0.0076, "loss": 0.039, "lr": "4.878e-05", "step": 3509, "steps": "23.06s,3509/33195" }, { "epoch": 0.5286940804338003, "eta": "191:13:15", "grad_norm": 0.0096, "loss": 0.0548, "lr": "4.878e-05", "step": 3510, "steps": "23.19s,3510/33195" }, { "epoch": 0.5288447055279409, "eta": "190:48:07", "grad_norm": 0.008, "loss": 0.0556, "lr": "4.877e-05", "step": 3511, "steps": "23.14s,3511/33195" }, { "epoch": 0.5289953306220816, "eta": "193:35:56", "grad_norm": 0.0092, "loss": 0.0524, "lr": "4.877e-05", "step": 3512, "steps": "23.48s,3512/33195" }, { "epoch": 0.5291459557162224, "eta": "190:17:40", "grad_norm": 0.0087, "loss": 0.0664, "lr": "4.877e-05", "step": 3513, "steps": "23.08s,3513/33195" }, { "epoch": 0.529296580810363, "eta": "191:06:45", "grad_norm": 0.0076, "loss": 0.0622, "lr": "4.877e-05", "step": 3514, "steps": "23.18s,3514/33195" }, { "epoch": 0.5294472059045037, "eta": "193:54:33", "grad_norm": 0.0073, "loss": 0.0408, "lr": "4.877e-05", "step": 3515, "steps": "23.52s,3515/33195" }, { "epoch": 0.5295978309986443, "eta": "190:21:28", "grad_norm": 0.0076, "loss": 0.0615, "lr": "4.877e-05", "step": 3516, "steps": "23.09s,3516/33195" }, { "epoch": 0.5297484560927851, "eta": "193:43:53", "grad_norm": 0.0098, "loss": 0.0606, "lr": "4.877e-05", "step": 3517, "steps": "23.5s,3517/33195" }, { "epoch": 0.5298990811869257, "eta": "190:30:35", "grad_norm": 0.0067, "loss": 0.0517, "lr": "4.877e-05", "step": 3518, "steps": "23.11s,3518/33195" }, { "epoch": 0.5300497062810664, "eta": "193:43:06", "grad_norm": 0.0088, "loss": 0.0353, "lr": "4.877e-05", "step": 3519, "steps": "23.5s,3519/33195" }, { "epoch": 0.5302003313752071, "eta": "194:07:26", "grad_norm": 0.0077, "loss": 0.0506, "lr": "4.877e-05", "step": 3520, "steps": "23.55s,3520/33195" }, { "epoch": 0.5303509564693478, "eta": "191:43:37", "grad_norm": 0.0089, "loss": 0.0446, "lr": "4.877e-05", "step": 3521, "steps": "23.26s,3521/33195" }, { "epoch": 0.5305015815634885, "eta": "190:29:03", "grad_norm": 0.0062, "loss": 0.0577, "lr": "4.877e-05", "step": 3522, "steps": "23.11s,3522/33195" }, { "epoch": 0.5306522066576291, "eta": "190:38:33", "grad_norm": 0.0077, "loss": 0.0734, "lr": "4.877e-05", "step": 3523, "steps": "23.13s,3523/33195" }, { "epoch": 0.5308028317517699, "eta": "190:57:57", "grad_norm": 0.0067, "loss": 0.078, "lr": "4.877e-05", "step": 3524, "steps": "23.17s,3524/33195" }, { "epoch": 0.5309534568459106, "eta": "192:51:18", "grad_norm": 0.0083, "loss": 0.046, "lr": "4.876e-05", "step": 3525, "steps": "23.4s,3525/33195" }, { "epoch": 0.5311040819400512, "eta": "190:42:20", "grad_norm": 0.0073, "loss": 0.0476, "lr": "4.876e-05", "step": 3526, "steps": "23.14s,3526/33195" }, { "epoch": 0.5312547070341919, "eta": "190:51:50", "grad_norm": 0.0068, "loss": 0.0537, "lr": "4.876e-05", "step": 3527, "steps": "23.16s,3527/33195" }, { "epoch": 0.5314053321283326, "eta": "187:28:44", "grad_norm": 0.009, "loss": 0.046, "lr": "4.876e-05", "step": 3528, "steps": "22.75s,3528/33195" }, { "epoch": 0.5315559572224733, "eta": "193:24:21", "grad_norm": 0.007, "loss": 0.0411, "lr": "4.876e-05", "step": 3529, "steps": "23.47s,3529/33195" }, { "epoch": 0.5317065823166139, "eta": "190:35:51", "grad_norm": 0.0109, "loss": 0.0472, "lr": "4.876e-05", "step": 3530, "steps": "23.13s,3530/33195" }, { "epoch": 0.5318572074107546, "eta": "194:37:43", "grad_norm": 0.007, "loss": 0.041, "lr": "4.876e-05", "step": 3531, "steps": "23.62s,3531/33195" }, { "epoch": 0.5320078325048954, "eta": "192:28:47", "grad_norm": 0.0079, "loss": 0.0395, "lr": "4.876e-05", "step": 3532, "steps": "23.36s,3532/33195" }, { "epoch": 0.532158457599036, "eta": "192:58:04", "grad_norm": 0.008, "loss": 0.0554, "lr": "4.876e-05", "step": 3533, "steps": "23.42s,3533/33195" }, { "epoch": 0.5323090826931767, "eta": "190:19:29", "grad_norm": 0.0059, "loss": 0.0341, "lr": "4.876e-05", "step": 3534, "steps": "23.1s,3534/33195" }, { "epoch": 0.5324597077873173, "eta": "193:31:53", "grad_norm": 0.0082, "loss": 0.0607, "lr": "4.876e-05", "step": 3535, "steps": "23.49s,3535/33195" }, { "epoch": 0.5326103328814581, "eta": "189:39:10", "grad_norm": 0.0064, "loss": 0.0527, "lr": "4.876e-05", "step": 3536, "steps": "23.02s,3536/33195" }, { "epoch": 0.5327609579755987, "eta": "193:31:06", "grad_norm": 0.0084, "loss": 0.0462, "lr": "4.876e-05", "step": 3537, "steps": "23.49s,3537/33195" }, { "epoch": 0.5329115830697394, "eta": "193:10:56", "grad_norm": 0.0059, "loss": 0.0592, "lr": "4.875e-05", "step": 3538, "steps": "23.45s,3538/33195" }, { "epoch": 0.5330622081638801, "eta": "191:56:24", "grad_norm": 0.0075, "loss": 0.0518, "lr": "4.875e-05", "step": 3539, "steps": "23.3s,3539/33195" }, { "epoch": 0.5332128332580208, "eta": "189:12:55", "grad_norm": 0.0063, "loss": 0.0505, "lr": "4.875e-05", "step": 3540, "steps": "22.97s,3540/33195" }, { "epoch": 0.5333634583521615, "eta": "190:11:50", "grad_norm": 0.0068, "loss": 0.0512, "lr": "4.875e-05", "step": 3541, "steps": "23.09s,3541/33195" }, { "epoch": 0.5335140834463021, "eta": "190:31:13", "grad_norm": 0.0071, "loss": 0.0321, "lr": "4.875e-05", "step": 3542, "steps": "23.13s,3542/33195" }, { "epoch": 0.5336647085404428, "eta": "190:06:08", "grad_norm": 0.0063, "loss": 0.0548, "lr": "4.875e-05", "step": 3543, "steps": "23.08s,3543/33195" }, { "epoch": 0.5338153336345836, "eta": "190:15:38", "grad_norm": 0.0067, "loss": 0.0605, "lr": "4.875e-05", "step": 3544, "steps": "23.1s,3544/33195" }, { "epoch": 0.5339659587287242, "eta": "193:57:37", "grad_norm": 0.0067, "loss": 0.0678, "lr": "4.875e-05", "step": 3545, "steps": "23.55s,3545/33195" }, { "epoch": 0.5341165838228649, "eta": "193:12:45", "grad_norm": 0.0058, "loss": 0.0413, "lr": "4.875e-05", "step": 3546, "steps": "23.46s,3546/33195" }, { "epoch": 0.5342672089170055, "eta": "193:27:11", "grad_norm": 0.0095, "loss": 0.0521, "lr": "4.875e-05", "step": 3547, "steps": "23.49s,3547/33195" }, { "epoch": 0.5344178340111463, "eta": "192:42:19", "grad_norm": 0.0064, "loss": 0.0505, "lr": "4.875e-05", "step": 3548, "steps": "23.4s,3548/33195" }, { "epoch": 0.5345684591052869, "eta": "190:48:17", "grad_norm": 0.0081, "loss": 0.0609, "lr": "4.875e-05", "step": 3549, "steps": "23.17s,3549/33195" }, { "epoch": 0.5347190841994276, "eta": "192:36:36", "grad_norm": 0.0063, "loss": 0.0745, "lr": "4.875e-05", "step": 3550, "steps": "23.39s,3550/33195" }, { "epoch": 0.5348697092935684, "eta": "191:56:41", "grad_norm": 0.0099, "loss": 0.0308, "lr": "4.875e-05", "step": 3551, "steps": "23.31s,3551/33195" }, { "epoch": 0.535020334387709, "eta": "190:32:19", "grad_norm": 0.0059, "loss": 0.0481, "lr": "4.874e-05", "step": 3552, "steps": "23.14s,3552/33195" }, { "epoch": 0.5351709594818497, "eta": "190:51:41", "grad_norm": 0.0078, "loss": 0.0766, "lr": "4.874e-05", "step": 3553, "steps": "23.18s,3553/33195" }, { "epoch": 0.5353215845759903, "eta": "190:51:18", "grad_norm": 0.0053, "loss": 0.0559, "lr": "4.874e-05", "step": 3554, "steps": "23.18s,3554/33195" }, { "epoch": 0.5354722096701311, "eta": "193:43:49", "grad_norm": 0.0095, "loss": 0.0665, "lr": "4.874e-05", "step": 3555, "steps": "23.53s,3555/33195" }, { "epoch": 0.5356228347642717, "eta": "190:45:35", "grad_norm": 0.0085, "loss": 0.0615, "lr": "4.874e-05", "step": 3556, "steps": "23.17s,3556/33195" }, { "epoch": 0.5357734598584124, "eta": "187:47:22", "grad_norm": 0.0065, "loss": 0.0449, "lr": "4.874e-05", "step": 3557, "steps": "22.81s,3557/33195" }, { "epoch": 0.5359240849525531, "eta": "192:58:11", "grad_norm": 0.0069, "loss": 0.0332, "lr": "4.874e-05", "step": 3558, "steps": "23.44s,3558/33195" }, { "epoch": 0.5360747100466938, "eta": "190:59:15", "grad_norm": 0.0076, "loss": 0.0534, "lr": "4.874e-05", "step": 3559, "steps": "23.2s,3559/33195" }, { "epoch": 0.5362253351408345, "eta": "193:22:06", "grad_norm": 0.0067, "loss": 0.0681, "lr": "4.874e-05", "step": 3560, "steps": "23.49s,3560/33195" }, { "epoch": 0.5363759602349751, "eta": "194:20:58", "grad_norm": 0.0087, "loss": 0.0483, "lr": "4.874e-05", "step": 3561, "steps": "23.61s,3561/33195" }, { "epoch": 0.5365265853291158, "eta": "190:33:23", "grad_norm": 0.0068, "loss": 0.0833, "lr": "4.874e-05", "step": 3562, "steps": "23.15s,3562/33195" }, { "epoch": 0.5366772104232566, "eta": "190:47:49", "grad_norm": 0.0073, "loss": 0.0643, "lr": "4.874e-05", "step": 3563, "steps": "23.18s,3563/33195" }, { "epoch": 0.5368278355173972, "eta": "190:27:41", "grad_norm": 0.0072, "loss": 0.0485, "lr": "4.874e-05", "step": 3564, "steps": "23.14s,3564/33195" }, { "epoch": 0.5369784606115379, "eta": "190:27:18", "grad_norm": 0.0067, "loss": 0.0449, "lr": "4.873e-05", "step": 3565, "steps": "23.14s,3565/33195" }, { "epoch": 0.5371290857056785, "eta": "192:35:18", "grad_norm": 0.012, "loss": 0.0491, "lr": "4.873e-05", "step": 3566, "steps": "23.4s,3566/33195" }, { "epoch": 0.5372797107998193, "eta": "194:43:18", "grad_norm": 0.0075, "loss": 0.0725, "lr": "4.873e-05", "step": 3567, "steps": "23.66s,3567/33195" }, { "epoch": 0.5374303358939599, "eta": "192:39:28", "grad_norm": 0.0096, "loss": 0.0505, "lr": "4.873e-05", "step": 3568, "steps": "23.41s,3568/33195" }, { "epoch": 0.5375809609881006, "eta": "190:20:49", "grad_norm": 0.0064, "loss": 0.066, "lr": "4.873e-05", "step": 3569, "steps": "23.13s,3569/33195" }, { "epoch": 0.5377315860822413, "eta": "190:10:33", "grad_norm": 0.0071, "loss": 0.053, "lr": "4.873e-05", "step": 3570, "steps": "23.11s,3570/33195" }, { "epoch": 0.537882211176382, "eta": "190:39:48", "grad_norm": 0.0061, "loss": 0.0577, "lr": "4.873e-05", "step": 3571, "steps": "23.17s,3571/33195" }, { "epoch": 0.5380328362705227, "eta": "189:45:06", "grad_norm": 0.0066, "loss": 0.0424, "lr": "4.873e-05", "step": 3572, "steps": "23.06s,3572/33195" }, { "epoch": 0.5381834613646633, "eta": "189:20:02", "grad_norm": 0.0077, "loss": 0.0798, "lr": "4.873e-05", "step": 3573, "steps": "23.01s,3573/33195" }, { "epoch": 0.538334086458804, "eta": "190:28:46", "grad_norm": 0.007, "loss": 0.0647, "lr": "4.873e-05", "step": 3574, "steps": "23.15s,3574/33195" }, { "epoch": 0.5384847115529448, "eta": "189:58:45", "grad_norm": 0.0059, "loss": 0.0382, "lr": "4.873e-05", "step": 3575, "steps": "23.09s,3575/33195" }, { "epoch": 0.5386353366470854, "eta": "187:25:20", "grad_norm": 0.0077, "loss": 0.032, "lr": "4.873e-05", "step": 3576, "steps": "22.78s,3576/33195" }, { "epoch": 0.5387859617412261, "eta": "189:03:41", "grad_norm": 0.0061, "loss": 0.0616, "lr": "4.873e-05", "step": 3577, "steps": "22.98s,3577/33195" }, { "epoch": 0.5389365868353668, "eta": "192:55:18", "grad_norm": 0.0066, "loss": 0.0857, "lr": "4.873e-05", "step": 3578, "steps": "23.45s,3578/33195" }, { "epoch": 0.5390872119295075, "eta": "192:54:55", "grad_norm": 0.0087, "loss": 0.053, "lr": "4.872e-05", "step": 3579, "steps": "23.45s,3579/33195" }, { "epoch": 0.5392378370236481, "eta": "188:42:48", "grad_norm": 0.0093, "loss": 0.0517, "lr": "4.872e-05", "step": 3580, "steps": "22.94s,3580/33195" }, { "epoch": 0.5393884621177888, "eta": "187:43:11", "grad_norm": 0.0082, "loss": 0.0439, "lr": "4.872e-05", "step": 3581, "steps": "22.82s,3581/33195" }, { "epoch": 0.5395390872119296, "eta": "190:05:56", "grad_norm": 0.0064, "loss": 0.0349, "lr": "4.872e-05", "step": 3582, "steps": "23.11s,3582/33195" }, { "epoch": 0.5396897123060702, "eta": "190:15:25", "grad_norm": 0.006, "loss": 0.0569, "lr": "4.872e-05", "step": 3583, "steps": "23.13s,3583/33195" }, { "epoch": 0.5398403374002109, "eta": "190:15:02", "grad_norm": 0.0089, "loss": 0.0515, "lr": "4.872e-05", "step": 3584, "steps": "23.13s,3584/33195" }, { "epoch": 0.5399909624943515, "eta": "192:47:38", "grad_norm": 0.0086, "loss": 0.0266, "lr": "4.872e-05", "step": 3585, "steps": "23.44s,3585/33195" }, { "epoch": 0.5401415875884923, "eta": "189:00:14", "grad_norm": 0.007, "loss": 0.0623, "lr": "4.872e-05", "step": 3586, "steps": "22.98s,3586/33195" }, { "epoch": 0.5402922126826329, "eta": "190:48:25", "grad_norm": 0.0059, "loss": 0.0411, "lr": "4.872e-05", "step": 3587, "steps": "23.2s,3587/33195" }, { "epoch": 0.5404428377767736, "eta": "189:04:24", "grad_norm": 0.006, "loss": 0.0658, "lr": "4.872e-05", "step": 3588, "steps": "22.99s,3588/33195" }, { "epoch": 0.5405934628709143, "eta": "192:46:04", "grad_norm": 0.0064, "loss": 0.0334, "lr": "4.872e-05", "step": 3589, "steps": "23.44s,3589/33195" }, { "epoch": 0.540744087965055, "eta": "192:25:57", "grad_norm": 0.0088, "loss": 0.042, "lr": "4.872e-05", "step": 3590, "steps": "23.4s,3590/33195" }, { "epoch": 0.5408947130591957, "eta": "187:24:35", "grad_norm": 0.0058, "loss": 0.0492, "lr": "4.872e-05", "step": 3591, "steps": "22.79s,3591/33195" }, { "epoch": 0.5410453381533363, "eta": "188:28:20", "grad_norm": 0.0077, "loss": 0.0676, "lr": "4.871e-05", "step": 3592, "steps": "22.92s,3592/33195" }, { "epoch": 0.541195963247477, "eta": "189:56:46", "grad_norm": 0.0067, "loss": 0.0687, "lr": "4.871e-05", "step": 3593, "steps": "23.1s,3593/33195" }, { "epoch": 0.5413465883416178, "eta": "188:47:18", "grad_norm": 0.007, "loss": 0.0606, "lr": "4.871e-05", "step": 3594, "steps": "22.96s,3594/33195" }, { "epoch": 0.5414972134357584, "eta": "191:29:44", "grad_norm": 0.0078, "loss": 0.0414, "lr": "4.871e-05", "step": 3595, "steps": "23.29s,3595/33195" }, { "epoch": 0.5416478385298991, "eta": "189:45:44", "grad_norm": 0.0096, "loss": 0.0557, "lr": "4.871e-05", "step": 3596, "steps": "23.08s,3596/33195" }, { "epoch": 0.5417984636240397, "eta": "190:00:09", "grad_norm": 0.0078, "loss": 0.0571, "lr": "4.871e-05", "step": 3597, "steps": "23.11s,3597/33195" }, { "epoch": 0.5419490887181805, "eta": "190:09:38", "grad_norm": 0.0082, "loss": 0.0478, "lr": "4.871e-05", "step": 3598, "steps": "23.13s,3598/33195" }, { "epoch": 0.5420997138123211, "eta": "190:24:03", "grad_norm": 0.0086, "loss": 0.0651, "lr": "4.871e-05", "step": 3599, "steps": "23.16s,3599/33195" }, { "epoch": 0.5422503389064618, "eta": "190:13:48", "grad_norm": 0.0064, "loss": 0.0493, "lr": "4.871e-05", "step": 3600, "steps": "23.14s,3600/33195" }, { "epoch": 0.5424009640006024, "eta": "421:28:04", "grad_norm": 0.0075, "loss": 0.0557, "lr": "4.871e-05", "step": 3601, "steps": "51.27s,3601/33195" }, { "epoch": 0.5425515890947432, "eta": "190:13:02", "grad_norm": 0.0079, "loss": 0.0607, "lr": "4.871e-05", "step": 3602, "steps": "23.14s,3602/33195" }, { "epoch": 0.5427022141888839, "eta": "190:47:10", "grad_norm": 0.0072, "loss": 0.0407, "lr": "4.871e-05", "step": 3603, "steps": "23.21s,3603/33195" }, { "epoch": 0.5428528392830245, "eta": "192:30:21", "grad_norm": 0.0069, "loss": 0.0595, "lr": "4.871e-05", "step": 3604, "steps": "23.42s,3604/33195" }, { "epoch": 0.5430034643771653, "eta": "190:06:56", "grad_norm": 0.0069, "loss": 0.0789, "lr": "4.870e-05", "step": 3605, "steps": "23.13s,3605/33195" }, { "epoch": 0.543154089471306, "eta": "190:55:52", "grad_norm": 0.0063, "loss": 0.0556, "lr": "4.870e-05", "step": 3606, "steps": "23.23s,3606/33195" }, { "epoch": 0.5433047145654466, "eta": "192:39:02", "grad_norm": 0.0068, "loss": 0.0584, "lr": "4.870e-05", "step": 3607, "steps": "23.44s,3607/33195" }, { "epoch": 0.5434553396595873, "eta": "191:14:49", "grad_norm": 0.0077, "loss": 0.0462, "lr": "4.870e-05", "step": 3608, "steps": "23.27s,3608/33195" }, { "epoch": 0.543605964753728, "eta": "191:14:26", "grad_norm": 0.0066, "loss": 0.0666, "lr": "4.870e-05", "step": 3609, "steps": "23.27s,3609/33195" }, { "epoch": 0.5437565898478687, "eta": "192:08:17", "grad_norm": 0.0058, "loss": 0.05, "lr": "4.870e-05", "step": 3610, "steps": "23.38s,3610/33195" }, { "epoch": 0.5439072149420093, "eta": "190:53:56", "grad_norm": 0.006, "loss": 0.0583, "lr": "4.870e-05", "step": 3611, "steps": "23.23s,3611/33195" }, { "epoch": 0.54405784003615, "eta": "189:29:43", "grad_norm": 0.0066, "loss": 0.0591, "lr": "4.870e-05", "step": 3612, "steps": "23.06s,3612/33195" }, { "epoch": 0.5442084651302908, "eta": "189:58:55", "grad_norm": 0.0071, "loss": 0.0419, "lr": "4.870e-05", "step": 3613, "steps": "23.12s,3613/33195" }, { "epoch": 0.5443590902244314, "eta": "190:37:59", "grad_norm": 0.007, "loss": 0.0668, "lr": "4.870e-05", "step": 3614, "steps": "23.2s,3614/33195" }, { "epoch": 0.5445097153185721, "eta": "190:08:01", "grad_norm": 0.0089, "loss": 0.0639, "lr": "4.870e-05", "step": 3615, "steps": "23.14s,3615/33195" }, { "epoch": 0.5446603404127127, "eta": "190:37:12", "grad_norm": 0.0066, "loss": 0.0583, "lr": "4.870e-05", "step": 3616, "steps": "23.2s,3616/33195" }, { "epoch": 0.5448109655068535, "eta": "192:54:51", "grad_norm": 0.0073, "loss": 0.0411, "lr": "4.870e-05", "step": 3617, "steps": "23.48s,3617/33195" }, { "epoch": 0.5449615906009941, "eta": "189:42:12", "grad_norm": 0.0064, "loss": 0.053, "lr": "4.870e-05", "step": 3618, "steps": "23.09s,3618/33195" }, { "epoch": 0.5451122156951348, "eta": "189:36:54", "grad_norm": 0.0078, "loss": 0.0504, "lr": "4.869e-05", "step": 3619, "steps": "23.08s,3619/33195" }, { "epoch": 0.5452628407892754, "eta": "190:11:01", "grad_norm": 0.0063, "loss": 0.0519, "lr": "4.869e-05", "step": 3620, "steps": "23.15s,3620/33195" }, { "epoch": 0.5454134658834162, "eta": "190:00:46", "grad_norm": 0.0071, "loss": 0.0481, "lr": "4.869e-05", "step": 3621, "steps": "23.13s,3621/33195" }, { "epoch": 0.5455640909775569, "eta": "189:35:44", "grad_norm": 0.008, "loss": 0.0451, "lr": "4.869e-05", "step": 3622, "steps": "23.08s,3622/33195" }, { "epoch": 0.5457147160716975, "eta": "192:47:34", "grad_norm": 0.0062, "loss": 0.0488, "lr": "4.869e-05", "step": 3623, "steps": "23.47s,3623/33195" }, { "epoch": 0.5458653411658382, "eta": "188:21:03", "grad_norm": 0.0085, "loss": 0.0756, "lr": "4.869e-05", "step": 3624, "steps": "22.93s,3624/33195" }, { "epoch": 0.546015966259979, "eta": "186:51:57", "grad_norm": 0.0063, "loss": 0.0384, "lr": "4.869e-05", "step": 3625, "steps": "22.75s,3625/33195" }, { "epoch": 0.5461665913541196, "eta": "192:11:54", "grad_norm": 0.0076, "loss": 0.0723, "lr": "4.869e-05", "step": 3626, "steps": "23.4s,3626/33195" }, { "epoch": 0.5463172164482603, "eta": "189:23:58", "grad_norm": 0.0059, "loss": 0.0476, "lr": "4.869e-05", "step": 3627, "steps": "23.06s,3627/33195" }, { "epoch": 0.5464678415424009, "eta": "188:39:13", "grad_norm": 0.0084, "loss": 0.0615, "lr": "4.869e-05", "step": 3628, "steps": "22.97s,3628/33195" }, { "epoch": 0.5466184666365417, "eta": "192:15:40", "grad_norm": 0.0082, "loss": 0.0608, "lr": "4.869e-05", "step": 3629, "steps": "23.41s,3629/33195" }, { "epoch": 0.5467690917306823, "eta": "191:45:42", "grad_norm": 0.0064, "loss": 0.0485, "lr": "4.869e-05", "step": 3630, "steps": "23.35s,3630/33195" }, { "epoch": 0.546919716824823, "eta": "189:07:38", "grad_norm": 0.0061, "loss": 0.0302, "lr": "4.869e-05", "step": 3631, "steps": "23.03s,3631/33195" }, { "epoch": 0.5470703419189638, "eta": "192:29:16", "grad_norm": 0.0074, "loss": 0.0618, "lr": "4.868e-05", "step": 3632, "steps": "23.44s,3632/33195" }, { "epoch": 0.5472209670131044, "eta": "189:31:30", "grad_norm": 0.0088, "loss": 0.046, "lr": "4.868e-05", "step": 3633, "steps": "23.08s,3633/33195" }, { "epoch": 0.5473715921072451, "eta": "189:40:59", "grad_norm": 0.0078, "loss": 0.0657, "lr": "4.868e-05", "step": 3634, "steps": "23.1s,3634/33195" }, { "epoch": 0.5475222172013857, "eta": "193:07:31", "grad_norm": 0.0059, "loss": 0.0629, "lr": "4.868e-05", "step": 3635, "steps": "23.52s,3635/33195" }, { "epoch": 0.5476728422955265, "eta": "190:24:33", "grad_norm": 0.0059, "loss": 0.0526, "lr": "4.868e-05", "step": 3636, "steps": "23.19s,3636/33195" }, { "epoch": 0.5478234673896671, "eta": "188:16:04", "grad_norm": 0.0058, "loss": 0.0533, "lr": "4.868e-05", "step": 3637, "steps": "22.93s,3637/33195" }, { "epoch": 0.5479740924838078, "eta": "188:10:46", "grad_norm": 0.007, "loss": 0.0733, "lr": "4.868e-05", "step": 3638, "steps": "22.92s,3638/33195" }, { "epoch": 0.5481247175779485, "eta": "191:27:25", "grad_norm": 0.0064, "loss": 0.0453, "lr": "4.868e-05", "step": 3639, "steps": "23.32s,3639/33195" }, { "epoch": 0.5482753426720892, "eta": "188:24:47", "grad_norm": 0.0066, "loss": 0.0331, "lr": "4.868e-05", "step": 3640, "steps": "22.95s,3640/33195" }, { "epoch": 0.5484259677662299, "eta": "189:08:44", "grad_norm": 0.0076, "loss": 0.0474, "lr": "4.868e-05", "step": 3641, "steps": "23.04s,3641/33195" }, { "epoch": 0.5485765928603705, "eta": "190:02:31", "grad_norm": 0.0068, "loss": 0.0682, "lr": "4.868e-05", "step": 3642, "steps": "23.15s,3642/33195" }, { "epoch": 0.5487272179545112, "eta": "190:11:59", "grad_norm": 0.01, "loss": 0.0381, "lr": "4.868e-05", "step": 3643, "steps": "23.17s,3643/33195" }, { "epoch": 0.548877843048652, "eta": "189:46:59", "grad_norm": 0.0063, "loss": 0.044, "lr": "4.868e-05", "step": 3644, "steps": "23.12s,3644/33195" }, { "epoch": 0.5490284681427926, "eta": "191:59:34", "grad_norm": 0.0057, "loss": 0.0427, "lr": "4.867e-05", "step": 3645, "steps": "23.39s,3645/33195" }, { "epoch": 0.5491790932369333, "eta": "190:15:45", "grad_norm": 0.006, "loss": 0.043, "lr": "4.867e-05", "step": 3646, "steps": "23.18s,3646/33195" }, { "epoch": 0.5493297183310739, "eta": "192:18:29", "grad_norm": 0.0064, "loss": 0.055, "lr": "4.867e-05", "step": 3647, "steps": "23.43s,3647/33195" }, { "epoch": 0.5494803434252147, "eta": "189:06:02", "grad_norm": 0.0077, "loss": 0.052, "lr": "4.867e-05", "step": 3648, "steps": "23.04s,3648/33195" }, { "epoch": 0.5496309685193553, "eta": "189:40:08", "grad_norm": 0.0077, "loss": 0.0622, "lr": "4.867e-05", "step": 3649, "steps": "23.11s,3649/33195" }, { "epoch": 0.549781593613496, "eta": "190:09:17", "grad_norm": 0.006, "loss": 0.0414, "lr": "4.867e-05", "step": 3650, "steps": "23.17s,3650/33195" }, { "epoch": 0.5499322187076366, "eta": "190:08:54", "grad_norm": 0.0128, "loss": 0.0462, "lr": "4.867e-05", "step": 3651, "steps": "23.17s,3651/33195" }, { "epoch": 0.5500828438017774, "eta": "189:43:54", "grad_norm": 0.0085, "loss": 0.0554, "lr": "4.867e-05", "step": 3652, "steps": "23.12s,3652/33195" }, { "epoch": 0.5502334688959181, "eta": "189:58:17", "grad_norm": 0.0073, "loss": 0.0607, "lr": "4.867e-05", "step": 3653, "steps": "23.15s,3653/33195" }, { "epoch": 0.5503840939900587, "eta": "189:48:03", "grad_norm": 0.0062, "loss": 0.0376, "lr": "4.867e-05", "step": 3654, "steps": "23.13s,3654/33195" }, { "epoch": 0.5505347190841994, "eta": "191:45:49", "grad_norm": 0.0064, "loss": 0.0412, "lr": "4.867e-05", "step": 3655, "steps": "23.37s,3655/33195" }, { "epoch": 0.5506853441783401, "eta": "190:06:58", "grad_norm": 0.0074, "loss": 0.0724, "lr": "4.867e-05", "step": 3656, "steps": "23.17s,3656/33195" }, { "epoch": 0.5508359692724808, "eta": "190:06:35", "grad_norm": 0.0066, "loss": 0.0385, "lr": "4.867e-05", "step": 3657, "steps": "23.17s,3657/33195" }, { "epoch": 0.5509865943666215, "eta": "192:53:34", "grad_norm": 0.0071, "loss": 0.0394, "lr": "4.866e-05", "step": 3658, "steps": "23.51s,3658/33195" }, { "epoch": 0.5511372194607622, "eta": "191:04:53", "grad_norm": 0.0087, "loss": 0.0735, "lr": "4.866e-05", "step": 3659, "steps": "23.29s,3659/33195" }, { "epoch": 0.5512878445549029, "eta": "191:43:52", "grad_norm": 0.0071, "loss": 0.0672, "lr": "4.866e-05", "step": 3660, "steps": "23.37s,3660/33195" }, { "epoch": 0.5514384696490435, "eta": "189:55:12", "grad_norm": 0.0129, "loss": 0.0672, "lr": "4.866e-05", "step": 3661, "steps": "23.15s,3661/33195" }, { "epoch": 0.5515890947431842, "eta": "192:27:24", "grad_norm": 0.0065, "loss": 0.0552, "lr": "4.866e-05", "step": 3662, "steps": "23.46s,3662/33195" }, { "epoch": 0.551739719837325, "eta": "189:44:35", "grad_norm": 0.0057, "loss": 0.0492, "lr": "4.866e-05", "step": 3663, "steps": "23.13s,3663/33195" }, { "epoch": 0.5518903449314656, "eta": "189:44:12", "grad_norm": 0.0067, "loss": 0.0453, "lr": "4.866e-05", "step": 3664, "steps": "23.13s,3664/33195" }, { "epoch": 0.5520409700256063, "eta": "193:10:31", "grad_norm": 0.006, "loss": 0.0671, "lr": "4.866e-05", "step": 3665, "steps": "23.55s,3665/33195" }, { "epoch": 0.5521915951197469, "eta": "189:28:39", "grad_norm": 0.0069, "loss": 0.0541, "lr": "4.866e-05", "step": 3666, "steps": "23.1s,3666/33195" }, { "epoch": 0.5523422202138877, "eta": "189:13:30", "grad_norm": 0.0069, "loss": 0.0556, "lr": "4.866e-05", "step": 3667, "steps": "23.07s,3667/33195" }, { "epoch": 0.5524928453080283, "eta": "189:22:58", "grad_norm": 0.0179, "loss": 0.0415, "lr": "4.866e-05", "step": 3668, "steps": "23.09s,3668/33195" }, { "epoch": 0.552643470402169, "eta": "192:19:44", "grad_norm": 0.0101, "loss": 0.053, "lr": "4.866e-05", "step": 3669, "steps": "23.45s,3669/33195" }, { "epoch": 0.5527940954963096, "eta": "192:29:11", "grad_norm": 0.0076, "loss": 0.0532, "lr": "4.866e-05", "step": 3670, "steps": "23.47s,3670/33195" }, { "epoch": 0.5529447205904504, "eta": "190:11:01", "grad_norm": 0.0084, "loss": 0.0333, "lr": "4.865e-05", "step": 3671, "steps": "23.19s,3671/33195" }, { "epoch": 0.5530953456845911, "eta": "189:11:35", "grad_norm": 0.0072, "loss": 0.072, "lr": "4.865e-05", "step": 3672, "steps": "23.07s,3672/33195" }, { "epoch": 0.5532459707787317, "eta": "191:58:30", "grad_norm": 0.0065, "loss": 0.0516, "lr": "4.865e-05", "step": 3673, "steps": "23.41s,3673/33195" }, { "epoch": 0.5533965958728724, "eta": "189:00:59", "grad_norm": 0.009, "loss": 0.0575, "lr": "4.865e-05", "step": 3674, "steps": "23.05s,3674/33195" }, { "epoch": 0.5535472209670131, "eta": "192:42:00", "grad_norm": 0.0095, "loss": 0.0454, "lr": "4.865e-05", "step": 3675, "steps": "23.5s,3675/33195" }, { "epoch": 0.5536978460611538, "eta": "192:02:14", "grad_norm": 0.0103, "loss": 0.025, "lr": "4.865e-05", "step": 3676, "steps": "23.42s,3676/33195" }, { "epoch": 0.5538484711552945, "eta": "189:09:40", "grad_norm": 0.006, "loss": 0.0372, "lr": "4.865e-05", "step": 3677, "steps": "23.07s,3677/33195" }, { "epoch": 0.5539990962494351, "eta": "192:06:23", "grad_norm": 0.0083, "loss": 0.0615, "lr": "4.865e-05", "step": 3678, "steps": "23.43s,3678/33195" }, { "epoch": 0.5541497213435759, "eta": "191:06:57", "grad_norm": 0.0093, "loss": 0.0496, "lr": "4.865e-05", "step": 3679, "steps": "23.31s,3679/33195" }, { "epoch": 0.5543003464377165, "eta": "189:57:42", "grad_norm": 0.0065, "loss": 0.0691, "lr": "4.865e-05", "step": 3680, "steps": "23.17s,3680/33195" }, { "epoch": 0.5544509715318572, "eta": "188:14:01", "grad_norm": 0.007, "loss": 0.0569, "lr": "4.865e-05", "step": 3681, "steps": "22.96s,3681/33195" }, { "epoch": 0.5546015966259978, "eta": "189:22:30", "grad_norm": 0.0102, "loss": 0.0599, "lr": "4.865e-05", "step": 3682, "steps": "23.1s,3682/33195" }, { "epoch": 0.5547522217201386, "eta": "188:52:36", "grad_norm": 0.0073, "loss": 0.0663, "lr": "4.865e-05", "step": 3683, "steps": "23.04s,3683/33195" }, { "epoch": 0.5549028468142793, "eta": "187:23:41", "grad_norm": 0.0064, "loss": 0.0587, "lr": "4.864e-05", "step": 3684, "steps": "22.86s,3684/33195" }, { "epoch": 0.5550534719084199, "eta": "191:43:58", "grad_norm": 0.0145, "loss": 0.0477, "lr": "4.864e-05", "step": 3685, "steps": "23.39s,3685/33195" }, { "epoch": 0.5552040970025607, "eta": "188:56:22", "grad_norm": 0.0055, "loss": 0.0584, "lr": "4.864e-05", "step": 3686, "steps": "23.05s,3686/33195" }, { "epoch": 0.5553547220967013, "eta": "189:15:39", "grad_norm": 0.0085, "loss": 0.0401, "lr": "4.864e-05", "step": 3687, "steps": "23.09s,3687/33195" }, { "epoch": 0.555505347190842, "eta": "189:15:16", "grad_norm": 0.0073, "loss": 0.0428, "lr": "4.864e-05", "step": 3688, "steps": "23.09s,3688/33195" }, { "epoch": 0.5556559722849826, "eta": "189:00:08", "grad_norm": 0.0084, "loss": 0.0344, "lr": "4.864e-05", "step": 3689, "steps": "23.06s,3689/33195" }, { "epoch": 0.5558065973791234, "eta": "191:46:57", "grad_norm": 0.0068, "loss": 0.0432, "lr": "4.864e-05", "step": 3690, "steps": "23.4s,3690/33195" }, { "epoch": 0.5559572224732641, "eta": "191:41:38", "grad_norm": 0.0106, "loss": 0.0274, "lr": "4.864e-05", "step": 3691, "steps": "23.39s,3691/33195" }, { "epoch": 0.5561078475674047, "eta": "188:34:24", "grad_norm": 0.0073, "loss": 0.0492, "lr": "4.864e-05", "step": 3692, "steps": "23.01s,3692/33195" }, { "epoch": 0.5562584726615454, "eta": "189:52:41", "grad_norm": 0.0084, "loss": 0.0577, "lr": "4.864e-05", "step": 3693, "steps": "23.17s,3693/33195" }, { "epoch": 0.5564090977556861, "eta": "191:45:23", "grad_norm": 0.0089, "loss": 0.0565, "lr": "4.864e-05", "step": 3694, "steps": "23.4s,3694/33195" }, { "epoch": 0.5565597228498268, "eta": "188:48:00", "grad_norm": 0.0066, "loss": 0.0781, "lr": "4.864e-05", "step": 3695, "steps": "23.04s,3695/33195" }, { "epoch": 0.5567103479439675, "eta": "195:30:46", "grad_norm": 0.0065, "loss": 0.0585, "lr": "4.864e-05", "step": 3696, "steps": "23.86s,3696/33195" }, { "epoch": 0.5568609730381081, "eta": "212:42:48", "grad_norm": 0.0069, "loss": 0.0572, "lr": "4.863e-05", "step": 3697, "steps": "25.96s,3697/33195" }, { "epoch": 0.5570115981322489, "eta": "292:11:03", "grad_norm": 0.0079, "loss": 0.0527, "lr": "4.863e-05", "step": 3698, "steps": "35.66s,3698/33195" }, { "epoch": 0.5571622232263895, "eta": "225:04:15", "grad_norm": 0.0087, "loss": 0.0548, "lr": "4.863e-05", "step": 3699, "steps": "27.47s,3699/33195" }, { "epoch": 0.5573128483205302, "eta": "200:43:47", "grad_norm": 0.0083, "loss": 0.0384, "lr": "4.863e-05", "step": 3700, "steps": "24.5s,3700/33195" }, { "epoch": 0.5574634734146708, "eta": "190:38:45", "grad_norm": 0.0063, "loss": 0.0665, "lr": "4.863e-05", "step": 3701, "steps": "23.27s,3701/33195" }, { "epoch": 0.5576140985088116, "eta": "193:15:39", "grad_norm": 0.0062, "loss": 0.0555, "lr": "4.863e-05", "step": 3702, "steps": "23.59s,3702/33195" }, { "epoch": 0.5577647236029523, "eta": "189:43:54", "grad_norm": 0.0076, "loss": 0.0677, "lr": "4.863e-05", "step": 3703, "steps": "23.16s,3703/33195" }, { "epoch": 0.5579153486970929, "eta": "196:11:49", "grad_norm": 0.0129, "loss": 0.0551, "lr": "4.863e-05", "step": 3704, "steps": "23.95s,3704/33195" }, { "epoch": 0.5580659737912336, "eta": "188:44:09", "grad_norm": 0.007, "loss": 0.0459, "lr": "4.863e-05", "step": 3705, "steps": "23.04s,3705/33195" }, { "epoch": 0.5582165988853743, "eta": "187:00:33", "grad_norm": 0.0074, "loss": 0.0525, "lr": "4.863e-05", "step": 3706, "steps": "22.83s,3706/33195" }, { "epoch": 0.558367223979515, "eta": "192:24:33", "grad_norm": 0.0089, "loss": 0.0614, "lr": "4.863e-05", "step": 3707, "steps": "23.49s,3707/33195" }, { "epoch": 0.5585178490736556, "eta": "193:03:28", "grad_norm": 0.0078, "loss": 0.0522, "lr": "4.863e-05", "step": 3708, "steps": "23.57s,3708/33195" }, { "epoch": 0.5586684741677963, "eta": "189:26:51", "grad_norm": 0.0073, "loss": 0.0456, "lr": "4.863e-05", "step": 3709, "steps": "23.13s,3709/33195" }, { "epoch": 0.5588190992619371, "eta": "187:53:05", "grad_norm": 0.0051, "loss": 0.0535, "lr": "4.862e-05", "step": 3710, "steps": "22.94s,3710/33195" }, { "epoch": 0.5589697243560777, "eta": "189:55:33", "grad_norm": 0.0063, "loss": 0.0419, "lr": "4.862e-05", "step": 3711, "steps": "23.19s,3711/33195" }, { "epoch": 0.5591203494502184, "eta": "189:40:26", "grad_norm": 0.0065, "loss": 0.0518, "lr": "4.862e-05", "step": 3712, "steps": "23.16s,3712/33195" }, { "epoch": 0.5592709745443591, "eta": "192:12:22", "grad_norm": 0.0064, "loss": 0.0776, "lr": "4.862e-05", "step": 3713, "steps": "23.47s,3713/33195" }, { "epoch": 0.5594215996384998, "eta": "189:49:29", "grad_norm": 0.009, "loss": 0.0493, "lr": "4.862e-05", "step": 3714, "steps": "23.18s,3714/33195" }, { "epoch": 0.5595722247326405, "eta": "192:06:40", "grad_norm": 0.0076, "loss": 0.0335, "lr": "4.862e-05", "step": 3715, "steps": "23.46s,3715/33195" }, { "epoch": 0.5597228498267811, "eta": "186:51:50", "grad_norm": 0.0059, "loss": 0.0532, "lr": "4.862e-05", "step": 3716, "steps": "22.82s,3716/33195" }, { "epoch": 0.5598734749209219, "eta": "191:31:30", "grad_norm": 0.0079, "loss": 0.0672, "lr": "4.862e-05", "step": 3717, "steps": "23.39s,3717/33195" }, { "epoch": 0.5600241000150625, "eta": "189:33:12", "grad_norm": 0.0074, "loss": 0.0734, "lr": "4.862e-05", "step": 3718, "steps": "23.15s,3718/33195" }, { "epoch": 0.5601747251092032, "eta": "189:47:33", "grad_norm": 0.0075, "loss": 0.0493, "lr": "4.862e-05", "step": 3719, "steps": "23.18s,3719/33195" }, { "epoch": 0.5603253502033438, "eta": "190:01:54", "grad_norm": 0.0055, "loss": 0.0457, "lr": "4.862e-05", "step": 3720, "steps": "23.21s,3720/33195" }, { "epoch": 0.5604759752974846, "eta": "188:52:45", "grad_norm": 0.006, "loss": 0.0624, "lr": "4.862e-05", "step": 3721, "steps": "23.07s,3721/33195" }, { "epoch": 0.5606266003916253, "eta": "189:16:55", "grad_norm": 0.013, "loss": 0.0346, "lr": "4.862e-05", "step": 3722, "steps": "23.12s,3722/33195" }, { "epoch": 0.5607772254857659, "eta": "189:31:16", "grad_norm": 0.0062, "loss": 0.0507, "lr": "4.861e-05", "step": 3723, "steps": "23.15s,3723/33195" }, { "epoch": 0.5609278505799066, "eta": "190:15:06", "grad_norm": 0.0063, "loss": 0.0493, "lr": "4.861e-05", "step": 3724, "steps": "23.24s,3724/33195" }, { "epoch": 0.5610784756740473, "eta": "191:52:56", "grad_norm": 0.0074, "loss": 0.0511, "lr": "4.861e-05", "step": 3725, "steps": "23.44s,3725/33195" }, { "epoch": 0.561229100768188, "eta": "191:27:59", "grad_norm": 0.0064, "loss": 0.0565, "lr": "4.861e-05", "step": 3726, "steps": "23.39s,3726/33195" }, { "epoch": 0.5613797258623286, "eta": "189:19:54", "grad_norm": 0.005, "loss": 0.0634, "lr": "4.861e-05", "step": 3727, "steps": "23.13s,3727/33195" }, { "epoch": 0.5615303509564693, "eta": "190:03:43", "grad_norm": 0.0072, "loss": 0.0445, "lr": "4.861e-05", "step": 3728, "steps": "23.22s,3728/33195" }, { "epoch": 0.5616809760506101, "eta": "189:48:36", "grad_norm": 0.0109, "loss": 0.0316, "lr": "4.861e-05", "step": 3729, "steps": "23.19s,3729/33195" }, { "epoch": 0.5618316011447507, "eta": "191:36:15", "grad_norm": 0.0075, "loss": 0.0498, "lr": "4.861e-05", "step": 3730, "steps": "23.41s,3730/33195" }, { "epoch": 0.5619822262388914, "eta": "189:33:06", "grad_norm": 0.0057, "loss": 0.0451, "lr": "4.861e-05", "step": 3731, "steps": "23.16s,3731/33195" }, { "epoch": 0.562132851333032, "eta": "191:50:12", "grad_norm": 0.0063, "loss": 0.0837, "lr": "4.861e-05", "step": 3732, "steps": "23.44s,3732/33195" }, { "epoch": 0.5622834764271728, "eta": "188:48:08", "grad_norm": 0.0069, "loss": 0.0555, "lr": "4.861e-05", "step": 3733, "steps": "23.07s,3733/33195" }, { "epoch": 0.5624341015213135, "eta": "192:23:48", "grad_norm": 0.0056, "loss": 0.057, "lr": "4.861e-05", "step": 3734, "steps": "23.51s,3734/33195" }, { "epoch": 0.5625847266154541, "eta": "189:46:17", "grad_norm": 0.0066, "loss": 0.0402, "lr": "4.861e-05", "step": 3735, "steps": "23.19s,3735/33195" }, { "epoch": 0.5627353517095948, "eta": "188:27:20", "grad_norm": 0.0074, "loss": 0.0428, "lr": "4.860e-05", "step": 3736, "steps": "23.03s,3736/33195" }, { "epoch": 0.5628859768037355, "eta": "191:43:20", "grad_norm": 0.0064, "loss": 0.0589, "lr": "4.860e-05", "step": 3737, "steps": "23.43s,3737/33195" }, { "epoch": 0.5630366018978762, "eta": "189:15:40", "grad_norm": 0.0063, "loss": 0.0479, "lr": "4.860e-05", "step": 3738, "steps": "23.13s,3738/33195" }, { "epoch": 0.5631872269920168, "eta": "191:47:28", "grad_norm": 0.0066, "loss": 0.0479, "lr": "4.860e-05", "step": 3739, "steps": "23.44s,3739/33195" }, { "epoch": 0.5633378520861576, "eta": "188:45:26", "grad_norm": 0.0073, "loss": 0.0459, "lr": "4.860e-05", "step": 3740, "steps": "23.07s,3740/33195" }, { "epoch": 0.5634884771802983, "eta": "192:16:09", "grad_norm": 0.0074, "loss": 0.0408, "lr": "4.860e-05", "step": 3741, "steps": "23.5s,3741/33195" }, { "epoch": 0.5636391022744389, "eta": "190:17:56", "grad_norm": 0.0067, "loss": 0.045, "lr": "4.860e-05", "step": 3742, "steps": "23.26s,3742/33195" }, { "epoch": 0.5637897273685796, "eta": "188:49:12", "grad_norm": 0.0068, "loss": 0.0565, "lr": "4.860e-05", "step": 3743, "steps": "23.08s,3743/33195" }, { "epoch": 0.5639403524627203, "eta": "191:50:25", "grad_norm": 0.0069, "loss": 0.0624, "lr": "4.860e-05", "step": 3744, "steps": "23.45s,3744/33195" }, { "epoch": 0.564090977556861, "eta": "189:57:09", "grad_norm": 0.0058, "loss": 0.0546, "lr": "4.860e-05", "step": 3745, "steps": "23.22s,3745/33195" }, { "epoch": 0.5642416026510017, "eta": "190:21:18", "grad_norm": 0.0079, "loss": 0.0404, "lr": "4.860e-05", "step": 3746, "steps": "23.27s,3746/33195" }, { "epoch": 0.5643922277451423, "eta": "191:49:15", "grad_norm": 0.0057, "loss": 0.0674, "lr": "4.860e-05", "step": 3747, "steps": "23.45s,3747/33195" }, { "epoch": 0.5645428528392831, "eta": "191:53:46", "grad_norm": 0.0063, "loss": 0.0525, "lr": "4.859e-05", "step": 3748, "steps": "23.46s,3748/33195" }, { "epoch": 0.5646934779334237, "eta": "192:27:44", "grad_norm": 0.0073, "loss": 0.0581, "lr": "4.859e-05", "step": 3749, "steps": "23.53s,3749/33195" }, { "epoch": 0.5648441030275644, "eta": "191:33:21", "grad_norm": 0.0055, "loss": 0.057, "lr": "4.859e-05", "step": 3750, "steps": "23.42s,3750/33195" }, { "epoch": 0.564994728121705, "eta": "189:20:28", "grad_norm": 0.0066, "loss": 0.0514, "lr": "4.859e-05", "step": 3751, "steps": "23.15s,3751/33195" }, { "epoch": 0.5651453532158458, "eta": "186:43:03", "grad_norm": 0.0069, "loss": 0.0481, "lr": "4.859e-05", "step": 3752, "steps": "22.83s,3752/33195" }, { "epoch": 0.5652959783099865, "eta": "191:42:00", "grad_norm": 0.0095, "loss": 0.0613, "lr": "4.859e-05", "step": 3753, "steps": "23.44s,3753/33195" }, { "epoch": 0.5654466034041271, "eta": "191:51:25", "grad_norm": 0.0074, "loss": 0.032, "lr": "4.859e-05", "step": 3754, "steps": "23.46s,3754/33195" }, { "epoch": 0.5655972284982678, "eta": "192:20:28", "grad_norm": 0.007, "loss": 0.0504, "lr": "4.859e-05", "step": 3755, "steps": "23.52s,3755/33195" }, { "epoch": 0.5657478535924085, "eta": "191:40:50", "grad_norm": 0.0077, "loss": 0.0456, "lr": "4.859e-05", "step": 3756, "steps": "23.44s,3756/33195" }, { "epoch": 0.5658984786865492, "eta": "188:29:05", "grad_norm": 0.0075, "loss": 0.0641, "lr": "4.859e-05", "step": 3757, "steps": "23.05s,3757/33195" }, { "epoch": 0.5660491037806898, "eta": "194:26:51", "grad_norm": 0.0058, "loss": 0.0547, "lr": "4.859e-05", "step": 3758, "steps": "23.78s,3758/33195" }, { "epoch": 0.5661997288748305, "eta": "192:23:49", "grad_norm": 0.006, "loss": 0.0428, "lr": "4.859e-05", "step": 3759, "steps": "23.53s,3759/33195" }, { "epoch": 0.5663503539689713, "eta": "191:49:05", "grad_norm": 0.0062, "loss": 0.0763, "lr": "4.859e-05", "step": 3760, "steps": "23.46s,3760/33195" }, { "epoch": 0.5665009790631119, "eta": "188:47:11", "grad_norm": 0.0061, "loss": 0.04, "lr": "4.858e-05", "step": 3761, "steps": "23.09s,3761/33195" }, { "epoch": 0.5666516041572526, "eta": "189:06:25", "grad_norm": 0.0062, "loss": 0.0746, "lr": "4.858e-05", "step": 3762, "steps": "23.13s,3762/33195" }, { "epoch": 0.5668022292513932, "eta": "192:22:14", "grad_norm": 0.0075, "loss": 0.0538, "lr": "4.858e-05", "step": 3763, "steps": "23.53s,3763/33195" }, { "epoch": 0.566952854345534, "eta": "188:26:24", "grad_norm": 0.0066, "loss": 0.0555, "lr": "4.858e-05", "step": 3764, "steps": "23.05s,3764/33195" }, { "epoch": 0.5671034794396747, "eta": "192:41:05", "grad_norm": 0.0082, "loss": 0.0597, "lr": "4.858e-05", "step": 3765, "steps": "23.57s,3765/33195" }, { "epoch": 0.5672541045338153, "eta": "192:16:10", "grad_norm": 0.007, "loss": 0.0595, "lr": "4.858e-05", "step": 3766, "steps": "23.52s,3766/33195" }, { "epoch": 0.5674047296279561, "eta": "192:05:58", "grad_norm": 0.0082, "loss": 0.0615, "lr": "4.858e-05", "step": 3767, "steps": "23.5s,3767/33195" }, { "epoch": 0.5675553547220967, "eta": "193:58:22", "grad_norm": 0.0071, "loss": 0.0635, "lr": "4.858e-05", "step": 3768, "steps": "23.73s,3768/33195" }, { "epoch": 0.5677059798162374, "eta": "191:50:28", "grad_norm": 0.0057, "loss": 0.058, "lr": "4.858e-05", "step": 3769, "steps": "23.47s,3769/33195" }, { "epoch": 0.567856604910378, "eta": "192:04:47", "grad_norm": 0.0075, "loss": 0.0755, "lr": "4.858e-05", "step": 3770, "steps": "23.5s,3770/33195" }, { "epoch": 0.5680072300045188, "eta": "191:05:33", "grad_norm": 0.0089, "loss": 0.0634, "lr": "4.858e-05", "step": 3771, "steps": "23.38s,3771/33195" }, { "epoch": 0.5681578550986595, "eta": "188:03:43", "grad_norm": 0.0072, "loss": 0.0669, "lr": "4.858e-05", "step": 3772, "steps": "23.01s,3772/33195" }, { "epoch": 0.5683084801928001, "eta": "192:03:37", "grad_norm": 0.008, "loss": 0.0502, "lr": "4.858e-05", "step": 3773, "steps": "23.5s,3773/33195" }, { "epoch": 0.5684591052869408, "eta": "191:38:42", "grad_norm": 0.0059, "loss": 0.0571, "lr": "4.857e-05", "step": 3774, "steps": "23.45s,3774/33195" }, { "epoch": 0.5686097303810815, "eta": "189:25:55", "grad_norm": 0.0082, "loss": 0.0523, "lr": "4.857e-05", "step": 3775, "steps": "23.18s,3775/33195" }, { "epoch": 0.5687603554752222, "eta": "189:25:32", "grad_norm": 0.0085, "loss": 0.0704, "lr": "4.857e-05", "step": 3776, "steps": "23.18s,3776/33195" }, { "epoch": 0.5689109805693628, "eta": "191:27:43", "grad_norm": 0.0071, "loss": 0.0263, "lr": "4.857e-05", "step": 3777, "steps": "23.43s,3777/33195" }, { "epoch": 0.5690616056635035, "eta": "188:11:13", "grad_norm": 0.0135, "loss": 0.0468, "lr": "4.857e-05", "step": 3778, "steps": "23.03s,3778/33195" }, { "epoch": 0.5692122307576443, "eta": "191:51:27", "grad_norm": 0.0084, "loss": 0.0621, "lr": "4.857e-05", "step": 3779, "steps": "23.48s,3779/33195" }, { "epoch": 0.5693628558517849, "eta": "190:32:37", "grad_norm": 0.0076, "loss": 0.0404, "lr": "4.857e-05", "step": 3780, "steps": "23.32s,3780/33195" }, { "epoch": 0.5695134809459256, "eta": "190:51:51", "grad_norm": 0.0089, "loss": 0.047, "lr": "4.857e-05", "step": 3781, "steps": "23.36s,3781/33195" }, { "epoch": 0.5696641060400662, "eta": "189:33:01", "grad_norm": 0.0107, "loss": 0.0314, "lr": "4.857e-05", "step": 3782, "steps": "23.2s,3782/33195" }, { "epoch": 0.569814731134207, "eta": "191:59:42", "grad_norm": 0.0101, "loss": 0.0632, "lr": "4.857e-05", "step": 3783, "steps": "23.5s,3783/33195" }, { "epoch": 0.5699653562283477, "eta": "188:33:25", "grad_norm": 0.009, "loss": 0.052, "lr": "4.857e-05", "step": 3784, "steps": "23.08s,3784/33195" }, { "epoch": 0.5701159813224883, "eta": "190:50:17", "grad_norm": 0.0079, "loss": 0.0507, "lr": "4.857e-05", "step": 3785, "steps": "23.36s,3785/33195" }, { "epoch": 0.570266606416629, "eta": "189:36:22", "grad_norm": 0.007, "loss": 0.0617, "lr": "4.856e-05", "step": 3786, "steps": "23.21s,3786/33195" }, { "epoch": 0.5704172315107697, "eta": "188:51:52", "grad_norm": 0.0082, "loss": 0.0472, "lr": "4.856e-05", "step": 3787, "steps": "23.12s,3787/33195" }, { "epoch": 0.5705678566049104, "eta": "191:38:08", "grad_norm": 0.0087, "loss": 0.0708, "lr": "4.856e-05", "step": 3788, "steps": "23.46s,3788/33195" }, { "epoch": 0.570718481699051, "eta": "190:48:44", "grad_norm": 0.0061, "loss": 0.0576, "lr": "4.856e-05", "step": 3789, "steps": "23.36s,3789/33195" }, { "epoch": 0.5708691067931917, "eta": "188:40:55", "grad_norm": 0.0053, "loss": 0.0442, "lr": "4.856e-05", "step": 3790, "steps": "23.1s,3790/33195" }, { "epoch": 0.5710197318873325, "eta": "189:44:14", "grad_norm": 0.0082, "loss": 0.0347, "lr": "4.856e-05", "step": 3791, "steps": "23.23s,3791/33195" }, { "epoch": 0.5711703569814731, "eta": "188:49:57", "grad_norm": 0.0086, "loss": 0.0393, "lr": "4.856e-05", "step": 3792, "steps": "23.12s,3792/33195" }, { "epoch": 0.5713209820756138, "eta": "187:21:21", "grad_norm": 0.0055, "loss": 0.0699, "lr": "4.856e-05", "step": 3793, "steps": "22.94s,3793/33195" }, { "epoch": 0.5714716071697545, "eta": "188:19:47", "grad_norm": 0.0069, "loss": 0.0559, "lr": "4.856e-05", "step": 3794, "steps": "23.06s,3794/33195" }, { "epoch": 0.5716222322638952, "eta": "190:56:12", "grad_norm": 0.0053, "loss": 0.0495, "lr": "4.856e-05", "step": 3795, "steps": "23.38s,3795/33195" }, { "epoch": 0.5717728573580358, "eta": "188:38:36", "grad_norm": 0.0081, "loss": 0.0473, "lr": "4.856e-05", "step": 3796, "steps": "23.1s,3796/33195" }, { "epoch": 0.5719234824521765, "eta": "188:18:37", "grad_norm": 0.0072, "loss": 0.0495, "lr": "4.856e-05", "step": 3797, "steps": "23.06s,3797/33195" }, { "epoch": 0.5720741075463173, "eta": "188:18:14", "grad_norm": 0.0057, "loss": 0.0764, "lr": "4.856e-05", "step": 3798, "steps": "23.06s,3798/33195" }, { "epoch": 0.5722247326404579, "eta": "188:47:15", "grad_norm": 0.0117, "loss": 0.0694, "lr": "4.855e-05", "step": 3799, "steps": "23.12s,3799/33195" }, { "epoch": 0.5723753577345986, "eta": "187:52:58", "grad_norm": 0.0059, "loss": 0.054, "lr": "4.855e-05", "step": 3800, "steps": "23.01s,3800/33195" }, { "epoch": 1.1452244652003616, "eta": "00:00:00", "grad_norm": 0.0056, "loss": 0.0512, "lr": "4.855e-05", "step": 3801, "steps": "0s,3801/16595" }, { "epoch": 1.1455257607713167, "eta": "124:58:49", "grad_norm": 0.006, "loss": 0.0549, "lr": "4.431e-05", "step": 3802, "steps": "35.17s,3802/16595" }, { "epoch": 1.1458270563422719, "eta": "117:30:31", "grad_norm": 0.0052, "loss": 0.0388, "lr": "4.431e-05", "step": 3803, "steps": "33.07s,3803/16595" }, { "epoch": 1.1461283519132268, "eta": "103:25:46", "grad_norm": 0.007, "loss": 0.0479, "lr": "4.430e-05", "step": 3804, "steps": "29.11s,3804/16595" }, { "epoch": 1.146429647484182, "eta": "92:54:18", "grad_norm": 0.0051, "loss": 0.0594, "lr": "4.430e-05", "step": 3805, "steps": "26.15s,3805/16595" }, { "epoch": 1.1467309430551371, "eta": "97:03:15", "grad_norm": 0.0067, "loss": 0.0564, "lr": "4.430e-05", "step": 3806, "steps": "27.32s,3806/16595" }, { "epoch": 1.1470322386260923, "eta": "96:50:00", "grad_norm": 0.0049, "loss": 0.0407, "lr": "4.429e-05", "step": 3807, "steps": "27.26s,3807/16595" }, { "epoch": 1.1473335341970472, "eta": "92:29:33", "grad_norm": 0.0057, "loss": 0.0366, "lr": "4.429e-05", "step": 3808, "steps": "26.04s,3808/16595" }, { "epoch": 1.1476348297680024, "eta": "102:21:32", "grad_norm": 0.0053, "loss": 0.04, "lr": "4.429e-05", "step": 3809, "steps": "28.82s,3809/16595" }, { "epoch": 1.1479361253389575, "eta": "86:26:26", "grad_norm": 0.0049, "loss": 0.0693, "lr": "4.429e-05", "step": 3810, "steps": "24.34s,3810/16595" }, { "epoch": 1.1482374209099127, "eta": "86:45:13", "grad_norm": 0.005, "loss": 0.0481, "lr": "4.428e-05", "step": 3811, "steps": "24.43s,3811/16595" }, { "epoch": 1.1485387164808678, "eta": "86:12:51", "grad_norm": 0.0058, "loss": 0.0536, "lr": "4.428e-05", "step": 3812, "steps": "24.28s,3812/16595" }, { "epoch": 1.148840012051823, "eta": "88:52:13", "grad_norm": 0.0054, "loss": 0.0571, "lr": "4.428e-05", "step": 3813, "steps": "25.03s,3813/16595" }, { "epoch": 1.1491413076227779, "eta": "86:24:49", "grad_norm": 0.0052, "loss": 0.0648, "lr": "4.427e-05", "step": 3814, "steps": "24.34s,3814/16595" }, { "epoch": 1.149442603193733, "eta": "86:03:07", "grad_norm": 0.0053, "loss": 0.0656, "lr": "4.427e-05", "step": 3815, "steps": "24.24s,3815/16595" }, { "epoch": 1.1497438987646882, "eta": "84:41:46", "grad_norm": 0.0058, "loss": 0.0554, "lr": "4.427e-05", "step": 3816, "steps": "23.86s,3816/16595" }, { "epoch": 1.1500451943356433, "eta": "85:51:39", "grad_norm": 0.0051, "loss": 0.0395, "lr": "4.426e-05", "step": 3817, "steps": "24.19s,3817/16595" }, { "epoch": 1.1503464899065983, "eta": "87:18:34", "grad_norm": 0.0052, "loss": 0.0533, "lr": "4.426e-05", "step": 3818, "steps": "24.6s,3818/16595" }, { "epoch": 1.1506477854775534, "eta": "86:03:38", "grad_norm": 0.0107, "loss": 0.0484, "lr": "4.426e-05", "step": 3819, "steps": "24.25s,3819/16595" }, { "epoch": 1.1509490810485086, "eta": "85:20:38", "grad_norm": 0.0056, "loss": 0.0482, "lr": "4.425e-05", "step": 3820, "steps": "24.05s,3820/16595" }, { "epoch": 1.1512503766194637, "eta": "85:39:24", "grad_norm": 0.0055, "loss": 0.0504, "lr": "4.425e-05", "step": 3821, "steps": "24.14s,3821/16595" }, { "epoch": 1.1515516721904189, "eta": "85:43:15", "grad_norm": 0.0055, "loss": 0.0554, "lr": "4.425e-05", "step": 3822, "steps": "24.16s,3822/16595" }, { "epoch": 1.151852967761374, "eta": "83:47:54", "grad_norm": 0.004, "loss": 0.0587, "lr": "4.425e-05", "step": 3823, "steps": "23.62s,3823/16595" }, { "epoch": 1.152154263332329, "eta": "84:27:57", "grad_norm": 0.0057, "loss": 0.0452, "lr": "4.424e-05", "step": 3824, "steps": "23.81s,3824/16595" }, { "epoch": 1.152455558903284, "eta": "84:19:02", "grad_norm": 0.0051, "loss": 0.0448, "lr": "4.424e-05", "step": 3825, "steps": "23.77s,3825/16595" }, { "epoch": 1.1527568544742393, "eta": "83:53:06", "grad_norm": 0.0052, "loss": 0.0653, "lr": "4.424e-05", "step": 3826, "steps": "23.65s,3826/16595" }, { "epoch": 1.1530581500451944, "eta": "84:01:13", "grad_norm": 0.0068, "loss": 0.0511, "lr": "4.423e-05", "step": 3827, "steps": "23.69s,3827/16595" }, { "epoch": 1.1533594456161493, "eta": "84:43:23", "grad_norm": 0.0123, "loss": 0.0387, "lr": "4.423e-05", "step": 3828, "steps": "23.89s,3828/16595" }, { "epoch": 1.1536607411871045, "eta": "86:18:44", "grad_norm": 0.0044, "loss": 0.0737, "lr": "4.423e-05", "step": 3829, "steps": "24.34s,3829/16595" }, { "epoch": 1.1539620367580596, "eta": "83:47:16", "grad_norm": 0.0053, "loss": 0.053, "lr": "4.422e-05", "step": 3830, "steps": "23.63s,3830/16595" }, { "epoch": 1.1542633323290148, "eta": "84:06:02", "grad_norm": 0.006, "loss": 0.0494, "lr": "4.422e-05", "step": 3831, "steps": "23.72s,3831/16595" }, { "epoch": 1.15456462789997, "eta": "83:59:15", "grad_norm": 0.0058, "loss": 0.0492, "lr": "4.422e-05", "step": 3832, "steps": "23.69s,3832/16595" }, { "epoch": 1.154865923470925, "eta": "85:53:43", "grad_norm": 0.0055, "loss": 0.0568, "lr": "4.422e-05", "step": 3833, "steps": "24.23s,3833/16595" }, { "epoch": 1.15516721904188, "eta": "84:51:38", "grad_norm": 0.0045, "loss": 0.0604, "lr": "4.421e-05", "step": 3834, "steps": "23.94s,3834/16595" }, { "epoch": 1.1554685146128352, "eta": "83:58:04", "grad_norm": 0.0056, "loss": 0.0448, "lr": "4.421e-05", "step": 3835, "steps": "23.69s,3835/16595" }, { "epoch": 1.1557698101837903, "eta": "85:12:06", "grad_norm": 0.0056, "loss": 0.0608, "lr": "4.421e-05", "step": 3836, "steps": "24.04s,3836/16595" }, { "epoch": 1.1560711057547455, "eta": "85:28:42", "grad_norm": 0.0054, "loss": 0.0572, "lr": "4.420e-05", "step": 3837, "steps": "24.12s,3837/16595" }, { "epoch": 1.1563724013257004, "eta": "84:58:32", "grad_norm": 0.0051, "loss": 0.0521, "lr": "4.420e-05", "step": 3838, "steps": "23.98s,3838/16595" }, { "epoch": 1.1566736968966556, "eta": "84:15:37", "grad_norm": 0.005, "loss": 0.0503, "lr": "4.420e-05", "step": 3839, "steps": "23.78s,3839/16595" }, { "epoch": 1.1569749924676107, "eta": "83:58:13", "grad_norm": 0.0055, "loss": 0.0585, "lr": "4.419e-05", "step": 3840, "steps": "23.7s,3840/16595" }, { "epoch": 1.1572762880385659, "eta": "83:59:57", "grad_norm": 0.004, "loss": 0.0569, "lr": "4.419e-05", "step": 3841, "steps": "23.71s,3841/16595" }, { "epoch": 1.157577583609521, "eta": "84:54:49", "grad_norm": 0.0048, "loss": 0.0551, "lr": "4.419e-05", "step": 3842, "steps": "23.97s,3842/16595" }, { "epoch": 1.1578788791804762, "eta": "85:13:33", "grad_norm": 0.005, "loss": 0.0815, "lr": "4.418e-05", "step": 3843, "steps": "24.06s,3843/16595" }, { "epoch": 1.158180174751431, "eta": "84:09:23", "grad_norm": 0.005, "loss": 0.0478, "lr": "4.418e-05", "step": 3844, "steps": "23.76s,3844/16595" }, { "epoch": 1.1584814703223862, "eta": "83:58:22", "grad_norm": 0.0056, "loss": 0.0633, "lr": "4.418e-05", "step": 3845, "steps": "23.71s,3845/16595" }, { "epoch": 1.1587827658933414, "eta": "84:31:58", "grad_norm": 0.0054, "loss": 0.053, "lr": "4.418e-05", "step": 3846, "steps": "23.87s,3846/16595" }, { "epoch": 1.1590840614642965, "eta": "83:53:20", "grad_norm": 0.0053, "loss": 0.0442, "lr": "4.417e-05", "step": 3847, "steps": "23.69s,3847/16595" }, { "epoch": 1.1593853570352515, "eta": "85:22:10", "grad_norm": 0.005, "loss": 0.047, "lr": "4.417e-05", "step": 3848, "steps": "24.11s,3848/16595" }, { "epoch": 1.1596866526062066, "eta": "85:04:46", "grad_norm": 0.0048, "loss": 0.0483, "lr": "4.417e-05", "step": 3849, "steps": "24.03s,3849/16595" }, { "epoch": 1.1599879481771618, "eta": "84:13:23", "grad_norm": 0.0056, "loss": 0.0499, "lr": "4.416e-05", "step": 3850, "steps": "23.79s,3850/16595" }, { "epoch": 1.160289243748117, "eta": "85:01:50", "grad_norm": 0.005, "loss": 0.0456, "lr": "4.416e-05", "step": 3851, "steps": "24.02s,3851/16595" }, { "epoch": 1.160590539319072, "eta": "85:03:34", "grad_norm": 0.0054, "loss": 0.0611, "lr": "4.416e-05", "step": 3852, "steps": "24.03s,3852/16595" }, { "epoch": 1.1608918348900272, "eta": "83:53:05", "grad_norm": 0.0045, "loss": 0.0537, "lr": "4.415e-05", "step": 3853, "steps": "23.7s,3853/16595" }, { "epoch": 1.1611931304609822, "eta": "83:42:04", "grad_norm": 0.0048, "loss": 0.0449, "lr": "4.415e-05", "step": 3854, "steps": "23.65s,3854/16595" }, { "epoch": 1.1614944260319373, "eta": "83:39:33", "grad_norm": 0.0049, "loss": 0.0499, "lr": "4.415e-05", "step": 3855, "steps": "23.64s,3855/16595" }, { "epoch": 1.1617957216028925, "eta": "85:14:42", "grad_norm": 0.0041, "loss": 0.0523, "lr": "4.414e-05", "step": 3856, "steps": "24.09s,3856/16595" }, { "epoch": 1.1620970171738476, "eta": "83:32:24", "grad_norm": 0.0068, "loss": 0.0473, "lr": "4.414e-05", "step": 3857, "steps": "23.61s,3857/16595" }, { "epoch": 1.1623983127448025, "eta": "83:36:15", "grad_norm": 0.0055, "loss": 0.0523, "lr": "4.414e-05", "step": 3858, "steps": "23.63s,3858/16595" }, { "epoch": 1.1626996083157577, "eta": "85:21:59", "grad_norm": 0.0049, "loss": 0.0589, "lr": "4.414e-05", "step": 3859, "steps": "24.13s,3859/16595" }, { "epoch": 1.1630009038867128, "eta": "84:37:01", "grad_norm": 0.0053, "loss": 0.0443, "lr": "4.413e-05", "step": 3860, "steps": "23.92s,3860/16595" }, { "epoch": 1.163302199457668, "eta": "83:49:55", "grad_norm": 0.0055, "loss": 0.0494, "lr": "4.413e-05", "step": 3861, "steps": "23.7s,3861/16595" }, { "epoch": 1.1636034950286231, "eta": "85:25:01", "grad_norm": 0.0046, "loss": 0.0494, "lr": "4.413e-05", "step": 3862, "steps": "24.15s,3862/16595" }, { "epoch": 1.1639047905995783, "eta": "84:27:20", "grad_norm": 0.0055, "loss": 0.0421, "lr": "4.412e-05", "step": 3863, "steps": "23.88s,3863/16595" }, { "epoch": 1.1642060861705332, "eta": "84:20:34", "grad_norm": 0.0052, "loss": 0.0439, "lr": "4.412e-05", "step": 3864, "steps": "23.85s,3864/16595" }, { "epoch": 1.1645073817414884, "eta": "83:39:51", "grad_norm": 0.0062, "loss": 0.0607, "lr": "4.412e-05", "step": 3865, "steps": "23.66s,3865/16595" }, { "epoch": 1.1648086773124435, "eta": "83:22:29", "grad_norm": 0.0058, "loss": 0.0462, "lr": "4.411e-05", "step": 3866, "steps": "23.58s,3866/16595" }, { "epoch": 1.1651099728833987, "eta": "84:17:15", "grad_norm": 0.0057, "loss": 0.034, "lr": "4.411e-05", "step": 3867, "steps": "23.84s,3867/16595" }, { "epoch": 1.1654112684543536, "eta": "85:39:35", "grad_norm": 0.0044, "loss": 0.0497, "lr": "4.411e-05", "step": 3868, "steps": "24.23s,3868/16595" }, { "epoch": 1.1657125640253088, "eta": "84:52:31", "grad_norm": 0.0053, "loss": 0.0585, "lr": "4.410e-05", "step": 3869, "steps": "24.01s,3869/16595" }, { "epoch": 1.166013859596264, "eta": "84:18:11", "grad_norm": 0.007, "loss": 0.0575, "lr": "4.410e-05", "step": 3870, "steps": "23.85s,3870/16595" }, { "epoch": 1.166315155167219, "eta": "84:13:32", "grad_norm": 0.0063, "loss": 0.0458, "lr": "4.410e-05", "step": 3871, "steps": "23.83s,3871/16595" }, { "epoch": 1.1666164507381742, "eta": "84:40:43", "grad_norm": 0.0054, "loss": 0.0451, "lr": "4.410e-05", "step": 3872, "steps": "23.96s,3872/16595" }, { "epoch": 1.1669177463091294, "eta": "83:51:33", "grad_norm": 0.0059, "loss": 0.0587, "lr": "4.409e-05", "step": 3873, "steps": "23.73s,3873/16595" }, { "epoch": 1.1672190418800843, "eta": "84:20:50", "grad_norm": 0.0079, "loss": 0.0661, "lr": "4.409e-05", "step": 3874, "steps": "23.87s,3874/16595" }, { "epoch": 1.1675203374510394, "eta": "83:18:57", "grad_norm": 0.0048, "loss": 0.0554, "lr": "4.409e-05", "step": 3875, "steps": "23.58s,3875/16595" }, { "epoch": 1.1678216330219946, "eta": "84:30:38", "grad_norm": 0.0047, "loss": 0.0422, "lr": "4.408e-05", "step": 3876, "steps": "23.92s,3876/16595" }, { "epoch": 1.1681229285929497, "eta": "83:30:53", "grad_norm": 0.0054, "loss": 0.0401, "lr": "4.408e-05", "step": 3877, "steps": "23.64s,3877/16595" }, { "epoch": 1.1684242241639047, "eta": "84:53:09", "grad_norm": 0.0046, "loss": 0.0503, "lr": "4.408e-05", "step": 3878, "steps": "24.03s,3878/16595" }, { "epoch": 1.1687255197348598, "eta": "84:37:55", "grad_norm": 0.0056, "loss": 0.0626, "lr": "4.407e-05", "step": 3879, "steps": "23.96s,3879/16595" }, { "epoch": 1.169026815305815, "eta": "82:57:55", "grad_norm": 0.0046, "loss": 0.0516, "lr": "4.407e-05", "step": 3880, "steps": "23.49s,3880/16595" }, { "epoch": 1.1693281108767701, "eta": "84:39:14", "grad_norm": 0.0047, "loss": 0.045, "lr": "4.407e-05", "step": 3881, "steps": "23.97s,3881/16595" }, { "epoch": 1.1696294064477253, "eta": "84:24:00", "grad_norm": 0.0059, "loss": 0.0434, "lr": "4.406e-05", "step": 3882, "steps": "23.9s,3882/16595" }, { "epoch": 1.1699307020186804, "eta": "83:53:57", "grad_norm": 0.0043, "loss": 0.0533, "lr": "4.406e-05", "step": 3883, "steps": "23.76s,3883/16595" }, { "epoch": 1.1702319975896354, "eta": "82:58:28", "grad_norm": 0.005, "loss": 0.0539, "lr": "4.406e-05", "step": 3884, "steps": "23.5s,3884/16595" }, { "epoch": 1.1705332931605905, "eta": "84:27:03", "grad_norm": 0.0055, "loss": 0.0491, "lr": "4.406e-05", "step": 3885, "steps": "23.92s,3885/16595" }, { "epoch": 1.1708345887315457, "eta": "83:16:45", "grad_norm": 0.0065, "loss": 0.0439, "lr": "4.405e-05", "step": 3886, "steps": "23.59s,3886/16595" }, { "epoch": 1.1711358843025008, "eta": "83:14:14", "grad_norm": 0.0047, "loss": 0.0512, "lr": "4.405e-05", "step": 3887, "steps": "23.58s,3887/16595" }, { "epoch": 1.1714371798734557, "eta": "84:11:01", "grad_norm": 0.0049, "loss": 0.0567, "lr": "4.405e-05", "step": 3888, "steps": "23.85s,3888/16595" }, { "epoch": 1.171738475444411, "eta": "84:08:31", "grad_norm": 0.005, "loss": 0.0366, "lr": "4.404e-05", "step": 3889, "steps": "23.84s,3889/16595" }, { "epoch": 1.172039771015366, "eta": "83:51:10", "grad_norm": 0.0052, "loss": 0.0628, "lr": "4.404e-05", "step": 3890, "steps": "23.76s,3890/16595" }, { "epoch": 1.1723410665863212, "eta": "83:29:36", "grad_norm": 0.0056, "loss": 0.0613, "lr": "4.404e-05", "step": 3891, "steps": "23.66s,3891/16595" }, { "epoch": 1.1726423621572764, "eta": "83:27:05", "grad_norm": 0.0062, "loss": 0.0474, "lr": "4.403e-05", "step": 3892, "steps": "23.65s,3892/16595" }, { "epoch": 1.1729436577282315, "eta": "85:04:05", "grad_norm": 0.0054, "loss": 0.064, "lr": "4.403e-05", "step": 3893, "steps": "24.11s,3893/16595" }, { "epoch": 1.1732449532991864, "eta": "84:27:41", "grad_norm": 0.0049, "loss": 0.0673, "lr": "4.403e-05", "step": 3894, "steps": "23.94s,3894/16595" }, { "epoch": 1.1735462488701416, "eta": "83:04:45", "grad_norm": 0.0054, "loss": 0.0497, "lr": "4.402e-05", "step": 3895, "steps": "23.55s,3895/16595" }, { "epoch": 1.1738475444410967, "eta": "83:48:48", "grad_norm": 0.0056, "loss": 0.0481, "lr": "4.402e-05", "step": 3896, "steps": "23.76s,3896/16595" }, { "epoch": 1.1741488400120519, "eta": "83:50:31", "grad_norm": 0.0061, "loss": 0.0617, "lr": "4.402e-05", "step": 3897, "steps": "23.77s,3897/16595" }, { "epoch": 1.1744501355830068, "eta": "84:21:52", "grad_norm": 0.0043, "loss": 0.0471, "lr": "4.401e-05", "step": 3898, "steps": "23.92s,3898/16595" }, { "epoch": 1.174751431153962, "eta": "84:19:21", "grad_norm": 0.0066, "loss": 0.0483, "lr": "4.401e-05", "step": 3899, "steps": "23.91s,3899/16595" }, { "epoch": 1.1750527267249171, "eta": "84:42:13", "grad_norm": 0.0054, "loss": 0.0355, "lr": "4.401e-05", "step": 3900, "steps": "24.02s,3900/16595" }, { "epoch": 1.1753540222958723, "eta": "84:52:24", "grad_norm": 0.0043, "loss": 0.0525, "lr": "4.401e-05", "step": 3901, "steps": "24.07s,3901/16595" }, { "epoch": 1.1756553178668274, "eta": "84:01:14", "grad_norm": 0.0084, "loss": 0.0512, "lr": "4.400e-05", "step": 3902, "steps": "23.83s,3902/16595" }, { "epoch": 1.1759566134377826, "eta": "83:48:08", "grad_norm": 0.0052, "loss": 0.0602, "lr": "4.400e-05", "step": 3903, "steps": "23.77s,3903/16595" }, { "epoch": 1.1762579090087375, "eta": "84:53:19", "grad_norm": 0.0049, "loss": 0.0542, "lr": "4.400e-05", "step": 3904, "steps": "24.08s,3904/16595" }, { "epoch": 1.1765592045796927, "eta": "83:55:48", "grad_norm": 0.0042, "loss": 0.0661, "lr": "4.399e-05", "step": 3905, "steps": "23.81s,3905/16595" }, { "epoch": 1.1768605001506478, "eta": "84:14:27", "grad_norm": 0.0055, "loss": 0.0506, "lr": "4.399e-05", "step": 3906, "steps": "23.9s,3906/16595" }, { "epoch": 1.177161795721603, "eta": "85:47:05", "grad_norm": 0.0045, "loss": 0.0517, "lr": "4.399e-05", "step": 3907, "steps": "24.34s,3907/16595" }, { "epoch": 1.1774630912925579, "eta": "84:32:41", "grad_norm": 0.0048, "loss": 0.0487, "lr": "4.398e-05", "step": 3908, "steps": "23.99s,3908/16595" }, { "epoch": 1.177764386863513, "eta": "83:52:06", "grad_norm": 0.0052, "loss": 0.0404, "lr": "4.398e-05", "step": 3909, "steps": "23.8s,3909/16595" }, { "epoch": 1.1780656824344682, "eta": "82:39:50", "grad_norm": 0.0046, "loss": 0.0579, "lr": "4.398e-05", "step": 3910, "steps": "23.46s,3910/16595" }, { "epoch": 1.1783669780054233, "eta": "84:39:56", "grad_norm": 0.0053, "loss": 0.0375, "lr": "4.397e-05", "step": 3911, "steps": "24.03s,3911/16595" }, { "epoch": 1.1786682735763785, "eta": "84:14:10", "grad_norm": 0.0052, "loss": 0.0454, "lr": "4.397e-05", "step": 3912, "steps": "23.91s,3912/16595" }, { "epoch": 1.1789695691473336, "eta": "83:08:15", "grad_norm": 0.006, "loss": 0.0546, "lr": "4.397e-05", "step": 3913, "steps": "23.6s,3913/16595" }, { "epoch": 1.1792708647182886, "eta": "84:02:48", "grad_norm": 0.0062, "loss": 0.0528, "lr": "4.397e-05", "step": 3914, "steps": "23.86s,3914/16595" }, { "epoch": 1.1795721602892437, "eta": "84:25:39", "grad_norm": 0.0048, "loss": 0.0429, "lr": "4.396e-05", "step": 3915, "steps": "23.97s,3915/16595" }, { "epoch": 1.1798734558601989, "eta": "84:40:03", "grad_norm": 0.0048, "loss": 0.0602, "lr": "4.396e-05", "step": 3916, "steps": "24.04s,3916/16595" }, { "epoch": 1.180174751431154, "eta": "83:59:30", "grad_norm": 0.0059, "loss": 0.0572, "lr": "4.396e-05", "step": 3917, "steps": "23.85s,3917/16595" }, { "epoch": 1.180476047002109, "eta": "83:27:24", "grad_norm": 0.0052, "loss": 0.0519, "lr": "4.395e-05", "step": 3918, "steps": "23.7s,3918/16595" }, { "epoch": 1.180777342573064, "eta": "85:16:52", "grad_norm": 0.0048, "loss": 0.069, "lr": "4.395e-05", "step": 3919, "steps": "24.22s,3919/16595" }, { "epoch": 1.1810786381440193, "eta": "85:16:28", "grad_norm": 0.0044, "loss": 0.0498, "lr": "4.395e-05", "step": 3920, "steps": "24.22s,3920/16595" }, { "epoch": 1.1813799337149744, "eta": "84:19:02", "grad_norm": 0.0048, "loss": 0.0576, "lr": "4.394e-05", "step": 3921, "steps": "23.95s,3921/16595" }, { "epoch": 1.1816812292859296, "eta": "84:22:51", "grad_norm": 0.0056, "loss": 0.0426, "lr": "4.394e-05", "step": 3922, "steps": "23.97s,3922/16595" }, { "epoch": 1.1819825248568847, "eta": "83:50:47", "grad_norm": 0.0064, "loss": 0.057, "lr": "4.394e-05", "step": 3923, "steps": "23.82s,3923/16595" }, { "epoch": 1.1822838204278396, "eta": "83:52:29", "grad_norm": 0.0059, "loss": 0.0509, "lr": "4.393e-05", "step": 3924, "steps": "23.83s,3924/16595" }, { "epoch": 1.1825851159987948, "eta": "84:11:06", "grad_norm": 0.0052, "loss": 0.0473, "lr": "4.393e-05", "step": 3925, "steps": "23.92s,3925/16595" }, { "epoch": 1.18288641156975, "eta": "83:07:21", "grad_norm": 0.0062, "loss": 0.0698, "lr": "4.393e-05", "step": 3926, "steps": "23.62s,3926/16595" }, { "epoch": 1.183187707140705, "eta": "85:57:59", "grad_norm": 0.0045, "loss": 0.0672, "lr": "4.392e-05", "step": 3927, "steps": "24.43s,3927/16595" }, { "epoch": 1.18348900271166, "eta": "83:19:14", "grad_norm": 0.0057, "loss": 0.0416, "lr": "4.392e-05", "step": 3928, "steps": "23.68s,3928/16595" }, { "epoch": 1.1837902982826152, "eta": "84:07:24", "grad_norm": 0.0049, "loss": 0.0552, "lr": "4.392e-05", "step": 3929, "steps": "23.91s,3929/16595" }, { "epoch": 1.1840915938535703, "eta": "84:57:39", "grad_norm": 0.0047, "loss": 0.0534, "lr": "4.392e-05", "step": 3930, "steps": "24.15s,3930/16595" }, { "epoch": 1.1843928894245255, "eta": "86:02:41", "grad_norm": 0.0062, "loss": 0.0666, "lr": "4.391e-05", "step": 3931, "steps": "24.46s,3931/16595" }, { "epoch": 1.1846941849954806, "eta": "83:09:13", "grad_norm": 0.0064, "loss": 0.0538, "lr": "4.391e-05", "step": 3932, "steps": "23.64s,3932/16595" }, { "epoch": 1.1849954805664358, "eta": "83:21:29", "grad_norm": 0.0043, "loss": 0.0607, "lr": "4.391e-05", "step": 3933, "steps": "23.7s,3933/16595" }, { "epoch": 1.1852967761373907, "eta": "83:50:38", "grad_norm": 0.0061, "loss": 0.0344, "lr": "4.390e-05", "step": 3934, "steps": "23.84s,3934/16595" }, { "epoch": 1.1855980717083459, "eta": "83:56:34", "grad_norm": 0.0071, "loss": 0.0545, "lr": "4.390e-05", "step": 3935, "steps": "23.87s,3935/16595" }, { "epoch": 1.185899367279301, "eta": "84:27:49", "grad_norm": 0.0051, "loss": 0.0473, "lr": "4.390e-05", "step": 3936, "steps": "24.02s,3936/16595" }, { "epoch": 1.1862006628502562, "eta": "84:27:25", "grad_norm": 0.0067, "loss": 0.0521, "lr": "4.389e-05", "step": 3937, "steps": "24.02s,3937/16595" }, { "epoch": 1.186501958421211, "eta": "83:17:24", "grad_norm": 0.0051, "loss": 0.043, "lr": "4.389e-05", "step": 3938, "steps": "23.69s,3938/16595" }, { "epoch": 1.1868032539921662, "eta": "83:17:00", "grad_norm": 0.0068, "loss": 0.0452, "lr": "4.389e-05", "step": 3939, "steps": "23.69s,3939/16595" }, { "epoch": 1.1871045495631214, "eta": "84:28:19", "grad_norm": 0.0068, "loss": 0.0545, "lr": "4.388e-05", "step": 3940, "steps": "24.03s,3940/16595" }, { "epoch": 1.1874058451340765, "eta": "83:28:52", "grad_norm": 0.0076, "loss": 0.0421, "lr": "4.388e-05", "step": 3941, "steps": "23.75s,3941/16595" }, { "epoch": 1.1877071407050317, "eta": "83:24:15", "grad_norm": 0.0049, "loss": 0.0688, "lr": "4.388e-05", "step": 3942, "steps": "23.73s,3942/16595" }, { "epoch": 1.1880084362759868, "eta": "84:14:28", "grad_norm": 0.006, "loss": 0.0507, "lr": "4.387e-05", "step": 3943, "steps": "23.97s,3943/16595" }, { "epoch": 1.1883097318469418, "eta": "82:39:11", "grad_norm": 0.005, "loss": 0.0359, "lr": "4.387e-05", "step": 3944, "steps": "23.52s,3944/16595" }, { "epoch": 1.188611027417897, "eta": "83:20:58", "grad_norm": 0.0047, "loss": 0.0542, "lr": "4.387e-05", "step": 3945, "steps": "23.72s,3945/16595" }, { "epoch": 1.188912322988852, "eta": "83:03:42", "grad_norm": 0.0047, "loss": 0.0449, "lr": "4.387e-05", "step": 3946, "steps": "23.64s,3946/16595" }, { "epoch": 1.1892136185598072, "eta": "83:53:54", "grad_norm": 0.0052, "loss": 0.0511, "lr": "4.386e-05", "step": 3947, "steps": "23.88s,3947/16595" }, { "epoch": 1.1895149141307622, "eta": "83:26:06", "grad_norm": 0.0052, "loss": 0.0649, "lr": "4.386e-05", "step": 3948, "steps": "23.75s,3948/16595" }, { "epoch": 1.1898162097017173, "eta": "83:21:29", "grad_norm": 0.0052, "loss": 0.0447, "lr": "4.386e-05", "step": 3949, "steps": "23.73s,3949/16595" }, { "epoch": 1.1901175052726725, "eta": "84:05:21", "grad_norm": 0.0049, "loss": 0.0524, "lr": "4.385e-05", "step": 3950, "steps": "23.94s,3950/16595" }, { "epoch": 1.1904188008436276, "eta": "84:36:33", "grad_norm": 0.0054, "loss": 0.0457, "lr": "4.385e-05", "step": 3951, "steps": "24.09s,3951/16595" }, { "epoch": 1.1907200964145828, "eta": "84:29:50", "grad_norm": 0.0051, "loss": 0.0527, "lr": "4.385e-05", "step": 3952, "steps": "24.06s,3952/16595" }, { "epoch": 1.191021391985538, "eta": "83:34:39", "grad_norm": 0.0051, "loss": 0.0514, "lr": "4.384e-05", "step": 3953, "steps": "23.8s,3953/16595" }, { "epoch": 1.1913226875564928, "eta": "82:16:18", "grad_norm": 0.0052, "loss": 0.0458, "lr": "4.384e-05", "step": 3954, "steps": "23.43s,3954/16595" }, { "epoch": 1.191623983127448, "eta": "84:01:15", "grad_norm": 0.0051, "loss": 0.0641, "lr": "4.384e-05", "step": 3955, "steps": "23.93s,3955/16595" }, { "epoch": 1.1919252786984031, "eta": "83:39:47", "grad_norm": 0.0047, "loss": 0.0597, "lr": "4.383e-05", "step": 3956, "steps": "23.83s,3956/16595" }, { "epoch": 1.1922265742693583, "eta": "84:27:50", "grad_norm": 0.0059, "loss": 0.0564, "lr": "4.383e-05", "step": 3957, "steps": "24.06s,3957/16595" }, { "epoch": 1.1925278698403132, "eta": "83:45:18", "grad_norm": 0.0047, "loss": 0.0487, "lr": "4.383e-05", "step": 3958, "steps": "23.86s,3958/16595" }, { "epoch": 1.1928291654112684, "eta": "83:42:48", "grad_norm": 0.0048, "loss": 0.0573, "lr": "4.382e-05", "step": 3959, "steps": "23.85s,3959/16595" }, { "epoch": 1.1931304609822235, "eta": "85:04:32", "grad_norm": 0.0054, "loss": 0.0554, "lr": "4.382e-05", "step": 3960, "steps": "24.24s,3960/16595" }, { "epoch": 1.1934317565531787, "eta": "83:42:00", "grad_norm": 0.0068, "loss": 0.044, "lr": "4.382e-05", "step": 3961, "steps": "23.85s,3961/16595" }, { "epoch": 1.1937330521241338, "eta": "83:52:08", "grad_norm": 0.0048, "loss": 0.0599, "lr": "4.382e-05", "step": 3962, "steps": "23.9s,3962/16595" }, { "epoch": 1.194034347695089, "eta": "84:10:41", "grad_norm": 0.0059, "loss": 0.0528, "lr": "4.381e-05", "step": 3963, "steps": "23.99s,3963/16595" }, { "epoch": 1.194335643266044, "eta": "84:20:49", "grad_norm": 0.0062, "loss": 0.036, "lr": "4.381e-05", "step": 3964, "steps": "24.04s,3964/16595" }, { "epoch": 1.194636938836999, "eta": "83:32:00", "grad_norm": 0.0066, "loss": 0.0454, "lr": "4.381e-05", "step": 3965, "steps": "23.81s,3965/16595" }, { "epoch": 1.1949382344079542, "eta": "83:35:49", "grad_norm": 0.0059, "loss": 0.0423, "lr": "4.380e-05", "step": 3966, "steps": "23.83s,3966/16595" }, { "epoch": 1.1952395299789094, "eta": "83:48:02", "grad_norm": 0.0052, "loss": 0.0571, "lr": "4.380e-05", "step": 3967, "steps": "23.89s,3967/16595" }, { "epoch": 1.1955408255498643, "eta": "83:18:11", "grad_norm": 0.0053, "loss": 0.0428, "lr": "4.380e-05", "step": 3968, "steps": "23.75s,3968/16595" }, { "epoch": 1.1958421211208194, "eta": "83:00:57", "grad_norm": 0.0046, "loss": 0.0635, "lr": "4.379e-05", "step": 3969, "steps": "23.67s,3969/16595" }, { "epoch": 1.1961434166917746, "eta": "82:52:08", "grad_norm": 0.0056, "loss": 0.0548, "lr": "4.379e-05", "step": 3970, "steps": "23.63s,3970/16595" }, { "epoch": 1.1964447122627297, "eta": "83:54:52", "grad_norm": 0.0054, "loss": 0.0441, "lr": "4.379e-05", "step": 3971, "steps": "23.93s,3971/16595" }, { "epoch": 1.196746007833685, "eta": "84:30:14", "grad_norm": 0.0062, "loss": 0.0402, "lr": "4.378e-05", "step": 3972, "steps": "24.1s,3972/16595" }, { "epoch": 1.19704730340464, "eta": "83:45:39", "grad_norm": 0.0052, "loss": 0.0389, "lr": "4.378e-05", "step": 3973, "steps": "23.89s,3973/16595" }, { "epoch": 1.197348598975595, "eta": "82:16:54", "grad_norm": 0.0052, "loss": 0.0744, "lr": "4.378e-05", "step": 3974, "steps": "23.47s,3974/16595" }, { "epoch": 1.1976498945465501, "eta": "83:49:04", "grad_norm": 0.0107, "loss": 0.0644, "lr": "4.377e-05", "step": 3975, "steps": "23.91s,3975/16595" }, { "epoch": 1.1979511901175053, "eta": "84:16:00", "grad_norm": 0.0054, "loss": 0.0443, "lr": "4.377e-05", "step": 3976, "steps": "24.04s,3976/16595" }, { "epoch": 1.1982524856884604, "eta": "83:14:37", "grad_norm": 0.0052, "loss": 0.0602, "lr": "4.377e-05", "step": 3977, "steps": "23.75s,3977/16595" }, { "epoch": 1.1985537812594154, "eta": "83:22:38", "grad_norm": 0.0048, "loss": 0.0636, "lr": "4.376e-05", "step": 3978, "steps": "23.79s,3978/16595" }, { "epoch": 1.1988550768303705, "eta": "83:49:34", "grad_norm": 0.004, "loss": 0.0555, "lr": "4.376e-05", "step": 3979, "steps": "23.92s,3979/16595" }, { "epoch": 1.1991563724013257, "eta": "84:39:38", "grad_norm": 0.0068, "loss": 0.051, "lr": "4.376e-05", "step": 3980, "steps": "24.16s,3980/16595" }, { "epoch": 1.1994576679722808, "eta": "83:57:11", "grad_norm": 0.005, "loss": 0.0487, "lr": "4.376e-05", "step": 3981, "steps": "23.96s,3981/16595" }, { "epoch": 1.199758963543236, "eta": "83:39:58", "grad_norm": 0.0051, "loss": 0.0452, "lr": "4.375e-05", "step": 3982, "steps": "23.88s,3982/16595" }, { "epoch": 1.2000602591141911, "eta": "82:57:32", "grad_norm": 0.0051, "loss": 0.0618, "lr": "4.375e-05", "step": 3983, "steps": "23.68s,3983/16595" }, { "epoch": 1.200361554685146, "eta": "83:11:51", "grad_norm": 0.0057, "loss": 0.0533, "lr": "4.375e-05", "step": 3984, "steps": "23.75s,3984/16595" }, { "epoch": 1.2006628502561012, "eta": "84:20:48", "grad_norm": 0.005, "loss": 0.0487, "lr": "4.374e-05", "step": 3985, "steps": "24.08s,3985/16595" }, { "epoch": 1.2009641458270564, "eta": "84:30:55", "grad_norm": 0.0051, "loss": 0.0511, "lr": "4.374e-05", "step": 3986, "steps": "24.13s,3986/16595" }, { "epoch": 1.2012654413980115, "eta": "84:01:05", "grad_norm": 0.0055, "loss": 0.0451, "lr": "4.374e-05", "step": 3987, "steps": "23.99s,3987/16595" }, { "epoch": 1.2015667369689667, "eta": "83:58:35", "grad_norm": 0.0047, "loss": 0.0509, "lr": "4.373e-05", "step": 3988, "steps": "23.98s,3988/16595" }, { "epoch": 1.2018680325399216, "eta": "83:47:41", "grad_norm": 0.0044, "loss": 0.0418, "lr": "4.373e-05", "step": 3989, "steps": "23.93s,3989/16595" }, { "epoch": 1.2021693281108767, "eta": "82:58:58", "grad_norm": 0.005, "loss": 0.0568, "lr": "4.373e-05", "step": 3990, "steps": "23.7s,3990/16595" }, { "epoch": 1.2024706236818319, "eta": "83:00:40", "grad_norm": 0.0047, "loss": 0.0509, "lr": "4.372e-05", "step": 3991, "steps": "23.71s,3991/16595" }, { "epoch": 1.202771919252787, "eta": "84:26:24", "grad_norm": 0.0055, "loss": 0.06, "lr": "4.372e-05", "step": 3992, "steps": "24.12s,3992/16595" }, { "epoch": 1.2030732148237422, "eta": "84:15:30", "grad_norm": 0.0039, "loss": 0.0549, "lr": "4.372e-05", "step": 3993, "steps": "24.07s,3993/16595" }, { "epoch": 1.2033745103946971, "eta": "83:26:47", "grad_norm": 0.0049, "loss": 0.0474, "lr": "4.371e-05", "step": 3994, "steps": "23.84s,3994/16595" }, { "epoch": 1.2036758059656523, "eta": "82:48:36", "grad_norm": 0.006, "loss": 0.042, "lr": "4.371e-05", "step": 3995, "steps": "23.66s,3995/16595" }, { "epoch": 1.2039771015366074, "eta": "82:54:30", "grad_norm": 0.0041, "loss": 0.0572, "lr": "4.371e-05", "step": 3996, "steps": "23.69s,3996/16595" }, { "epoch": 1.2042783971075626, "eta": "83:40:18", "grad_norm": 0.0048, "loss": 0.0468, "lr": "4.370e-05", "step": 3997, "steps": "23.91s,3997/16595" }, { "epoch": 1.2045796926785177, "eta": "83:25:12", "grad_norm": 0.005, "loss": 0.0435, "lr": "4.370e-05", "step": 3998, "steps": "23.84s,3998/16595" }, { "epoch": 1.2048809882494727, "eta": "82:26:01", "grad_norm": 0.0045, "loss": 0.0372, "lr": "4.370e-05", "step": 3999, "steps": "23.56s,3999/16595" }, { "epoch": 1.2051822838204278, "eta": "83:43:18", "grad_norm": 0.0049, "loss": 0.0429, "lr": "4.370e-05", "step": 4000, "steps": "23.93s,4000/16595" }, { "epoch": 1.205483579391383, "eta": "172:15:28", "grad_norm": 0.0047, "loss": 0.0509, "lr": "4.369e-05", "step": 4001, "steps": "49.24s,4001/16595" }, { "epoch": 1.205784874962338, "eta": "83:32:00", "grad_norm": 0.0053, "loss": 0.06, "lr": "4.369e-05", "step": 4002, "steps": "23.88s,4002/16595" }, { "epoch": 1.2060861705332933, "eta": "83:14:49", "grad_norm": 0.0064, "loss": 0.0457, "lr": "4.369e-05", "step": 4003, "steps": "23.8s,4003/16595" }, { "epoch": 1.2063874661042482, "eta": "82:28:15", "grad_norm": 0.0056, "loss": 0.0513, "lr": "4.368e-05", "step": 4004, "steps": "23.58s,4004/16595" }, { "epoch": 1.2066887616752033, "eta": "83:45:30", "grad_norm": 0.0051, "loss": 0.0509, "lr": "4.368e-05", "step": 4005, "steps": "23.95s,4005/16595" }, { "epoch": 1.2069900572461585, "eta": "83:13:38", "grad_norm": 0.0054, "loss": 0.0508, "lr": "4.368e-05", "step": 4006, "steps": "23.8s,4006/16595" }, { "epoch": 1.2072913528171136, "eta": "82:29:10", "grad_norm": 0.0043, "loss": 0.0677, "lr": "4.367e-05", "step": 4007, "steps": "23.59s,4007/16595" }, { "epoch": 1.2075926483880688, "eta": "83:25:25", "grad_norm": 0.0067, "loss": 0.0452, "lr": "4.367e-05", "step": 4008, "steps": "23.86s,4008/16595" }, { "epoch": 1.2078939439590237, "eta": "82:26:17", "grad_norm": 0.0055, "loss": 0.0611, "lr": "4.367e-05", "step": 4009, "steps": "23.58s,4009/16595" }, { "epoch": 1.2081952395299789, "eta": "83:18:20", "grad_norm": 0.0042, "loss": 0.0536, "lr": "4.366e-05", "step": 4010, "steps": "23.83s,4010/16595" }, { "epoch": 1.208496535100934, "eta": "83:24:14", "grad_norm": 0.0052, "loss": 0.0513, "lr": "4.366e-05", "step": 4011, "steps": "23.86s,4011/16595" }, { "epoch": 1.2087978306718892, "eta": "83:04:57", "grad_norm": 0.0056, "loss": 0.0641, "lr": "4.366e-05", "step": 4012, "steps": "23.77s,4012/16595" }, { "epoch": 1.2090991262428443, "eta": "82:39:24", "grad_norm": 0.0064, "loss": 0.0538, "lr": "4.365e-05", "step": 4013, "steps": "23.65s,4013/16595" }, { "epoch": 1.2094004218137993, "eta": "83:52:24", "grad_norm": 0.0053, "loss": 0.0578, "lr": "4.365e-05", "step": 4014, "steps": "24.0s,4014/16595" }, { "epoch": 1.2097017173847544, "eta": "82:44:54", "grad_norm": 0.0056, "loss": 0.0496, "lr": "4.365e-05", "step": 4015, "steps": "23.68s,4015/16595" }, { "epoch": 1.2100030129557096, "eta": "82:23:32", "grad_norm": 0.0049, "loss": 0.0519, "lr": "4.364e-05", "step": 4016, "steps": "23.58s,4016/16595" }, { "epoch": 1.2103043085266647, "eta": "83:42:48", "grad_norm": 0.0056, "loss": 0.0399, "lr": "4.364e-05", "step": 4017, "steps": "23.96s,4017/16595" }, { "epoch": 1.2106056040976199, "eta": "82:39:31", "grad_norm": 0.0059, "loss": 0.0554, "lr": "4.364e-05", "step": 4018, "steps": "23.66s,4018/16595" }, { "epoch": 1.2109068996685748, "eta": "83:10:34", "grad_norm": 0.0052, "loss": 0.0551, "lr": "4.363e-05", "step": 4019, "steps": "23.81s,4019/16595" }, { "epoch": 1.21120819523953, "eta": "83:43:42", "grad_norm": 0.0043, "loss": 0.0488, "lr": "4.363e-05", "step": 4020, "steps": "23.97s,4020/16595" }, { "epoch": 1.211509490810485, "eta": "83:49:36", "grad_norm": 0.0045, "loss": 0.0513, "lr": "4.363e-05", "step": 4021, "steps": "24.0s,4021/16595" }, { "epoch": 1.2118107863814402, "eta": "83:07:17", "grad_norm": 0.0056, "loss": 0.0462, "lr": "4.363e-05", "step": 4022, "steps": "23.8s,4022/16595" }, { "epoch": 1.2121120819523954, "eta": "83:50:53", "grad_norm": 0.0049, "loss": 0.0573, "lr": "4.362e-05", "step": 4023, "steps": "24.01s,4023/16595" }, { "epoch": 1.2124133775233503, "eta": "83:31:38", "grad_norm": 0.0059, "loss": 0.0444, "lr": "4.362e-05", "step": 4024, "steps": "23.92s,4024/16595" }, { "epoch": 1.2127146730943055, "eta": "82:28:23", "grad_norm": 0.0062, "loss": 0.048, "lr": "4.362e-05", "step": 4025, "steps": "23.62s,4025/16595" }, { "epoch": 1.2130159686652606, "eta": "83:30:50", "grad_norm": 0.0056, "loss": 0.068, "lr": "4.361e-05", "step": 4026, "steps": "23.92s,4026/16595" }, { "epoch": 1.2133172642362158, "eta": "83:32:32", "grad_norm": 0.005, "loss": 0.0402, "lr": "4.361e-05", "step": 4027, "steps": "23.93s,4027/16595" }, { "epoch": 1.213618559807171, "eta": "83:17:28", "grad_norm": 0.0045, "loss": 0.0594, "lr": "4.361e-05", "step": 4028, "steps": "23.86s,4028/16595" }, { "epoch": 1.2139198553781259, "eta": "82:35:11", "grad_norm": 0.0052, "loss": 0.0512, "lr": "4.360e-05", "step": 4029, "steps": "23.66s,4029/16595" }, { "epoch": 1.214221150949081, "eta": "82:47:21", "grad_norm": 0.0062, "loss": 0.0416, "lr": "4.360e-05", "step": 4030, "steps": "23.72s,4030/16595" }, { "epoch": 1.2145224465200362, "eta": "83:18:22", "grad_norm": 0.0045, "loss": 0.0543, "lr": "4.360e-05", "step": 4031, "steps": "23.87s,4031/16595" }, { "epoch": 1.2148237420909913, "eta": "83:36:49", "grad_norm": 0.0047, "loss": 0.0504, "lr": "4.359e-05", "step": 4032, "steps": "23.96s,4032/16595" }, { "epoch": 1.2151250376619465, "eta": "83:09:12", "grad_norm": 0.0049, "loss": 0.0703, "lr": "4.359e-05", "step": 4033, "steps": "23.83s,4033/16595" }, { "epoch": 1.2154263332329014, "eta": "82:31:07", "grad_norm": 0.0047, "loss": 0.0492, "lr": "4.359e-05", "step": 4034, "steps": "23.65s,4034/16595" }, { "epoch": 1.2157276288038565, "eta": "83:06:19", "grad_norm": 0.005, "loss": 0.0466, "lr": "4.358e-05", "step": 4035, "steps": "23.82s,4035/16595" }, { "epoch": 1.2160289243748117, "eta": "82:07:18", "grad_norm": 0.0053, "loss": 0.0598, "lr": "4.358e-05", "step": 4036, "steps": "23.54s,4036/16595" }, { "epoch": 1.2163302199457668, "eta": "82:32:02", "grad_norm": 0.0064, "loss": 0.0461, "lr": "4.358e-05", "step": 4037, "steps": "23.66s,4037/16595" }, { "epoch": 1.216631515516722, "eta": "82:31:38", "grad_norm": 0.0045, "loss": 0.0609, "lr": "4.357e-05", "step": 4038, "steps": "23.66s,4038/16595" }, { "epoch": 1.216932811087677, "eta": "82:54:16", "grad_norm": 0.005, "loss": 0.0529, "lr": "4.357e-05", "step": 4039, "steps": "23.77s,4039/16595" }, { "epoch": 1.217234106658632, "eta": "83:14:47", "grad_norm": 0.0051, "loss": 0.0692, "lr": "4.357e-05", "step": 4040, "steps": "23.87s,4040/16595" }, { "epoch": 1.2175354022295872, "eta": "82:22:05", "grad_norm": 0.0053, "loss": 0.0579, "lr": "4.356e-05", "step": 4041, "steps": "23.62s,4041/16595" }, { "epoch": 1.2178366978005424, "eta": "83:03:32", "grad_norm": 0.0056, "loss": 0.0601, "lr": "4.356e-05", "step": 4042, "steps": "23.82s,4042/16595" }, { "epoch": 1.2181379933714975, "eta": "83:15:41", "grad_norm": 0.0049, "loss": 0.0591, "lr": "4.356e-05", "step": 4043, "steps": "23.88s,4043/16595" }, { "epoch": 1.2184392889424525, "eta": "82:14:38", "grad_norm": 0.0063, "loss": 0.051, "lr": "4.355e-05", "step": 4044, "steps": "23.59s,4044/16595" }, { "epoch": 1.2187405845134076, "eta": "83:29:32", "grad_norm": 0.0069, "loss": 0.0602, "lr": "4.355e-05", "step": 4045, "steps": "23.95s,4045/16595" }, { "epoch": 1.2190418800843628, "eta": "82:20:07", "grad_norm": 0.0051, "loss": 0.0436, "lr": "4.355e-05", "step": 4046, "steps": "23.62s,4046/16595" }, { "epoch": 1.219343175655318, "eta": "83:05:44", "grad_norm": 0.0056, "loss": 0.0569, "lr": "4.355e-05", "step": 4047, "steps": "23.84s,4047/16595" }, { "epoch": 1.219644471226273, "eta": "83:49:15", "grad_norm": 0.0047, "loss": 0.0553, "lr": "4.354e-05", "step": 4048, "steps": "24.05s,4048/16595" }, { "epoch": 1.219945766797228, "eta": "82:44:02", "grad_norm": 0.0047, "loss": 0.047, "lr": "4.354e-05", "step": 4049, "steps": "23.74s,4049/16595" }, { "epoch": 1.2202470623681831, "eta": "82:56:11", "grad_norm": 0.0055, "loss": 0.0386, "lr": "4.354e-05", "step": 4050, "steps": "23.8s,4050/16595" }, { "epoch": 1.2205483579391383, "eta": "82:13:58", "grad_norm": 0.0066, "loss": 0.0599, "lr": "4.353e-05", "step": 4051, "steps": "23.6s,4051/16595" }, { "epoch": 1.2208496535100934, "eta": "82:53:17", "grad_norm": 0.0052, "loss": 0.0578, "lr": "4.353e-05", "step": 4052, "steps": "23.79s,4052/16595" }, { "epoch": 1.2211509490810486, "eta": "81:50:11", "grad_norm": 0.0051, "loss": 0.0484, "lr": "4.353e-05", "step": 4053, "steps": "23.49s,4053/16595" }, { "epoch": 1.2214522446520035, "eta": "84:24:28", "grad_norm": 0.0051, "loss": 0.0648, "lr": "4.352e-05", "step": 4054, "steps": "24.23s,4054/16595" }, { "epoch": 1.2217535402229587, "eta": "81:49:24", "grad_norm": 0.0049, "loss": 0.0488, "lr": "4.352e-05", "step": 4055, "steps": "23.49s,4055/16595" }, { "epoch": 1.2220548357939138, "eta": "81:55:17", "grad_norm": 0.0056, "loss": 0.0479, "lr": "4.352e-05", "step": 4056, "steps": "23.52s,4056/16595" }, { "epoch": 1.222356131364869, "eta": "83:01:45", "grad_norm": 0.0053, "loss": 0.0426, "lr": "4.351e-05", "step": 4057, "steps": "23.84s,4057/16595" }, { "epoch": 1.2226574269358241, "eta": "83:09:43", "grad_norm": 0.005, "loss": 0.0594, "lr": "4.351e-05", "step": 4058, "steps": "23.88s,4058/16595" }, { "epoch": 1.222958722506779, "eta": "82:06:38", "grad_norm": 0.0059, "loss": 0.0402, "lr": "4.351e-05", "step": 4059, "steps": "23.58s,4059/16595" }, { "epoch": 1.2232600180777342, "eta": "82:58:29", "grad_norm": 0.0055, "loss": 0.0464, "lr": "4.350e-05", "step": 4060, "steps": "23.83s,4060/16595" }, { "epoch": 1.2235613136486894, "eta": "82:12:07", "grad_norm": 0.0057, "loss": 0.0531, "lr": "4.350e-05", "step": 4061, "steps": "23.61s,4061/16595" }, { "epoch": 1.2238626092196445, "eta": "81:55:01", "grad_norm": 0.0042, "loss": 0.036, "lr": "4.350e-05", "step": 4062, "steps": "23.53s,4062/16595" }, { "epoch": 1.2241639047905997, "eta": "82:48:56", "grad_norm": 0.0055, "loss": 0.0464, "lr": "4.349e-05", "step": 4063, "steps": "23.79s,4063/16595" }, { "epoch": 1.2244652003615546, "eta": "83:01:04", "grad_norm": 0.005, "loss": 0.0489, "lr": "4.349e-05", "step": 4064, "steps": "23.85s,4064/16595" }, { "epoch": 1.2247664959325097, "eta": "83:15:17", "grad_norm": 0.0049, "loss": 0.0505, "lr": "4.349e-05", "step": 4065, "steps": "23.92s,4065/16595" }, { "epoch": 1.225067791503465, "eta": "82:58:11", "grad_norm": 0.0058, "loss": 0.042, "lr": "4.348e-05", "step": 4066, "steps": "23.84s,4066/16595" }, { "epoch": 1.22536908707442, "eta": "82:34:49", "grad_norm": 0.0066, "loss": 0.0506, "lr": "4.348e-05", "step": 4067, "steps": "23.73s,4067/16595" }, { "epoch": 1.2256703826453752, "eta": "83:16:11", "grad_norm": 0.0047, "loss": 0.0581, "lr": "4.348e-05", "step": 4068, "steps": "23.93s,4068/16595" }, { "epoch": 1.2259716782163301, "eta": "82:04:48", "grad_norm": 0.0065, "loss": 0.0514, "lr": "4.347e-05", "step": 4069, "steps": "23.59s,4069/16595" }, { "epoch": 1.2262729737872853, "eta": "82:27:22", "grad_norm": 0.0043, "loss": 0.0475, "lr": "4.347e-05", "step": 4070, "steps": "23.7s,4070/16595" }, { "epoch": 1.2265742693582404, "eta": "81:09:44", "grad_norm": 0.0052, "loss": 0.0414, "lr": "4.347e-05", "step": 4071, "steps": "23.33s,4071/16595" }, { "epoch": 1.2268755649291956, "eta": "82:34:56", "grad_norm": 0.0056, "loss": 0.047, "lr": "4.346e-05", "step": 4072, "steps": "23.74s,4072/16595" }, { "epoch": 1.2271768605001507, "eta": "83:18:21", "grad_norm": 0.0056, "loss": 0.0551, "lr": "4.346e-05", "step": 4073, "steps": "23.95s,4073/16595" }, { "epoch": 1.2274781560711057, "eta": "85:02:18", "grad_norm": 0.0058, "loss": 0.0612, "lr": "4.346e-05", "step": 4074, "steps": "24.45s,4074/16595" }, { "epoch": 1.2277794516420608, "eta": "82:17:03", "grad_norm": 0.0056, "loss": 0.0501, "lr": "4.346e-05", "step": 4075, "steps": "23.66s,4075/16595" }, { "epoch": 1.228080747213016, "eta": "82:54:12", "grad_norm": 0.0066, "loss": 0.0549, "lr": "4.345e-05", "step": 4076, "steps": "23.84s,4076/16595" }, { "epoch": 1.2283820427839711, "eta": "81:49:08", "grad_norm": 0.0058, "loss": 0.0568, "lr": "4.345e-05", "step": 4077, "steps": "23.53s,4077/16595" }, { "epoch": 1.2286833383549263, "eta": "81:42:29", "grad_norm": 0.0061, "loss": 0.0556, "lr": "4.345e-05", "step": 4078, "steps": "23.5s,4078/16595" }, { "epoch": 1.2289846339258812, "eta": "83:01:22", "grad_norm": 0.0045, "loss": 0.0487, "lr": "4.344e-05", "step": 4079, "steps": "23.88s,4079/16595" }, { "epoch": 1.2292859294968363, "eta": "81:54:13", "grad_norm": 0.0058, "loss": 0.0621, "lr": "4.344e-05", "step": 4080, "steps": "23.56s,4080/16595" }, { "epoch": 1.2295872250677915, "eta": "81:37:08", "grad_norm": 0.0059, "loss": 0.0713, "lr": "4.344e-05", "step": 4081, "steps": "23.48s,4081/16595" }, { "epoch": 1.2298885206387467, "eta": "82:39:19", "grad_norm": 0.0056, "loss": 0.0508, "lr": "4.343e-05", "step": 4082, "steps": "23.78s,4082/16595" }, { "epoch": 1.2301898162097018, "eta": "81:59:18", "grad_norm": 0.0051, "loss": 0.0702, "lr": "4.343e-05", "step": 4083, "steps": "23.59s,4083/16595" }, { "epoch": 1.2304911117806567, "eta": "83:47:20", "grad_norm": 0.0052, "loss": 0.0519, "lr": "4.343e-05", "step": 4084, "steps": "24.11s,4084/16595" }, { "epoch": 1.2307924073516119, "eta": "82:50:38", "grad_norm": 0.0052, "loss": 0.0555, "lr": "4.342e-05", "step": 4085, "steps": "23.84s,4085/16595" }, { "epoch": 1.231093702922567, "eta": "82:10:37", "grad_norm": 0.0048, "loss": 0.0498, "lr": "4.342e-05", "step": 4086, "steps": "23.65s,4086/16595" }, { "epoch": 1.2313949984935222, "eta": "82:16:29", "grad_norm": 0.0046, "loss": 0.0735, "lr": "4.342e-05", "step": 4087, "steps": "23.68s,4087/16595" }, { "epoch": 1.2316962940644773, "eta": "82:51:31", "grad_norm": 0.0063, "loss": 0.0579, "lr": "4.341e-05", "step": 4088, "steps": "23.85s,4088/16595" }, { "epoch": 1.2319975896354323, "eta": "82:34:27", "grad_norm": 0.0045, "loss": 0.0537, "lr": "4.341e-05", "step": 4089, "steps": "23.77s,4089/16595" }, { "epoch": 1.2322988852063874, "eta": "83:42:50", "grad_norm": 0.0062, "loss": 0.0595, "lr": "4.341e-05", "step": 4090, "steps": "24.1s,4090/16595" }, { "epoch": 1.2326001807773426, "eta": "82:21:09", "grad_norm": 0.0044, "loss": 0.0533, "lr": "4.340e-05", "step": 4091, "steps": "23.71s,4091/16595" }, { "epoch": 1.2329014763482977, "eta": "82:14:31", "grad_norm": 0.0055, "loss": 0.0526, "lr": "4.340e-05", "step": 4092, "steps": "23.68s,4092/16595" }, { "epoch": 1.2332027719192529, "eta": "84:25:23", "grad_norm": 0.0052, "loss": 0.0384, "lr": "4.340e-05", "step": 4093, "steps": "24.31s,4093/16595" }, { "epoch": 1.2335040674902078, "eta": "83:32:54", "grad_norm": 0.0052, "loss": 0.0538, "lr": "4.339e-05", "step": 4094, "steps": "24.06s,4094/16595" }, { "epoch": 1.233805363061163, "eta": "81:35:50", "grad_norm": 0.0049, "loss": 0.0379, "lr": "4.339e-05", "step": 4095, "steps": "23.5s,4095/16595" }, { "epoch": 1.234106658632118, "eta": "82:04:36", "grad_norm": 0.0052, "loss": 0.06, "lr": "4.339e-05", "step": 4096, "steps": "23.64s,4096/16595" }, { "epoch": 1.2344079542030733, "eta": "81:43:22", "grad_norm": 0.0054, "loss": 0.0409, "lr": "4.338e-05", "step": 4097, "steps": "23.54s,4097/16595" }, { "epoch": 1.2347092497740284, "eta": "81:36:44", "grad_norm": 0.0052, "loss": 0.0626, "lr": "4.338e-05", "step": 4098, "steps": "23.51s,4098/16595" }, { "epoch": 1.2350105453449833, "eta": "81:34:16", "grad_norm": 0.0051, "loss": 0.0537, "lr": "4.338e-05", "step": 4099, "steps": "23.5s,4099/16595" }, { "epoch": 1.2353118409159385, "eta": "82:36:21", "grad_norm": 0.0054, "loss": 0.0615, "lr": "4.337e-05", "step": 4100, "steps": "23.8s,4100/16595" }, { "epoch": 1.2356131364868936, "eta": "82:25:32", "grad_norm": 0.0046, "loss": 0.0577, "lr": "4.337e-05", "step": 4101, "steps": "23.75s,4101/16595" }, { "epoch": 1.2359144320578488, "eta": "82:39:43", "grad_norm": 0.0056, "loss": 0.0542, "lr": "4.337e-05", "step": 4102, "steps": "23.82s,4102/16595" }, { "epoch": 1.236215727628804, "eta": "82:14:20", "grad_norm": 0.006, "loss": 0.0546, "lr": "4.336e-05", "step": 4103, "steps": "23.7s,4103/16595" }, { "epoch": 1.2365170231997589, "eta": "82:05:37", "grad_norm": 0.0058, "loss": 0.0532, "lr": "4.336e-05", "step": 4104, "steps": "23.66s,4104/16595" }, { "epoch": 1.236818318770714, "eta": "81:23:35", "grad_norm": 0.0059, "loss": 0.0629, "lr": "4.336e-05", "step": 4105, "steps": "23.46s,4105/16595" }, { "epoch": 1.2371196143416692, "eta": "82:42:17", "grad_norm": 0.0075, "loss": 0.0367, "lr": "4.336e-05", "step": 4106, "steps": "23.84s,4106/16595" }, { "epoch": 1.2374209099126243, "eta": "81:39:27", "grad_norm": 0.0067, "loss": 0.0586, "lr": "4.335e-05", "step": 4107, "steps": "23.54s,4107/16595" }, { "epoch": 1.2377222054835795, "eta": "82:49:49", "grad_norm": 0.0053, "loss": 0.0449, "lr": "4.335e-05", "step": 4108, "steps": "23.88s,4108/16595" }, { "epoch": 1.2380235010545344, "eta": "82:36:56", "grad_norm": 0.0049, "loss": 0.0576, "lr": "4.335e-05", "step": 4109, "steps": "23.82s,4109/16595" }, { "epoch": 1.2383247966254896, "eta": "83:05:40", "grad_norm": 0.0055, "loss": 0.0438, "lr": "4.334e-05", "step": 4110, "steps": "23.96s,4110/16595" }, { "epoch": 1.2386260921964447, "eta": "81:44:07", "grad_norm": 0.0049, "loss": 0.0683, "lr": "4.334e-05", "step": 4111, "steps": "23.57s,4111/16595" }, { "epoch": 1.2389273877673999, "eta": "82:48:14", "grad_norm": 0.0056, "loss": 0.0422, "lr": "4.334e-05", "step": 4112, "steps": "23.88s,4112/16595" }, { "epoch": 1.239228683338355, "eta": "81:26:42", "grad_norm": 0.0048, "loss": 0.0595, "lr": "4.333e-05", "step": 4113, "steps": "23.49s,4113/16595" }, { "epoch": 1.23952997890931, "eta": "82:37:02", "grad_norm": 0.0058, "loss": 0.0629, "lr": "4.333e-05", "step": 4114, "steps": "23.83s,4114/16595" }, { "epoch": 1.239831274480265, "eta": "82:17:55", "grad_norm": 0.0047, "loss": 0.0669, "lr": "4.333e-05", "step": 4115, "steps": "23.74s,4115/16595" }, { "epoch": 1.2401325700512202, "eta": "82:05:02", "grad_norm": 0.0054, "loss": 0.0368, "lr": "4.332e-05", "step": 4116, "steps": "23.68s,4116/16595" }, { "epoch": 1.2404338656221754, "eta": "81:50:05", "grad_norm": 0.0046, "loss": 0.0583, "lr": "4.332e-05", "step": 4117, "steps": "23.61s,4117/16595" }, { "epoch": 1.2407351611931305, "eta": "83:39:54", "grad_norm": 0.0054, "loss": 0.0492, "lr": "4.332e-05", "step": 4118, "steps": "24.14s,4118/16595" }, { "epoch": 1.2410364567640855, "eta": "82:03:51", "grad_norm": 0.0126, "loss": 0.0687, "lr": "4.331e-05", "step": 4119, "steps": "23.68s,4119/16595" }, { "epoch": 1.2413377523350406, "eta": "81:44:45", "grad_norm": 0.0073, "loss": 0.0658, "lr": "4.331e-05", "step": 4120, "steps": "23.59s,4120/16595" }, { "epoch": 1.2416390479059958, "eta": "82:13:28", "grad_norm": 0.0049, "loss": 0.0562, "lr": "4.331e-05", "step": 4121, "steps": "23.73s,4121/16595" }, { "epoch": 1.241940343476951, "eta": "82:42:10", "grad_norm": 0.0062, "loss": 0.0457, "lr": "4.330e-05", "step": 4122, "steps": "23.87s,4122/16595" }, { "epoch": 1.242241639047906, "eta": "82:54:14", "grad_norm": 0.006, "loss": 0.048, "lr": "4.330e-05", "step": 4123, "steps": "23.93s,4123/16595" }, { "epoch": 1.242542934618861, "eta": "83:45:48", "grad_norm": 0.0056, "loss": 0.0588, "lr": "4.330e-05", "step": 4124, "steps": "24.18s,4124/16595" }, { "epoch": 1.2428442301898162, "eta": "82:51:22", "grad_norm": 0.0062, "loss": 0.0625, "lr": "4.329e-05", "step": 4125, "steps": "23.92s,4125/16595" }, { "epoch": 1.2431455257607713, "eta": "82:30:11", "grad_norm": 0.0067, "loss": 0.0469, "lr": "4.329e-05", "step": 4126, "steps": "23.82s,4126/16595" }, { "epoch": 1.2434468213317265, "eta": "82:54:43", "grad_norm": 0.0095, "loss": 0.0574, "lr": "4.329e-05", "step": 4127, "steps": "23.94s,4127/16595" }, { "epoch": 1.2437481169026816, "eta": "81:16:40", "grad_norm": 0.0045, "loss": 0.0484, "lr": "4.328e-05", "step": 4128, "steps": "23.47s,4128/16595" }, { "epoch": 1.2440494124736365, "eta": "82:39:23", "grad_norm": 0.0049, "loss": 0.0405, "lr": "4.328e-05", "step": 4129, "steps": "23.87s,4129/16595" }, { "epoch": 1.2443507080445917, "eta": "81:36:40", "grad_norm": 0.0047, "loss": 0.0631, "lr": "4.328e-05", "step": 4130, "steps": "23.57s,4130/16595" }, { "epoch": 1.2446520036155468, "eta": "81:48:44", "grad_norm": 0.0049, "loss": 0.0577, "lr": "4.327e-05", "step": 4131, "steps": "23.63s,4131/16595" }, { "epoch": 1.244953299186502, "eta": "81:21:20", "grad_norm": 0.0058, "loss": 0.0461, "lr": "4.327e-05", "step": 4132, "steps": "23.5s,4132/16595" }, { "epoch": 1.2452545947574571, "eta": "82:27:24", "grad_norm": 0.0044, "loss": 0.0536, "lr": "4.327e-05", "step": 4133, "steps": "23.82s,4133/16595" }, { "epoch": 1.2455558903284123, "eta": "82:20:47", "grad_norm": 0.0056, "loss": 0.0637, "lr": "4.326e-05", "step": 4134, "steps": "23.79s,4134/16595" }, { "epoch": 1.2458571858993672, "eta": "83:20:36", "grad_norm": 0.0054, "loss": 0.0459, "lr": "4.326e-05", "step": 4135, "steps": "24.08s,4135/16595" }, { "epoch": 1.2461584814703224, "eta": "82:46:59", "grad_norm": 0.0072, "loss": 0.0576, "lr": "4.326e-05", "step": 4136, "steps": "23.92s,4136/16595" }, { "epoch": 1.2464597770412775, "eta": "82:40:21", "grad_norm": 0.0063, "loss": 0.054, "lr": "4.325e-05", "step": 4137, "steps": "23.89s,4137/16595" }, { "epoch": 1.2467610726122327, "eta": "82:54:29", "grad_norm": 0.0048, "loss": 0.0467, "lr": "4.325e-05", "step": 4138, "steps": "23.96s,4138/16595" }, { "epoch": 1.2470623681831876, "eta": "81:58:02", "grad_norm": 0.0068, "loss": 0.0427, "lr": "4.325e-05", "step": 4139, "steps": "23.69s,4139/16595" }, { "epoch": 1.2473636637541428, "eta": "83:14:27", "grad_norm": 0.0056, "loss": 0.0482, "lr": "4.324e-05", "step": 4140, "steps": "24.06s,4140/16595" }, { "epoch": 1.247664959325098, "eta": "82:34:36", "grad_norm": 0.0062, "loss": 0.0649, "lr": "4.324e-05", "step": 4141, "steps": "23.87s,4141/16595" }, { "epoch": 1.247966254896053, "eta": "84:03:27", "grad_norm": 0.0046, "loss": 0.0517, "lr": "4.324e-05", "step": 4142, "steps": "24.3s,4142/16595" }, { "epoch": 1.2482675504670082, "eta": "83:58:54", "grad_norm": 0.0044, "loss": 0.053, "lr": "4.323e-05", "step": 4143, "steps": "24.28s,4143/16595" }, { "epoch": 1.2485688460379634, "eta": "82:31:20", "grad_norm": 0.0043, "loss": 0.0483, "lr": "4.323e-05", "step": 4144, "steps": "23.86s,4144/16595" }, { "epoch": 1.2488701416089183, "eta": "81:49:27", "grad_norm": 0.0046, "loss": 0.0407, "lr": "4.323e-05", "step": 4145, "steps": "23.66s,4145/16595" }, { "epoch": 1.2491714371798734, "eta": "82:59:36", "grad_norm": 0.0058, "loss": 0.0538, "lr": "4.322e-05", "step": 4146, "steps": "24.0s,4146/16595" }, { "epoch": 1.2494727327508286, "eta": "83:05:25", "grad_norm": 0.0046, "loss": 0.048, "lr": "4.322e-05", "step": 4147, "steps": "24.03s,4147/16595" }, { "epoch": 1.2497740283217837, "eta": "81:37:53", "grad_norm": 0.0042, "loss": 0.0607, "lr": "4.322e-05", "step": 4148, "steps": "23.61s,4148/16595" }, { "epoch": 1.2500753238927387, "eta": "82:41:48", "grad_norm": 0.0052, "loss": 0.0464, "lr": "4.321e-05", "step": 4149, "steps": "23.92s,4149/16595" }, { "epoch": 1.2503766194636938, "eta": "80:43:10", "grad_norm": 0.0042, "loss": 0.0362, "lr": "4.321e-05", "step": 4150, "steps": "23.35s,4150/16595" }, { "epoch": 1.250677915034649, "eta": "82:24:24", "grad_norm": 0.0049, "loss": 0.0604, "lr": "4.321e-05", "step": 4151, "steps": "23.84s,4151/16595" }, { "epoch": 1.2509792106056041, "eta": "80:56:55", "grad_norm": 0.0056, "loss": 0.0454, "lr": "4.321e-05", "step": 4152, "steps": "23.42s,4152/16595" }, { "epoch": 1.2512805061765593, "eta": "82:17:24", "grad_norm": 0.0044, "loss": 0.0377, "lr": "4.320e-05", "step": 4153, "steps": "23.81s,4153/16595" }, { "epoch": 1.2515818017475144, "eta": "82:52:15", "grad_norm": 0.0045, "loss": 0.0514, "lr": "4.320e-05", "step": 4154, "steps": "23.98s,4154/16595" }, { "epoch": 1.2518830973184694, "eta": "81:30:59", "grad_norm": 0.0052, "loss": 0.0444, "lr": "4.320e-05", "step": 4155, "steps": "23.59s,4155/16595" }, { "epoch": 1.2521843928894245, "eta": "82:03:46", "grad_norm": 0.0066, "loss": 0.0573, "lr": "4.319e-05", "step": 4156, "steps": "23.75s,4156/16595" }, { "epoch": 1.2524856884603797, "eta": "83:03:29", "grad_norm": 0.0052, "loss": 0.0674, "lr": "4.319e-05", "step": 4157, "steps": "24.04s,4157/16595" }, { "epoch": 1.2527869840313348, "eta": "80:52:30", "grad_norm": 0.0048, "loss": 0.0361, "lr": "4.319e-05", "step": 4158, "steps": "23.41s,4158/16595" }, { "epoch": 1.2530882796022897, "eta": "81:33:33", "grad_norm": 0.0046, "loss": 0.0544, "lr": "4.318e-05", "step": 4159, "steps": "23.61s,4159/16595" }, { "epoch": 1.253389575173245, "eta": "82:54:00", "grad_norm": 0.0063, "loss": 0.0531, "lr": "4.318e-05", "step": 4160, "steps": "24.0s,4160/16595" }, { "epoch": 1.2536908707442, "eta": "81:43:08", "grad_norm": 0.0046, "loss": 0.0499, "lr": "4.318e-05", "step": 4161, "steps": "23.66s,4161/16595" }, { "epoch": 1.2539921663151552, "eta": "82:42:50", "grad_norm": 0.0051, "loss": 0.0676, "lr": "4.317e-05", "step": 4162, "steps": "23.95s,4162/16595" }, { "epoch": 1.2542934618861104, "eta": "82:44:30", "grad_norm": 0.0042, "loss": 0.0538, "lr": "4.317e-05", "step": 4163, "steps": "23.96s,4163/16595" }, { "epoch": 1.2545947574570655, "eta": "81:52:19", "grad_norm": 0.0047, "loss": 0.0566, "lr": "4.317e-05", "step": 4164, "steps": "23.71s,4164/16595" }, { "epoch": 1.2548960530280204, "eta": "81:22:55", "grad_norm": 0.0062, "loss": 0.0508, "lr": "4.316e-05", "step": 4165, "steps": "23.57s,4165/16595" }, { "epoch": 1.2551973485989756, "eta": "81:37:01", "grad_norm": 0.0051, "loss": 0.0467, "lr": "4.316e-05", "step": 4166, "steps": "23.64s,4166/16595" }, { "epoch": 1.2554986441699307, "eta": "81:38:42", "grad_norm": 0.0051, "loss": 0.0576, "lr": "4.316e-05", "step": 4167, "steps": "23.65s,4167/16595" }, { "epoch": 1.2557999397408859, "eta": "81:17:35", "grad_norm": 0.0056, "loss": 0.0527, "lr": "4.315e-05", "step": 4168, "steps": "23.55s,4168/16595" }, { "epoch": 1.2561012353118408, "eta": "80:39:55", "grad_norm": 0.0042, "loss": 0.0562, "lr": "4.315e-05", "step": 4169, "steps": "23.37s,4169/16595" }, { "epoch": 1.256402530882796, "eta": "82:06:30", "grad_norm": 0.0042, "loss": 0.048, "lr": "4.315e-05", "step": 4170, "steps": "23.79s,4170/16595" }, { "epoch": 1.2567038264537511, "eta": "80:55:42", "grad_norm": 0.0048, "loss": 0.0432, "lr": "4.314e-05", "step": 4171, "steps": "23.45s,4171/16595" }, { "epoch": 1.2570051220247063, "eta": "81:28:27", "grad_norm": 0.0052, "loss": 0.0445, "lr": "4.314e-05", "step": 4172, "steps": "23.61s,4172/16595" }, { "epoch": 1.2573064175956614, "eta": "82:28:05", "grad_norm": 0.0044, "loss": 0.0489, "lr": "4.314e-05", "step": 4173, "steps": "23.9s,4173/16595" }, { "epoch": 1.2576077131666166, "eta": "80:56:36", "grad_norm": 0.0055, "loss": 0.0453, "lr": "4.313e-05", "step": 4174, "steps": "23.46s,4174/16595" }, { "epoch": 1.2579090087375715, "eta": "82:04:31", "grad_norm": 0.0054, "loss": 0.0582, "lr": "4.313e-05", "step": 4175, "steps": "23.79s,4175/16595" }, { "epoch": 1.2582103043085267, "eta": "80:59:58", "grad_norm": 0.0041, "loss": 0.0557, "lr": "4.313e-05", "step": 4176, "steps": "23.48s,4176/16595" }, { "epoch": 1.2585115998794818, "eta": "82:01:40", "grad_norm": 0.0055, "loss": 0.0575, "lr": "4.312e-05", "step": 4177, "steps": "23.78s,4177/16595" }, { "epoch": 1.258812895450437, "eta": "81:55:03", "grad_norm": 0.0088, "loss": 0.0569, "lr": "4.312e-05", "step": 4178, "steps": "23.75s,4178/16595" }, { "epoch": 1.2591141910213919, "eta": "81:38:06", "grad_norm": 0.0062, "loss": 0.0393, "lr": "4.312e-05", "step": 4179, "steps": "23.67s,4179/16595" }, { "epoch": 1.259415486592347, "eta": "81:43:55", "grad_norm": 0.005, "loss": 0.0442, "lr": "4.311e-05", "step": 4180, "steps": "23.7s,4180/16595" }, { "epoch": 1.2597167821633022, "eta": "82:31:07", "grad_norm": 0.0045, "loss": 0.0539, "lr": "4.311e-05", "step": 4181, "steps": "23.93s,4181/16595" }, { "epoch": 1.2600180777342573, "eta": "83:14:09", "grad_norm": 0.0046, "loss": 0.0572, "lr": "4.311e-05", "step": 4182, "steps": "24.14s,4182/16595" }, { "epoch": 1.2603193733052125, "eta": "82:53:04", "grad_norm": 0.0041, "loss": 0.0727, "lr": "4.310e-05", "step": 4183, "steps": "24.04s,4183/16595" }, { "epoch": 1.2606206688761676, "eta": "81:15:27", "grad_norm": 0.007, "loss": 0.0517, "lr": "4.310e-05", "step": 4184, "steps": "23.57s,4184/16595" }, { "epoch": 1.2609219644471226, "eta": "80:54:22", "grad_norm": 0.0051, "loss": 0.0653, "lr": "4.310e-05", "step": 4185, "steps": "23.47s,4185/16595" }, { "epoch": 1.2612232600180777, "eta": "81:06:23", "grad_norm": 0.0046, "loss": 0.0616, "lr": "4.309e-05", "step": 4186, "steps": "23.53s,4186/16595" }, { "epoch": 1.2615245555890329, "eta": "81:26:41", "grad_norm": 0.0052, "loss": 0.0507, "lr": "4.309e-05", "step": 4187, "steps": "23.63s,4187/16595" }, { "epoch": 1.261825851159988, "eta": "81:51:06", "grad_norm": 0.0062, "loss": 0.0339, "lr": "4.309e-05", "step": 4188, "steps": "23.75s,4188/16595" }, { "epoch": 1.262127146730943, "eta": "81:58:58", "grad_norm": 0.0054, "loss": 0.052, "lr": "4.308e-05", "step": 4189, "steps": "23.79s,4189/16595" }, { "epoch": 1.262428442301898, "eta": "81:58:34", "grad_norm": 0.0071, "loss": 0.0342, "lr": "4.308e-05", "step": 4190, "steps": "23.79s,4190/16595" }, { "epoch": 1.2627297378728533, "eta": "82:04:23", "grad_norm": 0.004, "loss": 0.0372, "lr": "4.308e-05", "step": 4191, "steps": "23.82s,4191/16595" }, { "epoch": 1.2630310334438084, "eta": "82:03:59", "grad_norm": 0.0051, "loss": 0.0457, "lr": "4.307e-05", "step": 4192, "steps": "23.82s,4192/16595" }, { "epoch": 1.2633323290147636, "eta": "82:05:39", "grad_norm": 0.0053, "loss": 0.053, "lr": "4.307e-05", "step": 4193, "steps": "23.83s,4193/16595" }, { "epoch": 1.2636336245857187, "eta": "82:46:36", "grad_norm": 0.0054, "loss": 0.0461, "lr": "4.307e-05", "step": 4194, "steps": "24.03s,4194/16595" }, { "epoch": 1.2639349201566736, "eta": "81:13:12", "grad_norm": 0.0041, "loss": 0.0694, "lr": "4.306e-05", "step": 4195, "steps": "23.58s,4195/16595" }, { "epoch": 1.2642362157276288, "eta": "82:33:24", "grad_norm": 0.0052, "loss": 0.0433, "lr": "4.306e-05", "step": 4196, "steps": "23.97s,4196/16595" }, { "epoch": 1.264537511298584, "eta": "81:59:56", "grad_norm": 0.0051, "loss": 0.0543, "lr": "4.306e-05", "step": 4197, "steps": "23.81s,4197/16595" }, { "epoch": 1.264838806869539, "eta": "81:57:28", "grad_norm": 0.006, "loss": 0.0451, "lr": "4.305e-05", "step": 4198, "steps": "23.8s,4198/16595" }, { "epoch": 1.265140102440494, "eta": "81:21:57", "grad_norm": 0.0047, "loss": 0.0315, "lr": "4.305e-05", "step": 4199, "steps": "23.63s,4199/16595" }, { "epoch": 1.2654413980114492, "eta": "81:13:18", "grad_norm": 0.0048, "loss": 0.0602, "lr": "4.305e-05", "step": 4200, "steps": "23.59s,4200/16595" }, { "epoch": 1.2657426935824043, "eta": "176:24:28", "grad_norm": 0.0053, "loss": 0.0683, "lr": "4.304e-05", "step": 4201, "steps": "51.24s,4201/16595" }, { "epoch": 1.2660439891533595, "eta": "81:35:14", "grad_norm": 0.0049, "loss": 0.038, "lr": "4.304e-05", "step": 4202, "steps": "23.7s,4202/16595" }, { "epoch": 1.2663452847243146, "eta": "81:28:38", "grad_norm": 0.0052, "loss": 0.0454, "lr": "4.304e-05", "step": 4203, "steps": "23.67s,4203/16595" }, { "epoch": 1.2666465802952698, "eta": "81:01:24", "grad_norm": 0.0056, "loss": 0.0493, "lr": "4.303e-05", "step": 4204, "steps": "23.54s,4204/16595" }, { "epoch": 1.2669478758662247, "eta": "82:17:24", "grad_norm": 0.0053, "loss": 0.0467, "lr": "4.303e-05", "step": 4205, "steps": "23.91s,4205/16595" }, { "epoch": 1.2672491714371799, "eta": "80:44:05", "grad_norm": 0.006, "loss": 0.0489, "lr": "4.303e-05", "step": 4206, "steps": "23.46s,4206/16595" }, { "epoch": 1.267550467008135, "eta": "81:53:54", "grad_norm": 0.0057, "loss": 0.0647, "lr": "4.302e-05", "step": 4207, "steps": "23.8s,4207/16595" }, { "epoch": 1.2678517625790902, "eta": "80:47:26", "grad_norm": 0.0082, "loss": 0.0479, "lr": "4.302e-05", "step": 4208, "steps": "23.48s,4208/16595" }, { "epoch": 1.268153058150045, "eta": "81:34:32", "grad_norm": 0.0046, "loss": 0.0479, "lr": "4.302e-05", "step": 4209, "steps": "23.71s,4209/16595" }, { "epoch": 1.2684543537210002, "eta": "80:28:05", "grad_norm": 0.0058, "loss": 0.071, "lr": "4.301e-05", "step": 4210, "steps": "23.39s,4210/16595" }, { "epoch": 1.2687556492919554, "eta": "83:02:29", "grad_norm": 0.0067, "loss": 0.0445, "lr": "4.301e-05", "step": 4211, "steps": "24.14s,4211/16595" }, { "epoch": 1.2690569448629105, "eta": "81:16:50", "grad_norm": 0.0044, "loss": 0.0494, "lr": "4.301e-05", "step": 4212, "steps": "23.63s,4212/16595" }, { "epoch": 1.2693582404338657, "eta": "82:05:58", "grad_norm": 0.0121, "loss": 0.0574, "lr": "4.300e-05", "step": 4213, "steps": "23.87s,4213/16595" }, { "epoch": 1.2696595360048208, "eta": "81:07:47", "grad_norm": 0.0055, "loss": 0.0471, "lr": "4.300e-05", "step": 4214, "steps": "23.59s,4214/16595" }, { "epoch": 1.2699608315757758, "eta": "80:38:31", "grad_norm": 0.0059, "loss": 0.0467, "lr": "4.300e-05", "step": 4215, "steps": "23.45s,4215/16595" }, { "epoch": 1.270262127146731, "eta": "80:38:07", "grad_norm": 0.0056, "loss": 0.0486, "lr": "4.299e-05", "step": 4216, "steps": "23.45s,4216/16595" }, { "epoch": 1.270563422717686, "eta": "80:54:14", "grad_norm": 0.0049, "loss": 0.0541, "lr": "4.299e-05", "step": 4217, "steps": "23.53s,4217/16595" }, { "epoch": 1.2708647182886412, "eta": "80:22:54", "grad_norm": 0.0056, "loss": 0.0381, "lr": "4.299e-05", "step": 4218, "steps": "23.38s,4218/16595" }, { "epoch": 1.2711660138595962, "eta": "81:45:01", "grad_norm": 0.0044, "loss": 0.0519, "lr": "4.298e-05", "step": 4219, "steps": "23.78s,4219/16595" }, { "epoch": 1.2714673094305513, "eta": "80:38:37", "grad_norm": 0.0048, "loss": 0.0624, "lr": "4.298e-05", "step": 4220, "steps": "23.46s,4220/16595" }, { "epoch": 1.2717686050015065, "eta": "80:58:51", "grad_norm": 0.0072, "loss": 0.0543, "lr": "4.298e-05", "step": 4221, "steps": "23.56s,4221/16595" }, { "epoch": 1.2720699005724616, "eta": "82:14:45", "grad_norm": 0.0053, "loss": 0.0598, "lr": "4.297e-05", "step": 4222, "steps": "23.93s,4222/16595" }, { "epoch": 1.2723711961434168, "eta": "81:55:48", "grad_norm": 0.0048, "loss": 0.0401, "lr": "4.297e-05", "step": 4223, "steps": "23.84s,4223/16595" }, { "epoch": 1.272672491714372, "eta": "81:01:48", "grad_norm": 0.0067, "loss": 0.0491, "lr": "4.297e-05", "step": 4224, "steps": "23.58s,4224/16595" }, { "epoch": 1.2729737872853268, "eta": "82:19:45", "grad_norm": 0.0063, "loss": 0.0486, "lr": "4.296e-05", "step": 4225, "steps": "23.96s,4225/16595" }, { "epoch": 1.273275082856282, "eta": "81:01:01", "grad_norm": 0.0069, "loss": 0.0361, "lr": "4.296e-05", "step": 4226, "steps": "23.58s,4226/16595" }, { "epoch": 1.2735763784272371, "eta": "81:35:40", "grad_norm": 0.0073, "loss": 0.0567, "lr": "4.296e-05", "step": 4227, "steps": "23.75s,4227/16595" }, { "epoch": 1.2738776739981923, "eta": "81:39:23", "grad_norm": 0.0048, "loss": 0.0364, "lr": "4.295e-05", "step": 4228, "steps": "23.77s,4228/16595" }, { "epoch": 1.2741789695691472, "eta": "80:55:42", "grad_norm": 0.0047, "loss": 0.0491, "lr": "4.295e-05", "step": 4229, "steps": "23.56s,4229/16595" }, { "epoch": 1.2744802651401024, "eta": "81:32:25", "grad_norm": 0.0053, "loss": 0.0656, "lr": "4.295e-05", "step": 4230, "steps": "23.74s,4230/16595" }, { "epoch": 1.2747815607110575, "eta": "83:08:52", "grad_norm": 0.0055, "loss": 0.0636, "lr": "4.294e-05", "step": 4231, "steps": "24.21s,4231/16595" }, { "epoch": 1.2750828562820127, "eta": "80:58:39", "grad_norm": 0.0052, "loss": 0.0532, "lr": "4.294e-05", "step": 4232, "steps": "23.58s,4232/16595" }, { "epoch": 1.2753841518529678, "eta": "81:43:35", "grad_norm": 0.0052, "loss": 0.0656, "lr": "4.294e-05", "step": 4233, "steps": "23.8s,4233/16595" }, { "epoch": 1.275685447423923, "eta": "81:28:46", "grad_norm": 0.0048, "loss": 0.0612, "lr": "4.293e-05", "step": 4234, "steps": "23.73s,4234/16595" }, { "epoch": 1.275986742994878, "eta": "80:51:18", "grad_norm": 0.0047, "loss": 0.0276, "lr": "4.293e-05", "step": 4235, "steps": "23.55s,4235/16595" }, { "epoch": 1.276288038565833, "eta": "81:21:48", "grad_norm": 0.006, "loss": 0.0387, "lr": "4.293e-05", "step": 4236, "steps": "23.7s,4236/16595" }, { "epoch": 1.2765893341367882, "eta": "81:37:53", "grad_norm": 0.0063, "loss": 0.0474, "lr": "4.292e-05", "step": 4237, "steps": "23.78s,4237/16595" }, { "epoch": 1.2768906297077434, "eta": "81:10:43", "grad_norm": 0.0065, "loss": 0.0491, "lr": "4.292e-05", "step": 4238, "steps": "23.65s,4238/16595" }, { "epoch": 1.2771919252786983, "eta": "80:10:36", "grad_norm": 0.0061, "loss": 0.0474, "lr": "4.292e-05", "step": 4239, "steps": "23.36s,4239/16595" }, { "epoch": 1.2774932208496534, "eta": "80:12:16", "grad_norm": 0.005, "loss": 0.0472, "lr": "4.291e-05", "step": 4240, "steps": "23.37s,4240/16595" }, { "epoch": 1.2777945164206086, "eta": "80:11:52", "grad_norm": 0.0057, "loss": 0.0435, "lr": "4.291e-05", "step": 4241, "steps": "23.37s,4241/16595" }, { "epoch": 1.2780958119915637, "eta": "81:40:01", "grad_norm": 0.0046, "loss": 0.0499, "lr": "4.291e-05", "step": 4242, "steps": "23.8s,4242/16595" }, { "epoch": 1.278397107562519, "eta": "80:50:13", "grad_norm": 0.0042, "loss": 0.0645, "lr": "4.290e-05", "step": 4243, "steps": "23.56s,4243/16595" }, { "epoch": 1.278698403133474, "eta": "81:33:03", "grad_norm": 0.0083, "loss": 0.0398, "lr": "4.290e-05", "step": 4244, "steps": "23.77s,4244/16595" }, { "epoch": 1.278999698704429, "eta": "80:37:05", "grad_norm": 0.0053, "loss": 0.0491, "lr": "4.290e-05", "step": 4245, "steps": "23.5s,4245/16595" }, { "epoch": 1.2793009942753841, "eta": "81:40:29", "grad_norm": 0.0059, "loss": 0.066, "lr": "4.289e-05", "step": 4246, "steps": "23.81s,4246/16595" }, { "epoch": 1.2796022898463393, "eta": "81:11:17", "grad_norm": 0.0051, "loss": 0.0425, "lr": "4.289e-05", "step": 4247, "steps": "23.67s,4247/16595" }, { "epoch": 1.2799035854172944, "eta": "80:29:44", "grad_norm": 0.0051, "loss": 0.0533, "lr": "4.289e-05", "step": 4248, "steps": "23.47s,4248/16595" }, { "epoch": 1.2802048809882494, "eta": "81:02:15", "grad_norm": 0.0049, "loss": 0.0541, "lr": "4.288e-05", "step": 4249, "steps": "23.63s,4249/16595" }, { "epoch": 1.2805061765592045, "eta": "81:16:16", "grad_norm": 0.0049, "loss": 0.0566, "lr": "4.288e-05", "step": 4250, "steps": "23.7s,4250/16595" }, { "epoch": 1.2808074721301597, "eta": "81:22:03", "grad_norm": 0.0044, "loss": 0.0647, "lr": "4.288e-05", "step": 4251, "steps": "23.73s,4251/16595" }, { "epoch": 1.2811087677011148, "eta": "80:34:20", "grad_norm": 0.0046, "loss": 0.0388, "lr": "4.287e-05", "step": 4252, "steps": "23.5s,4252/16595" }, { "epoch": 1.28141006327207, "eta": "80:46:17", "grad_norm": 0.0099, "loss": 0.0435, "lr": "4.287e-05", "step": 4253, "steps": "23.56s,4253/16595" }, { "epoch": 1.2817113588430251, "eta": "81:14:41", "grad_norm": 0.0063, "loss": 0.0438, "lr": "4.287e-05", "step": 4254, "steps": "23.7s,4254/16595" }, { "epoch": 1.28201265441398, "eta": "81:51:19", "grad_norm": 0.0056, "loss": 0.0598, "lr": "4.286e-05", "step": 4255, "steps": "23.88s,4255/16595" }, { "epoch": 1.2823139499849352, "eta": "82:01:12", "grad_norm": 0.0053, "loss": 0.0641, "lr": "4.286e-05", "step": 4256, "steps": "23.93s,4256/16595" }, { "epoch": 1.2826152455558903, "eta": "82:11:05", "grad_norm": 0.0049, "loss": 0.0502, "lr": "4.286e-05", "step": 4257, "steps": "23.98s,4257/16595" }, { "epoch": 1.2829165411268455, "eta": "81:41:54", "grad_norm": 0.0054, "loss": 0.0554, "lr": "4.285e-05", "step": 4258, "steps": "23.84s,4258/16595" }, { "epoch": 1.2832178366978004, "eta": "81:23:00", "grad_norm": 0.005, "loss": 0.0471, "lr": "4.285e-05", "step": 4259, "steps": "23.75s,4259/16595" }, { "epoch": 1.2835191322687556, "eta": "81:30:49", "grad_norm": 0.0046, "loss": 0.0497, "lr": "4.285e-05", "step": 4260, "steps": "23.79s,4260/16595" }, { "epoch": 1.2838204278397107, "eta": "82:19:46", "grad_norm": 0.0089, "loss": 0.0608, "lr": "4.284e-05", "step": 4261, "steps": "24.03s,4261/16595" }, { "epoch": 1.2841217234106659, "eta": "80:40:42", "grad_norm": 0.0047, "loss": 0.0626, "lr": "4.284e-05", "step": 4262, "steps": "23.55s,4262/16595" }, { "epoch": 1.284423018981621, "eta": "81:48:08", "grad_norm": 0.0049, "loss": 0.0441, "lr": "4.284e-05", "step": 4263, "steps": "23.88s,4263/16595" }, { "epoch": 1.2847243145525762, "eta": "81:31:17", "grad_norm": 0.0043, "loss": 0.0493, "lr": "4.283e-05", "step": 4264, "steps": "23.8s,4264/16595" }, { "epoch": 1.2850256101235311, "eta": "80:53:54", "grad_norm": 0.0044, "loss": 0.0534, "lr": "4.283e-05", "step": 4265, "steps": "23.62s,4265/16595" }, { "epoch": 1.2853269056944863, "eta": "81:59:16", "grad_norm": 0.0045, "loss": 0.0499, "lr": "4.283e-05", "step": 4266, "steps": "23.94s,4266/16595" }, { "epoch": 1.2856282012654414, "eta": "81:46:32", "grad_norm": 0.0052, "loss": 0.05, "lr": "4.282e-05", "step": 4267, "steps": "23.88s,4267/16595" }, { "epoch": 1.2859294968363966, "eta": "81:56:25", "grad_norm": 0.0054, "loss": 0.0641, "lr": "4.282e-05", "step": 4268, "steps": "23.93s,4268/16595" }, { "epoch": 1.2862307924073515, "eta": "81:10:49", "grad_norm": 0.0049, "loss": 0.0632, "lr": "4.282e-05", "step": 4269, "steps": "23.71s,4269/16595" }, { "epoch": 1.2865320879783066, "eta": "81:51:30", "grad_norm": 0.0051, "loss": 0.0451, "lr": "4.281e-05", "step": 4270, "steps": "23.91s,4270/16595" }, { "epoch": 1.2868333835492618, "eta": "81:28:31", "grad_norm": 0.005, "loss": 0.0572, "lr": "4.281e-05", "step": 4271, "steps": "23.8s,4271/16595" }, { "epoch": 1.287134679120217, "eta": "81:52:46", "grad_norm": 0.0058, "loss": 0.0534, "lr": "4.281e-05", "step": 4272, "steps": "23.92s,4272/16595" }, { "epoch": 1.287435974691172, "eta": "81:21:33", "grad_norm": 0.0058, "loss": 0.0511, "lr": "4.280e-05", "step": 4273, "steps": "23.77s,4273/16595" }, { "epoch": 1.2877372702621273, "eta": "79:59:01", "grad_norm": 0.0057, "loss": 0.046, "lr": "4.280e-05", "step": 4274, "steps": "23.37s,4274/16595" }, { "epoch": 1.2880385658330822, "eta": "81:28:59", "grad_norm": 0.0057, "loss": 0.0478, "lr": "4.280e-05", "step": 4275, "steps": "23.81s,4275/16595" }, { "epoch": 1.2883398614040373, "eta": "81:16:16", "grad_norm": 0.005, "loss": 0.0462, "lr": "4.279e-05", "step": 4276, "steps": "23.75s,4276/16595" }, { "epoch": 1.2886411569749925, "eta": "82:03:05", "grad_norm": 0.0101, "loss": 0.0428, "lr": "4.279e-05", "step": 4277, "steps": "23.98s,4277/16595" }, { "epoch": 1.2889424525459476, "eta": "81:23:41", "grad_norm": 0.0045, "loss": 0.0309, "lr": "4.279e-05", "step": 4278, "steps": "23.79s,4278/16595" }, { "epoch": 1.2892437481169026, "eta": "81:47:55", "grad_norm": 0.0051, "loss": 0.0603, "lr": "4.278e-05", "step": 4279, "steps": "23.91s,4279/16595" }, { "epoch": 1.2895450436878577, "eta": "82:57:18", "grad_norm": 0.0053, "loss": 0.0448, "lr": "4.278e-05", "step": 4280, "steps": "24.25s,4280/16595" }, { "epoch": 1.2898463392588129, "eta": "82:05:36", "grad_norm": 0.0048, "loss": 0.0531, "lr": "4.278e-05", "step": 4281, "steps": "24.0s,4281/16595" }, { "epoch": 1.290147634829768, "eta": "82:29:49", "grad_norm": 0.0055, "loss": 0.0629, "lr": "4.277e-05", "step": 4282, "steps": "24.12s,4282/16595" }, { "epoch": 1.2904489304007232, "eta": "81:36:04", "grad_norm": 0.0052, "loss": 0.0501, "lr": "4.277e-05", "step": 4283, "steps": "23.86s,4283/16595" }, { "epoch": 1.2907502259716783, "eta": "81:50:02", "grad_norm": 0.0054, "loss": 0.059, "lr": "4.277e-05", "step": 4284, "steps": "23.93s,4284/16595" }, { "epoch": 1.2910515215426333, "eta": "81:45:32", "grad_norm": 0.0064, "loss": 0.0517, "lr": "4.276e-05", "step": 4285, "steps": "23.91s,4285/16595" }, { "epoch": 1.2913528171135884, "eta": "81:22:34", "grad_norm": 0.0056, "loss": 0.0385, "lr": "4.276e-05", "step": 4286, "steps": "23.8s,4286/16595" }, { "epoch": 1.2916541126845436, "eta": "80:45:14", "grad_norm": 0.0069, "loss": 0.0523, "lr": "4.276e-05", "step": 4287, "steps": "23.62s,4287/16595" }, { "epoch": 1.2919554082554987, "eta": "81:25:52", "grad_norm": 0.0048, "loss": 0.0538, "lr": "4.275e-05", "step": 4288, "steps": "23.82s,4288/16595" }, { "epoch": 1.2922567038264536, "eta": "81:25:28", "grad_norm": 0.0067, "loss": 0.0587, "lr": "4.275e-05", "step": 4289, "steps": "23.82s,4289/16595" }, { "epoch": 1.2925579993974088, "eta": "80:07:09", "grad_norm": 0.007, "loss": 0.0515, "lr": "4.275e-05", "step": 4290, "steps": "23.44s,4290/16595" }, { "epoch": 1.292859294968364, "eta": "81:32:53", "grad_norm": 0.0045, "loss": 0.0415, "lr": "4.274e-05", "step": 4291, "steps": "23.86s,4291/16595" }, { "epoch": 1.293160590539319, "eta": "81:22:14", "grad_norm": 0.005, "loss": 0.0689, "lr": "4.274e-05", "step": 4292, "steps": "23.81s,4292/16595" }, { "epoch": 1.2934618861102742, "eta": "81:50:32", "grad_norm": 0.0051, "loss": 0.0538, "lr": "4.274e-05", "step": 4293, "steps": "23.95s,4293/16595" }, { "epoch": 1.2937631816812294, "eta": "81:21:26", "grad_norm": 0.0056, "loss": 0.0711, "lr": "4.273e-05", "step": 4294, "steps": "23.81s,4294/16595" }, { "epoch": 1.2940644772521843, "eta": "81:16:57", "grad_norm": 0.0061, "loss": 0.0483, "lr": "4.273e-05", "step": 4295, "steps": "23.79s,4295/16595" }, { "epoch": 1.2943657728231395, "eta": "81:24:45", "grad_norm": 0.0052, "loss": 0.0607, "lr": "4.273e-05", "step": 4296, "steps": "23.83s,4296/16595" }, { "epoch": 1.2946670683940946, "eta": "81:46:54", "grad_norm": 0.006, "loss": 0.051, "lr": "4.272e-05", "step": 4297, "steps": "23.94s,4297/16595" }, { "epoch": 1.2949683639650498, "eta": "81:07:33", "grad_norm": 0.0052, "loss": 0.0328, "lr": "4.272e-05", "step": 4298, "steps": "23.75s,4298/16595" }, { "epoch": 1.2952696595360047, "eta": "80:38:28", "grad_norm": 0.0059, "loss": 0.0538, "lr": "4.272e-05", "step": 4299, "steps": "23.61s,4299/16595" }, { "epoch": 1.2955709551069599, "eta": "80:05:17", "grad_norm": 0.0056, "loss": 0.0509, "lr": "4.271e-05", "step": 4300, "steps": "23.45s,4300/16595" }, { "epoch": 1.295872250677915, "eta": "81:08:25", "grad_norm": 0.0059, "loss": 0.0471, "lr": "4.271e-05", "step": 4301, "steps": "23.76s,4301/16595" }, { "epoch": 1.2961735462488702, "eta": "81:36:42", "grad_norm": 0.0056, "loss": 0.0576, "lr": "4.271e-05", "step": 4302, "steps": "23.9s,4302/16595" }, { "epoch": 1.2964748418198253, "eta": "80:16:24", "grad_norm": 0.0089, "loss": 0.0351, "lr": "4.270e-05", "step": 4303, "steps": "23.51s,4303/16595" }, { "epoch": 1.2967761373907805, "eta": "81:33:51", "grad_norm": 0.0047, "loss": 0.0698, "lr": "4.270e-05", "step": 4304, "steps": "23.89s,4304/16595" }, { "epoch": 1.2970774329617354, "eta": "79:55:08", "grad_norm": 0.0049, "loss": 0.0607, "lr": "4.270e-05", "step": 4305, "steps": "23.41s,4305/16595" }, { "epoch": 1.2973787285326905, "eta": "81:39:12", "grad_norm": 0.0051, "loss": 0.0432, "lr": "4.269e-05", "step": 4306, "steps": "23.92s,4306/16595" }, { "epoch": 1.2976800241036457, "eta": "80:45:34", "grad_norm": 0.0052, "loss": 0.0557, "lr": "4.269e-05", "step": 4307, "steps": "23.66s,4307/16595" }, { "epoch": 1.2979813196746008, "eta": "81:26:07", "grad_norm": 0.0056, "loss": 0.0413, "lr": "4.269e-05", "step": 4308, "steps": "23.86s,4308/16595" }, { "epoch": 1.2982826152455558, "eta": "79:35:09", "grad_norm": 0.0049, "loss": 0.0511, "lr": "4.268e-05", "step": 4309, "steps": "23.32s,4309/16595" }, { "epoch": 1.298583910816511, "eta": "80:56:40", "grad_norm": 0.0043, "loss": 0.0488, "lr": "4.268e-05", "step": 4310, "steps": "23.72s,4310/16595" }, { "epoch": 1.298885206387466, "eta": "80:41:56", "grad_norm": 0.0042, "loss": 0.047, "lr": "4.268e-05", "step": 4311, "steps": "23.65s,4311/16595" }, { "epoch": 1.2991865019584212, "eta": "79:58:33", "grad_norm": 0.0055, "loss": 0.0472, "lr": "4.267e-05", "step": 4312, "steps": "23.44s,4312/16595" }, { "epoch": 1.2994877975293764, "eta": "81:18:00", "grad_norm": 0.0058, "loss": 0.0616, "lr": "4.267e-05", "step": 4313, "steps": "23.83s,4313/16595" }, { "epoch": 1.2997890931003315, "eta": "81:03:16", "grad_norm": 0.0057, "loss": 0.053, "lr": "4.267e-05", "step": 4314, "steps": "23.76s,4314/16595" }, { "epoch": 1.3000903886712865, "eta": "80:58:47", "grad_norm": 0.0047, "loss": 0.0652, "lr": "4.266e-05", "step": 4315, "steps": "23.74s,4315/16595" }, { "epoch": 1.3003916842422416, "eta": "80:58:23", "grad_norm": 0.0045, "loss": 0.0487, "lr": "4.266e-05", "step": 4316, "steps": "23.74s,4316/16595" }, { "epoch": 1.3006929798131968, "eta": "80:12:58", "grad_norm": 0.0045, "loss": 0.0583, "lr": "4.266e-05", "step": 4317, "steps": "23.52s,4317/16595" }, { "epoch": 1.300994275384152, "eta": "80:20:46", "grad_norm": 0.0052, "loss": 0.0469, "lr": "4.265e-05", "step": 4318, "steps": "23.56s,4318/16595" }, { "epoch": 1.3012955709551068, "eta": "79:51:43", "grad_norm": 0.0055, "loss": 0.058, "lr": "4.265e-05", "step": 4319, "steps": "23.42s,4319/16595" }, { "epoch": 1.301596866526062, "eta": "80:36:21", "grad_norm": 0.0052, "loss": 0.0721, "lr": "4.265e-05", "step": 4320, "steps": "23.64s,4320/16595" }, { "epoch": 1.3018981620970171, "eta": "81:04:35", "grad_norm": 0.0065, "loss": 0.0576, "lr": "4.264e-05", "step": 4321, "steps": "23.78s,4321/16595" }, { "epoch": 1.3021994576679723, "eta": "81:10:20", "grad_norm": 0.0049, "loss": 0.0503, "lr": "4.264e-05", "step": 4322, "steps": "23.81s,4322/16595" }, { "epoch": 1.3025007532389274, "eta": "81:20:09", "grad_norm": 0.0045, "loss": 0.0614, "lr": "4.264e-05", "step": 4323, "steps": "23.86s,4323/16595" }, { "epoch": 1.3028020488098826, "eta": "80:22:30", "grad_norm": 0.0049, "loss": 0.0442, "lr": "4.263e-05", "step": 4324, "steps": "23.58s,4324/16595" }, { "epoch": 1.3031033443808375, "eta": "81:03:00", "grad_norm": 0.0062, "loss": 0.0569, "lr": "4.263e-05", "step": 4325, "steps": "23.78s,4325/16595" }, { "epoch": 1.3034046399517927, "eta": "80:52:23", "grad_norm": 0.0055, "loss": 0.0465, "lr": "4.263e-05", "step": 4326, "steps": "23.73s,4326/16595" }, { "epoch": 1.3037059355227478, "eta": "79:58:49", "grad_norm": 0.0055, "loss": 0.064, "lr": "4.262e-05", "step": 4327, "steps": "23.47s,4327/16595" }, { "epoch": 1.304007231093703, "eta": "80:04:34", "grad_norm": 0.0051, "loss": 0.0558, "lr": "4.262e-05", "step": 4328, "steps": "23.5s,4328/16595" }, { "epoch": 1.304308526664658, "eta": "81:34:08", "grad_norm": 0.005, "loss": 0.0535, "lr": "4.262e-05", "step": 4329, "steps": "23.94s,4329/16595" }, { "epoch": 1.304609822235613, "eta": "80:36:29", "grad_norm": 0.0066, "loss": 0.0637, "lr": "4.261e-05", "step": 4330, "steps": "23.66s,4330/16595" }, { "epoch": 1.3049111178065682, "eta": "79:47:02", "grad_norm": 0.006, "loss": 0.042, "lr": "4.261e-05", "step": 4331, "steps": "23.42s,4331/16595" }, { "epoch": 1.3052124133775234, "eta": "81:26:48", "grad_norm": 0.0072, "loss": 0.0455, "lr": "4.261e-05", "step": 4332, "steps": "23.91s,4332/16595" }, { "epoch": 1.3055137089484785, "eta": "80:12:50", "grad_norm": 0.0051, "loss": 0.0577, "lr": "4.260e-05", "step": 4333, "steps": "23.55s,4333/16595" }, { "epoch": 1.3058150045194337, "eta": "81:01:29", "grad_norm": 0.0049, "loss": 0.0539, "lr": "4.260e-05", "step": 4334, "steps": "23.79s,4334/16595" }, { "epoch": 1.3061163000903886, "eta": "80:22:16", "grad_norm": 0.0054, "loss": 0.06, "lr": "4.260e-05", "step": 4335, "steps": "23.6s,4335/16595" }, { "epoch": 1.3064175956613437, "eta": "80:30:02", "grad_norm": 0.005, "loss": 0.0614, "lr": "4.259e-05", "step": 4336, "steps": "23.64s,4336/16595" }, { "epoch": 1.306718891232299, "eta": "80:27:36", "grad_norm": 0.0076, "loss": 0.0428, "lr": "4.259e-05", "step": 4337, "steps": "23.63s,4337/16595" }, { "epoch": 1.307020186803254, "eta": "81:32:35", "grad_norm": 0.0056, "loss": 0.0595, "lr": "4.258e-05", "step": 4338, "steps": "23.95s,4338/16595" }, { "epoch": 1.307321482374209, "eta": "80:10:28", "grad_norm": 0.0057, "loss": 0.0497, "lr": "4.258e-05", "step": 4339, "steps": "23.55s,4339/16595" }, { "epoch": 1.3076227779451641, "eta": "80:55:01", "grad_norm": 0.0075, "loss": 0.0384, "lr": "4.258e-05", "step": 4340, "steps": "23.77s,4340/16595" }, { "epoch": 1.3079240735161193, "eta": "80:58:42", "grad_norm": 0.0057, "loss": 0.0485, "lr": "4.257e-05", "step": 4341, "steps": "23.79s,4341/16595" }, { "epoch": 1.3082253690870744, "eta": "81:57:32", "grad_norm": 0.006, "loss": 0.0462, "lr": "4.257e-05", "step": 4342, "steps": "24.08s,4342/16595" }, { "epoch": 1.3085266646580296, "eta": "81:02:00", "grad_norm": 0.0049, "loss": 0.0403, "lr": "4.257e-05", "step": 4343, "steps": "23.81s,4343/16595" }, { "epoch": 1.3088279602289847, "eta": "81:36:18", "grad_norm": 0.009, "loss": 0.0707, "lr": "4.256e-05", "step": 4344, "steps": "23.98s,4344/16595" }, { "epoch": 1.3091292557999397, "eta": "79:41:35", "grad_norm": 0.0089, "loss": 0.0586, "lr": "4.256e-05", "step": 4345, "steps": "23.42s,4345/16595" }, { "epoch": 1.3094305513708948, "eta": "80:38:21", "grad_norm": 0.0062, "loss": 0.0629, "lr": "4.256e-05", "step": 4346, "steps": "23.7s,4346/16595" }, { "epoch": 1.30973184694185, "eta": "80:52:14", "grad_norm": 0.0066, "loss": 0.0509, "lr": "4.255e-05", "step": 4347, "steps": "23.77s,4347/16595" }, { "epoch": 1.3100331425128051, "eta": "80:55:56", "grad_norm": 0.0054, "loss": 0.0572, "lr": "4.255e-05", "step": 4348, "steps": "23.79s,4348/16595" }, { "epoch": 1.31033443808376, "eta": "81:26:09", "grad_norm": 0.0049, "loss": 0.0452, "lr": "4.255e-05", "step": 4349, "steps": "23.94s,4349/16595" }, { "epoch": 1.3106357336547152, "eta": "80:57:11", "grad_norm": 0.0054, "loss": 0.0559, "lr": "4.254e-05", "step": 4350, "steps": "23.8s,4350/16595" }, { "epoch": 1.3109370292256703, "eta": "80:56:47", "grad_norm": 0.0061, "loss": 0.0502, "lr": "4.254e-05", "step": 4351, "steps": "23.8s,4351/16595" }, { "epoch": 1.3112383247966255, "eta": "80:54:20", "grad_norm": 0.0067, "loss": 0.0544, "lr": "4.254e-05", "step": 4352, "steps": "23.79s,4352/16595" }, { "epoch": 1.3115396203675806, "eta": "80:49:52", "grad_norm": 0.0063, "loss": 0.0405, "lr": "4.253e-05", "step": 4353, "steps": "23.77s,4353/16595" }, { "epoch": 1.3118409159385358, "eta": "80:06:37", "grad_norm": 0.0048, "loss": 0.0646, "lr": "4.253e-05", "step": 4354, "steps": "23.56s,4354/16595" }, { "epoch": 1.3121422115094907, "eta": "81:05:24", "grad_norm": 0.0057, "loss": 0.0535, "lr": "4.253e-05", "step": 4355, "steps": "23.85s,4355/16595" }, { "epoch": 1.3124435070804459, "eta": "80:34:24", "grad_norm": 0.0063, "loss": 0.0412, "lr": "4.252e-05", "step": 4356, "steps": "23.7s,4356/16595" }, { "epoch": 1.312744802651401, "eta": "79:45:03", "grad_norm": 0.0103, "loss": 0.0609, "lr": "4.252e-05", "step": 4357, "steps": "23.46s,4357/16595" }, { "epoch": 1.3130460982223562, "eta": "80:03:01", "grad_norm": 0.007, "loss": 0.0465, "lr": "4.252e-05", "step": 4358, "steps": "23.55s,4358/16595" }, { "epoch": 1.3133473937933111, "eta": "80:23:01", "grad_norm": 0.006, "loss": 0.0604, "lr": "4.251e-05", "step": 4359, "steps": "23.65s,4359/16595" }, { "epoch": 1.3136486893642663, "eta": "80:04:16", "grad_norm": 0.0055, "loss": 0.052, "lr": "4.251e-05", "step": 4360, "steps": "23.56s,4360/16595" }, { "epoch": 1.3139499849352214, "eta": "80:12:02", "grad_norm": 0.0068, "loss": 0.0381, "lr": "4.251e-05", "step": 4361, "steps": "23.6s,4361/16595" }, { "epoch": 1.3142512805061766, "eta": "80:38:09", "grad_norm": 0.0044, "loss": 0.0507, "lr": "4.250e-05", "step": 4362, "steps": "23.73s,4362/16595" }, { "epoch": 1.3145525760771317, "eta": "80:35:43", "grad_norm": 0.0053, "loss": 0.042, "lr": "4.250e-05", "step": 4363, "steps": "23.72s,4363/16595" }, { "epoch": 1.3148538716480869, "eta": "80:02:42", "grad_norm": 0.0055, "loss": 0.0588, "lr": "4.250e-05", "step": 4364, "steps": "23.56s,4364/16595" }, { "epoch": 1.3151551672190418, "eta": "81:46:16", "grad_norm": 0.0068, "loss": 0.0494, "lr": "4.249e-05", "step": 4365, "steps": "24.07s,4365/16595" }, { "epoch": 1.315456462789997, "eta": "81:39:45", "grad_norm": 0.0065, "loss": 0.0469, "lr": "4.249e-05", "step": 4366, "steps": "24.04s,4366/16595" }, { "epoch": 1.315757758360952, "eta": "80:09:40", "grad_norm": 0.0065, "loss": 0.0644, "lr": "4.249e-05", "step": 4367, "steps": "23.6s,4367/16595" }, { "epoch": 1.3160590539319073, "eta": "82:21:44", "grad_norm": 0.0058, "loss": 0.0472, "lr": "4.248e-05", "step": 4368, "steps": "24.25s,4368/16595" }, { "epoch": 1.3163603495028622, "eta": "79:50:33", "grad_norm": 0.0053, "loss": 0.0517, "lr": "4.248e-05", "step": 4369, "steps": "23.51s,4369/16595" }, { "epoch": 1.3166616450738173, "eta": "81:56:29", "grad_norm": 0.0053, "loss": 0.0394, "lr": "4.248e-05", "step": 4370, "steps": "24.13s,4370/16595" }, { "epoch": 1.3169629406447725, "eta": "81:01:04", "grad_norm": 0.0057, "loss": 0.0612, "lr": "4.247e-05", "step": 4371, "steps": "23.86s,4371/16595" }, { "epoch": 1.3172642362157276, "eta": "80:07:42", "grad_norm": 0.0055, "loss": 0.0618, "lr": "4.247e-05", "step": 4372, "steps": "23.6s,4372/16595" }, { "epoch": 1.3175655317866828, "eta": "80:23:36", "grad_norm": 0.0052, "loss": 0.0418, "lr": "4.247e-05", "step": 4373, "steps": "23.68s,4373/16595" }, { "epoch": 1.317866827357638, "eta": "81:20:15", "grad_norm": 0.0062, "loss": 0.0554, "lr": "4.246e-05", "step": 4374, "steps": "23.96s,4374/16595" }, { "epoch": 1.3181681229285929, "eta": "80:45:13", "grad_norm": 0.0057, "loss": 0.0535, "lr": "4.246e-05", "step": 4375, "steps": "23.79s,4375/16595" }, { "epoch": 1.318469418499548, "eta": "80:44:50", "grad_norm": 0.0053, "loss": 0.0456, "lr": "4.246e-05", "step": 4376, "steps": "23.79s,4376/16595" }, { "epoch": 1.3187707140705032, "eta": "81:02:45", "grad_norm": 0.0062, "loss": 0.0503, "lr": "4.245e-05", "step": 4377, "steps": "23.88s,4377/16595" }, { "epoch": 1.3190720096414583, "eta": "79:55:10", "grad_norm": 0.0066, "loss": 0.0381, "lr": "4.245e-05", "step": 4378, "steps": "23.55s,4378/16595" }, { "epoch": 1.3193733052124132, "eta": "79:58:51", "grad_norm": 0.0047, "loss": 0.0577, "lr": "4.245e-05", "step": 4379, "steps": "23.57s,4379/16595" }, { "epoch": 1.3196746007833684, "eta": "81:15:49", "grad_norm": 0.0068, "loss": 0.0417, "lr": "4.244e-05", "step": 4380, "steps": "23.95s,4380/16595" }, { "epoch": 1.3199758963543236, "eta": "80:04:10", "grad_norm": 0.0059, "loss": 0.0545, "lr": "4.244e-05", "step": 4381, "steps": "23.6s,4381/16595" }, { "epoch": 1.3202771919252787, "eta": "80:58:44", "grad_norm": 0.0056, "loss": 0.0477, "lr": "4.243e-05", "step": 4382, "steps": "23.87s,4382/16595" }, { "epoch": 1.3205784874962339, "eta": "80:40:01", "grad_norm": 0.0067, "loss": 0.0488, "lr": "4.243e-05", "step": 4383, "steps": "23.78s,4383/16595" }, { "epoch": 1.320879783067189, "eta": "80:43:41", "grad_norm": 0.0059, "loss": 0.0564, "lr": "4.243e-05", "step": 4384, "steps": "23.8s,4384/16595" }, { "epoch": 1.3211810786381442, "eta": "81:07:43", "grad_norm": 0.0051, "loss": 0.0502, "lr": "4.242e-05", "step": 4385, "steps": "23.92s,4385/16595" }, { "epoch": 1.321482374209099, "eta": "81:23:36", "grad_norm": 0.007, "loss": 0.0427, "lr": "4.242e-05", "step": 4386, "steps": "24.0s,4386/16595" }, { "epoch": 1.3217836697800542, "eta": "80:34:22", "grad_norm": 0.0058, "loss": 0.0522, "lr": "4.242e-05", "step": 4387, "steps": "23.76s,4387/16595" }, { "epoch": 1.3220849653510094, "eta": "79:51:14", "grad_norm": 0.0058, "loss": 0.0328, "lr": "4.241e-05", "step": 4388, "steps": "23.55s,4388/16595" }, { "epoch": 1.3223862609219643, "eta": "81:12:13", "grad_norm": 0.0072, "loss": 0.0495, "lr": "4.241e-05", "step": 4389, "steps": "23.95s,4389/16595" }, { "epoch": 1.3226875564929195, "eta": "81:09:47", "grad_norm": 0.0071, "loss": 0.0392, "lr": "4.241e-05", "step": 4390, "steps": "23.94s,4390/16595" }, { "epoch": 1.3229888520638746, "eta": "80:24:38", "grad_norm": 0.0063, "loss": 0.0482, "lr": "4.240e-05", "step": 4391, "steps": "23.72s,4391/16595" }, { "epoch": 1.3232901476348298, "eta": "80:54:45", "grad_norm": 0.0057, "loss": 0.0605, "lr": "4.240e-05", "step": 4392, "steps": "23.87s,4392/16595" }, { "epoch": 1.323591443205785, "eta": "79:33:00", "grad_norm": 0.0069, "loss": 0.0434, "lr": "4.240e-05", "step": 4393, "steps": "23.47s,4393/16595" }, { "epoch": 1.32389273877674, "eta": "80:33:37", "grad_norm": 0.0052, "loss": 0.0456, "lr": "4.239e-05", "step": 4394, "steps": "23.77s,4394/16595" }, { "epoch": 1.3241940343476952, "eta": "80:19:00", "grad_norm": 0.006, "loss": 0.044, "lr": "4.239e-05", "step": 4395, "steps": "23.7s,4395/16595" }, { "epoch": 1.3244953299186502, "eta": "80:30:48", "grad_norm": 0.0049, "loss": 0.0758, "lr": "4.239e-05", "step": 4396, "steps": "23.76s,4396/16595" }, { "epoch": 1.3247966254896053, "eta": "80:18:12", "grad_norm": 0.0061, "loss": 0.0455, "lr": "4.238e-05", "step": 4397, "steps": "23.7s,4397/16595" }, { "epoch": 1.3250979210605605, "eta": "80:30:00", "grad_norm": 0.0045, "loss": 0.0494, "lr": "4.238e-05", "step": 4398, "steps": "23.76s,4398/16595" }, { "epoch": 1.3253992166315154, "eta": "79:06:16", "grad_norm": 0.0053, "loss": 0.0395, "lr": "4.238e-05", "step": 4399, "steps": "23.35s,4399/16595" }, { "epoch": 1.3257005122024705, "eta": "80:33:17", "grad_norm": 0.0056, "loss": 0.047, "lr": "4.237e-05", "step": 4400, "steps": "23.78s,4400/16595" }, { "epoch": 1.3260018077734257, "eta": "173:37:44", "grad_norm": 0.0062, "loss": 0.0461, "lr": "4.237e-05", "step": 4401, "steps": "51.26s,4401/16595" }, { "epoch": 1.3263031033443808, "eta": "80:10:08", "grad_norm": 0.0056, "loss": 0.0371, "lr": "4.237e-05", "step": 4402, "steps": "23.67s,4402/16595" }, { "epoch": 1.326604398915336, "eta": "79:47:23", "grad_norm": 0.005, "loss": 0.0542, "lr": "4.236e-05", "step": 4403, "steps": "23.56s,4403/16595" }, { "epoch": 1.3269056944862911, "eta": "80:35:45", "grad_norm": 0.0052, "loss": 0.0485, "lr": "4.236e-05", "step": 4404, "steps": "23.8s,4404/16595" }, { "epoch": 1.3272069900572463, "eta": "80:47:33", "grad_norm": 0.0062, "loss": 0.0561, "lr": "4.236e-05", "step": 4405, "steps": "23.86s,4405/16595" }, { "epoch": 1.3275082856282012, "eta": "80:34:58", "grad_norm": 0.0045, "loss": 0.0473, "lr": "4.235e-05", "step": 4406, "steps": "23.8s,4406/16595" }, { "epoch": 1.3278095811991564, "eta": "81:21:17", "grad_norm": 0.0071, "loss": 0.0562, "lr": "4.235e-05", "step": 4407, "steps": "24.03s,4407/16595" }, { "epoch": 1.3281108767701115, "eta": "80:42:18", "grad_norm": 0.0059, "loss": 0.0397, "lr": "4.235e-05", "step": 4408, "steps": "23.84s,4408/16595" }, { "epoch": 1.3284121723410665, "eta": "81:26:35", "grad_norm": 0.0055, "loss": 0.0447, "lr": "4.234e-05", "step": 4409, "steps": "24.06s,4409/16595" }, { "epoch": 1.3287134679120216, "eta": "80:33:23", "grad_norm": 0.0058, "loss": 0.0712, "lr": "4.234e-05", "step": 4410, "steps": "23.8s,4410/16595" }, { "epoch": 1.3290147634829768, "eta": "80:41:06", "grad_norm": 0.0054, "loss": 0.0456, "lr": "4.234e-05", "step": 4411, "steps": "23.84s,4411/16595" }, { "epoch": 1.329316059053932, "eta": "80:30:33", "grad_norm": 0.0051, "loss": 0.0385, "lr": "4.233e-05", "step": 4412, "steps": "23.79s,4412/16595" }, { "epoch": 1.329617354624887, "eta": "79:55:38", "grad_norm": 0.0051, "loss": 0.0456, "lr": "4.233e-05", "step": 4413, "steps": "23.62s,4413/16595" }, { "epoch": 1.3299186501958422, "eta": "80:27:44", "grad_norm": 0.0061, "loss": 0.054, "lr": "4.233e-05", "step": 4414, "steps": "23.78s,4414/16595" }, { "epoch": 1.3302199457667974, "eta": "81:09:58", "grad_norm": 0.0045, "loss": 0.0522, "lr": "4.232e-05", "step": 4415, "steps": "23.99s,4415/16595" }, { "epoch": 1.3305212413377523, "eta": "80:51:18", "grad_norm": 0.0053, "loss": 0.0404, "lr": "4.232e-05", "step": 4416, "steps": "23.9s,4416/16595" }, { "epoch": 1.3308225369087074, "eta": "80:52:55", "grad_norm": 0.006, "loss": 0.0575, "lr": "4.231e-05", "step": 4417, "steps": "23.91s,4417/16595" }, { "epoch": 1.3311238324796626, "eta": "79:57:44", "grad_norm": 0.0055, "loss": 0.0647, "lr": "4.231e-05", "step": 4418, "steps": "23.64s,4418/16595" }, { "epoch": 1.3314251280506175, "eta": "81:14:27", "grad_norm": 0.0049, "loss": 0.0504, "lr": "4.231e-05", "step": 4419, "steps": "24.02s,4419/16595" }, { "epoch": 1.3317264236215727, "eta": "80:45:39", "grad_norm": 0.0045, "loss": 0.0625, "lr": "4.230e-05", "step": 4420, "steps": "23.88s,4420/16595" }, { "epoch": 1.3320277191925278, "eta": "79:56:33", "grad_norm": 0.006, "loss": 0.0562, "lr": "4.230e-05", "step": 4421, "steps": "23.64s,4421/16595" }, { "epoch": 1.332329014763483, "eta": "80:28:37", "grad_norm": 0.0059, "loss": 0.0556, "lr": "4.230e-05", "step": 4422, "steps": "23.8s,4422/16595" }, { "epoch": 1.3326303103344381, "eta": "81:14:53", "grad_norm": 0.0048, "loss": 0.052, "lr": "4.229e-05", "step": 4423, "steps": "24.03s,4423/16595" }, { "epoch": 1.3329316059053933, "eta": "80:01:27", "grad_norm": 0.005, "loss": 0.0741, "lr": "4.229e-05", "step": 4424, "steps": "23.67s,4424/16595" }, { "epoch": 1.3332329014763484, "eta": "80:13:14", "grad_norm": 0.005, "loss": 0.053, "lr": "4.229e-05", "step": 4425, "steps": "23.73s,4425/16595" }, { "epoch": 1.3335341970473034, "eta": "80:25:00", "grad_norm": 0.0051, "loss": 0.0563, "lr": "4.228e-05", "step": 4426, "steps": "23.79s,4426/16595" }, { "epoch": 1.3338354926182585, "eta": "80:44:53", "grad_norm": 0.0063, "loss": 0.0513, "lr": "4.228e-05", "step": 4427, "steps": "23.89s,4427/16595" }, { "epoch": 1.3341367881892137, "eta": "81:18:58", "grad_norm": 0.0051, "loss": 0.054, "lr": "4.228e-05", "step": 4428, "steps": "24.06s,4428/16595" }, { "epoch": 1.3344380837601686, "eta": "80:58:17", "grad_norm": 0.0052, "loss": 0.0559, "lr": "4.227e-05", "step": 4429, "steps": "23.96s,4429/16595" }, { "epoch": 1.3347393793311237, "eta": "79:46:55", "grad_norm": 0.0045, "loss": 0.0485, "lr": "4.227e-05", "step": 4430, "steps": "23.61s,4430/16595" }, { "epoch": 1.335040674902079, "eta": "80:27:04", "grad_norm": 0.006, "loss": 0.0579, "lr": "4.227e-05", "step": 4431, "steps": "23.81s,4431/16595" }, { "epoch": 1.335341970473034, "eta": "80:00:19", "grad_norm": 0.0061, "loss": 0.0419, "lr": "4.226e-05", "step": 4432, "steps": "23.68s,4432/16595" }, { "epoch": 1.3356432660439892, "eta": "79:33:35", "grad_norm": 0.0049, "loss": 0.0469, "lr": "4.226e-05", "step": 4433, "steps": "23.55s,4433/16595" }, { "epoch": 1.3359445616149443, "eta": "80:23:51", "grad_norm": 0.0046, "loss": 0.0574, "lr": "4.226e-05", "step": 4434, "steps": "23.8s,4434/16595" }, { "epoch": 1.3362458571858995, "eta": "80:27:31", "grad_norm": 0.0048, "loss": 0.0431, "lr": "4.225e-05", "step": 4435, "steps": "23.82s,4435/16595" }, { "epoch": 1.3365471527568544, "eta": "82:46:57", "grad_norm": 0.0073, "loss": 0.0708, "lr": "4.225e-05", "step": 4436, "steps": "24.51s,4436/16595" }, { "epoch": 1.3368484483278096, "eta": "80:26:43", "grad_norm": 0.0047, "loss": 0.0545, "lr": "4.225e-05", "step": 4437, "steps": "23.82s,4437/16595" }, { "epoch": 1.3371497438987647, "eta": "79:41:45", "grad_norm": 0.0055, "loss": 0.0648, "lr": "4.224e-05", "step": 4438, "steps": "23.6s,4438/16595" }, { "epoch": 1.3374510394697197, "eta": "81:06:27", "grad_norm": 0.0049, "loss": 0.0447, "lr": "4.224e-05", "step": 4439, "steps": "24.02s,4439/16595" }, { "epoch": 1.3377523350406748, "eta": "79:38:56", "grad_norm": 0.0047, "loss": 0.0374, "lr": "4.224e-05", "step": 4440, "steps": "23.59s,4440/16595" }, { "epoch": 1.33805363061163, "eta": "80:35:15", "grad_norm": 0.0049, "loss": 0.0459, "lr": "4.223e-05", "step": 4441, "steps": "23.87s,4441/16595" }, { "epoch": 1.3383549261825851, "eta": "81:13:21", "grad_norm": 0.0052, "loss": 0.0435, "lr": "4.223e-05", "step": 4442, "steps": "24.06s,4442/16595" }, { "epoch": 1.3386562217535403, "eta": "80:26:22", "grad_norm": 0.0056, "loss": 0.0516, "lr": "4.222e-05", "step": 4443, "steps": "23.83s,4443/16595" }, { "epoch": 1.3389575173244954, "eta": "80:11:47", "grad_norm": 0.0055, "loss": 0.0568, "lr": "4.222e-05", "step": 4444, "steps": "23.76s,4444/16595" }, { "epoch": 1.3392588128954506, "eta": "80:25:34", "grad_norm": 0.005, "loss": 0.0437, "lr": "4.222e-05", "step": 4445, "steps": "23.83s,4445/16595" }, { "epoch": 1.3395601084664055, "eta": "80:41:22", "grad_norm": 0.005, "loss": 0.041, "lr": "4.221e-05", "step": 4446, "steps": "23.91s,4446/16595" }, { "epoch": 1.3398614040373606, "eta": "79:58:27", "grad_norm": 0.0051, "loss": 0.0553, "lr": "4.221e-05", "step": 4447, "steps": "23.7s,4447/16595" }, { "epoch": 1.3401626996083158, "eta": "80:58:48", "grad_norm": 0.0081, "loss": 0.0543, "lr": "4.221e-05", "step": 4448, "steps": "24.0s,4448/16595" }, { "epoch": 1.3404639951792707, "eta": "79:05:02", "grad_norm": 0.0052, "loss": 0.0502, "lr": "4.220e-05", "step": 4449, "steps": "23.44s,4449/16595" }, { "epoch": 1.3407652907502259, "eta": "80:15:29", "grad_norm": 0.0046, "loss": 0.0523, "lr": "4.220e-05", "step": 4450, "steps": "23.79s,4450/16595" }, { "epoch": 1.341066586321181, "eta": "80:37:21", "grad_norm": 0.0052, "loss": 0.0493, "lr": "4.220e-05", "step": 4451, "steps": "23.9s,4451/16595" }, { "epoch": 1.3413678818921362, "eta": "80:30:53", "grad_norm": 0.0046, "loss": 0.0409, "lr": "4.219e-05", "step": 4452, "steps": "23.87s,4452/16595" }, { "epoch": 1.3416691774630913, "eta": "80:38:35", "grad_norm": 0.0051, "loss": 0.0596, "lr": "4.219e-05", "step": 4453, "steps": "23.91s,4453/16595" }, { "epoch": 1.3419704730340465, "eta": "80:50:19", "grad_norm": 0.0056, "loss": 0.0542, "lr": "4.219e-05", "step": 4454, "steps": "23.97s,4454/16595" }, { "epoch": 1.3422717686050016, "eta": "80:19:34", "grad_norm": 0.0055, "loss": 0.05, "lr": "4.218e-05", "step": 4455, "steps": "23.82s,4455/16595" }, { "epoch": 1.3425730641759566, "eta": "79:32:39", "grad_norm": 0.0058, "loss": 0.0497, "lr": "4.218e-05", "step": 4456, "steps": "23.59s,4456/16595" }, { "epoch": 1.3428743597469117, "eta": "79:58:33", "grad_norm": 0.0047, "loss": 0.0534, "lr": "4.218e-05", "step": 4457, "steps": "23.72s,4457/16595" }, { "epoch": 1.3431756553178669, "eta": "79:41:58", "grad_norm": 0.0061, "loss": 0.0497, "lr": "4.217e-05", "step": 4458, "steps": "23.64s,4458/16595" }, { "epoch": 1.3434769508888218, "eta": "79:05:10", "grad_norm": 0.0044, "loss": 0.0429, "lr": "4.217e-05", "step": 4459, "steps": "23.46s,4459/16595" }, { "epoch": 1.343778246459777, "eta": "80:19:37", "grad_norm": 0.0048, "loss": 0.0604, "lr": "4.217e-05", "step": 4460, "steps": "23.83s,4460/16595" }, { "epoch": 1.344079542030732, "eta": "79:38:46", "grad_norm": 0.0052, "loss": 0.0595, "lr": "4.216e-05", "step": 4461, "steps": "23.63s,4461/16595" }, { "epoch": 1.3443808376016873, "eta": "80:00:37", "grad_norm": 0.0056, "loss": 0.0475, "lr": "4.216e-05", "step": 4462, "steps": "23.74s,4462/16595" }, { "epoch": 1.3446821331726424, "eta": "80:00:13", "grad_norm": 0.0055, "loss": 0.0522, "lr": "4.216e-05", "step": 4463, "steps": "23.74s,4463/16595" }, { "epoch": 1.3449834287435976, "eta": "80:30:09", "grad_norm": 0.0059, "loss": 0.0436, "lr": "4.215e-05", "step": 4464, "steps": "23.89s,4464/16595" }, { "epoch": 1.3452847243145527, "eta": "80:39:52", "grad_norm": 0.0051, "loss": 0.0566, "lr": "4.215e-05", "step": 4465, "steps": "23.94s,4465/16595" }, { "epoch": 1.3455860198855076, "eta": "79:00:25", "grad_norm": 0.0045, "loss": 0.0485, "lr": "4.215e-05", "step": 4466, "steps": "23.45s,4466/16595" }, { "epoch": 1.3458873154564628, "eta": "80:35:01", "grad_norm": 0.0049, "loss": 0.0545, "lr": "4.214e-05", "step": 4467, "steps": "23.92s,4467/16595" }, { "epoch": 1.346188611027418, "eta": "80:12:23", "grad_norm": 0.0068, "loss": 0.0329, "lr": "4.214e-05", "step": 4468, "steps": "23.81s,4468/16595" }, { "epoch": 1.3464899065983729, "eta": "80:32:12", "grad_norm": 0.0059, "loss": 0.0551, "lr": "4.213e-05", "step": 4469, "steps": "23.91s,4469/16595" }, { "epoch": 1.346791202169328, "eta": "80:05:32", "grad_norm": 0.0065, "loss": 0.045, "lr": "4.213e-05", "step": 4470, "steps": "23.78s,4470/16595" }, { "epoch": 1.3470924977402832, "eta": "78:54:25", "grad_norm": 0.0053, "loss": 0.0592, "lr": "4.213e-05", "step": 4471, "steps": "23.43s,4471/16595" }, { "epoch": 1.3473937933112383, "eta": "80:02:43", "grad_norm": 0.0044, "loss": 0.05, "lr": "4.212e-05", "step": 4472, "steps": "23.77s,4472/16595" }, { "epoch": 1.3476950888821935, "eta": "79:46:10", "grad_norm": 0.0048, "loss": 0.0442, "lr": "4.212e-05", "step": 4473, "steps": "23.69s,4473/16595" }, { "epoch": 1.3479963844531486, "eta": "80:50:25", "grad_norm": 0.0064, "loss": 0.0679, "lr": "4.212e-05", "step": 4474, "steps": "24.01s,4474/16595" }, { "epoch": 1.3482976800241038, "eta": "80:50:01", "grad_norm": 0.0051, "loss": 0.0567, "lr": "4.211e-05", "step": 4475, "steps": "24.01s,4475/16595" }, { "epoch": 1.3485989755950587, "eta": "79:26:48", "grad_norm": 0.0047, "loss": 0.0489, "lr": "4.211e-05", "step": 4476, "steps": "23.6s,4476/16595" }, { "epoch": 1.3489002711660139, "eta": "80:47:12", "grad_norm": 0.0059, "loss": 0.057, "lr": "4.211e-05", "step": 4477, "steps": "24.0s,4477/16595" }, { "epoch": 1.349201566736969, "eta": "80:20:32", "grad_norm": 0.0049, "loss": 0.0565, "lr": "4.210e-05", "step": 4478, "steps": "23.87s,4478/16595" }, { "epoch": 1.3495028623079242, "eta": "81:00:32", "grad_norm": 0.0048, "loss": 0.0549, "lr": "4.210e-05", "step": 4479, "steps": "24.07s,4479/16595" }, { "epoch": 1.349804157878879, "eta": "79:53:30", "grad_norm": 0.0054, "loss": 0.0599, "lr": "4.210e-05", "step": 4480, "steps": "23.74s,4480/16595" }, { "epoch": 1.3501054534498342, "eta": "79:38:58", "grad_norm": 0.0047, "loss": 0.0415, "lr": "4.209e-05", "step": 4481, "steps": "23.67s,4481/16595" }, { "epoch": 1.3504067490207894, "eta": "81:09:25", "grad_norm": 0.005, "loss": 0.0506, "lr": "4.209e-05", "step": 4482, "steps": "24.12s,4482/16595" }, { "epoch": 1.3507080445917445, "eta": "79:58:22", "grad_norm": 0.0044, "loss": 0.0814, "lr": "4.209e-05", "step": 4483, "steps": "23.77s,4483/16595" }, { "epoch": 1.3510093401626997, "eta": "80:40:21", "grad_norm": 0.0046, "loss": 0.064, "lr": "4.208e-05", "step": 4484, "steps": "23.98s,4484/16595" }, { "epoch": 1.3513106357336548, "eta": "80:37:56", "grad_norm": 0.0056, "loss": 0.0524, "lr": "4.208e-05", "step": 4485, "steps": "23.97s,4485/16595" }, { "epoch": 1.3516119313046098, "eta": "79:14:48", "grad_norm": 0.0046, "loss": 0.0377, "lr": "4.208e-05", "step": 4486, "steps": "23.56s,4486/16595" }, { "epoch": 1.351913226875565, "eta": "79:56:47", "grad_norm": 0.0063, "loss": 0.0574, "lr": "4.207e-05", "step": 4487, "steps": "23.77s,4487/16595" }, { "epoch": 1.35221452244652, "eta": "79:14:00", "grad_norm": 0.0052, "loss": 0.0541, "lr": "4.207e-05", "step": 4488, "steps": "23.56s,4488/16595" }, { "epoch": 1.3525158180174752, "eta": "80:52:29", "grad_norm": 0.0074, "loss": 0.0488, "lr": "4.207e-05", "step": 4489, "steps": "24.05s,4489/16595" }, { "epoch": 1.3528171135884302, "eta": "79:29:22", "grad_norm": 0.0053, "loss": 0.0493, "lr": "4.206e-05", "step": 4490, "steps": "23.64s,4490/16595" }, { "epoch": 1.3531184091593853, "eta": "79:30:59", "grad_norm": 0.0056, "loss": 0.0481, "lr": "4.206e-05", "step": 4491, "steps": "23.65s,4491/16595" }, { "epoch": 1.3534197047303405, "eta": "80:06:54", "grad_norm": 0.0067, "loss": 0.0552, "lr": "4.205e-05", "step": 4492, "steps": "23.83s,4492/16595" }, { "epoch": 1.3537210003012956, "eta": "80:30:42", "grad_norm": 0.0052, "loss": 0.0522, "lr": "4.205e-05", "step": 4493, "steps": "23.95s,4493/16595" }, { "epoch": 1.3540222958722508, "eta": "79:43:55", "grad_norm": 0.0059, "loss": 0.0325, "lr": "4.205e-05", "step": 4494, "steps": "23.72s,4494/16595" }, { "epoch": 1.354323591443206, "eta": "80:17:49", "grad_norm": 0.0058, "loss": 0.0427, "lr": "4.204e-05", "step": 4495, "steps": "23.89s,4495/16595" }, { "epoch": 1.3546248870141608, "eta": "80:43:37", "grad_norm": 0.0045, "loss": 0.0577, "lr": "4.204e-05", "step": 4496, "steps": "24.02s,4496/16595" }, { "epoch": 1.354926182585116, "eta": "79:56:51", "grad_norm": 0.0047, "loss": 0.0626, "lr": "4.204e-05", "step": 4497, "steps": "23.79s,4497/16595" }, { "epoch": 1.3552274781560711, "eta": "79:34:16", "grad_norm": 0.0058, "loss": 0.0438, "lr": "4.203e-05", "step": 4498, "steps": "23.68s,4498/16595" }, { "epoch": 1.3555287737270263, "eta": "79:45:59", "grad_norm": 0.0048, "loss": 0.0503, "lr": "4.203e-05", "step": 4499, "steps": "23.74s,4499/16595" }, { "epoch": 1.3558300692979812, "eta": "78:22:56", "grad_norm": 0.0049, "loss": 0.0336, "lr": "4.203e-05", "step": 4500, "steps": "23.33s,4500/16595" }, { "epoch": 1.3561313648689364, "eta": "79:51:14", "grad_norm": 0.0047, "loss": 0.062, "lr": "4.202e-05", "step": 4501, "steps": "23.77s,4501/16595" }, { "epoch": 1.3564326604398915, "eta": "79:44:47", "grad_norm": 0.0049, "loss": 0.0375, "lr": "4.202e-05", "step": 4502, "steps": "23.74s,4502/16595" }, { "epoch": 1.3567339560108467, "eta": "79:50:26", "grad_norm": 0.0049, "loss": 0.0545, "lr": "4.202e-05", "step": 4503, "steps": "23.77s,4503/16595" }, { "epoch": 1.3570352515818018, "eta": "79:31:54", "grad_norm": 0.0066, "loss": 0.0453, "lr": "4.201e-05", "step": 4504, "steps": "23.68s,4504/16595" }, { "epoch": 1.357336547152757, "eta": "79:53:41", "grad_norm": 0.0052, "loss": 0.051, "lr": "4.201e-05", "step": 4505, "steps": "23.79s,4505/16595" }, { "epoch": 1.357637842723712, "eta": "79:47:14", "grad_norm": 0.0049, "loss": 0.0368, "lr": "4.201e-05", "step": 4506, "steps": "23.76s,4506/16595" }, { "epoch": 1.357939138294667, "eta": "79:28:42", "grad_norm": 0.0071, "loss": 0.0584, "lr": "4.200e-05", "step": 4507, "steps": "23.67s,4507/16595" }, { "epoch": 1.3582404338656222, "eta": "79:30:20", "grad_norm": 0.0046, "loss": 0.0475, "lr": "4.200e-05", "step": 4508, "steps": "23.68s,4508/16595" }, { "epoch": 1.3585417294365774, "eta": "79:09:47", "grad_norm": 0.005, "loss": 0.0441, "lr": "4.200e-05", "step": 4509, "steps": "23.58s,4509/16595" }, { "epoch": 1.3588430250075323, "eta": "79:25:31", "grad_norm": 0.0051, "loss": 0.0499, "lr": "4.199e-05", "step": 4510, "steps": "23.66s,4510/16595" }, { "epoch": 1.3591443205784874, "eta": "79:21:05", "grad_norm": 0.0059, "loss": 0.0471, "lr": "4.199e-05", "step": 4511, "steps": "23.64s,4511/16595" }, { "epoch": 1.3594456161494426, "eta": "79:50:54", "grad_norm": 0.007, "loss": 0.0468, "lr": "4.198e-05", "step": 4512, "steps": "23.79s,4512/16595" }, { "epoch": 1.3597469117203977, "eta": "79:04:11", "grad_norm": 0.0055, "loss": 0.0516, "lr": "4.198e-05", "step": 4513, "steps": "23.56s,4513/16595" }, { "epoch": 1.360048207291353, "eta": "79:11:51", "grad_norm": 0.0066, "loss": 0.0531, "lr": "4.198e-05", "step": 4514, "steps": "23.6s,4514/16595" }, { "epoch": 1.360349502862308, "eta": "80:27:58", "grad_norm": 0.0058, "loss": 0.0842, "lr": "4.197e-05", "step": 4515, "steps": "23.98s,4515/16595" }, { "epoch": 1.360650798433263, "eta": "79:49:19", "grad_norm": 0.0064, "loss": 0.0596, "lr": "4.197e-05", "step": 4516, "steps": "23.79s,4516/16595" }, { "epoch": 1.3609520940042181, "eta": "80:07:02", "grad_norm": 0.0054, "loss": 0.053, "lr": "4.197e-05", "step": 4517, "steps": "23.88s,4517/16595" }, { "epoch": 1.3612533895751733, "eta": "80:26:46", "grad_norm": 0.0065, "loss": 0.0467, "lr": "4.196e-05", "step": 4518, "steps": "23.98s,4518/16595" }, { "epoch": 1.3615546851461284, "eta": "79:01:50", "grad_norm": 0.0058, "loss": 0.0631, "lr": "4.196e-05", "step": 4519, "steps": "23.56s,4519/16595" }, { "epoch": 1.3618559807170834, "eta": "79:15:32", "grad_norm": 0.0054, "loss": 0.0643, "lr": "4.196e-05", "step": 4520, "steps": "23.63s,4520/16595" }, { "epoch": 1.3621572762880385, "eta": "78:50:59", "grad_norm": 0.006, "loss": 0.0526, "lr": "4.195e-05", "step": 4521, "steps": "23.51s,4521/16595" }, { "epoch": 1.3624585718589937, "eta": "79:48:57", "grad_norm": 0.0061, "loss": 0.0631, "lr": "4.195e-05", "step": 4522, "steps": "23.8s,4522/16595" }, { "epoch": 1.3627598674299488, "eta": "81:05:00", "grad_norm": 0.0056, "loss": 0.0583, "lr": "4.195e-05", "step": 4523, "steps": "24.18s,4523/16595" }, { "epoch": 1.363061163000904, "eta": "80:28:24", "grad_norm": 0.0057, "loss": 0.0637, "lr": "4.194e-05", "step": 4524, "steps": "24.0s,4524/16595" }, { "epoch": 1.3633624585718591, "eta": "80:23:58", "grad_norm": 0.0049, "loss": 0.0476, "lr": "4.194e-05", "step": 4525, "steps": "23.98s,4525/16595" }, { "epoch": 1.363663754142814, "eta": "79:55:24", "grad_norm": 0.0058, "loss": 0.0488, "lr": "4.194e-05", "step": 4526, "steps": "23.84s,4526/16595" }, { "epoch": 1.3639650497137692, "eta": "80:03:03", "grad_norm": 0.0071, "loss": 0.0569, "lr": "4.193e-05", "step": 4527, "steps": "23.88s,4527/16595" }, { "epoch": 1.3642663452847243, "eta": "80:56:58", "grad_norm": 0.0045, "loss": 0.052, "lr": "4.193e-05", "step": 4528, "steps": "24.15s,4528/16595" }, { "epoch": 1.3645676408556795, "eta": "80:22:22", "grad_norm": 0.0043, "loss": 0.0408, "lr": "4.192e-05", "step": 4529, "steps": "23.98s,4529/16595" }, { "epoch": 1.3648689364266344, "eta": "79:51:48", "grad_norm": 0.0053, "loss": 0.058, "lr": "4.192e-05", "step": 4530, "steps": "23.83s,4530/16595" }, { "epoch": 1.3651702319975896, "eta": "79:45:23", "grad_norm": 0.0047, "loss": 0.0473, "lr": "4.192e-05", "step": 4531, "steps": "23.8s,4531/16595" }, { "epoch": 1.3654715275685447, "eta": "78:44:40", "grad_norm": 0.0059, "loss": 0.0518, "lr": "4.191e-05", "step": 4532, "steps": "23.5s,4532/16595" }, { "epoch": 1.3657728231394999, "eta": "79:34:32", "grad_norm": 0.0045, "loss": 0.051, "lr": "4.191e-05", "step": 4533, "steps": "23.75s,4533/16595" }, { "epoch": 1.366074118710455, "eta": "80:30:25", "grad_norm": 0.0055, "loss": 0.0517, "lr": "4.191e-05", "step": 4534, "steps": "24.03s,4534/16595" }, { "epoch": 1.3663754142814102, "eta": "80:13:57", "grad_norm": 0.0051, "loss": 0.0507, "lr": "4.190e-05", "step": 4535, "steps": "23.95s,4535/16595" }, { "epoch": 1.3666767098523651, "eta": "79:51:26", "grad_norm": 0.0049, "loss": 0.0669, "lr": "4.190e-05", "step": 4536, "steps": "23.84s,4536/16595" }, { "epoch": 1.3669780054233203, "eta": "79:14:52", "grad_norm": 0.0041, "loss": 0.0419, "lr": "4.190e-05", "step": 4537, "steps": "23.66s,4537/16595" }, { "epoch": 1.3672793009942754, "eta": "79:32:33", "grad_norm": 0.0059, "loss": 0.0468, "lr": "4.189e-05", "step": 4538, "steps": "23.75s,4538/16595" }, { "epoch": 1.3675805965652306, "eta": "78:37:54", "grad_norm": 0.0055, "loss": 0.0464, "lr": "4.189e-05", "step": 4539, "steps": "23.48s,4539/16595" }, { "epoch": 1.3678818921361855, "eta": "78:37:31", "grad_norm": 0.0051, "loss": 0.0517, "lr": "4.189e-05", "step": 4540, "steps": "23.48s,4540/16595" }, { "epoch": 1.3681831877071406, "eta": "79:27:21", "grad_norm": 0.0053, "loss": 0.0621, "lr": "4.188e-05", "step": 4541, "steps": "23.73s,4541/16595" }, { "epoch": 1.3684844832780958, "eta": "80:03:07", "grad_norm": 0.0045, "loss": 0.0542, "lr": "4.188e-05", "step": 4542, "steps": "23.91s,4542/16595" }, { "epoch": 1.368785778849051, "eta": "79:10:29", "grad_norm": 0.0054, "loss": 0.0518, "lr": "4.188e-05", "step": 4543, "steps": "23.65s,4543/16595" }, { "epoch": 1.369087074420006, "eta": "79:46:15", "grad_norm": 0.0074, "loss": 0.04, "lr": "4.187e-05", "step": 4544, "steps": "23.83s,4544/16595" }, { "epoch": 1.3693883699909613, "eta": "80:20:00", "grad_norm": 0.005, "loss": 0.0277, "lr": "4.187e-05", "step": 4545, "steps": "24.0s,4545/16595" }, { "epoch": 1.3696896655619162, "eta": "79:49:28", "grad_norm": 0.0056, "loss": 0.0362, "lr": "4.187e-05", "step": 4546, "steps": "23.85s,4546/16595" }, { "epoch": 1.3699909611328713, "eta": "79:33:00", "grad_norm": 0.0053, "loss": 0.0521, "lr": "4.186e-05", "step": 4547, "steps": "23.77s,4547/16595" }, { "epoch": 1.3702922567038265, "eta": "78:46:26", "grad_norm": 0.0048, "loss": 0.0392, "lr": "4.186e-05", "step": 4548, "steps": "23.54s,4548/16595" }, { "epoch": 1.3705935522747816, "eta": "79:12:08", "grad_norm": 0.0059, "loss": 0.0629, "lr": "4.185e-05", "step": 4549, "steps": "23.67s,4549/16595" }, { "epoch": 1.3708948478457366, "eta": "78:43:38", "grad_norm": 0.0047, "loss": 0.0576, "lr": "4.185e-05", "step": 4550, "steps": "23.53s,4550/16595" }, { "epoch": 1.3711961434166917, "eta": "78:41:14", "grad_norm": 0.0049, "loss": 0.058, "lr": "4.185e-05", "step": 4551, "steps": "23.52s,4551/16595" }, { "epoch": 1.3714974389876469, "eta": "79:25:00", "grad_norm": 0.005, "loss": 0.0491, "lr": "4.184e-05", "step": 4552, "steps": "23.74s,4552/16595" }, { "epoch": 1.371798734558602, "eta": "79:38:40", "grad_norm": 0.0056, "loss": 0.0476, "lr": "4.184e-05", "step": 4553, "steps": "23.81s,4553/16595" }, { "epoch": 1.3721000301295572, "eta": "79:38:16", "grad_norm": 0.0047, "loss": 0.0566, "lr": "4.184e-05", "step": 4554, "steps": "23.81s,4554/16595" }, { "epoch": 1.3724013257005123, "eta": "78:51:43", "grad_norm": 0.005, "loss": 0.0425, "lr": "4.183e-05", "step": 4555, "steps": "23.58s,4555/16595" }, { "epoch": 1.3727026212714672, "eta": "79:09:23", "grad_norm": 0.0052, "loss": 0.0554, "lr": "4.183e-05", "step": 4556, "steps": "23.67s,4556/16595" }, { "epoch": 1.3730039168424224, "eta": "79:29:03", "grad_norm": 0.0057, "loss": 0.0584, "lr": "4.183e-05", "step": 4557, "steps": "23.77s,4557/16595" }, { "epoch": 1.3733052124133776, "eta": "79:30:39", "grad_norm": 0.0046, "loss": 0.0555, "lr": "4.182e-05", "step": 4558, "steps": "23.78s,4558/16595" }, { "epoch": 1.3736065079843327, "eta": "79:20:14", "grad_norm": 0.0039, "loss": 0.0592, "lr": "4.182e-05", "step": 4559, "steps": "23.73s,4559/16595" }, { "epoch": 1.3739078035552876, "eta": "78:47:44", "grad_norm": 0.0048, "loss": 0.0485, "lr": "4.182e-05", "step": 4560, "steps": "23.57s,4560/16595" }, { "epoch": 1.3742090991262428, "eta": "78:31:18", "grad_norm": 0.0052, "loss": 0.0433, "lr": "4.181e-05", "step": 4561, "steps": "23.49s,4561/16595" }, { "epoch": 1.374510394697198, "eta": "79:21:03", "grad_norm": 0.0046, "loss": 0.0511, "lr": "4.181e-05", "step": 4562, "steps": "23.74s,4562/16595" }, { "epoch": 1.374811690268153, "eta": "78:26:31", "grad_norm": 0.0049, "loss": 0.0478, "lr": "4.181e-05", "step": 4563, "steps": "23.47s,4563/16595" }, { "epoch": 1.3751129858391082, "eta": "78:48:10", "grad_norm": 0.0047, "loss": 0.0494, "lr": "4.180e-05", "step": 4564, "steps": "23.58s,4564/16595" }, { "epoch": 1.3754142814100634, "eta": "78:49:47", "grad_norm": 0.0056, "loss": 0.0468, "lr": "4.180e-05", "step": 4565, "steps": "23.59s,4565/16595" }, { "epoch": 1.3757155769810183, "eta": "79:31:30", "grad_norm": 0.0065, "loss": 0.0545, "lr": "4.179e-05", "step": 4566, "steps": "23.8s,4566/16595" }, { "epoch": 1.3760168725519735, "eta": "78:51:00", "grad_norm": 0.0076, "loss": 0.039, "lr": "4.179e-05", "step": 4567, "steps": "23.6s,4567/16595" }, { "epoch": 1.3763181681229286, "eta": "79:52:45", "grad_norm": 0.0061, "loss": 0.0555, "lr": "4.179e-05", "step": 4568, "steps": "23.91s,4568/16595" }, { "epoch": 1.3766194636938838, "eta": "79:26:18", "grad_norm": 0.0052, "loss": 0.0386, "lr": "4.178e-05", "step": 4569, "steps": "23.78s,4569/16595" }, { "epoch": 1.3769207592648387, "eta": "78:35:48", "grad_norm": 0.0056, "loss": 0.0446, "lr": "4.178e-05", "step": 4570, "steps": "23.53s,4570/16595" }, { "epoch": 1.3772220548357939, "eta": "78:55:27", "grad_norm": 0.0055, "loss": 0.0587, "lr": "4.178e-05", "step": 4571, "steps": "23.63s,4571/16595" }, { "epoch": 1.377523350406749, "eta": "79:03:04", "grad_norm": 0.0043, "loss": 0.0577, "lr": "4.177e-05", "step": 4572, "steps": "23.67s,4572/16595" }, { "epoch": 1.3778246459777042, "eta": "79:36:44", "grad_norm": 0.0057, "loss": 0.0406, "lr": "4.177e-05", "step": 4573, "steps": "23.84s,4573/16595" }, { "epoch": 1.3781259415486593, "eta": "79:18:18", "grad_norm": 0.0059, "loss": 0.0486, "lr": "4.177e-05", "step": 4574, "steps": "23.75s,4574/16595" }, { "epoch": 1.3784272371196145, "eta": "78:47:52", "grad_norm": 0.0047, "loss": 0.0486, "lr": "4.176e-05", "step": 4575, "steps": "23.6s,4575/16595" }, { "epoch": 1.3787285326905694, "eta": "79:45:33", "grad_norm": 0.0046, "loss": 0.045, "lr": "4.176e-05", "step": 4576, "steps": "23.89s,4576/16595" }, { "epoch": 1.3790298282615245, "eta": "78:39:04", "grad_norm": 0.0054, "loss": 0.0502, "lr": "4.176e-05", "step": 4577, "steps": "23.56s,4577/16595" }, { "epoch": 1.3793311238324797, "eta": "78:58:42", "grad_norm": 0.0068, "loss": 0.0419, "lr": "4.175e-05", "step": 4578, "steps": "23.66s,4578/16595" }, { "epoch": 1.3796324194034348, "eta": "78:10:14", "grad_norm": 0.0049, "loss": 0.0494, "lr": "4.175e-05", "step": 4579, "steps": "23.42s,4579/16595" }, { "epoch": 1.3799337149743898, "eta": "79:45:58", "grad_norm": 0.0071, "loss": 0.0561, "lr": "4.174e-05", "step": 4580, "steps": "23.9s,4580/16595" }, { "epoch": 1.380235010545345, "eta": "79:55:35", "grad_norm": 0.0048, "loss": 0.0647, "lr": "4.174e-05", "step": 4581, "steps": "23.95s,4581/16595" }, { "epoch": 1.3805363061163, "eta": "79:09:08", "grad_norm": 0.0059, "loss": 0.0444, "lr": "4.174e-05", "step": 4582, "steps": "23.72s,4582/16595" }, { "epoch": 1.3808376016872552, "eta": "78:22:41", "grad_norm": 0.0103, "loss": 0.0408, "lr": "4.173e-05", "step": 4583, "steps": "23.49s,4583/16595" }, { "epoch": 1.3811388972582104, "eta": "78:12:17", "grad_norm": 0.0077, "loss": 0.0578, "lr": "4.173e-05", "step": 4584, "steps": "23.44s,4584/16595" }, { "epoch": 1.3814401928291655, "eta": "79:11:57", "grad_norm": 0.0044, "loss": 0.061, "lr": "4.173e-05", "step": 4585, "steps": "23.74s,4585/16595" }, { "epoch": 1.3817414884001205, "eta": "80:13:36", "grad_norm": 0.0053, "loss": 0.0508, "lr": "4.172e-05", "step": 4586, "steps": "24.05s,4586/16595" }, { "epoch": 1.3820427839710756, "eta": "79:45:11", "grad_norm": 0.0052, "loss": 0.0504, "lr": "4.172e-05", "step": 4587, "steps": "23.91s,4587/16595" }, { "epoch": 1.3823440795420308, "eta": "77:54:43", "grad_norm": 0.0052, "loss": 0.0607, "lr": "4.172e-05", "step": 4588, "steps": "23.36s,4588/16595" }, { "epoch": 1.382645375112986, "eta": "79:08:22", "grad_norm": 0.0085, "loss": 0.0437, "lr": "4.171e-05", "step": 4589, "steps": "23.73s,4589/16595" }, { "epoch": 1.3829466706839408, "eta": "79:25:59", "grad_norm": 0.0048, "loss": 0.0562, "lr": "4.171e-05", "step": 4590, "steps": "23.82s,4590/16595" }, { "epoch": 1.383247966254896, "eta": "78:17:33", "grad_norm": 0.005, "loss": 0.055, "lr": "4.171e-05", "step": 4591, "steps": "23.48s,4591/16595" }, { "epoch": 1.3835492618258511, "eta": "78:09:10", "grad_norm": 0.0068, "loss": 0.0378, "lr": "4.170e-05", "step": 4592, "steps": "23.44s,4592/16595" }, { "epoch": 1.3838505573968063, "eta": "79:44:47", "grad_norm": 0.0052, "loss": 0.0487, "lr": "4.170e-05", "step": 4593, "steps": "23.92s,4593/16595" }, { "epoch": 1.3841518529677614, "eta": "79:14:23", "grad_norm": 0.0062, "loss": 0.0444, "lr": "4.170e-05", "step": 4594, "steps": "23.77s,4594/16595" }, { "epoch": 1.3844531485387166, "eta": "78:20:00", "grad_norm": 0.0059, "loss": 0.0649, "lr": "4.169e-05", "step": 4595, "steps": "23.5s,4595/16595" }, { "epoch": 1.3847544441096715, "eta": "80:15:35", "grad_norm": 0.0068, "loss": 0.0598, "lr": "4.169e-05", "step": 4596, "steps": "24.08s,4596/16595" }, { "epoch": 1.3850557396806267, "eta": "79:33:12", "grad_norm": 0.0054, "loss": 0.0558, "lr": "4.168e-05", "step": 4597, "steps": "23.87s,4597/16595" }, { "epoch": 1.3853570352515818, "eta": "79:10:48", "grad_norm": 0.0052, "loss": 0.0432, "lr": "4.168e-05", "step": 4598, "steps": "23.76s,4598/16595" }, { "epoch": 1.385658330822537, "eta": "79:54:24", "grad_norm": 0.0074, "loss": 0.0531, "lr": "4.168e-05", "step": 4599, "steps": "23.98s,4599/16595" }, { "epoch": 1.385959626393492, "eta": "78:36:02", "grad_norm": 0.0046, "loss": 0.0546, "lr": "4.167e-05", "step": 4600, "steps": "23.59s,4600/16595" }, { "epoch": 1.386260921964447, "eta": "181:54:32", "grad_norm": 0.0044, "loss": 0.0533, "lr": "4.167e-05", "step": 4601, "steps": "54.6s,4601/16595" }, { "epoch": 1.3865622175354022, "eta": "78:13:15", "grad_norm": 0.0077, "loss": 0.0573, "lr": "4.167e-05", "step": 4602, "steps": "23.48s,4602/16595" }, { "epoch": 1.3868635131063574, "eta": "79:46:48", "grad_norm": 0.006, "loss": 0.0497, "lr": "4.166e-05", "step": 4603, "steps": "23.95s,4603/16595" }, { "epoch": 1.3871648086773125, "eta": "79:40:24", "grad_norm": 0.0048, "loss": 0.0497, "lr": "4.166e-05", "step": 4604, "steps": "23.92s,4604/16595" }, { "epoch": 1.3874661042482677, "eta": "77:56:06", "grad_norm": 0.0052, "loss": 0.0495, "lr": "4.166e-05", "step": 4605, "steps": "23.4s,4605/16595" }, { "epoch": 1.3877673998192226, "eta": "79:33:37", "grad_norm": 0.0043, "loss": 0.0493, "lr": "4.165e-05", "step": 4606, "steps": "23.89s,4606/16595" }, { "epoch": 1.3880686953901777, "eta": "79:29:13", "grad_norm": 0.0056, "loss": 0.0438, "lr": "4.165e-05", "step": 4607, "steps": "23.87s,4607/16595" }, { "epoch": 1.388369990961133, "eta": "79:30:49", "grad_norm": 0.0054, "loss": 0.0552, "lr": "4.165e-05", "step": 4608, "steps": "23.88s,4608/16595" }, { "epoch": 1.388671286532088, "eta": "80:10:22", "grad_norm": 0.0052, "loss": 0.0639, "lr": "4.164e-05", "step": 4609, "steps": "24.08s,4609/16595" }, { "epoch": 1.388972582103043, "eta": "79:50:00", "grad_norm": 0.0048, "loss": 0.0348, "lr": "4.164e-05", "step": 4610, "steps": "23.98s,4610/16595" }, { "epoch": 1.3892738776739981, "eta": "80:07:34", "grad_norm": 0.0042, "loss": 0.0508, "lr": "4.163e-05", "step": 4611, "steps": "24.07s,4611/16595" }, { "epoch": 1.3895751732449533, "eta": "78:49:17", "grad_norm": 0.0063, "loss": 0.058, "lr": "4.163e-05", "step": 4612, "steps": "23.68s,4612/16595" }, { "epoch": 1.3898764688159084, "eta": "78:52:53", "grad_norm": 0.0057, "loss": 0.0466, "lr": "4.163e-05", "step": 4613, "steps": "23.7s,4613/16595" }, { "epoch": 1.3901777643868636, "eta": "79:22:26", "grad_norm": 0.0056, "loss": 0.0598, "lr": "4.162e-05", "step": 4614, "steps": "23.85s,4614/16595" }, { "epoch": 1.3904790599578187, "eta": "80:15:57", "grad_norm": 0.0045, "loss": 0.053, "lr": "4.162e-05", "step": 4615, "steps": "24.12s,4615/16595" }, { "epoch": 1.3907803555287737, "eta": "79:33:37", "grad_norm": 0.0055, "loss": 0.0515, "lr": "4.162e-05", "step": 4616, "steps": "23.91s,4616/16595" }, { "epoch": 1.3910816510997288, "eta": "79:09:16", "grad_norm": 0.0057, "loss": 0.0504, "lr": "4.161e-05", "step": 4617, "steps": "23.79s,4617/16595" }, { "epoch": 1.391382946670684, "eta": "78:26:57", "grad_norm": 0.0053, "loss": 0.0598, "lr": "4.161e-05", "step": 4618, "steps": "23.58s,4618/16595" }, { "epoch": 1.3916842422416391, "eta": "79:18:27", "grad_norm": 0.0057, "loss": 0.0523, "lr": "4.161e-05", "step": 4619, "steps": "23.84s,4619/16595" }, { "epoch": 1.391985537812594, "eta": "79:18:04", "grad_norm": 0.005, "loss": 0.0457, "lr": "4.160e-05", "step": 4620, "steps": "23.84s,4620/16595" }, { "epoch": 1.3922868333835492, "eta": "79:15:40", "grad_norm": 0.0056, "loss": 0.0529, "lr": "4.160e-05", "step": 4621, "steps": "23.83s,4621/16595" }, { "epoch": 1.3925881289545043, "eta": "78:39:21", "grad_norm": 0.006, "loss": 0.0397, "lr": "4.160e-05", "step": 4622, "steps": "23.65s,4622/16595" }, { "epoch": 1.3928894245254595, "eta": "78:30:58", "grad_norm": 0.0057, "loss": 0.0457, "lr": "4.159e-05", "step": 4623, "steps": "23.61s,4623/16595" }, { "epoch": 1.3931907200964146, "eta": "78:34:34", "grad_norm": 0.0062, "loss": 0.0609, "lr": "4.159e-05", "step": 4624, "steps": "23.63s,4624/16595" }, { "epoch": 1.3934920156673698, "eta": "78:40:10", "grad_norm": 0.0055, "loss": 0.0572, "lr": "4.158e-05", "step": 4625, "steps": "23.66s,4625/16595" }, { "epoch": 1.3937933112383247, "eta": "78:31:47", "grad_norm": 0.0069, "loss": 0.0364, "lr": "4.158e-05", "step": 4626, "steps": "23.62s,4626/16595" }, { "epoch": 1.3940946068092799, "eta": "78:29:24", "grad_norm": 0.0046, "loss": 0.0498, "lr": "4.158e-05", "step": 4627, "steps": "23.61s,4627/16595" }, { "epoch": 1.394395902380235, "eta": "79:38:49", "grad_norm": 0.0058, "loss": 0.0542, "lr": "4.157e-05", "step": 4628, "steps": "23.96s,4628/16595" }, { "epoch": 1.3946971979511902, "eta": "79:30:26", "grad_norm": 0.0052, "loss": 0.0454, "lr": "4.157e-05", "step": 4629, "steps": "23.92s,4629/16595" }, { "epoch": 1.394998493522145, "eta": "78:40:11", "grad_norm": 0.0049, "loss": 0.0634, "lr": "4.157e-05", "step": 4630, "steps": "23.67s,4630/16595" }, { "epoch": 1.3952997890931003, "eta": "78:27:50", "grad_norm": 0.0047, "loss": 0.0532, "lr": "4.156e-05", "step": 4631, "steps": "23.61s,4631/16595" }, { "epoch": 1.3956010846640554, "eta": "79:09:18", "grad_norm": 0.0045, "loss": 0.0574, "lr": "4.156e-05", "step": 4632, "steps": "23.82s,4632/16595" }, { "epoch": 1.3959023802350106, "eta": "78:37:00", "grad_norm": 0.0062, "loss": 0.0489, "lr": "4.156e-05", "step": 4633, "steps": "23.66s,4633/16595" }, { "epoch": 1.3962036758059657, "eta": "79:18:29", "grad_norm": 0.0049, "loss": 0.059, "lr": "4.155e-05", "step": 4634, "steps": "23.87s,4634/16595" }, { "epoch": 1.3965049713769209, "eta": "80:05:55", "grad_norm": 0.0052, "loss": 0.051, "lr": "4.155e-05", "step": 4635, "steps": "24.11s,4635/16595" }, { "epoch": 1.3968062669478758, "eta": "78:41:48", "grad_norm": 0.0062, "loss": 0.0446, "lr": "4.155e-05", "step": 4636, "steps": "23.69s,4636/16595" }, { "epoch": 1.397107562518831, "eta": "77:35:38", "grad_norm": 0.0067, "loss": 0.0489, "lr": "4.154e-05", "step": 4637, "steps": "23.36s,4637/16595" }, { "epoch": 1.397408858089786, "eta": "78:54:58", "grad_norm": 0.0042, "loss": 0.0771, "lr": "4.154e-05", "step": 4638, "steps": "23.76s,4638/16595" }, { "epoch": 1.3977101536607413, "eta": "78:04:45", "grad_norm": 0.005, "loss": 0.0503, "lr": "4.153e-05", "step": 4639, "steps": "23.51s,4639/16595" }, { "epoch": 1.3980114492316962, "eta": "79:06:08", "grad_norm": 0.0049, "loss": 0.0529, "lr": "4.153e-05", "step": 4640, "steps": "23.82s,4640/16595" }, { "epoch": 1.3983127448026513, "eta": "78:25:53", "grad_norm": 0.0047, "loss": 0.0426, "lr": "4.153e-05", "step": 4641, "steps": "23.62s,4641/16595" }, { "epoch": 1.3986140403736065, "eta": "77:45:39", "grad_norm": 0.0048, "loss": 0.0408, "lr": "4.152e-05", "step": 4642, "steps": "23.42s,4642/16595" }, { "epoch": 1.3989153359445616, "eta": "79:14:54", "grad_norm": 0.0049, "loss": 0.0639, "lr": "4.152e-05", "step": 4643, "steps": "23.87s,4643/16595" }, { "epoch": 1.3992166315155168, "eta": "78:42:38", "grad_norm": 0.0042, "loss": 0.0446, "lr": "4.152e-05", "step": 4644, "steps": "23.71s,4644/16595" }, { "epoch": 1.399517927086472, "eta": "78:20:20", "grad_norm": 0.0049, "loss": 0.0489, "lr": "4.151e-05", "step": 4645, "steps": "23.6s,4645/16595" }, { "epoch": 1.3998192226574269, "eta": "79:17:41", "grad_norm": 0.0059, "loss": 0.0466, "lr": "4.151e-05", "step": 4646, "steps": "23.89s,4646/16595" }, { "epoch": 1.400120518228382, "eta": "78:49:25", "grad_norm": 0.0056, "loss": 0.0497, "lr": "4.151e-05", "step": 4647, "steps": "23.75s,4647/16595" }, { "epoch": 1.4004218137993372, "eta": "78:21:08", "grad_norm": 0.0054, "loss": 0.0556, "lr": "4.150e-05", "step": 4648, "steps": "23.61s,4648/16595" }, { "epoch": 1.4007231093702923, "eta": "78:56:35", "grad_norm": 0.0052, "loss": 0.0474, "lr": "4.150e-05", "step": 4649, "steps": "23.79s,4649/16595" }, { "epoch": 1.4010244049412472, "eta": "77:24:36", "grad_norm": 0.0054, "loss": 0.0489, "lr": "4.150e-05", "step": 4650, "steps": "23.33s,4650/16595" }, { "epoch": 1.4013257005122024, "eta": "77:52:05", "grad_norm": 0.0052, "loss": 0.0545, "lr": "4.149e-05", "step": 4651, "steps": "23.47s,4651/16595" }, { "epoch": 1.4016269960831576, "eta": "79:37:12", "grad_norm": 0.0045, "loss": 0.0613, "lr": "4.149e-05", "step": 4652, "steps": "24.0s,4652/16595" }, { "epoch": 1.4019282916541127, "eta": "79:36:48", "grad_norm": 0.011, "loss": 0.0575, "lr": "4.148e-05", "step": 4653, "steps": "24.0s,4653/16595" }, { "epoch": 1.4022295872250679, "eta": "79:00:34", "grad_norm": 0.0043, "loss": 0.0503, "lr": "4.148e-05", "step": 4654, "steps": "23.82s,4654/16595" }, { "epoch": 1.402530882796023, "eta": "79:26:03", "grad_norm": 0.0052, "loss": 0.0682, "lr": "4.148e-05", "step": 4655, "steps": "23.95s,4655/16595" }, { "epoch": 1.402832178366978, "eta": "79:31:37", "grad_norm": 0.0057, "loss": 0.0446, "lr": "4.147e-05", "step": 4656, "steps": "23.98s,4656/16595" }, { "epoch": 1.403133473937933, "eta": "79:07:20", "grad_norm": 0.0053, "loss": 0.0519, "lr": "4.147e-05", "step": 4657, "steps": "23.86s,4657/16595" }, { "epoch": 1.4034347695088882, "eta": "79:06:56", "grad_norm": 0.0059, "loss": 0.0415, "lr": "4.147e-05", "step": 4658, "steps": "23.86s,4658/16595" }, { "epoch": 1.4037360650798434, "eta": "78:30:44", "grad_norm": 0.0047, "loss": 0.0571, "lr": "4.146e-05", "step": 4659, "steps": "23.68s,4659/16595" }, { "epoch": 1.4040373606507983, "eta": "78:52:13", "grad_norm": 0.0057, "loss": 0.048, "lr": "4.146e-05", "step": 4660, "steps": "23.79s,4660/16595" }, { "epoch": 1.4043386562217535, "eta": "78:41:53", "grad_norm": 0.0044, "loss": 0.0403, "lr": "4.146e-05", "step": 4661, "steps": "23.74s,4661/16595" }, { "epoch": 1.4046399517927086, "eta": "78:21:36", "grad_norm": 0.0052, "loss": 0.0481, "lr": "4.145e-05", "step": 4662, "steps": "23.64s,4662/16595" }, { "epoch": 1.4049412473636638, "eta": "78:55:00", "grad_norm": 0.005, "loss": 0.0347, "lr": "4.145e-05", "step": 4663, "steps": "23.81s,4663/16595" }, { "epoch": 1.405242542934619, "eta": "78:36:43", "grad_norm": 0.0075, "loss": 0.0664, "lr": "4.144e-05", "step": 4664, "steps": "23.72s,4664/16595" }, { "epoch": 1.405543838505574, "eta": "77:56:33", "grad_norm": 0.0047, "loss": 0.0595, "lr": "4.144e-05", "step": 4665, "steps": "23.52s,4665/16595" }, { "epoch": 1.405845134076529, "eta": "78:31:57", "grad_norm": 0.0085, "loss": 0.0511, "lr": "4.144e-05", "step": 4666, "steps": "23.7s,4666/16595" }, { "epoch": 1.4061464296474842, "eta": "79:41:08", "grad_norm": 0.0048, "loss": 0.0568, "lr": "4.143e-05", "step": 4667, "steps": "24.05s,4667/16595" }, { "epoch": 1.4064477252184393, "eta": "78:15:15", "grad_norm": 0.0049, "loss": 0.0515, "lr": "4.143e-05", "step": 4668, "steps": "23.62s,4668/16595" }, { "epoch": 1.4067490207893945, "eta": "79:36:21", "grad_norm": 0.005, "loss": 0.0449, "lr": "4.143e-05", "step": 4669, "steps": "24.03s,4669/16595" }, { "epoch": 1.4070503163603494, "eta": "78:26:24", "grad_norm": 0.0059, "loss": 0.0624, "lr": "4.142e-05", "step": 4670, "steps": "23.68s,4670/16595" }, { "epoch": 1.4073516119313045, "eta": "77:54:12", "grad_norm": 0.006, "loss": 0.0475, "lr": "4.142e-05", "step": 4671, "steps": "23.52s,4671/16595" }, { "epoch": 1.4076529075022597, "eta": "78:39:31", "grad_norm": 0.0059, "loss": 0.0597, "lr": "4.142e-05", "step": 4672, "steps": "23.75s,4672/16595" }, { "epoch": 1.4079542030732148, "eta": "78:58:59", "grad_norm": 0.0055, "loss": 0.0503, "lr": "4.141e-05", "step": 4673, "steps": "23.85s,4673/16595" }, { "epoch": 1.40825549864417, "eta": "79:42:18", "grad_norm": 0.0047, "loss": 0.0537, "lr": "4.141e-05", "step": 4674, "steps": "24.07s,4674/16595" }, { "epoch": 1.4085567942151251, "eta": "79:10:07", "grad_norm": 0.0051, "loss": 0.0491, "lr": "4.141e-05", "step": 4675, "steps": "23.91s,4675/16595" }, { "epoch": 1.40885808978608, "eta": "78:02:10", "grad_norm": 0.0048, "loss": 0.0408, "lr": "4.140e-05", "step": 4676, "steps": "23.57s,4676/16595" }, { "epoch": 1.4091593853570352, "eta": "79:01:22", "grad_norm": 0.005, "loss": 0.0646, "lr": "4.140e-05", "step": 4677, "steps": "23.87s,4677/16595" }, { "epoch": 1.4094606809279904, "eta": "78:47:04", "grad_norm": 0.0061, "loss": 0.0651, "lr": "4.139e-05", "step": 4678, "steps": "23.8s,4678/16595" }, { "epoch": 1.4097619764989455, "eta": "78:28:48", "grad_norm": 0.0067, "loss": 0.0497, "lr": "4.139e-05", "step": 4679, "steps": "23.71s,4679/16595" }, { "epoch": 1.4100632720699005, "eta": "78:14:30", "grad_norm": 0.0077, "loss": 0.066, "lr": "4.139e-05", "step": 4680, "steps": "23.64s,4680/16595" }, { "epoch": 1.4103645676408556, "eta": "78:55:48", "grad_norm": 0.0053, "loss": 0.037, "lr": "4.138e-05", "step": 4681, "steps": "23.85s,4681/16595" }, { "epoch": 1.4106658632118108, "eta": "78:11:44", "grad_norm": 0.0053, "loss": 0.0559, "lr": "4.138e-05", "step": 4682, "steps": "23.63s,4682/16595" }, { "epoch": 1.410967158782766, "eta": "78:51:02", "grad_norm": 0.0067, "loss": 0.0589, "lr": "4.138e-05", "step": 4683, "steps": "23.83s,4683/16595" }, { "epoch": 1.411268454353721, "eta": "80:37:51", "grad_norm": 0.0049, "loss": 0.049, "lr": "4.137e-05", "step": 4684, "steps": "24.37s,4684/16595" }, { "epoch": 1.4115697499246762, "eta": "79:04:09", "grad_norm": 0.0057, "loss": 0.0485, "lr": "4.137e-05", "step": 4685, "steps": "23.9s,4685/16595" }, { "epoch": 1.4118710454956311, "eta": "79:35:30", "grad_norm": 0.0054, "loss": 0.0416, "lr": "4.137e-05", "step": 4686, "steps": "24.06s,4686/16595" }, { "epoch": 1.4121723410665863, "eta": "78:19:41", "grad_norm": 0.0053, "loss": 0.0565, "lr": "4.136e-05", "step": 4687, "steps": "23.68s,4687/16595" }, { "epoch": 1.4124736366375414, "eta": "78:03:25", "grad_norm": 0.0052, "loss": 0.0482, "lr": "4.136e-05", "step": 4688, "steps": "23.6s,4688/16595" }, { "epoch": 1.4127749322084966, "eta": "78:28:49", "grad_norm": 0.005, "loss": 0.0654, "lr": "4.135e-05", "step": 4689, "steps": "23.73s,4689/16595" }, { "epoch": 1.4130762277794515, "eta": "79:29:56", "grad_norm": 0.0058, "loss": 0.0427, "lr": "4.135e-05", "step": 4690, "steps": "24.04s,4690/16595" }, { "epoch": 1.4133775233504067, "eta": "79:31:31", "grad_norm": 0.0065, "loss": 0.0496, "lr": "4.135e-05", "step": 4691, "steps": "24.05s,4691/16595" }, { "epoch": 1.4136788189213618, "eta": "78:11:45", "grad_norm": 0.0051, "loss": 0.0601, "lr": "4.134e-05", "step": 4692, "steps": "23.65s,4692/16595" }, { "epoch": 1.413980114492317, "eta": "77:59:28", "grad_norm": 0.0051, "loss": 0.0428, "lr": "4.134e-05", "step": 4693, "steps": "23.59s,4693/16595" }, { "epoch": 1.4142814100632721, "eta": "78:28:49", "grad_norm": 0.0064, "loss": 0.0516, "lr": "4.134e-05", "step": 4694, "steps": "23.74s,4694/16595" }, { "epoch": 1.4145827056342273, "eta": "77:48:46", "grad_norm": 0.0056, "loss": 0.0588, "lr": "4.133e-05", "step": 4695, "steps": "23.54s,4695/16595" }, { "epoch": 1.4148840012051822, "eta": "78:59:46", "grad_norm": 0.0049, "loss": 0.0513, "lr": "4.133e-05", "step": 4696, "steps": "23.9s,4696/16595" }, { "epoch": 1.4151852967761374, "eta": "79:39:01", "grad_norm": 0.0045, "loss": 0.0554, "lr": "4.133e-05", "step": 4697, "steps": "24.1s,4697/16595" }, { "epoch": 1.4154865923470925, "eta": "78:41:07", "grad_norm": 0.0059, "loss": 0.0672, "lr": "4.132e-05", "step": 4698, "steps": "23.81s,4698/16595" }, { "epoch": 1.4157878879180477, "eta": "78:03:03", "grad_norm": 0.0061, "loss": 0.0417, "lr": "4.132e-05", "step": 4699, "steps": "23.62s,4699/16595" }, { "epoch": 1.4160891834890026, "eta": "78:42:18", "grad_norm": 0.0046, "loss": 0.0666, "lr": "4.131e-05", "step": 4700, "steps": "23.82s,4700/16595" }, { "epoch": 1.4163904790599577, "eta": "79:35:26", "grad_norm": 0.0053, "loss": 0.0276, "lr": "4.131e-05", "step": 4701, "steps": "24.09s,4701/16595" }, { "epoch": 1.416691774630913, "eta": "77:38:05", "grad_norm": 0.0051, "loss": 0.0459, "lr": "4.131e-05", "step": 4702, "steps": "23.5s,4702/16595" }, { "epoch": 1.416993070201868, "eta": "78:25:16", "grad_norm": 0.0055, "loss": 0.0485, "lr": "4.130e-05", "step": 4703, "steps": "23.74s,4703/16595" }, { "epoch": 1.4172943657728232, "eta": "78:42:42", "grad_norm": 0.0054, "loss": 0.0401, "lr": "4.130e-05", "step": 4704, "steps": "23.83s,4704/16595" }, { "epoch": 1.4175956613437783, "eta": "78:26:27", "grad_norm": 0.0062, "loss": 0.0364, "lr": "4.130e-05", "step": 4705, "steps": "23.75s,4705/16595" }, { "epoch": 1.4178969569147333, "eta": "78:26:03", "grad_norm": 0.0055, "loss": 0.0423, "lr": "4.129e-05", "step": 4706, "steps": "23.75s,4706/16595" }, { "epoch": 1.4181982524856884, "eta": "78:03:52", "grad_norm": 0.0066, "loss": 0.0556, "lr": "4.129e-05", "step": 4707, "steps": "23.64s,4707/16595" }, { "epoch": 1.4184995480566436, "eta": "79:00:55", "grad_norm": 0.005, "loss": 0.0693, "lr": "4.129e-05", "step": 4708, "steps": "23.93s,4708/16595" }, { "epoch": 1.4188008436275987, "eta": "78:32:47", "grad_norm": 0.0052, "loss": 0.0502, "lr": "4.128e-05", "step": 4709, "steps": "23.79s,4709/16595" }, { "epoch": 1.4191021391985537, "eta": "77:42:52", "grad_norm": 0.0049, "loss": 0.0522, "lr": "4.128e-05", "step": 4710, "steps": "23.54s,4710/16595" }, { "epoch": 1.4194034347695088, "eta": "78:16:09", "grad_norm": 0.0066, "loss": 0.0483, "lr": "4.128e-05", "step": 4711, "steps": "23.71s,4711/16595" }, { "epoch": 1.419704730340464, "eta": "77:50:01", "grad_norm": 0.0049, "loss": 0.0354, "lr": "4.127e-05", "step": 4712, "steps": "23.58s,4712/16595" }, { "epoch": 1.4200060259114191, "eta": "79:14:46", "grad_norm": 0.0049, "loss": 0.0556, "lr": "4.127e-05", "step": 4713, "steps": "24.01s,4713/16595" }, { "epoch": 1.4203073214823743, "eta": "79:12:24", "grad_norm": 0.006, "loss": 0.0656, "lr": "4.126e-05", "step": 4714, "steps": "24.0s,4714/16595" }, { "epoch": 1.4206086170533294, "eta": "79:25:51", "grad_norm": 0.0062, "loss": 0.0451, "lr": "4.126e-05", "step": 4715, "steps": "24.07s,4715/16595" }, { "epoch": 1.4209099126242843, "eta": "77:58:20", "grad_norm": 0.0068, "loss": 0.0441, "lr": "4.126e-05", "step": 4716, "steps": "23.63s,4716/16595" }, { "epoch": 1.4212112081952395, "eta": "77:36:10", "grad_norm": 0.0053, "loss": 0.0572, "lr": "4.125e-05", "step": 4717, "steps": "23.52s,4717/16595" }, { "epoch": 1.4215125037661946, "eta": "79:16:44", "grad_norm": 0.006, "loss": 0.0434, "lr": "4.125e-05", "step": 4718, "steps": "24.03s,4718/16595" }, { "epoch": 1.4218137993371498, "eta": "78:26:51", "grad_norm": 0.0057, "loss": 0.0547, "lr": "4.125e-05", "step": 4719, "steps": "23.78s,4719/16595" }, { "epoch": 1.4221150949081047, "eta": "78:54:10", "grad_norm": 0.0058, "loss": 0.0541, "lr": "4.124e-05", "step": 4720, "steps": "23.92s,4720/16595" }, { "epoch": 1.4224163904790599, "eta": "79:03:39", "grad_norm": 0.0053, "loss": 0.0631, "lr": "4.124e-05", "step": 4721, "steps": "23.97s,4721/16595" }, { "epoch": 1.422717686050015, "eta": "78:53:22", "grad_norm": 0.0069, "loss": 0.0317, "lr": "4.124e-05", "step": 4722, "steps": "23.92s,4722/16595" }, { "epoch": 1.4230189816209702, "eta": "79:06:49", "grad_norm": 0.0049, "loss": 0.049, "lr": "4.123e-05", "step": 4723, "steps": "23.99s,4723/16595" }, { "epoch": 1.4233202771919253, "eta": "78:56:31", "grad_norm": 0.0058, "loss": 0.0522, "lr": "4.123e-05", "step": 4724, "steps": "23.94s,4724/16595" }, { "epoch": 1.4236215727628805, "eta": "78:48:13", "grad_norm": 0.0046, "loss": 0.0648, "lr": "4.122e-05", "step": 4725, "steps": "23.9s,4725/16595" }, { "epoch": 1.4239228683338354, "eta": "78:24:04", "grad_norm": 0.0055, "loss": 0.0553, "lr": "4.122e-05", "step": 4726, "steps": "23.78s,4726/16595" }, { "epoch": 1.4242241639047906, "eta": "79:01:15", "grad_norm": 0.0052, "loss": 0.0496, "lr": "4.122e-05", "step": 4727, "steps": "23.97s,4727/16595" }, { "epoch": 1.4245254594757457, "eta": "77:14:03", "grad_norm": 0.0048, "loss": 0.0633, "lr": "4.121e-05", "step": 4728, "steps": "23.43s,4728/16595" }, { "epoch": 1.4248267550467009, "eta": "78:56:30", "grad_norm": 0.0055, "loss": 0.0633, "lr": "4.121e-05", "step": 4729, "steps": "23.95s,4729/16595" }, { "epoch": 1.4251280506176558, "eta": "78:12:36", "grad_norm": 0.0064, "loss": 0.0458, "lr": "4.121e-05", "step": 4730, "steps": "23.73s,4730/16595" }, { "epoch": 1.425429346188611, "eta": "77:50:27", "grad_norm": 0.0059, "loss": 0.0528, "lr": "4.120e-05", "step": 4731, "steps": "23.62s,4731/16595" }, { "epoch": 1.425730641759566, "eta": "77:38:12", "grad_norm": 0.0049, "loss": 0.0686, "lr": "4.120e-05", "step": 4732, "steps": "23.56s,4732/16595" }, { "epoch": 1.4260319373305212, "eta": "78:35:08", "grad_norm": 0.0056, "loss": 0.0508, "lr": "4.120e-05", "step": 4733, "steps": "23.85s,4733/16595" }, { "epoch": 1.4263332329014764, "eta": "77:29:30", "grad_norm": 0.0048, "loss": 0.0448, "lr": "4.119e-05", "step": 4734, "steps": "23.52s,4734/16595" }, { "epoch": 1.4266345284724316, "eta": "77:46:54", "grad_norm": 0.0053, "loss": 0.0385, "lr": "4.119e-05", "step": 4735, "steps": "23.61s,4735/16595" }, { "epoch": 1.4269358240433865, "eta": "78:00:21", "grad_norm": 0.005, "loss": 0.056, "lr": "4.118e-05", "step": 4736, "steps": "23.68s,4736/16595" }, { "epoch": 1.4272371196143416, "eta": "77:40:11", "grad_norm": 0.006, "loss": 0.0368, "lr": "4.118e-05", "step": 4737, "steps": "23.58s,4737/16595" }, { "epoch": 1.4275384151852968, "eta": "78:33:09", "grad_norm": 0.005, "loss": 0.0481, "lr": "4.118e-05", "step": 4738, "steps": "23.85s,4738/16595" }, { "epoch": 1.427839710756252, "eta": "77:23:36", "grad_norm": 0.0051, "loss": 0.043, "lr": "4.117e-05", "step": 4739, "steps": "23.5s,4739/16595" }, { "epoch": 1.4281410063272069, "eta": "78:12:36", "grad_norm": 0.0047, "loss": 0.0503, "lr": "4.117e-05", "step": 4740, "steps": "23.75s,4740/16595" }, { "epoch": 1.428442301898162, "eta": "78:22:05", "grad_norm": 0.005, "loss": 0.051, "lr": "4.117e-05", "step": 4741, "steps": "23.8s,4741/16595" }, { "epoch": 1.4287435974691172, "eta": "78:29:35", "grad_norm": 0.0055, "loss": 0.0534, "lr": "4.116e-05", "step": 4742, "steps": "23.84s,4742/16595" }, { "epoch": 1.4290448930400723, "eta": "78:54:52", "grad_norm": 0.0052, "loss": 0.0582, "lr": "4.116e-05", "step": 4743, "steps": "23.97s,4743/16595" }, { "epoch": 1.4293461886110275, "eta": "79:35:57", "grad_norm": 0.0059, "loss": 0.0684, "lr": "4.116e-05", "step": 4744, "steps": "24.18s,4744/16595" }, { "epoch": 1.4296474841819826, "eta": "77:50:52", "grad_norm": 0.0055, "loss": 0.0493, "lr": "4.115e-05", "step": 4745, "steps": "23.65s,4745/16595" }, { "epoch": 1.4299487797529375, "eta": "79:35:08", "grad_norm": 0.0043, "loss": 0.0554, "lr": "4.115e-05", "step": 4746, "steps": "24.18s,4746/16595" }, { "epoch": 1.4302500753238927, "eta": "78:11:48", "grad_norm": 0.0047, "loss": 0.0616, "lr": "4.114e-05", "step": 4747, "steps": "23.76s,4747/16595" }, { "epoch": 1.4305513708948479, "eta": "78:17:20", "grad_norm": 0.0047, "loss": 0.0587, "lr": "4.114e-05", "step": 4748, "steps": "23.79s,4748/16595" }, { "epoch": 1.430852666465803, "eta": "77:33:30", "grad_norm": 0.0049, "loss": 0.051, "lr": "4.114e-05", "step": 4749, "steps": "23.57s,4749/16595" }, { "epoch": 1.431153962036758, "eta": "77:42:58", "grad_norm": 0.0078, "loss": 0.0456, "lr": "4.113e-05", "step": 4750, "steps": "23.62s,4750/16595" }, { "epoch": 1.431455257607713, "eta": "78:24:02", "grad_norm": 0.005, "loss": 0.0547, "lr": "4.113e-05", "step": 4751, "steps": "23.83s,4751/16595" }, { "epoch": 1.4317565531786682, "eta": "77:44:10", "grad_norm": 0.0045, "loss": 0.0364, "lr": "4.113e-05", "step": 4752, "steps": "23.63s,4752/16595" }, { "epoch": 1.4320578487496234, "eta": "78:48:54", "grad_norm": 0.0059, "loss": 0.0439, "lr": "4.112e-05", "step": 4753, "steps": "23.96s,4753/16595" }, { "epoch": 1.4323591443205785, "eta": "78:14:57", "grad_norm": 0.0046, "loss": 0.0502, "lr": "4.112e-05", "step": 4754, "steps": "23.79s,4754/16595" }, { "epoch": 1.4326604398915337, "eta": "78:32:19", "grad_norm": 0.0048, "loss": 0.0524, "lr": "4.111e-05", "step": 4755, "steps": "23.88s,4755/16595" }, { "epoch": 1.4329617354624888, "eta": "77:30:45", "grad_norm": 0.006, "loss": 0.0417, "lr": "4.111e-05", "step": 4756, "steps": "23.57s,4756/16595" }, { "epoch": 1.4332630310334438, "eta": "78:17:42", "grad_norm": 0.0047, "loss": 0.0577, "lr": "4.111e-05", "step": 4757, "steps": "23.81s,4757/16595" }, { "epoch": 1.433564326604399, "eta": "77:49:41", "grad_norm": 0.0058, "loss": 0.0535, "lr": "4.110e-05", "step": 4758, "steps": "23.67s,4758/16595" }, { "epoch": 1.433865622175354, "eta": "77:23:39", "grad_norm": 0.0046, "loss": 0.0422, "lr": "4.110e-05", "step": 4759, "steps": "23.54s,4759/16595" }, { "epoch": 1.434166917746309, "eta": "78:34:16", "grad_norm": 0.0058, "loss": 0.042, "lr": "4.110e-05", "step": 4760, "steps": "23.9s,4760/16595" }, { "epoch": 1.4344682133172642, "eta": "78:20:04", "grad_norm": 0.0046, "loss": 0.0442, "lr": "4.109e-05", "step": 4761, "steps": "23.83s,4761/16595" }, { "epoch": 1.4347695088882193, "eta": "78:21:38", "grad_norm": 0.0044, "loss": 0.0564, "lr": "4.109e-05", "step": 4762, "steps": "23.84s,4762/16595" }, { "epoch": 1.4350708044591745, "eta": "77:37:51", "grad_norm": 0.0053, "loss": 0.0451, "lr": "4.109e-05", "step": 4763, "steps": "23.62s,4763/16595" }, { "epoch": 1.4353721000301296, "eta": "77:31:33", "grad_norm": 0.0053, "loss": 0.0503, "lr": "4.108e-05", "step": 4764, "steps": "23.59s,4764/16595" }, { "epoch": 1.4356733956010848, "eta": "77:42:59", "grad_norm": 0.0054, "loss": 0.0608, "lr": "4.108e-05", "step": 4765, "steps": "23.65s,4765/16595" }, { "epoch": 1.43597469117204, "eta": "78:02:18", "grad_norm": 0.0057, "loss": 0.0413, "lr": "4.107e-05", "step": 4766, "steps": "23.75s,4766/16595" }, { "epoch": 1.4362759867429948, "eta": "78:41:20", "grad_norm": 0.0044, "loss": 0.0454, "lr": "4.107e-05", "step": 4767, "steps": "23.95s,4767/16595" }, { "epoch": 1.43657728231395, "eta": "77:26:02", "grad_norm": 0.005, "loss": 0.0552, "lr": "4.107e-05", "step": 4768, "steps": "23.57s,4768/16595" }, { "epoch": 1.4368785778849051, "eta": "78:46:27", "grad_norm": 0.0049, "loss": 0.0489, "lr": "4.106e-05", "step": 4769, "steps": "23.98s,4769/16595" }, { "epoch": 1.43717987345586, "eta": "78:12:33", "grad_norm": 0.0052, "loss": 0.0447, "lr": "4.106e-05", "step": 4770, "steps": "23.81s,4770/16595" }, { "epoch": 1.4374811690268152, "eta": "79:05:21", "grad_norm": 0.0055, "loss": 0.0662, "lr": "4.106e-05", "step": 4771, "steps": "24.08s,4771/16595" }, { "epoch": 1.4377824645977704, "eta": "78:59:03", "grad_norm": 0.0052, "loss": 0.049, "lr": "4.105e-05", "step": 4772, "steps": "24.05s,4772/16595" }, { "epoch": 1.4380837601687255, "eta": "78:19:14", "grad_norm": 0.0049, "loss": 0.0449, "lr": "4.105e-05", "step": 4773, "steps": "23.85s,4773/16595" }, { "epoch": 1.4383850557396807, "eta": "77:59:08", "grad_norm": 0.0053, "loss": 0.0509, "lr": "4.105e-05", "step": 4774, "steps": "23.75s,4774/16595" }, { "epoch": 1.4386863513106358, "eta": "78:18:27", "grad_norm": 0.0049, "loss": 0.0518, "lr": "4.104e-05", "step": 4775, "steps": "23.85s,4775/16595" }, { "epoch": 1.438987646881591, "eta": "77:50:28", "grad_norm": 0.0053, "loss": 0.0404, "lr": "4.104e-05", "step": 4776, "steps": "23.71s,4776/16595" }, { "epoch": 1.439288942452546, "eta": "77:38:15", "grad_norm": 0.0061, "loss": 0.0543, "lr": "4.103e-05", "step": 4777, "steps": "23.65s,4777/16595" }, { "epoch": 1.439590238023501, "eta": "78:13:19", "grad_norm": 0.0048, "loss": 0.0493, "lr": "4.103e-05", "step": 4778, "steps": "23.83s,4778/16595" }, { "epoch": 1.4398915335944562, "eta": "77:15:48", "grad_norm": 0.0061, "loss": 0.0603, "lr": "4.103e-05", "step": 4779, "steps": "23.54s,4779/16595" }, { "epoch": 1.4401928291654111, "eta": "77:03:36", "grad_norm": 0.0052, "loss": 0.0585, "lr": "4.102e-05", "step": 4780, "steps": "23.48s,4780/16595" }, { "epoch": 1.4404941247363663, "eta": "78:10:09", "grad_norm": 0.0072, "loss": 0.0391, "lr": "4.102e-05", "step": 4781, "steps": "23.82s,4781/16595" }, { "epoch": 1.4407954203073214, "eta": "77:22:30", "grad_norm": 0.0054, "loss": 0.0459, "lr": "4.102e-05", "step": 4782, "steps": "23.58s,4782/16595" }, { "epoch": 1.4410967158782766, "eta": "78:19:12", "grad_norm": 0.0056, "loss": 0.0451, "lr": "4.101e-05", "step": 4783, "steps": "23.87s,4783/16595" }, { "epoch": 1.4413980114492317, "eta": "78:20:46", "grad_norm": 0.0049, "loss": 0.049, "lr": "4.101e-05", "step": 4784, "steps": "23.88s,4784/16595" }, { "epoch": 1.441699307020187, "eta": "78:38:05", "grad_norm": 0.0056, "loss": 0.0469, "lr": "4.101e-05", "step": 4785, "steps": "23.97s,4785/16595" }, { "epoch": 1.442000602591142, "eta": "78:55:24", "grad_norm": 0.0054, "loss": 0.043, "lr": "4.100e-05", "step": 4786, "steps": "24.06s,4786/16595" }, { "epoch": 1.442301898162097, "eta": "77:00:51", "grad_norm": 0.0046, "loss": 0.0486, "lr": "4.100e-05", "step": 4787, "steps": "23.48s,4787/16595" }, { "epoch": 1.4426031937330521, "eta": "77:12:16", "grad_norm": 0.0055, "loss": 0.0538, "lr": "4.099e-05", "step": 4788, "steps": "23.54s,4788/16595" }, { "epoch": 1.4429044893040073, "eta": "77:59:06", "grad_norm": 0.0045, "loss": 0.0584, "lr": "4.099e-05", "step": 4789, "steps": "23.78s,4789/16595" }, { "epoch": 1.4432057848749622, "eta": "77:19:21", "grad_norm": 0.0062, "loss": 0.0492, "lr": "4.099e-05", "step": 4790, "steps": "23.58s,4790/16595" }, { "epoch": 1.4435070804459174, "eta": "78:55:22", "grad_norm": 0.0056, "loss": 0.0409, "lr": "4.098e-05", "step": 4791, "steps": "24.07s,4791/16595" }, { "epoch": 1.4438083760168725, "eta": "77:55:57", "grad_norm": 0.0045, "loss": 0.062, "lr": "4.098e-05", "step": 4792, "steps": "23.77s,4792/16595" }, { "epoch": 1.4441096715878277, "eta": "77:18:11", "grad_norm": 0.0049, "loss": 0.0529, "lr": "4.098e-05", "step": 4793, "steps": "23.58s,4793/16595" }, { "epoch": 1.4444109671587828, "eta": "78:30:33", "grad_norm": 0.0045, "loss": 0.0492, "lr": "4.097e-05", "step": 4794, "steps": "23.95s,4794/16595" }, { "epoch": 1.444712262729738, "eta": "77:35:06", "grad_norm": 0.0054, "loss": 0.0398, "lr": "4.097e-05", "step": 4795, "steps": "23.67s,4795/16595" }, { "epoch": 1.4450135583006931, "eta": "78:23:52", "grad_norm": 0.0048, "loss": 0.053, "lr": "4.096e-05", "step": 4796, "steps": "23.92s,4796/16595" }, { "epoch": 1.445314853871648, "eta": "78:35:16", "grad_norm": 0.0052, "loss": 0.0508, "lr": "4.096e-05", "step": 4797, "steps": "23.98s,4797/16595" }, { "epoch": 1.4456161494426032, "eta": "77:22:07", "grad_norm": 0.005, "loss": 0.0551, "lr": "4.096e-05", "step": 4798, "steps": "23.61s,4798/16595" }, { "epoch": 1.4459174450135583, "eta": "78:32:30", "grad_norm": 0.0051, "loss": 0.0617, "lr": "4.095e-05", "step": 4799, "steps": "23.97s,4799/16595" }, { "epoch": 1.4462187405845133, "eta": "77:23:17", "grad_norm": 0.005, "loss": 0.0472, "lr": "4.095e-05", "step": 4800, "steps": "23.62s,4800/16595" }, { "epoch": 1.4465200361554684, "eta": "135:31:57", "grad_norm": 0.0048, "loss": 0.0665, "lr": "4.095e-05", "step": 4801, "steps": "41.37s,4801/16595" }, { "epoch": 1.4468213317264236, "eta": "77:04:49", "grad_norm": 0.0049, "loss": 0.051, "lr": "4.094e-05", "step": 4802, "steps": "23.53s,4802/16595" }, { "epoch": 1.4471226272973787, "eta": "77:12:17", "grad_norm": 0.0054, "loss": 0.0507, "lr": "4.094e-05", "step": 4803, "steps": "23.57s,4803/16595" }, { "epoch": 1.4474239228683339, "eta": "76:46:21", "grad_norm": 0.0053, "loss": 0.0477, "lr": "4.094e-05", "step": 4804, "steps": "23.44s,4804/16595" }, { "epoch": 1.447725218439289, "eta": "78:20:16", "grad_norm": 0.0054, "loss": 0.0562, "lr": "4.093e-05", "step": 4805, "steps": "23.92s,4805/16595" }, { "epoch": 1.4480265140102442, "eta": "77:32:43", "grad_norm": 0.0052, "loss": 0.0477, "lr": "4.093e-05", "step": 4806, "steps": "23.68s,4806/16595" }, { "epoch": 1.448327809581199, "eta": "77:44:07", "grad_norm": 0.0053, "loss": 0.0555, "lr": "4.092e-05", "step": 4807, "steps": "23.74s,4807/16595" }, { "epoch": 1.4486291051521543, "eta": "78:09:15", "grad_norm": 0.0043, "loss": 0.0454, "lr": "4.092e-05", "step": 4808, "steps": "23.87s,4808/16595" }, { "epoch": 1.4489304007231094, "eta": "76:42:25", "grad_norm": 0.0041, "loss": 0.066, "lr": "4.092e-05", "step": 4809, "steps": "23.43s,4809/16595" }, { "epoch": 1.4492316962940643, "eta": "76:51:51", "grad_norm": 0.0056, "loss": 0.0331, "lr": "4.091e-05", "step": 4810, "steps": "23.48s,4810/16595" }, { "epoch": 1.4495329918650195, "eta": "79:03:03", "grad_norm": 0.0065, "loss": 0.0625, "lr": "4.091e-05", "step": 4811, "steps": "24.15s,4811/16595" }, { "epoch": 1.4498342874359746, "eta": "77:00:53", "grad_norm": 0.0063, "loss": 0.0556, "lr": "4.091e-05", "step": 4812, "steps": "23.53s,4812/16595" }, { "epoch": 1.4501355830069298, "eta": "77:02:28", "grad_norm": 0.0044, "loss": 0.0434, "lr": "4.090e-05", "step": 4813, "steps": "23.54s,4813/16595" }, { "epoch": 1.450436878577885, "eta": "77:45:16", "grad_norm": 0.0051, "loss": 0.0531, "lr": "4.090e-05", "step": 4814, "steps": "23.76s,4814/16595" }, { "epoch": 1.45073817414884, "eta": "77:23:17", "grad_norm": 0.0062, "loss": 0.0424, "lr": "4.089e-05", "step": 4815, "steps": "23.65s,4815/16595" }, { "epoch": 1.4510394697197952, "eta": "77:46:26", "grad_norm": 0.0049, "loss": 0.0675, "lr": "4.089e-05", "step": 4816, "steps": "23.77s,4816/16595" }, { "epoch": 1.4513407652907502, "eta": "78:09:36", "grad_norm": 0.0053, "loss": 0.0489, "lr": "4.089e-05", "step": 4817, "steps": "23.89s,4817/16595" }, { "epoch": 1.4516420608617053, "eta": "77:41:43", "grad_norm": 0.0052, "loss": 0.0536, "lr": "4.088e-05", "step": 4818, "steps": "23.75s,4818/16595" }, { "epoch": 1.4519433564326605, "eta": "77:13:51", "grad_norm": 0.0047, "loss": 0.0574, "lr": "4.088e-05", "step": 4819, "steps": "23.61s,4819/16595" }, { "epoch": 1.4522446520036154, "eta": "77:42:54", "grad_norm": 0.01, "loss": 0.0557, "lr": "4.088e-05", "step": 4820, "steps": "23.76s,4820/16595" }, { "epoch": 1.4525459475745706, "eta": "78:17:49", "grad_norm": 0.0053, "loss": 0.0371, "lr": "4.087e-05", "step": 4821, "steps": "23.94s,4821/16595" }, { "epoch": 1.4528472431455257, "eta": "77:24:26", "grad_norm": 0.0056, "loss": 0.0661, "lr": "4.087e-05", "step": 4822, "steps": "23.67s,4822/16595" }, { "epoch": 1.4531485387164809, "eta": "77:45:38", "grad_norm": 0.0054, "loss": 0.0532, "lr": "4.087e-05", "step": 4823, "steps": "23.78s,4823/16595" }, { "epoch": 1.453449834287436, "eta": "78:18:35", "grad_norm": 0.0059, "loss": 0.0328, "lr": "4.086e-05", "step": 4824, "steps": "23.95s,4824/16595" }, { "epoch": 1.4537511298583912, "eta": "77:05:36", "grad_norm": 0.0056, "loss": 0.0388, "lr": "4.086e-05", "step": 4825, "steps": "23.58s,4825/16595" }, { "epoch": 1.4540524254293463, "eta": "76:25:59", "grad_norm": 0.0054, "loss": 0.0518, "lr": "4.085e-05", "step": 4826, "steps": "23.38s,4826/16595" }, { "epoch": 1.4543537210003012, "eta": "77:06:47", "grad_norm": 0.0051, "loss": 0.0478, "lr": "4.085e-05", "step": 4827, "steps": "23.59s,4827/16595" }, { "epoch": 1.4546550165712564, "eta": "77:22:04", "grad_norm": 0.008, "loss": 0.0466, "lr": "4.085e-05", "step": 4828, "steps": "23.67s,4828/16595" }, { "epoch": 1.4549563121422115, "eta": "78:14:38", "grad_norm": 0.0095, "loss": 0.0516, "lr": "4.084e-05", "step": 4829, "steps": "23.94s,4829/16595" }, { "epoch": 1.4552576077131665, "eta": "76:59:43", "grad_norm": 0.0055, "loss": 0.0562, "lr": "4.084e-05", "step": 4830, "steps": "23.56s,4830/16595" }, { "epoch": 1.4555589032841216, "eta": "77:24:49", "grad_norm": 0.0053, "loss": 0.0598, "lr": "4.084e-05", "step": 4831, "steps": "23.69s,4831/16595" }, { "epoch": 1.4558601988550768, "eta": "77:47:57", "grad_norm": 0.0044, "loss": 0.0597, "lr": "4.083e-05", "step": 4832, "steps": "23.81s,4832/16595" }, { "epoch": 1.456161494426032, "eta": "77:49:30", "grad_norm": 0.0054, "loss": 0.0378, "lr": "4.083e-05", "step": 4833, "steps": "23.82s,4833/16595" }, { "epoch": 1.456462789996987, "eta": "77:31:28", "grad_norm": 0.0052, "loss": 0.0587, "lr": "4.082e-05", "step": 4834, "steps": "23.73s,4834/16595" }, { "epoch": 1.4567640855679422, "eta": "76:42:04", "grad_norm": 0.0071, "loss": 0.051, "lr": "4.082e-05", "step": 4835, "steps": "23.48s,4835/16595" }, { "epoch": 1.4570653811388974, "eta": "77:32:38", "grad_norm": 0.0049, "loss": 0.0511, "lr": "4.082e-05", "step": 4836, "steps": "23.74s,4836/16595" }, { "epoch": 1.4573666767098523, "eta": "77:26:22", "grad_norm": 0.0047, "loss": 0.0417, "lr": "4.081e-05", "step": 4837, "steps": "23.71s,4837/16595" }, { "epoch": 1.4576679722808075, "eta": "78:14:57", "grad_norm": 0.0047, "loss": 0.0545, "lr": "4.081e-05", "step": 4838, "steps": "23.96s,4838/16595" }, { "epoch": 1.4579692678517626, "eta": "78:08:41", "grad_norm": 0.0065, "loss": 0.0584, "lr": "4.081e-05", "step": 4839, "steps": "23.93s,4839/16595" }, { "epoch": 1.4582705634227175, "eta": "77:05:35", "grad_norm": 0.0046, "loss": 0.0455, "lr": "4.080e-05", "step": 4840, "steps": "23.61s,4840/16595" }, { "epoch": 1.4585718589936727, "eta": "77:52:12", "grad_norm": 0.0064, "loss": 0.0498, "lr": "4.080e-05", "step": 4841, "steps": "23.85s,4841/16595" }, { "epoch": 1.4588731545646278, "eta": "77:30:16", "grad_norm": 0.0045, "loss": 0.0541, "lr": "4.080e-05", "step": 4842, "steps": "23.74s,4842/16595" }, { "epoch": 1.459174450135583, "eta": "77:29:52", "grad_norm": 0.0048, "loss": 0.0499, "lr": "4.079e-05", "step": 4843, "steps": "23.74s,4843/16595" }, { "epoch": 1.4594757457065382, "eta": "76:38:33", "grad_norm": 0.0052, "loss": 0.0595, "lr": "4.079e-05", "step": 4844, "steps": "23.48s,4844/16595" }, { "epoch": 1.4597770412774933, "eta": "77:29:05", "grad_norm": 0.0069, "loss": 0.0529, "lr": "4.078e-05", "step": 4845, "steps": "23.74s,4845/16595" }, { "epoch": 1.4600783368484485, "eta": "77:34:33", "grad_norm": 0.0059, "loss": 0.0629, "lr": "4.078e-05", "step": 4846, "steps": "23.77s,4846/16595" }, { "epoch": 1.4603796324194034, "eta": "77:22:25", "grad_norm": 0.0043, "loss": 0.041, "lr": "4.078e-05", "step": 4847, "steps": "23.71s,4847/16595" }, { "epoch": 1.4606809279903585, "eta": "77:55:18", "grad_norm": 0.0054, "loss": 0.0557, "lr": "4.077e-05", "step": 4848, "steps": "23.88s,4848/16595" }, { "epoch": 1.4609822235613137, "eta": "76:09:11", "grad_norm": 0.0046, "loss": 0.043, "lr": "4.077e-05", "step": 4849, "steps": "23.34s,4849/16595" }, { "epoch": 1.4612835191322688, "eta": "77:15:21", "grad_norm": 0.0056, "loss": 0.0547, "lr": "4.077e-05", "step": 4850, "steps": "23.68s,4850/16595" }, { "epoch": 1.4615848147032238, "eta": "78:07:48", "grad_norm": 0.0055, "loss": 0.0611, "lr": "4.076e-05", "step": 4851, "steps": "23.95s,4851/16595" }, { "epoch": 1.461886110274179, "eta": "76:47:10", "grad_norm": 0.0054, "loss": 0.0387, "lr": "4.076e-05", "step": 4852, "steps": "23.54s,4852/16595" }, { "epoch": 1.462187405845134, "eta": "79:19:25", "grad_norm": 0.0063, "loss": 0.0546, "lr": "4.075e-05", "step": 4853, "steps": "24.32s,4853/16595" }, { "epoch": 1.4624887014160892, "eta": "77:47:02", "grad_norm": 0.0068, "loss": 0.045, "lr": "4.075e-05", "step": 4854, "steps": "23.85s,4854/16595" }, { "epoch": 1.4627899969870444, "eta": "76:38:10", "grad_norm": 0.0046, "loss": 0.0397, "lr": "4.075e-05", "step": 4855, "steps": "23.5s,4855/16595" }, { "epoch": 1.4630912925579995, "eta": "76:37:46", "grad_norm": 0.0053, "loss": 0.0623, "lr": "4.074e-05", "step": 4856, "steps": "23.5s,4856/16595" }, { "epoch": 1.4633925881289545, "eta": "77:30:12", "grad_norm": 0.0057, "loss": 0.0662, "lr": "4.074e-05", "step": 4857, "steps": "23.77s,4857/16595" }, { "epoch": 1.4636938836999096, "eta": "77:16:06", "grad_norm": 0.0052, "loss": 0.0479, "lr": "4.074e-05", "step": 4858, "steps": "23.7s,4858/16595" }, { "epoch": 1.4639951792708648, "eta": "76:58:06", "grad_norm": 0.005, "loss": 0.0495, "lr": "4.073e-05", "step": 4859, "steps": "23.61s,4859/16595" }, { "epoch": 1.46429647484182, "eta": "79:08:45", "grad_norm": 0.0046, "loss": 0.0429, "lr": "4.073e-05", "step": 4860, "steps": "24.28s,4860/16595" }, { "epoch": 1.4645977704127748, "eta": "77:52:05", "grad_norm": 0.0061, "loss": 0.0491, "lr": "4.072e-05", "step": 4861, "steps": "23.89s,4861/16595" }, { "epoch": 1.46489906598373, "eta": "77:28:13", "grad_norm": 0.0046, "loss": 0.0488, "lr": "4.072e-05", "step": 4862, "steps": "23.77s,4862/16595" }, { "epoch": 1.4652003615546851, "eta": "77:25:52", "grad_norm": 0.0051, "loss": 0.0598, "lr": "4.072e-05", "step": 4863, "steps": "23.76s,4863/16595" }, { "epoch": 1.4655016571256403, "eta": "77:37:12", "grad_norm": 0.0045, "loss": 0.0482, "lr": "4.071e-05", "step": 4864, "steps": "23.82s,4864/16595" }, { "epoch": 1.4658029526965954, "eta": "76:28:23", "grad_norm": 0.0066, "loss": 0.0693, "lr": "4.071e-05", "step": 4865, "steps": "23.47s,4865/16595" }, { "epoch": 1.4661042482675506, "eta": "77:20:46", "grad_norm": 0.0057, "loss": 0.0431, "lr": "4.071e-05", "step": 4866, "steps": "23.74s,4866/16595" }, { "epoch": 1.4664055438385055, "eta": "77:37:58", "grad_norm": 0.0047, "loss": 0.0517, "lr": "4.070e-05", "step": 4867, "steps": "23.83s,4867/16595" }, { "epoch": 1.4667068394094607, "eta": "77:27:48", "grad_norm": 0.0058, "loss": 0.0539, "lr": "4.070e-05", "step": 4868, "steps": "23.78s,4868/16595" }, { "epoch": 1.4670081349804158, "eta": "77:27:24", "grad_norm": 0.005, "loss": 0.0459, "lr": "4.069e-05", "step": 4869, "steps": "23.78s,4869/16595" }, { "epoch": 1.467309430551371, "eta": "76:53:47", "grad_norm": 0.0043, "loss": 0.0456, "lr": "4.069e-05", "step": 4870, "steps": "23.61s,4870/16595" }, { "epoch": 1.467610726122326, "eta": "77:20:45", "grad_norm": 0.0048, "loss": 0.037, "lr": "4.069e-05", "step": 4871, "steps": "23.75s,4871/16595" }, { "epoch": 1.467912021693281, "eta": "77:16:26", "grad_norm": 0.0056, "loss": 0.0448, "lr": "4.068e-05", "step": 4872, "steps": "23.73s,4872/16595" }, { "epoch": 1.4682133172642362, "eta": "77:12:08", "grad_norm": 0.0067, "loss": 0.0439, "lr": "4.068e-05", "step": 4873, "steps": "23.71s,4873/16595" }, { "epoch": 1.4685146128351914, "eta": "78:31:50", "grad_norm": 0.0046, "loss": 0.0465, "lr": "4.068e-05", "step": 4874, "steps": "24.12s,4874/16595" }, { "epoch": 1.4688159084061465, "eta": "77:32:50", "grad_norm": 0.0053, "loss": 0.0528, "lr": "4.067e-05", "step": 4875, "steps": "23.82s,4875/16595" }, { "epoch": 1.4691172039771017, "eta": "77:20:43", "grad_norm": 0.005, "loss": 0.0554, "lr": "4.067e-05", "step": 4876, "steps": "23.76s,4876/16595" }, { "epoch": 1.4694184995480566, "eta": "77:02:45", "grad_norm": 0.0048, "loss": 0.0472, "lr": "4.067e-05", "step": 4877, "steps": "23.67s,4877/16595" }, { "epoch": 1.4697197951190117, "eta": "76:19:23", "grad_norm": 0.0049, "loss": 0.0658, "lr": "4.066e-05", "step": 4878, "steps": "23.45s,4878/16595" }, { "epoch": 1.470021090689967, "eta": "77:44:55", "grad_norm": 0.0045, "loss": 0.0528, "lr": "4.066e-05", "step": 4879, "steps": "23.89s,4879/16595" }, { "epoch": 1.470322386260922, "eta": "76:47:54", "grad_norm": 0.0048, "loss": 0.0438, "lr": "4.065e-05", "step": 4880, "steps": "23.6s,4880/16595" }, { "epoch": 1.470623681831877, "eta": "77:22:38", "grad_norm": 0.0053, "loss": 0.0504, "lr": "4.065e-05", "step": 4881, "steps": "23.78s,4881/16595" }, { "epoch": 1.4709249774028321, "eta": "76:47:06", "grad_norm": 0.005, "loss": 0.0519, "lr": "4.065e-05", "step": 4882, "steps": "23.6s,4882/16595" }, { "epoch": 1.4712262729737873, "eta": "77:41:22", "grad_norm": 0.0042, "loss": 0.0452, "lr": "4.064e-05", "step": 4883, "steps": "23.88s,4883/16595" }, { "epoch": 1.4715275685447424, "eta": "76:46:19", "grad_norm": 0.0058, "loss": 0.0571, "lr": "4.064e-05", "step": 4884, "steps": "23.6s,4884/16595" }, { "epoch": 1.4718288641156976, "eta": "76:10:48", "grad_norm": 0.0054, "loss": 0.0505, "lr": "4.064e-05", "step": 4885, "steps": "23.42s,4885/16595" }, { "epoch": 1.4721301596866527, "eta": "76:53:20", "grad_norm": 0.0068, "loss": 0.0356, "lr": "4.063e-05", "step": 4886, "steps": "23.64s,4886/16595" }, { "epoch": 1.4724314552576077, "eta": "77:49:32", "grad_norm": 0.0059, "loss": 0.0602, "lr": "4.063e-05", "step": 4887, "steps": "23.93s,4887/16595" }, { "epoch": 1.4727327508285628, "eta": "77:02:18", "grad_norm": 0.0064, "loss": 0.0415, "lr": "4.062e-05", "step": 4888, "steps": "23.69s,4888/16595" }, { "epoch": 1.473034046399518, "eta": "78:18:00", "grad_norm": 0.0062, "loss": 0.0455, "lr": "4.062e-05", "step": 4889, "steps": "24.08s,4889/16595" }, { "epoch": 1.4733353419704731, "eta": "77:22:59", "grad_norm": 0.0175, "loss": 0.0465, "lr": "4.062e-05", "step": 4890, "steps": "23.8s,4890/16595" }, { "epoch": 1.473636637541428, "eta": "76:14:18", "grad_norm": 0.0063, "loss": 0.0605, "lr": "4.061e-05", "step": 4891, "steps": "23.45s,4891/16595" }, { "epoch": 1.4739379331123832, "eta": "78:46:03", "grad_norm": 0.0072, "loss": 0.0635, "lr": "4.061e-05", "step": 4892, "steps": "24.23s,4892/16595" }, { "epoch": 1.4742392286833383, "eta": "77:13:59", "grad_norm": 0.0051, "loss": 0.0468, "lr": "4.061e-05", "step": 4893, "steps": "23.76s,4893/16595" }, { "epoch": 1.4745405242542935, "eta": "77:23:20", "grad_norm": 0.0058, "loss": 0.0534, "lr": "4.060e-05", "step": 4894, "steps": "23.81s,4894/16595" }, { "epoch": 1.4748418198252486, "eta": "77:21:00", "grad_norm": 0.0052, "loss": 0.0529, "lr": "4.060e-05", "step": 4895, "steps": "23.8s,4895/16595" }, { "epoch": 1.4751431153962038, "eta": "77:34:15", "grad_norm": 0.0056, "loss": 0.0377, "lr": "4.059e-05", "step": 4896, "steps": "23.87s,4896/16595" }, { "epoch": 1.4754444109671587, "eta": "76:45:06", "grad_norm": 0.0056, "loss": 0.0578, "lr": "4.059e-05", "step": 4897, "steps": "23.62s,4897/16595" }, { "epoch": 1.4757457065381139, "eta": "77:54:54", "grad_norm": 0.006, "loss": 0.0412, "lr": "4.059e-05", "step": 4898, "steps": "23.98s,4898/16595" }, { "epoch": 1.476047002109069, "eta": "76:09:14", "grad_norm": 0.0059, "loss": 0.0477, "lr": "4.058e-05", "step": 4899, "steps": "23.44s,4899/16595" }, { "epoch": 1.4763482976800242, "eta": "76:34:11", "grad_norm": 0.0058, "loss": 0.0587, "lr": "4.058e-05", "step": 4900, "steps": "23.57s,4900/16595" }, { "epoch": 1.476649593250979, "eta": "77:34:12", "grad_norm": 0.0062, "loss": 0.0446, "lr": "4.058e-05", "step": 4901, "steps": "23.88s,4901/16595" }, { "epoch": 1.4769508888219343, "eta": "76:58:44", "grad_norm": 0.0063, "loss": 0.0559, "lr": "4.057e-05", "step": 4902, "steps": "23.7s,4902/16595" }, { "epoch": 1.4772521843928894, "eta": "76:50:32", "grad_norm": 0.0089, "loss": 0.0531, "lr": "4.057e-05", "step": 4903, "steps": "23.66s,4903/16595" }, { "epoch": 1.4775534799638446, "eta": "77:11:35", "grad_norm": 0.0058, "loss": 0.0661, "lr": "4.056e-05", "step": 4904, "steps": "23.77s,4904/16595" }, { "epoch": 1.4778547755347997, "eta": "77:17:02", "grad_norm": 0.0049, "loss": 0.057, "lr": "4.056e-05", "step": 4905, "steps": "23.8s,4905/16595" }, { "epoch": 1.4781560711057549, "eta": "75:56:45", "grad_norm": 0.0056, "loss": 0.057, "lr": "4.056e-05", "step": 4906, "steps": "23.39s,4906/16595" }, { "epoch": 1.4784573666767098, "eta": "77:35:43", "grad_norm": 0.0066, "loss": 0.0333, "lr": "4.055e-05", "step": 4907, "steps": "23.9s,4907/16595" }, { "epoch": 1.478758662247665, "eta": "77:29:28", "grad_norm": 0.0058, "loss": 0.0641, "lr": "4.055e-05", "step": 4908, "steps": "23.87s,4908/16595" }, { "epoch": 1.47905995781862, "eta": "76:38:26", "grad_norm": 0.0058, "loss": 0.0472, "lr": "4.055e-05", "step": 4909, "steps": "23.61s,4909/16595" }, { "epoch": 1.4793612533895752, "eta": "77:07:15", "grad_norm": 0.0082, "loss": 0.0513, "lr": "4.054e-05", "step": 4910, "steps": "23.76s,4910/16595" }, { "epoch": 1.4796625489605302, "eta": "76:31:48", "grad_norm": 0.0052, "loss": 0.0552, "lr": "4.054e-05", "step": 4911, "steps": "23.58s,4911/16595" }, { "epoch": 1.4799638445314853, "eta": "77:45:24", "grad_norm": 0.0057, "loss": 0.0591, "lr": "4.053e-05", "step": 4912, "steps": "23.96s,4912/16595" }, { "epoch": 1.4802651401024405, "eta": "76:50:29", "grad_norm": 0.0064, "loss": 0.0558, "lr": "4.053e-05", "step": 4913, "steps": "23.68s,4913/16595" }, { "epoch": 1.4805664356733956, "eta": "76:52:02", "grad_norm": 0.0058, "loss": 0.0417, "lr": "4.053e-05", "step": 4914, "steps": "23.69s,4914/16595" }, { "epoch": 1.4808677312443508, "eta": "76:41:55", "grad_norm": 0.0043, "loss": 0.0601, "lr": "4.052e-05", "step": 4915, "steps": "23.64s,4915/16595" }, { "epoch": 1.481169026815306, "eta": "76:57:05", "grad_norm": 0.006, "loss": 0.0506, "lr": "4.052e-05", "step": 4916, "steps": "23.72s,4916/16595" }, { "epoch": 1.4814703223862609, "eta": "77:58:59", "grad_norm": 0.0054, "loss": 0.0292, "lr": "4.052e-05", "step": 4917, "steps": "24.04s,4917/16595" }, { "epoch": 1.481771617957216, "eta": "76:54:21", "grad_norm": 0.0076, "loss": 0.0686, "lr": "4.051e-05", "step": 4918, "steps": "23.71s,4918/16595" }, { "epoch": 1.4820729135281712, "eta": "77:42:36", "grad_norm": 0.0058, "loss": 0.0533, "lr": "4.051e-05", "step": 4919, "steps": "23.96s,4919/16595" }, { "epoch": 1.4823742090991263, "eta": "77:32:29", "grad_norm": 0.0056, "loss": 0.0398, "lr": "4.050e-05", "step": 4920, "steps": "23.91s,4920/16595" }, { "epoch": 1.4826755046700812, "eta": "76:39:33", "grad_norm": 0.0046, "loss": 0.0419, "lr": "4.050e-05", "step": 4921, "steps": "23.64s,4921/16595" }, { "epoch": 1.4829768002410364, "eta": "77:37:31", "grad_norm": 0.0059, "loss": 0.05, "lr": "4.050e-05", "step": 4922, "steps": "23.94s,4922/16595" }, { "epoch": 1.4832780958119915, "eta": "76:34:52", "grad_norm": 0.0054, "loss": 0.0423, "lr": "4.049e-05", "step": 4923, "steps": "23.62s,4923/16595" }, { "epoch": 1.4835793913829467, "eta": "76:51:59", "grad_norm": 0.0059, "loss": 0.058, "lr": "4.049e-05", "step": 4924, "steps": "23.71s,4924/16595" }, { "epoch": 1.4838806869539019, "eta": "77:11:02", "grad_norm": 0.0055, "loss": 0.0539, "lr": "4.049e-05", "step": 4925, "steps": "23.81s,4925/16595" }, { "epoch": 1.484181982524857, "eta": "77:06:45", "grad_norm": 0.0049, "loss": 0.0543, "lr": "4.048e-05", "step": 4926, "steps": "23.79s,4926/16595" }, { "epoch": 1.484483278095812, "eta": "77:08:18", "grad_norm": 0.0048, "loss": 0.0549, "lr": "4.048e-05", "step": 4927, "steps": "23.8s,4927/16595" }, { "epoch": 1.484784573666767, "eta": "75:52:04", "grad_norm": 0.0047, "loss": 0.0327, "lr": "4.047e-05", "step": 4928, "steps": "23.41s,4928/16595" }, { "epoch": 1.4850858692377222, "eta": "77:07:30", "grad_norm": 0.0065, "loss": 0.0574, "lr": "4.047e-05", "step": 4929, "steps": "23.8s,4929/16595" }, { "epoch": 1.4853871648086774, "eta": "77:11:00", "grad_norm": 0.0064, "loss": 0.0526, "lr": "4.047e-05", "step": 4930, "steps": "23.82s,4930/16595" }, { "epoch": 1.4856884603796323, "eta": "75:56:44", "grad_norm": 0.0056, "loss": 0.0422, "lr": "4.046e-05", "step": 4931, "steps": "23.44s,4931/16595" }, { "epoch": 1.4859897559505875, "eta": "76:19:40", "grad_norm": 0.0048, "loss": 0.046, "lr": "4.046e-05", "step": 4932, "steps": "23.56s,4932/16595" }, { "epoch": 1.4862910515215426, "eta": "76:56:12", "grad_norm": 0.005, "loss": 0.0463, "lr": "4.046e-05", "step": 4933, "steps": "23.75s,4933/16595" }, { "epoch": 1.4865923470924978, "eta": "76:44:09", "grad_norm": 0.0057, "loss": 0.0412, "lr": "4.045e-05", "step": 4934, "steps": "23.69s,4934/16595" }, { "epoch": 1.486893642663453, "eta": "76:28:12", "grad_norm": 0.006, "loss": 0.051, "lr": "4.045e-05", "step": 4935, "steps": "23.61s,4935/16595" }, { "epoch": 1.487194938234408, "eta": "76:10:19", "grad_norm": 0.0052, "loss": 0.0575, "lr": "4.044e-05", "step": 4936, "steps": "23.52s,4936/16595" }, { "epoch": 1.487496233805363, "eta": "76:58:30", "grad_norm": 0.0045, "loss": 0.0459, "lr": "4.044e-05", "step": 4937, "steps": "23.77s,4937/16595" }, { "epoch": 1.4877975293763182, "eta": "75:42:20", "grad_norm": 0.0051, "loss": 0.0641, "lr": "4.044e-05", "step": 4938, "steps": "23.38s,4938/16595" }, { "epoch": 1.4880988249472733, "eta": "76:26:38", "grad_norm": 0.0052, "loss": 0.0604, "lr": "4.043e-05", "step": 4939, "steps": "23.61s,4939/16595" }, { "epoch": 1.4884001205182285, "eta": "77:10:55", "grad_norm": 0.005, "loss": 0.0485, "lr": "4.043e-05", "step": 4940, "steps": "23.84s,4940/16595" }, { "epoch": 1.4887014160891834, "eta": "77:18:17", "grad_norm": 0.0054, "loss": 0.0516, "lr": "4.043e-05", "step": 4941, "steps": "23.88s,4941/16595" }, { "epoch": 1.4890027116601385, "eta": "76:09:55", "grad_norm": 0.0049, "loss": 0.044, "lr": "4.042e-05", "step": 4942, "steps": "23.53s,4942/16595" }, { "epoch": 1.4893040072310937, "eta": "77:03:54", "grad_norm": 0.0049, "loss": 0.0481, "lr": "4.042e-05", "step": 4943, "steps": "23.81s,4943/16595" }, { "epoch": 1.4896053028020488, "eta": "76:05:15", "grad_norm": 0.0052, "loss": 0.0481, "lr": "4.041e-05", "step": 4944, "steps": "23.51s,4944/16595" }, { "epoch": 1.489906598373004, "eta": "77:24:28", "grad_norm": 0.005, "loss": 0.0502, "lr": "4.041e-05", "step": 4945, "steps": "23.92s,4945/16595" }, { "epoch": 1.4902078939439591, "eta": "76:35:31", "grad_norm": 0.0039, "loss": 0.048, "lr": "4.041e-05", "step": 4946, "steps": "23.67s,4946/16595" }, { "epoch": 1.490509189514914, "eta": "76:09:53", "grad_norm": 0.0044, "loss": 0.0436, "lr": "4.040e-05", "step": 4947, "steps": "23.54s,4947/16595" }, { "epoch": 1.4908104850858692, "eta": "76:30:51", "grad_norm": 0.0061, "loss": 0.0363, "lr": "4.040e-05", "step": 4948, "steps": "23.65s,4948/16595" }, { "epoch": 1.4911117806568244, "eta": "77:34:31", "grad_norm": 0.0049, "loss": 0.0666, "lr": "4.040e-05", "step": 4949, "steps": "23.98s,4949/16595" }, { "epoch": 1.4914130762277795, "eta": "77:24:24", "grad_norm": 0.0044, "loss": 0.046, "lr": "4.039e-05", "step": 4950, "steps": "23.93s,4950/16595" }, { "epoch": 1.4917143717987345, "eta": "76:41:19", "grad_norm": 0.0047, "loss": 0.0334, "lr": "4.039e-05", "step": 4951, "steps": "23.71s,4951/16595" }, { "epoch": 1.4920156673696896, "eta": "76:54:30", "grad_norm": 0.0067, "loss": 0.0696, "lr": "4.038e-05", "step": 4952, "steps": "23.78s,4952/16595" }, { "epoch": 1.4923169629406448, "eta": "76:52:10", "grad_norm": 0.0066, "loss": 0.046, "lr": "4.038e-05", "step": 4953, "steps": "23.77s,4953/16595" }, { "epoch": 1.4926182585116, "eta": "76:47:53", "grad_norm": 0.0043, "loss": 0.0507, "lr": "4.038e-05", "step": 4954, "steps": "23.75s,4954/16595" }, { "epoch": 1.492919554082555, "eta": "76:16:27", "grad_norm": 0.0051, "loss": 0.0662, "lr": "4.037e-05", "step": 4955, "steps": "23.59s,4955/16595" }, { "epoch": 1.4932208496535102, "eta": "77:18:08", "grad_norm": 0.006, "loss": 0.0506, "lr": "4.037e-05", "step": 4956, "steps": "23.91s,4956/16595" }, { "epoch": 1.4935221452244651, "eta": "76:38:56", "grad_norm": 0.005, "loss": 0.0591, "lr": "4.037e-05", "step": 4957, "steps": "23.71s,4957/16595" }, { "epoch": 1.4938234407954203, "eta": "77:27:02", "grad_norm": 0.005, "loss": 0.0575, "lr": "4.036e-05", "step": 4958, "steps": "23.96s,4958/16595" }, { "epoch": 1.4941247363663754, "eta": "76:40:05", "grad_norm": 0.0049, "loss": 0.0495, "lr": "4.036e-05", "step": 4959, "steps": "23.72s,4959/16595" }, { "epoch": 1.4944260319373306, "eta": "77:08:47", "grad_norm": 0.0043, "loss": 0.0537, "lr": "4.035e-05", "step": 4960, "steps": "23.87s,4960/16595" }, { "epoch": 1.4947273275082855, "eta": "77:16:08", "grad_norm": 0.0049, "loss": 0.0468, "lr": "4.035e-05", "step": 4961, "steps": "23.91s,4961/16595" }, { "epoch": 1.4950286230792407, "eta": "77:27:23", "grad_norm": 0.0043, "loss": 0.0508, "lr": "4.035e-05", "step": 4962, "steps": "23.97s,4962/16595" }, { "epoch": 1.4953299186501958, "eta": "76:48:12", "grad_norm": 0.0061, "loss": 0.0594, "lr": "4.034e-05", "step": 4963, "steps": "23.77s,4963/16595" }, { "epoch": 1.495631214221151, "eta": "76:45:52", "grad_norm": 0.0056, "loss": 0.0605, "lr": "4.034e-05", "step": 4964, "steps": "23.76s,4964/16595" }, { "epoch": 1.4959325097921061, "eta": "76:20:16", "grad_norm": 0.006, "loss": 0.0509, "lr": "4.034e-05", "step": 4965, "steps": "23.63s,4965/16595" }, { "epoch": 1.4962338053630613, "eta": "76:35:23", "grad_norm": 0.0064, "loss": 0.0522, "lr": "4.033e-05", "step": 4966, "steps": "23.71s,4966/16595" }, { "epoch": 1.4965351009340162, "eta": "77:06:00", "grad_norm": 0.0049, "loss": 0.0484, "lr": "4.033e-05", "step": 4967, "steps": "23.87s,4967/16595" }, { "epoch": 1.4968363965049714, "eta": "76:22:58", "grad_norm": 0.0058, "loss": 0.0606, "lr": "4.032e-05", "step": 4968, "steps": "23.65s,4968/16595" }, { "epoch": 1.4971376920759265, "eta": "76:53:35", "grad_norm": 0.0054, "loss": 0.0678, "lr": "4.032e-05", "step": 4969, "steps": "23.81s,4969/16595" }, { "epoch": 1.4974389876468817, "eta": "77:20:18", "grad_norm": 0.0044, "loss": 0.0385, "lr": "4.032e-05", "step": 4970, "steps": "23.95s,4970/16595" }, { "epoch": 1.4977402832178366, "eta": "76:02:25", "grad_norm": 0.0054, "loss": 0.0522, "lr": "4.031e-05", "step": 4971, "steps": "23.55s,4971/16595" }, { "epoch": 1.4980415787887917, "eta": "76:44:38", "grad_norm": 0.01, "loss": 0.0448, "lr": "4.031e-05", "step": 4972, "steps": "23.77s,4972/16595" }, { "epoch": 1.498342874359747, "eta": "75:20:57", "grad_norm": 0.0054, "loss": 0.048, "lr": "4.031e-05", "step": 4973, "steps": "23.34s,4973/16595" }, { "epoch": 1.498644169930702, "eta": "77:30:20", "grad_norm": 0.0047, "loss": 0.0527, "lr": "4.030e-05", "step": 4974, "steps": "24.01s,4974/16595" }, { "epoch": 1.4989454655016572, "eta": "76:41:31", "grad_norm": 0.0046, "loss": 0.0363, "lr": "4.030e-05", "step": 4975, "steps": "23.76s,4975/16595" }, { "epoch": 1.4992467610726123, "eta": "76:58:33", "grad_norm": 0.0049, "loss": 0.0513, "lr": "4.029e-05", "step": 4976, "steps": "23.85s,4976/16595" }, { "epoch": 1.4995480566435673, "eta": "76:11:40", "grad_norm": 0.0052, "loss": 0.0403, "lr": "4.029e-05", "step": 4977, "steps": "23.61s,4977/16595" }, { "epoch": 1.4998493522145224, "eta": "77:15:10", "grad_norm": 0.0046, "loss": 0.0548, "lr": "4.029e-05", "step": 4978, "steps": "23.94s,4978/16595" }, { "epoch": 1.5001506477854776, "eta": "77:34:08", "grad_norm": 0.0044, "loss": 0.0638, "lr": "4.028e-05", "step": 4979, "steps": "24.04s,4979/16595" }, { "epoch": 1.5004519433564325, "eta": "76:12:26", "grad_norm": 0.0048, "loss": 0.0571, "lr": "4.028e-05", "step": 4980, "steps": "23.62s,4980/16595" }, { "epoch": 1.5007532389273877, "eta": "76:35:16", "grad_norm": 0.0111, "loss": 0.0358, "lr": "4.028e-05", "step": 4981, "steps": "23.74s,4981/16595" }, { "epoch": 1.5010545344983428, "eta": "76:03:54", "grad_norm": 0.0058, "loss": 0.0502, "lr": "4.027e-05", "step": 4982, "steps": "23.58s,4982/16595" }, { "epoch": 1.501355830069298, "eta": "75:28:40", "grad_norm": 0.0052, "loss": 0.048, "lr": "4.027e-05", "step": 4983, "steps": "23.4s,4983/16595" }, { "epoch": 1.501657125640253, "eta": "77:08:55", "grad_norm": 0.005, "loss": 0.0454, "lr": "4.026e-05", "step": 4984, "steps": "23.92s,4984/16595" }, { "epoch": 1.5019584212112083, "eta": "77:02:42", "grad_norm": 0.0054, "loss": 0.0575, "lr": "4.026e-05", "step": 4985, "steps": "23.89s,4985/16595" }, { "epoch": 1.5022597167821634, "eta": "77:29:24", "grad_norm": 0.0052, "loss": 0.0616, "lr": "4.026e-05", "step": 4986, "steps": "24.03s,4986/16595" }, { "epoch": 1.5025610123531186, "eta": "76:48:22", "grad_norm": 0.0045, "loss": 0.0473, "lr": "4.025e-05", "step": 4987, "steps": "23.82s,4987/16595" }, { "epoch": 1.5028623079240735, "eta": "77:05:23", "grad_norm": 0.0051, "loss": 0.0503, "lr": "4.025e-05", "step": 4988, "steps": "23.91s,4988/16595" }, { "epoch": 1.5031636034950286, "eta": "77:14:39", "grad_norm": 0.0056, "loss": 0.0385, "lr": "4.024e-05", "step": 4989, "steps": "23.96s,4989/16595" }, { "epoch": 1.5034648990659836, "eta": "76:29:46", "grad_norm": 0.0064, "loss": 0.0536, "lr": "4.024e-05", "step": 4990, "steps": "23.73s,4990/16595" }, { "epoch": 1.5037661946369387, "eta": "76:52:35", "grad_norm": 0.0038, "loss": 0.0498, "lr": "4.024e-05", "step": 4991, "steps": "23.85s,4991/16595" }, { "epoch": 1.5040674902078939, "eta": "76:19:19", "grad_norm": 0.0049, "loss": 0.048, "lr": "4.023e-05", "step": 4992, "steps": "23.68s,4992/16595" }, { "epoch": 1.504368785778849, "eta": "75:32:30", "grad_norm": 0.0054, "loss": 0.0555, "lr": "4.023e-05", "step": 4993, "steps": "23.44s,4993/16595" }, { "epoch": 1.5046700813498042, "eta": "76:10:47", "grad_norm": 0.0048, "loss": 0.0493, "lr": "4.023e-05", "step": 4994, "steps": "23.64s,4994/16595" }, { "epoch": 1.5049713769207593, "eta": "75:51:04", "grad_norm": 0.0064, "loss": 0.0539, "lr": "4.022e-05", "step": 4995, "steps": "23.54s,4995/16595" }, { "epoch": 1.5052726724917145, "eta": "76:44:48", "grad_norm": 0.0047, "loss": 0.0455, "lr": "4.022e-05", "step": 4996, "steps": "23.82s,4996/16595" }, { "epoch": 1.5055739680626696, "eta": "76:19:16", "grad_norm": 0.0056, "loss": 0.0475, "lr": "4.021e-05", "step": 4997, "steps": "23.69s,4997/16595" }, { "epoch": 1.5058752636336246, "eta": "77:28:27", "grad_norm": 0.0043, "loss": 0.0584, "lr": "4.021e-05", "step": 4998, "steps": "24.05s,4998/16595" }, { "epoch": 1.5061765592045797, "eta": "76:32:00", "grad_norm": 0.0046, "loss": 0.0509, "lr": "4.021e-05", "step": 4999, "steps": "23.76s,4999/16595" }, { "epoch": 1.5064778547755346, "eta": "75:37:30", "grad_norm": 0.005, "loss": 0.0492, "lr": "4.020e-05", "step": 5000, "steps": "23.48s,5000/16595" }, { "epoch": 1.5067791503464898, "eta": "158:09:41", "grad_norm": 0.0048, "loss": 0.0521, "lr": "4.020e-05", "step": 5001, "steps": "49.11s,5001/16595" }, { "epoch": 1.507080445917445, "eta": "76:38:33", "grad_norm": 0.0049, "loss": 0.0288, "lr": "4.020e-05", "step": 5002, "steps": "23.8s,5002/16595" }, { "epoch": 1.5073817414884, "eta": "76:16:54", "grad_norm": 0.0061, "loss": 0.0552, "lr": "4.019e-05", "step": 5003, "steps": "23.69s,5003/16595" }, { "epoch": 1.5076830370593552, "eta": "77:16:24", "grad_norm": 0.0071, "loss": 0.0481, "lr": "4.019e-05", "step": 5004, "steps": "24.0s,5004/16595" }, { "epoch": 1.5079843326303104, "eta": "76:33:30", "grad_norm": 0.0049, "loss": 0.0424, "lr": "4.018e-05", "step": 5005, "steps": "23.78s,5005/16595" }, { "epoch": 1.5082856282012655, "eta": "75:42:53", "grad_norm": 0.0053, "loss": 0.0511, "lr": "4.018e-05", "step": 5006, "steps": "23.52s,5006/16595" }, { "epoch": 1.5085869237722207, "eta": "76:09:32", "grad_norm": 0.0053, "loss": 0.0398, "lr": "4.018e-05", "step": 5007, "steps": "23.66s,5007/16595" }, { "epoch": 1.5088882193431756, "eta": "76:57:25", "grad_norm": 0.0063, "loss": 0.0406, "lr": "4.017e-05", "step": 5008, "steps": "23.91s,5008/16595" }, { "epoch": 1.5091895149141308, "eta": "77:16:19", "grad_norm": 0.0043, "loss": 0.0551, "lr": "4.017e-05", "step": 5009, "steps": "24.01s,5009/16595" }, { "epoch": 1.5094908104850857, "eta": "76:54:41", "grad_norm": 0.0056, "loss": 0.0393, "lr": "4.017e-05", "step": 5010, "steps": "23.9s,5010/16595" }, { "epoch": 1.5097921060560409, "eta": "76:31:07", "grad_norm": 0.0043, "loss": 0.0589, "lr": "4.016e-05", "step": 5011, "steps": "23.78s,5011/16595" }, { "epoch": 1.510093401626996, "eta": "75:54:02", "grad_norm": 0.0067, "loss": 0.0673, "lr": "4.016e-05", "step": 5012, "steps": "23.59s,5012/16595" }, { "epoch": 1.5103946971979512, "eta": "76:28:24", "grad_norm": 0.0049, "loss": 0.0563, "lr": "4.015e-05", "step": 5013, "steps": "23.77s,5013/16595" }, { "epoch": 1.5106959927689063, "eta": "76:14:29", "grad_norm": 0.0049, "loss": 0.0522, "lr": "4.015e-05", "step": 5014, "steps": "23.7s,5014/16595" }, { "epoch": 1.5109972883398615, "eta": "75:43:13", "grad_norm": 0.0056, "loss": 0.0555, "lr": "4.015e-05", "step": 5015, "steps": "23.54s,5015/16595" }, { "epoch": 1.5112985839108166, "eta": "76:05:59", "grad_norm": 0.0063, "loss": 0.0347, "lr": "4.014e-05", "step": 5016, "steps": "23.66s,5016/16595" }, { "epoch": 1.5115998794817718, "eta": "76:21:01", "grad_norm": 0.0042, "loss": 0.0632, "lr": "4.014e-05", "step": 5017, "steps": "23.74s,5017/16595" }, { "epoch": 1.5119011750527267, "eta": "75:55:32", "grad_norm": 0.006, "loss": 0.045, "lr": "4.013e-05", "step": 5018, "steps": "23.61s,5018/16595" }, { "epoch": 1.5122024706236818, "eta": "75:45:30", "grad_norm": 0.0047, "loss": 0.0439, "lr": "4.013e-05", "step": 5019, "steps": "23.56s,5019/16595" }, { "epoch": 1.5125037661946368, "eta": "75:58:37", "grad_norm": 0.0085, "loss": 0.0531, "lr": "4.013e-05", "step": 5020, "steps": "23.63s,5020/16595" }, { "epoch": 1.512805061765592, "eta": "76:52:14", "grad_norm": 0.0053, "loss": 0.0612, "lr": "4.012e-05", "step": 5021, "steps": "23.91s,5021/16595" }, { "epoch": 1.513106357336547, "eta": "75:48:11", "grad_norm": 0.0045, "loss": 0.047, "lr": "4.012e-05", "step": 5022, "steps": "23.58s,5022/16595" }, { "epoch": 1.5134076529075022, "eta": "76:24:26", "grad_norm": 0.0058, "loss": 0.0579, "lr": "4.012e-05", "step": 5023, "steps": "23.77s,5023/16595" }, { "epoch": 1.5137089484784574, "eta": "76:16:19", "grad_norm": 0.0051, "loss": 0.0652, "lr": "4.011e-05", "step": 5024, "steps": "23.73s,5024/16595" }, { "epoch": 1.5140102440494125, "eta": "75:23:52", "grad_norm": 0.0072, "loss": 0.0399, "lr": "4.011e-05", "step": 5025, "steps": "23.46s,5025/16595" }, { "epoch": 1.5143115396203677, "eta": "76:48:19", "grad_norm": 0.0067, "loss": 0.0576, "lr": "4.010e-05", "step": 5026, "steps": "23.9s,5026/16595" }, { "epoch": 1.5146128351913228, "eta": "76:15:08", "grad_norm": 0.0046, "loss": 0.0536, "lr": "4.010e-05", "step": 5027, "steps": "23.73s,5027/16595" }, { "epoch": 1.5149141307622778, "eta": "76:12:49", "grad_norm": 0.0049, "loss": 0.0565, "lr": "4.010e-05", "step": 5028, "steps": "23.72s,5028/16595" }, { "epoch": 1.515215426333233, "eta": "76:43:16", "grad_norm": 0.0053, "loss": 0.0451, "lr": "4.009e-05", "step": 5029, "steps": "23.88s,5029/16595" }, { "epoch": 1.5155167219041878, "eta": "76:27:27", "grad_norm": 0.0057, "loss": 0.0397, "lr": "4.009e-05", "step": 5030, "steps": "23.8s,5030/16595" }, { "epoch": 1.515818017475143, "eta": "75:52:21", "grad_norm": 0.0046, "loss": 0.0387, "lr": "4.009e-05", "step": 5031, "steps": "23.62s,5031/16595" }, { "epoch": 1.5161193130460981, "eta": "76:05:27", "grad_norm": 0.008, "loss": 0.0513, "lr": "4.008e-05", "step": 5032, "steps": "23.69s,5032/16595" }, { "epoch": 1.5164206086170533, "eta": "76:20:28", "grad_norm": 0.0068, "loss": 0.0557, "lr": "4.008e-05", "step": 5033, "steps": "23.77s,5033/16595" }, { "epoch": 1.5167219041880085, "eta": "76:23:56", "grad_norm": 0.0064, "loss": 0.0492, "lr": "4.007e-05", "step": 5034, "steps": "23.79s,5034/16595" }, { "epoch": 1.5170231997589636, "eta": "76:08:07", "grad_norm": 0.0046, "loss": 0.0522, "lr": "4.007e-05", "step": 5035, "steps": "23.71s,5035/16595" }, { "epoch": 1.5173244953299188, "eta": "77:03:36", "grad_norm": 0.0061, "loss": 0.0463, "lr": "4.007e-05", "step": 5036, "steps": "24.0s,5036/16595" }, { "epoch": 1.517625790900874, "eta": "75:55:46", "grad_norm": 0.0052, "loss": 0.0525, "lr": "4.006e-05", "step": 5037, "steps": "23.65s,5037/16595" }, { "epoch": 1.5179270864718288, "eta": "76:14:38", "grad_norm": 0.0054, "loss": 0.0475, "lr": "4.006e-05", "step": 5038, "steps": "23.75s,5038/16595" }, { "epoch": 1.518228382042784, "eta": "74:57:12", "grad_norm": 0.006, "loss": 0.0553, "lr": "4.005e-05", "step": 5039, "steps": "23.35s,5039/16595" }, { "epoch": 1.518529677613739, "eta": "75:52:40", "grad_norm": 0.0072, "loss": 0.0539, "lr": "4.005e-05", "step": 5040, "steps": "23.64s,5040/16595" }, { "epoch": 1.518830973184694, "eta": "75:02:12", "grad_norm": 0.006, "loss": 0.0511, "lr": "4.005e-05", "step": 5041, "steps": "23.38s,5041/16595" }, { "epoch": 1.5191322687556492, "eta": "75:28:46", "grad_norm": 0.005, "loss": 0.0643, "lr": "4.004e-05", "step": 5042, "steps": "23.52s,5042/16595" }, { "epoch": 1.5194335643266044, "eta": "75:32:14", "grad_norm": 0.0063, "loss": 0.0482, "lr": "4.004e-05", "step": 5043, "steps": "23.54s,5043/16595" }, { "epoch": 1.5197348598975595, "eta": "75:04:53", "grad_norm": 0.0051, "loss": 0.0422, "lr": "4.004e-05", "step": 5044, "steps": "23.4s,5044/16595" }, { "epoch": 1.5200361554685147, "eta": "75:27:36", "grad_norm": 0.0049, "loss": 0.0643, "lr": "4.003e-05", "step": 5045, "steps": "23.52s,5045/16595" }, { "epoch": 1.5203374510394698, "eta": "76:44:12", "grad_norm": 0.0049, "loss": 0.0494, "lr": "4.003e-05", "step": 5046, "steps": "23.92s,5046/16595" }, { "epoch": 1.520638746610425, "eta": "75:59:32", "grad_norm": 0.005, "loss": 0.0508, "lr": "4.002e-05", "step": 5047, "steps": "23.69s,5047/16595" }, { "epoch": 1.52094004218138, "eta": "77:04:34", "grad_norm": 0.0072, "loss": 0.0562, "lr": "4.002e-05", "step": 5048, "steps": "24.03s,5048/16595" }, { "epoch": 1.521241337752335, "eta": "76:19:54", "grad_norm": 0.006, "loss": 0.0553, "lr": "4.002e-05", "step": 5049, "steps": "23.8s,5049/16595" }, { "epoch": 1.52154263332329, "eta": "76:04:07", "grad_norm": 0.0059, "loss": 0.0532, "lr": "4.001e-05", "step": 5050, "steps": "23.72s,5050/16595" }, { "epoch": 1.5218439288942451, "eta": "76:13:20", "grad_norm": 0.0062, "loss": 0.0358, "lr": "4.001e-05", "step": 5051, "steps": "23.77s,5051/16595" }, { "epoch": 1.5221452244652003, "eta": "76:30:15", "grad_norm": 0.0044, "loss": 0.0602, "lr": "4.001e-05", "step": 5052, "steps": "23.86s,5052/16595" }, { "epoch": 1.5224465200361554, "eta": "75:28:18", "grad_norm": 0.0056, "loss": 0.0465, "lr": "4.000e-05", "step": 5053, "steps": "23.54s,5053/16595" }, { "epoch": 1.5227478156071106, "eta": "75:04:50", "grad_norm": 0.0054, "loss": 0.0583, "lr": "4.000e-05", "step": 5054, "steps": "23.42s,5054/16595" }, { "epoch": 1.5230491111780657, "eta": "76:29:04", "grad_norm": 0.0052, "loss": 0.0556, "lr": "3.999e-05", "step": 5055, "steps": "23.86s,5055/16595" }, { "epoch": 1.523350406749021, "eta": "76:05:35", "grad_norm": 0.0128, "loss": 0.051, "lr": "3.999e-05", "step": 5056, "steps": "23.74s,5056/16595" }, { "epoch": 1.523651702319976, "eta": "75:51:44", "grad_norm": 0.005, "loss": 0.0384, "lr": "3.999e-05", "step": 5057, "steps": "23.67s,5057/16595" }, { "epoch": 1.523952997890931, "eta": "75:59:02", "grad_norm": 0.0045, "loss": 0.0617, "lr": "3.998e-05", "step": 5058, "steps": "23.71s,5058/16595" }, { "epoch": 1.5242542934618861, "eta": "74:57:07", "grad_norm": 0.0049, "loss": 0.0545, "lr": "3.998e-05", "step": 5059, "steps": "23.39s,5059/16595" }, { "epoch": 1.524555589032841, "eta": "77:07:27", "grad_norm": 0.0054, "loss": 0.0446, "lr": "3.997e-05", "step": 5060, "steps": "24.07s,5060/16595" }, { "epoch": 1.5248568846037962, "eta": "75:46:19", "grad_norm": 0.0046, "loss": 0.0589, "lr": "3.997e-05", "step": 5061, "steps": "23.65s,5061/16595" }, { "epoch": 1.5251581801747514, "eta": "75:05:33", "grad_norm": 0.0059, "loss": 0.0542, "lr": "3.997e-05", "step": 5062, "steps": "23.44s,5062/16595" }, { "epoch": 1.5254594757457065, "eta": "76:08:35", "grad_norm": 0.005, "loss": 0.0503, "lr": "3.996e-05", "step": 5063, "steps": "23.77s,5063/16595" }, { "epoch": 1.5257607713166617, "eta": "76:04:21", "grad_norm": 0.0059, "loss": 0.0424, "lr": "3.996e-05", "step": 5064, "steps": "23.75s,5064/16595" }, { "epoch": 1.5260620668876168, "eta": "74:35:33", "grad_norm": 0.0063, "loss": 0.0582, "lr": "3.996e-05", "step": 5065, "steps": "23.29s,5065/16595" }, { "epoch": 1.526363362458572, "eta": "76:32:23", "grad_norm": 0.0055, "loss": 0.0486, "lr": "3.995e-05", "step": 5066, "steps": "23.9s,5066/16595" }, { "epoch": 1.526664658029527, "eta": "75:42:01", "grad_norm": 0.006, "loss": 0.0607, "lr": "3.995e-05", "step": 5067, "steps": "23.64s,5067/16595" }, { "epoch": 1.526965953600482, "eta": "74:40:09", "grad_norm": 0.0067, "loss": 0.0418, "lr": "3.994e-05", "step": 5068, "steps": "23.32s,5068/16595" }, { "epoch": 1.5272672491714372, "eta": "76:44:38", "grad_norm": 0.006, "loss": 0.0484, "lr": "3.994e-05", "step": 5069, "steps": "23.97s,5069/16595" }, { "epoch": 1.5275685447423921, "eta": "76:05:49", "grad_norm": 0.0043, "loss": 0.058, "lr": "3.994e-05", "step": 5070, "steps": "23.77s,5070/16595" }, { "epoch": 1.5278698403133473, "eta": "77:20:19", "grad_norm": 0.0053, "loss": 0.055, "lr": "3.993e-05", "step": 5071, "steps": "24.16s,5071/16595" }, { "epoch": 1.5281711358843024, "eta": "76:06:56", "grad_norm": 0.0057, "loss": 0.0468, "lr": "3.993e-05", "step": 5072, "steps": "23.78s,5072/16595" }, { "epoch": 1.5284724314552576, "eta": "75:58:52", "grad_norm": 0.0043, "loss": 0.041, "lr": "3.992e-05", "step": 5073, "steps": "23.74s,5073/16595" }, { "epoch": 1.5287737270262127, "eta": "74:58:57", "grad_norm": 0.0054, "loss": 0.045, "lr": "3.992e-05", "step": 5074, "steps": "23.43s,5074/16595" }, { "epoch": 1.5290750225971679, "eta": "76:19:12", "grad_norm": 0.0052, "loss": 0.0557, "lr": "3.992e-05", "step": 5075, "steps": "23.85s,5075/16595" }, { "epoch": 1.529376318168123, "eta": "76:13:02", "grad_norm": 0.0056, "loss": 0.0443, "lr": "3.991e-05", "step": 5076, "steps": "23.82s,5076/16595" }, { "epoch": 1.5296776137390782, "eta": "76:04:58", "grad_norm": 0.0048, "loss": 0.0604, "lr": "3.991e-05", "step": 5077, "steps": "23.78s,5077/16595" }, { "epoch": 1.529978909310033, "eta": "75:10:49", "grad_norm": 0.0049, "loss": 0.0702, "lr": "3.991e-05", "step": 5078, "steps": "23.5s,5078/16595" }, { "epoch": 1.5302802048809883, "eta": "75:27:42", "grad_norm": 0.0055, "loss": 0.053, "lr": "3.990e-05", "step": 5079, "steps": "23.59s,5079/16595" }, { "epoch": 1.5305815004519434, "eta": "74:54:41", "grad_norm": 0.0062, "loss": 0.0386, "lr": "3.990e-05", "step": 5080, "steps": "23.42s,5080/16595" }, { "epoch": 1.5308827960228983, "eta": "75:55:42", "grad_norm": 0.0054, "loss": 0.0659, "lr": "3.989e-05", "step": 5081, "steps": "23.74s,5081/16595" }, { "epoch": 1.5311840915938535, "eta": "74:53:54", "grad_norm": 0.0056, "loss": 0.0469, "lr": "3.989e-05", "step": 5082, "steps": "23.42s,5082/16595" }, { "epoch": 1.5314853871648086, "eta": "74:47:45", "grad_norm": 0.0044, "loss": 0.0544, "lr": "3.989e-05", "step": 5083, "steps": "23.39s,5083/16595" }, { "epoch": 1.5317866827357638, "eta": "74:51:12", "grad_norm": 0.0044, "loss": 0.0415, "lr": "3.988e-05", "step": 5084, "steps": "23.41s,5084/16595" }, { "epoch": 1.532087978306719, "eta": "75:44:31", "grad_norm": 0.005, "loss": 0.0491, "lr": "3.988e-05", "step": 5085, "steps": "23.69s,5085/16595" }, { "epoch": 1.532389273877674, "eta": "75:44:08", "grad_norm": 0.0043, "loss": 0.058, "lr": "3.988e-05", "step": 5086, "steps": "23.69s,5086/16595" }, { "epoch": 1.5326905694486292, "eta": "75:28:23", "grad_norm": 0.0064, "loss": 0.0463, "lr": "3.987e-05", "step": 5087, "steps": "23.61s,5087/16595" }, { "epoch": 1.5329918650195842, "eta": "75:47:10", "grad_norm": 0.0046, "loss": 0.0615, "lr": "3.987e-05", "step": 5088, "steps": "23.71s,5088/16595" }, { "epoch": 1.5332931605905393, "eta": "76:00:12", "grad_norm": 0.0048, "loss": 0.039, "lr": "3.986e-05", "step": 5089, "steps": "23.78s,5089/16595" }, { "epoch": 1.5335944561614945, "eta": "76:13:14", "grad_norm": 0.0043, "loss": 0.0527, "lr": "3.986e-05", "step": 5090, "steps": "23.85s,5090/16595" }, { "epoch": 1.5338957517324494, "eta": "75:26:49", "grad_norm": 0.0058, "loss": 0.0368, "lr": "3.986e-05", "step": 5091, "steps": "23.61s,5091/16595" }, { "epoch": 1.5341970473034046, "eta": "75:07:15", "grad_norm": 0.0048, "loss": 0.0679, "lr": "3.985e-05", "step": 5092, "steps": "23.51s,5092/16595" }, { "epoch": 1.5344983428743597, "eta": "75:58:37", "grad_norm": 0.0049, "loss": 0.058, "lr": "3.985e-05", "step": 5093, "steps": "23.78s,5093/16595" }, { "epoch": 1.5347996384453149, "eta": "75:48:38", "grad_norm": 0.0068, "loss": 0.0488, "lr": "3.984e-05", "step": 5094, "steps": "23.73s,5094/16595" }, { "epoch": 1.53510093401627, "eta": "75:29:05", "grad_norm": 0.0054, "loss": 0.0509, "lr": "3.984e-05", "step": 5095, "steps": "23.63s,5095/16595" }, { "epoch": 1.5354022295872252, "eta": "74:04:21", "grad_norm": 0.0054, "loss": 0.0479, "lr": "3.984e-05", "step": 5096, "steps": "23.19s,5096/16595" }, { "epoch": 1.5357035251581803, "eta": "75:18:42", "grad_norm": 0.0055, "loss": 0.0584, "lr": "3.983e-05", "step": 5097, "steps": "23.58s,5097/16595" }, { "epoch": 1.5360048207291352, "eta": "76:08:08", "grad_norm": 0.006, "loss": 0.0546, "lr": "3.983e-05", "step": 5098, "steps": "23.84s,5098/16595" }, { "epoch": 1.5363061163000904, "eta": "75:58:09", "grad_norm": 0.0057, "loss": 0.0487, "lr": "3.983e-05", "step": 5099, "steps": "23.79s,5099/16595" }, { "epoch": 1.5366074118710455, "eta": "75:59:41", "grad_norm": 0.0049, "loss": 0.0473, "lr": "3.982e-05", "step": 5100, "steps": "23.8s,5100/16595" }, { "epoch": 1.5369087074420005, "eta": "75:55:27", "grad_norm": 0.0045, "loss": 0.0743, "lr": "3.982e-05", "step": 5101, "steps": "23.78s,5101/16595" }, { "epoch": 1.5372100030129556, "eta": "75:18:39", "grad_norm": 0.0058, "loss": 0.0514, "lr": "3.981e-05", "step": 5102, "steps": "23.59s,5102/16595" }, { "epoch": 1.5375112985839108, "eta": "75:37:25", "grad_norm": 0.005, "loss": 0.0513, "lr": "3.981e-05", "step": 5103, "steps": "23.69s,5103/16595" }, { "epoch": 1.537812594154866, "eta": "76:28:44", "grad_norm": 0.0072, "loss": 0.0452, "lr": "3.981e-05", "step": 5104, "steps": "23.96s,5104/16595" }, { "epoch": 1.538113889725821, "eta": "75:53:52", "grad_norm": 0.005, "loss": 0.054, "lr": "3.980e-05", "step": 5105, "steps": "23.78s,5105/16595" }, { "epoch": 1.5384151852967762, "eta": "76:29:51", "grad_norm": 0.0046, "loss": 0.046, "lr": "3.980e-05", "step": 5106, "steps": "23.97s,5106/16595" }, { "epoch": 1.5387164808677314, "eta": "74:55:38", "grad_norm": 0.0044, "loss": 0.0501, "lr": "3.979e-05", "step": 5107, "steps": "23.48s,5107/16595" }, { "epoch": 1.5390177764386863, "eta": "75:12:28", "grad_norm": 0.0061, "loss": 0.0481, "lr": "3.979e-05", "step": 5108, "steps": "23.57s,5108/16595" }, { "epoch": 1.5393190720096415, "eta": "76:55:27", "grad_norm": 0.006, "loss": 0.0557, "lr": "3.979e-05", "step": 5109, "steps": "24.11s,5109/16595" }, { "epoch": 1.5396203675805966, "eta": "75:49:58", "grad_norm": 0.0074, "loss": 0.0506, "lr": "3.978e-05", "step": 5110, "steps": "23.77s,5110/16595" }, { "epoch": 1.5399216631515515, "eta": "76:31:41", "grad_norm": 0.0063, "loss": 0.0486, "lr": "3.978e-05", "step": 5111, "steps": "23.99s,5111/16595" }, { "epoch": 1.5402229587225067, "eta": "75:31:57", "grad_norm": 0.0051, "loss": 0.049, "lr": "3.978e-05", "step": 5112, "steps": "23.68s,5112/16595" }, { "epoch": 1.5405242542934618, "eta": "75:37:18", "grad_norm": 0.0063, "loss": 0.0574, "lr": "3.977e-05", "step": 5113, "steps": "23.71s,5113/16595" }, { "epoch": 1.540825549864417, "eta": "74:47:09", "grad_norm": 0.0047, "loss": 0.0647, "lr": "3.977e-05", "step": 5114, "steps": "23.45s,5114/16595" }, { "epoch": 1.5411268454353722, "eta": "76:43:28", "grad_norm": 0.0045, "loss": 0.0608, "lr": "3.976e-05", "step": 5115, "steps": "24.06s,5115/16595" }, { "epoch": 1.5414281410063273, "eta": "75:26:33", "grad_norm": 0.0055, "loss": 0.0425, "lr": "3.976e-05", "step": 5116, "steps": "23.66s,5116/16595" }, { "epoch": 1.5417294365772825, "eta": "75:18:30", "grad_norm": 0.0048, "loss": 0.0704, "lr": "3.976e-05", "step": 5117, "steps": "23.62s,5117/16595" }, { "epoch": 1.5420307321482374, "eta": "76:32:42", "grad_norm": 0.0094, "loss": 0.0522, "lr": "3.975e-05", "step": 5118, "steps": "24.01s,5118/16595" }, { "epoch": 1.5423320277191925, "eta": "74:45:12", "grad_norm": 0.0059, "loss": 0.0479, "lr": "3.975e-05", "step": 5119, "steps": "23.45s,5119/16595" }, { "epoch": 1.5426333232901477, "eta": "76:05:08", "grad_norm": 0.0054, "loss": 0.0581, "lr": "3.974e-05", "step": 5120, "steps": "23.87s,5120/16595" }, { "epoch": 1.5429346188611026, "eta": "75:51:21", "grad_norm": 0.006, "loss": 0.0475, "lr": "3.974e-05", "step": 5121, "steps": "23.8s,5121/16595" }, { "epoch": 1.5432359144320578, "eta": "74:36:22", "grad_norm": 0.005, "loss": 0.0452, "lr": "3.974e-05", "step": 5122, "steps": "23.41s,5122/16595" }, { "epoch": 1.543537210003013, "eta": "75:16:08", "grad_norm": 0.0053, "loss": 0.0436, "lr": "3.973e-05", "step": 5123, "steps": "23.62s,5123/16595" }, { "epoch": 1.543838505573968, "eta": "76:34:08", "grad_norm": 0.0043, "loss": 0.0645, "lr": "3.973e-05", "step": 5124, "steps": "24.03s,5124/16595" }, { "epoch": 1.5441398011449232, "eta": "75:13:26", "grad_norm": 0.0046, "loss": 0.0393, "lr": "3.972e-05", "step": 5125, "steps": "23.61s,5125/16595" }, { "epoch": 1.5444410967158784, "eta": "76:12:18", "grad_norm": 0.0054, "loss": 0.0552, "lr": "3.972e-05", "step": 5126, "steps": "23.92s,5126/16595" }, { "epoch": 1.5447423922868335, "eta": "75:50:53", "grad_norm": 0.0044, "loss": 0.0356, "lr": "3.972e-05", "step": 5127, "steps": "23.81s,5127/16595" }, { "epoch": 1.5450436878577885, "eta": "74:37:51", "grad_norm": 0.005, "loss": 0.0366, "lr": "3.971e-05", "step": 5128, "steps": "23.43s,5128/16595" }, { "epoch": 1.5453449834287436, "eta": "75:44:21", "grad_norm": 0.0044, "loss": 0.053, "lr": "3.971e-05", "step": 5129, "steps": "23.78s,5129/16595" }, { "epoch": 1.5456462789996988, "eta": "75:09:34", "grad_norm": 0.0049, "loss": 0.0546, "lr": "3.971e-05", "step": 5130, "steps": "23.6s,5130/16595" }, { "epoch": 1.5459475745706537, "eta": "76:19:52", "grad_norm": 0.0053, "loss": 0.0412, "lr": "3.970e-05", "step": 5131, "steps": "23.97s,5131/16595" }, { "epoch": 1.5462488701416088, "eta": "74:42:01", "grad_norm": 0.0048, "loss": 0.0445, "lr": "3.970e-05", "step": 5132, "steps": "23.46s,5132/16595" }, { "epoch": 1.546550165712564, "eta": "74:35:54", "grad_norm": 0.0054, "loss": 0.0366, "lr": "3.969e-05", "step": 5133, "steps": "23.43s,5133/16595" }, { "epoch": 1.5468514612835191, "eta": "75:04:10", "grad_norm": 0.0052, "loss": 0.0515, "lr": "3.969e-05", "step": 5134, "steps": "23.58s,5134/16595" }, { "epoch": 1.5471527568544743, "eta": "76:20:10", "grad_norm": 0.0058, "loss": 0.0429, "lr": "3.969e-05", "step": 5135, "steps": "23.98s,5135/16595" }, { "epoch": 1.5474540524254294, "eta": "74:30:55", "grad_norm": 0.0052, "loss": 0.0381, "lr": "3.968e-05", "step": 5136, "steps": "23.41s,5136/16595" }, { "epoch": 1.5477553479963846, "eta": "75:31:38", "grad_norm": 0.0055, "loss": 0.0408, "lr": "3.968e-05", "step": 5137, "steps": "23.73s,5137/16595" }, { "epoch": 1.5480566435673395, "eta": "76:13:15", "grad_norm": 0.005, "loss": 0.0562, "lr": "3.967e-05", "step": 5138, "steps": "23.95s,5138/16595" }, { "epoch": 1.5483579391382947, "eta": "76:03:18", "grad_norm": 0.0041, "loss": 0.0469, "lr": "3.967e-05", "step": 5139, "steps": "23.9s,5139/16595" }, { "epoch": 1.5486592347092498, "eta": "75:36:10", "grad_norm": 0.0049, "loss": 0.0448, "lr": "3.967e-05", "step": 5140, "steps": "23.76s,5140/16595" }, { "epoch": 1.5489605302802048, "eta": "74:59:30", "grad_norm": 0.0043, "loss": 0.0502, "lr": "3.966e-05", "step": 5141, "steps": "23.57s,5141/16595" }, { "epoch": 1.54926182585116, "eta": "75:23:56", "grad_norm": 0.0045, "loss": 0.0491, "lr": "3.966e-05", "step": 5142, "steps": "23.7s,5142/16595" }, { "epoch": 1.549563121422115, "eta": "75:25:26", "grad_norm": 0.005, "loss": 0.0567, "lr": "3.966e-05", "step": 5143, "steps": "23.71s,5143/16595" }, { "epoch": 1.5498644169930702, "eta": "76:07:02", "grad_norm": 0.0047, "loss": 0.0631, "lr": "3.965e-05", "step": 5144, "steps": "23.93s,5144/16595" }, { "epoch": 1.5501657125640254, "eta": "75:05:34", "grad_norm": 0.0056, "loss": 0.0327, "lr": "3.965e-05", "step": 5145, "steps": "23.61s,5145/16595" }, { "epoch": 1.5504670081349805, "eta": "76:04:20", "grad_norm": 0.0054, "loss": 0.0546, "lr": "3.964e-05", "step": 5146, "steps": "23.92s,5146/16595" }, { "epoch": 1.5507683037059357, "eta": "74:17:05", "grad_norm": 0.0051, "loss": 0.0523, "lr": "3.964e-05", "step": 5147, "steps": "23.36s,5147/16595" }, { "epoch": 1.5510695992768906, "eta": "75:21:33", "grad_norm": 0.0051, "loss": 0.0542, "lr": "3.964e-05", "step": 5148, "steps": "23.7s,5148/16595" }, { "epoch": 1.5513708948478457, "eta": "74:58:16", "grad_norm": 0.0069, "loss": 0.0373, "lr": "3.963e-05", "step": 5149, "steps": "23.58s,5149/16595" }, { "epoch": 1.551672190418801, "eta": "75:45:34", "grad_norm": 0.0052, "loss": 0.0546, "lr": "3.963e-05", "step": 5150, "steps": "23.83s,5150/16595" }, { "epoch": 1.5519734859897558, "eta": "75:31:49", "grad_norm": 0.0048, "loss": 0.0599, "lr": "3.962e-05", "step": 5151, "steps": "23.76s,5151/16595" }, { "epoch": 1.552274781560711, "eta": "75:02:49", "grad_norm": 0.0047, "loss": 0.0522, "lr": "3.962e-05", "step": 5152, "steps": "23.61s,5152/16595" }, { "epoch": 1.5525760771316661, "eta": "76:05:21", "grad_norm": 0.0048, "loss": 0.0438, "lr": "3.962e-05", "step": 5153, "steps": "23.94s,5153/16595" }, { "epoch": 1.5528773727026213, "eta": "75:00:07", "grad_norm": 0.006, "loss": 0.0502, "lr": "3.961e-05", "step": 5154, "steps": "23.6s,5154/16595" }, { "epoch": 1.5531786682735764, "eta": "75:39:46", "grad_norm": 0.0046, "loss": 0.0527, "lr": "3.961e-05", "step": 5155, "steps": "23.81s,5155/16595" }, { "epoch": 1.5534799638445316, "eta": "74:09:46", "grad_norm": 0.0052, "loss": 0.0647, "lr": "3.961e-05", "step": 5156, "steps": "23.34s,5156/16595" }, { "epoch": 1.5537812594154867, "eta": "75:38:58", "grad_norm": 0.0041, "loss": 0.0476, "lr": "3.960e-05", "step": 5157, "steps": "23.81s,5157/16595" }, { "epoch": 1.5540825549864417, "eta": "75:40:29", "grad_norm": 0.0052, "loss": 0.0582, "lr": "3.960e-05", "step": 5158, "steps": "23.82s,5158/16595" }, { "epoch": 1.5543838505573968, "eta": "75:24:50", "grad_norm": 0.0053, "loss": 0.0463, "lr": "3.959e-05", "step": 5159, "steps": "23.74s,5159/16595" }, { "epoch": 1.554685146128352, "eta": "75:03:29", "grad_norm": 0.0053, "loss": 0.0347, "lr": "3.959e-05", "step": 5160, "steps": "23.63s,5160/16595" }, { "epoch": 1.5549864416993069, "eta": "76:28:50", "grad_norm": 0.0043, "loss": 0.0569, "lr": "3.959e-05", "step": 5161, "steps": "24.08s,5161/16595" }, { "epoch": 1.555287737270262, "eta": "76:07:29", "grad_norm": 0.0052, "loss": 0.0579, "lr": "3.958e-05", "step": 5162, "steps": "23.97s,5162/16595" }, { "epoch": 1.5555890328412172, "eta": "75:38:30", "grad_norm": 0.0049, "loss": 0.0548, "lr": "3.958e-05", "step": 5163, "steps": "23.82s,5163/16595" }, { "epoch": 1.5558903284121723, "eta": "75:11:26", "grad_norm": 0.0049, "loss": 0.0621, "lr": "3.957e-05", "step": 5164, "steps": "23.68s,5164/16595" }, { "epoch": 1.5561916239831275, "eta": "75:33:54", "grad_norm": 0.0051, "loss": 0.0423, "lr": "3.957e-05", "step": 5165, "steps": "23.8s,5165/16595" }, { "epoch": 1.5564929195540826, "eta": "74:19:12", "grad_norm": 0.0043, "loss": 0.0683, "lr": "3.957e-05", "step": 5166, "steps": "23.41s,5166/16595" }, { "epoch": 1.5567942151250378, "eta": "75:04:32", "grad_norm": 0.0054, "loss": 0.0501, "lr": "3.956e-05", "step": 5167, "steps": "23.65s,5167/16595" }, { "epoch": 1.5570955106959927, "eta": "75:19:22", "grad_norm": 0.0048, "loss": 0.0573, "lr": "3.956e-05", "step": 5168, "steps": "23.73s,5168/16595" }, { "epoch": 1.5573968062669479, "eta": "75:58:58", "grad_norm": 0.0074, "loss": 0.0477, "lr": "3.955e-05", "step": 5169, "steps": "23.94s,5169/16595" }, { "epoch": 1.557698101837903, "eta": "75:47:09", "grad_norm": 0.0049, "loss": 0.041, "lr": "3.955e-05", "step": 5170, "steps": "23.88s,5170/16595" }, { "epoch": 1.557999397408858, "eta": "75:25:48", "grad_norm": 0.0056, "loss": 0.0355, "lr": "3.955e-05", "step": 5171, "steps": "23.77s,5171/16595" }, { "epoch": 1.558300692979813, "eta": "75:13:59", "grad_norm": 0.0053, "loss": 0.0515, "lr": "3.954e-05", "step": 5172, "steps": "23.71s,5172/16595" }, { "epoch": 1.5586019885507683, "eta": "75:38:20", "grad_norm": 0.0054, "loss": 0.0508, "lr": "3.954e-05", "step": 5173, "steps": "23.84s,5173/16595" }, { "epoch": 1.5589032841217234, "eta": "75:37:56", "grad_norm": 0.0059, "loss": 0.05, "lr": "3.954e-05", "step": 5174, "steps": "23.84s,5174/16595" }, { "epoch": 1.5592045796926786, "eta": "74:53:46", "grad_norm": 0.006, "loss": 0.0582, "lr": "3.953e-05", "step": 5175, "steps": "23.61s,5175/16595" }, { "epoch": 1.5595058752636337, "eta": "75:16:12", "grad_norm": 0.0052, "loss": 0.0477, "lr": "3.953e-05", "step": 5176, "steps": "23.73s,5176/16595" }, { "epoch": 1.5598071708345889, "eta": "74:32:03", "grad_norm": 0.0055, "loss": 0.0604, "lr": "3.952e-05", "step": 5177, "steps": "23.5s,5177/16595" }, { "epoch": 1.5601084664055438, "eta": "74:39:16", "grad_norm": 0.0053, "loss": 0.0553, "lr": "3.952e-05", "step": 5178, "steps": "23.54s,5178/16595" }, { "epoch": 1.560409761976499, "eta": "75:13:07", "grad_norm": 0.0057, "loss": 0.06, "lr": "3.952e-05", "step": 5179, "steps": "23.72s,5179/16595" }, { "epoch": 1.560711057547454, "eta": "74:44:11", "grad_norm": 0.0047, "loss": 0.0579, "lr": "3.951e-05", "step": 5180, "steps": "23.57s,5180/16595" }, { "epoch": 1.561012353118409, "eta": "75:19:56", "grad_norm": 0.0049, "loss": 0.0504, "lr": "3.951e-05", "step": 5181, "steps": "23.76s,5181/16595" }, { "epoch": 1.5613136486893642, "eta": "74:12:58", "grad_norm": 0.0046, "loss": 0.0556, "lr": "3.950e-05", "step": 5182, "steps": "23.41s,5182/16595" }, { "epoch": 1.5616149442603193, "eta": "75:21:03", "grad_norm": 0.0067, "loss": 0.0588, "lr": "3.950e-05", "step": 5183, "steps": "23.77s,5183/16595" }, { "epoch": 1.5619162398312745, "eta": "74:54:01", "grad_norm": 0.0064, "loss": 0.053, "lr": "3.950e-05", "step": 5184, "steps": "23.63s,5184/16595" }, { "epoch": 1.5622175354022296, "eta": "75:20:15", "grad_norm": 0.0047, "loss": 0.0594, "lr": "3.949e-05", "step": 5185, "steps": "23.77s,5185/16595" }, { "epoch": 1.5625188309731848, "eta": "74:30:25", "grad_norm": 0.0047, "loss": 0.0581, "lr": "3.949e-05", "step": 5186, "steps": "23.51s,5186/16595" }, { "epoch": 1.56282012654414, "eta": "74:43:20", "grad_norm": 0.0063, "loss": 0.0333, "lr": "3.948e-05", "step": 5187, "steps": "23.58s,5187/16595" }, { "epoch": 1.5631214221150949, "eta": "75:07:39", "grad_norm": 0.0044, "loss": 0.0661, "lr": "3.948e-05", "step": 5188, "steps": "23.71s,5188/16595" }, { "epoch": 1.56342271768605, "eta": "75:54:47", "grad_norm": 0.0041, "loss": 0.0589, "lr": "3.948e-05", "step": 5189, "steps": "23.96s,5189/16595" }, { "epoch": 1.5637240132570052, "eta": "74:07:57", "grad_norm": 0.0046, "loss": 0.0522, "lr": "3.947e-05", "step": 5190, "steps": "23.4s,5190/16595" }, { "epoch": 1.56402530882796, "eta": "76:03:30", "grad_norm": 0.0058, "loss": 0.0527, "lr": "3.947e-05", "step": 5191, "steps": "24.01s,5191/16595" }, { "epoch": 1.5643266043989152, "eta": "75:21:17", "grad_norm": 0.0043, "loss": 0.0452, "lr": "3.947e-05", "step": 5192, "steps": "23.79s,5192/16595" }, { "epoch": 1.5646278999698704, "eta": "75:11:23", "grad_norm": 0.0052, "loss": 0.059, "lr": "3.946e-05", "step": 5193, "steps": "23.74s,5193/16595" }, { "epoch": 1.5649291955408255, "eta": "74:51:59", "grad_norm": 0.0049, "loss": 0.0496, "lr": "3.946e-05", "step": 5194, "steps": "23.64s,5194/16595" }, { "epoch": 1.5652304911117807, "eta": "74:25:00", "grad_norm": 0.0052, "loss": 0.0416, "lr": "3.945e-05", "step": 5195, "steps": "23.5s,5195/16595" }, { "epoch": 1.5655317866827358, "eta": "74:49:18", "grad_norm": 0.0077, "loss": 0.0536, "lr": "3.945e-05", "step": 5196, "steps": "23.63s,5196/16595" }, { "epoch": 1.565833082253691, "eta": "75:06:00", "grad_norm": 0.0061, "loss": 0.0433, "lr": "3.945e-05", "step": 5197, "steps": "23.72s,5197/16595" }, { "epoch": 1.566134377824646, "eta": "75:47:24", "grad_norm": 0.0051, "loss": 0.0605, "lr": "3.944e-05", "step": 5198, "steps": "23.94s,5198/16595" }, { "epoch": 1.566435673395601, "eta": "75:07:07", "grad_norm": 0.0045, "loss": 0.0499, "lr": "3.944e-05", "step": 5199, "steps": "23.73s,5199/16595" }, { "epoch": 1.5667369689665562, "eta": "74:17:20", "grad_norm": 0.0047, "loss": 0.0417, "lr": "3.943e-05", "step": 5200, "steps": "23.47s,5200/16595" }, { "epoch": 1.5670382645375112, "eta": "163:47:19", "grad_norm": 0.0047, "loss": 0.0547, "lr": "3.943e-05", "step": 5201, "steps": "51.75s,5201/16595" }, { "epoch": 1.5673395601084663, "eta": "74:50:44", "grad_norm": 0.0054, "loss": 0.05, "lr": "3.943e-05", "step": 5202, "steps": "23.65s,5202/16595" }, { "epoch": 1.5676408556794215, "eta": "74:31:21", "grad_norm": 0.0059, "loss": 0.0504, "lr": "3.942e-05", "step": 5203, "steps": "23.55s,5203/16595" }, { "epoch": 1.5679421512503766, "eta": "74:34:45", "grad_norm": 0.0045, "loss": 0.0419, "lr": "3.942e-05", "step": 5204, "steps": "23.57s,5204/16595" }, { "epoch": 1.5682434468213318, "eta": "74:41:57", "grad_norm": 0.0049, "loss": 0.0457, "lr": "3.941e-05", "step": 5205, "steps": "23.61s,5205/16595" }, { "epoch": 1.568544742392287, "eta": "74:58:39", "grad_norm": 0.0044, "loss": 0.057, "lr": "3.941e-05", "step": 5206, "steps": "23.7s,5206/16595" }, { "epoch": 1.568846037963242, "eta": "73:38:32", "grad_norm": 0.0059, "loss": 0.0429, "lr": "3.941e-05", "step": 5207, "steps": "23.28s,5207/16595" }, { "epoch": 1.569147333534197, "eta": "75:09:15", "grad_norm": 0.0059, "loss": 0.0378, "lr": "3.940e-05", "step": 5208, "steps": "23.76s,5208/16595" }, { "epoch": 1.5694486291051521, "eta": "75:16:26", "grad_norm": 0.0048, "loss": 0.0396, "lr": "3.940e-05", "step": 5209, "steps": "23.8s,5209/16595" }, { "epoch": 1.5697499246761073, "eta": "74:55:10", "grad_norm": 0.0071, "loss": 0.0447, "lr": "3.939e-05", "step": 5210, "steps": "23.69s,5210/16595" }, { "epoch": 1.5700512202470622, "eta": "74:28:13", "grad_norm": 0.0054, "loss": 0.0543, "lr": "3.939e-05", "step": 5211, "steps": "23.55s,5211/16595" }, { "epoch": 1.5703525158180174, "eta": "75:15:15", "grad_norm": 0.0048, "loss": 0.0416, "lr": "3.939e-05", "step": 5212, "steps": "23.8s,5212/16595" }, { "epoch": 1.5706538113889725, "eta": "73:34:19", "grad_norm": 0.0049, "loss": 0.0564, "lr": "3.938e-05", "step": 5213, "steps": "23.27s,5213/16595" }, { "epoch": 1.5709551069599277, "eta": "75:23:56", "grad_norm": 0.0046, "loss": 0.0524, "lr": "3.938e-05", "step": 5214, "steps": "23.85s,5214/16595" }, { "epoch": 1.5712564025308828, "eta": "74:09:34", "grad_norm": 0.006, "loss": 0.0579, "lr": "3.938e-05", "step": 5215, "steps": "23.46s,5215/16595" }, { "epoch": 1.571557698101838, "eta": "76:01:04", "grad_norm": 0.0044, "loss": 0.0471, "lr": "3.937e-05", "step": 5216, "steps": "24.05s,5216/16595" }, { "epoch": 1.5718589936727931, "eta": "75:20:51", "grad_norm": 0.0051, "loss": 0.0422, "lr": "3.937e-05", "step": 5217, "steps": "23.84s,5217/16595" }, { "epoch": 1.572160289243748, "eta": "74:48:13", "grad_norm": 0.0061, "loss": 0.0789, "lr": "3.936e-05", "step": 5218, "steps": "23.67s,5218/16595" }, { "epoch": 1.5724615848147032, "eta": "75:03:00", "grad_norm": 0.0052, "loss": 0.0483, "lr": "3.936e-05", "step": 5219, "steps": "23.75s,5219/16595" }, { "epoch": 1.5727628803856584, "eta": "74:56:55", "grad_norm": 0.0061, "loss": 0.061, "lr": "3.936e-05", "step": 5220, "steps": "23.72s,5220/16595" }, { "epoch": 1.5730641759566133, "eta": "75:04:06", "grad_norm": 0.0049, "loss": 0.0514, "lr": "3.935e-05", "step": 5221, "steps": "23.76s,5221/16595" }, { "epoch": 1.5733654715275684, "eta": "75:07:29", "grad_norm": 0.0054, "loss": 0.0442, "lr": "3.935e-05", "step": 5222, "steps": "23.78s,5222/16595" }, { "epoch": 1.5736667670985236, "eta": "74:55:43", "grad_norm": 0.0057, "loss": 0.0576, "lr": "3.934e-05", "step": 5223, "steps": "23.72s,5223/16595" }, { "epoch": 1.5739680626694788, "eta": "75:19:58", "grad_norm": 0.0066, "loss": 0.0463, "lr": "3.934e-05", "step": 5224, "steps": "23.85s,5224/16595" }, { "epoch": 1.574269358240434, "eta": "75:02:31", "grad_norm": 0.0058, "loss": 0.0405, "lr": "3.934e-05", "step": 5225, "steps": "23.76s,5225/16595" }, { "epoch": 1.574570653811389, "eta": "74:50:45", "grad_norm": 0.0049, "loss": 0.0564, "lr": "3.933e-05", "step": 5226, "steps": "23.7s,5226/16595" }, { "epoch": 1.5748719493823442, "eta": "76:21:18", "grad_norm": 0.0056, "loss": 0.0442, "lr": "3.933e-05", "step": 5227, "steps": "24.18s,5227/16595" }, { "epoch": 1.5751732449532991, "eta": "73:49:20", "grad_norm": 0.0103, "loss": 0.045, "lr": "3.932e-05", "step": 5228, "steps": "23.38s,5228/16595" }, { "epoch": 1.5754745405242543, "eta": "75:21:46", "grad_norm": 0.0061, "loss": 0.0552, "lr": "3.932e-05", "step": 5229, "steps": "23.87s,5229/16595" }, { "epoch": 1.5757758360952094, "eta": "74:54:51", "grad_norm": 0.0069, "loss": 0.0493, "lr": "3.932e-05", "step": 5230, "steps": "23.73s,5230/16595" }, { "epoch": 1.5760771316661644, "eta": "74:37:24", "grad_norm": 0.0059, "loss": 0.042, "lr": "3.931e-05", "step": 5231, "steps": "23.64s,5231/16595" }, { "epoch": 1.5763784272371195, "eta": "74:37:01", "grad_norm": 0.0048, "loss": 0.0588, "lr": "3.931e-05", "step": 5232, "steps": "23.64s,5232/16595" }, { "epoch": 1.5766797228080747, "eta": "75:12:36", "grad_norm": 0.0049, "loss": 0.0413, "lr": "3.931e-05", "step": 5233, "steps": "23.83s,5233/16595" }, { "epoch": 1.5769810183790298, "eta": "75:17:53", "grad_norm": 0.0065, "loss": 0.0633, "lr": "3.930e-05", "step": 5234, "steps": "23.86s,5234/16595" }, { "epoch": 1.577282313949985, "eta": "74:26:22", "grad_norm": 0.0067, "loss": 0.0415, "lr": "3.930e-05", "step": 5235, "steps": "23.59s,5235/16595" }, { "epoch": 1.5775836095209401, "eta": "75:07:37", "grad_norm": 0.0042, "loss": 0.0469, "lr": "3.929e-05", "step": 5236, "steps": "23.81s,5236/16595" }, { "epoch": 1.5778849050918953, "eta": "74:42:37", "grad_norm": 0.0046, "loss": 0.0507, "lr": "3.929e-05", "step": 5237, "steps": "23.68s,5237/16595" }, { "epoch": 1.5781862006628502, "eta": "73:49:13", "grad_norm": 0.0049, "loss": 0.0566, "lr": "3.929e-05", "step": 5238, "steps": "23.4s,5238/16595" }, { "epoch": 1.5784874962338054, "eta": "75:55:38", "grad_norm": 0.0045, "loss": 0.0444, "lr": "3.928e-05", "step": 5239, "steps": "24.07s,5239/16595" }, { "epoch": 1.5787887918047605, "eta": "74:03:35", "grad_norm": 0.0043, "loss": 0.0605, "lr": "3.928e-05", "step": 5240, "steps": "23.48s,5240/16595" }, { "epoch": 1.5790900873757154, "eta": "74:20:13", "grad_norm": 0.0052, "loss": 0.0453, "lr": "3.927e-05", "step": 5241, "steps": "23.57s,5241/16595" }, { "epoch": 1.5793913829466706, "eta": "74:34:58", "grad_norm": 0.0044, "loss": 0.0541, "lr": "3.927e-05", "step": 5242, "steps": "23.65s,5242/16595" }, { "epoch": 1.5796926785176257, "eta": "74:27:00", "grad_norm": 0.0052, "loss": 0.0635, "lr": "3.927e-05", "step": 5243, "steps": "23.61s,5243/16595" }, { "epoch": 1.5799939740885809, "eta": "75:38:30", "grad_norm": 0.0042, "loss": 0.0525, "lr": "3.926e-05", "step": 5244, "steps": "23.99s,5244/16595" }, { "epoch": 1.580295269659536, "eta": "74:45:08", "grad_norm": 0.005, "loss": 0.0576, "lr": "3.926e-05", "step": 5245, "steps": "23.71s,5245/16595" }, { "epoch": 1.5805965652304912, "eta": "74:29:36", "grad_norm": 0.0038, "loss": 0.0529, "lr": "3.925e-05", "step": 5246, "steps": "23.63s,5246/16595" }, { "epoch": 1.5808978608014463, "eta": "73:57:04", "grad_norm": 0.0043, "loss": 0.0507, "lr": "3.925e-05", "step": 5247, "steps": "23.46s,5247/16595" }, { "epoch": 1.5811991563724013, "eta": "75:29:20", "grad_norm": 0.0038, "loss": 0.0545, "lr": "3.925e-05", "step": 5248, "steps": "23.95s,5248/16595" }, { "epoch": 1.5815004519433564, "eta": "74:53:00", "grad_norm": 0.0046, "loss": 0.0472, "lr": "3.924e-05", "step": 5249, "steps": "23.76s,5249/16595" }, { "epoch": 1.5818017475143116, "eta": "74:16:41", "grad_norm": 0.0055, "loss": 0.0434, "lr": "3.924e-05", "step": 5250, "steps": "23.57s,5250/16595" }, { "epoch": 1.5821030430852665, "eta": "74:57:53", "grad_norm": 0.0048, "loss": 0.0581, "lr": "3.923e-05", "step": 5251, "steps": "23.79s,5251/16595" }, { "epoch": 1.5824043386562217, "eta": "74:12:07", "grad_norm": 0.005, "loss": 0.0439, "lr": "3.923e-05", "step": 5252, "steps": "23.55s,5252/16595" }, { "epoch": 1.5827056342271768, "eta": "75:31:07", "grad_norm": 0.0046, "loss": 0.0558, "lr": "3.923e-05", "step": 5253, "steps": "23.97s,5253/16595" }, { "epoch": 1.583006929798132, "eta": "75:21:16", "grad_norm": 0.0044, "loss": 0.0519, "lr": "3.922e-05", "step": 5254, "steps": "23.92s,5254/16595" }, { "epoch": 1.583308225369087, "eta": "74:46:51", "grad_norm": 0.0056, "loss": 0.0576, "lr": "3.922e-05", "step": 5255, "steps": "23.74s,5255/16595" }, { "epoch": 1.5836095209400423, "eta": "74:48:21", "grad_norm": 0.0067, "loss": 0.0482, "lr": "3.921e-05", "step": 5256, "steps": "23.75s,5256/16595" }, { "epoch": 1.5839108165109974, "eta": "75:37:05", "grad_norm": 0.0046, "loss": 0.0454, "lr": "3.921e-05", "step": 5257, "steps": "24.01s,5257/16595" }, { "epoch": 1.5842121120819526, "eta": "74:49:27", "grad_norm": 0.0049, "loss": 0.0387, "lr": "3.921e-05", "step": 5258, "steps": "23.76s,5258/16595" }, { "epoch": 1.5845134076529075, "eta": "74:43:23", "grad_norm": 0.006, "loss": 0.055, "lr": "3.920e-05", "step": 5259, "steps": "23.73s,5259/16595" }, { "epoch": 1.5848147032238626, "eta": "74:05:12", "grad_norm": 0.0051, "loss": 0.0532, "lr": "3.920e-05", "step": 5260, "steps": "23.53s,5260/16595" }, { "epoch": 1.5851159987948176, "eta": "74:40:42", "grad_norm": 0.011, "loss": 0.0567, "lr": "3.920e-05", "step": 5261, "steps": "23.72s,5261/16595" }, { "epoch": 1.5854172943657727, "eta": "75:04:52", "grad_norm": 0.0052, "loss": 0.0553, "lr": "3.919e-05", "step": 5262, "steps": "23.85s,5262/16595" }, { "epoch": 1.5857185899367279, "eta": "73:37:35", "grad_norm": 0.0042, "loss": 0.0519, "lr": "3.919e-05", "step": 5263, "steps": "23.39s,5263/16595" }, { "epoch": 1.586019885507683, "eta": "74:52:44", "grad_norm": 0.0058, "loss": 0.0641, "lr": "3.918e-05", "step": 5264, "steps": "23.79s,5264/16595" }, { "epoch": 1.5863211810786382, "eta": "74:48:34", "grad_norm": 0.0052, "loss": 0.0406, "lr": "3.918e-05", "step": 5265, "steps": "23.77s,5265/16595" }, { "epoch": 1.5866224766495933, "eta": "75:25:56", "grad_norm": 0.0049, "loss": 0.0358, "lr": "3.918e-05", "step": 5266, "steps": "23.97s,5266/16595" }, { "epoch": 1.5869237722205485, "eta": "75:23:38", "grad_norm": 0.0051, "loss": 0.04, "lr": "3.917e-05", "step": 5267, "steps": "23.96s,5267/16595" }, { "epoch": 1.5872250677915036, "eta": "74:34:09", "grad_norm": 0.0048, "loss": 0.0458, "lr": "3.917e-05", "step": 5268, "steps": "23.7s,5268/16595" }, { "epoch": 1.5875263633624586, "eta": "75:00:11", "grad_norm": 0.0041, "loss": 0.0415, "lr": "3.916e-05", "step": 5269, "steps": "23.84s,5269/16595" }, { "epoch": 1.5878276589334137, "eta": "74:20:09", "grad_norm": 0.0044, "loss": 0.0509, "lr": "3.916e-05", "step": 5270, "steps": "23.63s,5270/16595" }, { "epoch": 1.5881289545043686, "eta": "74:08:26", "grad_norm": 0.0047, "loss": 0.0602, "lr": "3.916e-05", "step": 5271, "steps": "23.57s,5271/16595" }, { "epoch": 1.5884302500753238, "eta": "75:17:52", "grad_norm": 0.0054, "loss": 0.0384, "lr": "3.915e-05", "step": 5272, "steps": "23.94s,5272/16595" }, { "epoch": 1.588731545646279, "eta": "74:11:25", "grad_norm": 0.0056, "loss": 0.0431, "lr": "3.915e-05", "step": 5273, "steps": "23.59s,5273/16595" }, { "epoch": 1.589032841217234, "eta": "74:09:09", "grad_norm": 0.0051, "loss": 0.0594, "lr": "3.914e-05", "step": 5274, "steps": "23.58s,5274/16595" }, { "epoch": 1.5893341367881892, "eta": "73:59:19", "grad_norm": 0.0069, "loss": 0.0484, "lr": "3.914e-05", "step": 5275, "steps": "23.53s,5275/16595" }, { "epoch": 1.5896354323591444, "eta": "74:25:20", "grad_norm": 0.0063, "loss": 0.0488, "lr": "3.914e-05", "step": 5276, "steps": "23.67s,5276/16595" }, { "epoch": 1.5899367279300995, "eta": "76:04:55", "grad_norm": 0.0051, "loss": 0.0666, "lr": "3.913e-05", "step": 5277, "steps": "24.2s,5277/16595" }, { "epoch": 1.5902380235010547, "eta": "75:17:22", "grad_norm": 0.0052, "loss": 0.0516, "lr": "3.913e-05", "step": 5278, "steps": "23.95s,5278/16595" }, { "epoch": 1.5905393190720096, "eta": "74:07:11", "grad_norm": 0.0052, "loss": 0.0638, "lr": "3.912e-05", "step": 5279, "steps": "23.58s,5279/16595" }, { "epoch": 1.5908406146429648, "eta": "75:07:08", "grad_norm": 0.0051, "loss": 0.0626, "lr": "3.912e-05", "step": 5280, "steps": "23.9s,5280/16595" }, { "epoch": 1.5911419102139197, "eta": "75:18:03", "grad_norm": 0.0051, "loss": 0.0359, "lr": "3.912e-05", "step": 5281, "steps": "23.96s,5281/16595" }, { "epoch": 1.5914432057848749, "eta": "74:49:22", "grad_norm": 0.0064, "loss": 0.0348, "lr": "3.911e-05", "step": 5282, "steps": "23.81s,5282/16595" }, { "epoch": 1.59174450135583, "eta": "74:11:16", "grad_norm": 0.005, "loss": 0.0442, "lr": "3.911e-05", "step": 5283, "steps": "23.61s,5283/16595" }, { "epoch": 1.5920457969267852, "eta": "74:46:41", "grad_norm": 0.0052, "loss": 0.0446, "lr": "3.910e-05", "step": 5284, "steps": "23.8s,5284/16595" }, { "epoch": 1.5923470924977403, "eta": "73:32:47", "grad_norm": 0.005, "loss": 0.0561, "lr": "3.910e-05", "step": 5285, "steps": "23.41s,5285/16595" }, { "epoch": 1.5926483880686955, "eta": "74:10:05", "grad_norm": 0.0059, "loss": 0.0473, "lr": "3.910e-05", "step": 5286, "steps": "23.61s,5286/16595" }, { "epoch": 1.5929496836396506, "eta": "74:37:58", "grad_norm": 0.0059, "loss": 0.049, "lr": "3.909e-05", "step": 5287, "steps": "23.76s,5287/16595" }, { "epoch": 1.5932509792106058, "eta": "74:20:36", "grad_norm": 0.0044, "loss": 0.0394, "lr": "3.909e-05", "step": 5288, "steps": "23.67s,5288/16595" }, { "epoch": 1.5935522747815607, "eta": "73:48:11", "grad_norm": 0.0053, "loss": 0.0582, "lr": "3.908e-05", "step": 5289, "steps": "23.5s,5289/16595" }, { "epoch": 1.5938535703525158, "eta": "74:42:25", "grad_norm": 0.0055, "loss": 0.0479, "lr": "3.908e-05", "step": 5290, "steps": "23.79s,5290/16595" }, { "epoch": 1.5941548659234708, "eta": "75:17:49", "grad_norm": 0.0046, "loss": 0.0559, "lr": "3.908e-05", "step": 5291, "steps": "23.98s,5291/16595" }, { "epoch": 1.594456161494426, "eta": "75:02:21", "grad_norm": 0.0069, "loss": 0.0522, "lr": "3.907e-05", "step": 5292, "steps": "23.9s,5292/16595" }, { "epoch": 1.594757457065381, "eta": "74:50:39", "grad_norm": 0.0054, "loss": 0.0561, "lr": "3.907e-05", "step": 5293, "steps": "23.84s,5293/16595" }, { "epoch": 1.5950587526363362, "eta": "74:50:15", "grad_norm": 0.0051, "loss": 0.0555, "lr": "3.907e-05", "step": 5294, "steps": "23.84s,5294/16595" }, { "epoch": 1.5953600482072914, "eta": "73:45:50", "grad_norm": 0.007, "loss": 0.0621, "lr": "3.906e-05", "step": 5295, "steps": "23.5s,5295/16595" }, { "epoch": 1.5956613437782465, "eta": "73:56:44", "grad_norm": 0.0047, "loss": 0.0525, "lr": "3.906e-05", "step": 5296, "steps": "23.56s,5296/16595" }, { "epoch": 1.5959626393492017, "eta": "74:26:28", "grad_norm": 0.0056, "loss": 0.0527, "lr": "3.905e-05", "step": 5297, "steps": "23.72s,5297/16595" }, { "epoch": 1.5962639349201568, "eta": "74:33:36", "grad_norm": 0.0057, "loss": 0.0541, "lr": "3.905e-05", "step": 5298, "steps": "23.76s,5298/16595" }, { "epoch": 1.5965652304911118, "eta": "74:12:30", "grad_norm": 0.0052, "loss": 0.0709, "lr": "3.905e-05", "step": 5299, "steps": "23.65s,5299/16595" }, { "epoch": 1.596866526062067, "eta": "74:23:24", "grad_norm": 0.005, "loss": 0.0534, "lr": "3.904e-05", "step": 5300, "steps": "23.71s,5300/16595" }, { "epoch": 1.5971678216330218, "eta": "75:25:07", "grad_norm": 0.0052, "loss": 0.0558, "lr": "3.904e-05", "step": 5301, "steps": "24.04s,5301/16595" }, { "epoch": 1.597469117203977, "eta": "74:16:58", "grad_norm": 0.0074, "loss": 0.0506, "lr": "3.903e-05", "step": 5302, "steps": "23.68s,5302/16595" }, { "epoch": 1.5977704127749321, "eta": "73:59:38", "grad_norm": 0.0053, "loss": 0.0539, "lr": "3.903e-05", "step": 5303, "steps": "23.59s,5303/16595" }, { "epoch": 1.5980717083458873, "eta": "74:29:21", "grad_norm": 0.0054, "loss": 0.0459, "lr": "3.903e-05", "step": 5304, "steps": "23.75s,5304/16595" }, { "epoch": 1.5983730039168424, "eta": "74:30:50", "grad_norm": 0.0056, "loss": 0.0533, "lr": "3.902e-05", "step": 5305, "steps": "23.76s,5305/16595" }, { "epoch": 1.5986742994877976, "eta": "75:21:14", "grad_norm": 0.0049, "loss": 0.0622, "lr": "3.902e-05", "step": 5306, "steps": "24.03s,5306/16595" }, { "epoch": 1.5989755950587528, "eta": "73:56:11", "grad_norm": 0.0052, "loss": 0.0497, "lr": "3.901e-05", "step": 5307, "steps": "23.58s,5307/16595" }, { "epoch": 1.599276890629708, "eta": "73:42:37", "grad_norm": 0.0049, "loss": 0.0506, "lr": "3.901e-05", "step": 5308, "steps": "23.51s,5308/16595" }, { "epoch": 1.5995781862006628, "eta": "74:36:46", "grad_norm": 0.0051, "loss": 0.0511, "lr": "3.901e-05", "step": 5309, "steps": "23.8s,5309/16595" }, { "epoch": 1.599879481771618, "eta": "73:32:26", "grad_norm": 0.0052, "loss": 0.0292, "lr": "3.900e-05", "step": 5310, "steps": "23.46s,5310/16595" }, { "epoch": 1.600180777342573, "eta": "73:39:34", "grad_norm": 0.0051, "loss": 0.0506, "lr": "3.900e-05", "step": 5311, "steps": "23.5s,5311/16595" }, { "epoch": 1.600482072913528, "eta": "75:13:12", "grad_norm": 0.0043, "loss": 0.051, "lr": "3.899e-05", "step": 5312, "steps": "24.0s,5312/16595" }, { "epoch": 1.6007833684844832, "eta": "74:27:40", "grad_norm": 0.0053, "loss": 0.0522, "lr": "3.899e-05", "step": 5313, "steps": "23.76s,5313/16595" }, { "epoch": 1.6010846640554384, "eta": "73:53:25", "grad_norm": 0.0066, "loss": 0.044, "lr": "3.899e-05", "step": 5314, "steps": "23.58s,5314/16595" }, { "epoch": 1.6013859596263935, "eta": "73:47:24", "grad_norm": 0.0063, "loss": 0.0656, "lr": "3.898e-05", "step": 5315, "steps": "23.55s,5315/16595" }, { "epoch": 1.6016872551973487, "eta": "74:41:31", "grad_norm": 0.0051, "loss": 0.0493, "lr": "3.898e-05", "step": 5316, "steps": "23.84s,5316/16595" }, { "epoch": 1.6019885507683038, "eta": "74:29:50", "grad_norm": 0.0044, "loss": 0.055, "lr": "3.897e-05", "step": 5317, "steps": "23.78s,5317/16595" }, { "epoch": 1.602289846339259, "eta": "74:20:03", "grad_norm": 0.0049, "loss": 0.0537, "lr": "3.897e-05", "step": 5318, "steps": "23.73s,5318/16595" }, { "epoch": 1.602591141910214, "eta": "74:57:14", "grad_norm": 0.0049, "loss": 0.0553, "lr": "3.897e-05", "step": 5319, "steps": "23.93s,5319/16595" }, { "epoch": 1.602892437481169, "eta": "73:45:26", "grad_norm": 0.0053, "loss": 0.0516, "lr": "3.896e-05", "step": 5320, "steps": "23.55s,5320/16595" }, { "epoch": 1.603193733052124, "eta": "74:07:35", "grad_norm": 0.0049, "loss": 0.0732, "lr": "3.896e-05", "step": 5321, "steps": "23.67s,5321/16595" }, { "epoch": 1.6034950286230791, "eta": "74:59:48", "grad_norm": 0.0049, "loss": 0.0542, "lr": "3.895e-05", "step": 5322, "steps": "23.95s,5322/16595" }, { "epoch": 1.6037963241940343, "eta": "74:57:31", "grad_norm": 0.0059, "loss": 0.0503, "lr": "3.895e-05", "step": 5323, "steps": "23.94s,5323/16595" }, { "epoch": 1.6040976197649894, "eta": "73:02:32", "grad_norm": 0.0049, "loss": 0.0523, "lr": "3.895e-05", "step": 5324, "steps": "23.33s,5324/16595" }, { "epoch": 1.6043989153359446, "eta": "74:22:55", "grad_norm": 0.0068, "loss": 0.0585, "lr": "3.894e-05", "step": 5325, "steps": "23.76s,5325/16595" }, { "epoch": 1.6047002109068997, "eta": "73:01:45", "grad_norm": 0.0043, "loss": 0.0429, "lr": "3.894e-05", "step": 5326, "steps": "23.33s,5326/16595" }, { "epoch": 1.605001506477855, "eta": "74:05:13", "grad_norm": 0.0041, "loss": 0.0612, "lr": "3.893e-05", "step": 5327, "steps": "23.67s,5327/16595" }, { "epoch": 1.60530280204881, "eta": "73:38:32", "grad_norm": 0.0051, "loss": 0.0439, "lr": "3.893e-05", "step": 5328, "steps": "23.53s,5328/16595" }, { "epoch": 1.605604097619765, "eta": "73:21:15", "grad_norm": 0.0046, "loss": 0.0427, "lr": "3.893e-05", "step": 5329, "steps": "23.44s,5329/16595" }, { "epoch": 1.6059053931907201, "eta": "74:04:02", "grad_norm": 0.0048, "loss": 0.0516, "lr": "3.892e-05", "step": 5330, "steps": "23.67s,5330/16595" }, { "epoch": 1.606206688761675, "eta": "73:41:07", "grad_norm": 0.006, "loss": 0.0464, "lr": "3.892e-05", "step": 5331, "steps": "23.55s,5331/16595" }, { "epoch": 1.6065079843326302, "eta": "74:08:53", "grad_norm": 0.0045, "loss": 0.0531, "lr": "3.891e-05", "step": 5332, "steps": "23.7s,5332/16595" }, { "epoch": 1.6068092799035854, "eta": "73:15:56", "grad_norm": 0.0041, "loss": 0.0603, "lr": "3.891e-05", "step": 5333, "steps": "23.42s,5333/16595" }, { "epoch": 1.6071105754745405, "eta": "74:15:36", "grad_norm": 0.0043, "loss": 0.0468, "lr": "3.891e-05", "step": 5334, "steps": "23.74s,5334/16595" }, { "epoch": 1.6074118710454957, "eta": "73:45:10", "grad_norm": 0.0052, "loss": 0.0397, "lr": "3.890e-05", "step": 5335, "steps": "23.58s,5335/16595" }, { "epoch": 1.6077131666164508, "eta": "74:18:33", "grad_norm": 0.0055, "loss": 0.0508, "lr": "3.890e-05", "step": 5336, "steps": "23.76s,5336/16595" }, { "epoch": 1.608014462187406, "eta": "74:20:02", "grad_norm": 0.0053, "loss": 0.0393, "lr": "3.889e-05", "step": 5337, "steps": "23.77s,5337/16595" }, { "epoch": 1.608315757758361, "eta": "74:42:09", "grad_norm": 0.0049, "loss": 0.0668, "lr": "3.889e-05", "step": 5338, "steps": "23.89s,5338/16595" }, { "epoch": 1.608617053329316, "eta": "74:21:07", "grad_norm": 0.0049, "loss": 0.0445, "lr": "3.889e-05", "step": 5339, "steps": "23.78s,5339/16595" }, { "epoch": 1.6089183489002712, "eta": "73:11:19", "grad_norm": 0.005, "loss": 0.0451, "lr": "3.888e-05", "step": 5340, "steps": "23.41s,5340/16595" }, { "epoch": 1.6092196444712261, "eta": "73:52:12", "grad_norm": 0.0044, "loss": 0.0541, "lr": "3.888e-05", "step": 5341, "steps": "23.63s,5341/16595" }, { "epoch": 1.6095209400421813, "eta": "74:01:11", "grad_norm": 0.0057, "loss": 0.0539, "lr": "3.888e-05", "step": 5342, "steps": "23.68s,5342/16595" }, { "epoch": 1.6098222356131364, "eta": "74:43:55", "grad_norm": 0.0054, "loss": 0.0476, "lr": "3.887e-05", "step": 5343, "steps": "23.91s,5343/16595" }, { "epoch": 1.6101235311840916, "eta": "73:47:16", "grad_norm": 0.0045, "loss": 0.0557, "lr": "3.887e-05", "step": 5344, "steps": "23.61s,5344/16595" }, { "epoch": 1.6104248267550467, "eta": "74:33:45", "grad_norm": 0.0047, "loss": 0.0368, "lr": "3.886e-05", "step": 5345, "steps": "23.86s,5345/16595" }, { "epoch": 1.6107261223260019, "eta": "74:12:43", "grad_norm": 0.0042, "loss": 0.0592, "lr": "3.886e-05", "step": 5346, "steps": "23.75s,5346/16595" }, { "epoch": 1.611027417896957, "eta": "73:32:57", "grad_norm": 0.0046, "loss": 0.0508, "lr": "3.886e-05", "step": 5347, "steps": "23.54s,5347/16595" }, { "epoch": 1.6113287134679122, "eta": "73:32:34", "grad_norm": 0.0049, "loss": 0.0466, "lr": "3.885e-05", "step": 5348, "steps": "23.54s,5348/16595" }, { "epoch": 1.611630009038867, "eta": "72:52:49", "grad_norm": 0.0062, "loss": 0.0465, "lr": "3.885e-05", "step": 5349, "steps": "23.33s,5349/16595" }, { "epoch": 1.6119313046098223, "eta": "74:43:00", "grad_norm": 0.0052, "loss": 0.0583, "lr": "3.884e-05", "step": 5350, "steps": "23.92s,5350/16595" }, { "epoch": 1.6122326001807772, "eta": "72:48:17", "grad_norm": 0.0053, "loss": 0.0474, "lr": "3.884e-05", "step": 5351, "steps": "23.31s,5351/16595" }, { "epoch": 1.6125338957517323, "eta": "73:36:37", "grad_norm": 0.0049, "loss": 0.057, "lr": "3.884e-05", "step": 5352, "steps": "23.57s,5352/16595" }, { "epoch": 1.6128351913226875, "eta": "74:02:27", "grad_norm": 0.0074, "loss": 0.0507, "lr": "3.883e-05", "step": 5353, "steps": "23.71s,5353/16595" }, { "epoch": 1.6131364868936426, "eta": "74:41:24", "grad_norm": 0.0051, "loss": 0.0539, "lr": "3.883e-05", "step": 5354, "steps": "23.92s,5354/16595" }, { "epoch": 1.6134377824645978, "eta": "74:20:24", "grad_norm": 0.0042, "loss": 0.0565, "lr": "3.882e-05", "step": 5355, "steps": "23.81s,5355/16595" }, { "epoch": 1.613739078035553, "eta": "74:29:22", "grad_norm": 0.0051, "loss": 0.0532, "lr": "3.882e-05", "step": 5356, "steps": "23.86s,5356/16595" }, { "epoch": 1.614040373606508, "eta": "74:28:58", "grad_norm": 0.0047, "loss": 0.065, "lr": "3.882e-05", "step": 5357, "steps": "23.86s,5357/16595" }, { "epoch": 1.6143416691774632, "eta": "74:15:28", "grad_norm": 0.006, "loss": 0.0405, "lr": "3.881e-05", "step": 5358, "steps": "23.79s,5358/16595" }, { "epoch": 1.6146429647484182, "eta": "74:18:49", "grad_norm": 0.0038, "loss": 0.0548, "lr": "3.881e-05", "step": 5359, "steps": "23.81s,5359/16595" }, { "epoch": 1.6149442603193733, "eta": "74:09:03", "grad_norm": 0.0063, "loss": 0.0579, "lr": "3.880e-05", "step": 5360, "steps": "23.76s,5360/16595" }, { "epoch": 1.6152455558903283, "eta": "73:44:19", "grad_norm": 0.0056, "loss": 0.059, "lr": "3.880e-05", "step": 5361, "steps": "23.63s,5361/16595" }, { "epoch": 1.6155468514612834, "eta": "74:28:51", "grad_norm": 0.0097, "loss": 0.0569, "lr": "3.880e-05", "step": 5362, "steps": "23.87s,5362/16595" }, { "epoch": 1.6158481470322386, "eta": "73:41:39", "grad_norm": 0.0054, "loss": 0.0486, "lr": "3.879e-05", "step": 5363, "steps": "23.62s,5363/16595" }, { "epoch": 1.6161494426031937, "eta": "74:24:19", "grad_norm": 0.0049, "loss": 0.0411, "lr": "3.879e-05", "step": 5364, "steps": "23.85s,5364/16595" }, { "epoch": 1.6164507381741489, "eta": "73:44:37", "grad_norm": 0.0044, "loss": 0.0527, "lr": "3.878e-05", "step": 5365, "steps": "23.64s,5365/16595" }, { "epoch": 1.616752033745104, "eta": "73:32:59", "grad_norm": 0.0049, "loss": 0.0487, "lr": "3.878e-05", "step": 5366, "steps": "23.58s,5366/16595" }, { "epoch": 1.6170533293160592, "eta": "74:19:23", "grad_norm": 0.0054, "loss": 0.049, "lr": "3.878e-05", "step": 5367, "steps": "23.83s,5367/16595" }, { "epoch": 1.6173546248870143, "eta": "74:41:26", "grad_norm": 0.0057, "loss": 0.0499, "lr": "3.877e-05", "step": 5368, "steps": "23.95s,5368/16595" }, { "epoch": 1.6176559204579692, "eta": "74:12:58", "grad_norm": 0.0053, "loss": 0.0415, "lr": "3.877e-05", "step": 5369, "steps": "23.8s,5369/16595" }, { "epoch": 1.6179572160289244, "eta": "73:18:19", "grad_norm": 0.0058, "loss": 0.0429, "lr": "3.876e-05", "step": 5370, "steps": "23.51s,5370/16595" }, { "epoch": 1.6182585115998793, "eta": "74:06:34", "grad_norm": 0.0043, "loss": 0.0594, "lr": "3.876e-05", "step": 5371, "steps": "23.77s,5371/16595" }, { "epoch": 1.6185598071708345, "eta": "74:11:47", "grad_norm": 0.0055, "loss": 0.0447, "lr": "3.876e-05", "step": 5372, "steps": "23.8s,5372/16595" }, { "epoch": 1.6188611027417896, "eta": "74:07:39", "grad_norm": 0.0045, "loss": 0.0628, "lr": "3.875e-05", "step": 5373, "steps": "23.78s,5373/16595" }, { "epoch": 1.6191623983127448, "eta": "73:59:46", "grad_norm": 0.005, "loss": 0.0621, "lr": "3.875e-05", "step": 5374, "steps": "23.74s,5374/16595" }, { "epoch": 1.6194636938837, "eta": "74:19:57", "grad_norm": 0.005, "loss": 0.0421, "lr": "3.874e-05", "step": 5375, "steps": "23.85s,5375/16595" }, { "epoch": 1.619764989454655, "eta": "74:19:33", "grad_norm": 0.0057, "loss": 0.0426, "lr": "3.874e-05", "step": 5376, "steps": "23.85s,5376/16595" }, { "epoch": 1.6200662850256102, "eta": "73:38:01", "grad_norm": 0.0042, "loss": 0.0594, "lr": "3.874e-05", "step": 5377, "steps": "23.63s,5377/16595" }, { "epoch": 1.6203675805965654, "eta": "74:15:01", "grad_norm": 0.0055, "loss": 0.0489, "lr": "3.873e-05", "step": 5378, "steps": "23.83s,5378/16595" }, { "epoch": 1.6206688761675203, "eta": "74:37:03", "grad_norm": 0.005, "loss": 0.047, "lr": "3.873e-05", "step": 5379, "steps": "23.95s,5379/16595" }, { "epoch": 1.6209701717384755, "eta": "73:03:11", "grad_norm": 0.0058, "loss": 0.0519, "lr": "3.872e-05", "step": 5380, "steps": "23.45s,5380/16595" }, { "epoch": 1.6212714673094304, "eta": "73:25:13", "grad_norm": 0.0057, "loss": 0.0368, "lr": "3.872e-05", "step": 5381, "steps": "23.57s,5381/16595" }, { "epoch": 1.6215727628803855, "eta": "75:13:13", "grad_norm": 0.0056, "loss": 0.0453, "lr": "3.872e-05", "step": 5382, "steps": "24.15s,5382/16595" }, { "epoch": 1.6218740584513407, "eta": "73:11:22", "grad_norm": 0.0054, "loss": 0.0471, "lr": "3.871e-05", "step": 5383, "steps": "23.5s,5383/16595" }, { "epoch": 1.6221753540222958, "eta": "74:01:25", "grad_norm": 0.0073, "loss": 0.0534, "lr": "3.871e-05", "step": 5384, "steps": "23.77s,5384/16595" }, { "epoch": 1.622476649593251, "eta": "73:59:09", "grad_norm": 0.0058, "loss": 0.0598, "lr": "3.870e-05", "step": 5385, "steps": "23.76s,5385/16595" }, { "epoch": 1.6227779451642061, "eta": "73:51:17", "grad_norm": 0.0045, "loss": 0.0365, "lr": "3.870e-05", "step": 5386, "steps": "23.72s,5386/16595" }, { "epoch": 1.6230792407351613, "eta": "73:15:24", "grad_norm": 0.0056, "loss": 0.0721, "lr": "3.870e-05", "step": 5387, "steps": "23.53s,5387/16595" }, { "epoch": 1.6233805363061165, "eta": "72:54:27", "grad_norm": 0.005, "loss": 0.051, "lr": "3.869e-05", "step": 5388, "steps": "23.42s,5388/16595" }, { "epoch": 1.6236818318770714, "eta": "73:40:46", "grad_norm": 0.0049, "loss": 0.0428, "lr": "3.869e-05", "step": 5389, "steps": "23.67s,5389/16595" }, { "epoch": 1.6239831274480265, "eta": "74:04:39", "grad_norm": 0.005, "loss": 0.0581, "lr": "3.868e-05", "step": 5390, "steps": "23.8s,5390/16595" }, { "epoch": 1.6242844230189815, "eta": "73:41:50", "grad_norm": 0.005, "loss": 0.0417, "lr": "3.868e-05", "step": 5391, "steps": "23.68s,5391/16595" }, { "epoch": 1.6245857185899366, "eta": "74:31:51", "grad_norm": 0.0048, "loss": 0.0514, "lr": "3.868e-05", "step": 5392, "steps": "23.95s,5392/16595" }, { "epoch": 1.6248870141608918, "eta": "73:50:23", "grad_norm": 0.0049, "loss": 0.0464, "lr": "3.867e-05", "step": 5393, "steps": "23.73s,5393/16595" }, { "epoch": 1.625188309731847, "eta": "73:40:39", "grad_norm": 0.0049, "loss": 0.0462, "lr": "3.867e-05", "step": 5394, "steps": "23.68s,5394/16595" }, { "epoch": 1.625489605302802, "eta": "73:12:16", "grad_norm": 0.005, "loss": 0.0578, "lr": "3.866e-05", "step": 5395, "steps": "23.53s,5395/16595" }, { "epoch": 1.6257909008737572, "eta": "74:02:16", "grad_norm": 0.0063, "loss": 0.0566, "lr": "3.866e-05", "step": 5396, "steps": "23.8s,5396/16595" }, { "epoch": 1.6260921964447124, "eta": "73:46:56", "grad_norm": 0.0055, "loss": 0.0536, "lr": "3.866e-05", "step": 5397, "steps": "23.72s,5397/16595" }, { "epoch": 1.6263934920156675, "eta": "73:42:48", "grad_norm": 0.0059, "loss": 0.0522, "lr": "3.865e-05", "step": 5398, "steps": "23.7s,5398/16595" }, { "epoch": 1.6266947875866224, "eta": "74:10:24", "grad_norm": 0.0045, "loss": 0.0598, "lr": "3.865e-05", "step": 5399, "steps": "23.85s,5399/16595" }, { "epoch": 1.6269960831575776, "eta": "73:49:29", "grad_norm": 0.0151, "loss": 0.0395, "lr": "3.864e-05", "step": 5400, "steps": "23.74s,5400/16595" }, { "epoch": 1.6272973787285325, "eta": "153:21:28", "grad_norm": 0.0051, "loss": 0.0653, "lr": "3.864e-05", "step": 5401, "steps": "49.32s,5401/16595" }, { "epoch": 1.6275986742994877, "eta": "72:54:35", "grad_norm": 0.0048, "loss": 0.0488, "lr": "3.864e-05", "step": 5402, "steps": "23.45s,5402/16595" }, { "epoch": 1.6278999698704428, "eta": "73:46:26", "grad_norm": 0.0053, "loss": 0.0563, "lr": "3.863e-05", "step": 5403, "steps": "23.73s,5403/16595" }, { "epoch": 1.628201265441398, "eta": "72:46:21", "grad_norm": 0.0046, "loss": 0.0498, "lr": "3.863e-05", "step": 5404, "steps": "23.41s,5404/16595" }, { "epoch": 1.6285025610123531, "eta": "73:19:32", "grad_norm": 0.0066, "loss": 0.0477, "lr": "3.862e-05", "step": 5405, "steps": "23.59s,5405/16595" }, { "epoch": 1.6288038565833083, "eta": "72:56:45", "grad_norm": 0.0056, "loss": 0.0513, "lr": "3.862e-05", "step": 5406, "steps": "23.47s,5406/16595" }, { "epoch": 1.6291051521542634, "eta": "73:44:51", "grad_norm": 0.0053, "loss": 0.052, "lr": "3.862e-05", "step": 5407, "steps": "23.73s,5407/16595" }, { "epoch": 1.6294064477252186, "eta": "73:44:27", "grad_norm": 0.0041, "loss": 0.0423, "lr": "3.861e-05", "step": 5408, "steps": "23.73s,5408/16595" }, { "epoch": 1.6297077432961735, "eta": "73:42:11", "grad_norm": 0.0053, "loss": 0.0509, "lr": "3.861e-05", "step": 5409, "steps": "23.72s,5409/16595" }, { "epoch": 1.6300090388671287, "eta": "73:39:56", "grad_norm": 0.0046, "loss": 0.0579, "lr": "3.860e-05", "step": 5410, "steps": "23.71s,5410/16595" }, { "epoch": 1.6303103344380836, "eta": "74:14:57", "grad_norm": 0.006, "loss": 0.0594, "lr": "3.860e-05", "step": 5411, "steps": "23.9s,5411/16595" }, { "epoch": 1.6306116300090387, "eta": "73:46:36", "grad_norm": 0.0041, "loss": 0.0442, "lr": "3.860e-05", "step": 5412, "steps": "23.75s,5412/16595" }, { "epoch": 1.630912925579994, "eta": "73:20:07", "grad_norm": 0.0056, "loss": 0.0415, "lr": "3.859e-05", "step": 5413, "steps": "23.61s,5413/16595" }, { "epoch": 1.631214221150949, "eta": "74:10:02", "grad_norm": 0.0054, "loss": 0.0525, "lr": "3.859e-05", "step": 5414, "steps": "23.88s,5414/16595" }, { "epoch": 1.6315155167219042, "eta": "73:32:22", "grad_norm": 0.0046, "loss": 0.0525, "lr": "3.858e-05", "step": 5415, "steps": "23.68s,5415/16595" }, { "epoch": 1.6318168122928594, "eta": "74:09:14", "grad_norm": 0.0045, "loss": 0.0466, "lr": "3.858e-05", "step": 5416, "steps": "23.88s,5416/16595" }, { "epoch": 1.6321181078638145, "eta": "74:03:15", "grad_norm": 0.0058, "loss": 0.055, "lr": "3.858e-05", "step": 5417, "steps": "23.85s,5417/16595" }, { "epoch": 1.6324194034347697, "eta": "73:53:32", "grad_norm": 0.0043, "loss": 0.0369, "lr": "3.857e-05", "step": 5418, "steps": "23.8s,5418/16595" }, { "epoch": 1.6327206990057246, "eta": "73:15:53", "grad_norm": 0.0052, "loss": 0.0303, "lr": "3.857e-05", "step": 5419, "steps": "23.6s,5419/16595" }, { "epoch": 1.6330219945766797, "eta": "72:58:44", "grad_norm": 0.005, "loss": 0.0561, "lr": "3.856e-05", "step": 5420, "steps": "23.51s,5420/16595" }, { "epoch": 1.6333232901476347, "eta": "74:10:58", "grad_norm": 0.0062, "loss": 0.049, "lr": "3.856e-05", "step": 5421, "steps": "23.9s,5421/16595" }, { "epoch": 1.6336245857185898, "eta": "74:25:28", "grad_norm": 0.0049, "loss": 0.0569, "lr": "3.856e-05", "step": 5422, "steps": "23.98s,5422/16595" }, { "epoch": 1.633925881289545, "eta": "73:03:08", "grad_norm": 0.0059, "loss": 0.0442, "lr": "3.855e-05", "step": 5423, "steps": "23.54s,5423/16595" }, { "epoch": 1.6342271768605001, "eta": "74:22:48", "grad_norm": 0.0052, "loss": 0.0642, "lr": "3.855e-05", "step": 5424, "steps": "23.97s,5424/16595" }, { "epoch": 1.6345284724314553, "eta": "72:04:39", "grad_norm": 0.0068, "loss": 0.0395, "lr": "3.854e-05", "step": 5425, "steps": "23.23s,5425/16595" }, { "epoch": 1.6348297680024104, "eta": "74:12:42", "grad_norm": 0.0048, "loss": 0.0712, "lr": "3.854e-05", "step": 5426, "steps": "23.92s,5426/16595" }, { "epoch": 1.6351310635733656, "eta": "73:27:38", "grad_norm": 0.0042, "loss": 0.0462, "lr": "3.854e-05", "step": 5427, "steps": "23.68s,5427/16595" }, { "epoch": 1.6354323591443207, "eta": "73:04:54", "grad_norm": 0.0048, "loss": 0.0585, "lr": "3.853e-05", "step": 5428, "steps": "23.56s,5428/16595" }, { "epoch": 1.6357336547152757, "eta": "74:07:47", "grad_norm": 0.0064, "loss": 0.0479, "lr": "3.853e-05", "step": 5429, "steps": "23.9s,5429/16595" }, { "epoch": 1.6360349502862308, "eta": "73:48:47", "grad_norm": 0.0045, "loss": 0.0458, "lr": "3.852e-05", "step": 5430, "steps": "23.8s,5430/16595" }, { "epoch": 1.6363362458571857, "eta": "72:35:49", "grad_norm": 0.0043, "loss": 0.056, "lr": "3.852e-05", "step": 5431, "steps": "23.41s,5431/16595" }, { "epoch": 1.6366375414281409, "eta": "73:10:46", "grad_norm": 0.0046, "loss": 0.044, "lr": "3.852e-05", "step": 5432, "steps": "23.6s,5432/16595" }, { "epoch": 1.636938836999096, "eta": "73:43:52", "grad_norm": 0.0048, "loss": 0.0476, "lr": "3.851e-05", "step": 5433, "steps": "23.78s,5433/16595" }, { "epoch": 1.6372401325700512, "eta": "73:45:20", "grad_norm": 0.0054, "loss": 0.0395, "lr": "3.851e-05", "step": 5434, "steps": "23.79s,5434/16595" }, { "epoch": 1.6375414281410063, "eta": "74:18:25", "grad_norm": 0.0046, "loss": 0.0488, "lr": "3.850e-05", "step": 5435, "steps": "23.97s,5435/16595" }, { "epoch": 1.6378427237119615, "eta": "72:56:11", "grad_norm": 0.0074, "loss": 0.0408, "lr": "3.850e-05", "step": 5436, "steps": "23.53s,5436/16595" }, { "epoch": 1.6381440192829166, "eta": "74:00:53", "grad_norm": 0.0052, "loss": 0.0564, "lr": "3.850e-05", "step": 5437, "steps": "23.88s,5437/16595" }, { "epoch": 1.6384453148538718, "eta": "73:41:53", "grad_norm": 0.0044, "loss": 0.0486, "lr": "3.849e-05", "step": 5438, "steps": "23.78s,5438/16595" }, { "epoch": 1.6387466104248267, "eta": "73:28:28", "grad_norm": 0.0038, "loss": 0.0424, "lr": "3.849e-05", "step": 5439, "steps": "23.71s,5439/16595" }, { "epoch": 1.6390479059957819, "eta": "73:50:23", "grad_norm": 0.0049, "loss": 0.0511, "lr": "3.848e-05", "step": 5440, "steps": "23.83s,5440/16595" }, { "epoch": 1.639349201566737, "eta": "74:06:43", "grad_norm": 0.0058, "loss": 0.0482, "lr": "3.848e-05", "step": 5441, "steps": "23.92s,5441/16595" }, { "epoch": 1.639650497137692, "eta": "73:49:35", "grad_norm": 0.0055, "loss": 0.0495, "lr": "3.848e-05", "step": 5442, "steps": "23.83s,5442/16595" }, { "epoch": 1.639951792708647, "eta": "73:38:03", "grad_norm": 0.0049, "loss": 0.0418, "lr": "3.847e-05", "step": 5443, "steps": "23.77s,5443/16595" }, { "epoch": 1.6402530882796023, "eta": "74:55:42", "grad_norm": 0.0046, "loss": 0.0557, "lr": "3.847e-05", "step": 5444, "steps": "24.19s,5444/16595" }, { "epoch": 1.6405543838505574, "eta": "73:29:49", "grad_norm": 0.0052, "loss": 0.052, "lr": "3.846e-05", "step": 5445, "steps": "23.73s,5445/16595" }, { "epoch": 1.6408556794215126, "eta": "73:20:08", "grad_norm": 0.0053, "loss": 0.0531, "lr": "3.846e-05", "step": 5446, "steps": "23.68s,5446/16595" }, { "epoch": 1.6411569749924677, "eta": "73:45:45", "grad_norm": 0.0054, "loss": 0.0405, "lr": "3.846e-05", "step": 5447, "steps": "23.82s,5447/16595" }, { "epoch": 1.6414582705634229, "eta": "73:11:55", "grad_norm": 0.0052, "loss": 0.0443, "lr": "3.845e-05", "step": 5448, "steps": "23.64s,5448/16595" }, { "epoch": 1.6417595661343778, "eta": "74:16:32", "grad_norm": 0.0058, "loss": 0.0565, "lr": "3.845e-05", "step": 5449, "steps": "23.99s,5449/16595" }, { "epoch": 1.642060861705333, "eta": "72:35:50", "grad_norm": 0.0067, "loss": 0.059, "lr": "3.844e-05", "step": 5450, "steps": "23.45s,5450/16595" }, { "epoch": 1.642362157276288, "eta": "73:31:10", "grad_norm": 0.0049, "loss": 0.0611, "lr": "3.844e-05", "step": 5451, "steps": "23.75s,5451/16595" }, { "epoch": 1.642663452847243, "eta": "73:06:37", "grad_norm": 0.0046, "loss": 0.0549, "lr": "3.844e-05", "step": 5452, "steps": "23.62s,5452/16595" }, { "epoch": 1.6429647484181982, "eta": "73:39:39", "grad_norm": 0.0046, "loss": 0.0469, "lr": "3.843e-05", "step": 5453, "steps": "23.8s,5453/16595" }, { "epoch": 1.6432660439891533, "eta": "73:18:50", "grad_norm": 0.0054, "loss": 0.0379, "lr": "3.843e-05", "step": 5454, "steps": "23.69s,5454/16595" }, { "epoch": 1.6435673395601085, "eta": "73:57:26", "grad_norm": 0.0054, "loss": 0.0408, "lr": "3.842e-05", "step": 5455, "steps": "23.9s,5455/16595" }, { "epoch": 1.6438686351310636, "eta": "73:31:02", "grad_norm": 0.0056, "loss": 0.0533, "lr": "3.842e-05", "step": 5456, "steps": "23.76s,5456/16595" }, { "epoch": 1.6441699307020188, "eta": "73:26:56", "grad_norm": 0.0048, "loss": 0.0537, "lr": "3.842e-05", "step": 5457, "steps": "23.74s,5457/16595" }, { "epoch": 1.644471226272974, "eta": "73:09:50", "grad_norm": 0.0045, "loss": 0.0478, "lr": "3.841e-05", "step": 5458, "steps": "23.65s,5458/16595" }, { "epoch": 1.6447725218439289, "eta": "73:28:00", "grad_norm": 0.0054, "loss": 0.0649, "lr": "3.841e-05", "step": 5459, "steps": "23.75s,5459/16595" }, { "epoch": 1.645073817414884, "eta": "74:15:51", "grad_norm": 0.0046, "loss": 0.0559, "lr": "3.840e-05", "step": 5460, "steps": "24.01s,5460/16595" }, { "epoch": 1.6453751129858392, "eta": "72:57:31", "grad_norm": 0.005, "loss": 0.0478, "lr": "3.840e-05", "step": 5461, "steps": "23.59s,5461/16595" }, { "epoch": 1.645676408556794, "eta": "73:15:40", "grad_norm": 0.009, "loss": 0.0484, "lr": "3.839e-05", "step": 5462, "steps": "23.69s,5462/16595" }, { "epoch": 1.6459777041277492, "eta": "73:41:15", "grad_norm": 0.0072, "loss": 0.0477, "lr": "3.839e-05", "step": 5463, "steps": "23.83s,5463/16595" }, { "epoch": 1.6462789996987044, "eta": "72:50:46", "grad_norm": 0.0059, "loss": 0.0632, "lr": "3.839e-05", "step": 5464, "steps": "23.56s,5464/16595" }, { "epoch": 1.6465802952696595, "eta": "72:39:15", "grad_norm": 0.0053, "loss": 0.0589, "lr": "3.838e-05", "step": 5465, "steps": "23.5s,5465/16595" }, { "epoch": 1.6468815908406147, "eta": "72:57:24", "grad_norm": 0.0058, "loss": 0.0515, "lr": "3.838e-05", "step": 5466, "steps": "23.6s,5466/16595" }, { "epoch": 1.6471828864115698, "eta": "73:30:23", "grad_norm": 0.005, "loss": 0.0299, "lr": "3.837e-05", "step": 5467, "steps": "23.78s,5467/16595" }, { "epoch": 1.647484181982525, "eta": "73:05:53", "grad_norm": 0.0054, "loss": 0.0412, "lr": "3.837e-05", "step": 5468, "steps": "23.65s,5468/16595" }, { "epoch": 1.64778547755348, "eta": "73:48:08", "grad_norm": 0.0043, "loss": 0.0585, "lr": "3.837e-05", "step": 5469, "steps": "23.88s,5469/16595" }, { "epoch": 1.648086773124435, "eta": "73:47:45", "grad_norm": 0.0052, "loss": 0.0453, "lr": "3.836e-05", "step": 5470, "steps": "23.88s,5470/16595" }, { "epoch": 1.6483880686953902, "eta": "73:52:54", "grad_norm": 0.0051, "loss": 0.0579, "lr": "3.836e-05", "step": 5471, "steps": "23.91s,5471/16595" }, { "epoch": 1.6486893642663452, "eta": "73:33:58", "grad_norm": 0.0054, "loss": 0.06, "lr": "3.835e-05", "step": 5472, "steps": "23.81s,5472/16595" }, { "epoch": 1.6489906598373003, "eta": "73:11:20", "grad_norm": 0.0062, "loss": 0.0485, "lr": "3.835e-05", "step": 5473, "steps": "23.69s,5473/16595" }, { "epoch": 1.6492919554082555, "eta": "73:23:54", "grad_norm": 0.0054, "loss": 0.0548, "lr": "3.835e-05", "step": 5474, "steps": "23.76s,5474/16595" }, { "epoch": 1.6495932509792106, "eta": "72:24:12", "grad_norm": 0.0058, "loss": 0.051, "lr": "3.834e-05", "step": 5475, "steps": "23.44s,5475/16595" }, { "epoch": 1.6498945465501658, "eta": "73:06:26", "grad_norm": 0.0054, "loss": 0.0407, "lr": "3.834e-05", "step": 5476, "steps": "23.67s,5476/16595" }, { "epoch": 1.650195842121121, "eta": "73:15:18", "grad_norm": 0.0067, "loss": 0.042, "lr": "3.833e-05", "step": 5477, "steps": "23.72s,5477/16595" }, { "epoch": 1.650497137692076, "eta": "73:09:21", "grad_norm": 0.0065, "loss": 0.0561, "lr": "3.833e-05", "step": 5478, "steps": "23.69s,5478/16595" }, { "epoch": 1.650798433263031, "eta": "72:33:46", "grad_norm": 0.0056, "loss": 0.0609, "lr": "3.833e-05", "step": 5479, "steps": "23.5s,5479/16595" }, { "epoch": 1.6510997288339861, "eta": "72:33:22", "grad_norm": 0.0052, "loss": 0.0453, "lr": "3.832e-05", "step": 5480, "steps": "23.5s,5480/16595" }, { "epoch": 1.6514010244049413, "eta": "73:04:28", "grad_norm": 0.0051, "loss": 0.0547, "lr": "3.832e-05", "step": 5481, "steps": "23.67s,5481/16595" }, { "epoch": 1.6517023199758962, "eta": "73:44:49", "grad_norm": 0.0056, "loss": 0.059, "lr": "3.831e-05", "step": 5482, "steps": "23.89s,5482/16595" }, { "epoch": 1.6520036155468514, "eta": "73:48:07", "grad_norm": 0.005, "loss": 0.0496, "lr": "3.831e-05", "step": 5483, "steps": "23.91s,5483/16595" }, { "epoch": 1.6523049111178065, "eta": "73:53:17", "grad_norm": 0.0054, "loss": 0.0629, "lr": "3.831e-05", "step": 5484, "steps": "23.94s,5484/16595" }, { "epoch": 1.6526062066887617, "eta": "72:40:40", "grad_norm": 0.0053, "loss": 0.0483, "lr": "3.830e-05", "step": 5485, "steps": "23.55s,5485/16595" }, { "epoch": 1.6529075022597168, "eta": "73:26:34", "grad_norm": 0.0049, "loss": 0.0571, "lr": "3.830e-05", "step": 5486, "steps": "23.8s,5486/16595" }, { "epoch": 1.653208797830672, "eta": "72:23:13", "grad_norm": 0.0093, "loss": 0.0554, "lr": "3.829e-05", "step": 5487, "steps": "23.46s,5487/16595" }, { "epoch": 1.6535100934016271, "eta": "73:09:06", "grad_norm": 0.0057, "loss": 0.0499, "lr": "3.829e-05", "step": 5488, "steps": "23.71s,5488/16595" }, { "epoch": 1.653811388972582, "eta": "73:21:40", "grad_norm": 0.0059, "loss": 0.0554, "lr": "3.829e-05", "step": 5489, "steps": "23.78s,5489/16595" }, { "epoch": 1.6541126845435372, "eta": "73:52:44", "grad_norm": 0.0045, "loss": 0.0532, "lr": "3.828e-05", "step": 5490, "steps": "23.95s,5490/16595" }, { "epoch": 1.6544139801144924, "eta": "72:29:04", "grad_norm": 0.0044, "loss": 0.0599, "lr": "3.828e-05", "step": 5491, "steps": "23.5s,5491/16595" }, { "epoch": 1.6547152756854473, "eta": "73:37:08", "grad_norm": 0.0054, "loss": 0.0563, "lr": "3.827e-05", "step": 5492, "steps": "23.87s,5492/16595" }, { "epoch": 1.6550165712564024, "eta": "73:12:41", "grad_norm": 0.0049, "loss": 0.0491, "lr": "3.827e-05", "step": 5493, "steps": "23.74s,5493/16595" }, { "epoch": 1.6553178668273576, "eta": "74:13:21", "grad_norm": 0.0057, "loss": 0.0633, "lr": "3.827e-05", "step": 5494, "steps": "24.07s,5494/16595" }, { "epoch": 1.6556191623983127, "eta": "73:04:30", "grad_norm": 0.0084, "loss": 0.0456, "lr": "3.826e-05", "step": 5495, "steps": "23.7s,5495/16595" }, { "epoch": 1.655920457969268, "eta": "72:53:00", "grad_norm": 0.0047, "loss": 0.0576, "lr": "3.826e-05", "step": 5496, "steps": "23.64s,5496/16595" }, { "epoch": 1.656221753540223, "eta": "73:07:24", "grad_norm": 0.0051, "loss": 0.0567, "lr": "3.825e-05", "step": 5497, "steps": "23.72s,5497/16595" }, { "epoch": 1.6565230491111782, "eta": "73:08:51", "grad_norm": 0.0049, "loss": 0.0572, "lr": "3.825e-05", "step": 5498, "steps": "23.73s,5498/16595" }, { "epoch": 1.6568243446821331, "eta": "72:44:25", "grad_norm": 0.0063, "loss": 0.0655, "lr": "3.825e-05", "step": 5499, "steps": "23.6s,5499/16595" }, { "epoch": 1.6571256402530883, "eta": "72:51:25", "grad_norm": 0.006, "loss": 0.0501, "lr": "3.824e-05", "step": 5500, "steps": "23.64s,5500/16595" }, { "epoch": 1.6574269358240434, "eta": "72:15:54", "grad_norm": 0.0052, "loss": 0.042, "lr": "3.824e-05", "step": 5501, "steps": "23.45s,5501/16595" }, { "epoch": 1.6577282313949984, "eta": "72:59:53", "grad_norm": 0.0055, "loss": 0.0544, "lr": "3.823e-05", "step": 5502, "steps": "23.69s,5502/16595" }, { "epoch": 1.6580295269659535, "eta": "72:44:42", "grad_norm": 0.005, "loss": 0.0458, "lr": "3.823e-05", "step": 5503, "steps": "23.61s,5503/16595" }, { "epoch": 1.6583308225369087, "eta": "73:34:13", "grad_norm": 0.0069, "loss": 0.0497, "lr": "3.823e-05", "step": 5504, "steps": "23.88s,5504/16595" }, { "epoch": 1.6586321181078638, "eta": "73:09:47", "grad_norm": 0.005, "loss": 0.0478, "lr": "3.822e-05", "step": 5505, "steps": "23.75s,5505/16595" }, { "epoch": 1.658933413678819, "eta": "73:11:14", "grad_norm": 0.0053, "loss": 0.0447, "lr": "3.822e-05", "step": 5506, "steps": "23.76s,5506/16595" }, { "epoch": 1.6592347092497741, "eta": "73:40:24", "grad_norm": 0.0054, "loss": 0.0395, "lr": "3.821e-05", "step": 5507, "steps": "23.92s,5507/16595" }, { "epoch": 1.6595360048207293, "eta": "74:40:59", "grad_norm": 0.0054, "loss": 0.0632, "lr": "3.821e-05", "step": 5508, "steps": "24.25s,5508/16595" }, { "epoch": 1.6598373003916842, "eta": "73:19:17", "grad_norm": 0.0044, "loss": 0.0561, "lr": "3.820e-05", "step": 5509, "steps": "23.81s,5509/16595" }, { "epoch": 1.6601385959626394, "eta": "73:20:44", "grad_norm": 0.0074, "loss": 0.0459, "lr": "3.820e-05", "step": 5510, "steps": "23.82s,5510/16595" }, { "epoch": 1.6604398915335945, "eta": "72:36:00", "grad_norm": 0.0089, "loss": 0.0424, "lr": "3.820e-05", "step": 5511, "steps": "23.58s,5511/16595" }, { "epoch": 1.6607411871045494, "eta": "74:02:26", "grad_norm": 0.006, "loss": 0.0482, "lr": "3.819e-05", "step": 5512, "steps": "24.05s,5512/16595" }, { "epoch": 1.6610424826755046, "eta": "73:08:28", "grad_norm": 0.0056, "loss": 0.063, "lr": "3.819e-05", "step": 5513, "steps": "23.76s,5513/16595" }, { "epoch": 1.6613437782464597, "eta": "72:27:26", "grad_norm": 0.0054, "loss": 0.0626, "lr": "3.818e-05", "step": 5514, "steps": "23.54s,5514/16595" }, { "epoch": 1.6616450738174149, "eta": "72:38:08", "grad_norm": 0.0054, "loss": 0.0504, "lr": "3.818e-05", "step": 5515, "steps": "23.6s,5515/16595" }, { "epoch": 1.66194636938837, "eta": "72:35:53", "grad_norm": 0.0051, "loss": 0.0596, "lr": "3.818e-05", "step": 5516, "steps": "23.59s,5516/16595" }, { "epoch": 1.6622476649593252, "eta": "72:35:30", "grad_norm": 0.0052, "loss": 0.0398, "lr": "3.817e-05", "step": 5517, "steps": "23.59s,5517/16595" }, { "epoch": 1.6625489605302803, "eta": "72:59:06", "grad_norm": 0.0048, "loss": 0.0501, "lr": "3.817e-05", "step": 5518, "steps": "23.72s,5518/16595" }, { "epoch": 1.6628502561012353, "eta": "72:31:01", "grad_norm": 0.0045, "loss": 0.0522, "lr": "3.816e-05", "step": 5519, "steps": "23.57s,5519/16595" }, { "epoch": 1.6631515516721904, "eta": "72:41:42", "grad_norm": 0.0049, "loss": 0.038, "lr": "3.816e-05", "step": 5520, "steps": "23.63s,5520/16595" }, { "epoch": 1.6634528472431456, "eta": "72:43:09", "grad_norm": 0.0108, "loss": 0.0613, "lr": "3.816e-05", "step": 5521, "steps": "23.64s,5521/16595" }, { "epoch": 1.6637541428141005, "eta": "73:32:35", "grad_norm": 0.007, "loss": 0.0385, "lr": "3.815e-05", "step": 5522, "steps": "23.91s,5522/16595" }, { "epoch": 1.6640554383850557, "eta": "72:58:58", "grad_norm": 0.0044, "loss": 0.034, "lr": "3.815e-05", "step": 5523, "steps": "23.73s,5523/16595" }, { "epoch": 1.6643567339560108, "eta": "72:56:44", "grad_norm": 0.0069, "loss": 0.0607, "lr": "3.814e-05", "step": 5524, "steps": "23.72s,5524/16595" }, { "epoch": 1.664658029526966, "eta": "72:41:34", "grad_norm": 0.0051, "loss": 0.0707, "lr": "3.814e-05", "step": 5525, "steps": "23.64s,5525/16595" }, { "epoch": 1.664959325097921, "eta": "72:28:16", "grad_norm": 0.0054, "loss": 0.0512, "lr": "3.814e-05", "step": 5526, "steps": "23.57s,5526/16595" }, { "epoch": 1.6652606206688763, "eta": "71:54:40", "grad_norm": 0.0059, "loss": 0.0406, "lr": "3.813e-05", "step": 5527, "steps": "23.39s,5527/16595" }, { "epoch": 1.6655619162398314, "eta": "71:46:54", "grad_norm": 0.0059, "loss": 0.048, "lr": "3.813e-05", "step": 5528, "steps": "23.35s,5528/16595" }, { "epoch": 1.6658632118107863, "eta": "72:03:07", "grad_norm": 0.0053, "loss": 0.0474, "lr": "3.812e-05", "step": 5529, "steps": "23.44s,5529/16595" }, { "epoch": 1.6661645073817415, "eta": "72:24:51", "grad_norm": 0.0046, "loss": 0.0616, "lr": "3.812e-05", "step": 5530, "steps": "23.56s,5530/16595" }, { "epoch": 1.6664658029526966, "eta": "72:35:31", "grad_norm": 0.0047, "loss": 0.0479, "lr": "3.812e-05", "step": 5531, "steps": "23.62s,5531/16595" }, { "epoch": 1.6667670985236516, "eta": "72:57:15", "grad_norm": 0.0078, "loss": 0.0447, "lr": "3.811e-05", "step": 5532, "steps": "23.74s,5532/16595" }, { "epoch": 1.6670683940946067, "eta": "73:39:16", "grad_norm": 0.0062, "loss": 0.039, "lr": "3.811e-05", "step": 5533, "steps": "23.97s,5533/16595" }, { "epoch": 1.6673696896655619, "eta": "72:03:00", "grad_norm": 0.0045, "loss": 0.0662, "lr": "3.810e-05", "step": 5534, "steps": "23.45s,5534/16595" }, { "epoch": 1.667670985236517, "eta": "72:54:13", "grad_norm": 0.0054, "loss": 0.0504, "lr": "3.810e-05", "step": 5535, "steps": "23.73s,5535/16595" }, { "epoch": 1.6679722808074722, "eta": "73:21:28", "grad_norm": 0.0069, "loss": 0.0411, "lr": "3.810e-05", "step": 5536, "steps": "23.88s,5536/16595" }, { "epoch": 1.6682735763784273, "eta": "73:22:55", "grad_norm": 0.0042, "loss": 0.0629, "lr": "3.809e-05", "step": 5537, "steps": "23.89s,5537/16595" }, { "epoch": 1.6685748719493825, "eta": "72:43:49", "grad_norm": 0.0056, "loss": 0.0607, "lr": "3.809e-05", "step": 5538, "steps": "23.68s,5538/16595" }, { "epoch": 1.6688761675203374, "eta": "73:35:01", "grad_norm": 0.004, "loss": 0.07, "lr": "3.808e-05", "step": 5539, "steps": "23.96s,5539/16595" }, { "epoch": 1.6691774630912926, "eta": "73:27:15", "grad_norm": 0.0055, "loss": 0.057, "lr": "3.808e-05", "step": 5540, "steps": "23.92s,5540/16595" }, { "epoch": 1.6694787586622477, "eta": "72:57:23", "grad_norm": 0.0046, "loss": 0.0388, "lr": "3.807e-05", "step": 5541, "steps": "23.76s,5541/16595" }, { "epoch": 1.6697800542332026, "eta": "73:37:30", "grad_norm": 0.0065, "loss": 0.0612, "lr": "3.807e-05", "step": 5542, "steps": "23.98s,5542/16595" }, { "epoch": 1.6700813498041578, "eta": "72:51:03", "grad_norm": 0.0043, "loss": 0.0453, "lr": "3.807e-05", "step": 5543, "steps": "23.73s,5543/16595" }, { "epoch": 1.670382645375113, "eta": "73:29:20", "grad_norm": 0.0047, "loss": 0.0485, "lr": "3.806e-05", "step": 5544, "steps": "23.94s,5544/16595" }, { "epoch": 1.670683940946068, "eta": "72:59:29", "grad_norm": 0.0056, "loss": 0.0537, "lr": "3.806e-05", "step": 5545, "steps": "23.78s,5545/16595" }, { "epoch": 1.6709852365170232, "eta": "73:19:20", "grad_norm": 0.0074, "loss": 0.0503, "lr": "3.805e-05", "step": 5546, "steps": "23.89s,5546/16595" }, { "epoch": 1.6712865320879784, "eta": "73:22:37", "grad_norm": 0.0051, "loss": 0.0489, "lr": "3.805e-05", "step": 5547, "steps": "23.91s,5547/16595" }, { "epoch": 1.6715878276589335, "eta": "72:32:31", "grad_norm": 0.0059, "loss": 0.0477, "lr": "3.805e-05", "step": 5548, "steps": "23.64s,5548/16595" }, { "epoch": 1.6718891232298885, "eta": "72:08:11", "grad_norm": 0.0054, "loss": 0.0404, "lr": "3.804e-05", "step": 5549, "steps": "23.51s,5549/16595" }, { "epoch": 1.6721904188008436, "eta": "72:15:09", "grad_norm": 0.0049, "loss": 0.0605, "lr": "3.804e-05", "step": 5550, "steps": "23.55s,5550/16595" }, { "epoch": 1.6724917143717988, "eta": "72:58:56", "grad_norm": 0.006, "loss": 0.0473, "lr": "3.803e-05", "step": 5551, "steps": "23.79s,5551/16595" }, { "epoch": 1.6727930099427537, "eta": "74:04:48", "grad_norm": 0.0063, "loss": 0.0403, "lr": "3.803e-05", "step": 5552, "steps": "24.15s,5552/16595" }, { "epoch": 1.6730943055137089, "eta": "72:06:37", "grad_norm": 0.0062, "loss": 0.0485, "lr": "3.803e-05", "step": 5553, "steps": "23.51s,5553/16595" }, { "epoch": 1.673395601084664, "eta": "72:19:06", "grad_norm": 0.0047, "loss": 0.0549, "lr": "3.802e-05", "step": 5554, "steps": "23.58s,5554/16595" }, { "epoch": 1.6736968966556192, "eta": "73:41:31", "grad_norm": 0.0058, "loss": 0.0634, "lr": "3.802e-05", "step": 5555, "steps": "24.03s,5555/16595" }, { "epoch": 1.6739981922265743, "eta": "73:02:28", "grad_norm": 0.0056, "loss": 0.0532, "lr": "3.801e-05", "step": 5556, "steps": "23.82s,5556/16595" }, { "epoch": 1.6742994877975295, "eta": "72:27:07", "grad_norm": 0.0053, "loss": 0.0438, "lr": "3.801e-05", "step": 5557, "steps": "23.63s,5557/16595" }, { "epoch": 1.6746007833684846, "eta": "72:28:34", "grad_norm": 0.0067, "loss": 0.0553, "lr": "3.801e-05", "step": 5558, "steps": "23.64s,5558/16595" }, { "epoch": 1.6749020789394395, "eta": "73:36:14", "grad_norm": 0.0064, "loss": 0.0642, "lr": "3.800e-05", "step": 5559, "steps": "24.01s,5559/16595" }, { "epoch": 1.6752033745103947, "eta": "71:49:10", "grad_norm": 0.0081, "loss": 0.0549, "lr": "3.800e-05", "step": 5560, "steps": "23.43s,5560/16595" }, { "epoch": 1.6755046700813498, "eta": "73:06:00", "grad_norm": 0.0049, "loss": 0.0541, "lr": "3.799e-05", "step": 5561, "steps": "23.85s,5561/16595" }, { "epoch": 1.6758059656523048, "eta": "72:50:54", "grad_norm": 0.006, "loss": 0.046, "lr": "3.799e-05", "step": 5562, "steps": "23.77s,5562/16595" }, { "epoch": 1.67610726122326, "eta": "73:32:48", "grad_norm": 0.006, "loss": 0.0407, "lr": "3.799e-05", "step": 5563, "steps": "24.0s,5563/16595" }, { "epoch": 1.676408556794215, "eta": "71:38:24", "grad_norm": 0.0064, "loss": 0.0441, "lr": "3.798e-05", "step": 5564, "steps": "23.38s,5564/16595" }, { "epoch": 1.6767098523651702, "eta": "72:38:41", "grad_norm": 0.006, "loss": 0.0535, "lr": "3.798e-05", "step": 5565, "steps": "23.71s,5565/16595" }, { "epoch": 1.6770111479361254, "eta": "72:41:58", "grad_norm": 0.0052, "loss": 0.0421, "lr": "3.797e-05", "step": 5566, "steps": "23.73s,5566/16595" }, { "epoch": 1.6773124435070805, "eta": "72:30:32", "grad_norm": 0.0046, "loss": 0.0518, "lr": "3.797e-05", "step": 5567, "steps": "23.67s,5567/16595" }, { "epoch": 1.6776137390780357, "eta": "73:21:36", "grad_norm": 0.0049, "loss": 0.0595, "lr": "3.796e-05", "step": 5568, "steps": "23.95s,5568/16595" }, { "epoch": 1.6779150346489906, "eta": "71:34:37", "grad_norm": 0.0066, "loss": 0.0408, "lr": "3.796e-05", "step": 5569, "steps": "23.37s,5569/16595" }, { "epoch": 1.6782163302199458, "eta": "72:49:34", "grad_norm": 0.0049, "loss": 0.0437, "lr": "3.796e-05", "step": 5570, "steps": "23.78s,5570/16595" }, { "epoch": 1.678517625790901, "eta": "72:52:51", "grad_norm": 0.0053, "loss": 0.0399, "lr": "3.795e-05", "step": 5571, "steps": "23.8s,5571/16595" }, { "epoch": 1.6788189213618558, "eta": "72:02:51", "grad_norm": 0.0049, "loss": 0.048, "lr": "3.795e-05", "step": 5572, "steps": "23.53s,5572/16595" }, { "epoch": 1.679120216932811, "eta": "72:13:28", "grad_norm": 0.0048, "loss": 0.0442, "lr": "3.794e-05", "step": 5573, "steps": "23.59s,5573/16595" }, { "epoch": 1.6794215125037661, "eta": "72:42:28", "grad_norm": 0.0078, "loss": 0.0441, "lr": "3.794e-05", "step": 5574, "steps": "23.75s,5574/16595" }, { "epoch": 1.6797228080747213, "eta": "72:36:34", "grad_norm": 0.0048, "loss": 0.0623, "lr": "3.794e-05", "step": 5575, "steps": "23.72s,5575/16595" }, { "epoch": 1.6800241036456764, "eta": "73:29:26", "grad_norm": 0.0076, "loss": 0.0606, "lr": "3.793e-05", "step": 5576, "steps": "24.01s,5576/16595" }, { "epoch": 1.6803253992166316, "eta": "72:13:44", "grad_norm": 0.0063, "loss": 0.0582, "lr": "3.793e-05", "step": 5577, "steps": "23.6s,5577/16595" }, { "epoch": 1.6806266947875868, "eta": "72:29:52", "grad_norm": 0.0063, "loss": 0.0627, "lr": "3.792e-05", "step": 5578, "steps": "23.69s,5578/16595" }, { "epoch": 1.6809279903585417, "eta": "71:49:05", "grad_norm": 0.005, "loss": 0.0523, "lr": "3.792e-05", "step": 5579, "steps": "23.47s,5579/16595" }, { "epoch": 1.6812292859294968, "eta": "72:29:05", "grad_norm": 0.005, "loss": 0.0517, "lr": "3.792e-05", "step": 5580, "steps": "23.69s,5580/16595" }, { "epoch": 1.681530581500452, "eta": "71:33:37", "grad_norm": 0.0052, "loss": 0.0577, "lr": "3.791e-05", "step": 5581, "steps": "23.39s,5581/16595" }, { "epoch": 1.681831877071407, "eta": "73:14:11", "grad_norm": 0.0062, "loss": 0.0347, "lr": "3.791e-05", "step": 5582, "steps": "23.94s,5582/16595" }, { "epoch": 1.682133172642362, "eta": "72:11:23", "grad_norm": 0.0056, "loss": 0.0408, "lr": "3.790e-05", "step": 5583, "steps": "23.6s,5583/16595" }, { "epoch": 1.6824344682133172, "eta": "73:09:43", "grad_norm": 0.0056, "loss": 0.0519, "lr": "3.790e-05", "step": 5584, "steps": "23.92s,5584/16595" }, { "epoch": 1.6827357637842724, "eta": "72:32:37", "grad_norm": 0.0049, "loss": 0.0592, "lr": "3.790e-05", "step": 5585, "steps": "23.72s,5585/16595" }, { "epoch": 1.6830370593552275, "eta": "71:42:41", "grad_norm": 0.0056, "loss": 0.04, "lr": "3.789e-05", "step": 5586, "steps": "23.45s,5586/16595" }, { "epoch": 1.6833383549261827, "eta": "72:28:09", "grad_norm": 0.0051, "loss": 0.0539, "lr": "3.789e-05", "step": 5587, "steps": "23.7s,5587/16595" }, { "epoch": 1.6836396504971378, "eta": "71:54:44", "grad_norm": 0.0051, "loss": 0.0516, "lr": "3.788e-05", "step": 5588, "steps": "23.52s,5588/16595" }, { "epoch": 1.6839409460680927, "eta": "73:13:13", "grad_norm": 0.0049, "loss": 0.0474, "lr": "3.788e-05", "step": 5589, "steps": "23.95s,5589/16595" }, { "epoch": 1.684242241639048, "eta": "72:06:47", "grad_norm": 0.0055, "loss": 0.0538, "lr": "3.787e-05", "step": 5590, "steps": "23.59s,5590/16595" }, { "epoch": 1.684543537210003, "eta": "73:05:05", "grad_norm": 0.0066, "loss": 0.0443, "lr": "3.787e-05", "step": 5591, "steps": "23.91s,5591/16595" }, { "epoch": 1.684844832780958, "eta": "72:37:11", "grad_norm": 0.0051, "loss": 0.0517, "lr": "3.787e-05", "step": 5592, "steps": "23.76s,5592/16595" }, { "epoch": 1.6851461283519131, "eta": "73:40:58", "grad_norm": 0.0047, "loss": 0.0542, "lr": "3.786e-05", "step": 5593, "steps": "24.11s,5593/16595" }, { "epoch": 1.6854474239228683, "eta": "72:38:13", "grad_norm": 0.0054, "loss": 0.0513, "lr": "3.786e-05", "step": 5594, "steps": "23.77s,5594/16595" }, { "epoch": 1.6857487194938234, "eta": "72:02:59", "grad_norm": 0.0058, "loss": 0.0519, "lr": "3.785e-05", "step": 5595, "steps": "23.58s,5595/16595" }, { "epoch": 1.6860500150647786, "eta": "72:41:06", "grad_norm": 0.0051, "loss": 0.052, "lr": "3.785e-05", "step": 5596, "steps": "23.79s,5596/16595" }, { "epoch": 1.6863513106357337, "eta": "72:00:22", "grad_norm": 0.0065, "loss": 0.0568, "lr": "3.785e-05", "step": 5597, "steps": "23.57s,5597/16595" }, { "epoch": 1.6866526062066889, "eta": "71:47:09", "grad_norm": 0.0055, "loss": 0.0562, "lr": "3.784e-05", "step": 5598, "steps": "23.5s,5598/16595" }, { "epoch": 1.6869539017776438, "eta": "73:00:04", "grad_norm": 0.0051, "loss": 0.0493, "lr": "3.784e-05", "step": 5599, "steps": "23.9s,5599/16595" }, { "epoch": 1.687255197348599, "eta": "72:34:01", "grad_norm": 0.005, "loss": 0.0577, "lr": "3.783e-05", "step": 5600, "steps": "23.76s,5600/16595" }, { "epoch": 1.6875564929195541, "eta": "154:31:36", "grad_norm": 0.0051, "loss": 0.0605, "lr": "3.783e-05", "step": 5601, "steps": "50.6s,5601/16595" }, { "epoch": 1.687857788490509, "eta": "73:09:52", "grad_norm": 0.0057, "loss": 0.0468, "lr": "3.783e-05", "step": 5602, "steps": "23.96s,5602/16595" }, { "epoch": 1.6881590840614642, "eta": "73:14:58", "grad_norm": 0.0045, "loss": 0.0501, "lr": "3.782e-05", "step": 5603, "steps": "23.99s,5603/16595" }, { "epoch": 1.6884603796324194, "eta": "73:07:14", "grad_norm": 0.005, "loss": 0.0523, "lr": "3.782e-05", "step": 5604, "steps": "23.95s,5604/16595" }, { "epoch": 1.6887616752033745, "eta": "72:35:42", "grad_norm": 0.0049, "loss": 0.0555, "lr": "3.781e-05", "step": 5605, "steps": "23.78s,5605/16595" }, { "epoch": 1.6890629707743297, "eta": "72:42:37", "grad_norm": 0.0057, "loss": 0.0528, "lr": "3.781e-05", "step": 5606, "steps": "23.82s,5606/16595" }, { "epoch": 1.6893642663452848, "eta": "71:50:57", "grad_norm": 0.0042, "loss": 0.0531, "lr": "3.781e-05", "step": 5607, "steps": "23.54s,5607/16595" }, { "epoch": 1.68966556191624, "eta": "72:16:12", "grad_norm": 0.005, "loss": 0.0462, "lr": "3.780e-05", "step": 5608, "steps": "23.68s,5608/16595" }, { "epoch": 1.6899668574871949, "eta": "72:50:35", "grad_norm": 0.0045, "loss": 0.0532, "lr": "3.780e-05", "step": 5609, "steps": "23.87s,5609/16595" }, { "epoch": 1.69026815305815, "eta": "72:26:23", "grad_norm": 0.0049, "loss": 0.0588, "lr": "3.779e-05", "step": 5610, "steps": "23.74s,5610/16595" }, { "epoch": 1.6905694486291052, "eta": "73:04:26", "grad_norm": 0.0052, "loss": 0.0491, "lr": "3.779e-05", "step": 5611, "steps": "23.95s,5611/16595" }, { "epoch": 1.6908707442000601, "eta": "73:09:32", "grad_norm": 0.0065, "loss": 0.0662, "lr": "3.778e-05", "step": 5612, "steps": "23.98s,5612/16595" }, { "epoch": 1.6911720397710153, "eta": "71:39:27", "grad_norm": 0.0063, "loss": 0.0588, "lr": "3.778e-05", "step": 5613, "steps": "23.49s,5613/16595" }, { "epoch": 1.6914733353419704, "eta": "72:39:27", "grad_norm": 0.0092, "loss": 0.059, "lr": "3.778e-05", "step": 5614, "steps": "23.82s,5614/16595" }, { "epoch": 1.6917746309129256, "eta": "72:51:52", "grad_norm": 0.0051, "loss": 0.0634, "lr": "3.777e-05", "step": 5615, "steps": "23.89s,5615/16595" }, { "epoch": 1.6920759264838807, "eta": "71:32:47", "grad_norm": 0.0042, "loss": 0.0573, "lr": "3.777e-05", "step": 5616, "steps": "23.46s,5616/16595" }, { "epoch": 1.6923772220548359, "eta": "72:58:23", "grad_norm": 0.0047, "loss": 0.0581, "lr": "3.776e-05", "step": 5617, "steps": "23.93s,5617/16595" }, { "epoch": 1.692678517625791, "eta": "71:52:07", "grad_norm": 0.0047, "loss": 0.0573, "lr": "3.776e-05", "step": 5618, "steps": "23.57s,5618/16595" }, { "epoch": 1.692979813196746, "eta": "72:00:53", "grad_norm": 0.0057, "loss": 0.0511, "lr": "3.776e-05", "step": 5619, "steps": "23.62s,5619/16595" }, { "epoch": 1.693281108767701, "eta": "72:51:42", "grad_norm": 0.006, "loss": 0.0548, "lr": "3.775e-05", "step": 5620, "steps": "23.9s,5620/16595" }, { "epoch": 1.6935824043386563, "eta": "72:11:04", "grad_norm": 0.0065, "loss": 0.0534, "lr": "3.775e-05", "step": 5621, "steps": "23.68s,5621/16595" }, { "epoch": 1.6938836999096112, "eta": "72:16:09", "grad_norm": 0.0052, "loss": 0.0353, "lr": "3.774e-05", "step": 5622, "steps": "23.71s,5622/16595" }, { "epoch": 1.6941849954805663, "eta": "72:30:23", "grad_norm": 0.0054, "loss": 0.0524, "lr": "3.774e-05", "step": 5623, "steps": "23.79s,5623/16595" }, { "epoch": 1.6944862910515215, "eta": "72:20:51", "grad_norm": 0.0044, "loss": 0.0425, "lr": "3.774e-05", "step": 5624, "steps": "23.74s,5624/16595" }, { "epoch": 1.6947875866224766, "eta": "71:45:43", "grad_norm": 0.0061, "loss": 0.0727, "lr": "3.773e-05", "step": 5625, "steps": "23.55s,5625/16595" }, { "epoch": 1.6950888821934318, "eta": "73:03:56", "grad_norm": 0.005, "loss": 0.0384, "lr": "3.773e-05", "step": 5626, "steps": "23.98s,5626/16595" }, { "epoch": 1.695390177764387, "eta": "72:26:59", "grad_norm": 0.005, "loss": 0.0429, "lr": "3.772e-05", "step": 5627, "steps": "23.78s,5627/16595" }, { "epoch": 1.695691473335342, "eta": "72:32:04", "grad_norm": 0.0051, "loss": 0.0547, "lr": "3.772e-05", "step": 5628, "steps": "23.81s,5628/16595" }, { "epoch": 1.6959927689062972, "eta": "71:22:13", "grad_norm": 0.0052, "loss": 0.0553, "lr": "3.771e-05", "step": 5629, "steps": "23.43s,5629/16595" }, { "epoch": 1.6962940644772522, "eta": "71:56:33", "grad_norm": 0.0051, "loss": 0.052, "lr": "3.771e-05", "step": 5630, "steps": "23.62s,5630/16595" }, { "epoch": 1.6965953600482073, "eta": "71:52:30", "grad_norm": 0.0055, "loss": 0.0628, "lr": "3.771e-05", "step": 5631, "steps": "23.6s,5631/16595" }, { "epoch": 1.6968966556191623, "eta": "71:35:40", "grad_norm": 0.0058, "loss": 0.0482, "lr": "3.770e-05", "step": 5632, "steps": "23.51s,5632/16595" }, { "epoch": 1.6971979511901174, "eta": "71:51:43", "grad_norm": 0.0049, "loss": 0.0509, "lr": "3.770e-05", "step": 5633, "steps": "23.6s,5633/16595" }, { "epoch": 1.6974992467610726, "eta": "72:24:12", "grad_norm": 0.0043, "loss": 0.038, "lr": "3.769e-05", "step": 5634, "steps": "23.78s,5634/16595" }, { "epoch": 1.6978005423320277, "eta": "72:20:09", "grad_norm": 0.0046, "loss": 0.0456, "lr": "3.769e-05", "step": 5635, "steps": "23.76s,5635/16595" }, { "epoch": 1.6981018379029829, "eta": "72:06:58", "grad_norm": 0.0056, "loss": 0.0735, "lr": "3.769e-05", "step": 5636, "steps": "23.69s,5636/16595" }, { "epoch": 1.698403133473938, "eta": "72:35:48", "grad_norm": 0.0058, "loss": 0.0388, "lr": "3.768e-05", "step": 5637, "steps": "23.85s,5637/16595" }, { "epoch": 1.6987044290448932, "eta": "71:46:06", "grad_norm": 0.0051, "loss": 0.0593, "lr": "3.768e-05", "step": 5638, "steps": "23.58s,5638/16595" }, { "epoch": 1.6990057246158483, "eta": "72:18:34", "grad_norm": 0.005, "loss": 0.0551, "lr": "3.767e-05", "step": 5639, "steps": "23.76s,5639/16595" }, { "epoch": 1.6993070201868032, "eta": "72:49:13", "grad_norm": 0.0061, "loss": 0.0583, "lr": "3.767e-05", "step": 5640, "steps": "23.93s,5640/16595" }, { "epoch": 1.6996083157577584, "eta": "71:52:13", "grad_norm": 0.0047, "loss": 0.0446, "lr": "3.767e-05", "step": 5641, "steps": "23.62s,5641/16595" }, { "epoch": 1.6999096113287133, "eta": "72:39:17", "grad_norm": 0.0047, "loss": 0.051, "lr": "3.766e-05", "step": 5642, "steps": "23.88s,5642/16595" }, { "epoch": 1.7002109068996685, "eta": "72:46:11", "grad_norm": 0.0053, "loss": 0.044, "lr": "3.766e-05", "step": 5643, "steps": "23.92s,5643/16595" }, { "epoch": 1.7005122024706236, "eta": "72:11:07", "grad_norm": 0.0057, "loss": 0.0408, "lr": "3.765e-05", "step": 5644, "steps": "23.73s,5644/16595" }, { "epoch": 1.7008134980415788, "eta": "72:10:43", "grad_norm": 0.0063, "loss": 0.0487, "lr": "3.765e-05", "step": 5645, "steps": "23.73s,5645/16595" }, { "epoch": 1.701114793612534, "eta": "72:15:48", "grad_norm": 0.0045, "loss": 0.0578, "lr": "3.764e-05", "step": 5646, "steps": "23.76s,5646/16595" }, { "epoch": 1.701416089183489, "eta": "71:57:09", "grad_norm": 0.0045, "loss": 0.051, "lr": "3.764e-05", "step": 5647, "steps": "23.66s,5647/16595" }, { "epoch": 1.7017173847544442, "eta": "72:02:14", "grad_norm": 0.0065, "loss": 0.0381, "lr": "3.764e-05", "step": 5648, "steps": "23.69s,5648/16595" }, { "epoch": 1.7020186803253994, "eta": "71:50:53", "grad_norm": 0.0057, "loss": 0.0529, "lr": "3.763e-05", "step": 5649, "steps": "23.63s,5649/16595" }, { "epoch": 1.7023199758963543, "eta": "71:43:12", "grad_norm": 0.0051, "loss": 0.0458, "lr": "3.763e-05", "step": 5650, "steps": "23.59s,5650/16595" }, { "epoch": 1.7026212714673095, "eta": "72:01:03", "grad_norm": 0.0052, "loss": 0.059, "lr": "3.762e-05", "step": 5651, "steps": "23.69s,5651/16595" }, { "epoch": 1.7029225670382644, "eta": "71:00:28", "grad_norm": 0.0055, "loss": 0.0546, "lr": "3.762e-05", "step": 5652, "steps": "23.36s,5652/16595" }, { "epoch": 1.7032238626092195, "eta": "71:27:26", "grad_norm": 0.0061, "loss": 0.0391, "lr": "3.762e-05", "step": 5653, "steps": "23.51s,5653/16595" }, { "epoch": 1.7035251581801747, "eta": "72:27:13", "grad_norm": 0.0092, "loss": 0.0654, "lr": "3.761e-05", "step": 5654, "steps": "23.84s,5654/16595" }, { "epoch": 1.7038264537511298, "eta": "71:30:18", "grad_norm": 0.0055, "loss": 0.062, "lr": "3.761e-05", "step": 5655, "steps": "23.53s,5655/16595" }, { "epoch": 1.704127749322085, "eta": "71:09:51", "grad_norm": 0.005, "loss": 0.0603, "lr": "3.760e-05", "step": 5656, "steps": "23.42s,5656/16595" }, { "epoch": 1.7044290448930401, "eta": "71:31:20", "grad_norm": 0.0056, "loss": 0.0447, "lr": "3.760e-05", "step": 5657, "steps": "23.54s,5657/16595" }, { "epoch": 1.7047303404639953, "eta": "72:11:03", "grad_norm": 0.0057, "loss": 0.0427, "lr": "3.760e-05", "step": 5658, "steps": "23.76s,5658/16595" }, { "epoch": 1.7050316360349504, "eta": "73:05:20", "grad_norm": 0.0055, "loss": 0.0507, "lr": "3.759e-05", "step": 5659, "steps": "24.06s,5659/16595" }, { "epoch": 1.7053329316059054, "eta": "70:57:21", "grad_norm": 0.0054, "loss": 0.043, "lr": "3.759e-05", "step": 5660, "steps": "23.36s,5660/16595" }, { "epoch": 1.7056342271768605, "eta": "71:53:27", "grad_norm": 0.005, "loss": 0.0535, "lr": "3.758e-05", "step": 5661, "steps": "23.67s,5661/16595" }, { "epoch": 1.7059355227478155, "eta": "71:31:12", "grad_norm": 0.0045, "loss": 0.0597, "lr": "3.758e-05", "step": 5662, "steps": "23.55s,5662/16595" }, { "epoch": 1.7062368183187706, "eta": "72:01:47", "grad_norm": 0.0054, "loss": 0.056, "lr": "3.757e-05", "step": 5663, "steps": "23.72s,5663/16595" }, { "epoch": 1.7065381138897258, "eta": "71:39:31", "grad_norm": 0.0044, "loss": 0.0592, "lr": "3.757e-05", "step": 5664, "steps": "23.6s,5664/16595" }, { "epoch": 1.706839409460681, "eta": "71:42:46", "grad_norm": 0.0058, "loss": 0.0343, "lr": "3.757e-05", "step": 5665, "steps": "23.62s,5665/16595" }, { "epoch": 1.707140705031636, "eta": "71:40:33", "grad_norm": 0.0045, "loss": 0.0415, "lr": "3.756e-05", "step": 5666, "steps": "23.61s,5666/16595" }, { "epoch": 1.7074420006025912, "eta": "71:51:05", "grad_norm": 0.0058, "loss": 0.0642, "lr": "3.756e-05", "step": 5667, "steps": "23.67s,5667/16595" }, { "epoch": 1.7077432961735464, "eta": "71:45:14", "grad_norm": 0.0053, "loss": 0.0615, "lr": "3.755e-05", "step": 5668, "steps": "23.64s,5668/16595" }, { "epoch": 1.7080445917445015, "eta": "72:37:39", "grad_norm": 0.0046, "loss": 0.0343, "lr": "3.755e-05", "step": 5669, "steps": "23.93s,5669/16595" }, { "epoch": 1.7083458873154564, "eta": "71:11:40", "grad_norm": 0.0061, "loss": 0.0404, "lr": "3.755e-05", "step": 5670, "steps": "23.46s,5670/16595" }, { "epoch": 1.7086471828864116, "eta": "72:04:05", "grad_norm": 0.006, "loss": 0.0574, "lr": "3.754e-05", "step": 5671, "steps": "23.75s,5671/16595" }, { "epoch": 1.7089484784573665, "eta": "72:40:05", "grad_norm": 0.0053, "loss": 0.052, "lr": "3.754e-05", "step": 5672, "steps": "23.95s,5672/16595" }, { "epoch": 1.7092497740283217, "eta": "72:45:09", "grad_norm": 0.0052, "loss": 0.0536, "lr": "3.753e-05", "step": 5673, "steps": "23.98s,5673/16595" }, { "epoch": 1.7095510695992768, "eta": "71:53:47", "grad_norm": 0.0044, "loss": 0.0533, "lr": "3.753e-05", "step": 5674, "steps": "23.7s,5674/16595" }, { "epoch": 1.709852365170232, "eta": "71:29:44", "grad_norm": 0.0052, "loss": 0.0463, "lr": "3.753e-05", "step": 5675, "steps": "23.57s,5675/16595" }, { "epoch": 1.7101536607411871, "eta": "71:40:15", "grad_norm": 0.0061, "loss": 0.0449, "lr": "3.752e-05", "step": 5676, "steps": "23.63s,5676/16595" }, { "epoch": 1.7104549563121423, "eta": "71:43:30", "grad_norm": 0.0052, "loss": 0.0476, "lr": "3.752e-05", "step": 5677, "steps": "23.65s,5677/16595" }, { "epoch": 1.7107562518830974, "eta": "72:19:30", "grad_norm": 0.0049, "loss": 0.0544, "lr": "3.751e-05", "step": 5678, "steps": "23.85s,5678/16595" }, { "epoch": 1.7110575474540526, "eta": "72:33:39", "grad_norm": 0.0043, "loss": 0.0678, "lr": "3.751e-05", "step": 5679, "steps": "23.93s,5679/16595" }, { "epoch": 1.7113588430250075, "eta": "71:25:57", "grad_norm": 0.0053, "loss": 0.0584, "lr": "3.750e-05", "step": 5680, "steps": "23.56s,5680/16595" }, { "epoch": 1.7116601385959627, "eta": "71:34:39", "grad_norm": 0.0057, "loss": 0.0388, "lr": "3.750e-05", "step": 5681, "steps": "23.61s,5681/16595" }, { "epoch": 1.7119614341669176, "eta": "71:17:53", "grad_norm": 0.0062, "loss": 0.0441, "lr": "3.750e-05", "step": 5682, "steps": "23.52s,5682/16595" }, { "epoch": 1.7122627297378727, "eta": "71:42:57", "grad_norm": 0.0045, "loss": 0.0523, "lr": "3.749e-05", "step": 5683, "steps": "23.66s,5683/16595" }, { "epoch": 1.712564025308828, "eta": "71:51:39", "grad_norm": 0.0053, "loss": 0.0475, "lr": "3.749e-05", "step": 5684, "steps": "23.71s,5684/16595" }, { "epoch": 1.712865320879783, "eta": "72:05:48", "grad_norm": 0.005, "loss": 0.0567, "lr": "3.748e-05", "step": 5685, "steps": "23.79s,5685/16595" }, { "epoch": 1.7131666164507382, "eta": "72:36:19", "grad_norm": 0.0056, "loss": 0.0507, "lr": "3.748e-05", "step": 5686, "steps": "23.96s,5686/16595" }, { "epoch": 1.7134679120216934, "eta": "71:52:17", "grad_norm": 0.0071, "loss": 0.0475, "lr": "3.748e-05", "step": 5687, "steps": "23.72s,5687/16595" }, { "epoch": 1.7137692075926485, "eta": "72:00:59", "grad_norm": 0.005, "loss": 0.0626, "lr": "3.747e-05", "step": 5688, "steps": "23.77s,5688/16595" }, { "epoch": 1.7140705031636037, "eta": "71:27:52", "grad_norm": 0.005, "loss": 0.0568, "lr": "3.747e-05", "step": 5689, "steps": "23.59s,5689/16595" }, { "epoch": 1.7143717987345586, "eta": "71:12:56", "grad_norm": 0.0058, "loss": 0.0461, "lr": "3.746e-05", "step": 5690, "steps": "23.51s,5690/16595" }, { "epoch": 1.7146730943055137, "eta": "72:23:25", "grad_norm": 0.0064, "loss": 0.05, "lr": "3.746e-05", "step": 5691, "steps": "23.9s,5691/16595" }, { "epoch": 1.7149743898764687, "eta": "73:21:10", "grad_norm": 0.0052, "loss": 0.0535, "lr": "3.746e-05", "step": 5692, "steps": "24.22s,5692/16595" }, { "epoch": 1.7152756854474238, "eta": "71:59:00", "grad_norm": 0.0059, "loss": 0.0488, "lr": "3.745e-05", "step": 5693, "steps": "23.77s,5693/16595" }, { "epoch": 1.715576981018379, "eta": "71:24:05", "grad_norm": 0.0045, "loss": 0.0633, "lr": "3.745e-05", "step": 5694, "steps": "23.58s,5694/16595" }, { "epoch": 1.7158782765893341, "eta": "71:43:41", "grad_norm": 0.0056, "loss": 0.0434, "lr": "3.744e-05", "step": 5695, "steps": "23.69s,5695/16595" }, { "epoch": 1.7161795721602893, "eta": "71:45:06", "grad_norm": 0.0064, "loss": 0.059, "lr": "3.744e-05", "step": 5696, "steps": "23.7s,5696/16595" }, { "epoch": 1.7164808677312444, "eta": "73:30:03", "grad_norm": 0.0049, "loss": 0.049, "lr": "3.743e-05", "step": 5697, "steps": "24.28s,5697/16595" }, { "epoch": 1.7167821633021996, "eta": "72:24:16", "grad_norm": 0.0052, "loss": 0.0383, "lr": "3.743e-05", "step": 5698, "steps": "23.92s,5698/16595" }, { "epoch": 1.7170834588731547, "eta": "71:45:44", "grad_norm": 0.0052, "loss": 0.0469, "lr": "3.743e-05", "step": 5699, "steps": "23.71s,5699/16595" }, { "epoch": 1.7173847544441097, "eta": "70:54:29", "grad_norm": 0.0051, "loss": 0.0633, "lr": "3.742e-05", "step": 5700, "steps": "23.43s,5700/16595" }, { "epoch": 1.7176860500150648, "eta": "71:59:28", "grad_norm": 0.0051, "loss": 0.0574, "lr": "3.742e-05", "step": 5701, "steps": "23.79s,5701/16595" }, { "epoch": 1.7179873455860197, "eta": "72:17:13", "grad_norm": 0.0055, "loss": 0.0529, "lr": "3.741e-05", "step": 5702, "steps": "23.89s,5702/16595" }, { "epoch": 1.7182886411569749, "eta": "72:11:23", "grad_norm": 0.0055, "loss": 0.0583, "lr": "3.741e-05", "step": 5703, "steps": "23.86s,5703/16595" }, { "epoch": 1.71858993672793, "eta": "72:16:25", "grad_norm": 0.006, "loss": 0.0519, "lr": "3.741e-05", "step": 5704, "steps": "23.89s,5704/16595" }, { "epoch": 1.7188912322988852, "eta": "71:17:57", "grad_norm": 0.0062, "loss": 0.0594, "lr": "3.740e-05", "step": 5705, "steps": "23.57s,5705/16595" }, { "epoch": 1.7191925278698403, "eta": "73:01:00", "grad_norm": 0.006, "loss": 0.0532, "lr": "3.740e-05", "step": 5706, "steps": "24.14s,5706/16595" }, { "epoch": 1.7194938234407955, "eta": "71:28:03", "grad_norm": 0.0099, "loss": 0.0447, "lr": "3.739e-05", "step": 5707, "steps": "23.63s,5707/16595" }, { "epoch": 1.7197951190117506, "eta": "71:27:39", "grad_norm": 0.0053, "loss": 0.0637, "lr": "3.739e-05", "step": 5708, "steps": "23.63s,5708/16595" }, { "epoch": 1.7200964145827058, "eta": "71:29:05", "grad_norm": 0.006, "loss": 0.0423, "lr": "3.738e-05", "step": 5709, "steps": "23.64s,5709/16595" }, { "epoch": 1.7203977101536607, "eta": "72:44:53", "grad_norm": 0.0072, "loss": 0.0752, "lr": "3.738e-05", "step": 5710, "steps": "24.06s,5710/16595" }, { "epoch": 1.7206990057246159, "eta": "72:20:54", "grad_norm": 0.0059, "loss": 0.0507, "lr": "3.738e-05", "step": 5711, "steps": "23.93s,5711/16595" }, { "epoch": 1.7210003012955708, "eta": "72:29:34", "grad_norm": 0.0062, "loss": 0.0552, "lr": "3.737e-05", "step": 5712, "steps": "23.98s,5712/16595" }, { "epoch": 1.721301596866526, "eta": "72:32:48", "grad_norm": 0.0118, "loss": 0.0515, "lr": "3.737e-05", "step": 5713, "steps": "24.0s,5713/16595" }, { "epoch": 1.721602892437481, "eta": "71:52:30", "grad_norm": 0.0047, "loss": 0.0564, "lr": "3.736e-05", "step": 5714, "steps": "23.78s,5714/16595" }, { "epoch": 1.7219041880084363, "eta": "71:14:01", "grad_norm": 0.0053, "loss": 0.0599, "lr": "3.736e-05", "step": 5715, "steps": "23.57s,5715/16595" }, { "epoch": 1.7222054835793914, "eta": "70:33:44", "grad_norm": 0.0052, "loss": 0.044, "lr": "3.736e-05", "step": 5716, "steps": "23.35s,5716/16595" }, { "epoch": 1.7225067791503466, "eta": "72:11:15", "grad_norm": 0.0053, "loss": 0.0408, "lr": "3.735e-05", "step": 5717, "steps": "23.89s,5717/16595" }, { "epoch": 1.7228080747213017, "eta": "71:52:43", "grad_norm": 0.0056, "loss": 0.053, "lr": "3.735e-05", "step": 5718, "steps": "23.79s,5718/16595" }, { "epoch": 1.7231093702922569, "eta": "71:05:12", "grad_norm": 0.0056, "loss": 0.0379, "lr": "3.734e-05", "step": 5719, "steps": "23.53s,5719/16595" }, { "epoch": 1.7234106658632118, "eta": "71:51:56", "grad_norm": 0.0053, "loss": 0.0425, "lr": "3.734e-05", "step": 5720, "steps": "23.79s,5720/16595" }, { "epoch": 1.723711961434167, "eta": "72:51:20", "grad_norm": 0.0044, "loss": 0.0485, "lr": "3.733e-05", "step": 5721, "steps": "24.12s,5721/16595" }, { "epoch": 1.7240132570051219, "eta": "72:02:01", "grad_norm": 0.005, "loss": 0.0562, "lr": "3.733e-05", "step": 5722, "steps": "23.85s,5722/16595" }, { "epoch": 1.724314552576077, "eta": "71:16:19", "grad_norm": 0.0066, "loss": 0.0468, "lr": "3.733e-05", "step": 5723, "steps": "23.6s,5723/16595" }, { "epoch": 1.7246158481470322, "eta": "71:57:35", "grad_norm": 0.0049, "loss": 0.0558, "lr": "3.732e-05", "step": 5724, "steps": "23.83s,5724/16595" }, { "epoch": 1.7249171437179873, "eta": "71:53:34", "grad_norm": 0.0049, "loss": 0.0508, "lr": "3.732e-05", "step": 5725, "steps": "23.81s,5725/16595" }, { "epoch": 1.7252184392889425, "eta": "71:06:04", "grad_norm": 0.0052, "loss": 0.0557, "lr": "3.731e-05", "step": 5726, "steps": "23.55s,5726/16595" }, { "epoch": 1.7255197348598976, "eta": "72:50:44", "grad_norm": 0.0053, "loss": 0.0389, "lr": "3.731e-05", "step": 5727, "steps": "24.13s,5727/16595" }, { "epoch": 1.7258210304308528, "eta": "71:46:57", "grad_norm": 0.005, "loss": 0.0612, "lr": "3.731e-05", "step": 5728, "steps": "23.78s,5728/16595" }, { "epoch": 1.726122326001808, "eta": "70:43:10", "grad_norm": 0.0057, "loss": 0.0483, "lr": "3.730e-05", "step": 5729, "steps": "23.43s,5729/16595" }, { "epoch": 1.7264236215727629, "eta": "71:47:58", "grad_norm": 0.006, "loss": 0.0484, "lr": "3.730e-05", "step": 5730, "steps": "23.79s,5730/16595" }, { "epoch": 1.726724917143718, "eta": "71:34:54", "grad_norm": 0.0054, "loss": 0.0429, "lr": "3.729e-05", "step": 5731, "steps": "23.72s,5731/16595" }, { "epoch": 1.727026212714673, "eta": "71:48:59", "grad_norm": 0.0053, "loss": 0.0505, "lr": "3.729e-05", "step": 5732, "steps": "23.8s,5732/16595" }, { "epoch": 1.727327508285628, "eta": "71:34:06", "grad_norm": 0.0054, "loss": 0.0541, "lr": "3.728e-05", "step": 5733, "steps": "23.72s,5733/16595" }, { "epoch": 1.7276288038565832, "eta": "71:42:45", "grad_norm": 0.0056, "loss": 0.0471, "lr": "3.728e-05", "step": 5734, "steps": "23.77s,5734/16595" }, { "epoch": 1.7279300994275384, "eta": "71:44:10", "grad_norm": 0.0062, "loss": 0.0557, "lr": "3.728e-05", "step": 5735, "steps": "23.78s,5735/16595" }, { "epoch": 1.7282313949984935, "eta": "72:07:18", "grad_norm": 0.0041, "loss": 0.0531, "lr": "3.727e-05", "step": 5736, "steps": "23.91s,5736/16595" }, { "epoch": 1.7285326905694487, "eta": "72:08:43", "grad_norm": 0.0052, "loss": 0.0535, "lr": "3.727e-05", "step": 5737, "steps": "23.92s,5737/16595" }, { "epoch": 1.7288339861404038, "eta": "72:17:22", "grad_norm": 0.0064, "loss": 0.0422, "lr": "3.726e-05", "step": 5738, "steps": "23.97s,5738/16595" }, { "epoch": 1.729135281711359, "eta": "70:48:18", "grad_norm": 0.0055, "loss": 0.0436, "lr": "3.726e-05", "step": 5739, "steps": "23.48s,5739/16595" }, { "epoch": 1.729436577282314, "eta": "72:14:45", "grad_norm": 0.0065, "loss": 0.0469, "lr": "3.726e-05", "step": 5740, "steps": "23.96s,5740/16595" }, { "epoch": 1.729737872853269, "eta": "71:14:40", "grad_norm": 0.0056, "loss": 0.0615, "lr": "3.725e-05", "step": 5741, "steps": "23.63s,5741/16595" }, { "epoch": 1.730039168424224, "eta": "71:03:25", "grad_norm": 0.0049, "loss": 0.0397, "lr": "3.725e-05", "step": 5742, "steps": "23.57s,5742/16595" }, { "epoch": 1.7303404639951792, "eta": "71:12:04", "grad_norm": 0.0045, "loss": 0.0491, "lr": "3.724e-05", "step": 5743, "steps": "23.62s,5743/16595" }, { "epoch": 1.7306417595661343, "eta": "72:20:24", "grad_norm": 0.0062, "loss": 0.0497, "lr": "3.724e-05", "step": 5744, "steps": "24.0s,5744/16595" }, { "epoch": 1.7309430551370895, "eta": "71:40:13", "grad_norm": 0.0049, "loss": 0.0539, "lr": "3.723e-05", "step": 5745, "steps": "23.78s,5745/16595" }, { "epoch": 1.7312443507080446, "eta": "71:10:53", "grad_norm": 0.005, "loss": 0.0578, "lr": "3.723e-05", "step": 5746, "steps": "23.62s,5746/16595" }, { "epoch": 1.7315456462789998, "eta": "71:35:48", "grad_norm": 0.0049, "loss": 0.0418, "lr": "3.723e-05", "step": 5747, "steps": "23.76s,5747/16595" }, { "epoch": 1.731846941849955, "eta": "71:13:43", "grad_norm": 0.0049, "loss": 0.07, "lr": "3.722e-05", "step": 5748, "steps": "23.64s,5748/16595" }, { "epoch": 1.73214823742091, "eta": "71:42:14", "grad_norm": 0.0054, "loss": 0.044, "lr": "3.722e-05", "step": 5749, "steps": "23.8s,5749/16595" }, { "epoch": 1.732449532991865, "eta": "71:31:00", "grad_norm": 0.0046, "loss": 0.0397, "lr": "3.721e-05", "step": 5750, "steps": "23.74s,5750/16595" }, { "epoch": 1.7327508285628201, "eta": "72:06:45", "grad_norm": 0.005, "loss": 0.0662, "lr": "3.721e-05", "step": 5751, "steps": "23.94s,5751/16595" }, { "epoch": 1.733052124133775, "eta": "71:15:45", "grad_norm": 0.0051, "loss": 0.0657, "lr": "3.721e-05", "step": 5752, "steps": "23.66s,5752/16595" }, { "epoch": 1.7333534197047302, "eta": "71:06:19", "grad_norm": 0.006, "loss": 0.0445, "lr": "3.720e-05", "step": 5753, "steps": "23.61s,5753/16595" }, { "epoch": 1.7336547152756854, "eta": "72:07:21", "grad_norm": 0.0058, "loss": 0.0461, "lr": "3.720e-05", "step": 5754, "steps": "23.95s,5754/16595" }, { "epoch": 1.7339560108466405, "eta": "71:07:20", "grad_norm": 0.0048, "loss": 0.0526, "lr": "3.719e-05", "step": 5755, "steps": "23.62s,5755/16595" }, { "epoch": 1.7342573064175957, "eta": "72:15:36", "grad_norm": 0.0047, "loss": 0.054, "lr": "3.719e-05", "step": 5756, "steps": "24.0s,5756/16595" }, { "epoch": 1.7345586019885508, "eta": "70:43:04", "grad_norm": 0.0073, "loss": 0.0415, "lr": "3.718e-05", "step": 5757, "steps": "23.49s,5757/16595" }, { "epoch": 1.734859897559506, "eta": "72:59:57", "grad_norm": 0.0047, "loss": 0.0591, "lr": "3.718e-05", "step": 5758, "steps": "24.25s,5758/16595" }, { "epoch": 1.7351611931304611, "eta": "71:36:28", "grad_norm": 0.0054, "loss": 0.0561, "lr": "3.718e-05", "step": 5759, "steps": "23.79s,5759/16595" }, { "epoch": 1.735462488701416, "eta": "72:12:11", "grad_norm": 0.0053, "loss": 0.0588, "lr": "3.717e-05", "step": 5760, "steps": "23.99s,5760/16595" }, { "epoch": 1.7357637842723712, "eta": "71:21:14", "grad_norm": 0.0056, "loss": 0.0499, "lr": "3.717e-05", "step": 5761, "steps": "23.71s,5761/16595" }, { "epoch": 1.7360650798433261, "eta": "71:04:35", "grad_norm": 0.0051, "loss": 0.0512, "lr": "3.716e-05", "step": 5762, "steps": "23.62s,5762/16595" }, { "epoch": 1.7363663754142813, "eta": "72:01:58", "grad_norm": 0.0052, "loss": 0.0637, "lr": "3.716e-05", "step": 5763, "steps": "23.94s,5763/16595" }, { "epoch": 1.7366676709852364, "eta": "72:14:12", "grad_norm": 0.0059, "loss": 0.0546, "lr": "3.716e-05", "step": 5764, "steps": "24.01s,5764/16595" }, { "epoch": 1.7369689665561916, "eta": "71:01:36", "grad_norm": 0.0053, "loss": 0.0388, "lr": "3.715e-05", "step": 5765, "steps": "23.61s,5765/16595" }, { "epoch": 1.7372702621271467, "eta": "71:01:12", "grad_norm": 0.0063, "loss": 0.0472, "lr": "3.715e-05", "step": 5766, "steps": "23.61s,5766/16595" }, { "epoch": 1.737571557698102, "eta": "71:02:37", "grad_norm": 0.0201, "loss": 0.0581, "lr": "3.714e-05", "step": 5767, "steps": "23.62s,5767/16595" }, { "epoch": 1.737872853269057, "eta": "71:09:26", "grad_norm": 0.012, "loss": 0.0557, "lr": "3.714e-05", "step": 5768, "steps": "23.66s,5768/16595" }, { "epoch": 1.7381741488400122, "eta": "71:19:52", "grad_norm": 0.0051, "loss": 0.0514, "lr": "3.713e-05", "step": 5769, "steps": "23.72s,5769/16595" }, { "epoch": 1.7384754444109671, "eta": "70:48:48", "grad_norm": 0.0062, "loss": 0.0536, "lr": "3.713e-05", "step": 5770, "steps": "23.55s,5770/16595" }, { "epoch": 1.7387767399819223, "eta": "71:15:28", "grad_norm": 0.0056, "loss": 0.0461, "lr": "3.713e-05", "step": 5771, "steps": "23.7s,5771/16595" }, { "epoch": 1.7390780355528772, "eta": "70:28:11", "grad_norm": 0.0048, "loss": 0.051, "lr": "3.712e-05", "step": 5772, "steps": "23.44s,5772/16595" }, { "epoch": 1.7393793311238324, "eta": "70:49:26", "grad_norm": 0.0056, "loss": 0.0439, "lr": "3.712e-05", "step": 5773, "steps": "23.56s,5773/16595" }, { "epoch": 1.7396806266947875, "eta": "71:21:30", "grad_norm": 0.0054, "loss": 0.0509, "lr": "3.711e-05", "step": 5774, "steps": "23.74s,5774/16595" }, { "epoch": 1.7399819222657427, "eta": "70:54:03", "grad_norm": 0.0058, "loss": 0.0576, "lr": "3.711e-05", "step": 5775, "steps": "23.59s,5775/16595" }, { "epoch": 1.7402832178366978, "eta": "70:59:04", "grad_norm": 0.0048, "loss": 0.0451, "lr": "3.711e-05", "step": 5776, "steps": "23.62s,5776/16595" }, { "epoch": 1.740584513407653, "eta": "71:29:20", "grad_norm": 0.0055, "loss": 0.0474, "lr": "3.710e-05", "step": 5777, "steps": "23.79s,5777/16595" }, { "epoch": 1.7408858089786081, "eta": "70:34:51", "grad_norm": 0.0066, "loss": 0.0464, "lr": "3.710e-05", "step": 5778, "steps": "23.49s,5778/16595" }, { "epoch": 1.7411871045495633, "eta": "71:26:44", "grad_norm": 0.0055, "loss": 0.0588, "lr": "3.709e-05", "step": 5779, "steps": "23.78s,5779/16595" }, { "epoch": 1.7414884001205182, "eta": "71:06:31", "grad_norm": 0.005, "loss": 0.057, "lr": "3.709e-05", "step": 5780, "steps": "23.67s,5780/16595" }, { "epoch": 1.7417896956914733, "eta": "71:04:19", "grad_norm": 0.0053, "loss": 0.054, "lr": "3.708e-05", "step": 5781, "steps": "23.66s,5781/16595" }, { "epoch": 1.7420909912624283, "eta": "70:53:06", "grad_norm": 0.0053, "loss": 0.053, "lr": "3.708e-05", "step": 5782, "steps": "23.6s,5782/16595" }, { "epoch": 1.7423922868333834, "eta": "71:41:22", "grad_norm": 0.0049, "loss": 0.0572, "lr": "3.708e-05", "step": 5783, "steps": "23.87s,5783/16595" }, { "epoch": 1.7426935824043386, "eta": "71:10:20", "grad_norm": 0.0047, "loss": 0.0594, "lr": "3.707e-05", "step": 5784, "steps": "23.7s,5784/16595" }, { "epoch": 1.7429948779752937, "eta": "71:06:20", "grad_norm": 0.0047, "loss": 0.0581, "lr": "3.707e-05", "step": 5785, "steps": "23.68s,5785/16595" }, { "epoch": 1.7432961735462489, "eta": "71:52:47", "grad_norm": 0.0064, "loss": 0.0469, "lr": "3.706e-05", "step": 5786, "steps": "23.94s,5786/16595" }, { "epoch": 1.743597469117204, "eta": "72:05:00", "grad_norm": 0.0047, "loss": 0.0541, "lr": "3.706e-05", "step": 5787, "steps": "24.01s,5787/16595" }, { "epoch": 1.7438987646881592, "eta": "70:38:08", "grad_norm": 0.0052, "loss": 0.0488, "lr": "3.706e-05", "step": 5788, "steps": "23.53s,5788/16595" }, { "epoch": 1.7442000602591143, "eta": "71:28:10", "grad_norm": 0.0052, "loss": 0.0486, "lr": "3.705e-05", "step": 5789, "steps": "23.81s,5789/16595" }, { "epoch": 1.7445013558300693, "eta": "70:58:58", "grad_norm": 0.0061, "loss": 0.0585, "lr": "3.705e-05", "step": 5790, "steps": "23.65s,5790/16595" }, { "epoch": 1.7448026514010244, "eta": "71:27:23", "grad_norm": 0.0047, "loss": 0.0519, "lr": "3.704e-05", "step": 5791, "steps": "23.81s,5791/16595" }, { "epoch": 1.7451039469719793, "eta": "71:32:23", "grad_norm": 0.0056, "loss": 0.0538, "lr": "3.704e-05", "step": 5792, "steps": "23.84s,5792/16595" }, { "epoch": 1.7454052425429345, "eta": "71:01:23", "grad_norm": 0.0046, "loss": 0.0486, "lr": "3.703e-05", "step": 5793, "steps": "23.67s,5793/16595" }, { "epoch": 1.7457065381138896, "eta": "70:50:11", "grad_norm": 0.0048, "loss": 0.05, "lr": "3.703e-05", "step": 5794, "steps": "23.61s,5794/16595" }, { "epoch": 1.7460078336848448, "eta": "71:36:36", "grad_norm": 0.0045, "loss": 0.05, "lr": "3.703e-05", "step": 5795, "steps": "23.87s,5795/16595" }, { "epoch": 1.7463091292558, "eta": "71:21:48", "grad_norm": 0.0052, "loss": 0.0518, "lr": "3.702e-05", "step": 5796, "steps": "23.79s,5796/16595" }, { "epoch": 1.746610424826755, "eta": "70:41:48", "grad_norm": 0.0049, "loss": 0.0464, "lr": "3.702e-05", "step": 5797, "steps": "23.57s,5797/16595" }, { "epoch": 1.7469117203977103, "eta": "70:54:01", "grad_norm": 0.0046, "loss": 0.0592, "lr": "3.701e-05", "step": 5798, "steps": "23.64s,5798/16595" }, { "epoch": 1.7472130159686654, "eta": "71:13:25", "grad_norm": 0.0057, "loss": 0.0418, "lr": "3.701e-05", "step": 5799, "steps": "23.75s,5799/16595" }, { "epoch": 1.7475143115396203, "eta": "71:50:48", "grad_norm": 0.0066, "loss": 0.0516, "lr": "3.701e-05", "step": 5800, "steps": "23.96s,5800/16595" }, { "epoch": 1.7478156071105755, "eta": "121:54:44", "grad_norm": 0.0051, "loss": 0.0496, "lr": "3.700e-05", "step": 5801, "steps": "40.66s,5801/16595" }, { "epoch": 1.7481169026815304, "eta": "70:59:38", "grad_norm": 0.0053, "loss": 0.0565, "lr": "3.700e-05", "step": 5802, "steps": "23.68s,5802/16595" }, { "epoch": 1.7484181982524856, "eta": "71:24:25", "grad_norm": 0.0045, "loss": 0.065, "lr": "3.699e-05", "step": 5803, "steps": "23.82s,5803/16595" }, { "epoch": 1.7487194938234407, "eta": "70:39:03", "grad_norm": 0.0049, "loss": 0.0509, "lr": "3.699e-05", "step": 5804, "steps": "23.57s,5804/16595" }, { "epoch": 1.7490207893943959, "eta": "71:11:02", "grad_norm": 0.0046, "loss": 0.0638, "lr": "3.698e-05", "step": 5805, "steps": "23.75s,5805/16595" }, { "epoch": 1.749322084965351, "eta": "70:32:53", "grad_norm": 0.0051, "loss": 0.0622, "lr": "3.698e-05", "step": 5806, "steps": "23.54s,5806/16595" }, { "epoch": 1.7496233805363062, "eta": "70:59:27", "grad_norm": 0.0067, "loss": 0.0428, "lr": "3.698e-05", "step": 5807, "steps": "23.69s,5807/16595" }, { "epoch": 1.7499246761072613, "eta": "70:46:28", "grad_norm": 0.0053, "loss": 0.071, "lr": "3.697e-05", "step": 5808, "steps": "23.62s,5808/16595" }, { "epoch": 1.7502259716782165, "eta": "70:40:41", "grad_norm": 0.0198, "loss": 0.0622, "lr": "3.697e-05", "step": 5809, "steps": "23.59s,5809/16595" }, { "epoch": 1.7505272672491714, "eta": "71:48:36", "grad_norm": 0.0054, "loss": 0.0562, "lr": "3.696e-05", "step": 5810, "steps": "23.97s,5810/16595" }, { "epoch": 1.7508285628201266, "eta": "70:30:55", "grad_norm": 0.0059, "loss": 0.05, "lr": "3.696e-05", "step": 5811, "steps": "23.54s,5811/16595" }, { "epoch": 1.7511298583910817, "eta": "70:50:17", "grad_norm": 0.005, "loss": 0.0485, "lr": "3.696e-05", "step": 5812, "steps": "23.65s,5812/16595" }, { "epoch": 1.7514311539620366, "eta": "71:07:52", "grad_norm": 0.0045, "loss": 0.0336, "lr": "3.695e-05", "step": 5813, "steps": "23.75s,5813/16595" }, { "epoch": 1.7517324495329918, "eta": "71:27:14", "grad_norm": 0.0052, "loss": 0.0455, "lr": "3.695e-05", "step": 5814, "steps": "23.86s,5814/16595" }, { "epoch": 1.752033745103947, "eta": "71:14:16", "grad_norm": 0.0042, "loss": 0.0555, "lr": "3.694e-05", "step": 5815, "steps": "23.79s,5815/16595" }, { "epoch": 1.752335040674902, "eta": "69:38:39", "grad_norm": 0.0051, "loss": 0.0492, "lr": "3.694e-05", "step": 5816, "steps": "23.26s,5816/16595" }, { "epoch": 1.7526363362458572, "eta": "70:23:10", "grad_norm": 0.0049, "loss": 0.0685, "lr": "3.693e-05", "step": 5817, "steps": "23.51s,5817/16595" }, { "epoch": 1.7529376318168124, "eta": "71:18:28", "grad_norm": 0.0047, "loss": 0.0582, "lr": "3.693e-05", "step": 5818, "steps": "23.82s,5818/16595" }, { "epoch": 1.7532389273877675, "eta": "71:21:39", "grad_norm": 0.0051, "loss": 0.051, "lr": "3.693e-05", "step": 5819, "steps": "23.84s,5819/16595" }, { "epoch": 1.7535402229587225, "eta": "69:49:40", "grad_norm": 0.0046, "loss": 0.0604, "lr": "3.692e-05", "step": 5820, "steps": "23.33s,5820/16595" }, { "epoch": 1.7538415185296776, "eta": "70:39:34", "grad_norm": 0.0046, "loss": 0.0446, "lr": "3.692e-05", "step": 5821, "steps": "23.61s,5821/16595" }, { "epoch": 1.7541428141006328, "eta": "71:07:54", "grad_norm": 0.0041, "loss": 0.0562, "lr": "3.691e-05", "step": 5822, "steps": "23.77s,5822/16595" }, { "epoch": 1.7544441096715877, "eta": "71:38:01", "grad_norm": 0.0051, "loss": 0.0515, "lr": "3.691e-05", "step": 5823, "steps": "23.94s,5823/16595" }, { "epoch": 1.7547454052425429, "eta": "70:58:08", "grad_norm": 0.0077, "loss": 0.0546, "lr": "3.690e-05", "step": 5824, "steps": "23.72s,5824/16595" }, { "epoch": 1.755046700813498, "eta": "71:01:19", "grad_norm": 0.006, "loss": 0.0445, "lr": "3.690e-05", "step": 5825, "steps": "23.74s,5825/16595" }, { "epoch": 1.7553479963844532, "eta": "70:41:11", "grad_norm": 0.0065, "loss": 0.0657, "lr": "3.690e-05", "step": 5826, "steps": "23.63s,5826/16595" }, { "epoch": 1.7556492919554083, "eta": "71:13:06", "grad_norm": 0.0043, "loss": 0.0621, "lr": "3.689e-05", "step": 5827, "steps": "23.81s,5827/16595" }, { "epoch": 1.7559505875263635, "eta": "71:01:56", "grad_norm": 0.007, "loss": 0.0588, "lr": "3.689e-05", "step": 5828, "steps": "23.75s,5828/16595" }, { "epoch": 1.7562518830973186, "eta": "70:48:58", "grad_norm": 0.0046, "loss": 0.0459, "lr": "3.688e-05", "step": 5829, "steps": "23.68s,5829/16595" }, { "epoch": 1.7565531786682735, "eta": "71:04:44", "grad_norm": 0.0055, "loss": 0.063, "lr": "3.688e-05", "step": 5830, "steps": "23.77s,5830/16595" }, { "epoch": 1.7568544742392287, "eta": "71:31:14", "grad_norm": 0.0044, "loss": 0.0461, "lr": "3.688e-05", "step": 5831, "steps": "23.92s,5831/16595" }, { "epoch": 1.7571557698101838, "eta": "70:47:47", "grad_norm": 0.0059, "loss": 0.0646, "lr": "3.687e-05", "step": 5832, "steps": "23.68s,5832/16595" }, { "epoch": 1.7574570653811388, "eta": "70:07:56", "grad_norm": 0.0051, "loss": 0.0495, "lr": "3.687e-05", "step": 5833, "steps": "23.46s,5833/16595" }, { "epoch": 1.757758360952094, "eta": "71:28:15", "grad_norm": 0.0056, "loss": 0.0641, "lr": "3.686e-05", "step": 5834, "steps": "23.91s,5834/16595" }, { "epoch": 1.758059656523049, "eta": "71:13:30", "grad_norm": 0.0049, "loss": 0.053, "lr": "3.686e-05", "step": 5835, "steps": "23.83s,5835/16595" }, { "epoch": 1.7583609520940042, "eta": "70:55:11", "grad_norm": 0.0043, "loss": 0.0537, "lr": "3.685e-05", "step": 5836, "steps": "23.73s,5836/16595" }, { "epoch": 1.7586622476649594, "eta": "71:28:51", "grad_norm": 0.0047, "loss": 0.0495, "lr": "3.685e-05", "step": 5837, "steps": "23.92s,5837/16595" }, { "epoch": 1.7589635432359145, "eta": "71:05:09", "grad_norm": 0.0051, "loss": 0.0521, "lr": "3.685e-05", "step": 5838, "steps": "23.79s,5838/16595" }, { "epoch": 1.7592648388068697, "eta": "71:26:15", "grad_norm": 0.0048, "loss": 0.0584, "lr": "3.684e-05", "step": 5839, "steps": "23.91s,5839/16595" }, { "epoch": 1.7595661343778246, "eta": "71:18:41", "grad_norm": 0.0046, "loss": 0.0388, "lr": "3.684e-05", "step": 5840, "steps": "23.87s,5840/16595" }, { "epoch": 1.7598674299487798, "eta": "71:12:55", "grad_norm": 0.006, "loss": 0.0468, "lr": "3.683e-05", "step": 5841, "steps": "23.84s,5841/16595" }, { "epoch": 1.760168725519735, "eta": "70:56:23", "grad_norm": 0.0045, "loss": 0.039, "lr": "3.683e-05", "step": 5842, "steps": "23.75s,5842/16595" }, { "epoch": 1.7604700210906898, "eta": "71:53:20", "grad_norm": 0.0051, "loss": 0.0662, "lr": "3.682e-05", "step": 5843, "steps": "24.07s,5843/16595" }, { "epoch": 1.760771316661645, "eta": "71:04:33", "grad_norm": 0.0054, "loss": 0.0594, "lr": "3.682e-05", "step": 5844, "steps": "23.8s,5844/16595" }, { "epoch": 1.7610726122326001, "eta": "70:39:05", "grad_norm": 0.0063, "loss": 0.0493, "lr": "3.682e-05", "step": 5845, "steps": "23.66s,5845/16595" }, { "epoch": 1.7613739078035553, "eta": "71:14:31", "grad_norm": 0.0065, "loss": 0.0537, "lr": "3.681e-05", "step": 5846, "steps": "23.86s,5846/16595" }, { "epoch": 1.7616752033745104, "eta": "71:01:34", "grad_norm": 0.0047, "loss": 0.0497, "lr": "3.681e-05", "step": 5847, "steps": "23.79s,5847/16595" }, { "epoch": 1.7619764989454656, "eta": "69:24:27", "grad_norm": 0.0062, "loss": 0.0602, "lr": "3.680e-05", "step": 5848, "steps": "23.25s,5848/16595" }, { "epoch": 1.7622777945164207, "eta": "70:33:55", "grad_norm": 0.0083, "loss": 0.0352, "lr": "3.680e-05", "step": 5849, "steps": "23.64s,5849/16595" }, { "epoch": 1.7625790900873757, "eta": "70:03:05", "grad_norm": 0.0049, "loss": 0.0574, "lr": "3.680e-05", "step": 5850, "steps": "23.47s,5850/16595" }, { "epoch": 1.7628803856583308, "eta": "70:24:10", "grad_norm": 0.005, "loss": 0.0556, "lr": "3.679e-05", "step": 5851, "steps": "23.59s,5851/16595" }, { "epoch": 1.763181681229286, "eta": "71:19:17", "grad_norm": 0.0072, "loss": 0.0483, "lr": "3.679e-05", "step": 5852, "steps": "23.9s,5852/16595" }, { "epoch": 1.763482976800241, "eta": "69:40:25", "grad_norm": 0.0047, "loss": 0.0381, "lr": "3.678e-05", "step": 5853, "steps": "23.35s,5853/16595" }, { "epoch": 1.763784272371196, "eta": "70:46:16", "grad_norm": 0.0065, "loss": 0.0451, "lr": "3.678e-05", "step": 5854, "steps": "23.72s,5854/16595" }, { "epoch": 1.7640855679421512, "eta": "70:51:15", "grad_norm": 0.0063, "loss": 0.0596, "lr": "3.677e-05", "step": 5855, "steps": "23.75s,5855/16595" }, { "epoch": 1.7643868635131064, "eta": "69:33:53", "grad_norm": 0.005, "loss": 0.0514, "lr": "3.677e-05", "step": 5856, "steps": "23.32s,5856/16595" }, { "epoch": 1.7646881590840615, "eta": "70:11:05", "grad_norm": 0.0075, "loss": 0.0528, "lr": "3.677e-05", "step": 5857, "steps": "23.53s,5857/16595" }, { "epoch": 1.7649894546550167, "eta": "71:29:25", "grad_norm": 0.0059, "loss": 0.0523, "lr": "3.676e-05", "step": 5858, "steps": "23.97s,5858/16595" }, { "epoch": 1.7652907502259718, "eta": "71:07:33", "grad_norm": 0.0054, "loss": 0.0553, "lr": "3.676e-05", "step": 5859, "steps": "23.85s,5859/16595" }, { "epoch": 1.7655920457969267, "eta": "71:00:00", "grad_norm": 0.0079, "loss": 0.0531, "lr": "3.675e-05", "step": 5860, "steps": "23.81s,5860/16595" }, { "epoch": 1.765893341367882, "eta": "70:50:39", "grad_norm": 0.0054, "loss": 0.0582, "lr": "3.675e-05", "step": 5861, "steps": "23.76s,5861/16595" }, { "epoch": 1.766194636938837, "eta": "70:32:22", "grad_norm": 0.0066, "loss": 0.0558, "lr": "3.674e-05", "step": 5862, "steps": "23.66s,5862/16595" }, { "epoch": 1.766495932509792, "eta": "70:24:49", "grad_norm": 0.0052, "loss": 0.0391, "lr": "3.674e-05", "step": 5863, "steps": "23.62s,5863/16595" }, { "epoch": 1.7667972280807471, "eta": "71:03:47", "grad_norm": 0.0044, "loss": 0.0539, "lr": "3.674e-05", "step": 5864, "steps": "23.84s,5864/16595" }, { "epoch": 1.7670985236517023, "eta": "71:21:16", "grad_norm": 0.0063, "loss": 0.0579, "lr": "3.673e-05", "step": 5865, "steps": "23.94s,5865/16595" }, { "epoch": 1.7673998192226574, "eta": "69:49:40", "grad_norm": 0.0071, "loss": 0.0486, "lr": "3.673e-05", "step": 5866, "steps": "23.43s,5866/16595" }, { "epoch": 1.7677011147936126, "eta": "71:32:59", "grad_norm": 0.0055, "loss": 0.0509, "lr": "3.672e-05", "step": 5867, "steps": "24.01s,5867/16595" }, { "epoch": 1.7680024103645677, "eta": "69:36:22", "grad_norm": 0.0075, "loss": 0.0454, "lr": "3.672e-05", "step": 5868, "steps": "23.36s,5868/16595" }, { "epoch": 1.7683037059355229, "eta": "69:39:33", "grad_norm": 0.0046, "loss": 0.0665, "lr": "3.672e-05", "step": 5869, "steps": "23.38s,5869/16595" }, { "epoch": 1.7686050015064778, "eta": "70:23:51", "grad_norm": 0.0051, "loss": 0.0469, "lr": "3.671e-05", "step": 5870, "steps": "23.63s,5870/16595" }, { "epoch": 1.768906297077433, "eta": "70:50:16", "grad_norm": 0.0052, "loss": 0.0453, "lr": "3.671e-05", "step": 5871, "steps": "23.78s,5871/16595" }, { "epoch": 1.7692075926483881, "eta": "70:46:18", "grad_norm": 0.0175, "loss": 0.0269, "lr": "3.670e-05", "step": 5872, "steps": "23.76s,5872/16595" }, { "epoch": 1.769508888219343, "eta": "70:44:07", "grad_norm": 0.0046, "loss": 0.0634, "lr": "3.670e-05", "step": 5873, "steps": "23.75s,5873/16595" }, { "epoch": 1.7698101837902982, "eta": "70:11:33", "grad_norm": 0.0057, "loss": 0.053, "lr": "3.669e-05", "step": 5874, "steps": "23.57s,5874/16595" }, { "epoch": 1.7701114793612533, "eta": "70:20:06", "grad_norm": 0.0047, "loss": 0.0487, "lr": "3.669e-05", "step": 5875, "steps": "23.62s,5875/16595" }, { "epoch": 1.7704127749322085, "eta": "70:19:42", "grad_norm": 0.0043, "loss": 0.0719, "lr": "3.669e-05", "step": 5876, "steps": "23.62s,5876/16595" }, { "epoch": 1.7707140705031637, "eta": "70:05:01", "grad_norm": 0.006, "loss": 0.0581, "lr": "3.668e-05", "step": 5877, "steps": "23.54s,5877/16595" }, { "epoch": 1.7710153660741188, "eta": "70:43:55", "grad_norm": 0.0056, "loss": 0.0602, "lr": "3.668e-05", "step": 5878, "steps": "23.76s,5878/16595" }, { "epoch": 1.771316661645074, "eta": "70:14:57", "grad_norm": 0.005, "loss": 0.0393, "lr": "3.667e-05", "step": 5879, "steps": "23.6s,5879/16595" }, { "epoch": 1.7716179572160289, "eta": "70:07:25", "grad_norm": 0.0061, "loss": 0.043, "lr": "3.667e-05", "step": 5880, "steps": "23.56s,5880/16595" }, { "epoch": 1.771919252786984, "eta": "70:40:57", "grad_norm": 0.0056, "loss": 0.0455, "lr": "3.666e-05", "step": 5881, "steps": "23.75s,5881/16595" }, { "epoch": 1.7722205483579392, "eta": "70:40:33", "grad_norm": 0.0053, "loss": 0.0405, "lr": "3.666e-05", "step": 5882, "steps": "23.75s,5882/16595" }, { "epoch": 1.7725218439288941, "eta": "70:52:39", "grad_norm": 0.0047, "loss": 0.0507, "lr": "3.666e-05", "step": 5883, "steps": "23.82s,5883/16595" }, { "epoch": 1.7728231394998493, "eta": "70:50:28", "grad_norm": 0.005, "loss": 0.0521, "lr": "3.665e-05", "step": 5884, "steps": "23.81s,5884/16595" }, { "epoch": 1.7731244350708044, "eta": "70:03:40", "grad_norm": 0.0053, "loss": 0.0644, "lr": "3.665e-05", "step": 5885, "steps": "23.55s,5885/16595" }, { "epoch": 1.7734257306417596, "eta": "70:19:20", "grad_norm": 0.0061, "loss": 0.0389, "lr": "3.664e-05", "step": 5886, "steps": "23.64s,5886/16595" }, { "epoch": 1.7737270262127147, "eta": "69:53:58", "grad_norm": 0.0072, "loss": 0.0377, "lr": "3.664e-05", "step": 5887, "steps": "23.5s,5887/16595" }, { "epoch": 1.7740283217836699, "eta": "69:39:17", "grad_norm": 0.006, "loss": 0.0598, "lr": "3.664e-05", "step": 5888, "steps": "23.42s,5888/16595" }, { "epoch": 1.774329617354625, "eta": "70:44:55", "grad_norm": 0.0065, "loss": 0.031, "lr": "3.663e-05", "step": 5889, "steps": "23.79s,5889/16595" }, { "epoch": 1.77463091292558, "eta": "70:14:12", "grad_norm": 0.0053, "loss": 0.0488, "lr": "3.663e-05", "step": 5890, "steps": "23.62s,5890/16595" }, { "epoch": 1.774932208496535, "eta": "70:19:09", "grad_norm": 0.0044, "loss": 0.0488, "lr": "3.662e-05", "step": 5891, "steps": "23.65s,5891/16595" }, { "epoch": 1.7752335040674903, "eta": "70:24:07", "grad_norm": 0.0067, "loss": 0.0518, "lr": "3.662e-05", "step": 5892, "steps": "23.68s,5892/16595" }, { "epoch": 1.7755347996384452, "eta": "69:49:49", "grad_norm": 0.006, "loss": 0.0626, "lr": "3.661e-05", "step": 5893, "steps": "23.49s,5893/16595" }, { "epoch": 1.7758360952094003, "eta": "70:30:27", "grad_norm": 0.0047, "loss": 0.0574, "lr": "3.661e-05", "step": 5894, "steps": "23.72s,5894/16595" }, { "epoch": 1.7761373907803555, "eta": "70:28:17", "grad_norm": 0.0054, "loss": 0.0614, "lr": "3.661e-05", "step": 5895, "steps": "23.71s,5895/16595" }, { "epoch": 1.7764386863513106, "eta": "70:38:35", "grad_norm": 0.0049, "loss": 0.0494, "lr": "3.660e-05", "step": 5896, "steps": "23.77s,5896/16595" }, { "epoch": 1.7767399819222658, "eta": "70:11:26", "grad_norm": 0.0052, "loss": 0.0438, "lr": "3.660e-05", "step": 5897, "steps": "23.62s,5897/16595" }, { "epoch": 1.777041277493221, "eta": "69:40:44", "grad_norm": 0.0053, "loss": 0.0486, "lr": "3.659e-05", "step": 5898, "steps": "23.45s,5898/16595" }, { "epoch": 1.777342573064176, "eta": "71:05:55", "grad_norm": 0.0061, "loss": 0.0468, "lr": "3.659e-05", "step": 5899, "steps": "23.93s,5899/16595" }, { "epoch": 1.777643868635131, "eta": "70:04:55", "grad_norm": 0.0077, "loss": 0.0393, "lr": "3.658e-05", "step": 5900, "steps": "23.59s,5900/16595" }, { "epoch": 1.7779451642060862, "eta": "70:43:44", "grad_norm": 0.0052, "loss": 0.0488, "lr": "3.658e-05", "step": 5901, "steps": "23.81s,5901/16595" }, { "epoch": 1.7782464597770413, "eta": "70:59:22", "grad_norm": 0.0057, "loss": 0.0582, "lr": "3.658e-05", "step": 5902, "steps": "23.9s,5902/16595" }, { "epoch": 1.7785477553479963, "eta": "70:32:15", "grad_norm": 0.0043, "loss": 0.0454, "lr": "3.657e-05", "step": 5903, "steps": "23.75s,5903/16595" }, { "epoch": 1.7788490509189514, "eta": "70:19:22", "grad_norm": 0.0067, "loss": 0.0405, "lr": "3.657e-05", "step": 5904, "steps": "23.68s,5904/16595" }, { "epoch": 1.7791503464899066, "eta": "70:27:53", "grad_norm": 0.0057, "loss": 0.0558, "lr": "3.656e-05", "step": 5905, "steps": "23.73s,5905/16595" }, { "epoch": 1.7794516420608617, "eta": "69:21:35", "grad_norm": 0.006, "loss": 0.0536, "lr": "3.656e-05", "step": 5906, "steps": "23.36s,5906/16595" }, { "epoch": 1.7797529376318169, "eta": "70:39:34", "grad_norm": 0.0056, "loss": 0.0414, "lr": "3.655e-05", "step": 5907, "steps": "23.8s,5907/16595" }, { "epoch": 1.780054233202772, "eta": "70:07:06", "grad_norm": 0.0052, "loss": 0.0387, "lr": "3.655e-05", "step": 5908, "steps": "23.62s,5908/16595" }, { "epoch": 1.7803555287737272, "eta": "70:26:18", "grad_norm": 0.0059, "loss": 0.0514, "lr": "3.655e-05", "step": 5909, "steps": "23.73s,5909/16595" }, { "epoch": 1.780656824344682, "eta": "70:08:06", "grad_norm": 0.005, "loss": 0.0719, "lr": "3.654e-05", "step": 5910, "steps": "23.63s,5910/16595" }, { "epoch": 1.7809581199156372, "eta": "70:34:25", "grad_norm": 0.0063, "loss": 0.0673, "lr": "3.654e-05", "step": 5911, "steps": "23.78s,5911/16595" }, { "epoch": 1.7812594154865924, "eta": "70:05:32", "grad_norm": 0.0072, "loss": 0.0487, "lr": "3.653e-05", "step": 5912, "steps": "23.62s,5912/16595" }, { "epoch": 1.7815607110575473, "eta": "71:03:53", "grad_norm": 0.0047, "loss": 0.0642, "lr": "3.653e-05", "step": 5913, "steps": "23.95s,5913/16595" }, { "epoch": 1.7818620066285025, "eta": "69:52:17", "grad_norm": 0.0056, "loss": 0.0433, "lr": "3.653e-05", "step": 5914, "steps": "23.55s,5914/16595" }, { "epoch": 1.7821633021994576, "eta": "70:43:31", "grad_norm": 0.0064, "loss": 0.0721, "lr": "3.652e-05", "step": 5915, "steps": "23.84s,5915/16595" }, { "epoch": 1.7824645977704128, "eta": "70:34:13", "grad_norm": 0.0057, "loss": 0.0467, "lr": "3.652e-05", "step": 5916, "steps": "23.79s,5916/16595" }, { "epoch": 1.782765893341368, "eta": "71:09:25", "grad_norm": 0.0058, "loss": 0.0469, "lr": "3.651e-05", "step": 5917, "steps": "23.99s,5917/16595" }, { "epoch": 1.783067188912323, "eta": "71:26:48", "grad_norm": 0.0082, "loss": 0.0571, "lr": "3.651e-05", "step": 5918, "steps": "24.09s,5918/16595" }, { "epoch": 1.7833684844832782, "eta": "70:25:55", "grad_norm": 0.007, "loss": 0.0488, "lr": "3.650e-05", "step": 5919, "steps": "23.75s,5919/16595" }, { "epoch": 1.7836697800542332, "eta": "70:25:31", "grad_norm": 0.0055, "loss": 0.0461, "lr": "3.650e-05", "step": 5920, "steps": "23.75s,5920/16595" }, { "epoch": 1.7839710756251883, "eta": "69:40:39", "grad_norm": 0.0056, "loss": 0.047, "lr": "3.650e-05", "step": 5921, "steps": "23.5s,5921/16595" }, { "epoch": 1.7842723711961435, "eta": "70:06:56", "grad_norm": 0.0058, "loss": 0.0369, "lr": "3.649e-05", "step": 5922, "steps": "23.65s,5922/16595" }, { "epoch": 1.7845736667670984, "eta": "70:22:33", "grad_norm": 0.007, "loss": 0.045, "lr": "3.649e-05", "step": 5923, "steps": "23.74s,5923/16595" }, { "epoch": 1.7848749623380535, "eta": "69:53:42", "grad_norm": 0.0049, "loss": 0.0494, "lr": "3.648e-05", "step": 5924, "steps": "23.58s,5924/16595" }, { "epoch": 1.7851762579090087, "eta": "70:12:52", "grad_norm": 0.0043, "loss": 0.0464, "lr": "3.648e-05", "step": 5925, "steps": "23.69s,5925/16595" }, { "epoch": 1.7854775534799638, "eta": "71:18:16", "grad_norm": 0.0055, "loss": 0.0508, "lr": "3.647e-05", "step": 5926, "steps": "24.06s,5926/16595" }, { "epoch": 1.785778849050919, "eta": "70:06:44", "grad_norm": 0.005, "loss": 0.0509, "lr": "3.647e-05", "step": 5927, "steps": "23.66s,5927/16595" }, { "epoch": 1.7860801446218741, "eta": "70:04:34", "grad_norm": 0.0051, "loss": 0.051, "lr": "3.647e-05", "step": 5928, "steps": "23.65s,5928/16595" }, { "epoch": 1.7863814401928293, "eta": "70:07:44", "grad_norm": 0.0057, "loss": 0.0386, "lr": "3.646e-05", "step": 5929, "steps": "23.67s,5929/16595" }, { "epoch": 1.7866827357637842, "eta": "70:18:00", "grad_norm": 0.0065, "loss": 0.0425, "lr": "3.646e-05", "step": 5930, "steps": "23.73s,5930/16595" }, { "epoch": 1.7869840313347394, "eta": "70:47:49", "grad_norm": 0.0045, "loss": 0.0437, "lr": "3.645e-05", "step": 5931, "steps": "23.9s,5931/16595" }, { "epoch": 1.7872853269056945, "eta": "70:26:06", "grad_norm": 0.0048, "loss": 0.0522, "lr": "3.645e-05", "step": 5932, "steps": "23.78s,5932/16595" }, { "epoch": 1.7875866224766495, "eta": "69:21:44", "grad_norm": 0.0053, "loss": 0.0469, "lr": "3.644e-05", "step": 5933, "steps": "23.42s,5933/16595" }, { "epoch": 1.7878879180476046, "eta": "70:14:38", "grad_norm": 0.0053, "loss": 0.0599, "lr": "3.644e-05", "step": 5934, "steps": "23.72s,5934/16595" }, { "epoch": 1.7881892136185598, "eta": "71:09:19", "grad_norm": 0.0053, "loss": 0.0491, "lr": "3.644e-05", "step": 5935, "steps": "24.03s,5935/16595" }, { "epoch": 1.788490509189515, "eta": "71:01:49", "grad_norm": 0.0054, "loss": 0.054, "lr": "3.643e-05", "step": 5936, "steps": "23.99s,5936/16595" }, { "epoch": 1.78879180476047, "eta": "69:37:56", "grad_norm": 0.0053, "loss": 0.0438, "lr": "3.643e-05", "step": 5937, "steps": "23.52s,5937/16595" }, { "epoch": 1.7890931003314252, "eta": "70:30:49", "grad_norm": 0.005, "loss": 0.0505, "lr": "3.642e-05", "step": 5938, "steps": "23.82s,5938/16595" }, { "epoch": 1.7893943959023804, "eta": "70:12:40", "grad_norm": 0.0054, "loss": 0.0425, "lr": "3.642e-05", "step": 5939, "steps": "23.72s,5939/16595" }, { "epoch": 1.7896956914733353, "eta": "69:49:11", "grad_norm": 0.0051, "loss": 0.0472, "lr": "3.641e-05", "step": 5940, "steps": "23.59s,5940/16595" }, { "epoch": 1.7899969870442904, "eta": "70:04:46", "grad_norm": 0.0054, "loss": 0.0401, "lr": "3.641e-05", "step": 5941, "steps": "23.68s,5941/16595" }, { "epoch": 1.7902982826152456, "eta": "69:59:03", "grad_norm": 0.0053, "loss": 0.0418, "lr": "3.641e-05", "step": 5942, "steps": "23.65s,5942/16595" }, { "epoch": 1.7905995781862005, "eta": "69:16:03", "grad_norm": 0.0059, "loss": 0.0557, "lr": "3.640e-05", "step": 5943, "steps": "23.41s,5943/16595" }, { "epoch": 1.7909008737571557, "eta": "69:38:44", "grad_norm": 0.0048, "loss": 0.0612, "lr": "3.640e-05", "step": 5944, "steps": "23.54s,5944/16595" }, { "epoch": 1.7912021693281108, "eta": "70:45:48", "grad_norm": 0.005, "loss": 0.0427, "lr": "3.639e-05", "step": 5945, "steps": "23.92s,5945/16595" }, { "epoch": 1.791503464899066, "eta": "71:03:08", "grad_norm": 0.0042, "loss": 0.0503, "lr": "3.639e-05", "step": 5946, "steps": "24.02s,5946/16595" }, { "epoch": 1.7918047604700211, "eta": "70:05:57", "grad_norm": 0.0059, "loss": 0.0595, "lr": "3.639e-05", "step": 5947, "steps": "23.7s,5947/16595" }, { "epoch": 1.7921060560409763, "eta": "69:47:49", "grad_norm": 0.0059, "loss": 0.0606, "lr": "3.638e-05", "step": 5948, "steps": "23.6s,5948/16595" }, { "epoch": 1.7924073516119314, "eta": "70:15:48", "grad_norm": 0.0065, "loss": 0.0584, "lr": "3.638e-05", "step": 5949, "steps": "23.76s,5949/16595" }, { "epoch": 1.7927086471828864, "eta": "70:24:17", "grad_norm": 0.0053, "loss": 0.0545, "lr": "3.637e-05", "step": 5950, "steps": "23.81s,5950/16595" }, { "epoch": 1.7930099427538415, "eta": "69:50:11", "grad_norm": 0.006, "loss": 0.0531, "lr": "3.637e-05", "step": 5951, "steps": "23.62s,5951/16595" }, { "epoch": 1.7933112383247967, "eta": "71:04:17", "grad_norm": 0.005, "loss": 0.0595, "lr": "3.636e-05", "step": 5952, "steps": "24.04s,5952/16595" }, { "epoch": 1.7936125338957516, "eta": "70:46:09", "grad_norm": 0.0039, "loss": 0.0615, "lr": "3.636e-05", "step": 5953, "steps": "23.94s,5953/16595" }, { "epoch": 1.7939138294667067, "eta": "70:31:34", "grad_norm": 0.0046, "loss": 0.055, "lr": "3.636e-05", "step": 5954, "steps": "23.86s,5954/16595" }, { "epoch": 1.794215125037662, "eta": "71:36:47", "grad_norm": 0.0066, "loss": 0.0498, "lr": "3.635e-05", "step": 5955, "steps": "24.23s,5955/16595" }, { "epoch": 1.794516420608617, "eta": "71:06:14", "grad_norm": 0.0048, "loss": 0.0386, "lr": "3.635e-05", "step": 5956, "steps": "24.06s,5956/16595" }, { "epoch": 1.7948177161795722, "eta": "70:17:58", "grad_norm": 0.0047, "loss": 0.0566, "lr": "3.634e-05", "step": 5957, "steps": "23.79s,5957/16595" }, { "epoch": 1.7951190117505273, "eta": "70:38:50", "grad_norm": 0.0046, "loss": 0.0541, "lr": "3.634e-05", "step": 5958, "steps": "23.91s,5958/16595" }, { "epoch": 1.7954203073214825, "eta": "70:06:32", "grad_norm": 0.0132, "loss": 0.0558, "lr": "3.633e-05", "step": 5959, "steps": "23.73s,5959/16595" }, { "epoch": 1.7957216028924374, "eta": "70:43:21", "grad_norm": 0.0057, "loss": 0.0532, "lr": "3.633e-05", "step": 5960, "steps": "23.94s,5960/16595" }, { "epoch": 1.7960228984633926, "eta": "70:23:28", "grad_norm": 0.0053, "loss": 0.0487, "lr": "3.633e-05", "step": 5961, "steps": "23.83s,5961/16595" }, { "epoch": 1.7963241940343477, "eta": "70:10:40", "grad_norm": 0.0042, "loss": 0.0346, "lr": "3.632e-05", "step": 5962, "steps": "23.76s,5962/16595" }, { "epoch": 1.7966254896053027, "eta": "70:19:07", "grad_norm": 0.0046, "loss": 0.0488, "lr": "3.632e-05", "step": 5963, "steps": "23.81s,5963/16595" }, { "epoch": 1.7969267851762578, "eta": "69:25:34", "grad_norm": 0.0053, "loss": 0.0532, "lr": "3.631e-05", "step": 5964, "steps": "23.51s,5964/16595" }, { "epoch": 1.797228080747213, "eta": "69:32:16", "grad_norm": 0.0049, "loss": 0.045, "lr": "3.631e-05", "step": 5965, "steps": "23.55s,5965/16595" }, { "epoch": 1.7975293763181681, "eta": "70:21:29", "grad_norm": 0.0065, "loss": 0.0548, "lr": "3.630e-05", "step": 5966, "steps": "23.83s,5966/16595" }, { "epoch": 1.7978306718891233, "eta": "69:54:31", "grad_norm": 0.0056, "loss": 0.0543, "lr": "3.630e-05", "step": 5967, "steps": "23.68s,5967/16595" }, { "epoch": 1.7981319674600784, "eta": "70:01:12", "grad_norm": 0.0052, "loss": 0.0429, "lr": "3.630e-05", "step": 5968, "steps": "23.72s,5968/16595" }, { "epoch": 1.7984332630310336, "eta": "70:11:26", "grad_norm": 0.0048, "loss": 0.0628, "lr": "3.629e-05", "step": 5969, "steps": "23.78s,5969/16595" }, { "epoch": 1.7987345586019885, "eta": "70:11:02", "grad_norm": 0.0055, "loss": 0.0334, "lr": "3.629e-05", "step": 5970, "steps": "23.78s,5970/16595" }, { "epoch": 1.7990358541729436, "eta": "69:47:37", "grad_norm": 0.0054, "loss": 0.0493, "lr": "3.628e-05", "step": 5971, "steps": "23.65s,5971/16595" }, { "epoch": 1.7993371497438988, "eta": "70:27:57", "grad_norm": 0.0052, "loss": 0.0469, "lr": "3.628e-05", "step": 5972, "steps": "23.88s,5972/16595" }, { "epoch": 1.7996384453148537, "eta": "70:24:00", "grad_norm": 0.0041, "loss": 0.0502, "lr": "3.627e-05", "step": 5973, "steps": "23.86s,5973/16595" }, { "epoch": 1.7999397408858089, "eta": "69:48:12", "grad_norm": 0.0052, "loss": 0.0515, "lr": "3.627e-05", "step": 5974, "steps": "23.66s,5974/16595" }, { "epoch": 1.800241036456764, "eta": "70:07:17", "grad_norm": 0.0075, "loss": 0.0533, "lr": "3.627e-05", "step": 5975, "steps": "23.77s,5975/16595" }, { "epoch": 1.8005423320277192, "eta": "70:24:35", "grad_norm": 0.0058, "loss": 0.0384, "lr": "3.626e-05", "step": 5976, "steps": "23.87s,5976/16595" }, { "epoch": 1.8008436275986743, "eta": "69:48:48", "grad_norm": 0.0052, "loss": 0.0554, "lr": "3.626e-05", "step": 5977, "steps": "23.67s,5977/16595" }, { "epoch": 1.8011449231696295, "eta": "70:14:56", "grad_norm": 0.0042, "loss": 0.0445, "lr": "3.625e-05", "step": 5978, "steps": "23.82s,5978/16595" }, { "epoch": 1.8014462187405846, "eta": "70:14:33", "grad_norm": 0.0038, "loss": 0.0504, "lr": "3.625e-05", "step": 5979, "steps": "23.82s,5979/16595" }, { "epoch": 1.8017475143115396, "eta": "69:10:27", "grad_norm": 0.0044, "loss": 0.0617, "lr": "3.624e-05", "step": 5980, "steps": "23.46s,5980/16595" }, { "epoch": 1.8020488098824947, "eta": "70:04:54", "grad_norm": 0.0046, "loss": 0.0461, "lr": "3.624e-05", "step": 5981, "steps": "23.77s,5981/16595" }, { "epoch": 1.8023501054534499, "eta": "69:36:12", "grad_norm": 0.0048, "loss": 0.0449, "lr": "3.624e-05", "step": 5982, "steps": "23.61s,5982/16595" }, { "epoch": 1.8026514010244048, "eta": "70:20:02", "grad_norm": 0.0055, "loss": 0.0475, "lr": "3.623e-05", "step": 5983, "steps": "23.86s,5983/16595" }, { "epoch": 1.80295269659536, "eta": "70:39:05", "grad_norm": 0.0058, "loss": 0.0475, "lr": "3.623e-05", "step": 5984, "steps": "23.97s,5984/16595" }, { "epoch": 1.803253992166315, "eta": "69:40:20", "grad_norm": 0.007, "loss": 0.0573, "lr": "3.622e-05", "step": 5985, "steps": "23.64s,5985/16595" }, { "epoch": 1.8035552877372703, "eta": "69:25:48", "grad_norm": 0.0051, "loss": 0.0407, "lr": "3.622e-05", "step": 5986, "steps": "23.56s,5986/16595" }, { "epoch": 1.8038565833082254, "eta": "69:23:38", "grad_norm": 0.0067, "loss": 0.0554, "lr": "3.621e-05", "step": 5987, "steps": "23.55s,5987/16595" }, { "epoch": 1.8041578788791806, "eta": "69:17:56", "grad_norm": 0.0078, "loss": 0.0492, "lr": "3.621e-05", "step": 5988, "steps": "23.52s,5988/16595" }, { "epoch": 1.8044591744501357, "eta": "70:21:11", "grad_norm": 0.0058, "loss": 0.0597, "lr": "3.621e-05", "step": 5989, "steps": "23.88s,5989/16595" }, { "epoch": 1.8047604700210906, "eta": "70:06:39", "grad_norm": 0.0046, "loss": 0.0468, "lr": "3.620e-05", "step": 5990, "steps": "23.8s,5990/16595" }, { "epoch": 1.8050617655920458, "eta": "69:57:25", "grad_norm": 0.0058, "loss": 0.0456, "lr": "3.620e-05", "step": 5991, "steps": "23.75s,5991/16595" }, { "epoch": 1.805363061163001, "eta": "68:28:39", "grad_norm": 0.0057, "loss": 0.0632, "lr": "3.619e-05", "step": 5992, "steps": "23.25s,5992/16595" }, { "epoch": 1.8056643567339559, "eta": "70:31:57", "grad_norm": 0.0052, "loss": 0.0737, "lr": "3.619e-05", "step": 5993, "steps": "23.95s,5993/16595" }, { "epoch": 1.805965652304911, "eta": "69:56:13", "grad_norm": 0.0082, "loss": 0.0351, "lr": "3.618e-05", "step": 5994, "steps": "23.75s,5994/16595" }, { "epoch": 1.8062669478758662, "eta": "69:34:38", "grad_norm": 0.0048, "loss": 0.0603, "lr": "3.618e-05", "step": 5995, "steps": "23.63s,5995/16595" }, { "epoch": 1.8065682434468213, "eta": "70:00:44", "grad_norm": 0.0047, "loss": 0.0294, "lr": "3.618e-05", "step": 5996, "steps": "23.78s,5996/16595" }, { "epoch": 1.8068695390177765, "eta": "69:28:32", "grad_norm": 0.0052, "loss": 0.0467, "lr": "3.617e-05", "step": 5997, "steps": "23.6s,5997/16595" }, { "epoch": 1.8071708345887316, "eta": "68:49:17", "grad_norm": 0.0056, "loss": 0.0533, "lr": "3.617e-05", "step": 5998, "steps": "23.38s,5998/16595" }, { "epoch": 1.8074721301596868, "eta": "69:27:45", "grad_norm": 0.0054, "loss": 0.057, "lr": "3.616e-05", "step": 5999, "steps": "23.6s,5999/16595" }, { "epoch": 1.807773425730642, "eta": "69:09:42", "grad_norm": 0.0067, "loss": 0.0588, "lr": "3.616e-05", "step": 6000, "steps": "23.5s,6000/16595" }, { "epoch": 1.8080747213015969, "eta": "144:59:26", "grad_norm": 0.006, "loss": 0.0501, "lr": "3.615e-05", "step": 6001, "steps": "49.27s,6001/16595" }, { "epoch": 1.808376016872552, "eta": "69:38:56", "grad_norm": 0.0053, "loss": 0.0364, "lr": "3.615e-05", "step": 6002, "steps": "23.67s,6002/16595" }, { "epoch": 1.808677312443507, "eta": "70:10:19", "grad_norm": 0.0073, "loss": 0.0554, "lr": "3.615e-05", "step": 6003, "steps": "23.85s,6003/16595" }, { "epoch": 1.808978608014462, "eta": "70:09:55", "grad_norm": 0.0051, "loss": 0.0561, "lr": "3.614e-05", "step": 6004, "steps": "23.85s,6004/16595" }, { "epoch": 1.8092799035854172, "eta": "70:58:56", "grad_norm": 0.0052, "loss": 0.0496, "lr": "3.614e-05", "step": 6005, "steps": "24.13s,6005/16595" }, { "epoch": 1.8095811991563724, "eta": "70:07:21", "grad_norm": 0.0057, "loss": 0.0509, "lr": "3.613e-05", "step": 6006, "steps": "23.84s,6006/16595" }, { "epoch": 1.8098824947273275, "eta": "68:59:54", "grad_norm": 0.0065, "loss": 0.043, "lr": "3.613e-05", "step": 6007, "steps": "23.46s,6007/16595" }, { "epoch": 1.8101837902982827, "eta": "69:40:06", "grad_norm": 0.0055, "loss": 0.0651, "lr": "3.613e-05", "step": 6008, "steps": "23.69s,6008/16595" }, { "epoch": 1.8104850858692378, "eta": "70:23:48", "grad_norm": 0.0059, "loss": 0.055, "lr": "3.612e-05", "step": 6009, "steps": "23.94s,6009/16595" }, { "epoch": 1.810786381440193, "eta": "69:58:43", "grad_norm": 0.0085, "loss": 0.0528, "lr": "3.612e-05", "step": 6010, "steps": "23.8s,6010/16595" }, { "epoch": 1.811087677011148, "eta": "69:53:01", "grad_norm": 0.0046, "loss": 0.0597, "lr": "3.611e-05", "step": 6011, "steps": "23.77s,6011/16595" }, { "epoch": 1.811388972582103, "eta": "69:50:52", "grad_norm": 0.0057, "loss": 0.0462, "lr": "3.611e-05", "step": 6012, "steps": "23.76s,6012/16595" }, { "epoch": 1.811690268153058, "eta": "69:08:08", "grad_norm": 0.0066, "loss": 0.0545, "lr": "3.610e-05", "step": 6013, "steps": "23.52s,6013/16595" }, { "epoch": 1.8119915637240132, "eta": "69:32:26", "grad_norm": 0.0051, "loss": 0.0515, "lr": "3.610e-05", "step": 6014, "steps": "23.66s,6014/16595" }, { "epoch": 1.8122928592949683, "eta": "68:35:37", "grad_norm": 0.0061, "loss": 0.0541, "lr": "3.610e-05", "step": 6015, "steps": "23.34s,6015/16595" }, { "epoch": 1.8125941548659235, "eta": "68:38:45", "grad_norm": 0.0045, "loss": 0.0513, "lr": "3.609e-05", "step": 6016, "steps": "23.36s,6016/16595" }, { "epoch": 1.8128954504368786, "eta": "70:11:48", "grad_norm": 0.0057, "loss": 0.0598, "lr": "3.609e-05", "step": 6017, "steps": "23.89s,6017/16595" }, { "epoch": 1.8131967460078338, "eta": "69:39:40", "grad_norm": 0.0054, "loss": 0.0514, "lr": "3.608e-05", "step": 6018, "steps": "23.71s,6018/16595" }, { "epoch": 1.813498041578789, "eta": "68:44:38", "grad_norm": 0.0055, "loss": 0.0564, "lr": "3.608e-05", "step": 6019, "steps": "23.4s,6019/16595" }, { "epoch": 1.813799337149744, "eta": "68:46:00", "grad_norm": 0.0058, "loss": 0.0488, "lr": "3.607e-05", "step": 6020, "steps": "23.41s,6020/16595" }, { "epoch": 1.814100632720699, "eta": "69:17:20", "grad_norm": 0.0045, "loss": 0.0576, "lr": "3.607e-05", "step": 6021, "steps": "23.59s,6021/16595" }, { "epoch": 1.8144019282916541, "eta": "68:57:34", "grad_norm": 0.0063, "loss": 0.0491, "lr": "3.607e-05", "step": 6022, "steps": "23.48s,6022/16595" }, { "epoch": 1.814703223862609, "eta": "70:11:10", "grad_norm": 0.0056, "loss": 0.0561, "lr": "3.606e-05", "step": 6023, "steps": "23.9s,6023/16595" }, { "epoch": 1.8150045194335642, "eta": "70:05:29", "grad_norm": 0.0073, "loss": 0.0465, "lr": "3.606e-05", "step": 6024, "steps": "23.87s,6024/16595" }, { "epoch": 1.8153058150045194, "eta": "70:10:22", "grad_norm": 0.0052, "loss": 0.0485, "lr": "3.605e-05", "step": 6025, "steps": "23.9s,6025/16595" }, { "epoch": 1.8156071105754745, "eta": "69:38:16", "grad_norm": 0.0062, "loss": 0.0662, "lr": "3.605e-05", "step": 6026, "steps": "23.72s,6026/16595" }, { "epoch": 1.8159084061464297, "eta": "69:14:59", "grad_norm": 0.0052, "loss": 0.05, "lr": "3.604e-05", "step": 6027, "steps": "23.59s,6027/16595" }, { "epoch": 1.8162097017173848, "eta": "68:53:27", "grad_norm": 0.0059, "loss": 0.0562, "lr": "3.604e-05", "step": 6028, "steps": "23.47s,6028/16595" }, { "epoch": 1.81651099728834, "eta": "69:54:42", "grad_norm": 0.0051, "loss": 0.0464, "lr": "3.604e-05", "step": 6029, "steps": "23.82s,6029/16595" }, { "epoch": 1.8168122928592951, "eta": "69:08:31", "grad_norm": 0.0094, "loss": 0.0365, "lr": "3.603e-05", "step": 6030, "steps": "23.56s,6030/16595" }, { "epoch": 1.81711358843025, "eta": "68:39:57", "grad_norm": 0.0077, "loss": 0.0487, "lr": "3.603e-05", "step": 6031, "steps": "23.4s,6031/16595" }, { "epoch": 1.8174148840012052, "eta": "69:49:59", "grad_norm": 0.0059, "loss": 0.0536, "lr": "3.602e-05", "step": 6032, "steps": "23.8s,6032/16595" }, { "epoch": 1.8177161795721601, "eta": "69:33:45", "grad_norm": 0.0049, "loss": 0.0545, "lr": "3.602e-05", "step": 6033, "steps": "23.71s,6033/16595" }, { "epoch": 1.8180174751431153, "eta": "69:49:11", "grad_norm": 0.0054, "loss": 0.0815, "lr": "3.601e-05", "step": 6034, "steps": "23.8s,6034/16595" }, { "epoch": 1.8183187707140704, "eta": "69:18:52", "grad_norm": 0.007, "loss": 0.0578, "lr": "3.601e-05", "step": 6035, "steps": "23.63s,6035/16595" }, { "epoch": 1.8186200662850256, "eta": "70:04:14", "grad_norm": 0.0072, "loss": 0.0435, "lr": "3.601e-05", "step": 6036, "steps": "23.89s,6036/16595" }, { "epoch": 1.8189213618559807, "eta": "69:25:07", "grad_norm": 0.0064, "loss": 0.0536, "lr": "3.600e-05", "step": 6037, "steps": "23.67s,6037/16595" }, { "epoch": 1.819222657426936, "eta": "68:38:59", "grad_norm": 0.0052, "loss": 0.0744, "lr": "3.600e-05", "step": 6038, "steps": "23.41s,6038/16595" }, { "epoch": 1.819523952997891, "eta": "69:40:10", "grad_norm": 0.0068, "loss": 0.0339, "lr": "3.599e-05", "step": 6039, "steps": "23.76s,6039/16595" }, { "epoch": 1.8198252485688462, "eta": "69:30:59", "grad_norm": 0.0066, "loss": 0.0567, "lr": "3.599e-05", "step": 6040, "steps": "23.71s,6040/16595" }, { "epoch": 1.8201265441398011, "eta": "68:39:34", "grad_norm": 0.0062, "loss": 0.0479, "lr": "3.598e-05", "step": 6041, "steps": "23.42s,6041/16595" }, { "epoch": 1.8204278397107563, "eta": "69:47:46", "grad_norm": 0.0053, "loss": 0.0515, "lr": "3.598e-05", "step": 6042, "steps": "23.81s,6042/16595" }, { "epoch": 1.8207291352817112, "eta": "69:50:54", "grad_norm": 0.006, "loss": 0.0418, "lr": "3.598e-05", "step": 6043, "steps": "23.83s,6043/16595" }, { "epoch": 1.8210304308526664, "eta": "69:24:07", "grad_norm": 0.0054, "loss": 0.0452, "lr": "3.597e-05", "step": 6044, "steps": "23.68s,6044/16595" }, { "epoch": 1.8213317264236215, "eta": "69:37:48", "grad_norm": 0.0064, "loss": 0.049, "lr": "3.597e-05", "step": 6045, "steps": "23.76s,6045/16595" }, { "epoch": 1.8216330219945767, "eta": "69:33:53", "grad_norm": 0.0063, "loss": 0.0404, "lr": "3.596e-05", "step": 6046, "steps": "23.74s,6046/16595" }, { "epoch": 1.8219343175655318, "eta": "69:52:49", "grad_norm": 0.006, "loss": 0.0547, "lr": "3.596e-05", "step": 6047, "steps": "23.85s,6047/16595" }, { "epoch": 1.822235613136487, "eta": "68:54:25", "grad_norm": 0.0051, "loss": 0.0806, "lr": "3.595e-05", "step": 6048, "steps": "23.52s,6048/16595" }, { "epoch": 1.8225369087074421, "eta": "69:11:36", "grad_norm": 0.0066, "loss": 0.0476, "lr": "3.595e-05", "step": 6049, "steps": "23.62s,6049/16595" }, { "epoch": 1.8228382042783973, "eta": "69:00:40", "grad_norm": 0.005, "loss": 0.0684, "lr": "3.595e-05", "step": 6050, "steps": "23.56s,6050/16595" }, { "epoch": 1.8231394998493522, "eta": "69:03:47", "grad_norm": 0.0051, "loss": 0.0709, "lr": "3.594e-05", "step": 6051, "steps": "23.58s,6051/16595" }, { "epoch": 1.8234407954203073, "eta": "69:50:50", "grad_norm": 0.0047, "loss": 0.0609, "lr": "3.594e-05", "step": 6052, "steps": "23.85s,6052/16595" }, { "epoch": 1.8237420909912623, "eta": "69:52:12", "grad_norm": 0.0071, "loss": 0.0462, "lr": "3.593e-05", "step": 6053, "steps": "23.86s,6053/16595" }, { "epoch": 1.8240433865622174, "eta": "69:43:01", "grad_norm": 0.0058, "loss": 0.0473, "lr": "3.593e-05", "step": 6054, "steps": "23.81s,6054/16595" }, { "epoch": 1.8243446821331726, "eta": "69:33:50", "grad_norm": 0.0063, "loss": 0.0545, "lr": "3.592e-05", "step": 6055, "steps": "23.76s,6055/16595" }, { "epoch": 1.8246459777041277, "eta": "69:35:12", "grad_norm": 0.0055, "loss": 0.0476, "lr": "3.592e-05", "step": 6056, "steps": "23.77s,6056/16595" }, { "epoch": 1.8249472732750829, "eta": "69:47:05", "grad_norm": 0.006, "loss": 0.0425, "lr": "3.592e-05", "step": 6057, "steps": "23.84s,6057/16595" }, { "epoch": 1.825248568846038, "eta": "69:27:23", "grad_norm": 0.0051, "loss": 0.0655, "lr": "3.591e-05", "step": 6058, "steps": "23.73s,6058/16595" }, { "epoch": 1.8255498644169932, "eta": "70:54:47", "grad_norm": 0.006, "loss": 0.0431, "lr": "3.591e-05", "step": 6059, "steps": "24.23s,6059/16595" }, { "epoch": 1.8258511599879483, "eta": "69:12:32", "grad_norm": 0.0068, "loss": 0.0516, "lr": "3.590e-05", "step": 6060, "steps": "23.65s,6060/16595" }, { "epoch": 1.8261524555589033, "eta": "69:22:41", "grad_norm": 0.0061, "loss": 0.0458, "lr": "3.590e-05", "step": 6061, "steps": "23.71s,6061/16595" }, { "epoch": 1.8264537511298584, "eta": "69:32:49", "grad_norm": 0.0056, "loss": 0.038, "lr": "3.589e-05", "step": 6062, "steps": "23.77s,6062/16595" }, { "epoch": 1.8267550467008133, "eta": "69:13:07", "grad_norm": 0.0061, "loss": 0.0452, "lr": "3.589e-05", "step": 6063, "steps": "23.66s,6063/16595" }, { "epoch": 1.8270563422717685, "eta": "69:30:16", "grad_norm": 0.0057, "loss": 0.0474, "lr": "3.589e-05", "step": 6064, "steps": "23.76s,6064/16595" }, { "epoch": 1.8273576378427236, "eta": "69:07:03", "grad_norm": 0.0075, "loss": 0.0623, "lr": "3.588e-05", "step": 6065, "steps": "23.63s,6065/16595" }, { "epoch": 1.8276589334136788, "eta": "70:11:36", "grad_norm": 0.0042, "loss": 0.0505, "lr": "3.588e-05", "step": 6066, "steps": "24.0s,6066/16595" }, { "epoch": 1.827960228984634, "eta": "69:30:50", "grad_norm": 0.0063, "loss": 0.0553, "lr": "3.587e-05", "step": 6067, "steps": "23.77s,6067/16595" }, { "epoch": 1.828261524555589, "eta": "68:36:03", "grad_norm": 0.0043, "loss": 0.0434, "lr": "3.587e-05", "step": 6068, "steps": "23.46s,6068/16595" }, { "epoch": 1.8285628201265443, "eta": "68:53:12", "grad_norm": 0.0066, "loss": 0.0533, "lr": "3.586e-05", "step": 6069, "steps": "23.56s,6069/16595" }, { "epoch": 1.8288641156974994, "eta": "69:26:08", "grad_norm": 0.0048, "loss": 0.052, "lr": "3.586e-05", "step": 6070, "steps": "23.75s,6070/16595" }, { "epoch": 1.8291654112684543, "eta": "69:31:00", "grad_norm": 0.0063, "loss": 0.0326, "lr": "3.585e-05", "step": 6071, "steps": "23.78s,6071/16595" }, { "epoch": 1.8294667068394095, "eta": "69:51:39", "grad_norm": 0.0066, "loss": 0.0486, "lr": "3.585e-05", "step": 6072, "steps": "23.9s,6072/16595" }, { "epoch": 1.8297680024103644, "eta": "69:49:30", "grad_norm": 0.0055, "loss": 0.0539, "lr": "3.585e-05", "step": 6073, "steps": "23.89s,6073/16595" }, { "epoch": 1.8300692979813196, "eta": "69:14:02", "grad_norm": 0.0057, "loss": 0.0427, "lr": "3.584e-05", "step": 6074, "steps": "23.69s,6074/16595" }, { "epoch": 1.8303705935522747, "eta": "70:04:29", "grad_norm": 0.0057, "loss": 0.0599, "lr": "3.584e-05", "step": 6075, "steps": "23.98s,6075/16595" }, { "epoch": 1.8306718891232299, "eta": "69:51:49", "grad_norm": 0.0046, "loss": 0.0541, "lr": "3.583e-05", "step": 6076, "steps": "23.91s,6076/16595" }, { "epoch": 1.830973184694185, "eta": "69:07:35", "grad_norm": 0.0056, "loss": 0.0431, "lr": "3.583e-05", "step": 6077, "steps": "23.66s,6077/16595" }, { "epoch": 1.8312744802651402, "eta": "69:15:58", "grad_norm": 0.0044, "loss": 0.0719, "lr": "3.582e-05", "step": 6078, "steps": "23.71s,6078/16595" }, { "epoch": 1.8315757758360953, "eta": "69:31:20", "grad_norm": 0.0078, "loss": 0.0557, "lr": "3.582e-05", "step": 6079, "steps": "23.8s,6079/16595" }, { "epoch": 1.8318770714070505, "eta": "68:41:52", "grad_norm": 0.0064, "loss": 0.044, "lr": "3.582e-05", "step": 6080, "steps": "23.52s,6080/16595" }, { "epoch": 1.8321783669780054, "eta": "70:44:09", "grad_norm": 0.0061, "loss": 0.0465, "lr": "3.581e-05", "step": 6081, "steps": "24.22s,6081/16595" }, { "epoch": 1.8324796625489606, "eta": "69:10:52", "grad_norm": 0.0043, "loss": 0.0562, "lr": "3.581e-05", "step": 6082, "steps": "23.69s,6082/16595" }, { "epoch": 1.8327809581199155, "eta": "69:13:59", "grad_norm": 0.0053, "loss": 0.0732, "lr": "3.580e-05", "step": 6083, "steps": "23.71s,6083/16595" }, { "epoch": 1.8330822536908706, "eta": "68:49:04", "grad_norm": 0.0051, "loss": 0.0337, "lr": "3.580e-05", "step": 6084, "steps": "23.57s,6084/16595" }, { "epoch": 1.8333835492618258, "eta": "69:49:59", "grad_norm": 0.0048, "loss": 0.0512, "lr": "3.579e-05", "step": 6085, "steps": "23.92s,6085/16595" }, { "epoch": 1.833684844832781, "eta": "68:48:17", "grad_norm": 0.0071, "loss": 0.0452, "lr": "3.579e-05", "step": 6086, "steps": "23.57s,6086/16595" }, { "epoch": 1.833986140403736, "eta": "69:33:25", "grad_norm": 0.005, "loss": 0.0472, "lr": "3.579e-05", "step": 6087, "steps": "23.83s,6087/16595" }, { "epoch": 1.8342874359746912, "eta": "69:01:30", "grad_norm": 0.0052, "loss": 0.0531, "lr": "3.578e-05", "step": 6088, "steps": "23.65s,6088/16595" }, { "epoch": 1.8345887315456464, "eta": "68:52:21", "grad_norm": 0.0057, "loss": 0.0548, "lr": "3.578e-05", "step": 6089, "steps": "23.6s,6089/16595" }, { "epoch": 1.8348900271166015, "eta": "69:44:29", "grad_norm": 0.0063, "loss": 0.0415, "lr": "3.577e-05", "step": 6090, "steps": "23.9s,6090/16595" }, { "epoch": 1.8351913226875565, "eta": "68:07:48", "grad_norm": 0.0049, "loss": 0.0386, "lr": "3.577e-05", "step": 6091, "steps": "23.35s,6091/16595" }, { "epoch": 1.8354926182585116, "eta": "68:47:40", "grad_norm": 0.0056, "loss": 0.0724, "lr": "3.576e-05", "step": 6092, "steps": "23.58s,6092/16595" }, { "epoch": 1.8357939138294666, "eta": "68:38:32", "grad_norm": 0.0057, "loss": 0.0544, "lr": "3.576e-05", "step": 6093, "steps": "23.53s,6093/16595" }, { "epoch": 1.8360952094004217, "eta": "69:44:38", "grad_norm": 0.005, "loss": 0.0566, "lr": "3.576e-05", "step": 6094, "steps": "23.91s,6094/16595" }, { "epoch": 1.8363965049713769, "eta": "68:44:45", "grad_norm": 0.0056, "loss": 0.0387, "lr": "3.575e-05", "step": 6095, "steps": "23.57s,6095/16595" }, { "epoch": 1.836697800542332, "eta": "69:00:06", "grad_norm": 0.0044, "loss": 0.0493, "lr": "3.575e-05", "step": 6096, "steps": "23.66s,6096/16595" }, { "epoch": 1.8369990961132872, "eta": "68:31:43", "grad_norm": 0.0074, "loss": 0.0478, "lr": "3.574e-05", "step": 6097, "steps": "23.5s,6097/16595" }, { "epoch": 1.8373003916842423, "eta": "68:48:49", "grad_norm": 0.0048, "loss": 0.0601, "lr": "3.574e-05", "step": 6098, "steps": "23.6s,6098/16595" }, { "epoch": 1.8376016872551975, "eta": "68:58:55", "grad_norm": 0.0057, "loss": 0.043, "lr": "3.573e-05", "step": 6099, "steps": "23.66s,6099/16595" }, { "epoch": 1.8379029828261526, "eta": "68:55:01", "grad_norm": 0.0062, "loss": 0.0568, "lr": "3.573e-05", "step": 6100, "steps": "23.64s,6100/16595" }, { "epoch": 1.8382042783971075, "eta": "68:37:08", "grad_norm": 0.0054, "loss": 0.047, "lr": "3.573e-05", "step": 6101, "steps": "23.54s,6101/16595" }, { "epoch": 1.8385055739680627, "eta": "68:03:31", "grad_norm": 0.0049, "loss": 0.0456, "lr": "3.572e-05", "step": 6102, "steps": "23.35s,6102/16595" }, { "epoch": 1.8388068695390176, "eta": "68:41:36", "grad_norm": 0.0058, "loss": 0.0406, "lr": "3.572e-05", "step": 6103, "steps": "23.57s,6103/16595" }, { "epoch": 1.8391081651099728, "eta": "68:46:27", "grad_norm": 0.0044, "loss": 0.067, "lr": "3.571e-05", "step": 6104, "steps": "23.6s,6104/16595" }, { "epoch": 1.839409460680928, "eta": "70:34:27", "grad_norm": 0.0054, "loss": 0.0556, "lr": "3.571e-05", "step": 6105, "steps": "24.22s,6105/16595" }, { "epoch": 1.839710756251883, "eta": "68:50:55", "grad_norm": 0.0055, "loss": 0.0489, "lr": "3.570e-05", "step": 6106, "steps": "23.63s,6106/16595" }, { "epoch": 1.8400120518228382, "eta": "69:21:59", "grad_norm": 0.0041, "loss": 0.058, "lr": "3.570e-05", "step": 6107, "steps": "23.81s,6107/16595" }, { "epoch": 1.8403133473937934, "eta": "68:39:38", "grad_norm": 0.0066, "loss": 0.0646, "lr": "3.570e-05", "step": 6108, "steps": "23.57s,6108/16595" }, { "epoch": 1.8406146429647485, "eta": "69:05:27", "grad_norm": 0.0063, "loss": 0.0473, "lr": "3.569e-05", "step": 6109, "steps": "23.72s,6109/16595" }, { "epoch": 1.8409159385357037, "eta": "68:02:09", "grad_norm": 0.0066, "loss": 0.0584, "lr": "3.569e-05", "step": 6110, "steps": "23.36s,6110/16595" }, { "epoch": 1.8412172341066586, "eta": "69:01:10", "grad_norm": 0.0056, "loss": 0.0432, "lr": "3.568e-05", "step": 6111, "steps": "23.7s,6111/16595" }, { "epoch": 1.8415185296776138, "eta": "68:39:49", "grad_norm": 0.0061, "loss": 0.0536, "lr": "3.568e-05", "step": 6112, "steps": "23.58s,6112/16595" }, { "epoch": 1.8418198252485687, "eta": "68:34:11", "grad_norm": 0.0038, "loss": 0.0494, "lr": "3.567e-05", "step": 6113, "steps": "23.55s,6113/16595" }, { "epoch": 1.8421211208195238, "eta": "68:26:48", "grad_norm": 0.0057, "loss": 0.0575, "lr": "3.567e-05", "step": 6114, "steps": "23.51s,6114/16595" }, { "epoch": 1.842422416390479, "eta": "68:33:24", "grad_norm": 0.0042, "loss": 0.0638, "lr": "3.567e-05", "step": 6115, "steps": "23.55s,6115/16595" }, { "epoch": 1.8427237119614341, "eta": "69:44:36", "grad_norm": 0.026, "loss": 0.0421, "lr": "3.566e-05", "step": 6116, "steps": "23.96s,6116/16595" }, { "epoch": 1.8430250075323893, "eta": "69:30:14", "grad_norm": 0.0053, "loss": 0.0447, "lr": "3.566e-05", "step": 6117, "steps": "23.88s,6117/16595" }, { "epoch": 1.8433263031033444, "eta": "69:35:05", "grad_norm": 0.0054, "loss": 0.0574, "lr": "3.565e-05", "step": 6118, "steps": "23.91s,6118/16595" }, { "epoch": 1.8436275986742996, "eta": "69:31:11", "grad_norm": 0.0053, "loss": 0.0478, "lr": "3.565e-05", "step": 6119, "steps": "23.89s,6119/16595" }, { "epoch": 1.8439288942452547, "eta": "69:29:03", "grad_norm": 0.0042, "loss": 0.0556, "lr": "3.564e-05", "step": 6120, "steps": "23.88s,6120/16595" }, { "epoch": 1.8442301898162097, "eta": "69:14:41", "grad_norm": 0.0048, "loss": 0.0567, "lr": "3.564e-05", "step": 6121, "steps": "23.8s,6121/16595" }, { "epoch": 1.8445314853871648, "eta": "69:29:59", "grad_norm": 0.0049, "loss": 0.0393, "lr": "3.564e-05", "step": 6122, "steps": "23.89s,6122/16595" }, { "epoch": 1.8448327809581198, "eta": "69:05:10", "grad_norm": 0.0056, "loss": 0.06, "lr": "3.563e-05", "step": 6123, "steps": "23.75s,6123/16595" }, { "epoch": 1.845134076529075, "eta": "69:39:40", "grad_norm": 0.0053, "loss": 0.0486, "lr": "3.563e-05", "step": 6124, "steps": "23.95s,6124/16595" }, { "epoch": 1.84543537210003, "eta": "68:32:57", "grad_norm": 0.0055, "loss": 0.0517, "lr": "3.562e-05", "step": 6125, "steps": "23.57s,6125/16595" }, { "epoch": 1.8457366676709852, "eta": "68:55:15", "grad_norm": 0.0046, "loss": 0.0536, "lr": "3.562e-05", "step": 6126, "steps": "23.7s,6126/16595" }, { "epoch": 1.8460379632419404, "eta": "68:33:55", "grad_norm": 0.0051, "loss": 0.054, "lr": "3.561e-05", "step": 6127, "steps": "23.58s,6127/16595" }, { "epoch": 1.8463392588128955, "eta": "69:01:26", "grad_norm": 0.005, "loss": 0.0696, "lr": "3.561e-05", "step": 6128, "steps": "23.74s,6128/16595" }, { "epoch": 1.8466405543838507, "eta": "68:57:33", "grad_norm": 0.0053, "loss": 0.0645, "lr": "3.560e-05", "step": 6129, "steps": "23.72s,6129/16595" }, { "epoch": 1.8469418499548058, "eta": "69:14:36", "grad_norm": 0.0054, "loss": 0.0564, "lr": "3.560e-05", "step": 6130, "steps": "23.82s,6130/16595" }, { "epoch": 1.8472431455257607, "eta": "68:42:48", "grad_norm": 0.0056, "loss": 0.0565, "lr": "3.560e-05", "step": 6131, "steps": "23.64s,6131/16595" }, { "epoch": 1.847544441096716, "eta": "68:58:06", "grad_norm": 0.0047, "loss": 0.0413, "lr": "3.559e-05", "step": 6132, "steps": "23.73s,6132/16595" }, { "epoch": 1.8478457366676708, "eta": "70:30:08", "grad_norm": 0.0051, "loss": 0.0553, "lr": "3.559e-05", "step": 6133, "steps": "24.26s,6133/16595" }, { "epoch": 1.848147032238626, "eta": "69:35:40", "grad_norm": 0.0055, "loss": 0.0441, "lr": "3.558e-05", "step": 6134, "steps": "23.95s,6134/16595" }, { "epoch": 1.8484483278095811, "eta": "69:31:47", "grad_norm": 0.006, "loss": 0.0389, "lr": "3.558e-05", "step": 6135, "steps": "23.93s,6135/16595" }, { "epoch": 1.8487496233805363, "eta": "69:27:54", "grad_norm": 0.0044, "loss": 0.0796, "lr": "3.557e-05", "step": 6136, "steps": "23.91s,6136/16595" }, { "epoch": 1.8490509189514914, "eta": "68:42:11", "grad_norm": 0.0051, "loss": 0.0543, "lr": "3.557e-05", "step": 6137, "steps": "23.65s,6137/16595" }, { "epoch": 1.8493522145224466, "eta": "67:30:20", "grad_norm": 0.0063, "loss": 0.0612, "lr": "3.557e-05", "step": 6138, "steps": "23.24s,6138/16595" }, { "epoch": 1.8496535100934017, "eta": "69:11:01", "grad_norm": 0.0102, "loss": 0.0574, "lr": "3.556e-05", "step": 6139, "steps": "23.82s,6139/16595" }, { "epoch": 1.8499548056643569, "eta": "69:24:34", "grad_norm": 0.0052, "loss": 0.0652, "lr": "3.556e-05", "step": 6140, "steps": "23.9s,6140/16595" }, { "epoch": 1.8502561012353118, "eta": "68:45:50", "grad_norm": 0.0065, "loss": 0.0556, "lr": "3.555e-05", "step": 6141, "steps": "23.68s,6141/16595" }, { "epoch": 1.850557396806267, "eta": "68:24:32", "grad_norm": 0.0048, "loss": 0.0602, "lr": "3.555e-05", "step": 6142, "steps": "23.56s,6142/16595" }, { "epoch": 1.850858692377222, "eta": "69:26:51", "grad_norm": 0.0058, "loss": 0.0605, "lr": "3.554e-05", "step": 6143, "steps": "23.92s,6143/16595" }, { "epoch": 1.851159987948177, "eta": "68:44:39", "grad_norm": 0.0061, "loss": 0.042, "lr": "3.554e-05", "step": 6144, "steps": "23.68s,6144/16595" }, { "epoch": 1.8514612835191322, "eta": "68:47:45", "grad_norm": 0.0063, "loss": 0.0494, "lr": "3.554e-05", "step": 6145, "steps": "23.7s,6145/16595" }, { "epoch": 1.8517625790900873, "eta": "68:52:34", "grad_norm": 0.0051, "loss": 0.0436, "lr": "3.553e-05", "step": 6146, "steps": "23.73s,6146/16595" }, { "epoch": 1.8520638746610425, "eta": "68:45:13", "grad_norm": 0.0056, "loss": 0.0521, "lr": "3.553e-05", "step": 6147, "steps": "23.69s,6147/16595" }, { "epoch": 1.8523651702319976, "eta": "68:39:36", "grad_norm": 0.0134, "loss": 0.0462, "lr": "3.552e-05", "step": 6148, "steps": "23.66s,6148/16595" }, { "epoch": 1.8526664658029528, "eta": "69:19:14", "grad_norm": 0.006, "loss": 0.0494, "lr": "3.552e-05", "step": 6149, "steps": "23.89s,6149/16595" }, { "epoch": 1.852967761373908, "eta": "68:19:39", "grad_norm": 0.0061, "loss": 0.0447, "lr": "3.551e-05", "step": 6150, "steps": "23.55s,6150/16595" }, { "epoch": 1.8532690569448629, "eta": "68:50:36", "grad_norm": 0.0054, "loss": 0.0509, "lr": "3.551e-05", "step": 6151, "steps": "23.73s,6151/16595" }, { "epoch": 1.853570352515818, "eta": "69:14:34", "grad_norm": 0.0054, "loss": 0.044, "lr": "3.551e-05", "step": 6152, "steps": "23.87s,6152/16595" }, { "epoch": 1.853871648086773, "eta": "68:16:44", "grad_norm": 0.0056, "loss": 0.0475, "lr": "3.550e-05", "step": 6153, "steps": "23.54s,6153/16595" }, { "epoch": 1.8541729436577281, "eta": "70:21:38", "grad_norm": 0.0055, "loss": 0.0375, "lr": "3.550e-05", "step": 6154, "steps": "24.26s,6154/16595" }, { "epoch": 1.8544742392286833, "eta": "68:33:21", "grad_norm": 0.006, "loss": 0.0451, "lr": "3.549e-05", "step": 6155, "steps": "23.64s,6155/16595" }, { "epoch": 1.8547755347996384, "eta": "68:39:55", "grad_norm": 0.0067, "loss": 0.0436, "lr": "3.549e-05", "step": 6156, "steps": "23.68s,6156/16595" }, { "epoch": 1.8550768303705936, "eta": "69:23:01", "grad_norm": 0.0052, "loss": 0.0519, "lr": "3.548e-05", "step": 6157, "steps": "23.93s,6157/16595" }, { "epoch": 1.8553781259415487, "eta": "69:24:21", "grad_norm": 0.0047, "loss": 0.0533, "lr": "3.548e-05", "step": 6158, "steps": "23.94s,6158/16595" }, { "epoch": 1.8556794215125039, "eta": "68:17:52", "grad_norm": 0.0044, "loss": 0.0574, "lr": "3.548e-05", "step": 6159, "steps": "23.56s,6159/16595" }, { "epoch": 1.855980717083459, "eta": "69:28:46", "grad_norm": 0.0044, "loss": 0.0592, "lr": "3.547e-05", "step": 6160, "steps": "23.97s,6160/16595" }, { "epoch": 1.856282012654414, "eta": "68:18:49", "grad_norm": 0.0054, "loss": 0.0527, "lr": "3.547e-05", "step": 6161, "steps": "23.57s,6161/16595" }, { "epoch": 1.856583308225369, "eta": "68:49:43", "grad_norm": 0.0059, "loss": 0.0632, "lr": "3.546e-05", "step": 6162, "steps": "23.75s,6162/16595" }, { "epoch": 1.856884603796324, "eta": "69:10:11", "grad_norm": 0.0048, "loss": 0.0432, "lr": "3.546e-05", "step": 6163, "steps": "23.87s,6163/16595" }, { "epoch": 1.8571858993672792, "eta": "68:41:59", "grad_norm": 0.0055, "loss": 0.0623, "lr": "3.545e-05", "step": 6164, "steps": "23.71s,6164/16595" }, { "epoch": 1.8574871949382343, "eta": "69:23:18", "grad_norm": 0.0061, "loss": 0.0597, "lr": "3.545e-05", "step": 6165, "steps": "23.95s,6165/16595" }, { "epoch": 1.8577884905091895, "eta": "68:39:27", "grad_norm": 0.0057, "loss": 0.049, "lr": "3.544e-05", "step": 6166, "steps": "23.7s,6166/16595" }, { "epoch": 1.8580897860801446, "eta": "69:10:20", "grad_norm": 0.005, "loss": 0.0447, "lr": "3.544e-05", "step": 6167, "steps": "23.88s,6167/16595" }, { "epoch": 1.8583910816510998, "eta": "69:04:43", "grad_norm": 0.0056, "loss": 0.0552, "lr": "3.544e-05", "step": 6168, "steps": "23.85s,6168/16595" }, { "epoch": 1.858692377222055, "eta": "68:24:22", "grad_norm": 0.0048, "loss": 0.0555, "lr": "3.543e-05", "step": 6169, "steps": "23.62s,6169/16595" }, { "epoch": 1.85899367279301, "eta": "68:11:48", "grad_norm": 0.0045, "loss": 0.0578, "lr": "3.543e-05", "step": 6170, "steps": "23.55s,6170/16595" }, { "epoch": 1.859294968363965, "eta": "67:54:02", "grad_norm": 0.0062, "loss": 0.0661, "lr": "3.542e-05", "step": 6171, "steps": "23.45s,6171/16595" }, { "epoch": 1.8595962639349202, "eta": "69:13:33", "grad_norm": 0.0052, "loss": 0.0566, "lr": "3.542e-05", "step": 6172, "steps": "23.91s,6172/16595" }, { "epoch": 1.859897559505875, "eta": "68:19:19", "grad_norm": 0.0048, "loss": 0.049, "lr": "3.541e-05", "step": 6173, "steps": "23.6s,6173/16595" }, { "epoch": 1.8601988550768302, "eta": "68:20:39", "grad_norm": 0.0053, "loss": 0.0512, "lr": "3.541e-05", "step": 6174, "steps": "23.61s,6174/16595" }, { "epoch": 1.8605001506477854, "eta": "68:44:35", "grad_norm": 0.0047, "loss": 0.048, "lr": "3.541e-05", "step": 6175, "steps": "23.75s,6175/16595" }, { "epoch": 1.8608014462187406, "eta": "68:54:36", "grad_norm": 0.0044, "loss": 0.0469, "lr": "3.540e-05", "step": 6176, "steps": "23.81s,6176/16595" }, { "epoch": 1.8611027417896957, "eta": "69:08:06", "grad_norm": 0.0056, "loss": 0.056, "lr": "3.540e-05", "step": 6177, "steps": "23.89s,6177/16595" }, { "epoch": 1.8614040373606509, "eta": "68:29:30", "grad_norm": 0.0053, "loss": 0.0566, "lr": "3.539e-05", "step": 6178, "steps": "23.67s,6178/16595" }, { "epoch": 1.861705332931606, "eta": "68:25:38", "grad_norm": 0.0066, "loss": 0.0381, "lr": "3.539e-05", "step": 6179, "steps": "23.65s,6179/16595" }, { "epoch": 1.8620066285025612, "eta": "68:02:40", "grad_norm": 0.0051, "loss": 0.0537, "lr": "3.538e-05", "step": 6180, "steps": "23.52s,6180/16595" }, { "epoch": 1.862307924073516, "eta": "69:37:44", "grad_norm": 0.0058, "loss": 0.0592, "lr": "3.538e-05", "step": 6181, "steps": "24.07s,6181/16595" }, { "epoch": 1.8626092196444712, "eta": "68:59:10", "grad_norm": 0.0098, "loss": 0.0538, "lr": "3.538e-05", "step": 6182, "steps": "23.85s,6182/16595" }, { "epoch": 1.8629105152154264, "eta": "69:10:55", "grad_norm": 0.0053, "loss": 0.0464, "lr": "3.537e-05", "step": 6183, "steps": "23.92s,6183/16595" }, { "epoch": 1.8632118107863813, "eta": "68:37:33", "grad_norm": 0.0049, "loss": 0.0441, "lr": "3.537e-05", "step": 6184, "steps": "23.73s,6184/16595" }, { "epoch": 1.8635131063573365, "eta": "67:46:50", "grad_norm": 0.0052, "loss": 0.0526, "lr": "3.536e-05", "step": 6185, "steps": "23.44s,6185/16595" }, { "epoch": 1.8638144019282916, "eta": "68:33:17", "grad_norm": 0.0049, "loss": 0.0467, "lr": "3.536e-05", "step": 6186, "steps": "23.71s,6186/16595" }, { "epoch": 1.8641156974992468, "eta": "68:48:30", "grad_norm": 0.0049, "loss": 0.0611, "lr": "3.535e-05", "step": 6187, "steps": "23.8s,6187/16595" }, { "epoch": 1.864416993070202, "eta": "68:15:09", "grad_norm": 0.0057, "loss": 0.0543, "lr": "3.535e-05", "step": 6188, "steps": "23.61s,6188/16595" }, { "epoch": 1.864718288641157, "eta": "68:18:13", "grad_norm": 0.0065, "loss": 0.0425, "lr": "3.534e-05", "step": 6189, "steps": "23.63s,6189/16595" }, { "epoch": 1.8650195842121122, "eta": "68:02:13", "grad_norm": 0.0049, "loss": 0.0574, "lr": "3.534e-05", "step": 6190, "steps": "23.54s,6190/16595" }, { "epoch": 1.8653208797830672, "eta": "69:14:39", "grad_norm": 0.0049, "loss": 0.0578, "lr": "3.534e-05", "step": 6191, "steps": "23.96s,6191/16595" }, { "epoch": 1.8656221753540223, "eta": "68:23:59", "grad_norm": 0.0075, "loss": 0.053, "lr": "3.533e-05", "step": 6192, "steps": "23.67s,6192/16595" }, { "epoch": 1.8659234709249775, "eta": "68:53:03", "grad_norm": 0.0058, "loss": 0.0502, "lr": "3.533e-05", "step": 6193, "steps": "23.84s,6193/16595" }, { "epoch": 1.8662247664959324, "eta": "69:11:43", "grad_norm": 0.0063, "loss": 0.0416, "lr": "3.532e-05", "step": 6194, "steps": "23.95s,6194/16595" }, { "epoch": 1.8665260620668875, "eta": "68:33:12", "grad_norm": 0.0052, "loss": 0.0551, "lr": "3.532e-05", "step": 6195, "steps": "23.73s,6195/16595" }, { "epoch": 1.8668273576378427, "eta": "69:04:00", "grad_norm": 0.0048, "loss": 0.0462, "lr": "3.531e-05", "step": 6196, "steps": "23.91s,6196/16595" }, { "epoch": 1.8671286532087978, "eta": "68:30:40", "grad_norm": 0.0062, "loss": 0.056, "lr": "3.531e-05", "step": 6197, "steps": "23.72s,6197/16595" }, { "epoch": 1.867429948779753, "eta": "69:15:20", "grad_norm": 0.0055, "loss": 0.0616, "lr": "3.531e-05", "step": 6198, "steps": "23.98s,6198/16595" }, { "epoch": 1.8677312443507081, "eta": "68:29:53", "grad_norm": 0.0052, "loss": 0.0356, "lr": "3.530e-05", "step": 6199, "steps": "23.72s,6199/16595" }, { "epoch": 1.8680325399216633, "eta": "68:50:16", "grad_norm": 0.0056, "loss": 0.0635, "lr": "3.530e-05", "step": 6200, "steps": "23.84s,6200/16595" }, { "epoch": 1.8683338354926182, "eta": "144:47:39", "grad_norm": 0.0061, "loss": 0.0526, "lr": "3.529e-05", "step": 6201, "steps": "50.15s,6201/16595" }, { "epoch": 1.8686351310635734, "eta": "69:24:07", "grad_norm": 0.0055, "loss": 0.0593, "lr": "3.529e-05", "step": 6202, "steps": "24.04s,6202/16595" }, { "epoch": 1.8689364266345285, "eta": "68:52:33", "grad_norm": 0.0051, "loss": 0.0636, "lr": "3.528e-05", "step": 6203, "steps": "23.86s,6203/16595" }, { "epoch": 1.8692377222054835, "eta": "69:16:24", "grad_norm": 0.006, "loss": 0.0542, "lr": "3.528e-05", "step": 6204, "steps": "24.0s,6204/16595" }, { "epoch": 1.8695390177764386, "eta": "68:10:11", "grad_norm": 0.006, "loss": 0.0496, "lr": "3.528e-05", "step": 6205, "steps": "23.62s,6205/16595" }, { "epoch": 1.8698403133473938, "eta": "68:01:08", "grad_norm": 0.0055, "loss": 0.0494, "lr": "3.527e-05", "step": 6206, "steps": "23.57s,6206/16595" }, { "epoch": 1.870141608918349, "eta": "68:38:50", "grad_norm": 0.007, "loss": 0.0617, "lr": "3.527e-05", "step": 6207, "steps": "23.79s,6207/16595" }, { "epoch": 1.870442904489304, "eta": "68:00:21", "grad_norm": 0.0103, "loss": 0.0666, "lr": "3.526e-05", "step": 6208, "steps": "23.57s,6208/16595" }, { "epoch": 1.8707442000602592, "eta": "68:50:09", "grad_norm": 0.0056, "loss": 0.0663, "lr": "3.526e-05", "step": 6209, "steps": "23.86s,6209/16595" }, { "epoch": 1.8710454956312144, "eta": "69:26:06", "grad_norm": 0.0062, "loss": 0.0637, "lr": "3.525e-05", "step": 6210, "steps": "24.07s,6210/16595" }, { "epoch": 1.8713467912021693, "eta": "68:35:31", "grad_norm": 0.0054, "loss": 0.0638, "lr": "3.525e-05", "step": 6211, "steps": "23.78s,6211/16595" }, { "epoch": 1.8716480867731244, "eta": "67:55:19", "grad_norm": 0.0055, "loss": 0.0481, "lr": "3.524e-05", "step": 6212, "steps": "23.55s,6212/16595" }, { "epoch": 1.8719493823440796, "eta": "68:26:04", "grad_norm": 0.0051, "loss": 0.0664, "lr": "3.524e-05", "step": 6213, "steps": "23.73s,6213/16595" }, { "epoch": 1.8722506779150345, "eta": "67:54:32", "grad_norm": 0.0055, "loss": 0.0489, "lr": "3.524e-05", "step": 6214, "steps": "23.55s,6214/16595" }, { "epoch": 1.8725519734859897, "eta": "68:02:48", "grad_norm": 0.0058, "loss": 0.0502, "lr": "3.523e-05", "step": 6215, "steps": "23.6s,6215/16595" }, { "epoch": 1.8728532690569448, "eta": "68:21:26", "grad_norm": 0.0051, "loss": 0.0416, "lr": "3.523e-05", "step": 6216, "steps": "23.71s,6216/16595" }, { "epoch": 1.8731545646279, "eta": "68:26:13", "grad_norm": 0.0054, "loss": 0.0496, "lr": "3.522e-05", "step": 6217, "steps": "23.74s,6217/16595" }, { "epoch": 1.8734558601988551, "eta": "68:41:23", "grad_norm": 0.007, "loss": 0.0559, "lr": "3.522e-05", "step": 6218, "steps": "23.83s,6218/16595" }, { "epoch": 1.8737571557698103, "eta": "67:42:12", "grad_norm": 0.0051, "loss": 0.049, "lr": "3.521e-05", "step": 6219, "steps": "23.49s,6219/16595" }, { "epoch": 1.8740584513407654, "eta": "67:57:22", "grad_norm": 0.0063, "loss": 0.047, "lr": "3.521e-05", "step": 6220, "steps": "23.58s,6220/16595" }, { "epoch": 1.8743597469117204, "eta": "68:35:01", "grad_norm": 0.0049, "loss": 0.0424, "lr": "3.521e-05", "step": 6221, "steps": "23.8s,6221/16595" }, { "epoch": 1.8746610424826755, "eta": "68:34:37", "grad_norm": 0.0052, "loss": 0.0561, "lr": "3.520e-05", "step": 6222, "steps": "23.8s,6222/16595" }, { "epoch": 1.8749623380536307, "eta": "68:13:28", "grad_norm": 0.0051, "loss": 0.0524, "lr": "3.520e-05", "step": 6223, "steps": "23.68s,6223/16595" }, { "epoch": 1.8752636336245856, "eta": "68:00:59", "grad_norm": 0.0037, "loss": 0.0526, "lr": "3.519e-05", "step": 6224, "steps": "23.61s,6224/16595" }, { "epoch": 1.8755649291955407, "eta": "68:02:19", "grad_norm": 0.0051, "loss": 0.0567, "lr": "3.519e-05", "step": 6225, "steps": "23.62s,6225/16595" }, { "epoch": 1.875866224766496, "eta": "68:10:34", "grad_norm": 0.0071, "loss": 0.0504, "lr": "3.518e-05", "step": 6226, "steps": "23.67s,6226/16595" }, { "epoch": 1.876167520337451, "eta": "68:43:00", "grad_norm": 0.0056, "loss": 0.0427, "lr": "3.518e-05", "step": 6227, "steps": "23.86s,6227/16595" }, { "epoch": 1.8764688159084062, "eta": "68:58:09", "grad_norm": 0.0055, "loss": 0.0448, "lr": "3.518e-05", "step": 6228, "steps": "23.95s,6228/16595" }, { "epoch": 1.8767701114793613, "eta": "68:50:51", "grad_norm": 0.005, "loss": 0.0503, "lr": "3.517e-05", "step": 6229, "steps": "23.91s,6229/16595" }, { "epoch": 1.8770714070503165, "eta": "67:49:59", "grad_norm": 0.005, "loss": 0.0577, "lr": "3.517e-05", "step": 6230, "steps": "23.56s,6230/16595" }, { "epoch": 1.8773727026212714, "eta": "68:44:52", "grad_norm": 0.0047, "loss": 0.0438, "lr": "3.516e-05", "step": 6231, "steps": "23.88s,6231/16595" }, { "epoch": 1.8776739981922266, "eta": "67:50:55", "grad_norm": 0.006, "loss": 0.0772, "lr": "3.516e-05", "step": 6232, "steps": "23.57s,6232/16595" }, { "epoch": 1.8779752937631817, "eta": "68:33:42", "grad_norm": 0.0058, "loss": 0.0482, "lr": "3.515e-05", "step": 6233, "steps": "23.82s,6233/16595" }, { "epoch": 1.8782765893341367, "eta": "68:59:13", "grad_norm": 0.0066, "loss": 0.0528, "lr": "3.515e-05", "step": 6234, "steps": "23.97s,6234/16595" }, { "epoch": 1.8785778849050918, "eta": "68:20:50", "grad_norm": 0.0052, "loss": 0.0522, "lr": "3.514e-05", "step": 6235, "steps": "23.75s,6235/16595" }, { "epoch": 1.878879180476047, "eta": "67:40:43", "grad_norm": 0.0053, "loss": 0.0633, "lr": "3.514e-05", "step": 6236, "steps": "23.52s,6236/16595" }, { "epoch": 1.8791804760470021, "eta": "68:25:13", "grad_norm": 0.006, "loss": 0.044, "lr": "3.514e-05", "step": 6237, "steps": "23.78s,6237/16595" }, { "epoch": 1.8794817716179573, "eta": "68:30:00", "grad_norm": 0.0058, "loss": 0.0465, "lr": "3.513e-05", "step": 6238, "steps": "23.81s,6238/16595" }, { "epoch": 1.8797830671889124, "eta": "67:48:10", "grad_norm": 0.0054, "loss": 0.0648, "lr": "3.513e-05", "step": 6239, "steps": "23.57s,6239/16595" }, { "epoch": 1.8800843627598676, "eta": "68:17:07", "grad_norm": 0.0057, "loss": 0.0426, "lr": "3.512e-05", "step": 6240, "steps": "23.74s,6240/16595" }, { "epoch": 1.8803856583308225, "eta": "68:35:42", "grad_norm": 0.0047, "loss": 0.0492, "lr": "3.512e-05", "step": 6241, "steps": "23.85s,6241/16595" }, { "epoch": 1.8806869539017776, "eta": "68:09:26", "grad_norm": 0.0049, "loss": 0.0452, "lr": "3.511e-05", "step": 6242, "steps": "23.7s,6242/16595" }, { "epoch": 1.8809882494727328, "eta": "67:55:14", "grad_norm": 0.0056, "loss": 0.0455, "lr": "3.511e-05", "step": 6243, "steps": "23.62s,6243/16595" }, { "epoch": 1.8812895450436877, "eta": "67:44:29", "grad_norm": 0.005, "loss": 0.048, "lr": "3.511e-05", "step": 6244, "steps": "23.56s,6244/16595" }, { "epoch": 1.8815908406146429, "eta": "68:09:58", "grad_norm": 0.0058, "loss": 0.0662, "lr": "3.510e-05", "step": 6245, "steps": "23.71s,6245/16595" }, { "epoch": 1.881892136185598, "eta": "67:47:09", "grad_norm": 0.0076, "loss": 0.0404, "lr": "3.510e-05", "step": 6246, "steps": "23.58s,6246/16595" }, { "epoch": 1.8821934317565532, "eta": "68:10:54", "grad_norm": 0.0062, "loss": 0.0533, "lr": "3.509e-05", "step": 6247, "steps": "23.72s,6247/16595" }, { "epoch": 1.8824947273275083, "eta": "68:07:03", "grad_norm": 0.006, "loss": 0.052, "lr": "3.509e-05", "step": 6248, "steps": "23.7s,6248/16595" }, { "epoch": 1.8827960228984635, "eta": "68:29:05", "grad_norm": 0.0051, "loss": 0.0422, "lr": "3.508e-05", "step": 6249, "steps": "23.83s,6249/16595" }, { "epoch": 1.8830973184694186, "eta": "67:35:14", "grad_norm": 0.0055, "loss": 0.053, "lr": "3.508e-05", "step": 6250, "steps": "23.52s,6250/16595" }, { "epoch": 1.8833986140403736, "eta": "68:35:11", "grad_norm": 0.006, "loss": 0.0395, "lr": "3.507e-05", "step": 6251, "steps": "23.87s,6251/16595" }, { "epoch": 1.8836999096113287, "eta": "67:44:47", "grad_norm": 0.0065, "loss": 0.0613, "lr": "3.507e-05", "step": 6252, "steps": "23.58s,6252/16595" }, { "epoch": 1.8840012051822839, "eta": "68:51:37", "grad_norm": 0.0074, "loss": 0.0332, "lr": "3.507e-05", "step": 6253, "steps": "23.97s,6253/16595" }, { "epoch": 1.8843025007532388, "eta": "68:42:36", "grad_norm": 0.0059, "loss": 0.0567, "lr": "3.506e-05", "step": 6254, "steps": "23.92s,6254/16595" }, { "epoch": 1.884603796324194, "eta": "67:53:57", "grad_norm": 0.0055, "loss": 0.0374, "lr": "3.506e-05", "step": 6255, "steps": "23.64s,6255/16595" }, { "epoch": 1.884905091895149, "eta": "68:17:41", "grad_norm": 0.0055, "loss": 0.071, "lr": "3.505e-05", "step": 6256, "steps": "23.78s,6256/16595" }, { "epoch": 1.8852063874661042, "eta": "68:22:27", "grad_norm": 0.0051, "loss": 0.058, "lr": "3.505e-05", "step": 6257, "steps": "23.81s,6257/16595" }, { "epoch": 1.8855076830370594, "eta": "68:53:04", "grad_norm": 0.0053, "loss": 0.0455, "lr": "3.504e-05", "step": 6258, "steps": "23.99s,6258/16595" }, { "epoch": 1.8858089786080146, "eta": "68:49:13", "grad_norm": 0.0053, "loss": 0.0413, "lr": "3.504e-05", "step": 6259, "steps": "23.97s,6259/16595" }, { "epoch": 1.8861102741789697, "eta": "67:46:49", "grad_norm": 0.0053, "loss": 0.0508, "lr": "3.504e-05", "step": 6260, "steps": "23.61s,6260/16595" }, { "epoch": 1.8864115697499246, "eta": "67:22:18", "grad_norm": 0.0067, "loss": 0.053, "lr": "3.503e-05", "step": 6261, "steps": "23.47s,6261/16595" }, { "epoch": 1.8867128653208798, "eta": "68:06:42", "grad_norm": 0.0062, "loss": 0.0543, "lr": "3.503e-05", "step": 6262, "steps": "23.73s,6262/16595" }, { "epoch": 1.887014160891835, "eta": "68:32:08", "grad_norm": 0.0059, "loss": 0.0731, "lr": "3.502e-05", "step": 6263, "steps": "23.88s,6263/16595" }, { "epoch": 1.8873154564627899, "eta": "67:46:58", "grad_norm": 0.0051, "loss": 0.0499, "lr": "3.502e-05", "step": 6264, "steps": "23.62s,6264/16595" }, { "epoch": 1.887616752033745, "eta": "67:46:34", "grad_norm": 0.0063, "loss": 0.0564, "lr": "3.501e-05", "step": 6265, "steps": "23.62s,6265/16595" }, { "epoch": 1.8879180476047002, "eta": "68:17:10", "grad_norm": 0.0043, "loss": 0.0475, "lr": "3.501e-05", "step": 6266, "steps": "23.8s,6266/16595" }, { "epoch": 1.8882193431756553, "eta": "67:57:50", "grad_norm": 0.0053, "loss": 0.0512, "lr": "3.500e-05", "step": 6267, "steps": "23.69s,6267/16595" }, { "epoch": 1.8885206387466105, "eta": "67:38:30", "grad_norm": 0.0063, "loss": 0.0658, "lr": "3.500e-05", "step": 6268, "steps": "23.58s,6268/16595" }, { "epoch": 1.8888219343175656, "eta": "68:26:18", "grad_norm": 0.0056, "loss": 0.0492, "lr": "3.500e-05", "step": 6269, "steps": "23.86s,6269/16595" }, { "epoch": 1.8891232298885208, "eta": "68:36:14", "grad_norm": 0.0069, "loss": 0.0602, "lr": "3.499e-05", "step": 6270, "steps": "23.92s,6270/16595" }, { "epoch": 1.8894245254594757, "eta": "67:35:36", "grad_norm": 0.0068, "loss": 0.0361, "lr": "3.499e-05", "step": 6271, "steps": "23.57s,6271/16595" }, { "epoch": 1.8897258210304309, "eta": "68:37:09", "grad_norm": 0.006, "loss": 0.0578, "lr": "3.498e-05", "step": 6272, "steps": "23.93s,6272/16595" }, { "epoch": 1.890027116601386, "eta": "69:07:43", "grad_norm": 0.0059, "loss": 0.0548, "lr": "3.498e-05", "step": 6273, "steps": "24.11s,6273/16595" }, { "epoch": 1.890328412172341, "eta": "68:39:47", "grad_norm": 0.0057, "loss": 0.0599, "lr": "3.497e-05", "step": 6274, "steps": "23.95s,6274/16595" }, { "epoch": 1.890629707743296, "eta": "68:42:50", "grad_norm": 0.0055, "loss": 0.0589, "lr": "3.497e-05", "step": 6275, "steps": "23.97s,6275/16595" }, { "epoch": 1.8909310033142512, "eta": "69:01:21", "grad_norm": 0.0052, "loss": 0.0683, "lr": "3.497e-05", "step": 6276, "steps": "24.08s,6276/16595" }, { "epoch": 1.8912322988852064, "eta": "67:33:15", "grad_norm": 0.0054, "loss": 0.0591, "lr": "3.496e-05", "step": 6277, "steps": "23.57s,6277/16595" }, { "epoch": 1.8915335944561615, "eta": "68:38:12", "grad_norm": 0.0054, "loss": 0.055, "lr": "3.496e-05", "step": 6278, "steps": "23.95s,6278/16595" }, { "epoch": 1.8918348900271167, "eta": "67:29:01", "grad_norm": 0.0044, "loss": 0.0562, "lr": "3.495e-05", "step": 6279, "steps": "23.55s,6279/16595" }, { "epoch": 1.8921361855980718, "eta": "68:04:44", "grad_norm": 0.0049, "loss": 0.0444, "lr": "3.495e-05", "step": 6280, "steps": "23.76s,6280/16595" }, { "epoch": 1.8924374811690268, "eta": "67:38:33", "grad_norm": 0.0059, "loss": 0.0463, "lr": "3.494e-05", "step": 6281, "steps": "23.61s,6281/16595" }, { "epoch": 1.892738776739982, "eta": "67:53:38", "grad_norm": 0.0051, "loss": 0.0411, "lr": "3.494e-05", "step": 6282, "steps": "23.7s,6282/16595" }, { "epoch": 1.893040072310937, "eta": "67:42:55", "grad_norm": 0.006, "loss": 0.0398, "lr": "3.493e-05", "step": 6283, "steps": "23.64s,6283/16595" }, { "epoch": 1.893341367881892, "eta": "67:44:15", "grad_norm": 0.0058, "loss": 0.0556, "lr": "3.493e-05", "step": 6284, "steps": "23.65s,6284/16595" }, { "epoch": 1.8936426634528472, "eta": "67:57:36", "grad_norm": 0.0057, "loss": 0.0593, "lr": "3.493e-05", "step": 6285, "steps": "23.73s,6285/16595" }, { "epoch": 1.8939439590238023, "eta": "67:31:26", "grad_norm": 0.006, "loss": 0.0527, "lr": "3.492e-05", "step": 6286, "steps": "23.58s,6286/16595" }, { "epoch": 1.8942452545947575, "eta": "68:24:18", "grad_norm": 0.0047, "loss": 0.0497, "lr": "3.492e-05", "step": 6287, "steps": "23.89s,6287/16595" }, { "epoch": 1.8945465501657126, "eta": "68:30:46", "grad_norm": 0.005, "loss": 0.039, "lr": "3.491e-05", "step": 6288, "steps": "23.93s,6288/16595" }, { "epoch": 1.8948478457366678, "eta": "69:04:43", "grad_norm": 0.0056, "loss": 0.0473, "lr": "3.491e-05", "step": 6289, "steps": "24.13s,6289/16595" }, { "epoch": 1.895149141307623, "eta": "67:21:16", "grad_norm": 0.0132, "loss": 0.0404, "lr": "3.490e-05", "step": 6290, "steps": "23.53s,6290/16595" }, { "epoch": 1.8954504368785778, "eta": "67:38:03", "grad_norm": 0.0064, "loss": 0.0553, "lr": "3.490e-05", "step": 6291, "steps": "23.63s,6291/16595" }, { "epoch": 1.895751732449533, "eta": "68:01:42", "grad_norm": 0.0059, "loss": 0.051, "lr": "3.490e-05", "step": 6292, "steps": "23.77s,6292/16595" }, { "epoch": 1.8960530280204881, "eta": "68:21:54", "grad_norm": 0.0047, "loss": 0.0643, "lr": "3.489e-05", "step": 6293, "steps": "23.89s,6293/16595" }, { "epoch": 1.896354323591443, "eta": "67:43:44", "grad_norm": 0.0063, "loss": 0.0449, "lr": "3.489e-05", "step": 6294, "steps": "23.67s,6294/16595" }, { "epoch": 1.8966556191623982, "eta": "67:45:04", "grad_norm": 0.005, "loss": 0.0287, "lr": "3.488e-05", "step": 6295, "steps": "23.68s,6295/16595" }, { "epoch": 1.8969569147333534, "eta": "68:37:53", "grad_norm": 0.0056, "loss": 0.0493, "lr": "3.488e-05", "step": 6296, "steps": "23.99s,6296/16595" }, { "epoch": 1.8972582103043085, "eta": "68:34:03", "grad_norm": 0.0056, "loss": 0.0317, "lr": "3.487e-05", "step": 6297, "steps": "23.97s,6297/16595" }, { "epoch": 1.8975595058752637, "eta": "67:37:01", "grad_norm": 0.0093, "loss": 0.0592, "lr": "3.487e-05", "step": 6298, "steps": "23.64s,6298/16595" }, { "epoch": 1.8978608014462188, "eta": "67:45:12", "grad_norm": 0.0065, "loss": 0.07, "lr": "3.486e-05", "step": 6299, "steps": "23.69s,6299/16595" }, { "epoch": 1.898162097017174, "eta": "68:19:07", "grad_norm": 0.0057, "loss": 0.07, "lr": "3.486e-05", "step": 6300, "steps": "23.89s,6300/16595" }, { "epoch": 1.898463392588129, "eta": "67:42:41", "grad_norm": 0.0052, "loss": 0.0466, "lr": "3.486e-05", "step": 6301, "steps": "23.68s,6301/16595" }, { "epoch": 1.898764688159084, "eta": "68:32:03", "grad_norm": 0.0057, "loss": 0.0541, "lr": "3.485e-05", "step": 6302, "steps": "23.97s,6302/16595" }, { "epoch": 1.8990659837300392, "eta": "67:50:29", "grad_norm": 0.0061, "loss": 0.0588, "lr": "3.485e-05", "step": 6303, "steps": "23.73s,6303/16595" }, { "epoch": 1.8993672793009941, "eta": "68:05:31", "grad_norm": 0.0062, "loss": 0.0541, "lr": "3.484e-05", "step": 6304, "steps": "23.82s,6304/16595" }, { "epoch": 1.8996685748719493, "eta": "68:11:59", "grad_norm": 0.0077, "loss": 0.0626, "lr": "3.484e-05", "step": 6305, "steps": "23.86s,6305/16595" }, { "epoch": 1.8999698704429044, "eta": "67:51:00", "grad_norm": 0.0049, "loss": 0.0443, "lr": "3.483e-05", "step": 6306, "steps": "23.74s,6306/16595" }, { "epoch": 1.9002711660138596, "eta": "67:47:11", "grad_norm": 0.0064, "loss": 0.0504, "lr": "3.483e-05", "step": 6307, "steps": "23.72s,6307/16595" }, { "epoch": 1.9005724615848147, "eta": "68:09:04", "grad_norm": 0.0056, "loss": 0.0542, "lr": "3.483e-05", "step": 6308, "steps": "23.85s,6308/16595" }, { "epoch": 1.90087375715577, "eta": "68:03:32", "grad_norm": 0.0062, "loss": 0.0658, "lr": "3.482e-05", "step": 6309, "steps": "23.82s,6309/16595" }, { "epoch": 1.901175052726725, "eta": "67:15:08", "grad_norm": 0.0043, "loss": 0.0507, "lr": "3.482e-05", "step": 6310, "steps": "23.54s,6310/16595" }, { "epoch": 1.90147634829768, "eta": "68:04:27", "grad_norm": 0.0081, "loss": 0.046, "lr": "3.481e-05", "step": 6311, "steps": "23.83s,6311/16595" }, { "epoch": 1.9017776438686351, "eta": "68:19:29", "grad_norm": 0.0049, "loss": 0.0614, "lr": "3.481e-05", "step": 6312, "steps": "23.92s,6312/16595" }, { "epoch": 1.9020789394395903, "eta": "68:15:39", "grad_norm": 0.0067, "loss": 0.0508, "lr": "3.480e-05", "step": 6313, "steps": "23.9s,6313/16595" }, { "epoch": 1.9023802350105452, "eta": "67:25:34", "grad_norm": 0.0052, "loss": 0.0683, "lr": "3.480e-05", "step": 6314, "steps": "23.61s,6314/16595" }, { "epoch": 1.9026815305815004, "eta": "67:37:10", "grad_norm": 0.0047, "loss": 0.0474, "lr": "3.479e-05", "step": 6315, "steps": "23.68s,6315/16595" }, { "epoch": 1.9029828261524555, "eta": "68:23:02", "grad_norm": 0.0069, "loss": 0.0539, "lr": "3.479e-05", "step": 6316, "steps": "23.95s,6316/16595" }, { "epoch": 1.9032841217234107, "eta": "67:03:50", "grad_norm": 0.0049, "loss": 0.0332, "lr": "3.479e-05", "step": 6317, "steps": "23.49s,6317/16595" }, { "epoch": 1.9035854172943658, "eta": "69:20:28", "grad_norm": 0.0062, "loss": 0.0604, "lr": "3.478e-05", "step": 6318, "steps": "24.29s,6318/16595" }, { "epoch": 1.903886712865321, "eta": "67:08:11", "grad_norm": 0.0065, "loss": 0.0517, "lr": "3.478e-05", "step": 6319, "steps": "23.52s,6319/16595" }, { "epoch": 1.9041880084362761, "eta": "67:45:28", "grad_norm": 0.0054, "loss": 0.0537, "lr": "3.477e-05", "step": 6320, "steps": "23.74s,6320/16595" }, { "epoch": 1.904489304007231, "eta": "67:45:04", "grad_norm": 0.0053, "loss": 0.0488, "lr": "3.477e-05", "step": 6321, "steps": "23.74s,6321/16595" }, { "epoch": 1.9047905995781862, "eta": "68:22:21", "grad_norm": 0.0049, "loss": 0.0548, "lr": "3.476e-05", "step": 6322, "steps": "23.96s,6322/16595" }, { "epoch": 1.9050918951491413, "eta": "67:49:25", "grad_norm": 0.0055, "loss": 0.0626, "lr": "3.476e-05", "step": 6323, "steps": "23.77s,6323/16595" }, { "epoch": 1.9053931907200963, "eta": "67:04:31", "grad_norm": 0.0066, "loss": 0.0537, "lr": "3.475e-05", "step": 6324, "steps": "23.51s,6324/16595" }, { "epoch": 1.9056944862910514, "eta": "68:26:17", "grad_norm": 0.0061, "loss": 0.0447, "lr": "3.475e-05", "step": 6325, "steps": "23.99s,6325/16595" }, { "epoch": 1.9059957818620066, "eta": "67:36:15", "grad_norm": 0.0043, "loss": 0.0533, "lr": "3.475e-05", "step": 6326, "steps": "23.7s,6326/16595" }, { "epoch": 1.9062970774329617, "eta": "67:29:00", "grad_norm": 0.0055, "loss": 0.062, "lr": "3.474e-05", "step": 6327, "steps": "23.66s,6327/16595" }, { "epoch": 1.9065983730039169, "eta": "67:06:22", "grad_norm": 0.0045, "loss": 0.065, "lr": "3.474e-05", "step": 6328, "steps": "23.53s,6328/16595" }, { "epoch": 1.906899668574872, "eta": "67:36:46", "grad_norm": 0.0051, "loss": 0.0496, "lr": "3.473e-05", "step": 6329, "steps": "23.71s,6329/16595" }, { "epoch": 1.9072009641458272, "eta": "67:26:07", "grad_norm": 0.0058, "loss": 0.0577, "lr": "3.473e-05", "step": 6330, "steps": "23.65s,6330/16595" }, { "epoch": 1.9075022597167821, "eta": "67:34:16", "grad_norm": 0.0049, "loss": 0.0467, "lr": "3.472e-05", "step": 6331, "steps": "23.7s,6331/16595" }, { "epoch": 1.9078035552877373, "eta": "67:04:48", "grad_norm": 0.0067, "loss": 0.0576, "lr": "3.472e-05", "step": 6332, "steps": "23.53s,6332/16595" }, { "epoch": 1.9081048508586924, "eta": "67:30:04", "grad_norm": 0.007, "loss": 0.0459, "lr": "3.472e-05", "step": 6333, "steps": "23.68s,6333/16595" }, { "epoch": 1.9084061464296473, "eta": "66:43:30", "grad_norm": 0.0046, "loss": 0.0369, "lr": "3.471e-05", "step": 6334, "steps": "23.41s,6334/16595" }, { "epoch": 1.9087074420006025, "eta": "67:36:07", "grad_norm": 0.0055, "loss": 0.0729, "lr": "3.471e-05", "step": 6335, "steps": "23.72s,6335/16595" }, { "epoch": 1.9090087375715576, "eta": "67:57:57", "grad_norm": 0.0072, "loss": 0.0483, "lr": "3.470e-05", "step": 6336, "steps": "23.85s,6336/16595" }, { "epoch": 1.9093100331425128, "eta": "67:04:33", "grad_norm": 0.0065, "loss": 0.0433, "lr": "3.470e-05", "step": 6337, "steps": "23.54s,6337/16595" }, { "epoch": 1.909611328713468, "eta": "68:51:51", "grad_norm": 0.0063, "loss": 0.0567, "lr": "3.469e-05", "step": 6338, "steps": "24.17s,6338/16595" }, { "epoch": 1.909912624284423, "eta": "68:22:24", "grad_norm": 0.006, "loss": 0.0446, "lr": "3.469e-05", "step": 6339, "steps": "24.0s,6339/16595" }, { "epoch": 1.9102139198553783, "eta": "67:51:14", "grad_norm": 0.009, "loss": 0.0592, "lr": "3.468e-05", "step": 6340, "steps": "23.82s,6340/16595" }, { "epoch": 1.9105152154263332, "eta": "67:13:14", "grad_norm": 0.0055, "loss": 0.0564, "lr": "3.468e-05", "step": 6341, "steps": "23.6s,6341/16595" }, { "epoch": 1.9108165109972883, "eta": "67:45:18", "grad_norm": 0.0067, "loss": 0.0436, "lr": "3.468e-05", "step": 6342, "steps": "23.79s,6342/16595" }, { "epoch": 1.9111178065682435, "eta": "68:10:32", "grad_norm": 0.0056, "loss": 0.057, "lr": "3.467e-05", "step": 6343, "steps": "23.94s,6343/16595" }, { "epoch": 1.9114191021391984, "eta": "67:24:01", "grad_norm": 0.0062, "loss": 0.0582, "lr": "3.467e-05", "step": 6344, "steps": "23.67s,6344/16595" }, { "epoch": 1.9117203977101536, "eta": "67:59:30", "grad_norm": 0.0053, "loss": 0.0549, "lr": "3.466e-05", "step": 6345, "steps": "23.88s,6345/16595" }, { "epoch": 1.9120216932811087, "eta": "67:07:51", "grad_norm": 0.005, "loss": 0.0635, "lr": "3.466e-05", "step": 6346, "steps": "23.58s,6346/16595" }, { "epoch": 1.9123229888520639, "eta": "66:46:58", "grad_norm": 0.007, "loss": 0.0442, "lr": "3.465e-05", "step": 6347, "steps": "23.46s,6347/16595" }, { "epoch": 1.912624284423019, "eta": "67:08:46", "grad_norm": 0.0055, "loss": 0.0562, "lr": "3.465e-05", "step": 6348, "steps": "23.59s,6348/16595" }, { "epoch": 1.9129255799939742, "eta": "67:23:45", "grad_norm": 0.0064, "loss": 0.0502, "lr": "3.464e-05", "step": 6349, "steps": "23.68s,6349/16595" }, { "epoch": 1.9132268755649293, "eta": "67:28:28", "grad_norm": 0.0064, "loss": 0.0437, "lr": "3.464e-05", "step": 6350, "steps": "23.71s,6350/16595" }, { "epoch": 1.9135281711358842, "eta": "66:43:41", "grad_norm": 0.0058, "loss": 0.0516, "lr": "3.464e-05", "step": 6351, "steps": "23.45s,6351/16595" }, { "epoch": 1.9138294667068394, "eta": "67:37:56", "grad_norm": 0.0048, "loss": 0.0561, "lr": "3.463e-05", "step": 6352, "steps": "23.77s,6352/16595" }, { "epoch": 1.9141307622777946, "eta": "67:15:20", "grad_norm": 0.0064, "loss": 0.0474, "lr": "3.463e-05", "step": 6353, "steps": "23.64s,6353/16595" }, { "epoch": 1.9144320578487495, "eta": "67:32:01", "grad_norm": 0.0048, "loss": 0.0452, "lr": "3.462e-05", "step": 6354, "steps": "23.74s,6354/16595" }, { "epoch": 1.9147333534197046, "eta": "67:45:16", "grad_norm": 0.0056, "loss": 0.0556, "lr": "3.462e-05", "step": 6355, "steps": "23.82s,6355/16595" }, { "epoch": 1.9150346489906598, "eta": "67:36:21", "grad_norm": 0.0056, "loss": 0.0495, "lr": "3.461e-05", "step": 6356, "steps": "23.77s,6356/16595" }, { "epoch": 1.915335944561615, "eta": "67:47:53", "grad_norm": 0.0051, "loss": 0.0591, "lr": "3.461e-05", "step": 6357, "steps": "23.84s,6357/16595" }, { "epoch": 1.91563724013257, "eta": "67:47:30", "grad_norm": 0.0135, "loss": 0.047, "lr": "3.461e-05", "step": 6358, "steps": "23.84s,6358/16595" }, { "epoch": 1.9159385357035252, "eta": "67:57:20", "grad_norm": 0.0055, "loss": 0.0578, "lr": "3.460e-05", "step": 6359, "steps": "23.9s,6359/16595" }, { "epoch": 1.9162398312744804, "eta": "67:21:07", "grad_norm": 0.0067, "loss": 0.0497, "lr": "3.460e-05", "step": 6360, "steps": "23.69s,6360/16595" }, { "epoch": 1.9165411268454353, "eta": "68:37:28", "grad_norm": 0.0052, "loss": 0.0468, "lr": "3.459e-05", "step": 6361, "steps": "24.14s,6361/16595" }, { "epoch": 1.9168424224163905, "eta": "66:59:51", "grad_norm": 0.005, "loss": 0.0486, "lr": "3.459e-05", "step": 6362, "steps": "23.57s,6362/16595" }, { "epoch": 1.9171437179873456, "eta": "67:50:37", "grad_norm": 0.0075, "loss": 0.0551, "lr": "3.458e-05", "step": 6363, "steps": "23.87s,6363/16595" }, { "epoch": 1.9174450135583005, "eta": "67:48:31", "grad_norm": 0.0067, "loss": 0.0539, "lr": "3.458e-05", "step": 6364, "steps": "23.86s,6364/16595" }, { "epoch": 1.9177463091292557, "eta": "67:39:36", "grad_norm": 0.005, "loss": 0.0462, "lr": "3.457e-05", "step": 6365, "steps": "23.81s,6365/16595" }, { "epoch": 1.9180476047002109, "eta": "67:37:30", "grad_norm": 0.006, "loss": 0.0512, "lr": "3.457e-05", "step": 6366, "steps": "23.8s,6366/16595" }, { "epoch": 1.918348900271166, "eta": "66:54:29", "grad_norm": 0.0053, "loss": 0.0604, "lr": "3.457e-05", "step": 6367, "steps": "23.55s,6367/16595" }, { "epoch": 1.9186501958421212, "eta": "67:14:33", "grad_norm": 0.0054, "loss": 0.0497, "lr": "3.456e-05", "step": 6368, "steps": "23.67s,6368/16595" }, { "epoch": 1.9189514914130763, "eta": "67:31:12", "grad_norm": 0.0062, "loss": 0.0614, "lr": "3.456e-05", "step": 6369, "steps": "23.77s,6369/16595" }, { "epoch": 1.9192527869840315, "eta": "68:28:44", "grad_norm": 0.0053, "loss": 0.0514, "lr": "3.455e-05", "step": 6370, "steps": "24.11s,6370/16595" }, { "epoch": 1.9195540825549866, "eta": "66:39:17", "grad_norm": 0.006, "loss": 0.0458, "lr": "3.455e-05", "step": 6371, "steps": "23.47s,6371/16595" }, { "epoch": 1.9198553781259415, "eta": "67:07:51", "grad_norm": 0.0073, "loss": 0.0715, "lr": "3.454e-05", "step": 6372, "steps": "23.64s,6372/16595" }, { "epoch": 1.9201566736968967, "eta": "67:26:12", "grad_norm": 0.0052, "loss": 0.0452, "lr": "3.454e-05", "step": 6373, "steps": "23.75s,6373/16595" }, { "epoch": 1.9204579692678516, "eta": "66:36:24", "grad_norm": 0.0051, "loss": 0.0539, "lr": "3.453e-05", "step": 6374, "steps": "23.46s,6374/16595" }, { "epoch": 1.9207592648388068, "eta": "66:54:45", "grad_norm": 0.0053, "loss": 0.0411, "lr": "3.453e-05", "step": 6375, "steps": "23.57s,6375/16595" }, { "epoch": 1.921060560409762, "eta": "66:57:46", "grad_norm": 0.0053, "loss": 0.0487, "lr": "3.453e-05", "step": 6376, "steps": "23.59s,6376/16595" }, { "epoch": 1.921361855980717, "eta": "67:22:55", "grad_norm": 0.0056, "loss": 0.0506, "lr": "3.452e-05", "step": 6377, "steps": "23.74s,6377/16595" }, { "epoch": 1.9216631515516722, "eta": "67:51:28", "grad_norm": 0.0056, "loss": 0.0641, "lr": "3.452e-05", "step": 6378, "steps": "23.91s,6378/16595" }, { "epoch": 1.9219644471226274, "eta": "67:44:15", "grad_norm": 0.006, "loss": 0.0521, "lr": "3.451e-05", "step": 6379, "steps": "23.87s,6379/16595" }, { "epoch": 1.9222657426935825, "eta": "66:40:52", "grad_norm": 0.0053, "loss": 0.0576, "lr": "3.451e-05", "step": 6380, "steps": "23.5s,6380/16595" }, { "epoch": 1.9225670382645377, "eta": "67:02:36", "grad_norm": 0.0057, "loss": 0.0607, "lr": "3.450e-05", "step": 6381, "steps": "23.63s,6381/16595" }, { "epoch": 1.9228683338354926, "eta": "67:10:43", "grad_norm": 0.0055, "loss": 0.0548, "lr": "3.450e-05", "step": 6382, "steps": "23.68s,6382/16595" }, { "epoch": 1.9231696294064478, "eta": "67:08:38", "grad_norm": 0.0053, "loss": 0.0519, "lr": "3.449e-05", "step": 6383, "steps": "23.67s,6383/16595" }, { "epoch": 1.9234709249774027, "eta": "68:40:08", "grad_norm": 0.0061, "loss": 0.0353, "lr": "3.449e-05", "step": 6384, "steps": "24.21s,6384/16595" }, { "epoch": 1.9237722205483578, "eta": "67:12:57", "grad_norm": 0.0057, "loss": 0.0672, "lr": "3.449e-05", "step": 6385, "steps": "23.7s,6385/16595" }, { "epoch": 1.924073516119313, "eta": "67:43:10", "grad_norm": 0.0056, "loss": 0.0577, "lr": "3.448e-05", "step": 6386, "steps": "23.88s,6386/16595" }, { "epoch": 1.9243748116902681, "eta": "66:51:44", "grad_norm": 0.0046, "loss": 0.0499, "lr": "3.448e-05", "step": 6387, "steps": "23.58s,6387/16595" }, { "epoch": 1.9246761072612233, "eta": "66:54:45", "grad_norm": 0.0049, "loss": 0.048, "lr": "3.447e-05", "step": 6388, "steps": "23.6s,6388/16595" }, { "epoch": 1.9249774028321784, "eta": "67:45:23", "grad_norm": 0.0054, "loss": 0.0515, "lr": "3.447e-05", "step": 6389, "steps": "23.9s,6389/16595" }, { "epoch": 1.9252786984031336, "eta": "66:57:22", "grad_norm": 0.0058, "loss": 0.0602, "lr": "3.446e-05", "step": 6390, "steps": "23.62s,6390/16595" }, { "epoch": 1.9255799939740887, "eta": "66:16:09", "grad_norm": 0.007, "loss": 0.046, "lr": "3.446e-05", "step": 6391, "steps": "23.38s,6391/16595" }, { "epoch": 1.9258812895450437, "eta": "66:48:04", "grad_norm": 0.0049, "loss": 0.051, "lr": "3.446e-05", "step": 6392, "steps": "23.57s,6392/16595" }, { "epoch": 1.9261825851159988, "eta": "66:51:05", "grad_norm": 0.0079, "loss": 0.065, "lr": "3.445e-05", "step": 6393, "steps": "23.59s,6393/16595" }, { "epoch": 1.9264838806869538, "eta": "66:54:05", "grad_norm": 0.0063, "loss": 0.0511, "lr": "3.445e-05", "step": 6394, "steps": "23.61s,6394/16595" }, { "epoch": 1.926785176257909, "eta": "66:48:35", "grad_norm": 0.0053, "loss": 0.0529, "lr": "3.444e-05", "step": 6395, "steps": "23.58s,6395/16595" }, { "epoch": 1.927086471828864, "eta": "68:18:17", "grad_norm": 0.0049, "loss": 0.0595, "lr": "3.444e-05", "step": 6396, "steps": "24.11s,6396/16595" }, { "epoch": 1.9273877673998192, "eta": "66:32:31", "grad_norm": 0.0049, "loss": 0.0546, "lr": "3.443e-05", "step": 6397, "steps": "23.49s,6397/16595" }, { "epoch": 1.9276890629707744, "eta": "66:35:31", "grad_norm": 0.0069, "loss": 0.0421, "lr": "3.443e-05", "step": 6398, "steps": "23.51s,6398/16595" }, { "epoch": 1.9279903585417295, "eta": "67:14:13", "grad_norm": 0.0055, "loss": 0.0438, "lr": "3.442e-05", "step": 6399, "steps": "23.74s,6399/16595" }, { "epoch": 1.9282916541126847, "eta": "67:35:54", "grad_norm": 0.0049, "loss": 0.0534, "lr": "3.442e-05", "step": 6400, "steps": "23.87s,6400/16595" }, { "epoch": 1.9285929496836398, "eta": "141:43:29", "grad_norm": 0.0053, "loss": 0.042, "lr": "3.442e-05", "step": 6401, "steps": "50.05s,6401/16595" }, { "epoch": 1.9288942452545947, "eta": "66:40:45", "grad_norm": 0.0049, "loss": 0.0478, "lr": "3.441e-05", "step": 6402, "steps": "23.55s,6402/16595" }, { "epoch": 1.92919554082555, "eta": "67:33:01", "grad_norm": 0.0049, "loss": 0.0429, "lr": "3.441e-05", "step": 6403, "steps": "23.86s,6403/16595" }, { "epoch": 1.9294968363965048, "eta": "66:38:16", "grad_norm": 0.0058, "loss": 0.0586, "lr": "3.440e-05", "step": 6404, "steps": "23.54s,6404/16595" }, { "epoch": 1.92979813196746, "eta": "67:35:37", "grad_norm": 0.0098, "loss": 0.0307, "lr": "3.440e-05", "step": 6405, "steps": "23.88s,6405/16595" }, { "epoch": 1.9300994275384151, "eta": "66:42:34", "grad_norm": 0.0045, "loss": 0.0611, "lr": "3.439e-05", "step": 6406, "steps": "23.57s,6406/16595" }, { "epoch": 1.9304007231093703, "eta": "66:23:30", "grad_norm": 0.0068, "loss": 0.0431, "lr": "3.439e-05", "step": 6407, "steps": "23.46s,6407/16595" }, { "epoch": 1.9307020186803254, "eta": "66:33:18", "grad_norm": 0.0048, "loss": 0.0525, "lr": "3.438e-05", "step": 6408, "steps": "23.52s,6408/16595" }, { "epoch": 1.9310033142512806, "eta": "67:13:39", "grad_norm": 0.0052, "loss": 0.046, "lr": "3.438e-05", "step": 6409, "steps": "23.76s,6409/16595" }, { "epoch": 1.9313046098222357, "eta": "67:04:46", "grad_norm": 0.0056, "loss": 0.0856, "lr": "3.438e-05", "step": 6410, "steps": "23.71s,6410/16595" }, { "epoch": 1.9316059053931909, "eta": "67:14:33", "grad_norm": 0.0052, "loss": 0.0539, "lr": "3.437e-05", "step": 6411, "steps": "23.77s,6411/16595" }, { "epoch": 1.9319072009641458, "eta": "67:14:09", "grad_norm": 0.0049, "loss": 0.0491, "lr": "3.437e-05", "step": 6412, "steps": "23.77s,6412/16595" }, { "epoch": 1.932208496535101, "eta": "66:55:06", "grad_norm": 0.005, "loss": 0.0594, "lr": "3.436e-05", "step": 6413, "steps": "23.66s,6413/16595" }, { "epoch": 1.932509792106056, "eta": "67:38:49", "grad_norm": 0.0062, "loss": 0.055, "lr": "3.436e-05", "step": 6414, "steps": "23.92s,6414/16595" }, { "epoch": 1.932811087677011, "eta": "66:37:20", "grad_norm": 0.0058, "loss": 0.0463, "lr": "3.435e-05", "step": 6415, "steps": "23.56s,6415/16595" }, { "epoch": 1.9331123832479662, "eta": "67:29:32", "grad_norm": 0.0053, "loss": 0.05, "lr": "3.435e-05", "step": 6416, "steps": "23.87s,6416/16595" }, { "epoch": 1.9334136788189213, "eta": "66:34:51", "grad_norm": 0.0048, "loss": 0.0583, "lr": "3.434e-05", "step": 6417, "steps": "23.55s,6417/16595" }, { "epoch": 1.9337149743898765, "eta": "67:23:39", "grad_norm": 0.0047, "loss": 0.0556, "lr": "3.434e-05", "step": 6418, "steps": "23.84s,6418/16595" }, { "epoch": 1.9340162699608316, "eta": "66:52:44", "grad_norm": 0.0051, "loss": 0.0432, "lr": "3.434e-05", "step": 6419, "steps": "23.66s,6419/16595" }, { "epoch": 1.9343175655317868, "eta": "65:36:01", "grad_norm": 0.0058, "loss": 0.0591, "lr": "3.433e-05", "step": 6420, "steps": "23.21s,6420/16595" }, { "epoch": 1.934618861102742, "eta": "67:34:20", "grad_norm": 0.0079, "loss": 0.0716, "lr": "3.433e-05", "step": 6421, "steps": "23.91s,6421/16595" }, { "epoch": 1.9349201566736969, "eta": "67:37:19", "grad_norm": 0.0054, "loss": 0.0419, "lr": "3.432e-05", "step": 6422, "steps": "23.93s,6422/16595" }, { "epoch": 1.935221452244652, "eta": "66:51:09", "grad_norm": 0.0045, "loss": 0.0522, "lr": "3.432e-05", "step": 6423, "steps": "23.66s,6423/16595" }, { "epoch": 1.935522747815607, "eta": "67:17:53", "grad_norm": 0.0154, "loss": 0.0537, "lr": "3.431e-05", "step": 6424, "steps": "23.82s,6424/16595" }, { "epoch": 1.935824043386562, "eta": "66:41:53", "grad_norm": 0.0052, "loss": 0.0493, "lr": "3.431e-05", "step": 6425, "steps": "23.61s,6425/16595" }, { "epoch": 1.9361253389575173, "eta": "65:43:52", "grad_norm": 0.0051, "loss": 0.0445, "lr": "3.430e-05", "step": 6426, "steps": "23.27s,6426/16595" }, { "epoch": 1.9364266345284724, "eta": "66:03:49", "grad_norm": 0.0042, "loss": 0.0564, "lr": "3.430e-05", "step": 6427, "steps": "23.39s,6427/16595" }, { "epoch": 1.9367279300994276, "eta": "66:45:47", "grad_norm": 0.0054, "loss": 0.0454, "lr": "3.430e-05", "step": 6428, "steps": "23.64s,6428/16595" }, { "epoch": 1.9370292256703827, "eta": "67:15:54", "grad_norm": 0.005, "loss": 0.061, "lr": "3.429e-05", "step": 6429, "steps": "23.82s,6429/16595" }, { "epoch": 1.9373305212413379, "eta": "66:51:47", "grad_norm": 0.0049, "loss": 0.0694, "lr": "3.429e-05", "step": 6430, "steps": "23.68s,6430/16595" }, { "epoch": 1.937631816812293, "eta": "67:37:07", "grad_norm": 0.0063, "loss": 0.044, "lr": "3.428e-05", "step": 6431, "steps": "23.95s,6431/16595" }, { "epoch": 1.937933112383248, "eta": "66:15:25", "grad_norm": 0.0054, "loss": 0.0536, "lr": "3.428e-05", "step": 6432, "steps": "23.47s,6432/16595" }, { "epoch": 1.938234407954203, "eta": "67:00:45", "grad_norm": 0.0055, "loss": 0.059, "lr": "3.427e-05", "step": 6433, "steps": "23.74s,6433/16595" }, { "epoch": 1.938535703525158, "eta": "66:06:10", "grad_norm": 0.0052, "loss": 0.0566, "lr": "3.427e-05", "step": 6434, "steps": "23.42s,6434/16595" }, { "epoch": 1.9388369990961132, "eta": "67:18:36", "grad_norm": 0.0069, "loss": 0.0505, "lr": "3.426e-05", "step": 6435, "steps": "23.85s,6435/16595" }, { "epoch": 1.9391382946670683, "eta": "67:14:48", "grad_norm": 0.0125, "loss": 0.0469, "lr": "3.426e-05", "step": 6436, "steps": "23.83s,6436/16595" }, { "epoch": 1.9394395902380235, "eta": "66:40:33", "grad_norm": 0.0072, "loss": 0.0508, "lr": "3.426e-05", "step": 6437, "steps": "23.63s,6437/16595" }, { "epoch": 1.9397408858089786, "eta": "66:01:13", "grad_norm": 0.0047, "loss": 0.0557, "lr": "3.425e-05", "step": 6438, "steps": "23.4s,6438/16595" }, { "epoch": 1.9400421813799338, "eta": "67:05:09", "grad_norm": 0.0053, "loss": 0.0458, "lr": "3.425e-05", "step": 6439, "steps": "23.78s,6439/16595" }, { "epoch": 1.940343476950889, "eta": "66:15:40", "grad_norm": 0.0065, "loss": 0.0597, "lr": "3.424e-05", "step": 6440, "steps": "23.49s,6440/16595" }, { "epoch": 1.940644772521844, "eta": "66:50:49", "grad_norm": 0.0062, "loss": 0.0382, "lr": "3.424e-05", "step": 6441, "steps": "23.7s,6441/16595" }, { "epoch": 1.940946068092799, "eta": "67:29:21", "grad_norm": 0.0053, "loss": 0.0498, "lr": "3.423e-05", "step": 6442, "steps": "23.93s,6442/16595" }, { "epoch": 1.9412473636637542, "eta": "66:36:30", "grad_norm": 0.0052, "loss": 0.0568, "lr": "3.423e-05", "step": 6443, "steps": "23.62s,6443/16595" }, { "epoch": 1.941548659234709, "eta": "66:09:02", "grad_norm": 0.0049, "loss": 0.0561, "lr": "3.422e-05", "step": 6444, "steps": "23.46s,6444/16595" }, { "epoch": 1.9418499548056642, "eta": "67:19:42", "grad_norm": 0.0065, "loss": 0.0558, "lr": "3.422e-05", "step": 6445, "steps": "23.88s,6445/16595" }, { "epoch": 1.9421512503766194, "eta": "66:55:37", "grad_norm": 0.006, "loss": 0.0487, "lr": "3.422e-05", "step": 6446, "steps": "23.74s,6446/16595" }, { "epoch": 1.9424525459475745, "eta": "66:33:14", "grad_norm": 0.0048, "loss": 0.058, "lr": "3.421e-05", "step": 6447, "steps": "23.61s,6447/16595" }, { "epoch": 1.9427538415185297, "eta": "66:12:33", "grad_norm": 0.0049, "loss": 0.0687, "lr": "3.421e-05", "step": 6448, "steps": "23.49s,6448/16595" }, { "epoch": 1.9430551370894849, "eta": "66:40:54", "grad_norm": 0.0052, "loss": 0.0438, "lr": "3.420e-05", "step": 6449, "steps": "23.66s,6449/16595" }, { "epoch": 1.94335643266044, "eta": "66:20:13", "grad_norm": 0.0041, "loss": 0.0641, "lr": "3.420e-05", "step": 6450, "steps": "23.54s,6450/16595" }, { "epoch": 1.9436577282313952, "eta": "66:48:34", "grad_norm": 0.006, "loss": 0.0468, "lr": "3.419e-05", "step": 6451, "steps": "23.71s,6451/16595" }, { "epoch": 1.94395902380235, "eta": "66:31:16", "grad_norm": 0.0069, "loss": 0.0535, "lr": "3.419e-05", "step": 6452, "steps": "23.61s,6452/16595" }, { "epoch": 1.9442603193733052, "eta": "67:08:03", "grad_norm": 0.0049, "loss": 0.058, "lr": "3.418e-05", "step": 6453, "steps": "23.83s,6453/16595" }, { "epoch": 1.9445616149442602, "eta": "66:03:26", "grad_norm": 0.0052, "loss": 0.047, "lr": "3.418e-05", "step": 6454, "steps": "23.45s,6454/16595" }, { "epoch": 1.9448629105152153, "eta": "66:33:28", "grad_norm": 0.0052, "loss": 0.0624, "lr": "3.418e-05", "step": 6455, "steps": "23.63s,6455/16595" }, { "epoch": 1.9451642060861705, "eta": "67:05:10", "grad_norm": 0.0064, "loss": 0.0512, "lr": "3.417e-05", "step": 6456, "steps": "23.82s,6456/16595" }, { "epoch": 1.9454655016571256, "eta": "67:08:09", "grad_norm": 0.0047, "loss": 0.0435, "lr": "3.417e-05", "step": 6457, "steps": "23.84s,6457/16595" }, { "epoch": 1.9457667972280808, "eta": "66:49:11", "grad_norm": 0.0051, "loss": 0.0527, "lr": "3.416e-05", "step": 6458, "steps": "23.73s,6458/16595" }, { "epoch": 1.946068092799036, "eta": "67:10:44", "grad_norm": 0.0062, "loss": 0.0579, "lr": "3.416e-05", "step": 6459, "steps": "23.86s,6459/16595" }, { "epoch": 1.946369388369991, "eta": "66:51:46", "grad_norm": 0.0056, "loss": 0.0387, "lr": "3.415e-05", "step": 6460, "steps": "23.75s,6460/16595" }, { "epoch": 1.9466706839409462, "eta": "66:59:49", "grad_norm": 0.0049, "loss": 0.0416, "lr": "3.415e-05", "step": 6461, "steps": "23.8s,6461/16595" }, { "epoch": 1.9469719795119012, "eta": "66:25:38", "grad_norm": 0.0084, "loss": 0.0447, "lr": "3.414e-05", "step": 6462, "steps": "23.6s,6462/16595" }, { "epoch": 1.9472732750828563, "eta": "67:04:05", "grad_norm": 0.0059, "loss": 0.0482, "lr": "3.414e-05", "step": 6463, "steps": "23.83s,6463/16595" }, { "epoch": 1.9475745706538112, "eta": "67:02:00", "grad_norm": 0.009, "loss": 0.0405, "lr": "3.414e-05", "step": 6464, "steps": "23.82s,6464/16595" }, { "epoch": 1.9478758662247664, "eta": "66:49:47", "grad_norm": 0.0046, "loss": 0.0455, "lr": "3.413e-05", "step": 6465, "steps": "23.75s,6465/16595" }, { "epoch": 1.9481771617957215, "eta": "67:06:16", "grad_norm": 0.0056, "loss": 0.0388, "lr": "3.413e-05", "step": 6466, "steps": "23.85s,6466/16595" }, { "epoch": 1.9484784573666767, "eta": "67:00:48", "grad_norm": 0.0074, "loss": 0.0673, "lr": "3.412e-05", "step": 6467, "steps": "23.82s,6467/16595" }, { "epoch": 1.9487797529376318, "eta": "65:42:46", "grad_norm": 0.0077, "loss": 0.0421, "lr": "3.412e-05", "step": 6468, "steps": "23.36s,6468/16595" }, { "epoch": 1.949081048508587, "eta": "67:10:08", "grad_norm": 0.0045, "loss": 0.044, "lr": "3.411e-05", "step": 6469, "steps": "23.88s,6469/16595" }, { "epoch": 1.9493823440795421, "eta": "67:09:45", "grad_norm": 0.0058, "loss": 0.0509, "lr": "3.411e-05", "step": 6470, "steps": "23.88s,6470/16595" }, { "epoch": 1.9496836396504973, "eta": "67:31:17", "grad_norm": 0.0045, "loss": 0.0446, "lr": "3.411e-05", "step": 6471, "steps": "24.01s,6471/16595" }, { "epoch": 1.9499849352214522, "eta": "66:03:09", "grad_norm": 0.0047, "loss": 0.0666, "lr": "3.410e-05", "step": 6472, "steps": "23.49s,6472/16595" }, { "epoch": 1.9502862307924074, "eta": "66:49:59", "grad_norm": 0.0052, "loss": 0.0574, "lr": "3.410e-05", "step": 6473, "steps": "23.77s,6473/16595" }, { "epoch": 1.9505875263633623, "eta": "66:59:43", "grad_norm": 0.0045, "loss": 0.0639, "lr": "3.409e-05", "step": 6474, "steps": "23.83s,6474/16595" }, { "epoch": 1.9508888219343175, "eta": "66:50:53", "grad_norm": 0.0044, "loss": 0.0561, "lr": "3.409e-05", "step": 6475, "steps": "23.78s,6475/16595" }, { "epoch": 1.9511901175052726, "eta": "67:29:17", "grad_norm": 0.0059, "loss": 0.0471, "lr": "3.408e-05", "step": 6476, "steps": "24.01s,6476/16595" }, { "epoch": 1.9514914130762278, "eta": "66:31:33", "grad_norm": 0.0051, "loss": 0.039, "lr": "3.408e-05", "step": 6477, "steps": "23.67s,6477/16595" }, { "epoch": 1.951792708647183, "eta": "66:00:48", "grad_norm": 0.0051, "loss": 0.0506, "lr": "3.407e-05", "step": 6478, "steps": "23.49s,6478/16595" }, { "epoch": 1.952094004218138, "eta": "66:13:54", "grad_norm": 0.0054, "loss": 0.0722, "lr": "3.407e-05", "step": 6479, "steps": "23.57s,6479/16595" }, { "epoch": 1.9523952997890932, "eta": "66:16:52", "grad_norm": 0.0057, "loss": 0.0485, "lr": "3.406e-05", "step": 6480, "steps": "23.59s,6480/16595" }, { "epoch": 1.9526965953600484, "eta": "66:09:44", "grad_norm": 0.0046, "loss": 0.041, "lr": "3.406e-05", "step": 6481, "steps": "23.55s,6481/16595" }, { "epoch": 1.9529978909310033, "eta": "66:36:19", "grad_norm": 0.0056, "loss": 0.0522, "lr": "3.406e-05", "step": 6482, "steps": "23.71s,6482/16595" }, { "epoch": 1.9532991865019584, "eta": "65:52:06", "grad_norm": 0.0048, "loss": 0.0539, "lr": "3.405e-05", "step": 6483, "steps": "23.45s,6483/16595" }, { "epoch": 1.9536004820729134, "eta": "67:10:55", "grad_norm": 0.0059, "loss": 0.0471, "lr": "3.405e-05", "step": 6484, "steps": "23.92s,6484/16595" }, { "epoch": 1.9539017776438685, "eta": "65:42:54", "grad_norm": 0.0059, "loss": 0.0628, "lr": "3.404e-05", "step": 6485, "steps": "23.4s,6485/16595" }, { "epoch": 1.9542030732148237, "eta": "67:06:45", "grad_norm": 0.0051, "loss": 0.0502, "lr": "3.404e-05", "step": 6486, "steps": "23.9s,6486/16595" }, { "epoch": 1.9545043687857788, "eta": "66:24:14", "grad_norm": 0.0063, "loss": 0.0542, "lr": "3.403e-05", "step": 6487, "steps": "23.65s,6487/16595" }, { "epoch": 1.954805664356734, "eta": "66:27:12", "grad_norm": 0.0057, "loss": 0.0341, "lr": "3.403e-05", "step": 6488, "steps": "23.67s,6488/16595" }, { "epoch": 1.9551069599276891, "eta": "65:29:32", "grad_norm": 0.005, "loss": 0.0532, "lr": "3.402e-05", "step": 6489, "steps": "23.33s,6489/16595" }, { "epoch": 1.9554082554986443, "eta": "65:35:53", "grad_norm": 0.0059, "loss": 0.056, "lr": "3.402e-05", "step": 6490, "steps": "23.37s,6490/16595" }, { "epoch": 1.9557095510695994, "eta": "66:46:14", "grad_norm": 0.0057, "loss": 0.0665, "lr": "3.402e-05", "step": 6491, "steps": "23.79s,6491/16595" }, { "epoch": 1.9560108466405544, "eta": "66:50:53", "grad_norm": 0.0062, "loss": 0.0394, "lr": "3.401e-05", "step": 6492, "steps": "23.82s,6492/16595" }, { "epoch": 1.9563121422115095, "eta": "66:16:49", "grad_norm": 0.0087, "loss": 0.0536, "lr": "3.401e-05", "step": 6493, "steps": "23.62s,6493/16595" }, { "epoch": 1.9566134377824644, "eta": "65:49:29", "grad_norm": 0.0049, "loss": 0.0421, "lr": "3.400e-05", "step": 6494, "steps": "23.46s,6494/16595" }, { "epoch": 1.9569147333534196, "eta": "66:44:39", "grad_norm": 0.0052, "loss": 0.0407, "lr": "3.400e-05", "step": 6495, "steps": "23.79s,6495/16595" }, { "epoch": 1.9572160289243747, "eta": "66:25:44", "grad_norm": 0.0055, "loss": 0.0527, "lr": "3.399e-05", "step": 6496, "steps": "23.68s,6496/16595" }, { "epoch": 1.95751732449533, "eta": "66:45:32", "grad_norm": 0.0061, "loss": 0.0392, "lr": "3.399e-05", "step": 6497, "steps": "23.8s,6497/16595" }, { "epoch": 1.957818620066285, "eta": "66:50:11", "grad_norm": 0.0096, "loss": 0.0483, "lr": "3.398e-05", "step": 6498, "steps": "23.83s,6498/16595" }, { "epoch": 1.9581199156372402, "eta": "68:24:01", "grad_norm": 0.0054, "loss": 0.0501, "lr": "3.398e-05", "step": 6499, "steps": "24.39s,6499/16595" }, { "epoch": 1.9584212112081953, "eta": "67:14:38", "grad_norm": 0.0057, "loss": 0.0437, "lr": "3.398e-05", "step": 6500, "steps": "23.98s,6500/16595" }, { "epoch": 1.9587225067791505, "eta": "66:47:19", "grad_norm": 0.0054, "loss": 0.0544, "lr": "3.397e-05", "step": 6501, "steps": "23.82s,6501/16595" }, { "epoch": 1.9590238023501054, "eta": "66:25:03", "grad_norm": 0.0046, "loss": 0.0537, "lr": "3.397e-05", "step": 6502, "steps": "23.69s,6502/16595" }, { "epoch": 1.9593250979210606, "eta": "66:53:15", "grad_norm": 0.0061, "loss": 0.0437, "lr": "3.396e-05", "step": 6503, "steps": "23.86s,6503/16595" }, { "epoch": 1.9596263934920155, "eta": "65:57:21", "grad_norm": 0.0046, "loss": 0.043, "lr": "3.396e-05", "step": 6504, "steps": "23.53s,6504/16595" }, { "epoch": 1.9599276890629707, "eta": "66:13:46", "grad_norm": 0.009, "loss": 0.0478, "lr": "3.395e-05", "step": 6505, "steps": "23.63s,6505/16595" }, { "epoch": 1.9602289846339258, "eta": "67:52:35", "grad_norm": 0.0054, "loss": 0.051, "lr": "3.395e-05", "step": 6506, "steps": "24.22s,6506/16595" }, { "epoch": 1.960530280204881, "eta": "65:44:24", "grad_norm": 0.0056, "loss": 0.0646, "lr": "3.394e-05", "step": 6507, "steps": "23.46s,6507/16595" }, { "epoch": 1.960831575775836, "eta": "66:27:43", "grad_norm": 0.0081, "loss": 0.0465, "lr": "3.394e-05", "step": 6508, "steps": "23.72s,6508/16595" }, { "epoch": 1.9611328713467913, "eta": "66:37:25", "grad_norm": 0.0054, "loss": 0.0481, "lr": "3.394e-05", "step": 6509, "steps": "23.78s,6509/16595" }, { "epoch": 1.9614341669177464, "eta": "65:53:19", "grad_norm": 0.0075, "loss": 0.043, "lr": "3.393e-05", "step": 6510, "steps": "23.52s,6510/16595" }, { "epoch": 1.9617354624887016, "eta": "66:01:19", "grad_norm": 0.0058, "loss": 0.042, "lr": "3.393e-05", "step": 6511, "steps": "23.57s,6511/16595" }, { "epoch": 1.9620367580596565, "eta": "66:02:37", "grad_norm": 0.0043, "loss": 0.0555, "lr": "3.392e-05", "step": 6512, "steps": "23.58s,6512/16595" }, { "epoch": 1.9623380536306116, "eta": "65:57:11", "grad_norm": 0.0064, "loss": 0.0533, "lr": "3.392e-05", "step": 6513, "steps": "23.55s,6513/16595" }, { "epoch": 1.9626393492015666, "eta": "65:41:40", "grad_norm": 0.0053, "loss": 0.0485, "lr": "3.391e-05", "step": 6514, "steps": "23.46s,6514/16595" }, { "epoch": 1.9629406447725217, "eta": "66:43:26", "grad_norm": 0.0055, "loss": 0.0505, "lr": "3.391e-05", "step": 6515, "steps": "23.83s,6515/16595" }, { "epoch": 1.9632419403434769, "eta": "66:58:09", "grad_norm": 0.0052, "loss": 0.0415, "lr": "3.390e-05", "step": 6516, "steps": "23.92s,6516/16595" }, { "epoch": 1.963543235914432, "eta": "66:40:57", "grad_norm": 0.0049, "loss": 0.0431, "lr": "3.390e-05", "step": 6517, "steps": "23.82s,6517/16595" }, { "epoch": 1.9638445314853872, "eta": "66:32:10", "grad_norm": 0.0046, "loss": 0.0543, "lr": "3.390e-05", "step": 6518, "steps": "23.77s,6518/16595" }, { "epoch": 1.9641458270563423, "eta": "66:35:08", "grad_norm": 0.0057, "loss": 0.0562, "lr": "3.389e-05", "step": 6519, "steps": "23.79s,6519/16595" }, { "epoch": 1.9644471226272975, "eta": "66:07:52", "grad_norm": 0.0057, "loss": 0.0513, "lr": "3.389e-05", "step": 6520, "steps": "23.63s,6520/16595" }, { "epoch": 1.9647484181982526, "eta": "66:19:13", "grad_norm": 0.0055, "loss": 0.0515, "lr": "3.388e-05", "step": 6521, "steps": "23.7s,6521/16595" }, { "epoch": 1.9650497137692076, "eta": "65:45:15", "grad_norm": 0.0055, "loss": 0.0479, "lr": "3.388e-05", "step": 6522, "steps": "23.5s,6522/16595" }, { "epoch": 1.9653510093401627, "eta": "66:45:17", "grad_norm": 0.0049, "loss": 0.0378, "lr": "3.387e-05", "step": 6523, "steps": "23.86s,6523/16595" }, { "epoch": 1.9656523049111176, "eta": "66:36:30", "grad_norm": 0.0051, "loss": 0.0514, "lr": "3.387e-05", "step": 6524, "steps": "23.81s,6524/16595" }, { "epoch": 1.9659536004820728, "eta": "66:52:53", "grad_norm": 0.0048, "loss": 0.0723, "lr": "3.386e-05", "step": 6525, "steps": "23.91s,6525/16595" }, { "epoch": 1.966254896053028, "eta": "66:50:49", "grad_norm": 0.0055, "loss": 0.0557, "lr": "3.386e-05", "step": 6526, "steps": "23.9s,6526/16595" }, { "epoch": 1.966556191623983, "eta": "66:26:55", "grad_norm": 0.0066, "loss": 0.0467, "lr": "3.386e-05", "step": 6527, "steps": "23.76s,6527/16595" }, { "epoch": 1.9668574871949382, "eta": "67:03:26", "grad_norm": 0.0053, "loss": 0.0391, "lr": "3.385e-05", "step": 6528, "steps": "23.98s,6528/16595" }, { "epoch": 1.9671587827658934, "eta": "66:12:42", "grad_norm": 0.005, "loss": 0.0535, "lr": "3.385e-05", "step": 6529, "steps": "23.68s,6529/16595" }, { "epoch": 1.9674600783368486, "eta": "65:45:28", "grad_norm": 0.0048, "loss": 0.0548, "lr": "3.384e-05", "step": 6530, "steps": "23.52s,6530/16595" }, { "epoch": 1.9677613739078037, "eta": "66:11:55", "grad_norm": 0.0062, "loss": 0.0646, "lr": "3.384e-05", "step": 6531, "steps": "23.68s,6531/16595" }, { "epoch": 1.9680626694787586, "eta": "66:23:16", "grad_norm": 0.0053, "loss": 0.0458, "lr": "3.383e-05", "step": 6532, "steps": "23.75s,6532/16595" }, { "epoch": 1.9683639650497138, "eta": "66:06:06", "grad_norm": 0.0047, "loss": 0.0427, "lr": "3.383e-05", "step": 6533, "steps": "23.65s,6533/16595" }, { "epoch": 1.9686652606206687, "eta": "65:48:56", "grad_norm": 0.0058, "loss": 0.0475, "lr": "3.382e-05", "step": 6534, "steps": "23.55s,6534/16595" }, { "epoch": 1.9689665561916239, "eta": "66:43:52", "grad_norm": 0.0047, "loss": 0.0436, "lr": "3.382e-05", "step": 6535, "steps": "23.88s,6535/16595" }, { "epoch": 1.969267851762579, "eta": "66:36:46", "grad_norm": 0.0062, "loss": 0.0404, "lr": "3.382e-05", "step": 6536, "steps": "23.84s,6536/16595" }, { "epoch": 1.9695691473335342, "eta": "65:56:08", "grad_norm": 0.0094, "loss": 0.0427, "lr": "3.381e-05", "step": 6537, "steps": "23.6s,6537/16595" }, { "epoch": 1.9698704429044893, "eta": "66:15:52", "grad_norm": 0.0057, "loss": 0.0467, "lr": "3.381e-05", "step": 6538, "steps": "23.72s,6538/16595" }, { "epoch": 1.9701717384754445, "eta": "66:03:44", "grad_norm": 0.006, "loss": 0.0657, "lr": "3.380e-05", "step": 6539, "steps": "23.65s,6539/16595" }, { "epoch": 1.9704730340463996, "eta": "66:10:02", "grad_norm": 0.0045, "loss": 0.0467, "lr": "3.380e-05", "step": 6540, "steps": "23.69s,6540/16595" }, { "epoch": 1.9707743296173548, "eta": "66:31:26", "grad_norm": 0.0054, "loss": 0.043, "lr": "3.379e-05", "step": 6541, "steps": "23.82s,6541/16595" }, { "epoch": 1.9710756251883097, "eta": "65:22:20", "grad_norm": 0.0059, "loss": 0.0438, "lr": "3.379e-05", "step": 6542, "steps": "23.41s,6542/16595" }, { "epoch": 1.9713769207592649, "eta": "65:35:21", "grad_norm": 0.0055, "loss": 0.066, "lr": "3.378e-05", "step": 6543, "steps": "23.49s,6543/16595" }, { "epoch": 1.9716782163302198, "eta": "66:13:29", "grad_norm": 0.0049, "loss": 0.0639, "lr": "3.378e-05", "step": 6544, "steps": "23.72s,6544/16595" }, { "epoch": 1.971979511901175, "eta": "66:06:24", "grad_norm": 0.0056, "loss": 0.0558, "lr": "3.378e-05", "step": 6545, "steps": "23.68s,6545/16595" }, { "epoch": 1.97228080747213, "eta": "66:32:48", "grad_norm": 0.0067, "loss": 0.0457, "lr": "3.377e-05", "step": 6546, "steps": "23.84s,6546/16595" }, { "epoch": 1.9725821030430852, "eta": "65:47:11", "grad_norm": 0.0058, "loss": 0.0558, "lr": "3.377e-05", "step": 6547, "steps": "23.57s,6547/16595" }, { "epoch": 1.9728833986140404, "eta": "65:48:28", "grad_norm": 0.0048, "loss": 0.0563, "lr": "3.376e-05", "step": 6548, "steps": "23.58s,6548/16595" }, { "epoch": 1.9731846941849955, "eta": "66:21:33", "grad_norm": 0.0049, "loss": 0.0514, "lr": "3.376e-05", "step": 6549, "steps": "23.78s,6549/16595" }, { "epoch": 1.9734859897559507, "eta": "65:19:13", "grad_norm": 0.0052, "loss": 0.069, "lr": "3.375e-05", "step": 6550, "steps": "23.41s,6550/16595" }, { "epoch": 1.9737872853269058, "eta": "66:05:42", "grad_norm": 0.006, "loss": 0.0609, "lr": "3.375e-05", "step": 6551, "steps": "23.69s,6551/16595" }, { "epoch": 1.9740885808978608, "eta": "66:25:23", "grad_norm": 0.0063, "loss": 0.0432, "lr": "3.374e-05", "step": 6552, "steps": "23.81s,6552/16595" }, { "epoch": 1.974389876468816, "eta": "65:24:44", "grad_norm": 0.0052, "loss": 0.0323, "lr": "3.374e-05", "step": 6553, "steps": "23.45s,6553/16595" }, { "epoch": 1.974691172039771, "eta": "65:36:04", "grad_norm": 0.0046, "loss": 0.06, "lr": "3.373e-05", "step": 6554, "steps": "23.52s,6554/16595" }, { "epoch": 1.974992467610726, "eta": "66:04:07", "grad_norm": 0.0076, "loss": 0.0526, "lr": "3.373e-05", "step": 6555, "steps": "23.69s,6555/16595" }, { "epoch": 1.9752937631816812, "eta": "65:33:36", "grad_norm": 0.0053, "loss": 0.0404, "lr": "3.373e-05", "step": 6556, "steps": "23.51s,6556/16595" }, { "epoch": 1.9755950587526363, "eta": "66:28:25", "grad_norm": 0.0053, "loss": 0.0587, "lr": "3.372e-05", "step": 6557, "steps": "23.84s,6557/16595" }, { "epoch": 1.9758963543235915, "eta": "66:07:57", "grad_norm": 0.0051, "loss": 0.0464, "lr": "3.372e-05", "step": 6558, "steps": "23.72s,6558/16595" }, { "epoch": 1.9761976498945466, "eta": "65:34:06", "grad_norm": 0.006, "loss": 0.033, "lr": "3.371e-05", "step": 6559, "steps": "23.52s,6559/16595" }, { "epoch": 1.9764989454655018, "eta": "66:33:55", "grad_norm": 0.0066, "loss": 0.0435, "lr": "3.371e-05", "step": 6560, "steps": "23.88s,6560/16595" }, { "epoch": 1.976800241036457, "eta": "66:01:45", "grad_norm": 0.0057, "loss": 0.0435, "lr": "3.370e-05", "step": 6561, "steps": "23.69s,6561/16595" }, { "epoch": 1.9771015366074118, "eta": "66:21:25", "grad_norm": 0.0051, "loss": 0.0458, "lr": "3.370e-05", "step": 6562, "steps": "23.81s,6562/16595" }, { "epoch": 1.977402832178367, "eta": "65:34:12", "grad_norm": 0.0054, "loss": 0.0543, "lr": "3.369e-05", "step": 6563, "steps": "23.53s,6563/16595" }, { "epoch": 1.9777041277493221, "eta": "66:34:00", "grad_norm": 0.0052, "loss": 0.0493, "lr": "3.369e-05", "step": 6564, "steps": "23.89s,6564/16595" }, { "epoch": 1.978005423320277, "eta": "66:10:12", "grad_norm": 0.0045, "loss": 0.0604, "lr": "3.369e-05", "step": 6565, "steps": "23.75s,6565/16595" }, { "epoch": 1.9783067188912322, "eta": "66:06:28", "grad_norm": 0.0052, "loss": 0.043, "lr": "3.368e-05", "step": 6566, "steps": "23.73s,6566/16595" }, { "epoch": 1.9786080144621874, "eta": "65:57:43", "grad_norm": 0.0055, "loss": 0.0536, "lr": "3.368e-05", "step": 6567, "steps": "23.68s,6567/16595" }, { "epoch": 1.9789093100331425, "eta": "66:30:44", "grad_norm": 0.0052, "loss": 0.0421, "lr": "3.367e-05", "step": 6568, "steps": "23.88s,6568/16595" }, { "epoch": 1.9792106056040977, "eta": "66:30:20", "grad_norm": 0.0065, "loss": 0.0485, "lr": "3.367e-05", "step": 6569, "steps": "23.88s,6569/16595" }, { "epoch": 1.9795119011750528, "eta": "66:03:13", "grad_norm": 0.0057, "loss": 0.0565, "lr": "3.366e-05", "step": 6570, "steps": "23.72s,6570/16595" }, { "epoch": 1.979813196746008, "eta": "65:14:22", "grad_norm": 0.0069, "loss": 0.0342, "lr": "3.366e-05", "step": 6571, "steps": "23.43s,6571/16595" }, { "epoch": 1.980114492316963, "eta": "66:27:29", "grad_norm": 0.0049, "loss": 0.049, "lr": "3.365e-05", "step": 6572, "steps": "23.87s,6572/16595" }, { "epoch": 1.980415787887918, "eta": "66:52:08", "grad_norm": 0.0068, "loss": 0.0577, "lr": "3.365e-05", "step": 6573, "steps": "24.02s,6573/16595" }, { "epoch": 1.9807170834588732, "eta": "65:49:56", "grad_norm": 0.0053, "loss": 0.0642, "lr": "3.365e-05", "step": 6574, "steps": "23.65s,6574/16595" }, { "epoch": 1.9810183790298281, "eta": "65:27:50", "grad_norm": 0.0066, "loss": 0.056, "lr": "3.364e-05", "step": 6575, "steps": "23.52s,6575/16595" }, { "epoch": 1.9813196746007833, "eta": "66:19:12", "grad_norm": 0.0052, "loss": 0.055, "lr": "3.364e-05", "step": 6576, "steps": "23.83s,6576/16595" }, { "epoch": 1.9816209701717384, "eta": "65:22:02", "grad_norm": 0.0062, "loss": 0.0498, "lr": "3.363e-05", "step": 6577, "steps": "23.49s,6577/16595" }, { "epoch": 1.9819222657426936, "eta": "66:18:25", "grad_norm": 0.0064, "loss": 0.0469, "lr": "3.363e-05", "step": 6578, "steps": "23.83s,6578/16595" }, { "epoch": 1.9822235613136487, "eta": "65:59:39", "grad_norm": 0.0056, "loss": 0.0498, "lr": "3.362e-05", "step": 6579, "steps": "23.72s,6579/16595" }, { "epoch": 1.982524856884604, "eta": "66:54:20", "grad_norm": 0.0066, "loss": 0.0468, "lr": "3.362e-05", "step": 6580, "steps": "24.05s,6580/16595" }, { "epoch": 1.982826152455559, "eta": "65:13:48", "grad_norm": 0.0067, "loss": 0.0579, "lr": "3.361e-05", "step": 6581, "steps": "23.45s,6581/16595" }, { "epoch": 1.983127448026514, "eta": "65:18:25", "grad_norm": 0.006, "loss": 0.0435, "lr": "3.361e-05", "step": 6582, "steps": "23.48s,6582/16595" }, { "epoch": 1.9834287435974691, "eta": "65:31:22", "grad_norm": 0.006, "loss": 0.0438, "lr": "3.361e-05", "step": 6583, "steps": "23.56s,6583/16595" }, { "epoch": 1.9837300391684243, "eta": "66:12:41", "grad_norm": 0.006, "loss": 0.0516, "lr": "3.360e-05", "step": 6584, "steps": "23.81s,6584/16595" }, { "epoch": 1.9840313347393792, "eta": "66:20:38", "grad_norm": 0.0054, "loss": 0.0493, "lr": "3.360e-05", "step": 6585, "steps": "23.86s,6585/16595" }, { "epoch": 1.9843326303103344, "eta": "65:33:32", "grad_norm": 0.0044, "loss": 0.0621, "lr": "3.359e-05", "step": 6586, "steps": "23.58s,6586/16595" }, { "epoch": 1.9846339258812895, "eta": "66:28:11", "grad_norm": 0.0046, "loss": 0.0571, "lr": "3.359e-05", "step": 6587, "steps": "23.91s,6587/16595" }, { "epoch": 1.9849352214522447, "eta": "65:47:45", "grad_norm": 0.0056, "loss": 0.0468, "lr": "3.358e-05", "step": 6588, "steps": "23.67s,6588/16595" }, { "epoch": 1.9852365170231998, "eta": "66:25:43", "grad_norm": 0.0052, "loss": 0.0482, "lr": "3.358e-05", "step": 6589, "steps": "23.9s,6589/16595" }, { "epoch": 1.985537812594155, "eta": "65:16:57", "grad_norm": 0.0053, "loss": 0.0463, "lr": "3.357e-05", "step": 6590, "steps": "23.49s,6590/16595" }, { "epoch": 1.9858391081651101, "eta": "65:14:53", "grad_norm": 0.0056, "loss": 0.0531, "lr": "3.357e-05", "step": 6591, "steps": "23.48s,6591/16595" }, { "epoch": 1.986140403736065, "eta": "65:56:11", "grad_norm": 0.0137, "loss": 0.0617, "lr": "3.356e-05", "step": 6592, "steps": "23.73s,6592/16595" }, { "epoch": 1.9864416993070202, "eta": "66:09:07", "grad_norm": 0.0058, "loss": 0.0522, "lr": "3.356e-05", "step": 6593, "steps": "23.81s,6593/16595" }, { "epoch": 1.9867429948779753, "eta": "65:00:23", "grad_norm": 0.0055, "loss": 0.0472, "lr": "3.356e-05", "step": 6594, "steps": "23.4s,6594/16595" }, { "epoch": 1.9870442904489303, "eta": "65:40:00", "grad_norm": 0.005, "loss": 0.0642, "lr": "3.355e-05", "step": 6595, "steps": "23.64s,6595/16595" }, { "epoch": 1.9873455860198854, "eta": "66:02:56", "grad_norm": 0.005, "loss": 0.0612, "lr": "3.355e-05", "step": 6596, "steps": "23.78s,6596/16595" }, { "epoch": 1.9876468815908406, "eta": "65:24:12", "grad_norm": 0.0055, "loss": 0.0555, "lr": "3.354e-05", "step": 6597, "steps": "23.55s,6597/16595" }, { "epoch": 1.9879481771617957, "eta": "65:22:09", "grad_norm": 0.0056, "loss": 0.0555, "lr": "3.354e-05", "step": 6598, "steps": "23.54s,6598/16595" }, { "epoch": 1.9882494727327509, "eta": "65:11:46", "grad_norm": 0.0074, "loss": 0.0384, "lr": "3.353e-05", "step": 6599, "steps": "23.48s,6599/16595" }, { "epoch": 1.988550768303706, "eta": "65:54:41", "grad_norm": 0.0054, "loss": 0.0577, "lr": "3.353e-05", "step": 6600, "steps": "23.74s,6600/16595" }, { "epoch": 1.9888520638746612, "eta": "143:51:29", "grad_norm": 0.0073, "loss": 0.0356, "lr": "3.352e-05", "step": 6601, "steps": "51.82s,6601/16595" }, { "epoch": 1.989153359445616, "eta": "64:30:37", "grad_norm": 0.0052, "loss": 0.0437, "lr": "3.352e-05", "step": 6602, "steps": "23.24s,6602/16595" }, { "epoch": 1.9894546550165713, "eta": "65:48:30", "grad_norm": 0.0074, "loss": 0.0461, "lr": "3.352e-05", "step": 6603, "steps": "23.71s,6603/16595" }, { "epoch": 1.9897559505875264, "eta": "65:44:46", "grad_norm": 0.0056, "loss": 0.0497, "lr": "3.351e-05", "step": 6604, "steps": "23.69s,6604/16595" }, { "epoch": 1.9900572461584813, "eta": "64:57:45", "grad_norm": 0.0046, "loss": 0.0525, "lr": "3.351e-05", "step": 6605, "steps": "23.41s,6605/16595" }, { "epoch": 1.9903585417294365, "eta": "64:55:42", "grad_norm": 0.0054, "loss": 0.038, "lr": "3.350e-05", "step": 6606, "steps": "23.4s,6606/16595" }, { "epoch": 1.9906598373003916, "eta": "64:46:59", "grad_norm": 0.0057, "loss": 0.0487, "lr": "3.350e-05", "step": 6607, "steps": "23.35s,6607/16595" }, { "epoch": 1.9909611328713468, "eta": "64:46:36", "grad_norm": 0.0051, "loss": 0.0607, "lr": "3.349e-05", "step": 6608, "steps": "23.35s,6608/16595" }, { "epoch": 1.991262428442302, "eta": "65:17:50", "grad_norm": 0.0057, "loss": 0.0575, "lr": "3.349e-05", "step": 6609, "steps": "23.54s,6609/16595" }, { "epoch": 1.991563724013257, "eta": "64:54:09", "grad_norm": 0.0047, "loss": 0.0446, "lr": "3.348e-05", "step": 6610, "steps": "23.4s,6610/16595" }, { "epoch": 1.9918650195842122, "eta": "65:48:40", "grad_norm": 0.0049, "loss": 0.0576, "lr": "3.348e-05", "step": 6611, "steps": "23.73s,6611/16595" }, { "epoch": 1.9921663151551672, "eta": "64:41:43", "grad_norm": 0.0048, "loss": 0.0518, "lr": "3.348e-05", "step": 6612, "steps": "23.33s,6612/16595" }, { "epoch": 1.9924676107261223, "eta": "65:32:54", "grad_norm": 0.0046, "loss": 0.0719, "lr": "3.347e-05", "step": 6613, "steps": "23.64s,6613/16595" }, { "epoch": 1.9927689062970775, "eta": "64:47:35", "grad_norm": 0.005, "loss": 0.0658, "lr": "3.347e-05", "step": 6614, "steps": "23.37s,6614/16595" }, { "epoch": 1.9930702018680324, "eta": "64:40:33", "grad_norm": 0.005, "loss": 0.0558, "lr": "3.346e-05", "step": 6615, "steps": "23.33s,6615/16595" }, { "epoch": 1.9933714974389876, "eta": "64:53:28", "grad_norm": 0.0044, "loss": 0.0662, "lr": "3.346e-05", "step": 6616, "steps": "23.41s,6616/16595" }, { "epoch": 1.9936727930099427, "eta": "65:49:37", "grad_norm": 0.0053, "loss": 0.0591, "lr": "3.345e-05", "step": 6617, "steps": "23.75s,6617/16595" }, { "epoch": 1.9939740885808979, "eta": "64:16:06", "grad_norm": 0.0086, "loss": 0.0427, "lr": "3.345e-05", "step": 6618, "steps": "23.19s,6618/16595" }, { "epoch": 1.994275384151853, "eta": "64:58:57", "grad_norm": 0.0065, "loss": 0.0618, "lr": "3.344e-05", "step": 6619, "steps": "23.45s,6619/16595" }, { "epoch": 1.9945766797228082, "eta": "65:00:13", "grad_norm": 0.0051, "loss": 0.0614, "lr": "3.344e-05", "step": 6620, "steps": "23.46s,6620/16595" }, { "epoch": 1.9948779752937633, "eta": "65:09:48", "grad_norm": 0.0061, "loss": 0.0524, "lr": "3.343e-05", "step": 6621, "steps": "23.52s,6621/16595" }, { "epoch": 1.9951792708647182, "eta": "64:36:10", "grad_norm": 0.0045, "loss": 0.047, "lr": "3.343e-05", "step": 6622, "steps": "23.32s,6622/16595" }, { "epoch": 1.9954805664356734, "eta": "65:38:56", "grad_norm": 0.005, "loss": 0.0414, "lr": "3.343e-05", "step": 6623, "steps": "23.7s,6623/16595" }, { "epoch": 1.9957818620066285, "eta": "64:38:43", "grad_norm": 0.0062, "loss": 0.0424, "lr": "3.342e-05", "step": 6624, "steps": "23.34s,6624/16595" }, { "epoch": 1.9960831575775835, "eta": "65:39:48", "grad_norm": 0.0064, "loss": 0.0476, "lr": "3.342e-05", "step": 6625, "steps": "23.71s,6625/16595" }, { "epoch": 1.9963844531485386, "eta": "65:51:02", "grad_norm": 0.0057, "loss": 0.0488, "lr": "3.341e-05", "step": 6626, "steps": "23.78s,6626/16595" }, { "epoch": 1.9966857487194938, "eta": "65:14:06", "grad_norm": 0.0065, "loss": 0.053, "lr": "3.341e-05", "step": 6627, "steps": "23.56s,6627/16595" }, { "epoch": 1.996987044290449, "eta": "65:30:19", "grad_norm": 0.0052, "loss": 0.0541, "lr": "3.340e-05", "step": 6628, "steps": "23.66s,6628/16595" }, { "epoch": 1.997288339861404, "eta": "65:14:58", "grad_norm": 0.0048, "loss": 0.05, "lr": "3.340e-05", "step": 6629, "steps": "23.57s,6629/16595" }, { "epoch": 1.9975896354323592, "eta": "65:16:14", "grad_norm": 0.0048, "loss": 0.0508, "lr": "3.339e-05", "step": 6630, "steps": "23.58s,6630/16595" }, { "epoch": 1.9978909310033144, "eta": "64:19:23", "grad_norm": 0.0056, "loss": 0.0343, "lr": "3.339e-05", "step": 6631, "steps": "23.24s,6631/16595" }, { "epoch": 1.9981922265742693, "eta": "65:32:03", "grad_norm": 0.0063, "loss": 0.0434, "lr": "3.339e-05", "step": 6632, "steps": "23.68s,6632/16595" }, { "epoch": 1.9984935221452245, "eta": "64:15:17", "grad_norm": 0.0066, "loss": 0.0297, "lr": "3.338e-05", "step": 6633, "steps": "23.22s,6633/16595" }, { "epoch": 1.9987948177161796, "eta": "65:37:54", "grad_norm": 0.0054, "loss": 0.0669, "lr": "3.338e-05", "step": 6634, "steps": "23.72s,6634/16595" }, { "epoch": 1.9990961132871345, "eta": "65:17:36", "grad_norm": 0.0045, "loss": 0.0444, "lr": "3.337e-05", "step": 6635, "steps": "23.6s,6635/16595" }, { "epoch": 1.9993974088580897, "eta": "64:40:41", "grad_norm": 0.0048, "loss": 0.0453, "lr": "3.337e-05", "step": 6636, "steps": "23.38s,6636/16595" }, { "epoch": 1.9996987044290448, "eta": "65:10:10", "grad_norm": 0.005, "loss": 0.0406, "lr": "3.336e-05", "step": 6637, "steps": "23.56s,6637/16595" }, { "epoch": 2.0, "eta": "64:15:01", "grad_norm": 0.0052, "loss": 0.0562, "lr": "3.336e-05", "step": 6638, "steps": "23.23s,6638/16595" }, { "epoch": 2.000301295570955, "eta": "138:29:56", "grad_norm": 0.0065, "loss": 0.0452, "lr": "3.335e-05", "step": 6639, "steps": "50.08s,6639/16595" }, { "epoch": 2.0006025911419103, "eta": "67:33:20", "grad_norm": 0.0058, "loss": 0.0618, "lr": "3.335e-05", "step": 6640, "steps": "24.43s,6640/16595" }, { "epoch": 2.0009038867128655, "eta": "67:18:00", "grad_norm": 0.0055, "loss": 0.0473, "lr": "3.334e-05", "step": 6641, "steps": "24.34s,6641/16595" }, { "epoch": 2.0012051822838206, "eta": "66:27:50", "grad_norm": 0.0079, "loss": 0.0609, "lr": "3.334e-05", "step": 6642, "steps": "24.04s,6642/16595" }, { "epoch": 2.0015064778547758, "eta": "65:57:34", "grad_norm": 0.0053, "loss": 0.0637, "lr": "3.334e-05", "step": 6643, "steps": "23.86s,6643/16595" }, { "epoch": 2.0018077734257305, "eta": "65:20:41", "grad_norm": 0.0056, "loss": 0.0491, "lr": "3.333e-05", "step": 6644, "steps": "23.64s,6644/16595" }, { "epoch": 2.0021090689966856, "eta": "66:53:10", "grad_norm": 0.0049, "loss": 0.0527, "lr": "3.333e-05", "step": 6645, "steps": "24.2s,6645/16595" }, { "epoch": 2.0024103645676408, "eta": "66:17:56", "grad_norm": 0.007, "loss": 0.049, "lr": "3.332e-05", "step": 6646, "steps": "23.99s,6646/16595" }, { "epoch": 2.002711660138596, "eta": "65:16:11", "grad_norm": 0.0055, "loss": 0.0493, "lr": "3.332e-05", "step": 6647, "steps": "23.62s,6647/16595" }, { "epoch": 2.003012955709551, "eta": "65:14:08", "grad_norm": 0.0057, "loss": 0.0579, "lr": "3.331e-05", "step": 6648, "steps": "23.61s,6648/16595" }, { "epoch": 2.003314251280506, "eta": "66:41:36", "grad_norm": 0.0063, "loss": 0.0717, "lr": "3.331e-05", "step": 6649, "steps": "24.14s,6649/16595" }, { "epoch": 2.0036155468514614, "eta": "66:36:13", "grad_norm": 0.0064, "loss": 0.0344, "lr": "3.330e-05", "step": 6650, "steps": "24.11s,6650/16595" }, { "epoch": 2.0039168424224165, "eta": "65:54:23", "grad_norm": 0.0062, "loss": 0.0331, "lr": "3.330e-05", "step": 6651, "steps": "23.86s,6651/16595" }, { "epoch": 2.0042181379933717, "eta": "66:45:22", "grad_norm": 0.0048, "loss": 0.0535, "lr": "3.330e-05", "step": 6652, "steps": "24.17s,6652/16595" }, { "epoch": 2.004519433564327, "eta": "65:30:24", "grad_norm": 0.0054, "loss": 0.0468, "lr": "3.329e-05", "step": 6653, "steps": "23.72s,6653/16595" }, { "epoch": 2.0048207291352815, "eta": "65:33:19", "grad_norm": 0.0053, "loss": 0.0511, "lr": "3.329e-05", "step": 6654, "steps": "23.74s,6654/16595" }, { "epoch": 2.0051220247062367, "eta": "65:52:48", "grad_norm": 0.0052, "loss": 0.0558, "lr": "3.328e-05", "step": 6655, "steps": "23.86s,6655/16595" }, { "epoch": 2.005423320277192, "eta": "65:44:07", "grad_norm": 0.0053, "loss": 0.0608, "lr": "3.328e-05", "step": 6656, "steps": "23.81s,6656/16595" }, { "epoch": 2.005724615848147, "eta": "65:58:38", "grad_norm": 0.005, "loss": 0.0621, "lr": "3.327e-05", "step": 6657, "steps": "23.9s,6657/16595" }, { "epoch": 2.006025911419102, "eta": "66:51:14", "grad_norm": 0.0054, "loss": 0.0513, "lr": "3.327e-05", "step": 6658, "steps": "24.22s,6658/16595" }, { "epoch": 2.0063272069900573, "eta": "66:29:18", "grad_norm": 0.007, "loss": 0.0509, "lr": "3.326e-05", "step": 6659, "steps": "24.09s,6659/16595" }, { "epoch": 2.0066285025610124, "eta": "66:38:50", "grad_norm": 0.006, "loss": 0.0496, "lr": "3.326e-05", "step": 6660, "steps": "24.15s,6660/16595" }, { "epoch": 2.0069297981319676, "eta": "66:06:58", "grad_norm": 0.0056, "loss": 0.067, "lr": "3.325e-05", "step": 6661, "steps": "23.96s,6661/16595" }, { "epoch": 2.0072310937029227, "eta": "65:18:34", "grad_norm": 0.0068, "loss": 0.0441, "lr": "3.325e-05", "step": 6662, "steps": "23.67s,6662/16595" }, { "epoch": 2.007532389273878, "eta": "65:24:47", "grad_norm": 0.0052, "loss": 0.0444, "lr": "3.325e-05", "step": 6663, "steps": "23.71s,6663/16595" }, { "epoch": 2.0078336848448326, "eta": "66:20:40", "grad_norm": 0.0055, "loss": 0.0333, "lr": "3.324e-05", "step": 6664, "steps": "24.05s,6664/16595" }, { "epoch": 2.0081349804157878, "eta": "66:12:00", "grad_norm": 0.0052, "loss": 0.0511, "lr": "3.324e-05", "step": 6665, "steps": "24.0s,6665/16595" }, { "epoch": 2.008436275986743, "eta": "66:43:02", "grad_norm": 0.0054, "loss": 0.0536, "lr": "3.323e-05", "step": 6666, "steps": "24.19s,6666/16595" }, { "epoch": 2.008737571557698, "eta": "66:45:56", "grad_norm": 0.005, "loss": 0.073, "lr": "3.323e-05", "step": 6667, "steps": "24.21s,6667/16595" }, { "epoch": 2.009038867128653, "eta": "65:55:54", "grad_norm": 0.0049, "loss": 0.0607, "lr": "3.322e-05", "step": 6668, "steps": "23.91s,6668/16595" }, { "epoch": 2.0093401626996084, "eta": "65:47:14", "grad_norm": 0.0052, "loss": 0.0604, "lr": "3.322e-05", "step": 6669, "steps": "23.86s,6669/16595" }, { "epoch": 2.0096414582705635, "eta": "65:13:45", "grad_norm": 0.006, "loss": 0.0527, "lr": "3.321e-05", "step": 6670, "steps": "23.66s,6670/16595" }, { "epoch": 2.0099427538415187, "eta": "65:56:22", "grad_norm": 0.0097, "loss": 0.0533, "lr": "3.321e-05", "step": 6671, "steps": "23.92s,6671/16595" }, { "epoch": 2.010244049412474, "eta": "65:17:55", "grad_norm": 0.0049, "loss": 0.0594, "lr": "3.321e-05", "step": 6672, "steps": "23.69s,6672/16595" }, { "epoch": 2.010545344983429, "eta": "65:27:27", "grad_norm": 0.0052, "loss": 0.0365, "lr": "3.320e-05", "step": 6673, "steps": "23.75s,6673/16595" }, { "epoch": 2.0108466405543837, "eta": "66:03:26", "grad_norm": 0.0051, "loss": 0.0437, "lr": "3.320e-05", "step": 6674, "steps": "23.97s,6674/16595" }, { "epoch": 2.011147936125339, "eta": "66:21:13", "grad_norm": 0.0047, "loss": 0.0494, "lr": "3.319e-05", "step": 6675, "steps": "24.08s,6675/16595" }, { "epoch": 2.011449231696294, "eta": "66:27:26", "grad_norm": 0.0057, "loss": 0.0499, "lr": "3.319e-05", "step": 6676, "steps": "24.12s,6676/16595" }, { "epoch": 2.011750527267249, "eta": "65:37:26", "grad_norm": 0.0067, "loss": 0.0475, "lr": "3.318e-05", "step": 6677, "steps": "23.82s,6677/16595" }, { "epoch": 2.0120518228382043, "eta": "66:28:17", "grad_norm": 0.0065, "loss": 0.0524, "lr": "3.318e-05", "step": 6678, "steps": "24.13s,6678/16595" }, { "epoch": 2.0123531184091594, "eta": "66:13:00", "grad_norm": 0.006, "loss": 0.0408, "lr": "3.317e-05", "step": 6679, "steps": "24.04s,6679/16595" }, { "epoch": 2.0126544139801146, "eta": "65:14:46", "grad_norm": 0.0057, "loss": 0.0584, "lr": "3.317e-05", "step": 6680, "steps": "23.69s,6680/16595" }, { "epoch": 2.0129557095510697, "eta": "65:40:48", "grad_norm": 0.0049, "loss": 0.0551, "lr": "3.316e-05", "step": 6681, "steps": "23.85s,6681/16595" }, { "epoch": 2.013257005122025, "eta": "66:33:17", "grad_norm": 0.0059, "loss": 0.0602, "lr": "3.316e-05", "step": 6682, "steps": "24.17s,6682/16595" }, { "epoch": 2.01355830069298, "eta": "65:11:56", "grad_norm": 0.0051, "loss": 0.0397, "lr": "3.316e-05", "step": 6683, "steps": "23.68s,6683/16595" }, { "epoch": 2.0138595962639347, "eta": "65:14:50", "grad_norm": 0.0049, "loss": 0.0438, "lr": "3.315e-05", "step": 6684, "steps": "23.7s,6684/16595" }, { "epoch": 2.01416089183489, "eta": "65:19:24", "grad_norm": 0.0047, "loss": 0.0558, "lr": "3.315e-05", "step": 6685, "steps": "23.73s,6685/16595" }, { "epoch": 2.014462187405845, "eta": "66:03:36", "grad_norm": 0.0048, "loss": 0.0572, "lr": "3.314e-05", "step": 6686, "steps": "24.0s,6686/16595" }, { "epoch": 2.0147634829768, "eta": "65:26:52", "grad_norm": 0.0062, "loss": 0.0442, "lr": "3.314e-05", "step": 6687, "steps": "23.78s,6687/16595" }, { "epoch": 2.0150647785477553, "eta": "64:45:11", "grad_norm": 0.0051, "loss": 0.0481, "lr": "3.313e-05", "step": 6688, "steps": "23.53s,6688/16595" }, { "epoch": 2.0153660741187105, "eta": "65:07:55", "grad_norm": 0.0045, "loss": 0.0628, "lr": "3.313e-05", "step": 6689, "steps": "23.67s,6689/16595" }, { "epoch": 2.0156673696896656, "eta": "66:00:20", "grad_norm": 0.006, "loss": 0.0494, "lr": "3.312e-05", "step": 6690, "steps": "23.99s,6690/16595" }, { "epoch": 2.015968665260621, "eta": "64:42:22", "grad_norm": 0.0078, "loss": 0.0627, "lr": "3.312e-05", "step": 6691, "steps": "23.52s,6691/16595" }, { "epoch": 2.016269960831576, "eta": "65:10:02", "grad_norm": 0.0047, "loss": 0.0533, "lr": "3.311e-05", "step": 6692, "steps": "23.69s,6692/16595" }, { "epoch": 2.016571256402531, "eta": "65:01:23", "grad_norm": 0.0065, "loss": 0.0532, "lr": "3.311e-05", "step": 6693, "steps": "23.64s,6693/16595" }, { "epoch": 2.016872551973486, "eta": "65:33:59", "grad_norm": 0.007, "loss": 0.0437, "lr": "3.311e-05", "step": 6694, "steps": "23.84s,6694/16595" }, { "epoch": 2.017173847544441, "eta": "64:45:45", "grad_norm": 0.0063, "loss": 0.0402, "lr": "3.310e-05", "step": 6695, "steps": "23.55s,6695/16595" }, { "epoch": 2.017475143115396, "eta": "65:28:15", "grad_norm": 0.0054, "loss": 0.0498, "lr": "3.310e-05", "step": 6696, "steps": "23.81s,6696/16595" }, { "epoch": 2.0177764386863513, "eta": "65:22:54", "grad_norm": 0.006, "loss": 0.0525, "lr": "3.309e-05", "step": 6697, "steps": "23.78s,6697/16595" }, { "epoch": 2.0180777342573064, "eta": "65:48:54", "grad_norm": 0.0048, "loss": 0.0407, "lr": "3.309e-05", "step": 6698, "steps": "23.94s,6698/16595" }, { "epoch": 2.0183790298282616, "eta": "65:53:27", "grad_norm": 0.0067, "loss": 0.0587, "lr": "3.308e-05", "step": 6699, "steps": "23.97s,6699/16595" }, { "epoch": 2.0186803253992167, "eta": "65:18:25", "grad_norm": 0.0056, "loss": 0.0496, "lr": "3.308e-05", "step": 6700, "steps": "23.76s,6700/16595" }, { "epoch": 2.018981620970172, "eta": "65:46:03", "grad_norm": 0.006, "loss": 0.0495, "lr": "3.307e-05", "step": 6701, "steps": "23.93s,6701/16595" }, { "epoch": 2.019282916541127, "eta": "65:57:12", "grad_norm": 0.0067, "loss": 0.0573, "lr": "3.307e-05", "step": 6702, "steps": "24.0s,6702/16595" }, { "epoch": 2.019584212112082, "eta": "65:18:52", "grad_norm": 0.0058, "loss": 0.0563, "lr": "3.307e-05", "step": 6703, "steps": "23.77s,6703/16595" }, { "epoch": 2.019885507683037, "eta": "65:43:12", "grad_norm": 0.0067, "loss": 0.0461, "lr": "3.306e-05", "step": 6704, "steps": "23.92s,6704/16595" }, { "epoch": 2.020186803253992, "eta": "64:55:00", "grad_norm": 0.0061, "loss": 0.059, "lr": "3.306e-05", "step": 6705, "steps": "23.63s,6705/16595" }, { "epoch": 2.020488098824947, "eta": "64:56:15", "grad_norm": 0.0057, "loss": 0.0602, "lr": "3.305e-05", "step": 6706, "steps": "23.64s,6706/16595" }, { "epoch": 2.0207893943959023, "eta": "65:40:22", "grad_norm": 0.0062, "loss": 0.0447, "lr": "3.305e-05", "step": 6707, "steps": "23.91s,6707/16595" }, { "epoch": 2.0210906899668575, "eta": "65:00:25", "grad_norm": 0.0049, "loss": 0.0475, "lr": "3.304e-05", "step": 6708, "steps": "23.67s,6708/16595" }, { "epoch": 2.0213919855378126, "eta": "65:14:51", "grad_norm": 0.0059, "loss": 0.062, "lr": "3.304e-05", "step": 6709, "steps": "23.76s,6709/16595" }, { "epoch": 2.021693281108768, "eta": "66:50:00", "grad_norm": 0.0101, "loss": 0.0479, "lr": "3.303e-05", "step": 6710, "steps": "24.34s,6710/16595" }, { "epoch": 2.021994576679723, "eta": "64:49:21", "grad_norm": 0.007, "loss": 0.0568, "lr": "3.303e-05", "step": 6711, "steps": "23.61s,6711/16595" }, { "epoch": 2.022295872250678, "eta": "65:46:36", "grad_norm": 0.0048, "loss": 0.0516, "lr": "3.302e-05", "step": 6712, "steps": "23.96s,6712/16595" }, { "epoch": 2.0225971678216332, "eta": "65:18:12", "grad_norm": 0.0095, "loss": 0.0422, "lr": "3.302e-05", "step": 6713, "steps": "23.79s,6713/16595" }, { "epoch": 2.022898463392588, "eta": "64:39:56", "grad_norm": 0.0048, "loss": 0.0411, "lr": "3.302e-05", "step": 6714, "steps": "23.56s,6714/16595" }, { "epoch": 2.023199758963543, "eta": "65:30:35", "grad_norm": 0.0055, "loss": 0.0394, "lr": "3.301e-05", "step": 6715, "steps": "23.87s,6715/16595" }, { "epoch": 2.0235010545344982, "eta": "65:45:00", "grad_norm": 0.0052, "loss": 0.0594, "lr": "3.301e-05", "step": 6716, "steps": "23.96s,6716/16595" }, { "epoch": 2.0238023501054534, "eta": "64:30:31", "grad_norm": 0.0061, "loss": 0.0759, "lr": "3.300e-05", "step": 6717, "steps": "23.51s,6717/16595" }, { "epoch": 2.0241036456764085, "eta": "65:11:17", "grad_norm": 0.0045, "loss": 0.0486, "lr": "3.300e-05", "step": 6718, "steps": "23.76s,6718/16595" }, { "epoch": 2.0244049412473637, "eta": "65:35:35", "grad_norm": 0.0053, "loss": 0.0461, "lr": "3.299e-05", "step": 6719, "steps": "23.91s,6719/16595" }, { "epoch": 2.024706236818319, "eta": "65:05:33", "grad_norm": 0.006, "loss": 0.0467, "lr": "3.299e-05", "step": 6720, "steps": "23.73s,6720/16595" }, { "epoch": 2.025007532389274, "eta": "64:27:19", "grad_norm": 0.0053, "loss": 0.0531, "lr": "3.298e-05", "step": 6721, "steps": "23.5s,6721/16595" }, { "epoch": 2.025308827960229, "eta": "65:55:46", "grad_norm": 0.0053, "loss": 0.0573, "lr": "3.298e-05", "step": 6722, "steps": "24.04s,6722/16595" }, { "epoch": 2.0256101235311843, "eta": "65:40:34", "grad_norm": 0.0046, "loss": 0.0537, "lr": "3.297e-05", "step": 6723, "steps": "23.95s,6723/16595" }, { "epoch": 2.025911419102139, "eta": "65:12:12", "grad_norm": 0.0057, "loss": 0.0556, "lr": "3.297e-05", "step": 6724, "steps": "23.78s,6724/16595" }, { "epoch": 2.026212714673094, "eta": "64:43:50", "grad_norm": 0.0058, "loss": 0.0549, "lr": "3.297e-05", "step": 6725, "steps": "23.61s,6725/16595" }, { "epoch": 2.0265140102440493, "eta": "64:53:19", "grad_norm": 0.0056, "loss": 0.0512, "lr": "3.296e-05", "step": 6726, "steps": "23.67s,6726/16595" }, { "epoch": 2.0268153058150045, "eta": "65:06:05", "grad_norm": 0.0052, "loss": 0.0665, "lr": "3.296e-05", "step": 6727, "steps": "23.75s,6727/16595" }, { "epoch": 2.0271166013859596, "eta": "66:01:36", "grad_norm": 0.0049, "loss": 0.0557, "lr": "3.295e-05", "step": 6728, "steps": "24.09s,6728/16595" }, { "epoch": 2.0274178969569148, "eta": "65:13:30", "grad_norm": 0.0051, "loss": 0.0538, "lr": "3.295e-05", "step": 6729, "steps": "23.8s,6729/16595" }, { "epoch": 2.02771919252787, "eta": "64:30:22", "grad_norm": 0.0052, "loss": 0.0531, "lr": "3.294e-05", "step": 6730, "steps": "23.54s,6730/16595" }, { "epoch": 2.028020488098825, "eta": "65:24:13", "grad_norm": 0.0048, "loss": 0.06, "lr": "3.294e-05", "step": 6731, "steps": "23.87s,6731/16595" }, { "epoch": 2.02832178366978, "eta": "65:35:20", "grad_norm": 0.0057, "loss": 0.0517, "lr": "3.293e-05", "step": 6732, "steps": "23.94s,6732/16595" }, { "epoch": 2.0286230792407354, "eta": "65:16:51", "grad_norm": 0.0044, "loss": 0.0485, "lr": "3.293e-05", "step": 6733, "steps": "23.83s,6733/16595" }, { "epoch": 2.02892437481169, "eta": "65:08:14", "grad_norm": 0.0045, "loss": 0.0562, "lr": "3.293e-05", "step": 6734, "steps": "23.78s,6734/16595" }, { "epoch": 2.0292256703826452, "eta": "66:18:30", "grad_norm": 0.006, "loss": 0.0425, "lr": "3.292e-05", "step": 6735, "steps": "24.21s,6735/16595" }, { "epoch": 2.0295269659536004, "eta": "64:23:05", "grad_norm": 0.0044, "loss": 0.0537, "lr": "3.292e-05", "step": 6736, "steps": "23.51s,6736/16595" }, { "epoch": 2.0298282615245555, "eta": "65:41:33", "grad_norm": 0.0053, "loss": 0.0466, "lr": "3.291e-05", "step": 6737, "steps": "23.99s,6737/16595" }, { "epoch": 2.0301295570955107, "eta": "64:32:09", "grad_norm": 0.0045, "loss": 0.0583, "lr": "3.291e-05", "step": 6738, "steps": "23.57s,6738/16595" }, { "epoch": 2.030430852666466, "eta": "65:07:54", "grad_norm": 0.004, "loss": 0.0401, "lr": "3.290e-05", "step": 6739, "steps": "23.79s,6739/16595" }, { "epoch": 2.030732148237421, "eta": "65:33:47", "grad_norm": 0.0053, "loss": 0.0418, "lr": "3.290e-05", "step": 6740, "steps": "23.95s,6740/16595" }, { "epoch": 2.031033443808376, "eta": "65:16:57", "grad_norm": 0.0052, "loss": 0.0498, "lr": "3.289e-05", "step": 6741, "steps": "23.85s,6741/16595" }, { "epoch": 2.0313347393793313, "eta": "65:00:08", "grad_norm": 0.0063, "loss": 0.0498, "lr": "3.289e-05", "step": 6742, "steps": "23.75s,6742/16595" }, { "epoch": 2.0316360349502864, "eta": "65:17:48", "grad_norm": 0.0063, "loss": 0.0399, "lr": "3.288e-05", "step": 6743, "steps": "23.86s,6743/16595" }, { "epoch": 2.031937330521241, "eta": "65:30:32", "grad_norm": 0.0049, "loss": 0.0554, "lr": "3.288e-05", "step": 6744, "steps": "23.94s,6744/16595" }, { "epoch": 2.0322386260921963, "eta": "65:53:07", "grad_norm": 0.0049, "loss": 0.0482, "lr": "3.288e-05", "step": 6745, "steps": "24.08s,6745/16595" }, { "epoch": 2.0325399216631514, "eta": "64:27:22", "grad_norm": 0.0056, "loss": 0.045, "lr": "3.287e-05", "step": 6746, "steps": "23.56s,6746/16595" }, { "epoch": 2.0328412172341066, "eta": "65:27:42", "grad_norm": 0.0047, "loss": 0.0567, "lr": "3.287e-05", "step": 6747, "steps": "23.93s,6747/16595" }, { "epoch": 2.0331425128050618, "eta": "64:51:12", "grad_norm": 0.0053, "loss": 0.0537, "lr": "3.286e-05", "step": 6748, "steps": "23.71s,6748/16595" }, { "epoch": 2.033443808376017, "eta": "64:21:16", "grad_norm": 0.0061, "loss": 0.0506, "lr": "3.286e-05", "step": 6749, "steps": "23.53s,6749/16595" }, { "epoch": 2.033745103946972, "eta": "65:28:09", "grad_norm": 0.0048, "loss": 0.0488, "lr": "3.285e-05", "step": 6750, "steps": "23.94s,6750/16595" }, { "epoch": 2.034046399517927, "eta": "65:49:05", "grad_norm": 0.0053, "loss": 0.0723, "lr": "3.285e-05", "step": 6751, "steps": "24.07s,6751/16595" }, { "epoch": 2.0343476950888824, "eta": "64:46:20", "grad_norm": 0.0045, "loss": 0.0524, "lr": "3.284e-05", "step": 6752, "steps": "23.69s,6752/16595" }, { "epoch": 2.0346489906598375, "eta": "64:31:11", "grad_norm": 0.0046, "loss": 0.0495, "lr": "3.284e-05", "step": 6753, "steps": "23.6s,6753/16595" }, { "epoch": 2.034950286230792, "eta": "65:26:33", "grad_norm": 0.0058, "loss": 0.0389, "lr": "3.283e-05", "step": 6754, "steps": "23.94s,6754/16595" }, { "epoch": 2.0352515818017474, "eta": "64:56:38", "grad_norm": 0.0045, "loss": 0.0741, "lr": "3.283e-05", "step": 6755, "steps": "23.76s,6755/16595" }, { "epoch": 2.0355528773727025, "eta": "65:09:21", "grad_norm": 0.0062, "loss": 0.0414, "lr": "3.283e-05", "step": 6756, "steps": "23.84s,6756/16595" }, { "epoch": 2.0358541729436577, "eta": "64:47:38", "grad_norm": 0.0061, "loss": 0.056, "lr": "3.282e-05", "step": 6757, "steps": "23.71s,6757/16595" }, { "epoch": 2.036155468514613, "eta": "64:57:05", "grad_norm": 0.005, "loss": 0.0498, "lr": "3.282e-05", "step": 6758, "steps": "23.77s,6758/16595" }, { "epoch": 2.036456764085568, "eta": "64:58:20", "grad_norm": 0.0058, "loss": 0.0573, "lr": "3.281e-05", "step": 6759, "steps": "23.78s,6759/16595" }, { "epoch": 2.036758059656523, "eta": "65:02:51", "grad_norm": 0.0048, "loss": 0.0616, "lr": "3.281e-05", "step": 6760, "steps": "23.81s,6760/16595" }, { "epoch": 2.0370593552274783, "eta": "64:52:37", "grad_norm": 0.0063, "loss": 0.0546, "lr": "3.280e-05", "step": 6761, "steps": "23.75s,6761/16595" }, { "epoch": 2.0373606507984334, "eta": "65:00:25", "grad_norm": 0.0056, "loss": 0.0467, "lr": "3.280e-05", "step": 6762, "steps": "23.8s,6762/16595" }, { "epoch": 2.0376619463693886, "eta": "65:39:21", "grad_norm": 0.0053, "loss": 0.0538, "lr": "3.279e-05", "step": 6763, "steps": "24.04s,6763/16595" }, { "epoch": 2.0379632419403433, "eta": "64:51:26", "grad_norm": 0.0053, "loss": 0.0491, "lr": "3.279e-05", "step": 6764, "steps": "23.75s,6764/16595" }, { "epoch": 2.0382645375112984, "eta": "64:59:14", "grad_norm": 0.0074, "loss": 0.0558, "lr": "3.278e-05", "step": 6765, "steps": "23.8s,6765/16595" }, { "epoch": 2.0385658330822536, "eta": "65:33:14", "grad_norm": 0.005, "loss": 0.0495, "lr": "3.278e-05", "step": 6766, "steps": "24.01s,6766/16595" }, { "epoch": 2.0388671286532087, "eta": "65:01:42", "grad_norm": 0.0048, "loss": 0.0664, "lr": "3.278e-05", "step": 6767, "steps": "23.82s,6767/16595" }, { "epoch": 2.039168424224164, "eta": "64:48:12", "grad_norm": 0.0049, "loss": 0.031, "lr": "3.277e-05", "step": 6768, "steps": "23.74s,6768/16595" }, { "epoch": 2.039469719795119, "eta": "64:51:05", "grad_norm": 0.0054, "loss": 0.0295, "lr": "3.277e-05", "step": 6769, "steps": "23.76s,6769/16595" }, { "epoch": 2.039771015366074, "eta": "64:31:03", "grad_norm": 0.0056, "loss": 0.0454, "lr": "3.276e-05", "step": 6770, "steps": "23.64s,6770/16595" }, { "epoch": 2.0400723109370293, "eta": "65:23:03", "grad_norm": 0.0058, "loss": 0.0563, "lr": "3.276e-05", "step": 6771, "steps": "23.96s,6771/16595" }, { "epoch": 2.0403736065079845, "eta": "64:31:53", "grad_norm": 0.006, "loss": 0.0415, "lr": "3.275e-05", "step": 6772, "steps": "23.65s,6772/16595" }, { "epoch": 2.0406749020789396, "eta": "65:25:31", "grad_norm": 0.0057, "loss": 0.0332, "lr": "3.275e-05", "step": 6773, "steps": "23.98s,6773/16595" }, { "epoch": 2.0409761976498944, "eta": "64:54:01", "grad_norm": 0.0053, "loss": 0.0599, "lr": "3.274e-05", "step": 6774, "steps": "23.79s,6774/16595" }, { "epoch": 2.0412774932208495, "eta": "65:13:16", "grad_norm": 0.0047, "loss": 0.0533, "lr": "3.274e-05", "step": 6775, "steps": "23.91s,6775/16595" }, { "epoch": 2.0415787887918047, "eta": "65:24:19", "grad_norm": 0.0081, "loss": 0.043, "lr": "3.273e-05", "step": 6776, "steps": "23.98s,6776/16595" }, { "epoch": 2.04188008436276, "eta": "64:07:01", "grad_norm": 0.0055, "loss": 0.0632, "lr": "3.273e-05", "step": 6777, "steps": "23.51s,6777/16595" }, { "epoch": 2.042181379933715, "eta": "65:07:09", "grad_norm": 0.0068, "loss": 0.058, "lr": "3.273e-05", "step": 6778, "steps": "23.88s,6778/16595" }, { "epoch": 2.04248267550467, "eta": "65:14:56", "grad_norm": 0.0041, "loss": 0.054, "lr": "3.272e-05", "step": 6779, "steps": "23.93s,6779/16595" }, { "epoch": 2.0427839710756253, "eta": "65:30:54", "grad_norm": 0.0049, "loss": 0.0633, "lr": "3.272e-05", "step": 6780, "steps": "24.03s,6780/16595" }, { "epoch": 2.0430852666465804, "eta": "63:50:43", "grad_norm": 0.006, "loss": 0.065, "lr": "3.271e-05", "step": 6781, "steps": "23.42s,6781/16595" }, { "epoch": 2.0433865622175356, "eta": "64:42:40", "grad_norm": 0.0044, "loss": 0.0365, "lr": "3.271e-05", "step": 6782, "steps": "23.74s,6782/16595" }, { "epoch": 2.0436878577884907, "eta": "65:05:10", "grad_norm": 0.0063, "loss": 0.0588, "lr": "3.270e-05", "step": 6783, "steps": "23.88s,6783/16595" }, { "epoch": 2.0439891533594454, "eta": "64:04:16", "grad_norm": 0.0055, "loss": 0.0462, "lr": "3.270e-05", "step": 6784, "steps": "23.51s,6784/16595" }, { "epoch": 2.0442904489304006, "eta": "64:41:29", "grad_norm": 0.0047, "loss": 0.0413, "lr": "3.269e-05", "step": 6785, "steps": "23.74s,6785/16595" }, { "epoch": 2.0445917445013557, "eta": "64:50:54", "grad_norm": 0.005, "loss": 0.0553, "lr": "3.269e-05", "step": 6786, "steps": "23.8s,6786/16595" }, { "epoch": 2.044893040072311, "eta": "64:42:20", "grad_norm": 0.0054, "loss": 0.0577, "lr": "3.268e-05", "step": 6787, "steps": "23.75s,6787/16595" }, { "epoch": 2.045194335643266, "eta": "64:51:44", "grad_norm": 0.0052, "loss": 0.0469, "lr": "3.268e-05", "step": 6788, "steps": "23.81s,6788/16595" }, { "epoch": 2.045495631214221, "eta": "64:18:39", "grad_norm": 0.0055, "loss": 0.0552, "lr": "3.268e-05", "step": 6789, "steps": "23.61s,6789/16595" }, { "epoch": 2.0457969267851763, "eta": "64:42:46", "grad_norm": 0.0057, "loss": 0.0371, "lr": "3.267e-05", "step": 6790, "steps": "23.76s,6790/16595" }, { "epoch": 2.0460982223561315, "eta": "64:39:06", "grad_norm": 0.0062, "loss": 0.0549, "lr": "3.267e-05", "step": 6791, "steps": "23.74s,6791/16595" }, { "epoch": 2.0463995179270866, "eta": "65:14:39", "grad_norm": 0.0052, "loss": 0.047, "lr": "3.266e-05", "step": 6792, "steps": "23.96s,6792/16595" }, { "epoch": 2.046700813498042, "eta": "64:26:53", "grad_norm": 0.0067, "loss": 0.0479, "lr": "3.266e-05", "step": 6793, "steps": "23.67s,6793/16595" }, { "epoch": 2.0470021090689965, "eta": "64:54:15", "grad_norm": 0.0074, "loss": 0.0568, "lr": "3.265e-05", "step": 6794, "steps": "23.84s,6794/16595" }, { "epoch": 2.0473034046399516, "eta": "64:27:44", "grad_norm": 0.005, "loss": 0.0458, "lr": "3.265e-05", "step": 6795, "steps": "23.68s,6795/16595" }, { "epoch": 2.047604700210907, "eta": "65:09:48", "grad_norm": 0.0062, "loss": 0.0648, "lr": "3.264e-05", "step": 6796, "steps": "23.94s,6796/16595" }, { "epoch": 2.047905995781862, "eta": "64:08:58", "grad_norm": 0.0048, "loss": 0.0491, "lr": "3.264e-05", "step": 6797, "steps": "23.57s,6797/16595" }, { "epoch": 2.048207291352817, "eta": "64:21:39", "grad_norm": 0.0055, "loss": 0.0534, "lr": "3.263e-05", "step": 6798, "steps": "23.65s,6798/16595" }, { "epoch": 2.0485085869237722, "eta": "64:14:43", "grad_norm": 0.0056, "loss": 0.0491, "lr": "3.263e-05", "step": 6799, "steps": "23.61s,6799/16595" }, { "epoch": 2.0488098824947274, "eta": "65:03:18", "grad_norm": 0.0074, "loss": 0.0603, "lr": "3.263e-05", "step": 6800, "steps": "23.91s,6800/16595" }, { "epoch": 2.0491111780656825, "eta": "136:22:53", "grad_norm": 0.006, "loss": 0.0398, "lr": "3.262e-05", "step": 6801, "steps": "50.13s,6801/16595" }, { "epoch": 2.0494124736366377, "eta": "64:18:26", "grad_norm": 0.0068, "loss": 0.0511, "lr": "3.262e-05", "step": 6802, "steps": "23.64s,6802/16595" }, { "epoch": 2.049713769207593, "eta": "65:10:16", "grad_norm": 0.0057, "loss": 0.0597, "lr": "3.261e-05", "step": 6803, "steps": "23.96s,6803/16595" }, { "epoch": 2.0500150647785476, "eta": "64:47:01", "grad_norm": 0.0053, "loss": 0.0494, "lr": "3.261e-05", "step": 6804, "steps": "23.82s,6804/16595" }, { "epoch": 2.0503163603495027, "eta": "64:58:03", "grad_norm": 0.0048, "loss": 0.0455, "lr": "3.260e-05", "step": 6805, "steps": "23.89s,6805/16595" }, { "epoch": 2.050617655920458, "eta": "64:21:45", "grad_norm": 0.0071, "loss": 0.0262, "lr": "3.260e-05", "step": 6806, "steps": "23.67s,6806/16595" }, { "epoch": 2.050918951491413, "eta": "65:03:46", "grad_norm": 0.0064, "loss": 0.0558, "lr": "3.259e-05", "step": 6807, "steps": "23.93s,6807/16595" }, { "epoch": 2.051220247062368, "eta": "64:20:58", "grad_norm": 0.0053, "loss": 0.061, "lr": "3.259e-05", "step": 6808, "steps": "23.67s,6808/16595" }, { "epoch": 2.0515215426333233, "eta": "64:31:59", "grad_norm": 0.007, "loss": 0.0703, "lr": "3.258e-05", "step": 6809, "steps": "23.74s,6809/16595" }, { "epoch": 2.0518228382042785, "eta": "64:26:42", "grad_norm": 0.0058, "loss": 0.0493, "lr": "3.258e-05", "step": 6810, "steps": "23.71s,6810/16595" }, { "epoch": 2.0521241337752336, "eta": "64:14:53", "grad_norm": 0.0054, "loss": 0.0589, "lr": "3.258e-05", "step": 6811, "steps": "23.64s,6811/16595" }, { "epoch": 2.0524254293461888, "eta": "65:26:14", "grad_norm": 0.0056, "loss": 0.0526, "lr": "3.257e-05", "step": 6812, "steps": "24.08s,6812/16595" }, { "epoch": 2.052726724917144, "eta": "64:36:55", "grad_norm": 0.006, "loss": 0.0506, "lr": "3.257e-05", "step": 6813, "steps": "23.78s,6813/16595" }, { "epoch": 2.0530280204880986, "eta": "64:34:54", "grad_norm": 0.0077, "loss": 0.0519, "lr": "3.256e-05", "step": 6814, "steps": "23.77s,6814/16595" }, { "epoch": 2.0533293160590538, "eta": "65:05:28", "grad_norm": 0.006, "loss": 0.0754, "lr": "3.256e-05", "step": 6815, "steps": "23.96s,6815/16595" }, { "epoch": 2.053630611630009, "eta": "64:40:37", "grad_norm": 0.0054, "loss": 0.0471, "lr": "3.255e-05", "step": 6816, "steps": "23.81s,6816/16595" }, { "epoch": 2.053931907200964, "eta": "64:59:47", "grad_norm": 0.0044, "loss": 0.051, "lr": "3.255e-05", "step": 6817, "steps": "23.93s,6817/16595" }, { "epoch": 2.0542332027719192, "eta": "64:36:34", "grad_norm": 0.006, "loss": 0.0453, "lr": "3.254e-05", "step": 6818, "steps": "23.79s,6818/16595" }, { "epoch": 2.0545344983428744, "eta": "65:16:55", "grad_norm": 0.0065, "loss": 0.0414, "lr": "3.254e-05", "step": 6819, "steps": "24.04s,6819/16595" }, { "epoch": 2.0548357939138295, "eta": "65:00:13", "grad_norm": 0.0048, "loss": 0.0468, "lr": "3.253e-05", "step": 6820, "steps": "23.94s,6820/16595" }, { "epoch": 2.0551370894847847, "eta": "65:24:15", "grad_norm": 0.0064, "loss": 0.0566, "lr": "3.253e-05", "step": 6821, "steps": "24.09s,6821/16595" }, { "epoch": 2.05543838505574, "eta": "65:23:51", "grad_norm": 0.0172, "loss": 0.046, "lr": "3.253e-05", "step": 6822, "steps": "24.09s,6822/16595" }, { "epoch": 2.055739680626695, "eta": "64:55:46", "grad_norm": 0.0079, "loss": 0.0534, "lr": "3.252e-05", "step": 6823, "steps": "23.92s,6823/16595" }, { "epoch": 2.0560409761976497, "eta": "63:56:44", "grad_norm": 0.0057, "loss": 0.0466, "lr": "3.252e-05", "step": 6824, "steps": "23.56s,6824/16595" }, { "epoch": 2.056342271768605, "eta": "64:28:55", "grad_norm": 0.0067, "loss": 0.0513, "lr": "3.251e-05", "step": 6825, "steps": "23.76s,6825/16595" }, { "epoch": 2.05664356733956, "eta": "63:57:35", "grad_norm": 0.0081, "loss": 0.0535, "lr": "3.251e-05", "step": 6826, "steps": "23.57s,6826/16595" }, { "epoch": 2.056944862910515, "eta": "64:23:14", "grad_norm": 0.0059, "loss": 0.0517, "lr": "3.250e-05", "step": 6827, "steps": "23.73s,6827/16595" }, { "epoch": 2.0572461584814703, "eta": "65:31:13", "grad_norm": 0.006, "loss": 0.0667, "lr": "3.250e-05", "step": 6828, "steps": "24.15s,6828/16595" }, { "epoch": 2.0575474540524255, "eta": "65:12:54", "grad_norm": 0.0049, "loss": 0.0502, "lr": "3.249e-05", "step": 6829, "steps": "24.04s,6829/16595" }, { "epoch": 2.0578487496233806, "eta": "64:39:57", "grad_norm": 0.0052, "loss": 0.0571, "lr": "3.249e-05", "step": 6830, "steps": "23.84s,6830/16595" }, { "epoch": 2.0581500451943358, "eta": "65:13:44", "grad_norm": 0.0078, "loss": 0.0573, "lr": "3.248e-05", "step": 6831, "steps": "24.05s,6831/16595" }, { "epoch": 2.058451340765291, "eta": "64:55:26", "grad_norm": 0.0051, "loss": 0.0402, "lr": "3.248e-05", "step": 6832, "steps": "23.94s,6832/16595" }, { "epoch": 2.058752636336246, "eta": "64:25:45", "grad_norm": 0.0078, "loss": 0.0427, "lr": "3.248e-05", "step": 6833, "steps": "23.76s,6833/16595" }, { "epoch": 2.0590539319072008, "eta": "64:28:36", "grad_norm": 0.0052, "loss": 0.0396, "lr": "3.247e-05", "step": 6834, "steps": "23.78s,6834/16595" }, { "epoch": 2.059355227478156, "eta": "66:09:04", "grad_norm": 0.0055, "loss": 0.0405, "lr": "3.247e-05", "step": 6835, "steps": "24.4s,6835/16595" }, { "epoch": 2.059656523049111, "eta": "64:24:33", "grad_norm": 0.0054, "loss": 0.0544, "lr": "3.246e-05", "step": 6836, "steps": "23.76s,6836/16595" }, { "epoch": 2.059957818620066, "eta": "65:12:57", "grad_norm": 0.0061, "loss": 0.0626, "lr": "3.246e-05", "step": 6837, "steps": "24.06s,6837/16595" }, { "epoch": 2.0602591141910214, "eta": "65:02:48", "grad_norm": 0.0052, "loss": 0.0534, "lr": "3.245e-05", "step": 6838, "steps": "24.0s,6838/16595" }, { "epoch": 2.0605604097619765, "eta": "63:55:44", "grad_norm": 0.0046, "loss": 0.0496, "lr": "3.245e-05", "step": 6839, "steps": "23.59s,6839/16595" }, { "epoch": 2.0608617053329317, "eta": "64:21:21", "grad_norm": 0.0045, "loss": 0.0639, "lr": "3.244e-05", "step": 6840, "steps": "23.75s,6840/16595" }, { "epoch": 2.061163000903887, "eta": "64:45:20", "grad_norm": 0.0047, "loss": 0.0526, "lr": "3.244e-05", "step": 6841, "steps": "23.9s,6841/16595" }, { "epoch": 2.061464296474842, "eta": "65:01:12", "grad_norm": 0.0049, "loss": 0.0537, "lr": "3.243e-05", "step": 6842, "steps": "24.0s,6842/16595" }, { "epoch": 2.061765592045797, "eta": "64:12:02", "grad_norm": 0.0051, "loss": 0.0422, "lr": "3.243e-05", "step": 6843, "steps": "23.7s,6843/16595" }, { "epoch": 2.062066887616752, "eta": "64:21:23", "grad_norm": 0.0054, "loss": 0.0426, "lr": "3.243e-05", "step": 6844, "steps": "23.76s,6844/16595" }, { "epoch": 2.062368183187707, "eta": "64:42:07", "grad_norm": 0.0049, "loss": 0.0491, "lr": "3.242e-05", "step": 6845, "steps": "23.89s,6845/16595" }, { "epoch": 2.062669478758662, "eta": "65:07:43", "grad_norm": 0.0054, "loss": 0.0453, "lr": "3.242e-05", "step": 6846, "steps": "24.05s,6846/16595" }, { "epoch": 2.0629707743296173, "eta": "64:46:12", "grad_norm": 0.0059, "loss": 0.0338, "lr": "3.241e-05", "step": 6847, "steps": "23.92s,6847/16595" }, { "epoch": 2.0632720699005724, "eta": "64:05:11", "grad_norm": 0.0057, "loss": 0.0615, "lr": "3.241e-05", "step": 6848, "steps": "23.67s,6848/16595" }, { "epoch": 2.0635733654715276, "eta": "64:03:10", "grad_norm": 0.005, "loss": 0.0376, "lr": "3.240e-05", "step": 6849, "steps": "23.66s,6849/16595" }, { "epoch": 2.0638746610424827, "eta": "64:36:53", "grad_norm": 0.0054, "loss": 0.0678, "lr": "3.240e-05", "step": 6850, "steps": "23.87s,6850/16595" }, { "epoch": 2.064175956613438, "eta": "64:52:43", "grad_norm": 0.0044, "loss": 0.0426, "lr": "3.239e-05", "step": 6851, "steps": "23.97s,6851/16595" }, { "epoch": 2.064477252184393, "eta": "63:40:52", "grad_norm": 0.005, "loss": 0.071, "lr": "3.239e-05", "step": 6852, "steps": "23.53s,6852/16595" }, { "epoch": 2.064778547755348, "eta": "63:42:06", "grad_norm": 0.0055, "loss": 0.053, "lr": "3.238e-05", "step": 6853, "steps": "23.54s,6853/16595" }, { "epoch": 2.065079843326303, "eta": "64:58:01", "grad_norm": 0.0063, "loss": 0.0549, "lr": "3.238e-05", "step": 6854, "steps": "24.01s,6854/16595" }, { "epoch": 2.065381138897258, "eta": "64:33:16", "grad_norm": 0.0053, "loss": 0.0557, "lr": "3.238e-05", "step": 6855, "steps": "23.86s,6855/16595" }, { "epoch": 2.065682434468213, "eta": "64:37:44", "grad_norm": 0.0061, "loss": 0.0483, "lr": "3.237e-05", "step": 6856, "steps": "23.89s,6856/16595" }, { "epoch": 2.0659837300391684, "eta": "64:04:53", "grad_norm": 0.005, "loss": 0.0436, "lr": "3.237e-05", "step": 6857, "steps": "23.69s,6857/16595" }, { "epoch": 2.0662850256101235, "eta": "64:32:04", "grad_norm": 0.006, "loss": 0.0544, "lr": "3.236e-05", "step": 6858, "steps": "23.86s,6858/16595" }, { "epoch": 2.0665863211810787, "eta": "64:46:17", "grad_norm": 0.0053, "loss": 0.0517, "lr": "3.236e-05", "step": 6859, "steps": "23.95s,6859/16595" }, { "epoch": 2.066887616752034, "eta": "64:49:07", "grad_norm": 0.0051, "loss": 0.06, "lr": "3.235e-05", "step": 6860, "steps": "23.97s,6860/16595" }, { "epoch": 2.067188912322989, "eta": "64:42:14", "grad_norm": 0.0047, "loss": 0.0598, "lr": "3.235e-05", "step": 6861, "steps": "23.93s,6861/16595" }, { "epoch": 2.067490207893944, "eta": "65:06:10", "grad_norm": 0.0055, "loss": 0.0644, "lr": "3.234e-05", "step": 6862, "steps": "24.08s,6862/16595" }, { "epoch": 2.0677915034648993, "eta": "64:34:57", "grad_norm": 0.0053, "loss": 0.0536, "lr": "3.234e-05", "step": 6863, "steps": "23.89s,6863/16595" }, { "epoch": 2.068092799035854, "eta": "64:55:38", "grad_norm": 0.0055, "loss": 0.0545, "lr": "3.233e-05", "step": 6864, "steps": "24.02s,6864/16595" }, { "epoch": 2.068394094606809, "eta": "64:17:56", "grad_norm": 0.0049, "loss": 0.0425, "lr": "3.233e-05", "step": 6865, "steps": "23.79s,6865/16595" }, { "epoch": 2.0686953901777643, "eta": "64:25:39", "grad_norm": 0.006, "loss": 0.0426, "lr": "3.233e-05", "step": 6866, "steps": "23.84s,6866/16595" }, { "epoch": 2.0689966857487194, "eta": "64:59:18", "grad_norm": 0.0052, "loss": 0.0419, "lr": "3.232e-05", "step": 6867, "steps": "24.05s,6867/16595" }, { "epoch": 2.0692979813196746, "eta": "63:20:00", "grad_norm": 0.0051, "loss": 0.0553, "lr": "3.232e-05", "step": 6868, "steps": "23.44s,6868/16595" }, { "epoch": 2.0695992768906297, "eta": "64:11:29", "grad_norm": 0.0051, "loss": 0.0396, "lr": "3.231e-05", "step": 6869, "steps": "23.76s,6869/16595" }, { "epoch": 2.069900572461585, "eta": "64:46:45", "grad_norm": 0.0083, "loss": 0.045, "lr": "3.231e-05", "step": 6870, "steps": "23.98s,6870/16595" }, { "epoch": 2.07020186803254, "eta": "63:25:19", "grad_norm": 0.0049, "loss": 0.0463, "lr": "3.230e-05", "step": 6871, "steps": "23.48s,6871/16595" }, { "epoch": 2.070503163603495, "eta": "64:16:47", "grad_norm": 0.0054, "loss": 0.0463, "lr": "3.230e-05", "step": 6872, "steps": "23.8s,6872/16595" }, { "epoch": 2.0708044591744503, "eta": "65:05:00", "grad_norm": 0.0051, "loss": 0.0411, "lr": "3.229e-05", "step": 6873, "steps": "24.1s,6873/16595" }, { "epoch": 2.071105754745405, "eta": "63:59:47", "grad_norm": 0.0053, "loss": 0.0545, "lr": "3.229e-05", "step": 6874, "steps": "23.7s,6874/16595" }, { "epoch": 2.07140705031636, "eta": "63:41:34", "grad_norm": 0.0056, "loss": 0.0537, "lr": "3.228e-05", "step": 6875, "steps": "23.59s,6875/16595" }, { "epoch": 2.0717083458873153, "eta": "64:34:38", "grad_norm": 0.0065, "loss": 0.0437, "lr": "3.228e-05", "step": 6876, "steps": "23.92s,6876/16595" }, { "epoch": 2.0720096414582705, "eta": "64:29:23", "grad_norm": 0.0049, "loss": 0.0404, "lr": "3.227e-05", "step": 6877, "steps": "23.89s,6877/16595" }, { "epoch": 2.0723109370292256, "eta": "64:32:13", "grad_norm": 0.0049, "loss": 0.0512, "lr": "3.227e-05", "step": 6878, "steps": "23.91s,6878/16595" }, { "epoch": 2.072612232600181, "eta": "64:38:18", "grad_norm": 0.0056, "loss": 0.0507, "lr": "3.227e-05", "step": 6879, "steps": "23.95s,6879/16595" }, { "epoch": 2.072913528171136, "eta": "64:44:22", "grad_norm": 0.0064, "loss": 0.0476, "lr": "3.226e-05", "step": 6880, "steps": "23.99s,6880/16595" }, { "epoch": 2.073214823742091, "eta": "63:44:04", "grad_norm": 0.0065, "loss": 0.062, "lr": "3.226e-05", "step": 6881, "steps": "23.62s,6881/16595" }, { "epoch": 2.0735161193130462, "eta": "64:19:17", "grad_norm": 0.0057, "loss": 0.0544, "lr": "3.225e-05", "step": 6882, "steps": "23.84s,6882/16595" }, { "epoch": 2.0738174148840014, "eta": "64:43:10", "grad_norm": 0.0047, "loss": 0.0448, "lr": "3.225e-05", "step": 6883, "steps": "23.99s,6883/16595" }, { "epoch": 2.074118710454956, "eta": "64:08:47", "grad_norm": 0.0062, "loss": 0.0473, "lr": "3.224e-05", "step": 6884, "steps": "23.78s,6884/16595" }, { "epoch": 2.0744200060259113, "eta": "63:47:21", "grad_norm": 0.0049, "loss": 0.0615, "lr": "3.224e-05", "step": 6885, "steps": "23.65s,6885/16595" }, { "epoch": 2.0747213015968664, "eta": "63:43:43", "grad_norm": 0.0071, "loss": 0.0584, "lr": "3.223e-05", "step": 6886, "steps": "23.63s,6886/16595" }, { "epoch": 2.0750225971678216, "eta": "64:22:09", "grad_norm": 0.0058, "loss": 0.0538, "lr": "3.223e-05", "step": 6887, "steps": "23.87s,6887/16595" }, { "epoch": 2.0753238927387767, "eta": "64:05:35", "grad_norm": 0.0059, "loss": 0.0501, "lr": "3.222e-05", "step": 6888, "steps": "23.77s,6888/16595" }, { "epoch": 2.075625188309732, "eta": "63:52:15", "grad_norm": 0.0059, "loss": 0.0411, "lr": "3.222e-05", "step": 6889, "steps": "23.69s,6889/16595" }, { "epoch": 2.075926483880687, "eta": "64:11:16", "grad_norm": 0.0054, "loss": 0.0411, "lr": "3.222e-05", "step": 6890, "steps": "23.81s,6890/16595" }, { "epoch": 2.076227779451642, "eta": "64:23:48", "grad_norm": 0.0051, "loss": 0.0584, "lr": "3.221e-05", "step": 6891, "steps": "23.89s,6891/16595" }, { "epoch": 2.0765290750225973, "eta": "65:13:32", "grad_norm": 0.006, "loss": 0.0587, "lr": "3.221e-05", "step": 6892, "steps": "24.2s,6892/16595" }, { "epoch": 2.0768303705935525, "eta": "64:31:05", "grad_norm": 0.0048, "loss": 0.0645, "lr": "3.220e-05", "step": 6893, "steps": "23.94s,6893/16595" }, { "epoch": 2.077131666164507, "eta": "64:27:27", "grad_norm": 0.0044, "loss": 0.0493, "lr": "3.220e-05", "step": 6894, "steps": "23.92s,6894/16595" }, { "epoch": 2.0774329617354623, "eta": "64:46:28", "grad_norm": 0.0049, "loss": 0.0753, "lr": "3.219e-05", "step": 6895, "steps": "24.04s,6895/16595" }, { "epoch": 2.0777342573064175, "eta": "63:47:52", "grad_norm": 0.0051, "loss": 0.0446, "lr": "3.219e-05", "step": 6896, "steps": "23.68s,6896/16595" }, { "epoch": 2.0780355528773726, "eta": "63:44:14", "grad_norm": 0.0054, "loss": 0.0665, "lr": "3.218e-05", "step": 6897, "steps": "23.66s,6897/16595" }, { "epoch": 2.078336848448328, "eta": "63:53:32", "grad_norm": 0.0049, "loss": 0.0518, "lr": "3.218e-05", "step": 6898, "steps": "23.72s,6898/16595" }, { "epoch": 2.078638144019283, "eta": "63:59:36", "grad_norm": 0.0078, "loss": 0.0316, "lr": "3.217e-05", "step": 6899, "steps": "23.76s,6899/16595" }, { "epoch": 2.078939439590238, "eta": "64:28:18", "grad_norm": 0.0052, "loss": 0.0562, "lr": "3.217e-05", "step": 6900, "steps": "23.94s,6900/16595" }, { "epoch": 2.0792407351611932, "eta": "63:49:07", "grad_norm": 0.0072, "loss": 0.0439, "lr": "3.217e-05", "step": 6901, "steps": "23.7s,6901/16595" }, { "epoch": 2.0795420307321484, "eta": "63:48:44", "grad_norm": 0.0056, "loss": 0.0583, "lr": "3.216e-05", "step": 6902, "steps": "23.7s,6902/16595" }, { "epoch": 2.0798433263031035, "eta": "64:14:11", "grad_norm": 0.0048, "loss": 0.0613, "lr": "3.216e-05", "step": 6903, "steps": "23.86s,6903/16595" }, { "epoch": 2.0801446218740582, "eta": "63:20:29", "grad_norm": 0.0048, "loss": 0.0602, "lr": "3.215e-05", "step": 6904, "steps": "23.53s,6904/16595" }, { "epoch": 2.0804459174450134, "eta": "64:37:36", "grad_norm": 0.0058, "loss": 0.0575, "lr": "3.215e-05", "step": 6905, "steps": "24.01s,6905/16595" }, { "epoch": 2.0807472130159685, "eta": "63:50:23", "grad_norm": 0.0057, "loss": 0.0582, "lr": "3.214e-05", "step": 6906, "steps": "23.72s,6906/16595" }, { "epoch": 2.0810485085869237, "eta": "64:38:25", "grad_norm": 0.0044, "loss": 0.0438, "lr": "3.214e-05", "step": 6907, "steps": "24.02s,6907/16595" }, { "epoch": 2.081349804157879, "eta": "63:35:03", "grad_norm": 0.0059, "loss": 0.0537, "lr": "3.213e-05", "step": 6908, "steps": "23.63s,6908/16595" }, { "epoch": 2.081651099728834, "eta": "64:21:29", "grad_norm": 0.0075, "loss": 0.0497, "lr": "3.213e-05", "step": 6909, "steps": "23.92s,6909/16595" }, { "epoch": 2.081952395299789, "eta": "63:32:39", "grad_norm": 0.0063, "loss": 0.0555, "lr": "3.212e-05", "step": 6910, "steps": "23.62s,6910/16595" }, { "epoch": 2.0822536908707443, "eta": "64:35:12", "grad_norm": 0.0053, "loss": 0.0513, "lr": "3.212e-05", "step": 6911, "steps": "24.01s,6911/16595" }, { "epoch": 2.0825549864416995, "eta": "62:32:09", "grad_norm": 0.005, "loss": 0.0547, "lr": "3.212e-05", "step": 6912, "steps": "23.25s,6912/16595" }, { "epoch": 2.0828562820126546, "eta": "64:15:02", "grad_norm": 0.0061, "loss": 0.0466, "lr": "3.211e-05", "step": 6913, "steps": "23.89s,6913/16595" }, { "epoch": 2.0831575775836093, "eta": "64:22:43", "grad_norm": 0.0056, "loss": 0.0484, "lr": "3.211e-05", "step": 6914, "steps": "23.94s,6914/16595" }, { "epoch": 2.0834588731545645, "eta": "63:27:28", "grad_norm": 0.0051, "loss": 0.0461, "lr": "3.210e-05", "step": 6915, "steps": "23.6s,6915/16595" }, { "epoch": 2.0837601687255196, "eta": "63:33:31", "grad_norm": 0.006, "loss": 0.0513, "lr": "3.210e-05", "step": 6916, "steps": "23.64s,6916/16595" }, { "epoch": 2.0840614642964748, "eta": "63:20:13", "grad_norm": 0.0049, "loss": 0.0414, "lr": "3.209e-05", "step": 6917, "steps": "23.56s,6917/16595" }, { "epoch": 2.08436275986743, "eta": "63:29:30", "grad_norm": 0.0053, "loss": 0.0357, "lr": "3.209e-05", "step": 6918, "steps": "23.62s,6918/16595" }, { "epoch": 2.084664055438385, "eta": "64:20:43", "grad_norm": 0.0054, "loss": 0.0599, "lr": "3.208e-05", "step": 6919, "steps": "23.94s,6919/16595" }, { "epoch": 2.08496535100934, "eta": "64:41:17", "grad_norm": 0.0064, "loss": 0.0395, "lr": "3.208e-05", "step": 6920, "steps": "24.07s,6920/16595" }, { "epoch": 2.0852666465802954, "eta": "64:15:05", "grad_norm": 0.0054, "loss": 0.0497, "lr": "3.207e-05", "step": 6921, "steps": "23.91s,6921/16595" }, { "epoch": 2.0855679421512505, "eta": "64:38:52", "grad_norm": 0.0056, "loss": 0.0458, "lr": "3.207e-05", "step": 6922, "steps": "24.06s,6922/16595" }, { "epoch": 2.0858692377222057, "eta": "63:58:10", "grad_norm": 0.0067, "loss": 0.0568, "lr": "3.206e-05", "step": 6923, "steps": "23.81s,6923/16595" }, { "epoch": 2.0861705332931604, "eta": "64:34:50", "grad_norm": 0.007, "loss": 0.0356, "lr": "3.206e-05", "step": 6924, "steps": "24.04s,6924/16595" }, { "epoch": 2.0864718288641155, "eta": "64:05:26", "grad_norm": 0.005, "loss": 0.0573, "lr": "3.206e-05", "step": 6925, "steps": "23.86s,6925/16595" }, { "epoch": 2.0867731244350707, "eta": "63:26:21", "grad_norm": 0.0058, "loss": 0.0426, "lr": "3.205e-05", "step": 6926, "steps": "23.62s,6926/16595" }, { "epoch": 2.087074420006026, "eta": "64:06:15", "grad_norm": 0.0104, "loss": 0.0387, "lr": "3.205e-05", "step": 6927, "steps": "23.87s,6927/16595" }, { "epoch": 2.087375715576981, "eta": "64:04:14", "grad_norm": 0.0062, "loss": 0.0615, "lr": "3.204e-05", "step": 6928, "steps": "23.86s,6928/16595" }, { "epoch": 2.087677011147936, "eta": "63:38:04", "grad_norm": 0.0065, "loss": 0.0489, "lr": "3.204e-05", "step": 6929, "steps": "23.7s,6929/16595" }, { "epoch": 2.0879783067188913, "eta": "64:46:56", "grad_norm": 0.0051, "loss": 0.058, "lr": "3.203e-05", "step": 6930, "steps": "24.13s,6930/16595" }, { "epoch": 2.0882796022898464, "eta": "63:46:56", "grad_norm": 0.0076, "loss": 0.0561, "lr": "3.203e-05", "step": 6931, "steps": "23.76s,6931/16595" }, { "epoch": 2.0885808978608016, "eta": "63:14:20", "grad_norm": 0.006, "loss": 0.0514, "lr": "3.202e-05", "step": 6932, "steps": "23.56s,6932/16595" }, { "epoch": 2.0888821934317567, "eta": "63:41:19", "grad_norm": 0.0054, "loss": 0.0673, "lr": "3.202e-05", "step": 6933, "steps": "23.73s,6933/16595" }, { "epoch": 2.0891834890027114, "eta": "63:50:35", "grad_norm": 0.0056, "loss": 0.0759, "lr": "3.201e-05", "step": 6934, "steps": "23.79s,6934/16595" }, { "epoch": 2.0894847845736666, "eta": "64:15:57", "grad_norm": 0.0051, "loss": 0.0536, "lr": "3.201e-05", "step": 6935, "steps": "23.95s,6935/16595" }, { "epoch": 2.0897860801446217, "eta": "63:57:50", "grad_norm": 0.0056, "loss": 0.046, "lr": "3.201e-05", "step": 6936, "steps": "23.84s,6936/16595" }, { "epoch": 2.090087375715577, "eta": "64:11:55", "grad_norm": 0.0065, "loss": 0.0338, "lr": "3.200e-05", "step": 6937, "steps": "23.93s,6937/16595" }, { "epoch": 2.090388671286532, "eta": "63:03:56", "grad_norm": 0.0062, "loss": 0.0502, "lr": "3.200e-05", "step": 6938, "steps": "23.51s,6938/16595" }, { "epoch": 2.090689966857487, "eta": "63:56:39", "grad_norm": 0.0075, "loss": 0.0396, "lr": "3.199e-05", "step": 6939, "steps": "23.84s,6939/16595" }, { "epoch": 2.0909912624284424, "eta": "64:13:57", "grad_norm": 0.0051, "loss": 0.0701, "lr": "3.199e-05", "step": 6940, "steps": "23.95s,6940/16595" }, { "epoch": 2.0912925579993975, "eta": "63:31:43", "grad_norm": 0.0075, "loss": 0.0694, "lr": "3.198e-05", "step": 6941, "steps": "23.69s,6941/16595" }, { "epoch": 2.0915938535703527, "eta": "63:24:53", "grad_norm": 0.0062, "loss": 0.041, "lr": "3.198e-05", "step": 6942, "steps": "23.65s,6942/16595" }, { "epoch": 2.091895149141308, "eta": "63:50:14", "grad_norm": 0.0063, "loss": 0.0452, "lr": "3.197e-05", "step": 6943, "steps": "23.81s,6943/16595" }, { "epoch": 2.0921964447122625, "eta": "63:40:11", "grad_norm": 0.0057, "loss": 0.0409, "lr": "3.197e-05", "step": 6944, "steps": "23.75s,6944/16595" }, { "epoch": 2.0924977402832177, "eta": "63:15:40", "grad_norm": 0.0048, "loss": 0.0554, "lr": "3.196e-05", "step": 6945, "steps": "23.6s,6945/16595" }, { "epoch": 2.092799035854173, "eta": "63:41:00", "grad_norm": 0.0067, "loss": 0.0419, "lr": "3.196e-05", "step": 6946, "steps": "23.76s,6946/16595" }, { "epoch": 2.093100331425128, "eta": "63:11:39", "grad_norm": 0.0059, "loss": 0.046, "lr": "3.195e-05", "step": 6947, "steps": "23.58s,6947/16595" }, { "epoch": 2.093401626996083, "eta": "62:58:24", "grad_norm": 0.005, "loss": 0.0725, "lr": "3.195e-05", "step": 6948, "steps": "23.5s,6948/16595" }, { "epoch": 2.0937029225670383, "eta": "63:25:20", "grad_norm": 0.0052, "loss": 0.0438, "lr": "3.195e-05", "step": 6949, "steps": "23.67s,6949/16595" }, { "epoch": 2.0940042181379934, "eta": "63:04:03", "grad_norm": 0.0056, "loss": 0.0627, "lr": "3.194e-05", "step": 6950, "steps": "23.54s,6950/16595" }, { "epoch": 2.0943055137089486, "eta": "64:59:23", "grad_norm": 0.0055, "loss": 0.0472, "lr": "3.194e-05", "step": 6951, "steps": "24.26s,6951/16595" }, { "epoch": 2.0946068092799037, "eta": "63:24:09", "grad_norm": 0.006, "loss": 0.0536, "lr": "3.193e-05", "step": 6952, "steps": "23.67s,6952/16595" }, { "epoch": 2.094908104850859, "eta": "64:45:43", "grad_norm": 0.0055, "loss": 0.0429, "lr": "3.193e-05", "step": 6953, "steps": "24.18s,6953/16595" }, { "epoch": 2.0952094004218136, "eta": "63:05:41", "grad_norm": 0.0048, "loss": 0.0569, "lr": "3.192e-05", "step": 6954, "steps": "23.56s,6954/16595" }, { "epoch": 2.0955106959927687, "eta": "63:02:05", "grad_norm": 0.0052, "loss": 0.0533, "lr": "3.192e-05", "step": 6955, "steps": "23.54s,6955/16595" }, { "epoch": 2.095811991563724, "eta": "62:58:29", "grad_norm": 0.0053, "loss": 0.067, "lr": "3.191e-05", "step": 6956, "steps": "23.52s,6956/16595" }, { "epoch": 2.096113287134679, "eta": "63:59:08", "grad_norm": 0.0059, "loss": 0.0535, "lr": "3.191e-05", "step": 6957, "steps": "23.9s,6957/16595" }, { "epoch": 2.096414582705634, "eta": "62:56:05", "grad_norm": 0.0052, "loss": 0.0447, "lr": "3.190e-05", "step": 6958, "steps": "23.51s,6958/16595" }, { "epoch": 2.0967158782765893, "eta": "63:00:31", "grad_norm": 0.0052, "loss": 0.0512, "lr": "3.190e-05", "step": 6959, "steps": "23.54s,6959/16595" }, { "epoch": 2.0970171738475445, "eta": "63:00:07", "grad_norm": 0.0056, "loss": 0.0635, "lr": "3.190e-05", "step": 6960, "steps": "23.54s,6960/16595" }, { "epoch": 2.0973184694184996, "eta": "62:59:44", "grad_norm": 0.0051, "loss": 0.0655, "lr": "3.189e-05", "step": 6961, "steps": "23.54s,6961/16595" }, { "epoch": 2.097619764989455, "eta": "63:10:35", "grad_norm": 0.0061, "loss": 0.0452, "lr": "3.189e-05", "step": 6962, "steps": "23.61s,6962/16595" }, { "epoch": 2.09792106056041, "eta": "62:58:57", "grad_norm": 0.0057, "loss": 0.0457, "lr": "3.188e-05", "step": 6963, "steps": "23.54s,6963/16595" }, { "epoch": 2.098222356131365, "eta": "63:14:36", "grad_norm": 0.0053, "loss": 0.0408, "lr": "3.188e-05", "step": 6964, "steps": "23.64s,6964/16595" }, { "epoch": 2.09852365170232, "eta": "63:54:20", "grad_norm": 0.0055, "loss": 0.0531, "lr": "3.187e-05", "step": 6965, "steps": "23.89s,6965/16595" }, { "epoch": 2.098824947273275, "eta": "63:05:48", "grad_norm": 0.0051, "loss": 0.06, "lr": "3.187e-05", "step": 6966, "steps": "23.59s,6966/16595" }, { "epoch": 2.09912624284423, "eta": "64:01:34", "grad_norm": 0.006, "loss": 0.051, "lr": "3.186e-05", "step": 6967, "steps": "23.94s,6967/16595" }, { "epoch": 2.0994275384151853, "eta": "63:25:52", "grad_norm": 0.0049, "loss": 0.06, "lr": "3.186e-05", "step": 6968, "steps": "23.72s,6968/16595" }, { "epoch": 2.0997288339861404, "eta": "63:54:21", "grad_norm": 0.0051, "loss": 0.0632, "lr": "3.185e-05", "step": 6969, "steps": "23.9s,6969/16595" }, { "epoch": 2.1000301295570956, "eta": "64:06:47", "grad_norm": 0.0051, "loss": 0.0424, "lr": "3.185e-05", "step": 6970, "steps": "23.98s,6970/16595" }, { "epoch": 2.1003314251280507, "eta": "63:31:06", "grad_norm": 0.005, "loss": 0.042, "lr": "3.184e-05", "step": 6971, "steps": "23.76s,6971/16595" }, { "epoch": 2.100632720699006, "eta": "63:59:34", "grad_norm": 0.0065, "loss": 0.0579, "lr": "3.184e-05", "step": 6972, "steps": "23.94s,6972/16595" }, { "epoch": 2.100934016269961, "eta": "62:53:25", "grad_norm": 0.0057, "loss": 0.0507, "lr": "3.184e-05", "step": 6973, "steps": "23.53s,6973/16595" }, { "epoch": 2.1012353118409157, "eta": "62:54:38", "grad_norm": 0.0049, "loss": 0.0454, "lr": "3.183e-05", "step": 6974, "steps": "23.54s,6974/16595" }, { "epoch": 2.101536607411871, "eta": "63:13:29", "grad_norm": 0.0051, "loss": 0.0417, "lr": "3.183e-05", "step": 6975, "steps": "23.66s,6975/16595" }, { "epoch": 2.101837902982826, "eta": "63:25:55", "grad_norm": 0.005, "loss": 0.0521, "lr": "3.182e-05", "step": 6976, "steps": "23.74s,6976/16595" }, { "epoch": 2.102139198553781, "eta": "63:11:05", "grad_norm": 0.0053, "loss": 0.0416, "lr": "3.182e-05", "step": 6977, "steps": "23.65s,6977/16595" }, { "epoch": 2.1024404941247363, "eta": "63:18:42", "grad_norm": 0.008, "loss": 0.0408, "lr": "3.181e-05", "step": 6978, "steps": "23.7s,6978/16595" }, { "epoch": 2.1027417896956915, "eta": "64:11:12", "grad_norm": 0.0061, "loss": 0.034, "lr": "3.181e-05", "step": 6979, "steps": "24.03s,6979/16595" }, { "epoch": 2.1030430852666466, "eta": "63:53:10", "grad_norm": 0.0047, "loss": 0.0475, "lr": "3.180e-05", "step": 6980, "steps": "23.92s,6980/16595" }, { "epoch": 2.103344380837602, "eta": "63:52:46", "grad_norm": 0.0063, "loss": 0.0574, "lr": "3.180e-05", "step": 6981, "steps": "23.92s,6981/16595" }, { "epoch": 2.103645676408557, "eta": "63:33:09", "grad_norm": 0.0062, "loss": 0.0605, "lr": "3.179e-05", "step": 6982, "steps": "23.8s,6982/16595" }, { "epoch": 2.103946971979512, "eta": "63:56:47", "grad_norm": 0.0056, "loss": 0.0575, "lr": "3.179e-05", "step": 6983, "steps": "23.95s,6983/16595" }, { "epoch": 2.1042482675504672, "eta": "63:24:21", "grad_norm": 0.0065, "loss": 0.0536, "lr": "3.178e-05", "step": 6984, "steps": "23.75s,6984/16595" }, { "epoch": 2.104549563121422, "eta": "63:14:20", "grad_norm": 0.0052, "loss": 0.0664, "lr": "3.178e-05", "step": 6985, "steps": "23.69s,6985/16595" }, { "epoch": 2.104850858692377, "eta": "63:36:22", "grad_norm": 0.005, "loss": 0.0669, "lr": "3.178e-05", "step": 6986, "steps": "23.83s,6986/16595" }, { "epoch": 2.1051521542633322, "eta": "64:03:12", "grad_norm": 0.0047, "loss": 0.0653, "lr": "3.177e-05", "step": 6987, "steps": "24.0s,6987/16595" }, { "epoch": 2.1054534498342874, "eta": "62:53:56", "grad_norm": 0.0049, "loss": 0.0422, "lr": "3.177e-05", "step": 6988, "steps": "23.57s,6988/16595" }, { "epoch": 2.1057547454052425, "eta": "63:52:47", "grad_norm": 0.0054, "loss": 0.0551, "lr": "3.176e-05", "step": 6989, "steps": "23.94s,6989/16595" }, { "epoch": 2.1060560409761977, "eta": "63:33:11", "grad_norm": 0.005, "loss": 0.0452, "lr": "3.176e-05", "step": 6990, "steps": "23.82s,6990/16595" }, { "epoch": 2.106357336547153, "eta": "63:16:46", "grad_norm": 0.0056, "loss": 0.055, "lr": "3.175e-05", "step": 6991, "steps": "23.72s,6991/16595" }, { "epoch": 2.106658632118108, "eta": "63:33:59", "grad_norm": 0.0106, "loss": 0.0562, "lr": "3.175e-05", "step": 6992, "steps": "23.83s,6992/16595" }, { "epoch": 2.106959927689063, "eta": "63:33:35", "grad_norm": 0.0052, "loss": 0.057, "lr": "3.174e-05", "step": 6993, "steps": "23.83s,6993/16595" }, { "epoch": 2.107261223260018, "eta": "63:36:23", "grad_norm": 0.0057, "loss": 0.053, "lr": "3.174e-05", "step": 6994, "steps": "23.85s,6994/16595" }, { "epoch": 2.107562518830973, "eta": "63:20:00", "grad_norm": 0.0051, "loss": 0.0625, "lr": "3.173e-05", "step": 6995, "steps": "23.75s,6995/16595" }, { "epoch": 2.107863814401928, "eta": "63:24:24", "grad_norm": 0.0055, "loss": 0.0722, "lr": "3.173e-05", "step": 6996, "steps": "23.78s,6996/16595" }, { "epoch": 2.1081651099728833, "eta": "64:02:23", "grad_norm": 0.0045, "loss": 0.036, "lr": "3.173e-05", "step": 6997, "steps": "24.02s,6997/16595" }, { "epoch": 2.1084664055438385, "eta": "63:22:00", "grad_norm": 0.0069, "loss": 0.0433, "lr": "3.172e-05", "step": 6998, "steps": "23.77s,6998/16595" }, { "epoch": 2.1087677011147936, "eta": "64:03:11", "grad_norm": 0.0046, "loss": 0.0445, "lr": "3.172e-05", "step": 6999, "steps": "24.03s,6999/16595" }, { "epoch": 2.1090689966857488, "eta": "63:46:48", "grad_norm": 0.0053, "loss": 0.0507, "lr": "3.171e-05", "step": 7000, "steps": "23.93s,7000/16595" }, { "epoch": 2.109370292256704, "eta": "133:46:58", "grad_norm": 0.0055, "loss": 0.0548, "lr": "3.171e-05", "step": 7001, "steps": "50.2s,7001/16595" }, { "epoch": 2.109671587827659, "eta": "63:01:14", "grad_norm": 0.005, "loss": 0.0378, "lr": "3.170e-05", "step": 7002, "steps": "23.65s,7002/16595" }, { "epoch": 2.109972883398614, "eta": "63:23:13", "grad_norm": 0.0059, "loss": 0.0437, "lr": "3.170e-05", "step": 7003, "steps": "23.79s,7003/16595" }, { "epoch": 2.1102741789695694, "eta": "62:50:51", "grad_norm": 0.0055, "loss": 0.0326, "lr": "3.169e-05", "step": 7004, "steps": "23.59s,7004/16595" }, { "epoch": 2.110575474540524, "eta": "62:42:28", "grad_norm": 0.006, "loss": 0.0459, "lr": "3.169e-05", "step": 7005, "steps": "23.54s,7005/16595" }, { "epoch": 2.1108767701114792, "eta": "63:10:51", "grad_norm": 0.0061, "loss": 0.0588, "lr": "3.168e-05", "step": 7006, "steps": "23.72s,7006/16595" }, { "epoch": 2.1111780656824344, "eta": "63:36:01", "grad_norm": 0.0042, "loss": 0.0408, "lr": "3.168e-05", "step": 7007, "steps": "23.88s,7007/16595" }, { "epoch": 2.1114793612533895, "eta": "62:41:17", "grad_norm": 0.0057, "loss": 0.0564, "lr": "3.167e-05", "step": 7008, "steps": "23.54s,7008/16595" }, { "epoch": 2.1117806568243447, "eta": "63:27:14", "grad_norm": 0.0042, "loss": 0.0476, "lr": "3.167e-05", "step": 7009, "steps": "23.83s,7009/16595" }, { "epoch": 2.1120819523953, "eta": "63:39:37", "grad_norm": 0.0049, "loss": 0.0571, "lr": "3.167e-05", "step": 7010, "steps": "23.91s,7010/16595" }, { "epoch": 2.112383247966255, "eta": "62:44:54", "grad_norm": 0.0064, "loss": 0.0414, "lr": "3.166e-05", "step": 7011, "steps": "23.57s,7011/16595" }, { "epoch": 2.11268454353721, "eta": "62:31:44", "grad_norm": 0.0068, "loss": 0.0539, "lr": "3.166e-05", "step": 7012, "steps": "23.49s,7012/16595" }, { "epoch": 2.1129858391081653, "eta": "62:40:56", "grad_norm": 0.0051, "loss": 0.0467, "lr": "3.165e-05", "step": 7013, "steps": "23.55s,7013/16595" }, { "epoch": 2.11328713467912, "eta": "63:36:25", "grad_norm": 0.0047, "loss": 0.0556, "lr": "3.165e-05", "step": 7014, "steps": "23.9s,7014/16595" }, { "epoch": 2.113588430250075, "eta": "63:23:15", "grad_norm": 0.006, "loss": 0.0352, "lr": "3.164e-05", "step": 7015, "steps": "23.82s,7015/16595" }, { "epoch": 2.1138897258210303, "eta": "62:38:09", "grad_norm": 0.0059, "loss": 0.0445, "lr": "3.164e-05", "step": 7016, "steps": "23.54s,7016/16595" }, { "epoch": 2.1141910213919854, "eta": "62:36:10", "grad_norm": 0.0053, "loss": 0.0383, "lr": "3.163e-05", "step": 7017, "steps": "23.53s,7017/16595" }, { "epoch": 2.1144923169629406, "eta": "63:41:13", "grad_norm": 0.0053, "loss": 0.0563, "lr": "3.163e-05", "step": 7018, "steps": "23.94s,7018/16595" }, { "epoch": 2.1147936125338958, "eta": "63:47:12", "grad_norm": 0.0052, "loss": 0.0656, "lr": "3.162e-05", "step": 7019, "steps": "23.98s,7019/16595" }, { "epoch": 2.115094908104851, "eta": "64:01:10", "grad_norm": 0.0055, "loss": 0.0549, "lr": "3.162e-05", "step": 7020, "steps": "24.07s,7020/16595" }, { "epoch": 2.115396203675806, "eta": "63:00:08", "grad_norm": 0.0058, "loss": 0.0418, "lr": "3.161e-05", "step": 7021, "steps": "23.69s,7021/16595" }, { "epoch": 2.115697499246761, "eta": "63:07:43", "grad_norm": 0.0048, "loss": 0.0484, "lr": "3.161e-05", "step": 7022, "steps": "23.74s,7022/16595" }, { "epoch": 2.1159987948177164, "eta": "62:52:57", "grad_norm": 0.0052, "loss": 0.0573, "lr": "3.161e-05", "step": 7023, "steps": "23.65s,7023/16595" }, { "epoch": 2.1163000903886715, "eta": "62:31:49", "grad_norm": 0.0058, "loss": 0.0467, "lr": "3.160e-05", "step": 7024, "steps": "23.52s,7024/16595" }, { "epoch": 2.116601385959626, "eta": "62:36:13", "grad_norm": 0.0048, "loss": 0.0527, "lr": "3.160e-05", "step": 7025, "steps": "23.55s,7025/16595" }, { "epoch": 2.1169026815305814, "eta": "63:12:30", "grad_norm": 0.0048, "loss": 0.0484, "lr": "3.159e-05", "step": 7026, "steps": "23.78s,7026/16595" }, { "epoch": 2.1172039771015365, "eta": "63:44:00", "grad_norm": 0.0052, "loss": 0.0541, "lr": "3.159e-05", "step": 7027, "steps": "23.98s,7027/16595" }, { "epoch": 2.1175052726724917, "eta": "63:30:51", "grad_norm": 0.0047, "loss": 0.0526, "lr": "3.158e-05", "step": 7028, "steps": "23.9s,7028/16595" }, { "epoch": 2.117806568243447, "eta": "64:10:18", "grad_norm": 0.0059, "loss": 0.037, "lr": "3.158e-05", "step": 7029, "steps": "24.15s,7029/16595" }, { "epoch": 2.118107863814402, "eta": "62:39:02", "grad_norm": 0.0057, "loss": 0.0467, "lr": "3.157e-05", "step": 7030, "steps": "23.58s,7030/16595" }, { "epoch": 2.118409159385357, "eta": "63:08:56", "grad_norm": 0.0121, "loss": 0.047, "lr": "3.157e-05", "step": 7031, "steps": "23.77s,7031/16595" }, { "epoch": 2.1187104549563123, "eta": "63:08:32", "grad_norm": 0.006, "loss": 0.0559, "lr": "3.156e-05", "step": 7032, "steps": "23.77s,7032/16595" }, { "epoch": 2.1190117505272674, "eta": "62:31:29", "grad_norm": 0.0056, "loss": 0.0528, "lr": "3.156e-05", "step": 7033, "steps": "23.54s,7033/16595" }, { "epoch": 2.119313046098222, "eta": "62:11:58", "grad_norm": 0.0059, "loss": 0.0589, "lr": "3.156e-05", "step": 7034, "steps": "23.42s,7034/16595" }, { "epoch": 2.1196143416691773, "eta": "63:08:56", "grad_norm": 0.0067, "loss": 0.0397, "lr": "3.155e-05", "step": 7035, "steps": "23.78s,7035/16595" }, { "epoch": 2.1199156372401324, "eta": "63:05:21", "grad_norm": 0.006, "loss": 0.0507, "lr": "3.155e-05", "step": 7036, "steps": "23.76s,7036/16595" }, { "epoch": 2.1202169328110876, "eta": "62:31:30", "grad_norm": 0.0056, "loss": 0.0478, "lr": "3.154e-05", "step": 7037, "steps": "23.55s,7037/16595" }, { "epoch": 2.1205182283820427, "eta": "63:58:43", "grad_norm": 0.0054, "loss": 0.0556, "lr": "3.154e-05", "step": 7038, "steps": "24.1s,7038/16595" }, { "epoch": 2.120819523952998, "eta": "62:30:43", "grad_norm": 0.0047, "loss": 0.0689, "lr": "3.153e-05", "step": 7039, "steps": "23.55s,7039/16595" }, { "epoch": 2.121120819523953, "eta": "63:27:40", "grad_norm": 0.006, "loss": 0.0488, "lr": "3.153e-05", "step": 7040, "steps": "23.91s,7040/16595" }, { "epoch": 2.121422115094908, "eta": "62:45:52", "grad_norm": 0.0046, "loss": 0.0509, "lr": "3.152e-05", "step": 7041, "steps": "23.65s,7041/16595" }, { "epoch": 2.1217234106658633, "eta": "62:35:55", "grad_norm": 0.0058, "loss": 0.047, "lr": "3.152e-05", "step": 7042, "steps": "23.59s,7042/16595" }, { "epoch": 2.1220247062368185, "eta": "62:05:16", "grad_norm": 0.0045, "loss": 0.0662, "lr": "3.151e-05", "step": 7043, "steps": "23.4s,7043/16595" }, { "epoch": 2.1223260018077736, "eta": "62:55:49", "grad_norm": 0.0051, "loss": 0.0409, "lr": "3.151e-05", "step": 7044, "steps": "23.72s,7044/16595" }, { "epoch": 2.1226272973787284, "eta": "62:42:42", "grad_norm": 0.0069, "loss": 0.0472, "lr": "3.150e-05", "step": 7045, "steps": "23.64s,7045/16595" }, { "epoch": 2.1229285929496835, "eta": "63:18:54", "grad_norm": 0.007, "loss": 0.0562, "lr": "3.150e-05", "step": 7046, "steps": "23.87s,7046/16595" }, { "epoch": 2.1232298885206387, "eta": "62:16:27", "grad_norm": 0.0055, "loss": 0.0556, "lr": "3.150e-05", "step": 7047, "steps": "23.48s,7047/16595" }, { "epoch": 2.123531184091594, "eta": "63:27:39", "grad_norm": 0.0047, "loss": 0.0648, "lr": "3.149e-05", "step": 7048, "steps": "23.93s,7048/16595" }, { "epoch": 2.123832479662549, "eta": "63:00:12", "grad_norm": 0.0056, "loss": 0.0536, "lr": "3.149e-05", "step": 7049, "steps": "23.76s,7049/16595" }, { "epoch": 2.124133775233504, "eta": "62:45:30", "grad_norm": 0.0131, "loss": 0.0406, "lr": "3.148e-05", "step": 7050, "steps": "23.67s,7050/16595" }, { "epoch": 2.1244350708044593, "eta": "63:31:14", "grad_norm": 0.0056, "loss": 0.053, "lr": "3.148e-05", "step": 7051, "steps": "23.96s,7051/16595" }, { "epoch": 2.1247363663754144, "eta": "62:59:01", "grad_norm": 0.0064, "loss": 0.0488, "lr": "3.147e-05", "step": 7052, "steps": "23.76s,7052/16595" }, { "epoch": 2.1250376619463696, "eta": "63:12:56", "grad_norm": 0.0059, "loss": 0.07, "lr": "3.147e-05", "step": 7053, "steps": "23.85s,7053/16595" }, { "epoch": 2.1253389575173243, "eta": "62:51:52", "grad_norm": 0.005, "loss": 0.0469, "lr": "3.146e-05", "step": 7054, "steps": "23.72s,7054/16595" }, { "epoch": 2.1256402530882794, "eta": "63:04:12", "grad_norm": 0.0051, "loss": 0.0539, "lr": "3.146e-05", "step": 7055, "steps": "23.8s,7055/16595" }, { "epoch": 2.1259415486592346, "eta": "63:30:49", "grad_norm": 0.0069, "loss": 0.053, "lr": "3.145e-05", "step": 7056, "steps": "23.97s,7056/16595" }, { "epoch": 2.1262428442301897, "eta": "62:04:35", "grad_norm": 0.0055, "loss": 0.0427, "lr": "3.145e-05", "step": 7057, "steps": "23.43s,7057/16595" }, { "epoch": 2.126544139801145, "eta": "62:56:39", "grad_norm": 0.005, "loss": 0.0457, "lr": "3.144e-05", "step": 7058, "steps": "23.76s,7058/16595" }, { "epoch": 2.1268454353721, "eta": "63:07:22", "grad_norm": 0.0047, "loss": 0.0505, "lr": "3.144e-05", "step": 7059, "steps": "23.83s,7059/16595" }, { "epoch": 2.127146730943055, "eta": "62:19:18", "grad_norm": 0.0071, "loss": 0.0729, "lr": "3.144e-05", "step": 7060, "steps": "23.53s,7060/16595" }, { "epoch": 2.1274480265140103, "eta": "62:31:37", "grad_norm": 0.0063, "loss": 0.0444, "lr": "3.143e-05", "step": 7061, "steps": "23.61s,7061/16595" }, { "epoch": 2.1277493220849655, "eta": "63:06:11", "grad_norm": 0.0065, "loss": 0.0431, "lr": "3.143e-05", "step": 7062, "steps": "23.83s,7062/16595" }, { "epoch": 2.1280506176559206, "eta": "62:22:53", "grad_norm": 0.0058, "loss": 0.0666, "lr": "3.142e-05", "step": 7063, "steps": "23.56s,7063/16595" }, { "epoch": 2.128351913226876, "eta": "63:19:41", "grad_norm": 0.0041, "loss": 0.0541, "lr": "3.142e-05", "step": 7064, "steps": "23.92s,7064/16595" }, { "epoch": 2.1286532087978305, "eta": "63:24:03", "grad_norm": 0.0055, "loss": 0.0491, "lr": "3.141e-05", "step": 7065, "steps": "23.95s,7065/16595" }, { "epoch": 2.1289545043687856, "eta": "63:22:04", "grad_norm": 0.0049, "loss": 0.0519, "lr": "3.141e-05", "step": 7066, "steps": "23.94s,7066/16595" }, { "epoch": 2.129255799939741, "eta": "63:08:58", "grad_norm": 0.0051, "loss": 0.0424, "lr": "3.140e-05", "step": 7067, "steps": "23.86s,7067/16595" }, { "epoch": 2.129557095510696, "eta": "63:21:16", "grad_norm": 0.0053, "loss": 0.0542, "lr": "3.140e-05", "step": 7068, "steps": "23.94s,7068/16595" }, { "epoch": 2.129858391081651, "eta": "62:49:07", "grad_norm": 0.0052, "loss": 0.0339, "lr": "3.139e-05", "step": 7069, "steps": "23.74s,7069/16595" }, { "epoch": 2.1301596866526062, "eta": "62:23:19", "grad_norm": 0.0056, "loss": 0.0482, "lr": "3.139e-05", "step": 7070, "steps": "23.58s,7070/16595" }, { "epoch": 2.1304609822235614, "eta": "62:21:20", "grad_norm": 0.0053, "loss": 0.0501, "lr": "3.138e-05", "step": 7071, "steps": "23.57s,7071/16595" }, { "epoch": 2.1307622777945165, "eta": "62:11:25", "grad_norm": 0.0045, "loss": 0.0411, "lr": "3.138e-05", "step": 7072, "steps": "23.51s,7072/16595" }, { "epoch": 2.1310635733654717, "eta": "62:52:17", "grad_norm": 0.0046, "loss": 0.0486, "lr": "3.138e-05", "step": 7073, "steps": "23.77s,7073/16595" }, { "epoch": 2.1313648689364264, "eta": "62:58:15", "grad_norm": 0.0058, "loss": 0.0562, "lr": "3.137e-05", "step": 7074, "steps": "23.81s,7074/16595" }, { "epoch": 2.1316661645073816, "eta": "62:37:13", "grad_norm": 0.0053, "loss": 0.0551, "lr": "3.137e-05", "step": 7075, "steps": "23.68s,7075/16595" }, { "epoch": 2.1319674600783367, "eta": "62:38:25", "grad_norm": 0.0076, "loss": 0.0592, "lr": "3.136e-05", "step": 7076, "steps": "23.69s,7076/16595" }, { "epoch": 2.132268755649292, "eta": "63:09:45", "grad_norm": 0.0054, "loss": 0.0499, "lr": "3.136e-05", "step": 7077, "steps": "23.89s,7077/16595" }, { "epoch": 2.132570051220247, "eta": "62:40:48", "grad_norm": 0.0056, "loss": 0.0489, "lr": "3.135e-05", "step": 7078, "steps": "23.71s,7078/16595" }, { "epoch": 2.132871346791202, "eta": "63:08:57", "grad_norm": 0.0062, "loss": 0.0596, "lr": "3.135e-05", "step": 7079, "steps": "23.89s,7079/16595" }, { "epoch": 2.1331726423621573, "eta": "62:47:56", "grad_norm": 0.0059, "loss": 0.0495, "lr": "3.134e-05", "step": 7080, "steps": "23.76s,7080/16595" }, { "epoch": 2.1334739379331125, "eta": "63:04:59", "grad_norm": 0.0061, "loss": 0.0498, "lr": "3.134e-05", "step": 7081, "steps": "23.87s,7081/16595" }, { "epoch": 2.1337752335040676, "eta": "62:42:23", "grad_norm": 0.005, "loss": 0.0628, "lr": "3.133e-05", "step": 7082, "steps": "23.73s,7082/16595" }, { "epoch": 2.1340765290750228, "eta": "62:07:07", "grad_norm": 0.006, "loss": 0.05, "lr": "3.133e-05", "step": 7083, "steps": "23.51s,7083/16595" }, { "epoch": 2.134377824645978, "eta": "63:11:43", "grad_norm": 0.0056, "loss": 0.0491, "lr": "3.132e-05", "step": 7084, "steps": "23.92s,7084/16595" }, { "epoch": 2.1346791202169326, "eta": "62:20:36", "grad_norm": 0.0057, "loss": 0.0412, "lr": "3.132e-05", "step": 7085, "steps": "23.6s,7085/16595" }, { "epoch": 2.1349804157878878, "eta": "63:10:55", "grad_norm": 0.0057, "loss": 0.0517, "lr": "3.132e-05", "step": 7086, "steps": "23.92s,7086/16595" }, { "epoch": 2.135281711358843, "eta": "62:19:48", "grad_norm": 0.0055, "loss": 0.0611, "lr": "3.131e-05", "step": 7087, "steps": "23.6s,7087/16595" }, { "epoch": 2.135583006929798, "eta": "62:32:05", "grad_norm": 0.0059, "loss": 0.0433, "lr": "3.131e-05", "step": 7088, "steps": "23.68s,7088/16595" }, { "epoch": 2.1358843025007532, "eta": "63:16:03", "grad_norm": 0.0046, "loss": 0.0588, "lr": "3.130e-05", "step": 7089, "steps": "23.96s,7089/16595" }, { "epoch": 2.1361855980717084, "eta": "62:29:43", "grad_norm": 0.0061, "loss": 0.0595, "lr": "3.130e-05", "step": 7090, "steps": "23.67s,7090/16595" }, { "epoch": 2.1364868936426635, "eta": "63:05:45", "grad_norm": 0.0056, "loss": 0.0494, "lr": "3.129e-05", "step": 7091, "steps": "23.9s,7091/16595" }, { "epoch": 2.1367881892136187, "eta": "62:25:45", "grad_norm": 0.0056, "loss": 0.0497, "lr": "3.129e-05", "step": 7092, "steps": "23.65s,7092/16595" }, { "epoch": 2.137089484784574, "eta": "62:45:57", "grad_norm": 0.0056, "loss": 0.0575, "lr": "3.128e-05", "step": 7093, "steps": "23.78s,7093/16595" }, { "epoch": 2.1373907803555285, "eta": "63:17:13", "grad_norm": 0.0053, "loss": 0.0554, "lr": "3.128e-05", "step": 7094, "steps": "23.98s,7094/16595" }, { "epoch": 2.1376920759264837, "eta": "62:19:50", "grad_norm": 0.0047, "loss": 0.0469, "lr": "3.127e-05", "step": 7095, "steps": "23.62s,7095/16595" }, { "epoch": 2.137993371497439, "eta": "62:32:06", "grad_norm": 0.006, "loss": 0.0638, "lr": "3.127e-05", "step": 7096, "steps": "23.7s,7096/16595" }, { "epoch": 2.138294667068394, "eta": "62:53:52", "grad_norm": 0.0082, "loss": 0.0401, "lr": "3.126e-05", "step": 7097, "steps": "23.84s,7097/16595" }, { "epoch": 2.138595962639349, "eta": "63:07:43", "grad_norm": 0.005, "loss": 0.0666, "lr": "3.126e-05", "step": 7098, "steps": "23.93s,7098/16595" }, { "epoch": 2.1388972582103043, "eta": "62:40:24", "grad_norm": 0.0057, "loss": 0.0648, "lr": "3.126e-05", "step": 7099, "steps": "23.76s,7099/16595" }, { "epoch": 2.1391985537812594, "eta": "63:02:10", "grad_norm": 0.0046, "loss": 0.0412, "lr": "3.125e-05", "step": 7100, "steps": "23.9s,7100/16595" }, { "epoch": 2.1394998493522146, "eta": "62:14:18", "grad_norm": 0.0067, "loss": 0.0623, "lr": "3.125e-05", "step": 7101, "steps": "23.6s,7101/16595" }, { "epoch": 2.1398011449231698, "eta": "63:09:17", "grad_norm": 0.008, "loss": 0.0553, "lr": "3.124e-05", "step": 7102, "steps": "23.95s,7102/16595" }, { "epoch": 2.140102440494125, "eta": "62:46:44", "grad_norm": 0.0052, "loss": 0.0563, "lr": "3.124e-05", "step": 7103, "steps": "23.81s,7103/16595" }, { "epoch": 2.14040373606508, "eta": "62:24:11", "grad_norm": 0.0059, "loss": 0.0621, "lr": "3.123e-05", "step": 7104, "steps": "23.67s,7104/16595" }, { "epoch": 2.1407050316360348, "eta": "62:23:48", "grad_norm": 0.006, "loss": 0.0452, "lr": "3.123e-05", "step": 7105, "steps": "23.67s,7105/16595" }, { "epoch": 2.14100632720699, "eta": "62:23:24", "grad_norm": 0.0099, "loss": 0.0491, "lr": "3.122e-05", "step": 7106, "steps": "23.67s,7106/16595" }, { "epoch": 2.141307622777945, "eta": "62:56:13", "grad_norm": 0.0052, "loss": 0.0602, "lr": "3.122e-05", "step": 7107, "steps": "23.88s,7107/16595" }, { "epoch": 2.1416089183489, "eta": "62:16:17", "grad_norm": 0.0052, "loss": 0.0494, "lr": "3.121e-05", "step": 7108, "steps": "23.63s,7108/16595" }, { "epoch": 2.1419102139198554, "eta": "62:30:07", "grad_norm": 0.0056, "loss": 0.0505, "lr": "3.121e-05", "step": 7109, "steps": "23.72s,7109/16595" }, { "epoch": 2.1422115094908105, "eta": "63:06:05", "grad_norm": 0.0135, "loss": 0.0505, "lr": "3.120e-05", "step": 7110, "steps": "23.95s,7110/16595" }, { "epoch": 2.1425128050617657, "eta": "61:59:18", "grad_norm": 0.0056, "loss": 0.0518, "lr": "3.120e-05", "step": 7111, "steps": "23.53s,7111/16595" }, { "epoch": 2.142814100632721, "eta": "63:00:33", "grad_norm": 0.0052, "loss": 0.0503, "lr": "3.120e-05", "step": 7112, "steps": "23.92s,7112/16595" }, { "epoch": 2.143115396203676, "eta": "63:04:53", "grad_norm": 0.0048, "loss": 0.0481, "lr": "3.119e-05", "step": 7113, "steps": "23.95s,7113/16595" }, { "epoch": 2.1434166917746307, "eta": "62:55:01", "grad_norm": 0.0052, "loss": 0.0567, "lr": "3.119e-05", "step": 7114, "steps": "23.89s,7114/16595" }, { "epoch": 2.143717987345586, "eta": "63:16:44", "grad_norm": 0.0053, "loss": 0.047, "lr": "3.118e-05", "step": 7115, "steps": "24.03s,7115/16595" }, { "epoch": 2.144019282916541, "eta": "62:43:09", "grad_norm": 0.0058, "loss": 0.0463, "lr": "3.118e-05", "step": 7116, "steps": "23.82s,7116/16595" }, { "epoch": 2.144320578487496, "eta": "62:26:58", "grad_norm": 0.0061, "loss": 0.0662, "lr": "3.117e-05", "step": 7117, "steps": "23.72s,7117/16595" }, { "epoch": 2.1446218740584513, "eta": "63:48:42", "grad_norm": 0.0053, "loss": 0.0573, "lr": "3.117e-05", "step": 7118, "steps": "24.24s,7118/16595" }, { "epoch": 2.1449231696294064, "eta": "62:59:20", "grad_norm": 0.0046, "loss": 0.0618, "lr": "3.116e-05", "step": 7119, "steps": "23.93s,7119/16595" }, { "epoch": 2.1452244652003616, "eta": "62:55:47", "grad_norm": 0.0063, "loss": 0.0426, "lr": "3.116e-05", "step": 7120, "steps": "23.91s,7120/16595" }, { "epoch": 2.1455257607713167, "eta": "63:20:39", "grad_norm": 0.0071, "loss": 0.0585, "lr": "3.115e-05", "step": 7121, "steps": "24.07s,7121/16595" }, { "epoch": 2.145827056342272, "eta": "62:20:15", "grad_norm": 0.0052, "loss": 0.0589, "lr": "3.115e-05", "step": 7122, "steps": "23.69s,7122/16595" }, { "epoch": 2.146128351913227, "eta": "62:04:04", "grad_norm": 0.0054, "loss": 0.0395, "lr": "3.114e-05", "step": 7123, "steps": "23.59s,7123/16595" }, { "epoch": 2.146429647484182, "eta": "62:24:12", "grad_norm": 0.0054, "loss": 0.0533, "lr": "3.114e-05", "step": 7124, "steps": "23.72s,7124/16595" }, { "epoch": 2.146730943055137, "eta": "62:38:00", "grad_norm": 0.0042, "loss": 0.0507, "lr": "3.114e-05", "step": 7125, "steps": "23.81s,7125/16595" }, { "epoch": 2.147032238626092, "eta": "62:20:15", "grad_norm": 0.0044, "loss": 0.0546, "lr": "3.113e-05", "step": 7126, "steps": "23.7s,7126/16595" }, { "epoch": 2.147333534197047, "eta": "62:24:35", "grad_norm": 0.0047, "loss": 0.0361, "lr": "3.113e-05", "step": 7127, "steps": "23.73s,7127/16595" }, { "epoch": 2.1476348297680024, "eta": "62:33:39", "grad_norm": 0.0064, "loss": 0.0568, "lr": "3.112e-05", "step": 7128, "steps": "23.79s,7128/16595" }, { "epoch": 2.1479361253389575, "eta": "61:23:51", "grad_norm": 0.0043, "loss": 0.0473, "lr": "3.112e-05", "step": 7129, "steps": "23.35s,7129/16595" }, { "epoch": 2.1482374209099127, "eta": "62:04:28", "grad_norm": 0.0044, "loss": 0.0519, "lr": "3.111e-05", "step": 7130, "steps": "23.61s,7130/16595" }, { "epoch": 2.148538716480868, "eta": "62:00:55", "grad_norm": 0.0063, "loss": 0.064, "lr": "3.111e-05", "step": 7131, "steps": "23.59s,7131/16595" }, { "epoch": 2.148840012051823, "eta": "62:49:25", "grad_norm": 0.0052, "loss": 0.0423, "lr": "3.110e-05", "step": 7132, "steps": "23.9s,7132/16595" }, { "epoch": 2.149141307622778, "eta": "62:30:06", "grad_norm": 0.0052, "loss": 0.0589, "lr": "3.110e-05", "step": 7133, "steps": "23.78s,7133/16595" }, { "epoch": 2.1494426031937333, "eta": "63:17:00", "grad_norm": 0.005, "loss": 0.0499, "lr": "3.109e-05", "step": 7134, "steps": "24.08s,7134/16595" }, { "epoch": 2.149743898764688, "eta": "61:56:12", "grad_norm": 0.0064, "loss": 0.0607, "lr": "3.109e-05", "step": 7135, "steps": "23.57s,7135/16595" }, { "epoch": 2.150045194335643, "eta": "62:08:25", "grad_norm": 0.0062, "loss": 0.0553, "lr": "3.108e-05", "step": 7136, "steps": "23.65s,7136/16595" }, { "epoch": 2.1503464899065983, "eta": "62:49:00", "grad_norm": 0.0049, "loss": 0.054, "lr": "3.108e-05", "step": 7137, "steps": "23.91s,7137/16595" }, { "epoch": 2.1506477854775534, "eta": "62:09:12", "grad_norm": 0.0048, "loss": 0.0571, "lr": "3.107e-05", "step": 7138, "steps": "23.66s,7138/16595" }, { "epoch": 2.1509490810485086, "eta": "62:30:52", "grad_norm": 0.0054, "loss": 0.0565, "lr": "3.107e-05", "step": 7139, "steps": "23.8s,7139/16595" }, { "epoch": 2.1512503766194637, "eta": "62:44:39", "grad_norm": 0.0046, "loss": 0.0523, "lr": "3.107e-05", "step": 7140, "steps": "23.89s,7140/16595" }, { "epoch": 2.151551672190419, "eta": "63:01:36", "grad_norm": 0.0049, "loss": 0.0449, "lr": "3.106e-05", "step": 7141, "steps": "24.0s,7141/16595" }, { "epoch": 2.151852967761374, "eta": "62:48:35", "grad_norm": 0.0066, "loss": 0.0497, "lr": "3.106e-05", "step": 7142, "steps": "23.92s,7142/16595" }, { "epoch": 2.152154263332329, "eta": "61:59:21", "grad_norm": 0.0085, "loss": 0.0453, "lr": "3.105e-05", "step": 7143, "steps": "23.61s,7143/16595" }, { "epoch": 2.1524555589032843, "eta": "63:28:45", "grad_norm": 0.005, "loss": 0.0444, "lr": "3.105e-05", "step": 7144, "steps": "24.18s,7144/16595" }, { "epoch": 2.152756854474239, "eta": "62:53:42", "grad_norm": 0.0054, "loss": 0.035, "lr": "3.104e-05", "step": 7145, "steps": "23.96s,7145/16595" }, { "epoch": 2.153058150045194, "eta": "63:13:46", "grad_norm": 0.0057, "loss": 0.0506, "lr": "3.104e-05", "step": 7146, "steps": "24.09s,7146/16595" }, { "epoch": 2.1533594456161493, "eta": "61:57:47", "grad_norm": 0.0057, "loss": 0.0623, "lr": "3.103e-05", "step": 7147, "steps": "23.61s,7147/16595" }, { "epoch": 2.1536607411871045, "eta": "63:05:05", "grad_norm": 0.005, "loss": 0.0527, "lr": "3.103e-05", "step": 7148, "steps": "24.04s,7148/16595" }, { "epoch": 2.1539620367580596, "eta": "62:06:26", "grad_norm": 0.0062, "loss": 0.0588, "lr": "3.102e-05", "step": 7149, "steps": "23.67s,7149/16595" }, { "epoch": 2.154263332329015, "eta": "63:04:17", "grad_norm": 0.0058, "loss": 0.0403, "lr": "3.102e-05", "step": 7150, "steps": "24.04s,7150/16595" }, { "epoch": 2.15456462789997, "eta": "62:19:49", "grad_norm": 0.0054, "loss": 0.0547, "lr": "3.101e-05", "step": 7151, "steps": "23.76s,7151/16595" }, { "epoch": 2.154865923470925, "eta": "63:11:21", "grad_norm": 0.0053, "loss": 0.043, "lr": "3.101e-05", "step": 7152, "steps": "24.09s,7152/16595" }, { "epoch": 2.1551672190418802, "eta": "62:28:28", "grad_norm": 0.0056, "loss": 0.0491, "lr": "3.101e-05", "step": 7153, "steps": "23.82s,7153/16595" }, { "epoch": 2.1554685146128354, "eta": "63:54:37", "grad_norm": 0.0064, "loss": 0.0428, "lr": "3.100e-05", "step": 7154, "steps": "24.37s,7154/16595" }, { "epoch": 2.15576981018379, "eta": "62:19:48", "grad_norm": 0.0059, "loss": 0.0571, "lr": "3.100e-05", "step": 7155, "steps": "23.77s,7155/16595" }, { "epoch": 2.1560711057547453, "eta": "63:44:22", "grad_norm": 0.0063, "loss": 0.048, "lr": "3.099e-05", "step": 7156, "steps": "24.31s,7156/16595" }, { "epoch": 2.1563724013257004, "eta": "62:31:36", "grad_norm": 0.0046, "loss": 0.0501, "lr": "3.099e-05", "step": 7157, "steps": "23.85s,7157/16595" }, { "epoch": 2.1566736968966556, "eta": "63:08:57", "grad_norm": 0.0052, "loss": 0.0568, "lr": "3.098e-05", "step": 7158, "steps": "24.09s,7158/16595" }, { "epoch": 2.1569749924676107, "eta": "63:02:15", "grad_norm": 0.0064, "loss": 0.036, "lr": "3.098e-05", "step": 7159, "steps": "24.05s,7159/16595" }, { "epoch": 2.157276288038566, "eta": "62:11:32", "grad_norm": 0.0042, "loss": 0.0646, "lr": "3.097e-05", "step": 7160, "steps": "23.73s,7160/16595" }, { "epoch": 2.157577583609521, "eta": "62:23:43", "grad_norm": 0.0059, "loss": 0.0439, "lr": "3.097e-05", "step": 7161, "steps": "23.81s,7161/16595" }, { "epoch": 2.157878879180476, "eta": "62:29:37", "grad_norm": 0.0051, "loss": 0.0547, "lr": "3.096e-05", "step": 7162, "steps": "23.85s,7162/16595" }, { "epoch": 2.1581801747514313, "eta": "62:16:38", "grad_norm": 0.0068, "loss": 0.0348, "lr": "3.096e-05", "step": 7163, "steps": "23.77s,7163/16595" }, { "epoch": 2.1584814703223865, "eta": "62:19:23", "grad_norm": 0.0055, "loss": 0.046, "lr": "3.095e-05", "step": 7164, "steps": "23.79s,7164/16595" }, { "epoch": 2.158782765893341, "eta": "62:23:42", "grad_norm": 0.0055, "loss": 0.0544, "lr": "3.095e-05", "step": 7165, "steps": "23.82s,7165/16595" }, { "epoch": 2.1590840614642963, "eta": "62:10:44", "grad_norm": 0.0048, "loss": 0.0421, "lr": "3.095e-05", "step": 7166, "steps": "23.74s,7166/16595" }, { "epoch": 2.1593853570352515, "eta": "62:38:37", "grad_norm": 0.006, "loss": 0.0563, "lr": "3.094e-05", "step": 7167, "steps": "23.92s,7167/16595" }, { "epoch": 2.1596866526062066, "eta": "62:11:31", "grad_norm": 0.0051, "loss": 0.0512, "lr": "3.094e-05", "step": 7168, "steps": "23.75s,7168/16595" }, { "epoch": 2.1599879481771618, "eta": "62:01:41", "grad_norm": 0.0048, "loss": 0.0359, "lr": "3.093e-05", "step": 7169, "steps": "23.69s,7169/16595" }, { "epoch": 2.160289243748117, "eta": "62:39:00", "grad_norm": 0.0045, "loss": 0.0602, "lr": "3.093e-05", "step": 7170, "steps": "23.93s,7170/16595" }, { "epoch": 2.160590539319072, "eta": "62:41:44", "grad_norm": 0.007, "loss": 0.0524, "lr": "3.092e-05", "step": 7171, "steps": "23.95s,7171/16595" }, { "epoch": 2.1608918348900272, "eta": "61:58:56", "grad_norm": 0.0053, "loss": 0.0535, "lr": "3.092e-05", "step": 7172, "steps": "23.68s,7172/16595" }, { "epoch": 2.1611931304609824, "eta": "61:52:16", "grad_norm": 0.0061, "loss": 0.0571, "lr": "3.091e-05", "step": 7173, "steps": "23.64s,7173/16595" }, { "epoch": 2.1614944260319375, "eta": "62:01:17", "grad_norm": 0.0047, "loss": 0.0529, "lr": "3.091e-05", "step": 7174, "steps": "23.7s,7174/16595" }, { "epoch": 2.1617957216028922, "eta": "62:32:18", "grad_norm": 0.0053, "loss": 0.056, "lr": "3.090e-05", "step": 7175, "steps": "23.9s,7175/16595" }, { "epoch": 2.1620970171738474, "eta": "62:41:19", "grad_norm": 0.0055, "loss": 0.0611, "lr": "3.090e-05", "step": 7176, "steps": "23.96s,7176/16595" }, { "epoch": 2.1623983127448025, "eta": "62:20:30", "grad_norm": 0.0068, "loss": 0.0492, "lr": "3.089e-05", "step": 7177, "steps": "23.83s,7177/16595" }, { "epoch": 2.1626996083157577, "eta": "62:42:05", "grad_norm": 0.0056, "loss": 0.0352, "lr": "3.089e-05", "step": 7178, "steps": "23.97s,7178/16595" }, { "epoch": 2.163000903886713, "eta": "62:10:18", "grad_norm": 0.0062, "loss": 0.0493, "lr": "3.089e-05", "step": 7179, "steps": "23.77s,7179/16595" }, { "epoch": 2.163302199457668, "eta": "62:31:52", "grad_norm": 0.0048, "loss": 0.0674, "lr": "3.088e-05", "step": 7180, "steps": "23.91s,7180/16595" }, { "epoch": 2.163603495028623, "eta": "61:31:51", "grad_norm": 0.0065, "loss": 0.0391, "lr": "3.088e-05", "step": 7181, "steps": "23.53s,7181/16595" }, { "epoch": 2.1639047905995783, "eta": "61:59:42", "grad_norm": 0.0054, "loss": 0.0671, "lr": "3.087e-05", "step": 7182, "steps": "23.71s,7182/16595" }, { "epoch": 2.1642060861705334, "eta": "61:38:54", "grad_norm": 0.0061, "loss": 0.051, "lr": "3.087e-05", "step": 7183, "steps": "23.58s,7183/16595" }, { "epoch": 2.1645073817414886, "eta": "61:38:31", "grad_norm": 0.005, "loss": 0.0605, "lr": "3.086e-05", "step": 7184, "steps": "23.58s,7184/16595" }, { "epoch": 2.1648086773124433, "eta": "62:17:20", "grad_norm": 0.0046, "loss": 0.0601, "lr": "3.086e-05", "step": 7185, "steps": "23.83s,7185/16595" }, { "epoch": 2.1651099728833985, "eta": "62:31:03", "grad_norm": 0.0048, "loss": 0.0546, "lr": "3.085e-05", "step": 7186, "steps": "23.92s,7186/16595" }, { "epoch": 2.1654112684543536, "eta": "62:22:48", "grad_norm": 0.0052, "loss": 0.0491, "lr": "3.085e-05", "step": 7187, "steps": "23.87s,7187/16595" }, { "epoch": 2.1657125640253088, "eta": "62:00:28", "grad_norm": 0.0045, "loss": 0.0524, "lr": "3.084e-05", "step": 7188, "steps": "23.73s,7188/16595" }, { "epoch": 2.166013859596264, "eta": "61:56:56", "grad_norm": 0.0047, "loss": 0.05, "lr": "3.084e-05", "step": 7189, "steps": "23.71s,7189/16595" }, { "epoch": 2.166315155167219, "eta": "61:58:06", "grad_norm": 0.0067, "loss": 0.0667, "lr": "3.083e-05", "step": 7190, "steps": "23.72s,7190/16595" }, { "epoch": 2.166616450738174, "eta": "61:24:48", "grad_norm": 0.0084, "loss": 0.047, "lr": "3.083e-05", "step": 7191, "steps": "23.51s,7191/16595" }, { "epoch": 2.1669177463091294, "eta": "61:32:14", "grad_norm": 0.0052, "loss": 0.0538, "lr": "3.082e-05", "step": 7192, "steps": "23.56s,7192/16595" }, { "epoch": 2.1672190418800845, "eta": "62:37:39", "grad_norm": 0.006, "loss": 0.0485, "lr": "3.082e-05", "step": 7193, "steps": "23.98s,7193/16595" }, { "epoch": 2.1675203374510397, "eta": "62:15:19", "grad_norm": 0.0058, "loss": 0.0538, "lr": "3.082e-05", "step": 7194, "steps": "23.84s,7194/16595" }, { "epoch": 2.1678216330219944, "eta": "62:00:50", "grad_norm": 0.0059, "loss": 0.0656, "lr": "3.081e-05", "step": 7195, "steps": "23.75s,7195/16595" }, { "epoch": 2.1681229285929495, "eta": "62:02:00", "grad_norm": 0.0052, "loss": 0.0521, "lr": "3.081e-05", "step": 7196, "steps": "23.76s,7196/16595" }, { "epoch": 2.1684242241639047, "eta": "61:52:12", "grad_norm": 0.0058, "loss": 0.0583, "lr": "3.080e-05", "step": 7197, "steps": "23.7s,7197/16595" }, { "epoch": 2.16872551973486, "eta": "62:43:29", "grad_norm": 0.0049, "loss": 0.0623, "lr": "3.080e-05", "step": 7198, "steps": "24.03s,7198/16595" }, { "epoch": 2.169026815305815, "eta": "62:11:46", "grad_norm": 0.0063, "loss": 0.0521, "lr": "3.079e-05", "step": 7199, "steps": "23.83s,7199/16595" }, { "epoch": 2.16932811087677, "eta": "61:57:17", "grad_norm": 0.0047, "loss": 0.0627, "lr": "3.079e-05", "step": 7200, "steps": "23.74s,7200/16595" }, { "epoch": 2.1696294064477253, "eta": "131:35:39", "grad_norm": 0.0056, "loss": 0.0578, "lr": "3.078e-05", "step": 7201, "steps": "50.43s,7201/16595" }, { "epoch": 2.1699307020186804, "eta": "60:35:05", "grad_norm": 0.0052, "loss": 0.0669, "lr": "3.078e-05", "step": 7202, "steps": "23.22s,7202/16595" }, { "epoch": 2.1702319975896356, "eta": "61:40:26", "grad_norm": 0.005, "loss": 0.037, "lr": "3.077e-05", "step": 7203, "steps": "23.64s,7203/16595" }, { "epoch": 2.1705332931605907, "eta": "62:08:13", "grad_norm": 0.0054, "loss": 0.0496, "lr": "3.077e-05", "step": 7204, "steps": "23.82s,7204/16595" }, { "epoch": 2.1708345887315454, "eta": "61:36:31", "grad_norm": 0.005, "loss": 0.0328, "lr": "3.076e-05", "step": 7205, "steps": "23.62s,7205/16595" }, { "epoch": 2.1711358843025006, "eta": "62:19:57", "grad_norm": 0.0056, "loss": 0.0598, "lr": "3.076e-05", "step": 7206, "steps": "23.9s,7206/16595" }, { "epoch": 2.1714371798734557, "eta": "61:56:05", "grad_norm": 0.0052, "loss": 0.0614, "lr": "3.076e-05", "step": 7207, "steps": "23.75s,7207/16595" }, { "epoch": 2.171738475444411, "eta": "62:28:32", "grad_norm": 0.006, "loss": 0.0595, "lr": "3.075e-05", "step": 7208, "steps": "23.96s,7208/16595" }, { "epoch": 2.172039771015366, "eta": "62:07:48", "grad_norm": 0.0049, "loss": 0.0617, "lr": "3.075e-05", "step": 7209, "steps": "23.83s,7209/16595" }, { "epoch": 2.172341066586321, "eta": "62:01:09", "grad_norm": 0.0056, "loss": 0.0582, "lr": "3.074e-05", "step": 7210, "steps": "23.79s,7210/16595" }, { "epoch": 2.1726423621572764, "eta": "62:13:16", "grad_norm": 0.0076, "loss": 0.0453, "lr": "3.074e-05", "step": 7211, "steps": "23.87s,7211/16595" }, { "epoch": 2.1729436577282315, "eta": "62:03:29", "grad_norm": 0.0057, "loss": 0.0613, "lr": "3.073e-05", "step": 7212, "steps": "23.81s,7212/16595" }, { "epoch": 2.1732449532991867, "eta": "62:10:54", "grad_norm": 0.0045, "loss": 0.0565, "lr": "3.073e-05", "step": 7213, "steps": "23.86s,7213/16595" }, { "epoch": 2.173546248870142, "eta": "62:26:08", "grad_norm": 0.0051, "loss": 0.0586, "lr": "3.072e-05", "step": 7214, "steps": "23.96s,7214/16595" }, { "epoch": 2.1738475444410965, "eta": "61:10:42", "grad_norm": 0.0056, "loss": 0.0654, "lr": "3.072e-05", "step": 7215, "steps": "23.48s,7215/16595" }, { "epoch": 2.1741488400120517, "eta": "62:08:09", "grad_norm": 0.0061, "loss": 0.0551, "lr": "3.071e-05", "step": 7216, "steps": "23.85s,7216/16595" }, { "epoch": 2.174450135583007, "eta": "62:21:49", "grad_norm": 0.0055, "loss": 0.0467, "lr": "3.071e-05", "step": 7217, "steps": "23.94s,7217/16595" }, { "epoch": 2.174751431153962, "eta": "61:18:54", "grad_norm": 0.0065, "loss": 0.0415, "lr": "3.070e-05", "step": 7218, "steps": "23.54s,7218/16595" }, { "epoch": 2.175052726724917, "eta": "61:37:16", "grad_norm": 0.0059, "loss": 0.057, "lr": "3.070e-05", "step": 7219, "steps": "23.66s,7219/16595" }, { "epoch": 2.1753540222958723, "eta": "62:00:18", "grad_norm": 0.0054, "loss": 0.0405, "lr": "3.069e-05", "step": 7220, "steps": "23.81s,7220/16595" }, { "epoch": 2.1756553178668274, "eta": "61:16:10", "grad_norm": 0.0065, "loss": 0.0497, "lr": "3.069e-05", "step": 7221, "steps": "23.53s,7221/16595" }, { "epoch": 2.1759566134377826, "eta": "61:40:46", "grad_norm": 0.0053, "loss": 0.044, "lr": "3.069e-05", "step": 7222, "steps": "23.69s,7222/16595" }, { "epoch": 2.1762579090087377, "eta": "61:55:59", "grad_norm": 0.0069, "loss": 0.0644, "lr": "3.068e-05", "step": 7223, "steps": "23.79s,7223/16595" }, { "epoch": 2.176559204579693, "eta": "61:58:43", "grad_norm": 0.0052, "loss": 0.0549, "lr": "3.068e-05", "step": 7224, "steps": "23.81s,7224/16595" }, { "epoch": 2.1768605001506476, "eta": "61:45:50", "grad_norm": 0.0051, "loss": 0.0424, "lr": "3.067e-05", "step": 7225, "steps": "23.73s,7225/16595" }, { "epoch": 2.1771617957216027, "eta": "61:59:29", "grad_norm": 0.0055, "loss": 0.0435, "lr": "3.067e-05", "step": 7226, "steps": "23.82s,7226/16595" }, { "epoch": 2.177463091292558, "eta": "61:45:02", "grad_norm": 0.0052, "loss": 0.0694, "lr": "3.066e-05", "step": 7227, "steps": "23.73s,7227/16595" }, { "epoch": 2.177764386863513, "eta": "62:25:14", "grad_norm": 0.006, "loss": 0.0611, "lr": "3.066e-05", "step": 7228, "steps": "23.99s,7228/16595" }, { "epoch": 2.178065682434468, "eta": "62:29:31", "grad_norm": 0.0058, "loss": 0.0505, "lr": "3.065e-05", "step": 7229, "steps": "24.02s,7229/16595" }, { "epoch": 2.1783669780054233, "eta": "61:23:34", "grad_norm": 0.0055, "loss": 0.0489, "lr": "3.065e-05", "step": 7230, "steps": "23.6s,7230/16595" }, { "epoch": 2.1786682735763785, "eta": "61:23:10", "grad_norm": 0.0053, "loss": 0.0481, "lr": "3.064e-05", "step": 7231, "steps": "23.6s,7231/16595" }, { "epoch": 2.1789695691473336, "eta": "62:03:21", "grad_norm": 0.0064, "loss": 0.0626, "lr": "3.064e-05", "step": 7232, "steps": "23.86s,7232/16595" }, { "epoch": 2.179270864718289, "eta": "61:47:21", "grad_norm": 0.0045, "loss": 0.0519, "lr": "3.063e-05", "step": 7233, "steps": "23.76s,7233/16595" }, { "epoch": 2.179572160289244, "eta": "62:27:31", "grad_norm": 0.0061, "loss": 0.0401, "lr": "3.063e-05", "step": 7234, "steps": "24.02s,7234/16595" }, { "epoch": 2.1798734558601986, "eta": "61:54:21", "grad_norm": 0.0048, "loss": 0.0673, "lr": "3.063e-05", "step": 7235, "steps": "23.81s,7235/16595" }, { "epoch": 2.180174751431154, "eta": "61:52:24", "grad_norm": 0.0048, "loss": 0.0711, "lr": "3.062e-05", "step": 7236, "steps": "23.8s,7236/16595" }, { "epoch": 2.180476047002109, "eta": "61:42:38", "grad_norm": 0.0069, "loss": 0.0605, "lr": "3.062e-05", "step": 7237, "steps": "23.74s,7237/16595" }, { "epoch": 2.180777342573064, "eta": "61:23:32", "grad_norm": 0.0045, "loss": 0.0583, "lr": "3.061e-05", "step": 7238, "steps": "23.62s,7238/16595" }, { "epoch": 2.1810786381440193, "eta": "62:22:24", "grad_norm": 0.0053, "loss": 0.0525, "lr": "3.061e-05", "step": 7239, "steps": "24.0s,7239/16595" }, { "epoch": 2.1813799337149744, "eta": "61:24:18", "grad_norm": 0.0044, "loss": 0.0526, "lr": "3.060e-05", "step": 7240, "steps": "23.63s,7240/16595" }, { "epoch": 2.1816812292859296, "eta": "61:53:32", "grad_norm": 0.0045, "loss": 0.0374, "lr": "3.060e-05", "step": 7241, "steps": "23.82s,7241/16595" }, { "epoch": 2.1819825248568847, "eta": "61:59:22", "grad_norm": 0.0059, "loss": 0.0575, "lr": "3.059e-05", "step": 7242, "steps": "23.86s,7242/16595" }, { "epoch": 2.18228382042784, "eta": "60:58:11", "grad_norm": 0.0057, "loss": 0.0485, "lr": "3.059e-05", "step": 7243, "steps": "23.47s,7243/16595" }, { "epoch": 2.182585115998795, "eta": "61:35:12", "grad_norm": 0.0045, "loss": 0.053, "lr": "3.058e-05", "step": 7244, "steps": "23.71s,7244/16595" }, { "epoch": 2.1828864115697497, "eta": "61:58:11", "grad_norm": 0.0059, "loss": 0.0557, "lr": "3.058e-05", "step": 7245, "steps": "23.86s,7245/16595" }, { "epoch": 2.183187707140705, "eta": "61:53:06", "grad_norm": 0.0051, "loss": 0.0499, "lr": "3.057e-05", "step": 7246, "steps": "23.83s,7246/16595" }, { "epoch": 2.18348900271166, "eta": "61:21:33", "grad_norm": 0.0052, "loss": 0.059, "lr": "3.057e-05", "step": 7247, "steps": "23.63s,7247/16595" }, { "epoch": 2.183790298282615, "eta": "61:47:38", "grad_norm": 0.0121, "loss": 0.0599, "lr": "3.056e-05", "step": 7248, "steps": "23.8s,7248/16595" }, { "epoch": 2.1840915938535703, "eta": "61:34:47", "grad_norm": 0.0058, "loss": 0.0569, "lr": "3.056e-05", "step": 7249, "steps": "23.72s,7249/16595" }, { "epoch": 2.1843928894245255, "eta": "61:37:30", "grad_norm": 0.0058, "loss": 0.0491, "lr": "3.056e-05", "step": 7250, "steps": "23.74s,7250/16595" }, { "epoch": 2.1846941849954806, "eta": "61:48:00", "grad_norm": 0.0068, "loss": 0.0575, "lr": "3.055e-05", "step": 7251, "steps": "23.81s,7251/16595" }, { "epoch": 2.1849954805664358, "eta": "60:56:13", "grad_norm": 0.0045, "loss": 0.0526, "lr": "3.055e-05", "step": 7252, "steps": "23.48s,7252/16595" }, { "epoch": 2.185296776137391, "eta": "61:37:52", "grad_norm": 0.0066, "loss": 0.0406, "lr": "3.054e-05", "step": 7253, "steps": "23.75s,7253/16595" }, { "epoch": 2.185598071708346, "eta": "62:10:10", "grad_norm": 0.0055, "loss": 0.0469, "lr": "3.054e-05", "step": 7254, "steps": "23.96s,7254/16595" }, { "epoch": 2.185899367279301, "eta": "61:54:12", "grad_norm": 0.0052, "loss": 0.0671, "lr": "3.053e-05", "step": 7255, "steps": "23.86s,7255/16595" }, { "epoch": 2.186200662850256, "eta": "61:08:40", "grad_norm": 0.0056, "loss": 0.039, "lr": "3.053e-05", "step": 7256, "steps": "23.57s,7256/16595" }, { "epoch": 2.186501958421211, "eta": "61:34:44", "grad_norm": 0.0062, "loss": 0.0613, "lr": "3.052e-05", "step": 7257, "steps": "23.74s,7257/16595" }, { "epoch": 2.1868032539921662, "eta": "61:26:33", "grad_norm": 0.0052, "loss": 0.0418, "lr": "3.052e-05", "step": 7258, "steps": "23.69s,7258/16595" }, { "epoch": 2.1871045495631214, "eta": "62:03:30", "grad_norm": 0.006, "loss": 0.0562, "lr": "3.051e-05", "step": 7259, "steps": "23.93s,7259/16595" }, { "epoch": 2.1874058451340765, "eta": "61:44:26", "grad_norm": 0.0132, "loss": 0.0496, "lr": "3.051e-05", "step": 7260, "steps": "23.81s,7260/16595" }, { "epoch": 2.1877071407050317, "eta": "61:40:55", "grad_norm": 0.0064, "loss": 0.0526, "lr": "3.050e-05", "step": 7261, "steps": "23.79s,7261/16595" }, { "epoch": 2.188008436275987, "eta": "61:17:12", "grad_norm": 0.0058, "loss": 0.063, "lr": "3.050e-05", "step": 7262, "steps": "23.64s,7262/16595" }, { "epoch": 2.188309731846942, "eta": "61:21:28", "grad_norm": 0.0084, "loss": 0.0473, "lr": "3.049e-05", "step": 7263, "steps": "23.67s,7263/16595" }, { "epoch": 2.188611027417897, "eta": "60:42:12", "grad_norm": 0.0054, "loss": 0.0559, "lr": "3.049e-05", "step": 7264, "steps": "23.42s,7264/16595" }, { "epoch": 2.188912322988852, "eta": "61:59:33", "grad_norm": 0.0052, "loss": 0.0516, "lr": "3.049e-05", "step": 7265, "steps": "23.92s,7265/16595" }, { "epoch": 2.189213618559807, "eta": "61:31:10", "grad_norm": 0.0155, "loss": 0.051, "lr": "3.048e-05", "step": 7266, "steps": "23.74s,7266/16595" }, { "epoch": 2.189514914130762, "eta": "62:17:25", "grad_norm": 0.0107, "loss": 0.0473, "lr": "3.048e-05", "step": 7267, "steps": "24.04s,7267/16595" }, { "epoch": 2.1898162097017173, "eta": "62:01:28", "grad_norm": 0.0049, "loss": 0.0437, "lr": "3.047e-05", "step": 7268, "steps": "23.94s,7268/16595" }, { "epoch": 2.1901175052726725, "eta": "61:54:51", "grad_norm": 0.0102, "loss": 0.0412, "lr": "3.047e-05", "step": 7269, "steps": "23.9s,7269/16595" }, { "epoch": 2.1904188008436276, "eta": "62:13:06", "grad_norm": 0.0087, "loss": 0.0505, "lr": "3.046e-05", "step": 7270, "steps": "24.02s,7270/16595" }, { "epoch": 2.1907200964145828, "eta": "61:57:10", "grad_norm": 0.0052, "loss": 0.04, "lr": "3.046e-05", "step": 7271, "steps": "23.92s,7271/16595" }, { "epoch": 2.191021391985538, "eta": "60:54:36", "grad_norm": 0.0049, "loss": 0.0699, "lr": "3.045e-05", "step": 7272, "steps": "23.52s,7272/16595" }, { "epoch": 2.191322687556493, "eta": "61:20:38", "grad_norm": 0.0065, "loss": 0.0631, "lr": "3.045e-05", "step": 7273, "steps": "23.69s,7273/16595" }, { "epoch": 2.191623983127448, "eta": "62:06:50", "grad_norm": 0.0076, "loss": 0.0571, "lr": "3.044e-05", "step": 7274, "steps": "23.99s,7274/16595" }, { "epoch": 2.1919252786984034, "eta": "62:09:33", "grad_norm": 0.0076, "loss": 0.0372, "lr": "3.044e-05", "step": 7275, "steps": "24.01s,7275/16595" }, { "epoch": 2.192226574269358, "eta": "62:44:52", "grad_norm": 0.0056, "loss": 0.0533, "lr": "3.043e-05", "step": 7276, "steps": "24.24s,7276/16595" }, { "epoch": 2.1925278698403132, "eta": "61:37:41", "grad_norm": 0.0063, "loss": 0.055, "lr": "3.043e-05", "step": 7277, "steps": "23.81s,7277/16595" }, { "epoch": 2.1928291654112684, "eta": "61:01:34", "grad_norm": 0.0065, "loss": 0.0573, "lr": "3.042e-05", "step": 7278, "steps": "23.58s,7278/16595" }, { "epoch": 2.1931304609822235, "eta": "62:56:05", "grad_norm": 0.0057, "loss": 0.0719, "lr": "3.042e-05", "step": 7279, "steps": "24.32s,7279/16595" }, { "epoch": 2.1934317565531787, "eta": "61:05:27", "grad_norm": 0.0064, "loss": 0.0507, "lr": "3.042e-05", "step": 7280, "steps": "23.61s,7280/16595" }, { "epoch": 2.193733052124134, "eta": "61:59:23", "grad_norm": 0.0087, "loss": 0.0447, "lr": "3.041e-05", "step": 7281, "steps": "23.96s,7281/16595" }, { "epoch": 2.194034347695089, "eta": "62:02:05", "grad_norm": 0.0045, "loss": 0.0558, "lr": "3.041e-05", "step": 7282, "steps": "23.98s,7282/16595" }, { "epoch": 2.194335643266044, "eta": "61:27:33", "grad_norm": 0.0061, "loss": 0.0429, "lr": "3.040e-05", "step": 7283, "steps": "23.76s,7283/16595" }, { "epoch": 2.1946369388369993, "eta": "61:03:52", "grad_norm": 0.0058, "loss": 0.0479, "lr": "3.040e-05", "step": 7284, "steps": "23.61s,7284/16595" }, { "epoch": 2.194938234407954, "eta": "61:14:20", "grad_norm": 0.0055, "loss": 0.0442, "lr": "3.039e-05", "step": 7285, "steps": "23.68s,7285/16595" }, { "epoch": 2.195239529978909, "eta": "62:14:27", "grad_norm": 0.0055, "loss": 0.0478, "lr": "3.039e-05", "step": 7286, "steps": "24.07s,7286/16595" }, { "epoch": 2.1955408255498643, "eta": "61:30:37", "grad_norm": 0.0055, "loss": 0.0495, "lr": "3.038e-05", "step": 7287, "steps": "23.79s,7287/16595" }, { "epoch": 2.1958421211208194, "eta": "61:37:58", "grad_norm": 0.0058, "loss": 0.0573, "lr": "3.038e-05", "step": 7288, "steps": "23.84s,7288/16595" }, { "epoch": 2.1961434166917746, "eta": "60:32:26", "grad_norm": 0.0063, "loss": 0.0426, "lr": "3.037e-05", "step": 7289, "steps": "23.42s,7289/16595" }, { "epoch": 2.1964447122627297, "eta": "61:34:05", "grad_norm": 0.0056, "loss": 0.0458, "lr": "3.037e-05", "step": 7290, "steps": "23.82s,7290/16595" }, { "epoch": 2.196746007833685, "eta": "61:39:53", "grad_norm": 0.005, "loss": 0.0601, "lr": "3.036e-05", "step": 7291, "steps": "23.86s,7291/16595" }, { "epoch": 2.19704730340464, "eta": "61:33:17", "grad_norm": 0.0065, "loss": 0.0441, "lr": "3.036e-05", "step": 7292, "steps": "23.82s,7292/16595" }, { "epoch": 2.197348598975595, "eta": "61:39:05", "grad_norm": 0.0057, "loss": 0.0539, "lr": "3.036e-05", "step": 7293, "steps": "23.86s,7293/16595" }, { "epoch": 2.1976498945465504, "eta": "62:17:27", "grad_norm": 0.0071, "loss": 0.0449, "lr": "3.035e-05", "step": 7294, "steps": "24.11s,7294/16595" }, { "epoch": 2.1979511901175055, "eta": "61:04:12", "grad_norm": 0.0079, "loss": 0.0731, "lr": "3.035e-05", "step": 7295, "steps": "23.64s,7295/16595" }, { "epoch": 2.19825248568846, "eta": "61:25:30", "grad_norm": 0.0077, "loss": 0.0421, "lr": "3.034e-05", "step": 7296, "steps": "23.78s,7296/16595" }, { "epoch": 2.1985537812594154, "eta": "61:45:15", "grad_norm": 0.0056, "loss": 0.0323, "lr": "3.034e-05", "step": 7297, "steps": "23.91s,7297/16595" }, { "epoch": 2.1988550768303705, "eta": "62:14:17", "grad_norm": 0.0058, "loss": 0.0556, "lr": "3.033e-05", "step": 7298, "steps": "24.1s,7298/16595" }, { "epoch": 2.1991563724013257, "eta": "61:44:27", "grad_norm": 0.0063, "loss": 0.0433, "lr": "3.033e-05", "step": 7299, "steps": "23.91s,7299/16595" }, { "epoch": 2.199457667972281, "eta": "60:52:56", "grad_norm": 0.0057, "loss": 0.0403, "lr": "3.032e-05", "step": 7300, "steps": "23.58s,7300/16595" }, { "epoch": 2.199758963543236, "eta": "61:32:48", "grad_norm": 0.0053, "loss": 0.0471, "lr": "3.032e-05", "step": 7301, "steps": "23.84s,7301/16595" }, { "epoch": 2.200060259114191, "eta": "61:15:22", "grad_norm": 0.0045, "loss": 0.0372, "lr": "3.031e-05", "step": 7302, "steps": "23.73s,7302/16595" }, { "epoch": 2.2003615546851463, "eta": "60:50:12", "grad_norm": 0.0055, "loss": 0.043, "lr": "3.031e-05", "step": 7303, "steps": "23.57s,7303/16595" }, { "epoch": 2.2006628502561014, "eta": "62:16:31", "grad_norm": 0.005, "loss": 0.0559, "lr": "3.030e-05", "step": 7304, "steps": "24.13s,7304/16595" }, { "epoch": 2.200964145827056, "eta": "61:57:32", "grad_norm": 0.0056, "loss": 0.0636, "lr": "3.030e-05", "step": 7305, "steps": "24.01s,7305/16595" }, { "epoch": 2.2012654413980113, "eta": "61:49:24", "grad_norm": 0.0049, "loss": 0.0584, "lr": "3.029e-05", "step": 7306, "steps": "23.96s,7306/16595" }, { "epoch": 2.2015667369689664, "eta": "61:22:41", "grad_norm": 0.0069, "loss": 0.039, "lr": "3.029e-05", "step": 7307, "steps": "23.79s,7307/16595" }, { "epoch": 2.2018680325399216, "eta": "61:23:50", "grad_norm": 0.0051, "loss": 0.0483, "lr": "3.029e-05", "step": 7308, "steps": "23.8s,7308/16595" }, { "epoch": 2.2021693281108767, "eta": "60:46:18", "grad_norm": 0.0052, "loss": 0.0382, "lr": "3.028e-05", "step": 7309, "steps": "23.56s,7309/16595" }, { "epoch": 2.202470623681832, "eta": "60:47:27", "grad_norm": 0.0072, "loss": 0.048, "lr": "3.028e-05", "step": 7310, "steps": "23.57s,7310/16595" }, { "epoch": 2.202771919252787, "eta": "61:21:06", "grad_norm": 0.0046, "loss": 0.037, "lr": "3.027e-05", "step": 7311, "steps": "23.79s,7311/16595" }, { "epoch": 2.203073214823742, "eta": "61:26:53", "grad_norm": 0.0054, "loss": 0.0557, "lr": "3.027e-05", "step": 7312, "steps": "23.83s,7312/16595" }, { "epoch": 2.2033745103946973, "eta": "61:28:02", "grad_norm": 0.0042, "loss": 0.0544, "lr": "3.026e-05", "step": 7313, "steps": "23.84s,7313/16595" }, { "epoch": 2.2036758059656525, "eta": "60:45:53", "grad_norm": 0.0055, "loss": 0.0536, "lr": "3.026e-05", "step": 7314, "steps": "23.57s,7314/16595" }, { "epoch": 2.2039771015366076, "eta": "61:21:04", "grad_norm": 0.0049, "loss": 0.0481, "lr": "3.025e-05", "step": 7315, "steps": "23.8s,7315/16595" }, { "epoch": 2.2042783971075623, "eta": "60:57:28", "grad_norm": 0.0055, "loss": 0.0543, "lr": "3.025e-05", "step": 7316, "steps": "23.65s,7316/16595" }, { "epoch": 2.2045796926785175, "eta": "60:47:48", "grad_norm": 0.0048, "loss": 0.0497, "lr": "3.024e-05", "step": 7317, "steps": "23.59s,7317/16595" }, { "epoch": 2.2048809882494727, "eta": "61:21:25", "grad_norm": 0.0106, "loss": 0.0581, "lr": "3.024e-05", "step": 7318, "steps": "23.81s,7318/16595" }, { "epoch": 2.205182283820428, "eta": "62:02:46", "grad_norm": 0.0053, "loss": 0.0401, "lr": "3.023e-05", "step": 7319, "steps": "24.08s,7319/16595" }, { "epoch": 2.205483579391383, "eta": "61:17:32", "grad_norm": 0.0051, "loss": 0.047, "lr": "3.023e-05", "step": 7320, "steps": "23.79s,7320/16595" }, { "epoch": 2.205784874962338, "eta": "62:05:03", "grad_norm": 0.0059, "loss": 0.0503, "lr": "3.022e-05", "step": 7321, "steps": "24.1s,7321/16595" }, { "epoch": 2.2060861705332933, "eta": "60:45:50", "grad_norm": 0.0048, "loss": 0.0536, "lr": "3.022e-05", "step": 7322, "steps": "23.59s,7322/16595" }, { "epoch": 2.2063874661042484, "eta": "61:19:26", "grad_norm": 0.0055, "loss": 0.062, "lr": "3.022e-05", "step": 7323, "steps": "23.81s,7323/16595" }, { "epoch": 2.2066887616752036, "eta": "61:20:35", "grad_norm": 0.0056, "loss": 0.074, "lr": "3.021e-05", "step": 7324, "steps": "23.82s,7324/16595" }, { "epoch": 2.2069900572461583, "eta": "60:46:12", "grad_norm": 0.0051, "loss": 0.0363, "lr": "3.021e-05", "step": 7325, "steps": "23.6s,7325/16595" }, { "epoch": 2.2072913528171134, "eta": "60:51:59", "grad_norm": 0.0054, "loss": 0.047, "lr": "3.020e-05", "step": 7326, "steps": "23.64s,7326/16595" }, { "epoch": 2.2075926483880686, "eta": "61:53:22", "grad_norm": 0.0046, "loss": 0.0494, "lr": "3.020e-05", "step": 7327, "steps": "24.04s,7327/16595" }, { "epoch": 2.2078939439590237, "eta": "60:48:06", "grad_norm": 0.0055, "loss": 0.036, "lr": "3.019e-05", "step": 7328, "steps": "23.62s,7328/16595" }, { "epoch": 2.208195239529979, "eta": "61:30:57", "grad_norm": 0.0053, "loss": 0.058, "lr": "3.019e-05", "step": 7329, "steps": "23.9s,7329/16595" }, { "epoch": 2.208496535100934, "eta": "61:01:13", "grad_norm": 0.0054, "loss": 0.054, "lr": "3.018e-05", "step": 7330, "steps": "23.71s,7330/16595" }, { "epoch": 2.208797830671889, "eta": "60:42:17", "grad_norm": 0.0054, "loss": 0.05, "lr": "3.018e-05", "step": 7331, "steps": "23.59s,7331/16595" }, { "epoch": 2.2090991262428443, "eta": "61:17:24", "grad_norm": 0.0057, "loss": 0.047, "lr": "3.017e-05", "step": 7332, "steps": "23.82s,7332/16595" }, { "epoch": 2.2094004218137995, "eta": "61:27:49", "grad_norm": 0.0049, "loss": 0.0516, "lr": "3.017e-05", "step": 7333, "steps": "23.89s,7333/16595" }, { "epoch": 2.2097017173847546, "eta": "61:11:59", "grad_norm": 0.0059, "loss": 0.041, "lr": "3.016e-05", "step": 7334, "steps": "23.79s,7334/16595" }, { "epoch": 2.21000301295571, "eta": "61:19:18", "grad_norm": 0.0056, "loss": 0.0558, "lr": "3.016e-05", "step": 7335, "steps": "23.84s,7335/16595" }, { "epoch": 2.2103043085266645, "eta": "61:21:59", "grad_norm": 0.0044, "loss": 0.0413, "lr": "3.015e-05", "step": 7336, "steps": "23.86s,7336/16595" }, { "epoch": 2.2106056040976196, "eta": "60:33:45", "grad_norm": 0.0074, "loss": 0.0414, "lr": "3.015e-05", "step": 7337, "steps": "23.55s,7337/16595" }, { "epoch": 2.210906899668575, "eta": "60:24:06", "grad_norm": 0.005, "loss": 0.0513, "lr": "3.015e-05", "step": 7338, "steps": "23.49s,7338/16595" }, { "epoch": 2.21120819523953, "eta": "60:29:53", "grad_norm": 0.009, "loss": 0.0382, "lr": "3.014e-05", "step": 7339, "steps": "23.53s,7339/16595" }, { "epoch": 2.211509490810485, "eta": "61:45:05", "grad_norm": 0.0051, "loss": 0.0534, "lr": "3.014e-05", "step": 7340, "steps": "24.02s,7340/16595" }, { "epoch": 2.2118107863814402, "eta": "60:44:32", "grad_norm": 0.0062, "loss": 0.0467, "lr": "3.013e-05", "step": 7341, "steps": "23.63s,7341/16595" }, { "epoch": 2.2121120819523954, "eta": "60:19:27", "grad_norm": 0.0053, "loss": 0.0328, "lr": "3.013e-05", "step": 7342, "steps": "23.47s,7342/16595" }, { "epoch": 2.2124133775233505, "eta": "61:09:57", "grad_norm": 0.0049, "loss": 0.0534, "lr": "3.012e-05", "step": 7343, "steps": "23.8s,7343/16595" }, { "epoch": 2.2127146730943057, "eta": "61:37:18", "grad_norm": 0.0058, "loss": 0.0529, "lr": "3.012e-05", "step": 7344, "steps": "23.98s,7344/16595" }, { "epoch": 2.2130159686652604, "eta": "61:15:20", "grad_norm": 0.0058, "loss": 0.0524, "lr": "3.011e-05", "step": 7345, "steps": "23.84s,7345/16595" }, { "epoch": 2.2133172642362156, "eta": "61:16:28", "grad_norm": 0.0061, "loss": 0.0397, "lr": "3.011e-05", "step": 7346, "steps": "23.85s,7346/16595" }, { "epoch": 2.2136185598071707, "eta": "60:42:10", "grad_norm": 0.0048, "loss": 0.0514, "lr": "3.010e-05", "step": 7347, "steps": "23.63s,7347/16595" }, { "epoch": 2.213919855378126, "eta": "61:11:03", "grad_norm": 0.0054, "loss": 0.0531, "lr": "3.010e-05", "step": 7348, "steps": "23.82s,7348/16595" }, { "epoch": 2.214221150949081, "eta": "61:35:19", "grad_norm": 0.0051, "loss": 0.0562, "lr": "3.009e-05", "step": 7349, "steps": "23.98s,7349/16595" }, { "epoch": 2.214522446520036, "eta": "61:36:27", "grad_norm": 0.0051, "loss": 0.0786, "lr": "3.009e-05", "step": 7350, "steps": "23.99s,7350/16595" }, { "epoch": 2.2148237420909913, "eta": "61:25:16", "grad_norm": 0.0045, "loss": 0.0432, "lr": "3.008e-05", "step": 7351, "steps": "23.92s,7351/16595" }, { "epoch": 2.2151250376619465, "eta": "60:55:36", "grad_norm": 0.0045, "loss": 0.0518, "lr": "3.008e-05", "step": 7352, "steps": "23.73s,7352/16595" }, { "epoch": 2.2154263332329016, "eta": "61:24:28", "grad_norm": 0.0066, "loss": 0.061, "lr": "3.008e-05", "step": 7353, "steps": "23.92s,7353/16595" }, { "epoch": 2.2157276288038568, "eta": "60:59:26", "grad_norm": 0.0069, "loss": 0.0401, "lr": "3.007e-05", "step": 7354, "steps": "23.76s,7354/16595" }, { "epoch": 2.216028924374812, "eta": "61:23:40", "grad_norm": 0.005, "loss": 0.0539, "lr": "3.007e-05", "step": 7355, "steps": "23.92s,7355/16595" }, { "epoch": 2.2163302199457666, "eta": "61:07:52", "grad_norm": 0.005, "loss": 0.0487, "lr": "3.006e-05", "step": 7356, "steps": "23.82s,7356/16595" }, { "epoch": 2.2166315155167218, "eta": "61:25:57", "grad_norm": 0.0049, "loss": 0.0462, "lr": "3.006e-05", "step": 7357, "steps": "23.94s,7357/16595" }, { "epoch": 2.216932811087677, "eta": "60:25:31", "grad_norm": 0.0078, "loss": 0.036, "lr": "3.005e-05", "step": 7358, "steps": "23.55s,7358/16595" }, { "epoch": 2.217234106658632, "eta": "60:12:48", "grad_norm": 0.0055, "loss": 0.0609, "lr": "3.005e-05", "step": 7359, "steps": "23.47s,7359/16595" }, { "epoch": 2.2175354022295872, "eta": "61:17:04", "grad_norm": 0.0054, "loss": 0.0542, "lr": "3.004e-05", "step": 7360, "steps": "23.89s,7360/16595" }, { "epoch": 2.2178366978005424, "eta": "61:07:26", "grad_norm": 0.0048, "loss": 0.0453, "lr": "3.004e-05", "step": 7361, "steps": "23.83s,7361/16595" }, { "epoch": 2.2181379933714975, "eta": "60:37:48", "grad_norm": 0.005, "loss": 0.0571, "lr": "3.003e-05", "step": 7362, "steps": "23.64s,7362/16595" }, { "epoch": 2.2184392889424527, "eta": "61:26:38", "grad_norm": 0.0052, "loss": 0.0538, "lr": "3.003e-05", "step": 7363, "steps": "23.96s,7363/16595" }, { "epoch": 2.218740584513408, "eta": "60:43:10", "grad_norm": 0.0055, "loss": 0.0516, "lr": "3.002e-05", "step": 7364, "steps": "23.68s,7364/16595" }, { "epoch": 2.2190418800843625, "eta": "61:15:04", "grad_norm": 0.0054, "loss": 0.0468, "lr": "3.002e-05", "step": 7365, "steps": "23.89s,7365/16595" }, { "epoch": 2.2193431756553177, "eta": "60:14:41", "grad_norm": 0.0048, "loss": 0.0431, "lr": "3.001e-05", "step": 7366, "steps": "23.5s,7366/16595" }, { "epoch": 2.219644471226273, "eta": "60:11:13", "grad_norm": 0.0052, "loss": 0.0576, "lr": "3.001e-05", "step": 7367, "steps": "23.48s,7367/16595" }, { "epoch": 2.219945766797228, "eta": "60:58:30", "grad_norm": 0.0049, "loss": 0.0562, "lr": "3.001e-05", "step": 7368, "steps": "23.79s,7368/16595" }, { "epoch": 2.220247062368183, "eta": "60:53:29", "grad_norm": 0.0051, "loss": 0.0515, "lr": "3.000e-05", "step": 7369, "steps": "23.76s,7369/16595" }, { "epoch": 2.2205483579391383, "eta": "61:36:09", "grad_norm": 0.0077, "loss": 0.0449, "lr": "3.000e-05", "step": 7370, "steps": "24.04s,7370/16595" }, { "epoch": 2.2208496535100934, "eta": "60:49:37", "grad_norm": 0.0054, "loss": 0.04, "lr": "2.999e-05", "step": 7371, "steps": "23.74s,7371/16595" }, { "epoch": 2.2211509490810486, "eta": "60:18:29", "grad_norm": 0.0065, "loss": 0.0566, "lr": "2.999e-05", "step": 7372, "steps": "23.54s,7372/16595" }, { "epoch": 2.2214522446520037, "eta": "60:41:09", "grad_norm": 0.0049, "loss": 0.0632, "lr": "2.998e-05", "step": 7373, "steps": "23.69s,7373/16595" }, { "epoch": 2.221753540222959, "eta": "60:56:07", "grad_norm": 0.0058, "loss": 0.0699, "lr": "2.998e-05", "step": 7374, "steps": "23.79s,7374/16595" }, { "epoch": 2.222054835793914, "eta": "60:44:58", "grad_norm": 0.0047, "loss": 0.0668, "lr": "2.997e-05", "step": 7375, "steps": "23.72s,7375/16595" }, { "epoch": 2.2223561313648688, "eta": "61:13:46", "grad_norm": 0.0067, "loss": 0.0304, "lr": "2.997e-05", "step": 7376, "steps": "23.91s,7376/16595" }, { "epoch": 2.222657426935824, "eta": "60:45:43", "grad_norm": 0.0067, "loss": 0.0489, "lr": "2.996e-05", "step": 7377, "steps": "23.73s,7377/16595" }, { "epoch": 2.222958722506779, "eta": "60:45:19", "grad_norm": 0.0051, "loss": 0.0658, "lr": "2.996e-05", "step": 7378, "steps": "23.73s,7378/16595" }, { "epoch": 2.223260018077734, "eta": "60:24:57", "grad_norm": 0.0075, "loss": 0.042, "lr": "2.995e-05", "step": 7379, "steps": "23.6s,7379/16595" }, { "epoch": 2.2235613136486894, "eta": "60:32:14", "grad_norm": 0.0081, "loss": 0.0555, "lr": "2.995e-05", "step": 7380, "steps": "23.65s,7380/16595" }, { "epoch": 2.2238626092196445, "eta": "60:22:38", "grad_norm": 0.0056, "loss": 0.0407, "lr": "2.994e-05", "step": 7381, "steps": "23.59s,7381/16595" }, { "epoch": 2.2241639047905997, "eta": "60:57:33", "grad_norm": 0.0057, "loss": 0.0399, "lr": "2.994e-05", "step": 7382, "steps": "23.82s,7382/16595" }, { "epoch": 2.224465200361555, "eta": "60:52:33", "grad_norm": 0.0045, "loss": 0.0511, "lr": "2.993e-05", "step": 7383, "steps": "23.79s,7383/16595" }, { "epoch": 2.22476649593251, "eta": "61:10:35", "grad_norm": 0.1069, "loss": 0.041, "lr": "2.993e-05", "step": 7384, "steps": "23.91s,7384/16595" }, { "epoch": 2.2250677915034647, "eta": "61:11:43", "grad_norm": 0.0055, "loss": 0.0401, "lr": "2.993e-05", "step": 7385, "steps": "23.92s,7385/16595" }, { "epoch": 2.22536908707442, "eta": "60:42:09", "grad_norm": 0.0062, "loss": 0.0569, "lr": "2.992e-05", "step": 7386, "steps": "23.73s,7386/16595" }, { "epoch": 2.225670382645375, "eta": "60:17:12", "grad_norm": 0.0043, "loss": 0.0428, "lr": "2.992e-05", "step": 7387, "steps": "23.57s,7387/16595" }, { "epoch": 2.22597167821633, "eta": "60:55:10", "grad_norm": 0.0052, "loss": 0.0585, "lr": "2.991e-05", "step": 7388, "steps": "23.82s,7388/16595" }, { "epoch": 2.2262729737872853, "eta": "60:42:30", "grad_norm": 0.0079, "loss": 0.0538, "lr": "2.991e-05", "step": 7389, "steps": "23.74s,7389/16595" }, { "epoch": 2.2265742693582404, "eta": "60:49:46", "grad_norm": 0.0058, "loss": 0.0415, "lr": "2.990e-05", "step": 7390, "steps": "23.79s,7390/16595" }, { "epoch": 2.2268755649291956, "eta": "60:17:10", "grad_norm": 0.0052, "loss": 0.0501, "lr": "2.990e-05", "step": 7391, "steps": "23.58s,7391/16595" }, { "epoch": 2.2271768605001507, "eta": "60:44:23", "grad_norm": 0.0087, "loss": 0.0623, "lr": "2.989e-05", "step": 7392, "steps": "23.76s,7392/16595" }, { "epoch": 2.227478156071106, "eta": "60:28:39", "grad_norm": 0.0044, "loss": 0.0418, "lr": "2.989e-05", "step": 7393, "steps": "23.66s,7393/16595" }, { "epoch": 2.227779451642061, "eta": "61:18:51", "grad_norm": 0.0077, "loss": 0.0519, "lr": "2.988e-05", "step": 7394, "steps": "23.99s,7394/16595" }, { "epoch": 2.228080747213016, "eta": "61:18:28", "grad_norm": 0.0065, "loss": 0.0437, "lr": "2.988e-05", "step": 7395, "steps": "23.99s,7395/16595" }, { "epoch": 2.228382042783971, "eta": "60:12:08", "grad_norm": 0.0052, "loss": 0.0516, "lr": "2.987e-05", "step": 7396, "steps": "23.56s,7396/16595" }, { "epoch": 2.228683338354926, "eta": "60:24:00", "grad_norm": 0.0057, "loss": 0.0464, "lr": "2.987e-05", "step": 7397, "steps": "23.64s,7397/16595" }, { "epoch": 2.228984633925881, "eta": "60:40:28", "grad_norm": 0.0056, "loss": 0.0484, "lr": "2.986e-05", "step": 7398, "steps": "23.75s,7398/16595" }, { "epoch": 2.2292859294968363, "eta": "61:16:52", "grad_norm": 0.0054, "loss": 0.0602, "lr": "2.986e-05", "step": 7399, "steps": "23.99s,7399/16595" }, { "epoch": 2.2295872250677915, "eta": "60:15:10", "grad_norm": 0.0059, "loss": 0.0434, "lr": "2.986e-05", "step": 7400, "steps": "23.59s,7400/16595" }, { "epoch": 2.2298885206387467, "eta": "144:31:28", "grad_norm": 0.0051, "loss": 0.0486, "lr": "2.985e-05", "step": 7401, "steps": "56.59s,7401/16595" }, { "epoch": 2.230189816209702, "eta": "60:09:47", "grad_norm": 0.0061, "loss": 0.0446, "lr": "2.985e-05", "step": 7402, "steps": "23.56s,7402/16595" }, { "epoch": 2.230491111780657, "eta": "60:00:12", "grad_norm": 0.0056, "loss": 0.0446, "lr": "2.984e-05", "step": 7403, "steps": "23.5s,7403/16595" }, { "epoch": 2.230792407351612, "eta": "60:15:07", "grad_norm": 0.0049, "loss": 0.0569, "lr": "2.984e-05", "step": 7404, "steps": "23.6s,7404/16595" }, { "epoch": 2.231093702922567, "eta": "61:32:50", "grad_norm": 0.0074, "loss": 0.0769, "lr": "2.983e-05", "step": 7405, "steps": "24.11s,7405/16595" }, { "epoch": 2.231394998493522, "eta": "60:17:24", "grad_norm": 0.0042, "loss": 0.0483, "lr": "2.983e-05", "step": 7406, "steps": "23.62s,7406/16595" }, { "epoch": 2.231696294064477, "eta": "60:43:02", "grad_norm": 0.0052, "loss": 0.0641, "lr": "2.982e-05", "step": 7407, "steps": "23.79s,7407/16595" }, { "epoch": 2.2319975896354323, "eta": "59:49:03", "grad_norm": 0.0062, "loss": 0.0518, "lr": "2.982e-05", "step": 7408, "steps": "23.44s,7408/16595" }, { "epoch": 2.2322988852063874, "eta": "60:31:31", "grad_norm": 0.0057, "loss": 0.0423, "lr": "2.981e-05", "step": 7409, "steps": "23.72s,7409/16595" }, { "epoch": 2.2326001807773426, "eta": "61:04:48", "grad_norm": 0.0065, "loss": 0.046, "lr": "2.981e-05", "step": 7410, "steps": "23.94s,7410/16595" }, { "epoch": 2.2329014763482977, "eta": "60:01:39", "grad_norm": 0.0061, "loss": 0.0521, "lr": "2.980e-05", "step": 7411, "steps": "23.53s,7411/16595" }, { "epoch": 2.233202771919253, "eta": "60:33:24", "grad_norm": 0.006, "loss": 0.0531, "lr": "2.980e-05", "step": 7412, "steps": "23.74s,7412/16595" }, { "epoch": 2.233504067490208, "eta": "60:06:59", "grad_norm": 0.0046, "loss": 0.0522, "lr": "2.979e-05", "step": 7413, "steps": "23.57s,7413/16595" }, { "epoch": 2.233805363061163, "eta": "59:49:46", "grad_norm": 0.0065, "loss": 0.067, "lr": "2.979e-05", "step": 7414, "steps": "23.46s,7414/16595" }, { "epoch": 2.2341066586321183, "eta": "61:44:07", "grad_norm": 0.0053, "loss": 0.0695, "lr": "2.979e-05", "step": 7415, "steps": "24.21s,7415/16595" }, { "epoch": 2.234407954203073, "eta": "61:00:53", "grad_norm": 0.0051, "loss": 0.0545, "lr": "2.978e-05", "step": 7416, "steps": "23.93s,7416/16595" }, { "epoch": 2.234709249774028, "eta": "60:23:46", "grad_norm": 0.0048, "loss": 0.0544, "lr": "2.978e-05", "step": 7417, "steps": "23.69s,7417/16595" }, { "epoch": 2.2350105453449833, "eta": "60:26:26", "grad_norm": 0.0056, "loss": 0.0481, "lr": "2.977e-05", "step": 7418, "steps": "23.71s,7418/16595" }, { "epoch": 2.2353118409159385, "eta": "58:54:17", "grad_norm": 0.006, "loss": 0.049, "lr": "2.977e-05", "step": 7419, "steps": "23.11s,7419/16595" }, { "epoch": 2.2356131364868936, "eta": "60:25:39", "grad_norm": 0.0053, "loss": 0.0577, "lr": "2.976e-05", "step": 7420, "steps": "23.71s,7420/16595" }, { "epoch": 2.235914432057849, "eta": "60:16:05", "grad_norm": 0.007, "loss": 0.0382, "lr": "2.976e-05", "step": 7421, "steps": "23.65s,7421/16595" }, { "epoch": 2.236215727628804, "eta": "60:04:59", "grad_norm": 0.0057, "loss": 0.0461, "lr": "2.975e-05", "step": 7422, "steps": "23.58s,7422/16595" }, { "epoch": 2.236517023199759, "eta": "60:42:48", "grad_norm": 0.0046, "loss": 0.039, "lr": "2.975e-05", "step": 7423, "steps": "23.83s,7423/16595" }, { "epoch": 2.2368183187707142, "eta": "59:38:13", "grad_norm": 0.0055, "loss": 0.0689, "lr": "2.974e-05", "step": 7424, "steps": "23.41s,7424/16595" }, { "epoch": 2.237119614341669, "eta": "60:11:27", "grad_norm": 0.0047, "loss": 0.0485, "lr": "2.974e-05", "step": 7425, "steps": "23.63s,7425/16595" }, { "epoch": 2.237420909912624, "eta": "60:26:20", "grad_norm": 0.0053, "loss": 0.0643, "lr": "2.973e-05", "step": 7426, "steps": "23.73s,7426/16595" }, { "epoch": 2.2377222054835793, "eta": "60:19:49", "grad_norm": 0.0053, "loss": 0.048, "lr": "2.973e-05", "step": 7427, "steps": "23.69s,7427/16595" }, { "epoch": 2.2380235010545344, "eta": "60:45:24", "grad_norm": 0.0069, "loss": 0.0591, "lr": "2.972e-05", "step": 7428, "steps": "23.86s,7428/16595" }, { "epoch": 2.2383247966254896, "eta": "60:25:09", "grad_norm": 0.0077, "loss": 0.0681, "lr": "2.972e-05", "step": 7429, "steps": "23.73s,7429/16595" }, { "epoch": 2.2386260921964447, "eta": "60:53:46", "grad_norm": 0.0066, "loss": 0.0431, "lr": "2.971e-05", "step": 7430, "steps": "23.92s,7430/16595" }, { "epoch": 2.2389273877674, "eta": "60:47:16", "grad_norm": 0.0057, "loss": 0.0514, "lr": "2.971e-05", "step": 7431, "steps": "23.88s,7431/16595" }, { "epoch": 2.239228683338355, "eta": "59:42:43", "grad_norm": 0.005, "loss": 0.0596, "lr": "2.971e-05", "step": 7432, "steps": "23.46s,7432/16595" }, { "epoch": 2.23952997890931, "eta": "60:28:09", "grad_norm": 0.0073, "loss": 0.0395, "lr": "2.970e-05", "step": 7433, "steps": "23.76s,7433/16595" }, { "epoch": 2.2398312744802653, "eta": "60:07:54", "grad_norm": 0.006, "loss": 0.0471, "lr": "2.970e-05", "step": 7434, "steps": "23.63s,7434/16595" }, { "epoch": 2.2401325700512205, "eta": "61:17:44", "grad_norm": 0.0055, "loss": 0.0632, "lr": "2.969e-05", "step": 7435, "steps": "24.09s,7435/16595" }, { "epoch": 2.240433865622175, "eta": "60:31:32", "grad_norm": 0.0049, "loss": 0.055, "lr": "2.969e-05", "step": 7436, "steps": "23.79s,7436/16595" }, { "epoch": 2.2407351611931303, "eta": "60:08:15", "grad_norm": 0.0057, "loss": 0.049, "lr": "2.968e-05", "step": 7437, "steps": "23.64s,7437/16595" }, { "epoch": 2.2410364567640855, "eta": "59:38:51", "grad_norm": 0.0048, "loss": 0.064, "lr": "2.968e-05", "step": 7438, "steps": "23.45s,7438/16595" }, { "epoch": 2.2413377523350406, "eta": "60:36:27", "grad_norm": 0.0064, "loss": 0.0484, "lr": "2.967e-05", "step": 7439, "steps": "23.83s,7439/16595" }, { "epoch": 2.2416390479059958, "eta": "60:49:47", "grad_norm": 0.0049, "loss": 0.0445, "lr": "2.967e-05", "step": 7440, "steps": "23.92s,7440/16595" }, { "epoch": 2.241940343476951, "eta": "60:09:43", "grad_norm": 0.0044, "loss": 0.0548, "lr": "2.966e-05", "step": 7441, "steps": "23.66s,7441/16595" }, { "epoch": 2.242241639047906, "eta": "59:57:07", "grad_norm": 0.0042, "loss": 0.053, "lr": "2.966e-05", "step": 7442, "steps": "23.58s,7442/16595" }, { "epoch": 2.2425429346188612, "eta": "60:45:32", "grad_norm": 0.0052, "loss": 0.046, "lr": "2.965e-05", "step": 7443, "steps": "23.9s,7443/16595" }, { "epoch": 2.2428442301898164, "eta": "59:56:20", "grad_norm": 0.0051, "loss": 0.058, "lr": "2.965e-05", "step": 7444, "steps": "23.58s,7444/16595" }, { "epoch": 2.243145525760771, "eta": "60:08:09", "grad_norm": 0.0054, "loss": 0.0504, "lr": "2.964e-05", "step": 7445, "steps": "23.66s,7445/16595" }, { "epoch": 2.2434468213317262, "eta": "61:11:47", "grad_norm": 0.0055, "loss": 0.0436, "lr": "2.964e-05", "step": 7446, "steps": "24.08s,7446/16595" }, { "epoch": 2.2437481169026814, "eta": "60:51:34", "grad_norm": 0.0044, "loss": 0.0558, "lr": "2.964e-05", "step": 7447, "steps": "23.95s,7447/16595" }, { "epoch": 2.2440494124736365, "eta": "60:49:39", "grad_norm": 0.0054, "loss": 0.0439, "lr": "2.963e-05", "step": 7448, "steps": "23.94s,7448/16595" }, { "epoch": 2.2443507080445917, "eta": "60:23:20", "grad_norm": 0.0052, "loss": 0.0538, "lr": "2.963e-05", "step": 7449, "steps": "23.77s,7449/16595" }, { "epoch": 2.244652003615547, "eta": "60:42:45", "grad_norm": 0.0057, "loss": 0.049, "lr": "2.962e-05", "step": 7450, "steps": "23.9s,7450/16595" }, { "epoch": 2.244953299186502, "eta": "60:54:33", "grad_norm": 0.0046, "loss": 0.051, "lr": "2.962e-05", "step": 7451, "steps": "23.98s,7451/16595" }, { "epoch": 2.245254594757457, "eta": "60:11:29", "grad_norm": 0.0055, "loss": 0.0546, "lr": "2.961e-05", "step": 7452, "steps": "23.7s,7452/16595" }, { "epoch": 2.2455558903284123, "eta": "59:43:39", "grad_norm": 0.0054, "loss": 0.0587, "lr": "2.961e-05", "step": 7453, "steps": "23.52s,7453/16595" }, { "epoch": 2.2458571858993674, "eta": "60:45:44", "grad_norm": 0.0053, "loss": 0.0563, "lr": "2.960e-05", "step": 7454, "steps": "23.93s,7454/16595" }, { "epoch": 2.2461584814703226, "eta": "61:14:16", "grad_norm": 0.007, "loss": 0.0457, "lr": "2.960e-05", "step": 7455, "steps": "24.12s,7455/16595" }, { "epoch": 2.2464597770412773, "eta": "60:28:10", "grad_norm": 0.0063, "loss": 0.0661, "lr": "2.959e-05", "step": 7456, "steps": "23.82s,7456/16595" }, { "epoch": 2.2467610726122325, "eta": "60:47:35", "grad_norm": 0.0066, "loss": 0.0522, "lr": "2.959e-05", "step": 7457, "steps": "23.95s,7457/16595" }, { "epoch": 2.2470623681831876, "eta": "60:04:32", "grad_norm": 0.007, "loss": 0.0433, "lr": "2.958e-05", "step": 7458, "steps": "23.67s,7458/16595" }, { "epoch": 2.2473636637541428, "eta": "59:58:03", "grad_norm": 0.005, "loss": 0.0622, "lr": "2.958e-05", "step": 7459, "steps": "23.63s,7459/16595" }, { "epoch": 2.247664959325098, "eta": "60:52:28", "grad_norm": 0.0056, "loss": 0.0529, "lr": "2.957e-05", "step": 7460, "steps": "23.99s,7460/16595" }, { "epoch": 2.247966254896053, "eta": "60:12:29", "grad_norm": 0.0048, "loss": 0.0509, "lr": "2.957e-05", "step": 7461, "steps": "23.73s,7461/16595" }, { "epoch": 2.248267550467008, "eta": "60:19:42", "grad_norm": 0.0045, "loss": 0.0625, "lr": "2.956e-05", "step": 7462, "steps": "23.78s,7462/16595" }, { "epoch": 2.2485688460379634, "eta": "61:08:01", "grad_norm": 0.0058, "loss": 0.0539, "lr": "2.956e-05", "step": 7463, "steps": "24.1s,7463/16595" }, { "epoch": 2.2488701416089185, "eta": "59:40:52", "grad_norm": 0.005, "loss": 0.042, "lr": "2.956e-05", "step": 7464, "steps": "23.53s,7464/16595" }, { "epoch": 2.2491714371798732, "eta": "60:03:18", "grad_norm": 0.0049, "loss": 0.0673, "lr": "2.955e-05", "step": 7465, "steps": "23.68s,7465/16595" }, { "epoch": 2.2494727327508284, "eta": "60:25:44", "grad_norm": 0.004, "loss": 0.0418, "lr": "2.955e-05", "step": 7466, "steps": "23.83s,7466/16595" }, { "epoch": 2.2497740283217835, "eta": "60:16:12", "grad_norm": 0.0057, "loss": 0.0497, "lr": "2.954e-05", "step": 7467, "steps": "23.77s,7467/16595" }, { "epoch": 2.2500753238927387, "eta": "59:21:03", "grad_norm": 0.0053, "loss": 0.056, "lr": "2.954e-05", "step": 7468, "steps": "23.41s,7468/16595" }, { "epoch": 2.250376619463694, "eta": "60:26:03", "grad_norm": 0.0062, "loss": 0.0521, "lr": "2.953e-05", "step": 7469, "steps": "23.84s,7469/16595" }, { "epoch": 2.250677915034649, "eta": "60:02:51", "grad_norm": 0.006, "loss": 0.0639, "lr": "2.953e-05", "step": 7470, "steps": "23.69s,7470/16595" }, { "epoch": 2.250979210605604, "eta": "59:50:17", "grad_norm": 0.0045, "loss": 0.0561, "lr": "2.952e-05", "step": 7471, "steps": "23.61s,7471/16595" }, { "epoch": 2.2512805061765593, "eta": "60:02:03", "grad_norm": 0.0068, "loss": 0.0512, "lr": "2.952e-05", "step": 7472, "steps": "23.69s,7472/16595" }, { "epoch": 2.2515818017475144, "eta": "60:32:04", "grad_norm": 0.0046, "loss": 0.0447, "lr": "2.951e-05", "step": 7473, "steps": "23.89s,7473/16595" }, { "epoch": 2.2518830973184696, "eta": "59:27:49", "grad_norm": 0.0055, "loss": 0.0488, "lr": "2.951e-05", "step": 7474, "steps": "23.47s,7474/16595" }, { "epoch": 2.2521843928894247, "eta": "59:36:33", "grad_norm": 0.006, "loss": 0.0362, "lr": "2.950e-05", "step": 7475, "steps": "23.53s,7475/16595" }, { "epoch": 2.2524856884603794, "eta": "59:36:10", "grad_norm": 0.0053, "loss": 0.0314, "lr": "2.950e-05", "step": 7476, "steps": "23.53s,7476/16595" }, { "epoch": 2.2527869840313346, "eta": "60:07:41", "grad_norm": 0.0065, "loss": 0.0365, "lr": "2.949e-05", "step": 7477, "steps": "23.74s,7477/16595" }, { "epoch": 2.2530882796022897, "eta": "60:10:19", "grad_norm": 0.0053, "loss": 0.0521, "lr": "2.949e-05", "step": 7478, "steps": "23.76s,7478/16595" }, { "epoch": 2.253389575173245, "eta": "59:53:13", "grad_norm": 0.0052, "loss": 0.0541, "lr": "2.949e-05", "step": 7479, "steps": "23.65s,7479/16595" }, { "epoch": 2.2536908707442, "eta": "60:20:10", "grad_norm": 0.0049, "loss": 0.0664, "lr": "2.948e-05", "step": 7480, "steps": "23.83s,7480/16595" }, { "epoch": 2.253992166315155, "eta": "61:11:25", "grad_norm": 0.0062, "loss": 0.056, "lr": "2.948e-05", "step": 7481, "steps": "24.17s,7481/16595" }, { "epoch": 2.2542934618861104, "eta": "60:26:58", "grad_norm": 0.0053, "loss": 0.0536, "lr": "2.947e-05", "step": 7482, "steps": "23.88s,7482/16595" }, { "epoch": 2.2545947574570655, "eta": "60:34:10", "grad_norm": 0.0048, "loss": 0.0581, "lr": "2.947e-05", "step": 7483, "steps": "23.93s,7483/16595" }, { "epoch": 2.2548960530280207, "eta": "60:30:44", "grad_norm": 0.005, "loss": 0.0544, "lr": "2.946e-05", "step": 7484, "steps": "23.91s,7484/16595" }, { "epoch": 2.2551973485989754, "eta": "60:19:42", "grad_norm": 0.0052, "loss": 0.0497, "lr": "2.946e-05", "step": 7485, "steps": "23.84s,7485/16595" }, { "epoch": 2.2554986441699305, "eta": "61:06:22", "grad_norm": 0.0086, "loss": 0.0492, "lr": "2.945e-05", "step": 7486, "steps": "24.15s,7486/16595" }, { "epoch": 2.2557999397408857, "eta": "60:55:20", "grad_norm": 0.0056, "loss": 0.0513, "lr": "2.945e-05", "step": 7487, "steps": "24.08s,7487/16595" }, { "epoch": 2.256101235311841, "eta": "60:21:33", "grad_norm": 0.0057, "loss": 0.045, "lr": "2.944e-05", "step": 7488, "steps": "23.86s,7488/16595" }, { "epoch": 2.256402530882796, "eta": "61:00:36", "grad_norm": 0.006, "loss": 0.043, "lr": "2.944e-05", "step": 7489, "steps": "24.12s,7489/16595" }, { "epoch": 2.256703826453751, "eta": "60:05:34", "grad_norm": 0.0063, "loss": 0.0439, "lr": "2.943e-05", "step": 7490, "steps": "23.76s,7490/16595" }, { "epoch": 2.2570051220247063, "eta": "59:50:00", "grad_norm": 0.0069, "loss": 0.0425, "lr": "2.943e-05", "step": 7491, "steps": "23.66s,7491/16595" }, { "epoch": 2.2573064175956614, "eta": "60:39:40", "grad_norm": 0.005, "loss": 0.0406, "lr": "2.942e-05", "step": 7492, "steps": "23.99s,7492/16595" }, { "epoch": 2.2576077131666166, "eta": "60:04:23", "grad_norm": 0.0049, "loss": 0.055, "lr": "2.942e-05", "step": 7493, "steps": "23.76s,7493/16595" }, { "epoch": 2.2579090087375717, "eta": "60:03:59", "grad_norm": 0.0057, "loss": 0.0456, "lr": "2.941e-05", "step": 7494, "steps": "23.76s,7494/16595" }, { "epoch": 2.258210304308527, "eta": "59:16:35", "grad_norm": 0.0049, "loss": 0.0553, "lr": "2.941e-05", "step": 7495, "steps": "23.45s,7495/16595" }, { "epoch": 2.2585115998794816, "eta": "59:11:38", "grad_norm": 0.0047, "loss": 0.0541, "lr": "2.941e-05", "step": 7496, "steps": "23.42s,7496/16595" }, { "epoch": 2.2588128954504367, "eta": "59:55:13", "grad_norm": 0.0052, "loss": 0.0497, "lr": "2.940e-05", "step": 7497, "steps": "23.71s,7497/16595" }, { "epoch": 2.259114191021392, "eta": "60:22:07", "grad_norm": 0.0047, "loss": 0.0364, "lr": "2.940e-05", "step": 7498, "steps": "23.89s,7498/16595" }, { "epoch": 2.259415486592347, "eta": "60:32:20", "grad_norm": 0.0046, "loss": 0.0436, "lr": "2.939e-05", "step": 7499, "steps": "23.96s,7499/16595" }, { "epoch": 2.259716782163302, "eta": "59:19:10", "grad_norm": 0.0051, "loss": 0.048, "lr": "2.939e-05", "step": 7500, "steps": "23.48s,7500/16595" }, { "epoch": 2.2600180777342573, "eta": "60:13:20", "grad_norm": 0.005, "loss": 0.0493, "lr": "2.938e-05", "step": 7501, "steps": "23.84s,7501/16595" }, { "epoch": 2.2603193733052125, "eta": "59:42:38", "grad_norm": 0.0059, "loss": 0.0498, "lr": "2.938e-05", "step": 7502, "steps": "23.64s,7502/16595" }, { "epoch": 2.2606206688761676, "eta": "61:07:06", "grad_norm": 0.0046, "loss": 0.058, "lr": "2.937e-05", "step": 7503, "steps": "24.2s,7503/16595" }, { "epoch": 2.260921964447123, "eta": "60:13:40", "grad_norm": 0.0085, "loss": 0.0612, "lr": "2.937e-05", "step": 7504, "steps": "23.85s,7504/16595" }, { "epoch": 2.2612232600180775, "eta": "60:28:25", "grad_norm": 0.0049, "loss": 0.0403, "lr": "2.936e-05", "step": 7505, "steps": "23.95s,7505/16595" }, { "epoch": 2.2615245555890326, "eta": "60:08:19", "grad_norm": 0.0058, "loss": 0.054, "lr": "2.936e-05", "step": 7506, "steps": "23.82s,7506/16595" }, { "epoch": 2.261825851159988, "eta": "60:32:10", "grad_norm": 0.0051, "loss": 0.0479, "lr": "2.935e-05", "step": 7507, "steps": "23.98s,7507/16595" }, { "epoch": 2.262127146730943, "eta": "60:12:04", "grad_norm": 0.0081, "loss": 0.0579, "lr": "2.935e-05", "step": 7508, "steps": "23.85s,7508/16595" }, { "epoch": 2.262428442301898, "eta": "60:32:53", "grad_norm": 0.0044, "loss": 0.0473, "lr": "2.934e-05", "step": 7509, "steps": "23.99s,7509/16595" }, { "epoch": 2.2627297378728533, "eta": "59:21:19", "grad_norm": 0.0052, "loss": 0.0558, "lr": "2.934e-05", "step": 7510, "steps": "23.52s,7510/16595" }, { "epoch": 2.2630310334438084, "eta": "60:41:10", "grad_norm": 0.0058, "loss": 0.0551, "lr": "2.933e-05", "step": 7511, "steps": "24.05s,7511/16595" }, { "epoch": 2.2633323290147636, "eta": "59:55:21", "grad_norm": 0.0054, "loss": 0.0481, "lr": "2.933e-05", "step": 7512, "steps": "23.75s,7512/16595" }, { "epoch": 2.2636336245857187, "eta": "60:23:43", "grad_norm": 0.0051, "loss": 0.0639, "lr": "2.933e-05", "step": 7513, "steps": "23.94s,7513/16595" }, { "epoch": 2.263934920156674, "eta": "59:48:30", "grad_norm": 0.0053, "loss": 0.0471, "lr": "2.932e-05", "step": 7514, "steps": "23.71s,7514/16595" }, { "epoch": 2.264236215727629, "eta": "59:36:00", "grad_norm": 0.0147, "loss": 0.0503, "lr": "2.932e-05", "step": 7515, "steps": "23.63s,7515/16595" }, { "epoch": 2.2645375112985837, "eta": "59:47:43", "grad_norm": 0.006, "loss": 0.0467, "lr": "2.931e-05", "step": 7516, "steps": "23.71s,7516/16595" }, { "epoch": 2.264838806869539, "eta": "60:03:57", "grad_norm": 0.0041, "loss": 0.054, "lr": "2.931e-05", "step": 7517, "steps": "23.82s,7517/16595" }, { "epoch": 2.265140102440494, "eta": "59:12:07", "grad_norm": 0.0067, "loss": 0.0638, "lr": "2.930e-05", "step": 7518, "steps": "23.48s,7518/16595" }, { "epoch": 2.265441398011449, "eta": "59:34:25", "grad_norm": 0.0058, "loss": 0.0474, "lr": "2.930e-05", "step": 7519, "steps": "23.63s,7519/16595" }, { "epoch": 2.2657426935824043, "eta": "59:40:05", "grad_norm": 0.0072, "loss": 0.0539, "lr": "2.929e-05", "step": 7520, "steps": "23.67s,7520/16595" }, { "epoch": 2.2660439891533595, "eta": "59:59:21", "grad_norm": 0.005, "loss": 0.056, "lr": "2.929e-05", "step": 7521, "steps": "23.8s,7521/16595" }, { "epoch": 2.2663452847243146, "eta": "60:27:41", "grad_norm": 0.0053, "loss": 0.0387, "lr": "2.928e-05", "step": 7522, "steps": "23.99s,7522/16595" }, { "epoch": 2.2666465802952698, "eta": "59:51:00", "grad_norm": 0.0045, "loss": 0.0549, "lr": "2.928e-05", "step": 7523, "steps": "23.75s,7523/16595" }, { "epoch": 2.266947875866225, "eta": "61:00:08", "grad_norm": 0.0047, "loss": 0.0508, "lr": "2.927e-05", "step": 7524, "steps": "24.21s,7524/16595" }, { "epoch": 2.2672491714371796, "eta": "59:56:15", "grad_norm": 0.0049, "loss": 0.0367, "lr": "2.927e-05", "step": 7525, "steps": "23.79s,7525/16595" }, { "epoch": 2.2675504670081352, "eta": "61:05:23", "grad_norm": 0.0071, "loss": 0.0568, "lr": "2.926e-05", "step": 7526, "steps": "24.25s,7526/16595" }, { "epoch": 2.26785176257909, "eta": "59:52:26", "grad_norm": 0.005, "loss": 0.0591, "lr": "2.926e-05", "step": 7527, "steps": "23.77s,7527/16595" }, { "epoch": 2.268153058150045, "eta": "60:01:06", "grad_norm": 0.0068, "loss": 0.0331, "lr": "2.926e-05", "step": 7528, "steps": "23.83s,7528/16595" }, { "epoch": 2.2684543537210002, "eta": "60:05:14", "grad_norm": 0.0056, "loss": 0.054, "lr": "2.925e-05", "step": 7529, "steps": "23.86s,7529/16595" }, { "epoch": 2.2687556492919554, "eta": "60:42:37", "grad_norm": 0.0058, "loss": 0.0504, "lr": "2.925e-05", "step": 7530, "steps": "24.11s,7530/16595" }, { "epoch": 2.2690569448629105, "eta": "60:01:25", "grad_norm": 0.0045, "loss": 0.0483, "lr": "2.924e-05", "step": 7531, "steps": "23.84s,7531/16595" }, { "epoch": 2.2693582404338657, "eta": "59:47:26", "grad_norm": 0.005, "loss": 0.0633, "lr": "2.924e-05", "step": 7532, "steps": "23.75s,7532/16595" }, { "epoch": 2.269659536004821, "eta": "60:42:55", "grad_norm": 0.0063, "loss": 0.0552, "lr": "2.923e-05", "step": 7533, "steps": "24.12s,7533/16595" }, { "epoch": 2.269960831575776, "eta": "60:53:05", "grad_norm": 0.0069, "loss": 0.0573, "lr": "2.923e-05", "step": 7534, "steps": "24.19s,7534/16595" }, { "epoch": 2.270262127146731, "eta": "60:58:43", "grad_norm": 0.0056, "loss": 0.0392, "lr": "2.922e-05", "step": 7535, "steps": "24.23s,7535/16595" }, { "epoch": 2.270563422717686, "eta": "61:01:20", "grad_norm": 0.0057, "loss": 0.0528, "lr": "2.922e-05", "step": 7536, "steps": "24.25s,7536/16595" }, { "epoch": 2.270864718288641, "eta": "60:30:44", "grad_norm": 0.0059, "loss": 0.0512, "lr": "2.921e-05", "step": 7537, "steps": "24.05s,7537/16595" }, { "epoch": 2.271166013859596, "eta": "60:07:42", "grad_norm": 0.0047, "loss": 0.0474, "lr": "2.921e-05", "step": 7538, "steps": "23.9s,7538/16595" }, { "epoch": 2.2714673094305513, "eta": "60:51:04", "grad_norm": 0.0056, "loss": 0.0531, "lr": "2.920e-05", "step": 7539, "steps": "24.19s,7539/16595" }, { "epoch": 2.2717686050015065, "eta": "59:47:17", "grad_norm": 0.0065, "loss": 0.0498, "lr": "2.920e-05", "step": 7540, "steps": "23.77s,7540/16595" }, { "epoch": 2.2720699005724616, "eta": "60:08:01", "grad_norm": 0.0049, "loss": 0.047, "lr": "2.919e-05", "step": 7541, "steps": "23.91s,7541/16595" }, { "epoch": 2.2723711961434168, "eta": "60:37:47", "grad_norm": 0.0048, "loss": 0.0553, "lr": "2.919e-05", "step": 7542, "steps": "24.11s,7542/16595" }, { "epoch": 2.272672491714372, "eta": "59:49:07", "grad_norm": 0.0053, "loss": 0.0477, "lr": "2.918e-05", "step": 7543, "steps": "23.79s,7543/16595" }, { "epoch": 2.272973787285327, "eta": "59:53:14", "grad_norm": 0.0053, "loss": 0.0712, "lr": "2.918e-05", "step": 7544, "steps": "23.82s,7544/16595" }, { "epoch": 2.2732750828562818, "eta": "60:45:38", "grad_norm": 0.0063, "loss": 0.0419, "lr": "2.918e-05", "step": 7545, "steps": "24.17s,7545/16595" }, { "epoch": 2.2735763784272374, "eta": "59:59:59", "grad_norm": 0.0053, "loss": 0.0514, "lr": "2.917e-05", "step": 7546, "steps": "23.87s,7546/16595" }, { "epoch": 2.273877673998192, "eta": "59:56:34", "grad_norm": 0.0056, "loss": 0.047, "lr": "2.917e-05", "step": 7547, "steps": "23.85s,7547/16595" }, { "epoch": 2.2741789695691472, "eta": "59:59:11", "grad_norm": 0.0065, "loss": 0.0508, "lr": "2.916e-05", "step": 7548, "steps": "23.87s,7548/16595" }, { "epoch": 2.2744802651401024, "eta": "59:40:42", "grad_norm": 0.0048, "loss": 0.0553, "lr": "2.916e-05", "step": 7549, "steps": "23.75s,7549/16595" }, { "epoch": 2.2747815607110575, "eta": "60:08:57", "grad_norm": 0.0049, "loss": 0.0624, "lr": "2.915e-05", "step": 7550, "steps": "23.94s,7550/16595" }, { "epoch": 2.2750828562820127, "eta": "59:58:00", "grad_norm": 0.0045, "loss": 0.0503, "lr": "2.915e-05", "step": 7551, "steps": "23.87s,7551/16595" }, { "epoch": 2.275384151852968, "eta": "59:51:34", "grad_norm": 0.0056, "loss": 0.054, "lr": "2.914e-05", "step": 7552, "steps": "23.83s,7552/16595" }, { "epoch": 2.275685447423923, "eta": "60:06:15", "grad_norm": 0.005, "loss": 0.0595, "lr": "2.914e-05", "step": 7553, "steps": "23.93s,7553/16595" }, { "epoch": 2.275986742994878, "eta": "59:47:46", "grad_norm": 0.0049, "loss": 0.0598, "lr": "2.913e-05", "step": 7554, "steps": "23.81s,7554/16595" }, { "epoch": 2.2762880385658333, "eta": "59:20:15", "grad_norm": 0.0047, "loss": 0.0541, "lr": "2.913e-05", "step": 7555, "steps": "23.63s,7555/16595" }, { "epoch": 2.276589334136788, "eta": "59:18:21", "grad_norm": 0.0063, "loss": 0.0434, "lr": "2.912e-05", "step": 7556, "steps": "23.62s,7556/16595" }, { "epoch": 2.276890629707743, "eta": "59:49:35", "grad_norm": 0.0052, "loss": 0.0553, "lr": "2.912e-05", "step": 7557, "steps": "23.83s,7557/16595" }, { "epoch": 2.2771919252786983, "eta": "59:50:42", "grad_norm": 0.0069, "loss": 0.0532, "lr": "2.911e-05", "step": 7558, "steps": "23.84s,7558/16595" }, { "epoch": 2.2774932208496534, "eta": "59:45:47", "grad_norm": 0.0047, "loss": 0.0421, "lr": "2.911e-05", "step": 7559, "steps": "23.81s,7559/16595" }, { "epoch": 2.2777945164206086, "eta": "59:16:46", "grad_norm": 0.0049, "loss": 0.0658, "lr": "2.910e-05", "step": 7560, "steps": "23.62s,7560/16595" }, { "epoch": 2.2780958119915637, "eta": "59:23:54", "grad_norm": 0.0092, "loss": 0.0427, "lr": "2.910e-05", "step": 7561, "steps": "23.67s,7561/16595" }, { "epoch": 2.278397107562519, "eta": "59:43:05", "grad_norm": 0.0053, "loss": 0.0532, "lr": "2.910e-05", "step": 7562, "steps": "23.8s,7562/16595" }, { "epoch": 2.278698403133474, "eta": "59:44:11", "grad_norm": 0.0057, "loss": 0.0411, "lr": "2.909e-05", "step": 7563, "steps": "23.81s,7563/16595" }, { "epoch": 2.278999698704429, "eta": "60:27:27", "grad_norm": 0.0068, "loss": 0.0486, "lr": "2.909e-05", "step": 7564, "steps": "24.1s,7564/16595" }, { "epoch": 2.279300994275384, "eta": "59:10:17", "grad_norm": 0.0055, "loss": 0.0665, "lr": "2.908e-05", "step": 7565, "steps": "23.59s,7565/16595" }, { "epoch": 2.2796022898463395, "eta": "58:42:48", "grad_norm": 0.0059, "loss": 0.0525, "lr": "2.908e-05", "step": 7566, "steps": "23.41s,7566/16595" }, { "epoch": 2.279903585417294, "eta": "59:41:06", "grad_norm": 0.0054, "loss": 0.0668, "lr": "2.907e-05", "step": 7567, "steps": "23.8s,7567/16595" }, { "epoch": 2.2802048809882494, "eta": "60:06:17", "grad_norm": 0.0062, "loss": 0.0626, "lr": "2.907e-05", "step": 7568, "steps": "23.97s,7568/16595" }, { "epoch": 2.2805061765592045, "eta": "60:51:01", "grad_norm": 0.0055, "loss": 0.0509, "lr": "2.906e-05", "step": 7569, "steps": "24.27s,7569/16595" }, { "epoch": 2.2808074721301597, "eta": "59:33:54", "grad_norm": 0.0051, "loss": 0.0632, "lr": "2.906e-05", "step": 7570, "steps": "23.76s,7570/16595" }, { "epoch": 2.281108767701115, "eta": "59:48:32", "grad_norm": 0.0067, "loss": 0.0488, "lr": "2.905e-05", "step": 7571, "steps": "23.86s,7571/16595" }, { "epoch": 2.28141006327207, "eta": "60:27:14", "grad_norm": 0.0046, "loss": 0.0495, "lr": "2.905e-05", "step": 7572, "steps": "24.12s,7572/16595" }, { "epoch": 2.281711358843025, "eta": "59:56:46", "grad_norm": 0.0058, "loss": 0.0556, "lr": "2.904e-05", "step": 7573, "steps": "23.92s,7573/16595" }, { "epoch": 2.2820126544139803, "eta": "59:41:20", "grad_norm": 0.006, "loss": 0.061, "lr": "2.904e-05", "step": 7574, "steps": "23.82s,7574/16595" }, { "epoch": 2.2823139499849354, "eta": "59:43:56", "grad_norm": 0.0045, "loss": 0.0502, "lr": "2.903e-05", "step": 7575, "steps": "23.84s,7575/16595" }, { "epoch": 2.28261524555589, "eta": "59:39:02", "grad_norm": 0.0072, "loss": 0.0603, "lr": "2.903e-05", "step": 7576, "steps": "23.81s,7576/16595" }, { "epoch": 2.2829165411268453, "eta": "60:31:14", "grad_norm": 0.0081, "loss": 0.0339, "lr": "2.902e-05", "step": 7577, "steps": "24.16s,7577/16595" }, { "epoch": 2.2832178366978004, "eta": "60:30:50", "grad_norm": 0.0058, "loss": 0.038, "lr": "2.902e-05", "step": 7578, "steps": "24.16s,7578/16595" }, { "epoch": 2.2835191322687556, "eta": "59:30:20", "grad_norm": 0.0049, "loss": 0.046, "lr": "2.902e-05", "step": 7579, "steps": "23.76s,7579/16595" }, { "epoch": 2.2838204278397107, "eta": "60:22:31", "grad_norm": 0.006, "loss": 0.0578, "lr": "2.901e-05", "step": 7580, "steps": "24.11s,7580/16595" }, { "epoch": 2.284121723410666, "eta": "60:25:07", "grad_norm": 0.0068, "loss": 0.0365, "lr": "2.901e-05", "step": 7581, "steps": "24.13s,7581/16595" }, { "epoch": 2.284423018981621, "eta": "59:35:09", "grad_norm": 0.0055, "loss": 0.057, "lr": "2.900e-05", "step": 7582, "steps": "23.8s,7582/16595" }, { "epoch": 2.284724314552576, "eta": "60:30:20", "grad_norm": 0.0053, "loss": 0.0514, "lr": "2.900e-05", "step": 7583, "steps": "24.17s,7583/16595" }, { "epoch": 2.2850256101235313, "eta": "59:32:51", "grad_norm": 0.0056, "loss": 0.0573, "lr": "2.899e-05", "step": 7584, "steps": "23.79s,7584/16595" }, { "epoch": 2.285326905694486, "eta": "59:53:29", "grad_norm": 0.0052, "loss": 0.0492, "lr": "2.899e-05", "step": 7585, "steps": "23.93s,7585/16595" }, { "epoch": 2.2856282012654416, "eta": "59:39:34", "grad_norm": 0.0056, "loss": 0.0516, "lr": "2.898e-05", "step": 7586, "steps": "23.84s,7586/16595" }, { "epoch": 2.2859294968363963, "eta": "59:39:10", "grad_norm": 0.0064, "loss": 0.0543, "lr": "2.898e-05", "step": 7587, "steps": "23.84s,7587/16595" }, { "epoch": 2.2862307924073515, "eta": "59:29:46", "grad_norm": 0.0057, "loss": 0.0638, "lr": "2.897e-05", "step": 7588, "steps": "23.78s,7588/16595" }, { "epoch": 2.2865320879783066, "eta": "59:42:53", "grad_norm": 0.0062, "loss": 0.0581, "lr": "2.897e-05", "step": 7589, "steps": "23.87s,7589/16595" }, { "epoch": 2.286833383549262, "eta": "58:51:27", "grad_norm": 0.005, "loss": 0.0523, "lr": "2.896e-05", "step": 7590, "steps": "23.53s,7590/16595" }, { "epoch": 2.287134679120217, "eta": "60:33:06", "grad_norm": 0.0065, "loss": 0.0397, "lr": "2.896e-05", "step": 7591, "steps": "24.21s,7591/16595" }, { "epoch": 2.287435974691172, "eta": "58:58:10", "grad_norm": 0.0065, "loss": 0.0465, "lr": "2.895e-05", "step": 7592, "steps": "23.58s,7592/16595" }, { "epoch": 2.2877372702621273, "eta": "59:38:17", "grad_norm": 0.006, "loss": 0.0656, "lr": "2.895e-05", "step": 7593, "steps": "23.85s,7593/16595" }, { "epoch": 2.2880385658330824, "eta": "59:27:23", "grad_norm": 0.0054, "loss": 0.0518, "lr": "2.894e-05", "step": 7594, "steps": "23.78s,7594/16595" }, { "epoch": 2.2883398614040376, "eta": "59:42:00", "grad_norm": 0.0043, "loss": 0.0483, "lr": "2.894e-05", "step": 7595, "steps": "23.88s,7595/16595" }, { "epoch": 2.2886411569749923, "eta": "59:40:06", "grad_norm": 0.0055, "loss": 0.0516, "lr": "2.894e-05", "step": 7596, "steps": "23.87s,7596/16595" }, { "epoch": 2.2889424525459474, "eta": "59:30:42", "grad_norm": 0.0059, "loss": 0.0572, "lr": "2.893e-05", "step": 7597, "steps": "23.81s,7597/16595" }, { "epoch": 2.2892437481169026, "eta": "59:31:48", "grad_norm": 0.0048, "loss": 0.0617, "lr": "2.893e-05", "step": 7598, "steps": "23.82s,7598/16595" }, { "epoch": 2.2895450436878577, "eta": "59:38:54", "grad_norm": 0.0054, "loss": 0.0574, "lr": "2.892e-05", "step": 7599, "steps": "23.87s,7599/16595" }, { "epoch": 2.289846339258813, "eta": "59:02:31", "grad_norm": 0.0045, "loss": 0.0587, "lr": "2.892e-05", "step": 7600, "steps": "23.63s,7600/16595" }, { "epoch": 2.290147634829768, "eta": "128:18:51", "grad_norm": 0.0049, "loss": 0.0426, "lr": "2.891e-05", "step": 7601, "steps": "51.36s,7601/16595" }, { "epoch": 2.290448930400723, "eta": "59:33:13", "grad_norm": 0.006, "loss": 0.0525, "lr": "2.891e-05", "step": 7602, "steps": "23.84s,7602/16595" }, { "epoch": 2.2907502259716783, "eta": "60:17:46", "grad_norm": 0.0048, "loss": 0.051, "lr": "2.890e-05", "step": 7603, "steps": "24.14s,7603/16595" }, { "epoch": 2.2910515215426335, "eta": "60:29:22", "grad_norm": 0.0058, "loss": 0.0489, "lr": "2.890e-05", "step": 7604, "steps": "24.22s,7604/16595" }, { "epoch": 2.291352817113588, "eta": "60:06:29", "grad_norm": 0.0064, "loss": 0.0491, "lr": "2.889e-05", "step": 7605, "steps": "24.07s,7605/16595" }, { "epoch": 2.2916541126845438, "eta": "58:55:40", "grad_norm": 0.0071, "loss": 0.0503, "lr": "2.889e-05", "step": 7606, "steps": "23.6s,7606/16595" }, { "epoch": 2.2919554082554985, "eta": "59:23:44", "grad_norm": 0.0054, "loss": 0.0409, "lr": "2.888e-05", "step": 7607, "steps": "23.79s,7607/16595" }, { "epoch": 2.2922567038264536, "eta": "59:39:49", "grad_norm": 0.0052, "loss": 0.0483, "lr": "2.888e-05", "step": 7608, "steps": "23.9s,7608/16595" }, { "epoch": 2.292557999397409, "eta": "59:27:26", "grad_norm": 0.0045, "loss": 0.0545, "lr": "2.887e-05", "step": 7609, "steps": "23.82s,7609/16595" }, { "epoch": 2.292859294968364, "eta": "60:11:58", "grad_norm": 0.0051, "loss": 0.0421, "lr": "2.887e-05", "step": 7610, "steps": "24.12s,7610/16595" }, { "epoch": 2.293160590539319, "eta": "59:19:09", "grad_norm": 0.0051, "loss": 0.0434, "lr": "2.886e-05", "step": 7611, "steps": "23.77s,7611/16595" }, { "epoch": 2.2934618861102742, "eta": "60:27:38", "grad_norm": 0.0045, "loss": 0.056, "lr": "2.886e-05", "step": 7612, "steps": "24.23s,7612/16595" }, { "epoch": 2.2937631816812294, "eta": "59:43:49", "grad_norm": 0.0054, "loss": 0.0516, "lr": "2.885e-05", "step": 7613, "steps": "23.94s,7613/16595" }, { "epoch": 2.2940644772521845, "eta": "59:34:26", "grad_norm": 0.0065, "loss": 0.0504, "lr": "2.885e-05", "step": 7614, "steps": "23.88s,7614/16595" }, { "epoch": 2.2943657728231397, "eta": "59:38:31", "grad_norm": 0.0051, "loss": 0.0635, "lr": "2.885e-05", "step": 7615, "steps": "23.91s,7615/16595" }, { "epoch": 2.2946670683940944, "eta": "59:51:36", "grad_norm": 0.006, "loss": 0.043, "lr": "2.884e-05", "step": 7616, "steps": "24.0s,7616/16595" }, { "epoch": 2.2949683639650496, "eta": "59:37:43", "grad_norm": 0.007, "loss": 0.0475, "lr": "2.884e-05", "step": 7617, "steps": "23.91s,7617/16595" }, { "epoch": 2.2952696595360047, "eta": "59:37:20", "grad_norm": 0.0061, "loss": 0.0448, "lr": "2.883e-05", "step": 7618, "steps": "23.91s,7618/16595" }, { "epoch": 2.29557095510696, "eta": "59:27:57", "grad_norm": 0.0042, "loss": 0.0573, "lr": "2.883e-05", "step": 7619, "steps": "23.85s,7619/16595" }, { "epoch": 2.295872250677915, "eta": "59:27:33", "grad_norm": 0.0062, "loss": 0.0444, "lr": "2.882e-05", "step": 7620, "steps": "23.85s,7620/16595" }, { "epoch": 2.29617354624887, "eta": "60:00:04", "grad_norm": 0.0057, "loss": 0.0603, "lr": "2.882e-05", "step": 7621, "steps": "24.07s,7621/16595" }, { "epoch": 2.2964748418198253, "eta": "60:19:06", "grad_norm": 0.0061, "loss": 0.0556, "lr": "2.881e-05", "step": 7622, "steps": "24.2s,7622/16595" }, { "epoch": 2.2967761373907805, "eta": "59:32:21", "grad_norm": 0.0057, "loss": 0.0591, "lr": "2.881e-05", "step": 7623, "steps": "23.89s,7623/16595" }, { "epoch": 2.2970774329617356, "eta": "59:15:30", "grad_norm": 0.0063, "loss": 0.0418, "lr": "2.880e-05", "step": 7624, "steps": "23.78s,7624/16595" }, { "epoch": 2.2973787285326908, "eta": "59:21:05", "grad_norm": 0.0053, "loss": 0.0489, "lr": "2.880e-05", "step": 7625, "steps": "23.82s,7625/16595" }, { "epoch": 2.297680024103646, "eta": "59:50:35", "grad_norm": 0.0071, "loss": 0.0538, "lr": "2.879e-05", "step": 7626, "steps": "24.02s,7626/16595" }, { "epoch": 2.2979813196746006, "eta": "59:24:46", "grad_norm": 0.0052, "loss": 0.0539, "lr": "2.879e-05", "step": 7627, "steps": "23.85s,7627/16595" }, { "epoch": 2.2982826152455558, "eta": "59:37:49", "grad_norm": 0.0056, "loss": 0.0377, "lr": "2.878e-05", "step": 7628, "steps": "23.94s,7628/16595" }, { "epoch": 2.298583910816511, "eta": "58:45:07", "grad_norm": 0.0057, "loss": 0.0637, "lr": "2.878e-05", "step": 7629, "steps": "23.59s,7629/16595" }, { "epoch": 2.298885206387466, "eta": "58:59:40", "grad_norm": 0.0049, "loss": 0.0472, "lr": "2.877e-05", "step": 7630, "steps": "23.69s,7630/16595" }, { "epoch": 2.2991865019584212, "eta": "59:03:46", "grad_norm": 0.0056, "loss": 0.0473, "lr": "2.877e-05", "step": 7631, "steps": "23.72s,7631/16595" }, { "epoch": 2.2994877975293764, "eta": "59:21:17", "grad_norm": 0.0069, "loss": 0.0525, "lr": "2.877e-05", "step": 7632, "steps": "23.84s,7632/16595" }, { "epoch": 2.2997890931003315, "eta": "60:23:38", "grad_norm": 0.0062, "loss": 0.057, "lr": "2.876e-05", "step": 7633, "steps": "24.26s,7633/16595" }, { "epoch": 2.3000903886712867, "eta": "58:59:35", "grad_norm": 0.0053, "loss": 0.0635, "lr": "2.876e-05", "step": 7634, "steps": "23.7s,7634/16595" }, { "epoch": 2.300391684242242, "eta": "59:49:58", "grad_norm": 0.0048, "loss": 0.0552, "lr": "2.875e-05", "step": 7635, "steps": "24.04s,7635/16595" }, { "epoch": 2.3006929798131965, "eta": "59:36:08", "grad_norm": 0.0055, "loss": 0.0371, "lr": "2.875e-05", "step": 7636, "steps": "23.95s,7636/16595" }, { "epoch": 2.3009942753841517, "eta": "59:08:51", "grad_norm": 0.0078, "loss": 0.0505, "lr": "2.874e-05", "step": 7637, "steps": "23.77s,7637/16595" }, { "epoch": 2.301295570955107, "eta": "60:30:34", "grad_norm": 0.0057, "loss": 0.0428, "lr": "2.874e-05", "step": 7638, "steps": "24.32s,7638/16595" }, { "epoch": 2.301596866526062, "eta": "59:20:00", "grad_norm": 0.0056, "loss": 0.0525, "lr": "2.873e-05", "step": 7639, "steps": "23.85s,7639/16595" }, { "epoch": 2.301898162097017, "eta": "60:01:24", "grad_norm": 0.0049, "loss": 0.0574, "lr": "2.873e-05", "step": 7640, "steps": "24.13s,7640/16595" }, { "epoch": 2.3021994576679723, "eta": "58:43:23", "grad_norm": 0.0055, "loss": 0.0535, "lr": "2.872e-05", "step": 7641, "steps": "23.61s,7641/16595" }, { "epoch": 2.3025007532389274, "eta": "59:27:46", "grad_norm": 0.005, "loss": 0.0625, "lr": "2.872e-05", "step": 7642, "steps": "23.91s,7642/16595" }, { "epoch": 2.3028020488098826, "eta": "58:57:31", "grad_norm": 0.0048, "loss": 0.0647, "lr": "2.871e-05", "step": 7643, "steps": "23.71s,7643/16595" }, { "epoch": 2.3031033443808377, "eta": "59:09:04", "grad_norm": 0.0052, "loss": 0.0622, "lr": "2.871e-05", "step": 7644, "steps": "23.79s,7644/16595" }, { "epoch": 2.303404639951793, "eta": "59:23:35", "grad_norm": 0.005, "loss": 0.0533, "lr": "2.870e-05", "step": 7645, "steps": "23.89s,7645/16595" }, { "epoch": 2.303705935522748, "eta": "59:20:12", "grad_norm": 0.0052, "loss": 0.0517, "lr": "2.870e-05", "step": 7646, "steps": "23.87s,7646/16595" }, { "epoch": 2.3040072310937028, "eta": "59:33:14", "grad_norm": 0.0058, "loss": 0.0432, "lr": "2.869e-05", "step": 7647, "steps": "23.96s,7647/16595" }, { "epoch": 2.304308526664658, "eta": "59:05:59", "grad_norm": 0.0047, "loss": 0.052, "lr": "2.869e-05", "step": 7648, "steps": "23.78s,7648/16595" }, { "epoch": 2.304609822235613, "eta": "58:13:24", "grad_norm": 0.0048, "loss": 0.0511, "lr": "2.869e-05", "step": 7649, "steps": "23.43s,7649/16595" }, { "epoch": 2.304911117806568, "eta": "59:55:53", "grad_norm": 0.0052, "loss": 0.0592, "lr": "2.868e-05", "step": 7650, "steps": "24.12s,7650/16595" }, { "epoch": 2.3052124133775234, "eta": "61:15:59", "grad_norm": 0.0056, "loss": 0.0507, "lr": "2.868e-05", "step": 7651, "steps": "24.66s,7651/16595" }, { "epoch": 2.3055137089484785, "eta": "59:16:19", "grad_norm": 0.0084, "loss": 0.0479, "lr": "2.867e-05", "step": 7652, "steps": "23.86s,7652/16595" }, { "epoch": 2.3058150045194337, "eta": "59:02:31", "grad_norm": 0.0056, "loss": 0.0492, "lr": "2.867e-05", "step": 7653, "steps": "23.77s,7653/16595" }, { "epoch": 2.306116300090389, "eta": "59:00:38", "grad_norm": 0.005, "loss": 0.049, "lr": "2.866e-05", "step": 7654, "steps": "23.76s,7654/16595" }, { "epoch": 2.306417595661344, "eta": "59:47:55", "grad_norm": 0.0062, "loss": 0.0648, "lr": "2.866e-05", "step": 7655, "steps": "24.08s,7655/16595" }, { "epoch": 2.3067188912322987, "eta": "58:59:50", "grad_norm": 0.0041, "loss": 0.0723, "lr": "2.865e-05", "step": 7656, "steps": "23.76s,7656/16595" }, { "epoch": 2.307020186803254, "eta": "58:43:03", "grad_norm": 0.0065, "loss": 0.0507, "lr": "2.865e-05", "step": 7657, "steps": "23.65s,7657/16595" }, { "epoch": 2.307321482374209, "eta": "59:43:44", "grad_norm": 0.0522, "loss": 0.0495, "lr": "2.864e-05", "step": 7658, "steps": "24.06s,7658/16595" }, { "epoch": 2.307622777945164, "eta": "59:43:20", "grad_norm": 0.0052, "loss": 0.0521, "lr": "2.864e-05", "step": 7659, "steps": "24.06s,7659/16595" }, { "epoch": 2.3079240735161193, "eta": "59:05:42", "grad_norm": 0.0074, "loss": 0.0396, "lr": "2.863e-05", "step": 7660, "steps": "23.81s,7660/16595" }, { "epoch": 2.3082253690870744, "eta": "59:08:17", "grad_norm": 0.0045, "loss": 0.0449, "lr": "2.863e-05", "step": 7661, "steps": "23.83s,7661/16595" }, { "epoch": 2.3085266646580296, "eta": "59:04:54", "grad_norm": 0.0067, "loss": 0.0401, "lr": "2.862e-05", "step": 7662, "steps": "23.81s,7662/16595" }, { "epoch": 2.3088279602289847, "eta": "58:07:56", "grad_norm": 0.0048, "loss": 0.0588, "lr": "2.862e-05", "step": 7663, "steps": "23.43s,7663/16595" }, { "epoch": 2.30912925579994, "eta": "59:21:58", "grad_norm": 0.0063, "loss": 0.0618, "lr": "2.861e-05", "step": 7664, "steps": "23.93s,7664/16595" }, { "epoch": 2.309430551370895, "eta": "59:14:08", "grad_norm": 0.0057, "loss": 0.0462, "lr": "2.861e-05", "step": 7665, "steps": "23.88s,7665/16595" }, { "epoch": 2.30973184694185, "eta": "59:24:09", "grad_norm": 0.0057, "loss": 0.0406, "lr": "2.861e-05", "step": 7666, "steps": "23.95s,7666/16595" }, { "epoch": 2.310033142512805, "eta": "59:52:01", "grad_norm": 0.0052, "loss": 0.0366, "lr": "2.860e-05", "step": 7667, "steps": "24.14s,7667/16595" }, { "epoch": 2.31033443808376, "eta": "59:05:30", "grad_norm": 0.0056, "loss": 0.0567, "lr": "2.860e-05", "step": 7668, "steps": "23.83s,7668/16595" }, { "epoch": 2.310635733654715, "eta": "59:48:15", "grad_norm": 0.0046, "loss": 0.0526, "lr": "2.859e-05", "step": 7669, "steps": "24.12s,7669/16595" }, { "epoch": 2.3109370292256703, "eta": "59:12:09", "grad_norm": 0.0065, "loss": 0.0621, "lr": "2.859e-05", "step": 7670, "steps": "23.88s,7670/16595" }, { "epoch": 2.3112383247966255, "eta": "58:18:12", "grad_norm": 0.0049, "loss": 0.0426, "lr": "2.858e-05", "step": 7671, "steps": "23.52s,7671/16595" }, { "epoch": 2.3115396203675806, "eta": "59:08:22", "grad_norm": 0.0049, "loss": 0.0474, "lr": "2.858e-05", "step": 7672, "steps": "23.86s,7672/16595" }, { "epoch": 2.311840915938536, "eta": "58:17:25", "grad_norm": 0.0049, "loss": 0.0537, "lr": "2.857e-05", "step": 7673, "steps": "23.52s,7673/16595" }, { "epoch": 2.312142211509491, "eta": "58:42:18", "grad_norm": 0.005, "loss": 0.0638, "lr": "2.857e-05", "step": 7674, "steps": "23.69s,7674/16595" }, { "epoch": 2.312443507080446, "eta": "58:24:04", "grad_norm": 0.0051, "loss": 0.0496, "lr": "2.856e-05", "step": 7675, "steps": "23.57s,7675/16595" }, { "epoch": 2.312744802651401, "eta": "59:27:36", "grad_norm": 0.0073, "loss": 0.04, "lr": "2.856e-05", "step": 7676, "steps": "24.0s,7676/16595" }, { "epoch": 2.313046098222356, "eta": "58:29:13", "grad_norm": 0.0054, "loss": 0.0449, "lr": "2.855e-05", "step": 7677, "steps": "23.61s,7677/16595" }, { "epoch": 2.313347393793311, "eta": "59:05:59", "grad_norm": 0.0051, "loss": 0.0523, "lr": "2.855e-05", "step": 7678, "steps": "23.86s,7678/16595" }, { "epoch": 2.3136486893642663, "eta": "59:10:03", "grad_norm": 0.0051, "loss": 0.0462, "lr": "2.854e-05", "step": 7679, "steps": "23.89s,7679/16595" }, { "epoch": 2.3139499849352214, "eta": "59:51:15", "grad_norm": 0.0048, "loss": 0.042, "lr": "2.854e-05", "step": 7680, "steps": "24.17s,7680/16595" }, { "epoch": 2.3142512805061766, "eta": "59:25:36", "grad_norm": 0.005, "loss": 0.0483, "lr": "2.853e-05", "step": 7681, "steps": "24.0s,7681/16595" }, { "epoch": 2.3145525760771317, "eta": "58:43:36", "grad_norm": 0.0054, "loss": 0.0308, "lr": "2.853e-05", "step": 7682, "steps": "23.72s,7682/16595" }, { "epoch": 2.314853871648087, "eta": "58:37:16", "grad_norm": 0.0049, "loss": 0.0604, "lr": "2.852e-05", "step": 7683, "steps": "23.68s,7683/16595" }, { "epoch": 2.315155167219042, "eta": "59:08:03", "grad_norm": 0.0067, "loss": 0.0469, "lr": "2.852e-05", "step": 7684, "steps": "23.89s,7684/16595" }, { "epoch": 2.315456462789997, "eta": "58:58:45", "grad_norm": 0.0055, "loss": 0.0535, "lr": "2.852e-05", "step": 7685, "steps": "23.83s,7685/16595" }, { "epoch": 2.3157577583609523, "eta": "58:58:21", "grad_norm": 0.0062, "loss": 0.0375, "lr": "2.851e-05", "step": 7686, "steps": "23.83s,7686/16595" }, { "epoch": 2.316059053931907, "eta": "59:36:33", "grad_norm": 0.0066, "loss": 0.0556, "lr": "2.851e-05", "step": 7687, "steps": "24.09s,7687/16595" }, { "epoch": 2.316360349502862, "eta": "59:34:40", "grad_norm": 0.0087, "loss": 0.0673, "lr": "2.850e-05", "step": 7688, "steps": "24.08s,7688/16595" }, { "epoch": 2.3166616450738173, "eta": "59:47:38", "grad_norm": 0.0054, "loss": 0.0391, "lr": "2.850e-05", "step": 7689, "steps": "24.17s,7689/16595" }, { "epoch": 2.3169629406447725, "eta": "58:06:18", "grad_norm": 0.0056, "loss": 0.0666, "lr": "2.849e-05", "step": 7690, "steps": "23.49s,7690/16595" }, { "epoch": 2.3172642362157276, "eta": "58:51:55", "grad_norm": 0.0047, "loss": 0.0508, "lr": "2.849e-05", "step": 7691, "steps": "23.8s,7691/16595" }, { "epoch": 2.317565531786683, "eta": "58:07:00", "grad_norm": 0.005, "loss": 0.0618, "lr": "2.848e-05", "step": 7692, "steps": "23.5s,7692/16595" }, { "epoch": 2.317866827357638, "eta": "58:45:11", "grad_norm": 0.0058, "loss": 0.0579, "lr": "2.848e-05", "step": 7693, "steps": "23.76s,7693/16595" }, { "epoch": 2.318168122928593, "eta": "59:23:22", "grad_norm": 0.0055, "loss": 0.0557, "lr": "2.847e-05", "step": 7694, "steps": "24.02s,7694/16595" }, { "epoch": 2.3184694184995482, "eta": "58:25:07", "grad_norm": 0.0058, "loss": 0.0465, "lr": "2.847e-05", "step": 7695, "steps": "23.63s,7695/16595" }, { "epoch": 2.318770714070503, "eta": "59:22:33", "grad_norm": 0.0068, "loss": 0.0465, "lr": "2.846e-05", "step": 7696, "steps": "24.02s,7696/16595" }, { "epoch": 2.319072009641458, "eta": "58:46:34", "grad_norm": 0.0064, "loss": 0.0472, "lr": "2.846e-05", "step": 7697, "steps": "23.78s,7697/16595" }, { "epoch": 2.3193733052124132, "eta": "58:12:04", "grad_norm": 0.0061, "loss": 0.0517, "lr": "2.845e-05", "step": 7698, "steps": "23.55s,7698/16595" }, { "epoch": 2.3196746007833684, "eta": "58:23:32", "grad_norm": 0.0052, "loss": 0.059, "lr": "2.845e-05", "step": 7699, "steps": "23.63s,7699/16595" }, { "epoch": 2.3199758963543236, "eta": "58:26:06", "grad_norm": 0.005, "loss": 0.0478, "lr": "2.844e-05", "step": 7700, "steps": "23.65s,7700/16595" }, { "epoch": 2.3202771919252787, "eta": "59:53:10", "grad_norm": 0.0051, "loss": 0.0442, "lr": "2.844e-05", "step": 7701, "steps": "24.24s,7701/16595" }, { "epoch": 2.320578487496234, "eta": "58:17:54", "grad_norm": 0.005, "loss": 0.0568, "lr": "2.844e-05", "step": 7702, "steps": "23.6s,7702/16595" }, { "epoch": 2.320879783067189, "eta": "59:24:12", "grad_norm": 0.0057, "loss": 0.0516, "lr": "2.843e-05", "step": 7703, "steps": "24.05s,7703/16595" }, { "epoch": 2.321181078638144, "eta": "58:37:52", "grad_norm": 0.0079, "loss": 0.0395, "lr": "2.843e-05", "step": 7704, "steps": "23.74s,7704/16595" }, { "epoch": 2.3214823742090993, "eta": "59:21:55", "grad_norm": 0.0052, "loss": 0.0565, "lr": "2.842e-05", "step": 7705, "steps": "24.04s,7705/16595" }, { "epoch": 2.3217836697800545, "eta": "58:29:40", "grad_norm": 0.0049, "loss": 0.0568, "lr": "2.842e-05", "step": 7706, "steps": "23.69s,7706/16595" }, { "epoch": 2.322084965351009, "eta": "58:24:50", "grad_norm": 0.0059, "loss": 0.0574, "lr": "2.841e-05", "step": 7707, "steps": "23.66s,7707/16595" }, { "epoch": 2.3223862609219643, "eta": "58:40:43", "grad_norm": 0.0057, "loss": 0.0434, "lr": "2.841e-05", "step": 7708, "steps": "23.77s,7708/16595" }, { "epoch": 2.3226875564929195, "eta": "58:47:44", "grad_norm": 0.0059, "loss": 0.042, "lr": "2.840e-05", "step": 7709, "steps": "23.82s,7709/16595" }, { "epoch": 2.3229888520638746, "eta": "58:26:36", "grad_norm": 0.0059, "loss": 0.0379, "lr": "2.840e-05", "step": 7710, "steps": "23.68s,7710/16595" }, { "epoch": 2.3232901476348298, "eta": "59:13:36", "grad_norm": 0.0055, "loss": 0.0437, "lr": "2.839e-05", "step": 7711, "steps": "24.0s,7711/16595" }, { "epoch": 2.323591443205785, "eta": "58:34:42", "grad_norm": 0.0053, "loss": 0.045, "lr": "2.839e-05", "step": 7712, "steps": "23.74s,7712/16595" }, { "epoch": 2.32389273877674, "eta": "58:01:44", "grad_norm": 0.005, "loss": 0.0489, "lr": "2.838e-05", "step": 7713, "steps": "23.52s,7713/16595" }, { "epoch": 2.3241940343476952, "eta": "59:15:21", "grad_norm": 0.005, "loss": 0.0619, "lr": "2.838e-05", "step": 7714, "steps": "24.02s,7714/16595" }, { "epoch": 2.3244953299186504, "eta": "59:34:12", "grad_norm": 0.0052, "loss": 0.0588, "lr": "2.837e-05", "step": 7715, "steps": "24.15s,7715/16595" }, { "epoch": 2.324796625489605, "eta": "59:20:28", "grad_norm": 0.006, "loss": 0.0508, "lr": "2.837e-05", "step": 7716, "steps": "24.06s,7716/16595" }, { "epoch": 2.3250979210605602, "eta": "58:16:27", "grad_norm": 0.0053, "loss": 0.0448, "lr": "2.836e-05", "step": 7717, "steps": "23.63s,7717/16595" }, { "epoch": 2.3253992166315154, "eta": "59:12:16", "grad_norm": 0.0048, "loss": 0.0539, "lr": "2.836e-05", "step": 7718, "steps": "24.01s,7718/16595" }, { "epoch": 2.3257005122024705, "eta": "57:47:33", "grad_norm": 0.0056, "loss": 0.0515, "lr": "2.835e-05", "step": 7719, "steps": "23.44s,7719/16595" }, { "epoch": 2.3260018077734257, "eta": "59:10:00", "grad_norm": 0.0055, "loss": 0.0512, "lr": "2.835e-05", "step": 7720, "steps": "24.0s,7720/16595" }, { "epoch": 2.326303103344381, "eta": "57:20:09", "grad_norm": 0.0056, "loss": 0.0613, "lr": "2.835e-05", "step": 7721, "steps": "23.26s,7721/16595" }, { "epoch": 2.326604398915336, "eta": "58:49:58", "grad_norm": 0.0058, "loss": 0.0443, "lr": "2.834e-05", "step": 7722, "steps": "23.87s,7722/16595" }, { "epoch": 2.326905694486291, "eta": "58:52:32", "grad_norm": 0.0061, "loss": 0.0525, "lr": "2.834e-05", "step": 7723, "steps": "23.89s,7723/16595" }, { "epoch": 2.3272069900572463, "eta": "58:24:02", "grad_norm": 0.0049, "loss": 0.0409, "lr": "2.833e-05", "step": 7724, "steps": "23.7s,7724/16595" }, { "epoch": 2.3275082856282014, "eta": "58:35:28", "grad_norm": 0.0051, "loss": 0.0383, "lr": "2.833e-05", "step": 7725, "steps": "23.78s,7725/16595" }, { "epoch": 2.3278095811991566, "eta": "60:02:17", "grad_norm": 0.006, "loss": 0.0557, "lr": "2.832e-05", "step": 7726, "steps": "24.37s,7726/16595" }, { "epoch": 2.3281108767701113, "eta": "59:08:40", "grad_norm": 0.0066, "loss": 0.0427, "lr": "2.832e-05", "step": 7727, "steps": "24.01s,7727/16595" }, { "epoch": 2.3284121723410665, "eta": "59:18:37", "grad_norm": 0.0052, "loss": 0.0457, "lr": "2.831e-05", "step": 7728, "steps": "24.08s,7728/16595" }, { "epoch": 2.3287134679120216, "eta": "59:19:41", "grad_norm": 0.0049, "loss": 0.0521, "lr": "2.831e-05", "step": 7729, "steps": "24.09s,7729/16595" }, { "epoch": 2.3290147634829768, "eta": "59:59:11", "grad_norm": 0.0054, "loss": 0.0596, "lr": "2.830e-05", "step": 7730, "steps": "24.36s,7730/16595" }, { "epoch": 2.329316059053932, "eta": "59:11:30", "grad_norm": 0.0045, "loss": 0.0494, "lr": "2.830e-05", "step": 7731, "steps": "24.04s,7731/16595" }, { "epoch": 2.329617354624887, "eta": "59:31:47", "grad_norm": 0.0054, "loss": 0.041, "lr": "2.829e-05", "step": 7732, "steps": "24.18s,7732/16595" }, { "epoch": 2.329918650195842, "eta": "58:42:38", "grad_norm": 0.0062, "loss": 0.0451, "lr": "2.829e-05", "step": 7733, "steps": "23.85s,7733/16595" }, { "epoch": 2.3302199457667974, "eta": "57:59:25", "grad_norm": 0.0071, "loss": 0.0595, "lr": "2.828e-05", "step": 7734, "steps": "23.56s,7734/16595" }, { "epoch": 2.3305212413377525, "eta": "58:50:42", "grad_norm": 0.0053, "loss": 0.0553, "lr": "2.828e-05", "step": 7735, "steps": "23.91s,7735/16595" }, { "epoch": 2.330822536908707, "eta": "58:37:01", "grad_norm": 0.0061, "loss": 0.059, "lr": "2.827e-05", "step": 7736, "steps": "23.82s,7736/16595" }, { "epoch": 2.3311238324796624, "eta": "58:49:54", "grad_norm": 0.0049, "loss": 0.0604, "lr": "2.827e-05", "step": 7737, "steps": "23.91s,7737/16595" }, { "epoch": 2.3314251280506175, "eta": "58:37:42", "grad_norm": 0.0053, "loss": 0.0574, "lr": "2.826e-05", "step": 7738, "steps": "23.83s,7738/16595" }, { "epoch": 2.3317264236215727, "eta": "59:12:43", "grad_norm": 0.0049, "loss": 0.0577, "lr": "2.826e-05", "step": 7739, "steps": "24.07s,7739/16595" }, { "epoch": 2.332027719192528, "eta": "58:35:26", "grad_norm": 0.0061, "loss": 0.0492, "lr": "2.826e-05", "step": 7740, "steps": "23.82s,7740/16595" }, { "epoch": 2.332329014763483, "eta": "57:41:54", "grad_norm": 0.005, "loss": 0.0593, "lr": "2.825e-05", "step": 7741, "steps": "23.46s,7741/16595" }, { "epoch": 2.332630310334438, "eta": "59:39:33", "grad_norm": 0.0065, "loss": 0.0572, "lr": "2.825e-05", "step": 7742, "steps": "24.26s,7742/16595" }, { "epoch": 2.3329316059053933, "eta": "59:09:39", "grad_norm": 0.0046, "loss": 0.0534, "lr": "2.824e-05", "step": 7743, "steps": "24.06s,7743/16595" }, { "epoch": 2.3332329014763484, "eta": "58:14:40", "grad_norm": 0.0056, "loss": 0.0417, "lr": "2.824e-05", "step": 7744, "steps": "23.69s,7744/16595" }, { "epoch": 2.3335341970473036, "eta": "58:27:33", "grad_norm": 0.0049, "loss": 0.0732, "lr": "2.823e-05", "step": 7745, "steps": "23.78s,7745/16595" }, { "epoch": 2.3338354926182587, "eta": "58:58:07", "grad_norm": 0.0087, "loss": 0.0494, "lr": "2.823e-05", "step": 7746, "steps": "23.99s,7746/16595" }, { "epoch": 2.3341367881892134, "eta": "58:31:10", "grad_norm": 0.0048, "loss": 0.055, "lr": "2.822e-05", "step": 7747, "steps": "23.81s,7747/16595" }, { "epoch": 2.3344380837601686, "eta": "58:27:50", "grad_norm": 0.0061, "loss": 0.0473, "lr": "2.822e-05", "step": 7748, "steps": "23.79s,7748/16595" }, { "epoch": 2.3347393793311237, "eta": "57:22:34", "grad_norm": 0.0047, "loss": 0.0487, "lr": "2.821e-05", "step": 7749, "steps": "23.35s,7749/16595" }, { "epoch": 2.335040674902079, "eta": "59:17:09", "grad_norm": 0.0049, "loss": 0.0631, "lr": "2.821e-05", "step": 7750, "steps": "24.13s,7750/16595" }, { "epoch": 2.335341970473034, "eta": "58:34:00", "grad_norm": 0.0055, "loss": 0.0502, "lr": "2.820e-05", "step": 7751, "steps": "23.84s,7751/16595" }, { "epoch": 2.335643266043989, "eta": "57:44:58", "grad_norm": 0.0045, "loss": 0.0611, "lr": "2.820e-05", "step": 7752, "steps": "23.51s,7752/16595" }, { "epoch": 2.3359445616149443, "eta": "58:17:00", "grad_norm": 0.0068, "loss": 0.0652, "lr": "2.819e-05", "step": 7753, "steps": "23.73s,7753/16595" }, { "epoch": 2.3362458571858995, "eta": "58:54:55", "grad_norm": 0.0053, "loss": 0.053, "lr": "2.819e-05", "step": 7754, "steps": "23.99s,7754/16595" }, { "epoch": 2.3365471527568547, "eta": "58:26:32", "grad_norm": 0.0089, "loss": 0.0293, "lr": "2.818e-05", "step": 7755, "steps": "23.8s,7755/16595" }, { "epoch": 2.3368484483278094, "eta": "58:42:20", "grad_norm": 0.0052, "loss": 0.0528, "lr": "2.818e-05", "step": 7756, "steps": "23.91s,7756/16595" }, { "epoch": 2.3371497438987645, "eta": "58:50:46", "grad_norm": 0.0055, "loss": 0.0475, "lr": "2.818e-05", "step": 7757, "steps": "23.97s,7757/16595" }, { "epoch": 2.3374510394697197, "eta": "58:26:48", "grad_norm": 0.0049, "loss": 0.0533, "lr": "2.817e-05", "step": 7758, "steps": "23.81s,7758/16595" }, { "epoch": 2.337752335040675, "eta": "58:42:37", "grad_norm": 0.0058, "loss": 0.0522, "lr": "2.817e-05", "step": 7759, "steps": "23.92s,7759/16595" }, { "epoch": 2.33805363061163, "eta": "58:59:53", "grad_norm": 0.0052, "loss": 0.0462, "lr": "2.816e-05", "step": 7760, "steps": "24.04s,7760/16595" }, { "epoch": 2.338354926182585, "eta": "58:31:30", "grad_norm": 0.0067, "loss": 0.0451, "lr": "2.816e-05", "step": 7761, "steps": "23.85s,7761/16595" }, { "epoch": 2.3386562217535403, "eta": "58:25:13", "grad_norm": 0.0049, "loss": 0.0494, "lr": "2.815e-05", "step": 7762, "steps": "23.81s,7762/16595" }, { "epoch": 2.3389575173244954, "eta": "58:17:28", "grad_norm": 0.0053, "loss": 0.0405, "lr": "2.815e-05", "step": 7763, "steps": "23.76s,7763/16595" }, { "epoch": 2.3392588128954506, "eta": "57:47:38", "grad_norm": 0.0056, "loss": 0.0437, "lr": "2.814e-05", "step": 7764, "steps": "23.56s,7764/16595" }, { "epoch": 2.3395601084664057, "eta": "57:56:04", "grad_norm": 0.0051, "loss": 0.0489, "lr": "2.814e-05", "step": 7765, "steps": "23.62s,7765/16595" }, { "epoch": 2.339861404037361, "eta": "59:01:54", "grad_norm": 0.006, "loss": 0.0627, "lr": "2.813e-05", "step": 7766, "steps": "24.07s,7766/16595" }, { "epoch": 2.3401626996083156, "eta": "59:10:19", "grad_norm": 0.0045, "loss": 0.0545, "lr": "2.813e-05", "step": 7767, "steps": "24.13s,7767/16595" }, { "epoch": 2.3404639951792707, "eta": "58:47:51", "grad_norm": 0.006, "loss": 0.0466, "lr": "2.812e-05", "step": 7768, "steps": "23.98s,7768/16595" }, { "epoch": 2.340765290750226, "eta": "58:48:55", "grad_norm": 0.005, "loss": 0.0546, "lr": "2.812e-05", "step": 7769, "steps": "23.99s,7769/16595" }, { "epoch": 2.341066586321181, "eta": "58:35:17", "grad_norm": 0.0056, "loss": 0.0527, "lr": "2.811e-05", "step": 7770, "steps": "23.9s,7770/16595" }, { "epoch": 2.341367881892136, "eta": "58:59:53", "grad_norm": 0.0079, "loss": 0.0394, "lr": "2.811e-05", "step": 7771, "steps": "24.07s,7771/16595" }, { "epoch": 2.3416691774630913, "eta": "58:34:29", "grad_norm": 0.0059, "loss": 0.0488, "lr": "2.810e-05", "step": 7772, "steps": "23.9s,7772/16595" }, { "epoch": 2.3419704730340465, "eta": "58:09:06", "grad_norm": 0.006, "loss": 0.0546, "lr": "2.810e-05", "step": 7773, "steps": "23.73s,7773/16595" }, { "epoch": 2.3422717686050016, "eta": "59:08:58", "grad_norm": 0.0055, "loss": 0.0656, "lr": "2.809e-05", "step": 7774, "steps": "24.14s,7774/16595" }, { "epoch": 2.342573064175957, "eta": "58:25:57", "grad_norm": 0.0061, "loss": 0.0629, "lr": "2.809e-05", "step": 7775, "steps": "23.85s,7775/16595" }, { "epoch": 2.3428743597469115, "eta": "58:21:08", "grad_norm": 0.0062, "loss": 0.0346, "lr": "2.809e-05", "step": 7776, "steps": "23.82s,7776/16595" }, { "epoch": 2.3431756553178666, "eta": "57:26:22", "grad_norm": 0.0053, "loss": 0.0637, "lr": "2.808e-05", "step": 7777, "steps": "23.45s,7777/16595" }, { "epoch": 2.343476950888822, "eta": "58:30:38", "grad_norm": 0.005, "loss": 0.0775, "lr": "2.808e-05", "step": 7778, "steps": "23.89s,7778/16595" }, { "epoch": 2.343778246459777, "eta": "57:57:54", "grad_norm": 0.006, "loss": 0.044, "lr": "2.807e-05", "step": 7779, "steps": "23.67s,7779/16595" }, { "epoch": 2.344079542030732, "eta": "58:18:05", "grad_norm": 0.0052, "loss": 0.0557, "lr": "2.807e-05", "step": 7780, "steps": "23.81s,7780/16595" }, { "epoch": 2.3443808376016873, "eta": "58:13:16", "grad_norm": 0.0045, "loss": 0.0634, "lr": "2.806e-05", "step": 7781, "steps": "23.78s,7781/16595" }, { "epoch": 2.3446821331726424, "eta": "57:56:43", "grad_norm": 0.0049, "loss": 0.0428, "lr": "2.806e-05", "step": 7782, "steps": "23.67s,7782/16595" }, { "epoch": 2.3449834287435976, "eta": "57:54:51", "grad_norm": 0.0079, "loss": 0.0422, "lr": "2.805e-05", "step": 7783, "steps": "23.66s,7783/16595" }, { "epoch": 2.3452847243145527, "eta": "58:19:26", "grad_norm": 0.0054, "loss": 0.064, "lr": "2.805e-05", "step": 7784, "steps": "23.83s,7784/16595" }, { "epoch": 2.345586019885508, "eta": "58:11:41", "grad_norm": 0.006, "loss": 0.039, "lr": "2.804e-05", "step": 7785, "steps": "23.78s,7785/16595" }, { "epoch": 2.345887315456463, "eta": "57:50:44", "grad_norm": 0.0077, "loss": 0.0438, "lr": "2.804e-05", "step": 7786, "steps": "23.64s,7786/16595" }, { "epoch": 2.3461886110274177, "eta": "58:15:18", "grad_norm": 0.008, "loss": 0.0399, "lr": "2.803e-05", "step": 7787, "steps": "23.81s,7787/16595" }, { "epoch": 2.346489906598373, "eta": "58:17:50", "grad_norm": 0.0056, "loss": 0.0521, "lr": "2.803e-05", "step": 7788, "steps": "23.83s,7788/16595" }, { "epoch": 2.346791202169328, "eta": "57:40:45", "grad_norm": 0.005, "loss": 0.0393, "lr": "2.802e-05", "step": 7789, "steps": "23.58s,7789/16595" }, { "epoch": 2.347092497740283, "eta": "58:06:46", "grad_norm": 0.005, "loss": 0.0406, "lr": "2.802e-05", "step": 7790, "steps": "23.76s,7790/16595" }, { "epoch": 2.3473937933112383, "eta": "59:15:20", "grad_norm": 0.0048, "loss": 0.0381, "lr": "2.801e-05", "step": 7791, "steps": "24.23s,7791/16595" }, { "epoch": 2.3476950888821935, "eta": "58:48:32", "grad_norm": 0.0051, "loss": 0.061, "lr": "2.801e-05", "step": 7792, "steps": "24.05s,7792/16595" }, { "epoch": 2.3479963844531486, "eta": "57:43:35", "grad_norm": 0.008, "loss": 0.0416, "lr": "2.800e-05", "step": 7793, "steps": "23.61s,7793/16595" }, { "epoch": 2.3482976800241038, "eta": "57:38:47", "grad_norm": 0.0053, "loss": 0.0465, "lr": "2.800e-05", "step": 7794, "steps": "23.58s,7794/16595" }, { "epoch": 2.348598975595059, "eta": "59:21:04", "grad_norm": 0.0064, "loss": 0.0527, "lr": "2.800e-05", "step": 7795, "steps": "24.28s,7795/16595" }, { "epoch": 2.3489002711660136, "eta": "59:01:35", "grad_norm": 0.006, "loss": 0.0565, "lr": "2.799e-05", "step": 7796, "steps": "24.15s,7796/16595" }, { "epoch": 2.349201566736969, "eta": "57:40:32", "grad_norm": 0.0069, "loss": 0.0532, "lr": "2.799e-05", "step": 7797, "steps": "23.6s,7797/16595" }, { "epoch": 2.349502862307924, "eta": "58:10:56", "grad_norm": 0.0049, "loss": 0.047, "lr": "2.798e-05", "step": 7798, "steps": "23.81s,7798/16595" }, { "epoch": 2.349804157878879, "eta": "58:32:32", "grad_norm": 0.0057, "loss": 0.0331, "lr": "2.798e-05", "step": 7799, "steps": "23.96s,7799/16595" }, { "epoch": 2.3501054534498342, "eta": "58:21:52", "grad_norm": 0.0067, "loss": 0.0469, "lr": "2.797e-05", "step": 7800, "steps": "23.89s,7800/16595" }, { "epoch": 2.3504067490207894, "eta": "107:20:08", "grad_norm": 0.0051, "loss": 0.063, "lr": "2.797e-05", "step": 7801, "steps": "43.94s,7801/16595" }, { "epoch": 2.3507080445917445, "eta": "58:02:01", "grad_norm": 0.005, "loss": 0.0616, "lr": "2.796e-05", "step": 7802, "steps": "23.76s,7802/16595" }, { "epoch": 2.3510093401626997, "eta": "57:36:43", "grad_norm": 0.0059, "loss": 0.0615, "lr": "2.796e-05", "step": 7803, "steps": "23.59s,7803/16595" }, { "epoch": 2.351310635733655, "eta": "57:40:43", "grad_norm": 0.0058, "loss": 0.047, "lr": "2.795e-05", "step": 7804, "steps": "23.62s,7804/16595" }, { "epoch": 2.35161193130461, "eta": "58:37:27", "grad_norm": 0.0053, "loss": 0.0658, "lr": "2.795e-05", "step": 7805, "steps": "24.01s,7805/16595" }, { "epoch": 2.351913226875565, "eta": "58:01:54", "grad_norm": 0.0054, "loss": 0.0454, "lr": "2.794e-05", "step": 7806, "steps": "23.77s,7806/16595" }, { "epoch": 2.35221452244652, "eta": "58:11:45", "grad_norm": 0.0054, "loss": 0.0624, "lr": "2.794e-05", "step": 7807, "steps": "23.84s,7807/16595" }, { "epoch": 2.352515818017475, "eta": "57:09:51", "grad_norm": 0.005, "loss": 0.0379, "lr": "2.793e-05", "step": 7808, "steps": "23.42s,7808/16595" }, { "epoch": 2.35281711358843, "eta": "58:13:53", "grad_norm": 0.0056, "loss": 0.0489, "lr": "2.793e-05", "step": 7809, "steps": "23.86s,7809/16595" }, { "epoch": 2.3531184091593853, "eta": "58:50:06", "grad_norm": 0.005, "loss": 0.0466, "lr": "2.792e-05", "step": 7810, "steps": "24.11s,7810/16595" }, { "epoch": 2.3534197047303405, "eta": "58:33:36", "grad_norm": 0.0058, "loss": 0.0485, "lr": "2.792e-05", "step": 7811, "steps": "24.0s,7811/16595" }, { "epoch": 2.3537210003012956, "eta": "58:46:22", "grad_norm": 0.0087, "loss": 0.0507, "lr": "2.791e-05", "step": 7812, "steps": "24.09s,7812/16595" }, { "epoch": 2.3540222958722508, "eta": "58:28:24", "grad_norm": 0.007, "loss": 0.0425, "lr": "2.791e-05", "step": 7813, "steps": "23.97s,7813/16595" }, { "epoch": 2.354323591443206, "eta": "58:16:18", "grad_norm": 0.0056, "loss": 0.0539, "lr": "2.791e-05", "step": 7814, "steps": "23.89s,7814/16595" }, { "epoch": 2.354624887014161, "eta": "58:15:54", "grad_norm": 0.0052, "loss": 0.0554, "lr": "2.790e-05", "step": 7815, "steps": "23.89s,7815/16595" }, { "epoch": 2.3549261825851158, "eta": "57:34:32", "grad_norm": 0.0061, "loss": 0.0525, "lr": "2.790e-05", "step": 7816, "steps": "23.61s,7816/16595" }, { "epoch": 2.355227478156071, "eta": "59:01:55", "grad_norm": 0.0053, "loss": 0.0489, "lr": "2.789e-05", "step": 7817, "steps": "24.21s,7817/16595" }, { "epoch": 2.355528773727026, "eta": "58:05:55", "grad_norm": 0.0056, "loss": 0.0568, "lr": "2.789e-05", "step": 7818, "steps": "23.83s,7818/16595" }, { "epoch": 2.355830069297981, "eta": "57:59:41", "grad_norm": 0.0058, "loss": 0.0584, "lr": "2.788e-05", "step": 7819, "steps": "23.79s,7819/16595" }, { "epoch": 2.3561313648689364, "eta": "58:06:36", "grad_norm": 0.0058, "loss": 0.0493, "lr": "2.788e-05", "step": 7820, "steps": "23.84s,7820/16595" }, { "epoch": 2.3564326604398915, "eta": "57:45:43", "grad_norm": 0.0051, "loss": 0.0473, "lr": "2.787e-05", "step": 7821, "steps": "23.7s,7821/16595" }, { "epoch": 2.3567339560108467, "eta": "58:11:39", "grad_norm": 0.0055, "loss": 0.0437, "lr": "2.787e-05", "step": 7822, "steps": "23.88s,7822/16595" }, { "epoch": 2.357035251581802, "eta": "58:17:06", "grad_norm": 0.0044, "loss": 0.0546, "lr": "2.786e-05", "step": 7823, "steps": "23.92s,7823/16595" }, { "epoch": 2.357336547152757, "eta": "58:29:51", "grad_norm": 0.0054, "loss": 0.0453, "lr": "2.786e-05", "step": 7824, "steps": "24.01s,7824/16595" }, { "epoch": 2.357637842723712, "eta": "58:23:36", "grad_norm": 0.0058, "loss": 0.0382, "lr": "2.785e-05", "step": 7825, "steps": "23.97s,7825/16595" }, { "epoch": 2.3579391382946673, "eta": "58:11:31", "grad_norm": 0.0057, "loss": 0.0661, "lr": "2.785e-05", "step": 7826, "steps": "23.89s,7826/16595" }, { "epoch": 2.358240433865622, "eta": "58:52:02", "grad_norm": 0.0056, "loss": 0.0421, "lr": "2.784e-05", "step": 7827, "steps": "24.17s,7827/16595" }, { "epoch": 2.358541729436577, "eta": "58:00:29", "grad_norm": 0.0049, "loss": 0.0543, "lr": "2.784e-05", "step": 7828, "steps": "23.82s,7828/16595" }, { "epoch": 2.3588430250075323, "eta": "57:30:52", "grad_norm": 0.0069, "loss": 0.0525, "lr": "2.783e-05", "step": 7829, "steps": "23.62s,7829/16595" }, { "epoch": 2.3591443205784874, "eta": "57:18:48", "grad_norm": 0.0059, "loss": 0.0481, "lr": "2.783e-05", "step": 7830, "steps": "23.54s,7830/16595" }, { "epoch": 2.3594456161494426, "eta": "58:37:17", "grad_norm": 0.006, "loss": 0.0461, "lr": "2.782e-05", "step": 7831, "steps": "24.08s,7831/16595" }, { "epoch": 2.3597469117203977, "eta": "57:55:59", "grad_norm": 0.0067, "loss": 0.0335, "lr": "2.782e-05", "step": 7832, "steps": "23.8s,7832/16595" }, { "epoch": 2.360048207291353, "eta": "57:23:27", "grad_norm": 0.0092, "loss": 0.057, "lr": "2.782e-05", "step": 7833, "steps": "23.58s,7833/16595" }, { "epoch": 2.360349502862308, "eta": "58:46:18", "grad_norm": 0.0056, "loss": 0.0545, "lr": "2.781e-05", "step": 7834, "steps": "24.15s,7834/16595" }, { "epoch": 2.360650798433263, "eta": "58:09:24", "grad_norm": 0.0048, "loss": 0.0445, "lr": "2.781e-05", "step": 7835, "steps": "23.9s,7835/16595" }, { "epoch": 2.360952094004218, "eta": "58:04:37", "grad_norm": 0.0054, "loss": 0.051, "lr": "2.780e-05", "step": 7836, "steps": "23.87s,7836/16595" }, { "epoch": 2.361253389575173, "eta": "57:43:47", "grad_norm": 0.0069, "loss": 0.0703, "lr": "2.780e-05", "step": 7837, "steps": "23.73s,7837/16595" }, { "epoch": 2.361554685146128, "eta": "58:19:52", "grad_norm": 0.011, "loss": 0.0483, "lr": "2.779e-05", "step": 7838, "steps": "23.98s,7838/16595" }, { "epoch": 2.3618559807170834, "eta": "58:23:51", "grad_norm": 0.0058, "loss": 0.0473, "lr": "2.779e-05", "step": 7839, "steps": "24.01s,7839/16595" }, { "epoch": 2.3621572762880385, "eta": "58:01:34", "grad_norm": 0.0063, "loss": 0.0579, "lr": "2.778e-05", "step": 7840, "steps": "23.86s,7840/16595" }, { "epoch": 2.3624585718589937, "eta": "57:53:52", "grad_norm": 0.0048, "loss": 0.0596, "lr": "2.778e-05", "step": 7841, "steps": "23.81s,7841/16595" }, { "epoch": 2.362759867429949, "eta": "58:03:41", "grad_norm": 0.0055, "loss": 0.0671, "lr": "2.777e-05", "step": 7842, "steps": "23.88s,7842/16595" }, { "epoch": 2.363061163000904, "eta": "57:00:34", "grad_norm": 0.0061, "loss": 0.0697, "lr": "2.777e-05", "step": 7843, "steps": "23.45s,7843/16595" }, { "epoch": 2.363362458571859, "eta": "57:58:31", "grad_norm": 0.0052, "loss": 0.0723, "lr": "2.776e-05", "step": 7844, "steps": "23.85s,7844/16595" }, { "epoch": 2.3636637541428143, "eta": "58:06:52", "grad_norm": 0.007, "loss": 0.0467, "lr": "2.776e-05", "step": 7845, "steps": "23.91s,7845/16595" }, { "epoch": 2.3639650497137694, "eta": "58:34:10", "grad_norm": 0.0054, "loss": 0.0377, "lr": "2.775e-05", "step": 7846, "steps": "24.1s,7846/16595" }, { "epoch": 2.364266345284724, "eta": "57:58:47", "grad_norm": 0.0046, "loss": 0.0585, "lr": "2.775e-05", "step": 7847, "steps": "23.86s,7847/16595" }, { "epoch": 2.3645676408556793, "eta": "58:20:15", "grad_norm": 0.0057, "loss": 0.05, "lr": "2.774e-05", "step": 7848, "steps": "24.01s,7848/16595" }, { "epoch": 2.3648689364266344, "eta": "58:27:08", "grad_norm": 0.0052, "loss": 0.062, "lr": "2.774e-05", "step": 7849, "steps": "24.06s,7849/16595" }, { "epoch": 2.3651702319975896, "eta": "57:50:18", "grad_norm": 0.0056, "loss": 0.0524, "lr": "2.773e-05", "step": 7850, "steps": "23.81s,7850/16595" }, { "epoch": 2.3654715275685447, "eta": "57:26:35", "grad_norm": 0.0059, "loss": 0.0417, "lr": "2.773e-05", "step": 7851, "steps": "23.65s,7851/16595" }, { "epoch": 2.3657728231395, "eta": "58:04:05", "grad_norm": 0.0045, "loss": 0.048, "lr": "2.773e-05", "step": 7852, "steps": "23.91s,7852/16595" }, { "epoch": 2.366074118710455, "eta": "57:54:56", "grad_norm": 0.0051, "loss": 0.0523, "lr": "2.772e-05", "step": 7853, "steps": "23.85s,7853/16595" }, { "epoch": 2.36637541428141, "eta": "56:21:18", "grad_norm": 0.0048, "loss": 0.0775, "lr": "2.772e-05", "step": 7854, "steps": "23.21s,7854/16595" }, { "epoch": 2.3666767098523653, "eta": "57:43:57", "grad_norm": 0.0059, "loss": 0.0482, "lr": "2.771e-05", "step": 7855, "steps": "23.78s,7855/16595" }, { "epoch": 2.36697800542332, "eta": "58:17:03", "grad_norm": 0.0071, "loss": 0.0495, "lr": "2.771e-05", "step": 7856, "steps": "24.01s,7856/16595" }, { "epoch": 2.367279300994275, "eta": "57:50:26", "grad_norm": 0.0047, "loss": 0.0506, "lr": "2.770e-05", "step": 7857, "steps": "23.83s,7857/16595" }, { "epoch": 2.3675805965652303, "eta": "58:19:10", "grad_norm": 0.0046, "loss": 0.0567, "lr": "2.770e-05", "step": 7858, "steps": "24.03s,7858/16595" }, { "epoch": 2.3678818921361855, "eta": "57:52:33", "grad_norm": 0.0051, "loss": 0.0493, "lr": "2.769e-05", "step": 7859, "steps": "23.85s,7859/16595" }, { "epoch": 2.3681831877071406, "eta": "57:04:07", "grad_norm": 0.006, "loss": 0.0481, "lr": "2.769e-05", "step": 7860, "steps": "23.52s,7860/16595" }, { "epoch": 2.368484483278096, "eta": "57:27:01", "grad_norm": 0.0082, "loss": 0.0507, "lr": "2.768e-05", "step": 7861, "steps": "23.68s,7861/16595" }, { "epoch": 2.368785778849051, "eta": "57:39:43", "grad_norm": 0.004, "loss": 0.0501, "lr": "2.768e-05", "step": 7862, "steps": "23.77s,7862/16595" }, { "epoch": 2.369087074420006, "eta": "58:04:04", "grad_norm": 0.0052, "loss": 0.0488, "lr": "2.767e-05", "step": 7863, "steps": "23.94s,7863/16595" }, { "epoch": 2.3693883699909613, "eta": "57:22:55", "grad_norm": 0.0043, "loss": 0.034, "lr": "2.767e-05", "step": 7864, "steps": "23.66s,7864/16595" }, { "epoch": 2.3696896655619164, "eta": "58:03:16", "grad_norm": 0.0055, "loss": 0.065, "lr": "2.766e-05", "step": 7865, "steps": "23.94s,7865/16595" }, { "epoch": 2.3699909611328716, "eta": "57:41:02", "grad_norm": 0.0073, "loss": 0.0437, "lr": "2.766e-05", "step": 7866, "steps": "23.79s,7866/16595" }, { "epoch": 2.3702922567038263, "eta": "57:31:55", "grad_norm": 0.0047, "loss": 0.0454, "lr": "2.765e-05", "step": 7867, "steps": "23.73s,7867/16595" }, { "epoch": 2.3705935522747814, "eta": "57:28:37", "grad_norm": 0.005, "loss": 0.0427, "lr": "2.765e-05", "step": 7868, "steps": "23.71s,7868/16595" }, { "epoch": 2.3708948478457366, "eta": "57:25:18", "grad_norm": 0.0065, "loss": 0.0491, "lr": "2.764e-05", "step": 7869, "steps": "23.69s,7869/16595" }, { "epoch": 2.3711961434166917, "eta": "57:20:33", "grad_norm": 0.0056, "loss": 0.0504, "lr": "2.764e-05", "step": 7870, "steps": "23.66s,7870/16595" }, { "epoch": 2.371497438987647, "eta": "57:21:37", "grad_norm": 0.005, "loss": 0.0401, "lr": "2.764e-05", "step": 7871, "steps": "23.67s,7871/16595" }, { "epoch": 2.371798734558602, "eta": "58:00:28", "grad_norm": 0.0047, "loss": 0.0533, "lr": "2.763e-05", "step": 7872, "steps": "23.94s,7872/16595" }, { "epoch": 2.372100030129557, "eta": "57:55:43", "grad_norm": 0.0056, "loss": 0.0501, "lr": "2.763e-05", "step": 7873, "steps": "23.91s,7873/16595" }, { "epoch": 2.3724013257005123, "eta": "57:34:58", "grad_norm": 0.0069, "loss": 0.0534, "lr": "2.762e-05", "step": 7874, "steps": "23.77s,7874/16595" }, { "epoch": 2.3727026212714675, "eta": "58:00:44", "grad_norm": 0.0048, "loss": 0.0376, "lr": "2.762e-05", "step": 7875, "steps": "23.95s,7875/16595" }, { "epoch": 2.373003916842422, "eta": "58:32:18", "grad_norm": 0.0068, "loss": 0.0402, "lr": "2.761e-05", "step": 7876, "steps": "24.17s,7876/16595" }, { "epoch": 2.3733052124133773, "eta": "58:04:17", "grad_norm": 0.0048, "loss": 0.0418, "lr": "2.761e-05", "step": 7877, "steps": "23.98s,7877/16595" }, { "epoch": 2.3736065079843325, "eta": "57:26:07", "grad_norm": 0.0051, "loss": 0.0554, "lr": "2.760e-05", "step": 7878, "steps": "23.72s,7878/16595" }, { "epoch": 2.3739078035552876, "eta": "57:41:42", "grad_norm": 0.0056, "loss": 0.056, "lr": "2.760e-05", "step": 7879, "steps": "23.83s,7879/16595" }, { "epoch": 2.374209099126243, "eta": "57:48:34", "grad_norm": 0.0056, "loss": 0.0546, "lr": "2.759e-05", "step": 7880, "steps": "23.88s,7880/16595" }, { "epoch": 2.374510394697198, "eta": "57:51:04", "grad_norm": 0.0054, "loss": 0.0682, "lr": "2.759e-05", "step": 7881, "steps": "23.9s,7881/16595" }, { "epoch": 2.374811690268153, "eta": "57:28:53", "grad_norm": 0.0043, "loss": 0.0431, "lr": "2.758e-05", "step": 7882, "steps": "23.75s,7882/16595" }, { "epoch": 2.3751129858391082, "eta": "58:10:36", "grad_norm": 0.0049, "loss": 0.0478, "lr": "2.758e-05", "step": 7883, "steps": "24.04s,7883/16595" }, { "epoch": 2.3754142814100634, "eta": "58:01:29", "grad_norm": 0.0046, "loss": 0.0631, "lr": "2.757e-05", "step": 7884, "steps": "23.98s,7884/16595" }, { "epoch": 2.3757155769810185, "eta": "58:01:05", "grad_norm": 0.0052, "loss": 0.0511, "lr": "2.757e-05", "step": 7885, "steps": "23.98s,7885/16595" }, { "epoch": 2.3760168725519737, "eta": "57:57:47", "grad_norm": 0.0051, "loss": 0.0432, "lr": "2.756e-05", "step": 7886, "steps": "23.96s,7886/16595" }, { "epoch": 2.3763181681229284, "eta": "57:57:23", "grad_norm": 0.0045, "loss": 0.0636, "lr": "2.756e-05", "step": 7887, "steps": "23.96s,7887/16595" }, { "epoch": 2.3766194636938835, "eta": "57:39:34", "grad_norm": 0.0058, "loss": 0.0477, "lr": "2.755e-05", "step": 7888, "steps": "23.84s,7888/16595" }, { "epoch": 2.3769207592648387, "eta": "58:28:31", "grad_norm": 0.0046, "loss": 0.0578, "lr": "2.755e-05", "step": 7889, "steps": "24.18s,7889/16595" }, { "epoch": 2.377222054835794, "eta": "58:48:25", "grad_norm": 0.0056, "loss": 0.051, "lr": "2.754e-05", "step": 7890, "steps": "24.32s,7890/16595" }, { "epoch": 2.377523350406749, "eta": "57:36:56", "grad_norm": 0.006, "loss": 0.0561, "lr": "2.754e-05", "step": 7891, "steps": "23.83s,7891/16595" }, { "epoch": 2.377824645977704, "eta": "57:04:37", "grad_norm": 0.0056, "loss": 0.0557, "lr": "2.754e-05", "step": 7892, "steps": "23.61s,7892/16595" }, { "epoch": 2.3781259415486593, "eta": "57:46:17", "grad_norm": 0.0052, "loss": 0.0521, "lr": "2.753e-05", "step": 7893, "steps": "23.9s,7893/16595" }, { "epoch": 2.3784272371196145, "eta": "57:47:20", "grad_norm": 0.0064, "loss": 0.042, "lr": "2.753e-05", "step": 7894, "steps": "23.91s,7894/16595" }, { "epoch": 2.3787285326905696, "eta": "58:42:03", "grad_norm": 0.0079, "loss": 0.0558, "lr": "2.752e-05", "step": 7895, "steps": "24.29s,7895/16595" }, { "epoch": 2.3790298282615243, "eta": "57:27:42", "grad_norm": 0.0053, "loss": 0.0653, "lr": "2.752e-05", "step": 7896, "steps": "23.78s,7896/16595" }, { "epoch": 2.37933112383248, "eta": "57:33:06", "grad_norm": 0.0046, "loss": 0.0459, "lr": "2.751e-05", "step": 7897, "steps": "23.82s,7897/16595" }, { "epoch": 2.3796324194034346, "eta": "57:41:24", "grad_norm": 0.0061, "loss": 0.0541, "lr": "2.751e-05", "step": 7898, "steps": "23.88s,7898/16595" }, { "epoch": 2.3799337149743898, "eta": "57:30:51", "grad_norm": 0.0061, "loss": 0.0497, "lr": "2.750e-05", "step": 7899, "steps": "23.81s,7899/16595" }, { "epoch": 2.380235010545345, "eta": "57:23:13", "grad_norm": 0.0059, "loss": 0.0444, "lr": "2.750e-05", "step": 7900, "steps": "23.76s,7900/16595" }, { "epoch": 2.3805363061163, "eta": "58:14:59", "grad_norm": 0.0052, "loss": 0.0649, "lr": "2.749e-05", "step": 7901, "steps": "24.12s,7901/16595" }, { "epoch": 2.3808376016872552, "eta": "57:26:46", "grad_norm": 0.005, "loss": 0.0575, "lr": "2.749e-05", "step": 7902, "steps": "23.79s,7902/16595" }, { "epoch": 2.3811388972582104, "eta": "57:35:04", "grad_norm": 0.0057, "loss": 0.0549, "lr": "2.748e-05", "step": 7903, "steps": "23.85s,7903/16595" }, { "epoch": 2.3814401928291655, "eta": "57:11:29", "grad_norm": 0.0058, "loss": 0.0568, "lr": "2.748e-05", "step": 7904, "steps": "23.69s,7904/16595" }, { "epoch": 2.3817414884001207, "eta": "57:45:51", "grad_norm": 0.0055, "loss": 0.0395, "lr": "2.747e-05", "step": 7905, "steps": "23.93s,7905/16595" }, { "epoch": 2.382042783971076, "eta": "57:36:46", "grad_norm": 0.0093, "loss": 0.0533, "lr": "2.747e-05", "step": 7906, "steps": "23.87s,7906/16595" }, { "epoch": 2.3823440795420305, "eta": "58:11:07", "grad_norm": 0.0058, "loss": 0.0573, "lr": "2.746e-05", "step": 7907, "steps": "24.11s,7907/16595" }, { "epoch": 2.3826453751129857, "eta": "57:11:21", "grad_norm": 0.005, "loss": 0.0466, "lr": "2.746e-05", "step": 7908, "steps": "23.7s,7908/16595" }, { "epoch": 2.382946670683941, "eta": "58:01:38", "grad_norm": 0.0053, "loss": 0.0444, "lr": "2.745e-05", "step": 7909, "steps": "24.05s,7909/16595" }, { "epoch": 2.383247966254896, "eta": "58:21:30", "grad_norm": 0.0054, "loss": 0.0534, "lr": "2.745e-05", "step": 7910, "steps": "24.19s,7910/16595" }, { "epoch": 2.383549261825851, "eta": "57:18:51", "grad_norm": 0.006, "loss": 0.0561, "lr": "2.745e-05", "step": 7911, "steps": "23.76s,7911/16595" }, { "epoch": 2.3838505573968063, "eta": "57:54:38", "grad_norm": 0.0047, "loss": 0.0551, "lr": "2.744e-05", "step": 7912, "steps": "24.01s,7912/16595" }, { "epoch": 2.3841518529677614, "eta": "57:41:13", "grad_norm": 0.0052, "loss": 0.0427, "lr": "2.744e-05", "step": 7913, "steps": "23.92s,7913/16595" }, { "epoch": 2.3844531485387166, "eta": "58:16:59", "grad_norm": 0.0077, "loss": 0.0571, "lr": "2.743e-05", "step": 7914, "steps": "24.17s,7914/16595" }, { "epoch": 2.3847544441096717, "eta": "56:46:54", "grad_norm": 0.0056, "loss": 0.0596, "lr": "2.743e-05", "step": 7915, "steps": "23.55s,7915/16595" }, { "epoch": 2.3850557396806265, "eta": "56:56:37", "grad_norm": 0.0058, "loss": 0.0457, "lr": "2.742e-05", "step": 7916, "steps": "23.62s,7916/16595" }, { "epoch": 2.385357035251582, "eta": "56:46:06", "grad_norm": 0.0069, "loss": 0.0548, "lr": "2.742e-05", "step": 7917, "steps": "23.55s,7917/16595" }, { "epoch": 2.3856583308225368, "eta": "57:23:19", "grad_norm": 0.0048, "loss": 0.0521, "lr": "2.741e-05", "step": 7918, "steps": "23.81s,7918/16595" }, { "epoch": 2.385959626393492, "eta": "58:03:24", "grad_norm": 0.005, "loss": 0.0502, "lr": "2.741e-05", "step": 7919, "steps": "24.09s,7919/16595" }, { "epoch": 2.386260921964447, "eta": "58:13:08", "grad_norm": 0.0057, "loss": 0.0606, "lr": "2.740e-05", "step": 7920, "steps": "24.16s,7920/16595" }, { "epoch": 2.386562217535402, "eta": "56:57:33", "grad_norm": 0.0061, "loss": 0.065, "lr": "2.740e-05", "step": 7921, "steps": "23.64s,7921/16595" }, { "epoch": 2.3868635131063574, "eta": "57:08:43", "grad_norm": 0.006, "loss": 0.0441, "lr": "2.739e-05", "step": 7922, "steps": "23.72s,7922/16595" }, { "epoch": 2.3871648086773125, "eta": "57:31:27", "grad_norm": 0.0063, "loss": 0.0513, "lr": "2.739e-05", "step": 7923, "steps": "23.88s,7923/16595" }, { "epoch": 2.3874661042482677, "eta": "57:07:56", "grad_norm": 0.0056, "loss": 0.0438, "lr": "2.738e-05", "step": 7924, "steps": "23.72s,7924/16595" }, { "epoch": 2.387767399819223, "eta": "57:08:59", "grad_norm": 0.0056, "loss": 0.0583, "lr": "2.738e-05", "step": 7925, "steps": "23.73s,7925/16595" }, { "epoch": 2.388068695390178, "eta": "58:04:56", "grad_norm": 0.0051, "loss": 0.0561, "lr": "2.737e-05", "step": 7926, "steps": "24.12s,7926/16595" }, { "epoch": 2.3883699909611327, "eta": "57:28:25", "grad_norm": 0.0054, "loss": 0.0494, "lr": "2.737e-05", "step": 7927, "steps": "23.87s,7927/16595" }, { "epoch": 2.388671286532088, "eta": "58:02:41", "grad_norm": 0.0066, "loss": 0.0509, "lr": "2.736e-05", "step": 7928, "steps": "24.11s,7928/16595" }, { "epoch": 2.388972582103043, "eta": "56:44:17", "grad_norm": 0.0062, "loss": 0.043, "lr": "2.736e-05", "step": 7929, "steps": "23.57s,7929/16595" }, { "epoch": 2.389273877673998, "eta": "57:22:53", "grad_norm": 0.0048, "loss": 0.0662, "lr": "2.736e-05", "step": 7930, "steps": "23.84s,7930/16595" }, { "epoch": 2.3895751732449533, "eta": "57:21:03", "grad_norm": 0.0054, "loss": 0.0396, "lr": "2.735e-05", "step": 7931, "steps": "23.83s,7931/16595" }, { "epoch": 2.3898764688159084, "eta": "57:56:45", "grad_norm": 0.0062, "loss": 0.0532, "lr": "2.735e-05", "step": 7932, "steps": "24.08s,7932/16595" }, { "epoch": 2.3901777643868636, "eta": "57:26:01", "grad_norm": 0.0053, "loss": 0.0607, "lr": "2.734e-05", "step": 7933, "steps": "23.87s,7933/16595" }, { "epoch": 2.3904790599578187, "eta": "57:06:52", "grad_norm": 0.0059, "loss": 0.071, "lr": "2.734e-05", "step": 7934, "steps": "23.74s,7934/16595" }, { "epoch": 2.390780355528774, "eta": "58:22:58", "grad_norm": 0.0071, "loss": 0.0549, "lr": "2.733e-05", "step": 7935, "steps": "24.27s,7935/16595" }, { "epoch": 2.3910816510997286, "eta": "58:36:59", "grad_norm": 0.007, "loss": 0.0658, "lr": "2.733e-05", "step": 7936, "steps": "24.37s,7936/16595" }, { "epoch": 2.391382946670684, "eta": "57:43:12", "grad_norm": 0.0045, "loss": 0.0514, "lr": "2.732e-05", "step": 7937, "steps": "24.0s,7937/16595" }, { "epoch": 2.391684242241639, "eta": "57:32:42", "grad_norm": 0.0058, "loss": 0.0338, "lr": "2.732e-05", "step": 7938, "steps": "23.93s,7938/16595" }, { "epoch": 2.391985537812594, "eta": "56:50:27", "grad_norm": 0.0056, "loss": 0.0626, "lr": "2.731e-05", "step": 7939, "steps": "23.64s,7939/16595" }, { "epoch": 2.392286833383549, "eta": "57:42:00", "grad_norm": 0.0048, "loss": 0.0509, "lr": "2.731e-05", "step": 7940, "steps": "24.0s,7940/16595" }, { "epoch": 2.3925881289545043, "eta": "56:35:15", "grad_norm": 0.0064, "loss": 0.0388, "lr": "2.730e-05", "step": 7941, "steps": "23.54s,7941/16595" }, { "epoch": 2.3928894245254595, "eta": "56:44:57", "grad_norm": 0.0051, "loss": 0.0746, "lr": "2.730e-05", "step": 7942, "steps": "23.61s,7942/16595" }, { "epoch": 2.3931907200964146, "eta": "57:20:36", "grad_norm": 0.007, "loss": 0.0449, "lr": "2.729e-05", "step": 7943, "steps": "23.86s,7943/16595" }, { "epoch": 2.39349201566737, "eta": "58:07:47", "grad_norm": 0.0058, "loss": 0.0439, "lr": "2.729e-05", "step": 7944, "steps": "24.19s,7944/16595" }, { "epoch": 2.393793311238325, "eta": "57:45:46", "grad_norm": 0.0048, "loss": 0.0541, "lr": "2.728e-05", "step": 7945, "steps": "24.04s,7945/16595" }, { "epoch": 2.39409460680928, "eta": "57:19:25", "grad_norm": 0.006, "loss": 0.0286, "lr": "2.728e-05", "step": 7946, "steps": "23.86s,7946/16595" }, { "epoch": 2.394395902380235, "eta": "57:34:52", "grad_norm": 0.0056, "loss": 0.0519, "lr": "2.727e-05", "step": 7947, "steps": "23.97s,7947/16595" }, { "epoch": 2.39469719795119, "eta": "58:19:09", "grad_norm": 0.0056, "loss": 0.0496, "lr": "2.727e-05", "step": 7948, "steps": "24.28s,7948/16595" }, { "epoch": 2.394998493522145, "eta": "57:48:29", "grad_norm": 0.0051, "loss": 0.0541, "lr": "2.726e-05", "step": 7949, "steps": "24.07s,7949/16595" }, { "epoch": 2.3952997890931003, "eta": "57:16:23", "grad_norm": 0.0085, "loss": 0.0381, "lr": "2.726e-05", "step": 7950, "steps": "23.85s,7950/16595" }, { "epoch": 2.3956010846640554, "eta": "56:58:42", "grad_norm": 0.0052, "loss": 0.0533, "lr": "2.726e-05", "step": 7951, "steps": "23.73s,7951/16595" }, { "epoch": 2.3959023802350106, "eta": "56:46:46", "grad_norm": 0.0058, "loss": 0.0545, "lr": "2.725e-05", "step": 7952, "steps": "23.65s,7952/16595" }, { "epoch": 2.3962036758059657, "eta": "56:55:01", "grad_norm": 0.0066, "loss": 0.0585, "lr": "2.725e-05", "step": 7953, "steps": "23.71s,7953/16595" }, { "epoch": 2.396504971376921, "eta": "57:52:14", "grad_norm": 0.0055, "loss": 0.0395, "lr": "2.724e-05", "step": 7954, "steps": "24.11s,7954/16595" }, { "epoch": 2.396806266947876, "eta": "57:01:26", "grad_norm": 0.0051, "loss": 0.0562, "lr": "2.724e-05", "step": 7955, "steps": "23.76s,7955/16595" }, { "epoch": 2.3971075625188307, "eta": "57:35:36", "grad_norm": 0.0043, "loss": 0.044, "lr": "2.723e-05", "step": 7956, "steps": "24.0s,7956/16595" }, { "epoch": 2.3974088580897863, "eta": "57:20:48", "grad_norm": 0.0057, "loss": 0.0569, "lr": "2.723e-05", "step": 7957, "steps": "23.9s,7957/16595" }, { "epoch": 2.397710153660741, "eta": "57:43:26", "grad_norm": 0.0057, "loss": 0.041, "lr": "2.722e-05", "step": 7958, "steps": "24.06s,7958/16595" }, { "epoch": 2.398011449231696, "eta": "56:45:27", "grad_norm": 0.004, "loss": 0.0503, "lr": "2.722e-05", "step": 7959, "steps": "23.66s,7959/16595" }, { "epoch": 2.3983127448026513, "eta": "56:40:45", "grad_norm": 0.0054, "loss": 0.0461, "lr": "2.721e-05", "step": 7960, "steps": "23.63s,7960/16595" }, { "epoch": 2.3986140403736065, "eta": "56:48:59", "grad_norm": 0.0049, "loss": 0.0511, "lr": "2.721e-05", "step": 7961, "steps": "23.69s,7961/16595" }, { "epoch": 2.3989153359445616, "eta": "57:07:18", "grad_norm": 0.0053, "loss": 0.0509, "lr": "2.720e-05", "step": 7962, "steps": "23.82s,7962/16595" }, { "epoch": 2.399216631515517, "eta": "56:56:50", "grad_norm": 0.0079, "loss": 0.0421, "lr": "2.720e-05", "step": 7963, "steps": "23.75s,7963/16595" }, { "epoch": 2.399517927086472, "eta": "57:23:46", "grad_norm": 0.005, "loss": 0.053, "lr": "2.719e-05", "step": 7964, "steps": "23.94s,7964/16595" }, { "epoch": 2.399819222657427, "eta": "56:58:55", "grad_norm": 0.0067, "loss": 0.0486, "lr": "2.719e-05", "step": 7965, "steps": "23.77s,7965/16595" }, { "epoch": 2.4001205182283822, "eta": "57:14:20", "grad_norm": 0.0062, "loss": 0.053, "lr": "2.718e-05", "step": 7966, "steps": "23.88s,7966/16595" }, { "epoch": 2.400421813799337, "eta": "56:33:40", "grad_norm": 0.0046, "loss": 0.0373, "lr": "2.718e-05", "step": 7967, "steps": "23.6s,7967/16595" }, { "epoch": 2.400723109370292, "eta": "57:53:48", "grad_norm": 0.0085, "loss": 0.0428, "lr": "2.717e-05", "step": 7968, "steps": "24.16s,7968/16595" }, { "epoch": 2.4010244049412472, "eta": "57:24:38", "grad_norm": 0.006, "loss": 0.0447, "lr": "2.717e-05", "step": 7969, "steps": "23.96s,7969/16595" }, { "epoch": 2.4013257005122024, "eta": "57:02:41", "grad_norm": 0.0048, "loss": 0.0603, "lr": "2.717e-05", "step": 7970, "steps": "23.81s,7970/16595" }, { "epoch": 2.4016269960831576, "eta": "57:25:17", "grad_norm": 0.0054, "loss": 0.0436, "lr": "2.716e-05", "step": 7971, "steps": "23.97s,7971/16595" }, { "epoch": 2.4019282916541127, "eta": "56:53:16", "grad_norm": 0.0065, "loss": 0.0403, "lr": "2.716e-05", "step": 7972, "steps": "23.75s,7972/16595" }, { "epoch": 2.402229587225068, "eta": "57:31:40", "grad_norm": 0.0046, "loss": 0.0479, "lr": "2.715e-05", "step": 7973, "steps": "24.02s,7973/16595" }, { "epoch": 2.402530882796023, "eta": "57:08:17", "grad_norm": 0.0048, "loss": 0.0558, "lr": "2.715e-05", "step": 7974, "steps": "23.86s,7974/16595" }, { "epoch": 2.402832178366978, "eta": "58:34:05", "grad_norm": 0.0052, "loss": 0.0514, "lr": "2.714e-05", "step": 7975, "steps": "24.46s,7975/16595" }, { "epoch": 2.4031334739379333, "eta": "56:24:23", "grad_norm": 0.0047, "loss": 0.0551, "lr": "2.714e-05", "step": 7976, "steps": "23.56s,7976/16595" }, { "epoch": 2.4034347695088885, "eta": "56:57:02", "grad_norm": 0.0057, "loss": 0.0597, "lr": "2.713e-05", "step": 7977, "steps": "23.79s,7977/16595" }, { "epoch": 2.403736065079843, "eta": "56:22:10", "grad_norm": 0.0066, "loss": 0.0577, "lr": "2.713e-05", "step": 7978, "steps": "23.55s,7978/16595" }, { "epoch": 2.4040373606507983, "eta": "57:03:25", "grad_norm": 0.0058, "loss": 0.0465, "lr": "2.712e-05", "step": 7979, "steps": "23.84s,7979/16595" }, { "epoch": 2.4043386562217535, "eta": "57:20:15", "grad_norm": 0.0049, "loss": 0.0656, "lr": "2.712e-05", "step": 7980, "steps": "23.96s,7980/16595" }, { "epoch": 2.4046399517927086, "eta": "56:28:10", "grad_norm": 0.0057, "loss": 0.0648, "lr": "2.711e-05", "step": 7981, "steps": "23.6s,7981/16595" }, { "epoch": 2.4049412473636638, "eta": "56:45:00", "grad_norm": 0.0054, "loss": 0.0399, "lr": "2.711e-05", "step": 7982, "steps": "23.72s,7982/16595" }, { "epoch": 2.405242542934619, "eta": "57:46:19", "grad_norm": 0.0055, "loss": 0.068, "lr": "2.710e-05", "step": 7983, "steps": "24.15s,7983/16595" }, { "epoch": 2.405543838505574, "eta": "57:21:31", "grad_norm": 0.006, "loss": 0.0492, "lr": "2.710e-05", "step": 7984, "steps": "23.98s,7984/16595" }, { "epoch": 2.4058451340765292, "eta": "56:52:25", "grad_norm": 0.005, "loss": 0.0654, "lr": "2.709e-05", "step": 7985, "steps": "23.78s,7985/16595" }, { "epoch": 2.4061464296474844, "eta": "56:59:12", "grad_norm": 0.0052, "loss": 0.045, "lr": "2.709e-05", "step": 7986, "steps": "23.83s,7986/16595" }, { "epoch": 2.406447725218439, "eta": "56:15:46", "grad_norm": 0.0051, "loss": 0.0573, "lr": "2.708e-05", "step": 7987, "steps": "23.53s,7987/16595" }, { "epoch": 2.4067490207893942, "eta": "56:59:50", "grad_norm": 0.0072, "loss": 0.0429, "lr": "2.708e-05", "step": 7988, "steps": "23.84s,7988/16595" }, { "epoch": 2.4070503163603494, "eta": "56:27:53", "grad_norm": 0.0063, "loss": 0.0664, "lr": "2.707e-05", "step": 7989, "steps": "23.62s,7989/16595" }, { "epoch": 2.4073516119313045, "eta": "57:33:28", "grad_norm": 0.006, "loss": 0.041, "lr": "2.707e-05", "step": 7990, "steps": "24.08s,7990/16595" }, { "epoch": 2.4076529075022597, "eta": "56:52:55", "grad_norm": 0.0055, "loss": 0.0446, "lr": "2.707e-05", "step": 7991, "steps": "23.8s,7991/16595" }, { "epoch": 2.407954203073215, "eta": "57:15:27", "grad_norm": 0.005, "loss": 0.041, "lr": "2.706e-05", "step": 7992, "steps": "23.96s,7992/16595" }, { "epoch": 2.40825549864417, "eta": "60:25:44", "grad_norm": 0.0061, "loss": 0.0588, "lr": "2.706e-05", "step": 7993, "steps": "25.29s,7993/16595" }, { "epoch": 2.408556794215125, "eta": "56:57:27", "grad_norm": 0.0049, "loss": 0.0467, "lr": "2.705e-05", "step": 7994, "steps": "23.84s,7994/16595" }, { "epoch": 2.4088580897860803, "eta": "57:34:20", "grad_norm": 0.0055, "loss": 0.0759, "lr": "2.705e-05", "step": 7995, "steps": "24.1s,7995/16595" }, { "epoch": 2.4091593853570354, "eta": "56:58:06", "grad_norm": 0.0046, "loss": 0.044, "lr": "2.704e-05", "step": 7996, "steps": "23.85s,7996/16595" }, { "epoch": 2.4094606809279906, "eta": "56:13:16", "grad_norm": 0.0069, "loss": 0.0632, "lr": "2.704e-05", "step": 7997, "steps": "23.54s,7997/16595" }, { "epoch": 2.4097619764989453, "eta": "56:50:08", "grad_norm": 0.0055, "loss": 0.0528, "lr": "2.703e-05", "step": 7998, "steps": "23.8s,7998/16595" }, { "epoch": 2.4100632720699005, "eta": "57:19:49", "grad_norm": 0.0067, "loss": 0.046, "lr": "2.703e-05", "step": 7999, "steps": "24.01s,7999/16595" }, { "epoch": 2.4103645676408556, "eta": "58:13:52", "grad_norm": 0.0074, "loss": 0.0475, "lr": "2.702e-05", "step": 8000, "steps": "24.39s,8000/16595" }, { "epoch": 2.4106658632118108, "eta": "96:08:00", "grad_norm": 0.0068, "loss": 0.0683, "lr": "2.702e-05", "step": 8001, "steps": "40.27s,8001/16595" }, { "epoch": 2.410967158782766, "eta": "56:51:25", "grad_norm": 0.0068, "loss": 0.042, "lr": "2.701e-05", "step": 8002, "steps": "23.82s,8002/16595" }, { "epoch": 2.411268454353721, "eta": "56:33:50", "grad_norm": 0.0065, "loss": 0.0463, "lr": "2.701e-05", "step": 8003, "steps": "23.7s,8003/16595" }, { "epoch": 2.411569749924676, "eta": "56:33:26", "grad_norm": 0.0076, "loss": 0.0464, "lr": "2.700e-05", "step": 8004, "steps": "23.7s,8004/16595" }, { "epoch": 2.4118710454956314, "eta": "57:10:16", "grad_norm": 0.0057, "loss": 0.0527, "lr": "2.700e-05", "step": 8005, "steps": "23.96s,8005/16595" }, { "epoch": 2.4121723410665865, "eta": "56:21:12", "grad_norm": 0.0046, "loss": 0.0601, "lr": "2.699e-05", "step": 8006, "steps": "23.62s,8006/16595" }, { "epoch": 2.412473636637541, "eta": "58:23:54", "grad_norm": 0.006, "loss": 0.051, "lr": "2.699e-05", "step": 8007, "steps": "24.48s,8007/16595" }, { "epoch": 2.4127749322084964, "eta": "56:13:15", "grad_norm": 0.0088, "loss": 0.0575, "lr": "2.698e-05", "step": 8008, "steps": "23.57s,8008/16595" }, { "epoch": 2.4130762277794515, "eta": "56:54:21", "grad_norm": 0.0063, "loss": 0.0449, "lr": "2.698e-05", "step": 8009, "steps": "23.86s,8009/16595" }, { "epoch": 2.4133775233504067, "eta": "56:41:05", "grad_norm": 0.0054, "loss": 0.06, "lr": "2.698e-05", "step": 8010, "steps": "23.77s,8010/16595" }, { "epoch": 2.413678818921362, "eta": "57:20:45", "grad_norm": 0.0058, "loss": 0.0506, "lr": "2.697e-05", "step": 8011, "steps": "24.05s,8011/16595" }, { "epoch": 2.413980114492317, "eta": "56:50:18", "grad_norm": 0.0055, "loss": 0.0478, "lr": "2.697e-05", "step": 8012, "steps": "23.84s,8012/16595" }, { "epoch": 2.414281410063272, "eta": "56:31:19", "grad_norm": 0.0051, "loss": 0.0584, "lr": "2.696e-05", "step": 8013, "steps": "23.71s,8013/16595" }, { "epoch": 2.4145827056342273, "eta": "56:55:14", "grad_norm": 0.0051, "loss": 0.0519, "lr": "2.696e-05", "step": 8014, "steps": "23.88s,8014/16595" }, { "epoch": 2.4148840012051824, "eta": "56:59:07", "grad_norm": 0.0041, "loss": 0.047, "lr": "2.695e-05", "step": 8015, "steps": "23.91s,8015/16595" }, { "epoch": 2.4151852967761376, "eta": "57:03:01", "grad_norm": 0.0043, "loss": 0.0497, "lr": "2.695e-05", "step": 8016, "steps": "23.94s,8016/16595" }, { "epoch": 2.4154865923470927, "eta": "56:25:27", "grad_norm": 0.0052, "loss": 0.0482, "lr": "2.694e-05", "step": 8017, "steps": "23.68s,8017/16595" }, { "epoch": 2.4157878879180474, "eta": "56:33:37", "grad_norm": 0.0068, "loss": 0.0497, "lr": "2.694e-05", "step": 8018, "steps": "23.74s,8018/16595" }, { "epoch": 2.4160891834890026, "eta": "56:16:05", "grad_norm": 0.0052, "loss": 0.0523, "lr": "2.693e-05", "step": 8019, "steps": "23.62s,8019/16595" }, { "epoch": 2.4163904790599577, "eta": "56:48:33", "grad_norm": 0.0064, "loss": 0.0595, "lr": "2.693e-05", "step": 8020, "steps": "23.85s,8020/16595" }, { "epoch": 2.416691774630913, "eta": "57:26:44", "grad_norm": 0.0051, "loss": 0.0489, "lr": "2.692e-05", "step": 8021, "steps": "24.12s,8021/16595" }, { "epoch": 2.416993070201868, "eta": "57:19:12", "grad_norm": 0.0052, "loss": 0.0673, "lr": "2.692e-05", "step": 8022, "steps": "24.07s,8022/16595" }, { "epoch": 2.417294365772823, "eta": "57:05:56", "grad_norm": 0.0082, "loss": 0.0447, "lr": "2.691e-05", "step": 8023, "steps": "23.98s,8023/16595" }, { "epoch": 2.4175956613437783, "eta": "56:19:49", "grad_norm": 0.0059, "loss": 0.0374, "lr": "2.691e-05", "step": 8024, "steps": "23.66s,8024/16595" }, { "epoch": 2.4178969569147335, "eta": "56:35:08", "grad_norm": 0.0052, "loss": 0.0448, "lr": "2.690e-05", "step": 8025, "steps": "23.77s,8025/16595" }, { "epoch": 2.4181982524856886, "eta": "57:16:10", "grad_norm": 0.0053, "loss": 0.0485, "lr": "2.690e-05", "step": 8026, "steps": "24.06s,8026/16595" }, { "epoch": 2.4184995480566434, "eta": "57:11:29", "grad_norm": 0.0049, "loss": 0.0556, "lr": "2.689e-05", "step": 8027, "steps": "24.03s,8027/16595" }, { "epoch": 2.4188008436275985, "eta": "56:46:48", "grad_norm": 0.0057, "loss": 0.0303, "lr": "2.689e-05", "step": 8028, "steps": "23.86s,8028/16595" }, { "epoch": 2.4191021391985537, "eta": "57:06:24", "grad_norm": 0.0201, "loss": 0.0507, "lr": "2.688e-05", "step": 8029, "steps": "24.0s,8029/16595" }, { "epoch": 2.419403434769509, "eta": "57:18:50", "grad_norm": 0.0056, "loss": 0.0532, "lr": "2.688e-05", "step": 8030, "steps": "24.09s,8030/16595" }, { "epoch": 2.419704730340464, "eta": "56:15:38", "grad_norm": 0.0054, "loss": 0.0448, "lr": "2.688e-05", "step": 8031, "steps": "23.65s,8031/16595" }, { "epoch": 2.420006025911419, "eta": "56:39:30", "grad_norm": 0.0049, "loss": 0.0393, "lr": "2.687e-05", "step": 8032, "steps": "23.82s,8032/16595" }, { "epoch": 2.4203073214823743, "eta": "55:53:27", "grad_norm": 0.0052, "loss": 0.0494, "lr": "2.687e-05", "step": 8033, "steps": "23.5s,8033/16595" }, { "epoch": 2.4206086170533294, "eta": "56:48:42", "grad_norm": 0.0051, "loss": 0.0624, "lr": "2.686e-05", "step": 8034, "steps": "23.89s,8034/16595" }, { "epoch": 2.4209099126242846, "eta": "56:28:20", "grad_norm": 0.0051, "loss": 0.0384, "lr": "2.686e-05", "step": 8035, "steps": "23.75s,8035/16595" }, { "epoch": 2.4212112081952397, "eta": "56:55:02", "grad_norm": 0.0066, "loss": 0.047, "lr": "2.685e-05", "step": 8036, "steps": "23.94s,8036/16595" }, { "epoch": 2.421512503766195, "eta": "55:36:11", "grad_norm": 0.0062, "loss": 0.0417, "lr": "2.685e-05", "step": 8037, "steps": "23.39s,8037/16595" }, { "epoch": 2.4218137993371496, "eta": "56:41:24", "grad_norm": 0.0077, "loss": 0.0512, "lr": "2.684e-05", "step": 8038, "steps": "23.85s,8038/16595" }, { "epoch": 2.4221150949081047, "eta": "56:32:27", "grad_norm": 0.0054, "loss": 0.0473, "lr": "2.684e-05", "step": 8039, "steps": "23.79s,8039/16595" }, { "epoch": 2.42241639047906, "eta": "57:00:34", "grad_norm": 0.0052, "loss": 0.0489, "lr": "2.683e-05", "step": 8040, "steps": "23.99s,8040/16595" }, { "epoch": 2.422717686050015, "eta": "56:44:29", "grad_norm": 0.0065, "loss": 0.0412, "lr": "2.683e-05", "step": 8041, "steps": "23.88s,8041/16595" }, { "epoch": 2.42301898162097, "eta": "56:56:55", "grad_norm": 0.0074, "loss": 0.0519, "lr": "2.682e-05", "step": 8042, "steps": "23.97s,8042/16595" }, { "epoch": 2.4233202771919253, "eta": "56:47:58", "grad_norm": 0.0056, "loss": 0.0486, "lr": "2.682e-05", "step": 8043, "steps": "23.91s,8043/16595" }, { "epoch": 2.4236215727628805, "eta": "57:33:10", "grad_norm": 0.0055, "loss": 0.0393, "lr": "2.681e-05", "step": 8044, "steps": "24.23s,8044/16595" }, { "epoch": 2.4239228683338356, "eta": "55:57:18", "grad_norm": 0.0058, "loss": 0.0603, "lr": "2.681e-05", "step": 8045, "steps": "23.56s,8045/16595" }, { "epoch": 2.424224163904791, "eta": "55:41:14", "grad_norm": 0.0048, "loss": 0.0502, "lr": "2.680e-05", "step": 8046, "steps": "23.45s,8046/16595" }, { "epoch": 2.4245254594757455, "eta": "56:22:09", "grad_norm": 0.0105, "loss": 0.0566, "lr": "2.680e-05", "step": 8047, "steps": "23.74s,8047/16595" }, { "epoch": 2.4248267550467006, "eta": "57:35:50", "grad_norm": 0.0051, "loss": 0.067, "lr": "2.679e-05", "step": 8048, "steps": "24.26s,8048/16595" }, { "epoch": 2.425128050617656, "eta": "56:18:31", "grad_norm": 0.0044, "loss": 0.0471, "lr": "2.679e-05", "step": 8049, "steps": "23.72s,8049/16595" }, { "epoch": 2.425429346188611, "eta": "56:32:21", "grad_norm": 0.0057, "loss": 0.0523, "lr": "2.678e-05", "step": 8050, "steps": "23.82s,8050/16595" }, { "epoch": 2.425730641759566, "eta": "56:23:25", "grad_norm": 0.0067, "loss": 0.0556, "lr": "2.678e-05", "step": 8051, "steps": "23.76s,8051/16595" }, { "epoch": 2.4260319373305212, "eta": "56:44:23", "grad_norm": 0.0084, "loss": 0.045, "lr": "2.678e-05", "step": 8052, "steps": "23.91s,8052/16595" }, { "epoch": 2.4263332329014764, "eta": "57:15:18", "grad_norm": 0.0057, "loss": 0.036, "lr": "2.677e-05", "step": 8053, "steps": "24.13s,8053/16595" }, { "epoch": 2.4266345284724316, "eta": "57:12:03", "grad_norm": 0.0051, "loss": 0.0443, "lr": "2.677e-05", "step": 8054, "steps": "24.11s,8054/16595" }, { "epoch": 2.4269358240433867, "eta": "56:33:13", "grad_norm": 0.0051, "loss": 0.0592, "lr": "2.676e-05", "step": 8055, "steps": "23.84s,8055/16595" }, { "epoch": 2.427237119614342, "eta": "56:47:03", "grad_norm": 0.0052, "loss": 0.0537, "lr": "2.676e-05", "step": 8056, "steps": "23.94s,8056/16595" }, { "epoch": 2.427538415185297, "eta": "56:03:58", "grad_norm": 0.0065, "loss": 0.0414, "lr": "2.675e-05", "step": 8057, "steps": "23.64s,8057/16595" }, { "epoch": 2.4278397107562517, "eta": "55:57:53", "grad_norm": 0.0055, "loss": 0.0428, "lr": "2.675e-05", "step": 8058, "steps": "23.6s,8058/16595" }, { "epoch": 2.428141006327207, "eta": "56:45:51", "grad_norm": 0.0053, "loss": 0.0442, "lr": "2.674e-05", "step": 8059, "steps": "23.94s,8059/16595" }, { "epoch": 2.428442301898162, "eta": "56:07:03", "grad_norm": 0.0049, "loss": 0.0511, "lr": "2.674e-05", "step": 8060, "steps": "23.67s,8060/16595" }, { "epoch": 2.428743597469117, "eta": "57:04:58", "grad_norm": 0.0046, "loss": 0.0492, "lr": "2.673e-05", "step": 8061, "steps": "24.08s,8061/16595" }, { "epoch": 2.4290448930400723, "eta": "55:54:53", "grad_norm": 0.0054, "loss": 0.0686, "lr": "2.673e-05", "step": 8062, "steps": "23.59s,8062/16595" }, { "epoch": 2.4293461886110275, "eta": "56:08:43", "grad_norm": 0.0085, "loss": 0.0443, "lr": "2.672e-05", "step": 8063, "steps": "23.69s,8063/16595" }, { "epoch": 2.4296474841819826, "eta": "56:56:39", "grad_norm": 0.0086, "loss": 0.0493, "lr": "2.672e-05", "step": 8064, "steps": "24.03s,8064/16595" }, { "epoch": 2.4299487797529378, "eta": "56:56:15", "grad_norm": 0.0051, "loss": 0.0479, "lr": "2.671e-05", "step": 8065, "steps": "24.03s,8065/16595" }, { "epoch": 2.430250075323893, "eta": "56:10:22", "grad_norm": 0.0051, "loss": 0.0545, "lr": "2.671e-05", "step": 8066, "steps": "23.71s,8066/16595" }, { "epoch": 2.4305513708948476, "eta": "57:16:47", "grad_norm": 0.0052, "loss": 0.0547, "lr": "2.670e-05", "step": 8067, "steps": "24.18s,8067/16595" }, { "epoch": 2.430852666465803, "eta": "55:53:57", "grad_norm": 0.005, "loss": 0.0531, "lr": "2.670e-05", "step": 8068, "steps": "23.6s,8068/16595" }, { "epoch": 2.431153962036758, "eta": "56:03:30", "grad_norm": 0.0047, "loss": 0.0626, "lr": "2.669e-05", "step": 8069, "steps": "23.67s,8069/16595" }, { "epoch": 2.431455257607713, "eta": "56:15:54", "grad_norm": 0.0052, "loss": 0.0508, "lr": "2.669e-05", "step": 8070, "steps": "23.76s,8070/16595" }, { "epoch": 2.4317565531786682, "eta": "56:22:36", "grad_norm": 0.0049, "loss": 0.0414, "lr": "2.669e-05", "step": 8071, "steps": "23.81s,8071/16595" }, { "epoch": 2.4320578487496234, "eta": "56:36:24", "grad_norm": 0.0053, "loss": 0.0568, "lr": "2.668e-05", "step": 8072, "steps": "23.91s,8072/16595" }, { "epoch": 2.4323591443205785, "eta": "56:23:14", "grad_norm": 0.0057, "loss": 0.0697, "lr": "2.668e-05", "step": 8073, "steps": "23.82s,8073/16595" }, { "epoch": 2.4326604398915337, "eta": "57:01:10", "grad_norm": 0.0061, "loss": 0.0641, "lr": "2.667e-05", "step": 8074, "steps": "24.09s,8074/16595" }, { "epoch": 2.432961735462489, "eta": "56:36:38", "grad_norm": 0.0056, "loss": 0.0648, "lr": "2.667e-05", "step": 8075, "steps": "23.92s,8075/16595" }, { "epoch": 2.433263031033444, "eta": "56:53:16", "grad_norm": 0.0048, "loss": 0.0504, "lr": "2.666e-05", "step": 8076, "steps": "24.04s,8076/16595" }, { "epoch": 2.433564326604399, "eta": "57:25:31", "grad_norm": 0.0052, "loss": 0.0457, "lr": "2.666e-05", "step": 8077, "steps": "24.27s,8077/16595" }, { "epoch": 2.433865622175354, "eta": "56:25:30", "grad_norm": 0.0053, "loss": 0.0364, "lr": "2.665e-05", "step": 8078, "steps": "23.85s,8078/16595" }, { "epoch": 2.434166917746309, "eta": "56:00:58", "grad_norm": 0.0061, "loss": 0.0479, "lr": "2.665e-05", "step": 8079, "steps": "23.68s,8079/16595" }, { "epoch": 2.434468213317264, "eta": "58:04:03", "grad_norm": 0.0049, "loss": 0.0471, "lr": "2.664e-05", "step": 8080, "steps": "24.55s,8080/16595" }, { "epoch": 2.4347695088882193, "eta": "55:53:05", "grad_norm": 0.0056, "loss": 0.0501, "lr": "2.664e-05", "step": 8081, "steps": "23.63s,8081/16595" }, { "epoch": 2.4350708044591745, "eta": "56:55:07", "grad_norm": 0.0047, "loss": 0.0533, "lr": "2.663e-05", "step": 8082, "steps": "24.07s,8082/16595" }, { "epoch": 2.4353721000301296, "eta": "56:43:22", "grad_norm": 0.0075, "loss": 0.0401, "lr": "2.663e-05", "step": 8083, "steps": "23.99s,8083/16595" }, { "epoch": 2.4356733956010848, "eta": "56:00:25", "grad_norm": 0.0057, "loss": 0.0603, "lr": "2.662e-05", "step": 8084, "steps": "23.69s,8084/16595" }, { "epoch": 2.43597469117204, "eta": "56:11:22", "grad_norm": 0.0058, "loss": 0.0445, "lr": "2.662e-05", "step": 8085, "steps": "23.77s,8085/16595" }, { "epoch": 2.436275986742995, "eta": "56:40:45", "grad_norm": 0.0051, "loss": 0.0453, "lr": "2.661e-05", "step": 8086, "steps": "23.98s,8086/16595" }, { "epoch": 2.4365772823139498, "eta": "56:33:16", "grad_norm": 0.0054, "loss": 0.0406, "lr": "2.661e-05", "step": 8087, "steps": "23.93s,8087/16595" }, { "epoch": 2.436878577884905, "eta": "55:57:25", "grad_norm": 0.006, "loss": 0.0579, "lr": "2.660e-05", "step": 8088, "steps": "23.68s,8088/16595" }, { "epoch": 2.43717987345586, "eta": "55:37:11", "grad_norm": 0.0054, "loss": 0.0528, "lr": "2.660e-05", "step": 8089, "steps": "23.54s,8089/16595" }, { "epoch": 2.437481169026815, "eta": "55:53:48", "grad_norm": 0.0067, "loss": 0.0486, "lr": "2.659e-05", "step": 8090, "steps": "23.66s,8090/16595" }, { "epoch": 2.4377824645977704, "eta": "56:11:50", "grad_norm": 0.0046, "loss": 0.0451, "lr": "2.659e-05", "step": 8091, "steps": "23.79s,8091/16595" }, { "epoch": 2.4380837601687255, "eta": "56:48:17", "grad_norm": 0.0044, "loss": 0.049, "lr": "2.659e-05", "step": 8092, "steps": "24.05s,8092/16595" }, { "epoch": 2.4383850557396807, "eta": "56:11:02", "grad_norm": 0.0057, "loss": 0.0445, "lr": "2.658e-05", "step": 8093, "steps": "23.79s,8093/16595" }, { "epoch": 2.438686351310636, "eta": "56:10:38", "grad_norm": 0.0044, "loss": 0.0534, "lr": "2.658e-05", "step": 8094, "steps": "23.79s,8094/16595" }, { "epoch": 2.438987646881591, "eta": "56:44:15", "grad_norm": 0.0043, "loss": 0.0397, "lr": "2.657e-05", "step": 8095, "steps": "24.03s,8095/16595" }, { "epoch": 2.439288942452546, "eta": "57:32:00", "grad_norm": 0.0064, "loss": 0.05, "lr": "2.657e-05", "step": 8096, "steps": "24.37s,8096/16595" }, { "epoch": 2.4395902380235013, "eta": "56:06:37", "grad_norm": 0.0065, "loss": 0.0441, "lr": "2.656e-05", "step": 8097, "steps": "23.77s,8097/16595" }, { "epoch": 2.439891533594456, "eta": "56:35:58", "grad_norm": 0.0058, "loss": 0.0425, "lr": "2.656e-05", "step": 8098, "steps": "23.98s,8098/16595" }, { "epoch": 2.440192829165411, "eta": "56:59:38", "grad_norm": 0.0063, "loss": 0.0504, "lr": "2.655e-05", "step": 8099, "steps": "24.15s,8099/16595" }, { "epoch": 2.4404941247363663, "eta": "56:16:45", "grad_norm": 0.0046, "loss": 0.0521, "lr": "2.655e-05", "step": 8100, "steps": "23.85s,8100/16595" }, { "epoch": 2.4407954203073214, "eta": "56:20:36", "grad_norm": 0.0064, "loss": 0.0422, "lr": "2.654e-05", "step": 8101, "steps": "23.88s,8101/16595" }, { "epoch": 2.4410967158782766, "eta": "56:31:32", "grad_norm": 0.0053, "loss": 0.045, "lr": "2.654e-05", "step": 8102, "steps": "23.96s,8102/16595" }, { "epoch": 2.4413980114492317, "eta": "57:06:31", "grad_norm": 0.0056, "loss": 0.0382, "lr": "2.653e-05", "step": 8103, "steps": "24.21s,8103/16595" }, { "epoch": 2.441699307020187, "eta": "55:53:56", "grad_norm": 0.006, "loss": 0.0554, "lr": "2.653e-05", "step": 8104, "steps": "23.7s,8104/16595" }, { "epoch": 2.442000602591142, "eta": "56:19:01", "grad_norm": 0.0055, "loss": 0.0594, "lr": "2.652e-05", "step": 8105, "steps": "23.88s,8105/16595" }, { "epoch": 2.442301898162097, "eta": "57:13:48", "grad_norm": 0.0081, "loss": 0.0467, "lr": "2.652e-05", "step": 8106, "steps": "24.27s,8106/16595" }, { "epoch": 2.442603193733052, "eta": "56:50:45", "grad_norm": 0.0114, "loss": 0.0588, "lr": "2.651e-05", "step": 8107, "steps": "24.11s,8107/16595" }, { "epoch": 2.442904489304007, "eta": "56:09:20", "grad_norm": 0.0065, "loss": 0.0392, "lr": "2.651e-05", "step": 8108, "steps": "23.82s,8108/16595" }, { "epoch": 2.443205784874962, "eta": "56:48:32", "grad_norm": 0.0059, "loss": 0.0586, "lr": "2.650e-05", "step": 8109, "steps": "24.1s,8109/16595" }, { "epoch": 2.4435070804459174, "eta": "56:21:16", "grad_norm": 0.0052, "loss": 0.0422, "lr": "2.650e-05", "step": 8110, "steps": "23.91s,8110/16595" }, { "epoch": 2.4438083760168725, "eta": "55:21:29", "grad_norm": 0.0063, "loss": 0.0492, "lr": "2.649e-05", "step": 8111, "steps": "23.49s,8111/16595" }, { "epoch": 2.4441096715878277, "eta": "56:13:24", "grad_norm": 0.0053, "loss": 0.0543, "lr": "2.649e-05", "step": 8112, "steps": "23.86s,8112/16595" }, { "epoch": 2.444410967158783, "eta": "56:31:23", "grad_norm": 0.0052, "loss": 0.0605, "lr": "2.649e-05", "step": 8113, "steps": "23.99s,8113/16595" }, { "epoch": 2.444712262729738, "eta": "56:05:32", "grad_norm": 0.0052, "loss": 0.0496, "lr": "2.648e-05", "step": 8114, "steps": "23.81s,8114/16595" }, { "epoch": 2.445013558300693, "eta": "56:43:18", "grad_norm": 0.0052, "loss": 0.0481, "lr": "2.648e-05", "step": 8115, "steps": "24.08s,8115/16595" }, { "epoch": 2.4453148538716483, "eta": "55:43:33", "grad_norm": 0.0052, "loss": 0.0305, "lr": "2.647e-05", "step": 8116, "steps": "23.66s,8116/16595" }, { "epoch": 2.4456161494426034, "eta": "55:36:05", "grad_norm": 0.0049, "loss": 0.0558, "lr": "2.647e-05", "step": 8117, "steps": "23.61s,8117/16595" }, { "epoch": 2.445917445013558, "eta": "56:25:08", "grad_norm": 0.0053, "loss": 0.0521, "lr": "2.646e-05", "step": 8118, "steps": "23.96s,8118/16595" }, { "epoch": 2.4462187405845133, "eta": "56:13:26", "grad_norm": 0.0047, "loss": 0.0473, "lr": "2.646e-05", "step": 8119, "steps": "23.88s,8119/16595" }, { "epoch": 2.4465200361554684, "eta": "56:44:07", "grad_norm": 0.005, "loss": 0.0588, "lr": "2.645e-05", "step": 8120, "steps": "24.1s,8120/16595" }, { "epoch": 2.4468213317264236, "eta": "56:36:39", "grad_norm": 0.0094, "loss": 0.0596, "lr": "2.645e-05", "step": 8121, "steps": "24.05s,8121/16595" }, { "epoch": 2.4471226272973787, "eta": "55:56:43", "grad_norm": 0.0051, "loss": 0.0442, "lr": "2.644e-05", "step": 8122, "steps": "23.77s,8122/16595" }, { "epoch": 2.447423922868334, "eta": "55:33:43", "grad_norm": 0.005, "loss": 0.0561, "lr": "2.644e-05", "step": 8123, "steps": "23.61s,8123/16595" }, { "epoch": 2.447725218439289, "eta": "56:19:55", "grad_norm": 0.006, "loss": 0.0307, "lr": "2.643e-05", "step": 8124, "steps": "23.94s,8124/16595" }, { "epoch": 2.448026514010244, "eta": "57:18:49", "grad_norm": 0.0053, "loss": 0.0579, "lr": "2.643e-05", "step": 8125, "steps": "24.36s,8125/16595" }, { "epoch": 2.4483278095811993, "eta": "55:50:54", "grad_norm": 0.0046, "loss": 0.0461, "lr": "2.642e-05", "step": 8126, "steps": "23.74s,8126/16595" }, { "epoch": 2.448629105152154, "eta": "55:32:09", "grad_norm": 0.005, "loss": 0.0684, "lr": "2.642e-05", "step": 8127, "steps": "23.61s,8127/16595" }, { "epoch": 2.448930400723109, "eta": "55:57:09", "grad_norm": 0.0051, "loss": 0.0579, "lr": "2.641e-05", "step": 8128, "steps": "23.79s,8128/16595" }, { "epoch": 2.4492316962940643, "eta": "55:53:56", "grad_norm": 0.0064, "loss": 0.0415, "lr": "2.641e-05", "step": 8129, "steps": "23.77s,8129/16595" }, { "epoch": 2.4495329918650195, "eta": "55:52:08", "grad_norm": 0.0051, "loss": 0.0451, "lr": "2.640e-05", "step": 8130, "steps": "23.76s,8130/16595" }, { "epoch": 2.4498342874359746, "eta": "55:58:47", "grad_norm": 0.0056, "loss": 0.0514, "lr": "2.640e-05", "step": 8131, "steps": "23.81s,8131/16595" }, { "epoch": 2.45013558300693, "eta": "55:51:20", "grad_norm": 0.0051, "loss": 0.0455, "lr": "2.639e-05", "step": 8132, "steps": "23.76s,8132/16595" }, { "epoch": 2.450436878577885, "eta": "56:58:38", "grad_norm": 0.0052, "loss": 0.0466, "lr": "2.639e-05", "step": 8133, "steps": "24.24s,8133/16595" }, { "epoch": 2.45073817414884, "eta": "55:51:57", "grad_norm": 0.0059, "loss": 0.0696, "lr": "2.639e-05", "step": 8134, "steps": "23.77s,8134/16595" }, { "epoch": 2.4510394697197952, "eta": "56:02:51", "grad_norm": 0.0056, "loss": 0.0567, "lr": "2.638e-05", "step": 8135, "steps": "23.85s,8135/16595" }, { "epoch": 2.4513407652907504, "eta": "56:53:12", "grad_norm": 0.0049, "loss": 0.0515, "lr": "2.638e-05", "step": 8136, "steps": "24.21s,8136/16595" }, { "epoch": 2.4516420608617056, "eta": "56:03:27", "grad_norm": 0.0054, "loss": 0.0443, "lr": "2.637e-05", "step": 8137, "steps": "23.86s,8137/16595" }, { "epoch": 2.4519433564326603, "eta": "55:26:25", "grad_norm": 0.0055, "loss": 0.0437, "lr": "2.637e-05", "step": 8138, "steps": "23.6s,8138/16595" }, { "epoch": 2.4522446520036154, "eta": "56:04:04", "grad_norm": 0.0043, "loss": 0.0399, "lr": "2.636e-05", "step": 8139, "steps": "23.87s,8139/16595" }, { "epoch": 2.4525459475745706, "eta": "56:24:49", "grad_norm": 0.0048, "loss": 0.0478, "lr": "2.636e-05", "step": 8140, "steps": "24.02s,8140/16595" }, { "epoch": 2.4528472431455257, "eta": "57:06:41", "grad_norm": 0.0051, "loss": 0.0411, "lr": "2.635e-05", "step": 8141, "steps": "24.32s,8141/16595" }, { "epoch": 2.453148538716481, "eta": "56:26:50", "grad_norm": 0.0063, "loss": 0.0504, "lr": "2.635e-05", "step": 8142, "steps": "24.04s,8142/16595" }, { "epoch": 2.453449834287436, "eta": "55:58:15", "grad_norm": 0.0054, "loss": 0.0456, "lr": "2.634e-05", "step": 8143, "steps": "23.84s,8143/16595" }, { "epoch": 2.453751129858391, "eta": "55:57:51", "grad_norm": 0.0095, "loss": 0.0585, "lr": "2.634e-05", "step": 8144, "steps": "23.84s,8144/16595" }, { "epoch": 2.4540524254293463, "eta": "56:29:51", "grad_norm": 0.0046, "loss": 0.0576, "lr": "2.633e-05", "step": 8145, "steps": "24.07s,8145/16595" }, { "epoch": 2.4543537210003015, "eta": "55:00:44", "grad_norm": 0.0049, "loss": 0.0578, "lr": "2.633e-05", "step": 8146, "steps": "23.44s,8146/16595" }, { "epoch": 2.454655016571256, "eta": "56:19:12", "grad_norm": 0.005, "loss": 0.0542, "lr": "2.632e-05", "step": 8147, "steps": "24.0s,8147/16595" }, { "epoch": 2.4549563121422113, "eta": "56:13:10", "grad_norm": 0.0055, "loss": 0.0496, "lr": "2.632e-05", "step": 8148, "steps": "23.96s,8148/16595" }, { "epoch": 2.4552576077131665, "eta": "55:24:54", "grad_norm": 0.0054, "loss": 0.0459, "lr": "2.631e-05", "step": 8149, "steps": "23.62s,8149/16595" }, { "epoch": 2.4555589032841216, "eta": "55:31:33", "grad_norm": 0.0062, "loss": 0.0521, "lr": "2.631e-05", "step": 8150, "steps": "23.67s,8150/16595" }, { "epoch": 2.455860198855077, "eta": "56:48:33", "grad_norm": 0.0067, "loss": 0.0569, "lr": "2.630e-05", "step": 8151, "steps": "24.22s,8151/16595" }, { "epoch": 2.456161494426032, "eta": "56:15:47", "grad_norm": 0.0068, "loss": 0.0564, "lr": "2.630e-05", "step": 8152, "steps": "23.99s,8152/16595" }, { "epoch": 2.456462789996987, "eta": "55:57:06", "grad_norm": 0.0053, "loss": 0.039, "lr": "2.629e-05", "step": 8153, "steps": "23.86s,8153/16595" }, { "epoch": 2.4567640855679422, "eta": "55:35:36", "grad_norm": 0.006, "loss": 0.0588, "lr": "2.629e-05", "step": 8154, "steps": "23.71s,8154/16595" }, { "epoch": 2.4570653811388974, "eta": "55:30:59", "grad_norm": 0.0064, "loss": 0.0618, "lr": "2.629e-05", "step": 8155, "steps": "23.68s,8155/16595" }, { "epoch": 2.4573666767098525, "eta": "56:09:58", "grad_norm": 0.0056, "loss": 0.047, "lr": "2.628e-05", "step": 8156, "steps": "23.96s,8156/16595" }, { "epoch": 2.4576679722808077, "eta": "55:52:41", "grad_norm": 0.0052, "loss": 0.0611, "lr": "2.628e-05", "step": 8157, "steps": "23.84s,8157/16595" }, { "epoch": 2.4579692678517624, "eta": "56:06:21", "grad_norm": 0.0055, "loss": 0.043, "lr": "2.627e-05", "step": 8158, "steps": "23.94s,8158/16595" }, { "epoch": 2.4582705634227175, "eta": "55:50:29", "grad_norm": 0.0054, "loss": 0.0681, "lr": "2.627e-05", "step": 8159, "steps": "23.83s,8159/16595" }, { "epoch": 2.4585718589936727, "eta": "55:47:17", "grad_norm": 0.0056, "loss": 0.052, "lr": "2.626e-05", "step": 8160, "steps": "23.81s,8160/16595" }, { "epoch": 2.458873154564628, "eta": "55:44:04", "grad_norm": 0.0054, "loss": 0.0473, "lr": "2.626e-05", "step": 8161, "steps": "23.79s,8161/16595" }, { "epoch": 2.459174450135583, "eta": "55:22:36", "grad_norm": 0.0053, "loss": 0.0496, "lr": "2.625e-05", "step": 8162, "steps": "23.64s,8162/16595" }, { "epoch": 2.459475745706538, "eta": "56:05:46", "grad_norm": 0.0056, "loss": 0.0439, "lr": "2.625e-05", "step": 8163, "steps": "23.95s,8163/16595" }, { "epoch": 2.4597770412774933, "eta": "55:24:37", "grad_norm": 0.0049, "loss": 0.0512, "lr": "2.624e-05", "step": 8164, "steps": "23.66s,8164/16595" }, { "epoch": 2.4600783368484485, "eta": "56:14:48", "grad_norm": 0.0047, "loss": 0.0597, "lr": "2.624e-05", "step": 8165, "steps": "24.02s,8165/16595" }, { "epoch": 2.4603796324194036, "eta": "55:12:35", "grad_norm": 0.0049, "loss": 0.0516, "lr": "2.623e-05", "step": 8166, "steps": "23.58s,8166/16595" }, { "epoch": 2.4606809279903583, "eta": "55:52:56", "grad_norm": 0.0049, "loss": 0.07, "lr": "2.623e-05", "step": 8167, "steps": "23.87s,8167/16595" }, { "epoch": 2.4609822235613135, "eta": "56:10:48", "grad_norm": 0.0058, "loss": 0.0612, "lr": "2.622e-05", "step": 8168, "steps": "24.0s,8168/16595" }, { "epoch": 2.4612835191322686, "eta": "55:31:04", "grad_norm": 0.0055, "loss": 0.049, "lr": "2.622e-05", "step": 8169, "steps": "23.72s,8169/16595" }, { "epoch": 2.4615848147032238, "eta": "56:35:16", "grad_norm": 0.0062, "loss": 0.0491, "lr": "2.621e-05", "step": 8170, "steps": "24.18s,8170/16595" }, { "epoch": 2.461886110274179, "eta": "56:13:48", "grad_norm": 0.0056, "loss": 0.045, "lr": "2.621e-05", "step": 8171, "steps": "24.03s,8171/16595" }, { "epoch": 2.462187405845134, "eta": "55:31:17", "grad_norm": 0.0049, "loss": 0.0539, "lr": "2.620e-05", "step": 8172, "steps": "23.73s,8172/16595" }, { "epoch": 2.462488701416089, "eta": "56:22:50", "grad_norm": 0.0058, "loss": 0.0517, "lr": "2.620e-05", "step": 8173, "steps": "24.1s,8173/16595" }, { "epoch": 2.4627899969870444, "eta": "56:35:03", "grad_norm": 0.0051, "loss": 0.0503, "lr": "2.619e-05", "step": 8174, "steps": "24.19s,8174/16595" }, { "epoch": 2.4630912925579995, "eta": "55:58:10", "grad_norm": 0.0058, "loss": 0.0593, "lr": "2.619e-05", "step": 8175, "steps": "23.93s,8175/16595" }, { "epoch": 2.4633925881289547, "eta": "55:56:22", "grad_norm": 0.0067, "loss": 0.0504, "lr": "2.619e-05", "step": 8176, "steps": "23.92s,8176/16595" }, { "epoch": 2.46369388369991, "eta": "55:44:45", "grad_norm": 0.0058, "loss": 0.0416, "lr": "2.618e-05", "step": 8177, "steps": "23.84s,8177/16595" }, { "epoch": 2.4639951792708645, "eta": "55:21:54", "grad_norm": 0.0056, "loss": 0.0457, "lr": "2.618e-05", "step": 8178, "steps": "23.68s,8178/16595" }, { "epoch": 2.4642964748418197, "eta": "55:03:16", "grad_norm": 0.005, "loss": 0.0442, "lr": "2.617e-05", "step": 8179, "steps": "23.55s,8179/16595" }, { "epoch": 2.464597770412775, "eta": "55:40:45", "grad_norm": 0.0049, "loss": 0.0585, "lr": "2.617e-05", "step": 8180, "steps": "23.82s,8180/16595" }, { "epoch": 2.46489906598373, "eta": "56:02:47", "grad_norm": 0.0053, "loss": 0.0569, "lr": "2.616e-05", "step": 8181, "steps": "23.98s,8181/16595" }, { "epoch": 2.465200361554685, "eta": "55:59:35", "grad_norm": 0.0056, "loss": 0.0598, "lr": "2.616e-05", "step": 8182, "steps": "23.96s,8182/16595" }, { "epoch": 2.4655016571256403, "eta": "55:38:09", "grad_norm": 0.0057, "loss": 0.0461, "lr": "2.615e-05", "step": 8183, "steps": "23.81s,8183/16595" }, { "epoch": 2.4658029526965954, "eta": "55:37:45", "grad_norm": 0.0067, "loss": 0.0465, "lr": "2.615e-05", "step": 8184, "steps": "23.81s,8184/16595" }, { "epoch": 2.4661042482675506, "eta": "55:19:08", "grad_norm": 0.0064, "loss": 0.0435, "lr": "2.614e-05", "step": 8185, "steps": "23.68s,8185/16595" }, { "epoch": 2.4664055438385057, "eta": "56:41:26", "grad_norm": 0.0054, "loss": 0.0535, "lr": "2.614e-05", "step": 8186, "steps": "24.27s,8186/16595" }, { "epoch": 2.4667068394094604, "eta": "56:03:12", "grad_norm": 0.0052, "loss": 0.0483, "lr": "2.613e-05", "step": 8187, "steps": "24.0s,8187/16595" }, { "epoch": 2.4670081349804156, "eta": "55:59:59", "grad_norm": 0.0057, "loss": 0.0444, "lr": "2.613e-05", "step": 8188, "steps": "23.98s,8188/16595" }, { "epoch": 2.4673094305513708, "eta": "55:44:11", "grad_norm": 0.0058, "loss": 0.0481, "lr": "2.612e-05", "step": 8189, "steps": "23.87s,8189/16595" }, { "epoch": 2.467610726122326, "eta": "55:35:23", "grad_norm": 0.0058, "loss": 0.0421, "lr": "2.612e-05", "step": 8190, "steps": "23.81s,8190/16595" }, { "epoch": 2.467912021693281, "eta": "55:19:34", "grad_norm": 0.0059, "loss": 0.0479, "lr": "2.611e-05", "step": 8191, "steps": "23.7s,8191/16595" }, { "epoch": 2.468213317264236, "eta": "57:01:25", "grad_norm": 0.0052, "loss": 0.0586, "lr": "2.611e-05", "step": 8192, "steps": "24.43s,8192/16595" }, { "epoch": 2.4685146128351914, "eta": "55:22:59", "grad_norm": 0.0056, "loss": 0.0586, "lr": "2.610e-05", "step": 8193, "steps": "23.73s,8193/16595" }, { "epoch": 2.4688159084061465, "eta": "55:07:11", "grad_norm": 0.0054, "loss": 0.0447, "lr": "2.610e-05", "step": 8194, "steps": "23.62s,8194/16595" }, { "epoch": 2.4691172039771017, "eta": "54:55:36", "grad_norm": 0.0066, "loss": 0.0538, "lr": "2.609e-05", "step": 8195, "steps": "23.54s,8195/16595" }, { "epoch": 2.469418499548057, "eta": "55:40:00", "grad_norm": 0.0053, "loss": 0.0675, "lr": "2.609e-05", "step": 8196, "steps": "23.86s,8196/16595" }, { "epoch": 2.469719795119012, "eta": "55:45:12", "grad_norm": 0.0075, "loss": 0.0504, "lr": "2.609e-05", "step": 8197, "steps": "23.9s,8197/16595" }, { "epoch": 2.4700210906899667, "eta": "55:09:49", "grad_norm": 0.0053, "loss": 0.0321, "lr": "2.608e-05", "step": 8198, "steps": "23.65s,8198/16595" }, { "epoch": 2.470322386260922, "eta": "55:48:36", "grad_norm": 0.0059, "loss": 0.0375, "lr": "2.608e-05", "step": 8199, "steps": "23.93s,8199/16595" }, { "epoch": 2.470623681831877, "eta": "56:21:47", "grad_norm": 0.0048, "loss": 0.0464, "lr": "2.607e-05", "step": 8200, "steps": "24.17s,8200/16595" } ], "logging_steps": 1, "max_steps": 16595, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }