Jinwu01's picture
Upload folder using huggingface_hub
c3dc34b verified
{"current_steps": 10, "total_steps": 5000, "loss": 0.1903, "lr": 2e-08, "epoch": 0.002, "percentage": 0.2, "elapsed_time": "0:00:02", "remaining_time": "0:19:48"}
{"current_steps": 20, "total_steps": 5000, "loss": 0.2403, "lr": 4e-08, "epoch": 0.004, "percentage": 0.4, "elapsed_time": "0:00:04", "remaining_time": "0:17:29"}
{"current_steps": 30, "total_steps": 5000, "loss": 0.3415, "lr": 6e-08, "epoch": 0.006, "percentage": 0.6, "elapsed_time": "0:00:06", "remaining_time": "0:17:20"}
{"current_steps": 40, "total_steps": 5000, "loss": 0.2319, "lr": 8e-08, "epoch": 0.008, "percentage": 0.8, "elapsed_time": "0:00:08", "remaining_time": "0:16:46"}
{"current_steps": 50, "total_steps": 5000, "loss": 0.1534, "lr": 1e-07, "epoch": 0.01, "percentage": 1.0, "elapsed_time": "0:00:09", "remaining_time": "0:15:46"}
{"current_steps": 60, "total_steps": 5000, "loss": 0.2303, "lr": 1.2e-07, "epoch": 0.012, "percentage": 1.2, "elapsed_time": "0:00:11", "remaining_time": "0:15:20"}
{"current_steps": 70, "total_steps": 5000, "loss": 0.3153, "lr": 1.4e-07, "epoch": 0.014, "percentage": 1.4, "elapsed_time": "0:00:13", "remaining_time": "0:15:31"}
{"current_steps": 80, "total_steps": 5000, "loss": 0.2112, "lr": 1.6e-07, "epoch": 0.016, "percentage": 1.6, "elapsed_time": "0:00:14", "remaining_time": "0:15:03"}
{"current_steps": 90, "total_steps": 5000, "loss": 0.204, "lr": 1.8e-07, "epoch": 0.018, "percentage": 1.8, "elapsed_time": "0:00:16", "remaining_time": "0:14:41"}
{"current_steps": 100, "total_steps": 5000, "loss": 0.1409, "lr": 2e-07, "epoch": 0.02, "percentage": 2.0, "elapsed_time": "0:00:17", "remaining_time": "0:14:15"}
{"current_steps": 110, "total_steps": 5000, "loss": 0.2463, "lr": 2.1999999999999998e-07, "epoch": 0.022, "percentage": 2.2, "elapsed_time": "0:00:19", "remaining_time": "0:14:09"}
{"current_steps": 120, "total_steps": 5000, "loss": 0.2355, "lr": 2.4e-07, "epoch": 0.024, "percentage": 2.4, "elapsed_time": "0:00:20", "remaining_time": "0:14:04"}
{"current_steps": 130, "total_steps": 5000, "loss": 0.2277, "lr": 2.6e-07, "epoch": 0.026, "percentage": 2.6, "elapsed_time": "0:00:22", "remaining_time": "0:13:59"}
{"current_steps": 140, "total_steps": 5000, "loss": 0.1573, "lr": 2.8e-07, "epoch": 0.028, "percentage": 2.8, "elapsed_time": "0:00:23", "remaining_time": "0:13:42"}
{"current_steps": 150, "total_steps": 5000, "loss": 0.2451, "lr": 3e-07, "epoch": 0.03, "percentage": 3.0, "elapsed_time": "0:00:25", "remaining_time": "0:13:40"}
{"current_steps": 160, "total_steps": 5000, "loss": 0.1121, "lr": 3.2e-07, "epoch": 0.032, "percentage": 3.2, "elapsed_time": "0:00:26", "remaining_time": "0:13:26"}
{"current_steps": 170, "total_steps": 5000, "loss": 0.2163, "lr": 3.4000000000000003e-07, "epoch": 0.034, "percentage": 3.4, "elapsed_time": "0:00:28", "remaining_time": "0:13:25"}
{"current_steps": 180, "total_steps": 5000, "loss": 0.2381, "lr": 3.6e-07, "epoch": 0.036, "percentage": 3.6, "elapsed_time": "0:00:29", "remaining_time": "0:13:23"}
{"current_steps": 190, "total_steps": 5000, "loss": 0.2397, "lr": 3.7999999999999996e-07, "epoch": 0.038, "percentage": 3.8, "elapsed_time": "0:00:31", "remaining_time": "0:13:19"}
{"current_steps": 200, "total_steps": 5000, "loss": 0.2632, "lr": 4e-07, "epoch": 0.04, "percentage": 4.0, "elapsed_time": "0:00:33", "remaining_time": "0:13:16"}
{"current_steps": 210, "total_steps": 5000, "loss": 0.216, "lr": 4.1999999999999995e-07, "epoch": 0.042, "percentage": 4.2, "elapsed_time": "0:00:34", "remaining_time": "0:13:15"}
{"current_steps": 220, "total_steps": 5000, "loss": 0.1347, "lr": 4.3999999999999997e-07, "epoch": 0.044, "percentage": 4.4, "elapsed_time": "0:00:35", "remaining_time": "0:13:00"}
{"current_steps": 230, "total_steps": 5000, "loss": 0.1863, "lr": 4.6e-07, "epoch": 0.046, "percentage": 4.6, "elapsed_time": "0:00:37", "remaining_time": "0:12:54"}
{"current_steps": 240, "total_steps": 5000, "loss": 0.0353, "lr": 4.8e-07, "epoch": 0.048, "percentage": 4.8, "elapsed_time": "0:00:38", "remaining_time": "0:12:36"}
{"current_steps": 250, "total_steps": 5000, "loss": 0.2479, "lr": 5e-07, "epoch": 0.05, "percentage": 5.0, "elapsed_time": "0:00:39", "remaining_time": "0:12:35"}
{"current_steps": 260, "total_steps": 5000, "loss": 0.2976, "lr": 5.2e-07, "epoch": 0.052, "percentage": 5.2, "elapsed_time": "0:00:41", "remaining_time": "0:12:39"}
{"current_steps": 270, "total_steps": 5000, "loss": 0.218, "lr": 5.4e-07, "epoch": 0.054, "percentage": 5.4, "elapsed_time": "0:00:43", "remaining_time": "0:12:37"}
{"current_steps": 280, "total_steps": 5000, "loss": 0.1472, "lr": 5.6e-07, "epoch": 0.056, "percentage": 5.6, "elapsed_time": "0:00:44", "remaining_time": "0:12:32"}
{"current_steps": 290, "total_steps": 5000, "loss": 0.1957, "lr": 5.8e-07, "epoch": 0.058, "percentage": 5.8, "elapsed_time": "0:00:46", "remaining_time": "0:12:31"}
{"current_steps": 300, "total_steps": 5000, "loss": 0.2272, "lr": 6e-07, "epoch": 0.06, "percentage": 6.0, "elapsed_time": "0:00:48", "remaining_time": "0:12:33"}
{"current_steps": 310, "total_steps": 5000, "loss": 0.201, "lr": 6.2e-07, "epoch": 0.062, "percentage": 6.2, "elapsed_time": "0:00:49", "remaining_time": "0:12:29"}
{"current_steps": 320, "total_steps": 5000, "loss": 0.188, "lr": 6.4e-07, "epoch": 0.064, "percentage": 6.4, "elapsed_time": "0:00:50", "remaining_time": "0:12:25"}
{"current_steps": 330, "total_steps": 5000, "loss": 0.2443, "lr": 6.6e-07, "epoch": 0.066, "percentage": 6.6, "elapsed_time": "0:00:52", "remaining_time": "0:12:27"}
{"current_steps": 340, "total_steps": 5000, "loss": 0.3293, "lr": 6.800000000000001e-07, "epoch": 0.068, "percentage": 6.8, "elapsed_time": "0:00:54", "remaining_time": "0:12:31"}
{"current_steps": 350, "total_steps": 5000, "loss": 0.1562, "lr": 7e-07, "epoch": 0.07, "percentage": 7.0, "elapsed_time": "0:00:56", "remaining_time": "0:12:25"}
{"current_steps": 360, "total_steps": 5000, "loss": 0.174, "lr": 7.2e-07, "epoch": 0.072, "percentage": 7.2, "elapsed_time": "0:00:57", "remaining_time": "0:12:19"}
{"current_steps": 370, "total_steps": 5000, "loss": 0.2139, "lr": 7.4e-07, "epoch": 0.074, "percentage": 7.4, "elapsed_time": "0:00:58", "remaining_time": "0:12:18"}
{"current_steps": 380, "total_steps": 5000, "loss": 0.1278, "lr": 7.599999999999999e-07, "epoch": 0.076, "percentage": 7.6, "elapsed_time": "0:01:00", "remaining_time": "0:12:11"}
{"current_steps": 390, "total_steps": 5000, "loss": 0.2084, "lr": 7.799999999999999e-07, "epoch": 0.078, "percentage": 7.8, "elapsed_time": "0:01:01", "remaining_time": "0:12:08"}
{"current_steps": 400, "total_steps": 5000, "loss": 0.1944, "lr": 8e-07, "epoch": 0.08, "percentage": 8.0, "elapsed_time": "0:01:03", "remaining_time": "0:12:05"}
{"current_steps": 410, "total_steps": 5000, "loss": 0.2289, "lr": 8.199999999999999e-07, "epoch": 0.082, "percentage": 8.2, "elapsed_time": "0:01:04", "remaining_time": "0:12:04"}
{"current_steps": 420, "total_steps": 5000, "loss": 0.2451, "lr": 8.399999999999999e-07, "epoch": 0.084, "percentage": 8.4, "elapsed_time": "0:01:06", "remaining_time": "0:12:01"}
{"current_steps": 430, "total_steps": 5000, "loss": 0.2605, "lr": 8.599999999999999e-07, "epoch": 0.086, "percentage": 8.6, "elapsed_time": "0:01:07", "remaining_time": "0:11:58"}
{"current_steps": 440, "total_steps": 5000, "loss": 0.1546, "lr": 8.799999999999999e-07, "epoch": 0.088, "percentage": 8.8, "elapsed_time": "0:01:09", "remaining_time": "0:11:55"}
{"current_steps": 450, "total_steps": 5000, "loss": 0.201, "lr": 9e-07, "epoch": 0.09, "percentage": 9.0, "elapsed_time": "0:01:10", "remaining_time": "0:11:54"}
{"current_steps": 460, "total_steps": 5000, "loss": 0.1955, "lr": 9.2e-07, "epoch": 0.092, "percentage": 9.2, "elapsed_time": "0:01:12", "remaining_time": "0:11:53"}
{"current_steps": 470, "total_steps": 5000, "loss": 0.3065, "lr": 9.399999999999999e-07, "epoch": 0.094, "percentage": 9.4, "elapsed_time": "0:01:14", "remaining_time": "0:11:54"}
{"current_steps": 480, "total_steps": 5000, "loss": 0.2489, "lr": 9.6e-07, "epoch": 0.096, "percentage": 9.6, "elapsed_time": "0:01:16", "remaining_time": "0:11:55"}
{"current_steps": 490, "total_steps": 5000, "loss": 0.4369, "lr": 9.8e-07, "epoch": 0.098, "percentage": 9.8, "elapsed_time": "0:01:18", "remaining_time": "0:12:02"}
{"current_steps": 500, "total_steps": 5000, "loss": 0.2276, "lr": 1e-06, "epoch": 0.1, "percentage": 10.0, "elapsed_time": "0:01:20", "remaining_time": "0:12:01"}
{"current_steps": 510, "total_steps": 5000, "loss": 0.1132, "lr": 9.999878153526972e-07, "epoch": 0.102, "percentage": 10.2, "elapsed_time": "0:01:21", "remaining_time": "0:11:56"}
{"current_steps": 520, "total_steps": 5000, "loss": 0.3207, "lr": 9.99951262004652e-07, "epoch": 0.104, "percentage": 10.4, "elapsed_time": "0:01:23", "remaining_time": "0:11:58"}
{"current_steps": 530, "total_steps": 5000, "loss": 0.1522, "lr": 9.998903417374226e-07, "epoch": 0.106, "percentage": 10.6, "elapsed_time": "0:01:24", "remaining_time": "0:11:56"}
{"current_steps": 540, "total_steps": 5000, "loss": 0.1751, "lr": 9.99805057520177e-07, "epoch": 0.108, "percentage": 10.8, "elapsed_time": "0:01:26", "remaining_time": "0:11:53"}
{"current_steps": 550, "total_steps": 5000, "loss": 0.1791, "lr": 9.996954135095478e-07, "epoch": 0.11, "percentage": 11.0, "elapsed_time": "0:01:27", "remaining_time": "0:11:50"}
{"current_steps": 560, "total_steps": 5000, "loss": 0.2804, "lr": 9.99561415049429e-07, "epoch": 0.112, "percentage": 11.2, "elapsed_time": "0:01:29", "remaining_time": "0:11:50"}
{"current_steps": 570, "total_steps": 5000, "loss": 0.1606, "lr": 9.99403068670717e-07, "epoch": 0.114, "percentage": 11.4, "elapsed_time": "0:01:31", "remaining_time": "0:11:47"}
{"current_steps": 580, "total_steps": 5000, "loss": 0.2415, "lr": 9.992203820909905e-07, "epoch": 0.116, "percentage": 11.6, "elapsed_time": "0:01:32", "remaining_time": "0:11:48"}
{"current_steps": 590, "total_steps": 5000, "loss": 0.204, "lr": 9.990133642141357e-07, "epoch": 0.118, "percentage": 11.8, "elapsed_time": "0:01:34", "remaining_time": "0:11:46"}
{"current_steps": 600, "total_steps": 5000, "loss": 0.2245, "lr": 9.98782025129912e-07, "epoch": 0.12, "percentage": 12.0, "elapsed_time": "0:01:36", "remaining_time": "0:11:45"}
{"current_steps": 610, "total_steps": 5000, "loss": 0.2463, "lr": 9.9852637611346e-07, "epoch": 0.122, "percentage": 12.2, "elapsed_time": "0:01:37", "remaining_time": "0:11:43"}
{"current_steps": 620, "total_steps": 5000, "loss": 0.212, "lr": 9.982464296247522e-07, "epoch": 0.124, "percentage": 12.4, "elapsed_time": "0:01:39", "remaining_time": "0:11:42"}
{"current_steps": 630, "total_steps": 5000, "loss": 0.2933, "lr": 9.97942199307985e-07, "epoch": 0.126, "percentage": 12.6, "elapsed_time": "0:01:41", "remaining_time": "0:11:42"}
{"current_steps": 640, "total_steps": 5000, "loss": 0.2437, "lr": 9.976136999909155e-07, "epoch": 0.128, "percentage": 12.8, "elapsed_time": "0:01:43", "remaining_time": "0:11:42"}
{"current_steps": 650, "total_steps": 5000, "loss": 0.4017, "lr": 9.972609476841365e-07, "epoch": 0.13, "percentage": 13.0, "elapsed_time": "0:01:45", "remaining_time": "0:11:46"}
{"current_steps": 660, "total_steps": 5000, "loss": 0.2055, "lr": 9.968839595802981e-07, "epoch": 0.132, "percentage": 13.2, "elapsed_time": "0:01:47", "remaining_time": "0:11:45"}
{"current_steps": 670, "total_steps": 5000, "loss": 0.0685, "lr": 9.964827540532684e-07, "epoch": 0.134, "percentage": 13.4, "elapsed_time": "0:01:48", "remaining_time": "0:11:39"}
{"current_steps": 680, "total_steps": 5000, "loss": 0.2145, "lr": 9.960573506572389e-07, "epoch": 0.136, "percentage": 13.6, "elapsed_time": "0:01:49", "remaining_time": "0:11:38"}
{"current_steps": 690, "total_steps": 5000, "loss": 0.2836, "lr": 9.956077701257707e-07, "epoch": 0.138, "percentage": 13.8, "elapsed_time": "0:01:51", "remaining_time": "0:11:38"}
{"current_steps": 700, "total_steps": 5000, "loss": 0.3285, "lr": 9.95134034370785e-07, "epoch": 0.14, "percentage": 14.0, "elapsed_time": "0:01:53", "remaining_time": "0:11:39"}
{"current_steps": 710, "total_steps": 5000, "loss": 0.2677, "lr": 9.946361664814943e-07, "epoch": 0.142, "percentage": 14.2, "elapsed_time": "0:01:55", "remaining_time": "0:11:38"}
{"current_steps": 720, "total_steps": 5000, "loss": 0.1567, "lr": 9.941141907232763e-07, "epoch": 0.144, "percentage": 14.4, "elapsed_time": "0:01:57", "remaining_time": "0:11:36"}
{"current_steps": 730, "total_steps": 5000, "loss": 0.149, "lr": 9.93568132536494e-07, "epoch": 0.146, "percentage": 14.6, "elapsed_time": "0:01:58", "remaining_time": "0:11:32"}
{"current_steps": 740, "total_steps": 5000, "loss": 0.3243, "lr": 9.929980185352525e-07, "epoch": 0.148, "percentage": 14.8, "elapsed_time": "0:02:00", "remaining_time": "0:11:34"}
{"current_steps": 750, "total_steps": 5000, "loss": 0.3521, "lr": 9.92403876506104e-07, "epoch": 0.15, "percentage": 15.0, "elapsed_time": "0:02:02", "remaining_time": "0:11:36"}
{"current_steps": 760, "total_steps": 5000, "loss": 0.2648, "lr": 9.91785735406693e-07, "epoch": 0.152, "percentage": 15.2, "elapsed_time": "0:02:04", "remaining_time": "0:11:34"}
{"current_steps": 770, "total_steps": 5000, "loss": 0.2882, "lr": 9.911436253643443e-07, "epoch": 0.154, "percentage": 15.4, "elapsed_time": "0:02:06", "remaining_time": "0:11:33"}
{"current_steps": 780, "total_steps": 5000, "loss": 0.1974, "lr": 9.904775776745956e-07, "epoch": 0.156, "percentage": 15.6, "elapsed_time": "0:02:07", "remaining_time": "0:11:31"}
{"current_steps": 790, "total_steps": 5000, "loss": 0.3992, "lr": 9.89787624799672e-07, "epoch": 0.158, "percentage": 15.8, "elapsed_time": "0:02:09", "remaining_time": "0:11:31"}
{"current_steps": 800, "total_steps": 5000, "loss": 0.1949, "lr": 9.890738003669027e-07, "epoch": 0.16, "percentage": 16.0, "elapsed_time": "0:02:11", "remaining_time": "0:11:29"}
{"current_steps": 810, "total_steps": 5000, "loss": 0.1667, "lr": 9.883361391670839e-07, "epoch": 0.162, "percentage": 16.2, "elapsed_time": "0:02:12", "remaining_time": "0:11:27"}
{"current_steps": 820, "total_steps": 5000, "loss": 0.151, "lr": 9.875746771527815e-07, "epoch": 0.164, "percentage": 16.4, "elapsed_time": "0:02:14", "remaining_time": "0:11:24"}
{"current_steps": 830, "total_steps": 5000, "loss": 0.2299, "lr": 9.8678945143658e-07, "epoch": 0.166, "percentage": 16.6, "elapsed_time": "0:02:15", "remaining_time": "0:11:22"}
{"current_steps": 840, "total_steps": 5000, "loss": 0.2426, "lr": 9.859805002892731e-07, "epoch": 0.168, "percentage": 16.8, "elapsed_time": "0:02:17", "remaining_time": "0:11:21"}
{"current_steps": 850, "total_steps": 5000, "loss": 0.3274, "lr": 9.851478631379982e-07, "epoch": 0.17, "percentage": 17.0, "elapsed_time": "0:02:19", "remaining_time": "0:11:22"}
{"current_steps": 860, "total_steps": 5000, "loss": 0.1971, "lr": 9.842915805643156e-07, "epoch": 0.172, "percentage": 17.2, "elapsed_time": "0:02:21", "remaining_time": "0:11:20"}
{"current_steps": 870, "total_steps": 5000, "loss": 0.1315, "lr": 9.834116943022297e-07, "epoch": 0.174, "percentage": 17.4, "elapsed_time": "0:02:22", "remaining_time": "0:11:16"}
{"current_steps": 880, "total_steps": 5000, "loss": 0.2402, "lr": 9.825082472361556e-07, "epoch": 0.176, "percentage": 17.6, "elapsed_time": "0:02:24", "remaining_time": "0:11:16"}
{"current_steps": 890, "total_steps": 5000, "loss": 0.198, "lr": 9.81581283398829e-07, "epoch": 0.178, "percentage": 17.8, "elapsed_time": "0:02:26", "remaining_time": "0:11:14"}
{"current_steps": 900, "total_steps": 5000, "loss": 0.2376, "lr": 9.806308479691594e-07, "epoch": 0.18, "percentage": 18.0, "elapsed_time": "0:02:27", "remaining_time": "0:11:13"}
{"current_steps": 910, "total_steps": 5000, "loss": 0.2244, "lr": 9.796569872700287e-07, "epoch": 0.182, "percentage": 18.2, "elapsed_time": "0:02:29", "remaining_time": "0:11:13"}
{"current_steps": 920, "total_steps": 5000, "loss": 0.1149, "lr": 9.786597487660335e-07, "epoch": 0.184, "percentage": 18.4, "elapsed_time": "0:02:30", "remaining_time": "0:11:09"}
{"current_steps": 930, "total_steps": 5000, "loss": 0.2305, "lr": 9.776391810611718e-07, "epoch": 0.186, "percentage": 18.6, "elapsed_time": "0:02:32", "remaining_time": "0:11:08"}
{"current_steps": 940, "total_steps": 5000, "loss": 0.1016, "lr": 9.765953338964734e-07, "epoch": 0.188, "percentage": 18.8, "elapsed_time": "0:02:34", "remaining_time": "0:11:05"}
{"current_steps": 950, "total_steps": 5000, "loss": 0.1416, "lr": 9.755282581475767e-07, "epoch": 0.19, "percentage": 19.0, "elapsed_time": "0:02:35", "remaining_time": "0:11:02"}
{"current_steps": 960, "total_steps": 5000, "loss": 0.3388, "lr": 9.744380058222482e-07, "epoch": 0.192, "percentage": 19.2, "elapsed_time": "0:02:37", "remaining_time": "0:11:04"}
{"current_steps": 970, "total_steps": 5000, "loss": 0.2386, "lr": 9.733246300578482e-07, "epoch": 0.194, "percentage": 19.4, "elapsed_time": "0:02:39", "remaining_time": "0:11:03"}
{"current_steps": 980, "total_steps": 5000, "loss": 0.1735, "lr": 9.721881851187405e-07, "epoch": 0.196, "percentage": 19.6, "elapsed_time": "0:02:41", "remaining_time": "0:11:01"}
{"current_steps": 990, "total_steps": 5000, "loss": 0.2637, "lr": 9.710287263936483e-07, "epoch": 0.198, "percentage": 19.8, "elapsed_time": "0:02:43", "remaining_time": "0:11:01"}
{"current_steps": 1000, "total_steps": 5000, "loss": 0.2161, "lr": 9.698463103929541e-07, "epoch": 0.2, "percentage": 20.0, "elapsed_time": "0:02:44", "remaining_time": "0:10:58"}
{"current_steps": 1010, "total_steps": 5000, "loss": 0.2611, "lr": 9.686409947459457e-07, "epoch": 0.202, "percentage": 20.2, "elapsed_time": "0:02:46", "remaining_time": "0:10:57"}
{"current_steps": 1020, "total_steps": 5000, "loss": 0.1744, "lr": 9.674128381980071e-07, "epoch": 0.204, "percentage": 20.4, "elapsed_time": "0:02:48", "remaining_time": "0:10:55"}
{"current_steps": 1030, "total_steps": 5000, "loss": 0.2382, "lr": 9.661619006077561e-07, "epoch": 0.206, "percentage": 20.6, "elapsed_time": "0:02:49", "remaining_time": "0:10:54"}
{"current_steps": 1040, "total_steps": 5000, "loss": 0.2746, "lr": 9.648882429441256e-07, "epoch": 0.208, "percentage": 20.8, "elapsed_time": "0:02:51", "remaining_time": "0:10:54"}
{"current_steps": 1050, "total_steps": 5000, "loss": 0.1473, "lr": 9.635919272833937e-07, "epoch": 0.21, "percentage": 21.0, "elapsed_time": "0:02:53", "remaining_time": "0:10:52"}
{"current_steps": 1060, "total_steps": 5000, "loss": 0.2144, "lr": 9.622730168061567e-07, "epoch": 0.212, "percentage": 21.2, "elapsed_time": "0:02:55", "remaining_time": "0:10:50"}
{"current_steps": 1070, "total_steps": 5000, "loss": 0.2516, "lr": 9.609315757942502e-07, "epoch": 0.214, "percentage": 21.4, "elapsed_time": "0:02:56", "remaining_time": "0:10:49"}
{"current_steps": 1080, "total_steps": 5000, "loss": 0.3733, "lr": 9.595676696276171e-07, "epoch": 0.216, "percentage": 21.6, "elapsed_time": "0:02:59", "remaining_time": "0:10:49"}
{"current_steps": 1090, "total_steps": 5000, "loss": 0.1464, "lr": 9.581813647811197e-07, "epoch": 0.218, "percentage": 21.8, "elapsed_time": "0:03:00", "remaining_time": "0:10:46"}
{"current_steps": 1100, "total_steps": 5000, "loss": 0.249, "lr": 9.567727288213004e-07, "epoch": 0.22, "percentage": 22.0, "elapsed_time": "0:03:02", "remaining_time": "0:10:45"}
{"current_steps": 1110, "total_steps": 5000, "loss": 0.1656, "lr": 9.553418304030885e-07, "epoch": 0.222, "percentage": 22.2, "elapsed_time": "0:03:03", "remaining_time": "0:10:43"}
{"current_steps": 1120, "total_steps": 5000, "loss": 0.0984, "lr": 9.538887392664543e-07, "epoch": 0.224, "percentage": 22.4, "elapsed_time": "0:03:04", "remaining_time": "0:10:39"}
{"current_steps": 1130, "total_steps": 5000, "loss": 0.1974, "lr": 9.524135262330098e-07, "epoch": 0.226, "percentage": 22.6, "elapsed_time": "0:03:06", "remaining_time": "0:10:37"}
{"current_steps": 1140, "total_steps": 5000, "loss": 0.2129, "lr": 9.509162632025569e-07, "epoch": 0.228, "percentage": 22.8, "elapsed_time": "0:03:07", "remaining_time": "0:10:36"}
{"current_steps": 1150, "total_steps": 5000, "loss": 0.3028, "lr": 9.493970231495834e-07, "epoch": 0.23, "percentage": 23.0, "elapsed_time": "0:03:09", "remaining_time": "0:10:35"}
{"current_steps": 1160, "total_steps": 5000, "loss": 0.1714, "lr": 9.478558801197064e-07, "epoch": 0.232, "percentage": 23.2, "elapsed_time": "0:03:11", "remaining_time": "0:10:32"}
{"current_steps": 1170, "total_steps": 5000, "loss": 0.1583, "lr": 9.462929092260628e-07, "epoch": 0.234, "percentage": 23.4, "elapsed_time": "0:03:12", "remaining_time": "0:10:28"}
{"current_steps": 1180, "total_steps": 5000, "loss": 0.237, "lr": 9.447081866456487e-07, "epoch": 0.236, "percentage": 23.6, "elapsed_time": "0:03:13", "remaining_time": "0:10:27"}
{"current_steps": 1190, "total_steps": 5000, "loss": 0.1604, "lr": 9.431017896156073e-07, "epoch": 0.238, "percentage": 23.8, "elapsed_time": "0:03:15", "remaining_time": "0:10:25"}
{"current_steps": 1200, "total_steps": 5000, "loss": 0.2498, "lr": 9.414737964294634e-07, "epoch": 0.24, "percentage": 24.0, "elapsed_time": "0:03:17", "remaining_time": "0:10:24"}
{"current_steps": 1210, "total_steps": 5000, "loss": 0.1914, "lr": 9.398242864333083e-07, "epoch": 0.242, "percentage": 24.2, "elapsed_time": "0:03:18", "remaining_time": "0:10:22"}
{"current_steps": 1220, "total_steps": 5000, "loss": 0.2974, "lr": 9.381533400219317e-07, "epoch": 0.244, "percentage": 24.4, "elapsed_time": "0:03:20", "remaining_time": "0:10:21"}
{"current_steps": 1230, "total_steps": 5000, "loss": 0.1905, "lr": 9.364610386349047e-07, "epoch": 0.246, "percentage": 24.6, "elapsed_time": "0:03:22", "remaining_time": "0:10:20"}
{"current_steps": 1240, "total_steps": 5000, "loss": 0.3763, "lr": 9.347474647526095e-07, "epoch": 0.248, "percentage": 24.8, "elapsed_time": "0:03:24", "remaining_time": "0:10:20"}
{"current_steps": 1250, "total_steps": 5000, "loss": 0.2157, "lr": 9.330127018922193e-07, "epoch": 0.25, "percentage": 25.0, "elapsed_time": "0:03:26", "remaining_time": "0:10:19"}
{"current_steps": 1260, "total_steps": 5000, "loss": 0.2944, "lr": 9.312568346036287e-07, "epoch": 0.252, "percentage": 25.2, "elapsed_time": "0:03:28", "remaining_time": "0:10:19"}
{"current_steps": 1270, "total_steps": 5000, "loss": 0.3299, "lr": 9.294799484653322e-07, "epoch": 0.254, "percentage": 25.4, "elapsed_time": "0:03:30", "remaining_time": "0:10:19"}
{"current_steps": 1280, "total_steps": 5000, "loss": 0.2198, "lr": 9.276821300802533e-07, "epoch": 0.256, "percentage": 25.6, "elapsed_time": "0:03:32", "remaining_time": "0:10:17"}
{"current_steps": 1290, "total_steps": 5000, "loss": 0.1441, "lr": 9.258634670715237e-07, "epoch": 0.258, "percentage": 25.8, "elapsed_time": "0:03:33", "remaining_time": "0:10:15"}
{"current_steps": 1300, "total_steps": 5000, "loss": 0.2248, "lr": 9.240240480782129e-07, "epoch": 0.26, "percentage": 26.0, "elapsed_time": "0:03:35", "remaining_time": "0:10:14"}
{"current_steps": 1310, "total_steps": 5000, "loss": 0.1801, "lr": 9.221639627510075e-07, "epoch": 0.262, "percentage": 26.2, "elapsed_time": "0:03:37", "remaining_time": "0:10:12"}
{"current_steps": 1320, "total_steps": 5000, "loss": 0.1365, "lr": 9.202833017478421e-07, "epoch": 0.264, "percentage": 26.4, "elapsed_time": "0:03:38", "remaining_time": "0:10:10"}
{"current_steps": 1330, "total_steps": 5000, "loss": 0.1392, "lr": 9.183821567294808e-07, "epoch": 0.266, "percentage": 26.6, "elapsed_time": "0:03:40", "remaining_time": "0:10:07"}
{"current_steps": 1340, "total_steps": 5000, "loss": 0.2375, "lr": 9.164606203550497e-07, "epoch": 0.268, "percentage": 26.8, "elapsed_time": "0:03:41", "remaining_time": "0:10:06"}
{"current_steps": 1350, "total_steps": 5000, "loss": 0.2166, "lr": 9.145187862775208e-07, "epoch": 0.27, "percentage": 27.0, "elapsed_time": "0:03:43", "remaining_time": "0:10:04"}
{"current_steps": 1360, "total_steps": 5000, "loss": 0.2524, "lr": 9.125567491391475e-07, "epoch": 0.272, "percentage": 27.2, "elapsed_time": "0:03:45", "remaining_time": "0:10:03"}
{"current_steps": 1370, "total_steps": 5000, "loss": 0.116, "lr": 9.10574604566852e-07, "epoch": 0.274, "percentage": 27.4, "elapsed_time": "0:03:46", "remaining_time": "0:10:01"}
{"current_steps": 1380, "total_steps": 5000, "loss": 0.1629, "lr": 9.085724491675642e-07, "epoch": 0.276, "percentage": 27.6, "elapsed_time": "0:03:48", "remaining_time": "0:09:58"}
{"current_steps": 1390, "total_steps": 5000, "loss": 0.2715, "lr": 9.065503805235137e-07, "epoch": 0.278, "percentage": 27.8, "elapsed_time": "0:03:50", "remaining_time": "0:09:58"}
{"current_steps": 1400, "total_steps": 5000, "loss": 0.1993, "lr": 9.045084971874737e-07, "epoch": 0.28, "percentage": 28.0, "elapsed_time": "0:03:52", "remaining_time": "0:09:57"}
{"current_steps": 1410, "total_steps": 5000, "loss": 0.227, "lr": 9.02446898677957e-07, "epoch": 0.282, "percentage": 28.2, "elapsed_time": "0:03:54", "remaining_time": "0:09:55"}
{"current_steps": 1420, "total_steps": 5000, "loss": 0.2065, "lr": 9.003656854743666e-07, "epoch": 0.284, "percentage": 28.4, "elapsed_time": "0:03:55", "remaining_time": "0:09:54"}
{"current_steps": 1430, "total_steps": 5000, "loss": 0.2645, "lr": 8.982649590120981e-07, "epoch": 0.286, "percentage": 28.6, "elapsed_time": "0:03:57", "remaining_time": "0:09:54"}
{"current_steps": 1440, "total_steps": 5000, "loss": 0.2705, "lr": 8.961448216775953e-07, "epoch": 0.288, "percentage": 28.8, "elapsed_time": "0:03:59", "remaining_time": "0:09:53"}
{"current_steps": 1450, "total_steps": 5000, "loss": 0.2481, "lr": 8.940053768033608e-07, "epoch": 0.29, "percentage": 29.0, "elapsed_time": "0:04:02", "remaining_time": "0:09:52"}
{"current_steps": 1460, "total_steps": 5000, "loss": 0.1207, "lr": 8.918467286629198e-07, "epoch": 0.292, "percentage": 29.2, "elapsed_time": "0:04:03", "remaining_time": "0:09:49"}
{"current_steps": 1470, "total_steps": 5000, "loss": 0.1948, "lr": 8.896689824657371e-07, "epoch": 0.294, "percentage": 29.4, "elapsed_time": "0:04:05", "remaining_time": "0:09:48"}
{"current_steps": 1480, "total_steps": 5000, "loss": 0.1255, "lr": 8.874722443520898e-07, "epoch": 0.296, "percentage": 29.6, "elapsed_time": "0:04:06", "remaining_time": "0:09:45"}
{"current_steps": 1490, "total_steps": 5000, "loss": 0.0875, "lr": 8.852566213878946e-07, "epoch": 0.298, "percentage": 29.8, "elapsed_time": "0:04:07", "remaining_time": "0:09:43"}
{"current_steps": 1500, "total_steps": 5000, "loss": 0.4261, "lr": 8.83022221559489e-07, "epoch": 0.3, "percentage": 30.0, "elapsed_time": "0:04:09", "remaining_time": "0:09:43"}
{"current_steps": 1510, "total_steps": 5000, "loss": 0.2974, "lr": 8.807691537683684e-07, "epoch": 0.302, "percentage": 30.2, "elapsed_time": "0:04:11", "remaining_time": "0:09:41"}
{"current_steps": 1520, "total_steps": 5000, "loss": 0.2012, "lr": 8.784975278258782e-07, "epoch": 0.304, "percentage": 30.4, "elapsed_time": "0:04:13", "remaining_time": "0:09:40"}
{"current_steps": 1530, "total_steps": 5000, "loss": 0.2722, "lr": 8.762074544478621e-07, "epoch": 0.306, "percentage": 30.6, "elapsed_time": "0:04:15", "remaining_time": "0:09:38"}
{"current_steps": 1540, "total_steps": 5000, "loss": 0.1886, "lr": 8.73899045249266e-07, "epoch": 0.308, "percentage": 30.8, "elapsed_time": "0:04:16", "remaining_time": "0:09:37"}
{"current_steps": 1550, "total_steps": 5000, "loss": 0.2372, "lr": 8.71572412738697e-07, "epoch": 0.31, "percentage": 31.0, "elapsed_time": "0:04:18", "remaining_time": "0:09:35"}
{"current_steps": 1560, "total_steps": 5000, "loss": 0.274, "lr": 8.69227670312942e-07, "epoch": 0.312, "percentage": 31.2, "elapsed_time": "0:04:20", "remaining_time": "0:09:34"}
{"current_steps": 1570, "total_steps": 5000, "loss": 0.0607, "lr": 8.668649322514381e-07, "epoch": 0.314, "percentage": 31.4, "elapsed_time": "0:04:21", "remaining_time": "0:09:31"}
{"current_steps": 1580, "total_steps": 5000, "loss": 0.2427, "lr": 8.644843137107057e-07, "epoch": 0.316, "percentage": 31.6, "elapsed_time": "0:04:23", "remaining_time": "0:09:30"}
{"current_steps": 1590, "total_steps": 5000, "loss": 0.1347, "lr": 8.620859307187338e-07, "epoch": 0.318, "percentage": 31.8, "elapsed_time": "0:04:24", "remaining_time": "0:09:28"}
{"current_steps": 1600, "total_steps": 5000, "loss": 0.1957, "lr": 8.596699001693255e-07, "epoch": 0.32, "percentage": 32.0, "elapsed_time": "0:04:26", "remaining_time": "0:09:26"}
{"current_steps": 1610, "total_steps": 5000, "loss": 0.2206, "lr": 8.572363398164016e-07, "epoch": 0.322, "percentage": 32.2, "elapsed_time": "0:04:28", "remaining_time": "0:09:25"}
{"current_steps": 1620, "total_steps": 5000, "loss": 0.2685, "lr": 8.547853682682604e-07, "epoch": 0.324, "percentage": 32.4, "elapsed_time": "0:04:30", "remaining_time": "0:09:24"}
{"current_steps": 1630, "total_steps": 5000, "loss": 0.2696, "lr": 8.523171049817973e-07, "epoch": 0.326, "percentage": 32.6, "elapsed_time": "0:04:32", "remaining_time": "0:09:24"}
{"current_steps": 1640, "total_steps": 5000, "loss": 0.2392, "lr": 8.498316702566826e-07, "epoch": 0.328, "percentage": 32.8, "elapsed_time": "0:04:34", "remaining_time": "0:09:22"}
{"current_steps": 1650, "total_steps": 5000, "loss": 0.2293, "lr": 8.473291852294986e-07, "epoch": 0.33, "percentage": 33.0, "elapsed_time": "0:04:36", "remaining_time": "0:09:21"}
{"current_steps": 1660, "total_steps": 5000, "loss": 0.2758, "lr": 8.448097718678348e-07, "epoch": 0.332, "percentage": 33.2, "elapsed_time": "0:04:38", "remaining_time": "0:09:20"}
{"current_steps": 1670, "total_steps": 5000, "loss": 0.1503, "lr": 8.422735529643443e-07, "epoch": 0.334, "percentage": 33.4, "elapsed_time": "0:04:39", "remaining_time": "0:09:18"}
{"current_steps": 1680, "total_steps": 5000, "loss": 0.0285, "lr": 8.397206521307583e-07, "epoch": 0.336, "percentage": 33.6, "elapsed_time": "0:04:40", "remaining_time": "0:09:14"}
{"current_steps": 1690, "total_steps": 5000, "loss": 0.135, "lr": 8.371511937918617e-07, "epoch": 0.338, "percentage": 33.8, "elapsed_time": "0:04:41", "remaining_time": "0:09:12"}
{"current_steps": 1700, "total_steps": 5000, "loss": 0.1756, "lr": 8.34565303179429e-07, "epoch": 0.34, "percentage": 34.0, "elapsed_time": "0:04:43", "remaining_time": "0:09:10"}
{"current_steps": 1710, "total_steps": 5000, "loss": 0.12, "lr": 8.319631063261207e-07, "epoch": 0.342, "percentage": 34.2, "elapsed_time": "0:04:44", "remaining_time": "0:09:07"}
{"current_steps": 1720, "total_steps": 5000, "loss": 0.2617, "lr": 8.293447300593402e-07, "epoch": 0.344, "percentage": 34.4, "elapsed_time": "0:04:46", "remaining_time": "0:09:06"}
{"current_steps": 1730, "total_steps": 5000, "loss": 0.2053, "lr": 8.267103019950528e-07, "epoch": 0.346, "percentage": 34.6, "elapsed_time": "0:04:48", "remaining_time": "0:09:05"}
{"current_steps": 1740, "total_steps": 5000, "loss": 0.2521, "lr": 8.240599505315654e-07, "epoch": 0.348, "percentage": 34.8, "elapsed_time": "0:04:50", "remaining_time": "0:09:04"}
{"current_steps": 1750, "total_steps": 5000, "loss": 0.1694, "lr": 8.213938048432696e-07, "epoch": 0.35, "percentage": 35.0, "elapsed_time": "0:04:52", "remaining_time": "0:09:02"}
{"current_steps": 1760, "total_steps": 5000, "loss": 0.1555, "lr": 8.187119948743449e-07, "epoch": 0.352, "percentage": 35.2, "elapsed_time": "0:04:53", "remaining_time": "0:09:00"}
{"current_steps": 1770, "total_steps": 5000, "loss": 0.2196, "lr": 8.160146513324254e-07, "epoch": 0.354, "percentage": 35.4, "elapsed_time": "0:04:55", "remaining_time": "0:08:58"}
{"current_steps": 1780, "total_steps": 5000, "loss": 0.2752, "lr": 8.133019056822302e-07, "epoch": 0.356, "percentage": 35.6, "elapsed_time": "0:04:57", "remaining_time": "0:08:57"}
{"current_steps": 1790, "total_steps": 5000, "loss": 0.212, "lr": 8.105738901391551e-07, "epoch": 0.358, "percentage": 35.8, "elapsed_time": "0:04:59", "remaining_time": "0:08:56"}
{"current_steps": 1800, "total_steps": 5000, "loss": 0.2336, "lr": 8.07830737662829e-07, "epoch": 0.36, "percentage": 36.0, "elapsed_time": "0:05:00", "remaining_time": "0:08:55"}
{"current_steps": 1810, "total_steps": 5000, "loss": 0.1185, "lr": 8.050725819506339e-07, "epoch": 0.362, "percentage": 36.2, "elapsed_time": "0:05:02", "remaining_time": "0:08:52"}
{"current_steps": 1820, "total_steps": 5000, "loss": 0.2184, "lr": 8.022995574311875e-07, "epoch": 0.364, "percentage": 36.4, "elapsed_time": "0:05:04", "remaining_time": "0:08:52"}
{"current_steps": 1830, "total_steps": 5000, "loss": 0.2807, "lr": 7.995117992577928e-07, "epoch": 0.366, "percentage": 36.6, "elapsed_time": "0:05:06", "remaining_time": "0:08:51"}
{"current_steps": 1840, "total_steps": 5000, "loss": 0.1165, "lr": 7.967094433018508e-07, "epoch": 0.368, "percentage": 36.8, "elapsed_time": "0:05:07", "remaining_time": "0:08:48"}
{"current_steps": 1850, "total_steps": 5000, "loss": 0.1799, "lr": 7.938926261462365e-07, "epoch": 0.37, "percentage": 37.0, "elapsed_time": "0:05:09", "remaining_time": "0:08:46"}
{"current_steps": 1860, "total_steps": 5000, "loss": 0.2926, "lr": 7.910614850786447e-07, "epoch": 0.372, "percentage": 37.2, "elapsed_time": "0:05:11", "remaining_time": "0:08:45"}
{"current_steps": 1870, "total_steps": 5000, "loss": 0.2575, "lr": 7.882161580848966e-07, "epoch": 0.374, "percentage": 37.4, "elapsed_time": "0:05:13", "remaining_time": "0:08:44"}
{"current_steps": 1880, "total_steps": 5000, "loss": 0.1874, "lr": 7.853567838422159e-07, "epoch": 0.376, "percentage": 37.6, "elapsed_time": "0:05:14", "remaining_time": "0:08:42"}
{"current_steps": 1890, "total_steps": 5000, "loss": 0.1535, "lr": 7.82483501712469e-07, "epoch": 0.378, "percentage": 37.8, "elapsed_time": "0:05:16", "remaining_time": "0:08:40"}
{"current_steps": 1900, "total_steps": 5000, "loss": 0.0704, "lr": 7.795964517353733e-07, "epoch": 0.38, "percentage": 38.0, "elapsed_time": "0:05:17", "remaining_time": "0:08:37"}
{"current_steps": 1910, "total_steps": 5000, "loss": 0.2371, "lr": 7.76695774621672e-07, "epoch": 0.382, "percentage": 38.2, "elapsed_time": "0:05:19", "remaining_time": "0:08:36"}
{"current_steps": 1920, "total_steps": 5000, "loss": 0.1903, "lr": 7.737816117462751e-07, "epoch": 0.384, "percentage": 38.4, "elapsed_time": "0:05:21", "remaining_time": "0:08:35"}
{"current_steps": 1930, "total_steps": 5000, "loss": 0.1282, "lr": 7.7085410514137e-07, "epoch": 0.386, "percentage": 38.6, "elapsed_time": "0:05:22", "remaining_time": "0:08:33"}
{"current_steps": 1940, "total_steps": 5000, "loss": 0.2183, "lr": 7.679133974894982e-07, "epoch": 0.388, "percentage": 38.8, "elapsed_time": "0:05:24", "remaining_time": "0:08:31"}
{"current_steps": 1950, "total_steps": 5000, "loss": 0.1082, "lr": 7.649596321166024e-07, "epoch": 0.39, "percentage": 39.0, "elapsed_time": "0:05:25", "remaining_time": "0:08:29"}
{"current_steps": 1960, "total_steps": 5000, "loss": 0.1404, "lr": 7.619929529850396e-07, "epoch": 0.392, "percentage": 39.2, "elapsed_time": "0:05:27", "remaining_time": "0:08:27"}
{"current_steps": 1970, "total_steps": 5000, "loss": 0.1441, "lr": 7.590135046865651e-07, "epoch": 0.394, "percentage": 39.4, "elapsed_time": "0:05:28", "remaining_time": "0:08:25"}
{"current_steps": 1980, "total_steps": 5000, "loss": 0.1038, "lr": 7.560214324352858e-07, "epoch": 0.396, "percentage": 39.6, "elapsed_time": "0:05:29", "remaining_time": "0:08:23"}
{"current_steps": 1990, "total_steps": 5000, "loss": 0.2091, "lr": 7.530168820605818e-07, "epoch": 0.398, "percentage": 39.8, "elapsed_time": "0:05:31", "remaining_time": "0:08:21"}
{"current_steps": 2000, "total_steps": 5000, "loss": 0.1659, "lr": 7.5e-07, "epoch": 0.4, "percentage": 40.0, "elapsed_time": "0:05:33", "remaining_time": "0:08:19"}
{"current_steps": 2010, "total_steps": 5000, "loss": 0.1804, "lr": 7.469709332921154e-07, "epoch": 0.402, "percentage": 40.2, "elapsed_time": "0:05:34", "remaining_time": "0:08:18"}
{"current_steps": 2020, "total_steps": 5000, "loss": 0.2499, "lr": 7.439298295693663e-07, "epoch": 0.404, "percentage": 40.4, "elapsed_time": "0:05:37", "remaining_time": "0:08:17"}
{"current_steps": 2030, "total_steps": 5000, "loss": 0.2162, "lr": 7.408768370508576e-07, "epoch": 0.406, "percentage": 40.6, "elapsed_time": "0:05:39", "remaining_time": "0:08:16"}
{"current_steps": 2040, "total_steps": 5000, "loss": 0.1557, "lr": 7.378121045351377e-07, "epoch": 0.408, "percentage": 40.8, "elapsed_time": "0:05:40", "remaining_time": "0:08:14"}
{"current_steps": 2050, "total_steps": 5000, "loss": 0.2083, "lr": 7.347357813929454e-07, "epoch": 0.41, "percentage": 41.0, "elapsed_time": "0:05:42", "remaining_time": "0:08:12"}
{"current_steps": 2060, "total_steps": 5000, "loss": 0.1489, "lr": 7.316480175599308e-07, "epoch": 0.412, "percentage": 41.2, "elapsed_time": "0:05:43", "remaining_time": "0:08:10"}
{"current_steps": 2070, "total_steps": 5000, "loss": 0.1723, "lr": 7.285489635293471e-07, "epoch": 0.414, "percentage": 41.4, "elapsed_time": "0:05:45", "remaining_time": "0:08:08"}
{"current_steps": 2080, "total_steps": 5000, "loss": 0.1862, "lr": 7.254387703447153e-07, "epoch": 0.416, "percentage": 41.6, "elapsed_time": "0:05:47", "remaining_time": "0:08:07"}
{"current_steps": 2090, "total_steps": 5000, "loss": 0.1223, "lr": 7.223175895924637e-07, "epoch": 0.418, "percentage": 41.8, "elapsed_time": "0:05:48", "remaining_time": "0:08:05"}
{"current_steps": 2100, "total_steps": 5000, "loss": 0.1192, "lr": 7.191855733945386e-07, "epoch": 0.42, "percentage": 42.0, "elapsed_time": "0:05:49", "remaining_time": "0:08:03"}
{"current_steps": 2110, "total_steps": 5000, "loss": 0.2137, "lr": 7.160428744009912e-07, "epoch": 0.422, "percentage": 42.2, "elapsed_time": "0:05:51", "remaining_time": "0:08:01"}
{"current_steps": 2120, "total_steps": 5000, "loss": 0.2785, "lr": 7.128896457825363e-07, "epoch": 0.424, "percentage": 42.4, "elapsed_time": "0:05:53", "remaining_time": "0:08:00"}
{"current_steps": 2130, "total_steps": 5000, "loss": 0.137, "lr": 7.097260412230885e-07, "epoch": 0.426, "percentage": 42.6, "elapsed_time": "0:05:55", "remaining_time": "0:07:58"}
{"current_steps": 2140, "total_steps": 5000, "loss": 0.164, "lr": 7.065522149122709e-07, "epoch": 0.428, "percentage": 42.8, "elapsed_time": "0:05:56", "remaining_time": "0:07:56"}
{"current_steps": 2150, "total_steps": 5000, "loss": 0.1038, "lr": 7.033683215379002e-07, "epoch": 0.43, "percentage": 43.0, "elapsed_time": "0:05:57", "remaining_time": "0:07:54"}
{"current_steps": 2160, "total_steps": 5000, "loss": 0.3109, "lr": 7.001745162784475e-07, "epoch": 0.432, "percentage": 43.2, "elapsed_time": "0:06:00", "remaining_time": "0:07:53"}
{"current_steps": 2170, "total_steps": 5000, "loss": 0.2861, "lr": 6.969709547954755e-07, "epoch": 0.434, "percentage": 43.4, "elapsed_time": "0:06:02", "remaining_time": "0:07:52"}
{"current_steps": 2180, "total_steps": 5000, "loss": 0.1104, "lr": 6.937577932260514e-07, "epoch": 0.436, "percentage": 43.6, "elapsed_time": "0:06:03", "remaining_time": "0:07:49"}
{"current_steps": 2190, "total_steps": 5000, "loss": 0.144, "lr": 6.905351881751371e-07, "epoch": 0.438, "percentage": 43.8, "elapsed_time": "0:06:04", "remaining_time": "0:07:47"}
{"current_steps": 2200, "total_steps": 5000, "loss": 0.0941, "lr": 6.87303296707956e-07, "epoch": 0.44, "percentage": 44.0, "elapsed_time": "0:06:05", "remaining_time": "0:07:45"}
{"current_steps": 2210, "total_steps": 5000, "loss": 0.2653, "lr": 6.840622763423391e-07, "epoch": 0.442, "percentage": 44.2, "elapsed_time": "0:06:08", "remaining_time": "0:07:44"}
{"current_steps": 2220, "total_steps": 5000, "loss": 0.2248, "lr": 6.80812285041046e-07, "epoch": 0.444, "percentage": 44.4, "elapsed_time": "0:06:09", "remaining_time": "0:07:43"}
{"current_steps": 2230, "total_steps": 5000, "loss": 0.1519, "lr": 6.775534812040686e-07, "epoch": 0.446, "percentage": 44.6, "elapsed_time": "0:06:11", "remaining_time": "0:07:41"}
{"current_steps": 2240, "total_steps": 5000, "loss": 0.201, "lr": 6.742860236609076e-07, "epoch": 0.448, "percentage": 44.8, "elapsed_time": "0:06:13", "remaining_time": "0:07:39"}
{"current_steps": 2250, "total_steps": 5000, "loss": 0.232, "lr": 6.710100716628344e-07, "epoch": 0.45, "percentage": 45.0, "elapsed_time": "0:06:15", "remaining_time": "0:07:38"}
{"current_steps": 2260, "total_steps": 5000, "loss": 0.1519, "lr": 6.677257848751276e-07, "epoch": 0.452, "percentage": 45.2, "elapsed_time": "0:06:16", "remaining_time": "0:07:36"}
{"current_steps": 2270, "total_steps": 5000, "loss": 0.135, "lr": 6.644333233692916e-07, "epoch": 0.454, "percentage": 45.4, "elapsed_time": "0:06:18", "remaining_time": "0:07:34"}
{"current_steps": 2280, "total_steps": 5000, "loss": 0.1017, "lr": 6.611328476152556e-07, "epoch": 0.456, "percentage": 45.6, "elapsed_time": "0:06:19", "remaining_time": "0:07:32"}
{"current_steps": 2290, "total_steps": 5000, "loss": 0.2447, "lr": 6.578245184735512e-07, "epoch": 0.458, "percentage": 45.8, "elapsed_time": "0:06:21", "remaining_time": "0:07:31"}
{"current_steps": 2300, "total_steps": 5000, "loss": 0.2115, "lr": 6.545084971874736e-07, "epoch": 0.46, "percentage": 46.0, "elapsed_time": "0:06:22", "remaining_time": "0:07:29"}
{"current_steps": 2310, "total_steps": 5000, "loss": 0.1666, "lr": 6.511849453752223e-07, "epoch": 0.462, "percentage": 46.2, "elapsed_time": "0:06:24", "remaining_time": "0:07:27"}
{"current_steps": 2320, "total_steps": 5000, "loss": 0.1613, "lr": 6.478540250220233e-07, "epoch": 0.464, "percentage": 46.4, "elapsed_time": "0:06:26", "remaining_time": "0:07:26"}
{"current_steps": 2330, "total_steps": 5000, "loss": 0.1578, "lr": 6.445158984722358e-07, "epoch": 0.466, "percentage": 46.6, "elapsed_time": "0:06:27", "remaining_time": "0:07:24"}
{"current_steps": 2340, "total_steps": 5000, "loss": 0.246, "lr": 6.411707284214383e-07, "epoch": 0.468, "percentage": 46.8, "elapsed_time": "0:06:29", "remaining_time": "0:07:23"}
{"current_steps": 2350, "total_steps": 5000, "loss": 0.2042, "lr": 6.378186779084995e-07, "epoch": 0.47, "percentage": 47.0, "elapsed_time": "0:06:31", "remaining_time": "0:07:21"}
{"current_steps": 2360, "total_steps": 5000, "loss": 0.229, "lr": 6.344599103076328e-07, "epoch": 0.472, "percentage": 47.2, "elapsed_time": "0:06:33", "remaining_time": "0:07:20"}
{"current_steps": 2370, "total_steps": 5000, "loss": 0.297, "lr": 6.310945893204324e-07, "epoch": 0.474, "percentage": 47.4, "elapsed_time": "0:06:35", "remaining_time": "0:07:19"}
{"current_steps": 2380, "total_steps": 5000, "loss": 0.1169, "lr": 6.277228789678953e-07, "epoch": 0.476, "percentage": 47.6, "elapsed_time": "0:06:37", "remaining_time": "0:07:17"}
{"current_steps": 2390, "total_steps": 5000, "loss": 0.256, "lr": 6.243449435824276e-07, "epoch": 0.478, "percentage": 47.8, "elapsed_time": "0:06:39", "remaining_time": "0:07:16"}
{"current_steps": 2400, "total_steps": 5000, "loss": 0.143, "lr": 6.209609477998338e-07, "epoch": 0.48, "percentage": 48.0, "elapsed_time": "0:06:40", "remaining_time": "0:07:14"}
{"current_steps": 2410, "total_steps": 5000, "loss": 0.1413, "lr": 6.17571056551295e-07, "epoch": 0.482, "percentage": 48.2, "elapsed_time": "0:06:42", "remaining_time": "0:07:12"}
{"current_steps": 2420, "total_steps": 5000, "loss": 0.2189, "lr": 6.141754350553279e-07, "epoch": 0.484, "percentage": 48.4, "elapsed_time": "0:06:44", "remaining_time": "0:07:10"}
{"current_steps": 2430, "total_steps": 5000, "loss": 0.1325, "lr": 6.107742488097338e-07, "epoch": 0.486, "percentage": 48.6, "elapsed_time": "0:06:45", "remaining_time": "0:07:08"}
{"current_steps": 2440, "total_steps": 5000, "loss": 0.1428, "lr": 6.073676635835316e-07, "epoch": 0.488, "percentage": 48.8, "elapsed_time": "0:06:46", "remaining_time": "0:07:06"}
{"current_steps": 2450, "total_steps": 5000, "loss": 0.1964, "lr": 6.039558454088795e-07, "epoch": 0.49, "percentage": 49.0, "elapsed_time": "0:06:48", "remaining_time": "0:07:05"}
{"current_steps": 2460, "total_steps": 5000, "loss": 0.1928, "lr": 6.005389605729824e-07, "epoch": 0.492, "percentage": 49.2, "elapsed_time": "0:06:50", "remaining_time": "0:07:03"}
{"current_steps": 2470, "total_steps": 5000, "loss": 0.1368, "lr": 5.97117175609986e-07, "epoch": 0.494, "percentage": 49.4, "elapsed_time": "0:06:51", "remaining_time": "0:07:01"}
{"current_steps": 2480, "total_steps": 5000, "loss": 0.2274, "lr": 5.936906572928624e-07, "epoch": 0.496, "percentage": 49.6, "elapsed_time": "0:06:53", "remaining_time": "0:07:00"}
{"current_steps": 2490, "total_steps": 5000, "loss": 0.1892, "lr": 5.9025957262528e-07, "epoch": 0.498, "percentage": 49.8, "elapsed_time": "0:06:55", "remaining_time": "0:06:58"}
{"current_steps": 2500, "total_steps": 5000, "loss": 0.214, "lr": 5.868240888334652e-07, "epoch": 0.5, "percentage": 50.0, "elapsed_time": "0:06:57", "remaining_time": "0:06:57"}
{"current_steps": 2510, "total_steps": 5000, "loss": 0.1929, "lr": 5.833843733580512e-07, "epoch": 0.502, "percentage": 50.2, "elapsed_time": "0:06:59", "remaining_time": "0:06:55"}
{"current_steps": 2520, "total_steps": 5000, "loss": 0.0885, "lr": 5.799405938459174e-07, "epoch": 0.504, "percentage": 50.4, "elapsed_time": "0:07:00", "remaining_time": "0:06:53"}
{"current_steps": 2530, "total_steps": 5000, "loss": 0.2027, "lr": 5.764929181420191e-07, "epoch": 0.506, "percentage": 50.6, "elapsed_time": "0:07:02", "remaining_time": "0:06:52"}
{"current_steps": 2540, "total_steps": 5000, "loss": 0.2108, "lr": 5.730415142812058e-07, "epoch": 0.508, "percentage": 50.8, "elapsed_time": "0:07:04", "remaining_time": "0:06:51"}
{"current_steps": 2550, "total_steps": 5000, "loss": 0.2387, "lr": 5.695865504800327e-07, "epoch": 0.51, "percentage": 51.0, "elapsed_time": "0:07:06", "remaining_time": "0:06:49"}
{"current_steps": 2560, "total_steps": 5000, "loss": 0.2252, "lr": 5.661281951285612e-07, "epoch": 0.512, "percentage": 51.2, "elapsed_time": "0:07:08", "remaining_time": "0:06:48"}
{"current_steps": 2570, "total_steps": 5000, "loss": 0.2047, "lr": 5.626666167821521e-07, "epoch": 0.514, "percentage": 51.4, "elapsed_time": "0:07:10", "remaining_time": "0:06:46"}
{"current_steps": 2580, "total_steps": 5000, "loss": 0.1942, "lr": 5.592019841532506e-07, "epoch": 0.516, "percentage": 51.6, "elapsed_time": "0:07:12", "remaining_time": "0:06:45"}
{"current_steps": 2590, "total_steps": 5000, "loss": 0.2018, "lr": 5.557344661031627e-07, "epoch": 0.518, "percentage": 51.8, "elapsed_time": "0:07:14", "remaining_time": "0:06:43"}
{"current_steps": 2600, "total_steps": 5000, "loss": 0.1997, "lr": 5.522642316338268e-07, "epoch": 0.52, "percentage": 52.0, "elapsed_time": "0:07:15", "remaining_time": "0:06:42"}
{"current_steps": 2610, "total_steps": 5000, "loss": 0.1112, "lr": 5.487914498795747e-07, "epoch": 0.522, "percentage": 52.2, "elapsed_time": "0:07:17", "remaining_time": "0:06:40"}
{"current_steps": 2620, "total_steps": 5000, "loss": 0.2175, "lr": 5.453162900988901e-07, "epoch": 0.524, "percentage": 52.4, "elapsed_time": "0:07:18", "remaining_time": "0:06:38"}
{"current_steps": 2630, "total_steps": 5000, "loss": 0.2222, "lr": 5.418389216661578e-07, "epoch": 0.526, "percentage": 52.6, "elapsed_time": "0:07:20", "remaining_time": "0:06:37"}
{"current_steps": 2640, "total_steps": 5000, "loss": 0.1489, "lr": 5.383595140634093e-07, "epoch": 0.528, "percentage": 52.8, "elapsed_time": "0:07:22", "remaining_time": "0:06:35"}
{"current_steps": 2650, "total_steps": 5000, "loss": 0.2534, "lr": 5.348782368720625e-07, "epoch": 0.53, "percentage": 53.0, "elapsed_time": "0:07:24", "remaining_time": "0:06:33"}
{"current_steps": 2660, "total_steps": 5000, "loss": 0.2596, "lr": 5.313952597646567e-07, "epoch": 0.532, "percentage": 53.2, "elapsed_time": "0:07:26", "remaining_time": "0:06:32"}
{"current_steps": 2670, "total_steps": 5000, "loss": 0.254, "lr": 5.27910752496582e-07, "epoch": 0.534, "percentage": 53.4, "elapsed_time": "0:07:28", "remaining_time": "0:06:31"}
{"current_steps": 2680, "total_steps": 5000, "loss": 0.1684, "lr": 5.244248848978067e-07, "epoch": 0.536, "percentage": 53.6, "elapsed_time": "0:07:30", "remaining_time": "0:06:29"}
{"current_steps": 2690, "total_steps": 5000, "loss": 0.18, "lr": 5.209378268645997e-07, "epoch": 0.538, "percentage": 53.8, "elapsed_time": "0:07:31", "remaining_time": "0:06:28"}
{"current_steps": 2700, "total_steps": 5000, "loss": 0.2821, "lr": 5.174497483512505e-07, "epoch": 0.54, "percentage": 54.0, "elapsed_time": "0:07:33", "remaining_time": "0:06:26"}
{"current_steps": 2710, "total_steps": 5000, "loss": 0.1157, "lr": 5.139608193617844e-07, "epoch": 0.542, "percentage": 54.2, "elapsed_time": "0:07:35", "remaining_time": "0:06:24"}
{"current_steps": 2720, "total_steps": 5000, "loss": 0.0984, "lr": 5.104712099416785e-07, "epoch": 0.544, "percentage": 54.4, "elapsed_time": "0:07:36", "remaining_time": "0:06:22"}
{"current_steps": 2730, "total_steps": 5000, "loss": 0.2217, "lr": 5.069810901695727e-07, "epoch": 0.546, "percentage": 54.6, "elapsed_time": "0:07:38", "remaining_time": "0:06:21"}
{"current_steps": 2740, "total_steps": 5000, "loss": 0.1595, "lr": 5.034906301489807e-07, "epoch": 0.548, "percentage": 54.8, "elapsed_time": "0:07:40", "remaining_time": "0:06:19"}
{"current_steps": 2750, "total_steps": 5000, "loss": 0.1954, "lr": 5e-07, "epoch": 0.55, "percentage": 55.0, "elapsed_time": "0:07:42", "remaining_time": "0:06:18"}
{"current_steps": 2760, "total_steps": 5000, "loss": 0.2079, "lr": 4.965093698510192e-07, "epoch": 0.552, "percentage": 55.2, "elapsed_time": "0:07:43", "remaining_time": "0:06:16"}
{"current_steps": 2770, "total_steps": 5000, "loss": 0.2526, "lr": 4.930189098304274e-07, "epoch": 0.554, "percentage": 55.4, "elapsed_time": "0:07:45", "remaining_time": "0:06:14"}
{"current_steps": 2780, "total_steps": 5000, "loss": 0.1786, "lr": 4.895287900583216e-07, "epoch": 0.556, "percentage": 55.6, "elapsed_time": "0:07:47", "remaining_time": "0:06:13"}
{"current_steps": 2790, "total_steps": 5000, "loss": 0.1701, "lr": 4.860391806382156e-07, "epoch": 0.558, "percentage": 55.8, "elapsed_time": "0:07:49", "remaining_time": "0:06:11"}
{"current_steps": 2800, "total_steps": 5000, "loss": 0.1811, "lr": 4.825502516487496e-07, "epoch": 0.56, "percentage": 56.0, "elapsed_time": "0:07:50", "remaining_time": "0:06:09"}
{"current_steps": 2810, "total_steps": 5000, "loss": 0.2274, "lr": 4.790621731354002e-07, "epoch": 0.562, "percentage": 56.2, "elapsed_time": "0:07:52", "remaining_time": "0:06:08"}
{"current_steps": 2820, "total_steps": 5000, "loss": 0.0856, "lr": 4.7557511510219335e-07, "epoch": 0.564, "percentage": 56.4, "elapsed_time": "0:07:53", "remaining_time": "0:06:06"}
{"current_steps": 2830, "total_steps": 5000, "loss": 0.149, "lr": 4.7208924750341805e-07, "epoch": 0.566, "percentage": 56.6, "elapsed_time": "0:07:55", "remaining_time": "0:06:04"}
{"current_steps": 2840, "total_steps": 5000, "loss": 0.2554, "lr": 4.686047402353433e-07, "epoch": 0.568, "percentage": 56.8, "elapsed_time": "0:07:57", "remaining_time": "0:06:03"}
{"current_steps": 2850, "total_steps": 5000, "loss": 0.059, "lr": 4.6512176312793735e-07, "epoch": 0.57, "percentage": 57.0, "elapsed_time": "0:07:58", "remaining_time": "0:06:01"}
{"current_steps": 2860, "total_steps": 5000, "loss": 0.1526, "lr": 4.6164048593659065e-07, "epoch": 0.572, "percentage": 57.2, "elapsed_time": "0:08:00", "remaining_time": "0:05:59"}
{"current_steps": 2870, "total_steps": 5000, "loss": 0.2571, "lr": 4.5816107833384233e-07, "epoch": 0.574, "percentage": 57.4, "elapsed_time": "0:08:02", "remaining_time": "0:05:57"}
{"current_steps": 2880, "total_steps": 5000, "loss": 0.1313, "lr": 4.5468370990110997e-07, "epoch": 0.576, "percentage": 57.6, "elapsed_time": "0:08:03", "remaining_time": "0:05:56"}
{"current_steps": 2890, "total_steps": 5000, "loss": 0.2444, "lr": 4.512085501204253e-07, "epoch": 0.578, "percentage": 57.8, "elapsed_time": "0:08:05", "remaining_time": "0:05:54"}
{"current_steps": 2900, "total_steps": 5000, "loss": 0.1362, "lr": 4.477357683661733e-07, "epoch": 0.58, "percentage": 58.0, "elapsed_time": "0:08:07", "remaining_time": "0:05:52"}
{"current_steps": 2910, "total_steps": 5000, "loss": 0.1642, "lr": 4.442655338968373e-07, "epoch": 0.582, "percentage": 58.2, "elapsed_time": "0:08:09", "remaining_time": "0:05:51"}
{"current_steps": 2920, "total_steps": 5000, "loss": 0.2219, "lr": 4.407980158467495e-07, "epoch": 0.584, "percentage": 58.4, "elapsed_time": "0:08:11", "remaining_time": "0:05:49"}
{"current_steps": 2930, "total_steps": 5000, "loss": 0.1358, "lr": 4.3733338321784777e-07, "epoch": 0.586, "percentage": 58.6, "elapsed_time": "0:08:12", "remaining_time": "0:05:47"}
{"current_steps": 2940, "total_steps": 5000, "loss": 0.2484, "lr": 4.338718048714387e-07, "epoch": 0.588, "percentage": 58.8, "elapsed_time": "0:08:14", "remaining_time": "0:05:46"}
{"current_steps": 2950, "total_steps": 5000, "loss": 0.2279, "lr": 4.304134495199674e-07, "epoch": 0.59, "percentage": 59.0, "elapsed_time": "0:08:16", "remaining_time": "0:05:44"}
{"current_steps": 2960, "total_steps": 5000, "loss": 0.155, "lr": 4.2695848571879424e-07, "epoch": 0.592, "percentage": 59.2, "elapsed_time": "0:08:17", "remaining_time": "0:05:43"}
{"current_steps": 2970, "total_steps": 5000, "loss": 0.2144, "lr": 4.23507081857981e-07, "epoch": 0.594, "percentage": 59.4, "elapsed_time": "0:08:19", "remaining_time": "0:05:41"}
{"current_steps": 2980, "total_steps": 5000, "loss": 0.1034, "lr": 4.200594061540826e-07, "epoch": 0.596, "percentage": 59.6, "elapsed_time": "0:08:20", "remaining_time": "0:05:39"}
{"current_steps": 2990, "total_steps": 5000, "loss": 0.084, "lr": 4.166156266419489e-07, "epoch": 0.598, "percentage": 59.8, "elapsed_time": "0:08:22", "remaining_time": "0:05:37"}
{"current_steps": 3000, "total_steps": 5000, "loss": 0.1944, "lr": 4.131759111665348e-07, "epoch": 0.6, "percentage": 60.0, "elapsed_time": "0:08:23", "remaining_time": "0:05:35"}
{"current_steps": 3010, "total_steps": 5000, "loss": 0.1805, "lr": 4.0974042737472005e-07, "epoch": 0.602, "percentage": 60.2, "elapsed_time": "0:08:25", "remaining_time": "0:05:34"}
{"current_steps": 3020, "total_steps": 5000, "loss": 0.121, "lr": 4.0630934270713755e-07, "epoch": 0.604, "percentage": 60.4, "elapsed_time": "0:08:26", "remaining_time": "0:05:32"}
{"current_steps": 3030, "total_steps": 5000, "loss": 0.1631, "lr": 4.028828243900141e-07, "epoch": 0.606, "percentage": 60.6, "elapsed_time": "0:08:28", "remaining_time": "0:05:30"}
{"current_steps": 3040, "total_steps": 5000, "loss": 0.1618, "lr": 3.9946103942701775e-07, "epoch": 0.608, "percentage": 60.8, "elapsed_time": "0:08:30", "remaining_time": "0:05:28"}
{"current_steps": 3050, "total_steps": 5000, "loss": 0.3025, "lr": 3.960441545911204e-07, "epoch": 0.61, "percentage": 61.0, "elapsed_time": "0:08:32", "remaining_time": "0:05:27"}
{"current_steps": 3060, "total_steps": 5000, "loss": 0.3172, "lr": 3.9263233641646836e-07, "epoch": 0.612, "percentage": 61.2, "elapsed_time": "0:08:34", "remaining_time": "0:05:26"}
{"current_steps": 3070, "total_steps": 5000, "loss": 0.1701, "lr": 3.8922575119026635e-07, "epoch": 0.614, "percentage": 61.4, "elapsed_time": "0:08:36", "remaining_time": "0:05:24"}
{"current_steps": 3080, "total_steps": 5000, "loss": 0.2202, "lr": 3.8582456494467206e-07, "epoch": 0.616, "percentage": 61.6, "elapsed_time": "0:08:38", "remaining_time": "0:05:23"}
{"current_steps": 3090, "total_steps": 5000, "loss": 0.1956, "lr": 3.8242894344870495e-07, "epoch": 0.618, "percentage": 61.8, "elapsed_time": "0:08:40", "remaining_time": "0:05:21"}
{"current_steps": 3100, "total_steps": 5000, "loss": 0.1548, "lr": 3.790390522001662e-07, "epoch": 0.62, "percentage": 62.0, "elapsed_time": "0:08:42", "remaining_time": "0:05:20"}
{"current_steps": 3110, "total_steps": 5000, "loss": 0.1647, "lr": 3.7565505641757266e-07, "epoch": 0.622, "percentage": 62.2, "elapsed_time": "0:08:43", "remaining_time": "0:05:18"}
{"current_steps": 3120, "total_steps": 5000, "loss": 0.1113, "lr": 3.722771210321048e-07, "epoch": 0.624, "percentage": 62.4, "elapsed_time": "0:08:45", "remaining_time": "0:05:16"}
{"current_steps": 3130, "total_steps": 5000, "loss": 0.2053, "lr": 3.689054106795677e-07, "epoch": 0.626, "percentage": 62.6, "elapsed_time": "0:08:47", "remaining_time": "0:05:14"}
{"current_steps": 3140, "total_steps": 5000, "loss": 0.153, "lr": 3.6554008969236715e-07, "epoch": 0.628, "percentage": 62.8, "elapsed_time": "0:08:48", "remaining_time": "0:05:13"}
{"current_steps": 3150, "total_steps": 5000, "loss": 0.1937, "lr": 3.621813220915004e-07, "epoch": 0.63, "percentage": 63.0, "elapsed_time": "0:08:50", "remaining_time": "0:05:11"}
{"current_steps": 3160, "total_steps": 5000, "loss": 0.0739, "lr": 3.5882927157856167e-07, "epoch": 0.632, "percentage": 63.2, "elapsed_time": "0:08:51", "remaining_time": "0:05:09"}
{"current_steps": 3170, "total_steps": 5000, "loss": 0.1403, "lr": 3.554841015277641e-07, "epoch": 0.634, "percentage": 63.4, "elapsed_time": "0:08:53", "remaining_time": "0:05:07"}
{"current_steps": 3180, "total_steps": 5000, "loss": 0.186, "lr": 3.521459749779768e-07, "epoch": 0.636, "percentage": 63.6, "elapsed_time": "0:08:54", "remaining_time": "0:05:06"}
{"current_steps": 3190, "total_steps": 5000, "loss": 0.2311, "lr": 3.488150546247778e-07, "epoch": 0.638, "percentage": 63.8, "elapsed_time": "0:08:56", "remaining_time": "0:05:04"}
{"current_steps": 3200, "total_steps": 5000, "loss": 0.2173, "lr": 3.454915028125263e-07, "epoch": 0.64, "percentage": 64.0, "elapsed_time": "0:08:58", "remaining_time": "0:05:03"}
{"current_steps": 3210, "total_steps": 5000, "loss": 0.1359, "lr": 3.421754815264488e-07, "epoch": 0.642, "percentage": 64.2, "elapsed_time": "0:09:00", "remaining_time": "0:05:01"}
{"current_steps": 3220, "total_steps": 5000, "loss": 0.2212, "lr": 3.388671523847445e-07, "epoch": 0.644, "percentage": 64.4, "elapsed_time": "0:09:02", "remaining_time": "0:04:59"}
{"current_steps": 3230, "total_steps": 5000, "loss": 0.2237, "lr": 3.3556667663070835e-07, "epoch": 0.646, "percentage": 64.6, "elapsed_time": "0:09:04", "remaining_time": "0:04:58"}
{"current_steps": 3240, "total_steps": 5000, "loss": 0.1364, "lr": 3.3227421512487255e-07, "epoch": 0.648, "percentage": 64.8, "elapsed_time": "0:09:06", "remaining_time": "0:04:56"}
{"current_steps": 3250, "total_steps": 5000, "loss": 0.1204, "lr": 3.2898992833716563e-07, "epoch": 0.65, "percentage": 65.0, "elapsed_time": "0:09:07", "remaining_time": "0:04:54"}
{"current_steps": 3260, "total_steps": 5000, "loss": 0.1476, "lr": 3.257139763390925e-07, "epoch": 0.652, "percentage": 65.2, "elapsed_time": "0:09:09", "remaining_time": "0:04:53"}
{"current_steps": 3270, "total_steps": 5000, "loss": 0.2089, "lr": 3.2244651879593156e-07, "epoch": 0.654, "percentage": 65.4, "elapsed_time": "0:09:10", "remaining_time": "0:04:51"}
{"current_steps": 3280, "total_steps": 5000, "loss": 0.1223, "lr": 3.191877149589539e-07, "epoch": 0.656, "percentage": 65.6, "elapsed_time": "0:09:12", "remaining_time": "0:04:49"}
{"current_steps": 3290, "total_steps": 5000, "loss": 0.1637, "lr": 3.15937723657661e-07, "epoch": 0.658, "percentage": 65.8, "elapsed_time": "0:09:13", "remaining_time": "0:04:47"}
{"current_steps": 3300, "total_steps": 5000, "loss": 0.1503, "lr": 3.1269670329204393e-07, "epoch": 0.66, "percentage": 66.0, "elapsed_time": "0:09:15", "remaining_time": "0:04:46"}
{"current_steps": 3310, "total_steps": 5000, "loss": 0.1257, "lr": 3.0946481182486297e-07, "epoch": 0.662, "percentage": 66.2, "elapsed_time": "0:09:16", "remaining_time": "0:04:44"}
{"current_steps": 3320, "total_steps": 5000, "loss": 0.214, "lr": 3.0624220677394854e-07, "epoch": 0.664, "percentage": 66.4, "elapsed_time": "0:09:18", "remaining_time": "0:04:42"}
{"current_steps": 3330, "total_steps": 5000, "loss": 0.3025, "lr": 3.0302904520452443e-07, "epoch": 0.666, "percentage": 66.6, "elapsed_time": "0:09:21", "remaining_time": "0:04:41"}
{"current_steps": 3340, "total_steps": 5000, "loss": 0.2296, "lr": 2.9982548372155256e-07, "epoch": 0.668, "percentage": 66.8, "elapsed_time": "0:09:22", "remaining_time": "0:04:39"}
{"current_steps": 3350, "total_steps": 5000, "loss": 0.1282, "lr": 2.9663167846209996e-07, "epoch": 0.67, "percentage": 67.0, "elapsed_time": "0:09:24", "remaining_time": "0:04:37"}
{"current_steps": 3360, "total_steps": 5000, "loss": 0.2626, "lr": 2.9344778508772914e-07, "epoch": 0.672, "percentage": 67.2, "elapsed_time": "0:09:26", "remaining_time": "0:04:36"}
{"current_steps": 3370, "total_steps": 5000, "loss": 0.1824, "lr": 2.902739587769114e-07, "epoch": 0.674, "percentage": 67.4, "elapsed_time": "0:09:28", "remaining_time": "0:04:34"}
{"current_steps": 3380, "total_steps": 5000, "loss": 0.0924, "lr": 2.8711035421746363e-07, "epoch": 0.676, "percentage": 67.6, "elapsed_time": "0:09:29", "remaining_time": "0:04:33"}
{"current_steps": 3390, "total_steps": 5000, "loss": 0.2372, "lr": 2.8395712559900874e-07, "epoch": 0.678, "percentage": 67.8, "elapsed_time": "0:09:31", "remaining_time": "0:04:31"}
{"current_steps": 3400, "total_steps": 5000, "loss": 0.3402, "lr": 2.808144266054612e-07, "epoch": 0.68, "percentage": 68.0, "elapsed_time": "0:09:33", "remaining_time": "0:04:29"}
{"current_steps": 3410, "total_steps": 5000, "loss": 0.1164, "lr": 2.776824104075364e-07, "epoch": 0.682, "percentage": 68.2, "elapsed_time": "0:09:35", "remaining_time": "0:04:28"}
{"current_steps": 3420, "total_steps": 5000, "loss": 0.1703, "lr": 2.745612296552847e-07, "epoch": 0.684, "percentage": 68.4, "elapsed_time": "0:09:36", "remaining_time": "0:04:26"}
{"current_steps": 3430, "total_steps": 5000, "loss": 0.1506, "lr": 2.71451036470653e-07, "epoch": 0.686, "percentage": 68.6, "elapsed_time": "0:09:38", "remaining_time": "0:04:24"}
{"current_steps": 3440, "total_steps": 5000, "loss": 0.0584, "lr": 2.683519824400692e-07, "epoch": 0.688, "percentage": 68.8, "elapsed_time": "0:09:39", "remaining_time": "0:04:22"}
{"current_steps": 3450, "total_steps": 5000, "loss": 0.191, "lr": 2.6526421860705473e-07, "epoch": 0.69, "percentage": 69.0, "elapsed_time": "0:09:40", "remaining_time": "0:04:20"}
{"current_steps": 3460, "total_steps": 5000, "loss": 0.105, "lr": 2.621878954648623e-07, "epoch": 0.692, "percentage": 69.2, "elapsed_time": "0:09:42", "remaining_time": "0:04:19"}
{"current_steps": 3470, "total_steps": 5000, "loss": 0.1391, "lr": 2.591231629491423e-07, "epoch": 0.694, "percentage": 69.4, "elapsed_time": "0:09:43", "remaining_time": "0:04:17"}
{"current_steps": 3480, "total_steps": 5000, "loss": 0.0587, "lr": 2.5607017043063353e-07, "epoch": 0.696, "percentage": 69.6, "elapsed_time": "0:09:44", "remaining_time": "0:04:15"}
{"current_steps": 3490, "total_steps": 5000, "loss": 0.1216, "lr": 2.530290667078846e-07, "epoch": 0.698, "percentage": 69.8, "elapsed_time": "0:09:45", "remaining_time": "0:04:13"}
{"current_steps": 3500, "total_steps": 5000, "loss": 0.135, "lr": 2.500000000000001e-07, "epoch": 0.7, "percentage": 70.0, "elapsed_time": "0:09:47", "remaining_time": "0:04:11"}
{"current_steps": 3510, "total_steps": 5000, "loss": 0.1703, "lr": 2.469831179394182e-07, "epoch": 0.702, "percentage": 70.2, "elapsed_time": "0:09:49", "remaining_time": "0:04:10"}
{"current_steps": 3520, "total_steps": 5000, "loss": 0.1405, "lr": 2.439785675647143e-07, "epoch": 0.704, "percentage": 70.4, "elapsed_time": "0:09:50", "remaining_time": "0:04:08"}
{"current_steps": 3530, "total_steps": 5000, "loss": 0.1304, "lr": 2.4098649531343494e-07, "epoch": 0.706, "percentage": 70.6, "elapsed_time": "0:09:52", "remaining_time": "0:04:06"}
{"current_steps": 3540, "total_steps": 5000, "loss": 0.1459, "lr": 2.380070470149605e-07, "epoch": 0.708, "percentage": 70.8, "elapsed_time": "0:09:53", "remaining_time": "0:04:04"}
{"current_steps": 3550, "total_steps": 5000, "loss": 0.1686, "lr": 2.350403678833976e-07, "epoch": 0.71, "percentage": 71.0, "elapsed_time": "0:09:55", "remaining_time": "0:04:03"}
{"current_steps": 3560, "total_steps": 5000, "loss": 0.0577, "lr": 2.3208660251050156e-07, "epoch": 0.712, "percentage": 71.2, "elapsed_time": "0:09:56", "remaining_time": "0:04:01"}
{"current_steps": 3570, "total_steps": 5000, "loss": 0.1926, "lr": 2.2914589485863012e-07, "epoch": 0.714, "percentage": 71.4, "elapsed_time": "0:09:58", "remaining_time": "0:03:59"}
{"current_steps": 3580, "total_steps": 5000, "loss": 0.1804, "lr": 2.262183882537249e-07, "epoch": 0.716, "percentage": 71.6, "elapsed_time": "0:10:00", "remaining_time": "0:03:58"}
{"current_steps": 3590, "total_steps": 5000, "loss": 0.1719, "lr": 2.23304225378328e-07, "epoch": 0.718, "percentage": 71.8, "elapsed_time": "0:10:01", "remaining_time": "0:03:56"}
{"current_steps": 3600, "total_steps": 5000, "loss": 0.1243, "lr": 2.2040354826462664e-07, "epoch": 0.72, "percentage": 72.0, "elapsed_time": "0:10:03", "remaining_time": "0:03:54"}
{"current_steps": 3610, "total_steps": 5000, "loss": 0.2673, "lr": 2.1751649828753106e-07, "epoch": 0.722, "percentage": 72.2, "elapsed_time": "0:10:05", "remaining_time": "0:03:53"}
{"current_steps": 3620, "total_steps": 5000, "loss": 0.2706, "lr": 2.146432161577842e-07, "epoch": 0.724, "percentage": 72.4, "elapsed_time": "0:10:07", "remaining_time": "0:03:51"}
{"current_steps": 3630, "total_steps": 5000, "loss": 0.1071, "lr": 2.117838419151034e-07, "epoch": 0.726, "percentage": 72.6, "elapsed_time": "0:10:09", "remaining_time": "0:03:49"}
{"current_steps": 3640, "total_steps": 5000, "loss": 0.1986, "lr": 2.0893851492135532e-07, "epoch": 0.728, "percentage": 72.8, "elapsed_time": "0:10:11", "remaining_time": "0:03:48"}
{"current_steps": 3650, "total_steps": 5000, "loss": 0.119, "lr": 2.0610737385376348e-07, "epoch": 0.73, "percentage": 73.0, "elapsed_time": "0:10:12", "remaining_time": "0:03:46"}
{"current_steps": 3660, "total_steps": 5000, "loss": 0.2265, "lr": 2.0329055669814933e-07, "epoch": 0.732, "percentage": 73.2, "elapsed_time": "0:10:14", "remaining_time": "0:03:44"}
{"current_steps": 3670, "total_steps": 5000, "loss": 0.2161, "lr": 2.0048820074220711e-07, "epoch": 0.734, "percentage": 73.4, "elapsed_time": "0:10:16", "remaining_time": "0:03:43"}
{"current_steps": 3680, "total_steps": 5000, "loss": 0.16, "lr": 1.9770044256881258e-07, "epoch": 0.736, "percentage": 73.6, "elapsed_time": "0:10:17", "remaining_time": "0:03:41"}
{"current_steps": 3690, "total_steps": 5000, "loss": 0.1463, "lr": 1.9492741804936618e-07, "epoch": 0.738, "percentage": 73.8, "elapsed_time": "0:10:19", "remaining_time": "0:03:39"}
{"current_steps": 3700, "total_steps": 5000, "loss": 0.1947, "lr": 1.9216926233717084e-07, "epoch": 0.74, "percentage": 74.0, "elapsed_time": "0:10:21", "remaining_time": "0:03:38"}
{"current_steps": 3710, "total_steps": 5000, "loss": 0.1406, "lr": 1.8942610986084484e-07, "epoch": 0.742, "percentage": 74.2, "elapsed_time": "0:10:22", "remaining_time": "0:03:36"}
{"current_steps": 3720, "total_steps": 5000, "loss": 0.1202, "lr": 1.8669809431776988e-07, "epoch": 0.744, "percentage": 74.4, "elapsed_time": "0:10:24", "remaining_time": "0:03:34"}
{"current_steps": 3730, "total_steps": 5000, "loss": 0.2124, "lr": 1.8398534866757455e-07, "epoch": 0.746, "percentage": 74.6, "elapsed_time": "0:10:26", "remaining_time": "0:03:33"}
{"current_steps": 3740, "total_steps": 5000, "loss": 0.1895, "lr": 1.812880051256551e-07, "epoch": 0.748, "percentage": 74.8, "elapsed_time": "0:10:27", "remaining_time": "0:03:31"}
{"current_steps": 3750, "total_steps": 5000, "loss": 0.2038, "lr": 1.7860619515673032e-07, "epoch": 0.75, "percentage": 75.0, "elapsed_time": "0:10:29", "remaining_time": "0:03:29"}
{"current_steps": 3760, "total_steps": 5000, "loss": 0.196, "lr": 1.7594004946843454e-07, "epoch": 0.752, "percentage": 75.2, "elapsed_time": "0:10:31", "remaining_time": "0:03:28"}
{"current_steps": 3770, "total_steps": 5000, "loss": 0.2282, "lr": 1.7328969800494726e-07, "epoch": 0.754, "percentage": 75.4, "elapsed_time": "0:10:33", "remaining_time": "0:03:26"}
{"current_steps": 3780, "total_steps": 5000, "loss": 0.218, "lr": 1.7065526994065972e-07, "epoch": 0.756, "percentage": 75.6, "elapsed_time": "0:10:35", "remaining_time": "0:03:25"}
{"current_steps": 3790, "total_steps": 5000, "loss": 0.1441, "lr": 1.6803689367387918e-07, "epoch": 0.758, "percentage": 75.8, "elapsed_time": "0:10:37", "remaining_time": "0:03:23"}
{"current_steps": 3800, "total_steps": 5000, "loss": 0.1192, "lr": 1.6543469682057104e-07, "epoch": 0.76, "percentage": 76.0, "elapsed_time": "0:10:38", "remaining_time": "0:03:21"}
{"current_steps": 3810, "total_steps": 5000, "loss": 0.312, "lr": 1.6284880620813846e-07, "epoch": 0.762, "percentage": 76.2, "elapsed_time": "0:10:40", "remaining_time": "0:03:20"}
{"current_steps": 3820, "total_steps": 5000, "loss": 0.1183, "lr": 1.6027934786924185e-07, "epoch": 0.764, "percentage": 76.4, "elapsed_time": "0:10:42", "remaining_time": "0:03:18"}
{"current_steps": 3830, "total_steps": 5000, "loss": 0.1443, "lr": 1.5772644703565564e-07, "epoch": 0.766, "percentage": 76.6, "elapsed_time": "0:10:44", "remaining_time": "0:03:16"}
{"current_steps": 3840, "total_steps": 5000, "loss": 0.1898, "lr": 1.551902281321651e-07, "epoch": 0.768, "percentage": 76.8, "elapsed_time": "0:10:45", "remaining_time": "0:03:15"}
{"current_steps": 3850, "total_steps": 5000, "loss": 0.2005, "lr": 1.5267081477050131e-07, "epoch": 0.77, "percentage": 77.0, "elapsed_time": "0:10:47", "remaining_time": "0:03:13"}
{"current_steps": 3860, "total_steps": 5000, "loss": 0.233, "lr": 1.5016832974331723e-07, "epoch": 0.772, "percentage": 77.2, "elapsed_time": "0:10:49", "remaining_time": "0:03:11"}
{"current_steps": 3870, "total_steps": 5000, "loss": 0.116, "lr": 1.4768289501820263e-07, "epoch": 0.774, "percentage": 77.4, "elapsed_time": "0:10:50", "remaining_time": "0:03:09"}
{"current_steps": 3880, "total_steps": 5000, "loss": 0.1664, "lr": 1.4521463173173965e-07, "epoch": 0.776, "percentage": 77.6, "elapsed_time": "0:10:52", "remaining_time": "0:03:08"}
{"current_steps": 3890, "total_steps": 5000, "loss": 0.2515, "lr": 1.4276366018359842e-07, "epoch": 0.778, "percentage": 77.8, "elapsed_time": "0:10:54", "remaining_time": "0:03:06"}
{"current_steps": 3900, "total_steps": 5000, "loss": 0.1169, "lr": 1.4033009983067452e-07, "epoch": 0.78, "percentage": 78.0, "elapsed_time": "0:10:55", "remaining_time": "0:03:04"}
{"current_steps": 3910, "total_steps": 5000, "loss": 0.0598, "lr": 1.3791406928126635e-07, "epoch": 0.782, "percentage": 78.2, "elapsed_time": "0:10:56", "remaining_time": "0:03:03"}
{"current_steps": 3920, "total_steps": 5000, "loss": 0.313, "lr": 1.3551568628929432e-07, "epoch": 0.784, "percentage": 78.4, "elapsed_time": "0:10:59", "remaining_time": "0:03:01"}
{"current_steps": 3930, "total_steps": 5000, "loss": 0.1434, "lr": 1.3313506774856175e-07, "epoch": 0.786, "percentage": 78.6, "elapsed_time": "0:11:00", "remaining_time": "0:02:59"}
{"current_steps": 3940, "total_steps": 5000, "loss": 0.1699, "lr": 1.3077232968705805e-07, "epoch": 0.788, "percentage": 78.8, "elapsed_time": "0:11:02", "remaining_time": "0:02:58"}
{"current_steps": 3950, "total_steps": 5000, "loss": 0.0944, "lr": 1.284275872613028e-07, "epoch": 0.79, "percentage": 79.0, "elapsed_time": "0:11:03", "remaining_time": "0:02:56"}
{"current_steps": 3960, "total_steps": 5000, "loss": 0.1457, "lr": 1.2610095475073413e-07, "epoch": 0.792, "percentage": 79.2, "elapsed_time": "0:11:05", "remaining_time": "0:02:54"}
{"current_steps": 3970, "total_steps": 5000, "loss": 0.1773, "lr": 1.2379254555213786e-07, "epoch": 0.794, "percentage": 79.4, "elapsed_time": "0:11:07", "remaining_time": "0:02:53"}
{"current_steps": 3980, "total_steps": 5000, "loss": 0.1956, "lr": 1.2150247217412185e-07, "epoch": 0.796, "percentage": 79.6, "elapsed_time": "0:11:09", "remaining_time": "0:02:51"}
{"current_steps": 3990, "total_steps": 5000, "loss": 0.2178, "lr": 1.192308462316317e-07, "epoch": 0.798, "percentage": 79.8, "elapsed_time": "0:11:11", "remaining_time": "0:02:49"}
{"current_steps": 4000, "total_steps": 5000, "loss": 0.1097, "lr": 1.1697777844051104e-07, "epoch": 0.8, "percentage": 80.0, "elapsed_time": "0:11:12", "remaining_time": "0:02:48"}
{"current_steps": 4010, "total_steps": 5000, "loss": 0.1238, "lr": 1.1474337861210543e-07, "epoch": 0.802, "percentage": 80.2, "elapsed_time": "0:11:14", "remaining_time": "0:02:46"}
{"current_steps": 4020, "total_steps": 5000, "loss": 0.1531, "lr": 1.1252775564791023e-07, "epoch": 0.804, "percentage": 80.4, "elapsed_time": "0:11:16", "remaining_time": "0:02:44"}
{"current_steps": 4030, "total_steps": 5000, "loss": 0.1954, "lr": 1.1033101753426282e-07, "epoch": 0.806, "percentage": 80.6, "elapsed_time": "0:11:18", "remaining_time": "0:02:43"}
{"current_steps": 4040, "total_steps": 5000, "loss": 0.1523, "lr": 1.0815327133708013e-07, "epoch": 0.808, "percentage": 80.8, "elapsed_time": "0:11:19", "remaining_time": "0:02:41"}
{"current_steps": 4050, "total_steps": 5000, "loss": 0.2394, "lr": 1.0599462319663904e-07, "epoch": 0.81, "percentage": 81.0, "elapsed_time": "0:11:21", "remaining_time": "0:02:39"}
{"current_steps": 4060, "total_steps": 5000, "loss": 0.177, "lr": 1.038551783224047e-07, "epoch": 0.812, "percentage": 81.2, "elapsed_time": "0:11:22", "remaining_time": "0:02:38"}
{"current_steps": 4070, "total_steps": 5000, "loss": 0.2096, "lr": 1.0173504098790186e-07, "epoch": 0.814, "percentage": 81.4, "elapsed_time": "0:11:24", "remaining_time": "0:02:36"}
{"current_steps": 4080, "total_steps": 5000, "loss": 0.1841, "lr": 9.963431452563331e-08, "epoch": 0.816, "percentage": 81.6, "elapsed_time": "0:11:26", "remaining_time": "0:02:34"}
{"current_steps": 4090, "total_steps": 5000, "loss": 0.2342, "lr": 9.755310132204297e-08, "epoch": 0.818, "percentage": 81.8, "elapsed_time": "0:11:28", "remaining_time": "0:02:33"}
{"current_steps": 4100, "total_steps": 5000, "loss": 0.1852, "lr": 9.549150281252632e-08, "epoch": 0.82, "percentage": 82.0, "elapsed_time": "0:11:30", "remaining_time": "0:02:31"}
{"current_steps": 4110, "total_steps": 5000, "loss": 0.1833, "lr": 9.344961947648622e-08, "epoch": 0.822, "percentage": 82.2, "elapsed_time": "0:11:32", "remaining_time": "0:02:29"}
{"current_steps": 4120, "total_steps": 5000, "loss": 0.1947, "lr": 9.142755083243575e-08, "epoch": 0.824, "percentage": 82.4, "elapsed_time": "0:11:33", "remaining_time": "0:02:28"}
{"current_steps": 4130, "total_steps": 5000, "loss": 0.076, "lr": 8.942539543314798e-08, "epoch": 0.826, "percentage": 82.6, "elapsed_time": "0:11:34", "remaining_time": "0:02:26"}
{"current_steps": 4140, "total_steps": 5000, "loss": 0.2069, "lr": 8.744325086085247e-08, "epoch": 0.828, "percentage": 82.8, "elapsed_time": "0:11:36", "remaining_time": "0:02:24"}
{"current_steps": 4150, "total_steps": 5000, "loss": 0.1643, "lr": 8.548121372247919e-08, "epoch": 0.83, "percentage": 83.0, "elapsed_time": "0:11:38", "remaining_time": "0:02:23"}
{"current_steps": 4160, "total_steps": 5000, "loss": 0.2224, "lr": 8.353937964495028e-08, "epoch": 0.832, "percentage": 83.2, "elapsed_time": "0:11:40", "remaining_time": "0:02:21"}
{"current_steps": 4170, "total_steps": 5000, "loss": 0.1301, "lr": 8.161784327051919e-08, "epoch": 0.834, "percentage": 83.4, "elapsed_time": "0:11:42", "remaining_time": "0:02:19"}
{"current_steps": 4180, "total_steps": 5000, "loss": 0.1773, "lr": 7.971669825215787e-08, "epoch": 0.836, "percentage": 83.6, "elapsed_time": "0:11:43", "remaining_time": "0:02:18"}
{"current_steps": 4190, "total_steps": 5000, "loss": 0.1138, "lr": 7.783603724899257e-08, "epoch": 0.838, "percentage": 83.8, "elapsed_time": "0:11:45", "remaining_time": "0:02:16"}
{"current_steps": 4200, "total_steps": 5000, "loss": 0.1488, "lr": 7.597595192178702e-08, "epoch": 0.84, "percentage": 84.0, "elapsed_time": "0:11:47", "remaining_time": "0:02:14"}
{"current_steps": 4210, "total_steps": 5000, "loss": 0.1796, "lr": 7.413653292847616e-08, "epoch": 0.842, "percentage": 84.2, "elapsed_time": "0:11:48", "remaining_time": "0:02:12"}
{"current_steps": 4220, "total_steps": 5000, "loss": 0.1397, "lr": 7.23178699197467e-08, "epoch": 0.844, "percentage": 84.4, "elapsed_time": "0:11:50", "remaining_time": "0:02:11"}
{"current_steps": 4230, "total_steps": 5000, "loss": 0.2052, "lr": 7.052005153466778e-08, "epoch": 0.846, "percentage": 84.6, "elapsed_time": "0:11:52", "remaining_time": "0:02:09"}
{"current_steps": 4240, "total_steps": 5000, "loss": 0.1092, "lr": 6.874316539637126e-08, "epoch": 0.848, "percentage": 84.8, "elapsed_time": "0:11:53", "remaining_time": "0:02:07"}
{"current_steps": 4250, "total_steps": 5000, "loss": 0.1719, "lr": 6.698729810778064e-08, "epoch": 0.85, "percentage": 85.0, "elapsed_time": "0:11:55", "remaining_time": "0:02:06"}
{"current_steps": 4260, "total_steps": 5000, "loss": 0.1219, "lr": 6.52525352473905e-08, "epoch": 0.852, "percentage": 85.2, "elapsed_time": "0:11:56", "remaining_time": "0:02:04"}
{"current_steps": 4270, "total_steps": 5000, "loss": 0.2, "lr": 6.353896136509524e-08, "epoch": 0.854, "percentage": 85.4, "elapsed_time": "0:11:58", "remaining_time": "0:02:02"}
{"current_steps": 4280, "total_steps": 5000, "loss": 0.1799, "lr": 6.184665997806831e-08, "epoch": 0.856, "percentage": 85.6, "elapsed_time": "0:12:00", "remaining_time": "0:02:01"}
{"current_steps": 4290, "total_steps": 5000, "loss": 0.1433, "lr": 6.017571356669182e-08, "epoch": 0.858, "percentage": 85.8, "elapsed_time": "0:12:02", "remaining_time": "0:01:59"}
{"current_steps": 4300, "total_steps": 5000, "loss": 0.1186, "lr": 5.8526203570536504e-08, "epoch": 0.86, "percentage": 86.0, "elapsed_time": "0:12:03", "remaining_time": "0:01:57"}
{"current_steps": 4310, "total_steps": 5000, "loss": 0.0735, "lr": 5.689821038439263e-08, "epoch": 0.862, "percentage": 86.2, "elapsed_time": "0:12:04", "remaining_time": "0:01:55"}
{"current_steps": 4320, "total_steps": 5000, "loss": 0.1492, "lr": 5.529181335435124e-08, "epoch": 0.864, "percentage": 86.4, "elapsed_time": "0:12:06", "remaining_time": "0:01:54"}
{"current_steps": 4330, "total_steps": 5000, "loss": 0.1799, "lr": 5.37070907739372e-08, "epoch": 0.866, "percentage": 86.6, "elapsed_time": "0:12:08", "remaining_time": "0:01:52"}
{"current_steps": 4340, "total_steps": 5000, "loss": 0.1065, "lr": 5.2144119880293544e-08, "epoch": 0.868, "percentage": 86.8, "elapsed_time": "0:12:09", "remaining_time": "0:01:50"}
{"current_steps": 4350, "total_steps": 5000, "loss": 0.099, "lr": 5.060297685041659e-08, "epoch": 0.87, "percentage": 87.0, "elapsed_time": "0:12:10", "remaining_time": "0:01:49"}
{"current_steps": 4360, "total_steps": 5000, "loss": 0.1629, "lr": 4.908373679744315e-08, "epoch": 0.872, "percentage": 87.2, "elapsed_time": "0:12:12", "remaining_time": "0:01:47"}
{"current_steps": 4370, "total_steps": 5000, "loss": 0.2036, "lr": 4.758647376699032e-08, "epoch": 0.874, "percentage": 87.4, "elapsed_time": "0:12:14", "remaining_time": "0:01:45"}
{"current_steps": 4380, "total_steps": 5000, "loss": 0.2889, "lr": 4.611126073354571e-08, "epoch": 0.876, "percentage": 87.6, "elapsed_time": "0:12:16", "remaining_time": "0:01:44"}
{"current_steps": 4390, "total_steps": 5000, "loss": 0.1851, "lr": 4.465816959691149e-08, "epoch": 0.878, "percentage": 87.8, "elapsed_time": "0:12:18", "remaining_time": "0:01:42"}
{"current_steps": 4400, "total_steps": 5000, "loss": 0.1456, "lr": 4.322727117869951e-08, "epoch": 0.88, "percentage": 88.0, "elapsed_time": "0:12:20", "remaining_time": "0:01:40"}
{"current_steps": 4410, "total_steps": 5000, "loss": 0.1768, "lr": 4.181863521888018e-08, "epoch": 0.882, "percentage": 88.2, "elapsed_time": "0:12:21", "remaining_time": "0:01:39"}
{"current_steps": 4420, "total_steps": 5000, "loss": 0.1277, "lr": 4.043233037238281e-08, "epoch": 0.884, "percentage": 88.4, "elapsed_time": "0:12:23", "remaining_time": "0:01:37"}
{"current_steps": 4430, "total_steps": 5000, "loss": 0.1703, "lr": 3.9068424205749794e-08, "epoch": 0.886, "percentage": 88.6, "elapsed_time": "0:12:24", "remaining_time": "0:01:35"}
{"current_steps": 4440, "total_steps": 5000, "loss": 0.2544, "lr": 3.7726983193843485e-08, "epoch": 0.888, "percentage": 88.8, "elapsed_time": "0:12:26", "remaining_time": "0:01:34"}
{"current_steps": 4450, "total_steps": 5000, "loss": 0.2151, "lr": 3.6408072716606345e-08, "epoch": 0.89, "percentage": 89.0, "elapsed_time": "0:12:28", "remaining_time": "0:01:32"}
{"current_steps": 4460, "total_steps": 5000, "loss": 0.176, "lr": 3.5111757055874326e-08, "epoch": 0.892, "percentage": 89.2, "elapsed_time": "0:12:30", "remaining_time": "0:01:30"}
{"current_steps": 4470, "total_steps": 5000, "loss": 0.2373, "lr": 3.3838099392243915e-08, "epoch": 0.894, "percentage": 89.4, "elapsed_time": "0:12:32", "remaining_time": "0:01:29"}
{"current_steps": 4480, "total_steps": 5000, "loss": 0.164, "lr": 3.258716180199278e-08, "epoch": 0.896, "percentage": 89.6, "elapsed_time": "0:12:34", "remaining_time": "0:01:27"}
{"current_steps": 4490, "total_steps": 5000, "loss": 0.1046, "lr": 3.135900525405427e-08, "epoch": 0.898, "percentage": 89.8, "elapsed_time": "0:12:35", "remaining_time": "0:01:25"}
{"current_steps": 4500, "total_steps": 5000, "loss": 0.2611, "lr": 3.015368960704584e-08, "epoch": 0.9, "percentage": 90.0, "elapsed_time": "0:12:37", "remaining_time": "0:01:24"}
{"current_steps": 4510, "total_steps": 5000, "loss": 0.1775, "lr": 2.8971273606351655e-08, "epoch": 0.902, "percentage": 90.2, "elapsed_time": "0:12:39", "remaining_time": "0:01:22"}
{"current_steps": 4520, "total_steps": 5000, "loss": 0.1356, "lr": 2.78118148812595e-08, "epoch": 0.904, "percentage": 90.4, "elapsed_time": "0:12:41", "remaining_time": "0:01:20"}
{"current_steps": 4530, "total_steps": 5000, "loss": 0.1012, "lr": 2.667536994215186e-08, "epoch": 0.906, "percentage": 90.6, "elapsed_time": "0:12:42", "remaining_time": "0:01:19"}
{"current_steps": 4540, "total_steps": 5000, "loss": 0.161, "lr": 2.5561994177751732e-08, "epoch": 0.908, "percentage": 90.8, "elapsed_time": "0:12:44", "remaining_time": "0:01:17"}
{"current_steps": 4550, "total_steps": 5000, "loss": 0.1071, "lr": 2.4471741852423233e-08, "epoch": 0.91, "percentage": 91.0, "elapsed_time": "0:12:45", "remaining_time": "0:01:15"}
{"current_steps": 4560, "total_steps": 5000, "loss": 0.1885, "lr": 2.3404666103526537e-08, "epoch": 0.912, "percentage": 91.2, "elapsed_time": "0:12:47", "remaining_time": "0:01:14"}
{"current_steps": 4570, "total_steps": 5000, "loss": 0.1625, "lr": 2.2360818938828187e-08, "epoch": 0.914, "percentage": 91.4, "elapsed_time": "0:12:49", "remaining_time": "0:01:12"}
{"current_steps": 4580, "total_steps": 5000, "loss": 0.1428, "lr": 2.1340251233966377e-08, "epoch": 0.916, "percentage": 91.6, "elapsed_time": "0:12:50", "remaining_time": "0:01:10"}
{"current_steps": 4590, "total_steps": 5000, "loss": 0.0951, "lr": 2.0343012729971243e-08, "epoch": 0.918, "percentage": 91.8, "elapsed_time": "0:12:51", "remaining_time": "0:01:08"}
{"current_steps": 4600, "total_steps": 5000, "loss": 0.1465, "lr": 1.936915203084055e-08, "epoch": 0.92, "percentage": 92.0, "elapsed_time": "0:12:53", "remaining_time": "0:01:07"}
{"current_steps": 4610, "total_steps": 5000, "loss": 0.1271, "lr": 1.8418716601170947e-08, "epoch": 0.922, "percentage": 92.2, "elapsed_time": "0:12:54", "remaining_time": "0:01:05"}
{"current_steps": 4620, "total_steps": 5000, "loss": 0.2774, "lr": 1.7491752763844292e-08, "epoch": 0.924, "percentage": 92.4, "elapsed_time": "0:12:56", "remaining_time": "0:01:03"}
{"current_steps": 4630, "total_steps": 5000, "loss": 0.0971, "lr": 1.658830569777031e-08, "epoch": 0.926, "percentage": 92.6, "elapsed_time": "0:12:58", "remaining_time": "0:01:02"}
{"current_steps": 4640, "total_steps": 5000, "loss": 0.1732, "lr": 1.570841943568446e-08, "epoch": 0.928, "percentage": 92.8, "elapsed_time": "0:13:00", "remaining_time": "0:01:00"}
{"current_steps": 4650, "total_steps": 5000, "loss": 0.2104, "lr": 1.4852136862001763e-08, "epoch": 0.93, "percentage": 93.0, "elapsed_time": "0:13:02", "remaining_time": "0:00:58"}
{"current_steps": 4660, "total_steps": 5000, "loss": 0.1566, "lr": 1.4019499710726911e-08, "epoch": 0.932, "percentage": 93.2, "elapsed_time": "0:13:03", "remaining_time": "0:00:57"}
{"current_steps": 4670, "total_steps": 5000, "loss": 0.1474, "lr": 1.3210548563419855e-08, "epoch": 0.934, "percentage": 93.4, "elapsed_time": "0:13:05", "remaining_time": "0:00:55"}
{"current_steps": 4680, "total_steps": 5000, "loss": 0.188, "lr": 1.2425322847218367e-08, "epoch": 0.936, "percentage": 93.6, "elapsed_time": "0:13:07", "remaining_time": "0:00:53"}
{"current_steps": 4690, "total_steps": 5000, "loss": 0.267, "lr": 1.166386083291604e-08, "epoch": 0.938, "percentage": 93.8, "elapsed_time": "0:13:09", "remaining_time": "0:00:52"}
{"current_steps": 4700, "total_steps": 5000, "loss": 0.1695, "lr": 1.0926199633097154e-08, "epoch": 0.94, "percentage": 94.0, "elapsed_time": "0:13:10", "remaining_time": "0:00:50"}
{"current_steps": 4710, "total_steps": 5000, "loss": 0.1638, "lr": 1.0212375200327972e-08, "epoch": 0.942, "percentage": 94.2, "elapsed_time": "0:13:12", "remaining_time": "0:00:48"}
{"current_steps": 4720, "total_steps": 5000, "loss": 0.0975, "lr": 9.522422325404233e-09, "epoch": 0.944, "percentage": 94.4, "elapsed_time": "0:13:13", "remaining_time": "0:00:47"}
{"current_steps": 4730, "total_steps": 5000, "loss": 0.222, "lr": 8.856374635655695e-09, "epoch": 0.946, "percentage": 94.6, "elapsed_time": "0:13:15", "remaining_time": "0:00:45"}
{"current_steps": 4740, "total_steps": 5000, "loss": 0.231, "lr": 8.214264593307096e-09, "epoch": 0.948, "percentage": 94.8, "elapsed_time": "0:13:17", "remaining_time": "0:00:43"}
{"current_steps": 4750, "total_steps": 5000, "loss": 0.1663, "lr": 7.59612349389599e-09, "epoch": 0.95, "percentage": 95.0, "elapsed_time": "0:13:19", "remaining_time": "0:00:42"}
{"current_steps": 4760, "total_steps": 5000, "loss": 0.0257, "lr": 7.0019814647475636e-09, "epoch": 0.952, "percentage": 95.2, "elapsed_time": "0:13:20", "remaining_time": "0:00:40"}
{"current_steps": 4770, "total_steps": 5000, "loss": 0.1889, "lr": 6.431867463506046e-09, "epoch": 0.954, "percentage": 95.4, "elapsed_time": "0:13:22", "remaining_time": "0:00:38"}
{"current_steps": 4780, "total_steps": 5000, "loss": 0.1868, "lr": 5.8858092767236076e-09, "epoch": 0.956, "percentage": 95.6, "elapsed_time": "0:13:24", "remaining_time": "0:00:37"}
{"current_steps": 4790, "total_steps": 5000, "loss": 0.1717, "lr": 5.3638335185058335e-09, "epoch": 0.958, "percentage": 95.8, "elapsed_time": "0:13:26", "remaining_time": "0:00:35"}
{"current_steps": 4800, "total_steps": 5000, "loss": 0.2297, "lr": 4.865965629214819e-09, "epoch": 0.96, "percentage": 96.0, "elapsed_time": "0:13:27", "remaining_time": "0:00:33"}
{"current_steps": 4810, "total_steps": 5000, "loss": 0.1361, "lr": 4.3922298742291585e-09, "epoch": 0.962, "percentage": 96.2, "elapsed_time": "0:13:29", "remaining_time": "0:00:31"}
{"current_steps": 4820, "total_steps": 5000, "loss": 0.2255, "lr": 3.9426493427611175e-09, "epoch": 0.964, "percentage": 96.4, "elapsed_time": "0:13:31", "remaining_time": "0:00:30"}
{"current_steps": 4830, "total_steps": 5000, "loss": 0.0901, "lr": 3.5172459467315286e-09, "epoch": 0.966, "percentage": 96.6, "elapsed_time": "0:13:32", "remaining_time": "0:00:28"}
{"current_steps": 4840, "total_steps": 5000, "loss": 0.1494, "lr": 3.116040419701815e-09, "epoch": 0.968, "percentage": 96.8, "elapsed_time": "0:13:34", "remaining_time": "0:00:26"}
{"current_steps": 4850, "total_steps": 5000, "loss": 0.1942, "lr": 2.739052315863355e-09, "epoch": 0.97, "percentage": 97.0, "elapsed_time": "0:13:36", "remaining_time": "0:00:25"}
{"current_steps": 4860, "total_steps": 5000, "loss": 0.2808, "lr": 2.3863000090844076e-09, "epoch": 0.972, "percentage": 97.2, "elapsed_time": "0:13:38", "remaining_time": "0:00:23"}
{"current_steps": 4870, "total_steps": 5000, "loss": 0.1568, "lr": 2.057800692014833e-09, "epoch": 0.974, "percentage": 97.4, "elapsed_time": "0:13:40", "remaining_time": "0:00:21"}
{"current_steps": 4880, "total_steps": 5000, "loss": 0.2903, "lr": 1.7535703752478147e-09, "epoch": 0.976, "percentage": 97.6, "elapsed_time": "0:13:42", "remaining_time": "0:00:20"}
{"current_steps": 4890, "total_steps": 5000, "loss": 0.1769, "lr": 1.4736238865398765e-09, "epoch": 0.978, "percentage": 97.8, "elapsed_time": "0:13:44", "remaining_time": "0:00:18"}
{"current_steps": 4900, "total_steps": 5000, "loss": 0.2085, "lr": 1.217974870087901e-09, "epoch": 0.98, "percentage": 98.0, "elapsed_time": "0:13:45", "remaining_time": "0:00:16"}
{"current_steps": 4910, "total_steps": 5000, "loss": 0.2805, "lr": 9.866357858642205e-10, "epoch": 0.982, "percentage": 98.2, "elapsed_time": "0:13:48", "remaining_time": "0:00:15"}
{"current_steps": 4920, "total_steps": 5000, "loss": 0.1946, "lr": 7.79617909009489e-10, "epoch": 0.984, "percentage": 98.4, "elapsed_time": "0:13:49", "remaining_time": "0:00:13"}
{"current_steps": 4930, "total_steps": 5000, "loss": 0.1962, "lr": 5.969313292830125e-10, "epoch": 0.986, "percentage": 98.6, "elapsed_time": "0:13:51", "remaining_time": "0:00:11"}
{"current_steps": 4940, "total_steps": 5000, "loss": 0.2275, "lr": 4.3858495057080836e-10, "epoch": 0.988, "percentage": 98.8, "elapsed_time": "0:13:53", "remaining_time": "0:00:10"}
{"current_steps": 4950, "total_steps": 5000, "loss": 0.1399, "lr": 3.0458649045211894e-10, "epoch": 0.99, "percentage": 99.0, "elapsed_time": "0:13:54", "remaining_time": "0:00:08"}
{"current_steps": 4960, "total_steps": 5000, "loss": 0.0858, "lr": 1.9494247982282387e-10, "epoch": 0.992, "percentage": 99.2, "elapsed_time": "0:13:56", "remaining_time": "0:00:06"}
{"current_steps": 4970, "total_steps": 5000, "loss": 0.1723, "lr": 1.0965826257725019e-10, "epoch": 0.994, "percentage": 99.4, "elapsed_time": "0:13:57", "remaining_time": "0:00:05"}
{"current_steps": 4980, "total_steps": 5000, "loss": 0.1224, "lr": 4.873799534788059e-11, "epoch": 0.996, "percentage": 99.6, "elapsed_time": "0:13:59", "remaining_time": "0:00:03"}
{"current_steps": 4990, "total_steps": 5000, "loss": 0.2228, "lr": 1.2184647302626582e-11, "epoch": 0.998, "percentage": 99.8, "elapsed_time": "0:14:01", "remaining_time": "0:00:01"}
{"current_steps": 5000, "total_steps": 5000, "loss": 0.2303, "lr": 0.0, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "0:14:03", "remaining_time": "0:00:00"}
{"current_steps": 5000, "total_steps": 5000, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "0:14:03", "remaining_time": "0:00:00"}